{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 49180, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010166734444896299, "grad_norm": 5.670832633972168, "learning_rate": 2.03334688897926e-09, "loss": 0.8058, "step": 1 }, { "epoch": 0.00020333468889792598, "grad_norm": 5.870175838470459, "learning_rate": 4.06669377795852e-09, "loss": 0.8379, "step": 2 }, { "epoch": 0.00030500203334688896, "grad_norm": 5.98758602142334, "learning_rate": 6.10004066693778e-09, "loss": 0.8563, "step": 3 }, { "epoch": 0.00040666937779585197, "grad_norm": 5.9070963859558105, "learning_rate": 8.13338755591704e-09, "loss": 0.8339, "step": 4 }, { "epoch": 0.0005083367222448149, "grad_norm": 6.067192554473877, "learning_rate": 1.0166734444896299e-08, "loss": 0.8689, "step": 5 }, { "epoch": 0.0006100040666937779, "grad_norm": 6.001704216003418, "learning_rate": 1.220008133387556e-08, "loss": 0.8585, "step": 6 }, { "epoch": 0.0007116714111427409, "grad_norm": 6.128532886505127, "learning_rate": 1.423342822285482e-08, "loss": 0.846, "step": 7 }, { "epoch": 0.0008133387555917039, "grad_norm": 5.93750524520874, "learning_rate": 1.626677511183408e-08, "loss": 0.8273, "step": 8 }, { "epoch": 0.0009150061000406669, "grad_norm": 5.7091803550720215, "learning_rate": 1.830012200081334e-08, "loss": 0.8103, "step": 9 }, { "epoch": 0.0010166734444896298, "grad_norm": 5.934541702270508, "learning_rate": 2.0333468889792598e-08, "loss": 0.871, "step": 10 }, { "epoch": 0.0011183407889385928, "grad_norm": 5.904168605804443, "learning_rate": 2.2366815778771857e-08, "loss": 0.8252, "step": 11 }, { "epoch": 0.0012200081333875558, "grad_norm": 6.026313781738281, "learning_rate": 2.440016266775112e-08, "loss": 0.8806, "step": 12 }, { "epoch": 0.0013216754778365189, "grad_norm": 5.891928672790527, "learning_rate": 2.643350955673038e-08, "loss": 0.8208, "step": 13 }, { "epoch": 0.0014233428222854819, "grad_norm": 5.946406841278076, "learning_rate": 2.846685644570964e-08, "loss": 0.8674, "step": 14 }, { "epoch": 0.0015250101667344449, "grad_norm": 5.91173791885376, "learning_rate": 3.05002033346889e-08, "loss": 0.8339, "step": 15 }, { "epoch": 0.0016266775111834079, "grad_norm": 6.0547027587890625, "learning_rate": 3.253355022366816e-08, "loss": 0.8911, "step": 16 }, { "epoch": 0.0017283448556323709, "grad_norm": 5.92095422744751, "learning_rate": 3.456689711264742e-08, "loss": 0.8838, "step": 17 }, { "epoch": 0.0018300122000813339, "grad_norm": 5.846055507659912, "learning_rate": 3.660024400162668e-08, "loss": 0.8303, "step": 18 }, { "epoch": 0.0019316795445302969, "grad_norm": 6.130923271179199, "learning_rate": 3.863359089060594e-08, "loss": 0.8487, "step": 19 }, { "epoch": 0.0020333468889792597, "grad_norm": 5.7639689445495605, "learning_rate": 4.0666937779585195e-08, "loss": 0.8104, "step": 20 }, { "epoch": 0.0021350142334282227, "grad_norm": 5.782861232757568, "learning_rate": 4.270028466856446e-08, "loss": 0.819, "step": 21 }, { "epoch": 0.0022366815778771857, "grad_norm": 6.012997627258301, "learning_rate": 4.4733631557543715e-08, "loss": 0.8576, "step": 22 }, { "epoch": 0.0023383489223261487, "grad_norm": 6.1153082847595215, "learning_rate": 4.676697844652298e-08, "loss": 0.881, "step": 23 }, { "epoch": 0.0024400162667751117, "grad_norm": 5.76541805267334, "learning_rate": 4.880032533550224e-08, "loss": 0.8141, "step": 24 }, { "epoch": 0.0025416836112240747, "grad_norm": 6.264101982116699, "learning_rate": 5.08336722244815e-08, "loss": 0.8718, "step": 25 }, { "epoch": 0.0026433509556730377, "grad_norm": 6.070985317230225, "learning_rate": 5.286701911346076e-08, "loss": 0.8723, "step": 26 }, { "epoch": 0.0027450183001220007, "grad_norm": 5.605895519256592, "learning_rate": 5.490036600244002e-08, "loss": 0.8442, "step": 27 }, { "epoch": 0.0028466856445709637, "grad_norm": 5.672568321228027, "learning_rate": 5.693371289141928e-08, "loss": 0.863, "step": 28 }, { "epoch": 0.0029483529890199267, "grad_norm": 5.915777206420898, "learning_rate": 5.8967059780398536e-08, "loss": 0.8581, "step": 29 }, { "epoch": 0.0030500203334688897, "grad_norm": 5.5926194190979, "learning_rate": 6.10004066693778e-08, "loss": 0.8436, "step": 30 }, { "epoch": 0.0031516876779178527, "grad_norm": 5.905734062194824, "learning_rate": 6.303375355835706e-08, "loss": 0.8508, "step": 31 }, { "epoch": 0.0032533550223668157, "grad_norm": 5.6202592849731445, "learning_rate": 6.506710044733633e-08, "loss": 0.8308, "step": 32 }, { "epoch": 0.0033550223668157787, "grad_norm": 5.838216781616211, "learning_rate": 6.710044733631558e-08, "loss": 0.8494, "step": 33 }, { "epoch": 0.0034566897112647418, "grad_norm": 5.973973274230957, "learning_rate": 6.913379422529484e-08, "loss": 0.8706, "step": 34 }, { "epoch": 0.0035583570557137048, "grad_norm": 5.98486852645874, "learning_rate": 7.11671411142741e-08, "loss": 0.8406, "step": 35 }, { "epoch": 0.0036600244001626678, "grad_norm": 5.967552661895752, "learning_rate": 7.320048800325336e-08, "loss": 0.8439, "step": 36 }, { "epoch": 0.0037616917446116308, "grad_norm": 5.793359279632568, "learning_rate": 7.523383489223263e-08, "loss": 0.801, "step": 37 }, { "epoch": 0.0038633590890605938, "grad_norm": 5.985463619232178, "learning_rate": 7.726718178121188e-08, "loss": 0.8355, "step": 38 }, { "epoch": 0.003965026433509556, "grad_norm": 5.856400966644287, "learning_rate": 7.930052867019113e-08, "loss": 0.8438, "step": 39 }, { "epoch": 0.004066693777958519, "grad_norm": 5.940770149230957, "learning_rate": 8.133387555917039e-08, "loss": 0.8374, "step": 40 }, { "epoch": 0.004168361122407482, "grad_norm": 5.802334308624268, "learning_rate": 8.336722244814965e-08, "loss": 0.8122, "step": 41 }, { "epoch": 0.004270028466856445, "grad_norm": 5.841108798980713, "learning_rate": 8.540056933712892e-08, "loss": 0.8027, "step": 42 }, { "epoch": 0.004371695811305408, "grad_norm": 5.857553005218506, "learning_rate": 8.743391622610818e-08, "loss": 0.8622, "step": 43 }, { "epoch": 0.004473363155754371, "grad_norm": 6.00078821182251, "learning_rate": 8.946726311508743e-08, "loss": 0.8637, "step": 44 }, { "epoch": 0.004575030500203334, "grad_norm": 5.696402072906494, "learning_rate": 9.150061000406669e-08, "loss": 0.8303, "step": 45 }, { "epoch": 0.004676697844652297, "grad_norm": 5.5771002769470215, "learning_rate": 9.353395689304596e-08, "loss": 0.7862, "step": 46 }, { "epoch": 0.00477836518910126, "grad_norm": 5.569007873535156, "learning_rate": 9.556730378202522e-08, "loss": 0.8267, "step": 47 }, { "epoch": 0.004880032533550223, "grad_norm": 5.348180770874023, "learning_rate": 9.760065067100448e-08, "loss": 0.8058, "step": 48 }, { "epoch": 0.004981699877999186, "grad_norm": 5.666584491729736, "learning_rate": 9.963399755998373e-08, "loss": 0.7887, "step": 49 }, { "epoch": 0.005083367222448149, "grad_norm": 5.534947395324707, "learning_rate": 1.01667344448963e-07, "loss": 0.8221, "step": 50 }, { "epoch": 0.005185034566897112, "grad_norm": 5.576561450958252, "learning_rate": 1.0370069133794226e-07, "loss": 0.8205, "step": 51 }, { "epoch": 0.005286701911346075, "grad_norm": 5.563027858734131, "learning_rate": 1.0573403822692152e-07, "loss": 0.8397, "step": 52 }, { "epoch": 0.005388369255795038, "grad_norm": 5.567111968994141, "learning_rate": 1.0776738511590078e-07, "loss": 0.8097, "step": 53 }, { "epoch": 0.005490036600244001, "grad_norm": 5.3945183753967285, "learning_rate": 1.0980073200488003e-07, "loss": 0.7803, "step": 54 }, { "epoch": 0.005591703944692964, "grad_norm": 5.540281295776367, "learning_rate": 1.118340788938593e-07, "loss": 0.8189, "step": 55 }, { "epoch": 0.0056933712891419274, "grad_norm": 5.357436180114746, "learning_rate": 1.1386742578283856e-07, "loss": 0.7893, "step": 56 }, { "epoch": 0.0057950386335908904, "grad_norm": 5.100893020629883, "learning_rate": 1.1590077267181782e-07, "loss": 0.7704, "step": 57 }, { "epoch": 0.0058967059780398534, "grad_norm": 5.362837791442871, "learning_rate": 1.1793411956079707e-07, "loss": 0.7791, "step": 58 }, { "epoch": 0.0059983733224888165, "grad_norm": 5.448094367980957, "learning_rate": 1.1996746644977635e-07, "loss": 0.8085, "step": 59 }, { "epoch": 0.0061000406669377795, "grad_norm": 5.731112957000732, "learning_rate": 1.220008133387556e-07, "loss": 0.8578, "step": 60 }, { "epoch": 0.0062017080113867425, "grad_norm": 5.38215446472168, "learning_rate": 1.2403416022773485e-07, "loss": 0.8123, "step": 61 }, { "epoch": 0.0063033753558357055, "grad_norm": 5.369283199310303, "learning_rate": 1.2606750711671412e-07, "loss": 0.7971, "step": 62 }, { "epoch": 0.0064050427002846685, "grad_norm": 5.47731351852417, "learning_rate": 1.2810085400569337e-07, "loss": 0.8521, "step": 63 }, { "epoch": 0.0065067100447336315, "grad_norm": 4.8916916847229, "learning_rate": 1.3013420089467265e-07, "loss": 0.7854, "step": 64 }, { "epoch": 0.0066083773891825945, "grad_norm": 4.655156135559082, "learning_rate": 1.321675477836519e-07, "loss": 0.8015, "step": 65 }, { "epoch": 0.0067100447336315575, "grad_norm": 4.617881774902344, "learning_rate": 1.3420089467263115e-07, "loss": 0.8049, "step": 66 }, { "epoch": 0.0068117120780805205, "grad_norm": 4.82505989074707, "learning_rate": 1.3623424156161043e-07, "loss": 0.8165, "step": 67 }, { "epoch": 0.0069133794225294835, "grad_norm": 4.758472442626953, "learning_rate": 1.3826758845058968e-07, "loss": 0.8236, "step": 68 }, { "epoch": 0.0070150467669784465, "grad_norm": 4.638957977294922, "learning_rate": 1.4030093533956895e-07, "loss": 0.7923, "step": 69 }, { "epoch": 0.0071167141114274095, "grad_norm": 4.492940425872803, "learning_rate": 1.423342822285482e-07, "loss": 0.7782, "step": 70 }, { "epoch": 0.0072183814558763725, "grad_norm": 4.823362827301025, "learning_rate": 1.4436762911752745e-07, "loss": 0.7933, "step": 71 }, { "epoch": 0.0073200488003253355, "grad_norm": 4.660890102386475, "learning_rate": 1.4640097600650673e-07, "loss": 0.8121, "step": 72 }, { "epoch": 0.0074217161447742985, "grad_norm": 4.57586145401001, "learning_rate": 1.4843432289548598e-07, "loss": 0.8001, "step": 73 }, { "epoch": 0.0075233834892232615, "grad_norm": 4.48982572555542, "learning_rate": 1.5046766978446526e-07, "loss": 0.788, "step": 74 }, { "epoch": 0.0076250508336722245, "grad_norm": 4.477692127227783, "learning_rate": 1.525010166734445e-07, "loss": 0.7645, "step": 75 }, { "epoch": 0.0077267181781211875, "grad_norm": 4.263734340667725, "learning_rate": 1.5453436356242376e-07, "loss": 0.741, "step": 76 }, { "epoch": 0.00782838552257015, "grad_norm": 4.187688827514648, "learning_rate": 1.5656771045140303e-07, "loss": 0.7607, "step": 77 }, { "epoch": 0.007930052867019113, "grad_norm": 4.468946933746338, "learning_rate": 1.5860105734038225e-07, "loss": 0.789, "step": 78 }, { "epoch": 0.008031720211468077, "grad_norm": 4.089660167694092, "learning_rate": 1.6063440422936156e-07, "loss": 0.7779, "step": 79 }, { "epoch": 0.008133387555917039, "grad_norm": 4.4403204917907715, "learning_rate": 1.6266775111834078e-07, "loss": 0.7772, "step": 80 }, { "epoch": 0.008235054900366003, "grad_norm": 4.446974754333496, "learning_rate": 1.6470109800732006e-07, "loss": 0.7887, "step": 81 }, { "epoch": 0.008336722244814965, "grad_norm": 4.220653533935547, "learning_rate": 1.667344448962993e-07, "loss": 0.7758, "step": 82 }, { "epoch": 0.008438389589263929, "grad_norm": 4.202587127685547, "learning_rate": 1.6876779178527858e-07, "loss": 0.7764, "step": 83 }, { "epoch": 0.00854005693371289, "grad_norm": 4.276822566986084, "learning_rate": 1.7080113867425783e-07, "loss": 0.778, "step": 84 }, { "epoch": 0.008641724278161855, "grad_norm": 4.028562545776367, "learning_rate": 1.728344855632371e-07, "loss": 0.7705, "step": 85 }, { "epoch": 0.008743391622610817, "grad_norm": 4.094489574432373, "learning_rate": 1.7486783245221636e-07, "loss": 0.8358, "step": 86 }, { "epoch": 0.00884505896705978, "grad_norm": 3.5485644340515137, "learning_rate": 1.7690117934119564e-07, "loss": 0.7482, "step": 87 }, { "epoch": 0.008946726311508743, "grad_norm": 3.1307289600372314, "learning_rate": 1.7893452623017486e-07, "loss": 0.7092, "step": 88 }, { "epoch": 0.009048393655957707, "grad_norm": 3.1037652492523193, "learning_rate": 1.8096787311915414e-07, "loss": 0.7488, "step": 89 }, { "epoch": 0.009150061000406669, "grad_norm": 2.7413153648376465, "learning_rate": 1.8300122000813339e-07, "loss": 0.7443, "step": 90 }, { "epoch": 0.009251728344855633, "grad_norm": 2.5475080013275146, "learning_rate": 1.8503456689711266e-07, "loss": 0.7449, "step": 91 }, { "epoch": 0.009353395689304595, "grad_norm": 2.5498523712158203, "learning_rate": 1.870679137860919e-07, "loss": 0.7489, "step": 92 }, { "epoch": 0.009455063033753559, "grad_norm": 2.485433340072632, "learning_rate": 1.891012606750712e-07, "loss": 0.7258, "step": 93 }, { "epoch": 0.00955673037820252, "grad_norm": 2.5321433544158936, "learning_rate": 1.9113460756405044e-07, "loss": 0.7472, "step": 94 }, { "epoch": 0.009658397722651485, "grad_norm": 2.5493104457855225, "learning_rate": 1.9316795445302971e-07, "loss": 0.7468, "step": 95 }, { "epoch": 0.009760065067100447, "grad_norm": 2.4223859310150146, "learning_rate": 1.9520130134200896e-07, "loss": 0.7265, "step": 96 }, { "epoch": 0.00986173241154941, "grad_norm": 2.3864715099334717, "learning_rate": 1.9723464823098824e-07, "loss": 0.7216, "step": 97 }, { "epoch": 0.009963399755998373, "grad_norm": 2.579179286956787, "learning_rate": 1.9926799511996746e-07, "loss": 0.7226, "step": 98 }, { "epoch": 0.010065067100447337, "grad_norm": 2.385096788406372, "learning_rate": 2.0130134200894674e-07, "loss": 0.7362, "step": 99 }, { "epoch": 0.010166734444896299, "grad_norm": 2.3760077953338623, "learning_rate": 2.03334688897926e-07, "loss": 0.7511, "step": 100 }, { "epoch": 0.010268401789345263, "grad_norm": 2.2647342681884766, "learning_rate": 2.0536803578690527e-07, "loss": 0.7047, "step": 101 }, { "epoch": 0.010370069133794225, "grad_norm": 2.4037458896636963, "learning_rate": 2.0740138267588452e-07, "loss": 0.7722, "step": 102 }, { "epoch": 0.010471736478243189, "grad_norm": 2.314349889755249, "learning_rate": 2.094347295648638e-07, "loss": 0.6872, "step": 103 }, { "epoch": 0.01057340382269215, "grad_norm": 2.196850299835205, "learning_rate": 2.1146807645384304e-07, "loss": 0.761, "step": 104 }, { "epoch": 0.010675071167141115, "grad_norm": 2.150430202484131, "learning_rate": 2.1350142334282232e-07, "loss": 0.7078, "step": 105 }, { "epoch": 0.010776738511590077, "grad_norm": 2.235858201980591, "learning_rate": 2.1553477023180157e-07, "loss": 0.7114, "step": 106 }, { "epoch": 0.01087840585603904, "grad_norm": 2.121310234069824, "learning_rate": 2.1756811712078084e-07, "loss": 0.677, "step": 107 }, { "epoch": 0.010980073200488003, "grad_norm": 2.1846940517425537, "learning_rate": 2.1960146400976007e-07, "loss": 0.7317, "step": 108 }, { "epoch": 0.011081740544936967, "grad_norm": 2.1461737155914307, "learning_rate": 2.2163481089873934e-07, "loss": 0.721, "step": 109 }, { "epoch": 0.011183407889385929, "grad_norm": 2.0259158611297607, "learning_rate": 2.236681577877186e-07, "loss": 0.7323, "step": 110 }, { "epoch": 0.011285075233834893, "grad_norm": 1.9080307483673096, "learning_rate": 2.2570150467669787e-07, "loss": 0.7457, "step": 111 }, { "epoch": 0.011386742578283855, "grad_norm": 1.9312000274658203, "learning_rate": 2.2773485156567712e-07, "loss": 0.7361, "step": 112 }, { "epoch": 0.011488409922732819, "grad_norm": 1.7540158033370972, "learning_rate": 2.297681984546564e-07, "loss": 0.7024, "step": 113 }, { "epoch": 0.011590077267181781, "grad_norm": 1.8384944200515747, "learning_rate": 2.3180154534363565e-07, "loss": 0.6857, "step": 114 }, { "epoch": 0.011691744611630745, "grad_norm": 1.6657557487487793, "learning_rate": 2.3383489223261492e-07, "loss": 0.695, "step": 115 }, { "epoch": 0.011793411956079707, "grad_norm": 1.725393295288086, "learning_rate": 2.3586823912159415e-07, "loss": 0.6786, "step": 116 }, { "epoch": 0.01189507930052867, "grad_norm": 1.5224730968475342, "learning_rate": 2.3790158601057342e-07, "loss": 0.6604, "step": 117 }, { "epoch": 0.011996746644977633, "grad_norm": 1.4707621335983276, "learning_rate": 2.399349328995527e-07, "loss": 0.6587, "step": 118 }, { "epoch": 0.012098413989426597, "grad_norm": 1.257912278175354, "learning_rate": 2.41968279788532e-07, "loss": 0.6657, "step": 119 }, { "epoch": 0.012200081333875559, "grad_norm": 1.3974709510803223, "learning_rate": 2.440016266775112e-07, "loss": 0.682, "step": 120 }, { "epoch": 0.012301748678324523, "grad_norm": 1.4024285078048706, "learning_rate": 2.460349735664905e-07, "loss": 0.6683, "step": 121 }, { "epoch": 0.012403416022773485, "grad_norm": 1.373729944229126, "learning_rate": 2.480683204554697e-07, "loss": 0.6618, "step": 122 }, { "epoch": 0.012505083367222449, "grad_norm": 1.4144959449768066, "learning_rate": 2.50101667344449e-07, "loss": 0.7192, "step": 123 }, { "epoch": 0.012606750711671411, "grad_norm": 1.2912664413452148, "learning_rate": 2.5213501423342825e-07, "loss": 0.7053, "step": 124 }, { "epoch": 0.012708418056120375, "grad_norm": 1.2790590524673462, "learning_rate": 2.541683611224075e-07, "loss": 0.6458, "step": 125 }, { "epoch": 0.012810085400569337, "grad_norm": 1.282686471939087, "learning_rate": 2.5620170801138675e-07, "loss": 0.6458, "step": 126 }, { "epoch": 0.0129117527450183, "grad_norm": 1.3339178562164307, "learning_rate": 2.58235054900366e-07, "loss": 0.65, "step": 127 }, { "epoch": 0.013013420089467263, "grad_norm": 1.2779150009155273, "learning_rate": 2.602684017893453e-07, "loss": 0.7032, "step": 128 }, { "epoch": 0.013115087433916227, "grad_norm": 1.211343765258789, "learning_rate": 2.623017486783246e-07, "loss": 0.6766, "step": 129 }, { "epoch": 0.013216754778365189, "grad_norm": 1.2840662002563477, "learning_rate": 2.643350955673038e-07, "loss": 0.6411, "step": 130 }, { "epoch": 0.013318422122814153, "grad_norm": 1.2345068454742432, "learning_rate": 2.663684424562831e-07, "loss": 0.645, "step": 131 }, { "epoch": 0.013420089467263115, "grad_norm": 1.1529237031936646, "learning_rate": 2.684017893452623e-07, "loss": 0.6124, "step": 132 }, { "epoch": 0.013521756811712079, "grad_norm": 1.2445119619369507, "learning_rate": 2.704351362342416e-07, "loss": 0.6388, "step": 133 }, { "epoch": 0.013623424156161041, "grad_norm": 1.1293812990188599, "learning_rate": 2.7246848312322085e-07, "loss": 0.6802, "step": 134 }, { "epoch": 0.013725091500610005, "grad_norm": 1.160652756690979, "learning_rate": 2.7450183001220013e-07, "loss": 0.6733, "step": 135 }, { "epoch": 0.013826758845058967, "grad_norm": 1.1229499578475952, "learning_rate": 2.7653517690117935e-07, "loss": 0.6193, "step": 136 }, { "epoch": 0.013928426189507931, "grad_norm": 1.1005268096923828, "learning_rate": 2.7856852379015863e-07, "loss": 0.6187, "step": 137 }, { "epoch": 0.014030093533956893, "grad_norm": 1.0743780136108398, "learning_rate": 2.806018706791379e-07, "loss": 0.6173, "step": 138 }, { "epoch": 0.014131760878405857, "grad_norm": 1.0720669031143188, "learning_rate": 2.826352175681172e-07, "loss": 0.6506, "step": 139 }, { "epoch": 0.014233428222854819, "grad_norm": 1.1136987209320068, "learning_rate": 2.846685644570964e-07, "loss": 0.6941, "step": 140 }, { "epoch": 0.014335095567303781, "grad_norm": 1.0360004901885986, "learning_rate": 2.8670191134607563e-07, "loss": 0.631, "step": 141 }, { "epoch": 0.014436762911752745, "grad_norm": 1.0038762092590332, "learning_rate": 2.887352582350549e-07, "loss": 0.5961, "step": 142 }, { "epoch": 0.014538430256201707, "grad_norm": 1.0346605777740479, "learning_rate": 2.907686051240342e-07, "loss": 0.6448, "step": 143 }, { "epoch": 0.014640097600650671, "grad_norm": 0.9600797295570374, "learning_rate": 2.9280195201301346e-07, "loss": 0.6417, "step": 144 }, { "epoch": 0.014741764945099633, "grad_norm": 0.962285578250885, "learning_rate": 2.948352989019927e-07, "loss": 0.6418, "step": 145 }, { "epoch": 0.014843432289548597, "grad_norm": 0.9603754878044128, "learning_rate": 2.9686864579097196e-07, "loss": 0.6005, "step": 146 }, { "epoch": 0.01494509963399756, "grad_norm": 0.8873457908630371, "learning_rate": 2.9890199267995123e-07, "loss": 0.6681, "step": 147 }, { "epoch": 0.015046766978446523, "grad_norm": 0.8543599247932434, "learning_rate": 3.009353395689305e-07, "loss": 0.6385, "step": 148 }, { "epoch": 0.015148434322895485, "grad_norm": 0.838042140007019, "learning_rate": 3.0296868645790973e-07, "loss": 0.6309, "step": 149 }, { "epoch": 0.015250101667344449, "grad_norm": 0.8199598789215088, "learning_rate": 3.05002033346889e-07, "loss": 0.6137, "step": 150 }, { "epoch": 0.015351769011793411, "grad_norm": 0.8192221522331238, "learning_rate": 3.0703538023586823e-07, "loss": 0.5815, "step": 151 }, { "epoch": 0.015453436356242375, "grad_norm": 0.8107279539108276, "learning_rate": 3.090687271248475e-07, "loss": 0.6353, "step": 152 }, { "epoch": 0.015555103700691337, "grad_norm": 0.8458641767501831, "learning_rate": 3.111020740138268e-07, "loss": 0.62, "step": 153 }, { "epoch": 0.0156567710451403, "grad_norm": 0.761311411857605, "learning_rate": 3.1313542090280606e-07, "loss": 0.6261, "step": 154 }, { "epoch": 0.015758438389589263, "grad_norm": 0.7077866792678833, "learning_rate": 3.151687677917853e-07, "loss": 0.6381, "step": 155 }, { "epoch": 0.015860105734038225, "grad_norm": 0.7554299831390381, "learning_rate": 3.172021146807645e-07, "loss": 0.6205, "step": 156 }, { "epoch": 0.01596177307848719, "grad_norm": 0.6949068307876587, "learning_rate": 3.1923546156974384e-07, "loss": 0.6234, "step": 157 }, { "epoch": 0.016063440422936153, "grad_norm": 0.6779155135154724, "learning_rate": 3.212688084587231e-07, "loss": 0.5768, "step": 158 }, { "epoch": 0.016165107767385115, "grad_norm": 0.693779706954956, "learning_rate": 3.2330215534770234e-07, "loss": 0.6204, "step": 159 }, { "epoch": 0.016266775111834077, "grad_norm": 0.6752498149871826, "learning_rate": 3.2533550223668156e-07, "loss": 0.6036, "step": 160 }, { "epoch": 0.016368442456283043, "grad_norm": 0.6137181520462036, "learning_rate": 3.273688491256609e-07, "loss": 0.6, "step": 161 }, { "epoch": 0.016470109800732005, "grad_norm": 0.6637799739837646, "learning_rate": 3.294021960146401e-07, "loss": 0.6192, "step": 162 }, { "epoch": 0.016571777145180967, "grad_norm": 0.6832599639892578, "learning_rate": 3.314355429036194e-07, "loss": 0.6346, "step": 163 }, { "epoch": 0.01667344448962993, "grad_norm": 0.6968366503715515, "learning_rate": 3.334688897925986e-07, "loss": 0.6241, "step": 164 }, { "epoch": 0.016775111834078895, "grad_norm": 0.6345505118370056, "learning_rate": 3.3550223668157794e-07, "loss": 0.5823, "step": 165 }, { "epoch": 0.016876779178527857, "grad_norm": 0.6267337203025818, "learning_rate": 3.3753558357055717e-07, "loss": 0.6021, "step": 166 }, { "epoch": 0.01697844652297682, "grad_norm": 0.6034756898880005, "learning_rate": 3.395689304595364e-07, "loss": 0.6225, "step": 167 }, { "epoch": 0.01708011386742578, "grad_norm": 0.633628249168396, "learning_rate": 3.4160227734851567e-07, "loss": 0.6049, "step": 168 }, { "epoch": 0.017181781211874747, "grad_norm": 0.5961881279945374, "learning_rate": 3.43635624237495e-07, "loss": 0.5689, "step": 169 }, { "epoch": 0.01728344855632371, "grad_norm": 0.5798847079277039, "learning_rate": 3.456689711264742e-07, "loss": 0.5758, "step": 170 }, { "epoch": 0.01738511590077267, "grad_norm": 0.6241961717605591, "learning_rate": 3.4770231801545344e-07, "loss": 0.567, "step": 171 }, { "epoch": 0.017486783245221633, "grad_norm": 0.6077755689620972, "learning_rate": 3.497356649044327e-07, "loss": 0.6162, "step": 172 }, { "epoch": 0.0175884505896706, "grad_norm": 0.5657208561897278, "learning_rate": 3.51769011793412e-07, "loss": 0.6147, "step": 173 }, { "epoch": 0.01769011793411956, "grad_norm": 0.5649067759513855, "learning_rate": 3.5380235868239127e-07, "loss": 0.6118, "step": 174 }, { "epoch": 0.017791785278568523, "grad_norm": 0.6001821756362915, "learning_rate": 3.558357055713705e-07, "loss": 0.5684, "step": 175 }, { "epoch": 0.017893452623017485, "grad_norm": 0.5623234510421753, "learning_rate": 3.578690524603497e-07, "loss": 0.6151, "step": 176 }, { "epoch": 0.01799511996746645, "grad_norm": 0.5705460906028748, "learning_rate": 3.5990239934932905e-07, "loss": 0.5533, "step": 177 }, { "epoch": 0.018096787311915413, "grad_norm": 0.533943235874176, "learning_rate": 3.6193574623830827e-07, "loss": 0.5627, "step": 178 }, { "epoch": 0.018198454656364375, "grad_norm": 0.6197754740715027, "learning_rate": 3.6396909312728755e-07, "loss": 0.6309, "step": 179 }, { "epoch": 0.018300122000813338, "grad_norm": 0.589789092540741, "learning_rate": 3.6600244001626677e-07, "loss": 0.6128, "step": 180 }, { "epoch": 0.018401789345262303, "grad_norm": 0.5393962860107422, "learning_rate": 3.680357869052461e-07, "loss": 0.5888, "step": 181 }, { "epoch": 0.018503456689711265, "grad_norm": 0.5771440267562866, "learning_rate": 3.700691337942253e-07, "loss": 0.5569, "step": 182 }, { "epoch": 0.018605124034160227, "grad_norm": 0.5519322156906128, "learning_rate": 3.721024806832046e-07, "loss": 0.5781, "step": 183 }, { "epoch": 0.01870679137860919, "grad_norm": 0.5145623683929443, "learning_rate": 3.741358275721838e-07, "loss": 0.6137, "step": 184 }, { "epoch": 0.018808458723058155, "grad_norm": 0.5220843553543091, "learning_rate": 3.7616917446116315e-07, "loss": 0.5497, "step": 185 }, { "epoch": 0.018910126067507117, "grad_norm": 0.5697413682937622, "learning_rate": 3.782025213501424e-07, "loss": 0.6, "step": 186 }, { "epoch": 0.01901179341195608, "grad_norm": 0.49897781014442444, "learning_rate": 3.802358682391216e-07, "loss": 0.6009, "step": 187 }, { "epoch": 0.01911346075640504, "grad_norm": 0.5992524027824402, "learning_rate": 3.822692151281009e-07, "loss": 0.6195, "step": 188 }, { "epoch": 0.019215128100854007, "grad_norm": 0.5185039043426514, "learning_rate": 3.8430256201708015e-07, "loss": 0.5799, "step": 189 }, { "epoch": 0.01931679544530297, "grad_norm": 0.5139262080192566, "learning_rate": 3.8633590890605943e-07, "loss": 0.5805, "step": 190 }, { "epoch": 0.01941846278975193, "grad_norm": 0.5364255905151367, "learning_rate": 3.8836925579503865e-07, "loss": 0.5858, "step": 191 }, { "epoch": 0.019520130134200894, "grad_norm": 0.5618810057640076, "learning_rate": 3.9040260268401793e-07, "loss": 0.5796, "step": 192 }, { "epoch": 0.01962179747864986, "grad_norm": 0.5162318348884583, "learning_rate": 3.924359495729972e-07, "loss": 0.561, "step": 193 }, { "epoch": 0.01972346482309882, "grad_norm": 0.5385941863059998, "learning_rate": 3.944692964619765e-07, "loss": 0.5489, "step": 194 }, { "epoch": 0.019825132167547783, "grad_norm": 0.5405482053756714, "learning_rate": 3.965026433509557e-07, "loss": 0.632, "step": 195 }, { "epoch": 0.019926799511996746, "grad_norm": 0.5006873607635498, "learning_rate": 3.985359902399349e-07, "loss": 0.5641, "step": 196 }, { "epoch": 0.02002846685644571, "grad_norm": 0.530721127986908, "learning_rate": 4.0056933712891426e-07, "loss": 0.5646, "step": 197 }, { "epoch": 0.020130134200894673, "grad_norm": 0.500575065612793, "learning_rate": 4.026026840178935e-07, "loss": 0.5685, "step": 198 }, { "epoch": 0.020231801545343635, "grad_norm": 0.5129515528678894, "learning_rate": 4.0463603090687276e-07, "loss": 0.5793, "step": 199 }, { "epoch": 0.020333468889792598, "grad_norm": 0.5044754147529602, "learning_rate": 4.06669377795852e-07, "loss": 0.582, "step": 200 }, { "epoch": 0.020435136234241563, "grad_norm": 0.5432814359664917, "learning_rate": 4.087027246848313e-07, "loss": 0.5593, "step": 201 }, { "epoch": 0.020536803578690525, "grad_norm": 0.4925788640975952, "learning_rate": 4.1073607157381053e-07, "loss": 0.542, "step": 202 }, { "epoch": 0.020638470923139488, "grad_norm": 0.4812926650047302, "learning_rate": 4.127694184627898e-07, "loss": 0.5371, "step": 203 }, { "epoch": 0.02074013826758845, "grad_norm": 0.5054100751876831, "learning_rate": 4.1480276535176903e-07, "loss": 0.589, "step": 204 }, { "epoch": 0.020841805612037415, "grad_norm": 0.49479296803474426, "learning_rate": 4.1683611224074836e-07, "loss": 0.5896, "step": 205 }, { "epoch": 0.020943472956486377, "grad_norm": 0.5525335073471069, "learning_rate": 4.188694591297276e-07, "loss": 0.5867, "step": 206 }, { "epoch": 0.02104514030093534, "grad_norm": 0.47335463762283325, "learning_rate": 4.209028060187068e-07, "loss": 0.5768, "step": 207 }, { "epoch": 0.0211468076453843, "grad_norm": 0.49078604578971863, "learning_rate": 4.229361529076861e-07, "loss": 0.5522, "step": 208 }, { "epoch": 0.021248474989833267, "grad_norm": 0.5065805315971375, "learning_rate": 4.2496949979666536e-07, "loss": 0.5766, "step": 209 }, { "epoch": 0.02135014233428223, "grad_norm": 0.48478156328201294, "learning_rate": 4.2700284668564464e-07, "loss": 0.5788, "step": 210 }, { "epoch": 0.02145180967873119, "grad_norm": 0.49492210149765015, "learning_rate": 4.2903619357462386e-07, "loss": 0.5691, "step": 211 }, { "epoch": 0.021553477023180154, "grad_norm": 0.46588394045829773, "learning_rate": 4.3106954046360314e-07, "loss": 0.5799, "step": 212 }, { "epoch": 0.021655144367629116, "grad_norm": 0.5327966809272766, "learning_rate": 4.3310288735258236e-07, "loss": 0.5889, "step": 213 }, { "epoch": 0.02175681171207808, "grad_norm": 0.47049397230148315, "learning_rate": 4.351362342415617e-07, "loss": 0.5748, "step": 214 }, { "epoch": 0.021858479056527044, "grad_norm": 0.5341375470161438, "learning_rate": 4.371695811305409e-07, "loss": 0.5596, "step": 215 }, { "epoch": 0.021960146400976006, "grad_norm": 0.5142979621887207, "learning_rate": 4.3920292801952013e-07, "loss": 0.5703, "step": 216 }, { "epoch": 0.022061813745424968, "grad_norm": 0.4464692175388336, "learning_rate": 4.412362749084994e-07, "loss": 0.5438, "step": 217 }, { "epoch": 0.022163481089873933, "grad_norm": 0.4583651125431061, "learning_rate": 4.432696217974787e-07, "loss": 0.5529, "step": 218 }, { "epoch": 0.022265148434322896, "grad_norm": 0.488396555185318, "learning_rate": 4.4530296868645796e-07, "loss": 0.5321, "step": 219 }, { "epoch": 0.022366815778771858, "grad_norm": 0.5243180394172668, "learning_rate": 4.473363155754372e-07, "loss": 0.5865, "step": 220 }, { "epoch": 0.02246848312322082, "grad_norm": 0.48576778173446655, "learning_rate": 4.493696624644164e-07, "loss": 0.5269, "step": 221 }, { "epoch": 0.022570150467669785, "grad_norm": 0.5040196180343628, "learning_rate": 4.5140300935339574e-07, "loss": 0.5864, "step": 222 }, { "epoch": 0.022671817812118748, "grad_norm": 0.48464706540107727, "learning_rate": 4.5343635624237496e-07, "loss": 0.5429, "step": 223 }, { "epoch": 0.02277348515656771, "grad_norm": 0.4784921407699585, "learning_rate": 4.5546970313135424e-07, "loss": 0.5401, "step": 224 }, { "epoch": 0.022875152501016672, "grad_norm": 0.6090867519378662, "learning_rate": 4.5750305002033346e-07, "loss": 0.6072, "step": 225 }, { "epoch": 0.022976819845465637, "grad_norm": 0.46272367238998413, "learning_rate": 4.595363969093128e-07, "loss": 0.5455, "step": 226 }, { "epoch": 0.0230784871899146, "grad_norm": 0.49991273880004883, "learning_rate": 4.61569743798292e-07, "loss": 0.5988, "step": 227 }, { "epoch": 0.023180154534363562, "grad_norm": 0.4822327196598053, "learning_rate": 4.636030906872713e-07, "loss": 0.5669, "step": 228 }, { "epoch": 0.023281821878812524, "grad_norm": 0.5128304362297058, "learning_rate": 4.656364375762505e-07, "loss": 0.5436, "step": 229 }, { "epoch": 0.02338348922326149, "grad_norm": 0.4830930531024933, "learning_rate": 4.6766978446522984e-07, "loss": 0.5687, "step": 230 }, { "epoch": 0.02348515656771045, "grad_norm": 0.46558433771133423, "learning_rate": 4.6970313135420907e-07, "loss": 0.5279, "step": 231 }, { "epoch": 0.023586823912159414, "grad_norm": 0.47982653975486755, "learning_rate": 4.717364782431883e-07, "loss": 0.5288, "step": 232 }, { "epoch": 0.023688491256608376, "grad_norm": 0.5279809832572937, "learning_rate": 4.7376982513216757e-07, "loss": 0.601, "step": 233 }, { "epoch": 0.02379015860105734, "grad_norm": 0.4272576868534088, "learning_rate": 4.7580317202114684e-07, "loss": 0.5222, "step": 234 }, { "epoch": 0.023891825945506304, "grad_norm": 0.4786357581615448, "learning_rate": 4.778365189101261e-07, "loss": 0.5327, "step": 235 }, { "epoch": 0.023993493289955266, "grad_norm": 0.4747963845729828, "learning_rate": 4.798698657991054e-07, "loss": 0.5774, "step": 236 }, { "epoch": 0.024095160634404228, "grad_norm": 0.5225430727005005, "learning_rate": 4.819032126880846e-07, "loss": 0.4997, "step": 237 }, { "epoch": 0.024196827978853194, "grad_norm": 0.48786112666130066, "learning_rate": 4.83936559577064e-07, "loss": 0.571, "step": 238 }, { "epoch": 0.024298495323302156, "grad_norm": 0.4985489547252655, "learning_rate": 4.859699064660431e-07, "loss": 0.57, "step": 239 }, { "epoch": 0.024400162667751118, "grad_norm": 0.4363041818141937, "learning_rate": 4.880032533550224e-07, "loss": 0.5438, "step": 240 }, { "epoch": 0.02450183001220008, "grad_norm": 0.4878476858139038, "learning_rate": 4.900366002440017e-07, "loss": 0.5744, "step": 241 }, { "epoch": 0.024603497356649046, "grad_norm": 0.45038217306137085, "learning_rate": 4.92069947132981e-07, "loss": 0.5258, "step": 242 }, { "epoch": 0.024705164701098008, "grad_norm": 0.5154913663864136, "learning_rate": 4.941032940219602e-07, "loss": 0.5739, "step": 243 }, { "epoch": 0.02480683204554697, "grad_norm": 0.44033190608024597, "learning_rate": 4.961366409109394e-07, "loss": 0.5391, "step": 244 }, { "epoch": 0.024908499389995932, "grad_norm": 0.4571286141872406, "learning_rate": 4.981699877999187e-07, "loss": 0.595, "step": 245 }, { "epoch": 0.025010166734444898, "grad_norm": 0.4312285780906677, "learning_rate": 5.00203334688898e-07, "loss": 0.5752, "step": 246 }, { "epoch": 0.02511183407889386, "grad_norm": 0.506676435470581, "learning_rate": 5.022366815778772e-07, "loss": 0.5553, "step": 247 }, { "epoch": 0.025213501423342822, "grad_norm": 0.45684298872947693, "learning_rate": 5.042700284668565e-07, "loss": 0.534, "step": 248 }, { "epoch": 0.025315168767791784, "grad_norm": 0.4922883212566376, "learning_rate": 5.063033753558357e-07, "loss": 0.5588, "step": 249 }, { "epoch": 0.02541683611224075, "grad_norm": 0.46570879220962524, "learning_rate": 5.08336722244815e-07, "loss": 0.5387, "step": 250 }, { "epoch": 0.025518503456689712, "grad_norm": 0.5438944101333618, "learning_rate": 5.103700691337943e-07, "loss": 0.5435, "step": 251 }, { "epoch": 0.025620170801138674, "grad_norm": 0.49370327591896057, "learning_rate": 5.124034160227735e-07, "loss": 0.5465, "step": 252 }, { "epoch": 0.025721838145587636, "grad_norm": 0.4712304472923279, "learning_rate": 5.144367629117528e-07, "loss": 0.5254, "step": 253 }, { "epoch": 0.0258235054900366, "grad_norm": 0.48809337615966797, "learning_rate": 5.16470109800732e-07, "loss": 0.5573, "step": 254 }, { "epoch": 0.025925172834485564, "grad_norm": 0.45528534054756165, "learning_rate": 5.185034566897113e-07, "loss": 0.5536, "step": 255 }, { "epoch": 0.026026840178934526, "grad_norm": 0.49360889196395874, "learning_rate": 5.205368035786906e-07, "loss": 0.5346, "step": 256 }, { "epoch": 0.026128507523383488, "grad_norm": 0.4655972719192505, "learning_rate": 5.225701504676698e-07, "loss": 0.561, "step": 257 }, { "epoch": 0.026230174867832454, "grad_norm": 0.47486817836761475, "learning_rate": 5.246034973566492e-07, "loss": 0.597, "step": 258 }, { "epoch": 0.026331842212281416, "grad_norm": 0.45206528902053833, "learning_rate": 5.266368442456283e-07, "loss": 0.5729, "step": 259 }, { "epoch": 0.026433509556730378, "grad_norm": 0.4284036457538605, "learning_rate": 5.286701911346076e-07, "loss": 0.5395, "step": 260 }, { "epoch": 0.02653517690117934, "grad_norm": 0.43399521708488464, "learning_rate": 5.307035380235869e-07, "loss": 0.5167, "step": 261 }, { "epoch": 0.026636844245628306, "grad_norm": 0.47772642970085144, "learning_rate": 5.327368849125662e-07, "loss": 0.5184, "step": 262 }, { "epoch": 0.026738511590077268, "grad_norm": 0.4270094335079193, "learning_rate": 5.347702318015454e-07, "loss": 0.5611, "step": 263 }, { "epoch": 0.02684017893452623, "grad_norm": 0.48472633957862854, "learning_rate": 5.368035786905246e-07, "loss": 0.5378, "step": 264 }, { "epoch": 0.026941846278975192, "grad_norm": 0.5233398079872131, "learning_rate": 5.388369255795039e-07, "loss": 0.5861, "step": 265 }, { "epoch": 0.027043513623424158, "grad_norm": 0.4899256229400635, "learning_rate": 5.408702724684832e-07, "loss": 0.5761, "step": 266 }, { "epoch": 0.02714518096787312, "grad_norm": 0.4894525408744812, "learning_rate": 5.429036193574624e-07, "loss": 0.5275, "step": 267 }, { "epoch": 0.027246848312322082, "grad_norm": 0.5027545094490051, "learning_rate": 5.449369662464417e-07, "loss": 0.5149, "step": 268 }, { "epoch": 0.027348515656771044, "grad_norm": 0.438126802444458, "learning_rate": 5.469703131354209e-07, "loss": 0.5158, "step": 269 }, { "epoch": 0.02745018300122001, "grad_norm": 0.4454238712787628, "learning_rate": 5.490036600244003e-07, "loss": 0.5464, "step": 270 }, { "epoch": 0.027551850345668972, "grad_norm": 0.48994311690330505, "learning_rate": 5.510370069133794e-07, "loss": 0.5479, "step": 271 }, { "epoch": 0.027653517690117934, "grad_norm": 0.43848100304603577, "learning_rate": 5.530703538023587e-07, "loss": 0.5517, "step": 272 }, { "epoch": 0.027755185034566896, "grad_norm": 0.4400968551635742, "learning_rate": 5.55103700691338e-07, "loss": 0.5539, "step": 273 }, { "epoch": 0.027856852379015862, "grad_norm": 0.43202704191207886, "learning_rate": 5.571370475803173e-07, "loss": 0.5496, "step": 274 }, { "epoch": 0.027958519723464824, "grad_norm": 0.5023850798606873, "learning_rate": 5.591703944692965e-07, "loss": 0.5389, "step": 275 }, { "epoch": 0.028060187067913786, "grad_norm": 0.42085781693458557, "learning_rate": 5.612037413582758e-07, "loss": 0.5319, "step": 276 }, { "epoch": 0.028161854412362748, "grad_norm": 0.4531801640987396, "learning_rate": 5.63237088247255e-07, "loss": 0.5572, "step": 277 }, { "epoch": 0.028263521756811714, "grad_norm": 0.4558365046977997, "learning_rate": 5.652704351362344e-07, "loss": 0.4928, "step": 278 }, { "epoch": 0.028365189101260676, "grad_norm": 0.46041199564933777, "learning_rate": 5.673037820252135e-07, "loss": 0.5469, "step": 279 }, { "epoch": 0.028466856445709638, "grad_norm": 0.48836323618888855, "learning_rate": 5.693371289141928e-07, "loss": 0.5398, "step": 280 }, { "epoch": 0.0285685237901586, "grad_norm": 0.44388675689697266, "learning_rate": 5.713704758031721e-07, "loss": 0.5424, "step": 281 }, { "epoch": 0.028670191134607562, "grad_norm": 0.47196662425994873, "learning_rate": 5.734038226921513e-07, "loss": 0.5314, "step": 282 }, { "epoch": 0.028771858479056528, "grad_norm": 0.4708406627178192, "learning_rate": 5.754371695811306e-07, "loss": 0.5555, "step": 283 }, { "epoch": 0.02887352582350549, "grad_norm": 0.5015203952789307, "learning_rate": 5.774705164701098e-07, "loss": 0.5456, "step": 284 }, { "epoch": 0.028975193167954452, "grad_norm": 0.4709261357784271, "learning_rate": 5.795038633590891e-07, "loss": 0.5419, "step": 285 }, { "epoch": 0.029076860512403414, "grad_norm": 0.457049697637558, "learning_rate": 5.815372102480684e-07, "loss": 0.5384, "step": 286 }, { "epoch": 0.02917852785685238, "grad_norm": 0.4247683584690094, "learning_rate": 5.835705571370476e-07, "loss": 0.5428, "step": 287 }, { "epoch": 0.029280195201301342, "grad_norm": 0.4308735728263855, "learning_rate": 5.856039040260269e-07, "loss": 0.5232, "step": 288 }, { "epoch": 0.029381862545750304, "grad_norm": 0.5677388906478882, "learning_rate": 5.876372509150061e-07, "loss": 0.5659, "step": 289 }, { "epoch": 0.029483529890199266, "grad_norm": 0.4390091001987457, "learning_rate": 5.896705978039854e-07, "loss": 0.5306, "step": 290 }, { "epoch": 0.029585197234648232, "grad_norm": 0.43453463912010193, "learning_rate": 5.917039446929646e-07, "loss": 0.5175, "step": 291 }, { "epoch": 0.029686864579097194, "grad_norm": 0.4411603510379791, "learning_rate": 5.937372915819439e-07, "loss": 0.5335, "step": 292 }, { "epoch": 0.029788531923546156, "grad_norm": 0.44036340713500977, "learning_rate": 5.957706384709232e-07, "loss": 0.5562, "step": 293 }, { "epoch": 0.02989019926799512, "grad_norm": 0.42446765303611755, "learning_rate": 5.978039853599025e-07, "loss": 0.5504, "step": 294 }, { "epoch": 0.029991866612444084, "grad_norm": 0.4769378900527954, "learning_rate": 5.998373322488817e-07, "loss": 0.5565, "step": 295 }, { "epoch": 0.030093533956893046, "grad_norm": 0.4504096508026123, "learning_rate": 6.01870679137861e-07, "loss": 0.5375, "step": 296 }, { "epoch": 0.03019520130134201, "grad_norm": 0.45689624547958374, "learning_rate": 6.039040260268402e-07, "loss": 0.5267, "step": 297 }, { "epoch": 0.03029686864579097, "grad_norm": 0.4559730589389801, "learning_rate": 6.059373729158195e-07, "loss": 0.5263, "step": 298 }, { "epoch": 0.030398535990239936, "grad_norm": 0.46427860856056213, "learning_rate": 6.079707198047987e-07, "loss": 0.5253, "step": 299 }, { "epoch": 0.030500203334688898, "grad_norm": 0.4633890986442566, "learning_rate": 6.10004066693778e-07, "loss": 0.5516, "step": 300 }, { "epoch": 0.03060187067913786, "grad_norm": 0.48541969060897827, "learning_rate": 6.120374135827573e-07, "loss": 0.4825, "step": 301 }, { "epoch": 0.030703538023586822, "grad_norm": 0.46874162554740906, "learning_rate": 6.140707604717365e-07, "loss": 0.5818, "step": 302 }, { "epoch": 0.030805205368035788, "grad_norm": 0.458627313375473, "learning_rate": 6.161041073607158e-07, "loss": 0.5163, "step": 303 }, { "epoch": 0.03090687271248475, "grad_norm": 0.45771104097366333, "learning_rate": 6.18137454249695e-07, "loss": 0.504, "step": 304 }, { "epoch": 0.031008540056933712, "grad_norm": 0.40868884325027466, "learning_rate": 6.201708011386743e-07, "loss": 0.5055, "step": 305 }, { "epoch": 0.031110207401382674, "grad_norm": 0.4687100052833557, "learning_rate": 6.222041480276536e-07, "loss": 0.5626, "step": 306 }, { "epoch": 0.03121187474583164, "grad_norm": 0.5164995193481445, "learning_rate": 6.242374949166328e-07, "loss": 0.5085, "step": 307 }, { "epoch": 0.0313135420902806, "grad_norm": 0.47654688358306885, "learning_rate": 6.262708418056121e-07, "loss": 0.5354, "step": 308 }, { "epoch": 0.031415209434729564, "grad_norm": 0.43844127655029297, "learning_rate": 6.283041886945913e-07, "loss": 0.5357, "step": 309 }, { "epoch": 0.031516876779178526, "grad_norm": 0.5267373919487, "learning_rate": 6.303375355835706e-07, "loss": 0.5235, "step": 310 }, { "epoch": 0.03161854412362749, "grad_norm": 0.46734198927879333, "learning_rate": 6.323708824725498e-07, "loss": 0.5213, "step": 311 }, { "epoch": 0.03172021146807645, "grad_norm": 0.4444679319858551, "learning_rate": 6.34404229361529e-07, "loss": 0.4924, "step": 312 }, { "epoch": 0.03182187881252542, "grad_norm": 0.4866565465927124, "learning_rate": 6.364375762505084e-07, "loss": 0.5163, "step": 313 }, { "epoch": 0.03192354615697438, "grad_norm": 0.4697796702384949, "learning_rate": 6.384709231394877e-07, "loss": 0.5503, "step": 314 }, { "epoch": 0.032025213501423344, "grad_norm": 0.44489309191703796, "learning_rate": 6.40504270028467e-07, "loss": 0.5211, "step": 315 }, { "epoch": 0.032126880845872306, "grad_norm": 0.4343816637992859, "learning_rate": 6.425376169174462e-07, "loss": 0.5428, "step": 316 }, { "epoch": 0.03222854819032127, "grad_norm": 0.4599783718585968, "learning_rate": 6.445709638064254e-07, "loss": 0.5349, "step": 317 }, { "epoch": 0.03233021553477023, "grad_norm": 0.5201519131660461, "learning_rate": 6.466043106954047e-07, "loss": 0.5464, "step": 318 }, { "epoch": 0.03243188287921919, "grad_norm": 0.4422397315502167, "learning_rate": 6.48637657584384e-07, "loss": 0.5129, "step": 319 }, { "epoch": 0.032533550223668155, "grad_norm": 0.47230035066604614, "learning_rate": 6.506710044733631e-07, "loss": 0.5308, "step": 320 }, { "epoch": 0.032635217568117124, "grad_norm": 0.478522390127182, "learning_rate": 6.527043513623425e-07, "loss": 0.5299, "step": 321 }, { "epoch": 0.032736884912566086, "grad_norm": 0.4499979019165039, "learning_rate": 6.547376982513218e-07, "loss": 0.5107, "step": 322 }, { "epoch": 0.03283855225701505, "grad_norm": 0.4274335205554962, "learning_rate": 6.567710451403011e-07, "loss": 0.5198, "step": 323 }, { "epoch": 0.03294021960146401, "grad_norm": 0.4488288164138794, "learning_rate": 6.588043920292802e-07, "loss": 0.5136, "step": 324 }, { "epoch": 0.03304188694591297, "grad_norm": 0.5068367123603821, "learning_rate": 6.608377389182595e-07, "loss": 0.5154, "step": 325 }, { "epoch": 0.033143554290361935, "grad_norm": 0.3933063745498657, "learning_rate": 6.628710858072388e-07, "loss": 0.4852, "step": 326 }, { "epoch": 0.0332452216348109, "grad_norm": 0.44923120737075806, "learning_rate": 6.64904432696218e-07, "loss": 0.5521, "step": 327 }, { "epoch": 0.03334688897925986, "grad_norm": 0.5409775376319885, "learning_rate": 6.669377795851972e-07, "loss": 0.5346, "step": 328 }, { "epoch": 0.03344855632370883, "grad_norm": 0.46453458070755005, "learning_rate": 6.689711264741766e-07, "loss": 0.5342, "step": 329 }, { "epoch": 0.03355022366815779, "grad_norm": 0.46676719188690186, "learning_rate": 6.710044733631559e-07, "loss": 0.5061, "step": 330 }, { "epoch": 0.03365189101260675, "grad_norm": 0.4315769076347351, "learning_rate": 6.730378202521351e-07, "loss": 0.5121, "step": 331 }, { "epoch": 0.033753558357055714, "grad_norm": 0.47794705629348755, "learning_rate": 6.750711671411143e-07, "loss": 0.5144, "step": 332 }, { "epoch": 0.033855225701504676, "grad_norm": 0.43276840448379517, "learning_rate": 6.771045140300936e-07, "loss": 0.5532, "step": 333 }, { "epoch": 0.03395689304595364, "grad_norm": 0.4610294997692108, "learning_rate": 6.791378609190728e-07, "loss": 0.5521, "step": 334 }, { "epoch": 0.0340585603904026, "grad_norm": 0.4444323182106018, "learning_rate": 6.811712078080521e-07, "loss": 0.5664, "step": 335 }, { "epoch": 0.03416022773485156, "grad_norm": 0.5167050957679749, "learning_rate": 6.832045546970313e-07, "loss": 0.5453, "step": 336 }, { "epoch": 0.03426189507930053, "grad_norm": 0.465343713760376, "learning_rate": 6.852379015860107e-07, "loss": 0.5076, "step": 337 }, { "epoch": 0.034363562423749494, "grad_norm": 0.4403412640094757, "learning_rate": 6.8727124847499e-07, "loss": 0.5107, "step": 338 }, { "epoch": 0.034465229768198456, "grad_norm": 0.4904836118221283, "learning_rate": 6.893045953639692e-07, "loss": 0.5734, "step": 339 }, { "epoch": 0.03456689711264742, "grad_norm": 0.44764527678489685, "learning_rate": 6.913379422529484e-07, "loss": 0.4972, "step": 340 }, { "epoch": 0.03466856445709638, "grad_norm": 0.40682926774024963, "learning_rate": 6.933712891419277e-07, "loss": 0.5165, "step": 341 }, { "epoch": 0.03477023180154534, "grad_norm": 0.47413143515586853, "learning_rate": 6.954046360309069e-07, "loss": 0.5246, "step": 342 }, { "epoch": 0.034871899145994305, "grad_norm": 0.40526479482650757, "learning_rate": 6.974379829198862e-07, "loss": 0.549, "step": 343 }, { "epoch": 0.03497356649044327, "grad_norm": 0.4525914192199707, "learning_rate": 6.994713298088654e-07, "loss": 0.5388, "step": 344 }, { "epoch": 0.035075233834892236, "grad_norm": 0.49897000193595886, "learning_rate": 7.015046766978448e-07, "loss": 0.5466, "step": 345 }, { "epoch": 0.0351769011793412, "grad_norm": 0.4299665093421936, "learning_rate": 7.03538023586824e-07, "loss": 0.4934, "step": 346 }, { "epoch": 0.03527856852379016, "grad_norm": 0.48260563611984253, "learning_rate": 7.055713704758033e-07, "loss": 0.498, "step": 347 }, { "epoch": 0.03538023586823912, "grad_norm": 0.4518226385116577, "learning_rate": 7.076047173647825e-07, "loss": 0.5472, "step": 348 }, { "epoch": 0.035481903212688085, "grad_norm": 0.47968196868896484, "learning_rate": 7.096380642537617e-07, "loss": 0.5462, "step": 349 }, { "epoch": 0.03558357055713705, "grad_norm": 0.45944011211395264, "learning_rate": 7.11671411142741e-07, "loss": 0.5315, "step": 350 }, { "epoch": 0.03568523790158601, "grad_norm": 0.44177713990211487, "learning_rate": 7.137047580317203e-07, "loss": 0.5174, "step": 351 }, { "epoch": 0.03578690524603497, "grad_norm": 0.4776689112186432, "learning_rate": 7.157381049206994e-07, "loss": 0.5408, "step": 352 }, { "epoch": 0.03588857259048393, "grad_norm": 0.44745853543281555, "learning_rate": 7.177714518096787e-07, "loss": 0.5526, "step": 353 }, { "epoch": 0.0359902399349329, "grad_norm": 0.5441786646842957, "learning_rate": 7.198047986986581e-07, "loss": 0.5248, "step": 354 }, { "epoch": 0.036091907279381864, "grad_norm": 0.4008581340312958, "learning_rate": 7.218381455876374e-07, "loss": 0.5352, "step": 355 }, { "epoch": 0.036193574623830826, "grad_norm": 0.4823162853717804, "learning_rate": 7.238714924766165e-07, "loss": 0.5272, "step": 356 }, { "epoch": 0.03629524196827979, "grad_norm": 0.47189053893089294, "learning_rate": 7.259048393655958e-07, "loss": 0.5058, "step": 357 }, { "epoch": 0.03639690931272875, "grad_norm": 0.44324028491973877, "learning_rate": 7.279381862545751e-07, "loss": 0.509, "step": 358 }, { "epoch": 0.03649857665717771, "grad_norm": 0.41246163845062256, "learning_rate": 7.299715331435544e-07, "loss": 0.4988, "step": 359 }, { "epoch": 0.036600244001626675, "grad_norm": 0.4447213113307953, "learning_rate": 7.320048800325335e-07, "loss": 0.5162, "step": 360 }, { "epoch": 0.03670191134607564, "grad_norm": 0.45600298047065735, "learning_rate": 7.340382269215128e-07, "loss": 0.5419, "step": 361 }, { "epoch": 0.036803578690524606, "grad_norm": 0.4342942535877228, "learning_rate": 7.360715738104922e-07, "loss": 0.5144, "step": 362 }, { "epoch": 0.03690524603497357, "grad_norm": 0.4540587067604065, "learning_rate": 7.381049206994715e-07, "loss": 0.5134, "step": 363 }, { "epoch": 0.03700691337942253, "grad_norm": 0.43721094727516174, "learning_rate": 7.401382675884506e-07, "loss": 0.5274, "step": 364 }, { "epoch": 0.03710858072387149, "grad_norm": 0.46331876516342163, "learning_rate": 7.421716144774299e-07, "loss": 0.5106, "step": 365 }, { "epoch": 0.037210248068320455, "grad_norm": 0.4101259708404541, "learning_rate": 7.442049613664092e-07, "loss": 0.5137, "step": 366 }, { "epoch": 0.03731191541276942, "grad_norm": 0.46844103932380676, "learning_rate": 7.462383082553884e-07, "loss": 0.5247, "step": 367 }, { "epoch": 0.03741358275721838, "grad_norm": 0.4871175289154053, "learning_rate": 7.482716551443676e-07, "loss": 0.5246, "step": 368 }, { "epoch": 0.03751525010166734, "grad_norm": 0.4367859363555908, "learning_rate": 7.503050020333469e-07, "loss": 0.5223, "step": 369 }, { "epoch": 0.03761691744611631, "grad_norm": 0.44525837898254395, "learning_rate": 7.523383489223263e-07, "loss": 0.5597, "step": 370 }, { "epoch": 0.03771858479056527, "grad_norm": 0.4375249445438385, "learning_rate": 7.543716958113055e-07, "loss": 0.5089, "step": 371 }, { "epoch": 0.037820252135014235, "grad_norm": 0.46928367018699646, "learning_rate": 7.564050427002847e-07, "loss": 0.5112, "step": 372 }, { "epoch": 0.0379219194794632, "grad_norm": 0.4760361909866333, "learning_rate": 7.58438389589264e-07, "loss": 0.5324, "step": 373 }, { "epoch": 0.03802358682391216, "grad_norm": 0.41278693079948425, "learning_rate": 7.604717364782432e-07, "loss": 0.5372, "step": 374 }, { "epoch": 0.03812525416836112, "grad_norm": 0.469170480966568, "learning_rate": 7.625050833672225e-07, "loss": 0.4823, "step": 375 }, { "epoch": 0.03822692151281008, "grad_norm": 0.4612383246421814, "learning_rate": 7.645384302562017e-07, "loss": 0.523, "step": 376 }, { "epoch": 0.038328588857259045, "grad_norm": 0.4607977271080017, "learning_rate": 7.665717771451809e-07, "loss": 0.5379, "step": 377 }, { "epoch": 0.038430256201708014, "grad_norm": 0.43743523955345154, "learning_rate": 7.686051240341603e-07, "loss": 0.5325, "step": 378 }, { "epoch": 0.038531923546156976, "grad_norm": 0.49949783086776733, "learning_rate": 7.706384709231396e-07, "loss": 0.5271, "step": 379 }, { "epoch": 0.03863359089060594, "grad_norm": 0.4535428285598755, "learning_rate": 7.726718178121189e-07, "loss": 0.5456, "step": 380 }, { "epoch": 0.0387352582350549, "grad_norm": 0.451235294342041, "learning_rate": 7.747051647010981e-07, "loss": 0.4835, "step": 381 }, { "epoch": 0.03883692557950386, "grad_norm": 0.4695568084716797, "learning_rate": 7.767385115900773e-07, "loss": 0.4888, "step": 382 }, { "epoch": 0.038938592923952825, "grad_norm": 0.43607985973358154, "learning_rate": 7.787718584790566e-07, "loss": 0.5164, "step": 383 }, { "epoch": 0.03904026026840179, "grad_norm": 0.5413311719894409, "learning_rate": 7.808052053680359e-07, "loss": 0.5053, "step": 384 }, { "epoch": 0.03914192761285075, "grad_norm": 0.4585031270980835, "learning_rate": 7.82838552257015e-07, "loss": 0.5259, "step": 385 }, { "epoch": 0.03924359495729972, "grad_norm": 0.47047632932662964, "learning_rate": 7.848718991459944e-07, "loss": 0.5236, "step": 386 }, { "epoch": 0.03934526230174868, "grad_norm": 0.4606753885746002, "learning_rate": 7.869052460349737e-07, "loss": 0.501, "step": 387 }, { "epoch": 0.03944692964619764, "grad_norm": 0.4215405583381653, "learning_rate": 7.88938592923953e-07, "loss": 0.4903, "step": 388 }, { "epoch": 0.039548596990646605, "grad_norm": 0.4653787314891815, "learning_rate": 7.909719398129321e-07, "loss": 0.5102, "step": 389 }, { "epoch": 0.03965026433509557, "grad_norm": 0.45176365971565247, "learning_rate": 7.930052867019114e-07, "loss": 0.5238, "step": 390 }, { "epoch": 0.03975193167954453, "grad_norm": 0.42499950528144836, "learning_rate": 7.950386335908907e-07, "loss": 0.4979, "step": 391 }, { "epoch": 0.03985359902399349, "grad_norm": 0.48214760422706604, "learning_rate": 7.970719804798699e-07, "loss": 0.5295, "step": 392 }, { "epoch": 0.03995526636844245, "grad_norm": 0.4166196286678314, "learning_rate": 7.991053273688491e-07, "loss": 0.5088, "step": 393 }, { "epoch": 0.04005693371289142, "grad_norm": 0.47632694244384766, "learning_rate": 8.011386742578285e-07, "loss": 0.4851, "step": 394 }, { "epoch": 0.040158601057340385, "grad_norm": 0.4166395366191864, "learning_rate": 8.031720211468078e-07, "loss": 0.5299, "step": 395 }, { "epoch": 0.04026026840178935, "grad_norm": 0.4755884110927582, "learning_rate": 8.05205368035787e-07, "loss": 0.5178, "step": 396 }, { "epoch": 0.04036193574623831, "grad_norm": 0.4016595482826233, "learning_rate": 8.072387149247662e-07, "loss": 0.5117, "step": 397 }, { "epoch": 0.04046360309068727, "grad_norm": 0.4710274040699005, "learning_rate": 8.092720618137455e-07, "loss": 0.5586, "step": 398 }, { "epoch": 0.04056527043513623, "grad_norm": 0.4859751760959625, "learning_rate": 8.113054087027247e-07, "loss": 0.5009, "step": 399 }, { "epoch": 0.040666937779585195, "grad_norm": 0.4258805513381958, "learning_rate": 8.13338755591704e-07, "loss": 0.4793, "step": 400 }, { "epoch": 0.04076860512403416, "grad_norm": 0.44227075576782227, "learning_rate": 8.153721024806832e-07, "loss": 0.479, "step": 401 }, { "epoch": 0.040870272468483126, "grad_norm": 0.4490945637226105, "learning_rate": 8.174054493696626e-07, "loss": 0.5292, "step": 402 }, { "epoch": 0.04097193981293209, "grad_norm": 0.4580917954444885, "learning_rate": 8.194387962586418e-07, "loss": 0.5306, "step": 403 }, { "epoch": 0.04107360715738105, "grad_norm": 0.483123779296875, "learning_rate": 8.214721431476211e-07, "loss": 0.5319, "step": 404 }, { "epoch": 0.04117527450183001, "grad_norm": 0.45484450459480286, "learning_rate": 8.235054900366003e-07, "loss": 0.4868, "step": 405 }, { "epoch": 0.041276941846278975, "grad_norm": 0.4193057715892792, "learning_rate": 8.255388369255796e-07, "loss": 0.4976, "step": 406 }, { "epoch": 0.04137860919072794, "grad_norm": 0.4733670651912689, "learning_rate": 8.275721838145588e-07, "loss": 0.499, "step": 407 }, { "epoch": 0.0414802765351769, "grad_norm": 0.5292720794677734, "learning_rate": 8.296055307035381e-07, "loss": 0.5306, "step": 408 }, { "epoch": 0.04158194387962586, "grad_norm": 0.4893740713596344, "learning_rate": 8.316388775925173e-07, "loss": 0.5085, "step": 409 }, { "epoch": 0.04168361122407483, "grad_norm": 0.525725781917572, "learning_rate": 8.336722244814967e-07, "loss": 0.5368, "step": 410 }, { "epoch": 0.04178527856852379, "grad_norm": 0.43418511748313904, "learning_rate": 8.357055713704759e-07, "loss": 0.5177, "step": 411 }, { "epoch": 0.041886945912972755, "grad_norm": 0.45118099451065063, "learning_rate": 8.377389182594552e-07, "loss": 0.5386, "step": 412 }, { "epoch": 0.04198861325742172, "grad_norm": 0.39959657192230225, "learning_rate": 8.397722651484344e-07, "loss": 0.4687, "step": 413 }, { "epoch": 0.04209028060187068, "grad_norm": 0.4978584945201874, "learning_rate": 8.418056120374136e-07, "loss": 0.4979, "step": 414 }, { "epoch": 0.04219194794631964, "grad_norm": 0.4904720187187195, "learning_rate": 8.438389589263929e-07, "loss": 0.526, "step": 415 }, { "epoch": 0.0422936152907686, "grad_norm": 0.47549310326576233, "learning_rate": 8.458723058153722e-07, "loss": 0.5149, "step": 416 }, { "epoch": 0.042395282635217565, "grad_norm": 0.4695572555065155, "learning_rate": 8.479056527043513e-07, "loss": 0.5204, "step": 417 }, { "epoch": 0.042496949979666535, "grad_norm": 0.48136502504348755, "learning_rate": 8.499389995933307e-07, "loss": 0.5239, "step": 418 }, { "epoch": 0.0425986173241155, "grad_norm": 0.412747859954834, "learning_rate": 8.5197234648231e-07, "loss": 0.5214, "step": 419 }, { "epoch": 0.04270028466856446, "grad_norm": 0.47563841938972473, "learning_rate": 8.540056933712893e-07, "loss": 0.5104, "step": 420 }, { "epoch": 0.04280195201301342, "grad_norm": 0.43522921204566956, "learning_rate": 8.560390402602684e-07, "loss": 0.4989, "step": 421 }, { "epoch": 0.04290361935746238, "grad_norm": 0.44048869609832764, "learning_rate": 8.580723871492477e-07, "loss": 0.5511, "step": 422 }, { "epoch": 0.043005286701911345, "grad_norm": 0.42371442914009094, "learning_rate": 8.60105734038227e-07, "loss": 0.4863, "step": 423 }, { "epoch": 0.04310695404636031, "grad_norm": 0.44432300329208374, "learning_rate": 8.621390809272063e-07, "loss": 0.4836, "step": 424 }, { "epoch": 0.04320862139080927, "grad_norm": 0.47042375802993774, "learning_rate": 8.641724278161854e-07, "loss": 0.4808, "step": 425 }, { "epoch": 0.04331028873525823, "grad_norm": 0.4450913965702057, "learning_rate": 8.662057747051647e-07, "loss": 0.4978, "step": 426 }, { "epoch": 0.0434119560797072, "grad_norm": 0.5725521445274353, "learning_rate": 8.682391215941441e-07, "loss": 0.5584, "step": 427 }, { "epoch": 0.04351362342415616, "grad_norm": 0.47543686628341675, "learning_rate": 8.702724684831234e-07, "loss": 0.5092, "step": 428 }, { "epoch": 0.043615290768605125, "grad_norm": 0.41893717646598816, "learning_rate": 8.723058153721025e-07, "loss": 0.5249, "step": 429 }, { "epoch": 0.04371695811305409, "grad_norm": 0.5018414258956909, "learning_rate": 8.743391622610818e-07, "loss": 0.4907, "step": 430 }, { "epoch": 0.04381862545750305, "grad_norm": 0.494534969329834, "learning_rate": 8.763725091500611e-07, "loss": 0.4941, "step": 431 }, { "epoch": 0.04392029280195201, "grad_norm": 0.4298931360244751, "learning_rate": 8.784058560390403e-07, "loss": 0.5102, "step": 432 }, { "epoch": 0.044021960146400974, "grad_norm": 0.43279317021369934, "learning_rate": 8.804392029280195e-07, "loss": 0.5541, "step": 433 }, { "epoch": 0.044123627490849936, "grad_norm": 0.46411123871803284, "learning_rate": 8.824725498169988e-07, "loss": 0.5161, "step": 434 }, { "epoch": 0.044225294835298905, "grad_norm": 0.48940151929855347, "learning_rate": 8.845058967059782e-07, "loss": 0.5153, "step": 435 }, { "epoch": 0.04432696217974787, "grad_norm": 0.47353842854499817, "learning_rate": 8.865392435949574e-07, "loss": 0.5127, "step": 436 }, { "epoch": 0.04442862952419683, "grad_norm": 0.4312630593776703, "learning_rate": 8.885725904839367e-07, "loss": 0.4505, "step": 437 }, { "epoch": 0.04453029686864579, "grad_norm": 0.4815777540206909, "learning_rate": 8.906059373729159e-07, "loss": 0.5322, "step": 438 }, { "epoch": 0.04463196421309475, "grad_norm": 0.5279520153999329, "learning_rate": 8.926392842618951e-07, "loss": 0.4542, "step": 439 }, { "epoch": 0.044733631557543715, "grad_norm": 0.42197513580322266, "learning_rate": 8.946726311508744e-07, "loss": 0.5118, "step": 440 }, { "epoch": 0.04483529890199268, "grad_norm": 0.4875023365020752, "learning_rate": 8.967059780398537e-07, "loss": 0.5463, "step": 441 }, { "epoch": 0.04493696624644164, "grad_norm": 0.44151920080184937, "learning_rate": 8.987393249288328e-07, "loss": 0.4657, "step": 442 }, { "epoch": 0.04503863359089061, "grad_norm": 0.43355754017829895, "learning_rate": 9.007726718178122e-07, "loss": 0.4924, "step": 443 }, { "epoch": 0.04514030093533957, "grad_norm": 0.41763514280319214, "learning_rate": 9.028060187067915e-07, "loss": 0.4446, "step": 444 }, { "epoch": 0.04524196827978853, "grad_norm": 0.4432315230369568, "learning_rate": 9.048393655957708e-07, "loss": 0.4743, "step": 445 }, { "epoch": 0.045343635624237495, "grad_norm": 0.43268680572509766, "learning_rate": 9.068727124847499e-07, "loss": 0.4821, "step": 446 }, { "epoch": 0.04544530296868646, "grad_norm": 0.45978057384490967, "learning_rate": 9.089060593737292e-07, "loss": 0.531, "step": 447 }, { "epoch": 0.04554697031313542, "grad_norm": 0.41593390703201294, "learning_rate": 9.109394062627085e-07, "loss": 0.5401, "step": 448 }, { "epoch": 0.04564863765758438, "grad_norm": 0.43619799613952637, "learning_rate": 9.129727531516878e-07, "loss": 0.4804, "step": 449 }, { "epoch": 0.045750305002033344, "grad_norm": 0.4100296199321747, "learning_rate": 9.150061000406669e-07, "loss": 0.5144, "step": 450 }, { "epoch": 0.04585197234648231, "grad_norm": 0.43320903182029724, "learning_rate": 9.170394469296463e-07, "loss": 0.5192, "step": 451 }, { "epoch": 0.045953639690931275, "grad_norm": 0.3929443061351776, "learning_rate": 9.190727938186256e-07, "loss": 0.48, "step": 452 }, { "epoch": 0.04605530703538024, "grad_norm": 0.4653967320919037, "learning_rate": 9.211061407076049e-07, "loss": 0.4988, "step": 453 }, { "epoch": 0.0461569743798292, "grad_norm": 0.4733966290950775, "learning_rate": 9.23139487596584e-07, "loss": 0.52, "step": 454 }, { "epoch": 0.04625864172427816, "grad_norm": 0.4701070189476013, "learning_rate": 9.251728344855633e-07, "loss": 0.5223, "step": 455 }, { "epoch": 0.046360309068727124, "grad_norm": 0.47504615783691406, "learning_rate": 9.272061813745426e-07, "loss": 0.4954, "step": 456 }, { "epoch": 0.046461976413176086, "grad_norm": 0.4735565781593323, "learning_rate": 9.292395282635218e-07, "loss": 0.4924, "step": 457 }, { "epoch": 0.04656364375762505, "grad_norm": 0.4039488732814789, "learning_rate": 9.31272875152501e-07, "loss": 0.5188, "step": 458 }, { "epoch": 0.04666531110207402, "grad_norm": 0.4179602563381195, "learning_rate": 9.333062220414804e-07, "loss": 0.5127, "step": 459 }, { "epoch": 0.04676697844652298, "grad_norm": 0.4551088511943817, "learning_rate": 9.353395689304597e-07, "loss": 0.5139, "step": 460 }, { "epoch": 0.04686864579097194, "grad_norm": 0.4713256061077118, "learning_rate": 9.373729158194389e-07, "loss": 0.4706, "step": 461 }, { "epoch": 0.0469703131354209, "grad_norm": 0.4898405969142914, "learning_rate": 9.394062627084181e-07, "loss": 0.5275, "step": 462 }, { "epoch": 0.047071980479869865, "grad_norm": 0.417212575674057, "learning_rate": 9.414396095973974e-07, "loss": 0.525, "step": 463 }, { "epoch": 0.04717364782431883, "grad_norm": 0.4675290882587433, "learning_rate": 9.434729564863766e-07, "loss": 0.4952, "step": 464 }, { "epoch": 0.04727531516876779, "grad_norm": 0.512333869934082, "learning_rate": 9.455063033753559e-07, "loss": 0.5093, "step": 465 }, { "epoch": 0.04737698251321675, "grad_norm": 0.42721694707870483, "learning_rate": 9.475396502643351e-07, "loss": 0.504, "step": 466 }, { "epoch": 0.04747864985766572, "grad_norm": 0.4725632071495056, "learning_rate": 9.495729971533145e-07, "loss": 0.5019, "step": 467 }, { "epoch": 0.04758031720211468, "grad_norm": 0.43053296208381653, "learning_rate": 9.516063440422937e-07, "loss": 0.5069, "step": 468 }, { "epoch": 0.047681984546563645, "grad_norm": 0.4608587324619293, "learning_rate": 9.53639690931273e-07, "loss": 0.5488, "step": 469 }, { "epoch": 0.04778365189101261, "grad_norm": 0.42508816719055176, "learning_rate": 9.556730378202522e-07, "loss": 0.485, "step": 470 }, { "epoch": 0.04788531923546157, "grad_norm": 0.4080602526664734, "learning_rate": 9.577063847092315e-07, "loss": 0.5083, "step": 471 }, { "epoch": 0.04798698657991053, "grad_norm": 0.4427502155303955, "learning_rate": 9.597397315982108e-07, "loss": 0.4987, "step": 472 }, { "epoch": 0.048088653924359494, "grad_norm": 0.4061151146888733, "learning_rate": 9.617730784871899e-07, "loss": 0.4775, "step": 473 }, { "epoch": 0.048190321268808456, "grad_norm": 0.4895259439945221, "learning_rate": 9.638064253761691e-07, "loss": 0.5295, "step": 474 }, { "epoch": 0.048291988613257425, "grad_norm": 0.42699119448661804, "learning_rate": 9.658397722651486e-07, "loss": 0.4934, "step": 475 }, { "epoch": 0.04839365595770639, "grad_norm": 0.4626932442188263, "learning_rate": 9.67873119154128e-07, "loss": 0.5097, "step": 476 }, { "epoch": 0.04849532330215535, "grad_norm": 0.44911080598831177, "learning_rate": 9.69906466043107e-07, "loss": 0.493, "step": 477 }, { "epoch": 0.04859699064660431, "grad_norm": 0.42143896222114563, "learning_rate": 9.719398129320862e-07, "loss": 0.4684, "step": 478 }, { "epoch": 0.048698657991053274, "grad_norm": 0.4082300066947937, "learning_rate": 9.739731598210655e-07, "loss": 0.5048, "step": 479 }, { "epoch": 0.048800325335502236, "grad_norm": 0.4407929480075836, "learning_rate": 9.760065067100448e-07, "loss": 0.5028, "step": 480 }, { "epoch": 0.0489019926799512, "grad_norm": 0.38297614455223083, "learning_rate": 9.78039853599024e-07, "loss": 0.508, "step": 481 }, { "epoch": 0.04900366002440016, "grad_norm": 0.46486377716064453, "learning_rate": 9.800732004880033e-07, "loss": 0.4859, "step": 482 }, { "epoch": 0.04910532736884913, "grad_norm": 0.4037909507751465, "learning_rate": 9.821065473769826e-07, "loss": 0.4717, "step": 483 }, { "epoch": 0.04920699471329809, "grad_norm": 0.43329092860221863, "learning_rate": 9.84139894265962e-07, "loss": 0.5021, "step": 484 }, { "epoch": 0.04930866205774705, "grad_norm": 0.4120742678642273, "learning_rate": 9.861732411549412e-07, "loss": 0.5025, "step": 485 }, { "epoch": 0.049410329402196015, "grad_norm": 0.4264141321182251, "learning_rate": 9.882065880439204e-07, "loss": 0.4664, "step": 486 }, { "epoch": 0.04951199674664498, "grad_norm": 0.44031381607055664, "learning_rate": 9.902399349328997e-07, "loss": 0.5012, "step": 487 }, { "epoch": 0.04961366409109394, "grad_norm": 0.476924329996109, "learning_rate": 9.922732818218788e-07, "loss": 0.5448, "step": 488 }, { "epoch": 0.0497153314355429, "grad_norm": 0.4272846579551697, "learning_rate": 9.94306628710858e-07, "loss": 0.5126, "step": 489 }, { "epoch": 0.049816998779991864, "grad_norm": 0.4094320237636566, "learning_rate": 9.963399755998373e-07, "loss": 0.4744, "step": 490 }, { "epoch": 0.04991866612444083, "grad_norm": 0.4221530854701996, "learning_rate": 9.983733224888168e-07, "loss": 0.4906, "step": 491 }, { "epoch": 0.050020333468889795, "grad_norm": 0.4496288299560547, "learning_rate": 1.000406669377796e-06, "loss": 0.513, "step": 492 }, { "epoch": 0.05012200081333876, "grad_norm": 0.4374641478061676, "learning_rate": 1.0024400162667752e-06, "loss": 0.5158, "step": 493 }, { "epoch": 0.05022366815778772, "grad_norm": 0.46234560012817383, "learning_rate": 1.0044733631557544e-06, "loss": 0.504, "step": 494 }, { "epoch": 0.05032533550223668, "grad_norm": 0.49681755900382996, "learning_rate": 1.0065067100447337e-06, "loss": 0.4833, "step": 495 }, { "epoch": 0.050427002846685644, "grad_norm": 0.4650215804576874, "learning_rate": 1.008540056933713e-06, "loss": 0.5195, "step": 496 }, { "epoch": 0.050528670191134606, "grad_norm": 0.4183270037174225, "learning_rate": 1.0105734038226923e-06, "loss": 0.5162, "step": 497 }, { "epoch": 0.05063033753558357, "grad_norm": 0.4551979601383209, "learning_rate": 1.0126067507116713e-06, "loss": 0.4921, "step": 498 }, { "epoch": 0.05073200488003253, "grad_norm": 0.45358985662460327, "learning_rate": 1.0146400976006506e-06, "loss": 0.4871, "step": 499 }, { "epoch": 0.0508336722244815, "grad_norm": 0.4306871294975281, "learning_rate": 1.01667344448963e-06, "loss": 0.4993, "step": 500 }, { "epoch": 0.05093533956893046, "grad_norm": 0.4368368089199066, "learning_rate": 1.0187067913786094e-06, "loss": 0.4756, "step": 501 }, { "epoch": 0.051037006913379424, "grad_norm": 0.5135526061058044, "learning_rate": 1.0207401382675887e-06, "loss": 0.4999, "step": 502 }, { "epoch": 0.051138674257828386, "grad_norm": 0.4152102470397949, "learning_rate": 1.0227734851565677e-06, "loss": 0.479, "step": 503 }, { "epoch": 0.05124034160227735, "grad_norm": 0.437686562538147, "learning_rate": 1.024806832045547e-06, "loss": 0.4907, "step": 504 }, { "epoch": 0.05134200894672631, "grad_norm": 0.4709370732307434, "learning_rate": 1.0268401789345263e-06, "loss": 0.4879, "step": 505 }, { "epoch": 0.05144367629117527, "grad_norm": 0.4641379415988922, "learning_rate": 1.0288735258235056e-06, "loss": 0.5138, "step": 506 }, { "epoch": 0.051545343635624234, "grad_norm": 0.4794265627861023, "learning_rate": 1.0309068727124848e-06, "loss": 0.489, "step": 507 }, { "epoch": 0.0516470109800732, "grad_norm": 0.4354081451892853, "learning_rate": 1.032940219601464e-06, "loss": 0.4836, "step": 508 }, { "epoch": 0.051748678324522165, "grad_norm": 0.42371001839637756, "learning_rate": 1.0349735664904434e-06, "loss": 0.5356, "step": 509 }, { "epoch": 0.05185034566897113, "grad_norm": 0.4682040810585022, "learning_rate": 1.0370069133794227e-06, "loss": 0.5027, "step": 510 }, { "epoch": 0.05195201301342009, "grad_norm": 0.4052731692790985, "learning_rate": 1.039040260268402e-06, "loss": 0.482, "step": 511 }, { "epoch": 0.05205368035786905, "grad_norm": 0.4332220256328583, "learning_rate": 1.0410736071573812e-06, "loss": 0.5196, "step": 512 }, { "epoch": 0.052155347702318014, "grad_norm": 0.47532686591148376, "learning_rate": 1.0431069540463603e-06, "loss": 0.4997, "step": 513 }, { "epoch": 0.052257015046766976, "grad_norm": 0.4862993359565735, "learning_rate": 1.0451403009353395e-06, "loss": 0.5362, "step": 514 }, { "epoch": 0.05235868239121594, "grad_norm": 0.3771111071109772, "learning_rate": 1.0471736478243188e-06, "loss": 0.4643, "step": 515 }, { "epoch": 0.05246034973566491, "grad_norm": 0.4372335970401764, "learning_rate": 1.0492069947132983e-06, "loss": 0.5107, "step": 516 }, { "epoch": 0.05256201708011387, "grad_norm": 0.48654159903526306, "learning_rate": 1.0512403416022774e-06, "loss": 0.5126, "step": 517 }, { "epoch": 0.05266368442456283, "grad_norm": 0.4464114010334015, "learning_rate": 1.0532736884912567e-06, "loss": 0.5361, "step": 518 }, { "epoch": 0.052765351769011794, "grad_norm": 0.4824067950248718, "learning_rate": 1.055307035380236e-06, "loss": 0.4939, "step": 519 }, { "epoch": 0.052867019113460756, "grad_norm": 0.43481042981147766, "learning_rate": 1.0573403822692152e-06, "loss": 0.5261, "step": 520 }, { "epoch": 0.05296868645790972, "grad_norm": 0.4781104624271393, "learning_rate": 1.0593737291581945e-06, "loss": 0.5072, "step": 521 }, { "epoch": 0.05307035380235868, "grad_norm": 0.43881523609161377, "learning_rate": 1.0614070760471738e-06, "loss": 0.5134, "step": 522 }, { "epoch": 0.05317202114680764, "grad_norm": 0.42178526520729065, "learning_rate": 1.063440422936153e-06, "loss": 0.489, "step": 523 }, { "epoch": 0.05327368849125661, "grad_norm": 0.4430757462978363, "learning_rate": 1.0654737698251323e-06, "loss": 0.5274, "step": 524 }, { "epoch": 0.053375355835705574, "grad_norm": 0.42775648832321167, "learning_rate": 1.0675071167141116e-06, "loss": 0.4947, "step": 525 }, { "epoch": 0.053477023180154536, "grad_norm": 0.5074836611747742, "learning_rate": 1.0695404636030909e-06, "loss": 0.4917, "step": 526 }, { "epoch": 0.0535786905246035, "grad_norm": 0.4495405852794647, "learning_rate": 1.0715738104920701e-06, "loss": 0.5057, "step": 527 }, { "epoch": 0.05368035786905246, "grad_norm": 0.45561516284942627, "learning_rate": 1.0736071573810492e-06, "loss": 0.4881, "step": 528 }, { "epoch": 0.05378202521350142, "grad_norm": 0.40696224570274353, "learning_rate": 1.0756405042700285e-06, "loss": 0.5206, "step": 529 }, { "epoch": 0.053883692557950384, "grad_norm": 0.4097561240196228, "learning_rate": 1.0776738511590078e-06, "loss": 0.5094, "step": 530 }, { "epoch": 0.053985359902399346, "grad_norm": 0.47025495767593384, "learning_rate": 1.079707198047987e-06, "loss": 0.4587, "step": 531 }, { "epoch": 0.054087027246848315, "grad_norm": 0.42884036898612976, "learning_rate": 1.0817405449369663e-06, "loss": 0.5065, "step": 532 }, { "epoch": 0.05418869459129728, "grad_norm": 0.4442511796951294, "learning_rate": 1.0837738918259456e-06, "loss": 0.4944, "step": 533 }, { "epoch": 0.05429036193574624, "grad_norm": 0.43486785888671875, "learning_rate": 1.0858072387149249e-06, "loss": 0.4987, "step": 534 }, { "epoch": 0.0543920292801952, "grad_norm": 0.4186501204967499, "learning_rate": 1.0878405856039041e-06, "loss": 0.483, "step": 535 }, { "epoch": 0.054493696624644164, "grad_norm": 0.4274844229221344, "learning_rate": 1.0898739324928834e-06, "loss": 0.4957, "step": 536 }, { "epoch": 0.054595363969093126, "grad_norm": 0.48924291133880615, "learning_rate": 1.0919072793818627e-06, "loss": 0.528, "step": 537 }, { "epoch": 0.05469703131354209, "grad_norm": 0.41660651564598083, "learning_rate": 1.0939406262708418e-06, "loss": 0.4757, "step": 538 }, { "epoch": 0.05479869865799105, "grad_norm": 0.4570632874965668, "learning_rate": 1.095973973159821e-06, "loss": 0.4765, "step": 539 }, { "epoch": 0.05490036600244002, "grad_norm": 0.45351865887641907, "learning_rate": 1.0980073200488005e-06, "loss": 0.513, "step": 540 }, { "epoch": 0.05500203334688898, "grad_norm": 0.4553813934326172, "learning_rate": 1.1000406669377798e-06, "loss": 0.4742, "step": 541 }, { "epoch": 0.055103700691337944, "grad_norm": 0.48232316970825195, "learning_rate": 1.1020740138267589e-06, "loss": 0.5117, "step": 542 }, { "epoch": 0.055205368035786906, "grad_norm": 0.6067999601364136, "learning_rate": 1.1041073607157381e-06, "loss": 0.5575, "step": 543 }, { "epoch": 0.05530703538023587, "grad_norm": 0.4252871870994568, "learning_rate": 1.1061407076047174e-06, "loss": 0.5116, "step": 544 }, { "epoch": 0.05540870272468483, "grad_norm": 0.4649628698825836, "learning_rate": 1.1081740544936967e-06, "loss": 0.5029, "step": 545 }, { "epoch": 0.05551037006913379, "grad_norm": 0.42631545662879944, "learning_rate": 1.110207401382676e-06, "loss": 0.4936, "step": 546 }, { "epoch": 0.055612037413582754, "grad_norm": 0.43985748291015625, "learning_rate": 1.1122407482716552e-06, "loss": 0.4943, "step": 547 }, { "epoch": 0.055713704758031724, "grad_norm": 0.3991602659225464, "learning_rate": 1.1142740951606345e-06, "loss": 0.4895, "step": 548 }, { "epoch": 0.055815372102480686, "grad_norm": 0.42657703161239624, "learning_rate": 1.1163074420496138e-06, "loss": 0.5196, "step": 549 }, { "epoch": 0.05591703944692965, "grad_norm": 0.41951990127563477, "learning_rate": 1.118340788938593e-06, "loss": 0.4651, "step": 550 }, { "epoch": 0.05601870679137861, "grad_norm": 0.4471606910228729, "learning_rate": 1.1203741358275723e-06, "loss": 0.5397, "step": 551 }, { "epoch": 0.05612037413582757, "grad_norm": 0.4386795461177826, "learning_rate": 1.1224074827165516e-06, "loss": 0.4734, "step": 552 }, { "epoch": 0.056222041480276534, "grad_norm": 0.4026103913784027, "learning_rate": 1.1244408296055307e-06, "loss": 0.5074, "step": 553 }, { "epoch": 0.056323708824725496, "grad_norm": 0.3927057087421417, "learning_rate": 1.12647417649451e-06, "loss": 0.4676, "step": 554 }, { "epoch": 0.05642537616917446, "grad_norm": 0.47030144929885864, "learning_rate": 1.1285075233834892e-06, "loss": 0.4429, "step": 555 }, { "epoch": 0.05652704351362343, "grad_norm": 0.4319796860218048, "learning_rate": 1.1305408702724687e-06, "loss": 0.5087, "step": 556 }, { "epoch": 0.05662871085807239, "grad_norm": 0.4378107488155365, "learning_rate": 1.1325742171614478e-06, "loss": 0.4981, "step": 557 }, { "epoch": 0.05673037820252135, "grad_norm": 0.46981140971183777, "learning_rate": 1.134607564050427e-06, "loss": 0.4943, "step": 558 }, { "epoch": 0.056832045546970314, "grad_norm": 0.4147314429283142, "learning_rate": 1.1366409109394063e-06, "loss": 0.4714, "step": 559 }, { "epoch": 0.056933712891419276, "grad_norm": 0.4492015838623047, "learning_rate": 1.1386742578283856e-06, "loss": 0.5071, "step": 560 }, { "epoch": 0.05703538023586824, "grad_norm": 0.41831979155540466, "learning_rate": 1.140707604717365e-06, "loss": 0.4841, "step": 561 }, { "epoch": 0.0571370475803172, "grad_norm": 0.41760897636413574, "learning_rate": 1.1427409516063442e-06, "loss": 0.4806, "step": 562 }, { "epoch": 0.05723871492476616, "grad_norm": 0.4189195930957794, "learning_rate": 1.1447742984953232e-06, "loss": 0.4775, "step": 563 }, { "epoch": 0.057340382269215125, "grad_norm": 0.46528592705726624, "learning_rate": 1.1468076453843025e-06, "loss": 0.4893, "step": 564 }, { "epoch": 0.057442049613664094, "grad_norm": 0.4627227783203125, "learning_rate": 1.148840992273282e-06, "loss": 0.4703, "step": 565 }, { "epoch": 0.057543716958113056, "grad_norm": 0.4669135808944702, "learning_rate": 1.1508743391622613e-06, "loss": 0.519, "step": 566 }, { "epoch": 0.05764538430256202, "grad_norm": 0.47210806608200073, "learning_rate": 1.1529076860512406e-06, "loss": 0.4719, "step": 567 }, { "epoch": 0.05774705164701098, "grad_norm": 0.4893702268600464, "learning_rate": 1.1549410329402196e-06, "loss": 0.451, "step": 568 }, { "epoch": 0.05784871899145994, "grad_norm": 0.42399898171424866, "learning_rate": 1.156974379829199e-06, "loss": 0.5026, "step": 569 }, { "epoch": 0.057950386335908904, "grad_norm": 0.40870726108551025, "learning_rate": 1.1590077267181782e-06, "loss": 0.4987, "step": 570 }, { "epoch": 0.05805205368035787, "grad_norm": 0.4554711580276489, "learning_rate": 1.1610410736071575e-06, "loss": 0.5253, "step": 571 }, { "epoch": 0.05815372102480683, "grad_norm": 0.5190366506576538, "learning_rate": 1.1630744204961367e-06, "loss": 0.5482, "step": 572 }, { "epoch": 0.0582553883692558, "grad_norm": 0.39432293176651, "learning_rate": 1.165107767385116e-06, "loss": 0.4993, "step": 573 }, { "epoch": 0.05835705571370476, "grad_norm": 0.41972815990448, "learning_rate": 1.1671411142740953e-06, "loss": 0.478, "step": 574 }, { "epoch": 0.05845872305815372, "grad_norm": 0.452543169260025, "learning_rate": 1.1691744611630746e-06, "loss": 0.4951, "step": 575 }, { "epoch": 0.058560390402602684, "grad_norm": 0.4106872081756592, "learning_rate": 1.1712078080520538e-06, "loss": 0.4898, "step": 576 }, { "epoch": 0.058662057747051646, "grad_norm": 0.4087735712528229, "learning_rate": 1.1732411549410331e-06, "loss": 0.4767, "step": 577 }, { "epoch": 0.05876372509150061, "grad_norm": 0.45593711733818054, "learning_rate": 1.1752745018300122e-06, "loss": 0.4912, "step": 578 }, { "epoch": 0.05886539243594957, "grad_norm": 0.46376705169677734, "learning_rate": 1.1773078487189915e-06, "loss": 0.4825, "step": 579 }, { "epoch": 0.05896705978039853, "grad_norm": 0.4356173276901245, "learning_rate": 1.1793411956079707e-06, "loss": 0.5423, "step": 580 }, { "epoch": 0.0590687271248475, "grad_norm": 0.4697704613208771, "learning_rate": 1.1813745424969502e-06, "loss": 0.4894, "step": 581 }, { "epoch": 0.059170394469296464, "grad_norm": 0.44700270891189575, "learning_rate": 1.1834078893859293e-06, "loss": 0.4654, "step": 582 }, { "epoch": 0.059272061813745426, "grad_norm": 0.464592844247818, "learning_rate": 1.1854412362749086e-06, "loss": 0.4945, "step": 583 }, { "epoch": 0.05937372915819439, "grad_norm": 0.5060032606124878, "learning_rate": 1.1874745831638878e-06, "loss": 0.479, "step": 584 }, { "epoch": 0.05947539650264335, "grad_norm": 0.43086713552474976, "learning_rate": 1.1895079300528671e-06, "loss": 0.5114, "step": 585 }, { "epoch": 0.05957706384709231, "grad_norm": 0.4065074920654297, "learning_rate": 1.1915412769418464e-06, "loss": 0.4577, "step": 586 }, { "epoch": 0.059678731191541275, "grad_norm": 0.49677780270576477, "learning_rate": 1.1935746238308257e-06, "loss": 0.4831, "step": 587 }, { "epoch": 0.05978039853599024, "grad_norm": 0.4813932180404663, "learning_rate": 1.195607970719805e-06, "loss": 0.4552, "step": 588 }, { "epoch": 0.059882065880439206, "grad_norm": 0.4764229357242584, "learning_rate": 1.1976413176087842e-06, "loss": 0.539, "step": 589 }, { "epoch": 0.05998373322488817, "grad_norm": 0.4312247633934021, "learning_rate": 1.1996746644977635e-06, "loss": 0.5028, "step": 590 }, { "epoch": 0.06008540056933713, "grad_norm": 0.4556752145290375, "learning_rate": 1.2017080113867428e-06, "loss": 0.5127, "step": 591 }, { "epoch": 0.06018706791378609, "grad_norm": 0.4022732675075531, "learning_rate": 1.203741358275722e-06, "loss": 0.4752, "step": 592 }, { "epoch": 0.060288735258235054, "grad_norm": 0.43643075227737427, "learning_rate": 1.2057747051647011e-06, "loss": 0.4898, "step": 593 }, { "epoch": 0.06039040260268402, "grad_norm": 0.4864041209220886, "learning_rate": 1.2078080520536804e-06, "loss": 0.4915, "step": 594 }, { "epoch": 0.06049206994713298, "grad_norm": 0.440122127532959, "learning_rate": 1.2098413989426597e-06, "loss": 0.5004, "step": 595 }, { "epoch": 0.06059373729158194, "grad_norm": 0.47135981917381287, "learning_rate": 1.211874745831639e-06, "loss": 0.4779, "step": 596 }, { "epoch": 0.06069540463603091, "grad_norm": 0.46108224987983704, "learning_rate": 1.2139080927206182e-06, "loss": 0.4805, "step": 597 }, { "epoch": 0.06079707198047987, "grad_norm": 0.41167473793029785, "learning_rate": 1.2159414396095975e-06, "loss": 0.5154, "step": 598 }, { "epoch": 0.060898739324928834, "grad_norm": 0.3927072286605835, "learning_rate": 1.2179747864985768e-06, "loss": 0.4608, "step": 599 }, { "epoch": 0.061000406669377796, "grad_norm": 0.45785507559776306, "learning_rate": 1.220008133387556e-06, "loss": 0.4923, "step": 600 }, { "epoch": 0.06110207401382676, "grad_norm": 0.4431859850883484, "learning_rate": 1.2220414802765353e-06, "loss": 0.4555, "step": 601 }, { "epoch": 0.06120374135827572, "grad_norm": 0.4073033630847931, "learning_rate": 1.2240748271655146e-06, "loss": 0.4406, "step": 602 }, { "epoch": 0.06130540870272468, "grad_norm": 0.3881551921367645, "learning_rate": 1.2261081740544937e-06, "loss": 0.4979, "step": 603 }, { "epoch": 0.061407076047173645, "grad_norm": 0.3842197060585022, "learning_rate": 1.228141520943473e-06, "loss": 0.4942, "step": 604 }, { "epoch": 0.061508743391622614, "grad_norm": 0.4699423015117645, "learning_rate": 1.2301748678324524e-06, "loss": 0.4724, "step": 605 }, { "epoch": 0.061610410736071576, "grad_norm": 0.4850243926048279, "learning_rate": 1.2322082147214317e-06, "loss": 0.5329, "step": 606 }, { "epoch": 0.06171207808052054, "grad_norm": 0.40967315435409546, "learning_rate": 1.2342415616104108e-06, "loss": 0.474, "step": 607 }, { "epoch": 0.0618137454249695, "grad_norm": 0.4603155255317688, "learning_rate": 1.23627490849939e-06, "loss": 0.4743, "step": 608 }, { "epoch": 0.06191541276941846, "grad_norm": 0.4027801752090454, "learning_rate": 1.2383082553883693e-06, "loss": 0.5125, "step": 609 }, { "epoch": 0.062017080113867425, "grad_norm": 0.4529291093349457, "learning_rate": 1.2403416022773486e-06, "loss": 0.4912, "step": 610 }, { "epoch": 0.06211874745831639, "grad_norm": 0.42813947796821594, "learning_rate": 1.2423749491663279e-06, "loss": 0.5214, "step": 611 }, { "epoch": 0.06222041480276535, "grad_norm": 0.42272722721099854, "learning_rate": 1.2444082960553071e-06, "loss": 0.4973, "step": 612 }, { "epoch": 0.06232208214721432, "grad_norm": 0.4676191508769989, "learning_rate": 1.2464416429442864e-06, "loss": 0.524, "step": 613 }, { "epoch": 0.06242374949166328, "grad_norm": 0.40574783086776733, "learning_rate": 1.2484749898332657e-06, "loss": 0.4414, "step": 614 }, { "epoch": 0.06252541683611224, "grad_norm": 0.43442943692207336, "learning_rate": 1.2505083367222448e-06, "loss": 0.4918, "step": 615 }, { "epoch": 0.0626270841805612, "grad_norm": 0.46796002984046936, "learning_rate": 1.2525416836112243e-06, "loss": 0.4863, "step": 616 }, { "epoch": 0.06272875152501017, "grad_norm": 0.4463183581829071, "learning_rate": 1.2545750305002035e-06, "loss": 0.5016, "step": 617 }, { "epoch": 0.06283041886945913, "grad_norm": 0.4635653495788574, "learning_rate": 1.2566083773891826e-06, "loss": 0.4695, "step": 618 }, { "epoch": 0.0629320862139081, "grad_norm": 0.4097527265548706, "learning_rate": 1.258641724278162e-06, "loss": 0.4899, "step": 619 }, { "epoch": 0.06303375355835705, "grad_norm": 0.41007697582244873, "learning_rate": 1.2606750711671411e-06, "loss": 0.4542, "step": 620 }, { "epoch": 0.06313542090280602, "grad_norm": 0.42737531661987305, "learning_rate": 1.2627084180561206e-06, "loss": 0.4854, "step": 621 }, { "epoch": 0.06323708824725498, "grad_norm": 0.4391747713088989, "learning_rate": 1.2647417649450997e-06, "loss": 0.4779, "step": 622 }, { "epoch": 0.06333875559170395, "grad_norm": 0.4203856885433197, "learning_rate": 1.266775111834079e-06, "loss": 0.4698, "step": 623 }, { "epoch": 0.0634404229361529, "grad_norm": 0.4453001320362091, "learning_rate": 1.268808458723058e-06, "loss": 0.5205, "step": 624 }, { "epoch": 0.06354209028060187, "grad_norm": 0.41025692224502563, "learning_rate": 1.2708418056120375e-06, "loss": 0.434, "step": 625 }, { "epoch": 0.06364375762505084, "grad_norm": 0.4319896101951599, "learning_rate": 1.2728751525010168e-06, "loss": 0.5254, "step": 626 }, { "epoch": 0.0637454249694998, "grad_norm": 0.408408522605896, "learning_rate": 1.274908499389996e-06, "loss": 0.4978, "step": 627 }, { "epoch": 0.06384709231394876, "grad_norm": 0.42409440875053406, "learning_rate": 1.2769418462789754e-06, "loss": 0.488, "step": 628 }, { "epoch": 0.06394875965839772, "grad_norm": 0.4764566421508789, "learning_rate": 1.2789751931679544e-06, "loss": 0.4943, "step": 629 }, { "epoch": 0.06405042700284669, "grad_norm": 0.42563745379447937, "learning_rate": 1.281008540056934e-06, "loss": 0.5011, "step": 630 }, { "epoch": 0.06415209434729564, "grad_norm": 0.4741663932800293, "learning_rate": 1.283041886945913e-06, "loss": 0.5289, "step": 631 }, { "epoch": 0.06425376169174461, "grad_norm": 0.4195590019226074, "learning_rate": 1.2850752338348925e-06, "loss": 0.5077, "step": 632 }, { "epoch": 0.06435542903619357, "grad_norm": 0.3990451693534851, "learning_rate": 1.2871085807238715e-06, "loss": 0.4902, "step": 633 }, { "epoch": 0.06445709638064254, "grad_norm": 0.39492881298065186, "learning_rate": 1.2891419276128508e-06, "loss": 0.4704, "step": 634 }, { "epoch": 0.0645587637250915, "grad_norm": 0.4490697383880615, "learning_rate": 1.2911752745018303e-06, "loss": 0.479, "step": 635 }, { "epoch": 0.06466043106954046, "grad_norm": 0.45601725578308105, "learning_rate": 1.2932086213908094e-06, "loss": 0.5284, "step": 636 }, { "epoch": 0.06476209841398943, "grad_norm": 0.4530569612979889, "learning_rate": 1.2952419682797886e-06, "loss": 0.4741, "step": 637 }, { "epoch": 0.06486376575843839, "grad_norm": 0.4542689621448517, "learning_rate": 1.297275315168768e-06, "loss": 0.4807, "step": 638 }, { "epoch": 0.06496543310288735, "grad_norm": 0.5080663561820984, "learning_rate": 1.2993086620577472e-06, "loss": 0.5321, "step": 639 }, { "epoch": 0.06506710044733631, "grad_norm": 0.3731689155101776, "learning_rate": 1.3013420089467262e-06, "loss": 0.4858, "step": 640 }, { "epoch": 0.06516876779178528, "grad_norm": 0.49011629819869995, "learning_rate": 1.3033753558357057e-06, "loss": 0.4901, "step": 641 }, { "epoch": 0.06527043513623425, "grad_norm": 0.42405757308006287, "learning_rate": 1.305408702724685e-06, "loss": 0.5262, "step": 642 }, { "epoch": 0.0653721024806832, "grad_norm": 0.4409715235233307, "learning_rate": 1.307442049613664e-06, "loss": 0.4853, "step": 643 }, { "epoch": 0.06547376982513217, "grad_norm": 0.4920077621936798, "learning_rate": 1.3094753965026436e-06, "loss": 0.476, "step": 644 }, { "epoch": 0.06557543716958113, "grad_norm": 0.40444615483283997, "learning_rate": 1.3115087433916226e-06, "loss": 0.4769, "step": 645 }, { "epoch": 0.0656771045140301, "grad_norm": 0.4637254476547241, "learning_rate": 1.3135420902806021e-06, "loss": 0.4719, "step": 646 }, { "epoch": 0.06577877185847905, "grad_norm": 0.42432454228401184, "learning_rate": 1.3155754371695812e-06, "loss": 0.4785, "step": 647 }, { "epoch": 0.06588043920292802, "grad_norm": 0.534658670425415, "learning_rate": 1.3176087840585605e-06, "loss": 0.495, "step": 648 }, { "epoch": 0.06598210654737698, "grad_norm": 0.46007978916168213, "learning_rate": 1.3196421309475395e-06, "loss": 0.4722, "step": 649 }, { "epoch": 0.06608377389182594, "grad_norm": 0.5129948854446411, "learning_rate": 1.321675477836519e-06, "loss": 0.4951, "step": 650 }, { "epoch": 0.06618544123627491, "grad_norm": 0.40460774302482605, "learning_rate": 1.3237088247254983e-06, "loss": 0.5033, "step": 651 }, { "epoch": 0.06628710858072387, "grad_norm": 0.42135170102119446, "learning_rate": 1.3257421716144776e-06, "loss": 0.4546, "step": 652 }, { "epoch": 0.06638877592517284, "grad_norm": 0.46884989738464355, "learning_rate": 1.3277755185034568e-06, "loss": 0.46, "step": 653 }, { "epoch": 0.0664904432696218, "grad_norm": 0.4831921458244324, "learning_rate": 1.329808865392436e-06, "loss": 0.4911, "step": 654 }, { "epoch": 0.06659211061407076, "grad_norm": 0.45308613777160645, "learning_rate": 1.3318422122814154e-06, "loss": 0.5203, "step": 655 }, { "epoch": 0.06669377795851972, "grad_norm": 0.44124636054039, "learning_rate": 1.3338755591703945e-06, "loss": 0.471, "step": 656 }, { "epoch": 0.06679544530296869, "grad_norm": 0.44585174322128296, "learning_rate": 1.335908906059374e-06, "loss": 0.48, "step": 657 }, { "epoch": 0.06689711264741766, "grad_norm": 0.46710360050201416, "learning_rate": 1.3379422529483532e-06, "loss": 0.5042, "step": 658 }, { "epoch": 0.06699877999186661, "grad_norm": 0.4442225396633148, "learning_rate": 1.3399755998373323e-06, "loss": 0.5447, "step": 659 }, { "epoch": 0.06710044733631558, "grad_norm": 0.45622557401657104, "learning_rate": 1.3420089467263118e-06, "loss": 0.5155, "step": 660 }, { "epoch": 0.06720211468076454, "grad_norm": 0.4395131468772888, "learning_rate": 1.3440422936152908e-06, "loss": 0.4836, "step": 661 }, { "epoch": 0.0673037820252135, "grad_norm": 0.44457945227622986, "learning_rate": 1.3460756405042701e-06, "loss": 0.4916, "step": 662 }, { "epoch": 0.06740544936966246, "grad_norm": 0.44476425647735596, "learning_rate": 1.3481089873932494e-06, "loss": 0.4988, "step": 663 }, { "epoch": 0.06750711671411143, "grad_norm": 0.5132933855056763, "learning_rate": 1.3501423342822287e-06, "loss": 0.526, "step": 664 }, { "epoch": 0.06760878405856038, "grad_norm": 0.45858803391456604, "learning_rate": 1.3521756811712077e-06, "loss": 0.5113, "step": 665 }, { "epoch": 0.06771045140300935, "grad_norm": 0.5175828337669373, "learning_rate": 1.3542090280601872e-06, "loss": 0.4711, "step": 666 }, { "epoch": 0.06781211874745832, "grad_norm": 0.5160278081893921, "learning_rate": 1.3562423749491665e-06, "loss": 0.4598, "step": 667 }, { "epoch": 0.06791378609190728, "grad_norm": 0.4701520502567291, "learning_rate": 1.3582757218381456e-06, "loss": 0.5175, "step": 668 }, { "epoch": 0.06801545343635625, "grad_norm": 0.5177890658378601, "learning_rate": 1.360309068727125e-06, "loss": 0.4923, "step": 669 }, { "epoch": 0.0681171207808052, "grad_norm": 0.495369017124176, "learning_rate": 1.3623424156161041e-06, "loss": 0.4861, "step": 670 }, { "epoch": 0.06821878812525417, "grad_norm": 0.42965036630630493, "learning_rate": 1.3643757625050836e-06, "loss": 0.4892, "step": 671 }, { "epoch": 0.06832045546970313, "grad_norm": 0.494777113199234, "learning_rate": 1.3664091093940627e-06, "loss": 0.49, "step": 672 }, { "epoch": 0.0684221228141521, "grad_norm": 0.5228115320205688, "learning_rate": 1.368442456283042e-06, "loss": 0.4389, "step": 673 }, { "epoch": 0.06852379015860106, "grad_norm": 0.42423784732818604, "learning_rate": 1.3704758031720214e-06, "loss": 0.5118, "step": 674 }, { "epoch": 0.06862545750305002, "grad_norm": 0.4575692117214203, "learning_rate": 1.3725091500610005e-06, "loss": 0.5006, "step": 675 }, { "epoch": 0.06872712484749899, "grad_norm": 0.4995758831501007, "learning_rate": 1.37454249694998e-06, "loss": 0.5031, "step": 676 }, { "epoch": 0.06882879219194794, "grad_norm": 0.506231427192688, "learning_rate": 1.376575843838959e-06, "loss": 0.4883, "step": 677 }, { "epoch": 0.06893045953639691, "grad_norm": 0.410285621881485, "learning_rate": 1.3786091907279383e-06, "loss": 0.4537, "step": 678 }, { "epoch": 0.06903212688084587, "grad_norm": 0.43074819445610046, "learning_rate": 1.3806425376169174e-06, "loss": 0.508, "step": 679 }, { "epoch": 0.06913379422529484, "grad_norm": 0.4410320818424225, "learning_rate": 1.3826758845058969e-06, "loss": 0.4777, "step": 680 }, { "epoch": 0.06923546156974379, "grad_norm": 0.43899989128112793, "learning_rate": 1.384709231394876e-06, "loss": 0.4739, "step": 681 }, { "epoch": 0.06933712891419276, "grad_norm": 0.46809765696525574, "learning_rate": 1.3867425782838554e-06, "loss": 0.4726, "step": 682 }, { "epoch": 0.06943879625864173, "grad_norm": 0.4383663833141327, "learning_rate": 1.3887759251728347e-06, "loss": 0.4844, "step": 683 }, { "epoch": 0.06954046360309069, "grad_norm": 0.44504067301750183, "learning_rate": 1.3908092720618138e-06, "loss": 0.4822, "step": 684 }, { "epoch": 0.06964213094753965, "grad_norm": 0.45402252674102783, "learning_rate": 1.3928426189507933e-06, "loss": 0.5016, "step": 685 }, { "epoch": 0.06974379829198861, "grad_norm": 0.49329763650894165, "learning_rate": 1.3948759658397723e-06, "loss": 0.4868, "step": 686 }, { "epoch": 0.06984546563643758, "grad_norm": 0.49637025594711304, "learning_rate": 1.3969093127287516e-06, "loss": 0.5063, "step": 687 }, { "epoch": 0.06994713298088653, "grad_norm": 0.4859904944896698, "learning_rate": 1.3989426596177309e-06, "loss": 0.4779, "step": 688 }, { "epoch": 0.0700488003253355, "grad_norm": 0.42835524678230286, "learning_rate": 1.4009760065067102e-06, "loss": 0.4784, "step": 689 }, { "epoch": 0.07015046766978447, "grad_norm": 0.41405758261680603, "learning_rate": 1.4030093533956896e-06, "loss": 0.4703, "step": 690 }, { "epoch": 0.07025213501423343, "grad_norm": 0.4227381944656372, "learning_rate": 1.4050427002846687e-06, "loss": 0.4484, "step": 691 }, { "epoch": 0.0703538023586824, "grad_norm": 0.47030049562454224, "learning_rate": 1.407076047173648e-06, "loss": 0.4914, "step": 692 }, { "epoch": 0.07045546970313135, "grad_norm": 0.46459686756134033, "learning_rate": 1.409109394062627e-06, "loss": 0.4702, "step": 693 }, { "epoch": 0.07055713704758032, "grad_norm": 0.43718162178993225, "learning_rate": 1.4111427409516065e-06, "loss": 0.4539, "step": 694 }, { "epoch": 0.07065880439202928, "grad_norm": 0.49321117997169495, "learning_rate": 1.4131760878405856e-06, "loss": 0.4737, "step": 695 }, { "epoch": 0.07076047173647824, "grad_norm": 0.4384630620479584, "learning_rate": 1.415209434729565e-06, "loss": 0.5325, "step": 696 }, { "epoch": 0.0708621390809272, "grad_norm": 0.4159007966518402, "learning_rate": 1.4172427816185441e-06, "loss": 0.4577, "step": 697 }, { "epoch": 0.07096380642537617, "grad_norm": 0.4114665985107422, "learning_rate": 1.4192761285075234e-06, "loss": 0.4551, "step": 698 }, { "epoch": 0.07106547376982514, "grad_norm": 0.43806982040405273, "learning_rate": 1.421309475396503e-06, "loss": 0.4964, "step": 699 }, { "epoch": 0.0711671411142741, "grad_norm": 0.46691396832466125, "learning_rate": 1.423342822285482e-06, "loss": 0.4923, "step": 700 }, { "epoch": 0.07126880845872306, "grad_norm": 0.4589865803718567, "learning_rate": 1.4253761691744615e-06, "loss": 0.4849, "step": 701 }, { "epoch": 0.07137047580317202, "grad_norm": 0.42230603098869324, "learning_rate": 1.4274095160634405e-06, "loss": 0.4827, "step": 702 }, { "epoch": 0.07147214314762099, "grad_norm": 0.412904292345047, "learning_rate": 1.4294428629524198e-06, "loss": 0.4822, "step": 703 }, { "epoch": 0.07157381049206994, "grad_norm": 0.4128805696964264, "learning_rate": 1.4314762098413989e-06, "loss": 0.4724, "step": 704 }, { "epoch": 0.07167547783651891, "grad_norm": 0.4036747217178345, "learning_rate": 1.4335095567303784e-06, "loss": 0.485, "step": 705 }, { "epoch": 0.07177714518096787, "grad_norm": 0.4409852623939514, "learning_rate": 1.4355429036193574e-06, "loss": 0.4618, "step": 706 }, { "epoch": 0.07187881252541684, "grad_norm": 0.4137263596057892, "learning_rate": 1.437576250508337e-06, "loss": 0.4518, "step": 707 }, { "epoch": 0.0719804798698658, "grad_norm": 0.4585385024547577, "learning_rate": 1.4396095973973162e-06, "loss": 0.4826, "step": 708 }, { "epoch": 0.07208214721431476, "grad_norm": 0.4147311747074127, "learning_rate": 1.4416429442862953e-06, "loss": 0.4905, "step": 709 }, { "epoch": 0.07218381455876373, "grad_norm": 0.40784603357315063, "learning_rate": 1.4436762911752747e-06, "loss": 0.4932, "step": 710 }, { "epoch": 0.07228548190321268, "grad_norm": 0.4026813507080078, "learning_rate": 1.4457096380642538e-06, "loss": 0.4688, "step": 711 }, { "epoch": 0.07238714924766165, "grad_norm": 0.44282910227775574, "learning_rate": 1.447742984953233e-06, "loss": 0.4724, "step": 712 }, { "epoch": 0.07248881659211061, "grad_norm": 0.4422295093536377, "learning_rate": 1.4497763318422124e-06, "loss": 0.4652, "step": 713 }, { "epoch": 0.07259048393655958, "grad_norm": 0.42249321937561035, "learning_rate": 1.4518096787311916e-06, "loss": 0.4587, "step": 714 }, { "epoch": 0.07269215128100855, "grad_norm": 0.4513908922672272, "learning_rate": 1.4538430256201711e-06, "loss": 0.4948, "step": 715 }, { "epoch": 0.0727938186254575, "grad_norm": 0.4027200937271118, "learning_rate": 1.4558763725091502e-06, "loss": 0.4861, "step": 716 }, { "epoch": 0.07289548596990647, "grad_norm": 0.434726357460022, "learning_rate": 1.4579097193981295e-06, "loss": 0.4382, "step": 717 }, { "epoch": 0.07299715331435543, "grad_norm": 0.41772976517677307, "learning_rate": 1.4599430662871087e-06, "loss": 0.4821, "step": 718 }, { "epoch": 0.0730988206588044, "grad_norm": 0.4175754487514496, "learning_rate": 1.461976413176088e-06, "loss": 0.4729, "step": 719 }, { "epoch": 0.07320048800325335, "grad_norm": 0.40409910678863525, "learning_rate": 1.464009760065067e-06, "loss": 0.4798, "step": 720 }, { "epoch": 0.07330215534770232, "grad_norm": 0.4559865891933441, "learning_rate": 1.4660431069540466e-06, "loss": 0.4576, "step": 721 }, { "epoch": 0.07340382269215127, "grad_norm": 0.48112741112709045, "learning_rate": 1.4680764538430256e-06, "loss": 0.4823, "step": 722 }, { "epoch": 0.07350549003660024, "grad_norm": 0.43786299228668213, "learning_rate": 1.470109800732005e-06, "loss": 0.5002, "step": 723 }, { "epoch": 0.07360715738104921, "grad_norm": 0.4169076681137085, "learning_rate": 1.4721431476209844e-06, "loss": 0.4609, "step": 724 }, { "epoch": 0.07370882472549817, "grad_norm": 0.4010978639125824, "learning_rate": 1.4741764945099635e-06, "loss": 0.4486, "step": 725 }, { "epoch": 0.07381049206994714, "grad_norm": 0.40815892815589905, "learning_rate": 1.476209841398943e-06, "loss": 0.5179, "step": 726 }, { "epoch": 0.07391215941439609, "grad_norm": 0.3841859996318817, "learning_rate": 1.478243188287922e-06, "loss": 0.4816, "step": 727 }, { "epoch": 0.07401382675884506, "grad_norm": 0.43549495935440063, "learning_rate": 1.4802765351769013e-06, "loss": 0.4874, "step": 728 }, { "epoch": 0.07411549410329402, "grad_norm": 0.41649454832077026, "learning_rate": 1.4823098820658804e-06, "loss": 0.4402, "step": 729 }, { "epoch": 0.07421716144774299, "grad_norm": 0.426058292388916, "learning_rate": 1.4843432289548598e-06, "loss": 0.4957, "step": 730 }, { "epoch": 0.07431882879219195, "grad_norm": 0.4238767921924591, "learning_rate": 1.4863765758438391e-06, "loss": 0.4788, "step": 731 }, { "epoch": 0.07442049613664091, "grad_norm": 0.43161773681640625, "learning_rate": 1.4884099227328184e-06, "loss": 0.4996, "step": 732 }, { "epoch": 0.07452216348108988, "grad_norm": 0.437599241733551, "learning_rate": 1.4904432696217977e-06, "loss": 0.4973, "step": 733 }, { "epoch": 0.07462383082553883, "grad_norm": 0.4339340329170227, "learning_rate": 1.4924766165107767e-06, "loss": 0.5327, "step": 734 }, { "epoch": 0.0747254981699878, "grad_norm": 0.4429869055747986, "learning_rate": 1.4945099633997562e-06, "loss": 0.4807, "step": 735 }, { "epoch": 0.07482716551443676, "grad_norm": 0.4406000077724457, "learning_rate": 1.4965433102887353e-06, "loss": 0.4726, "step": 736 }, { "epoch": 0.07492883285888573, "grad_norm": 0.44256311655044556, "learning_rate": 1.4985766571777146e-06, "loss": 0.5011, "step": 737 }, { "epoch": 0.07503050020333468, "grad_norm": 0.4490628242492676, "learning_rate": 1.5006100040666938e-06, "loss": 0.4582, "step": 738 }, { "epoch": 0.07513216754778365, "grad_norm": 0.4388032853603363, "learning_rate": 1.5026433509556731e-06, "loss": 0.4829, "step": 739 }, { "epoch": 0.07523383489223262, "grad_norm": 0.4435540735721588, "learning_rate": 1.5046766978446526e-06, "loss": 0.4514, "step": 740 }, { "epoch": 0.07533550223668158, "grad_norm": 0.41471222043037415, "learning_rate": 1.5067100447336317e-06, "loss": 0.4873, "step": 741 }, { "epoch": 0.07543716958113054, "grad_norm": 0.4686475098133087, "learning_rate": 1.508743391622611e-06, "loss": 0.5049, "step": 742 }, { "epoch": 0.0755388369255795, "grad_norm": 0.4629373848438263, "learning_rate": 1.5107767385115902e-06, "loss": 0.456, "step": 743 }, { "epoch": 0.07564050427002847, "grad_norm": 0.4760156571865082, "learning_rate": 1.5128100854005695e-06, "loss": 0.4463, "step": 744 }, { "epoch": 0.07574217161447742, "grad_norm": 0.43287545442581177, "learning_rate": 1.5148434322895486e-06, "loss": 0.462, "step": 745 }, { "epoch": 0.0758438389589264, "grad_norm": 0.4248722195625305, "learning_rate": 1.516876779178528e-06, "loss": 0.4598, "step": 746 }, { "epoch": 0.07594550630337536, "grad_norm": 0.4305913746356964, "learning_rate": 1.5189101260675073e-06, "loss": 0.4957, "step": 747 }, { "epoch": 0.07604717364782432, "grad_norm": 0.40971097350120544, "learning_rate": 1.5209434729564864e-06, "loss": 0.4774, "step": 748 }, { "epoch": 0.07614884099227329, "grad_norm": 0.42846769094467163, "learning_rate": 1.5229768198454659e-06, "loss": 0.4646, "step": 749 }, { "epoch": 0.07625050833672224, "grad_norm": 0.475768506526947, "learning_rate": 1.525010166734445e-06, "loss": 0.4792, "step": 750 }, { "epoch": 0.07635217568117121, "grad_norm": 0.44488781690597534, "learning_rate": 1.5270435136234244e-06, "loss": 0.4907, "step": 751 }, { "epoch": 0.07645384302562017, "grad_norm": 0.4282025098800659, "learning_rate": 1.5290768605124035e-06, "loss": 0.4748, "step": 752 }, { "epoch": 0.07655551037006914, "grad_norm": 0.43197697401046753, "learning_rate": 1.5311102074013828e-06, "loss": 0.4631, "step": 753 }, { "epoch": 0.07665717771451809, "grad_norm": 0.4292028844356537, "learning_rate": 1.5331435542903618e-06, "loss": 0.44, "step": 754 }, { "epoch": 0.07675884505896706, "grad_norm": 0.38603124022483826, "learning_rate": 1.5351769011793413e-06, "loss": 0.4838, "step": 755 }, { "epoch": 0.07686051240341603, "grad_norm": 0.42480745911598206, "learning_rate": 1.5372102480683206e-06, "loss": 0.4697, "step": 756 }, { "epoch": 0.07696217974786498, "grad_norm": 0.3940789997577667, "learning_rate": 1.5392435949572999e-06, "loss": 0.4902, "step": 757 }, { "epoch": 0.07706384709231395, "grad_norm": 0.3943816125392914, "learning_rate": 1.5412769418462792e-06, "loss": 0.4873, "step": 758 }, { "epoch": 0.07716551443676291, "grad_norm": 0.4202495813369751, "learning_rate": 1.5433102887352582e-06, "loss": 0.478, "step": 759 }, { "epoch": 0.07726718178121188, "grad_norm": 0.4385833442211151, "learning_rate": 1.5453436356242377e-06, "loss": 0.4855, "step": 760 }, { "epoch": 0.07736884912566083, "grad_norm": 0.43452924489974976, "learning_rate": 1.5473769825132168e-06, "loss": 0.4789, "step": 761 }, { "epoch": 0.0774705164701098, "grad_norm": 0.4005676805973053, "learning_rate": 1.5494103294021963e-06, "loss": 0.5103, "step": 762 }, { "epoch": 0.07757218381455877, "grad_norm": 0.48270612955093384, "learning_rate": 1.5514436762911755e-06, "loss": 0.4796, "step": 763 }, { "epoch": 0.07767385115900773, "grad_norm": 0.4427226483821869, "learning_rate": 1.5534770231801546e-06, "loss": 0.4981, "step": 764 }, { "epoch": 0.0777755185034567, "grad_norm": 0.42238664627075195, "learning_rate": 1.555510370069134e-06, "loss": 0.4687, "step": 765 }, { "epoch": 0.07787718584790565, "grad_norm": 0.4166165292263031, "learning_rate": 1.5575437169581132e-06, "loss": 0.4846, "step": 766 }, { "epoch": 0.07797885319235462, "grad_norm": 0.420974463224411, "learning_rate": 1.5595770638470924e-06, "loss": 0.4413, "step": 767 }, { "epoch": 0.07808052053680357, "grad_norm": 0.4438067078590393, "learning_rate": 1.5616104107360717e-06, "loss": 0.5124, "step": 768 }, { "epoch": 0.07818218788125254, "grad_norm": 0.44130295515060425, "learning_rate": 1.563643757625051e-06, "loss": 0.4849, "step": 769 }, { "epoch": 0.0782838552257015, "grad_norm": 0.45883795619010925, "learning_rate": 1.56567710451403e-06, "loss": 0.4958, "step": 770 }, { "epoch": 0.07838552257015047, "grad_norm": 0.4376908242702484, "learning_rate": 1.5677104514030095e-06, "loss": 0.4501, "step": 771 }, { "epoch": 0.07848718991459944, "grad_norm": 0.42439374327659607, "learning_rate": 1.5697437982919888e-06, "loss": 0.4614, "step": 772 }, { "epoch": 0.07858885725904839, "grad_norm": 0.46415087580680847, "learning_rate": 1.5717771451809679e-06, "loss": 0.4677, "step": 773 }, { "epoch": 0.07869052460349736, "grad_norm": 0.43144848942756653, "learning_rate": 1.5738104920699474e-06, "loss": 0.4657, "step": 774 }, { "epoch": 0.07879219194794632, "grad_norm": 0.4359631836414337, "learning_rate": 1.5758438389589264e-06, "loss": 0.445, "step": 775 }, { "epoch": 0.07889385929239529, "grad_norm": 0.4511893391609192, "learning_rate": 1.577877185847906e-06, "loss": 0.4791, "step": 776 }, { "epoch": 0.07899552663684424, "grad_norm": 0.42409539222717285, "learning_rate": 1.579910532736885e-06, "loss": 0.4616, "step": 777 }, { "epoch": 0.07909719398129321, "grad_norm": 0.40381067991256714, "learning_rate": 1.5819438796258643e-06, "loss": 0.4558, "step": 778 }, { "epoch": 0.07919886132574216, "grad_norm": 0.4634299874305725, "learning_rate": 1.5839772265148433e-06, "loss": 0.5135, "step": 779 }, { "epoch": 0.07930052867019113, "grad_norm": 0.4677280783653259, "learning_rate": 1.5860105734038228e-06, "loss": 0.4666, "step": 780 }, { "epoch": 0.0794021960146401, "grad_norm": 0.4186694025993347, "learning_rate": 1.588043920292802e-06, "loss": 0.4643, "step": 781 }, { "epoch": 0.07950386335908906, "grad_norm": 0.4253520369529724, "learning_rate": 1.5900772671817814e-06, "loss": 0.4773, "step": 782 }, { "epoch": 0.07960553070353803, "grad_norm": 0.4096919894218445, "learning_rate": 1.5921106140707606e-06, "loss": 0.4575, "step": 783 }, { "epoch": 0.07970719804798698, "grad_norm": 0.4612966775894165, "learning_rate": 1.5941439609597397e-06, "loss": 0.4772, "step": 784 }, { "epoch": 0.07980886539243595, "grad_norm": 0.453443706035614, "learning_rate": 1.5961773078487192e-06, "loss": 0.4949, "step": 785 }, { "epoch": 0.0799105327368849, "grad_norm": 0.4133256673812866, "learning_rate": 1.5982106547376983e-06, "loss": 0.4676, "step": 786 }, { "epoch": 0.08001220008133388, "grad_norm": 0.4610084593296051, "learning_rate": 1.6002440016266777e-06, "loss": 0.4866, "step": 787 }, { "epoch": 0.08011386742578284, "grad_norm": 0.4530973434448242, "learning_rate": 1.602277348515657e-06, "loss": 0.4755, "step": 788 }, { "epoch": 0.0802155347702318, "grad_norm": 0.4293586313724518, "learning_rate": 1.604310695404636e-06, "loss": 0.4967, "step": 789 }, { "epoch": 0.08031720211468077, "grad_norm": 0.43435871601104736, "learning_rate": 1.6063440422936156e-06, "loss": 0.4741, "step": 790 }, { "epoch": 0.08041886945912972, "grad_norm": 0.4308905005455017, "learning_rate": 1.6083773891825946e-06, "loss": 0.495, "step": 791 }, { "epoch": 0.0805205368035787, "grad_norm": 0.4386695921421051, "learning_rate": 1.610410736071574e-06, "loss": 0.4967, "step": 792 }, { "epoch": 0.08062220414802765, "grad_norm": 0.4964194893836975, "learning_rate": 1.6124440829605532e-06, "loss": 0.5107, "step": 793 }, { "epoch": 0.08072387149247662, "grad_norm": 0.4328537881374359, "learning_rate": 1.6144774298495325e-06, "loss": 0.4808, "step": 794 }, { "epoch": 0.08082553883692557, "grad_norm": 0.43484947085380554, "learning_rate": 1.6165107767385115e-06, "loss": 0.5023, "step": 795 }, { "epoch": 0.08092720618137454, "grad_norm": 0.4195117950439453, "learning_rate": 1.618544123627491e-06, "loss": 0.4963, "step": 796 }, { "epoch": 0.08102887352582351, "grad_norm": 0.502868115901947, "learning_rate": 1.6205774705164703e-06, "loss": 0.4992, "step": 797 }, { "epoch": 0.08113054087027247, "grad_norm": 0.4374631345272064, "learning_rate": 1.6226108174054494e-06, "loss": 0.4722, "step": 798 }, { "epoch": 0.08123220821472144, "grad_norm": 0.43619513511657715, "learning_rate": 1.6246441642944288e-06, "loss": 0.5007, "step": 799 }, { "epoch": 0.08133387555917039, "grad_norm": 0.5336178541183472, "learning_rate": 1.626677511183408e-06, "loss": 0.4559, "step": 800 }, { "epoch": 0.08143554290361936, "grad_norm": 0.4643417000770569, "learning_rate": 1.6287108580723874e-06, "loss": 0.4651, "step": 801 }, { "epoch": 0.08153721024806831, "grad_norm": 0.4065360128879547, "learning_rate": 1.6307442049613665e-06, "loss": 0.4912, "step": 802 }, { "epoch": 0.08163887759251728, "grad_norm": 0.429252952337265, "learning_rate": 1.6327775518503457e-06, "loss": 0.4898, "step": 803 }, { "epoch": 0.08174054493696625, "grad_norm": 0.46686863899230957, "learning_rate": 1.6348108987393252e-06, "loss": 0.4316, "step": 804 }, { "epoch": 0.08184221228141521, "grad_norm": 0.4792773425579071, "learning_rate": 1.6368442456283043e-06, "loss": 0.4579, "step": 805 }, { "epoch": 0.08194387962586418, "grad_norm": 0.4466759264469147, "learning_rate": 1.6388775925172836e-06, "loss": 0.5022, "step": 806 }, { "epoch": 0.08204554697031313, "grad_norm": 0.4229101538658142, "learning_rate": 1.6409109394062628e-06, "loss": 0.4299, "step": 807 }, { "epoch": 0.0821472143147621, "grad_norm": 0.47155851125717163, "learning_rate": 1.6429442862952421e-06, "loss": 0.483, "step": 808 }, { "epoch": 0.08224888165921106, "grad_norm": 0.5525528192520142, "learning_rate": 1.6449776331842212e-06, "loss": 0.462, "step": 809 }, { "epoch": 0.08235054900366003, "grad_norm": 0.47473886609077454, "learning_rate": 1.6470109800732007e-06, "loss": 0.514, "step": 810 }, { "epoch": 0.08245221634810898, "grad_norm": 0.43060657382011414, "learning_rate": 1.6490443269621797e-06, "loss": 0.4546, "step": 811 }, { "epoch": 0.08255388369255795, "grad_norm": 0.47940295934677124, "learning_rate": 1.6510776738511592e-06, "loss": 0.5047, "step": 812 }, { "epoch": 0.08265555103700692, "grad_norm": 0.4985547661781311, "learning_rate": 1.6531110207401385e-06, "loss": 0.4999, "step": 813 }, { "epoch": 0.08275721838145587, "grad_norm": 0.4138842225074768, "learning_rate": 1.6551443676291176e-06, "loss": 0.4802, "step": 814 }, { "epoch": 0.08285888572590484, "grad_norm": 0.4823184013366699, "learning_rate": 1.657177714518097e-06, "loss": 0.4928, "step": 815 }, { "epoch": 0.0829605530703538, "grad_norm": 0.4552677571773529, "learning_rate": 1.6592110614070761e-06, "loss": 0.4506, "step": 816 }, { "epoch": 0.08306222041480277, "grad_norm": 0.43632301688194275, "learning_rate": 1.6612444082960554e-06, "loss": 0.5368, "step": 817 }, { "epoch": 0.08316388775925172, "grad_norm": 0.5305154323577881, "learning_rate": 1.6632777551850347e-06, "loss": 0.4733, "step": 818 }, { "epoch": 0.08326555510370069, "grad_norm": 0.4945686161518097, "learning_rate": 1.665311102074014e-06, "loss": 0.492, "step": 819 }, { "epoch": 0.08336722244814966, "grad_norm": 0.4344322085380554, "learning_rate": 1.6673444489629934e-06, "loss": 0.4655, "step": 820 }, { "epoch": 0.08346888979259862, "grad_norm": 0.409159779548645, "learning_rate": 1.6693777958519725e-06, "loss": 0.4725, "step": 821 }, { "epoch": 0.08357055713704759, "grad_norm": 0.403200626373291, "learning_rate": 1.6714111427409518e-06, "loss": 0.4922, "step": 822 }, { "epoch": 0.08367222448149654, "grad_norm": 0.44487687945365906, "learning_rate": 1.6734444896299308e-06, "loss": 0.4749, "step": 823 }, { "epoch": 0.08377389182594551, "grad_norm": 0.4492526352405548, "learning_rate": 1.6754778365189103e-06, "loss": 0.4769, "step": 824 }, { "epoch": 0.08387555917039446, "grad_norm": 0.4581888020038605, "learning_rate": 1.6775111834078894e-06, "loss": 0.4882, "step": 825 }, { "epoch": 0.08397722651484343, "grad_norm": 0.40134862065315247, "learning_rate": 1.6795445302968689e-06, "loss": 0.4452, "step": 826 }, { "epoch": 0.08407889385929239, "grad_norm": 0.3931429088115692, "learning_rate": 1.681577877185848e-06, "loss": 0.4455, "step": 827 }, { "epoch": 0.08418056120374136, "grad_norm": 0.4550941288471222, "learning_rate": 1.6836112240748272e-06, "loss": 0.4979, "step": 828 }, { "epoch": 0.08428222854819033, "grad_norm": 0.42251649498939514, "learning_rate": 1.6856445709638067e-06, "loss": 0.4702, "step": 829 }, { "epoch": 0.08438389589263928, "grad_norm": 0.46612852811813354, "learning_rate": 1.6876779178527858e-06, "loss": 0.4835, "step": 830 }, { "epoch": 0.08448556323708825, "grad_norm": 0.40361347794532776, "learning_rate": 1.6897112647417653e-06, "loss": 0.4984, "step": 831 }, { "epoch": 0.0845872305815372, "grad_norm": 0.44889089465141296, "learning_rate": 1.6917446116307443e-06, "loss": 0.4936, "step": 832 }, { "epoch": 0.08468889792598618, "grad_norm": 0.4612816572189331, "learning_rate": 1.6937779585197236e-06, "loss": 0.4712, "step": 833 }, { "epoch": 0.08479056527043513, "grad_norm": 0.4107799828052521, "learning_rate": 1.6958113054087027e-06, "loss": 0.4489, "step": 834 }, { "epoch": 0.0848922326148841, "grad_norm": 0.4490150213241577, "learning_rate": 1.6978446522976822e-06, "loss": 0.4689, "step": 835 }, { "epoch": 0.08499389995933307, "grad_norm": 0.3805161416530609, "learning_rate": 1.6998779991866614e-06, "loss": 0.4894, "step": 836 }, { "epoch": 0.08509556730378202, "grad_norm": 0.46853944659233093, "learning_rate": 1.7019113460756407e-06, "loss": 0.4829, "step": 837 }, { "epoch": 0.085197234648231, "grad_norm": 0.4433964788913727, "learning_rate": 1.70394469296462e-06, "loss": 0.4623, "step": 838 }, { "epoch": 0.08529890199267995, "grad_norm": 0.4095345735549927, "learning_rate": 1.705978039853599e-06, "loss": 0.4622, "step": 839 }, { "epoch": 0.08540056933712892, "grad_norm": 0.38908255100250244, "learning_rate": 1.7080113867425785e-06, "loss": 0.4781, "step": 840 }, { "epoch": 0.08550223668157787, "grad_norm": 0.4097222685813904, "learning_rate": 1.7100447336315576e-06, "loss": 0.4586, "step": 841 }, { "epoch": 0.08560390402602684, "grad_norm": 0.37202849984169006, "learning_rate": 1.7120780805205369e-06, "loss": 0.4886, "step": 842 }, { "epoch": 0.0857055713704758, "grad_norm": 0.4224902093410492, "learning_rate": 1.7141114274095162e-06, "loss": 0.4532, "step": 843 }, { "epoch": 0.08580723871492477, "grad_norm": 0.416897714138031, "learning_rate": 1.7161447742984954e-06, "loss": 0.48, "step": 844 }, { "epoch": 0.08590890605937374, "grad_norm": 0.4923073649406433, "learning_rate": 1.718178121187475e-06, "loss": 0.5198, "step": 845 }, { "epoch": 0.08601057340382269, "grad_norm": 0.4457766115665436, "learning_rate": 1.720211468076454e-06, "loss": 0.4574, "step": 846 }, { "epoch": 0.08611224074827166, "grad_norm": 0.4797053039073944, "learning_rate": 1.7222448149654333e-06, "loss": 0.4595, "step": 847 }, { "epoch": 0.08621390809272061, "grad_norm": 0.4424244463443756, "learning_rate": 1.7242781618544125e-06, "loss": 0.478, "step": 848 }, { "epoch": 0.08631557543716958, "grad_norm": 0.43947821855545044, "learning_rate": 1.7263115087433918e-06, "loss": 0.5082, "step": 849 }, { "epoch": 0.08641724278161854, "grad_norm": 0.42060860991477966, "learning_rate": 1.7283448556323709e-06, "loss": 0.439, "step": 850 }, { "epoch": 0.08651891012606751, "grad_norm": 0.4284212291240692, "learning_rate": 1.7303782025213504e-06, "loss": 0.4524, "step": 851 }, { "epoch": 0.08662057747051646, "grad_norm": 0.4332228899002075, "learning_rate": 1.7324115494103294e-06, "loss": 0.485, "step": 852 }, { "epoch": 0.08672224481496543, "grad_norm": 0.4238344430923462, "learning_rate": 1.7344448962993087e-06, "loss": 0.4543, "step": 853 }, { "epoch": 0.0868239121594144, "grad_norm": 0.43853890895843506, "learning_rate": 1.7364782431882882e-06, "loss": 0.4729, "step": 854 }, { "epoch": 0.08692557950386336, "grad_norm": 0.44233816862106323, "learning_rate": 1.7385115900772673e-06, "loss": 0.4824, "step": 855 }, { "epoch": 0.08702724684831233, "grad_norm": 0.4032129943370819, "learning_rate": 1.7405449369662468e-06, "loss": 0.4605, "step": 856 }, { "epoch": 0.08712891419276128, "grad_norm": 0.4210849702358246, "learning_rate": 1.7425782838552258e-06, "loss": 0.4691, "step": 857 }, { "epoch": 0.08723058153721025, "grad_norm": 0.4756966829299927, "learning_rate": 1.744611630744205e-06, "loss": 0.4821, "step": 858 }, { "epoch": 0.0873322488816592, "grad_norm": 0.4559876024723053, "learning_rate": 1.7466449776331842e-06, "loss": 0.4626, "step": 859 }, { "epoch": 0.08743391622610817, "grad_norm": 0.4095000922679901, "learning_rate": 1.7486783245221636e-06, "loss": 0.4945, "step": 860 }, { "epoch": 0.08753558357055714, "grad_norm": 0.4389612078666687, "learning_rate": 1.750711671411143e-06, "loss": 0.4414, "step": 861 }, { "epoch": 0.0876372509150061, "grad_norm": 0.43925783038139343, "learning_rate": 1.7527450183001222e-06, "loss": 0.4983, "step": 862 }, { "epoch": 0.08773891825945507, "grad_norm": 0.4117547571659088, "learning_rate": 1.7547783651891015e-06, "loss": 0.4802, "step": 863 }, { "epoch": 0.08784058560390402, "grad_norm": 0.42722830176353455, "learning_rate": 1.7568117120780805e-06, "loss": 0.449, "step": 864 }, { "epoch": 0.08794225294835299, "grad_norm": 0.445214182138443, "learning_rate": 1.75884505896706e-06, "loss": 0.4767, "step": 865 }, { "epoch": 0.08804392029280195, "grad_norm": 0.42260482907295227, "learning_rate": 1.760878405856039e-06, "loss": 0.457, "step": 866 }, { "epoch": 0.08814558763725092, "grad_norm": 0.4646793305873871, "learning_rate": 1.7629117527450184e-06, "loss": 0.5046, "step": 867 }, { "epoch": 0.08824725498169987, "grad_norm": 0.5068920254707336, "learning_rate": 1.7649450996339976e-06, "loss": 0.4572, "step": 868 }, { "epoch": 0.08834892232614884, "grad_norm": 0.46537452936172485, "learning_rate": 1.766978446522977e-06, "loss": 0.4752, "step": 869 }, { "epoch": 0.08845058967059781, "grad_norm": 0.5195790529251099, "learning_rate": 1.7690117934119564e-06, "loss": 0.4664, "step": 870 }, { "epoch": 0.08855225701504676, "grad_norm": 0.4177710711956024, "learning_rate": 1.7710451403009355e-06, "loss": 0.4781, "step": 871 }, { "epoch": 0.08865392435949573, "grad_norm": 0.4047723114490509, "learning_rate": 1.7730784871899147e-06, "loss": 0.451, "step": 872 }, { "epoch": 0.08875559170394469, "grad_norm": 0.4282885789871216, "learning_rate": 1.775111834078894e-06, "loss": 0.4496, "step": 873 }, { "epoch": 0.08885725904839366, "grad_norm": 0.4474763572216034, "learning_rate": 1.7771451809678733e-06, "loss": 0.4795, "step": 874 }, { "epoch": 0.08895892639284261, "grad_norm": 0.4397208094596863, "learning_rate": 1.7791785278568524e-06, "loss": 0.5059, "step": 875 }, { "epoch": 0.08906059373729158, "grad_norm": 0.4178096354007721, "learning_rate": 1.7812118747458319e-06, "loss": 0.4879, "step": 876 }, { "epoch": 0.08916226108174055, "grad_norm": 0.4294348955154419, "learning_rate": 1.7832452216348111e-06, "loss": 0.4803, "step": 877 }, { "epoch": 0.0892639284261895, "grad_norm": 0.42849355936050415, "learning_rate": 1.7852785685237902e-06, "loss": 0.5108, "step": 878 }, { "epoch": 0.08936559577063848, "grad_norm": 0.4371388852596283, "learning_rate": 1.7873119154127697e-06, "loss": 0.4826, "step": 879 }, { "epoch": 0.08946726311508743, "grad_norm": 0.4154582917690277, "learning_rate": 1.7893452623017487e-06, "loss": 0.435, "step": 880 }, { "epoch": 0.0895689304595364, "grad_norm": 0.38309144973754883, "learning_rate": 1.7913786091907282e-06, "loss": 0.4419, "step": 881 }, { "epoch": 0.08967059780398536, "grad_norm": 0.4196988642215729, "learning_rate": 1.7934119560797073e-06, "loss": 0.4447, "step": 882 }, { "epoch": 0.08977226514843432, "grad_norm": 0.4342006742954254, "learning_rate": 1.7954453029686866e-06, "loss": 0.498, "step": 883 }, { "epoch": 0.08987393249288328, "grad_norm": 0.4087722897529602, "learning_rate": 1.7974786498576656e-06, "loss": 0.4652, "step": 884 }, { "epoch": 0.08997559983733225, "grad_norm": 0.4035814702510834, "learning_rate": 1.7995119967466451e-06, "loss": 0.4303, "step": 885 }, { "epoch": 0.09007726718178122, "grad_norm": 0.4133078157901764, "learning_rate": 1.8015453436356244e-06, "loss": 0.4261, "step": 886 }, { "epoch": 0.09017893452623017, "grad_norm": 0.5128663778305054, "learning_rate": 1.8035786905246037e-06, "loss": 0.446, "step": 887 }, { "epoch": 0.09028060187067914, "grad_norm": 0.4309379458427429, "learning_rate": 1.805612037413583e-06, "loss": 0.4554, "step": 888 }, { "epoch": 0.0903822692151281, "grad_norm": 0.4221213161945343, "learning_rate": 1.807645384302562e-06, "loss": 0.4792, "step": 889 }, { "epoch": 0.09048393655957707, "grad_norm": 0.4406570494174957, "learning_rate": 1.8096787311915415e-06, "loss": 0.4378, "step": 890 }, { "epoch": 0.09058560390402602, "grad_norm": 0.4152646064758301, "learning_rate": 1.8117120780805206e-06, "loss": 0.4593, "step": 891 }, { "epoch": 0.09068727124847499, "grad_norm": 0.42497697472572327, "learning_rate": 1.8137454249694999e-06, "loss": 0.4693, "step": 892 }, { "epoch": 0.09078893859292396, "grad_norm": 0.4462893009185791, "learning_rate": 1.8157787718584793e-06, "loss": 0.5013, "step": 893 }, { "epoch": 0.09089060593737291, "grad_norm": 0.4896056652069092, "learning_rate": 1.8178121187474584e-06, "loss": 0.4784, "step": 894 }, { "epoch": 0.09099227328182188, "grad_norm": 0.4472463130950928, "learning_rate": 1.8198454656364379e-06, "loss": 0.4924, "step": 895 }, { "epoch": 0.09109394062627084, "grad_norm": 0.46051597595214844, "learning_rate": 1.821878812525417e-06, "loss": 0.4764, "step": 896 }, { "epoch": 0.09119560797071981, "grad_norm": 0.5551267862319946, "learning_rate": 1.8239121594143962e-06, "loss": 0.4863, "step": 897 }, { "epoch": 0.09129727531516876, "grad_norm": 0.47215619683265686, "learning_rate": 1.8259455063033755e-06, "loss": 0.4503, "step": 898 }, { "epoch": 0.09139894265961773, "grad_norm": 0.41418227553367615, "learning_rate": 1.8279788531923548e-06, "loss": 0.4545, "step": 899 }, { "epoch": 0.09150061000406669, "grad_norm": 0.6163618564605713, "learning_rate": 1.8300122000813339e-06, "loss": 0.4393, "step": 900 }, { "epoch": 0.09160227734851566, "grad_norm": 0.44237229228019714, "learning_rate": 1.8320455469703133e-06, "loss": 0.4663, "step": 901 }, { "epoch": 0.09170394469296463, "grad_norm": 0.4526953101158142, "learning_rate": 1.8340788938592926e-06, "loss": 0.5045, "step": 902 }, { "epoch": 0.09180561203741358, "grad_norm": 0.4632759392261505, "learning_rate": 1.8361122407482717e-06, "loss": 0.469, "step": 903 }, { "epoch": 0.09190727938186255, "grad_norm": 0.46509817242622375, "learning_rate": 1.8381455876372512e-06, "loss": 0.4779, "step": 904 }, { "epoch": 0.0920089467263115, "grad_norm": 0.4350663721561432, "learning_rate": 1.8401789345262302e-06, "loss": 0.4748, "step": 905 }, { "epoch": 0.09211061407076047, "grad_norm": 0.4464540183544159, "learning_rate": 1.8422122814152097e-06, "loss": 0.4845, "step": 906 }, { "epoch": 0.09221228141520943, "grad_norm": 0.43235740065574646, "learning_rate": 1.8442456283041888e-06, "loss": 0.461, "step": 907 }, { "epoch": 0.0923139487596584, "grad_norm": 0.42287805676460266, "learning_rate": 1.846278975193168e-06, "loss": 0.4524, "step": 908 }, { "epoch": 0.09241561610410737, "grad_norm": 0.4433877766132355, "learning_rate": 1.8483123220821475e-06, "loss": 0.502, "step": 909 }, { "epoch": 0.09251728344855632, "grad_norm": 0.3949868679046631, "learning_rate": 1.8503456689711266e-06, "loss": 0.4804, "step": 910 }, { "epoch": 0.09261895079300529, "grad_norm": 0.45708614587783813, "learning_rate": 1.8523790158601059e-06, "loss": 0.4736, "step": 911 }, { "epoch": 0.09272061813745425, "grad_norm": 0.44069457054138184, "learning_rate": 1.8544123627490852e-06, "loss": 0.4918, "step": 912 }, { "epoch": 0.09282228548190322, "grad_norm": 0.4709485173225403, "learning_rate": 1.8564457096380644e-06, "loss": 0.4836, "step": 913 }, { "epoch": 0.09292395282635217, "grad_norm": 0.43765807151794434, "learning_rate": 1.8584790565270435e-06, "loss": 0.4746, "step": 914 }, { "epoch": 0.09302562017080114, "grad_norm": 0.4335680902004242, "learning_rate": 1.860512403416023e-06, "loss": 0.484, "step": 915 }, { "epoch": 0.0931272875152501, "grad_norm": 0.3881761431694031, "learning_rate": 1.862545750305002e-06, "loss": 0.4686, "step": 916 }, { "epoch": 0.09322895485969906, "grad_norm": 0.4863176643848419, "learning_rate": 1.8645790971939815e-06, "loss": 0.476, "step": 917 }, { "epoch": 0.09333062220414803, "grad_norm": 0.409242182970047, "learning_rate": 1.8666124440829608e-06, "loss": 0.4628, "step": 918 }, { "epoch": 0.09343228954859699, "grad_norm": 0.41993457078933716, "learning_rate": 1.8686457909719399e-06, "loss": 0.4622, "step": 919 }, { "epoch": 0.09353395689304596, "grad_norm": 0.43729689717292786, "learning_rate": 1.8706791378609194e-06, "loss": 0.4982, "step": 920 }, { "epoch": 0.09363562423749491, "grad_norm": 0.4456024467945099, "learning_rate": 1.8727124847498984e-06, "loss": 0.4694, "step": 921 }, { "epoch": 0.09373729158194388, "grad_norm": 0.39931365847587585, "learning_rate": 1.8747458316388777e-06, "loss": 0.5235, "step": 922 }, { "epoch": 0.09383895892639284, "grad_norm": 0.4241826832294464, "learning_rate": 1.876779178527857e-06, "loss": 0.4767, "step": 923 }, { "epoch": 0.0939406262708418, "grad_norm": 0.467349648475647, "learning_rate": 1.8788125254168363e-06, "loss": 0.5002, "step": 924 }, { "epoch": 0.09404229361529076, "grad_norm": 0.43846869468688965, "learning_rate": 1.8808458723058153e-06, "loss": 0.4849, "step": 925 }, { "epoch": 0.09414396095973973, "grad_norm": 0.42070072889328003, "learning_rate": 1.8828792191947948e-06, "loss": 0.5052, "step": 926 }, { "epoch": 0.0942456283041887, "grad_norm": 0.409988671541214, "learning_rate": 1.884912566083774e-06, "loss": 0.4533, "step": 927 }, { "epoch": 0.09434729564863766, "grad_norm": 0.4413056969642639, "learning_rate": 1.8869459129727532e-06, "loss": 0.4735, "step": 928 }, { "epoch": 0.09444896299308662, "grad_norm": 0.462348997592926, "learning_rate": 1.8889792598617327e-06, "loss": 0.4885, "step": 929 }, { "epoch": 0.09455063033753558, "grad_norm": 0.44020599126815796, "learning_rate": 1.8910126067507117e-06, "loss": 0.4414, "step": 930 }, { "epoch": 0.09465229768198455, "grad_norm": 0.43123289942741394, "learning_rate": 1.8930459536396912e-06, "loss": 0.4651, "step": 931 }, { "epoch": 0.0947539650264335, "grad_norm": 0.4220948815345764, "learning_rate": 1.8950793005286703e-06, "loss": 0.5249, "step": 932 }, { "epoch": 0.09485563237088247, "grad_norm": 0.447476327419281, "learning_rate": 1.8971126474176495e-06, "loss": 0.482, "step": 933 }, { "epoch": 0.09495729971533144, "grad_norm": 0.4578755497932434, "learning_rate": 1.899145994306629e-06, "loss": 0.4608, "step": 934 }, { "epoch": 0.0950589670597804, "grad_norm": 0.46080172061920166, "learning_rate": 1.901179341195608e-06, "loss": 0.4656, "step": 935 }, { "epoch": 0.09516063440422937, "grad_norm": 0.4010032117366791, "learning_rate": 1.9032126880845874e-06, "loss": 0.4521, "step": 936 }, { "epoch": 0.09526230174867832, "grad_norm": 0.4745318591594696, "learning_rate": 1.9052460349735667e-06, "loss": 0.448, "step": 937 }, { "epoch": 0.09536396909312729, "grad_norm": 0.45571231842041016, "learning_rate": 1.907279381862546e-06, "loss": 0.4589, "step": 938 }, { "epoch": 0.09546563643757625, "grad_norm": 0.42764732241630554, "learning_rate": 1.909312728751525e-06, "loss": 0.498, "step": 939 }, { "epoch": 0.09556730378202521, "grad_norm": 0.43596649169921875, "learning_rate": 1.9113460756405045e-06, "loss": 0.4767, "step": 940 }, { "epoch": 0.09566897112647417, "grad_norm": 0.3947821855545044, "learning_rate": 1.9133794225294835e-06, "loss": 0.4813, "step": 941 }, { "epoch": 0.09577063847092314, "grad_norm": 0.40387943387031555, "learning_rate": 1.915412769418463e-06, "loss": 0.4473, "step": 942 }, { "epoch": 0.09587230581537211, "grad_norm": 0.3898303210735321, "learning_rate": 1.9174461163074425e-06, "loss": 0.4552, "step": 943 }, { "epoch": 0.09597397315982106, "grad_norm": 0.4200487732887268, "learning_rate": 1.9194794631964216e-06, "loss": 0.4811, "step": 944 }, { "epoch": 0.09607564050427003, "grad_norm": 0.39104902744293213, "learning_rate": 1.9215128100854006e-06, "loss": 0.4547, "step": 945 }, { "epoch": 0.09617730784871899, "grad_norm": 0.4154428541660309, "learning_rate": 1.9235461569743797e-06, "loss": 0.434, "step": 946 }, { "epoch": 0.09627897519316796, "grad_norm": 0.44142597913742065, "learning_rate": 1.925579503863359e-06, "loss": 0.4881, "step": 947 }, { "epoch": 0.09638064253761691, "grad_norm": 0.42361700534820557, "learning_rate": 1.9276128507523383e-06, "loss": 0.4918, "step": 948 }, { "epoch": 0.09648230988206588, "grad_norm": 0.3923143744468689, "learning_rate": 1.9296461976413178e-06, "loss": 0.4735, "step": 949 }, { "epoch": 0.09658397722651485, "grad_norm": 0.4425416588783264, "learning_rate": 1.9316795445302972e-06, "loss": 0.4979, "step": 950 }, { "epoch": 0.0966856445709638, "grad_norm": 0.46131250262260437, "learning_rate": 1.9337128914192763e-06, "loss": 0.474, "step": 951 }, { "epoch": 0.09678731191541277, "grad_norm": 0.43660351634025574, "learning_rate": 1.935746238308256e-06, "loss": 0.4661, "step": 952 }, { "epoch": 0.09688897925986173, "grad_norm": 0.3842215836048126, "learning_rate": 1.937779585197235e-06, "loss": 0.4584, "step": 953 }, { "epoch": 0.0969906466043107, "grad_norm": 0.3991558849811554, "learning_rate": 1.939812932086214e-06, "loss": 0.4835, "step": 954 }, { "epoch": 0.09709231394875965, "grad_norm": 0.429194837808609, "learning_rate": 1.9418462789751934e-06, "loss": 0.4745, "step": 955 }, { "epoch": 0.09719398129320862, "grad_norm": 0.4229052662849426, "learning_rate": 1.9438796258641725e-06, "loss": 0.4606, "step": 956 }, { "epoch": 0.09729564863765758, "grad_norm": 0.4641473591327667, "learning_rate": 1.9459129727531515e-06, "loss": 0.48, "step": 957 }, { "epoch": 0.09739731598210655, "grad_norm": 0.420730859041214, "learning_rate": 1.947946319642131e-06, "loss": 0.4443, "step": 958 }, { "epoch": 0.09749898332655552, "grad_norm": 0.4130661189556122, "learning_rate": 1.9499796665311105e-06, "loss": 0.4521, "step": 959 }, { "epoch": 0.09760065067100447, "grad_norm": 0.38971224427223206, "learning_rate": 1.9520130134200896e-06, "loss": 0.4328, "step": 960 }, { "epoch": 0.09770231801545344, "grad_norm": 0.47186389565467834, "learning_rate": 1.954046360309069e-06, "loss": 0.4352, "step": 961 }, { "epoch": 0.0978039853599024, "grad_norm": 0.48285147547721863, "learning_rate": 1.956079707198048e-06, "loss": 0.4518, "step": 962 }, { "epoch": 0.09790565270435136, "grad_norm": 0.45237886905670166, "learning_rate": 1.9581130540870276e-06, "loss": 0.4797, "step": 963 }, { "epoch": 0.09800732004880032, "grad_norm": 0.47316116094589233, "learning_rate": 1.9601464009760067e-06, "loss": 0.4929, "step": 964 }, { "epoch": 0.09810898739324929, "grad_norm": 0.4531801640987396, "learning_rate": 1.9621797478649858e-06, "loss": 0.4306, "step": 965 }, { "epoch": 0.09821065473769826, "grad_norm": 0.4651638865470886, "learning_rate": 1.9642130947539652e-06, "loss": 0.4661, "step": 966 }, { "epoch": 0.09831232208214721, "grad_norm": 0.47340041399002075, "learning_rate": 1.9662464416429443e-06, "loss": 0.4911, "step": 967 }, { "epoch": 0.09841398942659618, "grad_norm": 0.4086328148841858, "learning_rate": 1.968279788531924e-06, "loss": 0.4435, "step": 968 }, { "epoch": 0.09851565677104514, "grad_norm": 0.42204201221466064, "learning_rate": 1.970313135420903e-06, "loss": 0.4421, "step": 969 }, { "epoch": 0.0986173241154941, "grad_norm": 0.4480123519897461, "learning_rate": 1.9723464823098823e-06, "loss": 0.464, "step": 970 }, { "epoch": 0.09871899145994306, "grad_norm": 0.45157840847969055, "learning_rate": 1.9743798291988614e-06, "loss": 0.4697, "step": 971 }, { "epoch": 0.09882065880439203, "grad_norm": 0.4948574900627136, "learning_rate": 1.976413176087841e-06, "loss": 0.4295, "step": 972 }, { "epoch": 0.09892232614884099, "grad_norm": 0.4356659948825836, "learning_rate": 1.97844652297682e-06, "loss": 0.46, "step": 973 }, { "epoch": 0.09902399349328996, "grad_norm": 0.47670620679855347, "learning_rate": 1.9804798698657994e-06, "loss": 0.5076, "step": 974 }, { "epoch": 0.09912566083773892, "grad_norm": 0.47566914558410645, "learning_rate": 1.9825132167547785e-06, "loss": 0.4763, "step": 975 }, { "epoch": 0.09922732818218788, "grad_norm": 0.4225310683250427, "learning_rate": 1.9845465636437576e-06, "loss": 0.4688, "step": 976 }, { "epoch": 0.09932899552663685, "grad_norm": 0.4674275517463684, "learning_rate": 1.986579910532737e-06, "loss": 0.5053, "step": 977 }, { "epoch": 0.0994306628710858, "grad_norm": 0.4591468274593353, "learning_rate": 1.988613257421716e-06, "loss": 0.4687, "step": 978 }, { "epoch": 0.09953233021553477, "grad_norm": 0.4578147232532501, "learning_rate": 1.9906466043106956e-06, "loss": 0.4591, "step": 979 }, { "epoch": 0.09963399755998373, "grad_norm": 0.42843446135520935, "learning_rate": 1.9926799511996747e-06, "loss": 0.4404, "step": 980 }, { "epoch": 0.0997356649044327, "grad_norm": 0.512990415096283, "learning_rate": 1.994713298088654e-06, "loss": 0.4926, "step": 981 }, { "epoch": 0.09983733224888167, "grad_norm": 0.44916245341300964, "learning_rate": 1.9967466449776337e-06, "loss": 0.4264, "step": 982 }, { "epoch": 0.09993899959333062, "grad_norm": 0.4405478537082672, "learning_rate": 1.9987799918666127e-06, "loss": 0.4635, "step": 983 }, { "epoch": 0.10004066693777959, "grad_norm": 0.40618282556533813, "learning_rate": 2.000813338755592e-06, "loss": 0.4498, "step": 984 }, { "epoch": 0.10014233428222855, "grad_norm": 0.4211396872997284, "learning_rate": 2.0028466856445713e-06, "loss": 0.4586, "step": 985 }, { "epoch": 0.10024400162667751, "grad_norm": 0.4573737382888794, "learning_rate": 2.0048800325335503e-06, "loss": 0.4587, "step": 986 }, { "epoch": 0.10034566897112647, "grad_norm": 0.44980067014694214, "learning_rate": 2.0069133794225294e-06, "loss": 0.4739, "step": 987 }, { "epoch": 0.10044733631557544, "grad_norm": 0.48181381821632385, "learning_rate": 2.008946726311509e-06, "loss": 0.4935, "step": 988 }, { "epoch": 0.1005490036600244, "grad_norm": 0.4774457812309265, "learning_rate": 2.010980073200488e-06, "loss": 0.4331, "step": 989 }, { "epoch": 0.10065067100447336, "grad_norm": 0.4443321228027344, "learning_rate": 2.0130134200894674e-06, "loss": 0.4704, "step": 990 }, { "epoch": 0.10075233834892233, "grad_norm": 0.4493865668773651, "learning_rate": 2.015046766978447e-06, "loss": 0.4401, "step": 991 }, { "epoch": 0.10085400569337129, "grad_norm": 0.4510371685028076, "learning_rate": 2.017080113867426e-06, "loss": 0.4806, "step": 992 }, { "epoch": 0.10095567303782026, "grad_norm": 0.45565831661224365, "learning_rate": 2.0191134607564055e-06, "loss": 0.4585, "step": 993 }, { "epoch": 0.10105734038226921, "grad_norm": 0.4568254053592682, "learning_rate": 2.0211468076453846e-06, "loss": 0.4531, "step": 994 }, { "epoch": 0.10115900772671818, "grad_norm": 0.489434152841568, "learning_rate": 2.0231801545343636e-06, "loss": 0.505, "step": 995 }, { "epoch": 0.10126067507116714, "grad_norm": 0.4297303557395935, "learning_rate": 2.0252135014233427e-06, "loss": 0.48, "step": 996 }, { "epoch": 0.1013623424156161, "grad_norm": 0.4479207694530487, "learning_rate": 2.027246848312322e-06, "loss": 0.4442, "step": 997 }, { "epoch": 0.10146400976006506, "grad_norm": 0.41712456941604614, "learning_rate": 2.0292801952013012e-06, "loss": 0.446, "step": 998 }, { "epoch": 0.10156567710451403, "grad_norm": 0.41547316312789917, "learning_rate": 2.0313135420902807e-06, "loss": 0.4753, "step": 999 }, { "epoch": 0.101667344448963, "grad_norm": 0.435382217168808, "learning_rate": 2.03334688897926e-06, "loss": 0.4717, "step": 1000 }, { "epoch": 0.10176901179341195, "grad_norm": 0.4337117671966553, "learning_rate": 2.0353802358682393e-06, "loss": 0.4858, "step": 1001 }, { "epoch": 0.10187067913786092, "grad_norm": 0.48241522908210754, "learning_rate": 2.0374135827572188e-06, "loss": 0.4234, "step": 1002 }, { "epoch": 0.10197234648230988, "grad_norm": 0.4211669862270355, "learning_rate": 2.039446929646198e-06, "loss": 0.4778, "step": 1003 }, { "epoch": 0.10207401382675885, "grad_norm": 0.3950549066066742, "learning_rate": 2.0414802765351773e-06, "loss": 0.4527, "step": 1004 }, { "epoch": 0.1021756811712078, "grad_norm": 0.4319285750389099, "learning_rate": 2.0435136234241564e-06, "loss": 0.4761, "step": 1005 }, { "epoch": 0.10227734851565677, "grad_norm": 0.45142316818237305, "learning_rate": 2.0455469703131354e-06, "loss": 0.472, "step": 1006 }, { "epoch": 0.10237901586010574, "grad_norm": 0.46644678711891174, "learning_rate": 2.047580317202115e-06, "loss": 0.4631, "step": 1007 }, { "epoch": 0.1024806832045547, "grad_norm": 0.43135178089141846, "learning_rate": 2.049613664091094e-06, "loss": 0.4507, "step": 1008 }, { "epoch": 0.10258235054900366, "grad_norm": 0.4401921033859253, "learning_rate": 2.0516470109800735e-06, "loss": 0.4846, "step": 1009 }, { "epoch": 0.10268401789345262, "grad_norm": 0.45365986227989197, "learning_rate": 2.0536803578690525e-06, "loss": 0.473, "step": 1010 }, { "epoch": 0.10278568523790159, "grad_norm": 0.42496564984321594, "learning_rate": 2.055713704758032e-06, "loss": 0.4686, "step": 1011 }, { "epoch": 0.10288735258235054, "grad_norm": 0.4661194086074829, "learning_rate": 2.057747051647011e-06, "loss": 0.477, "step": 1012 }, { "epoch": 0.10298901992679951, "grad_norm": 0.43606895208358765, "learning_rate": 2.0597803985359906e-06, "loss": 0.4474, "step": 1013 }, { "epoch": 0.10309068727124847, "grad_norm": 0.4521263837814331, "learning_rate": 2.0618137454249697e-06, "loss": 0.4519, "step": 1014 }, { "epoch": 0.10319235461569744, "grad_norm": 0.45562297105789185, "learning_rate": 2.0638470923139487e-06, "loss": 0.4662, "step": 1015 }, { "epoch": 0.1032940219601464, "grad_norm": 0.49812084436416626, "learning_rate": 2.065880439202928e-06, "loss": 0.4851, "step": 1016 }, { "epoch": 0.10339568930459536, "grad_norm": 0.5067127346992493, "learning_rate": 2.0679137860919073e-06, "loss": 0.4425, "step": 1017 }, { "epoch": 0.10349735664904433, "grad_norm": 0.4560871422290802, "learning_rate": 2.0699471329808868e-06, "loss": 0.4696, "step": 1018 }, { "epoch": 0.10359902399349329, "grad_norm": 0.46542221307754517, "learning_rate": 2.071980479869866e-06, "loss": 0.4478, "step": 1019 }, { "epoch": 0.10370069133794226, "grad_norm": 0.4371488690376282, "learning_rate": 2.0740138267588453e-06, "loss": 0.4459, "step": 1020 }, { "epoch": 0.10380235868239121, "grad_norm": 0.6015408039093018, "learning_rate": 2.0760471736478244e-06, "loss": 0.464, "step": 1021 }, { "epoch": 0.10390402602684018, "grad_norm": 0.5145712494850159, "learning_rate": 2.078080520536804e-06, "loss": 0.4933, "step": 1022 }, { "epoch": 0.10400569337128915, "grad_norm": 0.47199517488479614, "learning_rate": 2.080113867425783e-06, "loss": 0.4944, "step": 1023 }, { "epoch": 0.1041073607157381, "grad_norm": 0.43035557866096497, "learning_rate": 2.0821472143147624e-06, "loss": 0.4526, "step": 1024 }, { "epoch": 0.10420902806018707, "grad_norm": 0.4932800531387329, "learning_rate": 2.0841805612037415e-06, "loss": 0.4735, "step": 1025 }, { "epoch": 0.10431069540463603, "grad_norm": 0.46334975957870483, "learning_rate": 2.0862139080927205e-06, "loss": 0.5148, "step": 1026 }, { "epoch": 0.104412362749085, "grad_norm": 0.47310423851013184, "learning_rate": 2.0882472549817e-06, "loss": 0.469, "step": 1027 }, { "epoch": 0.10451403009353395, "grad_norm": 0.4806954562664032, "learning_rate": 2.090280601870679e-06, "loss": 0.5174, "step": 1028 }, { "epoch": 0.10461569743798292, "grad_norm": 0.43708866834640503, "learning_rate": 2.0923139487596586e-06, "loss": 0.4443, "step": 1029 }, { "epoch": 0.10471736478243188, "grad_norm": 0.4533339738845825, "learning_rate": 2.0943472956486377e-06, "loss": 0.4678, "step": 1030 }, { "epoch": 0.10481903212688085, "grad_norm": 0.4599463939666748, "learning_rate": 2.096380642537617e-06, "loss": 0.4543, "step": 1031 }, { "epoch": 0.10492069947132981, "grad_norm": 0.4188119173049927, "learning_rate": 2.0984139894265966e-06, "loss": 0.4786, "step": 1032 }, { "epoch": 0.10502236681577877, "grad_norm": 0.44885557889938354, "learning_rate": 2.1004473363155757e-06, "loss": 0.502, "step": 1033 }, { "epoch": 0.10512403416022774, "grad_norm": 0.4389401078224182, "learning_rate": 2.1024806832045548e-06, "loss": 0.4666, "step": 1034 }, { "epoch": 0.1052257015046767, "grad_norm": 0.3868423104286194, "learning_rate": 2.1045140300935342e-06, "loss": 0.4722, "step": 1035 }, { "epoch": 0.10532736884912566, "grad_norm": 0.45963338017463684, "learning_rate": 2.1065473769825133e-06, "loss": 0.4868, "step": 1036 }, { "epoch": 0.10542903619357462, "grad_norm": 0.44059455394744873, "learning_rate": 2.1085807238714924e-06, "loss": 0.4833, "step": 1037 }, { "epoch": 0.10553070353802359, "grad_norm": 0.39836549758911133, "learning_rate": 2.110614070760472e-06, "loss": 0.4888, "step": 1038 }, { "epoch": 0.10563237088247256, "grad_norm": 0.4152512848377228, "learning_rate": 2.1126474176494514e-06, "loss": 0.4815, "step": 1039 }, { "epoch": 0.10573403822692151, "grad_norm": 0.4502132534980774, "learning_rate": 2.1146807645384304e-06, "loss": 0.473, "step": 1040 }, { "epoch": 0.10583570557137048, "grad_norm": 0.42055413126945496, "learning_rate": 2.11671411142741e-06, "loss": 0.4598, "step": 1041 }, { "epoch": 0.10593737291581944, "grad_norm": 0.4457545876502991, "learning_rate": 2.118747458316389e-06, "loss": 0.4642, "step": 1042 }, { "epoch": 0.1060390402602684, "grad_norm": 0.422768235206604, "learning_rate": 2.1207808052053685e-06, "loss": 0.4167, "step": 1043 }, { "epoch": 0.10614070760471736, "grad_norm": 0.45864367485046387, "learning_rate": 2.1228141520943475e-06, "loss": 0.4611, "step": 1044 }, { "epoch": 0.10624237494916633, "grad_norm": 0.4008235037326813, "learning_rate": 2.1248474989833266e-06, "loss": 0.4606, "step": 1045 }, { "epoch": 0.10634404229361528, "grad_norm": 0.43533575534820557, "learning_rate": 2.126880845872306e-06, "loss": 0.4521, "step": 1046 }, { "epoch": 0.10644570963806425, "grad_norm": 0.4814954996109009, "learning_rate": 2.128914192761285e-06, "loss": 0.4711, "step": 1047 }, { "epoch": 0.10654737698251322, "grad_norm": 0.43181636929512024, "learning_rate": 2.1309475396502646e-06, "loss": 0.4861, "step": 1048 }, { "epoch": 0.10664904432696218, "grad_norm": 0.44365623593330383, "learning_rate": 2.1329808865392437e-06, "loss": 0.4683, "step": 1049 }, { "epoch": 0.10675071167141115, "grad_norm": 0.4592335820198059, "learning_rate": 2.135014233428223e-06, "loss": 0.4993, "step": 1050 }, { "epoch": 0.1068523790158601, "grad_norm": 0.41530007123947144, "learning_rate": 2.1370475803172022e-06, "loss": 0.4712, "step": 1051 }, { "epoch": 0.10695404636030907, "grad_norm": 0.45602479577064514, "learning_rate": 2.1390809272061817e-06, "loss": 0.4501, "step": 1052 }, { "epoch": 0.10705571370475803, "grad_norm": 0.4567805230617523, "learning_rate": 2.141114274095161e-06, "loss": 0.448, "step": 1053 }, { "epoch": 0.107157381049207, "grad_norm": 0.4895797669887543, "learning_rate": 2.1431476209841403e-06, "loss": 0.4443, "step": 1054 }, { "epoch": 0.10725904839365595, "grad_norm": 0.4382070004940033, "learning_rate": 2.1451809678731193e-06, "loss": 0.4393, "step": 1055 }, { "epoch": 0.10736071573810492, "grad_norm": 0.42325258255004883, "learning_rate": 2.1472143147620984e-06, "loss": 0.471, "step": 1056 }, { "epoch": 0.10746238308255389, "grad_norm": 0.40847697854042053, "learning_rate": 2.149247661651078e-06, "loss": 0.4822, "step": 1057 }, { "epoch": 0.10756405042700284, "grad_norm": 0.4371722340583801, "learning_rate": 2.151281008540057e-06, "loss": 0.4587, "step": 1058 }, { "epoch": 0.10766571777145181, "grad_norm": 0.44765859842300415, "learning_rate": 2.1533143554290365e-06, "loss": 0.4968, "step": 1059 }, { "epoch": 0.10776738511590077, "grad_norm": 0.4280761182308197, "learning_rate": 2.1553477023180155e-06, "loss": 0.442, "step": 1060 }, { "epoch": 0.10786905246034974, "grad_norm": 0.45260298252105713, "learning_rate": 2.157381049206995e-06, "loss": 0.4523, "step": 1061 }, { "epoch": 0.10797071980479869, "grad_norm": 0.41880446672439575, "learning_rate": 2.159414396095974e-06, "loss": 0.4831, "step": 1062 }, { "epoch": 0.10807238714924766, "grad_norm": 0.44285666942596436, "learning_rate": 2.1614477429849536e-06, "loss": 0.4489, "step": 1063 }, { "epoch": 0.10817405449369663, "grad_norm": 0.43426066637039185, "learning_rate": 2.1634810898739326e-06, "loss": 0.4513, "step": 1064 }, { "epoch": 0.10827572183814559, "grad_norm": 0.4590331017971039, "learning_rate": 2.1655144367629117e-06, "loss": 0.5, "step": 1065 }, { "epoch": 0.10837738918259456, "grad_norm": 0.4594675302505493, "learning_rate": 2.167547783651891e-06, "loss": 0.4741, "step": 1066 }, { "epoch": 0.10847905652704351, "grad_norm": 0.45411649346351624, "learning_rate": 2.1695811305408702e-06, "loss": 0.4978, "step": 1067 }, { "epoch": 0.10858072387149248, "grad_norm": 0.4477453827857971, "learning_rate": 2.1716144774298497e-06, "loss": 0.4283, "step": 1068 }, { "epoch": 0.10868239121594143, "grad_norm": 0.43324899673461914, "learning_rate": 2.173647824318829e-06, "loss": 0.4794, "step": 1069 }, { "epoch": 0.1087840585603904, "grad_norm": 0.4607120156288147, "learning_rate": 2.1756811712078083e-06, "loss": 0.4893, "step": 1070 }, { "epoch": 0.10888572590483936, "grad_norm": 0.4346695840358734, "learning_rate": 2.1777145180967873e-06, "loss": 0.4535, "step": 1071 }, { "epoch": 0.10898739324928833, "grad_norm": 0.4687034487724304, "learning_rate": 2.179747864985767e-06, "loss": 0.4499, "step": 1072 }, { "epoch": 0.1090890605937373, "grad_norm": 0.418893039226532, "learning_rate": 2.1817812118747463e-06, "loss": 0.4645, "step": 1073 }, { "epoch": 0.10919072793818625, "grad_norm": 0.4471275508403778, "learning_rate": 2.1838145587637254e-06, "loss": 0.4478, "step": 1074 }, { "epoch": 0.10929239528263522, "grad_norm": 0.40721264481544495, "learning_rate": 2.1858479056527045e-06, "loss": 0.4568, "step": 1075 }, { "epoch": 0.10939406262708418, "grad_norm": 0.44723716378211975, "learning_rate": 2.1878812525416835e-06, "loss": 0.4966, "step": 1076 }, { "epoch": 0.10949572997153315, "grad_norm": 0.4284946024417877, "learning_rate": 2.189914599430663e-06, "loss": 0.4704, "step": 1077 }, { "epoch": 0.1095973973159821, "grad_norm": 0.4913545548915863, "learning_rate": 2.191947946319642e-06, "loss": 0.4412, "step": 1078 }, { "epoch": 0.10969906466043107, "grad_norm": 0.41304346919059753, "learning_rate": 2.1939812932086216e-06, "loss": 0.4356, "step": 1079 }, { "epoch": 0.10980073200488004, "grad_norm": 0.42547866702079773, "learning_rate": 2.196014640097601e-06, "loss": 0.4168, "step": 1080 }, { "epoch": 0.109902399349329, "grad_norm": 0.4196743071079254, "learning_rate": 2.19804798698658e-06, "loss": 0.4586, "step": 1081 }, { "epoch": 0.11000406669377796, "grad_norm": 0.45034682750701904, "learning_rate": 2.2000813338755596e-06, "loss": 0.4281, "step": 1082 }, { "epoch": 0.11010573403822692, "grad_norm": 0.4128366708755493, "learning_rate": 2.2021146807645387e-06, "loss": 0.4719, "step": 1083 }, { "epoch": 0.11020740138267589, "grad_norm": 0.44237253069877625, "learning_rate": 2.2041480276535177e-06, "loss": 0.4541, "step": 1084 }, { "epoch": 0.11030906872712484, "grad_norm": 0.4231768846511841, "learning_rate": 2.2061813745424972e-06, "loss": 0.4798, "step": 1085 }, { "epoch": 0.11041073607157381, "grad_norm": 0.46595117449760437, "learning_rate": 2.2082147214314763e-06, "loss": 0.462, "step": 1086 }, { "epoch": 0.11051240341602277, "grad_norm": 0.39009782671928406, "learning_rate": 2.2102480683204553e-06, "loss": 0.4466, "step": 1087 }, { "epoch": 0.11061407076047174, "grad_norm": 0.4511227011680603, "learning_rate": 2.212281415209435e-06, "loss": 0.4335, "step": 1088 }, { "epoch": 0.1107157381049207, "grad_norm": 0.4527212977409363, "learning_rate": 2.2143147620984143e-06, "loss": 0.4425, "step": 1089 }, { "epoch": 0.11081740544936966, "grad_norm": 0.41018542647361755, "learning_rate": 2.2163481089873934e-06, "loss": 0.449, "step": 1090 }, { "epoch": 0.11091907279381863, "grad_norm": 0.4276340901851654, "learning_rate": 2.218381455876373e-06, "loss": 0.4757, "step": 1091 }, { "epoch": 0.11102074013826758, "grad_norm": 0.4181230962276459, "learning_rate": 2.220414802765352e-06, "loss": 0.4498, "step": 1092 }, { "epoch": 0.11112240748271655, "grad_norm": 0.41723233461380005, "learning_rate": 2.2224481496543314e-06, "loss": 0.4674, "step": 1093 }, { "epoch": 0.11122407482716551, "grad_norm": 0.4668218791484833, "learning_rate": 2.2244814965433105e-06, "loss": 0.4737, "step": 1094 }, { "epoch": 0.11132574217161448, "grad_norm": 0.4087064862251282, "learning_rate": 2.2265148434322896e-06, "loss": 0.4624, "step": 1095 }, { "epoch": 0.11142740951606345, "grad_norm": 0.4057465195655823, "learning_rate": 2.228548190321269e-06, "loss": 0.4566, "step": 1096 }, { "epoch": 0.1115290768605124, "grad_norm": 0.44730034470558167, "learning_rate": 2.230581537210248e-06, "loss": 0.4512, "step": 1097 }, { "epoch": 0.11163074420496137, "grad_norm": 0.3977893590927124, "learning_rate": 2.2326148840992276e-06, "loss": 0.4849, "step": 1098 }, { "epoch": 0.11173241154941033, "grad_norm": 0.411030113697052, "learning_rate": 2.2346482309882067e-06, "loss": 0.4341, "step": 1099 }, { "epoch": 0.1118340788938593, "grad_norm": 0.42771515250205994, "learning_rate": 2.236681577877186e-06, "loss": 0.4604, "step": 1100 }, { "epoch": 0.11193574623830825, "grad_norm": 0.4401977062225342, "learning_rate": 2.238714924766165e-06, "loss": 0.4491, "step": 1101 }, { "epoch": 0.11203741358275722, "grad_norm": 0.43205031752586365, "learning_rate": 2.2407482716551447e-06, "loss": 0.4658, "step": 1102 }, { "epoch": 0.11213908092720618, "grad_norm": 0.43720048666000366, "learning_rate": 2.2427816185441238e-06, "loss": 0.449, "step": 1103 }, { "epoch": 0.11224074827165514, "grad_norm": 0.4343700706958771, "learning_rate": 2.2448149654331033e-06, "loss": 0.4698, "step": 1104 }, { "epoch": 0.11234241561610411, "grad_norm": 0.4114152789115906, "learning_rate": 2.2468483123220823e-06, "loss": 0.4337, "step": 1105 }, { "epoch": 0.11244408296055307, "grad_norm": 0.43635857105255127, "learning_rate": 2.2488816592110614e-06, "loss": 0.449, "step": 1106 }, { "epoch": 0.11254575030500204, "grad_norm": 0.4839904308319092, "learning_rate": 2.250915006100041e-06, "loss": 0.4954, "step": 1107 }, { "epoch": 0.11264741764945099, "grad_norm": 0.4166222810745239, "learning_rate": 2.25294835298902e-06, "loss": 0.4557, "step": 1108 }, { "epoch": 0.11274908499389996, "grad_norm": 0.43004533648490906, "learning_rate": 2.2549816998779994e-06, "loss": 0.4453, "step": 1109 }, { "epoch": 0.11285075233834892, "grad_norm": 0.40528228878974915, "learning_rate": 2.2570150467669785e-06, "loss": 0.4195, "step": 1110 }, { "epoch": 0.11295241968279789, "grad_norm": 0.4210710823535919, "learning_rate": 2.259048393655958e-06, "loss": 0.4272, "step": 1111 }, { "epoch": 0.11305408702724686, "grad_norm": 0.4360107481479645, "learning_rate": 2.2610817405449375e-06, "loss": 0.4544, "step": 1112 }, { "epoch": 0.11315575437169581, "grad_norm": 0.45728757977485657, "learning_rate": 2.2631150874339165e-06, "loss": 0.4824, "step": 1113 }, { "epoch": 0.11325742171614478, "grad_norm": 0.4504103362560272, "learning_rate": 2.2651484343228956e-06, "loss": 0.4815, "step": 1114 }, { "epoch": 0.11335908906059373, "grad_norm": 0.4159921705722809, "learning_rate": 2.267181781211875e-06, "loss": 0.4972, "step": 1115 }, { "epoch": 0.1134607564050427, "grad_norm": 0.417548805475235, "learning_rate": 2.269215128100854e-06, "loss": 0.4481, "step": 1116 }, { "epoch": 0.11356242374949166, "grad_norm": 0.46538934111595154, "learning_rate": 2.271248474989833e-06, "loss": 0.4782, "step": 1117 }, { "epoch": 0.11366409109394063, "grad_norm": 0.46205469965934753, "learning_rate": 2.2732818218788127e-06, "loss": 0.4638, "step": 1118 }, { "epoch": 0.11376575843838958, "grad_norm": 0.43398237228393555, "learning_rate": 2.2753151687677918e-06, "loss": 0.4527, "step": 1119 }, { "epoch": 0.11386742578283855, "grad_norm": 0.4111020267009735, "learning_rate": 2.2773485156567712e-06, "loss": 0.4483, "step": 1120 }, { "epoch": 0.11396909312728752, "grad_norm": 0.39751315116882324, "learning_rate": 2.2793818625457507e-06, "loss": 0.4578, "step": 1121 }, { "epoch": 0.11407076047173648, "grad_norm": 0.4568365216255188, "learning_rate": 2.28141520943473e-06, "loss": 0.4575, "step": 1122 }, { "epoch": 0.11417242781618545, "grad_norm": 0.49010372161865234, "learning_rate": 2.2834485563237093e-06, "loss": 0.4758, "step": 1123 }, { "epoch": 0.1142740951606344, "grad_norm": 0.44671621918678284, "learning_rate": 2.2854819032126884e-06, "loss": 0.4711, "step": 1124 }, { "epoch": 0.11437576250508337, "grad_norm": 0.4223269820213318, "learning_rate": 2.2875152501016674e-06, "loss": 0.4647, "step": 1125 }, { "epoch": 0.11447742984953233, "grad_norm": 0.48307666182518005, "learning_rate": 2.2895485969906465e-06, "loss": 0.4761, "step": 1126 }, { "epoch": 0.1145790971939813, "grad_norm": 0.40034234523773193, "learning_rate": 2.291581943879626e-06, "loss": 0.4828, "step": 1127 }, { "epoch": 0.11468076453843025, "grad_norm": 0.4631451964378357, "learning_rate": 2.293615290768605e-06, "loss": 0.4499, "step": 1128 }, { "epoch": 0.11478243188287922, "grad_norm": 0.44287699460983276, "learning_rate": 2.2956486376575845e-06, "loss": 0.4564, "step": 1129 }, { "epoch": 0.11488409922732819, "grad_norm": 0.40660035610198975, "learning_rate": 2.297681984546564e-06, "loss": 0.4685, "step": 1130 }, { "epoch": 0.11498576657177714, "grad_norm": 0.43594279885292053, "learning_rate": 2.299715331435543e-06, "loss": 0.5228, "step": 1131 }, { "epoch": 0.11508743391622611, "grad_norm": 0.41298866271972656, "learning_rate": 2.3017486783245226e-06, "loss": 0.4848, "step": 1132 }, { "epoch": 0.11518910126067507, "grad_norm": 0.4252341687679291, "learning_rate": 2.3037820252135016e-06, "loss": 0.4454, "step": 1133 }, { "epoch": 0.11529076860512404, "grad_norm": 0.4629703462123871, "learning_rate": 2.305815372102481e-06, "loss": 0.4946, "step": 1134 }, { "epoch": 0.11539243594957299, "grad_norm": 0.3804275691509247, "learning_rate": 2.30784871899146e-06, "loss": 0.4265, "step": 1135 }, { "epoch": 0.11549410329402196, "grad_norm": 0.4479413628578186, "learning_rate": 2.3098820658804392e-06, "loss": 0.4287, "step": 1136 }, { "epoch": 0.11559577063847093, "grad_norm": 0.4488312005996704, "learning_rate": 2.3119154127694187e-06, "loss": 0.457, "step": 1137 }, { "epoch": 0.11569743798291988, "grad_norm": 0.3882199227809906, "learning_rate": 2.313948759658398e-06, "loss": 0.4983, "step": 1138 }, { "epoch": 0.11579910532736885, "grad_norm": 0.4288121461868286, "learning_rate": 2.3159821065473773e-06, "loss": 0.4251, "step": 1139 }, { "epoch": 0.11590077267181781, "grad_norm": 0.42962560057640076, "learning_rate": 2.3180154534363564e-06, "loss": 0.46, "step": 1140 }, { "epoch": 0.11600244001626678, "grad_norm": 0.4716186821460724, "learning_rate": 2.320048800325336e-06, "loss": 0.4828, "step": 1141 }, { "epoch": 0.11610410736071573, "grad_norm": 0.4100567102432251, "learning_rate": 2.322082147214315e-06, "loss": 0.4486, "step": 1142 }, { "epoch": 0.1162057747051647, "grad_norm": 0.4318966865539551, "learning_rate": 2.3241154941032944e-06, "loss": 0.4635, "step": 1143 }, { "epoch": 0.11630744204961366, "grad_norm": 0.4076835513114929, "learning_rate": 2.3261488409922735e-06, "loss": 0.4497, "step": 1144 }, { "epoch": 0.11640910939406263, "grad_norm": 0.42299649119377136, "learning_rate": 2.3281821878812525e-06, "loss": 0.4568, "step": 1145 }, { "epoch": 0.1165107767385116, "grad_norm": 0.4252814054489136, "learning_rate": 2.330215534770232e-06, "loss": 0.4307, "step": 1146 }, { "epoch": 0.11661244408296055, "grad_norm": 0.4316556453704834, "learning_rate": 2.332248881659211e-06, "loss": 0.4185, "step": 1147 }, { "epoch": 0.11671411142740952, "grad_norm": 0.3913451135158539, "learning_rate": 2.3342822285481906e-06, "loss": 0.4443, "step": 1148 }, { "epoch": 0.11681577877185848, "grad_norm": 0.42717236280441284, "learning_rate": 2.3363155754371696e-06, "loss": 0.4342, "step": 1149 }, { "epoch": 0.11691744611630744, "grad_norm": 0.43730947375297546, "learning_rate": 2.338348922326149e-06, "loss": 0.4827, "step": 1150 }, { "epoch": 0.1170191134607564, "grad_norm": 0.43182459473609924, "learning_rate": 2.340382269215128e-06, "loss": 0.4502, "step": 1151 }, { "epoch": 0.11712078080520537, "grad_norm": 0.4208647608757019, "learning_rate": 2.3424156161041077e-06, "loss": 0.4726, "step": 1152 }, { "epoch": 0.11722244814965434, "grad_norm": 0.3707115352153778, "learning_rate": 2.3444489629930867e-06, "loss": 0.4505, "step": 1153 }, { "epoch": 0.11732411549410329, "grad_norm": 0.4292318820953369, "learning_rate": 2.3464823098820662e-06, "loss": 0.4565, "step": 1154 }, { "epoch": 0.11742578283855226, "grad_norm": 0.4250704348087311, "learning_rate": 2.3485156567710453e-06, "loss": 0.4931, "step": 1155 }, { "epoch": 0.11752745018300122, "grad_norm": 0.4219478666782379, "learning_rate": 2.3505490036600243e-06, "loss": 0.4148, "step": 1156 }, { "epoch": 0.11762911752745019, "grad_norm": 0.4035142660140991, "learning_rate": 2.352582350549004e-06, "loss": 0.413, "step": 1157 }, { "epoch": 0.11773078487189914, "grad_norm": 0.39028647541999817, "learning_rate": 2.354615697437983e-06, "loss": 0.4433, "step": 1158 }, { "epoch": 0.11783245221634811, "grad_norm": 0.4375642240047455, "learning_rate": 2.3566490443269624e-06, "loss": 0.4616, "step": 1159 }, { "epoch": 0.11793411956079707, "grad_norm": 0.4126935601234436, "learning_rate": 2.3586823912159415e-06, "loss": 0.4924, "step": 1160 }, { "epoch": 0.11803578690524603, "grad_norm": 0.40374070405960083, "learning_rate": 2.360715738104921e-06, "loss": 0.4673, "step": 1161 }, { "epoch": 0.118137454249695, "grad_norm": 0.42404428124427795, "learning_rate": 2.3627490849939004e-06, "loss": 0.4242, "step": 1162 }, { "epoch": 0.11823912159414396, "grad_norm": 0.42185917496681213, "learning_rate": 2.3647824318828795e-06, "loss": 0.4816, "step": 1163 }, { "epoch": 0.11834078893859293, "grad_norm": 0.45367613434791565, "learning_rate": 2.3668157787718586e-06, "loss": 0.4859, "step": 1164 }, { "epoch": 0.11844245628304188, "grad_norm": 0.3893878757953644, "learning_rate": 2.368849125660838e-06, "loss": 0.476, "step": 1165 }, { "epoch": 0.11854412362749085, "grad_norm": 0.39503878355026245, "learning_rate": 2.370882472549817e-06, "loss": 0.466, "step": 1166 }, { "epoch": 0.11864579097193981, "grad_norm": 0.4640415608882904, "learning_rate": 2.372915819438796e-06, "loss": 0.4338, "step": 1167 }, { "epoch": 0.11874745831638878, "grad_norm": 0.3970538079738617, "learning_rate": 2.3749491663277757e-06, "loss": 0.4351, "step": 1168 }, { "epoch": 0.11884912566083775, "grad_norm": 0.45963630080223083, "learning_rate": 2.376982513216755e-06, "loss": 0.4671, "step": 1169 }, { "epoch": 0.1189507930052867, "grad_norm": 0.4896050989627838, "learning_rate": 2.3790158601057342e-06, "loss": 0.4667, "step": 1170 }, { "epoch": 0.11905246034973567, "grad_norm": 0.4129877984523773, "learning_rate": 2.3810492069947137e-06, "loss": 0.514, "step": 1171 }, { "epoch": 0.11915412769418463, "grad_norm": 0.4457924962043762, "learning_rate": 2.3830825538836928e-06, "loss": 0.4589, "step": 1172 }, { "epoch": 0.1192557950386336, "grad_norm": 0.48049506545066833, "learning_rate": 2.3851159007726723e-06, "loss": 0.4746, "step": 1173 }, { "epoch": 0.11935746238308255, "grad_norm": 0.452651709318161, "learning_rate": 2.3871492476616513e-06, "loss": 0.4735, "step": 1174 }, { "epoch": 0.11945912972753152, "grad_norm": 0.47849661111831665, "learning_rate": 2.3891825945506304e-06, "loss": 0.5027, "step": 1175 }, { "epoch": 0.11956079707198047, "grad_norm": 0.4320659041404724, "learning_rate": 2.39121594143961e-06, "loss": 0.4567, "step": 1176 }, { "epoch": 0.11966246441642944, "grad_norm": 0.4324432611465454, "learning_rate": 2.393249288328589e-06, "loss": 0.4611, "step": 1177 }, { "epoch": 0.11976413176087841, "grad_norm": 0.4405266344547272, "learning_rate": 2.3952826352175684e-06, "loss": 0.4287, "step": 1178 }, { "epoch": 0.11986579910532737, "grad_norm": 0.44933706521987915, "learning_rate": 2.3973159821065475e-06, "loss": 0.428, "step": 1179 }, { "epoch": 0.11996746644977634, "grad_norm": 0.44347327947616577, "learning_rate": 2.399349328995527e-06, "loss": 0.4271, "step": 1180 }, { "epoch": 0.12006913379422529, "grad_norm": 0.413217157125473, "learning_rate": 2.401382675884506e-06, "loss": 0.4279, "step": 1181 }, { "epoch": 0.12017080113867426, "grad_norm": 0.39944586157798767, "learning_rate": 2.4034160227734855e-06, "loss": 0.4232, "step": 1182 }, { "epoch": 0.12027246848312322, "grad_norm": 0.40007415413856506, "learning_rate": 2.4054493696624646e-06, "loss": 0.4489, "step": 1183 }, { "epoch": 0.12037413582757218, "grad_norm": 0.4521000385284424, "learning_rate": 2.407482716551444e-06, "loss": 0.4518, "step": 1184 }, { "epoch": 0.12047580317202115, "grad_norm": 0.5010474324226379, "learning_rate": 2.409516063440423e-06, "loss": 0.47, "step": 1185 }, { "epoch": 0.12057747051647011, "grad_norm": 0.4273622930049896, "learning_rate": 2.4115494103294022e-06, "loss": 0.4641, "step": 1186 }, { "epoch": 0.12067913786091908, "grad_norm": 0.38766953349113464, "learning_rate": 2.4135827572183817e-06, "loss": 0.422, "step": 1187 }, { "epoch": 0.12078080520536803, "grad_norm": 0.40261539816856384, "learning_rate": 2.4156161041073608e-06, "loss": 0.4395, "step": 1188 }, { "epoch": 0.120882472549817, "grad_norm": 0.4752447307109833, "learning_rate": 2.4176494509963403e-06, "loss": 0.4805, "step": 1189 }, { "epoch": 0.12098413989426596, "grad_norm": 0.551490843296051, "learning_rate": 2.4196827978853193e-06, "loss": 0.458, "step": 1190 }, { "epoch": 0.12108580723871493, "grad_norm": 0.45410212874412537, "learning_rate": 2.421716144774299e-06, "loss": 0.441, "step": 1191 }, { "epoch": 0.12118747458316388, "grad_norm": 0.4674712121486664, "learning_rate": 2.423749491663278e-06, "loss": 0.4546, "step": 1192 }, { "epoch": 0.12128914192761285, "grad_norm": 0.4735391438007355, "learning_rate": 2.4257828385522574e-06, "loss": 0.4341, "step": 1193 }, { "epoch": 0.12139080927206182, "grad_norm": 0.5230594277381897, "learning_rate": 2.4278161854412364e-06, "loss": 0.4417, "step": 1194 }, { "epoch": 0.12149247661651078, "grad_norm": 0.42903298139572144, "learning_rate": 2.4298495323302155e-06, "loss": 0.4743, "step": 1195 }, { "epoch": 0.12159414396095974, "grad_norm": 0.3862263858318329, "learning_rate": 2.431882879219195e-06, "loss": 0.4589, "step": 1196 }, { "epoch": 0.1216958113054087, "grad_norm": 0.4883868396282196, "learning_rate": 2.433916226108174e-06, "loss": 0.478, "step": 1197 }, { "epoch": 0.12179747864985767, "grad_norm": 0.4407711327075958, "learning_rate": 2.4359495729971535e-06, "loss": 0.4342, "step": 1198 }, { "epoch": 0.12189914599430662, "grad_norm": 0.44129011034965515, "learning_rate": 2.4379829198861326e-06, "loss": 0.4313, "step": 1199 }, { "epoch": 0.12200081333875559, "grad_norm": 0.46756550669670105, "learning_rate": 2.440016266775112e-06, "loss": 0.4709, "step": 1200 }, { "epoch": 0.12210248068320455, "grad_norm": 0.3885224461555481, "learning_rate": 2.442049613664091e-06, "loss": 0.4968, "step": 1201 }, { "epoch": 0.12220414802765352, "grad_norm": 0.45384302735328674, "learning_rate": 2.4440829605530706e-06, "loss": 0.4491, "step": 1202 }, { "epoch": 0.12230581537210249, "grad_norm": 0.48885107040405273, "learning_rate": 2.44611630744205e-06, "loss": 0.4928, "step": 1203 }, { "epoch": 0.12240748271655144, "grad_norm": 0.4693854749202728, "learning_rate": 2.448149654331029e-06, "loss": 0.4518, "step": 1204 }, { "epoch": 0.12250915006100041, "grad_norm": 0.47831296920776367, "learning_rate": 2.4501830012200083e-06, "loss": 0.4836, "step": 1205 }, { "epoch": 0.12261081740544937, "grad_norm": 0.42214325070381165, "learning_rate": 2.4522163481089873e-06, "loss": 0.4445, "step": 1206 }, { "epoch": 0.12271248474989833, "grad_norm": 0.5049413442611694, "learning_rate": 2.454249694997967e-06, "loss": 0.4814, "step": 1207 }, { "epoch": 0.12281415209434729, "grad_norm": 0.4481274485588074, "learning_rate": 2.456283041886946e-06, "loss": 0.4328, "step": 1208 }, { "epoch": 0.12291581943879626, "grad_norm": 0.4873949885368347, "learning_rate": 2.4583163887759254e-06, "loss": 0.4806, "step": 1209 }, { "epoch": 0.12301748678324523, "grad_norm": 0.4285384714603424, "learning_rate": 2.460349735664905e-06, "loss": 0.4701, "step": 1210 }, { "epoch": 0.12311915412769418, "grad_norm": 0.42340409755706787, "learning_rate": 2.462383082553884e-06, "loss": 0.4659, "step": 1211 }, { "epoch": 0.12322082147214315, "grad_norm": 0.4458845257759094, "learning_rate": 2.4644164294428634e-06, "loss": 0.4618, "step": 1212 }, { "epoch": 0.12332248881659211, "grad_norm": 0.40556541085243225, "learning_rate": 2.4664497763318425e-06, "loss": 0.4576, "step": 1213 }, { "epoch": 0.12342415616104108, "grad_norm": 0.47225433588027954, "learning_rate": 2.4684831232208215e-06, "loss": 0.4855, "step": 1214 }, { "epoch": 0.12352582350549003, "grad_norm": 0.4401853680610657, "learning_rate": 2.470516470109801e-06, "loss": 0.4538, "step": 1215 }, { "epoch": 0.123627490849939, "grad_norm": 0.45675674080848694, "learning_rate": 2.47254981699878e-06, "loss": 0.4753, "step": 1216 }, { "epoch": 0.12372915819438796, "grad_norm": 0.40815258026123047, "learning_rate": 2.474583163887759e-06, "loss": 0.4416, "step": 1217 }, { "epoch": 0.12383082553883693, "grad_norm": 0.44909408688545227, "learning_rate": 2.4766165107767386e-06, "loss": 0.4662, "step": 1218 }, { "epoch": 0.1239324928832859, "grad_norm": 0.4233357310295105, "learning_rate": 2.478649857665718e-06, "loss": 0.4771, "step": 1219 }, { "epoch": 0.12403416022773485, "grad_norm": 0.39162376523017883, "learning_rate": 2.480683204554697e-06, "loss": 0.4326, "step": 1220 }, { "epoch": 0.12413582757218382, "grad_norm": 0.4016764461994171, "learning_rate": 2.4827165514436767e-06, "loss": 0.4271, "step": 1221 }, { "epoch": 0.12423749491663277, "grad_norm": 0.43641653656959534, "learning_rate": 2.4847498983326557e-06, "loss": 0.4912, "step": 1222 }, { "epoch": 0.12433916226108174, "grad_norm": 0.3872354328632355, "learning_rate": 2.4867832452216352e-06, "loss": 0.4761, "step": 1223 }, { "epoch": 0.1244408296055307, "grad_norm": 0.4470702111721039, "learning_rate": 2.4888165921106143e-06, "loss": 0.433, "step": 1224 }, { "epoch": 0.12454249694997967, "grad_norm": 0.4750601649284363, "learning_rate": 2.4908499389995934e-06, "loss": 0.47, "step": 1225 }, { "epoch": 0.12464416429442864, "grad_norm": 0.41505375504493713, "learning_rate": 2.492883285888573e-06, "loss": 0.4328, "step": 1226 }, { "epoch": 0.12474583163887759, "grad_norm": 0.4788786768913269, "learning_rate": 2.494916632777552e-06, "loss": 0.4694, "step": 1227 }, { "epoch": 0.12484749898332656, "grad_norm": 0.46763792634010315, "learning_rate": 2.4969499796665314e-06, "loss": 0.4339, "step": 1228 }, { "epoch": 0.12494916632777552, "grad_norm": 0.4173242449760437, "learning_rate": 2.4989833265555105e-06, "loss": 0.3961, "step": 1229 }, { "epoch": 0.12505083367222447, "grad_norm": 0.4287344217300415, "learning_rate": 2.5010166734444895e-06, "loss": 0.4575, "step": 1230 }, { "epoch": 0.12515250101667344, "grad_norm": 0.43617868423461914, "learning_rate": 2.503050020333469e-06, "loss": 0.4827, "step": 1231 }, { "epoch": 0.1252541683611224, "grad_norm": 0.46558257937431335, "learning_rate": 2.5050833672224485e-06, "loss": 0.4716, "step": 1232 }, { "epoch": 0.12535583570557138, "grad_norm": 0.4355822503566742, "learning_rate": 2.5071167141114276e-06, "loss": 0.4472, "step": 1233 }, { "epoch": 0.12545750305002035, "grad_norm": 0.5044629573822021, "learning_rate": 2.509150061000407e-06, "loss": 0.5158, "step": 1234 }, { "epoch": 0.1255591703944693, "grad_norm": 0.4419892132282257, "learning_rate": 2.511183407889386e-06, "loss": 0.4287, "step": 1235 }, { "epoch": 0.12566083773891826, "grad_norm": 0.43669191002845764, "learning_rate": 2.513216754778365e-06, "loss": 0.4652, "step": 1236 }, { "epoch": 0.12576250508336723, "grad_norm": 0.4280166029930115, "learning_rate": 2.5152501016673447e-06, "loss": 0.42, "step": 1237 }, { "epoch": 0.1258641724278162, "grad_norm": 0.5101788640022278, "learning_rate": 2.517283448556324e-06, "loss": 0.4743, "step": 1238 }, { "epoch": 0.12596583977226514, "grad_norm": 0.4770960211753845, "learning_rate": 2.519316795445303e-06, "loss": 0.4604, "step": 1239 }, { "epoch": 0.1260675071167141, "grad_norm": 0.44267258048057556, "learning_rate": 2.5213501423342823e-06, "loss": 0.4681, "step": 1240 }, { "epoch": 0.12616917446116307, "grad_norm": 0.4488193988800049, "learning_rate": 2.5233834892232618e-06, "loss": 0.4648, "step": 1241 }, { "epoch": 0.12627084180561204, "grad_norm": 0.49608945846557617, "learning_rate": 2.5254168361122413e-06, "loss": 0.4391, "step": 1242 }, { "epoch": 0.126372509150061, "grad_norm": 0.41317349672317505, "learning_rate": 2.5274501830012203e-06, "loss": 0.426, "step": 1243 }, { "epoch": 0.12647417649450995, "grad_norm": 0.43366172909736633, "learning_rate": 2.5294835298901994e-06, "loss": 0.4605, "step": 1244 }, { "epoch": 0.12657584383895892, "grad_norm": 0.44946572184562683, "learning_rate": 2.531516876779179e-06, "loss": 0.4777, "step": 1245 }, { "epoch": 0.1266775111834079, "grad_norm": 0.4401210844516754, "learning_rate": 2.533550223668158e-06, "loss": 0.4561, "step": 1246 }, { "epoch": 0.12677917852785686, "grad_norm": 0.5112131237983704, "learning_rate": 2.5355835705571374e-06, "loss": 0.5035, "step": 1247 }, { "epoch": 0.1268808458723058, "grad_norm": 0.4083808362483978, "learning_rate": 2.537616917446116e-06, "loss": 0.4782, "step": 1248 }, { "epoch": 0.12698251321675477, "grad_norm": 0.4654710590839386, "learning_rate": 2.5396502643350956e-06, "loss": 0.4426, "step": 1249 }, { "epoch": 0.12708418056120374, "grad_norm": 0.5030589699745178, "learning_rate": 2.541683611224075e-06, "loss": 0.4475, "step": 1250 }, { "epoch": 0.1271858479056527, "grad_norm": 0.40744394063949585, "learning_rate": 2.5437169581130545e-06, "loss": 0.4516, "step": 1251 }, { "epoch": 0.12728751525010168, "grad_norm": 0.398817241191864, "learning_rate": 2.5457503050020336e-06, "loss": 0.454, "step": 1252 }, { "epoch": 0.12738918259455062, "grad_norm": 0.4745034873485565, "learning_rate": 2.5477836518910127e-06, "loss": 0.4932, "step": 1253 }, { "epoch": 0.1274908499389996, "grad_norm": 0.4487113952636719, "learning_rate": 2.549816998779992e-06, "loss": 0.4631, "step": 1254 }, { "epoch": 0.12759251728344856, "grad_norm": 0.44526052474975586, "learning_rate": 2.5518503456689712e-06, "loss": 0.401, "step": 1255 }, { "epoch": 0.12769418462789753, "grad_norm": 0.4280645549297333, "learning_rate": 2.5538836925579507e-06, "loss": 0.4698, "step": 1256 }, { "epoch": 0.12779585197234647, "grad_norm": 0.47687336802482605, "learning_rate": 2.5559170394469298e-06, "loss": 0.4357, "step": 1257 }, { "epoch": 0.12789751931679544, "grad_norm": 0.4307197630405426, "learning_rate": 2.557950386335909e-06, "loss": 0.4581, "step": 1258 }, { "epoch": 0.1279991866612444, "grad_norm": 0.426239937543869, "learning_rate": 2.5599837332248883e-06, "loss": 0.4638, "step": 1259 }, { "epoch": 0.12810085400569338, "grad_norm": 0.4530990421772003, "learning_rate": 2.562017080113868e-06, "loss": 0.4267, "step": 1260 }, { "epoch": 0.12820252135014235, "grad_norm": 0.4439399838447571, "learning_rate": 2.5640504270028473e-06, "loss": 0.4352, "step": 1261 }, { "epoch": 0.1283041886945913, "grad_norm": 0.5250755548477173, "learning_rate": 2.566083773891826e-06, "loss": 0.438, "step": 1262 }, { "epoch": 0.12840585603904026, "grad_norm": 0.44611889123916626, "learning_rate": 2.5681171207808054e-06, "loss": 0.4551, "step": 1263 }, { "epoch": 0.12850752338348922, "grad_norm": 0.4563615918159485, "learning_rate": 2.570150467669785e-06, "loss": 0.4705, "step": 1264 }, { "epoch": 0.1286091907279382, "grad_norm": 0.49736374616622925, "learning_rate": 2.572183814558764e-06, "loss": 0.4568, "step": 1265 }, { "epoch": 0.12871085807238714, "grad_norm": 0.4922572374343872, "learning_rate": 2.574217161447743e-06, "loss": 0.4682, "step": 1266 }, { "epoch": 0.1288125254168361, "grad_norm": 0.4475443363189697, "learning_rate": 2.576250508336722e-06, "loss": 0.4767, "step": 1267 }, { "epoch": 0.12891419276128507, "grad_norm": 0.4772830307483673, "learning_rate": 2.5782838552257016e-06, "loss": 0.4653, "step": 1268 }, { "epoch": 0.12901586010573404, "grad_norm": 0.43002668023109436, "learning_rate": 2.580317202114681e-06, "loss": 0.4696, "step": 1269 }, { "epoch": 0.129117527450183, "grad_norm": 0.4334867596626282, "learning_rate": 2.5823505490036606e-06, "loss": 0.499, "step": 1270 }, { "epoch": 0.12921919479463195, "grad_norm": 0.48639175295829773, "learning_rate": 2.5843838958926392e-06, "loss": 0.4458, "step": 1271 }, { "epoch": 0.12932086213908092, "grad_norm": 0.43199092149734497, "learning_rate": 2.5864172427816187e-06, "loss": 0.4701, "step": 1272 }, { "epoch": 0.1294225294835299, "grad_norm": 0.43034231662750244, "learning_rate": 2.588450589670598e-06, "loss": 0.4611, "step": 1273 }, { "epoch": 0.12952419682797886, "grad_norm": 0.45593222975730896, "learning_rate": 2.5904839365595773e-06, "loss": 0.472, "step": 1274 }, { "epoch": 0.12962586417242783, "grad_norm": 0.4255603551864624, "learning_rate": 2.5925172834485567e-06, "loss": 0.4507, "step": 1275 }, { "epoch": 0.12972753151687677, "grad_norm": 0.43339383602142334, "learning_rate": 2.594550630337536e-06, "loss": 0.4218, "step": 1276 }, { "epoch": 0.12982919886132574, "grad_norm": 0.44578230381011963, "learning_rate": 2.596583977226515e-06, "loss": 0.4187, "step": 1277 }, { "epoch": 0.1299308662057747, "grad_norm": 0.42503622174263, "learning_rate": 2.5986173241154944e-06, "loss": 0.4521, "step": 1278 }, { "epoch": 0.13003253355022368, "grad_norm": 0.5111249089241028, "learning_rate": 2.600650671004474e-06, "loss": 0.4492, "step": 1279 }, { "epoch": 0.13013420089467262, "grad_norm": 0.48479709029197693, "learning_rate": 2.6026840178934525e-06, "loss": 0.4454, "step": 1280 }, { "epoch": 0.1302358682391216, "grad_norm": 0.4760638177394867, "learning_rate": 2.604717364782432e-06, "loss": 0.4707, "step": 1281 }, { "epoch": 0.13033753558357056, "grad_norm": 0.4470771253108978, "learning_rate": 2.6067507116714115e-06, "loss": 0.4738, "step": 1282 }, { "epoch": 0.13043920292801953, "grad_norm": 0.5367510914802551, "learning_rate": 2.6087840585603905e-06, "loss": 0.4639, "step": 1283 }, { "epoch": 0.1305408702724685, "grad_norm": 0.4415576159954071, "learning_rate": 2.61081740544937e-06, "loss": 0.4517, "step": 1284 }, { "epoch": 0.13064253761691744, "grad_norm": 0.43358737230300903, "learning_rate": 2.612850752338349e-06, "loss": 0.4451, "step": 1285 }, { "epoch": 0.1307442049613664, "grad_norm": 0.40090879797935486, "learning_rate": 2.614884099227328e-06, "loss": 0.4312, "step": 1286 }, { "epoch": 0.13084587230581537, "grad_norm": 0.4714467525482178, "learning_rate": 2.6169174461163076e-06, "loss": 0.4422, "step": 1287 }, { "epoch": 0.13094753965026434, "grad_norm": 0.5075427293777466, "learning_rate": 2.618950793005287e-06, "loss": 0.4921, "step": 1288 }, { "epoch": 0.13104920699471329, "grad_norm": 0.4397391378879547, "learning_rate": 2.6209841398942658e-06, "loss": 0.4556, "step": 1289 }, { "epoch": 0.13115087433916225, "grad_norm": 0.402925580739975, "learning_rate": 2.6230174867832453e-06, "loss": 0.4739, "step": 1290 }, { "epoch": 0.13125254168361122, "grad_norm": 0.4544444680213928, "learning_rate": 2.6250508336722247e-06, "loss": 0.4265, "step": 1291 }, { "epoch": 0.1313542090280602, "grad_norm": 0.5103829503059387, "learning_rate": 2.6270841805612042e-06, "loss": 0.4712, "step": 1292 }, { "epoch": 0.13145587637250916, "grad_norm": 0.5012019276618958, "learning_rate": 2.6291175274501833e-06, "loss": 0.4927, "step": 1293 }, { "epoch": 0.1315575437169581, "grad_norm": 0.42981189489364624, "learning_rate": 2.6311508743391624e-06, "loss": 0.4621, "step": 1294 }, { "epoch": 0.13165921106140707, "grad_norm": 0.491060733795166, "learning_rate": 2.633184221228142e-06, "loss": 0.4329, "step": 1295 }, { "epoch": 0.13176087840585604, "grad_norm": 0.45893043279647827, "learning_rate": 2.635217568117121e-06, "loss": 0.4224, "step": 1296 }, { "epoch": 0.131862545750305, "grad_norm": 0.4251467287540436, "learning_rate": 2.6372509150061004e-06, "loss": 0.4702, "step": 1297 }, { "epoch": 0.13196421309475395, "grad_norm": 0.45025357604026794, "learning_rate": 2.639284261895079e-06, "loss": 0.4509, "step": 1298 }, { "epoch": 0.13206588043920292, "grad_norm": 0.47174668312072754, "learning_rate": 2.6413176087840585e-06, "loss": 0.4454, "step": 1299 }, { "epoch": 0.1321675477836519, "grad_norm": 0.4659214913845062, "learning_rate": 2.643350955673038e-06, "loss": 0.4427, "step": 1300 }, { "epoch": 0.13226921512810086, "grad_norm": 0.4354251027107239, "learning_rate": 2.6453843025620175e-06, "loss": 0.4694, "step": 1301 }, { "epoch": 0.13237088247254983, "grad_norm": 0.42185088992118835, "learning_rate": 2.6474176494509966e-06, "loss": 0.4463, "step": 1302 }, { "epoch": 0.13247254981699877, "grad_norm": 0.4877132773399353, "learning_rate": 2.6494509963399756e-06, "loss": 0.4655, "step": 1303 }, { "epoch": 0.13257421716144774, "grad_norm": 0.45629480481147766, "learning_rate": 2.651484343228955e-06, "loss": 0.4535, "step": 1304 }, { "epoch": 0.1326758845058967, "grad_norm": 0.4937935173511505, "learning_rate": 2.653517690117934e-06, "loss": 0.4399, "step": 1305 }, { "epoch": 0.13277755185034568, "grad_norm": 0.4727637469768524, "learning_rate": 2.6555510370069137e-06, "loss": 0.4361, "step": 1306 }, { "epoch": 0.13287921919479465, "grad_norm": 0.4255765378475189, "learning_rate": 2.657584383895893e-06, "loss": 0.4582, "step": 1307 }, { "epoch": 0.1329808865392436, "grad_norm": 0.4521445035934448, "learning_rate": 2.659617730784872e-06, "loss": 0.4444, "step": 1308 }, { "epoch": 0.13308255388369256, "grad_norm": 0.4493958055973053, "learning_rate": 2.6616510776738513e-06, "loss": 0.4695, "step": 1309 }, { "epoch": 0.13318422122814152, "grad_norm": 0.43139857053756714, "learning_rate": 2.6636844245628308e-06, "loss": 0.4077, "step": 1310 }, { "epoch": 0.1332858885725905, "grad_norm": 0.42649394273757935, "learning_rate": 2.6657177714518103e-06, "loss": 0.4736, "step": 1311 }, { "epoch": 0.13338755591703944, "grad_norm": 0.45345693826675415, "learning_rate": 2.667751118340789e-06, "loss": 0.4397, "step": 1312 }, { "epoch": 0.1334892232614884, "grad_norm": 0.4464998245239258, "learning_rate": 2.6697844652297684e-06, "loss": 0.4907, "step": 1313 }, { "epoch": 0.13359089060593737, "grad_norm": 0.41941386461257935, "learning_rate": 2.671817812118748e-06, "loss": 0.4335, "step": 1314 }, { "epoch": 0.13369255795038634, "grad_norm": 0.4177592694759369, "learning_rate": 2.673851159007727e-06, "loss": 0.4565, "step": 1315 }, { "epoch": 0.1337942252948353, "grad_norm": 0.46642792224884033, "learning_rate": 2.6758845058967064e-06, "loss": 0.4278, "step": 1316 }, { "epoch": 0.13389589263928425, "grad_norm": 0.5000512599945068, "learning_rate": 2.677917852785685e-06, "loss": 0.4263, "step": 1317 }, { "epoch": 0.13399755998373322, "grad_norm": 0.4252658188343048, "learning_rate": 2.6799511996746646e-06, "loss": 0.4578, "step": 1318 }, { "epoch": 0.1340992273281822, "grad_norm": 0.4902384579181671, "learning_rate": 2.681984546563644e-06, "loss": 0.459, "step": 1319 }, { "epoch": 0.13420089467263116, "grad_norm": 0.47961777448654175, "learning_rate": 2.6840178934526235e-06, "loss": 0.4235, "step": 1320 }, { "epoch": 0.1343025620170801, "grad_norm": 0.4567233920097351, "learning_rate": 2.686051240341602e-06, "loss": 0.4469, "step": 1321 }, { "epoch": 0.13440422936152907, "grad_norm": 0.40064918994903564, "learning_rate": 2.6880845872305817e-06, "loss": 0.4356, "step": 1322 }, { "epoch": 0.13450589670597804, "grad_norm": 0.4459376931190491, "learning_rate": 2.690117934119561e-06, "loss": 0.4452, "step": 1323 }, { "epoch": 0.134607564050427, "grad_norm": 0.46685442328453064, "learning_rate": 2.6921512810085402e-06, "loss": 0.4749, "step": 1324 }, { "epoch": 0.13470923139487598, "grad_norm": 0.47472602128982544, "learning_rate": 2.6941846278975197e-06, "loss": 0.4576, "step": 1325 }, { "epoch": 0.13481089873932492, "grad_norm": 0.49437275528907776, "learning_rate": 2.6962179747864988e-06, "loss": 0.4264, "step": 1326 }, { "epoch": 0.1349125660837739, "grad_norm": 0.4248851537704468, "learning_rate": 2.698251321675478e-06, "loss": 0.4622, "step": 1327 }, { "epoch": 0.13501423342822286, "grad_norm": 0.457780659198761, "learning_rate": 2.7002846685644573e-06, "loss": 0.4648, "step": 1328 }, { "epoch": 0.13511590077267183, "grad_norm": 0.4411025643348694, "learning_rate": 2.702318015453437e-06, "loss": 0.4524, "step": 1329 }, { "epoch": 0.13521756811712077, "grad_norm": 0.4138345420360565, "learning_rate": 2.7043513623424155e-06, "loss": 0.4324, "step": 1330 }, { "epoch": 0.13531923546156974, "grad_norm": 0.45089393854141235, "learning_rate": 2.706384709231395e-06, "loss": 0.4643, "step": 1331 }, { "epoch": 0.1354209028060187, "grad_norm": 0.40472832322120667, "learning_rate": 2.7084180561203744e-06, "loss": 0.4585, "step": 1332 }, { "epoch": 0.13552257015046767, "grad_norm": 0.4390346109867096, "learning_rate": 2.710451403009354e-06, "loss": 0.4483, "step": 1333 }, { "epoch": 0.13562423749491664, "grad_norm": 0.38604506850242615, "learning_rate": 2.712484749898333e-06, "loss": 0.4446, "step": 1334 }, { "epoch": 0.13572590483936559, "grad_norm": 0.4418511986732483, "learning_rate": 2.714518096787312e-06, "loss": 0.4598, "step": 1335 }, { "epoch": 0.13582757218381455, "grad_norm": 0.46431034803390503, "learning_rate": 2.716551443676291e-06, "loss": 0.4756, "step": 1336 }, { "epoch": 0.13592923952826352, "grad_norm": 0.4329689145088196, "learning_rate": 2.7185847905652706e-06, "loss": 0.5045, "step": 1337 }, { "epoch": 0.1360309068727125, "grad_norm": 0.4276464283466339, "learning_rate": 2.72061813745425e-06, "loss": 0.4416, "step": 1338 }, { "epoch": 0.13613257421716143, "grad_norm": 0.41415882110595703, "learning_rate": 2.7226514843432287e-06, "loss": 0.4606, "step": 1339 }, { "epoch": 0.1362342415616104, "grad_norm": 0.43342339992523193, "learning_rate": 2.7246848312322082e-06, "loss": 0.4752, "step": 1340 }, { "epoch": 0.13633590890605937, "grad_norm": 0.4300815463066101, "learning_rate": 2.7267181781211877e-06, "loss": 0.4721, "step": 1341 }, { "epoch": 0.13643757625050834, "grad_norm": 0.40079906582832336, "learning_rate": 2.728751525010167e-06, "loss": 0.4498, "step": 1342 }, { "epoch": 0.1365392435949573, "grad_norm": 0.4136153757572174, "learning_rate": 2.7307848718991463e-06, "loss": 0.4655, "step": 1343 }, { "epoch": 0.13664091093940625, "grad_norm": 0.45025119185447693, "learning_rate": 2.7328182187881253e-06, "loss": 0.4746, "step": 1344 }, { "epoch": 0.13674257828385522, "grad_norm": 0.4160768389701843, "learning_rate": 2.734851565677105e-06, "loss": 0.4491, "step": 1345 }, { "epoch": 0.1368442456283042, "grad_norm": 0.4254283607006073, "learning_rate": 2.736884912566084e-06, "loss": 0.4319, "step": 1346 }, { "epoch": 0.13694591297275316, "grad_norm": 0.42087191343307495, "learning_rate": 2.7389182594550634e-06, "loss": 0.464, "step": 1347 }, { "epoch": 0.13704758031720213, "grad_norm": 0.4399558901786804, "learning_rate": 2.740951606344043e-06, "loss": 0.4247, "step": 1348 }, { "epoch": 0.13714924766165107, "grad_norm": 0.4659210443496704, "learning_rate": 2.7429849532330215e-06, "loss": 0.4744, "step": 1349 }, { "epoch": 0.13725091500610004, "grad_norm": 0.5346238613128662, "learning_rate": 2.745018300122001e-06, "loss": 0.4316, "step": 1350 }, { "epoch": 0.137352582350549, "grad_norm": 0.4123973548412323, "learning_rate": 2.7470516470109805e-06, "loss": 0.455, "step": 1351 }, { "epoch": 0.13745424969499798, "grad_norm": 0.4053090512752533, "learning_rate": 2.74908499389996e-06, "loss": 0.4245, "step": 1352 }, { "epoch": 0.13755591703944692, "grad_norm": 0.43811148405075073, "learning_rate": 2.7511183407889386e-06, "loss": 0.4761, "step": 1353 }, { "epoch": 0.1376575843838959, "grad_norm": 0.4259202182292938, "learning_rate": 2.753151687677918e-06, "loss": 0.4582, "step": 1354 }, { "epoch": 0.13775925172834486, "grad_norm": 0.42637431621551514, "learning_rate": 2.755185034566897e-06, "loss": 0.4482, "step": 1355 }, { "epoch": 0.13786091907279382, "grad_norm": 0.4544549286365509, "learning_rate": 2.7572183814558766e-06, "loss": 0.4967, "step": 1356 }, { "epoch": 0.1379625864172428, "grad_norm": 0.4133221507072449, "learning_rate": 2.759251728344856e-06, "loss": 0.4537, "step": 1357 }, { "epoch": 0.13806425376169174, "grad_norm": 0.4119890332221985, "learning_rate": 2.7612850752338348e-06, "loss": 0.4487, "step": 1358 }, { "epoch": 0.1381659211061407, "grad_norm": 0.44272544980049133, "learning_rate": 2.7633184221228143e-06, "loss": 0.4508, "step": 1359 }, { "epoch": 0.13826758845058967, "grad_norm": 0.42408689856529236, "learning_rate": 2.7653517690117938e-06, "loss": 0.4845, "step": 1360 }, { "epoch": 0.13836925579503864, "grad_norm": 0.3866632282733917, "learning_rate": 2.7673851159007732e-06, "loss": 0.4253, "step": 1361 }, { "epoch": 0.13847092313948758, "grad_norm": 0.4226381182670593, "learning_rate": 2.769418462789752e-06, "loss": 0.4518, "step": 1362 }, { "epoch": 0.13857259048393655, "grad_norm": 0.4037894904613495, "learning_rate": 2.7714518096787314e-06, "loss": 0.433, "step": 1363 }, { "epoch": 0.13867425782838552, "grad_norm": 0.4395843744277954, "learning_rate": 2.773485156567711e-06, "loss": 0.4351, "step": 1364 }, { "epoch": 0.1387759251728345, "grad_norm": 0.4099331796169281, "learning_rate": 2.77551850345669e-06, "loss": 0.4398, "step": 1365 }, { "epoch": 0.13887759251728346, "grad_norm": 0.39396151900291443, "learning_rate": 2.7775518503456694e-06, "loss": 0.418, "step": 1366 }, { "epoch": 0.1389792598617324, "grad_norm": 0.39716970920562744, "learning_rate": 2.779585197234648e-06, "loss": 0.4405, "step": 1367 }, { "epoch": 0.13908092720618137, "grad_norm": 0.4313790202140808, "learning_rate": 2.7816185441236275e-06, "loss": 0.4702, "step": 1368 }, { "epoch": 0.13918259455063034, "grad_norm": 0.4520091712474823, "learning_rate": 2.783651891012607e-06, "loss": 0.4778, "step": 1369 }, { "epoch": 0.1392842618950793, "grad_norm": 0.4602294862270355, "learning_rate": 2.7856852379015865e-06, "loss": 0.4293, "step": 1370 }, { "epoch": 0.13938592923952825, "grad_norm": 0.4687252938747406, "learning_rate": 2.787718584790565e-06, "loss": 0.4551, "step": 1371 }, { "epoch": 0.13948759658397722, "grad_norm": 0.42047742009162903, "learning_rate": 2.7897519316795446e-06, "loss": 0.4505, "step": 1372 }, { "epoch": 0.1395892639284262, "grad_norm": 0.38718104362487793, "learning_rate": 2.791785278568524e-06, "loss": 0.4251, "step": 1373 }, { "epoch": 0.13969093127287516, "grad_norm": 0.4094961881637573, "learning_rate": 2.793818625457503e-06, "loss": 0.4248, "step": 1374 }, { "epoch": 0.13979259861732413, "grad_norm": 0.404663622379303, "learning_rate": 2.7958519723464827e-06, "loss": 0.4241, "step": 1375 }, { "epoch": 0.13989426596177307, "grad_norm": 0.44527289271354675, "learning_rate": 2.7978853192354617e-06, "loss": 0.4252, "step": 1376 }, { "epoch": 0.13999593330622204, "grad_norm": 0.423064261674881, "learning_rate": 2.799918666124441e-06, "loss": 0.4497, "step": 1377 }, { "epoch": 0.140097600650671, "grad_norm": 0.45223748683929443, "learning_rate": 2.8019520130134203e-06, "loss": 0.4658, "step": 1378 }, { "epoch": 0.14019926799511997, "grad_norm": 0.42647290229797363, "learning_rate": 2.8039853599024e-06, "loss": 0.4588, "step": 1379 }, { "epoch": 0.14030093533956894, "grad_norm": 0.41116684675216675, "learning_rate": 2.8060187067913793e-06, "loss": 0.4455, "step": 1380 }, { "epoch": 0.14040260268401789, "grad_norm": 0.44613608717918396, "learning_rate": 2.808052053680358e-06, "loss": 0.4199, "step": 1381 }, { "epoch": 0.14050427002846685, "grad_norm": 0.451973557472229, "learning_rate": 2.8100854005693374e-06, "loss": 0.4574, "step": 1382 }, { "epoch": 0.14060593737291582, "grad_norm": 0.4601975977420807, "learning_rate": 2.812118747458317e-06, "loss": 0.4949, "step": 1383 }, { "epoch": 0.1407076047173648, "grad_norm": 0.46744784712791443, "learning_rate": 2.814152094347296e-06, "loss": 0.5092, "step": 1384 }, { "epoch": 0.14080927206181373, "grad_norm": 0.44436943531036377, "learning_rate": 2.816185441236275e-06, "loss": 0.4493, "step": 1385 }, { "epoch": 0.1409109394062627, "grad_norm": 0.45904484391212463, "learning_rate": 2.818218788125254e-06, "loss": 0.4656, "step": 1386 }, { "epoch": 0.14101260675071167, "grad_norm": 0.5300613045692444, "learning_rate": 2.8202521350142336e-06, "loss": 0.4944, "step": 1387 }, { "epoch": 0.14111427409516064, "grad_norm": 0.4282524883747101, "learning_rate": 2.822285481903213e-06, "loss": 0.4652, "step": 1388 }, { "epoch": 0.1412159414396096, "grad_norm": 0.4250996708869934, "learning_rate": 2.8243188287921926e-06, "loss": 0.4469, "step": 1389 }, { "epoch": 0.14131760878405855, "grad_norm": 0.47121328115463257, "learning_rate": 2.826352175681171e-06, "loss": 0.4426, "step": 1390 }, { "epoch": 0.14141927612850752, "grad_norm": 0.5030543208122253, "learning_rate": 2.8283855225701507e-06, "loss": 0.4716, "step": 1391 }, { "epoch": 0.1415209434729565, "grad_norm": 0.44112834334373474, "learning_rate": 2.83041886945913e-06, "loss": 0.4784, "step": 1392 }, { "epoch": 0.14162261081740546, "grad_norm": 0.44680023193359375, "learning_rate": 2.8324522163481092e-06, "loss": 0.4935, "step": 1393 }, { "epoch": 0.1417242781618544, "grad_norm": 0.4567418694496155, "learning_rate": 2.8344855632370883e-06, "loss": 0.4345, "step": 1394 }, { "epoch": 0.14182594550630337, "grad_norm": 0.49121302366256714, "learning_rate": 2.8365189101260678e-06, "loss": 0.4583, "step": 1395 }, { "epoch": 0.14192761285075234, "grad_norm": 0.43096277117729187, "learning_rate": 2.838552257015047e-06, "loss": 0.4776, "step": 1396 }, { "epoch": 0.1420292801952013, "grad_norm": 0.4394112229347229, "learning_rate": 2.8405856039040263e-06, "loss": 0.4595, "step": 1397 }, { "epoch": 0.14213094753965028, "grad_norm": 0.46008700132369995, "learning_rate": 2.842618950793006e-06, "loss": 0.4179, "step": 1398 }, { "epoch": 0.14223261488409922, "grad_norm": 0.4399815499782562, "learning_rate": 2.8446522976819845e-06, "loss": 0.4285, "step": 1399 }, { "epoch": 0.1423342822285482, "grad_norm": 0.4639195501804352, "learning_rate": 2.846685644570964e-06, "loss": 0.4978, "step": 1400 }, { "epoch": 0.14243594957299716, "grad_norm": 0.4485456943511963, "learning_rate": 2.8487189914599434e-06, "loss": 0.4765, "step": 1401 }, { "epoch": 0.14253761691744612, "grad_norm": 0.41283470392227173, "learning_rate": 2.850752338348923e-06, "loss": 0.408, "step": 1402 }, { "epoch": 0.14263928426189507, "grad_norm": 0.43284597992897034, "learning_rate": 2.8527856852379016e-06, "loss": 0.4499, "step": 1403 }, { "epoch": 0.14274095160634404, "grad_norm": 0.431179016828537, "learning_rate": 2.854819032126881e-06, "loss": 0.4509, "step": 1404 }, { "epoch": 0.142842618950793, "grad_norm": 0.4248412251472473, "learning_rate": 2.85685237901586e-06, "loss": 0.4627, "step": 1405 }, { "epoch": 0.14294428629524197, "grad_norm": 0.43378233909606934, "learning_rate": 2.8588857259048396e-06, "loss": 0.4644, "step": 1406 }, { "epoch": 0.14304595363969094, "grad_norm": 0.45351642370224, "learning_rate": 2.860919072793819e-06, "loss": 0.4174, "step": 1407 }, { "epoch": 0.14314762098413988, "grad_norm": 0.44974514842033386, "learning_rate": 2.8629524196827977e-06, "loss": 0.4287, "step": 1408 }, { "epoch": 0.14324928832858885, "grad_norm": 0.4233833849430084, "learning_rate": 2.8649857665717772e-06, "loss": 0.4208, "step": 1409 }, { "epoch": 0.14335095567303782, "grad_norm": 0.46832412481307983, "learning_rate": 2.8670191134607567e-06, "loss": 0.4638, "step": 1410 }, { "epoch": 0.1434526230174868, "grad_norm": 0.4259045422077179, "learning_rate": 2.869052460349736e-06, "loss": 0.4461, "step": 1411 }, { "epoch": 0.14355429036193573, "grad_norm": 0.3972061574459076, "learning_rate": 2.871085807238715e-06, "loss": 0.423, "step": 1412 }, { "epoch": 0.1436559577063847, "grad_norm": 0.4697939455509186, "learning_rate": 2.8731191541276943e-06, "loss": 0.4734, "step": 1413 }, { "epoch": 0.14375762505083367, "grad_norm": 0.4948234260082245, "learning_rate": 2.875152501016674e-06, "loss": 0.4647, "step": 1414 }, { "epoch": 0.14385929239528264, "grad_norm": 0.4350748062133789, "learning_rate": 2.877185847905653e-06, "loss": 0.442, "step": 1415 }, { "epoch": 0.1439609597397316, "grad_norm": 0.42974165081977844, "learning_rate": 2.8792191947946324e-06, "loss": 0.4751, "step": 1416 }, { "epoch": 0.14406262708418055, "grad_norm": 0.40473833680152893, "learning_rate": 2.8812525416836114e-06, "loss": 0.4574, "step": 1417 }, { "epoch": 0.14416429442862952, "grad_norm": 0.4174473285675049, "learning_rate": 2.8832858885725905e-06, "loss": 0.4423, "step": 1418 }, { "epoch": 0.1442659617730785, "grad_norm": 0.45466941595077515, "learning_rate": 2.88531923546157e-06, "loss": 0.4275, "step": 1419 }, { "epoch": 0.14436762911752746, "grad_norm": 0.4290061295032501, "learning_rate": 2.8873525823505495e-06, "loss": 0.4272, "step": 1420 }, { "epoch": 0.14446929646197643, "grad_norm": 0.4480822682380676, "learning_rate": 2.889385929239529e-06, "loss": 0.4445, "step": 1421 }, { "epoch": 0.14457096380642537, "grad_norm": 0.43742862343788147, "learning_rate": 2.8914192761285076e-06, "loss": 0.4742, "step": 1422 }, { "epoch": 0.14467263115087434, "grad_norm": 0.4474250376224518, "learning_rate": 2.893452623017487e-06, "loss": 0.4453, "step": 1423 }, { "epoch": 0.1447742984953233, "grad_norm": 0.44515863060951233, "learning_rate": 2.895485969906466e-06, "loss": 0.4463, "step": 1424 }, { "epoch": 0.14487596583977227, "grad_norm": 0.4461544454097748, "learning_rate": 2.8975193167954457e-06, "loss": 0.4118, "step": 1425 }, { "epoch": 0.14497763318422122, "grad_norm": 0.45336028933525085, "learning_rate": 2.8995526636844247e-06, "loss": 0.4675, "step": 1426 }, { "epoch": 0.14507930052867019, "grad_norm": 0.4410496652126312, "learning_rate": 2.9015860105734038e-06, "loss": 0.4706, "step": 1427 }, { "epoch": 0.14518096787311915, "grad_norm": 0.42255541682243347, "learning_rate": 2.9036193574623833e-06, "loss": 0.451, "step": 1428 }, { "epoch": 0.14528263521756812, "grad_norm": 0.43764933943748474, "learning_rate": 2.9056527043513628e-06, "loss": 0.4872, "step": 1429 }, { "epoch": 0.1453843025620171, "grad_norm": 0.4435217082500458, "learning_rate": 2.9076860512403422e-06, "loss": 0.4521, "step": 1430 }, { "epoch": 0.14548596990646603, "grad_norm": 0.4649190306663513, "learning_rate": 2.909719398129321e-06, "loss": 0.4663, "step": 1431 }, { "epoch": 0.145587637250915, "grad_norm": 0.4403851330280304, "learning_rate": 2.9117527450183004e-06, "loss": 0.4492, "step": 1432 }, { "epoch": 0.14568930459536397, "grad_norm": 0.48271679878234863, "learning_rate": 2.91378609190728e-06, "loss": 0.4681, "step": 1433 }, { "epoch": 0.14579097193981294, "grad_norm": 0.4388434886932373, "learning_rate": 2.915819438796259e-06, "loss": 0.4448, "step": 1434 }, { "epoch": 0.14589263928426188, "grad_norm": 0.5116831064224243, "learning_rate": 2.917852785685238e-06, "loss": 0.4353, "step": 1435 }, { "epoch": 0.14599430662871085, "grad_norm": 0.4412594735622406, "learning_rate": 2.9198861325742175e-06, "loss": 0.4902, "step": 1436 }, { "epoch": 0.14609597397315982, "grad_norm": 0.4197998642921448, "learning_rate": 2.9219194794631965e-06, "loss": 0.4605, "step": 1437 }, { "epoch": 0.1461976413176088, "grad_norm": 0.5088281631469727, "learning_rate": 2.923952826352176e-06, "loss": 0.4609, "step": 1438 }, { "epoch": 0.14629930866205776, "grad_norm": 0.4637308120727539, "learning_rate": 2.9259861732411555e-06, "loss": 0.4356, "step": 1439 }, { "epoch": 0.1464009760065067, "grad_norm": 0.4085618555545807, "learning_rate": 2.928019520130134e-06, "loss": 0.4766, "step": 1440 }, { "epoch": 0.14650264335095567, "grad_norm": 0.4256598949432373, "learning_rate": 2.9300528670191136e-06, "loss": 0.4384, "step": 1441 }, { "epoch": 0.14660431069540464, "grad_norm": 0.45945799350738525, "learning_rate": 2.932086213908093e-06, "loss": 0.4407, "step": 1442 }, { "epoch": 0.1467059780398536, "grad_norm": 0.44671550393104553, "learning_rate": 2.934119560797072e-06, "loss": 0.4515, "step": 1443 }, { "epoch": 0.14680764538430255, "grad_norm": 0.49148690700531006, "learning_rate": 2.9361529076860513e-06, "loss": 0.4272, "step": 1444 }, { "epoch": 0.14690931272875152, "grad_norm": 0.48350512981414795, "learning_rate": 2.9381862545750308e-06, "loss": 0.4577, "step": 1445 }, { "epoch": 0.1470109800732005, "grad_norm": 0.4151526391506195, "learning_rate": 2.94021960146401e-06, "loss": 0.4458, "step": 1446 }, { "epoch": 0.14711264741764946, "grad_norm": 0.42530280351638794, "learning_rate": 2.9422529483529893e-06, "loss": 0.4728, "step": 1447 }, { "epoch": 0.14721431476209842, "grad_norm": 0.490001380443573, "learning_rate": 2.944286295241969e-06, "loss": 0.502, "step": 1448 }, { "epoch": 0.14731598210654737, "grad_norm": 0.4667587876319885, "learning_rate": 2.9463196421309474e-06, "loss": 0.4271, "step": 1449 }, { "epoch": 0.14741764945099634, "grad_norm": 0.42037761211395264, "learning_rate": 2.948352989019927e-06, "loss": 0.4552, "step": 1450 }, { "epoch": 0.1475193167954453, "grad_norm": 0.4164193272590637, "learning_rate": 2.9503863359089064e-06, "loss": 0.4377, "step": 1451 }, { "epoch": 0.14762098413989427, "grad_norm": 0.4285193383693695, "learning_rate": 2.952419682797886e-06, "loss": 0.4677, "step": 1452 }, { "epoch": 0.14772265148434324, "grad_norm": 0.46018844842910767, "learning_rate": 2.954453029686865e-06, "loss": 0.4407, "step": 1453 }, { "epoch": 0.14782431882879218, "grad_norm": 0.4103016257286072, "learning_rate": 2.956486376575844e-06, "loss": 0.4665, "step": 1454 }, { "epoch": 0.14792598617324115, "grad_norm": 0.4068242609500885, "learning_rate": 2.958519723464823e-06, "loss": 0.4286, "step": 1455 }, { "epoch": 0.14802765351769012, "grad_norm": 0.44538190960884094, "learning_rate": 2.9605530703538026e-06, "loss": 0.4663, "step": 1456 }, { "epoch": 0.1481293208621391, "grad_norm": 0.4777407944202423, "learning_rate": 2.962586417242782e-06, "loss": 0.4477, "step": 1457 }, { "epoch": 0.14823098820658803, "grad_norm": 0.42447027564048767, "learning_rate": 2.9646197641317607e-06, "loss": 0.4397, "step": 1458 }, { "epoch": 0.148332655551037, "grad_norm": 0.4216947853565216, "learning_rate": 2.96665311102074e-06, "loss": 0.4161, "step": 1459 }, { "epoch": 0.14843432289548597, "grad_norm": 0.4063373804092407, "learning_rate": 2.9686864579097197e-06, "loss": 0.4433, "step": 1460 }, { "epoch": 0.14853599023993494, "grad_norm": 0.4467712640762329, "learning_rate": 2.970719804798699e-06, "loss": 0.4848, "step": 1461 }, { "epoch": 0.1486376575843839, "grad_norm": 0.44431787729263306, "learning_rate": 2.9727531516876782e-06, "loss": 0.4191, "step": 1462 }, { "epoch": 0.14873932492883285, "grad_norm": 0.46452265977859497, "learning_rate": 2.9747864985766573e-06, "loss": 0.4864, "step": 1463 }, { "epoch": 0.14884099227328182, "grad_norm": 0.4436097741127014, "learning_rate": 2.976819845465637e-06, "loss": 0.475, "step": 1464 }, { "epoch": 0.1489426596177308, "grad_norm": 0.40526771545410156, "learning_rate": 2.978853192354616e-06, "loss": 0.4129, "step": 1465 }, { "epoch": 0.14904432696217976, "grad_norm": 0.44368791580200195, "learning_rate": 2.9808865392435953e-06, "loss": 0.4464, "step": 1466 }, { "epoch": 0.1491459943066287, "grad_norm": 0.470971941947937, "learning_rate": 2.9829198861325744e-06, "loss": 0.4299, "step": 1467 }, { "epoch": 0.14924766165107767, "grad_norm": 0.44396358728408813, "learning_rate": 2.9849532330215535e-06, "loss": 0.427, "step": 1468 }, { "epoch": 0.14934932899552664, "grad_norm": 0.4143546223640442, "learning_rate": 2.986986579910533e-06, "loss": 0.4454, "step": 1469 }, { "epoch": 0.1494509963399756, "grad_norm": 0.46552160382270813, "learning_rate": 2.9890199267995124e-06, "loss": 0.493, "step": 1470 }, { "epoch": 0.14955266368442457, "grad_norm": 0.49511992931365967, "learning_rate": 2.991053273688492e-06, "loss": 0.44, "step": 1471 }, { "epoch": 0.14965433102887352, "grad_norm": 0.4208703339099884, "learning_rate": 2.9930866205774706e-06, "loss": 0.4537, "step": 1472 }, { "epoch": 0.14975599837332249, "grad_norm": 0.45366382598876953, "learning_rate": 2.99511996746645e-06, "loss": 0.4375, "step": 1473 }, { "epoch": 0.14985766571777145, "grad_norm": 0.44800621271133423, "learning_rate": 2.997153314355429e-06, "loss": 0.4755, "step": 1474 }, { "epoch": 0.14995933306222042, "grad_norm": 0.4615341126918793, "learning_rate": 2.9991866612444086e-06, "loss": 0.4915, "step": 1475 }, { "epoch": 0.15006100040666936, "grad_norm": 0.4509502649307251, "learning_rate": 3.0012200081333877e-06, "loss": 0.412, "step": 1476 }, { "epoch": 0.15016266775111833, "grad_norm": 0.4833737015724182, "learning_rate": 3.0032533550223667e-06, "loss": 0.4377, "step": 1477 }, { "epoch": 0.1502643350955673, "grad_norm": 0.4730900526046753, "learning_rate": 3.0052867019113462e-06, "loss": 0.4688, "step": 1478 }, { "epoch": 0.15036600244001627, "grad_norm": 0.4260619580745697, "learning_rate": 3.0073200488003257e-06, "loss": 0.4336, "step": 1479 }, { "epoch": 0.15046766978446524, "grad_norm": 0.4387178122997284, "learning_rate": 3.0093533956893052e-06, "loss": 0.4291, "step": 1480 }, { "epoch": 0.15056933712891418, "grad_norm": 0.46927401423454285, "learning_rate": 3.011386742578284e-06, "loss": 0.4583, "step": 1481 }, { "epoch": 0.15067100447336315, "grad_norm": 0.5095216631889343, "learning_rate": 3.0134200894672633e-06, "loss": 0.4617, "step": 1482 }, { "epoch": 0.15077267181781212, "grad_norm": 0.4616481363773346, "learning_rate": 3.015453436356243e-06, "loss": 0.4309, "step": 1483 }, { "epoch": 0.1508743391622611, "grad_norm": 0.43478965759277344, "learning_rate": 3.017486783245222e-06, "loss": 0.4345, "step": 1484 }, { "epoch": 0.15097600650671003, "grad_norm": 0.4717453718185425, "learning_rate": 3.019520130134201e-06, "loss": 0.4311, "step": 1485 }, { "epoch": 0.151077673851159, "grad_norm": 0.4534740447998047, "learning_rate": 3.0215534770231804e-06, "loss": 0.4451, "step": 1486 }, { "epoch": 0.15117934119560797, "grad_norm": 0.4540848731994629, "learning_rate": 3.0235868239121595e-06, "loss": 0.4472, "step": 1487 }, { "epoch": 0.15128100854005694, "grad_norm": 0.46223071217536926, "learning_rate": 3.025620170801139e-06, "loss": 0.4012, "step": 1488 }, { "epoch": 0.1513826758845059, "grad_norm": 0.47881555557250977, "learning_rate": 3.0276535176901185e-06, "loss": 0.4606, "step": 1489 }, { "epoch": 0.15148434322895485, "grad_norm": 0.43350011110305786, "learning_rate": 3.029686864579097e-06, "loss": 0.4394, "step": 1490 }, { "epoch": 0.15158601057340382, "grad_norm": 0.44218602776527405, "learning_rate": 3.0317202114680766e-06, "loss": 0.4828, "step": 1491 }, { "epoch": 0.1516876779178528, "grad_norm": 0.4523692727088928, "learning_rate": 3.033753558357056e-06, "loss": 0.4596, "step": 1492 }, { "epoch": 0.15178934526230176, "grad_norm": 0.4392681419849396, "learning_rate": 3.035786905246035e-06, "loss": 0.4563, "step": 1493 }, { "epoch": 0.15189101260675072, "grad_norm": 0.47975245118141174, "learning_rate": 3.0378202521350147e-06, "loss": 0.4416, "step": 1494 }, { "epoch": 0.15199267995119967, "grad_norm": 0.4421062767505646, "learning_rate": 3.0398535990239937e-06, "loss": 0.4495, "step": 1495 }, { "epoch": 0.15209434729564864, "grad_norm": 0.4782341420650482, "learning_rate": 3.0418869459129728e-06, "loss": 0.4619, "step": 1496 }, { "epoch": 0.1521960146400976, "grad_norm": 0.40248799324035645, "learning_rate": 3.0439202928019523e-06, "loss": 0.4649, "step": 1497 }, { "epoch": 0.15229768198454657, "grad_norm": 0.4251200258731842, "learning_rate": 3.0459536396909318e-06, "loss": 0.4307, "step": 1498 }, { "epoch": 0.15239934932899551, "grad_norm": 0.4919760525226593, "learning_rate": 3.0479869865799104e-06, "loss": 0.4488, "step": 1499 }, { "epoch": 0.15250101667344448, "grad_norm": 0.4196550250053406, "learning_rate": 3.05002033346889e-06, "loss": 0.4238, "step": 1500 }, { "epoch": 0.15260268401789345, "grad_norm": 0.4213441014289856, "learning_rate": 3.0520536803578694e-06, "loss": 0.4766, "step": 1501 }, { "epoch": 0.15270435136234242, "grad_norm": 0.4769326448440552, "learning_rate": 3.054087027246849e-06, "loss": 0.4576, "step": 1502 }, { "epoch": 0.1528060187067914, "grad_norm": 0.4309329092502594, "learning_rate": 3.056120374135828e-06, "loss": 0.4676, "step": 1503 }, { "epoch": 0.15290768605124033, "grad_norm": 0.44071826338768005, "learning_rate": 3.058153721024807e-06, "loss": 0.4916, "step": 1504 }, { "epoch": 0.1530093533956893, "grad_norm": 0.45136165618896484, "learning_rate": 3.0601870679137865e-06, "loss": 0.4921, "step": 1505 }, { "epoch": 0.15311102074013827, "grad_norm": 0.39971429109573364, "learning_rate": 3.0622204148027655e-06, "loss": 0.433, "step": 1506 }, { "epoch": 0.15321268808458724, "grad_norm": 0.516322910785675, "learning_rate": 3.064253761691745e-06, "loss": 0.4419, "step": 1507 }, { "epoch": 0.15331435542903618, "grad_norm": 0.47061318159103394, "learning_rate": 3.0662871085807237e-06, "loss": 0.4818, "step": 1508 }, { "epoch": 0.15341602277348515, "grad_norm": 0.38594692945480347, "learning_rate": 3.068320455469703e-06, "loss": 0.4308, "step": 1509 }, { "epoch": 0.15351769011793412, "grad_norm": 0.428695410490036, "learning_rate": 3.0703538023586827e-06, "loss": 0.4199, "step": 1510 }, { "epoch": 0.1536193574623831, "grad_norm": 0.48548972606658936, "learning_rate": 3.072387149247662e-06, "loss": 0.4665, "step": 1511 }, { "epoch": 0.15372102480683206, "grad_norm": 0.5046842098236084, "learning_rate": 3.074420496136641e-06, "loss": 0.4471, "step": 1512 }, { "epoch": 0.153822692151281, "grad_norm": 0.4552917182445526, "learning_rate": 3.0764538430256203e-06, "loss": 0.4207, "step": 1513 }, { "epoch": 0.15392435949572997, "grad_norm": 0.4231373071670532, "learning_rate": 3.0784871899145998e-06, "loss": 0.431, "step": 1514 }, { "epoch": 0.15402602684017894, "grad_norm": 0.43813058733940125, "learning_rate": 3.080520536803579e-06, "loss": 0.4575, "step": 1515 }, { "epoch": 0.1541276941846279, "grad_norm": 0.5054848790168762, "learning_rate": 3.0825538836925583e-06, "loss": 0.447, "step": 1516 }, { "epoch": 0.15422936152907685, "grad_norm": 0.49774882197380066, "learning_rate": 3.0845872305815374e-06, "loss": 0.4976, "step": 1517 }, { "epoch": 0.15433102887352582, "grad_norm": 0.4566698372364044, "learning_rate": 3.0866205774705164e-06, "loss": 0.4924, "step": 1518 }, { "epoch": 0.15443269621797479, "grad_norm": 0.40350234508514404, "learning_rate": 3.088653924359496e-06, "loss": 0.4392, "step": 1519 }, { "epoch": 0.15453436356242375, "grad_norm": 0.4721231460571289, "learning_rate": 3.0906872712484754e-06, "loss": 0.4665, "step": 1520 }, { "epoch": 0.15463603090687272, "grad_norm": 0.42616212368011475, "learning_rate": 3.092720618137455e-06, "loss": 0.4583, "step": 1521 }, { "epoch": 0.15473769825132166, "grad_norm": 0.4016944468021393, "learning_rate": 3.0947539650264335e-06, "loss": 0.4409, "step": 1522 }, { "epoch": 0.15483936559577063, "grad_norm": 0.4659681022167206, "learning_rate": 3.096787311915413e-06, "loss": 0.4312, "step": 1523 }, { "epoch": 0.1549410329402196, "grad_norm": 0.4277579188346863, "learning_rate": 3.0988206588043925e-06, "loss": 0.4727, "step": 1524 }, { "epoch": 0.15504270028466857, "grad_norm": 0.40752336382865906, "learning_rate": 3.1008540056933716e-06, "loss": 0.4365, "step": 1525 }, { "epoch": 0.15514436762911754, "grad_norm": 0.44759100675582886, "learning_rate": 3.102887352582351e-06, "loss": 0.4638, "step": 1526 }, { "epoch": 0.15524603497356648, "grad_norm": 0.41707009077072144, "learning_rate": 3.1049206994713297e-06, "loss": 0.4532, "step": 1527 }, { "epoch": 0.15534770231801545, "grad_norm": 0.44129279255867004, "learning_rate": 3.106954046360309e-06, "loss": 0.4312, "step": 1528 }, { "epoch": 0.15544936966246442, "grad_norm": 0.4545295238494873, "learning_rate": 3.1089873932492887e-06, "loss": 0.4442, "step": 1529 }, { "epoch": 0.1555510370069134, "grad_norm": 0.4516942799091339, "learning_rate": 3.111020740138268e-06, "loss": 0.4377, "step": 1530 }, { "epoch": 0.15565270435136233, "grad_norm": 0.4298968017101288, "learning_rate": 3.113054087027247e-06, "loss": 0.4387, "step": 1531 }, { "epoch": 0.1557543716958113, "grad_norm": 0.4114346206188202, "learning_rate": 3.1150874339162263e-06, "loss": 0.4208, "step": 1532 }, { "epoch": 0.15585603904026027, "grad_norm": 0.4752582609653473, "learning_rate": 3.117120780805206e-06, "loss": 0.4178, "step": 1533 }, { "epoch": 0.15595770638470924, "grad_norm": 0.425863653421402, "learning_rate": 3.119154127694185e-06, "loss": 0.4721, "step": 1534 }, { "epoch": 0.1560593737291582, "grad_norm": 0.4410390257835388, "learning_rate": 3.1211874745831644e-06, "loss": 0.4529, "step": 1535 }, { "epoch": 0.15616104107360715, "grad_norm": 0.46964231133461, "learning_rate": 3.1232208214721434e-06, "loss": 0.4198, "step": 1536 }, { "epoch": 0.15626270841805612, "grad_norm": 0.4655097723007202, "learning_rate": 3.1252541683611225e-06, "loss": 0.4807, "step": 1537 }, { "epoch": 0.1563643757625051, "grad_norm": 0.42747557163238525, "learning_rate": 3.127287515250102e-06, "loss": 0.4215, "step": 1538 }, { "epoch": 0.15646604310695406, "grad_norm": 0.4951540529727936, "learning_rate": 3.1293208621390815e-06, "loss": 0.4482, "step": 1539 }, { "epoch": 0.156567710451403, "grad_norm": 0.4407449960708618, "learning_rate": 3.13135420902806e-06, "loss": 0.4618, "step": 1540 }, { "epoch": 0.15666937779585197, "grad_norm": 0.40298548340797424, "learning_rate": 3.1333875559170396e-06, "loss": 0.4518, "step": 1541 }, { "epoch": 0.15677104514030094, "grad_norm": 0.41614651679992676, "learning_rate": 3.135420902806019e-06, "loss": 0.4435, "step": 1542 }, { "epoch": 0.1568727124847499, "grad_norm": 0.4827863872051239, "learning_rate": 3.137454249694998e-06, "loss": 0.4779, "step": 1543 }, { "epoch": 0.15697437982919887, "grad_norm": 0.5212220549583435, "learning_rate": 3.1394875965839776e-06, "loss": 0.4567, "step": 1544 }, { "epoch": 0.15707604717364781, "grad_norm": 0.4412948787212372, "learning_rate": 3.1415209434729567e-06, "loss": 0.4419, "step": 1545 }, { "epoch": 0.15717771451809678, "grad_norm": 0.4284112751483917, "learning_rate": 3.1435542903619358e-06, "loss": 0.4115, "step": 1546 }, { "epoch": 0.15727938186254575, "grad_norm": 0.47468772530555725, "learning_rate": 3.1455876372509152e-06, "loss": 0.4375, "step": 1547 }, { "epoch": 0.15738104920699472, "grad_norm": 0.4822860658168793, "learning_rate": 3.1476209841398947e-06, "loss": 0.4307, "step": 1548 }, { "epoch": 0.15748271655144366, "grad_norm": 0.4396516680717468, "learning_rate": 3.1496543310288734e-06, "loss": 0.4467, "step": 1549 }, { "epoch": 0.15758438389589263, "grad_norm": 0.40740272402763367, "learning_rate": 3.151687677917853e-06, "loss": 0.4634, "step": 1550 }, { "epoch": 0.1576860512403416, "grad_norm": 0.4147791564464569, "learning_rate": 3.1537210248068323e-06, "loss": 0.467, "step": 1551 }, { "epoch": 0.15778771858479057, "grad_norm": 0.45690298080444336, "learning_rate": 3.155754371695812e-06, "loss": 0.4364, "step": 1552 }, { "epoch": 0.15788938592923954, "grad_norm": 0.4686639606952667, "learning_rate": 3.157787718584791e-06, "loss": 0.4443, "step": 1553 }, { "epoch": 0.15799105327368848, "grad_norm": 0.4898325204849243, "learning_rate": 3.15982106547377e-06, "loss": 0.44, "step": 1554 }, { "epoch": 0.15809272061813745, "grad_norm": 0.4553045630455017, "learning_rate": 3.1618544123627495e-06, "loss": 0.4322, "step": 1555 }, { "epoch": 0.15819438796258642, "grad_norm": 0.42595914006233215, "learning_rate": 3.1638877592517285e-06, "loss": 0.4193, "step": 1556 }, { "epoch": 0.1582960553070354, "grad_norm": 0.4602007567882538, "learning_rate": 3.165921106140708e-06, "loss": 0.4322, "step": 1557 }, { "epoch": 0.15839772265148433, "grad_norm": 0.4897863566875458, "learning_rate": 3.1679544530296866e-06, "loss": 0.4551, "step": 1558 }, { "epoch": 0.1584993899959333, "grad_norm": 0.41386696696281433, "learning_rate": 3.169987799918666e-06, "loss": 0.4583, "step": 1559 }, { "epoch": 0.15860105734038227, "grad_norm": 0.44197991490364075, "learning_rate": 3.1720211468076456e-06, "loss": 0.4273, "step": 1560 }, { "epoch": 0.15870272468483124, "grad_norm": 0.41866937279701233, "learning_rate": 3.174054493696625e-06, "loss": 0.4407, "step": 1561 }, { "epoch": 0.1588043920292802, "grad_norm": 0.4982314109802246, "learning_rate": 3.176087840585604e-06, "loss": 0.4469, "step": 1562 }, { "epoch": 0.15890605937372915, "grad_norm": 0.4347786009311676, "learning_rate": 3.1781211874745832e-06, "loss": 0.4425, "step": 1563 }, { "epoch": 0.15900772671817812, "grad_norm": 0.439227432012558, "learning_rate": 3.1801545343635627e-06, "loss": 0.437, "step": 1564 }, { "epoch": 0.15910939406262709, "grad_norm": 0.4599471092224121, "learning_rate": 3.182187881252542e-06, "loss": 0.4664, "step": 1565 }, { "epoch": 0.15921106140707605, "grad_norm": 0.408348023891449, "learning_rate": 3.1842212281415213e-06, "loss": 0.4437, "step": 1566 }, { "epoch": 0.15931272875152502, "grad_norm": 0.4332578778266907, "learning_rate": 3.1862545750305008e-06, "loss": 0.4657, "step": 1567 }, { "epoch": 0.15941439609597396, "grad_norm": 0.4074971675872803, "learning_rate": 3.1882879219194794e-06, "loss": 0.4559, "step": 1568 }, { "epoch": 0.15951606344042293, "grad_norm": 0.43935680389404297, "learning_rate": 3.190321268808459e-06, "loss": 0.4751, "step": 1569 }, { "epoch": 0.1596177307848719, "grad_norm": 0.4447115659713745, "learning_rate": 3.1923546156974384e-06, "loss": 0.4195, "step": 1570 }, { "epoch": 0.15971939812932087, "grad_norm": 0.4242421090602875, "learning_rate": 3.194387962586418e-06, "loss": 0.4533, "step": 1571 }, { "epoch": 0.1598210654737698, "grad_norm": 0.4272487461566925, "learning_rate": 3.1964213094753965e-06, "loss": 0.4383, "step": 1572 }, { "epoch": 0.15992273281821878, "grad_norm": 0.4639332592487335, "learning_rate": 3.198454656364376e-06, "loss": 0.4656, "step": 1573 }, { "epoch": 0.16002440016266775, "grad_norm": 0.435274213552475, "learning_rate": 3.2004880032533555e-06, "loss": 0.4564, "step": 1574 }, { "epoch": 0.16012606750711672, "grad_norm": 0.43209320306777954, "learning_rate": 3.2025213501423346e-06, "loss": 0.43, "step": 1575 }, { "epoch": 0.1602277348515657, "grad_norm": 0.39453136920928955, "learning_rate": 3.204554697031314e-06, "loss": 0.4442, "step": 1576 }, { "epoch": 0.16032940219601463, "grad_norm": 0.4228665828704834, "learning_rate": 3.2065880439202927e-06, "loss": 0.4427, "step": 1577 }, { "epoch": 0.1604310695404636, "grad_norm": 0.44161468744277954, "learning_rate": 3.208621390809272e-06, "loss": 0.4302, "step": 1578 }, { "epoch": 0.16053273688491257, "grad_norm": 0.4057163596153259, "learning_rate": 3.2106547376982517e-06, "loss": 0.4424, "step": 1579 }, { "epoch": 0.16063440422936154, "grad_norm": 0.41535642743110657, "learning_rate": 3.212688084587231e-06, "loss": 0.4318, "step": 1580 }, { "epoch": 0.16073607157381048, "grad_norm": 0.455471009016037, "learning_rate": 3.21472143147621e-06, "loss": 0.4363, "step": 1581 }, { "epoch": 0.16083773891825945, "grad_norm": 0.4496380090713501, "learning_rate": 3.2167547783651893e-06, "loss": 0.4711, "step": 1582 }, { "epoch": 0.16093940626270842, "grad_norm": 0.405036062002182, "learning_rate": 3.2187881252541688e-06, "loss": 0.4647, "step": 1583 }, { "epoch": 0.1610410736071574, "grad_norm": 0.43637561798095703, "learning_rate": 3.220821472143148e-06, "loss": 0.4701, "step": 1584 }, { "epoch": 0.16114274095160636, "grad_norm": 0.5226619243621826, "learning_rate": 3.2228548190321273e-06, "loss": 0.4847, "step": 1585 }, { "epoch": 0.1612444082960553, "grad_norm": 0.41538840532302856, "learning_rate": 3.2248881659211064e-06, "loss": 0.4386, "step": 1586 }, { "epoch": 0.16134607564050427, "grad_norm": 0.4716877043247223, "learning_rate": 3.2269215128100854e-06, "loss": 0.4073, "step": 1587 }, { "epoch": 0.16144774298495324, "grad_norm": 0.492411345243454, "learning_rate": 3.228954859699065e-06, "loss": 0.4535, "step": 1588 }, { "epoch": 0.1615494103294022, "grad_norm": 0.4131610691547394, "learning_rate": 3.2309882065880444e-06, "loss": 0.4326, "step": 1589 }, { "epoch": 0.16165107767385115, "grad_norm": 0.4141775667667389, "learning_rate": 3.233021553477023e-06, "loss": 0.4077, "step": 1590 }, { "epoch": 0.16175274501830011, "grad_norm": 0.42306363582611084, "learning_rate": 3.2350549003660026e-06, "loss": 0.4365, "step": 1591 }, { "epoch": 0.16185441236274908, "grad_norm": 0.4903462529182434, "learning_rate": 3.237088247254982e-06, "loss": 0.47, "step": 1592 }, { "epoch": 0.16195607970719805, "grad_norm": 0.45697930455207825, "learning_rate": 3.2391215941439615e-06, "loss": 0.4524, "step": 1593 }, { "epoch": 0.16205774705164702, "grad_norm": 0.42803990840911865, "learning_rate": 3.2411549410329406e-06, "loss": 0.4712, "step": 1594 }, { "epoch": 0.16215941439609596, "grad_norm": 0.4073791205883026, "learning_rate": 3.2431882879219197e-06, "loss": 0.4344, "step": 1595 }, { "epoch": 0.16226108174054493, "grad_norm": 0.4312219023704529, "learning_rate": 3.2452216348108987e-06, "loss": 0.4088, "step": 1596 }, { "epoch": 0.1623627490849939, "grad_norm": 0.48814472556114197, "learning_rate": 3.247254981699878e-06, "loss": 0.4481, "step": 1597 }, { "epoch": 0.16246441642944287, "grad_norm": 0.4500901401042938, "learning_rate": 3.2492883285888577e-06, "loss": 0.4374, "step": 1598 }, { "epoch": 0.16256608377389184, "grad_norm": 0.41078540682792664, "learning_rate": 3.251321675477837e-06, "loss": 0.4698, "step": 1599 }, { "epoch": 0.16266775111834078, "grad_norm": 0.4437686800956726, "learning_rate": 3.253355022366816e-06, "loss": 0.4799, "step": 1600 }, { "epoch": 0.16276941846278975, "grad_norm": 0.4914959669113159, "learning_rate": 3.2553883692557953e-06, "loss": 0.4578, "step": 1601 }, { "epoch": 0.16287108580723872, "grad_norm": 0.4927668273448944, "learning_rate": 3.257421716144775e-06, "loss": 0.4879, "step": 1602 }, { "epoch": 0.1629727531516877, "grad_norm": 0.43949195742607117, "learning_rate": 3.259455063033754e-06, "loss": 0.4063, "step": 1603 }, { "epoch": 0.16307442049613663, "grad_norm": 0.4727190434932709, "learning_rate": 3.261488409922733e-06, "loss": 0.4559, "step": 1604 }, { "epoch": 0.1631760878405856, "grad_norm": 0.47736799716949463, "learning_rate": 3.2635217568117124e-06, "loss": 0.4397, "step": 1605 }, { "epoch": 0.16327775518503457, "grad_norm": 0.5173642635345459, "learning_rate": 3.2655551037006915e-06, "loss": 0.4381, "step": 1606 }, { "epoch": 0.16337942252948354, "grad_norm": 0.4946501553058624, "learning_rate": 3.267588450589671e-06, "loss": 0.4697, "step": 1607 }, { "epoch": 0.1634810898739325, "grad_norm": 0.46314162015914917, "learning_rate": 3.2696217974786505e-06, "loss": 0.4493, "step": 1608 }, { "epoch": 0.16358275721838145, "grad_norm": 0.49298208951950073, "learning_rate": 3.271655144367629e-06, "loss": 0.4352, "step": 1609 }, { "epoch": 0.16368442456283042, "grad_norm": 0.4655551612377167, "learning_rate": 3.2736884912566086e-06, "loss": 0.4869, "step": 1610 }, { "epoch": 0.16378609190727939, "grad_norm": 0.4461700916290283, "learning_rate": 3.275721838145588e-06, "loss": 0.4474, "step": 1611 }, { "epoch": 0.16388775925172835, "grad_norm": 0.42279139161109924, "learning_rate": 3.277755185034567e-06, "loss": 0.4361, "step": 1612 }, { "epoch": 0.1639894265961773, "grad_norm": 0.43053340911865234, "learning_rate": 3.279788531923546e-06, "loss": 0.4128, "step": 1613 }, { "epoch": 0.16409109394062626, "grad_norm": 0.46283334493637085, "learning_rate": 3.2818218788125257e-06, "loss": 0.4315, "step": 1614 }, { "epoch": 0.16419276128507523, "grad_norm": 0.4892898201942444, "learning_rate": 3.2838552257015048e-06, "loss": 0.4355, "step": 1615 }, { "epoch": 0.1642944286295242, "grad_norm": 0.42034780979156494, "learning_rate": 3.2858885725904842e-06, "loss": 0.4903, "step": 1616 }, { "epoch": 0.16439609597397317, "grad_norm": 0.4260304868221283, "learning_rate": 3.2879219194794637e-06, "loss": 0.4739, "step": 1617 }, { "epoch": 0.1644977633184221, "grad_norm": 0.4198191463947296, "learning_rate": 3.2899552663684424e-06, "loss": 0.444, "step": 1618 }, { "epoch": 0.16459943066287108, "grad_norm": 0.4863678514957428, "learning_rate": 3.291988613257422e-06, "loss": 0.4367, "step": 1619 }, { "epoch": 0.16470109800732005, "grad_norm": 0.45170924067497253, "learning_rate": 3.2940219601464014e-06, "loss": 0.478, "step": 1620 }, { "epoch": 0.16480276535176902, "grad_norm": 0.4104048013687134, "learning_rate": 3.296055307035381e-06, "loss": 0.4424, "step": 1621 }, { "epoch": 0.16490443269621796, "grad_norm": 0.42790311574935913, "learning_rate": 3.2980886539243595e-06, "loss": 0.4339, "step": 1622 }, { "epoch": 0.16500610004066693, "grad_norm": 0.4334721863269806, "learning_rate": 3.300122000813339e-06, "loss": 0.4501, "step": 1623 }, { "epoch": 0.1651077673851159, "grad_norm": 0.41710278391838074, "learning_rate": 3.3021553477023185e-06, "loss": 0.4452, "step": 1624 }, { "epoch": 0.16520943472956487, "grad_norm": 0.3980618417263031, "learning_rate": 3.3041886945912975e-06, "loss": 0.4111, "step": 1625 }, { "epoch": 0.16531110207401384, "grad_norm": 0.45535969734191895, "learning_rate": 3.306222041480277e-06, "loss": 0.3955, "step": 1626 }, { "epoch": 0.16541276941846278, "grad_norm": 0.385847806930542, "learning_rate": 3.3082553883692557e-06, "loss": 0.4481, "step": 1627 }, { "epoch": 0.16551443676291175, "grad_norm": 0.4156036376953125, "learning_rate": 3.310288735258235e-06, "loss": 0.4444, "step": 1628 }, { "epoch": 0.16561610410736072, "grad_norm": 0.4060019552707672, "learning_rate": 3.3123220821472146e-06, "loss": 0.4619, "step": 1629 }, { "epoch": 0.1657177714518097, "grad_norm": 0.4559542238712311, "learning_rate": 3.314355429036194e-06, "loss": 0.4659, "step": 1630 }, { "epoch": 0.16581943879625863, "grad_norm": 0.4348824620246887, "learning_rate": 3.3163887759251728e-06, "loss": 0.446, "step": 1631 }, { "epoch": 0.1659211061407076, "grad_norm": 0.3864809274673462, "learning_rate": 3.3184221228141522e-06, "loss": 0.4084, "step": 1632 }, { "epoch": 0.16602277348515657, "grad_norm": 0.4024776220321655, "learning_rate": 3.3204554697031317e-06, "loss": 0.4341, "step": 1633 }, { "epoch": 0.16612444082960554, "grad_norm": 0.43982431292533875, "learning_rate": 3.322488816592111e-06, "loss": 0.4395, "step": 1634 }, { "epoch": 0.1662261081740545, "grad_norm": 0.38855746388435364, "learning_rate": 3.3245221634810903e-06, "loss": 0.4504, "step": 1635 }, { "epoch": 0.16632777551850345, "grad_norm": 0.39749234914779663, "learning_rate": 3.3265555103700694e-06, "loss": 0.4199, "step": 1636 }, { "epoch": 0.16642944286295241, "grad_norm": 0.4260016977787018, "learning_rate": 3.3285888572590484e-06, "loss": 0.4573, "step": 1637 }, { "epoch": 0.16653111020740138, "grad_norm": 0.41348913311958313, "learning_rate": 3.330622204148028e-06, "loss": 0.4166, "step": 1638 }, { "epoch": 0.16663277755185035, "grad_norm": 0.4466964304447174, "learning_rate": 3.3326555510370074e-06, "loss": 0.4272, "step": 1639 }, { "epoch": 0.16673444489629932, "grad_norm": 0.39867159724235535, "learning_rate": 3.334688897925987e-06, "loss": 0.413, "step": 1640 }, { "epoch": 0.16683611224074826, "grad_norm": 0.42037466168403625, "learning_rate": 3.3367222448149655e-06, "loss": 0.4436, "step": 1641 }, { "epoch": 0.16693777958519723, "grad_norm": 0.46220430731773376, "learning_rate": 3.338755591703945e-06, "loss": 0.4315, "step": 1642 }, { "epoch": 0.1670394469296462, "grad_norm": 0.4254417419433594, "learning_rate": 3.3407889385929245e-06, "loss": 0.4277, "step": 1643 }, { "epoch": 0.16714111427409517, "grad_norm": 0.4511423707008362, "learning_rate": 3.3428222854819036e-06, "loss": 0.4408, "step": 1644 }, { "epoch": 0.1672427816185441, "grad_norm": 0.43997475504875183, "learning_rate": 3.3448556323708826e-06, "loss": 0.4601, "step": 1645 }, { "epoch": 0.16734444896299308, "grad_norm": 0.4558468759059906, "learning_rate": 3.3468889792598617e-06, "loss": 0.4602, "step": 1646 }, { "epoch": 0.16744611630744205, "grad_norm": 0.43848368525505066, "learning_rate": 3.348922326148841e-06, "loss": 0.4129, "step": 1647 }, { "epoch": 0.16754778365189102, "grad_norm": 0.4555511176586151, "learning_rate": 3.3509556730378207e-06, "loss": 0.4475, "step": 1648 }, { "epoch": 0.16764945099634, "grad_norm": 0.4250737130641937, "learning_rate": 3.3529890199268e-06, "loss": 0.4272, "step": 1649 }, { "epoch": 0.16775111834078893, "grad_norm": 0.4369901120662689, "learning_rate": 3.355022366815779e-06, "loss": 0.4379, "step": 1650 }, { "epoch": 0.1678527856852379, "grad_norm": 0.42111504077911377, "learning_rate": 3.3570557137047583e-06, "loss": 0.4356, "step": 1651 }, { "epoch": 0.16795445302968687, "grad_norm": 0.4253358542919159, "learning_rate": 3.3590890605937378e-06, "loss": 0.4461, "step": 1652 }, { "epoch": 0.16805612037413584, "grad_norm": 0.44102564454078674, "learning_rate": 3.361122407482717e-06, "loss": 0.4349, "step": 1653 }, { "epoch": 0.16815778771858478, "grad_norm": 0.4148152470588684, "learning_rate": 3.363155754371696e-06, "loss": 0.4388, "step": 1654 }, { "epoch": 0.16825945506303375, "grad_norm": 0.4448606073856354, "learning_rate": 3.3651891012606754e-06, "loss": 0.4593, "step": 1655 }, { "epoch": 0.16836112240748272, "grad_norm": 0.4911953806877136, "learning_rate": 3.3672224481496545e-06, "loss": 0.4516, "step": 1656 }, { "epoch": 0.16846278975193169, "grad_norm": 0.44979557394981384, "learning_rate": 3.369255795038634e-06, "loss": 0.4109, "step": 1657 }, { "epoch": 0.16856445709638065, "grad_norm": 0.40738627314567566, "learning_rate": 3.3712891419276134e-06, "loss": 0.4379, "step": 1658 }, { "epoch": 0.1686661244408296, "grad_norm": 0.42903932929039, "learning_rate": 3.373322488816592e-06, "loss": 0.4307, "step": 1659 }, { "epoch": 0.16876779178527856, "grad_norm": 0.42886030673980713, "learning_rate": 3.3753558357055716e-06, "loss": 0.4675, "step": 1660 }, { "epoch": 0.16886945912972753, "grad_norm": 0.4326353669166565, "learning_rate": 3.377389182594551e-06, "loss": 0.4405, "step": 1661 }, { "epoch": 0.1689711264741765, "grad_norm": 0.4539009928703308, "learning_rate": 3.3794225294835305e-06, "loss": 0.446, "step": 1662 }, { "epoch": 0.16907279381862544, "grad_norm": 0.4370630085468292, "learning_rate": 3.381455876372509e-06, "loss": 0.4333, "step": 1663 }, { "epoch": 0.1691744611630744, "grad_norm": 0.4227251708507538, "learning_rate": 3.3834892232614887e-06, "loss": 0.4401, "step": 1664 }, { "epoch": 0.16927612850752338, "grad_norm": 0.4365409016609192, "learning_rate": 3.3855225701504677e-06, "loss": 0.4201, "step": 1665 }, { "epoch": 0.16937779585197235, "grad_norm": 0.446969598531723, "learning_rate": 3.3875559170394472e-06, "loss": 0.4517, "step": 1666 }, { "epoch": 0.16947946319642132, "grad_norm": 0.47467610239982605, "learning_rate": 3.3895892639284267e-06, "loss": 0.4364, "step": 1667 }, { "epoch": 0.16958113054087026, "grad_norm": 0.42802444100379944, "learning_rate": 3.3916226108174053e-06, "loss": 0.4348, "step": 1668 }, { "epoch": 0.16968279788531923, "grad_norm": 0.431679368019104, "learning_rate": 3.393655957706385e-06, "loss": 0.465, "step": 1669 }, { "epoch": 0.1697844652297682, "grad_norm": 0.46418431401252747, "learning_rate": 3.3956893045953643e-06, "loss": 0.4761, "step": 1670 }, { "epoch": 0.16988613257421717, "grad_norm": 0.4502606689929962, "learning_rate": 3.397722651484344e-06, "loss": 0.4709, "step": 1671 }, { "epoch": 0.16998779991866614, "grad_norm": 0.43113842606544495, "learning_rate": 3.399755998373323e-06, "loss": 0.4419, "step": 1672 }, { "epoch": 0.17008946726311508, "grad_norm": 0.4120866358280182, "learning_rate": 3.401789345262302e-06, "loss": 0.4663, "step": 1673 }, { "epoch": 0.17019113460756405, "grad_norm": 0.40663662552833557, "learning_rate": 3.4038226921512814e-06, "loss": 0.4571, "step": 1674 }, { "epoch": 0.17029280195201302, "grad_norm": 0.46701598167419434, "learning_rate": 3.4058560390402605e-06, "loss": 0.4077, "step": 1675 }, { "epoch": 0.170394469296462, "grad_norm": 0.4843701720237732, "learning_rate": 3.40788938592924e-06, "loss": 0.4354, "step": 1676 }, { "epoch": 0.17049613664091093, "grad_norm": 0.39433446526527405, "learning_rate": 3.409922732818219e-06, "loss": 0.4376, "step": 1677 }, { "epoch": 0.1705978039853599, "grad_norm": 0.4429308772087097, "learning_rate": 3.411956079707198e-06, "loss": 0.4331, "step": 1678 }, { "epoch": 0.17069947132980887, "grad_norm": 0.4175412654876709, "learning_rate": 3.4139894265961776e-06, "loss": 0.417, "step": 1679 }, { "epoch": 0.17080113867425784, "grad_norm": 0.40285417437553406, "learning_rate": 3.416022773485157e-06, "loss": 0.4481, "step": 1680 }, { "epoch": 0.1709028060187068, "grad_norm": 0.4473859369754791, "learning_rate": 3.4180561203741366e-06, "loss": 0.4364, "step": 1681 }, { "epoch": 0.17100447336315575, "grad_norm": 0.45577359199523926, "learning_rate": 3.4200894672631152e-06, "loss": 0.4204, "step": 1682 }, { "epoch": 0.17110614070760471, "grad_norm": 0.4276508688926697, "learning_rate": 3.4221228141520947e-06, "loss": 0.4169, "step": 1683 }, { "epoch": 0.17120780805205368, "grad_norm": 0.388005793094635, "learning_rate": 3.4241561610410738e-06, "loss": 0.4471, "step": 1684 }, { "epoch": 0.17130947539650265, "grad_norm": 0.4313599467277527, "learning_rate": 3.4261895079300533e-06, "loss": 0.4247, "step": 1685 }, { "epoch": 0.1714111427409516, "grad_norm": 0.4376920163631439, "learning_rate": 3.4282228548190323e-06, "loss": 0.4595, "step": 1686 }, { "epoch": 0.17151281008540056, "grad_norm": 0.40951210260391235, "learning_rate": 3.4302562017080114e-06, "loss": 0.4459, "step": 1687 }, { "epoch": 0.17161447742984953, "grad_norm": 0.4605402946472168, "learning_rate": 3.432289548596991e-06, "loss": 0.4827, "step": 1688 }, { "epoch": 0.1717161447742985, "grad_norm": 0.43437638878822327, "learning_rate": 3.4343228954859704e-06, "loss": 0.455, "step": 1689 }, { "epoch": 0.17181781211874747, "grad_norm": 0.47532492876052856, "learning_rate": 3.43635624237495e-06, "loss": 0.43, "step": 1690 }, { "epoch": 0.1719194794631964, "grad_norm": 0.46618103981018066, "learning_rate": 3.4383895892639285e-06, "loss": 0.4573, "step": 1691 }, { "epoch": 0.17202114680764538, "grad_norm": 0.4212239384651184, "learning_rate": 3.440422936152908e-06, "loss": 0.4209, "step": 1692 }, { "epoch": 0.17212281415209435, "grad_norm": 0.47633931040763855, "learning_rate": 3.4424562830418875e-06, "loss": 0.4223, "step": 1693 }, { "epoch": 0.17222448149654332, "grad_norm": 0.4988136291503906, "learning_rate": 3.4444896299308665e-06, "loss": 0.4616, "step": 1694 }, { "epoch": 0.17232614884099226, "grad_norm": 0.4545200765132904, "learning_rate": 3.4465229768198456e-06, "loss": 0.3927, "step": 1695 }, { "epoch": 0.17242781618544123, "grad_norm": 0.43198516964912415, "learning_rate": 3.448556323708825e-06, "loss": 0.4898, "step": 1696 }, { "epoch": 0.1725294835298902, "grad_norm": 0.4349169135093689, "learning_rate": 3.450589670597804e-06, "loss": 0.4516, "step": 1697 }, { "epoch": 0.17263115087433917, "grad_norm": 0.5010718703269958, "learning_rate": 3.4526230174867836e-06, "loss": 0.4443, "step": 1698 }, { "epoch": 0.17273281821878814, "grad_norm": 0.4477728009223938, "learning_rate": 3.454656364375763e-06, "loss": 0.4646, "step": 1699 }, { "epoch": 0.17283448556323708, "grad_norm": 0.4748818874359131, "learning_rate": 3.4566897112647418e-06, "loss": 0.4629, "step": 1700 }, { "epoch": 0.17293615290768605, "grad_norm": 0.42258742451667786, "learning_rate": 3.4587230581537213e-06, "loss": 0.4098, "step": 1701 }, { "epoch": 0.17303782025213502, "grad_norm": 0.4396205246448517, "learning_rate": 3.4607564050427007e-06, "loss": 0.4444, "step": 1702 }, { "epoch": 0.17313948759658399, "grad_norm": 0.39942899346351624, "learning_rate": 3.46278975193168e-06, "loss": 0.4714, "step": 1703 }, { "epoch": 0.17324115494103293, "grad_norm": 0.4509454667568207, "learning_rate": 3.464823098820659e-06, "loss": 0.4632, "step": 1704 }, { "epoch": 0.1733428222854819, "grad_norm": 0.391343891620636, "learning_rate": 3.4668564457096384e-06, "loss": 0.4625, "step": 1705 }, { "epoch": 0.17344448962993086, "grad_norm": 0.42373114824295044, "learning_rate": 3.4688897925986174e-06, "loss": 0.4476, "step": 1706 }, { "epoch": 0.17354615697437983, "grad_norm": 0.4125097692012787, "learning_rate": 3.470923139487597e-06, "loss": 0.4105, "step": 1707 }, { "epoch": 0.1736478243188288, "grad_norm": 0.3976241946220398, "learning_rate": 3.4729564863765764e-06, "loss": 0.4672, "step": 1708 }, { "epoch": 0.17374949166327774, "grad_norm": 0.4193742871284485, "learning_rate": 3.474989833265555e-06, "loss": 0.4305, "step": 1709 }, { "epoch": 0.1738511590077267, "grad_norm": 0.438408225774765, "learning_rate": 3.4770231801545345e-06, "loss": 0.417, "step": 1710 }, { "epoch": 0.17395282635217568, "grad_norm": 0.4634453356266022, "learning_rate": 3.479056527043514e-06, "loss": 0.4996, "step": 1711 }, { "epoch": 0.17405449369662465, "grad_norm": 0.4756069481372833, "learning_rate": 3.4810898739324935e-06, "loss": 0.4675, "step": 1712 }, { "epoch": 0.17415616104107362, "grad_norm": 0.4314899444580078, "learning_rate": 3.4831232208214726e-06, "loss": 0.4483, "step": 1713 }, { "epoch": 0.17425782838552256, "grad_norm": 0.46629372239112854, "learning_rate": 3.4851565677104516e-06, "loss": 0.4516, "step": 1714 }, { "epoch": 0.17435949572997153, "grad_norm": 0.4902535378932953, "learning_rate": 3.4871899145994307e-06, "loss": 0.4421, "step": 1715 }, { "epoch": 0.1744611630744205, "grad_norm": 0.48704448342323303, "learning_rate": 3.48922326148841e-06, "loss": 0.4562, "step": 1716 }, { "epoch": 0.17456283041886947, "grad_norm": 0.4224906265735626, "learning_rate": 3.4912566083773897e-06, "loss": 0.4484, "step": 1717 }, { "epoch": 0.1746644977633184, "grad_norm": 0.4859018325805664, "learning_rate": 3.4932899552663683e-06, "loss": 0.4415, "step": 1718 }, { "epoch": 0.17476616510776738, "grad_norm": 0.47143372893333435, "learning_rate": 3.495323302155348e-06, "loss": 0.4222, "step": 1719 }, { "epoch": 0.17486783245221635, "grad_norm": 0.4161459803581238, "learning_rate": 3.4973566490443273e-06, "loss": 0.4462, "step": 1720 }, { "epoch": 0.17496949979666532, "grad_norm": 0.453581303358078, "learning_rate": 3.4993899959333068e-06, "loss": 0.4516, "step": 1721 }, { "epoch": 0.1750711671411143, "grad_norm": 0.4139174222946167, "learning_rate": 3.501423342822286e-06, "loss": 0.4631, "step": 1722 }, { "epoch": 0.17517283448556323, "grad_norm": 0.48241308331489563, "learning_rate": 3.503456689711265e-06, "loss": 0.448, "step": 1723 }, { "epoch": 0.1752745018300122, "grad_norm": 0.48197996616363525, "learning_rate": 3.5054900366002444e-06, "loss": 0.4315, "step": 1724 }, { "epoch": 0.17537616917446117, "grad_norm": 0.4401833713054657, "learning_rate": 3.5075233834892235e-06, "loss": 0.4596, "step": 1725 }, { "epoch": 0.17547783651891014, "grad_norm": 0.4786582887172699, "learning_rate": 3.509556730378203e-06, "loss": 0.4326, "step": 1726 }, { "epoch": 0.17557950386335908, "grad_norm": 0.4255821406841278, "learning_rate": 3.511590077267182e-06, "loss": 0.4652, "step": 1727 }, { "epoch": 0.17568117120780805, "grad_norm": 0.4126879572868347, "learning_rate": 3.513623424156161e-06, "loss": 0.4326, "step": 1728 }, { "epoch": 0.17578283855225701, "grad_norm": 0.4234658479690552, "learning_rate": 3.5156567710451406e-06, "loss": 0.444, "step": 1729 }, { "epoch": 0.17588450589670598, "grad_norm": 0.4300245940685272, "learning_rate": 3.51769011793412e-06, "loss": 0.4049, "step": 1730 }, { "epoch": 0.17598617324115495, "grad_norm": 0.46504107117652893, "learning_rate": 3.5197234648230995e-06, "loss": 0.4481, "step": 1731 }, { "epoch": 0.1760878405856039, "grad_norm": 0.42606237530708313, "learning_rate": 3.521756811712078e-06, "loss": 0.4019, "step": 1732 }, { "epoch": 0.17618950793005286, "grad_norm": 0.43267765641212463, "learning_rate": 3.5237901586010577e-06, "loss": 0.4028, "step": 1733 }, { "epoch": 0.17629117527450183, "grad_norm": 0.5188160538673401, "learning_rate": 3.5258235054900367e-06, "loss": 0.4181, "step": 1734 }, { "epoch": 0.1763928426189508, "grad_norm": 0.4395909607410431, "learning_rate": 3.5278568523790162e-06, "loss": 0.4182, "step": 1735 }, { "epoch": 0.17649450996339974, "grad_norm": 0.4453396499156952, "learning_rate": 3.5298901992679953e-06, "loss": 0.473, "step": 1736 }, { "epoch": 0.1765961773078487, "grad_norm": 0.4194498062133789, "learning_rate": 3.5319235461569744e-06, "loss": 0.4463, "step": 1737 }, { "epoch": 0.17669784465229768, "grad_norm": 0.4151761829853058, "learning_rate": 3.533956893045954e-06, "loss": 0.3905, "step": 1738 }, { "epoch": 0.17679951199674665, "grad_norm": 0.45652496814727783, "learning_rate": 3.5359902399349333e-06, "loss": 0.447, "step": 1739 }, { "epoch": 0.17690117934119562, "grad_norm": 0.4140760600566864, "learning_rate": 3.538023586823913e-06, "loss": 0.4288, "step": 1740 }, { "epoch": 0.17700284668564456, "grad_norm": 0.46402040123939514, "learning_rate": 3.5400569337128915e-06, "loss": 0.4169, "step": 1741 }, { "epoch": 0.17710451403009353, "grad_norm": 0.4323519170284271, "learning_rate": 3.542090280601871e-06, "loss": 0.469, "step": 1742 }, { "epoch": 0.1772061813745425, "grad_norm": 0.4236105680465698, "learning_rate": 3.5441236274908504e-06, "loss": 0.4481, "step": 1743 }, { "epoch": 0.17730784871899147, "grad_norm": 0.5193100571632385, "learning_rate": 3.5461569743798295e-06, "loss": 0.4551, "step": 1744 }, { "epoch": 0.17740951606344044, "grad_norm": 0.4560650587081909, "learning_rate": 3.548190321268809e-06, "loss": 0.4409, "step": 1745 }, { "epoch": 0.17751118340788938, "grad_norm": 0.47115063667297363, "learning_rate": 3.550223668157788e-06, "loss": 0.4441, "step": 1746 }, { "epoch": 0.17761285075233835, "grad_norm": 0.46176591515541077, "learning_rate": 3.552257015046767e-06, "loss": 0.4393, "step": 1747 }, { "epoch": 0.17771451809678732, "grad_norm": 0.48615679144859314, "learning_rate": 3.5542903619357466e-06, "loss": 0.445, "step": 1748 }, { "epoch": 0.17781618544123629, "grad_norm": 0.42222028970718384, "learning_rate": 3.556323708824726e-06, "loss": 0.4033, "step": 1749 }, { "epoch": 0.17791785278568523, "grad_norm": 0.4629284739494324, "learning_rate": 3.5583570557137047e-06, "loss": 0.4072, "step": 1750 }, { "epoch": 0.1780195201301342, "grad_norm": 0.48630762100219727, "learning_rate": 3.5603904026026842e-06, "loss": 0.4729, "step": 1751 }, { "epoch": 0.17812118747458316, "grad_norm": 0.4807948172092438, "learning_rate": 3.5624237494916637e-06, "loss": 0.4729, "step": 1752 }, { "epoch": 0.17822285481903213, "grad_norm": 0.4519045054912567, "learning_rate": 3.5644570963806428e-06, "loss": 0.4338, "step": 1753 }, { "epoch": 0.1783245221634811, "grad_norm": 0.47935232520103455, "learning_rate": 3.5664904432696223e-06, "loss": 0.4521, "step": 1754 }, { "epoch": 0.17842618950793004, "grad_norm": 0.4458390176296234, "learning_rate": 3.5685237901586013e-06, "loss": 0.4287, "step": 1755 }, { "epoch": 0.178527856852379, "grad_norm": 0.4195765554904938, "learning_rate": 3.5705571370475804e-06, "loss": 0.4543, "step": 1756 }, { "epoch": 0.17862952419682798, "grad_norm": 0.46935179829597473, "learning_rate": 3.57259048393656e-06, "loss": 0.4114, "step": 1757 }, { "epoch": 0.17873119154127695, "grad_norm": 0.4087062180042267, "learning_rate": 3.5746238308255394e-06, "loss": 0.4606, "step": 1758 }, { "epoch": 0.1788328588857259, "grad_norm": 0.4017527103424072, "learning_rate": 3.576657177714518e-06, "loss": 0.4584, "step": 1759 }, { "epoch": 0.17893452623017486, "grad_norm": 0.4632887840270996, "learning_rate": 3.5786905246034975e-06, "loss": 0.4172, "step": 1760 }, { "epoch": 0.17903619357462383, "grad_norm": 0.42776811122894287, "learning_rate": 3.580723871492477e-06, "loss": 0.4706, "step": 1761 }, { "epoch": 0.1791378609190728, "grad_norm": 0.44369029998779297, "learning_rate": 3.5827572183814565e-06, "loss": 0.4849, "step": 1762 }, { "epoch": 0.17923952826352177, "grad_norm": 0.47350579500198364, "learning_rate": 3.5847905652704355e-06, "loss": 0.4587, "step": 1763 }, { "epoch": 0.1793411956079707, "grad_norm": 0.4610123932361603, "learning_rate": 3.5868239121594146e-06, "loss": 0.4614, "step": 1764 }, { "epoch": 0.17944286295241968, "grad_norm": 0.4139368236064911, "learning_rate": 3.588857259048394e-06, "loss": 0.4025, "step": 1765 }, { "epoch": 0.17954453029686865, "grad_norm": 0.46733328700065613, "learning_rate": 3.590890605937373e-06, "loss": 0.5077, "step": 1766 }, { "epoch": 0.17964619764131762, "grad_norm": 0.4484449625015259, "learning_rate": 3.5929239528263526e-06, "loss": 0.443, "step": 1767 }, { "epoch": 0.17974786498576656, "grad_norm": 0.4659464359283447, "learning_rate": 3.5949572997153313e-06, "loss": 0.4644, "step": 1768 }, { "epoch": 0.17984953233021553, "grad_norm": 0.48095107078552246, "learning_rate": 3.5969906466043108e-06, "loss": 0.435, "step": 1769 }, { "epoch": 0.1799511996746645, "grad_norm": 0.4489186406135559, "learning_rate": 3.5990239934932903e-06, "loss": 0.482, "step": 1770 }, { "epoch": 0.18005286701911347, "grad_norm": 0.42770063877105713, "learning_rate": 3.6010573403822697e-06, "loss": 0.4385, "step": 1771 }, { "epoch": 0.18015453436356244, "grad_norm": 0.4694976508617401, "learning_rate": 3.603090687271249e-06, "loss": 0.4164, "step": 1772 }, { "epoch": 0.18025620170801138, "grad_norm": 0.40561798214912415, "learning_rate": 3.605124034160228e-06, "loss": 0.4505, "step": 1773 }, { "epoch": 0.18035786905246035, "grad_norm": 0.4328070282936096, "learning_rate": 3.6071573810492074e-06, "loss": 0.4168, "step": 1774 }, { "epoch": 0.18045953639690931, "grad_norm": 0.44380757212638855, "learning_rate": 3.6091907279381864e-06, "loss": 0.4522, "step": 1775 }, { "epoch": 0.18056120374135828, "grad_norm": 0.3898465037345886, "learning_rate": 3.611224074827166e-06, "loss": 0.4429, "step": 1776 }, { "epoch": 0.18066287108580723, "grad_norm": 0.42052531242370605, "learning_rate": 3.613257421716145e-06, "loss": 0.3803, "step": 1777 }, { "epoch": 0.1807645384302562, "grad_norm": 0.41857871413230896, "learning_rate": 3.615290768605124e-06, "loss": 0.4203, "step": 1778 }, { "epoch": 0.18086620577470516, "grad_norm": 0.463819682598114, "learning_rate": 3.6173241154941035e-06, "loss": 0.4477, "step": 1779 }, { "epoch": 0.18096787311915413, "grad_norm": 0.45706596970558167, "learning_rate": 3.619357462383083e-06, "loss": 0.4495, "step": 1780 }, { "epoch": 0.1810695404636031, "grad_norm": 0.4054023027420044, "learning_rate": 3.6213908092720625e-06, "loss": 0.4242, "step": 1781 }, { "epoch": 0.18117120780805204, "grad_norm": 0.4461591839790344, "learning_rate": 3.623424156161041e-06, "loss": 0.4712, "step": 1782 }, { "epoch": 0.181272875152501, "grad_norm": 0.41631466150283813, "learning_rate": 3.6254575030500206e-06, "loss": 0.4581, "step": 1783 }, { "epoch": 0.18137454249694998, "grad_norm": 0.4498569965362549, "learning_rate": 3.6274908499389997e-06, "loss": 0.4384, "step": 1784 }, { "epoch": 0.18147620984139895, "grad_norm": 0.4028233587741852, "learning_rate": 3.629524196827979e-06, "loss": 0.4355, "step": 1785 }, { "epoch": 0.18157787718584792, "grad_norm": 0.44964665174484253, "learning_rate": 3.6315575437169587e-06, "loss": 0.4647, "step": 1786 }, { "epoch": 0.18167954453029686, "grad_norm": 0.44406208395957947, "learning_rate": 3.6335908906059373e-06, "loss": 0.4243, "step": 1787 }, { "epoch": 0.18178121187474583, "grad_norm": 0.4251469671726227, "learning_rate": 3.635624237494917e-06, "loss": 0.4191, "step": 1788 }, { "epoch": 0.1818828792191948, "grad_norm": 0.4318047761917114, "learning_rate": 3.6376575843838963e-06, "loss": 0.4294, "step": 1789 }, { "epoch": 0.18198454656364377, "grad_norm": 0.4761623740196228, "learning_rate": 3.6396909312728758e-06, "loss": 0.4256, "step": 1790 }, { "epoch": 0.1820862139080927, "grad_norm": 0.41853711009025574, "learning_rate": 3.6417242781618544e-06, "loss": 0.4581, "step": 1791 }, { "epoch": 0.18218788125254168, "grad_norm": 0.4854110777378082, "learning_rate": 3.643757625050834e-06, "loss": 0.4418, "step": 1792 }, { "epoch": 0.18228954859699065, "grad_norm": 0.49133890867233276, "learning_rate": 3.6457909719398134e-06, "loss": 0.4269, "step": 1793 }, { "epoch": 0.18239121594143962, "grad_norm": 0.46882128715515137, "learning_rate": 3.6478243188287925e-06, "loss": 0.434, "step": 1794 }, { "epoch": 0.18249288328588859, "grad_norm": 0.4196183383464813, "learning_rate": 3.649857665717772e-06, "loss": 0.4712, "step": 1795 }, { "epoch": 0.18259455063033753, "grad_norm": 0.48363885283470154, "learning_rate": 3.651891012606751e-06, "loss": 0.4501, "step": 1796 }, { "epoch": 0.1826962179747865, "grad_norm": 0.47239500284194946, "learning_rate": 3.65392435949573e-06, "loss": 0.4231, "step": 1797 }, { "epoch": 0.18279788531923546, "grad_norm": 0.4451482892036438, "learning_rate": 3.6559577063847096e-06, "loss": 0.4465, "step": 1798 }, { "epoch": 0.18289955266368443, "grad_norm": 0.43398499488830566, "learning_rate": 3.657991053273689e-06, "loss": 0.4354, "step": 1799 }, { "epoch": 0.18300122000813338, "grad_norm": 0.42160764336586, "learning_rate": 3.6600244001626677e-06, "loss": 0.4782, "step": 1800 }, { "epoch": 0.18310288735258234, "grad_norm": 0.4600845277309418, "learning_rate": 3.662057747051647e-06, "loss": 0.418, "step": 1801 }, { "epoch": 0.1832045546970313, "grad_norm": 0.45037493109703064, "learning_rate": 3.6640910939406267e-06, "loss": 0.459, "step": 1802 }, { "epoch": 0.18330622204148028, "grad_norm": 0.3968804180622101, "learning_rate": 3.6661244408296057e-06, "loss": 0.4312, "step": 1803 }, { "epoch": 0.18340788938592925, "grad_norm": 0.4269091486930847, "learning_rate": 3.6681577877185852e-06, "loss": 0.4492, "step": 1804 }, { "epoch": 0.1835095567303782, "grad_norm": 0.4515901207923889, "learning_rate": 3.6701911346075643e-06, "loss": 0.4576, "step": 1805 }, { "epoch": 0.18361122407482716, "grad_norm": 0.42724403738975525, "learning_rate": 3.6722244814965434e-06, "loss": 0.4203, "step": 1806 }, { "epoch": 0.18371289141927613, "grad_norm": 0.4385621249675751, "learning_rate": 3.674257828385523e-06, "loss": 0.4847, "step": 1807 }, { "epoch": 0.1838145587637251, "grad_norm": 0.4433053135871887, "learning_rate": 3.6762911752745023e-06, "loss": 0.4196, "step": 1808 }, { "epoch": 0.18391622610817404, "grad_norm": 0.4470483958721161, "learning_rate": 3.678324522163481e-06, "loss": 0.4475, "step": 1809 }, { "epoch": 0.184017893452623, "grad_norm": 0.4046623408794403, "learning_rate": 3.6803578690524605e-06, "loss": 0.4386, "step": 1810 }, { "epoch": 0.18411956079707198, "grad_norm": 0.48158612847328186, "learning_rate": 3.68239121594144e-06, "loss": 0.4263, "step": 1811 }, { "epoch": 0.18422122814152095, "grad_norm": 0.4455550014972687, "learning_rate": 3.6844245628304194e-06, "loss": 0.4546, "step": 1812 }, { "epoch": 0.18432289548596992, "grad_norm": 0.4283021092414856, "learning_rate": 3.6864579097193985e-06, "loss": 0.4344, "step": 1813 }, { "epoch": 0.18442456283041886, "grad_norm": 0.4215046465396881, "learning_rate": 3.6884912566083776e-06, "loss": 0.4402, "step": 1814 }, { "epoch": 0.18452623017486783, "grad_norm": 0.4266360104084015, "learning_rate": 3.690524603497357e-06, "loss": 0.4406, "step": 1815 }, { "epoch": 0.1846278975193168, "grad_norm": 0.4174173176288605, "learning_rate": 3.692557950386336e-06, "loss": 0.4066, "step": 1816 }, { "epoch": 0.18472956486376577, "grad_norm": 0.44534429907798767, "learning_rate": 3.6945912972753156e-06, "loss": 0.4738, "step": 1817 }, { "epoch": 0.18483123220821474, "grad_norm": 0.43316447734832764, "learning_rate": 3.696624644164295e-06, "loss": 0.4815, "step": 1818 }, { "epoch": 0.18493289955266368, "grad_norm": 0.4060583710670471, "learning_rate": 3.6986579910532737e-06, "loss": 0.3888, "step": 1819 }, { "epoch": 0.18503456689711265, "grad_norm": 0.4890337586402893, "learning_rate": 3.7006913379422532e-06, "loss": 0.4489, "step": 1820 }, { "epoch": 0.18513623424156161, "grad_norm": 0.4478903114795685, "learning_rate": 3.7027246848312327e-06, "loss": 0.4459, "step": 1821 }, { "epoch": 0.18523790158601058, "grad_norm": 0.4812358021736145, "learning_rate": 3.7047580317202118e-06, "loss": 0.4813, "step": 1822 }, { "epoch": 0.18533956893045953, "grad_norm": 0.39666977524757385, "learning_rate": 3.706791378609191e-06, "loss": 0.4748, "step": 1823 }, { "epoch": 0.1854412362749085, "grad_norm": 0.4407067596912384, "learning_rate": 3.7088247254981703e-06, "loss": 0.4539, "step": 1824 }, { "epoch": 0.18554290361935746, "grad_norm": 0.4737170338630676, "learning_rate": 3.7108580723871494e-06, "loss": 0.4465, "step": 1825 }, { "epoch": 0.18564457096380643, "grad_norm": 0.43097084760665894, "learning_rate": 3.712891419276129e-06, "loss": 0.4206, "step": 1826 }, { "epoch": 0.1857462383082554, "grad_norm": 0.4228501617908478, "learning_rate": 3.7149247661651084e-06, "loss": 0.4424, "step": 1827 }, { "epoch": 0.18584790565270434, "grad_norm": 0.4357137382030487, "learning_rate": 3.716958113054087e-06, "loss": 0.397, "step": 1828 }, { "epoch": 0.1859495729971533, "grad_norm": 0.420529842376709, "learning_rate": 3.7189914599430665e-06, "loss": 0.4382, "step": 1829 }, { "epoch": 0.18605124034160228, "grad_norm": 0.3828253746032715, "learning_rate": 3.721024806832046e-06, "loss": 0.4732, "step": 1830 }, { "epoch": 0.18615290768605125, "grad_norm": 0.4518014192581177, "learning_rate": 3.7230581537210255e-06, "loss": 0.4572, "step": 1831 }, { "epoch": 0.1862545750305002, "grad_norm": 0.4282655119895935, "learning_rate": 3.725091500610004e-06, "loss": 0.4267, "step": 1832 }, { "epoch": 0.18635624237494916, "grad_norm": 0.4257271885871887, "learning_rate": 3.7271248474989836e-06, "loss": 0.402, "step": 1833 }, { "epoch": 0.18645790971939813, "grad_norm": 0.4550526440143585, "learning_rate": 3.729158194387963e-06, "loss": 0.4337, "step": 1834 }, { "epoch": 0.1865595770638471, "grad_norm": 0.42067277431488037, "learning_rate": 3.731191541276942e-06, "loss": 0.4376, "step": 1835 }, { "epoch": 0.18666124440829607, "grad_norm": 0.4475754201412201, "learning_rate": 3.7332248881659216e-06, "loss": 0.3953, "step": 1836 }, { "epoch": 0.186762911752745, "grad_norm": 0.40526050329208374, "learning_rate": 3.7352582350549003e-06, "loss": 0.441, "step": 1837 }, { "epoch": 0.18686457909719398, "grad_norm": 0.3944125175476074, "learning_rate": 3.7372915819438798e-06, "loss": 0.4781, "step": 1838 }, { "epoch": 0.18696624644164295, "grad_norm": 0.4198635220527649, "learning_rate": 3.7393249288328593e-06, "loss": 0.468, "step": 1839 }, { "epoch": 0.18706791378609192, "grad_norm": 0.4326404631137848, "learning_rate": 3.7413582757218388e-06, "loss": 0.4457, "step": 1840 }, { "epoch": 0.18716958113054086, "grad_norm": 0.43130117654800415, "learning_rate": 3.7433916226108174e-06, "loss": 0.4295, "step": 1841 }, { "epoch": 0.18727124847498983, "grad_norm": 0.4609772861003876, "learning_rate": 3.745424969499797e-06, "loss": 0.4435, "step": 1842 }, { "epoch": 0.1873729158194388, "grad_norm": 0.46234753727912903, "learning_rate": 3.7474583163887764e-06, "loss": 0.4187, "step": 1843 }, { "epoch": 0.18747458316388776, "grad_norm": 0.4429203271865845, "learning_rate": 3.7494916632777554e-06, "loss": 0.4332, "step": 1844 }, { "epoch": 0.18757625050833673, "grad_norm": 0.48729318380355835, "learning_rate": 3.751525010166735e-06, "loss": 0.4665, "step": 1845 }, { "epoch": 0.18767791785278568, "grad_norm": 0.41905471682548523, "learning_rate": 3.753558357055714e-06, "loss": 0.4326, "step": 1846 }, { "epoch": 0.18777958519723464, "grad_norm": 0.4280823767185211, "learning_rate": 3.755591703944693e-06, "loss": 0.4403, "step": 1847 }, { "epoch": 0.1878812525416836, "grad_norm": 0.4261646568775177, "learning_rate": 3.7576250508336725e-06, "loss": 0.4248, "step": 1848 }, { "epoch": 0.18798291988613258, "grad_norm": 0.4399169683456421, "learning_rate": 3.759658397722652e-06, "loss": 0.4484, "step": 1849 }, { "epoch": 0.18808458723058152, "grad_norm": 0.4960821270942688, "learning_rate": 3.7616917446116307e-06, "loss": 0.417, "step": 1850 }, { "epoch": 0.1881862545750305, "grad_norm": 0.4488385021686554, "learning_rate": 3.76372509150061e-06, "loss": 0.4329, "step": 1851 }, { "epoch": 0.18828792191947946, "grad_norm": 0.4210408926010132, "learning_rate": 3.7657584383895896e-06, "loss": 0.436, "step": 1852 }, { "epoch": 0.18838958926392843, "grad_norm": 0.4541274905204773, "learning_rate": 3.767791785278569e-06, "loss": 0.4179, "step": 1853 }, { "epoch": 0.1884912566083774, "grad_norm": 0.4658469557762146, "learning_rate": 3.769825132167548e-06, "loss": 0.4113, "step": 1854 }, { "epoch": 0.18859292395282634, "grad_norm": 0.41293057799339294, "learning_rate": 3.7718584790565273e-06, "loss": 0.4439, "step": 1855 }, { "epoch": 0.1886945912972753, "grad_norm": 0.4308307468891144, "learning_rate": 3.7738918259455063e-06, "loss": 0.4558, "step": 1856 }, { "epoch": 0.18879625864172428, "grad_norm": 0.4697847366333008, "learning_rate": 3.775925172834486e-06, "loss": 0.4427, "step": 1857 }, { "epoch": 0.18889792598617325, "grad_norm": 0.4940520226955414, "learning_rate": 3.7779585197234653e-06, "loss": 0.4099, "step": 1858 }, { "epoch": 0.18899959333062222, "grad_norm": 0.4092544913291931, "learning_rate": 3.779991866612445e-06, "loss": 0.3988, "step": 1859 }, { "epoch": 0.18910126067507116, "grad_norm": 0.4605688452720642, "learning_rate": 3.7820252135014234e-06, "loss": 0.4187, "step": 1860 }, { "epoch": 0.18920292801952013, "grad_norm": 0.43602848052978516, "learning_rate": 3.784058560390403e-06, "loss": 0.4318, "step": 1861 }, { "epoch": 0.1893045953639691, "grad_norm": 0.4481951594352722, "learning_rate": 3.7860919072793824e-06, "loss": 0.4371, "step": 1862 }, { "epoch": 0.18940626270841807, "grad_norm": 0.47287502884864807, "learning_rate": 3.7881252541683615e-06, "loss": 0.4635, "step": 1863 }, { "epoch": 0.189507930052867, "grad_norm": 0.4312603771686554, "learning_rate": 3.7901586010573405e-06, "loss": 0.4459, "step": 1864 }, { "epoch": 0.18960959739731598, "grad_norm": 0.4474823474884033, "learning_rate": 3.79219194794632e-06, "loss": 0.4478, "step": 1865 }, { "epoch": 0.18971126474176495, "grad_norm": 0.48075637221336365, "learning_rate": 3.794225294835299e-06, "loss": 0.4286, "step": 1866 }, { "epoch": 0.18981293208621391, "grad_norm": 0.4411603808403015, "learning_rate": 3.7962586417242786e-06, "loss": 0.4632, "step": 1867 }, { "epoch": 0.18991459943066288, "grad_norm": 0.4830527901649475, "learning_rate": 3.798291988613258e-06, "loss": 0.4474, "step": 1868 }, { "epoch": 0.19001626677511183, "grad_norm": 0.493110328912735, "learning_rate": 3.8003253355022367e-06, "loss": 0.433, "step": 1869 }, { "epoch": 0.1901179341195608, "grad_norm": 0.486791729927063, "learning_rate": 3.802358682391216e-06, "loss": 0.4838, "step": 1870 }, { "epoch": 0.19021960146400976, "grad_norm": 0.4170401990413666, "learning_rate": 3.8043920292801957e-06, "loss": 0.4561, "step": 1871 }, { "epoch": 0.19032126880845873, "grad_norm": 0.4577926993370056, "learning_rate": 3.8064253761691747e-06, "loss": 0.432, "step": 1872 }, { "epoch": 0.19042293615290767, "grad_norm": 0.440291166305542, "learning_rate": 3.808458723058154e-06, "loss": 0.4308, "step": 1873 }, { "epoch": 0.19052460349735664, "grad_norm": 0.514254629611969, "learning_rate": 3.8104920699471333e-06, "loss": 0.4423, "step": 1874 }, { "epoch": 0.1906262708418056, "grad_norm": 0.47901007533073425, "learning_rate": 3.8125254168361124e-06, "loss": 0.4173, "step": 1875 }, { "epoch": 0.19072793818625458, "grad_norm": 0.49288344383239746, "learning_rate": 3.814558763725092e-06, "loss": 0.4544, "step": 1876 }, { "epoch": 0.19082960553070355, "grad_norm": 0.46086329221725464, "learning_rate": 3.816592110614071e-06, "loss": 0.3807, "step": 1877 }, { "epoch": 0.1909312728751525, "grad_norm": 0.45837071537971497, "learning_rate": 3.81862545750305e-06, "loss": 0.4199, "step": 1878 }, { "epoch": 0.19103294021960146, "grad_norm": 0.4864150583744049, "learning_rate": 3.8206588043920295e-06, "loss": 0.4748, "step": 1879 }, { "epoch": 0.19113460756405043, "grad_norm": 0.5234465003013611, "learning_rate": 3.822692151281009e-06, "loss": 0.4368, "step": 1880 }, { "epoch": 0.1912362749084994, "grad_norm": 0.4360193610191345, "learning_rate": 3.8247254981699884e-06, "loss": 0.4369, "step": 1881 }, { "epoch": 0.19133794225294834, "grad_norm": 0.44815683364868164, "learning_rate": 3.826758845058967e-06, "loss": 0.4526, "step": 1882 }, { "epoch": 0.1914396095973973, "grad_norm": 0.4445130527019501, "learning_rate": 3.8287921919479466e-06, "loss": 0.4413, "step": 1883 }, { "epoch": 0.19154127694184628, "grad_norm": 0.41122904419898987, "learning_rate": 3.830825538836926e-06, "loss": 0.4003, "step": 1884 }, { "epoch": 0.19164294428629525, "grad_norm": 0.44357505440711975, "learning_rate": 3.8328588857259056e-06, "loss": 0.427, "step": 1885 }, { "epoch": 0.19174461163074422, "grad_norm": 0.4961344599723816, "learning_rate": 3.834892232614885e-06, "loss": 0.4568, "step": 1886 }, { "epoch": 0.19184627897519316, "grad_norm": 0.4078041613101959, "learning_rate": 3.836925579503864e-06, "loss": 0.4086, "step": 1887 }, { "epoch": 0.19194794631964213, "grad_norm": 0.4420379102230072, "learning_rate": 3.838958926392843e-06, "loss": 0.4604, "step": 1888 }, { "epoch": 0.1920496136640911, "grad_norm": 0.46656903624534607, "learning_rate": 3.840992273281822e-06, "loss": 0.437, "step": 1889 }, { "epoch": 0.19215128100854006, "grad_norm": 0.43748608231544495, "learning_rate": 3.843025620170801e-06, "loss": 0.4385, "step": 1890 }, { "epoch": 0.19225294835298903, "grad_norm": 0.4073517322540283, "learning_rate": 3.845058967059781e-06, "loss": 0.4406, "step": 1891 }, { "epoch": 0.19235461569743798, "grad_norm": 0.4742704927921295, "learning_rate": 3.8470923139487594e-06, "loss": 0.416, "step": 1892 }, { "epoch": 0.19245628304188694, "grad_norm": 0.4559768736362457, "learning_rate": 3.849125660837739e-06, "loss": 0.4876, "step": 1893 }, { "epoch": 0.1925579503863359, "grad_norm": 0.4324682056903839, "learning_rate": 3.851159007726718e-06, "loss": 0.4314, "step": 1894 }, { "epoch": 0.19265961773078488, "grad_norm": 0.43639931082725525, "learning_rate": 3.853192354615698e-06, "loss": 0.4641, "step": 1895 }, { "epoch": 0.19276128507523382, "grad_norm": 0.42597532272338867, "learning_rate": 3.8552257015046765e-06, "loss": 0.4507, "step": 1896 }, { "epoch": 0.1928629524196828, "grad_norm": 0.4100615978240967, "learning_rate": 3.857259048393656e-06, "loss": 0.4435, "step": 1897 }, { "epoch": 0.19296461976413176, "grad_norm": 0.4362509548664093, "learning_rate": 3.8592923952826355e-06, "loss": 0.4425, "step": 1898 }, { "epoch": 0.19306628710858073, "grad_norm": 0.47373801469802856, "learning_rate": 3.861325742171615e-06, "loss": 0.4333, "step": 1899 }, { "epoch": 0.1931679544530297, "grad_norm": 0.43473020195961, "learning_rate": 3.8633590890605945e-06, "loss": 0.4373, "step": 1900 }, { "epoch": 0.19326962179747864, "grad_norm": 0.3910529315471649, "learning_rate": 3.865392435949573e-06, "loss": 0.4145, "step": 1901 }, { "epoch": 0.1933712891419276, "grad_norm": 0.4183042049407959, "learning_rate": 3.867425782838553e-06, "loss": 0.4209, "step": 1902 }, { "epoch": 0.19347295648637658, "grad_norm": 0.4526323676109314, "learning_rate": 3.869459129727532e-06, "loss": 0.4591, "step": 1903 }, { "epoch": 0.19357462383082555, "grad_norm": 0.4081551134586334, "learning_rate": 3.871492476616512e-06, "loss": 0.4404, "step": 1904 }, { "epoch": 0.1936762911752745, "grad_norm": 0.43508824706077576, "learning_rate": 3.87352582350549e-06, "loss": 0.4531, "step": 1905 }, { "epoch": 0.19377795851972346, "grad_norm": 0.4671156406402588, "learning_rate": 3.87555917039447e-06, "loss": 0.4567, "step": 1906 }, { "epoch": 0.19387962586417243, "grad_norm": 0.4275037944316864, "learning_rate": 3.877592517283449e-06, "loss": 0.4057, "step": 1907 }, { "epoch": 0.1939812932086214, "grad_norm": 0.4084031283855438, "learning_rate": 3.879625864172428e-06, "loss": 0.4226, "step": 1908 }, { "epoch": 0.19408296055307037, "grad_norm": 0.4027625620365143, "learning_rate": 3.881659211061407e-06, "loss": 0.4496, "step": 1909 }, { "epoch": 0.1941846278975193, "grad_norm": 0.47286808490753174, "learning_rate": 3.883692557950387e-06, "loss": 0.4551, "step": 1910 }, { "epoch": 0.19428629524196828, "grad_norm": 0.4560431241989136, "learning_rate": 3.8857259048393655e-06, "loss": 0.4209, "step": 1911 }, { "epoch": 0.19438796258641725, "grad_norm": 0.4729948341846466, "learning_rate": 3.887759251728345e-06, "loss": 0.4645, "step": 1912 }, { "epoch": 0.19448962993086621, "grad_norm": 0.4109589755535126, "learning_rate": 3.8897925986173244e-06, "loss": 0.4073, "step": 1913 }, { "epoch": 0.19459129727531516, "grad_norm": 0.4092501103878021, "learning_rate": 3.891825945506303e-06, "loss": 0.4236, "step": 1914 }, { "epoch": 0.19469296461976413, "grad_norm": 0.4969636797904968, "learning_rate": 3.8938592923952826e-06, "loss": 0.4228, "step": 1915 }, { "epoch": 0.1947946319642131, "grad_norm": 0.4039774239063263, "learning_rate": 3.895892639284262e-06, "loss": 0.4436, "step": 1916 }, { "epoch": 0.19489629930866206, "grad_norm": 0.45988497138023376, "learning_rate": 3.8979259861732415e-06, "loss": 0.4591, "step": 1917 }, { "epoch": 0.19499796665311103, "grad_norm": 0.48370522260665894, "learning_rate": 3.899959333062221e-06, "loss": 0.4579, "step": 1918 }, { "epoch": 0.19509963399755997, "grad_norm": 0.4854569137096405, "learning_rate": 3.9019926799512e-06, "loss": 0.4355, "step": 1919 }, { "epoch": 0.19520130134200894, "grad_norm": 0.4075334668159485, "learning_rate": 3.904026026840179e-06, "loss": 0.431, "step": 1920 }, { "epoch": 0.1953029686864579, "grad_norm": 0.4847993850708008, "learning_rate": 3.906059373729159e-06, "loss": 0.4497, "step": 1921 }, { "epoch": 0.19540463603090688, "grad_norm": 0.4827776849269867, "learning_rate": 3.908092720618138e-06, "loss": 0.4324, "step": 1922 }, { "epoch": 0.19550630337535582, "grad_norm": 0.45423269271850586, "learning_rate": 3.910126067507117e-06, "loss": 0.4635, "step": 1923 }, { "epoch": 0.1956079707198048, "grad_norm": 0.3975495994091034, "learning_rate": 3.912159414396096e-06, "loss": 0.3958, "step": 1924 }, { "epoch": 0.19570963806425376, "grad_norm": 0.4928736686706543, "learning_rate": 3.914192761285076e-06, "loss": 0.4306, "step": 1925 }, { "epoch": 0.19581130540870273, "grad_norm": 0.541772186756134, "learning_rate": 3.916226108174055e-06, "loss": 0.4405, "step": 1926 }, { "epoch": 0.1959129727531517, "grad_norm": 0.4454517066478729, "learning_rate": 3.918259455063034e-06, "loss": 0.4355, "step": 1927 }, { "epoch": 0.19601464009760064, "grad_norm": 0.4793093204498291, "learning_rate": 3.920292801952013e-06, "loss": 0.4157, "step": 1928 }, { "epoch": 0.1961163074420496, "grad_norm": 0.4066844880580902, "learning_rate": 3.922326148840993e-06, "loss": 0.4443, "step": 1929 }, { "epoch": 0.19621797478649858, "grad_norm": 0.4307781457901001, "learning_rate": 3.9243594957299715e-06, "loss": 0.423, "step": 1930 }, { "epoch": 0.19631964213094755, "grad_norm": 0.4776417016983032, "learning_rate": 3.926392842618951e-06, "loss": 0.4511, "step": 1931 }, { "epoch": 0.19642130947539652, "grad_norm": 0.5173813104629517, "learning_rate": 3.9284261895079305e-06, "loss": 0.4546, "step": 1932 }, { "epoch": 0.19652297681984546, "grad_norm": 0.45621129870414734, "learning_rate": 3.930459536396909e-06, "loss": 0.4237, "step": 1933 }, { "epoch": 0.19662464416429443, "grad_norm": 0.516631007194519, "learning_rate": 3.932492883285889e-06, "loss": 0.4383, "step": 1934 }, { "epoch": 0.1967263115087434, "grad_norm": 0.490856409072876, "learning_rate": 3.934526230174868e-06, "loss": 0.4376, "step": 1935 }, { "epoch": 0.19682797885319236, "grad_norm": 0.5347199440002441, "learning_rate": 3.936559577063848e-06, "loss": 0.4265, "step": 1936 }, { "epoch": 0.1969296461976413, "grad_norm": 0.4621342122554779, "learning_rate": 3.938592923952826e-06, "loss": 0.4266, "step": 1937 }, { "epoch": 0.19703131354209028, "grad_norm": 0.4745312035083771, "learning_rate": 3.940626270841806e-06, "loss": 0.4501, "step": 1938 }, { "epoch": 0.19713298088653924, "grad_norm": 0.5146150588989258, "learning_rate": 3.942659617730785e-06, "loss": 0.4401, "step": 1939 }, { "epoch": 0.1972346482309882, "grad_norm": 0.4705788493156433, "learning_rate": 3.944692964619765e-06, "loss": 0.4621, "step": 1940 }, { "epoch": 0.19733631557543718, "grad_norm": 0.4671039581298828, "learning_rate": 3.946726311508744e-06, "loss": 0.4316, "step": 1941 }, { "epoch": 0.19743798291988612, "grad_norm": 0.4581219255924225, "learning_rate": 3.948759658397723e-06, "loss": 0.4556, "step": 1942 }, { "epoch": 0.1975396502643351, "grad_norm": 0.5256187915802002, "learning_rate": 3.950793005286702e-06, "loss": 0.418, "step": 1943 }, { "epoch": 0.19764131760878406, "grad_norm": 0.4686668813228607, "learning_rate": 3.952826352175682e-06, "loss": 0.4641, "step": 1944 }, { "epoch": 0.19774298495323303, "grad_norm": 0.4200027585029602, "learning_rate": 3.954859699064661e-06, "loss": 0.4181, "step": 1945 }, { "epoch": 0.19784465229768197, "grad_norm": 0.48579153418540955, "learning_rate": 3.95689304595364e-06, "loss": 0.4425, "step": 1946 }, { "epoch": 0.19794631964213094, "grad_norm": 0.46326446533203125, "learning_rate": 3.958926392842619e-06, "loss": 0.431, "step": 1947 }, { "epoch": 0.1980479869865799, "grad_norm": 0.44562870264053345, "learning_rate": 3.960959739731599e-06, "loss": 0.4589, "step": 1948 }, { "epoch": 0.19814965433102888, "grad_norm": 0.420807421207428, "learning_rate": 3.9629930866205775e-06, "loss": 0.4125, "step": 1949 }, { "epoch": 0.19825132167547785, "grad_norm": 0.4715389609336853, "learning_rate": 3.965026433509557e-06, "loss": 0.4447, "step": 1950 }, { "epoch": 0.1983529890199268, "grad_norm": 0.4655235707759857, "learning_rate": 3.9670597803985365e-06, "loss": 0.4334, "step": 1951 }, { "epoch": 0.19845465636437576, "grad_norm": 0.42867982387542725, "learning_rate": 3.969093127287515e-06, "loss": 0.4316, "step": 1952 }, { "epoch": 0.19855632370882473, "grad_norm": 0.4755600392818451, "learning_rate": 3.971126474176495e-06, "loss": 0.4489, "step": 1953 }, { "epoch": 0.1986579910532737, "grad_norm": 0.46874797344207764, "learning_rate": 3.973159821065474e-06, "loss": 0.4285, "step": 1954 }, { "epoch": 0.19875965839772264, "grad_norm": 0.4580056667327881, "learning_rate": 3.975193167954453e-06, "loss": 0.4285, "step": 1955 }, { "epoch": 0.1988613257421716, "grad_norm": 0.5039417743682861, "learning_rate": 3.977226514843432e-06, "loss": 0.469, "step": 1956 }, { "epoch": 0.19896299308662058, "grad_norm": 0.42110711336135864, "learning_rate": 3.979259861732412e-06, "loss": 0.4338, "step": 1957 }, { "epoch": 0.19906466043106955, "grad_norm": 0.4680413603782654, "learning_rate": 3.981293208621391e-06, "loss": 0.4304, "step": 1958 }, { "epoch": 0.19916632777551851, "grad_norm": 0.44419553875923157, "learning_rate": 3.983326555510371e-06, "loss": 0.4855, "step": 1959 }, { "epoch": 0.19926799511996746, "grad_norm": 0.40808942914009094, "learning_rate": 3.985359902399349e-06, "loss": 0.422, "step": 1960 }, { "epoch": 0.19936966246441643, "grad_norm": 0.4880446493625641, "learning_rate": 3.987393249288329e-06, "loss": 0.4485, "step": 1961 }, { "epoch": 0.1994713298088654, "grad_norm": 0.4668942987918854, "learning_rate": 3.989426596177308e-06, "loss": 0.4192, "step": 1962 }, { "epoch": 0.19957299715331436, "grad_norm": 0.45569688081741333, "learning_rate": 3.991459943066288e-06, "loss": 0.43, "step": 1963 }, { "epoch": 0.19967466449776333, "grad_norm": 0.4640485644340515, "learning_rate": 3.993493289955267e-06, "loss": 0.4267, "step": 1964 }, { "epoch": 0.19977633184221227, "grad_norm": 0.4419100880622864, "learning_rate": 3.995526636844246e-06, "loss": 0.442, "step": 1965 }, { "epoch": 0.19987799918666124, "grad_norm": 0.4322074353694916, "learning_rate": 3.9975599837332254e-06, "loss": 0.4814, "step": 1966 }, { "epoch": 0.1999796665311102, "grad_norm": 0.4462650418281555, "learning_rate": 3.999593330622205e-06, "loss": 0.4168, "step": 1967 }, { "epoch": 0.20008133387555918, "grad_norm": 0.4442670941352844, "learning_rate": 4.001626677511184e-06, "loss": 0.4757, "step": 1968 }, { "epoch": 0.20018300122000812, "grad_norm": 0.38540786504745483, "learning_rate": 4.003660024400163e-06, "loss": 0.4414, "step": 1969 }, { "epoch": 0.2002846685644571, "grad_norm": 0.43795663118362427, "learning_rate": 4.0056933712891426e-06, "loss": 0.4679, "step": 1970 }, { "epoch": 0.20038633590890606, "grad_norm": 0.45848947763442993, "learning_rate": 4.007726718178121e-06, "loss": 0.3733, "step": 1971 }, { "epoch": 0.20048800325335503, "grad_norm": 0.41229248046875, "learning_rate": 4.009760065067101e-06, "loss": 0.3897, "step": 1972 }, { "epoch": 0.200589670597804, "grad_norm": 0.41980552673339844, "learning_rate": 4.01179341195608e-06, "loss": 0.4269, "step": 1973 }, { "epoch": 0.20069133794225294, "grad_norm": 0.45909571647644043, "learning_rate": 4.013826758845059e-06, "loss": 0.4269, "step": 1974 }, { "epoch": 0.2007930052867019, "grad_norm": 0.4720286428928375, "learning_rate": 4.015860105734038e-06, "loss": 0.4286, "step": 1975 }, { "epoch": 0.20089467263115088, "grad_norm": 0.4644245505332947, "learning_rate": 4.017893452623018e-06, "loss": 0.4336, "step": 1976 }, { "epoch": 0.20099633997559985, "grad_norm": 0.449317991733551, "learning_rate": 4.019926799511997e-06, "loss": 0.4151, "step": 1977 }, { "epoch": 0.2010980073200488, "grad_norm": 0.45845046639442444, "learning_rate": 4.021960146400976e-06, "loss": 0.4406, "step": 1978 }, { "epoch": 0.20119967466449776, "grad_norm": 0.4992486238479614, "learning_rate": 4.023993493289955e-06, "loss": 0.4526, "step": 1979 }, { "epoch": 0.20130134200894673, "grad_norm": 0.4190391004085541, "learning_rate": 4.026026840178935e-06, "loss": 0.4353, "step": 1980 }, { "epoch": 0.2014030093533957, "grad_norm": 0.4291895627975464, "learning_rate": 4.028060187067914e-06, "loss": 0.4151, "step": 1981 }, { "epoch": 0.20150467669784466, "grad_norm": 0.4589078426361084, "learning_rate": 4.030093533956894e-06, "loss": 0.4073, "step": 1982 }, { "epoch": 0.2016063440422936, "grad_norm": 0.45677560567855835, "learning_rate": 4.0321268808458725e-06, "loss": 0.4249, "step": 1983 }, { "epoch": 0.20170801138674258, "grad_norm": 0.45639440417289734, "learning_rate": 4.034160227734852e-06, "loss": 0.4429, "step": 1984 }, { "epoch": 0.20180967873119154, "grad_norm": 0.43564677238464355, "learning_rate": 4.0361935746238315e-06, "loss": 0.4697, "step": 1985 }, { "epoch": 0.2019113460756405, "grad_norm": 0.440963476896286, "learning_rate": 4.038226921512811e-06, "loss": 0.4105, "step": 1986 }, { "epoch": 0.20201301342008945, "grad_norm": 0.47351229190826416, "learning_rate": 4.04026026840179e-06, "loss": 0.4432, "step": 1987 }, { "epoch": 0.20211468076453842, "grad_norm": 0.4593416750431061, "learning_rate": 4.042293615290769e-06, "loss": 0.4527, "step": 1988 }, { "epoch": 0.2022163481089874, "grad_norm": 0.45499202609062195, "learning_rate": 4.044326962179749e-06, "loss": 0.407, "step": 1989 }, { "epoch": 0.20231801545343636, "grad_norm": 0.4391322135925293, "learning_rate": 4.046360309068727e-06, "loss": 0.4088, "step": 1990 }, { "epoch": 0.20241968279788533, "grad_norm": 0.4750841557979584, "learning_rate": 4.048393655957707e-06, "loss": 0.4718, "step": 1991 }, { "epoch": 0.20252135014233427, "grad_norm": 0.4327443540096283, "learning_rate": 4.050427002846685e-06, "loss": 0.3776, "step": 1992 }, { "epoch": 0.20262301748678324, "grad_norm": 0.48168373107910156, "learning_rate": 4.052460349735665e-06, "loss": 0.4574, "step": 1993 }, { "epoch": 0.2027246848312322, "grad_norm": 0.440294474363327, "learning_rate": 4.054493696624644e-06, "loss": 0.4564, "step": 1994 }, { "epoch": 0.20282635217568118, "grad_norm": 0.43581509590148926, "learning_rate": 4.056527043513624e-06, "loss": 0.4783, "step": 1995 }, { "epoch": 0.20292801952013012, "grad_norm": 0.4564903974533081, "learning_rate": 4.0585603904026025e-06, "loss": 0.3974, "step": 1996 }, { "epoch": 0.2030296868645791, "grad_norm": 0.46228253841400146, "learning_rate": 4.060593737291582e-06, "loss": 0.4614, "step": 1997 }, { "epoch": 0.20313135420902806, "grad_norm": 0.43851789832115173, "learning_rate": 4.0626270841805614e-06, "loss": 0.4271, "step": 1998 }, { "epoch": 0.20323302155347703, "grad_norm": 0.47384920716285706, "learning_rate": 4.064660431069541e-06, "loss": 0.4397, "step": 1999 }, { "epoch": 0.203334688897926, "grad_norm": 0.49829596281051636, "learning_rate": 4.06669377795852e-06, "loss": 0.4507, "step": 2000 }, { "epoch": 0.20343635624237494, "grad_norm": 0.4719436764717102, "learning_rate": 4.068727124847499e-06, "loss": 0.4415, "step": 2001 }, { "epoch": 0.2035380235868239, "grad_norm": 0.5814014077186584, "learning_rate": 4.0707604717364785e-06, "loss": 0.4135, "step": 2002 }, { "epoch": 0.20363969093127288, "grad_norm": 0.4968004822731018, "learning_rate": 4.072793818625458e-06, "loss": 0.427, "step": 2003 }, { "epoch": 0.20374135827572185, "grad_norm": 0.39943069219589233, "learning_rate": 4.0748271655144375e-06, "loss": 0.4267, "step": 2004 }, { "epoch": 0.20384302562017081, "grad_norm": 0.5125576257705688, "learning_rate": 4.076860512403417e-06, "loss": 0.4349, "step": 2005 }, { "epoch": 0.20394469296461976, "grad_norm": 0.5312303900718689, "learning_rate": 4.078893859292396e-06, "loss": 0.43, "step": 2006 }, { "epoch": 0.20404636030906873, "grad_norm": 0.41535520553588867, "learning_rate": 4.080927206181375e-06, "loss": 0.4555, "step": 2007 }, { "epoch": 0.2041480276535177, "grad_norm": 0.47175726294517517, "learning_rate": 4.082960553070355e-06, "loss": 0.4489, "step": 2008 }, { "epoch": 0.20424969499796666, "grad_norm": 0.46529507637023926, "learning_rate": 4.084993899959333e-06, "loss": 0.4733, "step": 2009 }, { "epoch": 0.2043513623424156, "grad_norm": 0.43832600116729736, "learning_rate": 4.087027246848313e-06, "loss": 0.4553, "step": 2010 }, { "epoch": 0.20445302968686457, "grad_norm": 0.4345901906490326, "learning_rate": 4.089060593737291e-06, "loss": 0.417, "step": 2011 }, { "epoch": 0.20455469703131354, "grad_norm": 0.42813974618911743, "learning_rate": 4.091093940626271e-06, "loss": 0.4156, "step": 2012 }, { "epoch": 0.2046563643757625, "grad_norm": 0.4076453149318695, "learning_rate": 4.09312728751525e-06, "loss": 0.4265, "step": 2013 }, { "epoch": 0.20475803172021148, "grad_norm": 0.4532502293586731, "learning_rate": 4.09516063440423e-06, "loss": 0.4428, "step": 2014 }, { "epoch": 0.20485969906466042, "grad_norm": 0.4853637218475342, "learning_rate": 4.0971939812932085e-06, "loss": 0.4335, "step": 2015 }, { "epoch": 0.2049613664091094, "grad_norm": 0.46878254413604736, "learning_rate": 4.099227328182188e-06, "loss": 0.442, "step": 2016 }, { "epoch": 0.20506303375355836, "grad_norm": 0.4454513490200043, "learning_rate": 4.1012606750711675e-06, "loss": 0.427, "step": 2017 }, { "epoch": 0.20516470109800733, "grad_norm": 0.4629579782485962, "learning_rate": 4.103294021960147e-06, "loss": 0.4411, "step": 2018 }, { "epoch": 0.20526636844245627, "grad_norm": 0.48591357469558716, "learning_rate": 4.105327368849126e-06, "loss": 0.4327, "step": 2019 }, { "epoch": 0.20536803578690524, "grad_norm": 0.4438474178314209, "learning_rate": 4.107360715738105e-06, "loss": 0.4515, "step": 2020 }, { "epoch": 0.2054697031313542, "grad_norm": 0.4684240221977234, "learning_rate": 4.109394062627085e-06, "loss": 0.4465, "step": 2021 }, { "epoch": 0.20557137047580318, "grad_norm": 0.45120203495025635, "learning_rate": 4.111427409516064e-06, "loss": 0.4642, "step": 2022 }, { "epoch": 0.20567303782025215, "grad_norm": 0.38553398847579956, "learning_rate": 4.1134607564050436e-06, "loss": 0.427, "step": 2023 }, { "epoch": 0.2057747051647011, "grad_norm": 0.4542439579963684, "learning_rate": 4.115494103294022e-06, "loss": 0.4206, "step": 2024 }, { "epoch": 0.20587637250915006, "grad_norm": 0.45410314202308655, "learning_rate": 4.117527450183002e-06, "loss": 0.4341, "step": 2025 }, { "epoch": 0.20597803985359903, "grad_norm": 0.3935036361217499, "learning_rate": 4.119560797071981e-06, "loss": 0.4378, "step": 2026 }, { "epoch": 0.206079707198048, "grad_norm": 0.4201730191707611, "learning_rate": 4.121594143960961e-06, "loss": 0.4366, "step": 2027 }, { "epoch": 0.20618137454249694, "grad_norm": 0.4465537965297699, "learning_rate": 4.123627490849939e-06, "loss": 0.436, "step": 2028 }, { "epoch": 0.2062830418869459, "grad_norm": 0.4682713449001312, "learning_rate": 4.125660837738919e-06, "loss": 0.4247, "step": 2029 }, { "epoch": 0.20638470923139488, "grad_norm": 0.41133245825767517, "learning_rate": 4.1276941846278974e-06, "loss": 0.4449, "step": 2030 }, { "epoch": 0.20648637657584384, "grad_norm": 0.4708579182624817, "learning_rate": 4.129727531516877e-06, "loss": 0.4495, "step": 2031 }, { "epoch": 0.2065880439202928, "grad_norm": 0.43059438467025757, "learning_rate": 4.131760878405856e-06, "loss": 0.4635, "step": 2032 }, { "epoch": 0.20668971126474175, "grad_norm": 0.4701962172985077, "learning_rate": 4.133794225294835e-06, "loss": 0.4439, "step": 2033 }, { "epoch": 0.20679137860919072, "grad_norm": 0.46612823009490967, "learning_rate": 4.1358275721838145e-06, "loss": 0.4558, "step": 2034 }, { "epoch": 0.2068930459536397, "grad_norm": 0.4551079273223877, "learning_rate": 4.137860919072794e-06, "loss": 0.389, "step": 2035 }, { "epoch": 0.20699471329808866, "grad_norm": 0.5016604661941528, "learning_rate": 4.1398942659617735e-06, "loss": 0.4489, "step": 2036 }, { "epoch": 0.20709638064253763, "grad_norm": 0.4707598090171814, "learning_rate": 4.141927612850753e-06, "loss": 0.4135, "step": 2037 }, { "epoch": 0.20719804798698657, "grad_norm": 0.4289417564868927, "learning_rate": 4.143960959739732e-06, "loss": 0.4403, "step": 2038 }, { "epoch": 0.20729971533143554, "grad_norm": 0.40878909826278687, "learning_rate": 4.145994306628711e-06, "loss": 0.4382, "step": 2039 }, { "epoch": 0.2074013826758845, "grad_norm": 0.4317556619644165, "learning_rate": 4.148027653517691e-06, "loss": 0.424, "step": 2040 }, { "epoch": 0.20750305002033348, "grad_norm": 0.5020987391471863, "learning_rate": 4.15006100040667e-06, "loss": 0.4617, "step": 2041 }, { "epoch": 0.20760471736478242, "grad_norm": 0.42539048194885254, "learning_rate": 4.152094347295649e-06, "loss": 0.4387, "step": 2042 }, { "epoch": 0.2077063847092314, "grad_norm": 0.4237653315067291, "learning_rate": 4.154127694184628e-06, "loss": 0.4566, "step": 2043 }, { "epoch": 0.20780805205368036, "grad_norm": 0.46345800161361694, "learning_rate": 4.156161041073608e-06, "loss": 0.4326, "step": 2044 }, { "epoch": 0.20790971939812933, "grad_norm": 0.45122355222702026, "learning_rate": 4.158194387962587e-06, "loss": 0.4227, "step": 2045 }, { "epoch": 0.2080113867425783, "grad_norm": 0.4748322367668152, "learning_rate": 4.160227734851566e-06, "loss": 0.4126, "step": 2046 }, { "epoch": 0.20811305408702724, "grad_norm": 0.45973196625709534, "learning_rate": 4.162261081740545e-06, "loss": 0.437, "step": 2047 }, { "epoch": 0.2082147214314762, "grad_norm": 0.49747270345687866, "learning_rate": 4.164294428629525e-06, "loss": 0.4257, "step": 2048 }, { "epoch": 0.20831638877592518, "grad_norm": 0.4935675263404846, "learning_rate": 4.1663277755185035e-06, "loss": 0.4407, "step": 2049 }, { "epoch": 0.20841805612037415, "grad_norm": 0.4291512072086334, "learning_rate": 4.168361122407483e-06, "loss": 0.4454, "step": 2050 }, { "epoch": 0.2085197234648231, "grad_norm": 0.5203393697738647, "learning_rate": 4.1703944692964625e-06, "loss": 0.4333, "step": 2051 }, { "epoch": 0.20862139080927206, "grad_norm": 0.4578210711479187, "learning_rate": 4.172427816185441e-06, "loss": 0.425, "step": 2052 }, { "epoch": 0.20872305815372102, "grad_norm": 0.44938206672668457, "learning_rate": 4.174461163074421e-06, "loss": 0.4203, "step": 2053 }, { "epoch": 0.20882472549817, "grad_norm": 0.4617995619773865, "learning_rate": 4.1764945099634e-06, "loss": 0.4255, "step": 2054 }, { "epoch": 0.20892639284261896, "grad_norm": 0.4698721468448639, "learning_rate": 4.1785278568523796e-06, "loss": 0.441, "step": 2055 }, { "epoch": 0.2090280601870679, "grad_norm": 0.46313801407814026, "learning_rate": 4.180561203741358e-06, "loss": 0.4728, "step": 2056 }, { "epoch": 0.20912972753151687, "grad_norm": 0.5335911512374878, "learning_rate": 4.182594550630338e-06, "loss": 0.4596, "step": 2057 }, { "epoch": 0.20923139487596584, "grad_norm": 0.3911966383457184, "learning_rate": 4.184627897519317e-06, "loss": 0.4648, "step": 2058 }, { "epoch": 0.2093330622204148, "grad_norm": 0.4066739082336426, "learning_rate": 4.186661244408297e-06, "loss": 0.4586, "step": 2059 }, { "epoch": 0.20943472956486375, "grad_norm": 0.45700886845588684, "learning_rate": 4.188694591297275e-06, "loss": 0.4355, "step": 2060 }, { "epoch": 0.20953639690931272, "grad_norm": 0.4217414855957031, "learning_rate": 4.190727938186255e-06, "loss": 0.4391, "step": 2061 }, { "epoch": 0.2096380642537617, "grad_norm": 0.41246190667152405, "learning_rate": 4.192761285075234e-06, "loss": 0.441, "step": 2062 }, { "epoch": 0.20973973159821066, "grad_norm": 0.46026837825775146, "learning_rate": 4.194794631964214e-06, "loss": 0.4583, "step": 2063 }, { "epoch": 0.20984139894265963, "grad_norm": 0.42241546511650085, "learning_rate": 4.196827978853193e-06, "loss": 0.4604, "step": 2064 }, { "epoch": 0.20994306628710857, "grad_norm": 0.4054868519306183, "learning_rate": 4.198861325742172e-06, "loss": 0.4505, "step": 2065 }, { "epoch": 0.21004473363155754, "grad_norm": 0.42431506514549255, "learning_rate": 4.200894672631151e-06, "loss": 0.4054, "step": 2066 }, { "epoch": 0.2101464009760065, "grad_norm": 0.4447379410266876, "learning_rate": 4.202928019520131e-06, "loss": 0.406, "step": 2067 }, { "epoch": 0.21024806832045548, "grad_norm": 0.41345393657684326, "learning_rate": 4.2049613664091095e-06, "loss": 0.4223, "step": 2068 }, { "epoch": 0.21034973566490442, "grad_norm": 0.4326888918876648, "learning_rate": 4.206994713298089e-06, "loss": 0.4283, "step": 2069 }, { "epoch": 0.2104514030093534, "grad_norm": 0.4370231628417969, "learning_rate": 4.2090280601870685e-06, "loss": 0.4174, "step": 2070 }, { "epoch": 0.21055307035380236, "grad_norm": 0.4745163321495056, "learning_rate": 4.211061407076047e-06, "loss": 0.4316, "step": 2071 }, { "epoch": 0.21065473769825133, "grad_norm": 0.43024152517318726, "learning_rate": 4.213094753965027e-06, "loss": 0.4643, "step": 2072 }, { "epoch": 0.2107564050427003, "grad_norm": 0.4102058410644531, "learning_rate": 4.215128100854006e-06, "loss": 0.442, "step": 2073 }, { "epoch": 0.21085807238714924, "grad_norm": 0.5015562772750854, "learning_rate": 4.217161447742985e-06, "loss": 0.4381, "step": 2074 }, { "epoch": 0.2109597397315982, "grad_norm": 0.42752936482429504, "learning_rate": 4.219194794631964e-06, "loss": 0.4254, "step": 2075 }, { "epoch": 0.21106140707604717, "grad_norm": 0.3962649405002594, "learning_rate": 4.221228141520944e-06, "loss": 0.4301, "step": 2076 }, { "epoch": 0.21116307442049614, "grad_norm": 0.4870575964450836, "learning_rate": 4.223261488409923e-06, "loss": 0.4356, "step": 2077 }, { "epoch": 0.2112647417649451, "grad_norm": 0.4403369724750519, "learning_rate": 4.225294835298903e-06, "loss": 0.423, "step": 2078 }, { "epoch": 0.21136640910939405, "grad_norm": 0.4677857458591461, "learning_rate": 4.227328182187881e-06, "loss": 0.4316, "step": 2079 }, { "epoch": 0.21146807645384302, "grad_norm": 0.41371262073516846, "learning_rate": 4.229361529076861e-06, "loss": 0.4205, "step": 2080 }, { "epoch": 0.211569743798292, "grad_norm": 0.41817930340766907, "learning_rate": 4.23139487596584e-06, "loss": 0.4553, "step": 2081 }, { "epoch": 0.21167141114274096, "grad_norm": 0.4694051742553711, "learning_rate": 4.23342822285482e-06, "loss": 0.4514, "step": 2082 }, { "epoch": 0.2117730784871899, "grad_norm": 0.4349919557571411, "learning_rate": 4.2354615697437984e-06, "loss": 0.4297, "step": 2083 }, { "epoch": 0.21187474583163887, "grad_norm": 0.4837413728237152, "learning_rate": 4.237494916632778e-06, "loss": 0.4571, "step": 2084 }, { "epoch": 0.21197641317608784, "grad_norm": 0.4495464861392975, "learning_rate": 4.2395282635217574e-06, "loss": 0.4231, "step": 2085 }, { "epoch": 0.2120780805205368, "grad_norm": 0.4820268154144287, "learning_rate": 4.241561610410737e-06, "loss": 0.4531, "step": 2086 }, { "epoch": 0.21217974786498578, "grad_norm": 0.4168175160884857, "learning_rate": 4.2435949572997156e-06, "loss": 0.4044, "step": 2087 }, { "epoch": 0.21228141520943472, "grad_norm": 0.4062737822532654, "learning_rate": 4.245628304188695e-06, "loss": 0.3937, "step": 2088 }, { "epoch": 0.2123830825538837, "grad_norm": 0.48339614272117615, "learning_rate": 4.2476616510776745e-06, "loss": 0.4105, "step": 2089 }, { "epoch": 0.21248474989833266, "grad_norm": 0.4385930597782135, "learning_rate": 4.249694997966653e-06, "loss": 0.4069, "step": 2090 }, { "epoch": 0.21258641724278163, "grad_norm": 0.47020870447158813, "learning_rate": 4.251728344855633e-06, "loss": 0.4458, "step": 2091 }, { "epoch": 0.21268808458723057, "grad_norm": 0.5328769683837891, "learning_rate": 4.253761691744612e-06, "loss": 0.4652, "step": 2092 }, { "epoch": 0.21278975193167954, "grad_norm": 0.46073731780052185, "learning_rate": 4.255795038633591e-06, "loss": 0.463, "step": 2093 }, { "epoch": 0.2128914192761285, "grad_norm": 0.4300006330013275, "learning_rate": 4.25782838552257e-06, "loss": 0.4415, "step": 2094 }, { "epoch": 0.21299308662057748, "grad_norm": 0.5039165616035461, "learning_rate": 4.25986173241155e-06, "loss": 0.4371, "step": 2095 }, { "epoch": 0.21309475396502645, "grad_norm": 0.5285782814025879, "learning_rate": 4.261895079300529e-06, "loss": 0.4473, "step": 2096 }, { "epoch": 0.2131964213094754, "grad_norm": 0.4531356990337372, "learning_rate": 4.263928426189508e-06, "loss": 0.4153, "step": 2097 }, { "epoch": 0.21329808865392436, "grad_norm": 0.4528908431529999, "learning_rate": 4.265961773078487e-06, "loss": 0.4726, "step": 2098 }, { "epoch": 0.21339975599837332, "grad_norm": 0.5337269902229309, "learning_rate": 4.267995119967467e-06, "loss": 0.4297, "step": 2099 }, { "epoch": 0.2135014233428223, "grad_norm": 0.5163633227348328, "learning_rate": 4.270028466856446e-06, "loss": 0.4698, "step": 2100 }, { "epoch": 0.21360309068727124, "grad_norm": 0.4547649323940277, "learning_rate": 4.272061813745425e-06, "loss": 0.4253, "step": 2101 }, { "epoch": 0.2137047580317202, "grad_norm": 0.6047237515449524, "learning_rate": 4.2740951606344045e-06, "loss": 0.4253, "step": 2102 }, { "epoch": 0.21380642537616917, "grad_norm": 0.5023913979530334, "learning_rate": 4.276128507523384e-06, "loss": 0.4265, "step": 2103 }, { "epoch": 0.21390809272061814, "grad_norm": 0.45940306782722473, "learning_rate": 4.2781618544123635e-06, "loss": 0.4367, "step": 2104 }, { "epoch": 0.2140097600650671, "grad_norm": 0.4814336895942688, "learning_rate": 4.280195201301343e-06, "loss": 0.4422, "step": 2105 }, { "epoch": 0.21411142740951605, "grad_norm": 0.5092993974685669, "learning_rate": 4.282228548190322e-06, "loss": 0.4381, "step": 2106 }, { "epoch": 0.21421309475396502, "grad_norm": 0.428974449634552, "learning_rate": 4.284261895079301e-06, "loss": 0.4364, "step": 2107 }, { "epoch": 0.214314762098414, "grad_norm": 0.4748293161392212, "learning_rate": 4.2862952419682806e-06, "loss": 0.4507, "step": 2108 }, { "epoch": 0.21441642944286296, "grad_norm": 0.4604235291481018, "learning_rate": 4.288328588857259e-06, "loss": 0.4378, "step": 2109 }, { "epoch": 0.2145180967873119, "grad_norm": 0.4252183437347412, "learning_rate": 4.290361935746239e-06, "loss": 0.4393, "step": 2110 }, { "epoch": 0.21461976413176087, "grad_norm": 0.41324499249458313, "learning_rate": 4.292395282635218e-06, "loss": 0.4469, "step": 2111 }, { "epoch": 0.21472143147620984, "grad_norm": 0.4776741564273834, "learning_rate": 4.294428629524197e-06, "loss": 0.4402, "step": 2112 }, { "epoch": 0.2148230988206588, "grad_norm": 0.413015753030777, "learning_rate": 4.296461976413176e-06, "loss": 0.4262, "step": 2113 }, { "epoch": 0.21492476616510778, "grad_norm": 0.4892931282520294, "learning_rate": 4.298495323302156e-06, "loss": 0.4172, "step": 2114 }, { "epoch": 0.21502643350955672, "grad_norm": 0.46044376492500305, "learning_rate": 4.3005286701911344e-06, "loss": 0.4541, "step": 2115 }, { "epoch": 0.2151281008540057, "grad_norm": 0.41812989115715027, "learning_rate": 4.302562017080114e-06, "loss": 0.419, "step": 2116 }, { "epoch": 0.21522976819845466, "grad_norm": 0.4494164288043976, "learning_rate": 4.304595363969093e-06, "loss": 0.4136, "step": 2117 }, { "epoch": 0.21533143554290363, "grad_norm": 0.4460306763648987, "learning_rate": 4.306628710858073e-06, "loss": 0.4537, "step": 2118 }, { "epoch": 0.2154331028873526, "grad_norm": 0.43012017011642456, "learning_rate": 4.308662057747052e-06, "loss": 0.4506, "step": 2119 }, { "epoch": 0.21553477023180154, "grad_norm": 0.4928675591945648, "learning_rate": 4.310695404636031e-06, "loss": 0.4259, "step": 2120 }, { "epoch": 0.2156364375762505, "grad_norm": 0.4322715699672699, "learning_rate": 4.3127287515250105e-06, "loss": 0.4577, "step": 2121 }, { "epoch": 0.21573810492069947, "grad_norm": 0.4825986921787262, "learning_rate": 4.31476209841399e-06, "loss": 0.4445, "step": 2122 }, { "epoch": 0.21583977226514844, "grad_norm": 0.4378889203071594, "learning_rate": 4.3167954453029695e-06, "loss": 0.4284, "step": 2123 }, { "epoch": 0.21594143960959739, "grad_norm": 0.45905399322509766, "learning_rate": 4.318828792191948e-06, "loss": 0.4591, "step": 2124 }, { "epoch": 0.21604310695404635, "grad_norm": 0.4272385835647583, "learning_rate": 4.320862139080928e-06, "loss": 0.4307, "step": 2125 }, { "epoch": 0.21614477429849532, "grad_norm": 0.41851386427879333, "learning_rate": 4.322895485969907e-06, "loss": 0.4469, "step": 2126 }, { "epoch": 0.2162464416429443, "grad_norm": 0.43040770292282104, "learning_rate": 4.324928832858887e-06, "loss": 0.4283, "step": 2127 }, { "epoch": 0.21634810898739326, "grad_norm": 0.4347197711467743, "learning_rate": 4.326962179747865e-06, "loss": 0.4705, "step": 2128 }, { "epoch": 0.2164497763318422, "grad_norm": 0.46298742294311523, "learning_rate": 4.328995526636845e-06, "loss": 0.3914, "step": 2129 }, { "epoch": 0.21655144367629117, "grad_norm": 0.47281110286712646, "learning_rate": 4.331028873525823e-06, "loss": 0.4459, "step": 2130 }, { "epoch": 0.21665311102074014, "grad_norm": 0.43933379650115967, "learning_rate": 4.333062220414803e-06, "loss": 0.4315, "step": 2131 }, { "epoch": 0.2167547783651891, "grad_norm": 0.44243529438972473, "learning_rate": 4.335095567303782e-06, "loss": 0.4471, "step": 2132 }, { "epoch": 0.21685644570963805, "grad_norm": 0.41773906350135803, "learning_rate": 4.337128914192761e-06, "loss": 0.4126, "step": 2133 }, { "epoch": 0.21695811305408702, "grad_norm": 0.45453882217407227, "learning_rate": 4.3391622610817405e-06, "loss": 0.4555, "step": 2134 }, { "epoch": 0.217059780398536, "grad_norm": 0.4373781681060791, "learning_rate": 4.34119560797072e-06, "loss": 0.4182, "step": 2135 }, { "epoch": 0.21716144774298496, "grad_norm": 0.44116199016571045, "learning_rate": 4.3432289548596995e-06, "loss": 0.4191, "step": 2136 }, { "epoch": 0.21726311508743393, "grad_norm": 0.42638009786605835, "learning_rate": 4.345262301748679e-06, "loss": 0.4149, "step": 2137 }, { "epoch": 0.21736478243188287, "grad_norm": 0.42201218008995056, "learning_rate": 4.347295648637658e-06, "loss": 0.4073, "step": 2138 }, { "epoch": 0.21746644977633184, "grad_norm": 0.3850140869617462, "learning_rate": 4.349328995526637e-06, "loss": 0.4169, "step": 2139 }, { "epoch": 0.2175681171207808, "grad_norm": 0.4329265058040619, "learning_rate": 4.3513623424156166e-06, "loss": 0.4664, "step": 2140 }, { "epoch": 0.21766978446522978, "grad_norm": 0.4103236198425293, "learning_rate": 4.353395689304596e-06, "loss": 0.4016, "step": 2141 }, { "epoch": 0.21777145180967872, "grad_norm": 0.40864959359169006, "learning_rate": 4.355429036193575e-06, "loss": 0.4236, "step": 2142 }, { "epoch": 0.2178731191541277, "grad_norm": 0.4698231518268585, "learning_rate": 4.357462383082554e-06, "loss": 0.4664, "step": 2143 }, { "epoch": 0.21797478649857666, "grad_norm": 0.37453609704971313, "learning_rate": 4.359495729971534e-06, "loss": 0.4166, "step": 2144 }, { "epoch": 0.21807645384302562, "grad_norm": 0.49479806423187256, "learning_rate": 4.361529076860513e-06, "loss": 0.4503, "step": 2145 }, { "epoch": 0.2181781211874746, "grad_norm": 0.46832215785980225, "learning_rate": 4.363562423749493e-06, "loss": 0.4336, "step": 2146 }, { "epoch": 0.21827978853192354, "grad_norm": 0.40820711851119995, "learning_rate": 4.365595770638471e-06, "loss": 0.4372, "step": 2147 }, { "epoch": 0.2183814558763725, "grad_norm": 0.5004523396492004, "learning_rate": 4.367629117527451e-06, "loss": 0.4443, "step": 2148 }, { "epoch": 0.21848312322082147, "grad_norm": 0.41940662264823914, "learning_rate": 4.369662464416429e-06, "loss": 0.427, "step": 2149 }, { "epoch": 0.21858479056527044, "grad_norm": 0.4792390763759613, "learning_rate": 4.371695811305409e-06, "loss": 0.4283, "step": 2150 }, { "epoch": 0.2186864579097194, "grad_norm": 0.4326638877391815, "learning_rate": 4.373729158194388e-06, "loss": 0.4375, "step": 2151 }, { "epoch": 0.21878812525416835, "grad_norm": 0.4283997118473053, "learning_rate": 4.375762505083367e-06, "loss": 0.4208, "step": 2152 }, { "epoch": 0.21888979259861732, "grad_norm": 0.4383041560649872, "learning_rate": 4.3777958519723465e-06, "loss": 0.4439, "step": 2153 }, { "epoch": 0.2189914599430663, "grad_norm": 0.44436895847320557, "learning_rate": 4.379829198861326e-06, "loss": 0.4118, "step": 2154 }, { "epoch": 0.21909312728751526, "grad_norm": 0.4364605247974396, "learning_rate": 4.3818625457503055e-06, "loss": 0.4563, "step": 2155 }, { "epoch": 0.2191947946319642, "grad_norm": 0.4392209053039551, "learning_rate": 4.383895892639284e-06, "loss": 0.4275, "step": 2156 }, { "epoch": 0.21929646197641317, "grad_norm": 0.4030469059944153, "learning_rate": 4.385929239528264e-06, "loss": 0.4196, "step": 2157 }, { "epoch": 0.21939812932086214, "grad_norm": 0.4054739773273468, "learning_rate": 4.387962586417243e-06, "loss": 0.4051, "step": 2158 }, { "epoch": 0.2194997966653111, "grad_norm": 0.4455888271331787, "learning_rate": 4.389995933306223e-06, "loss": 0.4512, "step": 2159 }, { "epoch": 0.21960146400976008, "grad_norm": 0.43539419770240784, "learning_rate": 4.392029280195202e-06, "loss": 0.416, "step": 2160 }, { "epoch": 0.21970313135420902, "grad_norm": 0.4201143980026245, "learning_rate": 4.394062627084181e-06, "loss": 0.4132, "step": 2161 }, { "epoch": 0.219804798698658, "grad_norm": 0.4605482220649719, "learning_rate": 4.39609597397316e-06, "loss": 0.4511, "step": 2162 }, { "epoch": 0.21990646604310696, "grad_norm": 0.43898439407348633, "learning_rate": 4.39812932086214e-06, "loss": 0.448, "step": 2163 }, { "epoch": 0.22000813338755593, "grad_norm": 0.4812052845954895, "learning_rate": 4.400162667751119e-06, "loss": 0.4268, "step": 2164 }, { "epoch": 0.22010980073200487, "grad_norm": 0.41198432445526123, "learning_rate": 4.402196014640098e-06, "loss": 0.4212, "step": 2165 }, { "epoch": 0.22021146807645384, "grad_norm": 0.5120342969894409, "learning_rate": 4.404229361529077e-06, "loss": 0.4422, "step": 2166 }, { "epoch": 0.2203131354209028, "grad_norm": 0.4686117470264435, "learning_rate": 4.406262708418057e-06, "loss": 0.4441, "step": 2167 }, { "epoch": 0.22041480276535177, "grad_norm": 0.48539549112319946, "learning_rate": 4.4082960553070355e-06, "loss": 0.4432, "step": 2168 }, { "epoch": 0.22051647010980074, "grad_norm": 0.44065430760383606, "learning_rate": 4.410329402196015e-06, "loss": 0.4215, "step": 2169 }, { "epoch": 0.22061813745424969, "grad_norm": 0.44430044293403625, "learning_rate": 4.4123627490849944e-06, "loss": 0.3937, "step": 2170 }, { "epoch": 0.22071980479869865, "grad_norm": 0.45128050446510315, "learning_rate": 4.414396095973973e-06, "loss": 0.4153, "step": 2171 }, { "epoch": 0.22082147214314762, "grad_norm": 0.5156168937683105, "learning_rate": 4.4164294428629526e-06, "loss": 0.423, "step": 2172 }, { "epoch": 0.2209231394875966, "grad_norm": 0.4146345555782318, "learning_rate": 4.418462789751932e-06, "loss": 0.3985, "step": 2173 }, { "epoch": 0.22102480683204553, "grad_norm": 0.4218195080757141, "learning_rate": 4.420496136640911e-06, "loss": 0.4624, "step": 2174 }, { "epoch": 0.2211264741764945, "grad_norm": 0.4342691898345947, "learning_rate": 4.42252948352989e-06, "loss": 0.4222, "step": 2175 }, { "epoch": 0.22122814152094347, "grad_norm": 0.4297550916671753, "learning_rate": 4.42456283041887e-06, "loss": 0.4183, "step": 2176 }, { "epoch": 0.22132980886539244, "grad_norm": 0.42037323117256165, "learning_rate": 4.426596177307849e-06, "loss": 0.4281, "step": 2177 }, { "epoch": 0.2214314762098414, "grad_norm": 0.45720621943473816, "learning_rate": 4.428629524196829e-06, "loss": 0.4357, "step": 2178 }, { "epoch": 0.22153314355429035, "grad_norm": 0.40646040439605713, "learning_rate": 4.430662871085807e-06, "loss": 0.4388, "step": 2179 }, { "epoch": 0.22163481089873932, "grad_norm": 0.4912278652191162, "learning_rate": 4.432696217974787e-06, "loss": 0.3855, "step": 2180 }, { "epoch": 0.2217364782431883, "grad_norm": 0.4426892399787903, "learning_rate": 4.434729564863766e-06, "loss": 0.4591, "step": 2181 }, { "epoch": 0.22183814558763726, "grad_norm": 0.41843897104263306, "learning_rate": 4.436762911752746e-06, "loss": 0.4151, "step": 2182 }, { "epoch": 0.2219398129320862, "grad_norm": 0.432088166475296, "learning_rate": 4.438796258641724e-06, "loss": 0.3968, "step": 2183 }, { "epoch": 0.22204148027653517, "grad_norm": 0.43399858474731445, "learning_rate": 4.440829605530704e-06, "loss": 0.4328, "step": 2184 }, { "epoch": 0.22214314762098414, "grad_norm": 0.4003892242908478, "learning_rate": 4.442862952419683e-06, "loss": 0.4123, "step": 2185 }, { "epoch": 0.2222448149654331, "grad_norm": 0.4621577262878418, "learning_rate": 4.444896299308663e-06, "loss": 0.4342, "step": 2186 }, { "epoch": 0.22234648230988208, "grad_norm": 0.4919121563434601, "learning_rate": 4.4469296461976415e-06, "loss": 0.4435, "step": 2187 }, { "epoch": 0.22244814965433102, "grad_norm": 0.45324110984802246, "learning_rate": 4.448962993086621e-06, "loss": 0.4413, "step": 2188 }, { "epoch": 0.22254981699878, "grad_norm": 0.4248271584510803, "learning_rate": 4.4509963399756005e-06, "loss": 0.4593, "step": 2189 }, { "epoch": 0.22265148434322896, "grad_norm": 0.5058746933937073, "learning_rate": 4.453029686864579e-06, "loss": 0.4375, "step": 2190 }, { "epoch": 0.22275315168767792, "grad_norm": 0.4273928701877594, "learning_rate": 4.455063033753559e-06, "loss": 0.4241, "step": 2191 }, { "epoch": 0.2228548190321269, "grad_norm": 0.45638832449913025, "learning_rate": 4.457096380642538e-06, "loss": 0.4432, "step": 2192 }, { "epoch": 0.22295648637657584, "grad_norm": 0.4370132088661194, "learning_rate": 4.459129727531517e-06, "loss": 0.4333, "step": 2193 }, { "epoch": 0.2230581537210248, "grad_norm": 0.408003032207489, "learning_rate": 4.461163074420496e-06, "loss": 0.4025, "step": 2194 }, { "epoch": 0.22315982106547377, "grad_norm": 0.46287283301353455, "learning_rate": 4.463196421309476e-06, "loss": 0.4219, "step": 2195 }, { "epoch": 0.22326148840992274, "grad_norm": 0.4112083613872528, "learning_rate": 4.465229768198455e-06, "loss": 0.4491, "step": 2196 }, { "epoch": 0.22336315575437168, "grad_norm": 0.45548102259635925, "learning_rate": 4.467263115087434e-06, "loss": 0.4161, "step": 2197 }, { "epoch": 0.22346482309882065, "grad_norm": 0.4627721905708313, "learning_rate": 4.469296461976413e-06, "loss": 0.4891, "step": 2198 }, { "epoch": 0.22356649044326962, "grad_norm": 0.4712580740451813, "learning_rate": 4.471329808865393e-06, "loss": 0.4479, "step": 2199 }, { "epoch": 0.2236681577877186, "grad_norm": 0.4544267952442169, "learning_rate": 4.473363155754372e-06, "loss": 0.4481, "step": 2200 }, { "epoch": 0.22376982513216756, "grad_norm": 0.4236783981323242, "learning_rate": 4.475396502643352e-06, "loss": 0.4331, "step": 2201 }, { "epoch": 0.2238714924766165, "grad_norm": 0.4059840142726898, "learning_rate": 4.47742984953233e-06, "loss": 0.4121, "step": 2202 }, { "epoch": 0.22397315982106547, "grad_norm": 0.47648537158966064, "learning_rate": 4.47946319642131e-06, "loss": 0.4273, "step": 2203 }, { "epoch": 0.22407482716551444, "grad_norm": 0.47215425968170166, "learning_rate": 4.481496543310289e-06, "loss": 0.4702, "step": 2204 }, { "epoch": 0.2241764945099634, "grad_norm": 0.428463339805603, "learning_rate": 4.483529890199269e-06, "loss": 0.4144, "step": 2205 }, { "epoch": 0.22427816185441235, "grad_norm": 0.43833622336387634, "learning_rate": 4.4855632370882475e-06, "loss": 0.4169, "step": 2206 }, { "epoch": 0.22437982919886132, "grad_norm": 0.43681642413139343, "learning_rate": 4.487596583977227e-06, "loss": 0.4343, "step": 2207 }, { "epoch": 0.2244814965433103, "grad_norm": 0.4399432837963104, "learning_rate": 4.4896299308662065e-06, "loss": 0.4565, "step": 2208 }, { "epoch": 0.22458316388775926, "grad_norm": 0.45308372378349304, "learning_rate": 4.491663277755185e-06, "loss": 0.4126, "step": 2209 }, { "epoch": 0.22468483123220823, "grad_norm": 0.4455092251300812, "learning_rate": 4.493696624644165e-06, "loss": 0.4239, "step": 2210 }, { "epoch": 0.22478649857665717, "grad_norm": 0.5279456973075867, "learning_rate": 4.495729971533144e-06, "loss": 0.4606, "step": 2211 }, { "epoch": 0.22488816592110614, "grad_norm": 0.42773929238319397, "learning_rate": 4.497763318422123e-06, "loss": 0.4176, "step": 2212 }, { "epoch": 0.2249898332655551, "grad_norm": 0.4188400208950043, "learning_rate": 4.499796665311102e-06, "loss": 0.4076, "step": 2213 }, { "epoch": 0.22509150061000407, "grad_norm": 0.45990288257598877, "learning_rate": 4.501830012200082e-06, "loss": 0.4256, "step": 2214 }, { "epoch": 0.22519316795445302, "grad_norm": 0.4991512596607208, "learning_rate": 4.50386335908906e-06, "loss": 0.4216, "step": 2215 }, { "epoch": 0.22529483529890199, "grad_norm": 0.44663137197494507, "learning_rate": 4.50589670597804e-06, "loss": 0.419, "step": 2216 }, { "epoch": 0.22539650264335095, "grad_norm": 0.4789818823337555, "learning_rate": 4.507930052867019e-06, "loss": 0.4279, "step": 2217 }, { "epoch": 0.22549816998779992, "grad_norm": 0.451800674200058, "learning_rate": 4.509963399755999e-06, "loss": 0.4125, "step": 2218 }, { "epoch": 0.2255998373322489, "grad_norm": 0.4300822615623474, "learning_rate": 4.511996746644978e-06, "loss": 0.4354, "step": 2219 }, { "epoch": 0.22570150467669783, "grad_norm": 0.4396800696849823, "learning_rate": 4.514030093533957e-06, "loss": 0.4107, "step": 2220 }, { "epoch": 0.2258031720211468, "grad_norm": 0.5798729062080383, "learning_rate": 4.5160634404229365e-06, "loss": 0.4062, "step": 2221 }, { "epoch": 0.22590483936559577, "grad_norm": 0.43912607431411743, "learning_rate": 4.518096787311916e-06, "loss": 0.4038, "step": 2222 }, { "epoch": 0.22600650671004474, "grad_norm": 0.4622056484222412, "learning_rate": 4.5201301342008954e-06, "loss": 0.4224, "step": 2223 }, { "epoch": 0.2261081740544937, "grad_norm": 0.4851472079753876, "learning_rate": 4.522163481089875e-06, "loss": 0.4722, "step": 2224 }, { "epoch": 0.22620984139894265, "grad_norm": 0.579922616481781, "learning_rate": 4.5241968279788536e-06, "loss": 0.418, "step": 2225 }, { "epoch": 0.22631150874339162, "grad_norm": 0.42270755767822266, "learning_rate": 4.526230174867833e-06, "loss": 0.4671, "step": 2226 }, { "epoch": 0.2264131760878406, "grad_norm": 0.4201939105987549, "learning_rate": 4.5282635217568125e-06, "loss": 0.4054, "step": 2227 }, { "epoch": 0.22651484343228956, "grad_norm": 0.5359200239181519, "learning_rate": 4.530296868645791e-06, "loss": 0.4357, "step": 2228 }, { "epoch": 0.2266165107767385, "grad_norm": 0.5060031414031982, "learning_rate": 4.532330215534771e-06, "loss": 0.4352, "step": 2229 }, { "epoch": 0.22671817812118747, "grad_norm": 0.4403473436832428, "learning_rate": 4.53436356242375e-06, "loss": 0.4625, "step": 2230 }, { "epoch": 0.22681984546563644, "grad_norm": 0.5431466102600098, "learning_rate": 4.536396909312729e-06, "loss": 0.439, "step": 2231 }, { "epoch": 0.2269215128100854, "grad_norm": 0.5010639429092407, "learning_rate": 4.538430256201708e-06, "loss": 0.4339, "step": 2232 }, { "epoch": 0.22702318015453438, "grad_norm": 0.44365206360816956, "learning_rate": 4.540463603090688e-06, "loss": 0.4521, "step": 2233 }, { "epoch": 0.22712484749898332, "grad_norm": 0.4537076950073242, "learning_rate": 4.542496949979666e-06, "loss": 0.4513, "step": 2234 }, { "epoch": 0.2272265148434323, "grad_norm": 0.49667033553123474, "learning_rate": 4.544530296868646e-06, "loss": 0.4178, "step": 2235 }, { "epoch": 0.22732818218788126, "grad_norm": 0.41758859157562256, "learning_rate": 4.546563643757625e-06, "loss": 0.4321, "step": 2236 }, { "epoch": 0.22742984953233022, "grad_norm": 0.44621285796165466, "learning_rate": 4.548596990646605e-06, "loss": 0.4293, "step": 2237 }, { "epoch": 0.22753151687677917, "grad_norm": 0.45123153924942017, "learning_rate": 4.5506303375355835e-06, "loss": 0.4149, "step": 2238 }, { "epoch": 0.22763318422122814, "grad_norm": 0.5097553133964539, "learning_rate": 4.552663684424563e-06, "loss": 0.4151, "step": 2239 }, { "epoch": 0.2277348515656771, "grad_norm": 0.42790907621383667, "learning_rate": 4.5546970313135425e-06, "loss": 0.421, "step": 2240 }, { "epoch": 0.22783651891012607, "grad_norm": 0.49576637148857117, "learning_rate": 4.556730378202522e-06, "loss": 0.4736, "step": 2241 }, { "epoch": 0.22793818625457504, "grad_norm": 0.5209833979606628, "learning_rate": 4.5587637250915015e-06, "loss": 0.4287, "step": 2242 }, { "epoch": 0.22803985359902398, "grad_norm": 0.46051281690597534, "learning_rate": 4.56079707198048e-06, "loss": 0.4534, "step": 2243 }, { "epoch": 0.22814152094347295, "grad_norm": 0.4821177124977112, "learning_rate": 4.56283041886946e-06, "loss": 0.4458, "step": 2244 }, { "epoch": 0.22824318828792192, "grad_norm": 0.5168496370315552, "learning_rate": 4.564863765758439e-06, "loss": 0.4043, "step": 2245 }, { "epoch": 0.2283448556323709, "grad_norm": 0.4672718346118927, "learning_rate": 4.566897112647419e-06, "loss": 0.4094, "step": 2246 }, { "epoch": 0.22844652297681983, "grad_norm": 0.4269811511039734, "learning_rate": 4.568930459536397e-06, "loss": 0.4283, "step": 2247 }, { "epoch": 0.2285481903212688, "grad_norm": 0.527012050151825, "learning_rate": 4.570963806425377e-06, "loss": 0.4548, "step": 2248 }, { "epoch": 0.22864985766571777, "grad_norm": 0.5112593173980713, "learning_rate": 4.572997153314356e-06, "loss": 0.4244, "step": 2249 }, { "epoch": 0.22875152501016674, "grad_norm": 0.4632225036621094, "learning_rate": 4.575030500203335e-06, "loss": 0.465, "step": 2250 }, { "epoch": 0.2288531923546157, "grad_norm": 0.4905025064945221, "learning_rate": 4.577063847092314e-06, "loss": 0.4162, "step": 2251 }, { "epoch": 0.22895485969906465, "grad_norm": 0.44249939918518066, "learning_rate": 4.579097193981293e-06, "loss": 0.418, "step": 2252 }, { "epoch": 0.22905652704351362, "grad_norm": 0.3798874318599701, "learning_rate": 4.5811305408702725e-06, "loss": 0.402, "step": 2253 }, { "epoch": 0.2291581943879626, "grad_norm": 0.43738502264022827, "learning_rate": 4.583163887759252e-06, "loss": 0.4255, "step": 2254 }, { "epoch": 0.22925986173241156, "grad_norm": 0.46188071370124817, "learning_rate": 4.5851972346482314e-06, "loss": 0.4129, "step": 2255 }, { "epoch": 0.2293615290768605, "grad_norm": 0.43701937794685364, "learning_rate": 4.58723058153721e-06, "loss": 0.4342, "step": 2256 }, { "epoch": 0.22946319642130947, "grad_norm": 0.4200954735279083, "learning_rate": 4.5892639284261896e-06, "loss": 0.4311, "step": 2257 }, { "epoch": 0.22956486376575844, "grad_norm": 0.4837677776813507, "learning_rate": 4.591297275315169e-06, "loss": 0.4239, "step": 2258 }, { "epoch": 0.2296665311102074, "grad_norm": 0.49631398916244507, "learning_rate": 4.5933306222041485e-06, "loss": 0.4373, "step": 2259 }, { "epoch": 0.22976819845465637, "grad_norm": 0.46831992268562317, "learning_rate": 4.595363969093128e-06, "loss": 0.4366, "step": 2260 }, { "epoch": 0.22986986579910532, "grad_norm": 0.5474421977996826, "learning_rate": 4.597397315982107e-06, "loss": 0.4619, "step": 2261 }, { "epoch": 0.22997153314355429, "grad_norm": 0.4602077305316925, "learning_rate": 4.599430662871086e-06, "loss": 0.4644, "step": 2262 }, { "epoch": 0.23007320048800325, "grad_norm": 0.45843854546546936, "learning_rate": 4.601464009760066e-06, "loss": 0.4364, "step": 2263 }, { "epoch": 0.23017486783245222, "grad_norm": 0.4292723834514618, "learning_rate": 4.603497356649045e-06, "loss": 0.4361, "step": 2264 }, { "epoch": 0.2302765351769012, "grad_norm": 0.49387550354003906, "learning_rate": 4.605530703538025e-06, "loss": 0.4464, "step": 2265 }, { "epoch": 0.23037820252135013, "grad_norm": 0.4760640859603882, "learning_rate": 4.607564050427003e-06, "loss": 0.4476, "step": 2266 }, { "epoch": 0.2304798698657991, "grad_norm": 0.44296202063560486, "learning_rate": 4.609597397315983e-06, "loss": 0.4758, "step": 2267 }, { "epoch": 0.23058153721024807, "grad_norm": 0.4545210003852844, "learning_rate": 4.611630744204962e-06, "loss": 0.4178, "step": 2268 }, { "epoch": 0.23068320455469704, "grad_norm": 0.46424219012260437, "learning_rate": 4.613664091093941e-06, "loss": 0.4174, "step": 2269 }, { "epoch": 0.23078487189914598, "grad_norm": 0.45503538846969604, "learning_rate": 4.61569743798292e-06, "loss": 0.4278, "step": 2270 }, { "epoch": 0.23088653924359495, "grad_norm": 0.4008791148662567, "learning_rate": 4.617730784871899e-06, "loss": 0.3916, "step": 2271 }, { "epoch": 0.23098820658804392, "grad_norm": 0.4334769546985626, "learning_rate": 4.6197641317608785e-06, "loss": 0.4465, "step": 2272 }, { "epoch": 0.2310898739324929, "grad_norm": 0.41092658042907715, "learning_rate": 4.621797478649858e-06, "loss": 0.4179, "step": 2273 }, { "epoch": 0.23119154127694186, "grad_norm": 0.44374701380729675, "learning_rate": 4.6238308255388375e-06, "loss": 0.4277, "step": 2274 }, { "epoch": 0.2312932086213908, "grad_norm": 0.4794762432575226, "learning_rate": 4.625864172427816e-06, "loss": 0.4617, "step": 2275 }, { "epoch": 0.23139487596583977, "grad_norm": 0.41754424571990967, "learning_rate": 4.627897519316796e-06, "loss": 0.4321, "step": 2276 }, { "epoch": 0.23149654331028874, "grad_norm": 0.4677295386791229, "learning_rate": 4.629930866205775e-06, "loss": 0.4298, "step": 2277 }, { "epoch": 0.2315982106547377, "grad_norm": 0.4547785520553589, "learning_rate": 4.6319642130947546e-06, "loss": 0.4517, "step": 2278 }, { "epoch": 0.23169987799918665, "grad_norm": 0.46369320154190063, "learning_rate": 4.633997559983733e-06, "loss": 0.4479, "step": 2279 }, { "epoch": 0.23180154534363562, "grad_norm": 0.48569655418395996, "learning_rate": 4.636030906872713e-06, "loss": 0.4332, "step": 2280 }, { "epoch": 0.2319032126880846, "grad_norm": 0.45528411865234375, "learning_rate": 4.638064253761692e-06, "loss": 0.4489, "step": 2281 }, { "epoch": 0.23200488003253356, "grad_norm": 0.4426608979701996, "learning_rate": 4.640097600650672e-06, "loss": 0.4238, "step": 2282 }, { "epoch": 0.23210654737698252, "grad_norm": 0.49291694164276123, "learning_rate": 4.642130947539651e-06, "loss": 0.4585, "step": 2283 }, { "epoch": 0.23220821472143147, "grad_norm": 0.40130412578582764, "learning_rate": 4.64416429442863e-06, "loss": 0.4079, "step": 2284 }, { "epoch": 0.23230988206588044, "grad_norm": 0.44553840160369873, "learning_rate": 4.646197641317609e-06, "loss": 0.4383, "step": 2285 }, { "epoch": 0.2324115494103294, "grad_norm": 0.42347994446754456, "learning_rate": 4.648230988206589e-06, "loss": 0.4818, "step": 2286 }, { "epoch": 0.23251321675477837, "grad_norm": 0.4471082091331482, "learning_rate": 4.650264335095568e-06, "loss": 0.3807, "step": 2287 }, { "epoch": 0.23261488409922731, "grad_norm": 0.43964841961860657, "learning_rate": 4.652297681984547e-06, "loss": 0.4453, "step": 2288 }, { "epoch": 0.23271655144367628, "grad_norm": 0.4241390526294708, "learning_rate": 4.654331028873526e-06, "loss": 0.4475, "step": 2289 }, { "epoch": 0.23281821878812525, "grad_norm": 0.4243983030319214, "learning_rate": 4.656364375762505e-06, "loss": 0.4144, "step": 2290 }, { "epoch": 0.23291988613257422, "grad_norm": 0.4027256369590759, "learning_rate": 4.6583977226514845e-06, "loss": 0.4787, "step": 2291 }, { "epoch": 0.2330215534770232, "grad_norm": 0.42860493063926697, "learning_rate": 4.660431069540464e-06, "loss": 0.4619, "step": 2292 }, { "epoch": 0.23312322082147213, "grad_norm": 0.41163286566734314, "learning_rate": 4.662464416429443e-06, "loss": 0.4256, "step": 2293 }, { "epoch": 0.2332248881659211, "grad_norm": 0.4210297763347626, "learning_rate": 4.664497763318422e-06, "loss": 0.4905, "step": 2294 }, { "epoch": 0.23332655551037007, "grad_norm": 0.4318993389606476, "learning_rate": 4.666531110207402e-06, "loss": 0.4196, "step": 2295 }, { "epoch": 0.23342822285481904, "grad_norm": 0.4157285690307617, "learning_rate": 4.668564457096381e-06, "loss": 0.4107, "step": 2296 }, { "epoch": 0.233529890199268, "grad_norm": 0.45276501774787903, "learning_rate": 4.670597803985361e-06, "loss": 0.4195, "step": 2297 }, { "epoch": 0.23363155754371695, "grad_norm": 0.39976006746292114, "learning_rate": 4.672631150874339e-06, "loss": 0.3889, "step": 2298 }, { "epoch": 0.23373322488816592, "grad_norm": 0.4479045867919922, "learning_rate": 4.674664497763319e-06, "loss": 0.4338, "step": 2299 }, { "epoch": 0.2338348922326149, "grad_norm": 0.44888249039649963, "learning_rate": 4.676697844652298e-06, "loss": 0.4137, "step": 2300 }, { "epoch": 0.23393655957706386, "grad_norm": 0.4519737660884857, "learning_rate": 4.678731191541278e-06, "loss": 0.4134, "step": 2301 }, { "epoch": 0.2340382269215128, "grad_norm": 0.41430890560150146, "learning_rate": 4.680764538430256e-06, "loss": 0.4406, "step": 2302 }, { "epoch": 0.23413989426596177, "grad_norm": 0.42556726932525635, "learning_rate": 4.682797885319236e-06, "loss": 0.4413, "step": 2303 }, { "epoch": 0.23424156161041074, "grad_norm": 0.4614289402961731, "learning_rate": 4.684831232208215e-06, "loss": 0.4201, "step": 2304 }, { "epoch": 0.2343432289548597, "grad_norm": 0.4118112623691559, "learning_rate": 4.686864579097195e-06, "loss": 0.419, "step": 2305 }, { "epoch": 0.23444489629930867, "grad_norm": 0.40701818466186523, "learning_rate": 4.6888979259861735e-06, "loss": 0.427, "step": 2306 }, { "epoch": 0.23454656364375762, "grad_norm": 0.4704825282096863, "learning_rate": 4.690931272875153e-06, "loss": 0.4107, "step": 2307 }, { "epoch": 0.23464823098820659, "grad_norm": 0.4691907465457916, "learning_rate": 4.6929646197641324e-06, "loss": 0.4312, "step": 2308 }, { "epoch": 0.23474989833265555, "grad_norm": 0.4117100238800049, "learning_rate": 4.694997966653111e-06, "loss": 0.4611, "step": 2309 }, { "epoch": 0.23485156567710452, "grad_norm": 0.47897398471832275, "learning_rate": 4.6970313135420906e-06, "loss": 0.4539, "step": 2310 }, { "epoch": 0.23495323302155346, "grad_norm": 0.3925648033618927, "learning_rate": 4.69906466043107e-06, "loss": 0.4133, "step": 2311 }, { "epoch": 0.23505490036600243, "grad_norm": 0.4573636054992676, "learning_rate": 4.701098007320049e-06, "loss": 0.4257, "step": 2312 }, { "epoch": 0.2351565677104514, "grad_norm": 0.42527657747268677, "learning_rate": 4.703131354209028e-06, "loss": 0.46, "step": 2313 }, { "epoch": 0.23525823505490037, "grad_norm": 0.48353642225265503, "learning_rate": 4.705164701098008e-06, "loss": 0.4334, "step": 2314 }, { "epoch": 0.23535990239934934, "grad_norm": 0.5184385180473328, "learning_rate": 4.707198047986987e-06, "loss": 0.4489, "step": 2315 }, { "epoch": 0.23546156974379828, "grad_norm": 0.4644487798213959, "learning_rate": 4.709231394875966e-06, "loss": 0.417, "step": 2316 }, { "epoch": 0.23556323708824725, "grad_norm": 0.4363429546356201, "learning_rate": 4.711264741764945e-06, "loss": 0.4192, "step": 2317 }, { "epoch": 0.23566490443269622, "grad_norm": 0.4472484588623047, "learning_rate": 4.713298088653925e-06, "loss": 0.4264, "step": 2318 }, { "epoch": 0.2357665717771452, "grad_norm": 0.44218143820762634, "learning_rate": 4.715331435542904e-06, "loss": 0.4313, "step": 2319 }, { "epoch": 0.23586823912159413, "grad_norm": 0.48180097341537476, "learning_rate": 4.717364782431883e-06, "loss": 0.4165, "step": 2320 }, { "epoch": 0.2359699064660431, "grad_norm": 0.43971338868141174, "learning_rate": 4.719398129320862e-06, "loss": 0.4317, "step": 2321 }, { "epoch": 0.23607157381049207, "grad_norm": 0.40111422538757324, "learning_rate": 4.721431476209842e-06, "loss": 0.4576, "step": 2322 }, { "epoch": 0.23617324115494104, "grad_norm": 0.46649110317230225, "learning_rate": 4.723464823098821e-06, "loss": 0.4313, "step": 2323 }, { "epoch": 0.23627490849939, "grad_norm": 0.42508459091186523, "learning_rate": 4.725498169987801e-06, "loss": 0.4633, "step": 2324 }, { "epoch": 0.23637657584383895, "grad_norm": 0.47273746132850647, "learning_rate": 4.7275315168767795e-06, "loss": 0.416, "step": 2325 }, { "epoch": 0.23647824318828792, "grad_norm": 0.5135967135429382, "learning_rate": 4.729564863765759e-06, "loss": 0.4318, "step": 2326 }, { "epoch": 0.2365799105327369, "grad_norm": 0.4303535223007202, "learning_rate": 4.7315982106547385e-06, "loss": 0.4328, "step": 2327 }, { "epoch": 0.23668157787718586, "grad_norm": 0.43410053849220276, "learning_rate": 4.733631557543717e-06, "loss": 0.3782, "step": 2328 }, { "epoch": 0.2367832452216348, "grad_norm": 0.4388290345668793, "learning_rate": 4.735664904432697e-06, "loss": 0.4175, "step": 2329 }, { "epoch": 0.23688491256608377, "grad_norm": 0.45809656381607056, "learning_rate": 4.737698251321676e-06, "loss": 0.4171, "step": 2330 }, { "epoch": 0.23698657991053274, "grad_norm": 0.39080795645713806, "learning_rate": 4.739731598210655e-06, "loss": 0.3956, "step": 2331 }, { "epoch": 0.2370882472549817, "grad_norm": 0.409326434135437, "learning_rate": 4.741764945099634e-06, "loss": 0.3955, "step": 2332 }, { "epoch": 0.23718991459943067, "grad_norm": 0.4207221567630768, "learning_rate": 4.743798291988614e-06, "loss": 0.4348, "step": 2333 }, { "epoch": 0.23729158194387961, "grad_norm": 0.41953980922698975, "learning_rate": 4.745831638877592e-06, "loss": 0.4193, "step": 2334 }, { "epoch": 0.23739324928832858, "grad_norm": 0.39022380113601685, "learning_rate": 4.747864985766572e-06, "loss": 0.4241, "step": 2335 }, { "epoch": 0.23749491663277755, "grad_norm": 0.4360845983028412, "learning_rate": 4.749898332655551e-06, "loss": 0.4371, "step": 2336 }, { "epoch": 0.23759658397722652, "grad_norm": 0.42991942167282104, "learning_rate": 4.751931679544531e-06, "loss": 0.4496, "step": 2337 }, { "epoch": 0.2376982513216755, "grad_norm": 0.42730647325515747, "learning_rate": 4.75396502643351e-06, "loss": 0.4279, "step": 2338 }, { "epoch": 0.23779991866612443, "grad_norm": 0.4619034230709076, "learning_rate": 4.755998373322489e-06, "loss": 0.4124, "step": 2339 }, { "epoch": 0.2379015860105734, "grad_norm": 0.44071924686431885, "learning_rate": 4.7580317202114684e-06, "loss": 0.4197, "step": 2340 }, { "epoch": 0.23800325335502237, "grad_norm": 0.40015172958374023, "learning_rate": 4.760065067100448e-06, "loss": 0.438, "step": 2341 }, { "epoch": 0.23810492069947134, "grad_norm": 0.44374141097068787, "learning_rate": 4.762098413989427e-06, "loss": 0.43, "step": 2342 }, { "epoch": 0.23820658804392028, "grad_norm": 0.5008513927459717, "learning_rate": 4.764131760878406e-06, "loss": 0.4197, "step": 2343 }, { "epoch": 0.23830825538836925, "grad_norm": 0.41386324167251587, "learning_rate": 4.7661651077673855e-06, "loss": 0.4031, "step": 2344 }, { "epoch": 0.23840992273281822, "grad_norm": 0.42672356963157654, "learning_rate": 4.768198454656365e-06, "loss": 0.41, "step": 2345 }, { "epoch": 0.2385115900772672, "grad_norm": 0.5152791738510132, "learning_rate": 4.7702318015453445e-06, "loss": 0.4541, "step": 2346 }, { "epoch": 0.23861325742171616, "grad_norm": 0.48112183809280396, "learning_rate": 4.772265148434323e-06, "loss": 0.4154, "step": 2347 }, { "epoch": 0.2387149247661651, "grad_norm": 0.47671863436698914, "learning_rate": 4.774298495323303e-06, "loss": 0.4168, "step": 2348 }, { "epoch": 0.23881659211061407, "grad_norm": 0.47216129302978516, "learning_rate": 4.776331842212282e-06, "loss": 0.4356, "step": 2349 }, { "epoch": 0.23891825945506304, "grad_norm": 0.47364985942840576, "learning_rate": 4.778365189101261e-06, "loss": 0.4717, "step": 2350 }, { "epoch": 0.239019926799512, "grad_norm": 0.38752061128616333, "learning_rate": 4.78039853599024e-06, "loss": 0.4202, "step": 2351 }, { "epoch": 0.23912159414396095, "grad_norm": 0.4354473352432251, "learning_rate": 4.78243188287922e-06, "loss": 0.441, "step": 2352 }, { "epoch": 0.23922326148840992, "grad_norm": 0.5319405198097229, "learning_rate": 4.784465229768198e-06, "loss": 0.4226, "step": 2353 }, { "epoch": 0.23932492883285889, "grad_norm": 0.49046197533607483, "learning_rate": 4.786498576657178e-06, "loss": 0.4461, "step": 2354 }, { "epoch": 0.23942659617730785, "grad_norm": 0.4293956756591797, "learning_rate": 4.788531923546157e-06, "loss": 0.423, "step": 2355 }, { "epoch": 0.23952826352175682, "grad_norm": 0.487222820520401, "learning_rate": 4.790565270435137e-06, "loss": 0.4357, "step": 2356 }, { "epoch": 0.23962993086620576, "grad_norm": 0.5057321786880493, "learning_rate": 4.7925986173241155e-06, "loss": 0.4452, "step": 2357 }, { "epoch": 0.23973159821065473, "grad_norm": 0.4406130313873291, "learning_rate": 4.794631964213095e-06, "loss": 0.4182, "step": 2358 }, { "epoch": 0.2398332655551037, "grad_norm": 0.42162811756134033, "learning_rate": 4.7966653111020745e-06, "loss": 0.3813, "step": 2359 }, { "epoch": 0.23993493289955267, "grad_norm": 0.46229931712150574, "learning_rate": 4.798698657991054e-06, "loss": 0.4489, "step": 2360 }, { "epoch": 0.2400366002440016, "grad_norm": 0.4680103361606598, "learning_rate": 4.800732004880033e-06, "loss": 0.4371, "step": 2361 }, { "epoch": 0.24013826758845058, "grad_norm": 0.4257441759109497, "learning_rate": 4.802765351769012e-06, "loss": 0.4344, "step": 2362 }, { "epoch": 0.24023993493289955, "grad_norm": 0.5043687224388123, "learning_rate": 4.804798698657992e-06, "loss": 0.4339, "step": 2363 }, { "epoch": 0.24034160227734852, "grad_norm": 0.4058975279331207, "learning_rate": 4.806832045546971e-06, "loss": 0.3998, "step": 2364 }, { "epoch": 0.2404432696217975, "grad_norm": 0.4042733311653137, "learning_rate": 4.8088653924359506e-06, "loss": 0.4507, "step": 2365 }, { "epoch": 0.24054493696624643, "grad_norm": 0.4521684944629669, "learning_rate": 4.810898739324929e-06, "loss": 0.4208, "step": 2366 }, { "epoch": 0.2406466043106954, "grad_norm": 0.41079363226890564, "learning_rate": 4.812932086213909e-06, "loss": 0.4078, "step": 2367 }, { "epoch": 0.24074827165514437, "grad_norm": 0.4342345893383026, "learning_rate": 4.814965433102888e-06, "loss": 0.4093, "step": 2368 }, { "epoch": 0.24084993899959334, "grad_norm": 0.42004698514938354, "learning_rate": 4.816998779991867e-06, "loss": 0.4374, "step": 2369 }, { "epoch": 0.2409516063440423, "grad_norm": 0.44684144854545593, "learning_rate": 4.819032126880846e-06, "loss": 0.4048, "step": 2370 }, { "epoch": 0.24105327368849125, "grad_norm": 0.4208434820175171, "learning_rate": 4.821065473769826e-06, "loss": 0.4023, "step": 2371 }, { "epoch": 0.24115494103294022, "grad_norm": 0.4543077051639557, "learning_rate": 4.8230988206588044e-06, "loss": 0.4069, "step": 2372 }, { "epoch": 0.2412566083773892, "grad_norm": 0.4346877336502075, "learning_rate": 4.825132167547784e-06, "loss": 0.4222, "step": 2373 }, { "epoch": 0.24135827572183816, "grad_norm": 0.4112613797187805, "learning_rate": 4.827165514436763e-06, "loss": 0.4573, "step": 2374 }, { "epoch": 0.2414599430662871, "grad_norm": 0.3803420662879944, "learning_rate": 4.829198861325742e-06, "loss": 0.4156, "step": 2375 }, { "epoch": 0.24156161041073607, "grad_norm": 0.4150071144104004, "learning_rate": 4.8312322082147215e-06, "loss": 0.4252, "step": 2376 }, { "epoch": 0.24166327775518504, "grad_norm": 0.4523390531539917, "learning_rate": 4.833265555103701e-06, "loss": 0.436, "step": 2377 }, { "epoch": 0.241764945099634, "grad_norm": 0.43008238077163696, "learning_rate": 4.8352989019926805e-06, "loss": 0.4692, "step": 2378 }, { "epoch": 0.24186661244408297, "grad_norm": 0.4084428548812866, "learning_rate": 4.83733224888166e-06, "loss": 0.4239, "step": 2379 }, { "epoch": 0.24196827978853191, "grad_norm": 0.49105915427207947, "learning_rate": 4.839365595770639e-06, "loss": 0.4377, "step": 2380 }, { "epoch": 0.24206994713298088, "grad_norm": 0.46570315957069397, "learning_rate": 4.841398942659618e-06, "loss": 0.4287, "step": 2381 }, { "epoch": 0.24217161447742985, "grad_norm": 0.4139470160007477, "learning_rate": 4.843432289548598e-06, "loss": 0.408, "step": 2382 }, { "epoch": 0.24227328182187882, "grad_norm": 0.42753174901008606, "learning_rate": 4.845465636437577e-06, "loss": 0.4268, "step": 2383 }, { "epoch": 0.24237494916632776, "grad_norm": 0.45712941884994507, "learning_rate": 4.847498983326556e-06, "loss": 0.4048, "step": 2384 }, { "epoch": 0.24247661651077673, "grad_norm": 0.42149677872657776, "learning_rate": 4.849532330215535e-06, "loss": 0.438, "step": 2385 }, { "epoch": 0.2425782838552257, "grad_norm": 0.4781014025211334, "learning_rate": 4.851565677104515e-06, "loss": 0.4591, "step": 2386 }, { "epoch": 0.24267995119967467, "grad_norm": 0.4197218716144562, "learning_rate": 4.853599023993494e-06, "loss": 0.4041, "step": 2387 }, { "epoch": 0.24278161854412364, "grad_norm": 0.4751385450363159, "learning_rate": 4.855632370882473e-06, "loss": 0.4478, "step": 2388 }, { "epoch": 0.24288328588857258, "grad_norm": 0.5075346231460571, "learning_rate": 4.857665717771452e-06, "loss": 0.441, "step": 2389 }, { "epoch": 0.24298495323302155, "grad_norm": 0.4586661756038666, "learning_rate": 4.859699064660431e-06, "loss": 0.4281, "step": 2390 }, { "epoch": 0.24308662057747052, "grad_norm": 0.5102365612983704, "learning_rate": 4.8617324115494105e-06, "loss": 0.4673, "step": 2391 }, { "epoch": 0.2431882879219195, "grad_norm": 0.4875304102897644, "learning_rate": 4.86376575843839e-06, "loss": 0.4205, "step": 2392 }, { "epoch": 0.24328995526636843, "grad_norm": 0.5167120695114136, "learning_rate": 4.865799105327369e-06, "loss": 0.4475, "step": 2393 }, { "epoch": 0.2433916226108174, "grad_norm": 0.5118466019630432, "learning_rate": 4.867832452216348e-06, "loss": 0.3976, "step": 2394 }, { "epoch": 0.24349328995526637, "grad_norm": 0.4712766706943512, "learning_rate": 4.8698657991053276e-06, "loss": 0.4144, "step": 2395 }, { "epoch": 0.24359495729971534, "grad_norm": 0.4614070951938629, "learning_rate": 4.871899145994307e-06, "loss": 0.4494, "step": 2396 }, { "epoch": 0.2436966246441643, "grad_norm": 0.44888806343078613, "learning_rate": 4.8739324928832865e-06, "loss": 0.4333, "step": 2397 }, { "epoch": 0.24379829198861325, "grad_norm": 0.4672258198261261, "learning_rate": 4.875965839772265e-06, "loss": 0.4586, "step": 2398 }, { "epoch": 0.24389995933306222, "grad_norm": 0.44360846281051636, "learning_rate": 4.877999186661245e-06, "loss": 0.4183, "step": 2399 }, { "epoch": 0.24400162667751119, "grad_norm": 0.4292154908180237, "learning_rate": 4.880032533550224e-06, "loss": 0.4417, "step": 2400 }, { "epoch": 0.24410329402196015, "grad_norm": 0.49577784538269043, "learning_rate": 4.882065880439204e-06, "loss": 0.4098, "step": 2401 }, { "epoch": 0.2442049613664091, "grad_norm": 0.43661579489707947, "learning_rate": 4.884099227328182e-06, "loss": 0.4419, "step": 2402 }, { "epoch": 0.24430662871085806, "grad_norm": 0.4537522792816162, "learning_rate": 4.886132574217162e-06, "loss": 0.4284, "step": 2403 }, { "epoch": 0.24440829605530703, "grad_norm": 0.4726867079734802, "learning_rate": 4.888165921106141e-06, "loss": 0.4201, "step": 2404 }, { "epoch": 0.244509963399756, "grad_norm": 0.4728323519229889, "learning_rate": 4.890199267995121e-06, "loss": 0.4488, "step": 2405 }, { "epoch": 0.24461163074420497, "grad_norm": 0.3949965834617615, "learning_rate": 4.8922326148841e-06, "loss": 0.4466, "step": 2406 }, { "epoch": 0.2447132980886539, "grad_norm": 0.4278331398963928, "learning_rate": 4.894265961773079e-06, "loss": 0.4253, "step": 2407 }, { "epoch": 0.24481496543310288, "grad_norm": 0.5502114295959473, "learning_rate": 4.896299308662058e-06, "loss": 0.445, "step": 2408 }, { "epoch": 0.24491663277755185, "grad_norm": 0.42770811915397644, "learning_rate": 4.898332655551037e-06, "loss": 0.4438, "step": 2409 }, { "epoch": 0.24501830012200082, "grad_norm": 0.3913479149341583, "learning_rate": 4.9003660024400165e-06, "loss": 0.432, "step": 2410 }, { "epoch": 0.2451199674664498, "grad_norm": 0.4496048092842102, "learning_rate": 4.902399349328996e-06, "loss": 0.4367, "step": 2411 }, { "epoch": 0.24522163481089873, "grad_norm": 0.49491870403289795, "learning_rate": 4.904432696217975e-06, "loss": 0.406, "step": 2412 }, { "epoch": 0.2453233021553477, "grad_norm": 0.4160254895687103, "learning_rate": 4.906466043106954e-06, "loss": 0.4234, "step": 2413 }, { "epoch": 0.24542496949979667, "grad_norm": 0.4344702661037445, "learning_rate": 4.908499389995934e-06, "loss": 0.4667, "step": 2414 }, { "epoch": 0.24552663684424564, "grad_norm": 0.45223814249038696, "learning_rate": 4.910532736884913e-06, "loss": 0.4021, "step": 2415 }, { "epoch": 0.24562830418869458, "grad_norm": 0.4726780951023102, "learning_rate": 4.912566083773892e-06, "loss": 0.4691, "step": 2416 }, { "epoch": 0.24572997153314355, "grad_norm": 0.4364553391933441, "learning_rate": 4.914599430662871e-06, "loss": 0.4259, "step": 2417 }, { "epoch": 0.24583163887759252, "grad_norm": 0.45836225152015686, "learning_rate": 4.916632777551851e-06, "loss": 0.4174, "step": 2418 }, { "epoch": 0.2459333062220415, "grad_norm": 0.4486674666404724, "learning_rate": 4.91866612444083e-06, "loss": 0.4518, "step": 2419 }, { "epoch": 0.24603497356649046, "grad_norm": 0.48917391896247864, "learning_rate": 4.92069947132981e-06, "loss": 0.4138, "step": 2420 }, { "epoch": 0.2461366409109394, "grad_norm": 0.47999587655067444, "learning_rate": 4.922732818218788e-06, "loss": 0.433, "step": 2421 }, { "epoch": 0.24623830825538837, "grad_norm": 0.43165984749794006, "learning_rate": 4.924766165107768e-06, "loss": 0.4626, "step": 2422 }, { "epoch": 0.24633997559983734, "grad_norm": 0.46352046728134155, "learning_rate": 4.926799511996747e-06, "loss": 0.4518, "step": 2423 }, { "epoch": 0.2464416429442863, "grad_norm": 0.4568158686161041, "learning_rate": 4.928832858885727e-06, "loss": 0.412, "step": 2424 }, { "epoch": 0.24654331028873525, "grad_norm": 0.4608744978904724, "learning_rate": 4.9308662057747054e-06, "loss": 0.4464, "step": 2425 }, { "epoch": 0.24664497763318421, "grad_norm": 0.5121987462043762, "learning_rate": 4.932899552663685e-06, "loss": 0.4485, "step": 2426 }, { "epoch": 0.24674664497763318, "grad_norm": 0.46290841698646545, "learning_rate": 4.934932899552664e-06, "loss": 0.4221, "step": 2427 }, { "epoch": 0.24684831232208215, "grad_norm": 0.42262712121009827, "learning_rate": 4.936966246441643e-06, "loss": 0.4165, "step": 2428 }, { "epoch": 0.24694997966653112, "grad_norm": 0.40937548875808716, "learning_rate": 4.9389995933306225e-06, "loss": 0.4555, "step": 2429 }, { "epoch": 0.24705164701098006, "grad_norm": 0.4141698479652405, "learning_rate": 4.941032940219602e-06, "loss": 0.4341, "step": 2430 }, { "epoch": 0.24715331435542903, "grad_norm": 0.4740343987941742, "learning_rate": 4.943066287108581e-06, "loss": 0.4165, "step": 2431 }, { "epoch": 0.247254981699878, "grad_norm": 0.4497627317905426, "learning_rate": 4.94509963399756e-06, "loss": 0.4513, "step": 2432 }, { "epoch": 0.24735664904432697, "grad_norm": 0.39115774631500244, "learning_rate": 4.94713298088654e-06, "loss": 0.4167, "step": 2433 }, { "epoch": 0.2474583163887759, "grad_norm": 0.4854341745376587, "learning_rate": 4.949166327775518e-06, "loss": 0.4418, "step": 2434 }, { "epoch": 0.24755998373322488, "grad_norm": 0.47321566939353943, "learning_rate": 4.951199674664498e-06, "loss": 0.4058, "step": 2435 }, { "epoch": 0.24766165107767385, "grad_norm": 0.43110018968582153, "learning_rate": 4.953233021553477e-06, "loss": 0.421, "step": 2436 }, { "epoch": 0.24776331842212282, "grad_norm": 0.44582751393318176, "learning_rate": 4.955266368442457e-06, "loss": 0.4291, "step": 2437 }, { "epoch": 0.2478649857665718, "grad_norm": 0.4221442937850952, "learning_rate": 4.957299715331436e-06, "loss": 0.4206, "step": 2438 }, { "epoch": 0.24796665311102073, "grad_norm": 0.5071836113929749, "learning_rate": 4.959333062220415e-06, "loss": 0.406, "step": 2439 }, { "epoch": 0.2480683204554697, "grad_norm": 0.4608624279499054, "learning_rate": 4.961366409109394e-06, "loss": 0.4377, "step": 2440 }, { "epoch": 0.24816998779991867, "grad_norm": 0.44073978066444397, "learning_rate": 4.963399755998374e-06, "loss": 0.4243, "step": 2441 }, { "epoch": 0.24827165514436764, "grad_norm": 0.47339075803756714, "learning_rate": 4.965433102887353e-06, "loss": 0.4594, "step": 2442 }, { "epoch": 0.2483733224888166, "grad_norm": 0.5195388197898865, "learning_rate": 4.967466449776333e-06, "loss": 0.4184, "step": 2443 }, { "epoch": 0.24847498983326555, "grad_norm": 0.4812975823879242, "learning_rate": 4.9694997966653115e-06, "loss": 0.4334, "step": 2444 }, { "epoch": 0.24857665717771452, "grad_norm": 0.41750314831733704, "learning_rate": 4.971533143554291e-06, "loss": 0.4266, "step": 2445 }, { "epoch": 0.24867832452216349, "grad_norm": 0.4495498836040497, "learning_rate": 4.9735664904432705e-06, "loss": 0.4655, "step": 2446 }, { "epoch": 0.24877999186661245, "grad_norm": 0.4193887412548065, "learning_rate": 4.975599837332249e-06, "loss": 0.4282, "step": 2447 }, { "epoch": 0.2488816592110614, "grad_norm": 0.47710418701171875, "learning_rate": 4.977633184221229e-06, "loss": 0.4218, "step": 2448 }, { "epoch": 0.24898332655551036, "grad_norm": 0.43264347314834595, "learning_rate": 4.979666531110208e-06, "loss": 0.4117, "step": 2449 }, { "epoch": 0.24908499389995933, "grad_norm": 0.3942376971244812, "learning_rate": 4.981699877999187e-06, "loss": 0.4097, "step": 2450 }, { "epoch": 0.2491866612444083, "grad_norm": 0.4907291531562805, "learning_rate": 4.983733224888166e-06, "loss": 0.408, "step": 2451 }, { "epoch": 0.24928832858885727, "grad_norm": 0.42168328166007996, "learning_rate": 4.985766571777146e-06, "loss": 0.4737, "step": 2452 }, { "epoch": 0.2493899959333062, "grad_norm": 0.448772668838501, "learning_rate": 4.987799918666124e-06, "loss": 0.4282, "step": 2453 }, { "epoch": 0.24949166327775518, "grad_norm": 0.5147849917411804, "learning_rate": 4.989833265555104e-06, "loss": 0.4635, "step": 2454 }, { "epoch": 0.24959333062220415, "grad_norm": 0.455840140581131, "learning_rate": 4.991866612444083e-06, "loss": 0.3941, "step": 2455 }, { "epoch": 0.24969499796665312, "grad_norm": 0.45197445154190063, "learning_rate": 4.993899959333063e-06, "loss": 0.4235, "step": 2456 }, { "epoch": 0.24979666531110206, "grad_norm": 0.41104376316070557, "learning_rate": 4.9959333062220414e-06, "loss": 0.4234, "step": 2457 }, { "epoch": 0.24989833265555103, "grad_norm": 0.5178118348121643, "learning_rate": 4.997966653111021e-06, "loss": 0.4482, "step": 2458 }, { "epoch": 0.25, "grad_norm": 0.45565131306648254, "learning_rate": 5e-06, "loss": 0.458, "step": 2459 }, { "epoch": 0.25010166734444894, "grad_norm": 0.4086945056915283, "learning_rate": 5.002033346888979e-06, "loss": 0.4322, "step": 2460 }, { "epoch": 0.25020333468889794, "grad_norm": 0.44741496443748474, "learning_rate": 5.004066693777959e-06, "loss": 0.4332, "step": 2461 }, { "epoch": 0.2503050020333469, "grad_norm": 0.4324156939983368, "learning_rate": 5.006100040666938e-06, "loss": 0.4595, "step": 2462 }, { "epoch": 0.2504066693777959, "grad_norm": 0.41143786907196045, "learning_rate": 5.0081333875559175e-06, "loss": 0.4097, "step": 2463 }, { "epoch": 0.2505083367222448, "grad_norm": 0.4185080826282501, "learning_rate": 5.010166734444897e-06, "loss": 0.4393, "step": 2464 }, { "epoch": 0.25061000406669376, "grad_norm": 0.4171674847602844, "learning_rate": 5.012200081333876e-06, "loss": 0.4351, "step": 2465 }, { "epoch": 0.25071167141114276, "grad_norm": 0.4788547456264496, "learning_rate": 5.014233428222855e-06, "loss": 0.4382, "step": 2466 }, { "epoch": 0.2508133387555917, "grad_norm": 0.47102561593055725, "learning_rate": 5.016266775111835e-06, "loss": 0.4246, "step": 2467 }, { "epoch": 0.2509150061000407, "grad_norm": 0.4258047044277191, "learning_rate": 5.018300122000814e-06, "loss": 0.4287, "step": 2468 }, { "epoch": 0.25101667344448964, "grad_norm": 0.4479339122772217, "learning_rate": 5.020333468889793e-06, "loss": 0.4323, "step": 2469 }, { "epoch": 0.2511183407889386, "grad_norm": 0.461650013923645, "learning_rate": 5.022366815778772e-06, "loss": 0.4027, "step": 2470 }, { "epoch": 0.2512200081333876, "grad_norm": 0.40040159225463867, "learning_rate": 5.024400162667752e-06, "loss": 0.4129, "step": 2471 }, { "epoch": 0.2513216754778365, "grad_norm": 0.5176302790641785, "learning_rate": 5.02643350955673e-06, "loss": 0.4358, "step": 2472 }, { "epoch": 0.25142334282228546, "grad_norm": 0.4212696850299835, "learning_rate": 5.02846685644571e-06, "loss": 0.4079, "step": 2473 }, { "epoch": 0.25152501016673445, "grad_norm": 0.4101543128490448, "learning_rate": 5.030500203334689e-06, "loss": 0.4467, "step": 2474 }, { "epoch": 0.2516266775111834, "grad_norm": 0.45497816801071167, "learning_rate": 5.032533550223668e-06, "loss": 0.381, "step": 2475 }, { "epoch": 0.2517283448556324, "grad_norm": 0.5060075521469116, "learning_rate": 5.034566897112648e-06, "loss": 0.4384, "step": 2476 }, { "epoch": 0.25183001220008133, "grad_norm": 0.44004419445991516, "learning_rate": 5.036600244001627e-06, "loss": 0.4435, "step": 2477 }, { "epoch": 0.2519316795445303, "grad_norm": 0.41979533433914185, "learning_rate": 5.038633590890606e-06, "loss": 0.4781, "step": 2478 }, { "epoch": 0.25203334688897927, "grad_norm": 0.44942864775657654, "learning_rate": 5.040666937779586e-06, "loss": 0.4205, "step": 2479 }, { "epoch": 0.2521350142334282, "grad_norm": 0.4886516034603119, "learning_rate": 5.0427002846685646e-06, "loss": 0.4342, "step": 2480 }, { "epoch": 0.2522366815778772, "grad_norm": 0.3897854685783386, "learning_rate": 5.044733631557545e-06, "loss": 0.4152, "step": 2481 }, { "epoch": 0.25233834892232615, "grad_norm": 0.46928170323371887, "learning_rate": 5.0467669784465236e-06, "loss": 0.4543, "step": 2482 }, { "epoch": 0.2524400162667751, "grad_norm": 0.49765899777412415, "learning_rate": 5.048800325335502e-06, "loss": 0.4172, "step": 2483 }, { "epoch": 0.2525416836112241, "grad_norm": 0.4581778049468994, "learning_rate": 5.0508336722244825e-06, "loss": 0.4512, "step": 2484 }, { "epoch": 0.25264335095567303, "grad_norm": 0.4208236038684845, "learning_rate": 5.052867019113461e-06, "loss": 0.4711, "step": 2485 }, { "epoch": 0.252745018300122, "grad_norm": 0.4733237624168396, "learning_rate": 5.054900366002441e-06, "loss": 0.3951, "step": 2486 }, { "epoch": 0.25284668564457097, "grad_norm": 0.45711421966552734, "learning_rate": 5.05693371289142e-06, "loss": 0.4022, "step": 2487 }, { "epoch": 0.2529483529890199, "grad_norm": 0.46065568923950195, "learning_rate": 5.058967059780399e-06, "loss": 0.4536, "step": 2488 }, { "epoch": 0.2530500203334689, "grad_norm": 0.3948501944541931, "learning_rate": 5.061000406669378e-06, "loss": 0.4013, "step": 2489 }, { "epoch": 0.25315168767791785, "grad_norm": 0.4847368001937866, "learning_rate": 5.063033753558358e-06, "loss": 0.4435, "step": 2490 }, { "epoch": 0.2532533550223668, "grad_norm": 0.45354726910591125, "learning_rate": 5.065067100447336e-06, "loss": 0.4452, "step": 2491 }, { "epoch": 0.2533550223668158, "grad_norm": 0.438492089509964, "learning_rate": 5.067100447336316e-06, "loss": 0.4461, "step": 2492 }, { "epoch": 0.2534566897112647, "grad_norm": 0.44841432571411133, "learning_rate": 5.0691337942252945e-06, "loss": 0.4294, "step": 2493 }, { "epoch": 0.2535583570557137, "grad_norm": 0.4471572935581207, "learning_rate": 5.071167141114275e-06, "loss": 0.4417, "step": 2494 }, { "epoch": 0.25366002440016266, "grad_norm": 0.41472864151000977, "learning_rate": 5.0732004880032535e-06, "loss": 0.4252, "step": 2495 }, { "epoch": 0.2537616917446116, "grad_norm": 0.4395364224910736, "learning_rate": 5.075233834892232e-06, "loss": 0.423, "step": 2496 }, { "epoch": 0.2538633590890606, "grad_norm": 0.44512465596199036, "learning_rate": 5.0772671817812125e-06, "loss": 0.4096, "step": 2497 }, { "epoch": 0.25396502643350954, "grad_norm": 0.4616410434246063, "learning_rate": 5.079300528670191e-06, "loss": 0.4111, "step": 2498 }, { "epoch": 0.25406669377795854, "grad_norm": 0.4573870003223419, "learning_rate": 5.0813338755591715e-06, "loss": 0.4435, "step": 2499 }, { "epoch": 0.2541683611224075, "grad_norm": 0.4181568920612335, "learning_rate": 5.08336722244815e-06, "loss": 0.4399, "step": 2500 }, { "epoch": 0.2542700284668564, "grad_norm": 0.4461185038089752, "learning_rate": 5.085400569337129e-06, "loss": 0.4259, "step": 2501 }, { "epoch": 0.2543716958113054, "grad_norm": 0.4550215005874634, "learning_rate": 5.087433916226109e-06, "loss": 0.4281, "step": 2502 }, { "epoch": 0.25447336315575436, "grad_norm": 0.41203781962394714, "learning_rate": 5.089467263115088e-06, "loss": 0.4107, "step": 2503 }, { "epoch": 0.25457503050020336, "grad_norm": 0.4447058439254761, "learning_rate": 5.091500610004067e-06, "loss": 0.4021, "step": 2504 }, { "epoch": 0.2546766978446523, "grad_norm": 0.49621570110321045, "learning_rate": 5.093533956893047e-06, "loss": 0.4556, "step": 2505 }, { "epoch": 0.25477836518910124, "grad_norm": 0.4397449791431427, "learning_rate": 5.095567303782025e-06, "loss": 0.4492, "step": 2506 }, { "epoch": 0.25488003253355024, "grad_norm": 0.4154845178127289, "learning_rate": 5.097600650671005e-06, "loss": 0.4125, "step": 2507 }, { "epoch": 0.2549816998779992, "grad_norm": 0.46767884492874146, "learning_rate": 5.099633997559984e-06, "loss": 0.4386, "step": 2508 }, { "epoch": 0.2550833672224482, "grad_norm": 0.4360179901123047, "learning_rate": 5.101667344448964e-06, "loss": 0.447, "step": 2509 }, { "epoch": 0.2551850345668971, "grad_norm": 0.46652495861053467, "learning_rate": 5.1037006913379424e-06, "loss": 0.4556, "step": 2510 }, { "epoch": 0.25528670191134606, "grad_norm": 0.4111737906932831, "learning_rate": 5.105734038226922e-06, "loss": 0.4071, "step": 2511 }, { "epoch": 0.25538836925579506, "grad_norm": 0.39511364698410034, "learning_rate": 5.107767385115901e-06, "loss": 0.3894, "step": 2512 }, { "epoch": 0.255490036600244, "grad_norm": 0.45628857612609863, "learning_rate": 5.10980073200488e-06, "loss": 0.4328, "step": 2513 }, { "epoch": 0.25559170394469294, "grad_norm": 0.4105208218097687, "learning_rate": 5.1118340788938595e-06, "loss": 0.4349, "step": 2514 }, { "epoch": 0.25569337128914194, "grad_norm": 0.38528957962989807, "learning_rate": 5.113867425782839e-06, "loss": 0.4112, "step": 2515 }, { "epoch": 0.2557950386335909, "grad_norm": 0.5127962231636047, "learning_rate": 5.115900772671818e-06, "loss": 0.4264, "step": 2516 }, { "epoch": 0.2558967059780399, "grad_norm": 0.4489695727825165, "learning_rate": 5.117934119560798e-06, "loss": 0.4159, "step": 2517 }, { "epoch": 0.2559983733224888, "grad_norm": 0.4939177334308624, "learning_rate": 5.119967466449777e-06, "loss": 0.4434, "step": 2518 }, { "epoch": 0.25610004066693776, "grad_norm": 0.4411337971687317, "learning_rate": 5.122000813338755e-06, "loss": 0.4344, "step": 2519 }, { "epoch": 0.25620170801138675, "grad_norm": 0.4452061653137207, "learning_rate": 5.124034160227736e-06, "loss": 0.4095, "step": 2520 }, { "epoch": 0.2563033753558357, "grad_norm": 0.5598706007003784, "learning_rate": 5.126067507116714e-06, "loss": 0.4067, "step": 2521 }, { "epoch": 0.2564050427002847, "grad_norm": 0.4705623686313629, "learning_rate": 5.128100854005695e-06, "loss": 0.4217, "step": 2522 }, { "epoch": 0.25650671004473363, "grad_norm": 0.4213141202926636, "learning_rate": 5.130134200894673e-06, "loss": 0.4114, "step": 2523 }, { "epoch": 0.2566083773891826, "grad_norm": 0.4294053316116333, "learning_rate": 5.132167547783652e-06, "loss": 0.4259, "step": 2524 }, { "epoch": 0.25671004473363157, "grad_norm": 0.4700494408607483, "learning_rate": 5.134200894672632e-06, "loss": 0.4126, "step": 2525 }, { "epoch": 0.2568117120780805, "grad_norm": 0.45309239625930786, "learning_rate": 5.136234241561611e-06, "loss": 0.4455, "step": 2526 }, { "epoch": 0.2569133794225295, "grad_norm": 0.3810575008392334, "learning_rate": 5.13826758845059e-06, "loss": 0.3991, "step": 2527 }, { "epoch": 0.25701504676697845, "grad_norm": 0.4130721092224121, "learning_rate": 5.14030093533957e-06, "loss": 0.402, "step": 2528 }, { "epoch": 0.2571167141114274, "grad_norm": 0.4179353713989258, "learning_rate": 5.1423342822285485e-06, "loss": 0.4033, "step": 2529 }, { "epoch": 0.2572183814558764, "grad_norm": 0.40820828080177307, "learning_rate": 5.144367629117528e-06, "loss": 0.4369, "step": 2530 }, { "epoch": 0.25732004880032533, "grad_norm": 0.38583904504776, "learning_rate": 5.146400976006507e-06, "loss": 0.4137, "step": 2531 }, { "epoch": 0.25742171614477427, "grad_norm": 0.44091686606407166, "learning_rate": 5.148434322895486e-06, "loss": 0.4076, "step": 2532 }, { "epoch": 0.25752338348922327, "grad_norm": 0.4160868525505066, "learning_rate": 5.150467669784466e-06, "loss": 0.4482, "step": 2533 }, { "epoch": 0.2576250508336722, "grad_norm": 0.3881140351295471, "learning_rate": 5.152501016673444e-06, "loss": 0.4336, "step": 2534 }, { "epoch": 0.2577267181781212, "grad_norm": 0.39361774921417236, "learning_rate": 5.1545343635624246e-06, "loss": 0.4238, "step": 2535 }, { "epoch": 0.25782838552257015, "grad_norm": 0.46881818771362305, "learning_rate": 5.156567710451403e-06, "loss": 0.4069, "step": 2536 }, { "epoch": 0.2579300528670191, "grad_norm": 0.4120520353317261, "learning_rate": 5.158601057340382e-06, "loss": 0.4061, "step": 2537 }, { "epoch": 0.2580317202114681, "grad_norm": 0.42166125774383545, "learning_rate": 5.160634404229362e-06, "loss": 0.4266, "step": 2538 }, { "epoch": 0.258133387555917, "grad_norm": 0.44457441568374634, "learning_rate": 5.162667751118341e-06, "loss": 0.3951, "step": 2539 }, { "epoch": 0.258235054900366, "grad_norm": 0.5965074896812439, "learning_rate": 5.164701098007321e-06, "loss": 0.4049, "step": 2540 }, { "epoch": 0.25833672224481496, "grad_norm": 0.46124592423439026, "learning_rate": 5.1667344448963e-06, "loss": 0.4379, "step": 2541 }, { "epoch": 0.2584383895892639, "grad_norm": 0.45495039224624634, "learning_rate": 5.1687677917852784e-06, "loss": 0.4176, "step": 2542 }, { "epoch": 0.2585400569337129, "grad_norm": 0.4595933258533478, "learning_rate": 5.170801138674259e-06, "loss": 0.4144, "step": 2543 }, { "epoch": 0.25864172427816184, "grad_norm": 0.4919365644454956, "learning_rate": 5.172834485563237e-06, "loss": 0.4266, "step": 2544 }, { "epoch": 0.25874339162261084, "grad_norm": 0.4299485385417938, "learning_rate": 5.174867832452217e-06, "loss": 0.4402, "step": 2545 }, { "epoch": 0.2588450589670598, "grad_norm": 0.42563319206237793, "learning_rate": 5.176901179341196e-06, "loss": 0.4155, "step": 2546 }, { "epoch": 0.2589467263115087, "grad_norm": 0.49343374371528625, "learning_rate": 5.178934526230175e-06, "loss": 0.4069, "step": 2547 }, { "epoch": 0.2590483936559577, "grad_norm": 0.47214388847351074, "learning_rate": 5.1809678731191545e-06, "loss": 0.4063, "step": 2548 }, { "epoch": 0.25915006100040666, "grad_norm": 0.47086259722709656, "learning_rate": 5.183001220008134e-06, "loss": 0.4272, "step": 2549 }, { "epoch": 0.25925172834485566, "grad_norm": 0.47134071588516235, "learning_rate": 5.1850345668971135e-06, "loss": 0.4266, "step": 2550 }, { "epoch": 0.2593533956893046, "grad_norm": 0.44554978609085083, "learning_rate": 5.187067913786092e-06, "loss": 0.457, "step": 2551 }, { "epoch": 0.25945506303375354, "grad_norm": 0.4659487009048462, "learning_rate": 5.189101260675072e-06, "loss": 0.4604, "step": 2552 }, { "epoch": 0.25955673037820254, "grad_norm": 0.43974897265434265, "learning_rate": 5.191134607564051e-06, "loss": 0.4232, "step": 2553 }, { "epoch": 0.2596583977226515, "grad_norm": 0.4083200991153717, "learning_rate": 5.19316795445303e-06, "loss": 0.3892, "step": 2554 }, { "epoch": 0.2597600650671004, "grad_norm": 0.42086198925971985, "learning_rate": 5.195201301342009e-06, "loss": 0.4447, "step": 2555 }, { "epoch": 0.2598617324115494, "grad_norm": 0.4335998594760895, "learning_rate": 5.197234648230989e-06, "loss": 0.4387, "step": 2556 }, { "epoch": 0.25996339975599836, "grad_norm": 0.416227787733078, "learning_rate": 5.199267995119967e-06, "loss": 0.4542, "step": 2557 }, { "epoch": 0.26006506710044736, "grad_norm": 0.46135249733924866, "learning_rate": 5.201301342008948e-06, "loss": 0.4231, "step": 2558 }, { "epoch": 0.2601667344448963, "grad_norm": 0.4610973000526428, "learning_rate": 5.203334688897926e-06, "loss": 0.433, "step": 2559 }, { "epoch": 0.26026840178934524, "grad_norm": 0.414462149143219, "learning_rate": 5.205368035786905e-06, "loss": 0.4468, "step": 2560 }, { "epoch": 0.26037006913379424, "grad_norm": 0.4345826804637909, "learning_rate": 5.207401382675885e-06, "loss": 0.3999, "step": 2561 }, { "epoch": 0.2604717364782432, "grad_norm": 0.48330533504486084, "learning_rate": 5.209434729564864e-06, "loss": 0.4143, "step": 2562 }, { "epoch": 0.2605734038226922, "grad_norm": 0.500277578830719, "learning_rate": 5.211468076453844e-06, "loss": 0.4256, "step": 2563 }, { "epoch": 0.2606750711671411, "grad_norm": 0.42096009850502014, "learning_rate": 5.213501423342823e-06, "loss": 0.4312, "step": 2564 }, { "epoch": 0.26077673851159006, "grad_norm": 0.41250482201576233, "learning_rate": 5.215534770231802e-06, "loss": 0.4244, "step": 2565 }, { "epoch": 0.26087840585603905, "grad_norm": 0.4488624036312103, "learning_rate": 5.217568117120781e-06, "loss": 0.4201, "step": 2566 }, { "epoch": 0.260980073200488, "grad_norm": 0.4119029939174652, "learning_rate": 5.2196014640097606e-06, "loss": 0.4255, "step": 2567 }, { "epoch": 0.261081740544937, "grad_norm": 0.43445727229118347, "learning_rate": 5.22163481089874e-06, "loss": 0.4322, "step": 2568 }, { "epoch": 0.26118340788938593, "grad_norm": 0.4422747790813446, "learning_rate": 5.223668157787719e-06, "loss": 0.4213, "step": 2569 }, { "epoch": 0.2612850752338349, "grad_norm": 0.39663755893707275, "learning_rate": 5.225701504676698e-06, "loss": 0.442, "step": 2570 }, { "epoch": 0.26138674257828387, "grad_norm": 0.4161422848701477, "learning_rate": 5.227734851565678e-06, "loss": 0.4217, "step": 2571 }, { "epoch": 0.2614884099227328, "grad_norm": 0.47807103395462036, "learning_rate": 5.229768198454656e-06, "loss": 0.4082, "step": 2572 }, { "epoch": 0.2615900772671818, "grad_norm": 0.4534614682197571, "learning_rate": 5.231801545343637e-06, "loss": 0.4225, "step": 2573 }, { "epoch": 0.26169174461163075, "grad_norm": 0.4125213027000427, "learning_rate": 5.233834892232615e-06, "loss": 0.4138, "step": 2574 }, { "epoch": 0.2617934119560797, "grad_norm": 0.38314762711524963, "learning_rate": 5.235868239121594e-06, "loss": 0.4421, "step": 2575 }, { "epoch": 0.2618950793005287, "grad_norm": 0.4260903596878052, "learning_rate": 5.237901586010574e-06, "loss": 0.411, "step": 2576 }, { "epoch": 0.26199674664497763, "grad_norm": 0.44558537006378174, "learning_rate": 5.239934932899553e-06, "loss": 0.4383, "step": 2577 }, { "epoch": 0.26209841398942657, "grad_norm": 0.40943482518196106, "learning_rate": 5.2419682797885315e-06, "loss": 0.4153, "step": 2578 }, { "epoch": 0.26220008133387557, "grad_norm": 0.457253098487854, "learning_rate": 5.244001626677512e-06, "loss": 0.4238, "step": 2579 }, { "epoch": 0.2623017486783245, "grad_norm": 0.42384830117225647, "learning_rate": 5.2460349735664905e-06, "loss": 0.4326, "step": 2580 }, { "epoch": 0.2624034160227735, "grad_norm": 0.4068945348262787, "learning_rate": 5.248068320455471e-06, "loss": 0.4162, "step": 2581 }, { "epoch": 0.26250508336722245, "grad_norm": 0.42479777336120605, "learning_rate": 5.2501016673444495e-06, "loss": 0.4577, "step": 2582 }, { "epoch": 0.2626067507116714, "grad_norm": 0.4548262357711792, "learning_rate": 5.252135014233428e-06, "loss": 0.4108, "step": 2583 }, { "epoch": 0.2627084180561204, "grad_norm": 0.4239172339439392, "learning_rate": 5.2541683611224085e-06, "loss": 0.4206, "step": 2584 }, { "epoch": 0.2628100854005693, "grad_norm": 0.4951481223106384, "learning_rate": 5.256201708011387e-06, "loss": 0.435, "step": 2585 }, { "epoch": 0.2629117527450183, "grad_norm": 0.3984479308128357, "learning_rate": 5.258235054900367e-06, "loss": 0.4658, "step": 2586 }, { "epoch": 0.26301342008946726, "grad_norm": 0.4240545332431793, "learning_rate": 5.260268401789346e-06, "loss": 0.4533, "step": 2587 }, { "epoch": 0.2631150874339162, "grad_norm": 0.43157970905303955, "learning_rate": 5.262301748678325e-06, "loss": 0.4, "step": 2588 }, { "epoch": 0.2632167547783652, "grad_norm": 0.40012025833129883, "learning_rate": 5.264335095567304e-06, "loss": 0.4197, "step": 2589 }, { "epoch": 0.26331842212281414, "grad_norm": 0.4517349600791931, "learning_rate": 5.266368442456284e-06, "loss": 0.4186, "step": 2590 }, { "epoch": 0.26342008946726314, "grad_norm": 0.42040184140205383, "learning_rate": 5.268401789345263e-06, "loss": 0.4462, "step": 2591 }, { "epoch": 0.2635217568117121, "grad_norm": 0.44062408804893494, "learning_rate": 5.270435136234242e-06, "loss": 0.4676, "step": 2592 }, { "epoch": 0.263623424156161, "grad_norm": 0.4697314202785492, "learning_rate": 5.272468483123221e-06, "loss": 0.455, "step": 2593 }, { "epoch": 0.26372509150061, "grad_norm": 0.4717121720314026, "learning_rate": 5.274501830012201e-06, "loss": 0.4194, "step": 2594 }, { "epoch": 0.26382675884505896, "grad_norm": 0.4063834846019745, "learning_rate": 5.2765351769011794e-06, "loss": 0.4152, "step": 2595 }, { "epoch": 0.2639284261895079, "grad_norm": 0.4672137498855591, "learning_rate": 5.278568523790158e-06, "loss": 0.4029, "step": 2596 }, { "epoch": 0.2640300935339569, "grad_norm": 0.5077112913131714, "learning_rate": 5.280601870679138e-06, "loss": 0.4237, "step": 2597 }, { "epoch": 0.26413176087840584, "grad_norm": 0.4408867657184601, "learning_rate": 5.282635217568117e-06, "loss": 0.4402, "step": 2598 }, { "epoch": 0.26423342822285484, "grad_norm": 0.4286407232284546, "learning_rate": 5.284668564457097e-06, "loss": 0.4339, "step": 2599 }, { "epoch": 0.2643350955673038, "grad_norm": 0.4293016791343689, "learning_rate": 5.286701911346076e-06, "loss": 0.4317, "step": 2600 }, { "epoch": 0.2644367629117527, "grad_norm": 0.5179833173751831, "learning_rate": 5.288735258235055e-06, "loss": 0.4247, "step": 2601 }, { "epoch": 0.2645384302562017, "grad_norm": 0.421432226896286, "learning_rate": 5.290768605124035e-06, "loss": 0.4173, "step": 2602 }, { "epoch": 0.26464009760065066, "grad_norm": 0.43098706007003784, "learning_rate": 5.292801952013014e-06, "loss": 0.4117, "step": 2603 }, { "epoch": 0.26474176494509966, "grad_norm": 0.48620760440826416, "learning_rate": 5.294835298901993e-06, "loss": 0.446, "step": 2604 }, { "epoch": 0.2648434322895486, "grad_norm": 0.4654383063316345, "learning_rate": 5.296868645790973e-06, "loss": 0.3968, "step": 2605 }, { "epoch": 0.26494509963399754, "grad_norm": 0.4177037179470062, "learning_rate": 5.298901992679951e-06, "loss": 0.4096, "step": 2606 }, { "epoch": 0.26504676697844654, "grad_norm": 0.44216710329055786, "learning_rate": 5.300935339568931e-06, "loss": 0.4355, "step": 2607 }, { "epoch": 0.2651484343228955, "grad_norm": 0.40635302662849426, "learning_rate": 5.30296868645791e-06, "loss": 0.4464, "step": 2608 }, { "epoch": 0.2652501016673445, "grad_norm": 0.49273204803466797, "learning_rate": 5.30500203334689e-06, "loss": 0.4593, "step": 2609 }, { "epoch": 0.2653517690117934, "grad_norm": 0.40934211015701294, "learning_rate": 5.307035380235868e-06, "loss": 0.3954, "step": 2610 }, { "epoch": 0.26545343635624236, "grad_norm": 0.3847826421260834, "learning_rate": 5.309068727124848e-06, "loss": 0.4026, "step": 2611 }, { "epoch": 0.26555510370069135, "grad_norm": 0.43352651596069336, "learning_rate": 5.311102074013827e-06, "loss": 0.4656, "step": 2612 }, { "epoch": 0.2656567710451403, "grad_norm": 0.4305773675441742, "learning_rate": 5.313135420902806e-06, "loss": 0.4095, "step": 2613 }, { "epoch": 0.2657584383895893, "grad_norm": 0.42725616693496704, "learning_rate": 5.315168767791786e-06, "loss": 0.4384, "step": 2614 }, { "epoch": 0.26586010573403823, "grad_norm": 0.38788145780563354, "learning_rate": 5.317202114680765e-06, "loss": 0.4254, "step": 2615 }, { "epoch": 0.2659617730784872, "grad_norm": 0.4151305854320526, "learning_rate": 5.319235461569744e-06, "loss": 0.4175, "step": 2616 }, { "epoch": 0.26606344042293617, "grad_norm": 0.3974916934967041, "learning_rate": 5.321268808458724e-06, "loss": 0.4405, "step": 2617 }, { "epoch": 0.2661651077673851, "grad_norm": 0.4999844431877136, "learning_rate": 5.323302155347703e-06, "loss": 0.4081, "step": 2618 }, { "epoch": 0.26626677511183405, "grad_norm": 0.4283415675163269, "learning_rate": 5.325335502236681e-06, "loss": 0.428, "step": 2619 }, { "epoch": 0.26636844245628305, "grad_norm": 0.42007020115852356, "learning_rate": 5.3273688491256616e-06, "loss": 0.4669, "step": 2620 }, { "epoch": 0.266470109800732, "grad_norm": 0.42856016755104065, "learning_rate": 5.32940219601464e-06, "loss": 0.4101, "step": 2621 }, { "epoch": 0.266571777145181, "grad_norm": 0.4755350947380066, "learning_rate": 5.3314355429036205e-06, "loss": 0.4245, "step": 2622 }, { "epoch": 0.26667344448962993, "grad_norm": 0.409927636384964, "learning_rate": 5.333468889792599e-06, "loss": 0.4627, "step": 2623 }, { "epoch": 0.26677511183407887, "grad_norm": 0.40122804045677185, "learning_rate": 5.335502236681578e-06, "loss": 0.4205, "step": 2624 }, { "epoch": 0.26687677917852787, "grad_norm": 0.4063666760921478, "learning_rate": 5.337535583570558e-06, "loss": 0.3908, "step": 2625 }, { "epoch": 0.2669784465229768, "grad_norm": 0.4768059253692627, "learning_rate": 5.339568930459537e-06, "loss": 0.4417, "step": 2626 }, { "epoch": 0.2670801138674258, "grad_norm": 0.433749794960022, "learning_rate": 5.341602277348516e-06, "loss": 0.421, "step": 2627 }, { "epoch": 0.26718178121187475, "grad_norm": 0.4233391582965851, "learning_rate": 5.343635624237496e-06, "loss": 0.4035, "step": 2628 }, { "epoch": 0.2672834485563237, "grad_norm": 0.44630667567253113, "learning_rate": 5.345668971126474e-06, "loss": 0.4391, "step": 2629 }, { "epoch": 0.2673851159007727, "grad_norm": 0.46530571579933167, "learning_rate": 5.347702318015454e-06, "loss": 0.4004, "step": 2630 }, { "epoch": 0.2674867832452216, "grad_norm": 0.4174386262893677, "learning_rate": 5.3497356649044325e-06, "loss": 0.3866, "step": 2631 }, { "epoch": 0.2675884505896706, "grad_norm": 0.4098917245864868, "learning_rate": 5.351769011793413e-06, "loss": 0.4399, "step": 2632 }, { "epoch": 0.26769011793411956, "grad_norm": 0.4869607090950012, "learning_rate": 5.3538023586823915e-06, "loss": 0.4231, "step": 2633 }, { "epoch": 0.2677917852785685, "grad_norm": 0.43637868762016296, "learning_rate": 5.35583570557137e-06, "loss": 0.4418, "step": 2634 }, { "epoch": 0.2678934526230175, "grad_norm": 0.4499669671058655, "learning_rate": 5.3578690524603505e-06, "loss": 0.4636, "step": 2635 }, { "epoch": 0.26799511996746644, "grad_norm": 0.46375155448913574, "learning_rate": 5.359902399349329e-06, "loss": 0.393, "step": 2636 }, { "epoch": 0.2680967873119154, "grad_norm": 0.5007246136665344, "learning_rate": 5.361935746238308e-06, "loss": 0.4448, "step": 2637 }, { "epoch": 0.2681984546563644, "grad_norm": 0.4267692565917969, "learning_rate": 5.363969093127288e-06, "loss": 0.4106, "step": 2638 }, { "epoch": 0.2683001220008133, "grad_norm": 0.47249239683151245, "learning_rate": 5.366002440016267e-06, "loss": 0.3986, "step": 2639 }, { "epoch": 0.2684017893452623, "grad_norm": 0.4687618017196655, "learning_rate": 5.368035786905247e-06, "loss": 0.4223, "step": 2640 }, { "epoch": 0.26850345668971126, "grad_norm": 0.44007381796836853, "learning_rate": 5.370069133794226e-06, "loss": 0.4053, "step": 2641 }, { "epoch": 0.2686051240341602, "grad_norm": 0.4666826128959656, "learning_rate": 5.372102480683204e-06, "loss": 0.4079, "step": 2642 }, { "epoch": 0.2687067913786092, "grad_norm": 0.43400880694389343, "learning_rate": 5.374135827572185e-06, "loss": 0.4507, "step": 2643 }, { "epoch": 0.26880845872305814, "grad_norm": 0.45605555176734924, "learning_rate": 5.376169174461163e-06, "loss": 0.447, "step": 2644 }, { "epoch": 0.26891012606750714, "grad_norm": 0.45992767810821533, "learning_rate": 5.378202521350143e-06, "loss": 0.4384, "step": 2645 }, { "epoch": 0.2690117934119561, "grad_norm": 0.4970798194408417, "learning_rate": 5.380235868239122e-06, "loss": 0.4354, "step": 2646 }, { "epoch": 0.269113460756405, "grad_norm": 0.43884047865867615, "learning_rate": 5.382269215128101e-06, "loss": 0.4194, "step": 2647 }, { "epoch": 0.269215128100854, "grad_norm": 0.5556594729423523, "learning_rate": 5.3843025620170805e-06, "loss": 0.4267, "step": 2648 }, { "epoch": 0.26931679544530296, "grad_norm": 0.48265737295150757, "learning_rate": 5.38633590890606e-06, "loss": 0.4354, "step": 2649 }, { "epoch": 0.26941846278975196, "grad_norm": 0.4455290138721466, "learning_rate": 5.3883692557950394e-06, "loss": 0.4299, "step": 2650 }, { "epoch": 0.2695201301342009, "grad_norm": 0.44012585282325745, "learning_rate": 5.390402602684018e-06, "loss": 0.415, "step": 2651 }, { "epoch": 0.26962179747864984, "grad_norm": 0.46015316247940063, "learning_rate": 5.3924359495729976e-06, "loss": 0.4121, "step": 2652 }, { "epoch": 0.26972346482309884, "grad_norm": 0.43206462264060974, "learning_rate": 5.394469296461977e-06, "loss": 0.4255, "step": 2653 }, { "epoch": 0.2698251321675478, "grad_norm": 0.40848907828330994, "learning_rate": 5.396502643350956e-06, "loss": 0.451, "step": 2654 }, { "epoch": 0.2699267995119968, "grad_norm": 0.4556804895401001, "learning_rate": 5.398535990239936e-06, "loss": 0.4251, "step": 2655 }, { "epoch": 0.2700284668564457, "grad_norm": 0.42836660146713257, "learning_rate": 5.400569337128915e-06, "loss": 0.4426, "step": 2656 }, { "epoch": 0.27013013420089466, "grad_norm": 0.4368216395378113, "learning_rate": 5.402602684017893e-06, "loss": 0.4342, "step": 2657 }, { "epoch": 0.27023180154534365, "grad_norm": 0.404812753200531, "learning_rate": 5.404636030906874e-06, "loss": 0.4273, "step": 2658 }, { "epoch": 0.2703334688897926, "grad_norm": 0.48585250973701477, "learning_rate": 5.406669377795852e-06, "loss": 0.4363, "step": 2659 }, { "epoch": 0.27043513623424154, "grad_norm": 0.4196794033050537, "learning_rate": 5.408702724684831e-06, "loss": 0.4655, "step": 2660 }, { "epoch": 0.27053680357869053, "grad_norm": 0.45795878767967224, "learning_rate": 5.410736071573811e-06, "loss": 0.4283, "step": 2661 }, { "epoch": 0.2706384709231395, "grad_norm": 0.4092988669872284, "learning_rate": 5.41276941846279e-06, "loss": 0.4412, "step": 2662 }, { "epoch": 0.27074013826758847, "grad_norm": 0.4500284492969513, "learning_rate": 5.41480276535177e-06, "loss": 0.4164, "step": 2663 }, { "epoch": 0.2708418056120374, "grad_norm": 0.4939691126346588, "learning_rate": 5.416836112240749e-06, "loss": 0.4463, "step": 2664 }, { "epoch": 0.27094347295648635, "grad_norm": 0.5484199523925781, "learning_rate": 5.4188694591297275e-06, "loss": 0.4146, "step": 2665 }, { "epoch": 0.27104514030093535, "grad_norm": 0.39458534121513367, "learning_rate": 5.420902806018708e-06, "loss": 0.3722, "step": 2666 }, { "epoch": 0.2711468076453843, "grad_norm": 0.47068679332733154, "learning_rate": 5.4229361529076865e-06, "loss": 0.4597, "step": 2667 }, { "epoch": 0.2712484749898333, "grad_norm": 0.3948974311351776, "learning_rate": 5.424969499796666e-06, "loss": 0.4121, "step": 2668 }, { "epoch": 0.27135014233428223, "grad_norm": 0.4228554964065552, "learning_rate": 5.427002846685645e-06, "loss": 0.409, "step": 2669 }, { "epoch": 0.27145180967873117, "grad_norm": 0.4620669186115265, "learning_rate": 5.429036193574624e-06, "loss": 0.4024, "step": 2670 }, { "epoch": 0.27155347702318017, "grad_norm": 0.42488861083984375, "learning_rate": 5.431069540463604e-06, "loss": 0.433, "step": 2671 }, { "epoch": 0.2716551443676291, "grad_norm": 0.4576481878757477, "learning_rate": 5.433102887352582e-06, "loss": 0.4294, "step": 2672 }, { "epoch": 0.2717568117120781, "grad_norm": 0.4242667853832245, "learning_rate": 5.4351362342415626e-06, "loss": 0.4069, "step": 2673 }, { "epoch": 0.27185847905652705, "grad_norm": 0.4791247248649597, "learning_rate": 5.437169581130541e-06, "loss": 0.4559, "step": 2674 }, { "epoch": 0.271960146400976, "grad_norm": 0.47967299818992615, "learning_rate": 5.43920292801952e-06, "loss": 0.4597, "step": 2675 }, { "epoch": 0.272061813745425, "grad_norm": 0.4657423496246338, "learning_rate": 5.4412362749085e-06, "loss": 0.4167, "step": 2676 }, { "epoch": 0.2721634810898739, "grad_norm": 0.535374641418457, "learning_rate": 5.443269621797479e-06, "loss": 0.4633, "step": 2677 }, { "epoch": 0.27226514843432287, "grad_norm": 0.4181104600429535, "learning_rate": 5.4453029686864575e-06, "loss": 0.4368, "step": 2678 }, { "epoch": 0.27236681577877186, "grad_norm": 0.5944842100143433, "learning_rate": 5.447336315575438e-06, "loss": 0.4487, "step": 2679 }, { "epoch": 0.2724684831232208, "grad_norm": 0.5106532573699951, "learning_rate": 5.4493696624644164e-06, "loss": 0.4384, "step": 2680 }, { "epoch": 0.2725701504676698, "grad_norm": 0.42524516582489014, "learning_rate": 5.451403009353397e-06, "loss": 0.4245, "step": 2681 }, { "epoch": 0.27267181781211874, "grad_norm": 0.4777844250202179, "learning_rate": 5.4534363562423754e-06, "loss": 0.4198, "step": 2682 }, { "epoch": 0.2727734851565677, "grad_norm": 0.577665388584137, "learning_rate": 5.455469703131354e-06, "loss": 0.4595, "step": 2683 }, { "epoch": 0.2728751525010167, "grad_norm": 0.4458968937397003, "learning_rate": 5.457503050020334e-06, "loss": 0.4399, "step": 2684 }, { "epoch": 0.2729768198454656, "grad_norm": 0.47815293073654175, "learning_rate": 5.459536396909313e-06, "loss": 0.4405, "step": 2685 }, { "epoch": 0.2730784871899146, "grad_norm": 0.42185288667678833, "learning_rate": 5.4615697437982925e-06, "loss": 0.4395, "step": 2686 }, { "epoch": 0.27318015453436356, "grad_norm": 0.4156978130340576, "learning_rate": 5.463603090687272e-06, "loss": 0.4077, "step": 2687 }, { "epoch": 0.2732818218788125, "grad_norm": 0.4117267429828644, "learning_rate": 5.465636437576251e-06, "loss": 0.4247, "step": 2688 }, { "epoch": 0.2733834892232615, "grad_norm": 0.4654900133609772, "learning_rate": 5.46766978446523e-06, "loss": 0.4388, "step": 2689 }, { "epoch": 0.27348515656771044, "grad_norm": 0.41194218397140503, "learning_rate": 5.46970313135421e-06, "loss": 0.4137, "step": 2690 }, { "epoch": 0.27358682391215944, "grad_norm": 0.44206881523132324, "learning_rate": 5.471736478243189e-06, "loss": 0.4253, "step": 2691 }, { "epoch": 0.2736884912566084, "grad_norm": 0.4363841414451599, "learning_rate": 5.473769825132168e-06, "loss": 0.392, "step": 2692 }, { "epoch": 0.2737901586010573, "grad_norm": 0.38650596141815186, "learning_rate": 5.475803172021147e-06, "loss": 0.4424, "step": 2693 }, { "epoch": 0.2738918259455063, "grad_norm": 0.4661596715450287, "learning_rate": 5.477836518910127e-06, "loss": 0.4442, "step": 2694 }, { "epoch": 0.27399349328995526, "grad_norm": 0.47244447469711304, "learning_rate": 5.479869865799105e-06, "loss": 0.45, "step": 2695 }, { "epoch": 0.27409516063440426, "grad_norm": 0.5040395259857178, "learning_rate": 5.481903212688086e-06, "loss": 0.3947, "step": 2696 }, { "epoch": 0.2741968279788532, "grad_norm": 0.43752521276474, "learning_rate": 5.483936559577064e-06, "loss": 0.4397, "step": 2697 }, { "epoch": 0.27429849532330214, "grad_norm": 0.430831640958786, "learning_rate": 5.485969906466043e-06, "loss": 0.4345, "step": 2698 }, { "epoch": 0.27440016266775114, "grad_norm": 0.4371959865093231, "learning_rate": 5.488003253355023e-06, "loss": 0.4167, "step": 2699 }, { "epoch": 0.2745018300122001, "grad_norm": 0.41431641578674316, "learning_rate": 5.490036600244002e-06, "loss": 0.421, "step": 2700 }, { "epoch": 0.274603497356649, "grad_norm": 0.43651139736175537, "learning_rate": 5.492069947132981e-06, "loss": 0.4254, "step": 2701 }, { "epoch": 0.274705164701098, "grad_norm": 0.4208395183086395, "learning_rate": 5.494103294021961e-06, "loss": 0.4434, "step": 2702 }, { "epoch": 0.27480683204554696, "grad_norm": 0.3997268080711365, "learning_rate": 5.49613664091094e-06, "loss": 0.4202, "step": 2703 }, { "epoch": 0.27490849938999595, "grad_norm": 0.43046632409095764, "learning_rate": 5.49816998779992e-06, "loss": 0.4266, "step": 2704 }, { "epoch": 0.2750101667344449, "grad_norm": 0.42007604241371155, "learning_rate": 5.5002033346888986e-06, "loss": 0.421, "step": 2705 }, { "epoch": 0.27511183407889384, "grad_norm": 0.44548285007476807, "learning_rate": 5.502236681577877e-06, "loss": 0.4012, "step": 2706 }, { "epoch": 0.27521350142334283, "grad_norm": 0.42623743414878845, "learning_rate": 5.504270028466857e-06, "loss": 0.443, "step": 2707 }, { "epoch": 0.2753151687677918, "grad_norm": 0.4256812632083893, "learning_rate": 5.506303375355836e-06, "loss": 0.4056, "step": 2708 }, { "epoch": 0.27541683611224077, "grad_norm": 0.4134275019168854, "learning_rate": 5.508336722244816e-06, "loss": 0.439, "step": 2709 }, { "epoch": 0.2755185034566897, "grad_norm": 0.4229419529438019, "learning_rate": 5.510370069133794e-06, "loss": 0.439, "step": 2710 }, { "epoch": 0.27562017080113865, "grad_norm": 0.44816118478775024, "learning_rate": 5.512403416022774e-06, "loss": 0.4143, "step": 2711 }, { "epoch": 0.27572183814558765, "grad_norm": 0.39203622937202454, "learning_rate": 5.514436762911753e-06, "loss": 0.4066, "step": 2712 }, { "epoch": 0.2758235054900366, "grad_norm": 0.39318153262138367, "learning_rate": 5.516470109800732e-06, "loss": 0.3952, "step": 2713 }, { "epoch": 0.2759251728344856, "grad_norm": 0.4069969654083252, "learning_rate": 5.518503456689712e-06, "loss": 0.4209, "step": 2714 }, { "epoch": 0.27602684017893453, "grad_norm": 0.4288293421268463, "learning_rate": 5.520536803578691e-06, "loss": 0.4165, "step": 2715 }, { "epoch": 0.27612850752338347, "grad_norm": 0.40011265873908997, "learning_rate": 5.5225701504676695e-06, "loss": 0.4626, "step": 2716 }, { "epoch": 0.27623017486783247, "grad_norm": 0.39728835225105286, "learning_rate": 5.52460349735665e-06, "loss": 0.4339, "step": 2717 }, { "epoch": 0.2763318422122814, "grad_norm": 0.4253547191619873, "learning_rate": 5.5266368442456285e-06, "loss": 0.421, "step": 2718 }, { "epoch": 0.2764335095567304, "grad_norm": 0.3999021649360657, "learning_rate": 5.528670191134609e-06, "loss": 0.4097, "step": 2719 }, { "epoch": 0.27653517690117935, "grad_norm": 0.4259986877441406, "learning_rate": 5.5307035380235875e-06, "loss": 0.4178, "step": 2720 }, { "epoch": 0.2766368442456283, "grad_norm": 0.3930121064186096, "learning_rate": 5.532736884912566e-06, "loss": 0.4745, "step": 2721 }, { "epoch": 0.2767385115900773, "grad_norm": 0.40735334157943726, "learning_rate": 5.5347702318015465e-06, "loss": 0.4352, "step": 2722 }, { "epoch": 0.2768401789345262, "grad_norm": 0.406861811876297, "learning_rate": 5.536803578690525e-06, "loss": 0.4118, "step": 2723 }, { "epoch": 0.27694184627897517, "grad_norm": 0.4125331938266754, "learning_rate": 5.538836925579504e-06, "loss": 0.3995, "step": 2724 }, { "epoch": 0.27704351362342416, "grad_norm": 0.4190373122692108, "learning_rate": 5.540870272468484e-06, "loss": 0.4387, "step": 2725 }, { "epoch": 0.2771451809678731, "grad_norm": 0.39684075117111206, "learning_rate": 5.542903619357463e-06, "loss": 0.4124, "step": 2726 }, { "epoch": 0.2772468483123221, "grad_norm": 0.43755781650543213, "learning_rate": 5.544936966246442e-06, "loss": 0.3922, "step": 2727 }, { "epoch": 0.27734851565677104, "grad_norm": 0.40928128361701965, "learning_rate": 5.546970313135422e-06, "loss": 0.4863, "step": 2728 }, { "epoch": 0.27745018300122, "grad_norm": 0.4421575367450714, "learning_rate": 5.5490036600244e-06, "loss": 0.4173, "step": 2729 }, { "epoch": 0.277551850345669, "grad_norm": 0.38005414605140686, "learning_rate": 5.55103700691338e-06, "loss": 0.4137, "step": 2730 }, { "epoch": 0.2776535176901179, "grad_norm": 0.4216756224632263, "learning_rate": 5.553070353802359e-06, "loss": 0.4299, "step": 2731 }, { "epoch": 0.2777551850345669, "grad_norm": 0.48111796379089355, "learning_rate": 5.555103700691339e-06, "loss": 0.4341, "step": 2732 }, { "epoch": 0.27785685237901586, "grad_norm": 0.45227596163749695, "learning_rate": 5.5571370475803175e-06, "loss": 0.3956, "step": 2733 }, { "epoch": 0.2779585197234648, "grad_norm": 0.5150575637817383, "learning_rate": 5.559170394469296e-06, "loss": 0.4567, "step": 2734 }, { "epoch": 0.2780601870679138, "grad_norm": 0.4502858817577362, "learning_rate": 5.5612037413582764e-06, "loss": 0.4083, "step": 2735 }, { "epoch": 0.27816185441236274, "grad_norm": 0.402208149433136, "learning_rate": 5.563237088247255e-06, "loss": 0.4668, "step": 2736 }, { "epoch": 0.27826352175681174, "grad_norm": 0.5047203898429871, "learning_rate": 5.565270435136235e-06, "loss": 0.4288, "step": 2737 }, { "epoch": 0.2783651891012607, "grad_norm": 0.47104018926620483, "learning_rate": 5.567303782025214e-06, "loss": 0.4768, "step": 2738 }, { "epoch": 0.2784668564457096, "grad_norm": 0.44440650939941406, "learning_rate": 5.569337128914193e-06, "loss": 0.4447, "step": 2739 }, { "epoch": 0.2785685237901586, "grad_norm": 0.4161560833454132, "learning_rate": 5.571370475803173e-06, "loss": 0.3931, "step": 2740 }, { "epoch": 0.27867019113460756, "grad_norm": 0.426014244556427, "learning_rate": 5.573403822692152e-06, "loss": 0.403, "step": 2741 }, { "epoch": 0.2787718584790565, "grad_norm": 0.39702144265174866, "learning_rate": 5.57543716958113e-06, "loss": 0.3977, "step": 2742 }, { "epoch": 0.2788735258235055, "grad_norm": 0.45906195044517517, "learning_rate": 5.577470516470111e-06, "loss": 0.4181, "step": 2743 }, { "epoch": 0.27897519316795444, "grad_norm": 0.45994454622268677, "learning_rate": 5.579503863359089e-06, "loss": 0.4757, "step": 2744 }, { "epoch": 0.27907686051240344, "grad_norm": 0.4624069929122925, "learning_rate": 5.581537210248069e-06, "loss": 0.4263, "step": 2745 }, { "epoch": 0.2791785278568524, "grad_norm": 0.39367231726646423, "learning_rate": 5.583570557137048e-06, "loss": 0.448, "step": 2746 }, { "epoch": 0.2792801952013013, "grad_norm": 0.4213443994522095, "learning_rate": 5.585603904026027e-06, "loss": 0.4359, "step": 2747 }, { "epoch": 0.2793818625457503, "grad_norm": 0.4390604794025421, "learning_rate": 5.587637250915006e-06, "loss": 0.4227, "step": 2748 }, { "epoch": 0.27948352989019926, "grad_norm": 0.4018273651599884, "learning_rate": 5.589670597803986e-06, "loss": 0.4009, "step": 2749 }, { "epoch": 0.27958519723464825, "grad_norm": 0.4292339086532593, "learning_rate": 5.591703944692965e-06, "loss": 0.4544, "step": 2750 }, { "epoch": 0.2796868645790972, "grad_norm": 0.4060991406440735, "learning_rate": 5.593737291581944e-06, "loss": 0.4145, "step": 2751 }, { "epoch": 0.27978853192354614, "grad_norm": 0.4943375885486603, "learning_rate": 5.5957706384709235e-06, "loss": 0.4337, "step": 2752 }, { "epoch": 0.27989019926799513, "grad_norm": 0.45550715923309326, "learning_rate": 5.597803985359903e-06, "loss": 0.4237, "step": 2753 }, { "epoch": 0.2799918666124441, "grad_norm": 0.44291484355926514, "learning_rate": 5.599837332248882e-06, "loss": 0.4491, "step": 2754 }, { "epoch": 0.28009353395689307, "grad_norm": 0.4099193811416626, "learning_rate": 5.601870679137862e-06, "loss": 0.4459, "step": 2755 }, { "epoch": 0.280195201301342, "grad_norm": 0.4623476564884186, "learning_rate": 5.603904026026841e-06, "loss": 0.4643, "step": 2756 }, { "epoch": 0.28029686864579095, "grad_norm": 0.4553830325603485, "learning_rate": 5.605937372915819e-06, "loss": 0.3924, "step": 2757 }, { "epoch": 0.28039853599023995, "grad_norm": 0.4357491135597229, "learning_rate": 5.6079707198048e-06, "loss": 0.4112, "step": 2758 }, { "epoch": 0.2805002033346889, "grad_norm": 0.4338492453098297, "learning_rate": 5.610004066693778e-06, "loss": 0.4369, "step": 2759 }, { "epoch": 0.2806018706791379, "grad_norm": 0.4593670070171356, "learning_rate": 5.6120374135827586e-06, "loss": 0.4447, "step": 2760 }, { "epoch": 0.28070353802358683, "grad_norm": 0.406145304441452, "learning_rate": 5.614070760471737e-06, "loss": 0.3802, "step": 2761 }, { "epoch": 0.28080520536803577, "grad_norm": 0.4680458605289459, "learning_rate": 5.616104107360716e-06, "loss": 0.4636, "step": 2762 }, { "epoch": 0.28090687271248477, "grad_norm": 0.4528254568576813, "learning_rate": 5.618137454249696e-06, "loss": 0.427, "step": 2763 }, { "epoch": 0.2810085400569337, "grad_norm": 0.430430144071579, "learning_rate": 5.620170801138675e-06, "loss": 0.4257, "step": 2764 }, { "epoch": 0.28111020740138265, "grad_norm": 0.4446328580379486, "learning_rate": 5.6222041480276535e-06, "loss": 0.3615, "step": 2765 }, { "epoch": 0.28121187474583165, "grad_norm": 0.4760669469833374, "learning_rate": 5.624237494916634e-06, "loss": 0.4519, "step": 2766 }, { "epoch": 0.2813135420902806, "grad_norm": 0.4293553829193115, "learning_rate": 5.6262708418056124e-06, "loss": 0.4179, "step": 2767 }, { "epoch": 0.2814152094347296, "grad_norm": 0.44886448979377747, "learning_rate": 5.628304188694592e-06, "loss": 0.4461, "step": 2768 }, { "epoch": 0.2815168767791785, "grad_norm": 0.4840641915798187, "learning_rate": 5.630337535583571e-06, "loss": 0.4266, "step": 2769 }, { "epoch": 0.28161854412362747, "grad_norm": 0.4680357277393341, "learning_rate": 5.63237088247255e-06, "loss": 0.4063, "step": 2770 }, { "epoch": 0.28172021146807646, "grad_norm": 0.4430641829967499, "learning_rate": 5.6344042293615295e-06, "loss": 0.4108, "step": 2771 }, { "epoch": 0.2818218788125254, "grad_norm": 0.47801077365875244, "learning_rate": 5.636437576250508e-06, "loss": 0.4513, "step": 2772 }, { "epoch": 0.2819235461569744, "grad_norm": 0.5184930562973022, "learning_rate": 5.6384709231394885e-06, "loss": 0.4323, "step": 2773 }, { "epoch": 0.28202521350142334, "grad_norm": 0.47269493341445923, "learning_rate": 5.640504270028467e-06, "loss": 0.4201, "step": 2774 }, { "epoch": 0.2821268808458723, "grad_norm": 0.43962112069129944, "learning_rate": 5.642537616917446e-06, "loss": 0.4384, "step": 2775 }, { "epoch": 0.2822285481903213, "grad_norm": 0.5111146569252014, "learning_rate": 5.644570963806426e-06, "loss": 0.4599, "step": 2776 }, { "epoch": 0.2823302155347702, "grad_norm": 0.44271063804626465, "learning_rate": 5.646604310695405e-06, "loss": 0.4768, "step": 2777 }, { "epoch": 0.2824318828792192, "grad_norm": 0.4611600935459137, "learning_rate": 5.648637657584385e-06, "loss": 0.4386, "step": 2778 }, { "epoch": 0.28253355022366816, "grad_norm": 0.4586274027824402, "learning_rate": 5.650671004473364e-06, "loss": 0.4358, "step": 2779 }, { "epoch": 0.2826352175681171, "grad_norm": 0.44931545853614807, "learning_rate": 5.652704351362342e-06, "loss": 0.4296, "step": 2780 }, { "epoch": 0.2827368849125661, "grad_norm": 0.4613912105560303, "learning_rate": 5.654737698251323e-06, "loss": 0.4257, "step": 2781 }, { "epoch": 0.28283855225701504, "grad_norm": 0.431095689535141, "learning_rate": 5.656771045140301e-06, "loss": 0.4535, "step": 2782 }, { "epoch": 0.282940219601464, "grad_norm": 0.43786826729774475, "learning_rate": 5.65880439202928e-06, "loss": 0.4097, "step": 2783 }, { "epoch": 0.283041886945913, "grad_norm": 0.4430679678916931, "learning_rate": 5.66083773891826e-06, "loss": 0.4129, "step": 2784 }, { "epoch": 0.2831435542903619, "grad_norm": 0.44829586148262024, "learning_rate": 5.662871085807239e-06, "loss": 0.3984, "step": 2785 }, { "epoch": 0.2832452216348109, "grad_norm": 0.4302108585834503, "learning_rate": 5.6649044326962185e-06, "loss": 0.4282, "step": 2786 }, { "epoch": 0.28334688897925986, "grad_norm": 0.478351891040802, "learning_rate": 5.666937779585198e-06, "loss": 0.4283, "step": 2787 }, { "epoch": 0.2834485563237088, "grad_norm": 0.4625405967235565, "learning_rate": 5.668971126474177e-06, "loss": 0.427, "step": 2788 }, { "epoch": 0.2835502236681578, "grad_norm": 0.41638150811195374, "learning_rate": 5.671004473363156e-06, "loss": 0.407, "step": 2789 }, { "epoch": 0.28365189101260674, "grad_norm": 0.4772336483001709, "learning_rate": 5.6730378202521356e-06, "loss": 0.4213, "step": 2790 }, { "epoch": 0.28375355835705574, "grad_norm": 0.4900238811969757, "learning_rate": 5.675071167141115e-06, "loss": 0.4096, "step": 2791 }, { "epoch": 0.2838552257015047, "grad_norm": 0.44500717520713806, "learning_rate": 5.677104514030094e-06, "loss": 0.4314, "step": 2792 }, { "epoch": 0.2839568930459536, "grad_norm": 0.4067450761795044, "learning_rate": 5.679137860919073e-06, "loss": 0.4239, "step": 2793 }, { "epoch": 0.2840585603904026, "grad_norm": 0.4971644878387451, "learning_rate": 5.681171207808053e-06, "loss": 0.4292, "step": 2794 }, { "epoch": 0.28416022773485156, "grad_norm": 0.47631147503852844, "learning_rate": 5.683204554697031e-06, "loss": 0.4201, "step": 2795 }, { "epoch": 0.28426189507930055, "grad_norm": 0.4373021423816681, "learning_rate": 5.685237901586012e-06, "loss": 0.4546, "step": 2796 }, { "epoch": 0.2843635624237495, "grad_norm": 0.49092790484428406, "learning_rate": 5.68727124847499e-06, "loss": 0.4075, "step": 2797 }, { "epoch": 0.28446522976819844, "grad_norm": 0.44997739791870117, "learning_rate": 5.689304595363969e-06, "loss": 0.4001, "step": 2798 }, { "epoch": 0.28456689711264743, "grad_norm": 0.43155211210250854, "learning_rate": 5.691337942252949e-06, "loss": 0.4036, "step": 2799 }, { "epoch": 0.2846685644570964, "grad_norm": 0.49149399995803833, "learning_rate": 5.693371289141928e-06, "loss": 0.4299, "step": 2800 }, { "epoch": 0.28477023180154537, "grad_norm": 0.5107448697090149, "learning_rate": 5.695404636030908e-06, "loss": 0.4613, "step": 2801 }, { "epoch": 0.2848718991459943, "grad_norm": 0.44477608799934387, "learning_rate": 5.697437982919887e-06, "loss": 0.4273, "step": 2802 }, { "epoch": 0.28497356649044325, "grad_norm": 0.5340315699577332, "learning_rate": 5.6994713298088655e-06, "loss": 0.4486, "step": 2803 }, { "epoch": 0.28507523383489225, "grad_norm": 0.6116926670074463, "learning_rate": 5.701504676697846e-06, "loss": 0.4501, "step": 2804 }, { "epoch": 0.2851769011793412, "grad_norm": 0.44443780183792114, "learning_rate": 5.7035380235868245e-06, "loss": 0.456, "step": 2805 }, { "epoch": 0.28527856852379013, "grad_norm": 0.44060200452804565, "learning_rate": 5.705571370475803e-06, "loss": 0.4091, "step": 2806 }, { "epoch": 0.28538023586823913, "grad_norm": 0.5501381158828735, "learning_rate": 5.707604717364783e-06, "loss": 0.4347, "step": 2807 }, { "epoch": 0.28548190321268807, "grad_norm": 0.4522358179092407, "learning_rate": 5.709638064253762e-06, "loss": 0.4139, "step": 2808 }, { "epoch": 0.28558357055713707, "grad_norm": 0.4330846667289734, "learning_rate": 5.711671411142742e-06, "loss": 0.4487, "step": 2809 }, { "epoch": 0.285685237901586, "grad_norm": 0.42511212825775146, "learning_rate": 5.71370475803172e-06, "loss": 0.4141, "step": 2810 }, { "epoch": 0.28578690524603495, "grad_norm": 0.4577392637729645, "learning_rate": 5.7157381049207e-06, "loss": 0.4256, "step": 2811 }, { "epoch": 0.28588857259048395, "grad_norm": 0.44588834047317505, "learning_rate": 5.717771451809679e-06, "loss": 0.456, "step": 2812 }, { "epoch": 0.2859902399349329, "grad_norm": 0.46809083223342896, "learning_rate": 5.719804798698658e-06, "loss": 0.42, "step": 2813 }, { "epoch": 0.2860919072793819, "grad_norm": 0.4759026765823364, "learning_rate": 5.721838145587638e-06, "loss": 0.4442, "step": 2814 }, { "epoch": 0.2861935746238308, "grad_norm": 0.4556356966495514, "learning_rate": 5.723871492476617e-06, "loss": 0.4597, "step": 2815 }, { "epoch": 0.28629524196827977, "grad_norm": 0.42732569575309753, "learning_rate": 5.7259048393655955e-06, "loss": 0.4118, "step": 2816 }, { "epoch": 0.28639690931272876, "grad_norm": 0.4409513771533966, "learning_rate": 5.727938186254576e-06, "loss": 0.4451, "step": 2817 }, { "epoch": 0.2864985766571777, "grad_norm": 0.40303733944892883, "learning_rate": 5.7299715331435545e-06, "loss": 0.4206, "step": 2818 }, { "epoch": 0.2866002440016267, "grad_norm": 0.44556543231010437, "learning_rate": 5.732004880032535e-06, "loss": 0.4129, "step": 2819 }, { "epoch": 0.28670191134607564, "grad_norm": 0.4842827022075653, "learning_rate": 5.7340382269215134e-06, "loss": 0.4356, "step": 2820 }, { "epoch": 0.2868035786905246, "grad_norm": 0.4408382475376129, "learning_rate": 5.736071573810492e-06, "loss": 0.4417, "step": 2821 }, { "epoch": 0.2869052460349736, "grad_norm": 0.45499393343925476, "learning_rate": 5.738104920699472e-06, "loss": 0.4161, "step": 2822 }, { "epoch": 0.2870069133794225, "grad_norm": 0.41440296173095703, "learning_rate": 5.740138267588451e-06, "loss": 0.4307, "step": 2823 }, { "epoch": 0.28710858072387146, "grad_norm": 0.41263264417648315, "learning_rate": 5.74217161447743e-06, "loss": 0.419, "step": 2824 }, { "epoch": 0.28721024806832046, "grad_norm": 0.4369261562824249, "learning_rate": 5.74420496136641e-06, "loss": 0.4357, "step": 2825 }, { "epoch": 0.2873119154127694, "grad_norm": 0.470626562833786, "learning_rate": 5.746238308255389e-06, "loss": 0.3846, "step": 2826 }, { "epoch": 0.2874135827572184, "grad_norm": 0.42231491208076477, "learning_rate": 5.748271655144368e-06, "loss": 0.444, "step": 2827 }, { "epoch": 0.28751525010166734, "grad_norm": 0.3846167325973511, "learning_rate": 5.750305002033348e-06, "loss": 0.4165, "step": 2828 }, { "epoch": 0.2876169174461163, "grad_norm": 0.4421161413192749, "learning_rate": 5.752338348922326e-06, "loss": 0.4122, "step": 2829 }, { "epoch": 0.2877185847905653, "grad_norm": 0.4660250246524811, "learning_rate": 5.754371695811306e-06, "loss": 0.4214, "step": 2830 }, { "epoch": 0.2878202521350142, "grad_norm": 0.4519292116165161, "learning_rate": 5.756405042700285e-06, "loss": 0.4469, "step": 2831 }, { "epoch": 0.2879219194794632, "grad_norm": 0.4391860365867615, "learning_rate": 5.758438389589265e-06, "loss": 0.4302, "step": 2832 }, { "epoch": 0.28802358682391216, "grad_norm": 0.42113080620765686, "learning_rate": 5.760471736478243e-06, "loss": 0.386, "step": 2833 }, { "epoch": 0.2881252541683611, "grad_norm": 0.39754122495651245, "learning_rate": 5.762505083367223e-06, "loss": 0.4475, "step": 2834 }, { "epoch": 0.2882269215128101, "grad_norm": 0.4038446545600891, "learning_rate": 5.764538430256202e-06, "loss": 0.4535, "step": 2835 }, { "epoch": 0.28832858885725904, "grad_norm": 0.4462076425552368, "learning_rate": 5.766571777145181e-06, "loss": 0.4345, "step": 2836 }, { "epoch": 0.28843025620170804, "grad_norm": 0.43609824776649475, "learning_rate": 5.768605124034161e-06, "loss": 0.4189, "step": 2837 }, { "epoch": 0.288531923546157, "grad_norm": 0.4182426333427429, "learning_rate": 5.77063847092314e-06, "loss": 0.3985, "step": 2838 }, { "epoch": 0.2886335908906059, "grad_norm": 0.44630950689315796, "learning_rate": 5.772671817812119e-06, "loss": 0.414, "step": 2839 }, { "epoch": 0.2887352582350549, "grad_norm": 0.4138738214969635, "learning_rate": 5.774705164701099e-06, "loss": 0.4505, "step": 2840 }, { "epoch": 0.28883692557950386, "grad_norm": 0.4367012679576874, "learning_rate": 5.776738511590078e-06, "loss": 0.4174, "step": 2841 }, { "epoch": 0.28893859292395285, "grad_norm": 0.43657058477401733, "learning_rate": 5.778771858479058e-06, "loss": 0.4422, "step": 2842 }, { "epoch": 0.2890402602684018, "grad_norm": 0.44345006346702576, "learning_rate": 5.780805205368037e-06, "loss": 0.4326, "step": 2843 }, { "epoch": 0.28914192761285074, "grad_norm": 0.47613638639450073, "learning_rate": 5.782838552257015e-06, "loss": 0.4362, "step": 2844 }, { "epoch": 0.28924359495729973, "grad_norm": 0.4727286994457245, "learning_rate": 5.784871899145995e-06, "loss": 0.4491, "step": 2845 }, { "epoch": 0.2893452623017487, "grad_norm": 0.43664059042930603, "learning_rate": 5.786905246034974e-06, "loss": 0.4104, "step": 2846 }, { "epoch": 0.2894469296461976, "grad_norm": 0.417115181684494, "learning_rate": 5.788938592923953e-06, "loss": 0.3965, "step": 2847 }, { "epoch": 0.2895485969906466, "grad_norm": 0.4072217643260956, "learning_rate": 5.790971939812932e-06, "loss": 0.3953, "step": 2848 }, { "epoch": 0.28965026433509555, "grad_norm": 0.4377451539039612, "learning_rate": 5.793005286701912e-06, "loss": 0.4436, "step": 2849 }, { "epoch": 0.28975193167954455, "grad_norm": 0.4004904329776764, "learning_rate": 5.795038633590891e-06, "loss": 0.4233, "step": 2850 }, { "epoch": 0.2898535990239935, "grad_norm": 0.41638967394828796, "learning_rate": 5.79707198047987e-06, "loss": 0.4553, "step": 2851 }, { "epoch": 0.28995526636844243, "grad_norm": 0.4013989567756653, "learning_rate": 5.7991053273688494e-06, "loss": 0.3986, "step": 2852 }, { "epoch": 0.29005693371289143, "grad_norm": 0.412418395280838, "learning_rate": 5.801138674257829e-06, "loss": 0.3938, "step": 2853 }, { "epoch": 0.29015860105734037, "grad_norm": 0.39299461245536804, "learning_rate": 5.8031720211468076e-06, "loss": 0.4126, "step": 2854 }, { "epoch": 0.29026026840178937, "grad_norm": 0.39972180128097534, "learning_rate": 5.805205368035788e-06, "loss": 0.4489, "step": 2855 }, { "epoch": 0.2903619357462383, "grad_norm": 0.44328606128692627, "learning_rate": 5.8072387149247665e-06, "loss": 0.4246, "step": 2856 }, { "epoch": 0.29046360309068725, "grad_norm": 0.38608789443969727, "learning_rate": 5.809272061813745e-06, "loss": 0.4006, "step": 2857 }, { "epoch": 0.29056527043513625, "grad_norm": 0.4896695017814636, "learning_rate": 5.8113054087027255e-06, "loss": 0.4327, "step": 2858 }, { "epoch": 0.2906669377795852, "grad_norm": 0.41996678709983826, "learning_rate": 5.813338755591704e-06, "loss": 0.4449, "step": 2859 }, { "epoch": 0.2907686051240342, "grad_norm": 0.41346198320388794, "learning_rate": 5.8153721024806845e-06, "loss": 0.4291, "step": 2860 }, { "epoch": 0.2908702724684831, "grad_norm": 0.4300538897514343, "learning_rate": 5.817405449369663e-06, "loss": 0.4335, "step": 2861 }, { "epoch": 0.29097193981293207, "grad_norm": 0.44853630661964417, "learning_rate": 5.819438796258642e-06, "loss": 0.4234, "step": 2862 }, { "epoch": 0.29107360715738106, "grad_norm": 0.42423325777053833, "learning_rate": 5.821472143147622e-06, "loss": 0.4219, "step": 2863 }, { "epoch": 0.29117527450183, "grad_norm": 0.41681885719299316, "learning_rate": 5.823505490036601e-06, "loss": 0.4178, "step": 2864 }, { "epoch": 0.291276941846279, "grad_norm": 0.44436585903167725, "learning_rate": 5.82553883692558e-06, "loss": 0.4204, "step": 2865 }, { "epoch": 0.29137860919072794, "grad_norm": 0.41891661286354065, "learning_rate": 5.82757218381456e-06, "loss": 0.4137, "step": 2866 }, { "epoch": 0.2914802765351769, "grad_norm": 0.47312113642692566, "learning_rate": 5.829605530703538e-06, "loss": 0.4293, "step": 2867 }, { "epoch": 0.2915819438796259, "grad_norm": 0.4370277523994446, "learning_rate": 5.831638877592518e-06, "loss": 0.4337, "step": 2868 }, { "epoch": 0.2916836112240748, "grad_norm": 0.422870010137558, "learning_rate": 5.833672224481497e-06, "loss": 0.4278, "step": 2869 }, { "epoch": 0.29178527856852376, "grad_norm": 0.4540294408798218, "learning_rate": 5.835705571370476e-06, "loss": 0.4086, "step": 2870 }, { "epoch": 0.29188694591297276, "grad_norm": 0.44440922141075134, "learning_rate": 5.8377389182594555e-06, "loss": 0.4241, "step": 2871 }, { "epoch": 0.2919886132574217, "grad_norm": 0.4651002287864685, "learning_rate": 5.839772265148435e-06, "loss": 0.4185, "step": 2872 }, { "epoch": 0.2920902806018707, "grad_norm": 0.43832528591156006, "learning_rate": 5.8418056120374144e-06, "loss": 0.4483, "step": 2873 }, { "epoch": 0.29219194794631964, "grad_norm": 0.3902309834957123, "learning_rate": 5.843838958926393e-06, "loss": 0.4302, "step": 2874 }, { "epoch": 0.2922936152907686, "grad_norm": 0.4723833501338959, "learning_rate": 5.845872305815372e-06, "loss": 0.4353, "step": 2875 }, { "epoch": 0.2923952826352176, "grad_norm": 0.4741506278514862, "learning_rate": 5.847905652704352e-06, "loss": 0.4443, "step": 2876 }, { "epoch": 0.2924969499796665, "grad_norm": 0.4353075325489044, "learning_rate": 5.849938999593331e-06, "loss": 0.412, "step": 2877 }, { "epoch": 0.2925986173241155, "grad_norm": 0.5104483962059021, "learning_rate": 5.851972346482311e-06, "loss": 0.411, "step": 2878 }, { "epoch": 0.29270028466856446, "grad_norm": 0.4933713376522064, "learning_rate": 5.85400569337129e-06, "loss": 0.3852, "step": 2879 }, { "epoch": 0.2928019520130134, "grad_norm": 0.41149213910102844, "learning_rate": 5.856039040260268e-06, "loss": 0.4252, "step": 2880 }, { "epoch": 0.2929036193574624, "grad_norm": 0.457311749458313, "learning_rate": 5.858072387149249e-06, "loss": 0.4253, "step": 2881 }, { "epoch": 0.29300528670191134, "grad_norm": 0.4509248435497284, "learning_rate": 5.860105734038227e-06, "loss": 0.442, "step": 2882 }, { "epoch": 0.29310695404636034, "grad_norm": 0.4561326503753662, "learning_rate": 5.862139080927207e-06, "loss": 0.3895, "step": 2883 }, { "epoch": 0.2932086213908093, "grad_norm": 0.4552188515663147, "learning_rate": 5.864172427816186e-06, "loss": 0.4124, "step": 2884 }, { "epoch": 0.2933102887352582, "grad_norm": 0.46244505047798157, "learning_rate": 5.866205774705165e-06, "loss": 0.4068, "step": 2885 }, { "epoch": 0.2934119560797072, "grad_norm": 0.4725990891456604, "learning_rate": 5.868239121594144e-06, "loss": 0.433, "step": 2886 }, { "epoch": 0.29351362342415616, "grad_norm": 0.41812974214553833, "learning_rate": 5.870272468483124e-06, "loss": 0.4318, "step": 2887 }, { "epoch": 0.2936152907686051, "grad_norm": 0.4113747775554657, "learning_rate": 5.8723058153721025e-06, "loss": 0.3967, "step": 2888 }, { "epoch": 0.2937169581130541, "grad_norm": 0.4177694022655487, "learning_rate": 5.874339162261082e-06, "loss": 0.4128, "step": 2889 }, { "epoch": 0.29381862545750304, "grad_norm": 0.42378777265548706, "learning_rate": 5.8763725091500615e-06, "loss": 0.4163, "step": 2890 }, { "epoch": 0.29392029280195203, "grad_norm": 0.42762666940689087, "learning_rate": 5.878405856039041e-06, "loss": 0.4458, "step": 2891 }, { "epoch": 0.294021960146401, "grad_norm": 0.3896626830101013, "learning_rate": 5.88043920292802e-06, "loss": 0.4181, "step": 2892 }, { "epoch": 0.2941236274908499, "grad_norm": 0.4299125373363495, "learning_rate": 5.882472549816999e-06, "loss": 0.4259, "step": 2893 }, { "epoch": 0.2942252948352989, "grad_norm": 0.3865123689174652, "learning_rate": 5.884505896705979e-06, "loss": 0.4405, "step": 2894 }, { "epoch": 0.29432696217974785, "grad_norm": 0.44310659170150757, "learning_rate": 5.886539243594957e-06, "loss": 0.4249, "step": 2895 }, { "epoch": 0.29442862952419685, "grad_norm": 0.46469810605049133, "learning_rate": 5.888572590483938e-06, "loss": 0.4644, "step": 2896 }, { "epoch": 0.2945302968686458, "grad_norm": 0.4070850610733032, "learning_rate": 5.890605937372916e-06, "loss": 0.4116, "step": 2897 }, { "epoch": 0.29463196421309473, "grad_norm": 0.47301074862480164, "learning_rate": 5.892639284261895e-06, "loss": 0.4291, "step": 2898 }, { "epoch": 0.29473363155754373, "grad_norm": 0.40566718578338623, "learning_rate": 5.894672631150875e-06, "loss": 0.4024, "step": 2899 }, { "epoch": 0.29483529890199267, "grad_norm": 0.45105981826782227, "learning_rate": 5.896705978039854e-06, "loss": 0.453, "step": 2900 }, { "epoch": 0.29493696624644167, "grad_norm": 0.44110679626464844, "learning_rate": 5.898739324928834e-06, "loss": 0.4302, "step": 2901 }, { "epoch": 0.2950386335908906, "grad_norm": 0.46201780438423157, "learning_rate": 5.900772671817813e-06, "loss": 0.409, "step": 2902 }, { "epoch": 0.29514030093533955, "grad_norm": 0.4749026298522949, "learning_rate": 5.9028060187067915e-06, "loss": 0.4347, "step": 2903 }, { "epoch": 0.29524196827978855, "grad_norm": 0.3831827938556671, "learning_rate": 5.904839365595772e-06, "loss": 0.3791, "step": 2904 }, { "epoch": 0.2953436356242375, "grad_norm": 0.4292316436767578, "learning_rate": 5.9068727124847504e-06, "loss": 0.4501, "step": 2905 }, { "epoch": 0.2954453029686865, "grad_norm": 0.5308430790901184, "learning_rate": 5.90890605937373e-06, "loss": 0.4266, "step": 2906 }, { "epoch": 0.2955469703131354, "grad_norm": 0.4498460590839386, "learning_rate": 5.910939406262709e-06, "loss": 0.4082, "step": 2907 }, { "epoch": 0.29564863765758437, "grad_norm": 0.5140566229820251, "learning_rate": 5.912972753151688e-06, "loss": 0.4089, "step": 2908 }, { "epoch": 0.29575030500203336, "grad_norm": 0.49016016721725464, "learning_rate": 5.9150061000406675e-06, "loss": 0.3953, "step": 2909 }, { "epoch": 0.2958519723464823, "grad_norm": 0.446196585893631, "learning_rate": 5.917039446929646e-06, "loss": 0.4533, "step": 2910 }, { "epoch": 0.29595363969093125, "grad_norm": 0.4925822615623474, "learning_rate": 5.919072793818626e-06, "loss": 0.4137, "step": 2911 }, { "epoch": 0.29605530703538024, "grad_norm": 0.4993203282356262, "learning_rate": 5.921106140707605e-06, "loss": 0.4066, "step": 2912 }, { "epoch": 0.2961569743798292, "grad_norm": 0.4290909469127655, "learning_rate": 5.923139487596584e-06, "loss": 0.3919, "step": 2913 }, { "epoch": 0.2962586417242782, "grad_norm": 0.44885310530662537, "learning_rate": 5.925172834485564e-06, "loss": 0.4245, "step": 2914 }, { "epoch": 0.2963603090687271, "grad_norm": 0.5041319727897644, "learning_rate": 5.927206181374543e-06, "loss": 0.4297, "step": 2915 }, { "epoch": 0.29646197641317606, "grad_norm": 0.4586556553840637, "learning_rate": 5.929239528263521e-06, "loss": 0.4649, "step": 2916 }, { "epoch": 0.29656364375762506, "grad_norm": 0.47720804810523987, "learning_rate": 5.931272875152502e-06, "loss": 0.4341, "step": 2917 }, { "epoch": 0.296665311102074, "grad_norm": 0.5094249248504639, "learning_rate": 5.93330622204148e-06, "loss": 0.4104, "step": 2918 }, { "epoch": 0.296766978446523, "grad_norm": 0.4299613833427429, "learning_rate": 5.935339568930461e-06, "loss": 0.4214, "step": 2919 }, { "epoch": 0.29686864579097194, "grad_norm": 0.46016058325767517, "learning_rate": 5.937372915819439e-06, "loss": 0.4434, "step": 2920 }, { "epoch": 0.2969703131354209, "grad_norm": 0.4612347483634949, "learning_rate": 5.939406262708418e-06, "loss": 0.4362, "step": 2921 }, { "epoch": 0.2970719804798699, "grad_norm": 0.4183039963245392, "learning_rate": 5.941439609597398e-06, "loss": 0.413, "step": 2922 }, { "epoch": 0.2971736478243188, "grad_norm": 0.4698634147644043, "learning_rate": 5.943472956486377e-06, "loss": 0.4122, "step": 2923 }, { "epoch": 0.2972753151687678, "grad_norm": 0.47154301404953003, "learning_rate": 5.9455063033753565e-06, "loss": 0.4057, "step": 2924 }, { "epoch": 0.29737698251321676, "grad_norm": 0.45675045251846313, "learning_rate": 5.947539650264336e-06, "loss": 0.4276, "step": 2925 }, { "epoch": 0.2974786498576657, "grad_norm": 0.4441818594932556, "learning_rate": 5.949572997153315e-06, "loss": 0.4016, "step": 2926 }, { "epoch": 0.2975803172021147, "grad_norm": 0.4364909827709198, "learning_rate": 5.951606344042294e-06, "loss": 0.4099, "step": 2927 }, { "epoch": 0.29768198454656364, "grad_norm": 0.43199676275253296, "learning_rate": 5.953639690931274e-06, "loss": 0.4014, "step": 2928 }, { "epoch": 0.2977836518910126, "grad_norm": 0.4160953760147095, "learning_rate": 5.955673037820252e-06, "loss": 0.3988, "step": 2929 }, { "epoch": 0.2978853192354616, "grad_norm": 0.49816474318504333, "learning_rate": 5.957706384709232e-06, "loss": 0.4623, "step": 2930 }, { "epoch": 0.2979869865799105, "grad_norm": 0.4367910623550415, "learning_rate": 5.959739731598211e-06, "loss": 0.4114, "step": 2931 }, { "epoch": 0.2980886539243595, "grad_norm": 0.432414710521698, "learning_rate": 5.961773078487191e-06, "loss": 0.4796, "step": 2932 }, { "epoch": 0.29819032126880846, "grad_norm": 0.4882549047470093, "learning_rate": 5.963806425376169e-06, "loss": 0.4722, "step": 2933 }, { "epoch": 0.2982919886132574, "grad_norm": 0.4428075850009918, "learning_rate": 5.965839772265149e-06, "loss": 0.4647, "step": 2934 }, { "epoch": 0.2983936559577064, "grad_norm": 0.4622853398323059, "learning_rate": 5.967873119154128e-06, "loss": 0.3943, "step": 2935 }, { "epoch": 0.29849532330215534, "grad_norm": 0.4456459879875183, "learning_rate": 5.969906466043107e-06, "loss": 0.4081, "step": 2936 }, { "epoch": 0.29859699064660433, "grad_norm": 0.49127697944641113, "learning_rate": 5.971939812932087e-06, "loss": 0.4709, "step": 2937 }, { "epoch": 0.2986986579910533, "grad_norm": 0.4315669536590576, "learning_rate": 5.973973159821066e-06, "loss": 0.4534, "step": 2938 }, { "epoch": 0.2988003253355022, "grad_norm": 0.4410107135772705, "learning_rate": 5.9760065067100446e-06, "loss": 0.4424, "step": 2939 }, { "epoch": 0.2989019926799512, "grad_norm": 0.45235469937324524, "learning_rate": 5.978039853599025e-06, "loss": 0.4, "step": 2940 }, { "epoch": 0.29900366002440015, "grad_norm": 0.4209049344062805, "learning_rate": 5.9800732004880035e-06, "loss": 0.46, "step": 2941 }, { "epoch": 0.29910532736884915, "grad_norm": 0.4712408781051636, "learning_rate": 5.982106547376984e-06, "loss": 0.4033, "step": 2942 }, { "epoch": 0.2992069947132981, "grad_norm": 0.4558609426021576, "learning_rate": 5.9841398942659625e-06, "loss": 0.4144, "step": 2943 }, { "epoch": 0.29930866205774703, "grad_norm": 0.5112335681915283, "learning_rate": 5.986173241154941e-06, "loss": 0.446, "step": 2944 }, { "epoch": 0.29941032940219603, "grad_norm": 0.3977620303630829, "learning_rate": 5.9882065880439215e-06, "loss": 0.387, "step": 2945 }, { "epoch": 0.29951199674664497, "grad_norm": 0.49050629138946533, "learning_rate": 5.9902399349329e-06, "loss": 0.4232, "step": 2946 }, { "epoch": 0.29961366409109397, "grad_norm": 0.4993349611759186, "learning_rate": 5.99227328182188e-06, "loss": 0.4212, "step": 2947 }, { "epoch": 0.2997153314355429, "grad_norm": 0.4450210630893707, "learning_rate": 5.994306628710858e-06, "loss": 0.4499, "step": 2948 }, { "epoch": 0.29981699877999185, "grad_norm": 0.4233285188674927, "learning_rate": 5.996339975599838e-06, "loss": 0.3859, "step": 2949 }, { "epoch": 0.29991866612444085, "grad_norm": 0.4129095673561096, "learning_rate": 5.998373322488817e-06, "loss": 0.4096, "step": 2950 }, { "epoch": 0.3000203334688898, "grad_norm": 0.45695334672927856, "learning_rate": 6.000406669377796e-06, "loss": 0.4596, "step": 2951 }, { "epoch": 0.30012200081333873, "grad_norm": 0.41501113772392273, "learning_rate": 6.002440016266775e-06, "loss": 0.441, "step": 2952 }, { "epoch": 0.3002236681577877, "grad_norm": 0.37519681453704834, "learning_rate": 6.004473363155755e-06, "loss": 0.3828, "step": 2953 }, { "epoch": 0.30032533550223667, "grad_norm": 0.4038192629814148, "learning_rate": 6.0065067100447335e-06, "loss": 0.4069, "step": 2954 }, { "epoch": 0.30042700284668566, "grad_norm": 0.4436470568180084, "learning_rate": 6.008540056933714e-06, "loss": 0.4164, "step": 2955 }, { "epoch": 0.3005286701911346, "grad_norm": 0.40063539147377014, "learning_rate": 6.0105734038226925e-06, "loss": 0.4417, "step": 2956 }, { "epoch": 0.30063033753558355, "grad_norm": 0.461082398891449, "learning_rate": 6.012606750711671e-06, "loss": 0.4435, "step": 2957 }, { "epoch": 0.30073200488003254, "grad_norm": 0.4371843934059143, "learning_rate": 6.0146400976006514e-06, "loss": 0.4006, "step": 2958 }, { "epoch": 0.3008336722244815, "grad_norm": 0.40007534623146057, "learning_rate": 6.01667344448963e-06, "loss": 0.4196, "step": 2959 }, { "epoch": 0.3009353395689305, "grad_norm": 0.46784141659736633, "learning_rate": 6.0187067913786104e-06, "loss": 0.4487, "step": 2960 }, { "epoch": 0.3010370069133794, "grad_norm": 0.39515721797943115, "learning_rate": 6.020740138267589e-06, "loss": 0.4068, "step": 2961 }, { "epoch": 0.30113867425782836, "grad_norm": 0.4093768000602722, "learning_rate": 6.022773485156568e-06, "loss": 0.4773, "step": 2962 }, { "epoch": 0.30124034160227736, "grad_norm": 0.37309613823890686, "learning_rate": 6.024806832045548e-06, "loss": 0.401, "step": 2963 }, { "epoch": 0.3013420089467263, "grad_norm": 0.408689945936203, "learning_rate": 6.026840178934527e-06, "loss": 0.4189, "step": 2964 }, { "epoch": 0.3014436762911753, "grad_norm": 0.4308720827102661, "learning_rate": 6.028873525823506e-06, "loss": 0.4099, "step": 2965 }, { "epoch": 0.30154534363562424, "grad_norm": 0.46957939863204956, "learning_rate": 6.030906872712486e-06, "loss": 0.4364, "step": 2966 }, { "epoch": 0.3016470109800732, "grad_norm": 0.3949540853500366, "learning_rate": 6.032940219601464e-06, "loss": 0.3983, "step": 2967 }, { "epoch": 0.3017486783245222, "grad_norm": 0.43454837799072266, "learning_rate": 6.034973566490444e-06, "loss": 0.3992, "step": 2968 }, { "epoch": 0.3018503456689711, "grad_norm": 0.517899751663208, "learning_rate": 6.037006913379423e-06, "loss": 0.4152, "step": 2969 }, { "epoch": 0.30195201301342006, "grad_norm": 0.3946042060852051, "learning_rate": 6.039040260268402e-06, "loss": 0.4022, "step": 2970 }, { "epoch": 0.30205368035786906, "grad_norm": 0.4004805386066437, "learning_rate": 6.041073607157381e-06, "loss": 0.4241, "step": 2971 }, { "epoch": 0.302155347702318, "grad_norm": 0.4435374438762665, "learning_rate": 6.043106954046361e-06, "loss": 0.4215, "step": 2972 }, { "epoch": 0.302257015046767, "grad_norm": 0.42880821228027344, "learning_rate": 6.04514030093534e-06, "loss": 0.442, "step": 2973 }, { "epoch": 0.30235868239121594, "grad_norm": 0.40148624777793884, "learning_rate": 6.047173647824319e-06, "loss": 0.3961, "step": 2974 }, { "epoch": 0.3024603497356649, "grad_norm": 0.41079801321029663, "learning_rate": 6.049206994713298e-06, "loss": 0.4294, "step": 2975 }, { "epoch": 0.3025620170801139, "grad_norm": 0.4642702341079712, "learning_rate": 6.051240341602278e-06, "loss": 0.3888, "step": 2976 }, { "epoch": 0.3026636844245628, "grad_norm": 0.42031484842300415, "learning_rate": 6.053273688491257e-06, "loss": 0.4238, "step": 2977 }, { "epoch": 0.3027653517690118, "grad_norm": 0.4061662554740906, "learning_rate": 6.055307035380237e-06, "loss": 0.4309, "step": 2978 }, { "epoch": 0.30286701911346076, "grad_norm": 0.4348539412021637, "learning_rate": 6.057340382269216e-06, "loss": 0.4306, "step": 2979 }, { "epoch": 0.3029686864579097, "grad_norm": 0.4207412302494049, "learning_rate": 6.059373729158194e-06, "loss": 0.402, "step": 2980 }, { "epoch": 0.3030703538023587, "grad_norm": 0.4061996042728424, "learning_rate": 6.061407076047175e-06, "loss": 0.3744, "step": 2981 }, { "epoch": 0.30317202114680764, "grad_norm": 0.4042004942893982, "learning_rate": 6.063440422936153e-06, "loss": 0.4558, "step": 2982 }, { "epoch": 0.30327368849125663, "grad_norm": 0.439167857170105, "learning_rate": 6.065473769825133e-06, "loss": 0.435, "step": 2983 }, { "epoch": 0.3033753558357056, "grad_norm": 0.4517461061477661, "learning_rate": 6.067507116714112e-06, "loss": 0.4496, "step": 2984 }, { "epoch": 0.3034770231801545, "grad_norm": 0.403134286403656, "learning_rate": 6.069540463603091e-06, "loss": 0.4511, "step": 2985 }, { "epoch": 0.3035786905246035, "grad_norm": 0.3991672098636627, "learning_rate": 6.07157381049207e-06, "loss": 0.4222, "step": 2986 }, { "epoch": 0.30368035786905245, "grad_norm": 0.3975208103656769, "learning_rate": 6.07360715738105e-06, "loss": 0.3683, "step": 2987 }, { "epoch": 0.30378202521350145, "grad_norm": 0.4431905746459961, "learning_rate": 6.075640504270029e-06, "loss": 0.4639, "step": 2988 }, { "epoch": 0.3038836925579504, "grad_norm": 0.4080812633037567, "learning_rate": 6.077673851159008e-06, "loss": 0.3947, "step": 2989 }, { "epoch": 0.30398535990239933, "grad_norm": 0.4387376308441162, "learning_rate": 6.0797071980479874e-06, "loss": 0.4004, "step": 2990 }, { "epoch": 0.30408702724684833, "grad_norm": 0.47752824425697327, "learning_rate": 6.081740544936967e-06, "loss": 0.4331, "step": 2991 }, { "epoch": 0.30418869459129727, "grad_norm": 0.40338489413261414, "learning_rate": 6.0837738918259456e-06, "loss": 0.4247, "step": 2992 }, { "epoch": 0.3042903619357462, "grad_norm": 0.46128758788108826, "learning_rate": 6.085807238714925e-06, "loss": 0.43, "step": 2993 }, { "epoch": 0.3043920292801952, "grad_norm": 0.45027703046798706, "learning_rate": 6.0878405856039045e-06, "loss": 0.4181, "step": 2994 }, { "epoch": 0.30449369662464415, "grad_norm": 0.44791510701179504, "learning_rate": 6.089873932492883e-06, "loss": 0.4566, "step": 2995 }, { "epoch": 0.30459536396909315, "grad_norm": 0.44718241691589355, "learning_rate": 6.0919072793818635e-06, "loss": 0.4074, "step": 2996 }, { "epoch": 0.3046970313135421, "grad_norm": 0.4676651060581207, "learning_rate": 6.093940626270842e-06, "loss": 0.4762, "step": 2997 }, { "epoch": 0.30479869865799103, "grad_norm": 0.48000478744506836, "learning_rate": 6.095973973159821e-06, "loss": 0.4387, "step": 2998 }, { "epoch": 0.30490036600244, "grad_norm": 0.4549228847026825, "learning_rate": 6.098007320048801e-06, "loss": 0.4244, "step": 2999 }, { "epoch": 0.30500203334688897, "grad_norm": 0.40319111943244934, "learning_rate": 6.10004066693778e-06, "loss": 0.427, "step": 3000 }, { "epoch": 0.30510370069133796, "grad_norm": 0.4108218550682068, "learning_rate": 6.10207401382676e-06, "loss": 0.4117, "step": 3001 }, { "epoch": 0.3052053680357869, "grad_norm": 0.47248226404190063, "learning_rate": 6.104107360715739e-06, "loss": 0.4165, "step": 3002 }, { "epoch": 0.30530703538023585, "grad_norm": 0.4552236795425415, "learning_rate": 6.106140707604717e-06, "loss": 0.4124, "step": 3003 }, { "epoch": 0.30540870272468484, "grad_norm": 0.4235081374645233, "learning_rate": 6.108174054493698e-06, "loss": 0.3977, "step": 3004 }, { "epoch": 0.3055103700691338, "grad_norm": 0.4061669409275055, "learning_rate": 6.110207401382676e-06, "loss": 0.388, "step": 3005 }, { "epoch": 0.3056120374135828, "grad_norm": 0.46834596991539, "learning_rate": 6.112240748271656e-06, "loss": 0.3798, "step": 3006 }, { "epoch": 0.3057137047580317, "grad_norm": 0.4217863976955414, "learning_rate": 6.114274095160635e-06, "loss": 0.3981, "step": 3007 }, { "epoch": 0.30581537210248066, "grad_norm": 0.4180501401424408, "learning_rate": 6.116307442049614e-06, "loss": 0.4286, "step": 3008 }, { "epoch": 0.30591703944692966, "grad_norm": 0.5112261176109314, "learning_rate": 6.1183407889385935e-06, "loss": 0.4322, "step": 3009 }, { "epoch": 0.3060187067913786, "grad_norm": 0.4454021751880646, "learning_rate": 6.120374135827573e-06, "loss": 0.3962, "step": 3010 }, { "epoch": 0.3061203741358276, "grad_norm": 0.3966260254383087, "learning_rate": 6.1224074827165525e-06, "loss": 0.4081, "step": 3011 }, { "epoch": 0.30622204148027654, "grad_norm": 0.45175573229789734, "learning_rate": 6.124440829605531e-06, "loss": 0.4137, "step": 3012 }, { "epoch": 0.3063237088247255, "grad_norm": 0.4848605990409851, "learning_rate": 6.12647417649451e-06, "loss": 0.4138, "step": 3013 }, { "epoch": 0.3064253761691745, "grad_norm": 0.46911466121673584, "learning_rate": 6.12850752338349e-06, "loss": 0.4228, "step": 3014 }, { "epoch": 0.3065270435136234, "grad_norm": 0.4377933442592621, "learning_rate": 6.130540870272469e-06, "loss": 0.4267, "step": 3015 }, { "epoch": 0.30662871085807236, "grad_norm": 0.4428386092185974, "learning_rate": 6.132574217161447e-06, "loss": 0.441, "step": 3016 }, { "epoch": 0.30673037820252136, "grad_norm": 0.48005321621894836, "learning_rate": 6.134607564050428e-06, "loss": 0.402, "step": 3017 }, { "epoch": 0.3068320455469703, "grad_norm": 0.4074268639087677, "learning_rate": 6.136640910939406e-06, "loss": 0.4207, "step": 3018 }, { "epoch": 0.3069337128914193, "grad_norm": 0.3973088562488556, "learning_rate": 6.138674257828387e-06, "loss": 0.4113, "step": 3019 }, { "epoch": 0.30703538023586824, "grad_norm": 0.5056679248809814, "learning_rate": 6.140707604717365e-06, "loss": 0.4359, "step": 3020 }, { "epoch": 0.3071370475803172, "grad_norm": 0.3927408456802368, "learning_rate": 6.142740951606344e-06, "loss": 0.3969, "step": 3021 }, { "epoch": 0.3072387149247662, "grad_norm": 0.4235185980796814, "learning_rate": 6.144774298495324e-06, "loss": 0.3963, "step": 3022 }, { "epoch": 0.3073403822692151, "grad_norm": 0.43311452865600586, "learning_rate": 6.146807645384303e-06, "loss": 0.3953, "step": 3023 }, { "epoch": 0.3074420496136641, "grad_norm": 0.4321458339691162, "learning_rate": 6.148840992273282e-06, "loss": 0.4115, "step": 3024 }, { "epoch": 0.30754371695811306, "grad_norm": 0.39344972372055054, "learning_rate": 6.150874339162262e-06, "loss": 0.3812, "step": 3025 }, { "epoch": 0.307645384302562, "grad_norm": 0.4347269833087921, "learning_rate": 6.1529076860512405e-06, "loss": 0.3941, "step": 3026 }, { "epoch": 0.307747051647011, "grad_norm": 0.4675047993659973, "learning_rate": 6.15494103294022e-06, "loss": 0.3864, "step": 3027 }, { "epoch": 0.30784871899145994, "grad_norm": 0.43356460332870483, "learning_rate": 6.1569743798291995e-06, "loss": 0.4329, "step": 3028 }, { "epoch": 0.30795038633590893, "grad_norm": 0.4440443217754364, "learning_rate": 6.159007726718179e-06, "loss": 0.3921, "step": 3029 }, { "epoch": 0.3080520536803579, "grad_norm": 0.42651239037513733, "learning_rate": 6.161041073607158e-06, "loss": 0.4217, "step": 3030 }, { "epoch": 0.3081537210248068, "grad_norm": 0.46088874340057373, "learning_rate": 6.163074420496137e-06, "loss": 0.4281, "step": 3031 }, { "epoch": 0.3082553883692558, "grad_norm": 0.4500730037689209, "learning_rate": 6.165107767385117e-06, "loss": 0.4126, "step": 3032 }, { "epoch": 0.30835705571370475, "grad_norm": 0.47427910566329956, "learning_rate": 6.167141114274095e-06, "loss": 0.3947, "step": 3033 }, { "epoch": 0.3084587230581537, "grad_norm": 0.4285535514354706, "learning_rate": 6.169174461163075e-06, "loss": 0.4238, "step": 3034 }, { "epoch": 0.3085603904026027, "grad_norm": 0.45352765917778015, "learning_rate": 6.171207808052054e-06, "loss": 0.4313, "step": 3035 }, { "epoch": 0.30866205774705163, "grad_norm": 0.4323723614215851, "learning_rate": 6.173241154941033e-06, "loss": 0.4062, "step": 3036 }, { "epoch": 0.30876372509150063, "grad_norm": 0.41933760046958923, "learning_rate": 6.175274501830013e-06, "loss": 0.4082, "step": 3037 }, { "epoch": 0.30886539243594957, "grad_norm": 0.416179358959198, "learning_rate": 6.177307848718992e-06, "loss": 0.4509, "step": 3038 }, { "epoch": 0.3089670597803985, "grad_norm": 0.440053790807724, "learning_rate": 6.1793411956079705e-06, "loss": 0.4056, "step": 3039 }, { "epoch": 0.3090687271248475, "grad_norm": 0.424672931432724, "learning_rate": 6.181374542496951e-06, "loss": 0.4366, "step": 3040 }, { "epoch": 0.30917039446929645, "grad_norm": 0.39512595534324646, "learning_rate": 6.1834078893859295e-06, "loss": 0.3934, "step": 3041 }, { "epoch": 0.30927206181374545, "grad_norm": 0.46973317861557007, "learning_rate": 6.18544123627491e-06, "loss": 0.4633, "step": 3042 }, { "epoch": 0.3093737291581944, "grad_norm": 0.4475657641887665, "learning_rate": 6.1874745831638885e-06, "loss": 0.4182, "step": 3043 }, { "epoch": 0.30947539650264333, "grad_norm": 0.4214252233505249, "learning_rate": 6.189507930052867e-06, "loss": 0.4092, "step": 3044 }, { "epoch": 0.3095770638470923, "grad_norm": 0.4459281861782074, "learning_rate": 6.1915412769418474e-06, "loss": 0.399, "step": 3045 }, { "epoch": 0.30967873119154127, "grad_norm": 0.5189003944396973, "learning_rate": 6.193574623830826e-06, "loss": 0.4193, "step": 3046 }, { "epoch": 0.30978039853599026, "grad_norm": 0.4268452227115631, "learning_rate": 6.1956079707198056e-06, "loss": 0.4182, "step": 3047 }, { "epoch": 0.3098820658804392, "grad_norm": 0.43888169527053833, "learning_rate": 6.197641317608785e-06, "loss": 0.4218, "step": 3048 }, { "epoch": 0.30998373322488815, "grad_norm": 0.5157898664474487, "learning_rate": 6.199674664497764e-06, "loss": 0.3785, "step": 3049 }, { "epoch": 0.31008540056933714, "grad_norm": 0.4037718176841736, "learning_rate": 6.201708011386743e-06, "loss": 0.3737, "step": 3050 }, { "epoch": 0.3101870679137861, "grad_norm": 0.3906484544277191, "learning_rate": 6.203741358275722e-06, "loss": 0.4089, "step": 3051 }, { "epoch": 0.3102887352582351, "grad_norm": 0.47029170393943787, "learning_rate": 6.205774705164702e-06, "loss": 0.4512, "step": 3052 }, { "epoch": 0.310390402602684, "grad_norm": 0.4361439347267151, "learning_rate": 6.207808052053681e-06, "loss": 0.4364, "step": 3053 }, { "epoch": 0.31049206994713296, "grad_norm": 0.4079779088497162, "learning_rate": 6.2098413989426594e-06, "loss": 0.411, "step": 3054 }, { "epoch": 0.31059373729158196, "grad_norm": 0.43379420042037964, "learning_rate": 6.21187474583164e-06, "loss": 0.4211, "step": 3055 }, { "epoch": 0.3106954046360309, "grad_norm": 0.4350428581237793, "learning_rate": 6.213908092720618e-06, "loss": 0.4181, "step": 3056 }, { "epoch": 0.31079707198047984, "grad_norm": 0.41732171177864075, "learning_rate": 6.215941439609597e-06, "loss": 0.4184, "step": 3057 }, { "epoch": 0.31089873932492884, "grad_norm": 0.40273597836494446, "learning_rate": 6.217974786498577e-06, "loss": 0.3777, "step": 3058 }, { "epoch": 0.3110004066693778, "grad_norm": 0.41408196091651917, "learning_rate": 6.220008133387556e-06, "loss": 0.4417, "step": 3059 }, { "epoch": 0.3111020740138268, "grad_norm": 0.41529303789138794, "learning_rate": 6.222041480276536e-06, "loss": 0.4227, "step": 3060 }, { "epoch": 0.3112037413582757, "grad_norm": 0.4174092411994934, "learning_rate": 6.224074827165515e-06, "loss": 0.4276, "step": 3061 }, { "epoch": 0.31130540870272466, "grad_norm": 0.44393572211265564, "learning_rate": 6.226108174054494e-06, "loss": 0.4316, "step": 3062 }, { "epoch": 0.31140707604717366, "grad_norm": 0.4406792223453522, "learning_rate": 6.228141520943474e-06, "loss": 0.429, "step": 3063 }, { "epoch": 0.3115087433916226, "grad_norm": 0.4278298318386078, "learning_rate": 6.230174867832453e-06, "loss": 0.4077, "step": 3064 }, { "epoch": 0.3116104107360716, "grad_norm": 0.4211595356464386, "learning_rate": 6.232208214721432e-06, "loss": 0.4055, "step": 3065 }, { "epoch": 0.31171207808052054, "grad_norm": 0.42777061462402344, "learning_rate": 6.234241561610412e-06, "loss": 0.4353, "step": 3066 }, { "epoch": 0.3118137454249695, "grad_norm": 0.41136547923088074, "learning_rate": 6.23627490849939e-06, "loss": 0.4482, "step": 3067 }, { "epoch": 0.3119154127694185, "grad_norm": 0.45806965231895447, "learning_rate": 6.23830825538837e-06, "loss": 0.4356, "step": 3068 }, { "epoch": 0.3120170801138674, "grad_norm": 0.3915051519870758, "learning_rate": 6.240341602277349e-06, "loss": 0.4097, "step": 3069 }, { "epoch": 0.3121187474583164, "grad_norm": 0.4997025430202484, "learning_rate": 6.242374949166329e-06, "loss": 0.3965, "step": 3070 }, { "epoch": 0.31222041480276536, "grad_norm": 0.41885268688201904, "learning_rate": 6.244408296055307e-06, "loss": 0.4294, "step": 3071 }, { "epoch": 0.3123220821472143, "grad_norm": 0.4129427969455719, "learning_rate": 6.246441642944287e-06, "loss": 0.4197, "step": 3072 }, { "epoch": 0.3124237494916633, "grad_norm": 0.46564149856567383, "learning_rate": 6.248474989833266e-06, "loss": 0.4461, "step": 3073 }, { "epoch": 0.31252541683611224, "grad_norm": 0.42328014969825745, "learning_rate": 6.250508336722245e-06, "loss": 0.4224, "step": 3074 }, { "epoch": 0.3126270841805612, "grad_norm": 0.40076249837875366, "learning_rate": 6.2525416836112244e-06, "loss": 0.4046, "step": 3075 }, { "epoch": 0.3127287515250102, "grad_norm": 0.5514982342720032, "learning_rate": 6.254575030500204e-06, "loss": 0.4074, "step": 3076 }, { "epoch": 0.3128304188694591, "grad_norm": 0.43758389353752136, "learning_rate": 6.256608377389183e-06, "loss": 0.4296, "step": 3077 }, { "epoch": 0.3129320862139081, "grad_norm": 0.4582415223121643, "learning_rate": 6.258641724278163e-06, "loss": 0.4043, "step": 3078 }, { "epoch": 0.31303375355835705, "grad_norm": 0.44266200065612793, "learning_rate": 6.2606750711671416e-06, "loss": 0.4206, "step": 3079 }, { "epoch": 0.313135420902806, "grad_norm": 0.4499458968639374, "learning_rate": 6.26270841805612e-06, "loss": 0.3911, "step": 3080 }, { "epoch": 0.313237088247255, "grad_norm": 0.46882110834121704, "learning_rate": 6.2647417649451005e-06, "loss": 0.4513, "step": 3081 }, { "epoch": 0.31333875559170393, "grad_norm": 0.39341607689857483, "learning_rate": 6.266775111834079e-06, "loss": 0.4266, "step": 3082 }, { "epoch": 0.31344042293615293, "grad_norm": 0.45328831672668457, "learning_rate": 6.2688084587230595e-06, "loss": 0.4356, "step": 3083 }, { "epoch": 0.31354209028060187, "grad_norm": 0.44344866275787354, "learning_rate": 6.270841805612038e-06, "loss": 0.4501, "step": 3084 }, { "epoch": 0.3136437576250508, "grad_norm": 0.45488008856773376, "learning_rate": 6.272875152501017e-06, "loss": 0.44, "step": 3085 }, { "epoch": 0.3137454249694998, "grad_norm": 0.41698309779167175, "learning_rate": 6.274908499389996e-06, "loss": 0.4258, "step": 3086 }, { "epoch": 0.31384709231394875, "grad_norm": 0.42860090732574463, "learning_rate": 6.276941846278976e-06, "loss": 0.4283, "step": 3087 }, { "epoch": 0.31394875965839775, "grad_norm": 0.39016979932785034, "learning_rate": 6.278975193167955e-06, "loss": 0.4204, "step": 3088 }, { "epoch": 0.3140504270028467, "grad_norm": 0.40683671832084656, "learning_rate": 6.281008540056934e-06, "loss": 0.4094, "step": 3089 }, { "epoch": 0.31415209434729563, "grad_norm": 0.4175693094730377, "learning_rate": 6.283041886945913e-06, "loss": 0.4081, "step": 3090 }, { "epoch": 0.3142537616917446, "grad_norm": 0.4425366222858429, "learning_rate": 6.285075233834893e-06, "loss": 0.436, "step": 3091 }, { "epoch": 0.31435542903619357, "grad_norm": 0.4579852819442749, "learning_rate": 6.2871085807238715e-06, "loss": 0.4206, "step": 3092 }, { "epoch": 0.31445709638064256, "grad_norm": 0.39453473687171936, "learning_rate": 6.289141927612852e-06, "loss": 0.396, "step": 3093 }, { "epoch": 0.3145587637250915, "grad_norm": 0.42065247893333435, "learning_rate": 6.2911752745018305e-06, "loss": 0.4214, "step": 3094 }, { "epoch": 0.31466043106954045, "grad_norm": 0.46247559785842896, "learning_rate": 6.293208621390809e-06, "loss": 0.4525, "step": 3095 }, { "epoch": 0.31476209841398944, "grad_norm": 0.4242019057273865, "learning_rate": 6.2952419682797895e-06, "loss": 0.3844, "step": 3096 }, { "epoch": 0.3148637657584384, "grad_norm": 0.43480369448661804, "learning_rate": 6.297275315168768e-06, "loss": 0.4134, "step": 3097 }, { "epoch": 0.3149654331028873, "grad_norm": 0.3980948030948639, "learning_rate": 6.299308662057747e-06, "loss": 0.4026, "step": 3098 }, { "epoch": 0.3150671004473363, "grad_norm": 0.42407023906707764, "learning_rate": 6.301342008946727e-06, "loss": 0.4116, "step": 3099 }, { "epoch": 0.31516876779178526, "grad_norm": 0.4859808385372162, "learning_rate": 6.303375355835706e-06, "loss": 0.4651, "step": 3100 }, { "epoch": 0.31527043513623426, "grad_norm": 0.4392884075641632, "learning_rate": 6.305408702724686e-06, "loss": 0.4297, "step": 3101 }, { "epoch": 0.3153721024806832, "grad_norm": 0.4479958415031433, "learning_rate": 6.307442049613665e-06, "loss": 0.4114, "step": 3102 }, { "epoch": 0.31547376982513214, "grad_norm": 0.39641711115837097, "learning_rate": 6.309475396502643e-06, "loss": 0.4166, "step": 3103 }, { "epoch": 0.31557543716958114, "grad_norm": 0.4699353575706482, "learning_rate": 6.311508743391624e-06, "loss": 0.4243, "step": 3104 }, { "epoch": 0.3156771045140301, "grad_norm": 0.4580841660499573, "learning_rate": 6.313542090280602e-06, "loss": 0.4307, "step": 3105 }, { "epoch": 0.3157787718584791, "grad_norm": 0.39189088344573975, "learning_rate": 6.315575437169582e-06, "loss": 0.3876, "step": 3106 }, { "epoch": 0.315880439202928, "grad_norm": 0.4510602355003357, "learning_rate": 6.317608784058561e-06, "loss": 0.4243, "step": 3107 }, { "epoch": 0.31598210654737696, "grad_norm": 0.37495893239974976, "learning_rate": 6.31964213094754e-06, "loss": 0.4164, "step": 3108 }, { "epoch": 0.31608377389182596, "grad_norm": 0.4281296730041504, "learning_rate": 6.321675477836519e-06, "loss": 0.4003, "step": 3109 }, { "epoch": 0.3161854412362749, "grad_norm": 0.48700445890426636, "learning_rate": 6.323708824725499e-06, "loss": 0.4254, "step": 3110 }, { "epoch": 0.3162871085807239, "grad_norm": 0.4421270489692688, "learning_rate": 6.325742171614478e-06, "loss": 0.4311, "step": 3111 }, { "epoch": 0.31638877592517284, "grad_norm": 0.47832217812538147, "learning_rate": 6.327775518503457e-06, "loss": 0.4096, "step": 3112 }, { "epoch": 0.3164904432696218, "grad_norm": 0.4970409572124481, "learning_rate": 6.3298088653924365e-06, "loss": 0.4276, "step": 3113 }, { "epoch": 0.3165921106140708, "grad_norm": 0.40829262137413025, "learning_rate": 6.331842212281416e-06, "loss": 0.4568, "step": 3114 }, { "epoch": 0.3166937779585197, "grad_norm": 0.4430679976940155, "learning_rate": 6.333875559170395e-06, "loss": 0.4342, "step": 3115 }, { "epoch": 0.31679544530296866, "grad_norm": 0.4171191155910492, "learning_rate": 6.335908906059373e-06, "loss": 0.4032, "step": 3116 }, { "epoch": 0.31689711264741766, "grad_norm": 0.5453039407730103, "learning_rate": 6.337942252948354e-06, "loss": 0.419, "step": 3117 }, { "epoch": 0.3169987799918666, "grad_norm": 0.5013139843940735, "learning_rate": 6.339975599837332e-06, "loss": 0.4562, "step": 3118 }, { "epoch": 0.3171004473363156, "grad_norm": 0.43103164434432983, "learning_rate": 6.342008946726313e-06, "loss": 0.4456, "step": 3119 }, { "epoch": 0.31720211468076454, "grad_norm": 0.4353092312812805, "learning_rate": 6.344042293615291e-06, "loss": 0.4556, "step": 3120 }, { "epoch": 0.3173037820252135, "grad_norm": 0.47612684965133667, "learning_rate": 6.34607564050427e-06, "loss": 0.4214, "step": 3121 }, { "epoch": 0.3174054493696625, "grad_norm": 0.48696547746658325, "learning_rate": 6.34810898739325e-06, "loss": 0.4248, "step": 3122 }, { "epoch": 0.3175071167141114, "grad_norm": 0.4480572044849396, "learning_rate": 6.350142334282229e-06, "loss": 0.447, "step": 3123 }, { "epoch": 0.3176087840585604, "grad_norm": 0.4203639030456543, "learning_rate": 6.352175681171208e-06, "loss": 0.4239, "step": 3124 }, { "epoch": 0.31771045140300935, "grad_norm": 0.5332240462303162, "learning_rate": 6.354209028060188e-06, "loss": 0.4265, "step": 3125 }, { "epoch": 0.3178121187474583, "grad_norm": 0.5216339230537415, "learning_rate": 6.3562423749491665e-06, "loss": 0.4294, "step": 3126 }, { "epoch": 0.3179137860919073, "grad_norm": 0.42694711685180664, "learning_rate": 6.358275721838146e-06, "loss": 0.4218, "step": 3127 }, { "epoch": 0.31801545343635623, "grad_norm": 0.444244384765625, "learning_rate": 6.3603090687271255e-06, "loss": 0.4188, "step": 3128 }, { "epoch": 0.31811712078080523, "grad_norm": 0.4760603904724121, "learning_rate": 6.362342415616105e-06, "loss": 0.4524, "step": 3129 }, { "epoch": 0.31821878812525417, "grad_norm": 0.4399612247943878, "learning_rate": 6.364375762505084e-06, "loss": 0.404, "step": 3130 }, { "epoch": 0.3183204554697031, "grad_norm": 0.4238714873790741, "learning_rate": 6.366409109394063e-06, "loss": 0.4277, "step": 3131 }, { "epoch": 0.3184221228141521, "grad_norm": 0.4498923420906067, "learning_rate": 6.3684424562830426e-06, "loss": 0.3986, "step": 3132 }, { "epoch": 0.31852379015860105, "grad_norm": 0.4705927073955536, "learning_rate": 6.370475803172021e-06, "loss": 0.4323, "step": 3133 }, { "epoch": 0.31862545750305005, "grad_norm": 0.4532615542411804, "learning_rate": 6.3725091500610015e-06, "loss": 0.4248, "step": 3134 }, { "epoch": 0.318727124847499, "grad_norm": 0.4671918451786041, "learning_rate": 6.37454249694998e-06, "loss": 0.4339, "step": 3135 }, { "epoch": 0.31882879219194793, "grad_norm": 0.43872931599617004, "learning_rate": 6.376575843838959e-06, "loss": 0.4031, "step": 3136 }, { "epoch": 0.3189304595363969, "grad_norm": 0.45482707023620605, "learning_rate": 6.378609190727939e-06, "loss": 0.4043, "step": 3137 }, { "epoch": 0.31903212688084587, "grad_norm": 0.40161895751953125, "learning_rate": 6.380642537616918e-06, "loss": 0.4319, "step": 3138 }, { "epoch": 0.3191337942252948, "grad_norm": 0.4236201047897339, "learning_rate": 6.3826758845058964e-06, "loss": 0.4308, "step": 3139 }, { "epoch": 0.3192354615697438, "grad_norm": 0.45679333806037903, "learning_rate": 6.384709231394877e-06, "loss": 0.4284, "step": 3140 }, { "epoch": 0.31933712891419275, "grad_norm": 0.490237832069397, "learning_rate": 6.386742578283855e-06, "loss": 0.4362, "step": 3141 }, { "epoch": 0.31943879625864174, "grad_norm": 0.4160141944885254, "learning_rate": 6.388775925172836e-06, "loss": 0.4171, "step": 3142 }, { "epoch": 0.3195404636030907, "grad_norm": 0.39221230149269104, "learning_rate": 6.390809272061814e-06, "loss": 0.4075, "step": 3143 }, { "epoch": 0.3196421309475396, "grad_norm": 0.4397794306278229, "learning_rate": 6.392842618950793e-06, "loss": 0.4284, "step": 3144 }, { "epoch": 0.3197437982919886, "grad_norm": 0.45505961775779724, "learning_rate": 6.394875965839773e-06, "loss": 0.4038, "step": 3145 }, { "epoch": 0.31984546563643756, "grad_norm": 0.436190664768219, "learning_rate": 6.396909312728752e-06, "loss": 0.4522, "step": 3146 }, { "epoch": 0.31994713298088656, "grad_norm": 0.46573013067245483, "learning_rate": 6.3989426596177315e-06, "loss": 0.4278, "step": 3147 }, { "epoch": 0.3200488003253355, "grad_norm": 0.49013376235961914, "learning_rate": 6.400976006506711e-06, "loss": 0.4519, "step": 3148 }, { "epoch": 0.32015046766978444, "grad_norm": 0.42793405055999756, "learning_rate": 6.40300935339569e-06, "loss": 0.4128, "step": 3149 }, { "epoch": 0.32025213501423344, "grad_norm": 0.4237061142921448, "learning_rate": 6.405042700284669e-06, "loss": 0.4271, "step": 3150 }, { "epoch": 0.3203538023586824, "grad_norm": 0.5071455240249634, "learning_rate": 6.407076047173648e-06, "loss": 0.4115, "step": 3151 }, { "epoch": 0.3204554697031314, "grad_norm": 0.4207386374473572, "learning_rate": 6.409109394062628e-06, "loss": 0.4548, "step": 3152 }, { "epoch": 0.3205571370475803, "grad_norm": 0.420321524143219, "learning_rate": 6.411142740951607e-06, "loss": 0.4274, "step": 3153 }, { "epoch": 0.32065880439202926, "grad_norm": 0.4819234609603882, "learning_rate": 6.413176087840585e-06, "loss": 0.4033, "step": 3154 }, { "epoch": 0.32076047173647826, "grad_norm": 0.46210846304893494, "learning_rate": 6.415209434729566e-06, "loss": 0.4259, "step": 3155 }, { "epoch": 0.3208621390809272, "grad_norm": 0.4170364737510681, "learning_rate": 6.417242781618544e-06, "loss": 0.4458, "step": 3156 }, { "epoch": 0.3209638064253762, "grad_norm": 0.4634082317352295, "learning_rate": 6.419276128507525e-06, "loss": 0.4294, "step": 3157 }, { "epoch": 0.32106547376982514, "grad_norm": 0.4642658233642578, "learning_rate": 6.421309475396503e-06, "loss": 0.4356, "step": 3158 }, { "epoch": 0.3211671411142741, "grad_norm": 0.5103486776351929, "learning_rate": 6.423342822285482e-06, "loss": 0.4009, "step": 3159 }, { "epoch": 0.3212688084587231, "grad_norm": 0.40538397431373596, "learning_rate": 6.425376169174462e-06, "loss": 0.4196, "step": 3160 }, { "epoch": 0.321370475803172, "grad_norm": 0.49381551146507263, "learning_rate": 6.427409516063441e-06, "loss": 0.3999, "step": 3161 }, { "epoch": 0.32147214314762096, "grad_norm": 0.4337180256843567, "learning_rate": 6.42944286295242e-06, "loss": 0.4003, "step": 3162 }, { "epoch": 0.32157381049206996, "grad_norm": 0.40915125608444214, "learning_rate": 6.4314762098414e-06, "loss": 0.412, "step": 3163 }, { "epoch": 0.3216754778365189, "grad_norm": 0.44473010301589966, "learning_rate": 6.4335095567303786e-06, "loss": 0.4243, "step": 3164 }, { "epoch": 0.3217771451809679, "grad_norm": 0.4202111065387726, "learning_rate": 6.435542903619358e-06, "loss": 0.4192, "step": 3165 }, { "epoch": 0.32187881252541684, "grad_norm": 0.47076430916786194, "learning_rate": 6.4375762505083375e-06, "loss": 0.4504, "step": 3166 }, { "epoch": 0.3219804798698658, "grad_norm": 0.5034295320510864, "learning_rate": 6.439609597397316e-06, "loss": 0.4304, "step": 3167 }, { "epoch": 0.3220821472143148, "grad_norm": 0.4351365864276886, "learning_rate": 6.441642944286296e-06, "loss": 0.4076, "step": 3168 }, { "epoch": 0.3221838145587637, "grad_norm": 0.5084272027015686, "learning_rate": 6.443676291175275e-06, "loss": 0.4437, "step": 3169 }, { "epoch": 0.3222854819032127, "grad_norm": 0.424862802028656, "learning_rate": 6.445709638064255e-06, "loss": 0.4499, "step": 3170 }, { "epoch": 0.32238714924766165, "grad_norm": 0.3934189975261688, "learning_rate": 6.447742984953233e-06, "loss": 0.4327, "step": 3171 }, { "epoch": 0.3224888165921106, "grad_norm": 0.4839114844799042, "learning_rate": 6.449776331842213e-06, "loss": 0.4342, "step": 3172 }, { "epoch": 0.3225904839365596, "grad_norm": 0.4022045135498047, "learning_rate": 6.451809678731192e-06, "loss": 0.387, "step": 3173 }, { "epoch": 0.32269215128100853, "grad_norm": 0.389250248670578, "learning_rate": 6.453843025620171e-06, "loss": 0.4219, "step": 3174 }, { "epoch": 0.32279381862545753, "grad_norm": 0.4486852288246155, "learning_rate": 6.455876372509151e-06, "loss": 0.397, "step": 3175 }, { "epoch": 0.32289548596990647, "grad_norm": 0.453098326921463, "learning_rate": 6.45790971939813e-06, "loss": 0.425, "step": 3176 }, { "epoch": 0.3229971533143554, "grad_norm": 0.39054441452026367, "learning_rate": 6.4599430662871085e-06, "loss": 0.3953, "step": 3177 }, { "epoch": 0.3230988206588044, "grad_norm": 0.44486120343208313, "learning_rate": 6.461976413176089e-06, "loss": 0.4302, "step": 3178 }, { "epoch": 0.32320048800325335, "grad_norm": 0.44864511489868164, "learning_rate": 6.4640097600650675e-06, "loss": 0.41, "step": 3179 }, { "epoch": 0.3233021553477023, "grad_norm": 0.4216596484184265, "learning_rate": 6.466043106954046e-06, "loss": 0.4379, "step": 3180 }, { "epoch": 0.3234038226921513, "grad_norm": 0.4074352979660034, "learning_rate": 6.4680764538430265e-06, "loss": 0.4257, "step": 3181 }, { "epoch": 0.32350549003660023, "grad_norm": 0.4169524908065796, "learning_rate": 6.470109800732005e-06, "loss": 0.4015, "step": 3182 }, { "epoch": 0.3236071573810492, "grad_norm": 0.4333823323249817, "learning_rate": 6.4721431476209854e-06, "loss": 0.4244, "step": 3183 }, { "epoch": 0.32370882472549817, "grad_norm": 0.42916974425315857, "learning_rate": 6.474176494509964e-06, "loss": 0.46, "step": 3184 }, { "epoch": 0.3238104920699471, "grad_norm": 0.3799753487110138, "learning_rate": 6.476209841398943e-06, "loss": 0.4227, "step": 3185 }, { "epoch": 0.3239121594143961, "grad_norm": 0.41993486881256104, "learning_rate": 6.478243188287923e-06, "loss": 0.4107, "step": 3186 }, { "epoch": 0.32401382675884505, "grad_norm": 0.45906704664230347, "learning_rate": 6.480276535176902e-06, "loss": 0.3843, "step": 3187 }, { "epoch": 0.32411549410329404, "grad_norm": 0.4780639410018921, "learning_rate": 6.482309882065881e-06, "loss": 0.4328, "step": 3188 }, { "epoch": 0.324217161447743, "grad_norm": 0.44546210765838623, "learning_rate": 6.48434322895486e-06, "loss": 0.4426, "step": 3189 }, { "epoch": 0.3243188287921919, "grad_norm": 0.41077521443367004, "learning_rate": 6.486376575843839e-06, "loss": 0.4028, "step": 3190 }, { "epoch": 0.3244204961366409, "grad_norm": 0.4691389501094818, "learning_rate": 6.488409922732819e-06, "loss": 0.4454, "step": 3191 }, { "epoch": 0.32452216348108986, "grad_norm": 0.5279639363288879, "learning_rate": 6.4904432696217974e-06, "loss": 0.4426, "step": 3192 }, { "epoch": 0.32462383082553886, "grad_norm": 0.4772337079048157, "learning_rate": 6.492476616510778e-06, "loss": 0.4246, "step": 3193 }, { "epoch": 0.3247254981699878, "grad_norm": 0.4341283440589905, "learning_rate": 6.494509963399756e-06, "loss": 0.3825, "step": 3194 }, { "epoch": 0.32482716551443674, "grad_norm": 0.4502370059490204, "learning_rate": 6.496543310288735e-06, "loss": 0.4591, "step": 3195 }, { "epoch": 0.32492883285888574, "grad_norm": 0.4411553740501404, "learning_rate": 6.498576657177715e-06, "loss": 0.4271, "step": 3196 }, { "epoch": 0.3250305002033347, "grad_norm": 0.5202268362045288, "learning_rate": 6.500610004066694e-06, "loss": 0.4449, "step": 3197 }, { "epoch": 0.3251321675477837, "grad_norm": 0.46849581599235535, "learning_rate": 6.502643350955674e-06, "loss": 0.3951, "step": 3198 }, { "epoch": 0.3252338348922326, "grad_norm": 0.4635663628578186, "learning_rate": 6.504676697844653e-06, "loss": 0.4084, "step": 3199 }, { "epoch": 0.32533550223668156, "grad_norm": 0.48923900723457336, "learning_rate": 6.506710044733632e-06, "loss": 0.4409, "step": 3200 }, { "epoch": 0.32543716958113056, "grad_norm": 0.5130882859230042, "learning_rate": 6.508743391622612e-06, "loss": 0.4223, "step": 3201 }, { "epoch": 0.3255388369255795, "grad_norm": 0.40621450543403625, "learning_rate": 6.510776738511591e-06, "loss": 0.3952, "step": 3202 }, { "epoch": 0.32564050427002844, "grad_norm": 0.5267394781112671, "learning_rate": 6.512810085400569e-06, "loss": 0.4221, "step": 3203 }, { "epoch": 0.32574217161447744, "grad_norm": 0.45054543018341064, "learning_rate": 6.51484343228955e-06, "loss": 0.3887, "step": 3204 }, { "epoch": 0.3258438389589264, "grad_norm": 0.43054863810539246, "learning_rate": 6.516876779178528e-06, "loss": 0.4012, "step": 3205 }, { "epoch": 0.3259455063033754, "grad_norm": 0.47061291337013245, "learning_rate": 6.518910126067508e-06, "loss": 0.3968, "step": 3206 }, { "epoch": 0.3260471736478243, "grad_norm": 0.4904537498950958, "learning_rate": 6.520943472956487e-06, "loss": 0.431, "step": 3207 }, { "epoch": 0.32614884099227326, "grad_norm": 0.4176555275917053, "learning_rate": 6.522976819845466e-06, "loss": 0.4533, "step": 3208 }, { "epoch": 0.32625050833672226, "grad_norm": 0.4345606565475464, "learning_rate": 6.525010166734445e-06, "loss": 0.4278, "step": 3209 }, { "epoch": 0.3263521756811712, "grad_norm": 0.471933513879776, "learning_rate": 6.527043513623425e-06, "loss": 0.4252, "step": 3210 }, { "epoch": 0.3264538430256202, "grad_norm": 0.3768507242202759, "learning_rate": 6.529076860512404e-06, "loss": 0.4329, "step": 3211 }, { "epoch": 0.32655551037006914, "grad_norm": 0.48824378848075867, "learning_rate": 6.531110207401383e-06, "loss": 0.438, "step": 3212 }, { "epoch": 0.3266571777145181, "grad_norm": 0.445260226726532, "learning_rate": 6.5331435542903625e-06, "loss": 0.4167, "step": 3213 }, { "epoch": 0.3267588450589671, "grad_norm": 0.4322957992553711, "learning_rate": 6.535176901179342e-06, "loss": 0.4145, "step": 3214 }, { "epoch": 0.326860512403416, "grad_norm": 0.40826383233070374, "learning_rate": 6.537210248068321e-06, "loss": 0.4497, "step": 3215 }, { "epoch": 0.326962179747865, "grad_norm": 0.46879687905311584, "learning_rate": 6.539243594957301e-06, "loss": 0.4032, "step": 3216 }, { "epoch": 0.32706384709231395, "grad_norm": 0.47657790780067444, "learning_rate": 6.5412769418462796e-06, "loss": 0.4188, "step": 3217 }, { "epoch": 0.3271655144367629, "grad_norm": 0.403461754322052, "learning_rate": 6.543310288735258e-06, "loss": 0.4161, "step": 3218 }, { "epoch": 0.3272671817812119, "grad_norm": 0.4321630299091339, "learning_rate": 6.5453436356242385e-06, "loss": 0.4261, "step": 3219 }, { "epoch": 0.32736884912566083, "grad_norm": 0.4314133822917938, "learning_rate": 6.547376982513217e-06, "loss": 0.4168, "step": 3220 }, { "epoch": 0.3274705164701098, "grad_norm": 0.40784189105033875, "learning_rate": 6.549410329402196e-06, "loss": 0.4223, "step": 3221 }, { "epoch": 0.32757218381455877, "grad_norm": 0.4213393032550812, "learning_rate": 6.551443676291176e-06, "loss": 0.4388, "step": 3222 }, { "epoch": 0.3276738511590077, "grad_norm": 0.398611456155777, "learning_rate": 6.553477023180155e-06, "loss": 0.4335, "step": 3223 }, { "epoch": 0.3277755185034567, "grad_norm": 0.49628525972366333, "learning_rate": 6.555510370069134e-06, "loss": 0.4156, "step": 3224 }, { "epoch": 0.32787718584790565, "grad_norm": 0.4835865795612335, "learning_rate": 6.557543716958114e-06, "loss": 0.4264, "step": 3225 }, { "epoch": 0.3279788531923546, "grad_norm": 0.4184619188308716, "learning_rate": 6.559577063847092e-06, "loss": 0.4474, "step": 3226 }, { "epoch": 0.3280805205368036, "grad_norm": 0.5085829496383667, "learning_rate": 6.561610410736072e-06, "loss": 0.4642, "step": 3227 }, { "epoch": 0.32818218788125253, "grad_norm": 0.45904770493507385, "learning_rate": 6.563643757625051e-06, "loss": 0.3922, "step": 3228 }, { "epoch": 0.3282838552257015, "grad_norm": 0.3802149295806885, "learning_rate": 6.565677104514031e-06, "loss": 0.3759, "step": 3229 }, { "epoch": 0.32838552257015047, "grad_norm": 0.42181137204170227, "learning_rate": 6.5677104514030095e-06, "loss": 0.4116, "step": 3230 }, { "epoch": 0.3284871899145994, "grad_norm": 0.49083301424980164, "learning_rate": 6.569743798291989e-06, "loss": 0.4538, "step": 3231 }, { "epoch": 0.3285888572590484, "grad_norm": 0.4633442461490631, "learning_rate": 6.5717771451809685e-06, "loss": 0.4178, "step": 3232 }, { "epoch": 0.32869052460349735, "grad_norm": 0.3958665132522583, "learning_rate": 6.573810492069947e-06, "loss": 0.4143, "step": 3233 }, { "epoch": 0.32879219194794634, "grad_norm": 0.4935019612312317, "learning_rate": 6.5758438389589275e-06, "loss": 0.4323, "step": 3234 }, { "epoch": 0.3288938592923953, "grad_norm": 0.47854554653167725, "learning_rate": 6.577877185847906e-06, "loss": 0.4116, "step": 3235 }, { "epoch": 0.3289955266368442, "grad_norm": 0.4549857974052429, "learning_rate": 6.579910532736885e-06, "loss": 0.4041, "step": 3236 }, { "epoch": 0.3290971939812932, "grad_norm": 0.4959878921508789, "learning_rate": 6.581943879625865e-06, "loss": 0.4287, "step": 3237 }, { "epoch": 0.32919886132574216, "grad_norm": 0.5337091088294983, "learning_rate": 6.583977226514844e-06, "loss": 0.4095, "step": 3238 }, { "epoch": 0.32930052867019116, "grad_norm": 0.4074569642543793, "learning_rate": 6.586010573403824e-06, "loss": 0.4119, "step": 3239 }, { "epoch": 0.3294021960146401, "grad_norm": 0.49796250462532043, "learning_rate": 6.588043920292803e-06, "loss": 0.4119, "step": 3240 }, { "epoch": 0.32950386335908904, "grad_norm": 0.4646332859992981, "learning_rate": 6.590077267181781e-06, "loss": 0.4345, "step": 3241 }, { "epoch": 0.32960553070353804, "grad_norm": 0.42757686972618103, "learning_rate": 6.592110614070762e-06, "loss": 0.3968, "step": 3242 }, { "epoch": 0.329707198047987, "grad_norm": 0.4460178315639496, "learning_rate": 6.59414396095974e-06, "loss": 0.3981, "step": 3243 }, { "epoch": 0.3298088653924359, "grad_norm": 0.5088998675346375, "learning_rate": 6.596177307848719e-06, "loss": 0.3971, "step": 3244 }, { "epoch": 0.3299105327368849, "grad_norm": 0.498786985874176, "learning_rate": 6.598210654737699e-06, "loss": 0.4224, "step": 3245 }, { "epoch": 0.33001220008133386, "grad_norm": 0.5238330960273743, "learning_rate": 6.600244001626678e-06, "loss": 0.4521, "step": 3246 }, { "epoch": 0.33011386742578286, "grad_norm": 0.5198475122451782, "learning_rate": 6.6022773485156574e-06, "loss": 0.421, "step": 3247 }, { "epoch": 0.3302155347702318, "grad_norm": 0.4417383372783661, "learning_rate": 6.604310695404637e-06, "loss": 0.4343, "step": 3248 }, { "epoch": 0.33031720211468074, "grad_norm": 0.43127331137657166, "learning_rate": 6.6063440422936156e-06, "loss": 0.4137, "step": 3249 }, { "epoch": 0.33041886945912974, "grad_norm": 0.4727824330329895, "learning_rate": 6.608377389182595e-06, "loss": 0.4611, "step": 3250 }, { "epoch": 0.3305205368035787, "grad_norm": 0.41573870182037354, "learning_rate": 6.6104107360715745e-06, "loss": 0.4515, "step": 3251 }, { "epoch": 0.3306222041480277, "grad_norm": 0.4383689761161804, "learning_rate": 6.612444082960554e-06, "loss": 0.4503, "step": 3252 }, { "epoch": 0.3307238714924766, "grad_norm": 0.45168352127075195, "learning_rate": 6.614477429849533e-06, "loss": 0.4186, "step": 3253 }, { "epoch": 0.33082553883692556, "grad_norm": 0.40772897005081177, "learning_rate": 6.616510776738511e-06, "loss": 0.427, "step": 3254 }, { "epoch": 0.33092720618137456, "grad_norm": 0.44610902667045593, "learning_rate": 6.618544123627492e-06, "loss": 0.3926, "step": 3255 }, { "epoch": 0.3310288735258235, "grad_norm": 0.4505733549594879, "learning_rate": 6.62057747051647e-06, "loss": 0.3954, "step": 3256 }, { "epoch": 0.3311305408702725, "grad_norm": 0.4256165325641632, "learning_rate": 6.622610817405451e-06, "loss": 0.4221, "step": 3257 }, { "epoch": 0.33123220821472144, "grad_norm": 0.4222491681575775, "learning_rate": 6.624644164294429e-06, "loss": 0.4337, "step": 3258 }, { "epoch": 0.3313338755591704, "grad_norm": 0.45919638872146606, "learning_rate": 6.626677511183408e-06, "loss": 0.4233, "step": 3259 }, { "epoch": 0.3314355429036194, "grad_norm": 0.43499910831451416, "learning_rate": 6.628710858072388e-06, "loss": 0.4377, "step": 3260 }, { "epoch": 0.3315372102480683, "grad_norm": 0.44935065507888794, "learning_rate": 6.630744204961367e-06, "loss": 0.4088, "step": 3261 }, { "epoch": 0.33163887759251726, "grad_norm": 0.41129162907600403, "learning_rate": 6.6327775518503455e-06, "loss": 0.4123, "step": 3262 }, { "epoch": 0.33174054493696625, "grad_norm": 0.45765966176986694, "learning_rate": 6.634810898739326e-06, "loss": 0.4005, "step": 3263 }, { "epoch": 0.3318422122814152, "grad_norm": 0.43993064761161804, "learning_rate": 6.6368442456283045e-06, "loss": 0.3644, "step": 3264 }, { "epoch": 0.3319438796258642, "grad_norm": 0.4209497272968292, "learning_rate": 6.638877592517284e-06, "loss": 0.396, "step": 3265 }, { "epoch": 0.33204554697031313, "grad_norm": 0.40861019492149353, "learning_rate": 6.6409109394062635e-06, "loss": 0.4514, "step": 3266 }, { "epoch": 0.3321472143147621, "grad_norm": 0.49735286831855774, "learning_rate": 6.642944286295242e-06, "loss": 0.4335, "step": 3267 }, { "epoch": 0.33224888165921107, "grad_norm": 0.47275209426879883, "learning_rate": 6.644977633184222e-06, "loss": 0.3858, "step": 3268 }, { "epoch": 0.33235054900366, "grad_norm": 0.42865386605262756, "learning_rate": 6.647010980073201e-06, "loss": 0.414, "step": 3269 }, { "epoch": 0.332452216348109, "grad_norm": 0.4656120240688324, "learning_rate": 6.6490443269621806e-06, "loss": 0.4399, "step": 3270 }, { "epoch": 0.33255388369255795, "grad_norm": 0.4699527621269226, "learning_rate": 6.651077673851159e-06, "loss": 0.4043, "step": 3271 }, { "epoch": 0.3326555510370069, "grad_norm": 0.4815455377101898, "learning_rate": 6.653111020740139e-06, "loss": 0.4086, "step": 3272 }, { "epoch": 0.3327572183814559, "grad_norm": 0.46975260972976685, "learning_rate": 6.655144367629118e-06, "loss": 0.4375, "step": 3273 }, { "epoch": 0.33285888572590483, "grad_norm": 0.5096257328987122, "learning_rate": 6.657177714518097e-06, "loss": 0.4008, "step": 3274 }, { "epoch": 0.3329605530703538, "grad_norm": 0.4692162275314331, "learning_rate": 6.659211061407077e-06, "loss": 0.4502, "step": 3275 }, { "epoch": 0.33306222041480277, "grad_norm": 0.4225755035877228, "learning_rate": 6.661244408296056e-06, "loss": 0.4291, "step": 3276 }, { "epoch": 0.3331638877592517, "grad_norm": 0.5703200697898865, "learning_rate": 6.6632777551850344e-06, "loss": 0.4368, "step": 3277 }, { "epoch": 0.3332655551037007, "grad_norm": 0.4851718246936798, "learning_rate": 6.665311102074015e-06, "loss": 0.4063, "step": 3278 }, { "epoch": 0.33336722244814965, "grad_norm": 0.43908244371414185, "learning_rate": 6.6673444489629934e-06, "loss": 0.4419, "step": 3279 }, { "epoch": 0.33346888979259864, "grad_norm": 0.4014371335506439, "learning_rate": 6.669377795851974e-06, "loss": 0.4504, "step": 3280 }, { "epoch": 0.3335705571370476, "grad_norm": 0.43738049268722534, "learning_rate": 6.671411142740952e-06, "loss": 0.4099, "step": 3281 }, { "epoch": 0.3336722244814965, "grad_norm": 0.4195721745491028, "learning_rate": 6.673444489629931e-06, "loss": 0.4162, "step": 3282 }, { "epoch": 0.3337738918259455, "grad_norm": 0.45908302068710327, "learning_rate": 6.675477836518911e-06, "loss": 0.4067, "step": 3283 }, { "epoch": 0.33387555917039446, "grad_norm": 0.42138609290122986, "learning_rate": 6.67751118340789e-06, "loss": 0.421, "step": 3284 }, { "epoch": 0.3339772265148434, "grad_norm": 0.46449753642082214, "learning_rate": 6.679544530296869e-06, "loss": 0.4297, "step": 3285 }, { "epoch": 0.3340788938592924, "grad_norm": 0.3854832351207733, "learning_rate": 6.681577877185849e-06, "loss": 0.4158, "step": 3286 }, { "epoch": 0.33418056120374134, "grad_norm": 0.42870017886161804, "learning_rate": 6.683611224074828e-06, "loss": 0.4133, "step": 3287 }, { "epoch": 0.33428222854819034, "grad_norm": 0.408470094203949, "learning_rate": 6.685644570963807e-06, "loss": 0.4113, "step": 3288 }, { "epoch": 0.3343838958926393, "grad_norm": 0.45674243569374084, "learning_rate": 6.687677917852787e-06, "loss": 0.434, "step": 3289 }, { "epoch": 0.3344855632370882, "grad_norm": 0.44414034485816956, "learning_rate": 6.689711264741765e-06, "loss": 0.4906, "step": 3290 }, { "epoch": 0.3345872305815372, "grad_norm": 0.39896875619888306, "learning_rate": 6.691744611630745e-06, "loss": 0.4005, "step": 3291 }, { "epoch": 0.33468889792598616, "grad_norm": 0.4896189570426941, "learning_rate": 6.693777958519723e-06, "loss": 0.4104, "step": 3292 }, { "epoch": 0.33479056527043516, "grad_norm": 0.5031077265739441, "learning_rate": 6.695811305408704e-06, "loss": 0.4587, "step": 3293 }, { "epoch": 0.3348922326148841, "grad_norm": 0.44255372881889343, "learning_rate": 6.697844652297682e-06, "loss": 0.4232, "step": 3294 }, { "epoch": 0.33499389995933304, "grad_norm": 0.5581326484680176, "learning_rate": 6.699877999186661e-06, "loss": 0.4134, "step": 3295 }, { "epoch": 0.33509556730378204, "grad_norm": 0.46005573868751526, "learning_rate": 6.701911346075641e-06, "loss": 0.4245, "step": 3296 }, { "epoch": 0.335197234648231, "grad_norm": 0.4815884232521057, "learning_rate": 6.70394469296462e-06, "loss": 0.3899, "step": 3297 }, { "epoch": 0.33529890199268, "grad_norm": 0.4676990509033203, "learning_rate": 6.7059780398536e-06, "loss": 0.417, "step": 3298 }, { "epoch": 0.3354005693371289, "grad_norm": 0.4858793318271637, "learning_rate": 6.708011386742579e-06, "loss": 0.4234, "step": 3299 }, { "epoch": 0.33550223668157786, "grad_norm": 0.45501428842544556, "learning_rate": 6.710044733631558e-06, "loss": 0.43, "step": 3300 }, { "epoch": 0.33560390402602686, "grad_norm": 0.4107590317726135, "learning_rate": 6.712078080520538e-06, "loss": 0.3957, "step": 3301 }, { "epoch": 0.3357055713704758, "grad_norm": 0.47145673632621765, "learning_rate": 6.7141114274095166e-06, "loss": 0.4332, "step": 3302 }, { "epoch": 0.33580723871492474, "grad_norm": 0.4813827574253082, "learning_rate": 6.716144774298495e-06, "loss": 0.3797, "step": 3303 }, { "epoch": 0.33590890605937374, "grad_norm": 0.4533947706222534, "learning_rate": 6.7181781211874755e-06, "loss": 0.4194, "step": 3304 }, { "epoch": 0.3360105734038227, "grad_norm": 0.4448186159133911, "learning_rate": 6.720211468076454e-06, "loss": 0.4409, "step": 3305 }, { "epoch": 0.3361122407482717, "grad_norm": 0.5054139494895935, "learning_rate": 6.722244814965434e-06, "loss": 0.4353, "step": 3306 }, { "epoch": 0.3362139080927206, "grad_norm": 0.4930519163608551, "learning_rate": 6.724278161854413e-06, "loss": 0.4073, "step": 3307 }, { "epoch": 0.33631557543716956, "grad_norm": 0.38372161984443665, "learning_rate": 6.726311508743392e-06, "loss": 0.4143, "step": 3308 }, { "epoch": 0.33641724278161855, "grad_norm": 0.43672752380371094, "learning_rate": 6.728344855632371e-06, "loss": 0.4321, "step": 3309 }, { "epoch": 0.3365189101260675, "grad_norm": 0.49500516057014465, "learning_rate": 6.730378202521351e-06, "loss": 0.422, "step": 3310 }, { "epoch": 0.3366205774705165, "grad_norm": 0.40971994400024414, "learning_rate": 6.73241154941033e-06, "loss": 0.4228, "step": 3311 }, { "epoch": 0.33672224481496543, "grad_norm": 0.4090944528579712, "learning_rate": 6.734444896299309e-06, "loss": 0.4546, "step": 3312 }, { "epoch": 0.3368239121594144, "grad_norm": 0.48321235179901123, "learning_rate": 6.736478243188288e-06, "loss": 0.4032, "step": 3313 }, { "epoch": 0.33692557950386337, "grad_norm": 0.4094434380531311, "learning_rate": 6.738511590077268e-06, "loss": 0.4088, "step": 3314 }, { "epoch": 0.3370272468483123, "grad_norm": 0.4130152463912964, "learning_rate": 6.7405449369662465e-06, "loss": 0.4168, "step": 3315 }, { "epoch": 0.3371289141927613, "grad_norm": 0.497341513633728, "learning_rate": 6.742578283855227e-06, "loss": 0.4516, "step": 3316 }, { "epoch": 0.33723058153721025, "grad_norm": 0.45858752727508545, "learning_rate": 6.7446116307442055e-06, "loss": 0.427, "step": 3317 }, { "epoch": 0.3373322488816592, "grad_norm": 0.4360431730747223, "learning_rate": 6.746644977633184e-06, "loss": 0.404, "step": 3318 }, { "epoch": 0.3374339162261082, "grad_norm": 0.4489501416683197, "learning_rate": 6.7486783245221645e-06, "loss": 0.4158, "step": 3319 }, { "epoch": 0.33753558357055713, "grad_norm": 0.4721360206604004, "learning_rate": 6.750711671411143e-06, "loss": 0.4306, "step": 3320 }, { "epoch": 0.3376372509150061, "grad_norm": 0.4607105851173401, "learning_rate": 6.7527450183001235e-06, "loss": 0.4257, "step": 3321 }, { "epoch": 0.33773891825945507, "grad_norm": 0.42281657457351685, "learning_rate": 6.754778365189102e-06, "loss": 0.4302, "step": 3322 }, { "epoch": 0.337840585603904, "grad_norm": 0.4436890482902527, "learning_rate": 6.756811712078081e-06, "loss": 0.421, "step": 3323 }, { "epoch": 0.337942252948353, "grad_norm": 0.4533417522907257, "learning_rate": 6.758845058967061e-06, "loss": 0.3935, "step": 3324 }, { "epoch": 0.33804392029280195, "grad_norm": 0.42335090041160583, "learning_rate": 6.76087840585604e-06, "loss": 0.4001, "step": 3325 }, { "epoch": 0.3381455876372509, "grad_norm": 0.4168330430984497, "learning_rate": 6.762911752745018e-06, "loss": 0.4254, "step": 3326 }, { "epoch": 0.3382472549816999, "grad_norm": 0.49888259172439575, "learning_rate": 6.764945099633998e-06, "loss": 0.4187, "step": 3327 }, { "epoch": 0.3383489223261488, "grad_norm": 0.41646477580070496, "learning_rate": 6.766978446522977e-06, "loss": 0.4371, "step": 3328 }, { "epoch": 0.3384505896705978, "grad_norm": 0.409584641456604, "learning_rate": 6.769011793411957e-06, "loss": 0.4045, "step": 3329 }, { "epoch": 0.33855225701504676, "grad_norm": 0.454332560300827, "learning_rate": 6.7710451403009355e-06, "loss": 0.4082, "step": 3330 }, { "epoch": 0.3386539243594957, "grad_norm": 0.5093256831169128, "learning_rate": 6.773078487189915e-06, "loss": 0.4348, "step": 3331 }, { "epoch": 0.3387555917039447, "grad_norm": 0.43443626165390015, "learning_rate": 6.7751118340788944e-06, "loss": 0.424, "step": 3332 }, { "epoch": 0.33885725904839364, "grad_norm": 0.44325533509254456, "learning_rate": 6.777145180967873e-06, "loss": 0.4151, "step": 3333 }, { "epoch": 0.33895892639284264, "grad_norm": 0.48867836594581604, "learning_rate": 6.779178527856853e-06, "loss": 0.3927, "step": 3334 }, { "epoch": 0.3390605937372916, "grad_norm": 0.4001937806606293, "learning_rate": 6.781211874745832e-06, "loss": 0.4271, "step": 3335 }, { "epoch": 0.3391622610817405, "grad_norm": 0.398786336183548, "learning_rate": 6.783245221634811e-06, "loss": 0.4407, "step": 3336 }, { "epoch": 0.3392639284261895, "grad_norm": 0.439819872379303, "learning_rate": 6.785278568523791e-06, "loss": 0.4402, "step": 3337 }, { "epoch": 0.33936559577063846, "grad_norm": 0.4506281018257141, "learning_rate": 6.78731191541277e-06, "loss": 0.418, "step": 3338 }, { "epoch": 0.33946726311508746, "grad_norm": 0.40827950835227966, "learning_rate": 6.78934526230175e-06, "loss": 0.4375, "step": 3339 }, { "epoch": 0.3395689304595364, "grad_norm": 0.39851951599121094, "learning_rate": 6.791378609190729e-06, "loss": 0.4104, "step": 3340 }, { "epoch": 0.33967059780398534, "grad_norm": 0.4207122027873993, "learning_rate": 6.793411956079707e-06, "loss": 0.4299, "step": 3341 }, { "epoch": 0.33977226514843434, "grad_norm": 0.46324679255485535, "learning_rate": 6.795445302968688e-06, "loss": 0.4097, "step": 3342 }, { "epoch": 0.3398739324928833, "grad_norm": 0.44224658608436584, "learning_rate": 6.797478649857666e-06, "loss": 0.4195, "step": 3343 }, { "epoch": 0.3399755998373323, "grad_norm": 0.43478140234947205, "learning_rate": 6.799511996746646e-06, "loss": 0.4196, "step": 3344 }, { "epoch": 0.3400772671817812, "grad_norm": 0.44732797145843506, "learning_rate": 6.801545343635625e-06, "loss": 0.4507, "step": 3345 }, { "epoch": 0.34017893452623016, "grad_norm": 0.40604257583618164, "learning_rate": 6.803578690524604e-06, "loss": 0.4089, "step": 3346 }, { "epoch": 0.34028060187067916, "grad_norm": 0.4312998056411743, "learning_rate": 6.805612037413583e-06, "loss": 0.4486, "step": 3347 }, { "epoch": 0.3403822692151281, "grad_norm": 0.42842528223991394, "learning_rate": 6.807645384302563e-06, "loss": 0.4481, "step": 3348 }, { "epoch": 0.34048393655957704, "grad_norm": 0.4652990400791168, "learning_rate": 6.8096787311915415e-06, "loss": 0.4228, "step": 3349 }, { "epoch": 0.34058560390402604, "grad_norm": 0.4446001946926117, "learning_rate": 6.811712078080521e-06, "loss": 0.4175, "step": 3350 }, { "epoch": 0.340687271248475, "grad_norm": 0.4361828565597534, "learning_rate": 6.8137454249695005e-06, "loss": 0.4465, "step": 3351 }, { "epoch": 0.340788938592924, "grad_norm": 0.4059670865535736, "learning_rate": 6.81577877185848e-06, "loss": 0.384, "step": 3352 }, { "epoch": 0.3408906059373729, "grad_norm": 0.4141475558280945, "learning_rate": 6.817812118747459e-06, "loss": 0.4032, "step": 3353 }, { "epoch": 0.34099227328182186, "grad_norm": 0.4006440341472626, "learning_rate": 6.819845465636438e-06, "loss": 0.3989, "step": 3354 }, { "epoch": 0.34109394062627085, "grad_norm": 0.45166242122650146, "learning_rate": 6.821878812525418e-06, "loss": 0.4042, "step": 3355 }, { "epoch": 0.3411956079707198, "grad_norm": 0.4147440493106842, "learning_rate": 6.823912159414396e-06, "loss": 0.4495, "step": 3356 }, { "epoch": 0.3412972753151688, "grad_norm": 0.432729572057724, "learning_rate": 6.8259455063033766e-06, "loss": 0.4318, "step": 3357 }, { "epoch": 0.34139894265961773, "grad_norm": 0.42510050535202026, "learning_rate": 6.827978853192355e-06, "loss": 0.4153, "step": 3358 }, { "epoch": 0.3415006100040667, "grad_norm": 0.4031899869441986, "learning_rate": 6.830012200081334e-06, "loss": 0.4226, "step": 3359 }, { "epoch": 0.34160227734851567, "grad_norm": 0.43591487407684326, "learning_rate": 6.832045546970314e-06, "loss": 0.4056, "step": 3360 }, { "epoch": 0.3417039446929646, "grad_norm": 0.4372031092643738, "learning_rate": 6.834078893859293e-06, "loss": 0.444, "step": 3361 }, { "epoch": 0.3418056120374136, "grad_norm": 0.4238738715648651, "learning_rate": 6.836112240748273e-06, "loss": 0.4105, "step": 3362 }, { "epoch": 0.34190727938186255, "grad_norm": 0.3956810235977173, "learning_rate": 6.838145587637252e-06, "loss": 0.4312, "step": 3363 }, { "epoch": 0.3420089467263115, "grad_norm": 0.4333479702472687, "learning_rate": 6.8401789345262304e-06, "loss": 0.4457, "step": 3364 }, { "epoch": 0.3421106140707605, "grad_norm": 0.40958765149116516, "learning_rate": 6.84221228141521e-06, "loss": 0.4112, "step": 3365 }, { "epoch": 0.34221228141520943, "grad_norm": 0.3912850022315979, "learning_rate": 6.844245628304189e-06, "loss": 0.4214, "step": 3366 }, { "epoch": 0.34231394875965837, "grad_norm": 0.3900681138038635, "learning_rate": 6.846278975193168e-06, "loss": 0.3909, "step": 3367 }, { "epoch": 0.34241561610410737, "grad_norm": 0.44451576471328735, "learning_rate": 6.8483123220821475e-06, "loss": 0.4245, "step": 3368 }, { "epoch": 0.3425172834485563, "grad_norm": 0.41716739535331726, "learning_rate": 6.850345668971127e-06, "loss": 0.4528, "step": 3369 }, { "epoch": 0.3426189507930053, "grad_norm": 0.49438712000846863, "learning_rate": 6.8523790158601065e-06, "loss": 0.4166, "step": 3370 }, { "epoch": 0.34272061813745425, "grad_norm": 0.42830055952072144, "learning_rate": 6.854412362749085e-06, "loss": 0.4095, "step": 3371 }, { "epoch": 0.3428222854819032, "grad_norm": 0.46499869227409363, "learning_rate": 6.856445709638065e-06, "loss": 0.4432, "step": 3372 }, { "epoch": 0.3429239528263522, "grad_norm": 0.4932594299316406, "learning_rate": 6.858479056527044e-06, "loss": 0.4529, "step": 3373 }, { "epoch": 0.3430256201708011, "grad_norm": 0.41770312190055847, "learning_rate": 6.860512403416023e-06, "loss": 0.456, "step": 3374 }, { "epoch": 0.3431272875152501, "grad_norm": 0.45248332619667053, "learning_rate": 6.862545750305003e-06, "loss": 0.3977, "step": 3375 }, { "epoch": 0.34322895485969906, "grad_norm": 0.4512592554092407, "learning_rate": 6.864579097193982e-06, "loss": 0.4229, "step": 3376 }, { "epoch": 0.343330622204148, "grad_norm": 0.37241536378860474, "learning_rate": 6.86661244408296e-06, "loss": 0.4319, "step": 3377 }, { "epoch": 0.343432289548597, "grad_norm": 0.38947543501853943, "learning_rate": 6.868645790971941e-06, "loss": 0.425, "step": 3378 }, { "epoch": 0.34353395689304594, "grad_norm": 0.4197116196155548, "learning_rate": 6.870679137860919e-06, "loss": 0.3978, "step": 3379 }, { "epoch": 0.34363562423749494, "grad_norm": 0.3787309229373932, "learning_rate": 6.8727124847499e-06, "loss": 0.4114, "step": 3380 }, { "epoch": 0.3437372915819439, "grad_norm": 0.43026623129844666, "learning_rate": 6.874745831638878e-06, "loss": 0.3991, "step": 3381 }, { "epoch": 0.3438389589263928, "grad_norm": 0.48303043842315674, "learning_rate": 6.876779178527857e-06, "loss": 0.4246, "step": 3382 }, { "epoch": 0.3439406262708418, "grad_norm": 0.4170559048652649, "learning_rate": 6.878812525416837e-06, "loss": 0.4251, "step": 3383 }, { "epoch": 0.34404229361529076, "grad_norm": 0.43172281980514526, "learning_rate": 6.880845872305816e-06, "loss": 0.4202, "step": 3384 }, { "epoch": 0.34414396095973976, "grad_norm": 0.4755009710788727, "learning_rate": 6.8828792191947954e-06, "loss": 0.4335, "step": 3385 }, { "epoch": 0.3442456283041887, "grad_norm": 0.4211137890815735, "learning_rate": 6.884912566083775e-06, "loss": 0.4393, "step": 3386 }, { "epoch": 0.34434729564863764, "grad_norm": 0.42873653769493103, "learning_rate": 6.8869459129727536e-06, "loss": 0.419, "step": 3387 }, { "epoch": 0.34444896299308664, "grad_norm": 0.43254417181015015, "learning_rate": 6.888979259861733e-06, "loss": 0.4291, "step": 3388 }, { "epoch": 0.3445506303375356, "grad_norm": 0.4146343171596527, "learning_rate": 6.8910126067507125e-06, "loss": 0.4314, "step": 3389 }, { "epoch": 0.3446522976819845, "grad_norm": 0.4362363815307617, "learning_rate": 6.893045953639691e-06, "loss": 0.411, "step": 3390 }, { "epoch": 0.3447539650264335, "grad_norm": 0.4149000644683838, "learning_rate": 6.895079300528671e-06, "loss": 0.4175, "step": 3391 }, { "epoch": 0.34485563237088246, "grad_norm": 0.41795772314071655, "learning_rate": 6.89711264741765e-06, "loss": 0.4202, "step": 3392 }, { "epoch": 0.34495729971533146, "grad_norm": 0.4942264258861542, "learning_rate": 6.89914599430663e-06, "loss": 0.4592, "step": 3393 }, { "epoch": 0.3450589670597804, "grad_norm": 0.5006908774375916, "learning_rate": 6.901179341195608e-06, "loss": 0.4146, "step": 3394 }, { "epoch": 0.34516063440422934, "grad_norm": 0.4292660057544708, "learning_rate": 6.903212688084587e-06, "loss": 0.4172, "step": 3395 }, { "epoch": 0.34526230174867834, "grad_norm": 0.4681025445461273, "learning_rate": 6.905246034973567e-06, "loss": 0.4097, "step": 3396 }, { "epoch": 0.3453639690931273, "grad_norm": 0.5013593435287476, "learning_rate": 6.907279381862546e-06, "loss": 0.444, "step": 3397 }, { "epoch": 0.3454656364375763, "grad_norm": 0.46783751249313354, "learning_rate": 6.909312728751526e-06, "loss": 0.3958, "step": 3398 }, { "epoch": 0.3455673037820252, "grad_norm": 0.49713367223739624, "learning_rate": 6.911346075640505e-06, "loss": 0.425, "step": 3399 }, { "epoch": 0.34566897112647416, "grad_norm": 0.4653433859348297, "learning_rate": 6.9133794225294835e-06, "loss": 0.4542, "step": 3400 }, { "epoch": 0.34577063847092315, "grad_norm": 0.5179040431976318, "learning_rate": 6.915412769418464e-06, "loss": 0.4429, "step": 3401 }, { "epoch": 0.3458723058153721, "grad_norm": 0.5403037071228027, "learning_rate": 6.9174461163074425e-06, "loss": 0.4132, "step": 3402 }, { "epoch": 0.3459739731598211, "grad_norm": 0.43470633029937744, "learning_rate": 6.919479463196422e-06, "loss": 0.3873, "step": 3403 }, { "epoch": 0.34607564050427003, "grad_norm": 0.5419204831123352, "learning_rate": 6.9215128100854015e-06, "loss": 0.4108, "step": 3404 }, { "epoch": 0.346177307848719, "grad_norm": 0.4985010325908661, "learning_rate": 6.92354615697438e-06, "loss": 0.4237, "step": 3405 }, { "epoch": 0.34627897519316797, "grad_norm": 0.42887020111083984, "learning_rate": 6.92557950386336e-06, "loss": 0.4037, "step": 3406 }, { "epoch": 0.3463806425376169, "grad_norm": 0.5249342322349548, "learning_rate": 6.927612850752339e-06, "loss": 0.4405, "step": 3407 }, { "epoch": 0.34648230988206585, "grad_norm": 0.48459336161613464, "learning_rate": 6.929646197641318e-06, "loss": 0.4183, "step": 3408 }, { "epoch": 0.34658397722651485, "grad_norm": 0.4141990542411804, "learning_rate": 6.931679544530297e-06, "loss": 0.4256, "step": 3409 }, { "epoch": 0.3466856445709638, "grad_norm": 0.49123120307922363, "learning_rate": 6.933712891419277e-06, "loss": 0.4212, "step": 3410 }, { "epoch": 0.3467873119154128, "grad_norm": 0.43230336904525757, "learning_rate": 6.935746238308256e-06, "loss": 0.3889, "step": 3411 }, { "epoch": 0.34688897925986173, "grad_norm": 0.46542271971702576, "learning_rate": 6.937779585197235e-06, "loss": 0.3917, "step": 3412 }, { "epoch": 0.34699064660431067, "grad_norm": 0.4787951409816742, "learning_rate": 6.939812932086214e-06, "loss": 0.4229, "step": 3413 }, { "epoch": 0.34709231394875967, "grad_norm": 0.4662987291812897, "learning_rate": 6.941846278975194e-06, "loss": 0.4237, "step": 3414 }, { "epoch": 0.3471939812932086, "grad_norm": 0.4515829384326935, "learning_rate": 6.9438796258641725e-06, "loss": 0.4178, "step": 3415 }, { "epoch": 0.3472956486376576, "grad_norm": 0.3976280689239502, "learning_rate": 6.945912972753153e-06, "loss": 0.4065, "step": 3416 }, { "epoch": 0.34739731598210655, "grad_norm": 0.40972959995269775, "learning_rate": 6.9479463196421314e-06, "loss": 0.4074, "step": 3417 }, { "epoch": 0.3474989833265555, "grad_norm": 0.5045133233070374, "learning_rate": 6.94997966653111e-06, "loss": 0.4574, "step": 3418 }, { "epoch": 0.3476006506710045, "grad_norm": 0.4653635323047638, "learning_rate": 6.95201301342009e-06, "loss": 0.4202, "step": 3419 }, { "epoch": 0.3477023180154534, "grad_norm": 0.4016011953353882, "learning_rate": 6.954046360309069e-06, "loss": 0.4297, "step": 3420 }, { "epoch": 0.3478039853599024, "grad_norm": 0.4539982080459595, "learning_rate": 6.956079707198049e-06, "loss": 0.4264, "step": 3421 }, { "epoch": 0.34790565270435136, "grad_norm": 0.4097331166267395, "learning_rate": 6.958113054087028e-06, "loss": 0.3905, "step": 3422 }, { "epoch": 0.3480073200488003, "grad_norm": 0.5211836695671082, "learning_rate": 6.960146400976007e-06, "loss": 0.4304, "step": 3423 }, { "epoch": 0.3481089873932493, "grad_norm": 0.38161298632621765, "learning_rate": 6.962179747864987e-06, "loss": 0.4125, "step": 3424 }, { "epoch": 0.34821065473769824, "grad_norm": 0.4459885060787201, "learning_rate": 6.964213094753966e-06, "loss": 0.4071, "step": 3425 }, { "epoch": 0.34831232208214724, "grad_norm": 0.4135267436504364, "learning_rate": 6.966246441642945e-06, "loss": 0.4335, "step": 3426 }, { "epoch": 0.3484139894265962, "grad_norm": 0.3872796893119812, "learning_rate": 6.968279788531925e-06, "loss": 0.4445, "step": 3427 }, { "epoch": 0.3485156567710451, "grad_norm": 0.40926292538642883, "learning_rate": 6.970313135420903e-06, "loss": 0.4562, "step": 3428 }, { "epoch": 0.3486173241154941, "grad_norm": 0.41446179151535034, "learning_rate": 6.972346482309883e-06, "loss": 0.4245, "step": 3429 }, { "epoch": 0.34871899145994306, "grad_norm": 0.42501556873321533, "learning_rate": 6.974379829198861e-06, "loss": 0.4019, "step": 3430 }, { "epoch": 0.348820658804392, "grad_norm": 0.4523670971393585, "learning_rate": 6.976413176087841e-06, "loss": 0.4469, "step": 3431 }, { "epoch": 0.348922326148841, "grad_norm": 0.42647784948349, "learning_rate": 6.97844652297682e-06, "loss": 0.4436, "step": 3432 }, { "epoch": 0.34902399349328994, "grad_norm": 0.4191354811191559, "learning_rate": 6.980479869865799e-06, "loss": 0.4066, "step": 3433 }, { "epoch": 0.34912566083773894, "grad_norm": 0.4343677759170532, "learning_rate": 6.982513216754779e-06, "loss": 0.4448, "step": 3434 }, { "epoch": 0.3492273281821879, "grad_norm": 0.4398675262928009, "learning_rate": 6.984546563643758e-06, "loss": 0.4419, "step": 3435 }, { "epoch": 0.3493289955266368, "grad_norm": 0.38736051321029663, "learning_rate": 6.986579910532737e-06, "loss": 0.4195, "step": 3436 }, { "epoch": 0.3494306628710858, "grad_norm": 0.437296062707901, "learning_rate": 6.988613257421717e-06, "loss": 0.3984, "step": 3437 }, { "epoch": 0.34953233021553476, "grad_norm": 0.37818536162376404, "learning_rate": 6.990646604310696e-06, "loss": 0.4109, "step": 3438 }, { "epoch": 0.34963399755998376, "grad_norm": 0.43558982014656067, "learning_rate": 6.992679951199676e-06, "loss": 0.4369, "step": 3439 }, { "epoch": 0.3497356649044327, "grad_norm": 0.4026961326599121, "learning_rate": 6.994713298088655e-06, "loss": 0.4219, "step": 3440 }, { "epoch": 0.34983733224888164, "grad_norm": 0.4257175922393799, "learning_rate": 6.996746644977633e-06, "loss": 0.4514, "step": 3441 }, { "epoch": 0.34993899959333064, "grad_norm": 0.40780237317085266, "learning_rate": 6.9987799918666136e-06, "loss": 0.433, "step": 3442 }, { "epoch": 0.3500406669377796, "grad_norm": 0.4262479543685913, "learning_rate": 7.000813338755592e-06, "loss": 0.4262, "step": 3443 }, { "epoch": 0.3501423342822286, "grad_norm": 0.37250077724456787, "learning_rate": 7.002846685644572e-06, "loss": 0.4224, "step": 3444 }, { "epoch": 0.3502440016266775, "grad_norm": 0.40428295731544495, "learning_rate": 7.004880032533551e-06, "loss": 0.417, "step": 3445 }, { "epoch": 0.35034566897112646, "grad_norm": 0.4768735468387604, "learning_rate": 7.00691337942253e-06, "loss": 0.4256, "step": 3446 }, { "epoch": 0.35044733631557545, "grad_norm": 0.4096571207046509, "learning_rate": 7.008946726311509e-06, "loss": 0.4042, "step": 3447 }, { "epoch": 0.3505490036600244, "grad_norm": 0.40980473160743713, "learning_rate": 7.010980073200489e-06, "loss": 0.4241, "step": 3448 }, { "epoch": 0.35065067100447334, "grad_norm": 0.41313493251800537, "learning_rate": 7.0130134200894674e-06, "loss": 0.4065, "step": 3449 }, { "epoch": 0.35075233834892233, "grad_norm": 0.43094804883003235, "learning_rate": 7.015046766978447e-06, "loss": 0.3951, "step": 3450 }, { "epoch": 0.3508540056933713, "grad_norm": 0.5097748637199402, "learning_rate": 7.017080113867426e-06, "loss": 0.4568, "step": 3451 }, { "epoch": 0.35095567303782027, "grad_norm": 0.40665408968925476, "learning_rate": 7.019113460756406e-06, "loss": 0.4114, "step": 3452 }, { "epoch": 0.3510573403822692, "grad_norm": 0.47390666604042053, "learning_rate": 7.0211468076453845e-06, "loss": 0.4307, "step": 3453 }, { "epoch": 0.35115900772671815, "grad_norm": 0.43430420756340027, "learning_rate": 7.023180154534364e-06, "loss": 0.4132, "step": 3454 }, { "epoch": 0.35126067507116715, "grad_norm": 0.41588732600212097, "learning_rate": 7.0252135014233435e-06, "loss": 0.4417, "step": 3455 }, { "epoch": 0.3513623424156161, "grad_norm": 0.43323203921318054, "learning_rate": 7.027246848312322e-06, "loss": 0.4301, "step": 3456 }, { "epoch": 0.3514640097600651, "grad_norm": 0.3921782970428467, "learning_rate": 7.0292801952013025e-06, "loss": 0.4222, "step": 3457 }, { "epoch": 0.35156567710451403, "grad_norm": 0.42210549116134644, "learning_rate": 7.031313542090281e-06, "loss": 0.4465, "step": 3458 }, { "epoch": 0.35166734444896297, "grad_norm": 0.4066821336746216, "learning_rate": 7.03334688897926e-06, "loss": 0.4059, "step": 3459 }, { "epoch": 0.35176901179341197, "grad_norm": 0.40162762999534607, "learning_rate": 7.03538023586824e-06, "loss": 0.4236, "step": 3460 }, { "epoch": 0.3518706791378609, "grad_norm": 0.42866089940071106, "learning_rate": 7.037413582757219e-06, "loss": 0.4331, "step": 3461 }, { "epoch": 0.3519723464823099, "grad_norm": 0.39132222533226013, "learning_rate": 7.039446929646199e-06, "loss": 0.4285, "step": 3462 }, { "epoch": 0.35207401382675885, "grad_norm": 0.457267701625824, "learning_rate": 7.041480276535178e-06, "loss": 0.4052, "step": 3463 }, { "epoch": 0.3521756811712078, "grad_norm": 0.43325117230415344, "learning_rate": 7.043513623424156e-06, "loss": 0.4267, "step": 3464 }, { "epoch": 0.3522773485156568, "grad_norm": 0.4114523231983185, "learning_rate": 7.045546970313137e-06, "loss": 0.4243, "step": 3465 }, { "epoch": 0.3523790158601057, "grad_norm": 0.4281534254550934, "learning_rate": 7.047580317202115e-06, "loss": 0.445, "step": 3466 }, { "epoch": 0.3524806832045547, "grad_norm": 0.41533732414245605, "learning_rate": 7.049613664091095e-06, "loss": 0.4333, "step": 3467 }, { "epoch": 0.35258235054900366, "grad_norm": 0.4568343460559845, "learning_rate": 7.0516470109800735e-06, "loss": 0.4161, "step": 3468 }, { "epoch": 0.3526840178934526, "grad_norm": 0.48414504528045654, "learning_rate": 7.053680357869053e-06, "loss": 0.424, "step": 3469 }, { "epoch": 0.3527856852379016, "grad_norm": 0.39740878343582153, "learning_rate": 7.0557137047580324e-06, "loss": 0.4494, "step": 3470 }, { "epoch": 0.35288735258235054, "grad_norm": 0.46263861656188965, "learning_rate": 7.057747051647011e-06, "loss": 0.427, "step": 3471 }, { "epoch": 0.3529890199267995, "grad_norm": 0.5105704069137573, "learning_rate": 7.0597803985359906e-06, "loss": 0.3953, "step": 3472 }, { "epoch": 0.3530906872712485, "grad_norm": 0.40759363770484924, "learning_rate": 7.06181374542497e-06, "loss": 0.3932, "step": 3473 }, { "epoch": 0.3531923546156974, "grad_norm": 0.4756262004375458, "learning_rate": 7.063847092313949e-06, "loss": 0.3897, "step": 3474 }, { "epoch": 0.3532940219601464, "grad_norm": 0.5780848860740662, "learning_rate": 7.065880439202929e-06, "loss": 0.4147, "step": 3475 }, { "epoch": 0.35339568930459536, "grad_norm": 0.4521206319332123, "learning_rate": 7.067913786091908e-06, "loss": 0.4375, "step": 3476 }, { "epoch": 0.3534973566490443, "grad_norm": 0.4505571722984314, "learning_rate": 7.069947132980886e-06, "loss": 0.4111, "step": 3477 }, { "epoch": 0.3535990239934933, "grad_norm": 0.44770368933677673, "learning_rate": 7.071980479869867e-06, "loss": 0.3695, "step": 3478 }, { "epoch": 0.35370069133794224, "grad_norm": 0.5083760619163513, "learning_rate": 7.074013826758845e-06, "loss": 0.4633, "step": 3479 }, { "epoch": 0.35380235868239124, "grad_norm": 0.43417736887931824, "learning_rate": 7.076047173647826e-06, "loss": 0.4355, "step": 3480 }, { "epoch": 0.3539040260268402, "grad_norm": 0.42055806517601013, "learning_rate": 7.078080520536804e-06, "loss": 0.4311, "step": 3481 }, { "epoch": 0.3540056933712891, "grad_norm": 0.4959772825241089, "learning_rate": 7.080113867425783e-06, "loss": 0.4354, "step": 3482 }, { "epoch": 0.3541073607157381, "grad_norm": 0.451463907957077, "learning_rate": 7.082147214314763e-06, "loss": 0.416, "step": 3483 }, { "epoch": 0.35420902806018706, "grad_norm": 0.4526614844799042, "learning_rate": 7.084180561203742e-06, "loss": 0.3861, "step": 3484 }, { "epoch": 0.35431069540463606, "grad_norm": 0.48761099576950073, "learning_rate": 7.086213908092721e-06, "loss": 0.4459, "step": 3485 }, { "epoch": 0.354412362749085, "grad_norm": 0.4146411120891571, "learning_rate": 7.088247254981701e-06, "loss": 0.4125, "step": 3486 }, { "epoch": 0.35451403009353394, "grad_norm": 0.5161066055297852, "learning_rate": 7.0902806018706795e-06, "loss": 0.4336, "step": 3487 }, { "epoch": 0.35461569743798294, "grad_norm": 0.4332158863544464, "learning_rate": 7.092313948759659e-06, "loss": 0.3882, "step": 3488 }, { "epoch": 0.3547173647824319, "grad_norm": 0.43678346276283264, "learning_rate": 7.0943472956486385e-06, "loss": 0.4132, "step": 3489 }, { "epoch": 0.3548190321268809, "grad_norm": 0.5294787287712097, "learning_rate": 7.096380642537618e-06, "loss": 0.4199, "step": 3490 }, { "epoch": 0.3549206994713298, "grad_norm": 0.4639604091644287, "learning_rate": 7.098413989426597e-06, "loss": 0.4344, "step": 3491 }, { "epoch": 0.35502236681577876, "grad_norm": 0.435712605714798, "learning_rate": 7.100447336315576e-06, "loss": 0.4375, "step": 3492 }, { "epoch": 0.35512403416022775, "grad_norm": 0.5634521842002869, "learning_rate": 7.102480683204556e-06, "loss": 0.4176, "step": 3493 }, { "epoch": 0.3552257015046767, "grad_norm": 0.4516550898551941, "learning_rate": 7.104514030093534e-06, "loss": 0.4176, "step": 3494 }, { "epoch": 0.35532736884912564, "grad_norm": 0.4442336857318878, "learning_rate": 7.106547376982513e-06, "loss": 0.4371, "step": 3495 }, { "epoch": 0.35542903619357463, "grad_norm": 0.4683355391025543, "learning_rate": 7.108580723871493e-06, "loss": 0.4432, "step": 3496 }, { "epoch": 0.3555307035380236, "grad_norm": 0.45415860414505005, "learning_rate": 7.110614070760472e-06, "loss": 0.4291, "step": 3497 }, { "epoch": 0.35563237088247257, "grad_norm": 0.48837533593177795, "learning_rate": 7.112647417649452e-06, "loss": 0.4304, "step": 3498 }, { "epoch": 0.3557340382269215, "grad_norm": 0.4740116000175476, "learning_rate": 7.114680764538431e-06, "loss": 0.4318, "step": 3499 }, { "epoch": 0.35583570557137045, "grad_norm": 0.4881664216518402, "learning_rate": 7.1167141114274095e-06, "loss": 0.4266, "step": 3500 }, { "epoch": 0.35593737291581945, "grad_norm": 0.48715922236442566, "learning_rate": 7.11874745831639e-06, "loss": 0.4147, "step": 3501 }, { "epoch": 0.3560390402602684, "grad_norm": 0.5268921256065369, "learning_rate": 7.1207808052053684e-06, "loss": 0.4043, "step": 3502 }, { "epoch": 0.3561407076047174, "grad_norm": 0.5215180516242981, "learning_rate": 7.122814152094348e-06, "loss": 0.43, "step": 3503 }, { "epoch": 0.35624237494916633, "grad_norm": 0.44578295946121216, "learning_rate": 7.124847498983327e-06, "loss": 0.447, "step": 3504 }, { "epoch": 0.35634404229361527, "grad_norm": 0.5152943730354309, "learning_rate": 7.126880845872306e-06, "loss": 0.3908, "step": 3505 }, { "epoch": 0.35644570963806427, "grad_norm": 0.48789724707603455, "learning_rate": 7.1289141927612855e-06, "loss": 0.4262, "step": 3506 }, { "epoch": 0.3565473769825132, "grad_norm": 0.4370717704296112, "learning_rate": 7.130947539650265e-06, "loss": 0.4286, "step": 3507 }, { "epoch": 0.3566490443269622, "grad_norm": 0.44329512119293213, "learning_rate": 7.1329808865392445e-06, "loss": 0.3916, "step": 3508 }, { "epoch": 0.35675071167141115, "grad_norm": 0.5065397620201111, "learning_rate": 7.135014233428223e-06, "loss": 0.4022, "step": 3509 }, { "epoch": 0.3568523790158601, "grad_norm": 0.42176181077957153, "learning_rate": 7.137047580317203e-06, "loss": 0.3921, "step": 3510 }, { "epoch": 0.3569540463603091, "grad_norm": 0.4619876444339752, "learning_rate": 7.139080927206182e-06, "loss": 0.3851, "step": 3511 }, { "epoch": 0.357055713704758, "grad_norm": 0.4979182481765747, "learning_rate": 7.141114274095161e-06, "loss": 0.3995, "step": 3512 }, { "epoch": 0.35715738104920697, "grad_norm": 0.5086048245429993, "learning_rate": 7.14314762098414e-06, "loss": 0.4415, "step": 3513 }, { "epoch": 0.35725904839365596, "grad_norm": 0.4464053809642792, "learning_rate": 7.14518096787312e-06, "loss": 0.4046, "step": 3514 }, { "epoch": 0.3573607157381049, "grad_norm": 0.4617592990398407, "learning_rate": 7.147214314762098e-06, "loss": 0.4289, "step": 3515 }, { "epoch": 0.3574623830825539, "grad_norm": 0.4838792681694031, "learning_rate": 7.149247661651079e-06, "loss": 0.4124, "step": 3516 }, { "epoch": 0.35756405042700284, "grad_norm": 0.44142183661460876, "learning_rate": 7.151281008540057e-06, "loss": 0.4372, "step": 3517 }, { "epoch": 0.3576657177714518, "grad_norm": 0.4480069577693939, "learning_rate": 7.153314355429036e-06, "loss": 0.4264, "step": 3518 }, { "epoch": 0.3577673851159008, "grad_norm": 0.45390352606773376, "learning_rate": 7.155347702318016e-06, "loss": 0.4209, "step": 3519 }, { "epoch": 0.3578690524603497, "grad_norm": 0.47037631273269653, "learning_rate": 7.157381049206995e-06, "loss": 0.4214, "step": 3520 }, { "epoch": 0.3579707198047987, "grad_norm": 0.40953999757766724, "learning_rate": 7.159414396095975e-06, "loss": 0.3926, "step": 3521 }, { "epoch": 0.35807238714924766, "grad_norm": 0.38073408603668213, "learning_rate": 7.161447742984954e-06, "loss": 0.4069, "step": 3522 }, { "epoch": 0.3581740544936966, "grad_norm": 0.39583489298820496, "learning_rate": 7.163481089873933e-06, "loss": 0.4072, "step": 3523 }, { "epoch": 0.3582757218381456, "grad_norm": 0.3940235674381256, "learning_rate": 7.165514436762913e-06, "loss": 0.4133, "step": 3524 }, { "epoch": 0.35837738918259454, "grad_norm": 0.4168816804885864, "learning_rate": 7.167547783651892e-06, "loss": 0.4188, "step": 3525 }, { "epoch": 0.35847905652704354, "grad_norm": 0.42653611302375793, "learning_rate": 7.169581130540871e-06, "loss": 0.4112, "step": 3526 }, { "epoch": 0.3585807238714925, "grad_norm": 0.4296618103981018, "learning_rate": 7.1716144774298506e-06, "loss": 0.4255, "step": 3527 }, { "epoch": 0.3586823912159414, "grad_norm": 0.4417455196380615, "learning_rate": 7.173647824318829e-06, "loss": 0.4428, "step": 3528 }, { "epoch": 0.3587840585603904, "grad_norm": 0.3713923692703247, "learning_rate": 7.175681171207809e-06, "loss": 0.3981, "step": 3529 }, { "epoch": 0.35888572590483936, "grad_norm": 0.41123414039611816, "learning_rate": 7.177714518096788e-06, "loss": 0.4046, "step": 3530 }, { "epoch": 0.35898739324928836, "grad_norm": 0.4821734130382538, "learning_rate": 7.179747864985768e-06, "loss": 0.4145, "step": 3531 }, { "epoch": 0.3590890605937373, "grad_norm": 0.4414313733577728, "learning_rate": 7.181781211874746e-06, "loss": 0.4355, "step": 3532 }, { "epoch": 0.35919072793818624, "grad_norm": 0.4009377360343933, "learning_rate": 7.183814558763725e-06, "loss": 0.3837, "step": 3533 }, { "epoch": 0.35929239528263524, "grad_norm": 0.44238466024398804, "learning_rate": 7.185847905652705e-06, "loss": 0.4059, "step": 3534 }, { "epoch": 0.3593940626270842, "grad_norm": 0.44464921951293945, "learning_rate": 7.187881252541684e-06, "loss": 0.4204, "step": 3535 }, { "epoch": 0.3594957299715331, "grad_norm": 0.46000033617019653, "learning_rate": 7.1899145994306626e-06, "loss": 0.4021, "step": 3536 }, { "epoch": 0.3595973973159821, "grad_norm": 0.39763325452804565, "learning_rate": 7.191947946319643e-06, "loss": 0.3842, "step": 3537 }, { "epoch": 0.35969906466043106, "grad_norm": 0.42775338888168335, "learning_rate": 7.1939812932086215e-06, "loss": 0.4381, "step": 3538 }, { "epoch": 0.35980073200488005, "grad_norm": 0.41692498326301575, "learning_rate": 7.196014640097602e-06, "loss": 0.4054, "step": 3539 }, { "epoch": 0.359902399349329, "grad_norm": 0.46292421221733093, "learning_rate": 7.1980479869865805e-06, "loss": 0.415, "step": 3540 }, { "epoch": 0.36000406669377794, "grad_norm": 0.42128437757492065, "learning_rate": 7.200081333875559e-06, "loss": 0.427, "step": 3541 }, { "epoch": 0.36010573403822693, "grad_norm": 0.3928142488002777, "learning_rate": 7.2021146807645395e-06, "loss": 0.4095, "step": 3542 }, { "epoch": 0.3602074013826759, "grad_norm": 0.408286452293396, "learning_rate": 7.204148027653518e-06, "loss": 0.3968, "step": 3543 }, { "epoch": 0.36030906872712487, "grad_norm": 0.4156087040901184, "learning_rate": 7.206181374542498e-06, "loss": 0.4184, "step": 3544 }, { "epoch": 0.3604107360715738, "grad_norm": 0.40819695591926575, "learning_rate": 7.208214721431477e-06, "loss": 0.409, "step": 3545 }, { "epoch": 0.36051240341602275, "grad_norm": 0.39977511763572693, "learning_rate": 7.210248068320456e-06, "loss": 0.4393, "step": 3546 }, { "epoch": 0.36061407076047175, "grad_norm": 0.3815235197544098, "learning_rate": 7.212281415209435e-06, "loss": 0.3747, "step": 3547 }, { "epoch": 0.3607157381049207, "grad_norm": 0.4219866394996643, "learning_rate": 7.214314762098415e-06, "loss": 0.4465, "step": 3548 }, { "epoch": 0.3608174054493697, "grad_norm": 0.39774414896965027, "learning_rate": 7.216348108987394e-06, "loss": 0.4208, "step": 3549 }, { "epoch": 0.36091907279381863, "grad_norm": 0.4153074026107788, "learning_rate": 7.218381455876373e-06, "loss": 0.4345, "step": 3550 }, { "epoch": 0.36102074013826757, "grad_norm": 0.4503909647464752, "learning_rate": 7.220414802765352e-06, "loss": 0.3915, "step": 3551 }, { "epoch": 0.36112240748271657, "grad_norm": 0.40898585319519043, "learning_rate": 7.222448149654332e-06, "loss": 0.3915, "step": 3552 }, { "epoch": 0.3612240748271655, "grad_norm": 0.4326813220977783, "learning_rate": 7.2244814965433105e-06, "loss": 0.4441, "step": 3553 }, { "epoch": 0.36132574217161445, "grad_norm": 0.4547937214374542, "learning_rate": 7.22651484343229e-06, "loss": 0.426, "step": 3554 }, { "epoch": 0.36142740951606345, "grad_norm": 0.37488579750061035, "learning_rate": 7.2285481903212695e-06, "loss": 0.4208, "step": 3555 }, { "epoch": 0.3615290768605124, "grad_norm": 0.4571703374385834, "learning_rate": 7.230581537210248e-06, "loss": 0.4093, "step": 3556 }, { "epoch": 0.3616307442049614, "grad_norm": 0.45197734236717224, "learning_rate": 7.2326148840992284e-06, "loss": 0.438, "step": 3557 }, { "epoch": 0.3617324115494103, "grad_norm": 0.38274258375167847, "learning_rate": 7.234648230988207e-06, "loss": 0.3964, "step": 3558 }, { "epoch": 0.36183407889385927, "grad_norm": 0.4325817823410034, "learning_rate": 7.236681577877186e-06, "loss": 0.3914, "step": 3559 }, { "epoch": 0.36193574623830826, "grad_norm": 0.48818469047546387, "learning_rate": 7.238714924766166e-06, "loss": 0.3983, "step": 3560 }, { "epoch": 0.3620374135827572, "grad_norm": 0.3892441987991333, "learning_rate": 7.240748271655145e-06, "loss": 0.3927, "step": 3561 }, { "epoch": 0.3621390809272062, "grad_norm": 0.4689837396144867, "learning_rate": 7.242781618544125e-06, "loss": 0.4352, "step": 3562 }, { "epoch": 0.36224074827165514, "grad_norm": 0.4042886793613434, "learning_rate": 7.244814965433104e-06, "loss": 0.4281, "step": 3563 }, { "epoch": 0.3623424156161041, "grad_norm": 0.43496865034103394, "learning_rate": 7.246848312322082e-06, "loss": 0.4243, "step": 3564 }, { "epoch": 0.3624440829605531, "grad_norm": 0.3880364000797272, "learning_rate": 7.248881659211063e-06, "loss": 0.3781, "step": 3565 }, { "epoch": 0.362545750305002, "grad_norm": 0.41520121693611145, "learning_rate": 7.250915006100041e-06, "loss": 0.3747, "step": 3566 }, { "epoch": 0.362647417649451, "grad_norm": 0.4284118413925171, "learning_rate": 7.252948352989021e-06, "loss": 0.3937, "step": 3567 }, { "epoch": 0.36274908499389996, "grad_norm": 0.386325865983963, "learning_rate": 7.254981699877999e-06, "loss": 0.386, "step": 3568 }, { "epoch": 0.3628507523383489, "grad_norm": 0.398837149143219, "learning_rate": 7.257015046766979e-06, "loss": 0.3993, "step": 3569 }, { "epoch": 0.3629524196827979, "grad_norm": 0.44710612297058105, "learning_rate": 7.259048393655958e-06, "loss": 0.409, "step": 3570 }, { "epoch": 0.36305408702724684, "grad_norm": 0.4056098759174347, "learning_rate": 7.261081740544937e-06, "loss": 0.4442, "step": 3571 }, { "epoch": 0.36315575437169584, "grad_norm": 0.4229477047920227, "learning_rate": 7.263115087433917e-06, "loss": 0.4221, "step": 3572 }, { "epoch": 0.3632574217161448, "grad_norm": 0.4191337823867798, "learning_rate": 7.265148434322896e-06, "loss": 0.4189, "step": 3573 }, { "epoch": 0.3633590890605937, "grad_norm": 0.38266244530677795, "learning_rate": 7.267181781211875e-06, "loss": 0.4058, "step": 3574 }, { "epoch": 0.3634607564050427, "grad_norm": 0.4560667872428894, "learning_rate": 7.269215128100855e-06, "loss": 0.4094, "step": 3575 }, { "epoch": 0.36356242374949166, "grad_norm": 0.42605075240135193, "learning_rate": 7.271248474989834e-06, "loss": 0.3771, "step": 3576 }, { "epoch": 0.3636640910939406, "grad_norm": 0.44054046273231506, "learning_rate": 7.273281821878812e-06, "loss": 0.4271, "step": 3577 }, { "epoch": 0.3637657584383896, "grad_norm": 0.4125033915042877, "learning_rate": 7.275315168767793e-06, "loss": 0.4325, "step": 3578 }, { "epoch": 0.36386742578283854, "grad_norm": 0.4022153913974762, "learning_rate": 7.277348515656771e-06, "loss": 0.4114, "step": 3579 }, { "epoch": 0.36396909312728754, "grad_norm": 0.4435798227787018, "learning_rate": 7.2793818625457516e-06, "loss": 0.4365, "step": 3580 }, { "epoch": 0.3640707604717365, "grad_norm": 0.43925797939300537, "learning_rate": 7.28141520943473e-06, "loss": 0.4091, "step": 3581 }, { "epoch": 0.3641724278161854, "grad_norm": 0.43633174896240234, "learning_rate": 7.283448556323709e-06, "loss": 0.3876, "step": 3582 }, { "epoch": 0.3642740951606344, "grad_norm": 0.43375545740127563, "learning_rate": 7.285481903212689e-06, "loss": 0.4015, "step": 3583 }, { "epoch": 0.36437576250508336, "grad_norm": 0.4853553771972656, "learning_rate": 7.287515250101668e-06, "loss": 0.4308, "step": 3584 }, { "epoch": 0.36447742984953235, "grad_norm": 0.40801677107810974, "learning_rate": 7.289548596990647e-06, "loss": 0.426, "step": 3585 }, { "epoch": 0.3645790971939813, "grad_norm": 0.4473017454147339, "learning_rate": 7.291581943879627e-06, "loss": 0.4061, "step": 3586 }, { "epoch": 0.36468076453843024, "grad_norm": 0.4625186622142792, "learning_rate": 7.2936152907686054e-06, "loss": 0.4203, "step": 3587 }, { "epoch": 0.36478243188287923, "grad_norm": 0.4106335937976837, "learning_rate": 7.295648637657585e-06, "loss": 0.4289, "step": 3588 }, { "epoch": 0.3648840992273282, "grad_norm": 0.4167969822883606, "learning_rate": 7.297681984546564e-06, "loss": 0.4302, "step": 3589 }, { "epoch": 0.36498576657177717, "grad_norm": 0.4929027259349823, "learning_rate": 7.299715331435544e-06, "loss": 0.4702, "step": 3590 }, { "epoch": 0.3650874339162261, "grad_norm": 0.4437313675880432, "learning_rate": 7.3017486783245226e-06, "loss": 0.3922, "step": 3591 }, { "epoch": 0.36518910126067505, "grad_norm": 0.48561036586761475, "learning_rate": 7.303782025213502e-06, "loss": 0.4156, "step": 3592 }, { "epoch": 0.36529076860512405, "grad_norm": 0.41496163606643677, "learning_rate": 7.3058153721024815e-06, "loss": 0.4315, "step": 3593 }, { "epoch": 0.365392435949573, "grad_norm": 0.3763669729232788, "learning_rate": 7.30784871899146e-06, "loss": 0.4073, "step": 3594 }, { "epoch": 0.36549410329402193, "grad_norm": 0.42514896392822266, "learning_rate": 7.30988206588044e-06, "loss": 0.4041, "step": 3595 }, { "epoch": 0.36559577063847093, "grad_norm": 0.43527546525001526, "learning_rate": 7.311915412769419e-06, "loss": 0.4145, "step": 3596 }, { "epoch": 0.36569743798291987, "grad_norm": 0.42496535181999207, "learning_rate": 7.313948759658398e-06, "loss": 0.442, "step": 3597 }, { "epoch": 0.36579910532736887, "grad_norm": 0.5053771138191223, "learning_rate": 7.315982106547378e-06, "loss": 0.4771, "step": 3598 }, { "epoch": 0.3659007726718178, "grad_norm": 0.38163796067237854, "learning_rate": 7.318015453436357e-06, "loss": 0.4266, "step": 3599 }, { "epoch": 0.36600244001626675, "grad_norm": 0.43248072266578674, "learning_rate": 7.320048800325335e-06, "loss": 0.4049, "step": 3600 }, { "epoch": 0.36610410736071575, "grad_norm": 0.4888609051704407, "learning_rate": 7.322082147214316e-06, "loss": 0.4143, "step": 3601 }, { "epoch": 0.3662057747051647, "grad_norm": 0.4139796197414398, "learning_rate": 7.324115494103294e-06, "loss": 0.4312, "step": 3602 }, { "epoch": 0.3663074420496137, "grad_norm": 0.5118071436882019, "learning_rate": 7.326148840992275e-06, "loss": 0.4515, "step": 3603 }, { "epoch": 0.3664091093940626, "grad_norm": 0.4278026223182678, "learning_rate": 7.328182187881253e-06, "loss": 0.4517, "step": 3604 }, { "epoch": 0.36651077673851157, "grad_norm": 0.3826557695865631, "learning_rate": 7.330215534770232e-06, "loss": 0.441, "step": 3605 }, { "epoch": 0.36661244408296056, "grad_norm": 0.42566514015197754, "learning_rate": 7.3322488816592115e-06, "loss": 0.3963, "step": 3606 }, { "epoch": 0.3667141114274095, "grad_norm": 0.4362446069717407, "learning_rate": 7.334282228548191e-06, "loss": 0.4033, "step": 3607 }, { "epoch": 0.3668157787718585, "grad_norm": 0.42688116431236267, "learning_rate": 7.3363155754371705e-06, "loss": 0.4495, "step": 3608 }, { "epoch": 0.36691744611630744, "grad_norm": 0.3901854157447815, "learning_rate": 7.338348922326149e-06, "loss": 0.411, "step": 3609 }, { "epoch": 0.3670191134607564, "grad_norm": 0.4369089901447296, "learning_rate": 7.340382269215129e-06, "loss": 0.4217, "step": 3610 }, { "epoch": 0.3671207808052054, "grad_norm": 0.3882315456867218, "learning_rate": 7.342415616104108e-06, "loss": 0.4102, "step": 3611 }, { "epoch": 0.3672224481496543, "grad_norm": 0.4237940013408661, "learning_rate": 7.344448962993087e-06, "loss": 0.4301, "step": 3612 }, { "epoch": 0.3673241154941033, "grad_norm": 0.40288490056991577, "learning_rate": 7.346482309882067e-06, "loss": 0.4226, "step": 3613 }, { "epoch": 0.36742578283855226, "grad_norm": 0.42244818806648254, "learning_rate": 7.348515656771046e-06, "loss": 0.3975, "step": 3614 }, { "epoch": 0.3675274501830012, "grad_norm": 0.41986697912216187, "learning_rate": 7.350549003660024e-06, "loss": 0.4183, "step": 3615 }, { "epoch": 0.3676291175274502, "grad_norm": 0.4536346197128296, "learning_rate": 7.352582350549005e-06, "loss": 0.4315, "step": 3616 }, { "epoch": 0.36773078487189914, "grad_norm": 0.3960375189781189, "learning_rate": 7.354615697437983e-06, "loss": 0.4117, "step": 3617 }, { "epoch": 0.3678324522163481, "grad_norm": 0.4057196378707886, "learning_rate": 7.356649044326962e-06, "loss": 0.4179, "step": 3618 }, { "epoch": 0.3679341195607971, "grad_norm": 0.4438278377056122, "learning_rate": 7.358682391215942e-06, "loss": 0.4246, "step": 3619 }, { "epoch": 0.368035786905246, "grad_norm": 0.45484259724617004, "learning_rate": 7.360715738104921e-06, "loss": 0.4264, "step": 3620 }, { "epoch": 0.368137454249695, "grad_norm": 0.4426898658275604, "learning_rate": 7.362749084993901e-06, "loss": 0.4277, "step": 3621 }, { "epoch": 0.36823912159414396, "grad_norm": 0.4879579544067383, "learning_rate": 7.36478243188288e-06, "loss": 0.4358, "step": 3622 }, { "epoch": 0.3683407889385929, "grad_norm": 0.43581581115722656, "learning_rate": 7.3668157787718585e-06, "loss": 0.4049, "step": 3623 }, { "epoch": 0.3684424562830419, "grad_norm": 0.44520577788352966, "learning_rate": 7.368849125660839e-06, "loss": 0.4411, "step": 3624 }, { "epoch": 0.36854412362749084, "grad_norm": 0.47435227036476135, "learning_rate": 7.3708824725498175e-06, "loss": 0.3964, "step": 3625 }, { "epoch": 0.36864579097193984, "grad_norm": 0.3818162977695465, "learning_rate": 7.372915819438797e-06, "loss": 0.3864, "step": 3626 }, { "epoch": 0.3687474583163888, "grad_norm": 0.43128910660743713, "learning_rate": 7.3749491663277765e-06, "loss": 0.4043, "step": 3627 }, { "epoch": 0.3688491256608377, "grad_norm": 0.4558899700641632, "learning_rate": 7.376982513216755e-06, "loss": 0.4222, "step": 3628 }, { "epoch": 0.3689507930052867, "grad_norm": 0.4452418088912964, "learning_rate": 7.379015860105735e-06, "loss": 0.4189, "step": 3629 }, { "epoch": 0.36905246034973566, "grad_norm": 0.42341047525405884, "learning_rate": 7.381049206994714e-06, "loss": 0.3996, "step": 3630 }, { "epoch": 0.36915412769418465, "grad_norm": 0.44631388783454895, "learning_rate": 7.383082553883694e-06, "loss": 0.4337, "step": 3631 }, { "epoch": 0.3692557950386336, "grad_norm": 0.4485633671283722, "learning_rate": 7.385115900772672e-06, "loss": 0.4327, "step": 3632 }, { "epoch": 0.36935746238308254, "grad_norm": 0.36905795335769653, "learning_rate": 7.387149247661652e-06, "loss": 0.3736, "step": 3633 }, { "epoch": 0.36945912972753153, "grad_norm": 0.5172373056411743, "learning_rate": 7.389182594550631e-06, "loss": 0.3677, "step": 3634 }, { "epoch": 0.3695607970719805, "grad_norm": 0.3886813521385193, "learning_rate": 7.39121594143961e-06, "loss": 0.4224, "step": 3635 }, { "epoch": 0.36966246441642947, "grad_norm": 0.42852887511253357, "learning_rate": 7.39324928832859e-06, "loss": 0.4254, "step": 3636 }, { "epoch": 0.3697641317608784, "grad_norm": 0.4081610143184662, "learning_rate": 7.395282635217569e-06, "loss": 0.3968, "step": 3637 }, { "epoch": 0.36986579910532735, "grad_norm": 0.38856732845306396, "learning_rate": 7.3973159821065475e-06, "loss": 0.3951, "step": 3638 }, { "epoch": 0.36996746644977635, "grad_norm": 0.46073657274246216, "learning_rate": 7.399349328995528e-06, "loss": 0.4229, "step": 3639 }, { "epoch": 0.3700691337942253, "grad_norm": 0.40321609377861023, "learning_rate": 7.4013826758845065e-06, "loss": 0.4039, "step": 3640 }, { "epoch": 0.37017080113867423, "grad_norm": 0.4293766915798187, "learning_rate": 7.403416022773485e-06, "loss": 0.4075, "step": 3641 }, { "epoch": 0.37027246848312323, "grad_norm": 0.45162415504455566, "learning_rate": 7.4054493696624654e-06, "loss": 0.4122, "step": 3642 }, { "epoch": 0.37037413582757217, "grad_norm": 0.4447178840637207, "learning_rate": 7.407482716551444e-06, "loss": 0.4006, "step": 3643 }, { "epoch": 0.37047580317202117, "grad_norm": 0.43172186613082886, "learning_rate": 7.4095160634404236e-06, "loss": 0.3872, "step": 3644 }, { "epoch": 0.3705774705164701, "grad_norm": 0.4822739064693451, "learning_rate": 7.411549410329403e-06, "loss": 0.4439, "step": 3645 }, { "epoch": 0.37067913786091905, "grad_norm": 0.4372139275074005, "learning_rate": 7.413582757218382e-06, "loss": 0.4135, "step": 3646 }, { "epoch": 0.37078080520536805, "grad_norm": 0.4653835594654083, "learning_rate": 7.415616104107361e-06, "loss": 0.4128, "step": 3647 }, { "epoch": 0.370882472549817, "grad_norm": 0.504126250743866, "learning_rate": 7.417649450996341e-06, "loss": 0.3876, "step": 3648 }, { "epoch": 0.370984139894266, "grad_norm": 0.5394798517227173, "learning_rate": 7.41968279788532e-06, "loss": 0.4454, "step": 3649 }, { "epoch": 0.3710858072387149, "grad_norm": 0.4236983060836792, "learning_rate": 7.421716144774299e-06, "loss": 0.4158, "step": 3650 }, { "epoch": 0.37118747458316387, "grad_norm": 0.4205022156238556, "learning_rate": 7.423749491663278e-06, "loss": 0.4078, "step": 3651 }, { "epoch": 0.37128914192761286, "grad_norm": 0.5008835792541504, "learning_rate": 7.425782838552258e-06, "loss": 0.4114, "step": 3652 }, { "epoch": 0.3713908092720618, "grad_norm": 0.4416963756084442, "learning_rate": 7.427816185441236e-06, "loss": 0.4007, "step": 3653 }, { "epoch": 0.3714924766165108, "grad_norm": 0.4683370590209961, "learning_rate": 7.429849532330217e-06, "loss": 0.4295, "step": 3654 }, { "epoch": 0.37159414396095974, "grad_norm": 0.4013287425041199, "learning_rate": 7.431882879219195e-06, "loss": 0.4133, "step": 3655 }, { "epoch": 0.3716958113054087, "grad_norm": 0.46204283833503723, "learning_rate": 7.433916226108174e-06, "loss": 0.4491, "step": 3656 }, { "epoch": 0.3717974786498577, "grad_norm": 0.3897463083267212, "learning_rate": 7.435949572997154e-06, "loss": 0.4293, "step": 3657 }, { "epoch": 0.3718991459943066, "grad_norm": 0.4564107358455658, "learning_rate": 7.437982919886133e-06, "loss": 0.4185, "step": 3658 }, { "epoch": 0.37200081333875556, "grad_norm": 0.44889864325523376, "learning_rate": 7.440016266775112e-06, "loss": 0.4141, "step": 3659 }, { "epoch": 0.37210248068320456, "grad_norm": 0.37776991724967957, "learning_rate": 7.442049613664092e-06, "loss": 0.4264, "step": 3660 }, { "epoch": 0.3722041480276535, "grad_norm": 0.41099128127098083, "learning_rate": 7.444082960553071e-06, "loss": 0.4043, "step": 3661 }, { "epoch": 0.3723058153721025, "grad_norm": 0.4108096659183502, "learning_rate": 7.446116307442051e-06, "loss": 0.4429, "step": 3662 }, { "epoch": 0.37240748271655144, "grad_norm": 0.42135879397392273, "learning_rate": 7.44814965433103e-06, "loss": 0.4028, "step": 3663 }, { "epoch": 0.3725091500610004, "grad_norm": 0.3826853334903717, "learning_rate": 7.450183001220008e-06, "loss": 0.3887, "step": 3664 }, { "epoch": 0.3726108174054494, "grad_norm": 0.4110395312309265, "learning_rate": 7.4522163481089886e-06, "loss": 0.4101, "step": 3665 }, { "epoch": 0.3727124847498983, "grad_norm": 0.39208030700683594, "learning_rate": 7.454249694997967e-06, "loss": 0.4186, "step": 3666 }, { "epoch": 0.3728141520943473, "grad_norm": 0.39722636342048645, "learning_rate": 7.456283041886947e-06, "loss": 0.4155, "step": 3667 }, { "epoch": 0.37291581943879626, "grad_norm": 0.4346177577972412, "learning_rate": 7.458316388775926e-06, "loss": 0.4508, "step": 3668 }, { "epoch": 0.3730174867832452, "grad_norm": 0.38082966208457947, "learning_rate": 7.460349735664905e-06, "loss": 0.4146, "step": 3669 }, { "epoch": 0.3731191541276942, "grad_norm": 0.412024587392807, "learning_rate": 7.462383082553884e-06, "loss": 0.4043, "step": 3670 }, { "epoch": 0.37322082147214314, "grad_norm": 0.42268088459968567, "learning_rate": 7.464416429442863e-06, "loss": 0.4071, "step": 3671 }, { "epoch": 0.37332248881659214, "grad_norm": 0.40825390815734863, "learning_rate": 7.466449776331843e-06, "loss": 0.4279, "step": 3672 }, { "epoch": 0.3734241561610411, "grad_norm": 0.4337216317653656, "learning_rate": 7.468483123220822e-06, "loss": 0.3927, "step": 3673 }, { "epoch": 0.37352582350549, "grad_norm": 0.46009379625320435, "learning_rate": 7.470516470109801e-06, "loss": 0.433, "step": 3674 }, { "epoch": 0.373627490849939, "grad_norm": 0.44478726387023926, "learning_rate": 7.472549816998781e-06, "loss": 0.4223, "step": 3675 }, { "epoch": 0.37372915819438796, "grad_norm": 0.4237539768218994, "learning_rate": 7.4745831638877596e-06, "loss": 0.4136, "step": 3676 }, { "epoch": 0.37383082553883695, "grad_norm": 0.4196555018424988, "learning_rate": 7.47661651077674e-06, "loss": 0.4379, "step": 3677 }, { "epoch": 0.3739324928832859, "grad_norm": 0.4134184420108795, "learning_rate": 7.4786498576657185e-06, "loss": 0.3946, "step": 3678 }, { "epoch": 0.37403416022773484, "grad_norm": 0.47907328605651855, "learning_rate": 7.480683204554697e-06, "loss": 0.4115, "step": 3679 }, { "epoch": 0.37413582757218383, "grad_norm": 0.40721288323402405, "learning_rate": 7.4827165514436775e-06, "loss": 0.4028, "step": 3680 }, { "epoch": 0.3742374949166328, "grad_norm": 0.47752469778060913, "learning_rate": 7.484749898332656e-06, "loss": 0.4269, "step": 3681 }, { "epoch": 0.3743391622610817, "grad_norm": 0.41818690299987793, "learning_rate": 7.486783245221635e-06, "loss": 0.4272, "step": 3682 }, { "epoch": 0.3744408296055307, "grad_norm": 0.41406911611557007, "learning_rate": 7.488816592110615e-06, "loss": 0.4546, "step": 3683 }, { "epoch": 0.37454249694997965, "grad_norm": 0.45911118388175964, "learning_rate": 7.490849938999594e-06, "loss": 0.4264, "step": 3684 }, { "epoch": 0.37464416429442865, "grad_norm": 0.4094408452510834, "learning_rate": 7.492883285888573e-06, "loss": 0.4256, "step": 3685 }, { "epoch": 0.3747458316388776, "grad_norm": 0.3985269069671631, "learning_rate": 7.494916632777553e-06, "loss": 0.434, "step": 3686 }, { "epoch": 0.37484749898332653, "grad_norm": 0.38127243518829346, "learning_rate": 7.496949979666531e-06, "loss": 0.3962, "step": 3687 }, { "epoch": 0.37494916632777553, "grad_norm": 0.42851683497428894, "learning_rate": 7.498983326555511e-06, "loss": 0.3909, "step": 3688 }, { "epoch": 0.37505083367222447, "grad_norm": 0.4470978081226349, "learning_rate": 7.50101667344449e-06, "loss": 0.4151, "step": 3689 }, { "epoch": 0.37515250101667347, "grad_norm": 0.4150305986404419, "learning_rate": 7.50305002033347e-06, "loss": 0.426, "step": 3690 }, { "epoch": 0.3752541683611224, "grad_norm": 0.4364697337150574, "learning_rate": 7.5050833672224485e-06, "loss": 0.4083, "step": 3691 }, { "epoch": 0.37535583570557135, "grad_norm": 0.4326828122138977, "learning_rate": 7.507116714111428e-06, "loss": 0.4044, "step": 3692 }, { "epoch": 0.37545750305002035, "grad_norm": 0.4065888822078705, "learning_rate": 7.5091500610004075e-06, "loss": 0.406, "step": 3693 }, { "epoch": 0.3755591703944693, "grad_norm": 0.4526701867580414, "learning_rate": 7.511183407889386e-06, "loss": 0.465, "step": 3694 }, { "epoch": 0.3756608377389183, "grad_norm": 0.4345529079437256, "learning_rate": 7.5132167547783664e-06, "loss": 0.4235, "step": 3695 }, { "epoch": 0.3757625050833672, "grad_norm": 0.38666361570358276, "learning_rate": 7.515250101667345e-06, "loss": 0.3855, "step": 3696 }, { "epoch": 0.37586417242781617, "grad_norm": 0.4272262752056122, "learning_rate": 7.517283448556324e-06, "loss": 0.4262, "step": 3697 }, { "epoch": 0.37596583977226516, "grad_norm": 0.4118242859840393, "learning_rate": 7.519316795445304e-06, "loss": 0.4079, "step": 3698 }, { "epoch": 0.3760675071167141, "grad_norm": 0.3936805725097656, "learning_rate": 7.521350142334283e-06, "loss": 0.3974, "step": 3699 }, { "epoch": 0.37616917446116305, "grad_norm": 0.4437422752380371, "learning_rate": 7.523383489223261e-06, "loss": 0.4227, "step": 3700 }, { "epoch": 0.37627084180561204, "grad_norm": 0.39224404096603394, "learning_rate": 7.525416836112242e-06, "loss": 0.4362, "step": 3701 }, { "epoch": 0.376372509150061, "grad_norm": 0.44436532258987427, "learning_rate": 7.52745018300122e-06, "loss": 0.3912, "step": 3702 }, { "epoch": 0.37647417649451, "grad_norm": 0.41386181116104126, "learning_rate": 7.529483529890201e-06, "loss": 0.4374, "step": 3703 }, { "epoch": 0.3765758438389589, "grad_norm": 0.40073078870773315, "learning_rate": 7.531516876779179e-06, "loss": 0.4257, "step": 3704 }, { "epoch": 0.37667751118340786, "grad_norm": 0.4731745719909668, "learning_rate": 7.533550223668158e-06, "loss": 0.4454, "step": 3705 }, { "epoch": 0.37677917852785686, "grad_norm": 0.4554497301578522, "learning_rate": 7.535583570557138e-06, "loss": 0.4125, "step": 3706 }, { "epoch": 0.3768808458723058, "grad_norm": 0.41405099630355835, "learning_rate": 7.537616917446117e-06, "loss": 0.4187, "step": 3707 }, { "epoch": 0.3769825132167548, "grad_norm": 0.47089487314224243, "learning_rate": 7.539650264335096e-06, "loss": 0.4583, "step": 3708 }, { "epoch": 0.37708418056120374, "grad_norm": 0.41654929518699646, "learning_rate": 7.541683611224075e-06, "loss": 0.4079, "step": 3709 }, { "epoch": 0.3771858479056527, "grad_norm": 0.44074547290802, "learning_rate": 7.5437169581130545e-06, "loss": 0.4611, "step": 3710 }, { "epoch": 0.3772875152501017, "grad_norm": 0.42345982789993286, "learning_rate": 7.545750305002034e-06, "loss": 0.4127, "step": 3711 }, { "epoch": 0.3773891825945506, "grad_norm": 0.4340636134147644, "learning_rate": 7.547783651891013e-06, "loss": 0.4023, "step": 3712 }, { "epoch": 0.3774908499389996, "grad_norm": 0.38613101840019226, "learning_rate": 7.549816998779993e-06, "loss": 0.412, "step": 3713 }, { "epoch": 0.37759251728344856, "grad_norm": 0.4730665385723114, "learning_rate": 7.551850345668972e-06, "loss": 0.4471, "step": 3714 }, { "epoch": 0.3776941846278975, "grad_norm": 0.4594733715057373, "learning_rate": 7.55388369255795e-06, "loss": 0.4413, "step": 3715 }, { "epoch": 0.3777958519723465, "grad_norm": 0.42867177724838257, "learning_rate": 7.555917039446931e-06, "loss": 0.3958, "step": 3716 }, { "epoch": 0.37789751931679544, "grad_norm": 0.4700140357017517, "learning_rate": 7.557950386335909e-06, "loss": 0.436, "step": 3717 }, { "epoch": 0.37799918666124444, "grad_norm": 0.39334535598754883, "learning_rate": 7.55998373322489e-06, "loss": 0.4026, "step": 3718 }, { "epoch": 0.3781008540056934, "grad_norm": 0.40816885232925415, "learning_rate": 7.562017080113868e-06, "loss": 0.4114, "step": 3719 }, { "epoch": 0.3782025213501423, "grad_norm": 0.40013113617897034, "learning_rate": 7.564050427002847e-06, "loss": 0.3981, "step": 3720 }, { "epoch": 0.3783041886945913, "grad_norm": 0.4536610543727875, "learning_rate": 7.566083773891827e-06, "loss": 0.4101, "step": 3721 }, { "epoch": 0.37840585603904026, "grad_norm": 0.4779417812824249, "learning_rate": 7.568117120780806e-06, "loss": 0.4327, "step": 3722 }, { "epoch": 0.3785075233834892, "grad_norm": 0.4700045883655548, "learning_rate": 7.5701504676697845e-06, "loss": 0.3877, "step": 3723 }, { "epoch": 0.3786091907279382, "grad_norm": 0.4237096309661865, "learning_rate": 7.572183814558765e-06, "loss": 0.4037, "step": 3724 }, { "epoch": 0.37871085807238714, "grad_norm": 0.5264432430267334, "learning_rate": 7.5742171614477435e-06, "loss": 0.4285, "step": 3725 }, { "epoch": 0.37881252541683613, "grad_norm": 0.433864951133728, "learning_rate": 7.576250508336723e-06, "loss": 0.4003, "step": 3726 }, { "epoch": 0.3789141927612851, "grad_norm": 0.396457701921463, "learning_rate": 7.5782838552257024e-06, "loss": 0.426, "step": 3727 }, { "epoch": 0.379015860105734, "grad_norm": 0.41195622086524963, "learning_rate": 7.580317202114681e-06, "loss": 0.4101, "step": 3728 }, { "epoch": 0.379117527450183, "grad_norm": 0.42687270045280457, "learning_rate": 7.5823505490036606e-06, "loss": 0.384, "step": 3729 }, { "epoch": 0.37921919479463195, "grad_norm": 0.4146171808242798, "learning_rate": 7.58438389589264e-06, "loss": 0.3726, "step": 3730 }, { "epoch": 0.37932086213908095, "grad_norm": 0.37495535612106323, "learning_rate": 7.5864172427816195e-06, "loss": 0.4089, "step": 3731 }, { "epoch": 0.3794225294835299, "grad_norm": 0.4178856909275055, "learning_rate": 7.588450589670598e-06, "loss": 0.4318, "step": 3732 }, { "epoch": 0.37952419682797883, "grad_norm": 0.420803427696228, "learning_rate": 7.590483936559578e-06, "loss": 0.4343, "step": 3733 }, { "epoch": 0.37962586417242783, "grad_norm": 0.4111131429672241, "learning_rate": 7.592517283448557e-06, "loss": 0.3845, "step": 3734 }, { "epoch": 0.37972753151687677, "grad_norm": 0.39696836471557617, "learning_rate": 7.594550630337536e-06, "loss": 0.3744, "step": 3735 }, { "epoch": 0.37982919886132577, "grad_norm": 0.406189888715744, "learning_rate": 7.596583977226516e-06, "loss": 0.4275, "step": 3736 }, { "epoch": 0.3799308662057747, "grad_norm": 0.3773740828037262, "learning_rate": 7.598617324115495e-06, "loss": 0.4195, "step": 3737 }, { "epoch": 0.38003253355022365, "grad_norm": 0.42119964957237244, "learning_rate": 7.600650671004473e-06, "loss": 0.3974, "step": 3738 }, { "epoch": 0.38013420089467265, "grad_norm": 0.414913147687912, "learning_rate": 7.602684017893454e-06, "loss": 0.4076, "step": 3739 }, { "epoch": 0.3802358682391216, "grad_norm": 0.41587939858436584, "learning_rate": 7.604717364782432e-06, "loss": 0.412, "step": 3740 }, { "epoch": 0.38033753558357053, "grad_norm": 0.44132882356643677, "learning_rate": 7.606750711671411e-06, "loss": 0.4028, "step": 3741 }, { "epoch": 0.3804392029280195, "grad_norm": 0.41974937915802, "learning_rate": 7.608784058560391e-06, "loss": 0.4436, "step": 3742 }, { "epoch": 0.38054087027246847, "grad_norm": 0.4080899655818939, "learning_rate": 7.61081740544937e-06, "loss": 0.4567, "step": 3743 }, { "epoch": 0.38064253761691746, "grad_norm": 0.43825361132621765, "learning_rate": 7.6128507523383495e-06, "loss": 0.3783, "step": 3744 }, { "epoch": 0.3807442049613664, "grad_norm": 0.45013901591300964, "learning_rate": 7.614884099227329e-06, "loss": 0.4095, "step": 3745 }, { "epoch": 0.38084587230581535, "grad_norm": 0.3912576138973236, "learning_rate": 7.616917446116308e-06, "loss": 0.3865, "step": 3746 }, { "epoch": 0.38094753965026434, "grad_norm": 0.40491506457328796, "learning_rate": 7.618950793005287e-06, "loss": 0.4324, "step": 3747 }, { "epoch": 0.3810492069947133, "grad_norm": 0.45276394486427307, "learning_rate": 7.620984139894267e-06, "loss": 0.4206, "step": 3748 }, { "epoch": 0.3811508743391623, "grad_norm": 0.42034822702407837, "learning_rate": 7.623017486783246e-06, "loss": 0.3816, "step": 3749 }, { "epoch": 0.3812525416836112, "grad_norm": 0.4428333342075348, "learning_rate": 7.625050833672225e-06, "loss": 0.4021, "step": 3750 }, { "epoch": 0.38135420902806016, "grad_norm": 0.38964205980300903, "learning_rate": 7.627084180561204e-06, "loss": 0.4055, "step": 3751 }, { "epoch": 0.38145587637250916, "grad_norm": 0.40573421120643616, "learning_rate": 7.629117527450184e-06, "loss": 0.4194, "step": 3752 }, { "epoch": 0.3815575437169581, "grad_norm": 0.39787057042121887, "learning_rate": 7.631150874339162e-06, "loss": 0.4016, "step": 3753 }, { "epoch": 0.3816592110614071, "grad_norm": 0.4055712819099426, "learning_rate": 7.633184221228143e-06, "loss": 0.3989, "step": 3754 }, { "epoch": 0.38176087840585604, "grad_norm": 0.3749294877052307, "learning_rate": 7.635217568117121e-06, "loss": 0.3983, "step": 3755 }, { "epoch": 0.381862545750305, "grad_norm": 0.4232330620288849, "learning_rate": 7.6372509150061e-06, "loss": 0.4057, "step": 3756 }, { "epoch": 0.381964213094754, "grad_norm": 0.41861844062805176, "learning_rate": 7.63928426189508e-06, "loss": 0.4163, "step": 3757 }, { "epoch": 0.3820658804392029, "grad_norm": 0.4211529493331909, "learning_rate": 7.641317608784059e-06, "loss": 0.389, "step": 3758 }, { "epoch": 0.3821675477836519, "grad_norm": 0.39282944798469543, "learning_rate": 7.64335095567304e-06, "loss": 0.3938, "step": 3759 }, { "epoch": 0.38226921512810086, "grad_norm": 0.4337790608406067, "learning_rate": 7.645384302562018e-06, "loss": 0.3956, "step": 3760 }, { "epoch": 0.3823708824725498, "grad_norm": 0.47524040937423706, "learning_rate": 7.647417649450997e-06, "loss": 0.4561, "step": 3761 }, { "epoch": 0.3824725498169988, "grad_norm": 0.3960392475128174, "learning_rate": 7.649450996339977e-06, "loss": 0.4071, "step": 3762 }, { "epoch": 0.38257421716144774, "grad_norm": 0.4483673870563507, "learning_rate": 7.651484343228956e-06, "loss": 0.4008, "step": 3763 }, { "epoch": 0.3826758845058967, "grad_norm": 0.40340152382850647, "learning_rate": 7.653517690117934e-06, "loss": 0.3997, "step": 3764 }, { "epoch": 0.3827775518503457, "grad_norm": 0.49660491943359375, "learning_rate": 7.655551037006915e-06, "loss": 0.4145, "step": 3765 }, { "epoch": 0.3828792191947946, "grad_norm": 0.4554922878742218, "learning_rate": 7.657584383895893e-06, "loss": 0.4361, "step": 3766 }, { "epoch": 0.3829808865392436, "grad_norm": 0.4910030663013458, "learning_rate": 7.659617730784873e-06, "loss": 0.4177, "step": 3767 }, { "epoch": 0.38308255388369256, "grad_norm": 0.5618075728416443, "learning_rate": 7.661651077673852e-06, "loss": 0.4242, "step": 3768 }, { "epoch": 0.3831842212281415, "grad_norm": 0.4206021726131439, "learning_rate": 7.66368442456283e-06, "loss": 0.4368, "step": 3769 }, { "epoch": 0.3832858885725905, "grad_norm": 0.48034706711769104, "learning_rate": 7.665717771451811e-06, "loss": 0.4085, "step": 3770 }, { "epoch": 0.38338755591703944, "grad_norm": 0.4712890088558197, "learning_rate": 7.66775111834079e-06, "loss": 0.4473, "step": 3771 }, { "epoch": 0.38348922326148843, "grad_norm": 0.46075430512428284, "learning_rate": 7.66978446522977e-06, "loss": 0.4222, "step": 3772 }, { "epoch": 0.3835908906059374, "grad_norm": 0.39772143959999084, "learning_rate": 7.671817812118749e-06, "loss": 0.3998, "step": 3773 }, { "epoch": 0.3836925579503863, "grad_norm": 0.4381335377693176, "learning_rate": 7.673851159007727e-06, "loss": 0.4259, "step": 3774 }, { "epoch": 0.3837942252948353, "grad_norm": 0.3969384729862213, "learning_rate": 7.675884505896708e-06, "loss": 0.4637, "step": 3775 }, { "epoch": 0.38389589263928425, "grad_norm": 0.3953992426395416, "learning_rate": 7.677917852785686e-06, "loss": 0.4206, "step": 3776 }, { "epoch": 0.38399755998373325, "grad_norm": 0.42153796553611755, "learning_rate": 7.679951199674665e-06, "loss": 0.3937, "step": 3777 }, { "epoch": 0.3840992273281822, "grad_norm": 0.4531148374080658, "learning_rate": 7.681984546563644e-06, "loss": 0.3939, "step": 3778 }, { "epoch": 0.38420089467263113, "grad_norm": 0.3871057629585266, "learning_rate": 7.684017893452624e-06, "loss": 0.418, "step": 3779 }, { "epoch": 0.38430256201708013, "grad_norm": 0.42169249057769775, "learning_rate": 7.686051240341603e-06, "loss": 0.3976, "step": 3780 }, { "epoch": 0.38440422936152907, "grad_norm": 0.47850027680397034, "learning_rate": 7.688084587230581e-06, "loss": 0.4091, "step": 3781 }, { "epoch": 0.38450589670597807, "grad_norm": 0.4967005252838135, "learning_rate": 7.690117934119562e-06, "loss": 0.3839, "step": 3782 }, { "epoch": 0.384607564050427, "grad_norm": 0.558172345161438, "learning_rate": 7.69215128100854e-06, "loss": 0.4789, "step": 3783 }, { "epoch": 0.38470923139487595, "grad_norm": 0.4046630263328552, "learning_rate": 7.694184627897519e-06, "loss": 0.427, "step": 3784 }, { "epoch": 0.38481089873932495, "grad_norm": 0.4430234432220459, "learning_rate": 7.6962179747865e-06, "loss": 0.3794, "step": 3785 }, { "epoch": 0.3849125660837739, "grad_norm": 0.46870654821395874, "learning_rate": 7.698251321675478e-06, "loss": 0.4073, "step": 3786 }, { "epoch": 0.38501423342822283, "grad_norm": 0.449321985244751, "learning_rate": 7.700284668564456e-06, "loss": 0.436, "step": 3787 }, { "epoch": 0.3851159007726718, "grad_norm": 0.4357844889163971, "learning_rate": 7.702318015453437e-06, "loss": 0.4181, "step": 3788 }, { "epoch": 0.38521756811712077, "grad_norm": 0.44821858406066895, "learning_rate": 7.704351362342415e-06, "loss": 0.4385, "step": 3789 }, { "epoch": 0.38531923546156976, "grad_norm": 0.46342191100120544, "learning_rate": 7.706384709231396e-06, "loss": 0.3986, "step": 3790 }, { "epoch": 0.3854209028060187, "grad_norm": 0.4084048271179199, "learning_rate": 7.708418056120374e-06, "loss": 0.4331, "step": 3791 }, { "epoch": 0.38552257015046765, "grad_norm": 0.43332594633102417, "learning_rate": 7.710451403009353e-06, "loss": 0.465, "step": 3792 }, { "epoch": 0.38562423749491664, "grad_norm": 0.38756752014160156, "learning_rate": 7.712484749898333e-06, "loss": 0.4613, "step": 3793 }, { "epoch": 0.3857259048393656, "grad_norm": 0.44625020027160645, "learning_rate": 7.714518096787312e-06, "loss": 0.4242, "step": 3794 }, { "epoch": 0.3858275721838146, "grad_norm": 0.3883957266807556, "learning_rate": 7.716551443676292e-06, "loss": 0.3983, "step": 3795 }, { "epoch": 0.3859292395282635, "grad_norm": 0.4049265384674072, "learning_rate": 7.718584790565271e-06, "loss": 0.4157, "step": 3796 }, { "epoch": 0.38603090687271246, "grad_norm": 0.38222625851631165, "learning_rate": 7.72061813745425e-06, "loss": 0.3798, "step": 3797 }, { "epoch": 0.38613257421716146, "grad_norm": 0.40150678157806396, "learning_rate": 7.72265148434323e-06, "loss": 0.4247, "step": 3798 }, { "epoch": 0.3862342415616104, "grad_norm": 0.3804439306259155, "learning_rate": 7.724684831232209e-06, "loss": 0.4008, "step": 3799 }, { "epoch": 0.3863359089060594, "grad_norm": 0.44103381037712097, "learning_rate": 7.726718178121189e-06, "loss": 0.3899, "step": 3800 }, { "epoch": 0.38643757625050834, "grad_norm": 0.41894060373306274, "learning_rate": 7.728751525010168e-06, "loss": 0.41, "step": 3801 }, { "epoch": 0.3865392435949573, "grad_norm": 0.3915061354637146, "learning_rate": 7.730784871899146e-06, "loss": 0.4014, "step": 3802 }, { "epoch": 0.3866409109394063, "grad_norm": 0.43485918641090393, "learning_rate": 7.732818218788127e-06, "loss": 0.3868, "step": 3803 }, { "epoch": 0.3867425782838552, "grad_norm": 0.38426244258880615, "learning_rate": 7.734851565677105e-06, "loss": 0.4107, "step": 3804 }, { "epoch": 0.38684424562830416, "grad_norm": 0.46727606654167175, "learning_rate": 7.736884912566084e-06, "loss": 0.3967, "step": 3805 }, { "epoch": 0.38694591297275316, "grad_norm": 0.4435487985610962, "learning_rate": 7.738918259455064e-06, "loss": 0.4137, "step": 3806 }, { "epoch": 0.3870475803172021, "grad_norm": 0.4120232164859772, "learning_rate": 7.740951606344043e-06, "loss": 0.4036, "step": 3807 }, { "epoch": 0.3871492476616511, "grad_norm": 0.5922103524208069, "learning_rate": 7.742984953233023e-06, "loss": 0.4445, "step": 3808 }, { "epoch": 0.38725091500610004, "grad_norm": 0.42176881432533264, "learning_rate": 7.745018300122002e-06, "loss": 0.3786, "step": 3809 }, { "epoch": 0.387352582350549, "grad_norm": 0.42798376083374023, "learning_rate": 7.74705164701098e-06, "loss": 0.4165, "step": 3810 }, { "epoch": 0.387454249694998, "grad_norm": 0.5576077699661255, "learning_rate": 7.74908499389996e-06, "loss": 0.447, "step": 3811 }, { "epoch": 0.3875559170394469, "grad_norm": 0.4293479025363922, "learning_rate": 7.75111834078894e-06, "loss": 0.3956, "step": 3812 }, { "epoch": 0.3876575843838959, "grad_norm": 0.4237635135650635, "learning_rate": 7.75315168767792e-06, "loss": 0.4593, "step": 3813 }, { "epoch": 0.38775925172834486, "grad_norm": 0.43426841497421265, "learning_rate": 7.755185034566898e-06, "loss": 0.4291, "step": 3814 }, { "epoch": 0.3878609190727938, "grad_norm": 0.4580658972263336, "learning_rate": 7.757218381455877e-06, "loss": 0.4352, "step": 3815 }, { "epoch": 0.3879625864172428, "grad_norm": 0.39107972383499146, "learning_rate": 7.759251728344856e-06, "loss": 0.4247, "step": 3816 }, { "epoch": 0.38806425376169174, "grad_norm": 0.4203658401966095, "learning_rate": 7.761285075233836e-06, "loss": 0.4162, "step": 3817 }, { "epoch": 0.38816592110614073, "grad_norm": 0.38750946521759033, "learning_rate": 7.763318422122815e-06, "loss": 0.3976, "step": 3818 }, { "epoch": 0.3882675884505897, "grad_norm": 0.4062536954879761, "learning_rate": 7.765351769011793e-06, "loss": 0.4177, "step": 3819 }, { "epoch": 0.3883692557950386, "grad_norm": 0.4569326639175415, "learning_rate": 7.767385115900774e-06, "loss": 0.4258, "step": 3820 }, { "epoch": 0.3884709231394876, "grad_norm": 0.4038718640804291, "learning_rate": 7.769418462789752e-06, "loss": 0.4272, "step": 3821 }, { "epoch": 0.38857259048393655, "grad_norm": 0.4557458758354187, "learning_rate": 7.771451809678731e-06, "loss": 0.4172, "step": 3822 }, { "epoch": 0.38867425782838555, "grad_norm": 0.40748098492622375, "learning_rate": 7.773485156567711e-06, "loss": 0.3767, "step": 3823 }, { "epoch": 0.3887759251728345, "grad_norm": 0.4040936827659607, "learning_rate": 7.77551850345669e-06, "loss": 0.3876, "step": 3824 }, { "epoch": 0.38887759251728343, "grad_norm": 0.4784311056137085, "learning_rate": 7.777551850345669e-06, "loss": 0.4331, "step": 3825 }, { "epoch": 0.38897925986173243, "grad_norm": 0.37177762389183044, "learning_rate": 7.779585197234649e-06, "loss": 0.4597, "step": 3826 }, { "epoch": 0.38908092720618137, "grad_norm": 0.4325404167175293, "learning_rate": 7.781618544123628e-06, "loss": 0.4034, "step": 3827 }, { "epoch": 0.3891825945506303, "grad_norm": 0.41467612981796265, "learning_rate": 7.783651891012606e-06, "loss": 0.4039, "step": 3828 }, { "epoch": 0.3892842618950793, "grad_norm": 0.46409422159194946, "learning_rate": 7.785685237901586e-06, "loss": 0.4, "step": 3829 }, { "epoch": 0.38938592923952825, "grad_norm": 0.4368534982204437, "learning_rate": 7.787718584790565e-06, "loss": 0.4238, "step": 3830 }, { "epoch": 0.38948759658397725, "grad_norm": 0.40460115671157837, "learning_rate": 7.789751931679545e-06, "loss": 0.4002, "step": 3831 }, { "epoch": 0.3895892639284262, "grad_norm": 0.4771466553211212, "learning_rate": 7.791785278568524e-06, "loss": 0.4005, "step": 3832 }, { "epoch": 0.38969093127287513, "grad_norm": 0.43065083026885986, "learning_rate": 7.793818625457503e-06, "loss": 0.4078, "step": 3833 }, { "epoch": 0.3897925986173241, "grad_norm": 0.425070583820343, "learning_rate": 7.795851972346483e-06, "loss": 0.3935, "step": 3834 }, { "epoch": 0.38989426596177307, "grad_norm": 0.41130349040031433, "learning_rate": 7.797885319235462e-06, "loss": 0.3923, "step": 3835 }, { "epoch": 0.38999593330622206, "grad_norm": 0.45088234543800354, "learning_rate": 7.799918666124442e-06, "loss": 0.4135, "step": 3836 }, { "epoch": 0.390097600650671, "grad_norm": 0.4499385952949524, "learning_rate": 7.80195201301342e-06, "loss": 0.419, "step": 3837 }, { "epoch": 0.39019926799511995, "grad_norm": 0.45843562483787537, "learning_rate": 7.8039853599024e-06, "loss": 0.4166, "step": 3838 }, { "epoch": 0.39030093533956894, "grad_norm": 0.39932575821876526, "learning_rate": 7.80601870679138e-06, "loss": 0.4108, "step": 3839 }, { "epoch": 0.3904026026840179, "grad_norm": 0.4497871994972229, "learning_rate": 7.808052053680358e-06, "loss": 0.4262, "step": 3840 }, { "epoch": 0.3905042700284669, "grad_norm": 0.4386548101902008, "learning_rate": 7.810085400569339e-06, "loss": 0.3963, "step": 3841 }, { "epoch": 0.3906059373729158, "grad_norm": 0.40415486693382263, "learning_rate": 7.812118747458317e-06, "loss": 0.4469, "step": 3842 }, { "epoch": 0.39070760471736476, "grad_norm": 0.39169153571128845, "learning_rate": 7.814152094347296e-06, "loss": 0.3926, "step": 3843 }, { "epoch": 0.39080927206181376, "grad_norm": 0.4631483852863312, "learning_rate": 7.816185441236276e-06, "loss": 0.3983, "step": 3844 }, { "epoch": 0.3909109394062627, "grad_norm": 0.42055851221084595, "learning_rate": 7.818218788125255e-06, "loss": 0.3974, "step": 3845 }, { "epoch": 0.39101260675071164, "grad_norm": 0.456775426864624, "learning_rate": 7.820252135014234e-06, "loss": 0.4248, "step": 3846 }, { "epoch": 0.39111427409516064, "grad_norm": 0.39603039622306824, "learning_rate": 7.822285481903214e-06, "loss": 0.4071, "step": 3847 }, { "epoch": 0.3912159414396096, "grad_norm": 0.4297311305999756, "learning_rate": 7.824318828792193e-06, "loss": 0.4308, "step": 3848 }, { "epoch": 0.3913176087840586, "grad_norm": 0.4171481132507324, "learning_rate": 7.826352175681173e-06, "loss": 0.3815, "step": 3849 }, { "epoch": 0.3914192761285075, "grad_norm": 0.44738322496414185, "learning_rate": 7.828385522570152e-06, "loss": 0.4311, "step": 3850 }, { "epoch": 0.39152094347295646, "grad_norm": 0.3974134027957916, "learning_rate": 7.83041886945913e-06, "loss": 0.4173, "step": 3851 }, { "epoch": 0.39162261081740546, "grad_norm": 0.45080360770225525, "learning_rate": 7.83245221634811e-06, "loss": 0.3923, "step": 3852 }, { "epoch": 0.3917242781618544, "grad_norm": 0.43890559673309326, "learning_rate": 7.834485563237089e-06, "loss": 0.3904, "step": 3853 }, { "epoch": 0.3918259455063034, "grad_norm": 0.42147964239120483, "learning_rate": 7.836518910126068e-06, "loss": 0.4127, "step": 3854 }, { "epoch": 0.39192761285075234, "grad_norm": 0.4516327679157257, "learning_rate": 7.838552257015048e-06, "loss": 0.4026, "step": 3855 }, { "epoch": 0.3920292801952013, "grad_norm": 0.40323421359062195, "learning_rate": 7.840585603904027e-06, "loss": 0.4174, "step": 3856 }, { "epoch": 0.3921309475396503, "grad_norm": 0.43645113706588745, "learning_rate": 7.842618950793005e-06, "loss": 0.4399, "step": 3857 }, { "epoch": 0.3922326148840992, "grad_norm": 0.46016645431518555, "learning_rate": 7.844652297681986e-06, "loss": 0.4336, "step": 3858 }, { "epoch": 0.3923342822285482, "grad_norm": 0.4543761610984802, "learning_rate": 7.846685644570964e-06, "loss": 0.4005, "step": 3859 }, { "epoch": 0.39243594957299716, "grad_norm": 0.4619835615158081, "learning_rate": 7.848718991459943e-06, "loss": 0.4118, "step": 3860 }, { "epoch": 0.3925376169174461, "grad_norm": 0.3851828873157501, "learning_rate": 7.850752338348923e-06, "loss": 0.4013, "step": 3861 }, { "epoch": 0.3926392842618951, "grad_norm": 0.45317089557647705, "learning_rate": 7.852785685237902e-06, "loss": 0.4107, "step": 3862 }, { "epoch": 0.39274095160634404, "grad_norm": 0.4446124732494354, "learning_rate": 7.85481903212688e-06, "loss": 0.4263, "step": 3863 }, { "epoch": 0.39284261895079303, "grad_norm": 0.4246711730957031, "learning_rate": 7.856852379015861e-06, "loss": 0.3633, "step": 3864 }, { "epoch": 0.392944286295242, "grad_norm": 0.4617730379104614, "learning_rate": 7.85888572590484e-06, "loss": 0.3908, "step": 3865 }, { "epoch": 0.3930459536396909, "grad_norm": 0.4940980076789856, "learning_rate": 7.860919072793818e-06, "loss": 0.3933, "step": 3866 }, { "epoch": 0.3931476209841399, "grad_norm": 0.407962828874588, "learning_rate": 7.862952419682799e-06, "loss": 0.4028, "step": 3867 }, { "epoch": 0.39324928832858885, "grad_norm": 0.4118044972419739, "learning_rate": 7.864985766571777e-06, "loss": 0.4215, "step": 3868 }, { "epoch": 0.3933509556730378, "grad_norm": 0.4126124083995819, "learning_rate": 7.867019113460756e-06, "loss": 0.4476, "step": 3869 }, { "epoch": 0.3934526230174868, "grad_norm": 0.4144071936607361, "learning_rate": 7.869052460349736e-06, "loss": 0.4518, "step": 3870 }, { "epoch": 0.39355429036193573, "grad_norm": 0.3973812162876129, "learning_rate": 7.871085807238715e-06, "loss": 0.4488, "step": 3871 }, { "epoch": 0.39365595770638473, "grad_norm": 0.4833017587661743, "learning_rate": 7.873119154127695e-06, "loss": 0.398, "step": 3872 }, { "epoch": 0.39375762505083367, "grad_norm": 0.4333761930465698, "learning_rate": 7.875152501016674e-06, "loss": 0.4172, "step": 3873 }, { "epoch": 0.3938592923952826, "grad_norm": 0.4190934896469116, "learning_rate": 7.877185847905652e-06, "loss": 0.4136, "step": 3874 }, { "epoch": 0.3939609597397316, "grad_norm": 0.43904736638069153, "learning_rate": 7.879219194794633e-06, "loss": 0.4042, "step": 3875 }, { "epoch": 0.39406262708418055, "grad_norm": 0.4213937819004059, "learning_rate": 7.881252541683611e-06, "loss": 0.4218, "step": 3876 }, { "epoch": 0.39416429442862955, "grad_norm": 0.4421740472316742, "learning_rate": 7.883285888572592e-06, "loss": 0.4367, "step": 3877 }, { "epoch": 0.3942659617730785, "grad_norm": 0.4471587836742401, "learning_rate": 7.88531923546157e-06, "loss": 0.3996, "step": 3878 }, { "epoch": 0.39436762911752743, "grad_norm": 0.42337021231651306, "learning_rate": 7.887352582350549e-06, "loss": 0.3812, "step": 3879 }, { "epoch": 0.3944692964619764, "grad_norm": 0.4427785277366638, "learning_rate": 7.88938592923953e-06, "loss": 0.4379, "step": 3880 }, { "epoch": 0.39457096380642537, "grad_norm": 0.41400590538978577, "learning_rate": 7.891419276128508e-06, "loss": 0.3858, "step": 3881 }, { "epoch": 0.39467263115087436, "grad_norm": 0.48831140995025635, "learning_rate": 7.893452623017488e-06, "loss": 0.3989, "step": 3882 }, { "epoch": 0.3947742984953233, "grad_norm": 0.4218374192714691, "learning_rate": 7.895485969906467e-06, "loss": 0.3781, "step": 3883 }, { "epoch": 0.39487596583977225, "grad_norm": 0.46349161863327026, "learning_rate": 7.897519316795446e-06, "loss": 0.4392, "step": 3884 }, { "epoch": 0.39497763318422124, "grad_norm": 0.41497111320495605, "learning_rate": 7.899552663684426e-06, "loss": 0.4328, "step": 3885 }, { "epoch": 0.3950793005286702, "grad_norm": 0.4661777913570404, "learning_rate": 7.901586010573405e-06, "loss": 0.4271, "step": 3886 }, { "epoch": 0.3951809678731191, "grad_norm": 0.5349489450454712, "learning_rate": 7.903619357462383e-06, "loss": 0.4018, "step": 3887 }, { "epoch": 0.3952826352175681, "grad_norm": 0.396243155002594, "learning_rate": 7.905652704351364e-06, "loss": 0.4003, "step": 3888 }, { "epoch": 0.39538430256201706, "grad_norm": 0.40540480613708496, "learning_rate": 7.907686051240342e-06, "loss": 0.361, "step": 3889 }, { "epoch": 0.39548596990646606, "grad_norm": 0.5843912363052368, "learning_rate": 7.909719398129323e-06, "loss": 0.4172, "step": 3890 }, { "epoch": 0.395587637250915, "grad_norm": 0.5113210082054138, "learning_rate": 7.911752745018301e-06, "loss": 0.4156, "step": 3891 }, { "epoch": 0.39568930459536394, "grad_norm": 0.43920329213142395, "learning_rate": 7.91378609190728e-06, "loss": 0.4121, "step": 3892 }, { "epoch": 0.39579097193981294, "grad_norm": 0.4468645751476288, "learning_rate": 7.91581943879626e-06, "loss": 0.3922, "step": 3893 }, { "epoch": 0.3958926392842619, "grad_norm": 0.4514812231063843, "learning_rate": 7.917852785685239e-06, "loss": 0.3895, "step": 3894 }, { "epoch": 0.3959943066287109, "grad_norm": 0.42950066924095154, "learning_rate": 7.919886132574217e-06, "loss": 0.3969, "step": 3895 }, { "epoch": 0.3960959739731598, "grad_norm": 0.4669659435749054, "learning_rate": 7.921919479463198e-06, "loss": 0.3855, "step": 3896 }, { "epoch": 0.39619764131760876, "grad_norm": 0.44683218002319336, "learning_rate": 7.923952826352176e-06, "loss": 0.4077, "step": 3897 }, { "epoch": 0.39629930866205776, "grad_norm": 0.43508026003837585, "learning_rate": 7.925986173241155e-06, "loss": 0.4261, "step": 3898 }, { "epoch": 0.3964009760065067, "grad_norm": 0.4718450903892517, "learning_rate": 7.928019520130135e-06, "loss": 0.465, "step": 3899 }, { "epoch": 0.3965026433509557, "grad_norm": 0.401648610830307, "learning_rate": 7.930052867019114e-06, "loss": 0.3723, "step": 3900 }, { "epoch": 0.39660431069540464, "grad_norm": 0.40948718786239624, "learning_rate": 7.932086213908093e-06, "loss": 0.3871, "step": 3901 }, { "epoch": 0.3967059780398536, "grad_norm": 0.4393310844898224, "learning_rate": 7.934119560797073e-06, "loss": 0.4055, "step": 3902 }, { "epoch": 0.3968076453843026, "grad_norm": 0.46154090762138367, "learning_rate": 7.936152907686052e-06, "loss": 0.3803, "step": 3903 }, { "epoch": 0.3969093127287515, "grad_norm": 0.42898377776145935, "learning_rate": 7.93818625457503e-06, "loss": 0.3908, "step": 3904 }, { "epoch": 0.3970109800732005, "grad_norm": 0.45999258756637573, "learning_rate": 7.94021960146401e-06, "loss": 0.4121, "step": 3905 }, { "epoch": 0.39711264741764946, "grad_norm": 0.4434054493904114, "learning_rate": 7.94225294835299e-06, "loss": 0.4517, "step": 3906 }, { "epoch": 0.3972143147620984, "grad_norm": 0.40777552127838135, "learning_rate": 7.944286295241968e-06, "loss": 0.3943, "step": 3907 }, { "epoch": 0.3973159821065474, "grad_norm": 0.4677248001098633, "learning_rate": 7.946319642130948e-06, "loss": 0.4355, "step": 3908 }, { "epoch": 0.39741764945099634, "grad_norm": 0.4857018291950226, "learning_rate": 7.948352989019927e-06, "loss": 0.4369, "step": 3909 }, { "epoch": 0.3975193167954453, "grad_norm": 0.36694031953811646, "learning_rate": 7.950386335908906e-06, "loss": 0.3915, "step": 3910 }, { "epoch": 0.3976209841398943, "grad_norm": 0.4535990059375763, "learning_rate": 7.952419682797886e-06, "loss": 0.4302, "step": 3911 }, { "epoch": 0.3977226514843432, "grad_norm": 0.5230423808097839, "learning_rate": 7.954453029686865e-06, "loss": 0.4187, "step": 3912 }, { "epoch": 0.3978243188287922, "grad_norm": 0.38329654932022095, "learning_rate": 7.956486376575845e-06, "loss": 0.3908, "step": 3913 }, { "epoch": 0.39792598617324115, "grad_norm": 0.41188547015190125, "learning_rate": 7.958519723464824e-06, "loss": 0.4134, "step": 3914 }, { "epoch": 0.3980276535176901, "grad_norm": 0.4732881486415863, "learning_rate": 7.960553070353802e-06, "loss": 0.4101, "step": 3915 }, { "epoch": 0.3981293208621391, "grad_norm": 0.4360586106777191, "learning_rate": 7.962586417242782e-06, "loss": 0.4335, "step": 3916 }, { "epoch": 0.39823098820658803, "grad_norm": 0.44733864068984985, "learning_rate": 7.964619764131761e-06, "loss": 0.426, "step": 3917 }, { "epoch": 0.39833265555103703, "grad_norm": 0.4281613826751709, "learning_rate": 7.966653111020741e-06, "loss": 0.4278, "step": 3918 }, { "epoch": 0.39843432289548597, "grad_norm": 0.41498327255249023, "learning_rate": 7.96868645790972e-06, "loss": 0.4078, "step": 3919 }, { "epoch": 0.3985359902399349, "grad_norm": 0.4017188251018524, "learning_rate": 7.970719804798699e-06, "loss": 0.3914, "step": 3920 }, { "epoch": 0.3986376575843839, "grad_norm": 0.4422755837440491, "learning_rate": 7.972753151687679e-06, "loss": 0.3982, "step": 3921 }, { "epoch": 0.39873932492883285, "grad_norm": 0.46890273690223694, "learning_rate": 7.974786498576658e-06, "loss": 0.3826, "step": 3922 }, { "epoch": 0.39884099227328185, "grad_norm": 0.40997031331062317, "learning_rate": 7.976819845465638e-06, "loss": 0.4092, "step": 3923 }, { "epoch": 0.3989426596177308, "grad_norm": 0.43112629652023315, "learning_rate": 7.978853192354617e-06, "loss": 0.4169, "step": 3924 }, { "epoch": 0.39904432696217973, "grad_norm": 0.39298954606056213, "learning_rate": 7.980886539243595e-06, "loss": 0.3889, "step": 3925 }, { "epoch": 0.3991459943066287, "grad_norm": 0.4704844653606415, "learning_rate": 7.982919886132576e-06, "loss": 0.3907, "step": 3926 }, { "epoch": 0.39924766165107767, "grad_norm": 0.4247663915157318, "learning_rate": 7.984953233021554e-06, "loss": 0.3952, "step": 3927 }, { "epoch": 0.39934932899552666, "grad_norm": 0.4580625593662262, "learning_rate": 7.986986579910535e-06, "loss": 0.393, "step": 3928 }, { "epoch": 0.3994509963399756, "grad_norm": 0.4603789448738098, "learning_rate": 7.989019926799513e-06, "loss": 0.3969, "step": 3929 }, { "epoch": 0.39955266368442455, "grad_norm": 0.4100044369697571, "learning_rate": 7.991053273688492e-06, "loss": 0.4153, "step": 3930 }, { "epoch": 0.39965433102887354, "grad_norm": 0.4517185389995575, "learning_rate": 7.993086620577472e-06, "loss": 0.4118, "step": 3931 }, { "epoch": 0.3997559983733225, "grad_norm": 0.40478309988975525, "learning_rate": 7.995119967466451e-06, "loss": 0.3977, "step": 3932 }, { "epoch": 0.3998576657177714, "grad_norm": 0.4097086191177368, "learning_rate": 7.99715331435543e-06, "loss": 0.4016, "step": 3933 }, { "epoch": 0.3999593330622204, "grad_norm": 0.4241132140159607, "learning_rate": 7.99918666124441e-06, "loss": 0.3766, "step": 3934 }, { "epoch": 0.40006100040666936, "grad_norm": 0.45716458559036255, "learning_rate": 8.001220008133389e-06, "loss": 0.4513, "step": 3935 }, { "epoch": 0.40016266775111836, "grad_norm": 0.4017058312892914, "learning_rate": 8.003253355022367e-06, "loss": 0.4126, "step": 3936 }, { "epoch": 0.4002643350955673, "grad_norm": 0.391579270362854, "learning_rate": 8.005286701911347e-06, "loss": 0.3851, "step": 3937 }, { "epoch": 0.40036600244001624, "grad_norm": 0.39801666140556335, "learning_rate": 8.007320048800326e-06, "loss": 0.416, "step": 3938 }, { "epoch": 0.40046766978446524, "grad_norm": 0.4381849765777588, "learning_rate": 8.009353395689305e-06, "loss": 0.4307, "step": 3939 }, { "epoch": 0.4005693371289142, "grad_norm": 0.39836424589157104, "learning_rate": 8.011386742578285e-06, "loss": 0.4437, "step": 3940 }, { "epoch": 0.4006710044733632, "grad_norm": 0.4312516748905182, "learning_rate": 8.013420089467264e-06, "loss": 0.4225, "step": 3941 }, { "epoch": 0.4007726718178121, "grad_norm": 0.3902672231197357, "learning_rate": 8.015453436356242e-06, "loss": 0.3917, "step": 3942 }, { "epoch": 0.40087433916226106, "grad_norm": 0.39918699860572815, "learning_rate": 8.017486783245223e-06, "loss": 0.4316, "step": 3943 }, { "epoch": 0.40097600650671006, "grad_norm": 0.42944133281707764, "learning_rate": 8.019520130134201e-06, "loss": 0.3754, "step": 3944 }, { "epoch": 0.401077673851159, "grad_norm": 0.4056539535522461, "learning_rate": 8.02155347702318e-06, "loss": 0.4088, "step": 3945 }, { "epoch": 0.401179341195608, "grad_norm": 0.3761369585990906, "learning_rate": 8.02358682391216e-06, "loss": 0.4082, "step": 3946 }, { "epoch": 0.40128100854005694, "grad_norm": 0.41592302918434143, "learning_rate": 8.025620170801139e-06, "loss": 0.4147, "step": 3947 }, { "epoch": 0.4013826758845059, "grad_norm": 0.4084551930427551, "learning_rate": 8.027653517690118e-06, "loss": 0.4052, "step": 3948 }, { "epoch": 0.4014843432289549, "grad_norm": 0.39142391085624695, "learning_rate": 8.029686864579098e-06, "loss": 0.4187, "step": 3949 }, { "epoch": 0.4015860105734038, "grad_norm": 0.3805461525917053, "learning_rate": 8.031720211468077e-06, "loss": 0.3792, "step": 3950 }, { "epoch": 0.40168767791785276, "grad_norm": 0.39509567618370056, "learning_rate": 8.033753558357055e-06, "loss": 0.4189, "step": 3951 }, { "epoch": 0.40178934526230176, "grad_norm": 0.44057825207710266, "learning_rate": 8.035786905246036e-06, "loss": 0.417, "step": 3952 }, { "epoch": 0.4018910126067507, "grad_norm": 0.43389514088630676, "learning_rate": 8.037820252135014e-06, "loss": 0.4082, "step": 3953 }, { "epoch": 0.4019926799511997, "grad_norm": 0.3993610739707947, "learning_rate": 8.039853599023995e-06, "loss": 0.4201, "step": 3954 }, { "epoch": 0.40209434729564864, "grad_norm": 0.5041239261627197, "learning_rate": 8.041886945912973e-06, "loss": 0.4532, "step": 3955 }, { "epoch": 0.4021960146400976, "grad_norm": 0.40862202644348145, "learning_rate": 8.043920292801952e-06, "loss": 0.4306, "step": 3956 }, { "epoch": 0.4022976819845466, "grad_norm": 0.38669463992118835, "learning_rate": 8.045953639690932e-06, "loss": 0.3751, "step": 3957 }, { "epoch": 0.4023993493289955, "grad_norm": 0.43117427825927734, "learning_rate": 8.04798698657991e-06, "loss": 0.4109, "step": 3958 }, { "epoch": 0.4025010166734445, "grad_norm": 0.4045901298522949, "learning_rate": 8.050020333468891e-06, "loss": 0.393, "step": 3959 }, { "epoch": 0.40260268401789345, "grad_norm": 0.43272602558135986, "learning_rate": 8.05205368035787e-06, "loss": 0.4162, "step": 3960 }, { "epoch": 0.4027043513623424, "grad_norm": 0.4316345453262329, "learning_rate": 8.054087027246848e-06, "loss": 0.4726, "step": 3961 }, { "epoch": 0.4028060187067914, "grad_norm": 0.4816652834415436, "learning_rate": 8.056120374135829e-06, "loss": 0.4203, "step": 3962 }, { "epoch": 0.40290768605124033, "grad_norm": 0.38938578963279724, "learning_rate": 8.058153721024807e-06, "loss": 0.4311, "step": 3963 }, { "epoch": 0.40300935339568933, "grad_norm": 0.47473424673080444, "learning_rate": 8.060187067913788e-06, "loss": 0.4196, "step": 3964 }, { "epoch": 0.40311102074013827, "grad_norm": 0.4075961112976074, "learning_rate": 8.062220414802766e-06, "loss": 0.4137, "step": 3965 }, { "epoch": 0.4032126880845872, "grad_norm": 0.4121589958667755, "learning_rate": 8.064253761691745e-06, "loss": 0.4253, "step": 3966 }, { "epoch": 0.4033143554290362, "grad_norm": 0.4118986129760742, "learning_rate": 8.066287108580725e-06, "loss": 0.4223, "step": 3967 }, { "epoch": 0.40341602277348515, "grad_norm": 0.36716601252555847, "learning_rate": 8.068320455469704e-06, "loss": 0.3838, "step": 3968 }, { "epoch": 0.40351769011793415, "grad_norm": 0.43579474091529846, "learning_rate": 8.070353802358684e-06, "loss": 0.4308, "step": 3969 }, { "epoch": 0.4036193574623831, "grad_norm": 0.437764048576355, "learning_rate": 8.072387149247663e-06, "loss": 0.4078, "step": 3970 }, { "epoch": 0.40372102480683203, "grad_norm": 0.4566429555416107, "learning_rate": 8.074420496136642e-06, "loss": 0.4322, "step": 3971 }, { "epoch": 0.403822692151281, "grad_norm": 0.3966040313243866, "learning_rate": 8.076453843025622e-06, "loss": 0.4247, "step": 3972 }, { "epoch": 0.40392435949572997, "grad_norm": 0.4509325325489044, "learning_rate": 8.0784871899146e-06, "loss": 0.4307, "step": 3973 }, { "epoch": 0.4040260268401789, "grad_norm": 0.3655455708503723, "learning_rate": 8.08052053680358e-06, "loss": 0.4091, "step": 3974 }, { "epoch": 0.4041276941846279, "grad_norm": 0.41842639446258545, "learning_rate": 8.08255388369256e-06, "loss": 0.4469, "step": 3975 }, { "epoch": 0.40422936152907685, "grad_norm": 0.3645839989185333, "learning_rate": 8.084587230581538e-06, "loss": 0.4057, "step": 3976 }, { "epoch": 0.40433102887352584, "grad_norm": 0.4415800869464874, "learning_rate": 8.086620577470517e-06, "loss": 0.4008, "step": 3977 }, { "epoch": 0.4044326962179748, "grad_norm": 0.4038427174091339, "learning_rate": 8.088653924359497e-06, "loss": 0.4305, "step": 3978 }, { "epoch": 0.4045343635624237, "grad_norm": 0.4157603979110718, "learning_rate": 8.090687271248476e-06, "loss": 0.3905, "step": 3979 }, { "epoch": 0.4046360309068727, "grad_norm": 0.4280976355075836, "learning_rate": 8.092720618137454e-06, "loss": 0.4168, "step": 3980 }, { "epoch": 0.40473769825132166, "grad_norm": 0.38254785537719727, "learning_rate": 8.094753965026435e-06, "loss": 0.412, "step": 3981 }, { "epoch": 0.40483936559577066, "grad_norm": 0.397844523191452, "learning_rate": 8.096787311915413e-06, "loss": 0.4118, "step": 3982 }, { "epoch": 0.4049410329402196, "grad_norm": 0.4006428122520447, "learning_rate": 8.098820658804392e-06, "loss": 0.4263, "step": 3983 }, { "epoch": 0.40504270028466854, "grad_norm": 0.3948727250099182, "learning_rate": 8.10085400569337e-06, "loss": 0.4419, "step": 3984 }, { "epoch": 0.40514436762911754, "grad_norm": 0.4294884204864502, "learning_rate": 8.102887352582351e-06, "loss": 0.4272, "step": 3985 }, { "epoch": 0.4052460349735665, "grad_norm": 0.41112077236175537, "learning_rate": 8.10492069947133e-06, "loss": 0.3973, "step": 3986 }, { "epoch": 0.4053477023180155, "grad_norm": 0.4084532558917999, "learning_rate": 8.10695404636031e-06, "loss": 0.385, "step": 3987 }, { "epoch": 0.4054493696624644, "grad_norm": 0.4513685703277588, "learning_rate": 8.108987393249289e-06, "loss": 0.4143, "step": 3988 }, { "epoch": 0.40555103700691336, "grad_norm": 0.42943698167800903, "learning_rate": 8.111020740138267e-06, "loss": 0.4195, "step": 3989 }, { "epoch": 0.40565270435136236, "grad_norm": 0.46874570846557617, "learning_rate": 8.113054087027248e-06, "loss": 0.443, "step": 3990 }, { "epoch": 0.4057543716958113, "grad_norm": 0.423178106546402, "learning_rate": 8.115087433916226e-06, "loss": 0.4055, "step": 3991 }, { "epoch": 0.40585603904026024, "grad_norm": 0.455327570438385, "learning_rate": 8.117120780805205e-06, "loss": 0.4219, "step": 3992 }, { "epoch": 0.40595770638470924, "grad_norm": 0.45174461603164673, "learning_rate": 8.119154127694185e-06, "loss": 0.3691, "step": 3993 }, { "epoch": 0.4060593737291582, "grad_norm": 0.44535988569259644, "learning_rate": 8.121187474583164e-06, "loss": 0.4011, "step": 3994 }, { "epoch": 0.4061610410736072, "grad_norm": 0.451612263917923, "learning_rate": 8.123220821472144e-06, "loss": 0.4201, "step": 3995 }, { "epoch": 0.4062627084180561, "grad_norm": 0.4711666405200958, "learning_rate": 8.125254168361123e-06, "loss": 0.4095, "step": 3996 }, { "epoch": 0.40636437576250506, "grad_norm": 0.43519994616508484, "learning_rate": 8.127287515250102e-06, "loss": 0.4321, "step": 3997 }, { "epoch": 0.40646604310695406, "grad_norm": 0.3875891864299774, "learning_rate": 8.129320862139082e-06, "loss": 0.4133, "step": 3998 }, { "epoch": 0.406567710451403, "grad_norm": 0.4156077802181244, "learning_rate": 8.13135420902806e-06, "loss": 0.4249, "step": 3999 }, { "epoch": 0.406669377795852, "grad_norm": 0.4289940297603607, "learning_rate": 8.13338755591704e-06, "loss": 0.3969, "step": 4000 }, { "epoch": 0.40677104514030094, "grad_norm": 0.3817248046398163, "learning_rate": 8.13542090280602e-06, "loss": 0.4041, "step": 4001 }, { "epoch": 0.4068727124847499, "grad_norm": 0.390470027923584, "learning_rate": 8.137454249694998e-06, "loss": 0.399, "step": 4002 }, { "epoch": 0.4069743798291989, "grad_norm": 0.4330753684043884, "learning_rate": 8.139487596583978e-06, "loss": 0.422, "step": 4003 }, { "epoch": 0.4070760471736478, "grad_norm": 0.38180676102638245, "learning_rate": 8.141520943472957e-06, "loss": 0.3925, "step": 4004 }, { "epoch": 0.4071777145180968, "grad_norm": 0.4240204095840454, "learning_rate": 8.143554290361937e-06, "loss": 0.3805, "step": 4005 }, { "epoch": 0.40727938186254575, "grad_norm": 0.44053417444229126, "learning_rate": 8.145587637250916e-06, "loss": 0.4066, "step": 4006 }, { "epoch": 0.4073810492069947, "grad_norm": 0.42633137106895447, "learning_rate": 8.147620984139895e-06, "loss": 0.4128, "step": 4007 }, { "epoch": 0.4074827165514437, "grad_norm": 0.38671931624412537, "learning_rate": 8.149654331028875e-06, "loss": 0.3832, "step": 4008 }, { "epoch": 0.40758438389589263, "grad_norm": 0.41183707118034363, "learning_rate": 8.151687677917854e-06, "loss": 0.4046, "step": 4009 }, { "epoch": 0.40768605124034163, "grad_norm": 0.4267338812351227, "learning_rate": 8.153721024806834e-06, "loss": 0.3932, "step": 4010 }, { "epoch": 0.40778771858479057, "grad_norm": 0.41283419728279114, "learning_rate": 8.155754371695813e-06, "loss": 0.4252, "step": 4011 }, { "epoch": 0.4078893859292395, "grad_norm": 0.48369100689888, "learning_rate": 8.157787718584791e-06, "loss": 0.4143, "step": 4012 }, { "epoch": 0.4079910532736885, "grad_norm": 0.3940795361995697, "learning_rate": 8.159821065473772e-06, "loss": 0.4073, "step": 4013 }, { "epoch": 0.40809272061813745, "grad_norm": 0.4349784255027771, "learning_rate": 8.16185441236275e-06, "loss": 0.3923, "step": 4014 }, { "epoch": 0.4081943879625864, "grad_norm": 0.4359276592731476, "learning_rate": 8.163887759251729e-06, "loss": 0.4559, "step": 4015 }, { "epoch": 0.4082960553070354, "grad_norm": 0.5221400260925293, "learning_rate": 8.16592110614071e-06, "loss": 0.4393, "step": 4016 }, { "epoch": 0.40839772265148433, "grad_norm": 0.42290207743644714, "learning_rate": 8.167954453029688e-06, "loss": 0.437, "step": 4017 }, { "epoch": 0.4084993899959333, "grad_norm": 0.4837964177131653, "learning_rate": 8.169987799918667e-06, "loss": 0.4226, "step": 4018 }, { "epoch": 0.40860105734038227, "grad_norm": 0.4712226986885071, "learning_rate": 8.172021146807645e-06, "loss": 0.4504, "step": 4019 }, { "epoch": 0.4087027246848312, "grad_norm": 0.42378371953964233, "learning_rate": 8.174054493696626e-06, "loss": 0.4178, "step": 4020 }, { "epoch": 0.4088043920292802, "grad_norm": 0.4501039981842041, "learning_rate": 8.176087840585604e-06, "loss": 0.408, "step": 4021 }, { "epoch": 0.40890605937372915, "grad_norm": 0.47030308842658997, "learning_rate": 8.178121187474583e-06, "loss": 0.4267, "step": 4022 }, { "epoch": 0.40900772671817814, "grad_norm": 0.4162866473197937, "learning_rate": 8.180154534363563e-06, "loss": 0.4272, "step": 4023 }, { "epoch": 0.4091093940626271, "grad_norm": 0.4348587393760681, "learning_rate": 8.182187881252542e-06, "loss": 0.4282, "step": 4024 }, { "epoch": 0.409211061407076, "grad_norm": 0.4418146312236786, "learning_rate": 8.18422122814152e-06, "loss": 0.4274, "step": 4025 }, { "epoch": 0.409312728751525, "grad_norm": 0.4415634572505951, "learning_rate": 8.1862545750305e-06, "loss": 0.421, "step": 4026 }, { "epoch": 0.40941439609597396, "grad_norm": 0.4610012173652649, "learning_rate": 8.18828792191948e-06, "loss": 0.4403, "step": 4027 }, { "epoch": 0.40951606344042296, "grad_norm": 0.41827091574668884, "learning_rate": 8.19032126880846e-06, "loss": 0.4353, "step": 4028 }, { "epoch": 0.4096177307848719, "grad_norm": 0.43204280734062195, "learning_rate": 8.192354615697438e-06, "loss": 0.4048, "step": 4029 }, { "epoch": 0.40971939812932084, "grad_norm": 0.40211573243141174, "learning_rate": 8.194387962586417e-06, "loss": 0.4392, "step": 4030 }, { "epoch": 0.40982106547376984, "grad_norm": 0.3967616558074951, "learning_rate": 8.196421309475397e-06, "loss": 0.3962, "step": 4031 }, { "epoch": 0.4099227328182188, "grad_norm": 0.43565669655799866, "learning_rate": 8.198454656364376e-06, "loss": 0.446, "step": 4032 }, { "epoch": 0.4100244001626677, "grad_norm": 0.4045708477497101, "learning_rate": 8.200488003253355e-06, "loss": 0.416, "step": 4033 }, { "epoch": 0.4101260675071167, "grad_norm": 0.4334208071231842, "learning_rate": 8.202521350142335e-06, "loss": 0.4371, "step": 4034 }, { "epoch": 0.41022773485156566, "grad_norm": 0.42618414759635925, "learning_rate": 8.204554697031314e-06, "loss": 0.4567, "step": 4035 }, { "epoch": 0.41032940219601466, "grad_norm": 0.41381603479385376, "learning_rate": 8.206588043920294e-06, "loss": 0.3944, "step": 4036 }, { "epoch": 0.4104310695404636, "grad_norm": 0.4082993268966675, "learning_rate": 8.208621390809273e-06, "loss": 0.3932, "step": 4037 }, { "epoch": 0.41053273688491254, "grad_norm": 0.4108276963233948, "learning_rate": 8.210654737698251e-06, "loss": 0.3791, "step": 4038 }, { "epoch": 0.41063440422936154, "grad_norm": 0.4356340765953064, "learning_rate": 8.212688084587232e-06, "loss": 0.414, "step": 4039 }, { "epoch": 0.4107360715738105, "grad_norm": 0.4444098174571991, "learning_rate": 8.21472143147621e-06, "loss": 0.3965, "step": 4040 }, { "epoch": 0.4108377389182595, "grad_norm": 0.41502639651298523, "learning_rate": 8.21675477836519e-06, "loss": 0.3749, "step": 4041 }, { "epoch": 0.4109394062627084, "grad_norm": 0.4329933822154999, "learning_rate": 8.21878812525417e-06, "loss": 0.4345, "step": 4042 }, { "epoch": 0.41104107360715736, "grad_norm": 0.4427274167537689, "learning_rate": 8.220821472143148e-06, "loss": 0.4256, "step": 4043 }, { "epoch": 0.41114274095160636, "grad_norm": 0.42189568281173706, "learning_rate": 8.222854819032128e-06, "loss": 0.3975, "step": 4044 }, { "epoch": 0.4112444082960553, "grad_norm": 0.49223393201828003, "learning_rate": 8.224888165921107e-06, "loss": 0.4088, "step": 4045 }, { "epoch": 0.4113460756405043, "grad_norm": 0.4532151222229004, "learning_rate": 8.226921512810087e-06, "loss": 0.4255, "step": 4046 }, { "epoch": 0.41144774298495324, "grad_norm": 0.38959798216819763, "learning_rate": 8.228954859699066e-06, "loss": 0.4004, "step": 4047 }, { "epoch": 0.4115494103294022, "grad_norm": 0.4368985891342163, "learning_rate": 8.230988206588044e-06, "loss": 0.4577, "step": 4048 }, { "epoch": 0.4116510776738512, "grad_norm": 0.49218034744262695, "learning_rate": 8.233021553477025e-06, "loss": 0.392, "step": 4049 }, { "epoch": 0.4117527450183001, "grad_norm": 0.39135393500328064, "learning_rate": 8.235054900366003e-06, "loss": 0.3921, "step": 4050 }, { "epoch": 0.4118544123627491, "grad_norm": 0.3994194269180298, "learning_rate": 8.237088247254984e-06, "loss": 0.4217, "step": 4051 }, { "epoch": 0.41195607970719805, "grad_norm": 0.4008583426475525, "learning_rate": 8.239121594143962e-06, "loss": 0.4976, "step": 4052 }, { "epoch": 0.412057747051647, "grad_norm": 0.48648786544799805, "learning_rate": 8.241154941032941e-06, "loss": 0.4196, "step": 4053 }, { "epoch": 0.412159414396096, "grad_norm": 0.3973768949508667, "learning_rate": 8.243188287921921e-06, "loss": 0.4054, "step": 4054 }, { "epoch": 0.41226108174054493, "grad_norm": 0.41866064071655273, "learning_rate": 8.2452216348109e-06, "loss": 0.3739, "step": 4055 }, { "epoch": 0.4123627490849939, "grad_norm": 0.44507086277008057, "learning_rate": 8.247254981699879e-06, "loss": 0.4165, "step": 4056 }, { "epoch": 0.41246441642944287, "grad_norm": 0.42016446590423584, "learning_rate": 8.249288328588857e-06, "loss": 0.3847, "step": 4057 }, { "epoch": 0.4125660837738918, "grad_norm": 0.47258883714675903, "learning_rate": 8.251321675477838e-06, "loss": 0.403, "step": 4058 }, { "epoch": 0.4126677511183408, "grad_norm": 0.4709123373031616, "learning_rate": 8.253355022366816e-06, "loss": 0.3985, "step": 4059 }, { "epoch": 0.41276941846278975, "grad_norm": 0.4966508448123932, "learning_rate": 8.255388369255795e-06, "loss": 0.4092, "step": 4060 }, { "epoch": 0.4128710858072387, "grad_norm": 0.49550214409828186, "learning_rate": 8.257421716144775e-06, "loss": 0.4147, "step": 4061 }, { "epoch": 0.4129727531516877, "grad_norm": 0.445316880941391, "learning_rate": 8.259455063033754e-06, "loss": 0.4195, "step": 4062 }, { "epoch": 0.41307442049613663, "grad_norm": 0.4320122301578522, "learning_rate": 8.261488409922732e-06, "loss": 0.4001, "step": 4063 }, { "epoch": 0.4131760878405856, "grad_norm": 0.4624965786933899, "learning_rate": 8.263521756811713e-06, "loss": 0.4437, "step": 4064 }, { "epoch": 0.41327775518503457, "grad_norm": 0.38027092814445496, "learning_rate": 8.265555103700691e-06, "loss": 0.3907, "step": 4065 }, { "epoch": 0.4133794225294835, "grad_norm": 0.4154663681983948, "learning_rate": 8.26758845058967e-06, "loss": 0.4433, "step": 4066 }, { "epoch": 0.4134810898739325, "grad_norm": 0.40291425585746765, "learning_rate": 8.26962179747865e-06, "loss": 0.4284, "step": 4067 }, { "epoch": 0.41358275721838145, "grad_norm": 0.4534384608268738, "learning_rate": 8.271655144367629e-06, "loss": 0.4219, "step": 4068 }, { "epoch": 0.41368442456283044, "grad_norm": 0.43032148480415344, "learning_rate": 8.27368849125661e-06, "loss": 0.4309, "step": 4069 }, { "epoch": 0.4137860919072794, "grad_norm": 0.3949974775314331, "learning_rate": 8.275721838145588e-06, "loss": 0.4083, "step": 4070 }, { "epoch": 0.4138877592517283, "grad_norm": 0.48628658056259155, "learning_rate": 8.277755185034567e-06, "loss": 0.3904, "step": 4071 }, { "epoch": 0.4139894265961773, "grad_norm": 0.44588765501976013, "learning_rate": 8.279788531923547e-06, "loss": 0.4153, "step": 4072 }, { "epoch": 0.41409109394062626, "grad_norm": 0.43461740016937256, "learning_rate": 8.281821878812526e-06, "loss": 0.3862, "step": 4073 }, { "epoch": 0.41419276128507526, "grad_norm": 0.42162537574768066, "learning_rate": 8.283855225701506e-06, "loss": 0.3992, "step": 4074 }, { "epoch": 0.4142944286295242, "grad_norm": 0.45785611867904663, "learning_rate": 8.285888572590485e-06, "loss": 0.3916, "step": 4075 }, { "epoch": 0.41439609597397314, "grad_norm": 0.42809629440307617, "learning_rate": 8.287921919479463e-06, "loss": 0.4316, "step": 4076 }, { "epoch": 0.41449776331842214, "grad_norm": 0.4609629809856415, "learning_rate": 8.289955266368444e-06, "loss": 0.44, "step": 4077 }, { "epoch": 0.4145994306628711, "grad_norm": 0.40368878841400146, "learning_rate": 8.291988613257422e-06, "loss": 0.3987, "step": 4078 }, { "epoch": 0.41470109800732, "grad_norm": 0.4483141601085663, "learning_rate": 8.294021960146401e-06, "loss": 0.4136, "step": 4079 }, { "epoch": 0.414802765351769, "grad_norm": 0.5319515466690063, "learning_rate": 8.296055307035381e-06, "loss": 0.4098, "step": 4080 }, { "epoch": 0.41490443269621796, "grad_norm": 0.4056693911552429, "learning_rate": 8.29808865392436e-06, "loss": 0.4422, "step": 4081 }, { "epoch": 0.41500610004066696, "grad_norm": 0.4392434358596802, "learning_rate": 8.30012200081334e-06, "loss": 0.4182, "step": 4082 }, { "epoch": 0.4151077673851159, "grad_norm": 0.5032119750976562, "learning_rate": 8.302155347702319e-06, "loss": 0.3809, "step": 4083 }, { "epoch": 0.41520943472956484, "grad_norm": 0.42433980107307434, "learning_rate": 8.304188694591298e-06, "loss": 0.4188, "step": 4084 }, { "epoch": 0.41531110207401384, "grad_norm": 0.42562317848205566, "learning_rate": 8.306222041480278e-06, "loss": 0.4284, "step": 4085 }, { "epoch": 0.4154127694184628, "grad_norm": 0.43421193957328796, "learning_rate": 8.308255388369256e-06, "loss": 0.3694, "step": 4086 }, { "epoch": 0.4155144367629118, "grad_norm": 0.42518049478530884, "learning_rate": 8.310288735258237e-06, "loss": 0.4041, "step": 4087 }, { "epoch": 0.4156161041073607, "grad_norm": 0.5238400101661682, "learning_rate": 8.312322082147215e-06, "loss": 0.4775, "step": 4088 }, { "epoch": 0.41571777145180966, "grad_norm": 0.39537307620048523, "learning_rate": 8.314355429036194e-06, "loss": 0.3815, "step": 4089 }, { "epoch": 0.41581943879625866, "grad_norm": 0.41777628660202026, "learning_rate": 8.316388775925174e-06, "loss": 0.4204, "step": 4090 }, { "epoch": 0.4159211061407076, "grad_norm": 0.4188205599784851, "learning_rate": 8.318422122814153e-06, "loss": 0.4207, "step": 4091 }, { "epoch": 0.4160227734851566, "grad_norm": 0.38996201753616333, "learning_rate": 8.320455469703132e-06, "loss": 0.4154, "step": 4092 }, { "epoch": 0.41612444082960554, "grad_norm": 0.40290242433547974, "learning_rate": 8.322488816592112e-06, "loss": 0.407, "step": 4093 }, { "epoch": 0.4162261081740545, "grad_norm": 0.48081740736961365, "learning_rate": 8.32452216348109e-06, "loss": 0.4326, "step": 4094 }, { "epoch": 0.4163277755185035, "grad_norm": 0.4150233268737793, "learning_rate": 8.32655551037007e-06, "loss": 0.4483, "step": 4095 }, { "epoch": 0.4164294428629524, "grad_norm": 0.40761443972587585, "learning_rate": 8.32858885725905e-06, "loss": 0.4072, "step": 4096 }, { "epoch": 0.41653111020740136, "grad_norm": 0.39423078298568726, "learning_rate": 8.330622204148028e-06, "loss": 0.4374, "step": 4097 }, { "epoch": 0.41663277755185035, "grad_norm": 0.4079265892505646, "learning_rate": 8.332655551037007e-06, "loss": 0.3864, "step": 4098 }, { "epoch": 0.4167344448962993, "grad_norm": 0.3874396085739136, "learning_rate": 8.334688897925987e-06, "loss": 0.4178, "step": 4099 }, { "epoch": 0.4168361122407483, "grad_norm": 0.41741815209388733, "learning_rate": 8.336722244814966e-06, "loss": 0.4158, "step": 4100 }, { "epoch": 0.41693777958519723, "grad_norm": 0.43157127499580383, "learning_rate": 8.338755591703945e-06, "loss": 0.4051, "step": 4101 }, { "epoch": 0.4170394469296462, "grad_norm": 0.38683241605758667, "learning_rate": 8.340788938592925e-06, "loss": 0.4171, "step": 4102 }, { "epoch": 0.41714111427409517, "grad_norm": 0.5098957419395447, "learning_rate": 8.342822285481904e-06, "loss": 0.4076, "step": 4103 }, { "epoch": 0.4172427816185441, "grad_norm": 0.4424784779548645, "learning_rate": 8.344855632370882e-06, "loss": 0.4197, "step": 4104 }, { "epoch": 0.4173444489629931, "grad_norm": 0.506943941116333, "learning_rate": 8.346888979259863e-06, "loss": 0.407, "step": 4105 }, { "epoch": 0.41744611630744205, "grad_norm": 0.4989506006240845, "learning_rate": 8.348922326148841e-06, "loss": 0.3981, "step": 4106 }, { "epoch": 0.417547783651891, "grad_norm": 0.39784297347068787, "learning_rate": 8.35095567303782e-06, "loss": 0.432, "step": 4107 }, { "epoch": 0.41764945099634, "grad_norm": 0.42307502031326294, "learning_rate": 8.3529890199268e-06, "loss": 0.4137, "step": 4108 }, { "epoch": 0.41775111834078893, "grad_norm": 0.41785600781440735, "learning_rate": 8.355022366815779e-06, "loss": 0.3915, "step": 4109 }, { "epoch": 0.4178527856852379, "grad_norm": 0.43750983476638794, "learning_rate": 8.357055713704759e-06, "loss": 0.43, "step": 4110 }, { "epoch": 0.41795445302968687, "grad_norm": 0.4824860095977783, "learning_rate": 8.359089060593738e-06, "loss": 0.3989, "step": 4111 }, { "epoch": 0.4180561203741358, "grad_norm": 0.42378827929496765, "learning_rate": 8.361122407482716e-06, "loss": 0.3987, "step": 4112 }, { "epoch": 0.4181577877185848, "grad_norm": 0.4317674934864044, "learning_rate": 8.363155754371697e-06, "loss": 0.3962, "step": 4113 }, { "epoch": 0.41825945506303375, "grad_norm": 0.3730716407299042, "learning_rate": 8.365189101260675e-06, "loss": 0.396, "step": 4114 }, { "epoch": 0.41836112240748274, "grad_norm": 0.4292691946029663, "learning_rate": 8.367222448149656e-06, "loss": 0.4054, "step": 4115 }, { "epoch": 0.4184627897519317, "grad_norm": 0.4277247190475464, "learning_rate": 8.369255795038634e-06, "loss": 0.3917, "step": 4116 }, { "epoch": 0.4185644570963806, "grad_norm": 0.3786420524120331, "learning_rate": 8.371289141927613e-06, "loss": 0.4002, "step": 4117 }, { "epoch": 0.4186661244408296, "grad_norm": 0.4198223650455475, "learning_rate": 8.373322488816593e-06, "loss": 0.4315, "step": 4118 }, { "epoch": 0.41876779178527856, "grad_norm": 0.4126356244087219, "learning_rate": 8.375355835705572e-06, "loss": 0.4199, "step": 4119 }, { "epoch": 0.4188694591297275, "grad_norm": 0.48433351516723633, "learning_rate": 8.37738918259455e-06, "loss": 0.3891, "step": 4120 }, { "epoch": 0.4189711264741765, "grad_norm": 0.3960392475128174, "learning_rate": 8.379422529483531e-06, "loss": 0.4189, "step": 4121 }, { "epoch": 0.41907279381862544, "grad_norm": 0.4665159285068512, "learning_rate": 8.38145587637251e-06, "loss": 0.416, "step": 4122 }, { "epoch": 0.41917446116307444, "grad_norm": 0.42673367261886597, "learning_rate": 8.38348922326149e-06, "loss": 0.3882, "step": 4123 }, { "epoch": 0.4192761285075234, "grad_norm": 0.4290841519832611, "learning_rate": 8.385522570150469e-06, "loss": 0.4162, "step": 4124 }, { "epoch": 0.4193777958519723, "grad_norm": 0.4715336263179779, "learning_rate": 8.387555917039447e-06, "loss": 0.385, "step": 4125 }, { "epoch": 0.4194794631964213, "grad_norm": 0.4416440427303314, "learning_rate": 8.389589263928428e-06, "loss": 0.4038, "step": 4126 }, { "epoch": 0.41958113054087026, "grad_norm": 0.4315390884876251, "learning_rate": 8.391622610817406e-06, "loss": 0.444, "step": 4127 }, { "epoch": 0.41968279788531926, "grad_norm": 0.44986096024513245, "learning_rate": 8.393655957706387e-06, "loss": 0.4064, "step": 4128 }, { "epoch": 0.4197844652297682, "grad_norm": 0.38531166315078735, "learning_rate": 8.395689304595365e-06, "loss": 0.4485, "step": 4129 }, { "epoch": 0.41988613257421714, "grad_norm": 0.42065221071243286, "learning_rate": 8.397722651484344e-06, "loss": 0.4076, "step": 4130 }, { "epoch": 0.41998779991866614, "grad_norm": 0.41724899411201477, "learning_rate": 8.399755998373324e-06, "loss": 0.4311, "step": 4131 }, { "epoch": 0.4200894672631151, "grad_norm": 0.47289854288101196, "learning_rate": 8.401789345262303e-06, "loss": 0.4361, "step": 4132 }, { "epoch": 0.4201911346075641, "grad_norm": 0.3787846267223358, "learning_rate": 8.403822692151281e-06, "loss": 0.437, "step": 4133 }, { "epoch": 0.420292801952013, "grad_norm": 0.4526967406272888, "learning_rate": 8.405856039040262e-06, "loss": 0.4157, "step": 4134 }, { "epoch": 0.42039446929646196, "grad_norm": 0.42520272731781006, "learning_rate": 8.40788938592924e-06, "loss": 0.4334, "step": 4135 }, { "epoch": 0.42049613664091096, "grad_norm": 0.38947880268096924, "learning_rate": 8.409922732818219e-06, "loss": 0.4481, "step": 4136 }, { "epoch": 0.4205978039853599, "grad_norm": 0.410427451133728, "learning_rate": 8.4119560797072e-06, "loss": 0.4156, "step": 4137 }, { "epoch": 0.42069947132980884, "grad_norm": 0.40107524394989014, "learning_rate": 8.413989426596178e-06, "loss": 0.3795, "step": 4138 }, { "epoch": 0.42080113867425784, "grad_norm": 0.3721499741077423, "learning_rate": 8.416022773485157e-06, "loss": 0.4057, "step": 4139 }, { "epoch": 0.4209028060187068, "grad_norm": 0.47799018025398254, "learning_rate": 8.418056120374137e-06, "loss": 0.4274, "step": 4140 }, { "epoch": 0.4210044733631558, "grad_norm": 0.38237446546554565, "learning_rate": 8.420089467263116e-06, "loss": 0.3695, "step": 4141 }, { "epoch": 0.4211061407076047, "grad_norm": 0.4342944622039795, "learning_rate": 8.422122814152094e-06, "loss": 0.3915, "step": 4142 }, { "epoch": 0.42120780805205366, "grad_norm": 0.4004397392272949, "learning_rate": 8.424156161041075e-06, "loss": 0.4027, "step": 4143 }, { "epoch": 0.42130947539650265, "grad_norm": 0.3848949372768402, "learning_rate": 8.426189507930053e-06, "loss": 0.3994, "step": 4144 }, { "epoch": 0.4214111427409516, "grad_norm": 0.46627384424209595, "learning_rate": 8.428222854819032e-06, "loss": 0.418, "step": 4145 }, { "epoch": 0.4215128100854006, "grad_norm": 0.3885558843612671, "learning_rate": 8.430256201708012e-06, "loss": 0.4132, "step": 4146 }, { "epoch": 0.42161447742984953, "grad_norm": 0.4092462658882141, "learning_rate": 8.432289548596991e-06, "loss": 0.3995, "step": 4147 }, { "epoch": 0.4217161447742985, "grad_norm": 0.4780239462852478, "learning_rate": 8.43432289548597e-06, "loss": 0.4121, "step": 4148 }, { "epoch": 0.42181781211874747, "grad_norm": 0.383004754781723, "learning_rate": 8.43635624237495e-06, "loss": 0.3918, "step": 4149 }, { "epoch": 0.4219194794631964, "grad_norm": 0.4077533781528473, "learning_rate": 8.438389589263928e-06, "loss": 0.4006, "step": 4150 }, { "epoch": 0.4220211468076454, "grad_norm": 0.43202877044677734, "learning_rate": 8.440422936152909e-06, "loss": 0.3972, "step": 4151 }, { "epoch": 0.42212281415209435, "grad_norm": 0.3867311477661133, "learning_rate": 8.442456283041887e-06, "loss": 0.4003, "step": 4152 }, { "epoch": 0.4222244814965433, "grad_norm": 0.4508208632469177, "learning_rate": 8.444489629930866e-06, "loss": 0.4117, "step": 4153 }, { "epoch": 0.4223261488409923, "grad_norm": 0.3675280809402466, "learning_rate": 8.446522976819846e-06, "loss": 0.4379, "step": 4154 }, { "epoch": 0.42242781618544123, "grad_norm": 0.3801334798336029, "learning_rate": 8.448556323708825e-06, "loss": 0.385, "step": 4155 }, { "epoch": 0.4225294835298902, "grad_norm": 0.4683784246444702, "learning_rate": 8.450589670597805e-06, "loss": 0.436, "step": 4156 }, { "epoch": 0.42263115087433917, "grad_norm": 0.3928670585155487, "learning_rate": 8.452623017486784e-06, "loss": 0.3988, "step": 4157 }, { "epoch": 0.4227328182187881, "grad_norm": 0.47424599528312683, "learning_rate": 8.454656364375763e-06, "loss": 0.4342, "step": 4158 }, { "epoch": 0.4228344855632371, "grad_norm": 0.404228150844574, "learning_rate": 8.456689711264743e-06, "loss": 0.3862, "step": 4159 }, { "epoch": 0.42293615290768605, "grad_norm": 0.4556891918182373, "learning_rate": 8.458723058153722e-06, "loss": 0.4443, "step": 4160 }, { "epoch": 0.423037820252135, "grad_norm": 0.4472939968109131, "learning_rate": 8.4607564050427e-06, "loss": 0.4362, "step": 4161 }, { "epoch": 0.423139487596584, "grad_norm": 0.3563627600669861, "learning_rate": 8.46278975193168e-06, "loss": 0.3914, "step": 4162 }, { "epoch": 0.4232411549410329, "grad_norm": 0.4628826975822449, "learning_rate": 8.46482309882066e-06, "loss": 0.4551, "step": 4163 }, { "epoch": 0.4233428222854819, "grad_norm": 0.4154564142227173, "learning_rate": 8.46685644570964e-06, "loss": 0.4199, "step": 4164 }, { "epoch": 0.42344448962993086, "grad_norm": 0.3867882490158081, "learning_rate": 8.468889792598618e-06, "loss": 0.4111, "step": 4165 }, { "epoch": 0.4235461569743798, "grad_norm": 0.4224516749382019, "learning_rate": 8.470923139487597e-06, "loss": 0.4011, "step": 4166 }, { "epoch": 0.4236478243188288, "grad_norm": 0.43931400775909424, "learning_rate": 8.472956486376577e-06, "loss": 0.3996, "step": 4167 }, { "epoch": 0.42374949166327774, "grad_norm": 0.3881435692310333, "learning_rate": 8.474989833265556e-06, "loss": 0.4214, "step": 4168 }, { "epoch": 0.42385115900772674, "grad_norm": 0.38645654916763306, "learning_rate": 8.477023180154536e-06, "loss": 0.4106, "step": 4169 }, { "epoch": 0.4239528263521757, "grad_norm": 0.42132964730262756, "learning_rate": 8.479056527043515e-06, "loss": 0.4154, "step": 4170 }, { "epoch": 0.4240544936966246, "grad_norm": 0.4465177059173584, "learning_rate": 8.481089873932493e-06, "loss": 0.4168, "step": 4171 }, { "epoch": 0.4241561610410736, "grad_norm": 0.42878273129463196, "learning_rate": 8.483123220821474e-06, "loss": 0.4128, "step": 4172 }, { "epoch": 0.42425782838552256, "grad_norm": 0.4115605652332306, "learning_rate": 8.485156567710452e-06, "loss": 0.3948, "step": 4173 }, { "epoch": 0.42435949572997156, "grad_norm": 0.4086511731147766, "learning_rate": 8.487189914599431e-06, "loss": 0.3631, "step": 4174 }, { "epoch": 0.4244611630744205, "grad_norm": 0.38176894187927246, "learning_rate": 8.489223261488411e-06, "loss": 0.425, "step": 4175 }, { "epoch": 0.42456283041886944, "grad_norm": 0.4411890208721161, "learning_rate": 8.49125660837739e-06, "loss": 0.4287, "step": 4176 }, { "epoch": 0.42466449776331844, "grad_norm": 0.3878045380115509, "learning_rate": 8.493289955266369e-06, "loss": 0.4311, "step": 4177 }, { "epoch": 0.4247661651077674, "grad_norm": 0.4163363575935364, "learning_rate": 8.495323302155349e-06, "loss": 0.4372, "step": 4178 }, { "epoch": 0.4248678324522163, "grad_norm": 0.39572420716285706, "learning_rate": 8.497356649044328e-06, "loss": 0.3958, "step": 4179 }, { "epoch": 0.4249694997966653, "grad_norm": 0.3905032277107239, "learning_rate": 8.499389995933306e-06, "loss": 0.4117, "step": 4180 }, { "epoch": 0.42507116714111426, "grad_norm": 0.4065778851509094, "learning_rate": 8.501423342822287e-06, "loss": 0.4116, "step": 4181 }, { "epoch": 0.42517283448556326, "grad_norm": 0.460180401802063, "learning_rate": 8.503456689711265e-06, "loss": 0.4091, "step": 4182 }, { "epoch": 0.4252745018300122, "grad_norm": 0.4059653878211975, "learning_rate": 8.505490036600244e-06, "loss": 0.4343, "step": 4183 }, { "epoch": 0.42537616917446114, "grad_norm": 0.385658323764801, "learning_rate": 8.507523383489224e-06, "loss": 0.3961, "step": 4184 }, { "epoch": 0.42547783651891014, "grad_norm": 0.4959695637226105, "learning_rate": 8.509556730378203e-06, "loss": 0.4519, "step": 4185 }, { "epoch": 0.4255795038633591, "grad_norm": 0.43432164192199707, "learning_rate": 8.511590077267182e-06, "loss": 0.3889, "step": 4186 }, { "epoch": 0.4256811712078081, "grad_norm": 0.3960930407047272, "learning_rate": 8.513623424156162e-06, "loss": 0.3912, "step": 4187 }, { "epoch": 0.425782838552257, "grad_norm": 0.41097208857536316, "learning_rate": 8.51565677104514e-06, "loss": 0.4179, "step": 4188 }, { "epoch": 0.42588450589670596, "grad_norm": 0.46144524216651917, "learning_rate": 8.51769011793412e-06, "loss": 0.3812, "step": 4189 }, { "epoch": 0.42598617324115495, "grad_norm": 0.409523606300354, "learning_rate": 8.5197234648231e-06, "loss": 0.396, "step": 4190 }, { "epoch": 0.4260878405856039, "grad_norm": 0.4139784872531891, "learning_rate": 8.521756811712078e-06, "loss": 0.4085, "step": 4191 }, { "epoch": 0.4261895079300529, "grad_norm": 0.4263532757759094, "learning_rate": 8.523790158601059e-06, "loss": 0.3978, "step": 4192 }, { "epoch": 0.42629117527450183, "grad_norm": 0.44849488139152527, "learning_rate": 8.525823505490037e-06, "loss": 0.3672, "step": 4193 }, { "epoch": 0.4263928426189508, "grad_norm": 0.4623531997203827, "learning_rate": 8.527856852379016e-06, "loss": 0.4397, "step": 4194 }, { "epoch": 0.42649450996339977, "grad_norm": 0.43585625290870667, "learning_rate": 8.529890199267996e-06, "loss": 0.4355, "step": 4195 }, { "epoch": 0.4265961773078487, "grad_norm": 0.3968276381492615, "learning_rate": 8.531923546156975e-06, "loss": 0.4222, "step": 4196 }, { "epoch": 0.4266978446522977, "grad_norm": 0.4421926438808441, "learning_rate": 8.533956893045955e-06, "loss": 0.4379, "step": 4197 }, { "epoch": 0.42679951199674665, "grad_norm": 0.39210647344589233, "learning_rate": 8.535990239934934e-06, "loss": 0.4024, "step": 4198 }, { "epoch": 0.4269011793411956, "grad_norm": 0.3818303942680359, "learning_rate": 8.538023586823912e-06, "loss": 0.382, "step": 4199 }, { "epoch": 0.4270028466856446, "grad_norm": 0.4360332787036896, "learning_rate": 8.540056933712893e-06, "loss": 0.3829, "step": 4200 }, { "epoch": 0.42710451403009353, "grad_norm": 0.40036046504974365, "learning_rate": 8.542090280601871e-06, "loss": 0.4153, "step": 4201 }, { "epoch": 0.42720618137454247, "grad_norm": 0.4769487679004669, "learning_rate": 8.54412362749085e-06, "loss": 0.4205, "step": 4202 }, { "epoch": 0.42730784871899147, "grad_norm": 0.44411662220954895, "learning_rate": 8.54615697437983e-06, "loss": 0.408, "step": 4203 }, { "epoch": 0.4274095160634404, "grad_norm": 0.3791043162345886, "learning_rate": 8.548190321268809e-06, "loss": 0.3923, "step": 4204 }, { "epoch": 0.4275111834078894, "grad_norm": 0.41090092062950134, "learning_rate": 8.55022366815779e-06, "loss": 0.4011, "step": 4205 }, { "epoch": 0.42761285075233835, "grad_norm": 0.37021172046661377, "learning_rate": 8.552257015046768e-06, "loss": 0.457, "step": 4206 }, { "epoch": 0.4277145180967873, "grad_norm": 0.43818506598472595, "learning_rate": 8.554290361935747e-06, "loss": 0.4293, "step": 4207 }, { "epoch": 0.4278161854412363, "grad_norm": 0.432158499956131, "learning_rate": 8.556323708824727e-06, "loss": 0.4217, "step": 4208 }, { "epoch": 0.4279178527856852, "grad_norm": 0.381268709897995, "learning_rate": 8.558357055713706e-06, "loss": 0.4538, "step": 4209 }, { "epoch": 0.4280195201301342, "grad_norm": 0.4639776349067688, "learning_rate": 8.560390402602686e-06, "loss": 0.4405, "step": 4210 }, { "epoch": 0.42812118747458316, "grad_norm": 0.39437630772590637, "learning_rate": 8.562423749491665e-06, "loss": 0.4329, "step": 4211 }, { "epoch": 0.4282228548190321, "grad_norm": 0.41924363374710083, "learning_rate": 8.564457096380643e-06, "loss": 0.4219, "step": 4212 }, { "epoch": 0.4283245221634811, "grad_norm": 0.46260857582092285, "learning_rate": 8.566490443269624e-06, "loss": 0.4246, "step": 4213 }, { "epoch": 0.42842618950793004, "grad_norm": 0.40488794445991516, "learning_rate": 8.568523790158602e-06, "loss": 0.3981, "step": 4214 }, { "epoch": 0.42852785685237904, "grad_norm": 0.4605223536491394, "learning_rate": 8.57055713704758e-06, "loss": 0.4223, "step": 4215 }, { "epoch": 0.428629524196828, "grad_norm": 0.4525442123413086, "learning_rate": 8.572590483936561e-06, "loss": 0.381, "step": 4216 }, { "epoch": 0.4287311915412769, "grad_norm": 0.4136761426925659, "learning_rate": 8.57462383082554e-06, "loss": 0.433, "step": 4217 }, { "epoch": 0.4288328588857259, "grad_norm": 0.46498551964759827, "learning_rate": 8.576657177714518e-06, "loss": 0.4164, "step": 4218 }, { "epoch": 0.42893452623017486, "grad_norm": 0.5069331526756287, "learning_rate": 8.578690524603499e-06, "loss": 0.422, "step": 4219 }, { "epoch": 0.4290361935746238, "grad_norm": 0.3905940651893616, "learning_rate": 8.580723871492477e-06, "loss": 0.4033, "step": 4220 }, { "epoch": 0.4291378609190728, "grad_norm": 0.4592786431312561, "learning_rate": 8.582757218381456e-06, "loss": 0.4315, "step": 4221 }, { "epoch": 0.42923952826352174, "grad_norm": 0.3962712287902832, "learning_rate": 8.584790565270436e-06, "loss": 0.4139, "step": 4222 }, { "epoch": 0.42934119560797074, "grad_norm": 0.38768014311790466, "learning_rate": 8.586823912159415e-06, "loss": 0.4429, "step": 4223 }, { "epoch": 0.4294428629524197, "grad_norm": 0.4275583326816559, "learning_rate": 8.588857259048394e-06, "loss": 0.4108, "step": 4224 }, { "epoch": 0.4295445302968686, "grad_norm": 0.4015158712863922, "learning_rate": 8.590890605937372e-06, "loss": 0.3859, "step": 4225 }, { "epoch": 0.4296461976413176, "grad_norm": 0.43839654326438904, "learning_rate": 8.592923952826353e-06, "loss": 0.4185, "step": 4226 }, { "epoch": 0.42974786498576656, "grad_norm": 0.35599252581596375, "learning_rate": 8.594957299715331e-06, "loss": 0.3659, "step": 4227 }, { "epoch": 0.42984953233021556, "grad_norm": 0.44176092743873596, "learning_rate": 8.596990646604312e-06, "loss": 0.4367, "step": 4228 }, { "epoch": 0.4299511996746645, "grad_norm": 0.439820259809494, "learning_rate": 8.59902399349329e-06, "loss": 0.4169, "step": 4229 }, { "epoch": 0.43005286701911344, "grad_norm": 0.3885100483894348, "learning_rate": 8.601057340382269e-06, "loss": 0.4061, "step": 4230 }, { "epoch": 0.43015453436356244, "grad_norm": 0.4274640381336212, "learning_rate": 8.60309068727125e-06, "loss": 0.3975, "step": 4231 }, { "epoch": 0.4302562017080114, "grad_norm": 0.4156263470649719, "learning_rate": 8.605124034160228e-06, "loss": 0.4205, "step": 4232 }, { "epoch": 0.4303578690524604, "grad_norm": 0.37172630429267883, "learning_rate": 8.607157381049208e-06, "loss": 0.3805, "step": 4233 }, { "epoch": 0.4304595363969093, "grad_norm": 0.4337717294692993, "learning_rate": 8.609190727938187e-06, "loss": 0.4247, "step": 4234 }, { "epoch": 0.43056120374135826, "grad_norm": 0.41940486431121826, "learning_rate": 8.611224074827165e-06, "loss": 0.3681, "step": 4235 }, { "epoch": 0.43066287108580725, "grad_norm": 0.42593055963516235, "learning_rate": 8.613257421716146e-06, "loss": 0.4112, "step": 4236 }, { "epoch": 0.4307645384302562, "grad_norm": 0.46999236941337585, "learning_rate": 8.615290768605124e-06, "loss": 0.4499, "step": 4237 }, { "epoch": 0.4308662057747052, "grad_norm": 0.44163239002227783, "learning_rate": 8.617324115494105e-06, "loss": 0.4291, "step": 4238 }, { "epoch": 0.43096787311915413, "grad_norm": 0.4829721748828888, "learning_rate": 8.619357462383083e-06, "loss": 0.4369, "step": 4239 }, { "epoch": 0.4310695404636031, "grad_norm": 0.42347580194473267, "learning_rate": 8.621390809272062e-06, "loss": 0.4405, "step": 4240 }, { "epoch": 0.43117120780805207, "grad_norm": 0.40194979310035706, "learning_rate": 8.623424156161042e-06, "loss": 0.3873, "step": 4241 }, { "epoch": 0.431272875152501, "grad_norm": 0.4541454017162323, "learning_rate": 8.625457503050021e-06, "loss": 0.406, "step": 4242 }, { "epoch": 0.43137454249694995, "grad_norm": 0.3880855441093445, "learning_rate": 8.627490849939e-06, "loss": 0.3818, "step": 4243 }, { "epoch": 0.43147620984139895, "grad_norm": 0.40132853388786316, "learning_rate": 8.62952419682798e-06, "loss": 0.3876, "step": 4244 }, { "epoch": 0.4315778771858479, "grad_norm": 0.4134054481983185, "learning_rate": 8.631557543716959e-06, "loss": 0.4072, "step": 4245 }, { "epoch": 0.4316795445302969, "grad_norm": 0.4103614091873169, "learning_rate": 8.633590890605939e-06, "loss": 0.4294, "step": 4246 }, { "epoch": 0.43178121187474583, "grad_norm": 0.4284619987010956, "learning_rate": 8.635624237494918e-06, "loss": 0.4124, "step": 4247 }, { "epoch": 0.43188287921919477, "grad_norm": 0.4004119634628296, "learning_rate": 8.637657584383896e-06, "loss": 0.3918, "step": 4248 }, { "epoch": 0.43198454656364377, "grad_norm": 0.4322206676006317, "learning_rate": 8.639690931272877e-06, "loss": 0.4333, "step": 4249 }, { "epoch": 0.4320862139080927, "grad_norm": 0.39099282026290894, "learning_rate": 8.641724278161855e-06, "loss": 0.3672, "step": 4250 }, { "epoch": 0.4321878812525417, "grad_norm": 0.4248085916042328, "learning_rate": 8.643757625050836e-06, "loss": 0.3984, "step": 4251 }, { "epoch": 0.43228954859699065, "grad_norm": 0.4335343539714813, "learning_rate": 8.645790971939814e-06, "loss": 0.4232, "step": 4252 }, { "epoch": 0.4323912159414396, "grad_norm": 0.3575901985168457, "learning_rate": 8.647824318828793e-06, "loss": 0.3948, "step": 4253 }, { "epoch": 0.4324928832858886, "grad_norm": 0.4222557544708252, "learning_rate": 8.649857665717773e-06, "loss": 0.3892, "step": 4254 }, { "epoch": 0.4325945506303375, "grad_norm": 0.39687249064445496, "learning_rate": 8.651891012606752e-06, "loss": 0.4033, "step": 4255 }, { "epoch": 0.4326962179747865, "grad_norm": 0.3656511902809143, "learning_rate": 8.65392435949573e-06, "loss": 0.4288, "step": 4256 }, { "epoch": 0.43279788531923546, "grad_norm": 0.4219997227191925, "learning_rate": 8.65595770638471e-06, "loss": 0.4109, "step": 4257 }, { "epoch": 0.4328995526636844, "grad_norm": 0.3855822682380676, "learning_rate": 8.65799105327369e-06, "loss": 0.4152, "step": 4258 }, { "epoch": 0.4330012200081334, "grad_norm": 0.4625876247882843, "learning_rate": 8.660024400162668e-06, "loss": 0.4071, "step": 4259 }, { "epoch": 0.43310288735258234, "grad_norm": 0.3614194691181183, "learning_rate": 8.662057747051647e-06, "loss": 0.4135, "step": 4260 }, { "epoch": 0.43320455469703134, "grad_norm": 0.486120343208313, "learning_rate": 8.664091093940627e-06, "loss": 0.4308, "step": 4261 }, { "epoch": 0.4333062220414803, "grad_norm": 0.4541449546813965, "learning_rate": 8.666124440829606e-06, "loss": 0.4125, "step": 4262 }, { "epoch": 0.4334078893859292, "grad_norm": 0.4492911696434021, "learning_rate": 8.668157787718584e-06, "loss": 0.44, "step": 4263 }, { "epoch": 0.4335095567303782, "grad_norm": 0.3746497333049774, "learning_rate": 8.670191134607565e-06, "loss": 0.3944, "step": 4264 }, { "epoch": 0.43361122407482716, "grad_norm": 0.4532431960105896, "learning_rate": 8.672224481496543e-06, "loss": 0.4331, "step": 4265 }, { "epoch": 0.4337128914192761, "grad_norm": 0.4061166048049927, "learning_rate": 8.674257828385522e-06, "loss": 0.4057, "step": 4266 }, { "epoch": 0.4338145587637251, "grad_norm": 0.3777625560760498, "learning_rate": 8.676291175274502e-06, "loss": 0.4057, "step": 4267 }, { "epoch": 0.43391622610817404, "grad_norm": 0.38156577944755554, "learning_rate": 8.678324522163481e-06, "loss": 0.4076, "step": 4268 }, { "epoch": 0.43401789345262304, "grad_norm": 0.40842121839523315, "learning_rate": 8.680357869052461e-06, "loss": 0.4013, "step": 4269 }, { "epoch": 0.434119560797072, "grad_norm": 0.502001941204071, "learning_rate": 8.68239121594144e-06, "loss": 0.4208, "step": 4270 }, { "epoch": 0.4342212281415209, "grad_norm": 0.4038139879703522, "learning_rate": 8.684424562830419e-06, "loss": 0.4342, "step": 4271 }, { "epoch": 0.4343228954859699, "grad_norm": 0.4598295986652374, "learning_rate": 8.686457909719399e-06, "loss": 0.3959, "step": 4272 }, { "epoch": 0.43442456283041886, "grad_norm": 0.42931973934173584, "learning_rate": 8.688491256608378e-06, "loss": 0.3891, "step": 4273 }, { "epoch": 0.43452623017486786, "grad_norm": 0.41544514894485474, "learning_rate": 8.690524603497358e-06, "loss": 0.4032, "step": 4274 }, { "epoch": 0.4346278975193168, "grad_norm": 0.38176271319389343, "learning_rate": 8.692557950386337e-06, "loss": 0.3821, "step": 4275 }, { "epoch": 0.43472956486376574, "grad_norm": 0.4501520097255707, "learning_rate": 8.694591297275315e-06, "loss": 0.4283, "step": 4276 }, { "epoch": 0.43483123220821474, "grad_norm": 0.4469592869281769, "learning_rate": 8.696624644164296e-06, "loss": 0.3723, "step": 4277 }, { "epoch": 0.4349328995526637, "grad_norm": 0.4447523057460785, "learning_rate": 8.698657991053274e-06, "loss": 0.4373, "step": 4278 }, { "epoch": 0.4350345668971127, "grad_norm": 0.39153528213500977, "learning_rate": 8.700691337942254e-06, "loss": 0.4089, "step": 4279 }, { "epoch": 0.4351362342415616, "grad_norm": 0.528069794178009, "learning_rate": 8.702724684831233e-06, "loss": 0.3832, "step": 4280 }, { "epoch": 0.43523790158601056, "grad_norm": 0.4099902808666229, "learning_rate": 8.704758031720212e-06, "loss": 0.397, "step": 4281 }, { "epoch": 0.43533956893045955, "grad_norm": 0.534845232963562, "learning_rate": 8.706791378609192e-06, "loss": 0.4164, "step": 4282 }, { "epoch": 0.4354412362749085, "grad_norm": 0.46255120635032654, "learning_rate": 8.70882472549817e-06, "loss": 0.3623, "step": 4283 }, { "epoch": 0.43554290361935744, "grad_norm": 0.4395354688167572, "learning_rate": 8.71085807238715e-06, "loss": 0.4113, "step": 4284 }, { "epoch": 0.43564457096380643, "grad_norm": 0.48965945839881897, "learning_rate": 8.71289141927613e-06, "loss": 0.4434, "step": 4285 }, { "epoch": 0.4357462383082554, "grad_norm": 0.4871777296066284, "learning_rate": 8.714924766165108e-06, "loss": 0.3838, "step": 4286 }, { "epoch": 0.43584790565270437, "grad_norm": 0.5176495909690857, "learning_rate": 8.716958113054089e-06, "loss": 0.3982, "step": 4287 }, { "epoch": 0.4359495729971533, "grad_norm": 0.4917154908180237, "learning_rate": 8.718991459943067e-06, "loss": 0.4365, "step": 4288 }, { "epoch": 0.43605124034160225, "grad_norm": 0.47922560572624207, "learning_rate": 8.721024806832046e-06, "loss": 0.4072, "step": 4289 }, { "epoch": 0.43615290768605125, "grad_norm": 0.4569913148880005, "learning_rate": 8.723058153721026e-06, "loss": 0.4247, "step": 4290 }, { "epoch": 0.4362545750305002, "grad_norm": 0.39836910367012024, "learning_rate": 8.725091500610005e-06, "loss": 0.4214, "step": 4291 }, { "epoch": 0.4363562423749492, "grad_norm": 0.498723566532135, "learning_rate": 8.727124847498985e-06, "loss": 0.4308, "step": 4292 }, { "epoch": 0.43645790971939813, "grad_norm": 0.3869371712207794, "learning_rate": 8.729158194387964e-06, "loss": 0.3917, "step": 4293 }, { "epoch": 0.43655957706384707, "grad_norm": 0.3935408592224121, "learning_rate": 8.731191541276943e-06, "loss": 0.4215, "step": 4294 }, { "epoch": 0.43666124440829607, "grad_norm": 0.4323374330997467, "learning_rate": 8.733224888165923e-06, "loss": 0.3873, "step": 4295 }, { "epoch": 0.436762911752745, "grad_norm": 0.4309406876564026, "learning_rate": 8.735258235054902e-06, "loss": 0.4343, "step": 4296 }, { "epoch": 0.436864579097194, "grad_norm": 0.4022676944732666, "learning_rate": 8.73729158194388e-06, "loss": 0.4081, "step": 4297 }, { "epoch": 0.43696624644164295, "grad_norm": 0.4574071168899536, "learning_rate": 8.739324928832859e-06, "loss": 0.4025, "step": 4298 }, { "epoch": 0.4370679137860919, "grad_norm": 0.39119622111320496, "learning_rate": 8.74135827572184e-06, "loss": 0.4196, "step": 4299 }, { "epoch": 0.4371695811305409, "grad_norm": 0.43061450123786926, "learning_rate": 8.743391622610818e-06, "loss": 0.3956, "step": 4300 }, { "epoch": 0.4372712484749898, "grad_norm": 0.43576207756996155, "learning_rate": 8.745424969499796e-06, "loss": 0.4134, "step": 4301 }, { "epoch": 0.4373729158194388, "grad_norm": 0.41362500190734863, "learning_rate": 8.747458316388777e-06, "loss": 0.423, "step": 4302 }, { "epoch": 0.43747458316388776, "grad_norm": 0.3936476409435272, "learning_rate": 8.749491663277755e-06, "loss": 0.38, "step": 4303 }, { "epoch": 0.4375762505083367, "grad_norm": 0.43055370450019836, "learning_rate": 8.751525010166734e-06, "loss": 0.4319, "step": 4304 }, { "epoch": 0.4376779178527857, "grad_norm": 0.45866450667381287, "learning_rate": 8.753558357055714e-06, "loss": 0.4176, "step": 4305 }, { "epoch": 0.43777958519723464, "grad_norm": 0.43680357933044434, "learning_rate": 8.755591703944693e-06, "loss": 0.4069, "step": 4306 }, { "epoch": 0.4378812525416836, "grad_norm": 0.3838006258010864, "learning_rate": 8.757625050833672e-06, "loss": 0.397, "step": 4307 }, { "epoch": 0.4379829198861326, "grad_norm": 0.45346662402153015, "learning_rate": 8.759658397722652e-06, "loss": 0.4239, "step": 4308 }, { "epoch": 0.4380845872305815, "grad_norm": 0.42657315731048584, "learning_rate": 8.76169174461163e-06, "loss": 0.3904, "step": 4309 }, { "epoch": 0.4381862545750305, "grad_norm": 0.4299294054508209, "learning_rate": 8.763725091500611e-06, "loss": 0.4441, "step": 4310 }, { "epoch": 0.43828792191947946, "grad_norm": 0.4082796573638916, "learning_rate": 8.76575843838959e-06, "loss": 0.3823, "step": 4311 }, { "epoch": 0.4383895892639284, "grad_norm": 0.43198123574256897, "learning_rate": 8.767791785278568e-06, "loss": 0.4251, "step": 4312 }, { "epoch": 0.4384912566083774, "grad_norm": 0.3703377842903137, "learning_rate": 8.769825132167549e-06, "loss": 0.4087, "step": 4313 }, { "epoch": 0.43859292395282634, "grad_norm": 0.4223143756389618, "learning_rate": 8.771858479056527e-06, "loss": 0.4316, "step": 4314 }, { "epoch": 0.43869459129727534, "grad_norm": 0.3513493537902832, "learning_rate": 8.773891825945508e-06, "loss": 0.3598, "step": 4315 }, { "epoch": 0.4387962586417243, "grad_norm": 0.38789132237434387, "learning_rate": 8.775925172834486e-06, "loss": 0.4506, "step": 4316 }, { "epoch": 0.4388979259861732, "grad_norm": 0.40585842728614807, "learning_rate": 8.777958519723465e-06, "loss": 0.4472, "step": 4317 }, { "epoch": 0.4389995933306222, "grad_norm": 0.383289635181427, "learning_rate": 8.779991866612445e-06, "loss": 0.4017, "step": 4318 }, { "epoch": 0.43910126067507116, "grad_norm": 0.40474945306777954, "learning_rate": 8.782025213501424e-06, "loss": 0.4077, "step": 4319 }, { "epoch": 0.43920292801952016, "grad_norm": 0.371467649936676, "learning_rate": 8.784058560390404e-06, "loss": 0.4089, "step": 4320 }, { "epoch": 0.4393045953639691, "grad_norm": 0.3669441342353821, "learning_rate": 8.786091907279383e-06, "loss": 0.4232, "step": 4321 }, { "epoch": 0.43940626270841804, "grad_norm": 0.4349300265312195, "learning_rate": 8.788125254168361e-06, "loss": 0.4235, "step": 4322 }, { "epoch": 0.43950793005286704, "grad_norm": 0.4457842707633972, "learning_rate": 8.790158601057342e-06, "loss": 0.4424, "step": 4323 }, { "epoch": 0.439609597397316, "grad_norm": 0.44581183791160583, "learning_rate": 8.79219194794632e-06, "loss": 0.3965, "step": 4324 }, { "epoch": 0.4397112647417649, "grad_norm": 0.39780867099761963, "learning_rate": 8.794225294835299e-06, "loss": 0.4169, "step": 4325 }, { "epoch": 0.4398129320862139, "grad_norm": 0.4400380253791809, "learning_rate": 8.79625864172428e-06, "loss": 0.4226, "step": 4326 }, { "epoch": 0.43991459943066286, "grad_norm": 0.4632127583026886, "learning_rate": 8.798291988613258e-06, "loss": 0.4071, "step": 4327 }, { "epoch": 0.44001626677511185, "grad_norm": 0.3870084583759308, "learning_rate": 8.800325335502238e-06, "loss": 0.423, "step": 4328 }, { "epoch": 0.4401179341195608, "grad_norm": 0.4593184292316437, "learning_rate": 8.802358682391217e-06, "loss": 0.4355, "step": 4329 }, { "epoch": 0.44021960146400974, "grad_norm": 0.42049142718315125, "learning_rate": 8.804392029280196e-06, "loss": 0.4351, "step": 4330 }, { "epoch": 0.44032126880845873, "grad_norm": 0.4488576054573059, "learning_rate": 8.806425376169176e-06, "loss": 0.4212, "step": 4331 }, { "epoch": 0.4404229361529077, "grad_norm": 0.45202404260635376, "learning_rate": 8.808458723058155e-06, "loss": 0.425, "step": 4332 }, { "epoch": 0.44052460349735667, "grad_norm": 0.37506574392318726, "learning_rate": 8.810492069947133e-06, "loss": 0.3708, "step": 4333 }, { "epoch": 0.4406262708418056, "grad_norm": 0.4227878749370575, "learning_rate": 8.812525416836114e-06, "loss": 0.4143, "step": 4334 }, { "epoch": 0.44072793818625455, "grad_norm": 0.38049957156181335, "learning_rate": 8.814558763725092e-06, "loss": 0.4211, "step": 4335 }, { "epoch": 0.44082960553070355, "grad_norm": 0.4164814054965973, "learning_rate": 8.816592110614071e-06, "loss": 0.4154, "step": 4336 }, { "epoch": 0.4409312728751525, "grad_norm": 0.39380860328674316, "learning_rate": 8.818625457503051e-06, "loss": 0.4087, "step": 4337 }, { "epoch": 0.4410329402196015, "grad_norm": 0.40346983075141907, "learning_rate": 8.82065880439203e-06, "loss": 0.4063, "step": 4338 }, { "epoch": 0.44113460756405043, "grad_norm": 0.38568490743637085, "learning_rate": 8.822692151281009e-06, "loss": 0.3955, "step": 4339 }, { "epoch": 0.44123627490849937, "grad_norm": 0.39013242721557617, "learning_rate": 8.824725498169989e-06, "loss": 0.414, "step": 4340 }, { "epoch": 0.44133794225294837, "grad_norm": 0.37903812527656555, "learning_rate": 8.826758845058967e-06, "loss": 0.407, "step": 4341 }, { "epoch": 0.4414396095973973, "grad_norm": 0.37376418709754944, "learning_rate": 8.828792191947946e-06, "loss": 0.4088, "step": 4342 }, { "epoch": 0.4415412769418463, "grad_norm": 0.3877808749675751, "learning_rate": 8.830825538836926e-06, "loss": 0.4151, "step": 4343 }, { "epoch": 0.44164294428629525, "grad_norm": 0.38603681325912476, "learning_rate": 8.832858885725905e-06, "loss": 0.3846, "step": 4344 }, { "epoch": 0.4417446116307442, "grad_norm": 0.3705098628997803, "learning_rate": 8.834892232614884e-06, "loss": 0.4174, "step": 4345 }, { "epoch": 0.4418462789751932, "grad_norm": 0.37108877301216125, "learning_rate": 8.836925579503864e-06, "loss": 0.4592, "step": 4346 }, { "epoch": 0.4419479463196421, "grad_norm": 0.3842647075653076, "learning_rate": 8.838958926392843e-06, "loss": 0.4437, "step": 4347 }, { "epoch": 0.44204961366409107, "grad_norm": 0.4043777287006378, "learning_rate": 8.840992273281821e-06, "loss": 0.415, "step": 4348 }, { "epoch": 0.44215128100854006, "grad_norm": 0.41248032450675964, "learning_rate": 8.843025620170802e-06, "loss": 0.3962, "step": 4349 }, { "epoch": 0.442252948352989, "grad_norm": 0.3731939196586609, "learning_rate": 8.84505896705978e-06, "loss": 0.4235, "step": 4350 }, { "epoch": 0.442354615697438, "grad_norm": 0.37188220024108887, "learning_rate": 8.84709231394876e-06, "loss": 0.3729, "step": 4351 }, { "epoch": 0.44245628304188694, "grad_norm": 0.3761517405509949, "learning_rate": 8.84912566083774e-06, "loss": 0.3744, "step": 4352 }, { "epoch": 0.4425579503863359, "grad_norm": 0.3713599741458893, "learning_rate": 8.851159007726718e-06, "loss": 0.3819, "step": 4353 }, { "epoch": 0.4426596177307849, "grad_norm": 0.38808563351631165, "learning_rate": 8.853192354615698e-06, "loss": 0.4153, "step": 4354 }, { "epoch": 0.4427612850752338, "grad_norm": 0.39160796999931335, "learning_rate": 8.855225701504677e-06, "loss": 0.3836, "step": 4355 }, { "epoch": 0.4428629524196828, "grad_norm": 0.37135398387908936, "learning_rate": 8.857259048393657e-06, "loss": 0.4069, "step": 4356 }, { "epoch": 0.44296461976413176, "grad_norm": 0.37684574723243713, "learning_rate": 8.859292395282636e-06, "loss": 0.3842, "step": 4357 }, { "epoch": 0.4430662871085807, "grad_norm": 0.35735952854156494, "learning_rate": 8.861325742171615e-06, "loss": 0.3865, "step": 4358 }, { "epoch": 0.4431679544530297, "grad_norm": 0.3677223324775696, "learning_rate": 8.863359089060595e-06, "loss": 0.4094, "step": 4359 }, { "epoch": 0.44326962179747864, "grad_norm": 0.3911505341529846, "learning_rate": 8.865392435949574e-06, "loss": 0.4308, "step": 4360 }, { "epoch": 0.44337128914192764, "grad_norm": 0.3721427619457245, "learning_rate": 8.867425782838554e-06, "loss": 0.421, "step": 4361 }, { "epoch": 0.4434729564863766, "grad_norm": 0.42106083035469055, "learning_rate": 8.869459129727533e-06, "loss": 0.3887, "step": 4362 }, { "epoch": 0.4435746238308255, "grad_norm": 0.36645224690437317, "learning_rate": 8.871492476616511e-06, "loss": 0.3751, "step": 4363 }, { "epoch": 0.4436762911752745, "grad_norm": 0.403454065322876, "learning_rate": 8.873525823505491e-06, "loss": 0.4167, "step": 4364 }, { "epoch": 0.44377795851972346, "grad_norm": 0.3694010078907013, "learning_rate": 8.87555917039447e-06, "loss": 0.3849, "step": 4365 }, { "epoch": 0.4438796258641724, "grad_norm": 0.3987885117530823, "learning_rate": 8.877592517283449e-06, "loss": 0.4072, "step": 4366 }, { "epoch": 0.4439812932086214, "grad_norm": 0.3812457025051117, "learning_rate": 8.879625864172429e-06, "loss": 0.4379, "step": 4367 }, { "epoch": 0.44408296055307034, "grad_norm": 0.4071159064769745, "learning_rate": 8.881659211061408e-06, "loss": 0.4045, "step": 4368 }, { "epoch": 0.44418462789751934, "grad_norm": 0.3604007363319397, "learning_rate": 8.883692557950388e-06, "loss": 0.3756, "step": 4369 }, { "epoch": 0.4442862952419683, "grad_norm": 0.41787269711494446, "learning_rate": 8.885725904839367e-06, "loss": 0.4332, "step": 4370 }, { "epoch": 0.4443879625864172, "grad_norm": 0.3896903991699219, "learning_rate": 8.887759251728345e-06, "loss": 0.4336, "step": 4371 }, { "epoch": 0.4444896299308662, "grad_norm": 0.41402748227119446, "learning_rate": 8.889792598617326e-06, "loss": 0.419, "step": 4372 }, { "epoch": 0.44459129727531516, "grad_norm": 0.3851079046726227, "learning_rate": 8.891825945506304e-06, "loss": 0.3862, "step": 4373 }, { "epoch": 0.44469296461976415, "grad_norm": 0.4288838803768158, "learning_rate": 8.893859292395283e-06, "loss": 0.403, "step": 4374 }, { "epoch": 0.4447946319642131, "grad_norm": 0.38849177956581116, "learning_rate": 8.895892639284263e-06, "loss": 0.3823, "step": 4375 }, { "epoch": 0.44489629930866204, "grad_norm": 0.4079429507255554, "learning_rate": 8.897925986173242e-06, "loss": 0.4057, "step": 4376 }, { "epoch": 0.44499796665311103, "grad_norm": 0.42935770750045776, "learning_rate": 8.89995933306222e-06, "loss": 0.3728, "step": 4377 }, { "epoch": 0.44509963399756, "grad_norm": 0.3688420355319977, "learning_rate": 8.901992679951201e-06, "loss": 0.4178, "step": 4378 }, { "epoch": 0.44520130134200897, "grad_norm": 0.41577383875846863, "learning_rate": 8.90402602684018e-06, "loss": 0.3863, "step": 4379 }, { "epoch": 0.4453029686864579, "grad_norm": 0.390358030796051, "learning_rate": 8.906059373729158e-06, "loss": 0.3988, "step": 4380 }, { "epoch": 0.44540463603090685, "grad_norm": 0.3901219666004181, "learning_rate": 8.908092720618139e-06, "loss": 0.3714, "step": 4381 }, { "epoch": 0.44550630337535585, "grad_norm": 0.37527379393577576, "learning_rate": 8.910126067507117e-06, "loss": 0.3844, "step": 4382 }, { "epoch": 0.4456079707198048, "grad_norm": 0.3745323419570923, "learning_rate": 8.912159414396096e-06, "loss": 0.4265, "step": 4383 }, { "epoch": 0.4457096380642538, "grad_norm": 0.3733412027359009, "learning_rate": 8.914192761285076e-06, "loss": 0.4047, "step": 4384 }, { "epoch": 0.44581130540870273, "grad_norm": 0.3982495069503784, "learning_rate": 8.916226108174055e-06, "loss": 0.4398, "step": 4385 }, { "epoch": 0.44591297275315167, "grad_norm": 0.3893727660179138, "learning_rate": 8.918259455063033e-06, "loss": 0.4047, "step": 4386 }, { "epoch": 0.44601464009760067, "grad_norm": 0.3641378581523895, "learning_rate": 8.920292801952014e-06, "loss": 0.4341, "step": 4387 }, { "epoch": 0.4461163074420496, "grad_norm": 0.38677412271499634, "learning_rate": 8.922326148840992e-06, "loss": 0.4028, "step": 4388 }, { "epoch": 0.44621797478649855, "grad_norm": 0.4086957573890686, "learning_rate": 8.924359495729971e-06, "loss": 0.3932, "step": 4389 }, { "epoch": 0.44631964213094755, "grad_norm": 0.4003742039203644, "learning_rate": 8.926392842618951e-06, "loss": 0.3805, "step": 4390 }, { "epoch": 0.4464213094753965, "grad_norm": 0.3721180260181427, "learning_rate": 8.92842618950793e-06, "loss": 0.4139, "step": 4391 }, { "epoch": 0.4465229768198455, "grad_norm": 0.39593705534935, "learning_rate": 8.93045953639691e-06, "loss": 0.4053, "step": 4392 }, { "epoch": 0.4466246441642944, "grad_norm": 0.42061689496040344, "learning_rate": 8.932492883285889e-06, "loss": 0.4024, "step": 4393 }, { "epoch": 0.44672631150874337, "grad_norm": 0.38936787843704224, "learning_rate": 8.934526230174868e-06, "loss": 0.4276, "step": 4394 }, { "epoch": 0.44682797885319236, "grad_norm": 0.425618052482605, "learning_rate": 8.936559577063848e-06, "loss": 0.4547, "step": 4395 }, { "epoch": 0.4469296461976413, "grad_norm": 0.3856385350227356, "learning_rate": 8.938592923952827e-06, "loss": 0.4172, "step": 4396 }, { "epoch": 0.4470313135420903, "grad_norm": 0.41326552629470825, "learning_rate": 8.940626270841807e-06, "loss": 0.408, "step": 4397 }, { "epoch": 0.44713298088653924, "grad_norm": 0.37576305866241455, "learning_rate": 8.942659617730786e-06, "loss": 0.4058, "step": 4398 }, { "epoch": 0.4472346482309882, "grad_norm": 0.38664767146110535, "learning_rate": 8.944692964619764e-06, "loss": 0.4201, "step": 4399 }, { "epoch": 0.4473363155754372, "grad_norm": 0.38596341013908386, "learning_rate": 8.946726311508745e-06, "loss": 0.4225, "step": 4400 }, { "epoch": 0.4474379829198861, "grad_norm": 0.37950605154037476, "learning_rate": 8.948759658397723e-06, "loss": 0.3838, "step": 4401 }, { "epoch": 0.4475396502643351, "grad_norm": 0.36315104365348816, "learning_rate": 8.950793005286704e-06, "loss": 0.3927, "step": 4402 }, { "epoch": 0.44764131760878406, "grad_norm": 0.41377267241477966, "learning_rate": 8.952826352175682e-06, "loss": 0.405, "step": 4403 }, { "epoch": 0.447742984953233, "grad_norm": 0.40148091316223145, "learning_rate": 8.95485969906466e-06, "loss": 0.3972, "step": 4404 }, { "epoch": 0.447844652297682, "grad_norm": 0.36509737372398376, "learning_rate": 8.956893045953641e-06, "loss": 0.3636, "step": 4405 }, { "epoch": 0.44794631964213094, "grad_norm": 0.4004639685153961, "learning_rate": 8.95892639284262e-06, "loss": 0.415, "step": 4406 }, { "epoch": 0.44804798698657994, "grad_norm": 0.43682894110679626, "learning_rate": 8.9609597397316e-06, "loss": 0.3942, "step": 4407 }, { "epoch": 0.4481496543310289, "grad_norm": 0.37354573607444763, "learning_rate": 8.962993086620579e-06, "loss": 0.4037, "step": 4408 }, { "epoch": 0.4482513216754778, "grad_norm": 0.40223923325538635, "learning_rate": 8.965026433509557e-06, "loss": 0.4412, "step": 4409 }, { "epoch": 0.4483529890199268, "grad_norm": 0.3865615427494049, "learning_rate": 8.967059780398538e-06, "loss": 0.4305, "step": 4410 }, { "epoch": 0.44845465636437576, "grad_norm": 0.35886242985725403, "learning_rate": 8.969093127287516e-06, "loss": 0.4213, "step": 4411 }, { "epoch": 0.4485563237088247, "grad_norm": 0.4056847393512726, "learning_rate": 8.971126474176495e-06, "loss": 0.4235, "step": 4412 }, { "epoch": 0.4486579910532737, "grad_norm": 0.3532218337059021, "learning_rate": 8.973159821065475e-06, "loss": 0.4187, "step": 4413 }, { "epoch": 0.44875965839772264, "grad_norm": 0.39103204011917114, "learning_rate": 8.975193167954454e-06, "loss": 0.3815, "step": 4414 }, { "epoch": 0.44886132574217164, "grad_norm": 0.42319443821907043, "learning_rate": 8.977226514843433e-06, "loss": 0.3951, "step": 4415 }, { "epoch": 0.4489629930866206, "grad_norm": 0.396918922662735, "learning_rate": 8.979259861732413e-06, "loss": 0.3871, "step": 4416 }, { "epoch": 0.4490646604310695, "grad_norm": 0.4285188615322113, "learning_rate": 8.981293208621392e-06, "loss": 0.4372, "step": 4417 }, { "epoch": 0.4491663277755185, "grad_norm": 0.38122692704200745, "learning_rate": 8.98332655551037e-06, "loss": 0.3945, "step": 4418 }, { "epoch": 0.44926799511996746, "grad_norm": 0.35238078236579895, "learning_rate": 8.98535990239935e-06, "loss": 0.3902, "step": 4419 }, { "epoch": 0.44936966246441645, "grad_norm": 0.37436965107917786, "learning_rate": 8.98739324928833e-06, "loss": 0.3952, "step": 4420 }, { "epoch": 0.4494713298088654, "grad_norm": 0.3721599876880646, "learning_rate": 8.989426596177308e-06, "loss": 0.4657, "step": 4421 }, { "epoch": 0.44957299715331434, "grad_norm": 0.3683340847492218, "learning_rate": 8.991459943066288e-06, "loss": 0.3909, "step": 4422 }, { "epoch": 0.44967466449776333, "grad_norm": 0.40533140301704407, "learning_rate": 8.993493289955267e-06, "loss": 0.3961, "step": 4423 }, { "epoch": 0.4497763318422123, "grad_norm": 0.3805397152900696, "learning_rate": 8.995526636844246e-06, "loss": 0.4197, "step": 4424 }, { "epoch": 0.44987799918666127, "grad_norm": 0.3785358965396881, "learning_rate": 8.997559983733226e-06, "loss": 0.4156, "step": 4425 }, { "epoch": 0.4499796665311102, "grad_norm": 0.4070116877555847, "learning_rate": 8.999593330622204e-06, "loss": 0.4438, "step": 4426 }, { "epoch": 0.45008133387555915, "grad_norm": 0.37260863184928894, "learning_rate": 9.001626677511183e-06, "loss": 0.4279, "step": 4427 }, { "epoch": 0.45018300122000815, "grad_norm": 0.4113925099372864, "learning_rate": 9.003660024400163e-06, "loss": 0.438, "step": 4428 }, { "epoch": 0.4502846685644571, "grad_norm": 0.4252837598323822, "learning_rate": 9.005693371289142e-06, "loss": 0.3987, "step": 4429 }, { "epoch": 0.45038633590890603, "grad_norm": 0.4115541875362396, "learning_rate": 9.00772671817812e-06, "loss": 0.4108, "step": 4430 }, { "epoch": 0.45048800325335503, "grad_norm": 0.40544193983078003, "learning_rate": 9.009760065067101e-06, "loss": 0.3815, "step": 4431 }, { "epoch": 0.45058967059780397, "grad_norm": 0.48481762409210205, "learning_rate": 9.01179341195608e-06, "loss": 0.4263, "step": 4432 }, { "epoch": 0.45069133794225297, "grad_norm": 0.4104519486427307, "learning_rate": 9.01382675884506e-06, "loss": 0.3943, "step": 4433 }, { "epoch": 0.4507930052867019, "grad_norm": 0.4078885614871979, "learning_rate": 9.015860105734039e-06, "loss": 0.4161, "step": 4434 }, { "epoch": 0.45089467263115085, "grad_norm": 0.45071083307266235, "learning_rate": 9.017893452623017e-06, "loss": 0.4183, "step": 4435 }, { "epoch": 0.45099633997559985, "grad_norm": 0.4652364253997803, "learning_rate": 9.019926799511998e-06, "loss": 0.4204, "step": 4436 }, { "epoch": 0.4510980073200488, "grad_norm": 0.39179593324661255, "learning_rate": 9.021960146400976e-06, "loss": 0.4008, "step": 4437 }, { "epoch": 0.4511996746644978, "grad_norm": 0.45173752307891846, "learning_rate": 9.023993493289957e-06, "loss": 0.4163, "step": 4438 }, { "epoch": 0.4513013420089467, "grad_norm": 0.5114241242408752, "learning_rate": 9.026026840178935e-06, "loss": 0.4392, "step": 4439 }, { "epoch": 0.45140300935339567, "grad_norm": 0.4073337912559509, "learning_rate": 9.028060187067914e-06, "loss": 0.3614, "step": 4440 }, { "epoch": 0.45150467669784466, "grad_norm": 0.4170784652233124, "learning_rate": 9.030093533956894e-06, "loss": 0.4299, "step": 4441 }, { "epoch": 0.4516063440422936, "grad_norm": 0.42186620831489563, "learning_rate": 9.032126880845873e-06, "loss": 0.3831, "step": 4442 }, { "epoch": 0.4517080113867426, "grad_norm": 0.49163565039634705, "learning_rate": 9.034160227734853e-06, "loss": 0.436, "step": 4443 }, { "epoch": 0.45180967873119154, "grad_norm": 0.4276077449321747, "learning_rate": 9.036193574623832e-06, "loss": 0.4272, "step": 4444 }, { "epoch": 0.4519113460756405, "grad_norm": 0.426670640707016, "learning_rate": 9.03822692151281e-06, "loss": 0.4201, "step": 4445 }, { "epoch": 0.4520130134200895, "grad_norm": 0.44509539008140564, "learning_rate": 9.040260268401791e-06, "loss": 0.4285, "step": 4446 }, { "epoch": 0.4521146807645384, "grad_norm": 0.3896127939224243, "learning_rate": 9.04229361529077e-06, "loss": 0.4308, "step": 4447 }, { "epoch": 0.4522163481089874, "grad_norm": 0.4499744176864624, "learning_rate": 9.04432696217975e-06, "loss": 0.397, "step": 4448 }, { "epoch": 0.45231801545343636, "grad_norm": 0.47147348523139954, "learning_rate": 9.046360309068728e-06, "loss": 0.4469, "step": 4449 }, { "epoch": 0.4524196827978853, "grad_norm": 0.4111038148403168, "learning_rate": 9.048393655957707e-06, "loss": 0.4097, "step": 4450 }, { "epoch": 0.4525213501423343, "grad_norm": 0.4306010603904724, "learning_rate": 9.050427002846687e-06, "loss": 0.4164, "step": 4451 }, { "epoch": 0.45262301748678324, "grad_norm": 0.4037224054336548, "learning_rate": 9.052460349735666e-06, "loss": 0.3992, "step": 4452 }, { "epoch": 0.4527246848312322, "grad_norm": 0.5073670148849487, "learning_rate": 9.054493696624645e-06, "loss": 0.4182, "step": 4453 }, { "epoch": 0.4528263521756812, "grad_norm": 0.4630001187324524, "learning_rate": 9.056527043513625e-06, "loss": 0.4278, "step": 4454 }, { "epoch": 0.4529280195201301, "grad_norm": 0.46855199337005615, "learning_rate": 9.058560390402604e-06, "loss": 0.4027, "step": 4455 }, { "epoch": 0.4530296868645791, "grad_norm": 0.41284769773483276, "learning_rate": 9.060593737291582e-06, "loss": 0.4173, "step": 4456 }, { "epoch": 0.45313135420902806, "grad_norm": 0.4371607303619385, "learning_rate": 9.062627084180563e-06, "loss": 0.4262, "step": 4457 }, { "epoch": 0.453233021553477, "grad_norm": 0.48656538128852844, "learning_rate": 9.064660431069541e-06, "loss": 0.3768, "step": 4458 }, { "epoch": 0.453334688897926, "grad_norm": 0.45142680406570435, "learning_rate": 9.06669377795852e-06, "loss": 0.3821, "step": 4459 }, { "epoch": 0.45343635624237494, "grad_norm": 0.4247848093509674, "learning_rate": 9.0687271248475e-06, "loss": 0.4081, "step": 4460 }, { "epoch": 0.45353802358682394, "grad_norm": 0.4672918915748596, "learning_rate": 9.070760471736479e-06, "loss": 0.4426, "step": 4461 }, { "epoch": 0.4536396909312729, "grad_norm": 0.39835697412490845, "learning_rate": 9.072793818625458e-06, "loss": 0.3978, "step": 4462 }, { "epoch": 0.4537413582757218, "grad_norm": 0.46662425994873047, "learning_rate": 9.074827165514438e-06, "loss": 0.4573, "step": 4463 }, { "epoch": 0.4538430256201708, "grad_norm": 0.3976064622402191, "learning_rate": 9.076860512403417e-06, "loss": 0.3723, "step": 4464 }, { "epoch": 0.45394469296461976, "grad_norm": 0.3757140636444092, "learning_rate": 9.078893859292395e-06, "loss": 0.3863, "step": 4465 }, { "epoch": 0.45404636030906875, "grad_norm": 0.39210039377212524, "learning_rate": 9.080927206181376e-06, "loss": 0.4126, "step": 4466 }, { "epoch": 0.4541480276535177, "grad_norm": 0.411062628030777, "learning_rate": 9.082960553070354e-06, "loss": 0.3616, "step": 4467 }, { "epoch": 0.45424969499796664, "grad_norm": 0.4186210334300995, "learning_rate": 9.084993899959333e-06, "loss": 0.3787, "step": 4468 }, { "epoch": 0.45435136234241563, "grad_norm": 0.4352218806743622, "learning_rate": 9.087027246848313e-06, "loss": 0.4102, "step": 4469 }, { "epoch": 0.4544530296868646, "grad_norm": 0.42640969157218933, "learning_rate": 9.089060593737292e-06, "loss": 0.4173, "step": 4470 }, { "epoch": 0.4545546970313135, "grad_norm": 0.4534362256526947, "learning_rate": 9.09109394062627e-06, "loss": 0.4217, "step": 4471 }, { "epoch": 0.4546563643757625, "grad_norm": 0.41679203510284424, "learning_rate": 9.09312728751525e-06, "loss": 0.4072, "step": 4472 }, { "epoch": 0.45475803172021145, "grad_norm": 0.3540976941585541, "learning_rate": 9.09516063440423e-06, "loss": 0.3843, "step": 4473 }, { "epoch": 0.45485969906466045, "grad_norm": 0.42640602588653564, "learning_rate": 9.09719398129321e-06, "loss": 0.4192, "step": 4474 }, { "epoch": 0.4549613664091094, "grad_norm": 0.3720587491989136, "learning_rate": 9.099227328182188e-06, "loss": 0.426, "step": 4475 }, { "epoch": 0.45506303375355833, "grad_norm": 0.39835894107818604, "learning_rate": 9.101260675071167e-06, "loss": 0.4056, "step": 4476 }, { "epoch": 0.45516470109800733, "grad_norm": 0.41760921478271484, "learning_rate": 9.103294021960147e-06, "loss": 0.4026, "step": 4477 }, { "epoch": 0.45526636844245627, "grad_norm": 0.39982151985168457, "learning_rate": 9.105327368849126e-06, "loss": 0.4182, "step": 4478 }, { "epoch": 0.45536803578690527, "grad_norm": 0.38967326283454895, "learning_rate": 9.107360715738106e-06, "loss": 0.436, "step": 4479 }, { "epoch": 0.4554697031313542, "grad_norm": 0.38828468322753906, "learning_rate": 9.109394062627085e-06, "loss": 0.3949, "step": 4480 }, { "epoch": 0.45557137047580315, "grad_norm": 0.43043702840805054, "learning_rate": 9.111427409516064e-06, "loss": 0.383, "step": 4481 }, { "epoch": 0.45567303782025215, "grad_norm": 0.37959861755371094, "learning_rate": 9.113460756405044e-06, "loss": 0.4202, "step": 4482 }, { "epoch": 0.4557747051647011, "grad_norm": 0.4382533133029938, "learning_rate": 9.115494103294023e-06, "loss": 0.4018, "step": 4483 }, { "epoch": 0.4558763725091501, "grad_norm": 0.4366053640842438, "learning_rate": 9.117527450183003e-06, "loss": 0.4154, "step": 4484 }, { "epoch": 0.455978039853599, "grad_norm": 0.40348684787750244, "learning_rate": 9.119560797071982e-06, "loss": 0.4046, "step": 4485 }, { "epoch": 0.45607970719804797, "grad_norm": 0.408929705619812, "learning_rate": 9.12159414396096e-06, "loss": 0.3941, "step": 4486 }, { "epoch": 0.45618137454249696, "grad_norm": 0.4049082398414612, "learning_rate": 9.12362749084994e-06, "loss": 0.4136, "step": 4487 }, { "epoch": 0.4562830418869459, "grad_norm": 0.45421701669692993, "learning_rate": 9.12566083773892e-06, "loss": 0.396, "step": 4488 }, { "epoch": 0.4563847092313949, "grad_norm": 0.38307368755340576, "learning_rate": 9.1276941846279e-06, "loss": 0.4134, "step": 4489 }, { "epoch": 0.45648637657584384, "grad_norm": 0.3685634732246399, "learning_rate": 9.129727531516878e-06, "loss": 0.4237, "step": 4490 }, { "epoch": 0.4565880439202928, "grad_norm": 0.39808332920074463, "learning_rate": 9.131760878405857e-06, "loss": 0.396, "step": 4491 }, { "epoch": 0.4566897112647418, "grad_norm": 0.41355088353157043, "learning_rate": 9.133794225294837e-06, "loss": 0.401, "step": 4492 }, { "epoch": 0.4567913786091907, "grad_norm": 0.4169062674045563, "learning_rate": 9.135827572183816e-06, "loss": 0.4217, "step": 4493 }, { "epoch": 0.45689304595363966, "grad_norm": 0.4111965596675873, "learning_rate": 9.137860919072794e-06, "loss": 0.3875, "step": 4494 }, { "epoch": 0.45699471329808866, "grad_norm": 0.4005158841609955, "learning_rate": 9.139894265961775e-06, "loss": 0.4079, "step": 4495 }, { "epoch": 0.4570963806425376, "grad_norm": 0.40903109312057495, "learning_rate": 9.141927612850753e-06, "loss": 0.4065, "step": 4496 }, { "epoch": 0.4571980479869866, "grad_norm": 0.42000827193260193, "learning_rate": 9.143960959739732e-06, "loss": 0.3835, "step": 4497 }, { "epoch": 0.45729971533143554, "grad_norm": 0.38132211565971375, "learning_rate": 9.145994306628712e-06, "loss": 0.3999, "step": 4498 }, { "epoch": 0.4574013826758845, "grad_norm": 0.39631083607673645, "learning_rate": 9.148027653517691e-06, "loss": 0.406, "step": 4499 }, { "epoch": 0.4575030500203335, "grad_norm": 0.4085177779197693, "learning_rate": 9.15006100040667e-06, "loss": 0.3827, "step": 4500 }, { "epoch": 0.4576047173647824, "grad_norm": 0.4099067151546478, "learning_rate": 9.152094347295648e-06, "loss": 0.3997, "step": 4501 }, { "epoch": 0.4577063847092314, "grad_norm": 0.4054177701473236, "learning_rate": 9.154127694184629e-06, "loss": 0.3878, "step": 4502 }, { "epoch": 0.45780805205368036, "grad_norm": 0.4115853011608124, "learning_rate": 9.156161041073607e-06, "loss": 0.374, "step": 4503 }, { "epoch": 0.4579097193981293, "grad_norm": 0.43521758913993835, "learning_rate": 9.158194387962586e-06, "loss": 0.4258, "step": 4504 }, { "epoch": 0.4580113867425783, "grad_norm": 0.38275963068008423, "learning_rate": 9.160227734851566e-06, "loss": 0.4096, "step": 4505 }, { "epoch": 0.45811305408702724, "grad_norm": 0.3910321891307831, "learning_rate": 9.162261081740545e-06, "loss": 0.4142, "step": 4506 }, { "epoch": 0.45821472143147624, "grad_norm": 0.3923768103122711, "learning_rate": 9.164294428629525e-06, "loss": 0.4205, "step": 4507 }, { "epoch": 0.4583163887759252, "grad_norm": 0.3717910349369049, "learning_rate": 9.166327775518504e-06, "loss": 0.4202, "step": 4508 }, { "epoch": 0.4584180561203741, "grad_norm": 0.36852264404296875, "learning_rate": 9.168361122407483e-06, "loss": 0.3998, "step": 4509 }, { "epoch": 0.4585197234648231, "grad_norm": 0.42560920119285583, "learning_rate": 9.170394469296463e-06, "loss": 0.4377, "step": 4510 }, { "epoch": 0.45862139080927206, "grad_norm": 0.38832470774650574, "learning_rate": 9.172427816185442e-06, "loss": 0.4174, "step": 4511 }, { "epoch": 0.458723058153721, "grad_norm": 0.4186796545982361, "learning_rate": 9.17446116307442e-06, "loss": 0.4356, "step": 4512 }, { "epoch": 0.45882472549817, "grad_norm": 0.39005348086357117, "learning_rate": 9.1764945099634e-06, "loss": 0.3993, "step": 4513 }, { "epoch": 0.45892639284261894, "grad_norm": 0.40788012742996216, "learning_rate": 9.178527856852379e-06, "loss": 0.399, "step": 4514 }, { "epoch": 0.45902806018706793, "grad_norm": 0.43208450078964233, "learning_rate": 9.18056120374136e-06, "loss": 0.4447, "step": 4515 }, { "epoch": 0.4591297275315169, "grad_norm": 0.4221278131008148, "learning_rate": 9.182594550630338e-06, "loss": 0.3894, "step": 4516 }, { "epoch": 0.4592313948759658, "grad_norm": 0.38369670510292053, "learning_rate": 9.184627897519317e-06, "loss": 0.4143, "step": 4517 }, { "epoch": 0.4593330622204148, "grad_norm": 0.41029345989227295, "learning_rate": 9.186661244408297e-06, "loss": 0.4187, "step": 4518 }, { "epoch": 0.45943472956486375, "grad_norm": 0.432140052318573, "learning_rate": 9.188694591297276e-06, "loss": 0.408, "step": 4519 }, { "epoch": 0.45953639690931275, "grad_norm": 0.40795034170150757, "learning_rate": 9.190727938186256e-06, "loss": 0.4092, "step": 4520 }, { "epoch": 0.4596380642537617, "grad_norm": 0.36351320147514343, "learning_rate": 9.192761285075235e-06, "loss": 0.3913, "step": 4521 }, { "epoch": 0.45973973159821063, "grad_norm": 0.44155871868133545, "learning_rate": 9.194794631964213e-06, "loss": 0.4127, "step": 4522 }, { "epoch": 0.45984139894265963, "grad_norm": 0.4084012508392334, "learning_rate": 9.196827978853194e-06, "loss": 0.405, "step": 4523 }, { "epoch": 0.45994306628710857, "grad_norm": 0.3682622015476227, "learning_rate": 9.198861325742172e-06, "loss": 0.4158, "step": 4524 }, { "epoch": 0.46004473363155757, "grad_norm": 0.37955719232559204, "learning_rate": 9.200894672631153e-06, "loss": 0.3927, "step": 4525 }, { "epoch": 0.4601464009760065, "grad_norm": 0.4419156312942505, "learning_rate": 9.202928019520131e-06, "loss": 0.4498, "step": 4526 }, { "epoch": 0.46024806832045545, "grad_norm": 0.404713898897171, "learning_rate": 9.20496136640911e-06, "loss": 0.4195, "step": 4527 }, { "epoch": 0.46034973566490445, "grad_norm": 0.40993767976760864, "learning_rate": 9.20699471329809e-06, "loss": 0.419, "step": 4528 }, { "epoch": 0.4604514030093534, "grad_norm": 0.43780386447906494, "learning_rate": 9.209028060187069e-06, "loss": 0.4194, "step": 4529 }, { "epoch": 0.4605530703538024, "grad_norm": 0.40015915036201477, "learning_rate": 9.21106140707605e-06, "loss": 0.4263, "step": 4530 }, { "epoch": 0.4606547376982513, "grad_norm": 0.4298613369464874, "learning_rate": 9.213094753965028e-06, "loss": 0.4538, "step": 4531 }, { "epoch": 0.46075640504270027, "grad_norm": 0.43425843119621277, "learning_rate": 9.215128100854007e-06, "loss": 0.4469, "step": 4532 }, { "epoch": 0.46085807238714926, "grad_norm": 0.42443811893463135, "learning_rate": 9.217161447742987e-06, "loss": 0.4318, "step": 4533 }, { "epoch": 0.4609597397315982, "grad_norm": 0.4209756851196289, "learning_rate": 9.219194794631965e-06, "loss": 0.4153, "step": 4534 }, { "epoch": 0.46106140707604715, "grad_norm": 0.4309302866458893, "learning_rate": 9.221228141520944e-06, "loss": 0.4027, "step": 4535 }, { "epoch": 0.46116307442049614, "grad_norm": 0.3746008574962616, "learning_rate": 9.223261488409924e-06, "loss": 0.4056, "step": 4536 }, { "epoch": 0.4612647417649451, "grad_norm": 0.4651622474193573, "learning_rate": 9.225294835298903e-06, "loss": 0.427, "step": 4537 }, { "epoch": 0.4613664091093941, "grad_norm": 0.4041387140750885, "learning_rate": 9.227328182187882e-06, "loss": 0.3786, "step": 4538 }, { "epoch": 0.461468076453843, "grad_norm": 0.45756617188453674, "learning_rate": 9.22936152907686e-06, "loss": 0.3961, "step": 4539 }, { "epoch": 0.46156974379829196, "grad_norm": 0.398600310087204, "learning_rate": 9.23139487596584e-06, "loss": 0.3747, "step": 4540 }, { "epoch": 0.46167141114274096, "grad_norm": 0.3621343672275543, "learning_rate": 9.23342822285482e-06, "loss": 0.4247, "step": 4541 }, { "epoch": 0.4617730784871899, "grad_norm": 0.39067497849464417, "learning_rate": 9.235461569743798e-06, "loss": 0.4174, "step": 4542 }, { "epoch": 0.4618747458316389, "grad_norm": 0.4153994023799896, "learning_rate": 9.237494916632778e-06, "loss": 0.4134, "step": 4543 }, { "epoch": 0.46197641317608784, "grad_norm": 0.374182790517807, "learning_rate": 9.239528263521757e-06, "loss": 0.4135, "step": 4544 }, { "epoch": 0.4620780805205368, "grad_norm": 0.4514877200126648, "learning_rate": 9.241561610410736e-06, "loss": 0.4031, "step": 4545 }, { "epoch": 0.4621797478649858, "grad_norm": 0.42160165309906006, "learning_rate": 9.243594957299716e-06, "loss": 0.4202, "step": 4546 }, { "epoch": 0.4622814152094347, "grad_norm": 0.35821884870529175, "learning_rate": 9.245628304188695e-06, "loss": 0.4193, "step": 4547 }, { "epoch": 0.4623830825538837, "grad_norm": 0.43189290165901184, "learning_rate": 9.247661651077675e-06, "loss": 0.4224, "step": 4548 }, { "epoch": 0.46248474989833266, "grad_norm": 0.42491716146469116, "learning_rate": 9.249694997966654e-06, "loss": 0.4378, "step": 4549 }, { "epoch": 0.4625864172427816, "grad_norm": 0.3915877938270569, "learning_rate": 9.251728344855632e-06, "loss": 0.4231, "step": 4550 }, { "epoch": 0.4626880845872306, "grad_norm": 0.39532744884490967, "learning_rate": 9.253761691744613e-06, "loss": 0.3748, "step": 4551 }, { "epoch": 0.46278975193167954, "grad_norm": 0.4167715311050415, "learning_rate": 9.255795038633591e-06, "loss": 0.3994, "step": 4552 }, { "epoch": 0.46289141927612854, "grad_norm": 0.41395196318626404, "learning_rate": 9.257828385522572e-06, "loss": 0.4123, "step": 4553 }, { "epoch": 0.4629930866205775, "grad_norm": 0.4339336156845093, "learning_rate": 9.25986173241155e-06, "loss": 0.4463, "step": 4554 }, { "epoch": 0.4630947539650264, "grad_norm": 0.44385406374931335, "learning_rate": 9.261895079300529e-06, "loss": 0.4, "step": 4555 }, { "epoch": 0.4631964213094754, "grad_norm": 0.38546043634414673, "learning_rate": 9.263928426189509e-06, "loss": 0.3903, "step": 4556 }, { "epoch": 0.46329808865392436, "grad_norm": 0.4403095543384552, "learning_rate": 9.265961773078488e-06, "loss": 0.4422, "step": 4557 }, { "epoch": 0.4633997559983733, "grad_norm": 0.4283035695552826, "learning_rate": 9.267995119967466e-06, "loss": 0.4076, "step": 4558 }, { "epoch": 0.4635014233428223, "grad_norm": 0.402567058801651, "learning_rate": 9.270028466856447e-06, "loss": 0.4015, "step": 4559 }, { "epoch": 0.46360309068727124, "grad_norm": 0.38661906123161316, "learning_rate": 9.272061813745425e-06, "loss": 0.388, "step": 4560 }, { "epoch": 0.46370475803172023, "grad_norm": 0.3941015303134918, "learning_rate": 9.274095160634406e-06, "loss": 0.3978, "step": 4561 }, { "epoch": 0.4638064253761692, "grad_norm": 0.37551403045654297, "learning_rate": 9.276128507523384e-06, "loss": 0.3971, "step": 4562 }, { "epoch": 0.4639080927206181, "grad_norm": 0.4058309495449066, "learning_rate": 9.278161854412363e-06, "loss": 0.4224, "step": 4563 }, { "epoch": 0.4640097600650671, "grad_norm": 0.37984684109687805, "learning_rate": 9.280195201301343e-06, "loss": 0.417, "step": 4564 }, { "epoch": 0.46411142740951605, "grad_norm": 0.40903130173683167, "learning_rate": 9.282228548190322e-06, "loss": 0.4578, "step": 4565 }, { "epoch": 0.46421309475396505, "grad_norm": 0.35768646001815796, "learning_rate": 9.284261895079302e-06, "loss": 0.4069, "step": 4566 }, { "epoch": 0.464314762098414, "grad_norm": 0.37793034315109253, "learning_rate": 9.286295241968281e-06, "loss": 0.4187, "step": 4567 }, { "epoch": 0.46441642944286293, "grad_norm": 0.4046144485473633, "learning_rate": 9.28832858885726e-06, "loss": 0.4127, "step": 4568 }, { "epoch": 0.46451809678731193, "grad_norm": 0.3778937757015228, "learning_rate": 9.29036193574624e-06, "loss": 0.4022, "step": 4569 }, { "epoch": 0.46461976413176087, "grad_norm": 0.39856085181236267, "learning_rate": 9.292395282635219e-06, "loss": 0.4315, "step": 4570 }, { "epoch": 0.46472143147620987, "grad_norm": 0.38374263048171997, "learning_rate": 9.294428629524199e-06, "loss": 0.4246, "step": 4571 }, { "epoch": 0.4648230988206588, "grad_norm": 0.39857861399650574, "learning_rate": 9.296461976413178e-06, "loss": 0.3996, "step": 4572 }, { "epoch": 0.46492476616510775, "grad_norm": 0.3897898495197296, "learning_rate": 9.298495323302156e-06, "loss": 0.3981, "step": 4573 }, { "epoch": 0.46502643350955675, "grad_norm": 0.45586374402046204, "learning_rate": 9.300528670191137e-06, "loss": 0.434, "step": 4574 }, { "epoch": 0.4651281008540057, "grad_norm": 0.3848510682582855, "learning_rate": 9.302562017080115e-06, "loss": 0.4009, "step": 4575 }, { "epoch": 0.46522976819845463, "grad_norm": 0.47632116079330444, "learning_rate": 9.304595363969094e-06, "loss": 0.4246, "step": 4576 }, { "epoch": 0.4653314355429036, "grad_norm": 0.41246694326400757, "learning_rate": 9.306628710858072e-06, "loss": 0.4123, "step": 4577 }, { "epoch": 0.46543310288735257, "grad_norm": 0.4320681393146515, "learning_rate": 9.308662057747053e-06, "loss": 0.4075, "step": 4578 }, { "epoch": 0.46553477023180156, "grad_norm": 0.395108163356781, "learning_rate": 9.310695404636031e-06, "loss": 0.4, "step": 4579 }, { "epoch": 0.4656364375762505, "grad_norm": 0.44668978452682495, "learning_rate": 9.31272875152501e-06, "loss": 0.3997, "step": 4580 }, { "epoch": 0.46573810492069945, "grad_norm": 0.41759568452835083, "learning_rate": 9.31476209841399e-06, "loss": 0.3981, "step": 4581 }, { "epoch": 0.46583977226514844, "grad_norm": 0.394970178604126, "learning_rate": 9.316795445302969e-06, "loss": 0.4169, "step": 4582 }, { "epoch": 0.4659414396095974, "grad_norm": 0.4223938286304474, "learning_rate": 9.318828792191948e-06, "loss": 0.4411, "step": 4583 }, { "epoch": 0.4660431069540464, "grad_norm": 0.3881968855857849, "learning_rate": 9.320862139080928e-06, "loss": 0.3785, "step": 4584 }, { "epoch": 0.4661447742984953, "grad_norm": 0.4028259515762329, "learning_rate": 9.322895485969907e-06, "loss": 0.417, "step": 4585 }, { "epoch": 0.46624644164294426, "grad_norm": 0.4054601788520813, "learning_rate": 9.324928832858885e-06, "loss": 0.4268, "step": 4586 }, { "epoch": 0.46634810898739326, "grad_norm": 0.44279366731643677, "learning_rate": 9.326962179747866e-06, "loss": 0.4281, "step": 4587 }, { "epoch": 0.4664497763318422, "grad_norm": 0.4054912030696869, "learning_rate": 9.328995526636844e-06, "loss": 0.41, "step": 4588 }, { "epoch": 0.4665514436762912, "grad_norm": 0.4121018350124359, "learning_rate": 9.331028873525825e-06, "loss": 0.4325, "step": 4589 }, { "epoch": 0.46665311102074014, "grad_norm": 0.48567649722099304, "learning_rate": 9.333062220414803e-06, "loss": 0.4084, "step": 4590 }, { "epoch": 0.4667547783651891, "grad_norm": 0.41317975521087646, "learning_rate": 9.335095567303782e-06, "loss": 0.3986, "step": 4591 }, { "epoch": 0.4668564457096381, "grad_norm": 0.3977641463279724, "learning_rate": 9.337128914192762e-06, "loss": 0.4084, "step": 4592 }, { "epoch": 0.466958113054087, "grad_norm": 0.46506601572036743, "learning_rate": 9.339162261081741e-06, "loss": 0.4212, "step": 4593 }, { "epoch": 0.467059780398536, "grad_norm": 0.407771497964859, "learning_rate": 9.341195607970721e-06, "loss": 0.4144, "step": 4594 }, { "epoch": 0.46716144774298496, "grad_norm": 0.40220823884010315, "learning_rate": 9.3432289548597e-06, "loss": 0.3994, "step": 4595 }, { "epoch": 0.4672631150874339, "grad_norm": 0.4179666340351105, "learning_rate": 9.345262301748679e-06, "loss": 0.3873, "step": 4596 }, { "epoch": 0.4673647824318829, "grad_norm": 0.38035568594932556, "learning_rate": 9.347295648637659e-06, "loss": 0.3997, "step": 4597 }, { "epoch": 0.46746644977633184, "grad_norm": 0.3684576749801636, "learning_rate": 9.349328995526637e-06, "loss": 0.4278, "step": 4598 }, { "epoch": 0.4675681171207808, "grad_norm": 0.37133970856666565, "learning_rate": 9.351362342415616e-06, "loss": 0.3824, "step": 4599 }, { "epoch": 0.4676697844652298, "grad_norm": 0.45784255862236023, "learning_rate": 9.353395689304596e-06, "loss": 0.4224, "step": 4600 }, { "epoch": 0.4677714518096787, "grad_norm": 0.40504518151283264, "learning_rate": 9.355429036193575e-06, "loss": 0.3956, "step": 4601 }, { "epoch": 0.4678731191541277, "grad_norm": 0.3749191164970398, "learning_rate": 9.357462383082555e-06, "loss": 0.4065, "step": 4602 }, { "epoch": 0.46797478649857666, "grad_norm": 0.44525542855262756, "learning_rate": 9.359495729971534e-06, "loss": 0.4168, "step": 4603 }, { "epoch": 0.4680764538430256, "grad_norm": 0.4094005525112152, "learning_rate": 9.361529076860513e-06, "loss": 0.3906, "step": 4604 }, { "epoch": 0.4681781211874746, "grad_norm": 0.45213621854782104, "learning_rate": 9.363562423749493e-06, "loss": 0.4198, "step": 4605 }, { "epoch": 0.46827978853192354, "grad_norm": 0.3873979449272156, "learning_rate": 9.365595770638472e-06, "loss": 0.4048, "step": 4606 }, { "epoch": 0.46838145587637253, "grad_norm": 0.4100387990474701, "learning_rate": 9.367629117527452e-06, "loss": 0.3818, "step": 4607 }, { "epoch": 0.4684831232208215, "grad_norm": 0.44254034757614136, "learning_rate": 9.36966246441643e-06, "loss": 0.45, "step": 4608 }, { "epoch": 0.4685847905652704, "grad_norm": 0.3654833137989044, "learning_rate": 9.37169581130541e-06, "loss": 0.4197, "step": 4609 }, { "epoch": 0.4686864579097194, "grad_norm": 0.4174806475639343, "learning_rate": 9.37372915819439e-06, "loss": 0.4438, "step": 4610 }, { "epoch": 0.46878812525416835, "grad_norm": 0.3756217658519745, "learning_rate": 9.375762505083368e-06, "loss": 0.4051, "step": 4611 }, { "epoch": 0.46888979259861735, "grad_norm": 0.4115876257419586, "learning_rate": 9.377795851972347e-06, "loss": 0.3896, "step": 4612 }, { "epoch": 0.4689914599430663, "grad_norm": 0.38901054859161377, "learning_rate": 9.379829198861327e-06, "loss": 0.4072, "step": 4613 }, { "epoch": 0.46909312728751523, "grad_norm": 0.386626660823822, "learning_rate": 9.381862545750306e-06, "loss": 0.4104, "step": 4614 }, { "epoch": 0.46919479463196423, "grad_norm": 0.38154155015945435, "learning_rate": 9.383895892639285e-06, "loss": 0.4151, "step": 4615 }, { "epoch": 0.46929646197641317, "grad_norm": 0.3628562092781067, "learning_rate": 9.385929239528265e-06, "loss": 0.3926, "step": 4616 }, { "epoch": 0.4693981293208621, "grad_norm": 0.3807322382926941, "learning_rate": 9.387962586417244e-06, "loss": 0.411, "step": 4617 }, { "epoch": 0.4694997966653111, "grad_norm": 0.41547101736068726, "learning_rate": 9.389995933306222e-06, "loss": 0.382, "step": 4618 }, { "epoch": 0.46960146400976005, "grad_norm": 0.4096169173717499, "learning_rate": 9.392029280195202e-06, "loss": 0.4004, "step": 4619 }, { "epoch": 0.46970313135420905, "grad_norm": 0.41049593687057495, "learning_rate": 9.394062627084181e-06, "loss": 0.4262, "step": 4620 }, { "epoch": 0.469804798698658, "grad_norm": 0.37041759490966797, "learning_rate": 9.39609597397316e-06, "loss": 0.3863, "step": 4621 }, { "epoch": 0.46990646604310693, "grad_norm": 0.40202274918556213, "learning_rate": 9.39812932086214e-06, "loss": 0.391, "step": 4622 }, { "epoch": 0.4700081333875559, "grad_norm": 0.4280054569244385, "learning_rate": 9.400162667751119e-06, "loss": 0.4335, "step": 4623 }, { "epoch": 0.47010980073200487, "grad_norm": 0.3512095510959625, "learning_rate": 9.402196014640097e-06, "loss": 0.4092, "step": 4624 }, { "epoch": 0.47021146807645386, "grad_norm": 0.37862545251846313, "learning_rate": 9.404229361529078e-06, "loss": 0.3754, "step": 4625 }, { "epoch": 0.4703131354209028, "grad_norm": 0.35708484053611755, "learning_rate": 9.406262708418056e-06, "loss": 0.3968, "step": 4626 }, { "epoch": 0.47041480276535175, "grad_norm": 0.40876349806785583, "learning_rate": 9.408296055307035e-06, "loss": 0.4251, "step": 4627 }, { "epoch": 0.47051647010980074, "grad_norm": 0.47132402658462524, "learning_rate": 9.410329402196015e-06, "loss": 0.4048, "step": 4628 }, { "epoch": 0.4706181374542497, "grad_norm": 0.4265310764312744, "learning_rate": 9.412362749084994e-06, "loss": 0.3823, "step": 4629 }, { "epoch": 0.4707198047986987, "grad_norm": 0.36965420842170715, "learning_rate": 9.414396095973974e-06, "loss": 0.4074, "step": 4630 }, { "epoch": 0.4708214721431476, "grad_norm": 0.44787755608558655, "learning_rate": 9.416429442862953e-06, "loss": 0.4488, "step": 4631 }, { "epoch": 0.47092313948759656, "grad_norm": 0.43780001997947693, "learning_rate": 9.418462789751932e-06, "loss": 0.4278, "step": 4632 }, { "epoch": 0.47102480683204556, "grad_norm": 0.419780433177948, "learning_rate": 9.420496136640912e-06, "loss": 0.4138, "step": 4633 }, { "epoch": 0.4711264741764945, "grad_norm": 0.4423818588256836, "learning_rate": 9.42252948352989e-06, "loss": 0.3836, "step": 4634 }, { "epoch": 0.4712281415209435, "grad_norm": 0.40761351585388184, "learning_rate": 9.424562830418871e-06, "loss": 0.3885, "step": 4635 }, { "epoch": 0.47132980886539244, "grad_norm": 0.46884921193122864, "learning_rate": 9.42659617730785e-06, "loss": 0.4286, "step": 4636 }, { "epoch": 0.4714314762098414, "grad_norm": 0.4184020161628723, "learning_rate": 9.428629524196828e-06, "loss": 0.4109, "step": 4637 }, { "epoch": 0.4715331435542904, "grad_norm": 0.4059954285621643, "learning_rate": 9.430662871085809e-06, "loss": 0.4058, "step": 4638 }, { "epoch": 0.4716348108987393, "grad_norm": 0.43332573771476746, "learning_rate": 9.432696217974787e-06, "loss": 0.4091, "step": 4639 }, { "epoch": 0.47173647824318826, "grad_norm": 0.3693978190422058, "learning_rate": 9.434729564863766e-06, "loss": 0.4029, "step": 4640 }, { "epoch": 0.47183814558763726, "grad_norm": 0.37080511450767517, "learning_rate": 9.436762911752746e-06, "loss": 0.4073, "step": 4641 }, { "epoch": 0.4719398129320862, "grad_norm": 0.39608851075172424, "learning_rate": 9.438796258641725e-06, "loss": 0.4497, "step": 4642 }, { "epoch": 0.4720414802765352, "grad_norm": 0.3757934272289276, "learning_rate": 9.440829605530705e-06, "loss": 0.4124, "step": 4643 }, { "epoch": 0.47214314762098414, "grad_norm": 0.42118382453918457, "learning_rate": 9.442862952419684e-06, "loss": 0.4199, "step": 4644 }, { "epoch": 0.4722448149654331, "grad_norm": 0.3893331289291382, "learning_rate": 9.444896299308662e-06, "loss": 0.4444, "step": 4645 }, { "epoch": 0.4723464823098821, "grad_norm": 0.4328823685646057, "learning_rate": 9.446929646197643e-06, "loss": 0.436, "step": 4646 }, { "epoch": 0.472448149654331, "grad_norm": 0.41126179695129395, "learning_rate": 9.448962993086621e-06, "loss": 0.3811, "step": 4647 }, { "epoch": 0.47254981699878, "grad_norm": 0.4492729604244232, "learning_rate": 9.450996339975602e-06, "loss": 0.4007, "step": 4648 }, { "epoch": 0.47265148434322896, "grad_norm": 0.3904644846916199, "learning_rate": 9.45302968686458e-06, "loss": 0.4155, "step": 4649 }, { "epoch": 0.4727531516876779, "grad_norm": 0.3843422830104828, "learning_rate": 9.455063033753559e-06, "loss": 0.3938, "step": 4650 }, { "epoch": 0.4728548190321269, "grad_norm": 0.3884534537792206, "learning_rate": 9.45709638064254e-06, "loss": 0.4158, "step": 4651 }, { "epoch": 0.47295648637657584, "grad_norm": 0.41226309537887573, "learning_rate": 9.459129727531518e-06, "loss": 0.4375, "step": 4652 }, { "epoch": 0.47305815372102483, "grad_norm": 0.41231948137283325, "learning_rate": 9.461163074420497e-06, "loss": 0.399, "step": 4653 }, { "epoch": 0.4731598210654738, "grad_norm": 0.37331604957580566, "learning_rate": 9.463196421309477e-06, "loss": 0.3842, "step": 4654 }, { "epoch": 0.4732614884099227, "grad_norm": 0.43985515832901, "learning_rate": 9.465229768198456e-06, "loss": 0.4062, "step": 4655 }, { "epoch": 0.4733631557543717, "grad_norm": 0.45290738344192505, "learning_rate": 9.467263115087434e-06, "loss": 0.3884, "step": 4656 }, { "epoch": 0.47346482309882065, "grad_norm": 0.40391209721565247, "learning_rate": 9.469296461976415e-06, "loss": 0.4064, "step": 4657 }, { "epoch": 0.4735664904432696, "grad_norm": 0.5186455249786377, "learning_rate": 9.471329808865393e-06, "loss": 0.4058, "step": 4658 }, { "epoch": 0.4736681577877186, "grad_norm": 0.40110868215560913, "learning_rate": 9.473363155754372e-06, "loss": 0.4, "step": 4659 }, { "epoch": 0.47376982513216753, "grad_norm": 0.3733367323875427, "learning_rate": 9.475396502643352e-06, "loss": 0.4213, "step": 4660 }, { "epoch": 0.47387149247661653, "grad_norm": 0.5019350647926331, "learning_rate": 9.47742984953233e-06, "loss": 0.4053, "step": 4661 }, { "epoch": 0.47397315982106547, "grad_norm": 0.40120795369148254, "learning_rate": 9.47946319642131e-06, "loss": 0.3947, "step": 4662 }, { "epoch": 0.4740748271655144, "grad_norm": 0.4154285490512848, "learning_rate": 9.48149654331029e-06, "loss": 0.4174, "step": 4663 }, { "epoch": 0.4741764945099634, "grad_norm": 0.4184345006942749, "learning_rate": 9.483529890199268e-06, "loss": 0.4137, "step": 4664 }, { "epoch": 0.47427816185441235, "grad_norm": 0.4155222475528717, "learning_rate": 9.485563237088247e-06, "loss": 0.3933, "step": 4665 }, { "epoch": 0.47437982919886135, "grad_norm": 0.4421784579753876, "learning_rate": 9.487596583977227e-06, "loss": 0.4287, "step": 4666 }, { "epoch": 0.4744814965433103, "grad_norm": 0.41985565423965454, "learning_rate": 9.489629930866206e-06, "loss": 0.3894, "step": 4667 }, { "epoch": 0.47458316388775923, "grad_norm": 0.400172621011734, "learning_rate": 9.491663277755185e-06, "loss": 0.3903, "step": 4668 }, { "epoch": 0.4746848312322082, "grad_norm": 0.46042799949645996, "learning_rate": 9.493696624644165e-06, "loss": 0.4052, "step": 4669 }, { "epoch": 0.47478649857665717, "grad_norm": 0.38737139105796814, "learning_rate": 9.495729971533144e-06, "loss": 0.4126, "step": 4670 }, { "epoch": 0.47488816592110616, "grad_norm": 0.4405587315559387, "learning_rate": 9.497763318422124e-06, "loss": 0.4146, "step": 4671 }, { "epoch": 0.4749898332655551, "grad_norm": 0.4256661832332611, "learning_rate": 9.499796665311103e-06, "loss": 0.386, "step": 4672 }, { "epoch": 0.47509150061000405, "grad_norm": 0.3924521505832672, "learning_rate": 9.501830012200081e-06, "loss": 0.4014, "step": 4673 }, { "epoch": 0.47519316795445304, "grad_norm": 0.4773120880126953, "learning_rate": 9.503863359089062e-06, "loss": 0.3958, "step": 4674 }, { "epoch": 0.475294835298902, "grad_norm": 0.4563470482826233, "learning_rate": 9.50589670597804e-06, "loss": 0.4086, "step": 4675 }, { "epoch": 0.475396502643351, "grad_norm": 0.4163382947444916, "learning_rate": 9.50793005286702e-06, "loss": 0.3723, "step": 4676 }, { "epoch": 0.4754981699877999, "grad_norm": 0.5093685388565063, "learning_rate": 9.509963399756e-06, "loss": 0.3872, "step": 4677 }, { "epoch": 0.47559983733224886, "grad_norm": 0.3943632245063782, "learning_rate": 9.511996746644978e-06, "loss": 0.3752, "step": 4678 }, { "epoch": 0.47570150467669786, "grad_norm": 0.43563228845596313, "learning_rate": 9.514030093533958e-06, "loss": 0.3959, "step": 4679 }, { "epoch": 0.4758031720211468, "grad_norm": 0.49690449237823486, "learning_rate": 9.516063440422937e-06, "loss": 0.4321, "step": 4680 }, { "epoch": 0.47590483936559574, "grad_norm": 0.4320795238018036, "learning_rate": 9.518096787311916e-06, "loss": 0.424, "step": 4681 }, { "epoch": 0.47600650671004474, "grad_norm": 0.4166904091835022, "learning_rate": 9.520130134200896e-06, "loss": 0.401, "step": 4682 }, { "epoch": 0.4761081740544937, "grad_norm": 0.43345099687576294, "learning_rate": 9.522163481089874e-06, "loss": 0.4378, "step": 4683 }, { "epoch": 0.4762098413989427, "grad_norm": 0.4242613911628723, "learning_rate": 9.524196827978855e-06, "loss": 0.4108, "step": 4684 }, { "epoch": 0.4763115087433916, "grad_norm": 0.4438549280166626, "learning_rate": 9.526230174867833e-06, "loss": 0.4177, "step": 4685 }, { "epoch": 0.47641317608784056, "grad_norm": 0.4099796414375305, "learning_rate": 9.528263521756812e-06, "loss": 0.4203, "step": 4686 }, { "epoch": 0.47651484343228956, "grad_norm": 0.37744423747062683, "learning_rate": 9.530296868645792e-06, "loss": 0.4247, "step": 4687 }, { "epoch": 0.4766165107767385, "grad_norm": 0.41823601722717285, "learning_rate": 9.532330215534771e-06, "loss": 0.4156, "step": 4688 }, { "epoch": 0.4767181781211875, "grad_norm": 0.35854509472846985, "learning_rate": 9.534363562423751e-06, "loss": 0.3736, "step": 4689 }, { "epoch": 0.47681984546563644, "grad_norm": 0.42458727955818176, "learning_rate": 9.53639690931273e-06, "loss": 0.4381, "step": 4690 }, { "epoch": 0.4769215128100854, "grad_norm": 0.39141014218330383, "learning_rate": 9.538430256201709e-06, "loss": 0.431, "step": 4691 }, { "epoch": 0.4770231801545344, "grad_norm": 0.4110484719276428, "learning_rate": 9.540463603090689e-06, "loss": 0.441, "step": 4692 }, { "epoch": 0.4771248474989833, "grad_norm": 0.3485964834690094, "learning_rate": 9.542496949979668e-06, "loss": 0.4122, "step": 4693 }, { "epoch": 0.4772265148434323, "grad_norm": 0.40039700269699097, "learning_rate": 9.544530296868646e-06, "loss": 0.3917, "step": 4694 }, { "epoch": 0.47732818218788126, "grad_norm": 0.38975396752357483, "learning_rate": 9.546563643757627e-06, "loss": 0.386, "step": 4695 }, { "epoch": 0.4774298495323302, "grad_norm": 0.40988850593566895, "learning_rate": 9.548596990646605e-06, "loss": 0.4189, "step": 4696 }, { "epoch": 0.4775315168767792, "grad_norm": 0.4501359462738037, "learning_rate": 9.550630337535584e-06, "loss": 0.4157, "step": 4697 }, { "epoch": 0.47763318422122814, "grad_norm": 0.4315517842769623, "learning_rate": 9.552663684424564e-06, "loss": 0.4213, "step": 4698 }, { "epoch": 0.47773485156567713, "grad_norm": 0.4295046031475067, "learning_rate": 9.554697031313543e-06, "loss": 0.4464, "step": 4699 }, { "epoch": 0.4778365189101261, "grad_norm": 0.4121348261833191, "learning_rate": 9.556730378202522e-06, "loss": 0.4229, "step": 4700 }, { "epoch": 0.477938186254575, "grad_norm": 0.4085848927497864, "learning_rate": 9.558763725091502e-06, "loss": 0.4236, "step": 4701 }, { "epoch": 0.478039853599024, "grad_norm": 0.38668292760849, "learning_rate": 9.56079707198048e-06, "loss": 0.4138, "step": 4702 }, { "epoch": 0.47814152094347295, "grad_norm": 0.4154568910598755, "learning_rate": 9.56283041886946e-06, "loss": 0.414, "step": 4703 }, { "epoch": 0.4782431882879219, "grad_norm": 0.39029067754745483, "learning_rate": 9.56486376575844e-06, "loss": 0.4098, "step": 4704 }, { "epoch": 0.4783448556323709, "grad_norm": 0.4260229170322418, "learning_rate": 9.566897112647418e-06, "loss": 0.3738, "step": 4705 }, { "epoch": 0.47844652297681983, "grad_norm": 0.3990184962749481, "learning_rate": 9.568930459536397e-06, "loss": 0.4129, "step": 4706 }, { "epoch": 0.47854819032126883, "grad_norm": 0.3817390203475952, "learning_rate": 9.570963806425377e-06, "loss": 0.442, "step": 4707 }, { "epoch": 0.47864985766571777, "grad_norm": 0.420265257358551, "learning_rate": 9.572997153314356e-06, "loss": 0.4334, "step": 4708 }, { "epoch": 0.4787515250101667, "grad_norm": 0.3915216624736786, "learning_rate": 9.575030500203334e-06, "loss": 0.3994, "step": 4709 }, { "epoch": 0.4788531923546157, "grad_norm": 0.3775688409805298, "learning_rate": 9.577063847092315e-06, "loss": 0.3968, "step": 4710 }, { "epoch": 0.47895485969906465, "grad_norm": 0.39468836784362793, "learning_rate": 9.579097193981293e-06, "loss": 0.3971, "step": 4711 }, { "epoch": 0.47905652704351365, "grad_norm": 0.3732075095176697, "learning_rate": 9.581130540870274e-06, "loss": 0.39, "step": 4712 }, { "epoch": 0.4791581943879626, "grad_norm": 0.38125479221343994, "learning_rate": 9.583163887759252e-06, "loss": 0.425, "step": 4713 }, { "epoch": 0.47925986173241153, "grad_norm": 0.3908647894859314, "learning_rate": 9.585197234648231e-06, "loss": 0.3761, "step": 4714 }, { "epoch": 0.4793615290768605, "grad_norm": 0.3891780376434326, "learning_rate": 9.587230581537211e-06, "loss": 0.4307, "step": 4715 }, { "epoch": 0.47946319642130947, "grad_norm": 0.4132554531097412, "learning_rate": 9.58926392842619e-06, "loss": 0.4269, "step": 4716 }, { "epoch": 0.47956486376575846, "grad_norm": 0.3858942687511444, "learning_rate": 9.59129727531517e-06, "loss": 0.3976, "step": 4717 }, { "epoch": 0.4796665311102074, "grad_norm": 0.37340429425239563, "learning_rate": 9.593330622204149e-06, "loss": 0.4016, "step": 4718 }, { "epoch": 0.47976819845465635, "grad_norm": 0.3673578202724457, "learning_rate": 9.595363969093128e-06, "loss": 0.368, "step": 4719 }, { "epoch": 0.47986986579910534, "grad_norm": 0.373550683259964, "learning_rate": 9.597397315982108e-06, "loss": 0.416, "step": 4720 }, { "epoch": 0.4799715331435543, "grad_norm": 0.3656609356403351, "learning_rate": 9.599430662871087e-06, "loss": 0.4039, "step": 4721 }, { "epoch": 0.4800732004880032, "grad_norm": 0.4329501688480377, "learning_rate": 9.601464009760065e-06, "loss": 0.4015, "step": 4722 }, { "epoch": 0.4801748678324522, "grad_norm": 0.37434685230255127, "learning_rate": 9.603497356649046e-06, "loss": 0.3961, "step": 4723 }, { "epoch": 0.48027653517690116, "grad_norm": 0.39543551206588745, "learning_rate": 9.605530703538024e-06, "loss": 0.4041, "step": 4724 }, { "epoch": 0.48037820252135016, "grad_norm": 0.4323880076408386, "learning_rate": 9.607564050427005e-06, "loss": 0.4443, "step": 4725 }, { "epoch": 0.4804798698657991, "grad_norm": 0.45232826471328735, "learning_rate": 9.609597397315983e-06, "loss": 0.3726, "step": 4726 }, { "epoch": 0.48058153721024804, "grad_norm": 0.4552527070045471, "learning_rate": 9.611630744204962e-06, "loss": 0.3789, "step": 4727 }, { "epoch": 0.48068320455469704, "grad_norm": 0.399688184261322, "learning_rate": 9.613664091093942e-06, "loss": 0.434, "step": 4728 }, { "epoch": 0.480784871899146, "grad_norm": 0.445590078830719, "learning_rate": 9.61569743798292e-06, "loss": 0.4164, "step": 4729 }, { "epoch": 0.480886539243595, "grad_norm": 0.36570635437965393, "learning_rate": 9.617730784871901e-06, "loss": 0.4208, "step": 4730 }, { "epoch": 0.4809882065880439, "grad_norm": 0.4573957920074463, "learning_rate": 9.61976413176088e-06, "loss": 0.3754, "step": 4731 }, { "epoch": 0.48108987393249286, "grad_norm": 0.4211876094341278, "learning_rate": 9.621797478649858e-06, "loss": 0.4185, "step": 4732 }, { "epoch": 0.48119154127694186, "grad_norm": 0.38499653339385986, "learning_rate": 9.623830825538839e-06, "loss": 0.4116, "step": 4733 }, { "epoch": 0.4812932086213908, "grad_norm": 0.43020135164260864, "learning_rate": 9.625864172427817e-06, "loss": 0.3907, "step": 4734 }, { "epoch": 0.4813948759658398, "grad_norm": 0.39908501505851746, "learning_rate": 9.627897519316796e-06, "loss": 0.3721, "step": 4735 }, { "epoch": 0.48149654331028874, "grad_norm": 0.4160127639770508, "learning_rate": 9.629930866205776e-06, "loss": 0.3971, "step": 4736 }, { "epoch": 0.4815982106547377, "grad_norm": 0.40547722578048706, "learning_rate": 9.631964213094755e-06, "loss": 0.3964, "step": 4737 }, { "epoch": 0.4816998779991867, "grad_norm": 0.388387531042099, "learning_rate": 9.633997559983734e-06, "loss": 0.4127, "step": 4738 }, { "epoch": 0.4818015453436356, "grad_norm": 0.43396034836769104, "learning_rate": 9.636030906872714e-06, "loss": 0.4414, "step": 4739 }, { "epoch": 0.4819032126880846, "grad_norm": 0.3866749405860901, "learning_rate": 9.638064253761693e-06, "loss": 0.3956, "step": 4740 }, { "epoch": 0.48200488003253356, "grad_norm": 0.4042127728462219, "learning_rate": 9.640097600650671e-06, "loss": 0.4107, "step": 4741 }, { "epoch": 0.4821065473769825, "grad_norm": 0.3865413963794708, "learning_rate": 9.642130947539652e-06, "loss": 0.4314, "step": 4742 }, { "epoch": 0.4822082147214315, "grad_norm": 0.4162251949310303, "learning_rate": 9.64416429442863e-06, "loss": 0.421, "step": 4743 }, { "epoch": 0.48230988206588044, "grad_norm": 0.428475558757782, "learning_rate": 9.646197641317609e-06, "loss": 0.4082, "step": 4744 }, { "epoch": 0.4824115494103294, "grad_norm": 0.38173019886016846, "learning_rate": 9.648230988206587e-06, "loss": 0.4337, "step": 4745 }, { "epoch": 0.4825132167547784, "grad_norm": 0.3651435673236847, "learning_rate": 9.650264335095568e-06, "loss": 0.4251, "step": 4746 }, { "epoch": 0.4826148840992273, "grad_norm": 0.39507293701171875, "learning_rate": 9.652297681984546e-06, "loss": 0.3865, "step": 4747 }, { "epoch": 0.4827165514436763, "grad_norm": 0.4200981557369232, "learning_rate": 9.654331028873527e-06, "loss": 0.408, "step": 4748 }, { "epoch": 0.48281821878812525, "grad_norm": 0.41495704650878906, "learning_rate": 9.656364375762505e-06, "loss": 0.3981, "step": 4749 }, { "epoch": 0.4829198861325742, "grad_norm": 0.37642914056777954, "learning_rate": 9.658397722651484e-06, "loss": 0.3653, "step": 4750 }, { "epoch": 0.4830215534770232, "grad_norm": 0.42260250449180603, "learning_rate": 9.660431069540464e-06, "loss": 0.4396, "step": 4751 }, { "epoch": 0.48312322082147213, "grad_norm": 0.4426349103450775, "learning_rate": 9.662464416429443e-06, "loss": 0.4389, "step": 4752 }, { "epoch": 0.48322488816592113, "grad_norm": 0.3792797029018402, "learning_rate": 9.664497763318423e-06, "loss": 0.3694, "step": 4753 }, { "epoch": 0.48332655551037007, "grad_norm": 0.3892311155796051, "learning_rate": 9.666531110207402e-06, "loss": 0.4195, "step": 4754 }, { "epoch": 0.483428222854819, "grad_norm": 0.4143829345703125, "learning_rate": 9.66856445709638e-06, "loss": 0.4045, "step": 4755 }, { "epoch": 0.483529890199268, "grad_norm": 0.4640398621559143, "learning_rate": 9.670597803985361e-06, "loss": 0.4462, "step": 4756 }, { "epoch": 0.48363155754371695, "grad_norm": 0.36094334721565247, "learning_rate": 9.67263115087434e-06, "loss": 0.3792, "step": 4757 }, { "epoch": 0.48373322488816595, "grad_norm": 0.3882654011249542, "learning_rate": 9.67466449776332e-06, "loss": 0.4095, "step": 4758 }, { "epoch": 0.4838348922326149, "grad_norm": 0.5562126636505127, "learning_rate": 9.676697844652299e-06, "loss": 0.4341, "step": 4759 }, { "epoch": 0.48393655957706383, "grad_norm": 0.3763691186904907, "learning_rate": 9.678731191541277e-06, "loss": 0.4028, "step": 4760 }, { "epoch": 0.4840382269215128, "grad_norm": 0.46192124485969543, "learning_rate": 9.680764538430258e-06, "loss": 0.4224, "step": 4761 }, { "epoch": 0.48413989426596177, "grad_norm": 0.4093213379383087, "learning_rate": 9.682797885319236e-06, "loss": 0.4111, "step": 4762 }, { "epoch": 0.4842415616104107, "grad_norm": 0.4149594306945801, "learning_rate": 9.684831232208215e-06, "loss": 0.4202, "step": 4763 }, { "epoch": 0.4843432289548597, "grad_norm": 0.3861220180988312, "learning_rate": 9.686864579097195e-06, "loss": 0.422, "step": 4764 }, { "epoch": 0.48444489629930865, "grad_norm": 0.4175839424133301, "learning_rate": 9.688897925986174e-06, "loss": 0.4483, "step": 4765 }, { "epoch": 0.48454656364375764, "grad_norm": 0.40737655758857727, "learning_rate": 9.690931272875154e-06, "loss": 0.4098, "step": 4766 }, { "epoch": 0.4846482309882066, "grad_norm": 0.41751226782798767, "learning_rate": 9.692964619764133e-06, "loss": 0.438, "step": 4767 }, { "epoch": 0.4847498983326555, "grad_norm": 0.42432090640068054, "learning_rate": 9.694997966653111e-06, "loss": 0.399, "step": 4768 }, { "epoch": 0.4848515656771045, "grad_norm": 0.3951064646244049, "learning_rate": 9.697031313542092e-06, "loss": 0.3991, "step": 4769 }, { "epoch": 0.48495323302155346, "grad_norm": 0.43846753239631653, "learning_rate": 9.69906466043107e-06, "loss": 0.4005, "step": 4770 }, { "epoch": 0.48505490036600246, "grad_norm": 0.4458416700363159, "learning_rate": 9.70109800732005e-06, "loss": 0.4179, "step": 4771 }, { "epoch": 0.4851565677104514, "grad_norm": 0.41882339119911194, "learning_rate": 9.70313135420903e-06, "loss": 0.4741, "step": 4772 }, { "epoch": 0.48525823505490034, "grad_norm": 0.40802696347236633, "learning_rate": 9.705164701098008e-06, "loss": 0.3957, "step": 4773 }, { "epoch": 0.48535990239934934, "grad_norm": 0.5072979927062988, "learning_rate": 9.707198047986988e-06, "loss": 0.4019, "step": 4774 }, { "epoch": 0.4854615697437983, "grad_norm": 0.42080068588256836, "learning_rate": 9.709231394875967e-06, "loss": 0.4466, "step": 4775 }, { "epoch": 0.4855632370882473, "grad_norm": 0.3998749852180481, "learning_rate": 9.711264741764946e-06, "loss": 0.3846, "step": 4776 }, { "epoch": 0.4856649044326962, "grad_norm": 0.4837021231651306, "learning_rate": 9.713298088653926e-06, "loss": 0.4045, "step": 4777 }, { "epoch": 0.48576657177714516, "grad_norm": 0.3691498935222626, "learning_rate": 9.715331435542905e-06, "loss": 0.3911, "step": 4778 }, { "epoch": 0.48586823912159416, "grad_norm": 0.40610790252685547, "learning_rate": 9.717364782431883e-06, "loss": 0.433, "step": 4779 }, { "epoch": 0.4859699064660431, "grad_norm": 0.41667523980140686, "learning_rate": 9.719398129320862e-06, "loss": 0.4186, "step": 4780 }, { "epoch": 0.4860715738104921, "grad_norm": 0.38141289353370667, "learning_rate": 9.721431476209842e-06, "loss": 0.4175, "step": 4781 }, { "epoch": 0.48617324115494104, "grad_norm": 0.3677375316619873, "learning_rate": 9.723464823098821e-06, "loss": 0.374, "step": 4782 }, { "epoch": 0.48627490849939, "grad_norm": 0.4186026453971863, "learning_rate": 9.7254981699878e-06, "loss": 0.3933, "step": 4783 }, { "epoch": 0.486376575843839, "grad_norm": 0.40993040800094604, "learning_rate": 9.72753151687678e-06, "loss": 0.3888, "step": 4784 }, { "epoch": 0.4864782431882879, "grad_norm": 0.41524985432624817, "learning_rate": 9.729564863765759e-06, "loss": 0.395, "step": 4785 }, { "epoch": 0.48657991053273686, "grad_norm": 0.4032003581523895, "learning_rate": 9.731598210654737e-06, "loss": 0.4226, "step": 4786 }, { "epoch": 0.48668157787718586, "grad_norm": 0.3880484998226166, "learning_rate": 9.733631557543718e-06, "loss": 0.4192, "step": 4787 }, { "epoch": 0.4867832452216348, "grad_norm": 0.4857250452041626, "learning_rate": 9.735664904432696e-06, "loss": 0.4342, "step": 4788 }, { "epoch": 0.4868849125660838, "grad_norm": 0.4117268919944763, "learning_rate": 9.737698251321677e-06, "loss": 0.3937, "step": 4789 }, { "epoch": 0.48698657991053274, "grad_norm": 0.3942843973636627, "learning_rate": 9.739731598210655e-06, "loss": 0.3954, "step": 4790 }, { "epoch": 0.4870882472549817, "grad_norm": 0.41876402497291565, "learning_rate": 9.741764945099634e-06, "loss": 0.4306, "step": 4791 }, { "epoch": 0.4871899145994307, "grad_norm": 0.38322797417640686, "learning_rate": 9.743798291988614e-06, "loss": 0.3869, "step": 4792 }, { "epoch": 0.4872915819438796, "grad_norm": 0.39936915040016174, "learning_rate": 9.745831638877593e-06, "loss": 0.3871, "step": 4793 }, { "epoch": 0.4873932492883286, "grad_norm": 0.40614551305770874, "learning_rate": 9.747864985766573e-06, "loss": 0.4121, "step": 4794 }, { "epoch": 0.48749491663277755, "grad_norm": 0.43726420402526855, "learning_rate": 9.749898332655552e-06, "loss": 0.4241, "step": 4795 }, { "epoch": 0.4875965839772265, "grad_norm": 0.37814757227897644, "learning_rate": 9.75193167954453e-06, "loss": 0.4132, "step": 4796 }, { "epoch": 0.4876982513216755, "grad_norm": 0.4176695644855499, "learning_rate": 9.75396502643351e-06, "loss": 0.4213, "step": 4797 }, { "epoch": 0.48779991866612443, "grad_norm": 0.38698670268058777, "learning_rate": 9.75599837332249e-06, "loss": 0.3716, "step": 4798 }, { "epoch": 0.48790158601057343, "grad_norm": 0.40121760964393616, "learning_rate": 9.75803172021147e-06, "loss": 0.4286, "step": 4799 }, { "epoch": 0.48800325335502237, "grad_norm": 0.4040447771549225, "learning_rate": 9.760065067100448e-06, "loss": 0.4147, "step": 4800 }, { "epoch": 0.4881049206994713, "grad_norm": 0.4592590928077698, "learning_rate": 9.762098413989427e-06, "loss": 0.4114, "step": 4801 }, { "epoch": 0.4882065880439203, "grad_norm": 0.37765538692474365, "learning_rate": 9.764131760878407e-06, "loss": 0.4103, "step": 4802 }, { "epoch": 0.48830825538836925, "grad_norm": 0.43360427021980286, "learning_rate": 9.766165107767386e-06, "loss": 0.4083, "step": 4803 }, { "epoch": 0.4884099227328182, "grad_norm": 0.4401862621307373, "learning_rate": 9.768198454656365e-06, "loss": 0.4074, "step": 4804 }, { "epoch": 0.4885115900772672, "grad_norm": 0.3895278871059418, "learning_rate": 9.770231801545345e-06, "loss": 0.4238, "step": 4805 }, { "epoch": 0.48861325742171613, "grad_norm": 0.38275134563446045, "learning_rate": 9.772265148434324e-06, "loss": 0.4067, "step": 4806 }, { "epoch": 0.4887149247661651, "grad_norm": 0.43124350905418396, "learning_rate": 9.774298495323304e-06, "loss": 0.3956, "step": 4807 }, { "epoch": 0.48881659211061407, "grad_norm": 0.4339328110218048, "learning_rate": 9.776331842212283e-06, "loss": 0.4407, "step": 4808 }, { "epoch": 0.488918259455063, "grad_norm": 0.38861194252967834, "learning_rate": 9.778365189101261e-06, "loss": 0.4002, "step": 4809 }, { "epoch": 0.489019926799512, "grad_norm": 0.35192015767097473, "learning_rate": 9.780398535990242e-06, "loss": 0.3923, "step": 4810 }, { "epoch": 0.48912159414396095, "grad_norm": 0.4192329943180084, "learning_rate": 9.78243188287922e-06, "loss": 0.3921, "step": 4811 }, { "epoch": 0.48922326148840994, "grad_norm": 0.41857242584228516, "learning_rate": 9.7844652297682e-06, "loss": 0.4239, "step": 4812 }, { "epoch": 0.4893249288328589, "grad_norm": 0.3824096918106079, "learning_rate": 9.786498576657179e-06, "loss": 0.4621, "step": 4813 }, { "epoch": 0.4894265961773078, "grad_norm": 0.40308043360710144, "learning_rate": 9.788531923546158e-06, "loss": 0.4269, "step": 4814 }, { "epoch": 0.4895282635217568, "grad_norm": 0.3897024095058441, "learning_rate": 9.790565270435138e-06, "loss": 0.4152, "step": 4815 }, { "epoch": 0.48962993086620576, "grad_norm": 0.4103725552558899, "learning_rate": 9.792598617324117e-06, "loss": 0.4266, "step": 4816 }, { "epoch": 0.48973159821065476, "grad_norm": 0.4403732120990753, "learning_rate": 9.794631964213095e-06, "loss": 0.4137, "step": 4817 }, { "epoch": 0.4898332655551037, "grad_norm": 0.36789801716804504, "learning_rate": 9.796665311102074e-06, "loss": 0.4182, "step": 4818 }, { "epoch": 0.48993493289955264, "grad_norm": 0.4165187478065491, "learning_rate": 9.798698657991054e-06, "loss": 0.3956, "step": 4819 }, { "epoch": 0.49003660024400164, "grad_norm": 0.4211614429950714, "learning_rate": 9.800732004880033e-06, "loss": 0.4042, "step": 4820 }, { "epoch": 0.4901382675884506, "grad_norm": 0.4175916314125061, "learning_rate": 9.802765351769012e-06, "loss": 0.4142, "step": 4821 }, { "epoch": 0.4902399349328996, "grad_norm": 0.4091710150241852, "learning_rate": 9.804798698657992e-06, "loss": 0.3945, "step": 4822 }, { "epoch": 0.4903416022773485, "grad_norm": 0.374748170375824, "learning_rate": 9.80683204554697e-06, "loss": 0.4157, "step": 4823 }, { "epoch": 0.49044326962179746, "grad_norm": 0.4378897249698639, "learning_rate": 9.80886539243595e-06, "loss": 0.4097, "step": 4824 }, { "epoch": 0.49054493696624646, "grad_norm": 0.3918280601501465, "learning_rate": 9.81089873932493e-06, "loss": 0.4098, "step": 4825 }, { "epoch": 0.4906466043106954, "grad_norm": 0.47639602422714233, "learning_rate": 9.812932086213908e-06, "loss": 0.4318, "step": 4826 }, { "epoch": 0.49074827165514434, "grad_norm": 0.4015963077545166, "learning_rate": 9.814965433102887e-06, "loss": 0.3904, "step": 4827 }, { "epoch": 0.49084993899959334, "grad_norm": 0.3623785972595215, "learning_rate": 9.816998779991867e-06, "loss": 0.3944, "step": 4828 }, { "epoch": 0.4909516063440423, "grad_norm": 0.43028953671455383, "learning_rate": 9.819032126880846e-06, "loss": 0.4102, "step": 4829 }, { "epoch": 0.4910532736884913, "grad_norm": 0.4252936840057373, "learning_rate": 9.821065473769826e-06, "loss": 0.4074, "step": 4830 }, { "epoch": 0.4911549410329402, "grad_norm": 0.3913659155368805, "learning_rate": 9.823098820658805e-06, "loss": 0.4126, "step": 4831 }, { "epoch": 0.49125660837738916, "grad_norm": 0.3476693630218506, "learning_rate": 9.825132167547783e-06, "loss": 0.3949, "step": 4832 }, { "epoch": 0.49135827572183816, "grad_norm": 0.3986165523529053, "learning_rate": 9.827165514436764e-06, "loss": 0.3817, "step": 4833 }, { "epoch": 0.4914599430662871, "grad_norm": 0.3626299500465393, "learning_rate": 9.829198861325742e-06, "loss": 0.401, "step": 4834 }, { "epoch": 0.4915616104107361, "grad_norm": 0.3831329345703125, "learning_rate": 9.831232208214723e-06, "loss": 0.4244, "step": 4835 }, { "epoch": 0.49166327775518504, "grad_norm": 0.4194560647010803, "learning_rate": 9.833265555103701e-06, "loss": 0.409, "step": 4836 }, { "epoch": 0.491764945099634, "grad_norm": 0.36602839827537537, "learning_rate": 9.83529890199268e-06, "loss": 0.4054, "step": 4837 }, { "epoch": 0.491866612444083, "grad_norm": 0.3641825020313263, "learning_rate": 9.83733224888166e-06, "loss": 0.4045, "step": 4838 }, { "epoch": 0.4919682797885319, "grad_norm": 0.3417621850967407, "learning_rate": 9.839365595770639e-06, "loss": 0.4116, "step": 4839 }, { "epoch": 0.4920699471329809, "grad_norm": 0.40272292494773865, "learning_rate": 9.84139894265962e-06, "loss": 0.4106, "step": 4840 }, { "epoch": 0.49217161447742985, "grad_norm": 0.38911551237106323, "learning_rate": 9.843432289548598e-06, "loss": 0.3839, "step": 4841 }, { "epoch": 0.4922732818218788, "grad_norm": 0.3832871913909912, "learning_rate": 9.845465636437577e-06, "loss": 0.4205, "step": 4842 }, { "epoch": 0.4923749491663278, "grad_norm": 0.4221353828907013, "learning_rate": 9.847498983326557e-06, "loss": 0.4294, "step": 4843 }, { "epoch": 0.49247661651077673, "grad_norm": 0.3739832043647766, "learning_rate": 9.849532330215536e-06, "loss": 0.4043, "step": 4844 }, { "epoch": 0.49257828385522573, "grad_norm": 0.3909609019756317, "learning_rate": 9.851565677104516e-06, "loss": 0.3973, "step": 4845 }, { "epoch": 0.49267995119967467, "grad_norm": 0.35642021894454956, "learning_rate": 9.853599023993495e-06, "loss": 0.3543, "step": 4846 }, { "epoch": 0.4927816185441236, "grad_norm": 0.3920999765396118, "learning_rate": 9.855632370882473e-06, "loss": 0.3589, "step": 4847 }, { "epoch": 0.4928832858885726, "grad_norm": 0.36445125937461853, "learning_rate": 9.857665717771454e-06, "loss": 0.4112, "step": 4848 }, { "epoch": 0.49298495323302155, "grad_norm": 0.4134778380393982, "learning_rate": 9.859699064660432e-06, "loss": 0.4056, "step": 4849 }, { "epoch": 0.4930866205774705, "grad_norm": 0.37105879187583923, "learning_rate": 9.861732411549411e-06, "loss": 0.4575, "step": 4850 }, { "epoch": 0.4931882879219195, "grad_norm": 0.3750189542770386, "learning_rate": 9.863765758438391e-06, "loss": 0.4152, "step": 4851 }, { "epoch": 0.49328995526636843, "grad_norm": 0.42984819412231445, "learning_rate": 9.86579910532737e-06, "loss": 0.4308, "step": 4852 }, { "epoch": 0.4933916226108174, "grad_norm": 0.3842010498046875, "learning_rate": 9.867832452216348e-06, "loss": 0.4216, "step": 4853 }, { "epoch": 0.49349328995526637, "grad_norm": 0.4164332449436188, "learning_rate": 9.869865799105329e-06, "loss": 0.3882, "step": 4854 }, { "epoch": 0.4935949572997153, "grad_norm": 0.36762678623199463, "learning_rate": 9.871899145994307e-06, "loss": 0.4248, "step": 4855 }, { "epoch": 0.4936966246441643, "grad_norm": 0.40151676535606384, "learning_rate": 9.873932492883286e-06, "loss": 0.3807, "step": 4856 }, { "epoch": 0.49379829198861325, "grad_norm": 0.4160763919353485, "learning_rate": 9.875965839772266e-06, "loss": 0.4229, "step": 4857 }, { "epoch": 0.49389995933306224, "grad_norm": 0.3432534337043762, "learning_rate": 9.877999186661245e-06, "loss": 0.3881, "step": 4858 }, { "epoch": 0.4940016266775112, "grad_norm": 0.39753106236457825, "learning_rate": 9.880032533550224e-06, "loss": 0.4119, "step": 4859 }, { "epoch": 0.4941032940219601, "grad_norm": 0.36897650361061096, "learning_rate": 9.882065880439204e-06, "loss": 0.4648, "step": 4860 }, { "epoch": 0.4942049613664091, "grad_norm": 0.3955739736557007, "learning_rate": 9.884099227328183e-06, "loss": 0.3946, "step": 4861 }, { "epoch": 0.49430662871085806, "grad_norm": 0.3813461363315582, "learning_rate": 9.886132574217161e-06, "loss": 0.417, "step": 4862 }, { "epoch": 0.49440829605530706, "grad_norm": 0.3413260877132416, "learning_rate": 9.888165921106142e-06, "loss": 0.3912, "step": 4863 }, { "epoch": 0.494509963399756, "grad_norm": 0.36131325364112854, "learning_rate": 9.89019926799512e-06, "loss": 0.3734, "step": 4864 }, { "epoch": 0.49461163074420494, "grad_norm": 0.41177332401275635, "learning_rate": 9.892232614884099e-06, "loss": 0.4436, "step": 4865 }, { "epoch": 0.49471329808865394, "grad_norm": 0.3856479823589325, "learning_rate": 9.89426596177308e-06, "loss": 0.4067, "step": 4866 }, { "epoch": 0.4948149654331029, "grad_norm": 0.373725563287735, "learning_rate": 9.896299308662058e-06, "loss": 0.3797, "step": 4867 }, { "epoch": 0.4949166327775518, "grad_norm": 0.41768699884414673, "learning_rate": 9.898332655551037e-06, "loss": 0.4003, "step": 4868 }, { "epoch": 0.4950183001220008, "grad_norm": 0.38065072894096375, "learning_rate": 9.900366002440017e-06, "loss": 0.3789, "step": 4869 }, { "epoch": 0.49511996746644976, "grad_norm": 0.40291503071784973, "learning_rate": 9.902399349328996e-06, "loss": 0.389, "step": 4870 }, { "epoch": 0.49522163481089876, "grad_norm": 0.4023595154285431, "learning_rate": 9.904432696217976e-06, "loss": 0.4284, "step": 4871 }, { "epoch": 0.4953233021553477, "grad_norm": 0.40231236815452576, "learning_rate": 9.906466043106955e-06, "loss": 0.3922, "step": 4872 }, { "epoch": 0.49542496949979664, "grad_norm": 0.3479550778865814, "learning_rate": 9.908499389995933e-06, "loss": 0.4255, "step": 4873 }, { "epoch": 0.49552663684424564, "grad_norm": 0.39207884669303894, "learning_rate": 9.910532736884914e-06, "loss": 0.3573, "step": 4874 }, { "epoch": 0.4956283041886946, "grad_norm": 0.4345601201057434, "learning_rate": 9.912566083773892e-06, "loss": 0.4329, "step": 4875 }, { "epoch": 0.4957299715331436, "grad_norm": 0.3795695900917053, "learning_rate": 9.914599430662872e-06, "loss": 0.405, "step": 4876 }, { "epoch": 0.4958316388775925, "grad_norm": 0.40511399507522583, "learning_rate": 9.916632777551851e-06, "loss": 0.3941, "step": 4877 }, { "epoch": 0.49593330622204146, "grad_norm": 0.4486497938632965, "learning_rate": 9.91866612444083e-06, "loss": 0.4082, "step": 4878 }, { "epoch": 0.49603497356649046, "grad_norm": 0.39014896750450134, "learning_rate": 9.92069947132981e-06, "loss": 0.4289, "step": 4879 }, { "epoch": 0.4961366409109394, "grad_norm": 0.3811051845550537, "learning_rate": 9.922732818218789e-06, "loss": 0.4132, "step": 4880 }, { "epoch": 0.4962383082553884, "grad_norm": 0.44782906770706177, "learning_rate": 9.924766165107769e-06, "loss": 0.4028, "step": 4881 }, { "epoch": 0.49633997559983734, "grad_norm": 0.3799636960029602, "learning_rate": 9.926799511996748e-06, "loss": 0.4011, "step": 4882 }, { "epoch": 0.4964416429442863, "grad_norm": 0.38681560754776, "learning_rate": 9.928832858885726e-06, "loss": 0.4007, "step": 4883 }, { "epoch": 0.4965433102887353, "grad_norm": 0.42908212542533875, "learning_rate": 9.930866205774707e-06, "loss": 0.4383, "step": 4884 }, { "epoch": 0.4966449776331842, "grad_norm": 0.37515121698379517, "learning_rate": 9.932899552663685e-06, "loss": 0.392, "step": 4885 }, { "epoch": 0.4967466449776332, "grad_norm": 0.3942289650440216, "learning_rate": 9.934932899552666e-06, "loss": 0.3984, "step": 4886 }, { "epoch": 0.49684831232208215, "grad_norm": 0.4007449448108673, "learning_rate": 9.936966246441644e-06, "loss": 0.3794, "step": 4887 }, { "epoch": 0.4969499796665311, "grad_norm": 0.36496642231941223, "learning_rate": 9.938999593330623e-06, "loss": 0.415, "step": 4888 }, { "epoch": 0.4970516470109801, "grad_norm": 0.40295103192329407, "learning_rate": 9.941032940219603e-06, "loss": 0.4217, "step": 4889 }, { "epoch": 0.49715331435542903, "grad_norm": 0.403704434633255, "learning_rate": 9.943066287108582e-06, "loss": 0.4221, "step": 4890 }, { "epoch": 0.497254981699878, "grad_norm": 0.38548848032951355, "learning_rate": 9.94509963399756e-06, "loss": 0.4137, "step": 4891 }, { "epoch": 0.49735664904432697, "grad_norm": 0.35405364632606506, "learning_rate": 9.947132980886541e-06, "loss": 0.3907, "step": 4892 }, { "epoch": 0.4974583163887759, "grad_norm": 0.43341273069381714, "learning_rate": 9.94916632777552e-06, "loss": 0.4583, "step": 4893 }, { "epoch": 0.4975599837332249, "grad_norm": 0.37991565465927124, "learning_rate": 9.951199674664498e-06, "loss": 0.3977, "step": 4894 }, { "epoch": 0.49766165107767385, "grad_norm": 0.37841248512268066, "learning_rate": 9.953233021553479e-06, "loss": 0.4039, "step": 4895 }, { "epoch": 0.4977633184221228, "grad_norm": 0.3948635458946228, "learning_rate": 9.955266368442457e-06, "loss": 0.4011, "step": 4896 }, { "epoch": 0.4978649857665718, "grad_norm": 0.4017220735549927, "learning_rate": 9.957299715331436e-06, "loss": 0.4106, "step": 4897 }, { "epoch": 0.49796665311102073, "grad_norm": 0.4386610686779022, "learning_rate": 9.959333062220416e-06, "loss": 0.4408, "step": 4898 }, { "epoch": 0.4980683204554697, "grad_norm": 0.35342341661453247, "learning_rate": 9.961366409109395e-06, "loss": 0.3849, "step": 4899 }, { "epoch": 0.49816998779991867, "grad_norm": 0.4049927294254303, "learning_rate": 9.963399755998373e-06, "loss": 0.4073, "step": 4900 }, { "epoch": 0.4982716551443676, "grad_norm": 0.4326763451099396, "learning_rate": 9.965433102887354e-06, "loss": 0.4093, "step": 4901 }, { "epoch": 0.4983733224888166, "grad_norm": 0.39677026867866516, "learning_rate": 9.967466449776332e-06, "loss": 0.4065, "step": 4902 }, { "epoch": 0.49847498983326555, "grad_norm": 0.40209537744522095, "learning_rate": 9.969499796665311e-06, "loss": 0.3714, "step": 4903 }, { "epoch": 0.49857665717771454, "grad_norm": 0.4222102761268616, "learning_rate": 9.971533143554291e-06, "loss": 0.424, "step": 4904 }, { "epoch": 0.4986783245221635, "grad_norm": 0.3938067853450775, "learning_rate": 9.97356649044327e-06, "loss": 0.4318, "step": 4905 }, { "epoch": 0.4987799918666124, "grad_norm": 0.38196417689323425, "learning_rate": 9.975599837332249e-06, "loss": 0.3889, "step": 4906 }, { "epoch": 0.4988816592110614, "grad_norm": 0.40458014607429504, "learning_rate": 9.977633184221229e-06, "loss": 0.3761, "step": 4907 }, { "epoch": 0.49898332655551036, "grad_norm": 0.38958051800727844, "learning_rate": 9.979666531110208e-06, "loss": 0.4054, "step": 4908 }, { "epoch": 0.4990849938999593, "grad_norm": 0.352727472782135, "learning_rate": 9.981699877999186e-06, "loss": 0.399, "step": 4909 }, { "epoch": 0.4991866612444083, "grad_norm": 0.39570358395576477, "learning_rate": 9.983733224888167e-06, "loss": 0.3979, "step": 4910 }, { "epoch": 0.49928832858885724, "grad_norm": 0.3444613516330719, "learning_rate": 9.985766571777145e-06, "loss": 0.3735, "step": 4911 }, { "epoch": 0.49938999593330624, "grad_norm": 0.36811670660972595, "learning_rate": 9.987799918666126e-06, "loss": 0.3819, "step": 4912 }, { "epoch": 0.4994916632777552, "grad_norm": 0.36985933780670166, "learning_rate": 9.989833265555104e-06, "loss": 0.4195, "step": 4913 }, { "epoch": 0.4995933306222041, "grad_norm": 0.39557865262031555, "learning_rate": 9.991866612444083e-06, "loss": 0.372, "step": 4914 }, { "epoch": 0.4996949979666531, "grad_norm": 0.415293425321579, "learning_rate": 9.993899959333063e-06, "loss": 0.394, "step": 4915 }, { "epoch": 0.49979666531110206, "grad_norm": 0.38363197445869446, "learning_rate": 9.995933306222042e-06, "loss": 0.4307, "step": 4916 }, { "epoch": 0.49989833265555106, "grad_norm": 0.41073375940322876, "learning_rate": 9.997966653111022e-06, "loss": 0.3979, "step": 4917 }, { "epoch": 0.5, "grad_norm": 0.3751371204853058, "learning_rate": 1e-05, "loss": 0.3942, "step": 4918 }, { "epoch": 0.5001016673444489, "grad_norm": 0.3908940851688385, "learning_rate": 9.999999987405595e-06, "loss": 0.4266, "step": 4919 }, { "epoch": 0.5002033346888979, "grad_norm": 0.3835591673851013, "learning_rate": 9.999999949622376e-06, "loss": 0.388, "step": 4920 }, { "epoch": 0.5003050020333469, "grad_norm": 0.3722604811191559, "learning_rate": 9.999999886650348e-06, "loss": 0.3876, "step": 4921 }, { "epoch": 0.5004066693777959, "grad_norm": 0.38379985094070435, "learning_rate": 9.999999798489507e-06, "loss": 0.454, "step": 4922 }, { "epoch": 0.5005083367222448, "grad_norm": 0.3829493522644043, "learning_rate": 9.999999685139857e-06, "loss": 0.4195, "step": 4923 }, { "epoch": 0.5006100040666938, "grad_norm": 0.37734663486480713, "learning_rate": 9.999999546601396e-06, "loss": 0.382, "step": 4924 }, { "epoch": 0.5007116714111427, "grad_norm": 0.42206987738609314, "learning_rate": 9.999999382874123e-06, "loss": 0.4204, "step": 4925 }, { "epoch": 0.5008133387555918, "grad_norm": 0.3859146237373352, "learning_rate": 9.999999193958042e-06, "loss": 0.4176, "step": 4926 }, { "epoch": 0.5009150061000407, "grad_norm": 0.3889378607273102, "learning_rate": 9.999998979853157e-06, "loss": 0.3999, "step": 4927 }, { "epoch": 0.5010166734444896, "grad_norm": 0.4099138379096985, "learning_rate": 9.999998740559463e-06, "loss": 0.4326, "step": 4928 }, { "epoch": 0.5011183407889386, "grad_norm": 0.36711522936820984, "learning_rate": 9.999998476076962e-06, "loss": 0.4332, "step": 4929 }, { "epoch": 0.5012200081333875, "grad_norm": 0.43673175573349, "learning_rate": 9.99999818640566e-06, "loss": 0.3865, "step": 4930 }, { "epoch": 0.5013216754778366, "grad_norm": 0.3979143500328064, "learning_rate": 9.999997871545553e-06, "loss": 0.438, "step": 4931 }, { "epoch": 0.5014233428222855, "grad_norm": 0.38804540038108826, "learning_rate": 9.999997531496645e-06, "loss": 0.3869, "step": 4932 }, { "epoch": 0.5015250101667345, "grad_norm": 0.41553109884262085, "learning_rate": 9.999997166258938e-06, "loss": 0.3849, "step": 4933 }, { "epoch": 0.5016266775111834, "grad_norm": 0.3786296248435974, "learning_rate": 9.999996775832433e-06, "loss": 0.4155, "step": 4934 }, { "epoch": 0.5017283448556323, "grad_norm": 0.4196734130382538, "learning_rate": 9.999996360217134e-06, "loss": 0.4021, "step": 4935 }, { "epoch": 0.5018300122000814, "grad_norm": 0.4331638813018799, "learning_rate": 9.99999591941304e-06, "loss": 0.3749, "step": 4936 }, { "epoch": 0.5019316795445303, "grad_norm": 0.3725004196166992, "learning_rate": 9.999995453420154e-06, "loss": 0.4244, "step": 4937 }, { "epoch": 0.5020333468889793, "grad_norm": 0.42787715792655945, "learning_rate": 9.999994962238482e-06, "loss": 0.4321, "step": 4938 }, { "epoch": 0.5021350142334282, "grad_norm": 0.393482506275177, "learning_rate": 9.99999444586802e-06, "loss": 0.4183, "step": 4939 }, { "epoch": 0.5022366815778772, "grad_norm": 0.3455030620098114, "learning_rate": 9.999993904308776e-06, "loss": 0.3677, "step": 4940 }, { "epoch": 0.5023383489223261, "grad_norm": 0.3691576421260834, "learning_rate": 9.999993337560751e-06, "loss": 0.4078, "step": 4941 }, { "epoch": 0.5024400162667751, "grad_norm": 0.4138648509979248, "learning_rate": 9.99999274562395e-06, "loss": 0.3739, "step": 4942 }, { "epoch": 0.5025416836112241, "grad_norm": 0.4078255593776703, "learning_rate": 9.99999212849837e-06, "loss": 0.432, "step": 4943 }, { "epoch": 0.502643350955673, "grad_norm": 0.3560082018375397, "learning_rate": 9.99999148618402e-06, "loss": 0.3847, "step": 4944 }, { "epoch": 0.502745018300122, "grad_norm": 0.38923442363739014, "learning_rate": 9.9999908186809e-06, "loss": 0.4398, "step": 4945 }, { "epoch": 0.5028466856445709, "grad_norm": 0.41968271136283875, "learning_rate": 9.999990125989014e-06, "loss": 0.4271, "step": 4946 }, { "epoch": 0.50294835298902, "grad_norm": 0.40860357880592346, "learning_rate": 9.999989408108366e-06, "loss": 0.4428, "step": 4947 }, { "epoch": 0.5030500203334689, "grad_norm": 0.35223478078842163, "learning_rate": 9.999988665038962e-06, "loss": 0.4224, "step": 4948 }, { "epoch": 0.5031516876779178, "grad_norm": 0.3947370946407318, "learning_rate": 9.9999878967808e-06, "loss": 0.4091, "step": 4949 }, { "epoch": 0.5032533550223668, "grad_norm": 0.45935338735580444, "learning_rate": 9.99998710333389e-06, "loss": 0.4277, "step": 4950 }, { "epoch": 0.5033550223668157, "grad_norm": 0.43121108412742615, "learning_rate": 9.99998628469823e-06, "loss": 0.4138, "step": 4951 }, { "epoch": 0.5034566897112648, "grad_norm": 0.3799230456352234, "learning_rate": 9.99998544087383e-06, "loss": 0.4093, "step": 4952 }, { "epoch": 0.5035583570557137, "grad_norm": 0.4284423589706421, "learning_rate": 9.99998457186069e-06, "loss": 0.3851, "step": 4953 }, { "epoch": 0.5036600244001627, "grad_norm": 0.40778523683547974, "learning_rate": 9.999983677658817e-06, "loss": 0.4004, "step": 4954 }, { "epoch": 0.5037616917446116, "grad_norm": 0.405105322599411, "learning_rate": 9.999982758268214e-06, "loss": 0.394, "step": 4955 }, { "epoch": 0.5038633590890605, "grad_norm": 0.4302074611186981, "learning_rate": 9.999981813688886e-06, "loss": 0.4238, "step": 4956 }, { "epoch": 0.5039650264335096, "grad_norm": 0.41272643208503723, "learning_rate": 9.999980843920838e-06, "loss": 0.3903, "step": 4957 }, { "epoch": 0.5040666937779585, "grad_norm": 0.4315335154533386, "learning_rate": 9.999979848964074e-06, "loss": 0.3979, "step": 4958 }, { "epoch": 0.5041683611224075, "grad_norm": 0.4071986675262451, "learning_rate": 9.999978828818599e-06, "loss": 0.3838, "step": 4959 }, { "epoch": 0.5042700284668564, "grad_norm": 0.44822728633880615, "learning_rate": 9.99997778348442e-06, "loss": 0.4227, "step": 4960 }, { "epoch": 0.5043716958113054, "grad_norm": 0.36915960907936096, "learning_rate": 9.999976712961542e-06, "loss": 0.3721, "step": 4961 }, { "epoch": 0.5044733631557544, "grad_norm": 0.42828258872032166, "learning_rate": 9.999975617249969e-06, "loss": 0.398, "step": 4962 }, { "epoch": 0.5045750305002034, "grad_norm": 0.4839491844177246, "learning_rate": 9.999974496349707e-06, "loss": 0.4026, "step": 4963 }, { "epoch": 0.5046766978446523, "grad_norm": 0.42892974615097046, "learning_rate": 9.999973350260762e-06, "loss": 0.4103, "step": 4964 }, { "epoch": 0.5047783651891012, "grad_norm": 0.3682436943054199, "learning_rate": 9.999972178983136e-06, "loss": 0.3745, "step": 4965 }, { "epoch": 0.5048800325335502, "grad_norm": 0.4337086081504822, "learning_rate": 9.999970982516842e-06, "loss": 0.4142, "step": 4966 }, { "epoch": 0.5049816998779992, "grad_norm": 0.4077754020690918, "learning_rate": 9.999969760861881e-06, "loss": 0.3903, "step": 4967 }, { "epoch": 0.5050833672224482, "grad_norm": 0.41810110211372375, "learning_rate": 9.999968514018261e-06, "loss": 0.3764, "step": 4968 }, { "epoch": 0.5051850345668971, "grad_norm": 0.39749813079833984, "learning_rate": 9.999967241985989e-06, "loss": 0.3948, "step": 4969 }, { "epoch": 0.5052867019113461, "grad_norm": 0.37744951248168945, "learning_rate": 9.999965944765069e-06, "loss": 0.4294, "step": 4970 }, { "epoch": 0.505388369255795, "grad_norm": 0.40092992782592773, "learning_rate": 9.999964622355507e-06, "loss": 0.3872, "step": 4971 }, { "epoch": 0.505490036600244, "grad_norm": 0.41247227787971497, "learning_rate": 9.999963274757313e-06, "loss": 0.4086, "step": 4972 }, { "epoch": 0.505591703944693, "grad_norm": 0.45561519265174866, "learning_rate": 9.999961901970494e-06, "loss": 0.4327, "step": 4973 }, { "epoch": 0.5056933712891419, "grad_norm": 0.44713592529296875, "learning_rate": 9.999960503995052e-06, "loss": 0.4083, "step": 4974 }, { "epoch": 0.5057950386335909, "grad_norm": 0.383210152387619, "learning_rate": 9.999959080831e-06, "loss": 0.4298, "step": 4975 }, { "epoch": 0.5058967059780398, "grad_norm": 0.4016209840774536, "learning_rate": 9.99995763247834e-06, "loss": 0.4015, "step": 4976 }, { "epoch": 0.5059983733224889, "grad_norm": 0.4916684627532959, "learning_rate": 9.999956158937081e-06, "loss": 0.3981, "step": 4977 }, { "epoch": 0.5061000406669378, "grad_norm": 0.4072628617286682, "learning_rate": 9.999954660207234e-06, "loss": 0.402, "step": 4978 }, { "epoch": 0.5062017080113868, "grad_norm": 0.3609198033809662, "learning_rate": 9.999953136288803e-06, "loss": 0.3879, "step": 4979 }, { "epoch": 0.5063033753558357, "grad_norm": 0.430772066116333, "learning_rate": 9.999951587181794e-06, "loss": 0.4151, "step": 4980 }, { "epoch": 0.5064050427002846, "grad_norm": 0.4343527853488922, "learning_rate": 9.99995001288622e-06, "loss": 0.3809, "step": 4981 }, { "epoch": 0.5065067100447336, "grad_norm": 0.41755351424217224, "learning_rate": 9.999948413402082e-06, "loss": 0.4175, "step": 4982 }, { "epoch": 0.5066083773891826, "grad_norm": 0.4139944016933441, "learning_rate": 9.999946788729396e-06, "loss": 0.3946, "step": 4983 }, { "epoch": 0.5067100447336316, "grad_norm": 0.454925000667572, "learning_rate": 9.999945138868165e-06, "loss": 0.3914, "step": 4984 }, { "epoch": 0.5068117120780805, "grad_norm": 0.41334256529808044, "learning_rate": 9.999943463818398e-06, "loss": 0.4188, "step": 4985 }, { "epoch": 0.5069133794225295, "grad_norm": 0.4538329243659973, "learning_rate": 9.999941763580103e-06, "loss": 0.4008, "step": 4986 }, { "epoch": 0.5070150467669784, "grad_norm": 0.47799208760261536, "learning_rate": 9.999940038153291e-06, "loss": 0.3975, "step": 4987 }, { "epoch": 0.5071167141114274, "grad_norm": 0.4028439521789551, "learning_rate": 9.99993828753797e-06, "loss": 0.4108, "step": 4988 }, { "epoch": 0.5072183814558764, "grad_norm": 0.43256041407585144, "learning_rate": 9.999936511734148e-06, "loss": 0.4104, "step": 4989 }, { "epoch": 0.5073200488003253, "grad_norm": 0.4454803764820099, "learning_rate": 9.999934710741833e-06, "loss": 0.3767, "step": 4990 }, { "epoch": 0.5074217161447743, "grad_norm": 0.37361568212509155, "learning_rate": 9.999932884561035e-06, "loss": 0.3798, "step": 4991 }, { "epoch": 0.5075233834892232, "grad_norm": 0.39220917224884033, "learning_rate": 9.999931033191764e-06, "loss": 0.4238, "step": 4992 }, { "epoch": 0.5076250508336723, "grad_norm": 0.46496641635894775, "learning_rate": 9.999929156634027e-06, "loss": 0.4127, "step": 4993 }, { "epoch": 0.5077267181781212, "grad_norm": 0.42899394035339355, "learning_rate": 9.999927254887838e-06, "loss": 0.4305, "step": 4994 }, { "epoch": 0.5078283855225701, "grad_norm": 0.4021866023540497, "learning_rate": 9.999925327953202e-06, "loss": 0.3782, "step": 4995 }, { "epoch": 0.5079300528670191, "grad_norm": 0.4346875548362732, "learning_rate": 9.99992337583013e-06, "loss": 0.4022, "step": 4996 }, { "epoch": 0.508031720211468, "grad_norm": 0.46015480160713196, "learning_rate": 9.999921398518632e-06, "loss": 0.3759, "step": 4997 }, { "epoch": 0.5081333875559171, "grad_norm": 0.4526151418685913, "learning_rate": 9.999919396018719e-06, "loss": 0.4515, "step": 4998 }, { "epoch": 0.508235054900366, "grad_norm": 0.5272055268287659, "learning_rate": 9.999917368330399e-06, "loss": 0.4273, "step": 4999 }, { "epoch": 0.508336722244815, "grad_norm": 0.4409118592739105, "learning_rate": 9.999915315453686e-06, "loss": 0.3712, "step": 5000 }, { "epoch": 0.5084383895892639, "grad_norm": 0.41207846999168396, "learning_rate": 9.999913237388586e-06, "loss": 0.3942, "step": 5001 }, { "epoch": 0.5085400569337128, "grad_norm": 0.42756739258766174, "learning_rate": 9.99991113413511e-06, "loss": 0.3799, "step": 5002 }, { "epoch": 0.5086417242781619, "grad_norm": 0.3716377019882202, "learning_rate": 9.999909005693271e-06, "loss": 0.4008, "step": 5003 }, { "epoch": 0.5087433916226108, "grad_norm": 0.3460847735404968, "learning_rate": 9.99990685206308e-06, "loss": 0.3644, "step": 5004 }, { "epoch": 0.5088450589670598, "grad_norm": 0.3779104948043823, "learning_rate": 9.999904673244544e-06, "loss": 0.3922, "step": 5005 }, { "epoch": 0.5089467263115087, "grad_norm": 0.35748475790023804, "learning_rate": 9.999902469237678e-06, "loss": 0.4023, "step": 5006 }, { "epoch": 0.5090483936559577, "grad_norm": 0.3734561502933502, "learning_rate": 9.999900240042492e-06, "loss": 0.4, "step": 5007 }, { "epoch": 0.5091500610004067, "grad_norm": 0.36143723130226135, "learning_rate": 9.999897985658997e-06, "loss": 0.3813, "step": 5008 }, { "epoch": 0.5092517283448557, "grad_norm": 0.3954206705093384, "learning_rate": 9.999895706087202e-06, "loss": 0.4378, "step": 5009 }, { "epoch": 0.5093533956893046, "grad_norm": 0.37854668498039246, "learning_rate": 9.999893401327124e-06, "loss": 0.4302, "step": 5010 }, { "epoch": 0.5094550630337535, "grad_norm": 0.40135276317596436, "learning_rate": 9.999891071378766e-06, "loss": 0.3954, "step": 5011 }, { "epoch": 0.5095567303782025, "grad_norm": 0.3749154210090637, "learning_rate": 9.99988871624215e-06, "loss": 0.4064, "step": 5012 }, { "epoch": 0.5096583977226515, "grad_norm": 0.3969755172729492, "learning_rate": 9.999886335917279e-06, "loss": 0.4054, "step": 5013 }, { "epoch": 0.5097600650671005, "grad_norm": 0.38460472226142883, "learning_rate": 9.999883930404172e-06, "loss": 0.417, "step": 5014 }, { "epoch": 0.5098617324115494, "grad_norm": 0.37784314155578613, "learning_rate": 9.999881499702835e-06, "loss": 0.422, "step": 5015 }, { "epoch": 0.5099633997559984, "grad_norm": 0.3745918571949005, "learning_rate": 9.999879043813286e-06, "loss": 0.3758, "step": 5016 }, { "epoch": 0.5100650671004473, "grad_norm": 0.38089266419410706, "learning_rate": 9.999876562735534e-06, "loss": 0.4329, "step": 5017 }, { "epoch": 0.5101667344448964, "grad_norm": 0.4142489731311798, "learning_rate": 9.999874056469589e-06, "loss": 0.4021, "step": 5018 }, { "epoch": 0.5102684017893453, "grad_norm": 0.3763699233531952, "learning_rate": 9.999871525015469e-06, "loss": 0.407, "step": 5019 }, { "epoch": 0.5103700691337942, "grad_norm": 0.3816727101802826, "learning_rate": 9.999868968373185e-06, "loss": 0.4296, "step": 5020 }, { "epoch": 0.5104717364782432, "grad_norm": 0.37710854411125183, "learning_rate": 9.999866386542748e-06, "loss": 0.4565, "step": 5021 }, { "epoch": 0.5105734038226921, "grad_norm": 0.39291417598724365, "learning_rate": 9.999863779524173e-06, "loss": 0.4078, "step": 5022 }, { "epoch": 0.5106750711671411, "grad_norm": 0.38631683588027954, "learning_rate": 9.999861147317472e-06, "loss": 0.4263, "step": 5023 }, { "epoch": 0.5107767385115901, "grad_norm": 0.36634278297424316, "learning_rate": 9.99985848992266e-06, "loss": 0.4359, "step": 5024 }, { "epoch": 0.510878405856039, "grad_norm": 0.3553279936313629, "learning_rate": 9.999855807339748e-06, "loss": 0.4281, "step": 5025 }, { "epoch": 0.510980073200488, "grad_norm": 0.3622656464576721, "learning_rate": 9.99985309956875e-06, "loss": 0.4164, "step": 5026 }, { "epoch": 0.5110817405449369, "grad_norm": 0.41906383633613586, "learning_rate": 9.99985036660968e-06, "loss": 0.4136, "step": 5027 }, { "epoch": 0.5111834078893859, "grad_norm": 0.34813550114631653, "learning_rate": 9.999847608462554e-06, "loss": 0.3905, "step": 5028 }, { "epoch": 0.5112850752338349, "grad_norm": 0.353439599275589, "learning_rate": 9.999844825127383e-06, "loss": 0.395, "step": 5029 }, { "epoch": 0.5113867425782839, "grad_norm": 0.3620215356349945, "learning_rate": 9.99984201660418e-06, "loss": 0.3713, "step": 5030 }, { "epoch": 0.5114884099227328, "grad_norm": 0.4002252221107483, "learning_rate": 9.999839182892964e-06, "loss": 0.417, "step": 5031 }, { "epoch": 0.5115900772671818, "grad_norm": 0.3490366041660309, "learning_rate": 9.999836323993745e-06, "loss": 0.4065, "step": 5032 }, { "epoch": 0.5116917446116307, "grad_norm": 0.3843543827533722, "learning_rate": 9.999833439906538e-06, "loss": 0.4176, "step": 5033 }, { "epoch": 0.5117934119560797, "grad_norm": 0.37038856744766235, "learning_rate": 9.99983053063136e-06, "loss": 0.4054, "step": 5034 }, { "epoch": 0.5118950793005287, "grad_norm": 0.386350154876709, "learning_rate": 9.999827596168222e-06, "loss": 0.388, "step": 5035 }, { "epoch": 0.5119967466449776, "grad_norm": 0.4291435480117798, "learning_rate": 9.999824636517141e-06, "loss": 0.4126, "step": 5036 }, { "epoch": 0.5120984139894266, "grad_norm": 0.4066697359085083, "learning_rate": 9.999821651678133e-06, "loss": 0.3905, "step": 5037 }, { "epoch": 0.5122000813338755, "grad_norm": 0.3715919852256775, "learning_rate": 9.999818641651211e-06, "loss": 0.4295, "step": 5038 }, { "epoch": 0.5123017486783246, "grad_norm": 0.43945759534835815, "learning_rate": 9.999815606436391e-06, "loss": 0.3846, "step": 5039 }, { "epoch": 0.5124034160227735, "grad_norm": 0.3775150179862976, "learning_rate": 9.999812546033687e-06, "loss": 0.387, "step": 5040 }, { "epoch": 0.5125050833672224, "grad_norm": 0.38865378499031067, "learning_rate": 9.999809460443117e-06, "loss": 0.4148, "step": 5041 }, { "epoch": 0.5126067507116714, "grad_norm": 0.38626837730407715, "learning_rate": 9.999806349664694e-06, "loss": 0.4145, "step": 5042 }, { "epoch": 0.5127084180561203, "grad_norm": 0.3980121612548828, "learning_rate": 9.999803213698435e-06, "loss": 0.4216, "step": 5043 }, { "epoch": 0.5128100854005694, "grad_norm": 0.3863280415534973, "learning_rate": 9.999800052544358e-06, "loss": 0.4074, "step": 5044 }, { "epoch": 0.5129117527450183, "grad_norm": 0.36466965079307556, "learning_rate": 9.999796866202473e-06, "loss": 0.4265, "step": 5045 }, { "epoch": 0.5130134200894673, "grad_norm": 0.3616836369037628, "learning_rate": 9.9997936546728e-06, "loss": 0.3903, "step": 5046 }, { "epoch": 0.5131150874339162, "grad_norm": 0.3643728494644165, "learning_rate": 9.999790417955356e-06, "loss": 0.3875, "step": 5047 }, { "epoch": 0.5132167547783651, "grad_norm": 0.4560255706310272, "learning_rate": 9.999787156050155e-06, "loss": 0.4342, "step": 5048 }, { "epoch": 0.5133184221228142, "grad_norm": 0.3664717972278595, "learning_rate": 9.999783868957217e-06, "loss": 0.4421, "step": 5049 }, { "epoch": 0.5134200894672631, "grad_norm": 0.3921639919281006, "learning_rate": 9.999780556676552e-06, "loss": 0.4028, "step": 5050 }, { "epoch": 0.5135217568117121, "grad_norm": 0.42677485942840576, "learning_rate": 9.999777219208184e-06, "loss": 0.3999, "step": 5051 }, { "epoch": 0.513623424156161, "grad_norm": 0.4238027334213257, "learning_rate": 9.999773856552124e-06, "loss": 0.3877, "step": 5052 }, { "epoch": 0.51372509150061, "grad_norm": 0.36548346281051636, "learning_rate": 9.999770468708394e-06, "loss": 0.4091, "step": 5053 }, { "epoch": 0.513826758845059, "grad_norm": 0.45677945017814636, "learning_rate": 9.999767055677005e-06, "loss": 0.3912, "step": 5054 }, { "epoch": 0.513928426189508, "grad_norm": 0.41424956917762756, "learning_rate": 9.99976361745798e-06, "loss": 0.3839, "step": 5055 }, { "epoch": 0.5140300935339569, "grad_norm": 0.39359602332115173, "learning_rate": 9.999760154051335e-06, "loss": 0.3962, "step": 5056 }, { "epoch": 0.5141317608784058, "grad_norm": 0.4045751094818115, "learning_rate": 9.999756665457086e-06, "loss": 0.3779, "step": 5057 }, { "epoch": 0.5142334282228548, "grad_norm": 0.42303791642189026, "learning_rate": 9.999753151675249e-06, "loss": 0.4094, "step": 5058 }, { "epoch": 0.5143350955673038, "grad_norm": 0.3787386119365692, "learning_rate": 9.999749612705845e-06, "loss": 0.3991, "step": 5059 }, { "epoch": 0.5144367629117528, "grad_norm": 0.3858170509338379, "learning_rate": 9.99974604854889e-06, "loss": 0.4228, "step": 5060 }, { "epoch": 0.5145384302562017, "grad_norm": 0.3678818345069885, "learning_rate": 9.999742459204404e-06, "loss": 0.3969, "step": 5061 }, { "epoch": 0.5146400976006507, "grad_norm": 0.3763720989227295, "learning_rate": 9.999738844672403e-06, "loss": 0.3878, "step": 5062 }, { "epoch": 0.5147417649450996, "grad_norm": 0.41546425223350525, "learning_rate": 9.999735204952906e-06, "loss": 0.4008, "step": 5063 }, { "epoch": 0.5148434322895485, "grad_norm": 0.3586118519306183, "learning_rate": 9.99973154004593e-06, "loss": 0.3859, "step": 5064 }, { "epoch": 0.5149450996339976, "grad_norm": 0.4197964370250702, "learning_rate": 9.999727849951495e-06, "loss": 0.3861, "step": 5065 }, { "epoch": 0.5150467669784465, "grad_norm": 0.4206085503101349, "learning_rate": 9.99972413466962e-06, "loss": 0.4073, "step": 5066 }, { "epoch": 0.5151484343228955, "grad_norm": 0.368966281414032, "learning_rate": 9.999720394200323e-06, "loss": 0.4122, "step": 5067 }, { "epoch": 0.5152501016673444, "grad_norm": 0.3776046931743622, "learning_rate": 9.999716628543621e-06, "loss": 0.3706, "step": 5068 }, { "epoch": 0.5153517690117934, "grad_norm": 0.39276522397994995, "learning_rate": 9.999712837699537e-06, "loss": 0.4319, "step": 5069 }, { "epoch": 0.5154534363562424, "grad_norm": 0.36831134557724, "learning_rate": 9.999709021668087e-06, "loss": 0.3795, "step": 5070 }, { "epoch": 0.5155551037006914, "grad_norm": 0.40654072165489197, "learning_rate": 9.99970518044929e-06, "loss": 0.4035, "step": 5071 }, { "epoch": 0.5156567710451403, "grad_norm": 0.3759484887123108, "learning_rate": 9.999701314043167e-06, "loss": 0.4174, "step": 5072 }, { "epoch": 0.5157584383895892, "grad_norm": 0.44680824875831604, "learning_rate": 9.999697422449738e-06, "loss": 0.393, "step": 5073 }, { "epoch": 0.5158601057340382, "grad_norm": 0.3932551443576813, "learning_rate": 9.999693505669021e-06, "loss": 0.4149, "step": 5074 }, { "epoch": 0.5159617730784872, "grad_norm": 0.4008606970310211, "learning_rate": 9.999689563701036e-06, "loss": 0.3922, "step": 5075 }, { "epoch": 0.5160634404229362, "grad_norm": 0.46456921100616455, "learning_rate": 9.999685596545803e-06, "loss": 0.4379, "step": 5076 }, { "epoch": 0.5161651077673851, "grad_norm": 0.36101678013801575, "learning_rate": 9.999681604203341e-06, "loss": 0.3994, "step": 5077 }, { "epoch": 0.516266775111834, "grad_norm": 0.4136589467525482, "learning_rate": 9.999677586673673e-06, "loss": 0.4008, "step": 5078 }, { "epoch": 0.516368442456283, "grad_norm": 0.43196019530296326, "learning_rate": 9.999673543956816e-06, "loss": 0.4358, "step": 5079 }, { "epoch": 0.516470109800732, "grad_norm": 0.4134525954723358, "learning_rate": 9.999669476052793e-06, "loss": 0.4276, "step": 5080 }, { "epoch": 0.516571777145181, "grad_norm": 0.379122257232666, "learning_rate": 9.999665382961623e-06, "loss": 0.3728, "step": 5081 }, { "epoch": 0.5166734444896299, "grad_norm": 0.36371496319770813, "learning_rate": 9.999661264683327e-06, "loss": 0.396, "step": 5082 }, { "epoch": 0.5167751118340789, "grad_norm": 0.35274937748908997, "learning_rate": 9.999657121217925e-06, "loss": 0.4103, "step": 5083 }, { "epoch": 0.5168767791785278, "grad_norm": 0.3733312785625458, "learning_rate": 9.99965295256544e-06, "loss": 0.404, "step": 5084 }, { "epoch": 0.5169784465229769, "grad_norm": 0.37615495920181274, "learning_rate": 9.999648758725888e-06, "loss": 0.4364, "step": 5085 }, { "epoch": 0.5170801138674258, "grad_norm": 0.36985349655151367, "learning_rate": 9.999644539699298e-06, "loss": 0.426, "step": 5086 }, { "epoch": 0.5171817812118747, "grad_norm": 0.3764055073261261, "learning_rate": 9.999640295485685e-06, "loss": 0.4087, "step": 5087 }, { "epoch": 0.5172834485563237, "grad_norm": 0.39386555552482605, "learning_rate": 9.999636026085071e-06, "loss": 0.4093, "step": 5088 }, { "epoch": 0.5173851159007726, "grad_norm": 0.35215315222740173, "learning_rate": 9.999631731497482e-06, "loss": 0.3868, "step": 5089 }, { "epoch": 0.5174867832452217, "grad_norm": 0.386309951543808, "learning_rate": 9.999627411722933e-06, "loss": 0.4086, "step": 5090 }, { "epoch": 0.5175884505896706, "grad_norm": 0.36588048934936523, "learning_rate": 9.99962306676145e-06, "loss": 0.4082, "step": 5091 }, { "epoch": 0.5176901179341196, "grad_norm": 0.4190102815628052, "learning_rate": 9.999618696613056e-06, "loss": 0.3925, "step": 5092 }, { "epoch": 0.5177917852785685, "grad_norm": 0.41722431778907776, "learning_rate": 9.99961430127777e-06, "loss": 0.3959, "step": 5093 }, { "epoch": 0.5178934526230174, "grad_norm": 0.37305304408073425, "learning_rate": 9.999609880755614e-06, "loss": 0.4075, "step": 5094 }, { "epoch": 0.5179951199674665, "grad_norm": 0.4523454010486603, "learning_rate": 9.999605435046613e-06, "loss": 0.4015, "step": 5095 }, { "epoch": 0.5180967873119154, "grad_norm": 0.4408925473690033, "learning_rate": 9.999600964150787e-06, "loss": 0.376, "step": 5096 }, { "epoch": 0.5181984546563644, "grad_norm": 0.3529617190361023, "learning_rate": 9.99959646806816e-06, "loss": 0.4073, "step": 5097 }, { "epoch": 0.5183001220008133, "grad_norm": 0.3865986168384552, "learning_rate": 9.999591946798755e-06, "loss": 0.3835, "step": 5098 }, { "epoch": 0.5184017893452623, "grad_norm": 0.4250904619693756, "learning_rate": 9.999587400342592e-06, "loss": 0.4031, "step": 5099 }, { "epoch": 0.5185034566897113, "grad_norm": 0.37755924463272095, "learning_rate": 9.999582828699696e-06, "loss": 0.4336, "step": 5100 }, { "epoch": 0.5186051240341603, "grad_norm": 0.3892090320587158, "learning_rate": 9.99957823187009e-06, "loss": 0.4375, "step": 5101 }, { "epoch": 0.5187067913786092, "grad_norm": 0.4268067181110382, "learning_rate": 9.9995736098538e-06, "loss": 0.3933, "step": 5102 }, { "epoch": 0.5188084587230581, "grad_norm": 0.3867446482181549, "learning_rate": 9.999568962650841e-06, "loss": 0.3874, "step": 5103 }, { "epoch": 0.5189101260675071, "grad_norm": 0.42079460620880127, "learning_rate": 9.999564290261245e-06, "loss": 0.3825, "step": 5104 }, { "epoch": 0.5190117934119561, "grad_norm": 0.35295864939689636, "learning_rate": 9.999559592685031e-06, "loss": 0.3945, "step": 5105 }, { "epoch": 0.5191134607564051, "grad_norm": 0.383390337228775, "learning_rate": 9.999554869922226e-06, "loss": 0.4199, "step": 5106 }, { "epoch": 0.519215128100854, "grad_norm": 0.45472171902656555, "learning_rate": 9.99955012197285e-06, "loss": 0.4108, "step": 5107 }, { "epoch": 0.519316795445303, "grad_norm": 0.378980815410614, "learning_rate": 9.999545348836927e-06, "loss": 0.3768, "step": 5108 }, { "epoch": 0.5194184627897519, "grad_norm": 0.3795241117477417, "learning_rate": 9.999540550514484e-06, "loss": 0.4453, "step": 5109 }, { "epoch": 0.5195201301342008, "grad_norm": 0.411282479763031, "learning_rate": 9.999535727005545e-06, "loss": 0.4424, "step": 5110 }, { "epoch": 0.5196217974786499, "grad_norm": 0.3852003216743469, "learning_rate": 9.99953087831013e-06, "loss": 0.4315, "step": 5111 }, { "epoch": 0.5197234648230988, "grad_norm": 0.3773616552352905, "learning_rate": 9.999526004428269e-06, "loss": 0.3963, "step": 5112 }, { "epoch": 0.5198251321675478, "grad_norm": 0.3592230975627899, "learning_rate": 9.999521105359984e-06, "loss": 0.3827, "step": 5113 }, { "epoch": 0.5199267995119967, "grad_norm": 0.4590255916118622, "learning_rate": 9.9995161811053e-06, "loss": 0.4275, "step": 5114 }, { "epoch": 0.5200284668564457, "grad_norm": 0.3677652180194855, "learning_rate": 9.999511231664241e-06, "loss": 0.3925, "step": 5115 }, { "epoch": 0.5201301342008947, "grad_norm": 0.37925806641578674, "learning_rate": 9.999506257036831e-06, "loss": 0.4345, "step": 5116 }, { "epoch": 0.5202318015453437, "grad_norm": 0.40223732590675354, "learning_rate": 9.999501257223099e-06, "loss": 0.3915, "step": 5117 }, { "epoch": 0.5203334688897926, "grad_norm": 0.3965008854866028, "learning_rate": 9.999496232223067e-06, "loss": 0.3938, "step": 5118 }, { "epoch": 0.5204351362342415, "grad_norm": 0.3939516544342041, "learning_rate": 9.99949118203676e-06, "loss": 0.367, "step": 5119 }, { "epoch": 0.5205368035786905, "grad_norm": 0.4246233403682709, "learning_rate": 9.999486106664204e-06, "loss": 0.3925, "step": 5120 }, { "epoch": 0.5206384709231395, "grad_norm": 0.3709714114665985, "learning_rate": 9.999481006105426e-06, "loss": 0.388, "step": 5121 }, { "epoch": 0.5207401382675885, "grad_norm": 0.39813005924224854, "learning_rate": 9.999475880360453e-06, "loss": 0.3917, "step": 5122 }, { "epoch": 0.5208418056120374, "grad_norm": 0.3989529311656952, "learning_rate": 9.999470729429307e-06, "loss": 0.3661, "step": 5123 }, { "epoch": 0.5209434729564864, "grad_norm": 0.39351069927215576, "learning_rate": 9.999465553312014e-06, "loss": 0.3964, "step": 5124 }, { "epoch": 0.5210451403009353, "grad_norm": 0.432316392660141, "learning_rate": 9.9994603520086e-06, "loss": 0.4, "step": 5125 }, { "epoch": 0.5211468076453843, "grad_norm": 0.40582889318466187, "learning_rate": 9.999455125519097e-06, "loss": 0.3975, "step": 5126 }, { "epoch": 0.5212484749898333, "grad_norm": 0.4258161783218384, "learning_rate": 9.999449873843526e-06, "loss": 0.4021, "step": 5127 }, { "epoch": 0.5213501423342822, "grad_norm": 0.36495715379714966, "learning_rate": 9.999444596981913e-06, "loss": 0.3949, "step": 5128 }, { "epoch": 0.5214518096787312, "grad_norm": 0.4108552038669586, "learning_rate": 9.999439294934288e-06, "loss": 0.3902, "step": 5129 }, { "epoch": 0.5215534770231801, "grad_norm": 0.376115620136261, "learning_rate": 9.999433967700676e-06, "loss": 0.391, "step": 5130 }, { "epoch": 0.5216551443676292, "grad_norm": 0.4026782214641571, "learning_rate": 9.999428615281102e-06, "loss": 0.4322, "step": 5131 }, { "epoch": 0.5217568117120781, "grad_norm": 0.3557049036026001, "learning_rate": 9.999423237675596e-06, "loss": 0.3803, "step": 5132 }, { "epoch": 0.521858479056527, "grad_norm": 0.3894503116607666, "learning_rate": 9.999417834884183e-06, "loss": 0.3624, "step": 5133 }, { "epoch": 0.521960146400976, "grad_norm": 0.42654407024383545, "learning_rate": 9.999412406906892e-06, "loss": 0.402, "step": 5134 }, { "epoch": 0.5220618137454249, "grad_norm": 0.4002091586589813, "learning_rate": 9.999406953743748e-06, "loss": 0.4034, "step": 5135 }, { "epoch": 0.522163481089874, "grad_norm": 0.4155844449996948, "learning_rate": 9.99940147539478e-06, "loss": 0.4235, "step": 5136 }, { "epoch": 0.5222651484343229, "grad_norm": 0.3684343993663788, "learning_rate": 9.999395971860016e-06, "loss": 0.4165, "step": 5137 }, { "epoch": 0.5223668157787719, "grad_norm": 0.3852292597293854, "learning_rate": 9.999390443139483e-06, "loss": 0.385, "step": 5138 }, { "epoch": 0.5224684831232208, "grad_norm": 0.3797868490219116, "learning_rate": 9.999384889233209e-06, "loss": 0.4098, "step": 5139 }, { "epoch": 0.5225701504676697, "grad_norm": 0.39626559615135193, "learning_rate": 9.999379310141222e-06, "loss": 0.3772, "step": 5140 }, { "epoch": 0.5226718178121188, "grad_norm": 0.3436982333660126, "learning_rate": 9.99937370586355e-06, "loss": 0.3989, "step": 5141 }, { "epoch": 0.5227734851565677, "grad_norm": 0.4064716398715973, "learning_rate": 9.999368076400222e-06, "loss": 0.4105, "step": 5142 }, { "epoch": 0.5228751525010167, "grad_norm": 0.3916587829589844, "learning_rate": 9.999362421751264e-06, "loss": 0.3891, "step": 5143 }, { "epoch": 0.5229768198454656, "grad_norm": 0.35705646872520447, "learning_rate": 9.999356741916708e-06, "loss": 0.3966, "step": 5144 }, { "epoch": 0.5230784871899146, "grad_norm": 0.3645276129245758, "learning_rate": 9.99935103689658e-06, "loss": 0.4062, "step": 5145 }, { "epoch": 0.5231801545343636, "grad_norm": 0.3394211232662201, "learning_rate": 9.999345306690909e-06, "loss": 0.4218, "step": 5146 }, { "epoch": 0.5232818218788126, "grad_norm": 0.35602909326553345, "learning_rate": 9.999339551299724e-06, "loss": 0.3367, "step": 5147 }, { "epoch": 0.5233834892232615, "grad_norm": 0.3347792625427246, "learning_rate": 9.999333770723056e-06, "loss": 0.3976, "step": 5148 }, { "epoch": 0.5234851565677104, "grad_norm": 0.38568150997161865, "learning_rate": 9.999327964960931e-06, "loss": 0.407, "step": 5149 }, { "epoch": 0.5235868239121594, "grad_norm": 0.3468780517578125, "learning_rate": 9.99932213401338e-06, "loss": 0.3804, "step": 5150 }, { "epoch": 0.5236884912566083, "grad_norm": 0.3823658227920532, "learning_rate": 9.999316277880434e-06, "loss": 0.3678, "step": 5151 }, { "epoch": 0.5237901586010574, "grad_norm": 0.3957243263721466, "learning_rate": 9.999310396562118e-06, "loss": 0.412, "step": 5152 }, { "epoch": 0.5238918259455063, "grad_norm": 0.39804428815841675, "learning_rate": 9.999304490058466e-06, "loss": 0.3736, "step": 5153 }, { "epoch": 0.5239934932899553, "grad_norm": 0.3522684872150421, "learning_rate": 9.999298558369506e-06, "loss": 0.4012, "step": 5154 }, { "epoch": 0.5240951606344042, "grad_norm": 0.3962463140487671, "learning_rate": 9.999292601495266e-06, "loss": 0.3985, "step": 5155 }, { "epoch": 0.5241968279788531, "grad_norm": 0.3477359116077423, "learning_rate": 9.99928661943578e-06, "loss": 0.3967, "step": 5156 }, { "epoch": 0.5242984953233022, "grad_norm": 0.401699960231781, "learning_rate": 9.999280612191075e-06, "loss": 0.4178, "step": 5157 }, { "epoch": 0.5244001626677511, "grad_norm": 0.35389506816864014, "learning_rate": 9.999274579761182e-06, "loss": 0.452, "step": 5158 }, { "epoch": 0.5245018300122001, "grad_norm": 0.3675893247127533, "learning_rate": 9.999268522146134e-06, "loss": 0.4243, "step": 5159 }, { "epoch": 0.524603497356649, "grad_norm": 0.36889567971229553, "learning_rate": 9.999262439345956e-06, "loss": 0.4015, "step": 5160 }, { "epoch": 0.524705164701098, "grad_norm": 0.35002246499061584, "learning_rate": 9.999256331360682e-06, "loss": 0.3922, "step": 5161 }, { "epoch": 0.524806832045547, "grad_norm": 0.39246827363967896, "learning_rate": 9.999250198190344e-06, "loss": 0.3657, "step": 5162 }, { "epoch": 0.524908499389996, "grad_norm": 0.35937824845314026, "learning_rate": 9.999244039834973e-06, "loss": 0.3783, "step": 5163 }, { "epoch": 0.5250101667344449, "grad_norm": 0.45345011353492737, "learning_rate": 9.999237856294594e-06, "loss": 0.4215, "step": 5164 }, { "epoch": 0.5251118340788938, "grad_norm": 0.4029862582683563, "learning_rate": 9.999231647569247e-06, "loss": 0.4113, "step": 5165 }, { "epoch": 0.5252135014233428, "grad_norm": 0.39920172095298767, "learning_rate": 9.999225413658957e-06, "loss": 0.4006, "step": 5166 }, { "epoch": 0.5253151687677918, "grad_norm": 0.3665573298931122, "learning_rate": 9.999219154563758e-06, "loss": 0.3733, "step": 5167 }, { "epoch": 0.5254168361122408, "grad_norm": 0.4123651683330536, "learning_rate": 9.999212870283682e-06, "loss": 0.3988, "step": 5168 }, { "epoch": 0.5255185034566897, "grad_norm": 0.41249147057533264, "learning_rate": 9.999206560818756e-06, "loss": 0.4305, "step": 5169 }, { "epoch": 0.5256201708011387, "grad_norm": 0.3674705922603607, "learning_rate": 9.999200226169017e-06, "loss": 0.3894, "step": 5170 }, { "epoch": 0.5257218381455876, "grad_norm": 0.3975244164466858, "learning_rate": 9.999193866334497e-06, "loss": 0.4029, "step": 5171 }, { "epoch": 0.5258235054900366, "grad_norm": 0.35421255230903625, "learning_rate": 9.999187481315224e-06, "loss": 0.3949, "step": 5172 }, { "epoch": 0.5259251728344856, "grad_norm": 0.38341936469078064, "learning_rate": 9.999181071111235e-06, "loss": 0.4192, "step": 5173 }, { "epoch": 0.5260268401789345, "grad_norm": 0.407792329788208, "learning_rate": 9.999174635722557e-06, "loss": 0.3773, "step": 5174 }, { "epoch": 0.5261285075233835, "grad_norm": 0.3875182867050171, "learning_rate": 9.999168175149227e-06, "loss": 0.4473, "step": 5175 }, { "epoch": 0.5262301748678324, "grad_norm": 0.3865046203136444, "learning_rate": 9.999161689391275e-06, "loss": 0.3943, "step": 5176 }, { "epoch": 0.5263318422122815, "grad_norm": 0.4131028652191162, "learning_rate": 9.999155178448735e-06, "loss": 0.3906, "step": 5177 }, { "epoch": 0.5264335095567304, "grad_norm": 0.3747639060020447, "learning_rate": 9.999148642321638e-06, "loss": 0.3529, "step": 5178 }, { "epoch": 0.5265351769011793, "grad_norm": 0.3488934338092804, "learning_rate": 9.99914208101002e-06, "loss": 0.3945, "step": 5179 }, { "epoch": 0.5266368442456283, "grad_norm": 0.395801305770874, "learning_rate": 9.999135494513913e-06, "loss": 0.3799, "step": 5180 }, { "epoch": 0.5267385115900772, "grad_norm": 0.3758193552494049, "learning_rate": 9.999128882833346e-06, "loss": 0.4131, "step": 5181 }, { "epoch": 0.5268401789345263, "grad_norm": 0.3735918402671814, "learning_rate": 9.999122245968357e-06, "loss": 0.423, "step": 5182 }, { "epoch": 0.5269418462789752, "grad_norm": 0.3734823763370514, "learning_rate": 9.999115583918977e-06, "loss": 0.4022, "step": 5183 }, { "epoch": 0.5270435136234242, "grad_norm": 0.38990461826324463, "learning_rate": 9.999108896685243e-06, "loss": 0.4314, "step": 5184 }, { "epoch": 0.5271451809678731, "grad_norm": 0.3859401345252991, "learning_rate": 9.999102184267184e-06, "loss": 0.3838, "step": 5185 }, { "epoch": 0.527246848312322, "grad_norm": 0.38329437375068665, "learning_rate": 9.999095446664839e-06, "loss": 0.4084, "step": 5186 }, { "epoch": 0.5273485156567711, "grad_norm": 0.3542470932006836, "learning_rate": 9.999088683878238e-06, "loss": 0.3713, "step": 5187 }, { "epoch": 0.52745018300122, "grad_norm": 0.35761624574661255, "learning_rate": 9.999081895907414e-06, "loss": 0.4022, "step": 5188 }, { "epoch": 0.527551850345669, "grad_norm": 0.3468565344810486, "learning_rate": 9.999075082752406e-06, "loss": 0.3602, "step": 5189 }, { "epoch": 0.5276535176901179, "grad_norm": 0.40589433908462524, "learning_rate": 9.999068244413244e-06, "loss": 0.4467, "step": 5190 }, { "epoch": 0.5277551850345669, "grad_norm": 0.3514295816421509, "learning_rate": 9.999061380889965e-06, "loss": 0.3957, "step": 5191 }, { "epoch": 0.5278568523790158, "grad_norm": 0.3649596571922302, "learning_rate": 9.999054492182602e-06, "loss": 0.397, "step": 5192 }, { "epoch": 0.5279585197234649, "grad_norm": 0.4297211766242981, "learning_rate": 9.999047578291193e-06, "loss": 0.4306, "step": 5193 }, { "epoch": 0.5280601870679138, "grad_norm": 0.40575164556503296, "learning_rate": 9.999040639215766e-06, "loss": 0.3991, "step": 5194 }, { "epoch": 0.5281618544123627, "grad_norm": 0.40837955474853516, "learning_rate": 9.999033674956363e-06, "loss": 0.4221, "step": 5195 }, { "epoch": 0.5282635217568117, "grad_norm": 0.3769053518772125, "learning_rate": 9.999026685513015e-06, "loss": 0.3872, "step": 5196 }, { "epoch": 0.5283651891012606, "grad_norm": 0.42484039068222046, "learning_rate": 9.999019670885762e-06, "loss": 0.4219, "step": 5197 }, { "epoch": 0.5284668564457097, "grad_norm": 0.37087059020996094, "learning_rate": 9.999012631074631e-06, "loss": 0.3736, "step": 5198 }, { "epoch": 0.5285685237901586, "grad_norm": 0.4160288870334625, "learning_rate": 9.999005566079665e-06, "loss": 0.4021, "step": 5199 }, { "epoch": 0.5286701911346076, "grad_norm": 0.44966185092926025, "learning_rate": 9.998998475900896e-06, "loss": 0.3827, "step": 5200 }, { "epoch": 0.5287718584790565, "grad_norm": 0.36914771795272827, "learning_rate": 9.998991360538362e-06, "loss": 0.4198, "step": 5201 }, { "epoch": 0.5288735258235054, "grad_norm": 0.394828200340271, "learning_rate": 9.998984219992097e-06, "loss": 0.4144, "step": 5202 }, { "epoch": 0.5289751931679545, "grad_norm": 0.4203234612941742, "learning_rate": 9.998977054262136e-06, "loss": 0.4396, "step": 5203 }, { "epoch": 0.5290768605124034, "grad_norm": 0.36447596549987793, "learning_rate": 9.998969863348518e-06, "loss": 0.3766, "step": 5204 }, { "epoch": 0.5291785278568524, "grad_norm": 0.39529794454574585, "learning_rate": 9.998962647251277e-06, "loss": 0.4321, "step": 5205 }, { "epoch": 0.5292801952013013, "grad_norm": 0.3650361895561218, "learning_rate": 9.99895540597045e-06, "loss": 0.3998, "step": 5206 }, { "epoch": 0.5293818625457503, "grad_norm": 0.38251379132270813, "learning_rate": 9.998948139506074e-06, "loss": 0.3826, "step": 5207 }, { "epoch": 0.5294835298901993, "grad_norm": 0.3510476350784302, "learning_rate": 9.998940847858186e-06, "loss": 0.4267, "step": 5208 }, { "epoch": 0.5295851972346483, "grad_norm": 0.421557754278183, "learning_rate": 9.99893353102682e-06, "loss": 0.4058, "step": 5209 }, { "epoch": 0.5296868645790972, "grad_norm": 0.36266806721687317, "learning_rate": 9.998926189012015e-06, "loss": 0.367, "step": 5210 }, { "epoch": 0.5297885319235461, "grad_norm": 0.36687135696411133, "learning_rate": 9.99891882181381e-06, "loss": 0.4052, "step": 5211 }, { "epoch": 0.5298901992679951, "grad_norm": 0.3749564290046692, "learning_rate": 9.998911429432238e-06, "loss": 0.4193, "step": 5212 }, { "epoch": 0.5299918666124441, "grad_norm": 0.36376243829727173, "learning_rate": 9.99890401186734e-06, "loss": 0.4211, "step": 5213 }, { "epoch": 0.5300935339568931, "grad_norm": 0.37044742703437805, "learning_rate": 9.99889656911915e-06, "loss": 0.4019, "step": 5214 }, { "epoch": 0.530195201301342, "grad_norm": 0.38593924045562744, "learning_rate": 9.998889101187706e-06, "loss": 0.4075, "step": 5215 }, { "epoch": 0.530296868645791, "grad_norm": 0.4067159593105316, "learning_rate": 9.998881608073048e-06, "loss": 0.4423, "step": 5216 }, { "epoch": 0.5303985359902399, "grad_norm": 0.384647011756897, "learning_rate": 9.998874089775212e-06, "loss": 0.417, "step": 5217 }, { "epoch": 0.530500203334689, "grad_norm": 0.4025833010673523, "learning_rate": 9.998866546294237e-06, "loss": 0.4303, "step": 5218 }, { "epoch": 0.5306018706791379, "grad_norm": 0.35201650857925415, "learning_rate": 9.99885897763016e-06, "loss": 0.4116, "step": 5219 }, { "epoch": 0.5307035380235868, "grad_norm": 0.3755515217781067, "learning_rate": 9.99885138378302e-06, "loss": 0.4166, "step": 5220 }, { "epoch": 0.5308052053680358, "grad_norm": 0.38065895438194275, "learning_rate": 9.998843764752852e-06, "loss": 0.3982, "step": 5221 }, { "epoch": 0.5309068727124847, "grad_norm": 0.36039242148399353, "learning_rate": 9.998836120539699e-06, "loss": 0.3982, "step": 5222 }, { "epoch": 0.5310085400569338, "grad_norm": 0.38847991824150085, "learning_rate": 9.998828451143597e-06, "loss": 0.4401, "step": 5223 }, { "epoch": 0.5311102074013827, "grad_norm": 0.3820580840110779, "learning_rate": 9.998820756564585e-06, "loss": 0.3974, "step": 5224 }, { "epoch": 0.5312118747458316, "grad_norm": 0.36679404973983765, "learning_rate": 9.998813036802704e-06, "loss": 0.4158, "step": 5225 }, { "epoch": 0.5313135420902806, "grad_norm": 0.3450971245765686, "learning_rate": 9.998805291857989e-06, "loss": 0.4056, "step": 5226 }, { "epoch": 0.5314152094347295, "grad_norm": 0.3558803200721741, "learning_rate": 9.998797521730481e-06, "loss": 0.402, "step": 5227 }, { "epoch": 0.5315168767791786, "grad_norm": 0.3651132583618164, "learning_rate": 9.998789726420219e-06, "loss": 0.3879, "step": 5228 }, { "epoch": 0.5316185441236275, "grad_norm": 0.3598653972148895, "learning_rate": 9.998781905927243e-06, "loss": 0.365, "step": 5229 }, { "epoch": 0.5317202114680765, "grad_norm": 0.36921268701553345, "learning_rate": 9.99877406025159e-06, "loss": 0.4085, "step": 5230 }, { "epoch": 0.5318218788125254, "grad_norm": 0.341111958026886, "learning_rate": 9.998766189393301e-06, "loss": 0.3726, "step": 5231 }, { "epoch": 0.5319235461569743, "grad_norm": 0.375868558883667, "learning_rate": 9.998758293352417e-06, "loss": 0.3979, "step": 5232 }, { "epoch": 0.5320252135014233, "grad_norm": 0.3362879157066345, "learning_rate": 9.998750372128978e-06, "loss": 0.4047, "step": 5233 }, { "epoch": 0.5321268808458723, "grad_norm": 0.386014848947525, "learning_rate": 9.998742425723021e-06, "loss": 0.4379, "step": 5234 }, { "epoch": 0.5322285481903213, "grad_norm": 0.37943539023399353, "learning_rate": 9.998734454134586e-06, "loss": 0.3998, "step": 5235 }, { "epoch": 0.5323302155347702, "grad_norm": 0.357403427362442, "learning_rate": 9.998726457363716e-06, "loss": 0.3925, "step": 5236 }, { "epoch": 0.5324318828792192, "grad_norm": 0.3883993327617645, "learning_rate": 9.998718435410452e-06, "loss": 0.4577, "step": 5237 }, { "epoch": 0.5325335502236681, "grad_norm": 0.4238460659980774, "learning_rate": 9.99871038827483e-06, "loss": 0.4264, "step": 5238 }, { "epoch": 0.5326352175681172, "grad_norm": 0.41341152787208557, "learning_rate": 9.998702315956895e-06, "loss": 0.4115, "step": 5239 }, { "epoch": 0.5327368849125661, "grad_norm": 0.41464391350746155, "learning_rate": 9.998694218456683e-06, "loss": 0.4124, "step": 5240 }, { "epoch": 0.532838552257015, "grad_norm": 0.378302663564682, "learning_rate": 9.998686095774239e-06, "loss": 0.4186, "step": 5241 }, { "epoch": 0.532940219601464, "grad_norm": 0.3670460283756256, "learning_rate": 9.998677947909603e-06, "loss": 0.3844, "step": 5242 }, { "epoch": 0.5330418869459129, "grad_norm": 0.3772015869617462, "learning_rate": 9.998669774862813e-06, "loss": 0.4545, "step": 5243 }, { "epoch": 0.533143554290362, "grad_norm": 0.34664809703826904, "learning_rate": 9.998661576633914e-06, "loss": 0.4008, "step": 5244 }, { "epoch": 0.5332452216348109, "grad_norm": 0.37442901730537415, "learning_rate": 9.998653353222945e-06, "loss": 0.4131, "step": 5245 }, { "epoch": 0.5333468889792599, "grad_norm": 0.3530762791633606, "learning_rate": 9.998645104629949e-06, "loss": 0.4018, "step": 5246 }, { "epoch": 0.5334485563237088, "grad_norm": 0.38109639286994934, "learning_rate": 9.998636830854968e-06, "loss": 0.4129, "step": 5247 }, { "epoch": 0.5335502236681577, "grad_norm": 0.3616412878036499, "learning_rate": 9.99862853189804e-06, "loss": 0.414, "step": 5248 }, { "epoch": 0.5336518910126068, "grad_norm": 0.35744595527648926, "learning_rate": 9.99862020775921e-06, "loss": 0.3979, "step": 5249 }, { "epoch": 0.5337535583570557, "grad_norm": 0.32795318961143494, "learning_rate": 9.99861185843852e-06, "loss": 0.3776, "step": 5250 }, { "epoch": 0.5338552257015047, "grad_norm": 0.37229734659194946, "learning_rate": 9.998603483936012e-06, "loss": 0.4087, "step": 5251 }, { "epoch": 0.5339568930459536, "grad_norm": 0.3636098802089691, "learning_rate": 9.998595084251727e-06, "loss": 0.402, "step": 5252 }, { "epoch": 0.5340585603904026, "grad_norm": 0.37036824226379395, "learning_rate": 9.998586659385708e-06, "loss": 0.433, "step": 5253 }, { "epoch": 0.5341602277348516, "grad_norm": 0.38625091314315796, "learning_rate": 9.998578209337997e-06, "loss": 0.4172, "step": 5254 }, { "epoch": 0.5342618950793006, "grad_norm": 0.3707410991191864, "learning_rate": 9.998569734108637e-06, "loss": 0.3984, "step": 5255 }, { "epoch": 0.5343635624237495, "grad_norm": 0.3861269950866699, "learning_rate": 9.998561233697669e-06, "loss": 0.4092, "step": 5256 }, { "epoch": 0.5344652297681984, "grad_norm": 0.3894251585006714, "learning_rate": 9.998552708105137e-06, "loss": 0.397, "step": 5257 }, { "epoch": 0.5345668971126474, "grad_norm": 0.40331077575683594, "learning_rate": 9.998544157331087e-06, "loss": 0.3995, "step": 5258 }, { "epoch": 0.5346685644570964, "grad_norm": 0.34218281507492065, "learning_rate": 9.998535581375558e-06, "loss": 0.4299, "step": 5259 }, { "epoch": 0.5347702318015454, "grad_norm": 0.3936117887496948, "learning_rate": 9.998526980238594e-06, "loss": 0.382, "step": 5260 }, { "epoch": 0.5348718991459943, "grad_norm": 0.40548744797706604, "learning_rate": 9.998518353920242e-06, "loss": 0.3837, "step": 5261 }, { "epoch": 0.5349735664904433, "grad_norm": 0.3809502422809601, "learning_rate": 9.998509702420537e-06, "loss": 0.3729, "step": 5262 }, { "epoch": 0.5350752338348922, "grad_norm": 0.44939956068992615, "learning_rate": 9.998501025739532e-06, "loss": 0.4053, "step": 5263 }, { "epoch": 0.5351769011793412, "grad_norm": 0.4191508889198303, "learning_rate": 9.998492323877266e-06, "loss": 0.4031, "step": 5264 }, { "epoch": 0.5352785685237902, "grad_norm": 0.3780202865600586, "learning_rate": 9.998483596833783e-06, "loss": 0.4223, "step": 5265 }, { "epoch": 0.5353802358682391, "grad_norm": 0.3976743817329407, "learning_rate": 9.998474844609127e-06, "loss": 0.4113, "step": 5266 }, { "epoch": 0.5354819032126881, "grad_norm": 0.378461629152298, "learning_rate": 9.998466067203343e-06, "loss": 0.4033, "step": 5267 }, { "epoch": 0.535583570557137, "grad_norm": 0.3956618905067444, "learning_rate": 9.998457264616474e-06, "loss": 0.4109, "step": 5268 }, { "epoch": 0.5356852379015861, "grad_norm": 0.39011481404304504, "learning_rate": 9.998448436848568e-06, "loss": 0.4096, "step": 5269 }, { "epoch": 0.535786905246035, "grad_norm": 0.3567394018173218, "learning_rate": 9.998439583899664e-06, "loss": 0.4113, "step": 5270 }, { "epoch": 0.535888572590484, "grad_norm": 0.3886379897594452, "learning_rate": 9.99843070576981e-06, "loss": 0.419, "step": 5271 }, { "epoch": 0.5359902399349329, "grad_norm": 0.3726843297481537, "learning_rate": 9.998421802459048e-06, "loss": 0.4049, "step": 5272 }, { "epoch": 0.5360919072793818, "grad_norm": 0.37250617146492004, "learning_rate": 9.998412873967426e-06, "loss": 0.402, "step": 5273 }, { "epoch": 0.5361935746238308, "grad_norm": 0.382223904132843, "learning_rate": 9.99840392029499e-06, "loss": 0.4218, "step": 5274 }, { "epoch": 0.5362952419682798, "grad_norm": 0.3930076062679291, "learning_rate": 9.99839494144178e-06, "loss": 0.3868, "step": 5275 }, { "epoch": 0.5363969093127288, "grad_norm": 0.35695764422416687, "learning_rate": 9.998385937407845e-06, "loss": 0.3995, "step": 5276 }, { "epoch": 0.5364985766571777, "grad_norm": 0.3972027003765106, "learning_rate": 9.99837690819323e-06, "loss": 0.3999, "step": 5277 }, { "epoch": 0.5366002440016266, "grad_norm": 0.36778077483177185, "learning_rate": 9.99836785379798e-06, "loss": 0.4232, "step": 5278 }, { "epoch": 0.5367019113460756, "grad_norm": 0.3857656717300415, "learning_rate": 9.99835877422214e-06, "loss": 0.4039, "step": 5279 }, { "epoch": 0.5368035786905246, "grad_norm": 0.3730914890766144, "learning_rate": 9.998349669465757e-06, "loss": 0.4105, "step": 5280 }, { "epoch": 0.5369052460349736, "grad_norm": 0.37496063113212585, "learning_rate": 9.998340539528874e-06, "loss": 0.3975, "step": 5281 }, { "epoch": 0.5370069133794225, "grad_norm": 0.4132556915283203, "learning_rate": 9.998331384411541e-06, "loss": 0.3926, "step": 5282 }, { "epoch": 0.5371085807238715, "grad_norm": 0.3562310039997101, "learning_rate": 9.998322204113802e-06, "loss": 0.4647, "step": 5283 }, { "epoch": 0.5372102480683204, "grad_norm": 0.3877103328704834, "learning_rate": 9.998312998635704e-06, "loss": 0.3845, "step": 5284 }, { "epoch": 0.5373119154127695, "grad_norm": 0.4000145494937897, "learning_rate": 9.998303767977292e-06, "loss": 0.4292, "step": 5285 }, { "epoch": 0.5374135827572184, "grad_norm": 0.3334699869155884, "learning_rate": 9.998294512138613e-06, "loss": 0.3815, "step": 5286 }, { "epoch": 0.5375152501016673, "grad_norm": 0.40697893500328064, "learning_rate": 9.998285231119715e-06, "loss": 0.3596, "step": 5287 }, { "epoch": 0.5376169174461163, "grad_norm": 0.37456420063972473, "learning_rate": 9.998275924920644e-06, "loss": 0.4089, "step": 5288 }, { "epoch": 0.5377185847905652, "grad_norm": 0.3978056013584137, "learning_rate": 9.998266593541445e-06, "loss": 0.3996, "step": 5289 }, { "epoch": 0.5378202521350143, "grad_norm": 0.35001111030578613, "learning_rate": 9.99825723698217e-06, "loss": 0.4013, "step": 5290 }, { "epoch": 0.5379219194794632, "grad_norm": 0.3841021955013275, "learning_rate": 9.99824785524286e-06, "loss": 0.4016, "step": 5291 }, { "epoch": 0.5380235868239122, "grad_norm": 0.40003934502601624, "learning_rate": 9.998238448323566e-06, "loss": 0.3933, "step": 5292 }, { "epoch": 0.5381252541683611, "grad_norm": 0.4483519494533539, "learning_rate": 9.998229016224333e-06, "loss": 0.4024, "step": 5293 }, { "epoch": 0.53822692151281, "grad_norm": 0.36475372314453125, "learning_rate": 9.998219558945211e-06, "loss": 0.4095, "step": 5294 }, { "epoch": 0.5383285888572591, "grad_norm": 0.40619516372680664, "learning_rate": 9.998210076486248e-06, "loss": 0.4451, "step": 5295 }, { "epoch": 0.538430256201708, "grad_norm": 0.3741147220134735, "learning_rate": 9.998200568847488e-06, "loss": 0.4167, "step": 5296 }, { "epoch": 0.538531923546157, "grad_norm": 0.42870619893074036, "learning_rate": 9.998191036028984e-06, "loss": 0.4065, "step": 5297 }, { "epoch": 0.5386335908906059, "grad_norm": 0.39095187187194824, "learning_rate": 9.998181478030779e-06, "loss": 0.4121, "step": 5298 }, { "epoch": 0.5387352582350549, "grad_norm": 0.42674630880355835, "learning_rate": 9.998171894852923e-06, "loss": 0.4188, "step": 5299 }, { "epoch": 0.5388369255795039, "grad_norm": 0.39525532722473145, "learning_rate": 9.998162286495467e-06, "loss": 0.3724, "step": 5300 }, { "epoch": 0.5389385929239529, "grad_norm": 0.3872328996658325, "learning_rate": 9.998152652958457e-06, "loss": 0.4426, "step": 5301 }, { "epoch": 0.5390402602684018, "grad_norm": 0.38214728236198425, "learning_rate": 9.998142994241939e-06, "loss": 0.4176, "step": 5302 }, { "epoch": 0.5391419276128507, "grad_norm": 0.4240119159221649, "learning_rate": 9.998133310345967e-06, "loss": 0.3987, "step": 5303 }, { "epoch": 0.5392435949572997, "grad_norm": 0.39154618978500366, "learning_rate": 9.998123601270586e-06, "loss": 0.4091, "step": 5304 }, { "epoch": 0.5393452623017487, "grad_norm": 0.42173272371292114, "learning_rate": 9.998113867015846e-06, "loss": 0.4335, "step": 5305 }, { "epoch": 0.5394469296461977, "grad_norm": 0.43611180782318115, "learning_rate": 9.998104107581796e-06, "loss": 0.408, "step": 5306 }, { "epoch": 0.5395485969906466, "grad_norm": 0.37805843353271484, "learning_rate": 9.998094322968486e-06, "loss": 0.3903, "step": 5307 }, { "epoch": 0.5396502643350956, "grad_norm": 0.45963573455810547, "learning_rate": 9.998084513175964e-06, "loss": 0.3867, "step": 5308 }, { "epoch": 0.5397519316795445, "grad_norm": 0.43170973658561707, "learning_rate": 9.998074678204282e-06, "loss": 0.3886, "step": 5309 }, { "epoch": 0.5398535990239935, "grad_norm": 0.42476195096969604, "learning_rate": 9.998064818053485e-06, "loss": 0.4047, "step": 5310 }, { "epoch": 0.5399552663684425, "grad_norm": 0.4270475208759308, "learning_rate": 9.998054932723624e-06, "loss": 0.3771, "step": 5311 }, { "epoch": 0.5400569337128914, "grad_norm": 0.3962036073207855, "learning_rate": 9.998045022214753e-06, "loss": 0.4073, "step": 5312 }, { "epoch": 0.5401586010573404, "grad_norm": 0.49382027983665466, "learning_rate": 9.998035086526917e-06, "loss": 0.3495, "step": 5313 }, { "epoch": 0.5402602684017893, "grad_norm": 0.4288587272167206, "learning_rate": 9.998025125660168e-06, "loss": 0.4404, "step": 5314 }, { "epoch": 0.5403619357462383, "grad_norm": 0.43870437145233154, "learning_rate": 9.998015139614558e-06, "loss": 0.4022, "step": 5315 }, { "epoch": 0.5404636030906873, "grad_norm": 0.46674805879592896, "learning_rate": 9.998005128390134e-06, "loss": 0.4099, "step": 5316 }, { "epoch": 0.5405652704351362, "grad_norm": 0.376055508852005, "learning_rate": 9.997995091986947e-06, "loss": 0.4072, "step": 5317 }, { "epoch": 0.5406669377795852, "grad_norm": 0.4548684060573578, "learning_rate": 9.997985030405051e-06, "loss": 0.42, "step": 5318 }, { "epoch": 0.5407686051240341, "grad_norm": 0.4305882751941681, "learning_rate": 9.997974943644492e-06, "loss": 0.445, "step": 5319 }, { "epoch": 0.5408702724684831, "grad_norm": 0.4069269299507141, "learning_rate": 9.997964831705323e-06, "loss": 0.4417, "step": 5320 }, { "epoch": 0.5409719398129321, "grad_norm": 0.4192807674407959, "learning_rate": 9.997954694587595e-06, "loss": 0.4056, "step": 5321 }, { "epoch": 0.5410736071573811, "grad_norm": 0.37809982895851135, "learning_rate": 9.997944532291359e-06, "loss": 0.4058, "step": 5322 }, { "epoch": 0.54117527450183, "grad_norm": 0.35840389132499695, "learning_rate": 9.997934344816666e-06, "loss": 0.3794, "step": 5323 }, { "epoch": 0.541276941846279, "grad_norm": 0.4119402766227722, "learning_rate": 9.997924132163571e-06, "loss": 0.3956, "step": 5324 }, { "epoch": 0.5413786091907279, "grad_norm": 0.3880294859409332, "learning_rate": 9.997913894332116e-06, "loss": 0.3931, "step": 5325 }, { "epoch": 0.5414802765351769, "grad_norm": 0.4370887279510498, "learning_rate": 9.997903631322363e-06, "loss": 0.3941, "step": 5326 }, { "epoch": 0.5415819438796259, "grad_norm": 0.3608459234237671, "learning_rate": 9.997893343134357e-06, "loss": 0.3839, "step": 5327 }, { "epoch": 0.5416836112240748, "grad_norm": 0.4331575632095337, "learning_rate": 9.997883029768151e-06, "loss": 0.417, "step": 5328 }, { "epoch": 0.5417852785685238, "grad_norm": 0.3727622330188751, "learning_rate": 9.997872691223801e-06, "loss": 0.4134, "step": 5329 }, { "epoch": 0.5418869459129727, "grad_norm": 0.37414783239364624, "learning_rate": 9.997862327501356e-06, "loss": 0.3977, "step": 5330 }, { "epoch": 0.5419886132574218, "grad_norm": 0.36469393968582153, "learning_rate": 9.997851938600865e-06, "loss": 0.4021, "step": 5331 }, { "epoch": 0.5420902806018707, "grad_norm": 0.40741848945617676, "learning_rate": 9.997841524522386e-06, "loss": 0.3958, "step": 5332 }, { "epoch": 0.5421919479463196, "grad_norm": 0.36814311146736145, "learning_rate": 9.997831085265968e-06, "loss": 0.4098, "step": 5333 }, { "epoch": 0.5422936152907686, "grad_norm": 0.3611714541912079, "learning_rate": 9.997820620831664e-06, "loss": 0.3734, "step": 5334 }, { "epoch": 0.5423952826352175, "grad_norm": 0.40185102820396423, "learning_rate": 9.99781013121953e-06, "loss": 0.3819, "step": 5335 }, { "epoch": 0.5424969499796666, "grad_norm": 0.37938132882118225, "learning_rate": 9.997799616429615e-06, "loss": 0.3966, "step": 5336 }, { "epoch": 0.5425986173241155, "grad_norm": 0.38894957304000854, "learning_rate": 9.997789076461972e-06, "loss": 0.3713, "step": 5337 }, { "epoch": 0.5427002846685645, "grad_norm": 0.4611789286136627, "learning_rate": 9.997778511316657e-06, "loss": 0.4143, "step": 5338 }, { "epoch": 0.5428019520130134, "grad_norm": 0.38973450660705566, "learning_rate": 9.99776792099372e-06, "loss": 0.39, "step": 5339 }, { "epoch": 0.5429036193574623, "grad_norm": 0.3461834192276001, "learning_rate": 9.997757305493217e-06, "loss": 0.4238, "step": 5340 }, { "epoch": 0.5430052867019114, "grad_norm": 0.3709155023097992, "learning_rate": 9.9977466648152e-06, "loss": 0.3996, "step": 5341 }, { "epoch": 0.5431069540463603, "grad_norm": 0.41223856806755066, "learning_rate": 9.997735998959723e-06, "loss": 0.4161, "step": 5342 }, { "epoch": 0.5432086213908093, "grad_norm": 0.3618950843811035, "learning_rate": 9.99772530792684e-06, "loss": 0.3852, "step": 5343 }, { "epoch": 0.5433102887352582, "grad_norm": 0.3792663514614105, "learning_rate": 9.997714591716604e-06, "loss": 0.4233, "step": 5344 }, { "epoch": 0.5434119560797072, "grad_norm": 0.3600589632987976, "learning_rate": 9.99770385032907e-06, "loss": 0.4058, "step": 5345 }, { "epoch": 0.5435136234241562, "grad_norm": 0.3860953450202942, "learning_rate": 9.997693083764291e-06, "loss": 0.3854, "step": 5346 }, { "epoch": 0.5436152907686052, "grad_norm": 0.36249226331710815, "learning_rate": 9.997682292022322e-06, "loss": 0.3956, "step": 5347 }, { "epoch": 0.5437169581130541, "grad_norm": 0.3762686252593994, "learning_rate": 9.997671475103216e-06, "loss": 0.3972, "step": 5348 }, { "epoch": 0.543818625457503, "grad_norm": 0.3789753317832947, "learning_rate": 9.997660633007032e-06, "loss": 0.4057, "step": 5349 }, { "epoch": 0.543920292801952, "grad_norm": 0.35712558031082153, "learning_rate": 9.997649765733818e-06, "loss": 0.4104, "step": 5350 }, { "epoch": 0.544021960146401, "grad_norm": 0.3655332326889038, "learning_rate": 9.997638873283635e-06, "loss": 0.4331, "step": 5351 }, { "epoch": 0.54412362749085, "grad_norm": 0.3477121591567993, "learning_rate": 9.997627955656533e-06, "loss": 0.406, "step": 5352 }, { "epoch": 0.5442252948352989, "grad_norm": 0.36640480160713196, "learning_rate": 9.99761701285257e-06, "loss": 0.3974, "step": 5353 }, { "epoch": 0.5443269621797479, "grad_norm": 0.3868926465511322, "learning_rate": 9.9976060448718e-06, "loss": 0.4128, "step": 5354 }, { "epoch": 0.5444286295241968, "grad_norm": 0.4026058614253998, "learning_rate": 9.997595051714277e-06, "loss": 0.436, "step": 5355 }, { "epoch": 0.5445302968686457, "grad_norm": 0.3981127440929413, "learning_rate": 9.99758403338006e-06, "loss": 0.3851, "step": 5356 }, { "epoch": 0.5446319642130948, "grad_norm": 0.41160428524017334, "learning_rate": 9.9975729898692e-06, "loss": 0.3958, "step": 5357 }, { "epoch": 0.5447336315575437, "grad_norm": 0.3610653579235077, "learning_rate": 9.997561921181756e-06, "loss": 0.4283, "step": 5358 }, { "epoch": 0.5448352989019927, "grad_norm": 0.35170331597328186, "learning_rate": 9.997550827317783e-06, "loss": 0.4076, "step": 5359 }, { "epoch": 0.5449369662464416, "grad_norm": 0.3989690840244293, "learning_rate": 9.997539708277336e-06, "loss": 0.391, "step": 5360 }, { "epoch": 0.5450386335908906, "grad_norm": 0.3258199989795685, "learning_rate": 9.997528564060472e-06, "loss": 0.3875, "step": 5361 }, { "epoch": 0.5451403009353396, "grad_norm": 0.3660295009613037, "learning_rate": 9.997517394667247e-06, "loss": 0.3965, "step": 5362 }, { "epoch": 0.5452419682797885, "grad_norm": 0.39555391669273376, "learning_rate": 9.997506200097715e-06, "loss": 0.4429, "step": 5363 }, { "epoch": 0.5453436356242375, "grad_norm": 0.36648404598236084, "learning_rate": 9.997494980351936e-06, "loss": 0.3744, "step": 5364 }, { "epoch": 0.5454453029686864, "grad_norm": 0.37323689460754395, "learning_rate": 9.997483735429965e-06, "loss": 0.4058, "step": 5365 }, { "epoch": 0.5455469703131354, "grad_norm": 0.36702269315719604, "learning_rate": 9.997472465331858e-06, "loss": 0.4056, "step": 5366 }, { "epoch": 0.5456486376575844, "grad_norm": 0.34945181012153625, "learning_rate": 9.997461170057672e-06, "loss": 0.3865, "step": 5367 }, { "epoch": 0.5457503050020334, "grad_norm": 0.3572755753993988, "learning_rate": 9.997449849607464e-06, "loss": 0.3827, "step": 5368 }, { "epoch": 0.5458519723464823, "grad_norm": 0.37270867824554443, "learning_rate": 9.997438503981291e-06, "loss": 0.3847, "step": 5369 }, { "epoch": 0.5459536396909312, "grad_norm": 0.3896642327308655, "learning_rate": 9.997427133179211e-06, "loss": 0.4312, "step": 5370 }, { "epoch": 0.5460553070353802, "grad_norm": 0.38554567098617554, "learning_rate": 9.997415737201281e-06, "loss": 0.4211, "step": 5371 }, { "epoch": 0.5461569743798292, "grad_norm": 0.3538604974746704, "learning_rate": 9.997404316047559e-06, "loss": 0.4095, "step": 5372 }, { "epoch": 0.5462586417242782, "grad_norm": 0.3622012734413147, "learning_rate": 9.9973928697181e-06, "loss": 0.3989, "step": 5373 }, { "epoch": 0.5463603090687271, "grad_norm": 0.3602370321750641, "learning_rate": 9.997381398212963e-06, "loss": 0.4072, "step": 5374 }, { "epoch": 0.5464619764131761, "grad_norm": 0.3310834765434265, "learning_rate": 9.997369901532208e-06, "loss": 0.4412, "step": 5375 }, { "epoch": 0.546563643757625, "grad_norm": 0.3611258566379547, "learning_rate": 9.99735837967589e-06, "loss": 0.3928, "step": 5376 }, { "epoch": 0.5466653111020741, "grad_norm": 0.4052083194255829, "learning_rate": 9.997346832644067e-06, "loss": 0.407, "step": 5377 }, { "epoch": 0.546766978446523, "grad_norm": 0.4742351770401001, "learning_rate": 9.997335260436801e-06, "loss": 0.4199, "step": 5378 }, { "epoch": 0.5468686457909719, "grad_norm": 0.3832690119743347, "learning_rate": 9.997323663054147e-06, "loss": 0.3832, "step": 5379 }, { "epoch": 0.5469703131354209, "grad_norm": 0.3809313476085663, "learning_rate": 9.99731204049616e-06, "loss": 0.4517, "step": 5380 }, { "epoch": 0.5470719804798698, "grad_norm": 0.35652756690979004, "learning_rate": 9.997300392762904e-06, "loss": 0.3883, "step": 5381 }, { "epoch": 0.5471736478243189, "grad_norm": 0.35369881987571716, "learning_rate": 9.997288719854439e-06, "loss": 0.4025, "step": 5382 }, { "epoch": 0.5472753151687678, "grad_norm": 0.37773725390434265, "learning_rate": 9.997277021770819e-06, "loss": 0.4232, "step": 5383 }, { "epoch": 0.5473769825132168, "grad_norm": 0.33602920174598694, "learning_rate": 9.997265298512106e-06, "loss": 0.3787, "step": 5384 }, { "epoch": 0.5474786498576657, "grad_norm": 0.4113878607749939, "learning_rate": 9.997253550078356e-06, "loss": 0.4216, "step": 5385 }, { "epoch": 0.5475803172021146, "grad_norm": 0.39921069145202637, "learning_rate": 9.997241776469631e-06, "loss": 0.4058, "step": 5386 }, { "epoch": 0.5476819845465637, "grad_norm": 0.3451785743236542, "learning_rate": 9.997229977685991e-06, "loss": 0.3914, "step": 5387 }, { "epoch": 0.5477836518910126, "grad_norm": 0.3898395597934723, "learning_rate": 9.997218153727492e-06, "loss": 0.4086, "step": 5388 }, { "epoch": 0.5478853192354616, "grad_norm": 0.401668906211853, "learning_rate": 9.997206304594196e-06, "loss": 0.4113, "step": 5389 }, { "epoch": 0.5479869865799105, "grad_norm": 0.3548251986503601, "learning_rate": 9.997194430286163e-06, "loss": 0.3867, "step": 5390 }, { "epoch": 0.5480886539243595, "grad_norm": 0.398768812417984, "learning_rate": 9.997182530803451e-06, "loss": 0.4022, "step": 5391 }, { "epoch": 0.5481903212688085, "grad_norm": 0.3509903848171234, "learning_rate": 9.997170606146121e-06, "loss": 0.3976, "step": 5392 }, { "epoch": 0.5482919886132575, "grad_norm": 0.35291776061058044, "learning_rate": 9.997158656314231e-06, "loss": 0.4322, "step": 5393 }, { "epoch": 0.5483936559577064, "grad_norm": 0.44999057054519653, "learning_rate": 9.997146681307846e-06, "loss": 0.4206, "step": 5394 }, { "epoch": 0.5484953233021553, "grad_norm": 0.3413490355014801, "learning_rate": 9.997134681127024e-06, "loss": 0.4054, "step": 5395 }, { "epoch": 0.5485969906466043, "grad_norm": 0.38278263807296753, "learning_rate": 9.997122655771823e-06, "loss": 0.4506, "step": 5396 }, { "epoch": 0.5486986579910533, "grad_norm": 0.36542612314224243, "learning_rate": 9.997110605242305e-06, "loss": 0.3991, "step": 5397 }, { "epoch": 0.5488003253355023, "grad_norm": 0.36136218905448914, "learning_rate": 9.997098529538533e-06, "loss": 0.447, "step": 5398 }, { "epoch": 0.5489019926799512, "grad_norm": 0.34953171014785767, "learning_rate": 9.997086428660567e-06, "loss": 0.3801, "step": 5399 }, { "epoch": 0.5490036600244002, "grad_norm": 0.39659324288368225, "learning_rate": 9.997074302608467e-06, "loss": 0.4006, "step": 5400 }, { "epoch": 0.5491053273688491, "grad_norm": 0.38237422704696655, "learning_rate": 9.997062151382291e-06, "loss": 0.4128, "step": 5401 }, { "epoch": 0.549206994713298, "grad_norm": 0.3301585614681244, "learning_rate": 9.997049974982105e-06, "loss": 0.4257, "step": 5402 }, { "epoch": 0.5493086620577471, "grad_norm": 0.40463462471961975, "learning_rate": 9.99703777340797e-06, "loss": 0.39, "step": 5403 }, { "epoch": 0.549410329402196, "grad_norm": 0.37524744868278503, "learning_rate": 9.997025546659946e-06, "loss": 0.418, "step": 5404 }, { "epoch": 0.549511996746645, "grad_norm": 0.33697962760925293, "learning_rate": 9.997013294738094e-06, "loss": 0.3781, "step": 5405 }, { "epoch": 0.5496136640910939, "grad_norm": 0.368679404258728, "learning_rate": 9.997001017642475e-06, "loss": 0.4295, "step": 5406 }, { "epoch": 0.5497153314355429, "grad_norm": 0.3418644964694977, "learning_rate": 9.996988715373154e-06, "loss": 0.3793, "step": 5407 }, { "epoch": 0.5498169987799919, "grad_norm": 0.40415915846824646, "learning_rate": 9.99697638793019e-06, "loss": 0.397, "step": 5408 }, { "epoch": 0.5499186661244408, "grad_norm": 0.4116840362548828, "learning_rate": 9.996964035313648e-06, "loss": 0.4288, "step": 5409 }, { "epoch": 0.5500203334688898, "grad_norm": 0.37367209792137146, "learning_rate": 9.99695165752359e-06, "loss": 0.3959, "step": 5410 }, { "epoch": 0.5501220008133387, "grad_norm": 0.4271582067012787, "learning_rate": 9.996939254560075e-06, "loss": 0.3957, "step": 5411 }, { "epoch": 0.5502236681577877, "grad_norm": 0.3640911281108856, "learning_rate": 9.996926826423167e-06, "loss": 0.3875, "step": 5412 }, { "epoch": 0.5503253355022367, "grad_norm": 0.38880541920661926, "learning_rate": 9.996914373112928e-06, "loss": 0.406, "step": 5413 }, { "epoch": 0.5504270028466857, "grad_norm": 0.3983568251132965, "learning_rate": 9.996901894629425e-06, "loss": 0.3984, "step": 5414 }, { "epoch": 0.5505286701911346, "grad_norm": 0.3762674033641815, "learning_rate": 9.996889390972716e-06, "loss": 0.4218, "step": 5415 }, { "epoch": 0.5506303375355835, "grad_norm": 0.381257563829422, "learning_rate": 9.996876862142867e-06, "loss": 0.437, "step": 5416 }, { "epoch": 0.5507320048800325, "grad_norm": 0.37211742997169495, "learning_rate": 9.996864308139938e-06, "loss": 0.3716, "step": 5417 }, { "epoch": 0.5508336722244815, "grad_norm": 0.37453004717826843, "learning_rate": 9.996851728963995e-06, "loss": 0.4084, "step": 5418 }, { "epoch": 0.5509353395689305, "grad_norm": 0.3674636483192444, "learning_rate": 9.996839124615102e-06, "loss": 0.4073, "step": 5419 }, { "epoch": 0.5510370069133794, "grad_norm": 0.3651653826236725, "learning_rate": 9.996826495093317e-06, "loss": 0.41, "step": 5420 }, { "epoch": 0.5511386742578284, "grad_norm": 0.40498510003089905, "learning_rate": 9.99681384039871e-06, "loss": 0.4094, "step": 5421 }, { "epoch": 0.5512403416022773, "grad_norm": 0.3946883976459503, "learning_rate": 9.996801160531343e-06, "loss": 0.3867, "step": 5422 }, { "epoch": 0.5513420089467264, "grad_norm": 0.40405693650245667, "learning_rate": 9.996788455491277e-06, "loss": 0.4666, "step": 5423 }, { "epoch": 0.5514436762911753, "grad_norm": 0.36614713072776794, "learning_rate": 9.99677572527858e-06, "loss": 0.3772, "step": 5424 }, { "epoch": 0.5515453436356242, "grad_norm": 0.415122389793396, "learning_rate": 9.996762969893314e-06, "loss": 0.4077, "step": 5425 }, { "epoch": 0.5516470109800732, "grad_norm": 0.37788280844688416, "learning_rate": 9.996750189335544e-06, "loss": 0.4094, "step": 5426 }, { "epoch": 0.5517486783245221, "grad_norm": 0.3594651222229004, "learning_rate": 9.996737383605333e-06, "loss": 0.3606, "step": 5427 }, { "epoch": 0.5518503456689712, "grad_norm": 0.36482012271881104, "learning_rate": 9.996724552702746e-06, "loss": 0.4293, "step": 5428 }, { "epoch": 0.5519520130134201, "grad_norm": 0.3325005769729614, "learning_rate": 9.996711696627848e-06, "loss": 0.3966, "step": 5429 }, { "epoch": 0.5520536803578691, "grad_norm": 0.39602312445640564, "learning_rate": 9.996698815380705e-06, "loss": 0.42, "step": 5430 }, { "epoch": 0.552155347702318, "grad_norm": 0.37582775950431824, "learning_rate": 9.99668590896138e-06, "loss": 0.4036, "step": 5431 }, { "epoch": 0.5522570150467669, "grad_norm": 0.38519859313964844, "learning_rate": 9.996672977369941e-06, "loss": 0.3809, "step": 5432 }, { "epoch": 0.552358682391216, "grad_norm": 0.38473451137542725, "learning_rate": 9.996660020606447e-06, "loss": 0.4065, "step": 5433 }, { "epoch": 0.5524603497356649, "grad_norm": 0.3964139223098755, "learning_rate": 9.99664703867097e-06, "loss": 0.3943, "step": 5434 }, { "epoch": 0.5525620170801139, "grad_norm": 0.3996414542198181, "learning_rate": 9.996634031563573e-06, "loss": 0.4338, "step": 5435 }, { "epoch": 0.5526636844245628, "grad_norm": 0.3812852203845978, "learning_rate": 9.99662099928432e-06, "loss": 0.4099, "step": 5436 }, { "epoch": 0.5527653517690118, "grad_norm": 0.41724705696105957, "learning_rate": 9.996607941833279e-06, "loss": 0.4104, "step": 5437 }, { "epoch": 0.5528670191134608, "grad_norm": 0.3458341956138611, "learning_rate": 9.996594859210512e-06, "loss": 0.3708, "step": 5438 }, { "epoch": 0.5529686864579098, "grad_norm": 0.3751293420791626, "learning_rate": 9.99658175141609e-06, "loss": 0.4047, "step": 5439 }, { "epoch": 0.5530703538023587, "grad_norm": 0.4242578148841858, "learning_rate": 9.996568618450076e-06, "loss": 0.3907, "step": 5440 }, { "epoch": 0.5531720211468076, "grad_norm": 0.36903971433639526, "learning_rate": 9.996555460312536e-06, "loss": 0.4064, "step": 5441 }, { "epoch": 0.5532736884912566, "grad_norm": 0.38314905762672424, "learning_rate": 9.996542277003536e-06, "loss": 0.4476, "step": 5442 }, { "epoch": 0.5533753558357055, "grad_norm": 0.37938669323921204, "learning_rate": 9.996529068523144e-06, "loss": 0.3859, "step": 5443 }, { "epoch": 0.5534770231801546, "grad_norm": 0.4058951139450073, "learning_rate": 9.996515834871427e-06, "loss": 0.4164, "step": 5444 }, { "epoch": 0.5535786905246035, "grad_norm": 0.36547982692718506, "learning_rate": 9.99650257604845e-06, "loss": 0.4068, "step": 5445 }, { "epoch": 0.5536803578690525, "grad_norm": 0.3490249216556549, "learning_rate": 9.99648929205428e-06, "loss": 0.3665, "step": 5446 }, { "epoch": 0.5537820252135014, "grad_norm": 0.38375332951545715, "learning_rate": 9.996475982888985e-06, "loss": 0.4116, "step": 5447 }, { "epoch": 0.5538836925579503, "grad_norm": 0.4178580343723297, "learning_rate": 9.99646264855263e-06, "loss": 0.4131, "step": 5448 }, { "epoch": 0.5539853599023994, "grad_norm": 0.34339889883995056, "learning_rate": 9.996449289045284e-06, "loss": 0.4078, "step": 5449 }, { "epoch": 0.5540870272468483, "grad_norm": 0.36180105805397034, "learning_rate": 9.996435904367015e-06, "loss": 0.4188, "step": 5450 }, { "epoch": 0.5541886945912973, "grad_norm": 0.38296911120414734, "learning_rate": 9.996422494517889e-06, "loss": 0.3917, "step": 5451 }, { "epoch": 0.5542903619357462, "grad_norm": 0.3742249608039856, "learning_rate": 9.996409059497974e-06, "loss": 0.414, "step": 5452 }, { "epoch": 0.5543920292801952, "grad_norm": 0.37796735763549805, "learning_rate": 9.996395599307336e-06, "loss": 0.4089, "step": 5453 }, { "epoch": 0.5544936966246442, "grad_norm": 0.35556134581565857, "learning_rate": 9.996382113946046e-06, "loss": 0.388, "step": 5454 }, { "epoch": 0.5545953639690931, "grad_norm": 0.40686169266700745, "learning_rate": 9.996368603414168e-06, "loss": 0.4355, "step": 5455 }, { "epoch": 0.5546970313135421, "grad_norm": 0.3693907856941223, "learning_rate": 9.996355067711773e-06, "loss": 0.427, "step": 5456 }, { "epoch": 0.554798698657991, "grad_norm": 0.3916746973991394, "learning_rate": 9.99634150683893e-06, "loss": 0.4007, "step": 5457 }, { "epoch": 0.55490036600244, "grad_norm": 0.39226868748664856, "learning_rate": 9.996327920795705e-06, "loss": 0.4013, "step": 5458 }, { "epoch": 0.555002033346889, "grad_norm": 0.41243940591812134, "learning_rate": 9.996314309582166e-06, "loss": 0.429, "step": 5459 }, { "epoch": 0.555103700691338, "grad_norm": 0.3594648540019989, "learning_rate": 9.996300673198384e-06, "loss": 0.4059, "step": 5460 }, { "epoch": 0.5552053680357869, "grad_norm": 0.38309162855148315, "learning_rate": 9.996287011644428e-06, "loss": 0.4203, "step": 5461 }, { "epoch": 0.5553070353802358, "grad_norm": 0.3560532033443451, "learning_rate": 9.996273324920364e-06, "loss": 0.3918, "step": 5462 }, { "epoch": 0.5554087027246848, "grad_norm": 0.37281015515327454, "learning_rate": 9.996259613026263e-06, "loss": 0.3788, "step": 5463 }, { "epoch": 0.5555103700691338, "grad_norm": 0.39378732442855835, "learning_rate": 9.996245875962191e-06, "loss": 0.4456, "step": 5464 }, { "epoch": 0.5556120374135828, "grad_norm": 0.3749941289424896, "learning_rate": 9.996232113728222e-06, "loss": 0.3999, "step": 5465 }, { "epoch": 0.5557137047580317, "grad_norm": 0.3728281557559967, "learning_rate": 9.99621832632442e-06, "loss": 0.389, "step": 5466 }, { "epoch": 0.5558153721024807, "grad_norm": 0.4245086908340454, "learning_rate": 9.99620451375086e-06, "loss": 0.3992, "step": 5467 }, { "epoch": 0.5559170394469296, "grad_norm": 0.3594270646572113, "learning_rate": 9.996190676007609e-06, "loss": 0.394, "step": 5468 }, { "epoch": 0.5560187067913787, "grad_norm": 0.41342318058013916, "learning_rate": 9.996176813094736e-06, "loss": 0.4003, "step": 5469 }, { "epoch": 0.5561203741358276, "grad_norm": 0.37738823890686035, "learning_rate": 9.996162925012312e-06, "loss": 0.388, "step": 5470 }, { "epoch": 0.5562220414802765, "grad_norm": 0.4054540693759918, "learning_rate": 9.996149011760405e-06, "loss": 0.4038, "step": 5471 }, { "epoch": 0.5563237088247255, "grad_norm": 0.3961513638496399, "learning_rate": 9.996135073339089e-06, "loss": 0.4077, "step": 5472 }, { "epoch": 0.5564253761691744, "grad_norm": 0.3787252902984619, "learning_rate": 9.996121109748429e-06, "loss": 0.3929, "step": 5473 }, { "epoch": 0.5565270435136235, "grad_norm": 0.4199458658695221, "learning_rate": 9.996107120988499e-06, "loss": 0.4158, "step": 5474 }, { "epoch": 0.5566287108580724, "grad_norm": 0.35941317677497864, "learning_rate": 9.996093107059367e-06, "loss": 0.414, "step": 5475 }, { "epoch": 0.5567303782025214, "grad_norm": 0.3793736696243286, "learning_rate": 9.996079067961109e-06, "loss": 0.4228, "step": 5476 }, { "epoch": 0.5568320455469703, "grad_norm": 0.385578989982605, "learning_rate": 9.996065003693789e-06, "loss": 0.4378, "step": 5477 }, { "epoch": 0.5569337128914192, "grad_norm": 0.3892907500267029, "learning_rate": 9.996050914257482e-06, "loss": 0.413, "step": 5478 }, { "epoch": 0.5570353802358683, "grad_norm": 0.430803120136261, "learning_rate": 9.996036799652258e-06, "loss": 0.3898, "step": 5479 }, { "epoch": 0.5571370475803172, "grad_norm": 0.3911202847957611, "learning_rate": 9.996022659878187e-06, "loss": 0.4155, "step": 5480 }, { "epoch": 0.5572387149247662, "grad_norm": 0.38960397243499756, "learning_rate": 9.996008494935341e-06, "loss": 0.4119, "step": 5481 }, { "epoch": 0.5573403822692151, "grad_norm": 0.4288637638092041, "learning_rate": 9.995994304823792e-06, "loss": 0.3927, "step": 5482 }, { "epoch": 0.5574420496136641, "grad_norm": 0.3852010667324066, "learning_rate": 9.995980089543611e-06, "loss": 0.3781, "step": 5483 }, { "epoch": 0.557543716958113, "grad_norm": 0.4106208086013794, "learning_rate": 9.99596584909487e-06, "loss": 0.405, "step": 5484 }, { "epoch": 0.557645384302562, "grad_norm": 0.4555913507938385, "learning_rate": 9.99595158347764e-06, "loss": 0.4149, "step": 5485 }, { "epoch": 0.557747051647011, "grad_norm": 0.40286311507225037, "learning_rate": 9.99593729269199e-06, "loss": 0.3874, "step": 5486 }, { "epoch": 0.5578487189914599, "grad_norm": 0.3865772485733032, "learning_rate": 9.995922976738e-06, "loss": 0.4051, "step": 5487 }, { "epoch": 0.5579503863359089, "grad_norm": 0.45174989104270935, "learning_rate": 9.995908635615735e-06, "loss": 0.4512, "step": 5488 }, { "epoch": 0.5580520536803578, "grad_norm": 0.41230571269989014, "learning_rate": 9.995894269325268e-06, "loss": 0.4042, "step": 5489 }, { "epoch": 0.5581537210248069, "grad_norm": 0.3947283923625946, "learning_rate": 9.995879877866676e-06, "loss": 0.4213, "step": 5490 }, { "epoch": 0.5582553883692558, "grad_norm": 0.39944252371788025, "learning_rate": 9.995865461240026e-06, "loss": 0.4002, "step": 5491 }, { "epoch": 0.5583570557137048, "grad_norm": 0.3788453936576843, "learning_rate": 9.995851019445394e-06, "loss": 0.4256, "step": 5492 }, { "epoch": 0.5584587230581537, "grad_norm": 0.3835090100765228, "learning_rate": 9.99583655248285e-06, "loss": 0.4278, "step": 5493 }, { "epoch": 0.5585603904026026, "grad_norm": 0.35105931758880615, "learning_rate": 9.995822060352472e-06, "loss": 0.3743, "step": 5494 }, { "epoch": 0.5586620577470517, "grad_norm": 0.392000675201416, "learning_rate": 9.995807543054328e-06, "loss": 0.3929, "step": 5495 }, { "epoch": 0.5587637250915006, "grad_norm": 0.35646742582321167, "learning_rate": 9.995793000588492e-06, "loss": 0.4148, "step": 5496 }, { "epoch": 0.5588653924359496, "grad_norm": 0.40377962589263916, "learning_rate": 9.99577843295504e-06, "loss": 0.3883, "step": 5497 }, { "epoch": 0.5589670597803985, "grad_norm": 0.4488551616668701, "learning_rate": 9.99576384015404e-06, "loss": 0.3957, "step": 5498 }, { "epoch": 0.5590687271248475, "grad_norm": 0.36870652437210083, "learning_rate": 9.995749222185572e-06, "loss": 0.4307, "step": 5499 }, { "epoch": 0.5591703944692965, "grad_norm": 0.4006052315235138, "learning_rate": 9.995734579049706e-06, "loss": 0.4221, "step": 5500 }, { "epoch": 0.5592720618137454, "grad_norm": 0.408733606338501, "learning_rate": 9.995719910746515e-06, "loss": 0.3887, "step": 5501 }, { "epoch": 0.5593737291581944, "grad_norm": 0.35508763790130615, "learning_rate": 9.995705217276077e-06, "loss": 0.3901, "step": 5502 }, { "epoch": 0.5594753965026433, "grad_norm": 0.36930522322654724, "learning_rate": 9.995690498638461e-06, "loss": 0.3944, "step": 5503 }, { "epoch": 0.5595770638470923, "grad_norm": 0.3621896207332611, "learning_rate": 9.995675754833744e-06, "loss": 0.4034, "step": 5504 }, { "epoch": 0.5596787311915413, "grad_norm": 0.3583102822303772, "learning_rate": 9.995660985861998e-06, "loss": 0.402, "step": 5505 }, { "epoch": 0.5597803985359903, "grad_norm": 0.3924811780452728, "learning_rate": 9.995646191723303e-06, "loss": 0.4101, "step": 5506 }, { "epoch": 0.5598820658804392, "grad_norm": 0.37460190057754517, "learning_rate": 9.995631372417727e-06, "loss": 0.405, "step": 5507 }, { "epoch": 0.5599837332248881, "grad_norm": 0.375122994184494, "learning_rate": 9.995616527945348e-06, "loss": 0.4184, "step": 5508 }, { "epoch": 0.5600854005693371, "grad_norm": 0.380751371383667, "learning_rate": 9.99560165830624e-06, "loss": 0.3902, "step": 5509 }, { "epoch": 0.5601870679137861, "grad_norm": 0.3647414445877075, "learning_rate": 9.995586763500477e-06, "loss": 0.3462, "step": 5510 }, { "epoch": 0.5602887352582351, "grad_norm": 0.3788270652294159, "learning_rate": 9.995571843528135e-06, "loss": 0.3997, "step": 5511 }, { "epoch": 0.560390402602684, "grad_norm": 0.4062989056110382, "learning_rate": 9.995556898389291e-06, "loss": 0.402, "step": 5512 }, { "epoch": 0.560492069947133, "grad_norm": 0.36344093084335327, "learning_rate": 9.995541928084019e-06, "loss": 0.3768, "step": 5513 }, { "epoch": 0.5605937372915819, "grad_norm": 0.3764590322971344, "learning_rate": 9.995526932612391e-06, "loss": 0.3927, "step": 5514 }, { "epoch": 0.560695404636031, "grad_norm": 0.4017292559146881, "learning_rate": 9.995511911974487e-06, "loss": 0.4028, "step": 5515 }, { "epoch": 0.5607970719804799, "grad_norm": 0.44926655292510986, "learning_rate": 9.99549686617038e-06, "loss": 0.4073, "step": 5516 }, { "epoch": 0.5608987393249288, "grad_norm": 0.38496842980384827, "learning_rate": 9.995481795200148e-06, "loss": 0.3949, "step": 5517 }, { "epoch": 0.5610004066693778, "grad_norm": 0.3851948082447052, "learning_rate": 9.995466699063865e-06, "loss": 0.4627, "step": 5518 }, { "epoch": 0.5611020740138267, "grad_norm": 0.4195575416088104, "learning_rate": 9.99545157776161e-06, "loss": 0.4393, "step": 5519 }, { "epoch": 0.5612037413582758, "grad_norm": 0.38459715247154236, "learning_rate": 9.995436431293455e-06, "loss": 0.4024, "step": 5520 }, { "epoch": 0.5613054087027247, "grad_norm": 0.38261228799819946, "learning_rate": 9.99542125965948e-06, "loss": 0.3911, "step": 5521 }, { "epoch": 0.5614070760471737, "grad_norm": 0.43416526913642883, "learning_rate": 9.995406062859759e-06, "loss": 0.4061, "step": 5522 }, { "epoch": 0.5615087433916226, "grad_norm": 0.40243247151374817, "learning_rate": 9.995390840894368e-06, "loss": 0.3727, "step": 5523 }, { "epoch": 0.5616104107360715, "grad_norm": 0.4089510142803192, "learning_rate": 9.995375593763385e-06, "loss": 0.4196, "step": 5524 }, { "epoch": 0.5617120780805205, "grad_norm": 0.3660947382450104, "learning_rate": 9.99536032146689e-06, "loss": 0.402, "step": 5525 }, { "epoch": 0.5618137454249695, "grad_norm": 0.42646175622940063, "learning_rate": 9.995345024004953e-06, "loss": 0.4019, "step": 5526 }, { "epoch": 0.5619154127694185, "grad_norm": 0.3878166973590851, "learning_rate": 9.995329701377656e-06, "loss": 0.3904, "step": 5527 }, { "epoch": 0.5620170801138674, "grad_norm": 0.3976532816886902, "learning_rate": 9.995314353585077e-06, "loss": 0.3999, "step": 5528 }, { "epoch": 0.5621187474583164, "grad_norm": 0.37995028495788574, "learning_rate": 9.995298980627289e-06, "loss": 0.38, "step": 5529 }, { "epoch": 0.5622204148027653, "grad_norm": 0.3968295156955719, "learning_rate": 9.995283582504374e-06, "loss": 0.4403, "step": 5530 }, { "epoch": 0.5623220821472144, "grad_norm": 0.39396485686302185, "learning_rate": 9.995268159216405e-06, "loss": 0.4605, "step": 5531 }, { "epoch": 0.5624237494916633, "grad_norm": 0.41071560978889465, "learning_rate": 9.995252710763463e-06, "loss": 0.435, "step": 5532 }, { "epoch": 0.5625254168361122, "grad_norm": 0.3510538637638092, "learning_rate": 9.995237237145625e-06, "loss": 0.4241, "step": 5533 }, { "epoch": 0.5626270841805612, "grad_norm": 0.3717833161354065, "learning_rate": 9.995221738362968e-06, "loss": 0.4103, "step": 5534 }, { "epoch": 0.5627287515250101, "grad_norm": 0.38862714171409607, "learning_rate": 9.995206214415574e-06, "loss": 0.4173, "step": 5535 }, { "epoch": 0.5628304188694592, "grad_norm": 0.4132821559906006, "learning_rate": 9.995190665303516e-06, "loss": 0.397, "step": 5536 }, { "epoch": 0.5629320862139081, "grad_norm": 0.37076273560523987, "learning_rate": 9.995175091026874e-06, "loss": 0.3961, "step": 5537 }, { "epoch": 0.563033753558357, "grad_norm": 0.4511469304561615, "learning_rate": 9.995159491585727e-06, "loss": 0.44, "step": 5538 }, { "epoch": 0.563135420902806, "grad_norm": 0.3641599714756012, "learning_rate": 9.995143866980154e-06, "loss": 0.4172, "step": 5539 }, { "epoch": 0.5632370882472549, "grad_norm": 0.3597310483455658, "learning_rate": 9.995128217210232e-06, "loss": 0.3845, "step": 5540 }, { "epoch": 0.563338755591704, "grad_norm": 0.3887792229652405, "learning_rate": 9.995112542276042e-06, "loss": 0.3896, "step": 5541 }, { "epoch": 0.5634404229361529, "grad_norm": 0.4136491119861603, "learning_rate": 9.995096842177662e-06, "loss": 0.4336, "step": 5542 }, { "epoch": 0.5635420902806019, "grad_norm": 0.33312708139419556, "learning_rate": 9.995081116915173e-06, "loss": 0.3782, "step": 5543 }, { "epoch": 0.5636437576250508, "grad_norm": 0.3769824206829071, "learning_rate": 9.99506536648865e-06, "loss": 0.4285, "step": 5544 }, { "epoch": 0.5637454249694998, "grad_norm": 0.38053709268569946, "learning_rate": 9.995049590898176e-06, "loss": 0.3711, "step": 5545 }, { "epoch": 0.5638470923139488, "grad_norm": 0.3866463601589203, "learning_rate": 9.99503379014383e-06, "loss": 0.4233, "step": 5546 }, { "epoch": 0.5639487596583977, "grad_norm": 0.37131279706954956, "learning_rate": 9.995017964225688e-06, "loss": 0.4151, "step": 5547 }, { "epoch": 0.5640504270028467, "grad_norm": 0.381807416677475, "learning_rate": 9.995002113143834e-06, "loss": 0.3959, "step": 5548 }, { "epoch": 0.5641520943472956, "grad_norm": 0.3866422176361084, "learning_rate": 9.994986236898346e-06, "loss": 0.4064, "step": 5549 }, { "epoch": 0.5642537616917446, "grad_norm": 0.39181089401245117, "learning_rate": 9.994970335489304e-06, "loss": 0.3957, "step": 5550 }, { "epoch": 0.5643554290361936, "grad_norm": 0.33989617228507996, "learning_rate": 9.99495440891679e-06, "loss": 0.412, "step": 5551 }, { "epoch": 0.5644570963806426, "grad_norm": 0.40735673904418945, "learning_rate": 9.99493845718088e-06, "loss": 0.3914, "step": 5552 }, { "epoch": 0.5645587637250915, "grad_norm": 0.35949698090553284, "learning_rate": 9.994922480281659e-06, "loss": 0.3777, "step": 5553 }, { "epoch": 0.5646604310695404, "grad_norm": 0.3867775797843933, "learning_rate": 9.994906478219206e-06, "loss": 0.4202, "step": 5554 }, { "epoch": 0.5647620984139894, "grad_norm": 0.4207709729671478, "learning_rate": 9.9948904509936e-06, "loss": 0.3743, "step": 5555 }, { "epoch": 0.5648637657584384, "grad_norm": 0.36980557441711426, "learning_rate": 9.994874398604923e-06, "loss": 0.4037, "step": 5556 }, { "epoch": 0.5649654331028874, "grad_norm": 0.3972163200378418, "learning_rate": 9.994858321053256e-06, "loss": 0.3961, "step": 5557 }, { "epoch": 0.5650671004473363, "grad_norm": 0.39488422870635986, "learning_rate": 9.99484221833868e-06, "loss": 0.3919, "step": 5558 }, { "epoch": 0.5651687677917853, "grad_norm": 0.3961729109287262, "learning_rate": 9.994826090461275e-06, "loss": 0.3828, "step": 5559 }, { "epoch": 0.5652704351362342, "grad_norm": 0.3648153841495514, "learning_rate": 9.994809937421123e-06, "loss": 0.4329, "step": 5560 }, { "epoch": 0.5653721024806833, "grad_norm": 0.40116196870803833, "learning_rate": 9.994793759218306e-06, "loss": 0.428, "step": 5561 }, { "epoch": 0.5654737698251322, "grad_norm": 0.36879101395606995, "learning_rate": 9.994777555852907e-06, "loss": 0.4236, "step": 5562 }, { "epoch": 0.5655754371695811, "grad_norm": 0.36402857303619385, "learning_rate": 9.994761327325003e-06, "loss": 0.4098, "step": 5563 }, { "epoch": 0.5656771045140301, "grad_norm": 0.41582566499710083, "learning_rate": 9.994745073634679e-06, "loss": 0.3847, "step": 5564 }, { "epoch": 0.565778771858479, "grad_norm": 0.3530944287776947, "learning_rate": 9.994728794782017e-06, "loss": 0.4133, "step": 5565 }, { "epoch": 0.565880439202928, "grad_norm": 0.3927738666534424, "learning_rate": 9.994712490767097e-06, "loss": 0.4078, "step": 5566 }, { "epoch": 0.565982106547377, "grad_norm": 0.4296528100967407, "learning_rate": 9.994696161590003e-06, "loss": 0.4308, "step": 5567 }, { "epoch": 0.566083773891826, "grad_norm": 0.357593297958374, "learning_rate": 9.994679807250815e-06, "loss": 0.3701, "step": 5568 }, { "epoch": 0.5661854412362749, "grad_norm": 0.39136096835136414, "learning_rate": 9.99466342774962e-06, "loss": 0.435, "step": 5569 }, { "epoch": 0.5662871085807238, "grad_norm": 0.3723101019859314, "learning_rate": 9.994647023086497e-06, "loss": 0.3887, "step": 5570 }, { "epoch": 0.5663887759251728, "grad_norm": 0.3800886571407318, "learning_rate": 9.994630593261527e-06, "loss": 0.4108, "step": 5571 }, { "epoch": 0.5664904432696218, "grad_norm": 0.39913031458854675, "learning_rate": 9.994614138274796e-06, "loss": 0.3976, "step": 5572 }, { "epoch": 0.5665921106140708, "grad_norm": 0.3736438751220703, "learning_rate": 9.994597658126388e-06, "loss": 0.3812, "step": 5573 }, { "epoch": 0.5666937779585197, "grad_norm": 0.37361589074134827, "learning_rate": 9.994581152816382e-06, "loss": 0.3683, "step": 5574 }, { "epoch": 0.5667954453029687, "grad_norm": 0.36541253328323364, "learning_rate": 9.994564622344862e-06, "loss": 0.4471, "step": 5575 }, { "epoch": 0.5668971126474176, "grad_norm": 0.364522248506546, "learning_rate": 9.994548066711914e-06, "loss": 0.3731, "step": 5576 }, { "epoch": 0.5669987799918667, "grad_norm": 0.35144609212875366, "learning_rate": 9.99453148591762e-06, "loss": 0.4572, "step": 5577 }, { "epoch": 0.5671004473363156, "grad_norm": 0.3628386855125427, "learning_rate": 9.994514879962063e-06, "loss": 0.4116, "step": 5578 }, { "epoch": 0.5672021146807645, "grad_norm": 0.3890400528907776, "learning_rate": 9.994498248845326e-06, "loss": 0.4164, "step": 5579 }, { "epoch": 0.5673037820252135, "grad_norm": 0.36440199613571167, "learning_rate": 9.994481592567494e-06, "loss": 0.3924, "step": 5580 }, { "epoch": 0.5674054493696624, "grad_norm": 0.3800482451915741, "learning_rate": 9.99446491112865e-06, "loss": 0.4335, "step": 5581 }, { "epoch": 0.5675071167141115, "grad_norm": 0.36198171973228455, "learning_rate": 9.99444820452888e-06, "loss": 0.3852, "step": 5582 }, { "epoch": 0.5676087840585604, "grad_norm": 0.3917185664176941, "learning_rate": 9.994431472768267e-06, "loss": 0.4067, "step": 5583 }, { "epoch": 0.5677104514030094, "grad_norm": 0.36120545864105225, "learning_rate": 9.994414715846894e-06, "loss": 0.3915, "step": 5584 }, { "epoch": 0.5678121187474583, "grad_norm": 0.37429362535476685, "learning_rate": 9.994397933764847e-06, "loss": 0.4141, "step": 5585 }, { "epoch": 0.5679137860919072, "grad_norm": 0.36335018277168274, "learning_rate": 9.994381126522211e-06, "loss": 0.4203, "step": 5586 }, { "epoch": 0.5680154534363563, "grad_norm": 0.3780147135257721, "learning_rate": 9.994364294119067e-06, "loss": 0.3876, "step": 5587 }, { "epoch": 0.5681171207808052, "grad_norm": 0.3723793923854828, "learning_rate": 9.994347436555504e-06, "loss": 0.4, "step": 5588 }, { "epoch": 0.5682187881252542, "grad_norm": 0.3877057731151581, "learning_rate": 9.994330553831605e-06, "loss": 0.449, "step": 5589 }, { "epoch": 0.5683204554697031, "grad_norm": 0.37145423889160156, "learning_rate": 9.994313645947458e-06, "loss": 0.4181, "step": 5590 }, { "epoch": 0.568422122814152, "grad_norm": 0.3919890224933624, "learning_rate": 9.994296712903143e-06, "loss": 0.3937, "step": 5591 }, { "epoch": 0.5685237901586011, "grad_norm": 0.38455888628959656, "learning_rate": 9.994279754698749e-06, "loss": 0.3805, "step": 5592 }, { "epoch": 0.56862545750305, "grad_norm": 0.4141511619091034, "learning_rate": 9.99426277133436e-06, "loss": 0.3885, "step": 5593 }, { "epoch": 0.568727124847499, "grad_norm": 0.37140581011772156, "learning_rate": 9.994245762810062e-06, "loss": 0.3933, "step": 5594 }, { "epoch": 0.5688287921919479, "grad_norm": 0.43970176577568054, "learning_rate": 9.99422872912594e-06, "loss": 0.4114, "step": 5595 }, { "epoch": 0.5689304595363969, "grad_norm": 0.41733062267303467, "learning_rate": 9.994211670282082e-06, "loss": 0.4074, "step": 5596 }, { "epoch": 0.5690321268808459, "grad_norm": 0.37015730142593384, "learning_rate": 9.994194586278571e-06, "loss": 0.4071, "step": 5597 }, { "epoch": 0.5691337942252949, "grad_norm": 0.4020172357559204, "learning_rate": 9.994177477115496e-06, "loss": 0.4074, "step": 5598 }, { "epoch": 0.5692354615697438, "grad_norm": 0.3868664801120758, "learning_rate": 9.994160342792942e-06, "loss": 0.4213, "step": 5599 }, { "epoch": 0.5693371289141927, "grad_norm": 0.3404466509819031, "learning_rate": 9.994143183310993e-06, "loss": 0.3812, "step": 5600 }, { "epoch": 0.5694387962586417, "grad_norm": 0.3887292742729187, "learning_rate": 9.994125998669739e-06, "loss": 0.392, "step": 5601 }, { "epoch": 0.5695404636030907, "grad_norm": 0.3291984796524048, "learning_rate": 9.994108788869263e-06, "loss": 0.3613, "step": 5602 }, { "epoch": 0.5696421309475397, "grad_norm": 0.3605671525001526, "learning_rate": 9.994091553909656e-06, "loss": 0.4161, "step": 5603 }, { "epoch": 0.5697437982919886, "grad_norm": 0.40045681595802307, "learning_rate": 9.994074293791001e-06, "loss": 0.3759, "step": 5604 }, { "epoch": 0.5698454656364376, "grad_norm": 0.3748580515384674, "learning_rate": 9.994057008513386e-06, "loss": 0.3846, "step": 5605 }, { "epoch": 0.5699471329808865, "grad_norm": 0.3360591232776642, "learning_rate": 9.9940396980769e-06, "loss": 0.3838, "step": 5606 }, { "epoch": 0.5700488003253354, "grad_norm": 0.38540884852409363, "learning_rate": 9.99402236248163e-06, "loss": 0.377, "step": 5607 }, { "epoch": 0.5701504676697845, "grad_norm": 0.40655216574668884, "learning_rate": 9.99400500172766e-06, "loss": 0.3939, "step": 5608 }, { "epoch": 0.5702521350142334, "grad_norm": 0.36593976616859436, "learning_rate": 9.99398761581508e-06, "loss": 0.3845, "step": 5609 }, { "epoch": 0.5703538023586824, "grad_norm": 0.3763894736766815, "learning_rate": 9.993970204743976e-06, "loss": 0.3737, "step": 5610 }, { "epoch": 0.5704554697031313, "grad_norm": 0.37360841035842896, "learning_rate": 9.99395276851444e-06, "loss": 0.3821, "step": 5611 }, { "epoch": 0.5705571370475803, "grad_norm": 0.38948115706443787, "learning_rate": 9.993935307126554e-06, "loss": 0.3886, "step": 5612 }, { "epoch": 0.5706588043920293, "grad_norm": 0.3786278963088989, "learning_rate": 9.99391782058041e-06, "loss": 0.3877, "step": 5613 }, { "epoch": 0.5707604717364783, "grad_norm": 0.38866323232650757, "learning_rate": 9.993900308876095e-06, "loss": 0.3799, "step": 5614 }, { "epoch": 0.5708621390809272, "grad_norm": 0.3834127187728882, "learning_rate": 9.993882772013694e-06, "loss": 0.3951, "step": 5615 }, { "epoch": 0.5709638064253761, "grad_norm": 0.3766734302043915, "learning_rate": 9.993865209993302e-06, "loss": 0.4068, "step": 5616 }, { "epoch": 0.5710654737698251, "grad_norm": 0.40008723735809326, "learning_rate": 9.993847622815002e-06, "loss": 0.4007, "step": 5617 }, { "epoch": 0.5711671411142741, "grad_norm": 0.412466436624527, "learning_rate": 9.993830010478885e-06, "loss": 0.3964, "step": 5618 }, { "epoch": 0.5712688084587231, "grad_norm": 0.37082627415657043, "learning_rate": 9.993812372985038e-06, "loss": 0.3959, "step": 5619 }, { "epoch": 0.571370475803172, "grad_norm": 0.4060281217098236, "learning_rate": 9.993794710333552e-06, "loss": 0.3683, "step": 5620 }, { "epoch": 0.571472143147621, "grad_norm": 0.3905428349971771, "learning_rate": 9.993777022524515e-06, "loss": 0.4186, "step": 5621 }, { "epoch": 0.5715738104920699, "grad_norm": 0.3770306706428528, "learning_rate": 9.993759309558015e-06, "loss": 0.3635, "step": 5622 }, { "epoch": 0.571675477836519, "grad_norm": 0.3828560709953308, "learning_rate": 9.993741571434143e-06, "loss": 0.351, "step": 5623 }, { "epoch": 0.5717771451809679, "grad_norm": 0.42724406719207764, "learning_rate": 9.993723808152988e-06, "loss": 0.4137, "step": 5624 }, { "epoch": 0.5718788125254168, "grad_norm": 0.37469929456710815, "learning_rate": 9.99370601971464e-06, "loss": 0.4313, "step": 5625 }, { "epoch": 0.5719804798698658, "grad_norm": 0.4350302517414093, "learning_rate": 9.993688206119185e-06, "loss": 0.4107, "step": 5626 }, { "epoch": 0.5720821472143147, "grad_norm": 0.4310610294342041, "learning_rate": 9.993670367366717e-06, "loss": 0.3992, "step": 5627 }, { "epoch": 0.5721838145587638, "grad_norm": 0.34302371740341187, "learning_rate": 9.993652503457325e-06, "loss": 0.4332, "step": 5628 }, { "epoch": 0.5722854819032127, "grad_norm": 0.43193569779396057, "learning_rate": 9.993634614391098e-06, "loss": 0.3964, "step": 5629 }, { "epoch": 0.5723871492476617, "grad_norm": 0.4453292191028595, "learning_rate": 9.993616700168128e-06, "loss": 0.4185, "step": 5630 }, { "epoch": 0.5724888165921106, "grad_norm": 0.3807874321937561, "learning_rate": 9.993598760788501e-06, "loss": 0.3897, "step": 5631 }, { "epoch": 0.5725904839365595, "grad_norm": 0.4177320897579193, "learning_rate": 9.993580796252311e-06, "loss": 0.4191, "step": 5632 }, { "epoch": 0.5726921512810086, "grad_norm": 0.4132241904735565, "learning_rate": 9.993562806559648e-06, "loss": 0.3941, "step": 5633 }, { "epoch": 0.5727938186254575, "grad_norm": 0.3980855643749237, "learning_rate": 9.9935447917106e-06, "loss": 0.4317, "step": 5634 }, { "epoch": 0.5728954859699065, "grad_norm": 0.4068875312805176, "learning_rate": 9.993526751705264e-06, "loss": 0.4006, "step": 5635 }, { "epoch": 0.5729971533143554, "grad_norm": 0.36590439081192017, "learning_rate": 9.993508686543724e-06, "loss": 0.3872, "step": 5636 }, { "epoch": 0.5730988206588044, "grad_norm": 0.40124282240867615, "learning_rate": 9.993490596226076e-06, "loss": 0.4035, "step": 5637 }, { "epoch": 0.5732004880032534, "grad_norm": 0.36378908157348633, "learning_rate": 9.993472480752406e-06, "loss": 0.3902, "step": 5638 }, { "epoch": 0.5733021553477023, "grad_norm": 0.3523600995540619, "learning_rate": 9.993454340122811e-06, "loss": 0.398, "step": 5639 }, { "epoch": 0.5734038226921513, "grad_norm": 0.43578478693962097, "learning_rate": 9.993436174337378e-06, "loss": 0.4053, "step": 5640 }, { "epoch": 0.5735054900366002, "grad_norm": 0.42721012234687805, "learning_rate": 9.9934179833962e-06, "loss": 0.3923, "step": 5641 }, { "epoch": 0.5736071573810492, "grad_norm": 0.3673732876777649, "learning_rate": 9.993399767299372e-06, "loss": 0.3792, "step": 5642 }, { "epoch": 0.5737088247254982, "grad_norm": 0.37800225615501404, "learning_rate": 9.993381526046979e-06, "loss": 0.3939, "step": 5643 }, { "epoch": 0.5738104920699472, "grad_norm": 0.4070872664451599, "learning_rate": 9.993363259639119e-06, "loss": 0.4091, "step": 5644 }, { "epoch": 0.5739121594143961, "grad_norm": 0.40372708439826965, "learning_rate": 9.99334496807588e-06, "loss": 0.3932, "step": 5645 }, { "epoch": 0.574013826758845, "grad_norm": 0.4521203935146332, "learning_rate": 9.993326651357356e-06, "loss": 0.4078, "step": 5646 }, { "epoch": 0.574115494103294, "grad_norm": 0.37606239318847656, "learning_rate": 9.993308309483638e-06, "loss": 0.3996, "step": 5647 }, { "epoch": 0.5742171614477429, "grad_norm": 0.35933253169059753, "learning_rate": 9.99328994245482e-06, "loss": 0.3805, "step": 5648 }, { "epoch": 0.574318828792192, "grad_norm": 0.4013550281524658, "learning_rate": 9.993271550270994e-06, "loss": 0.381, "step": 5649 }, { "epoch": 0.5744204961366409, "grad_norm": 0.3721393048763275, "learning_rate": 9.993253132932252e-06, "loss": 0.3968, "step": 5650 }, { "epoch": 0.5745221634810899, "grad_norm": 0.3668087124824524, "learning_rate": 9.993234690438689e-06, "loss": 0.4025, "step": 5651 }, { "epoch": 0.5746238308255388, "grad_norm": 0.362993061542511, "learning_rate": 9.993216222790395e-06, "loss": 0.4148, "step": 5652 }, { "epoch": 0.5747254981699877, "grad_norm": 0.3658861517906189, "learning_rate": 9.993197729987464e-06, "loss": 0.3758, "step": 5653 }, { "epoch": 0.5748271655144368, "grad_norm": 0.39443010091781616, "learning_rate": 9.993179212029989e-06, "loss": 0.4243, "step": 5654 }, { "epoch": 0.5749288328588857, "grad_norm": 0.4405232071876526, "learning_rate": 9.993160668918065e-06, "loss": 0.4067, "step": 5655 }, { "epoch": 0.5750305002033347, "grad_norm": 0.37618014216423035, "learning_rate": 9.993142100651784e-06, "loss": 0.3983, "step": 5656 }, { "epoch": 0.5751321675477836, "grad_norm": 0.35076645016670227, "learning_rate": 9.993123507231239e-06, "loss": 0.4013, "step": 5657 }, { "epoch": 0.5752338348922326, "grad_norm": 0.4354308545589447, "learning_rate": 9.993104888656524e-06, "loss": 0.4147, "step": 5658 }, { "epoch": 0.5753355022366816, "grad_norm": 0.3974509835243225, "learning_rate": 9.993086244927735e-06, "loss": 0.4109, "step": 5659 }, { "epoch": 0.5754371695811306, "grad_norm": 0.39917436242103577, "learning_rate": 9.993067576044962e-06, "loss": 0.433, "step": 5660 }, { "epoch": 0.5755388369255795, "grad_norm": 0.37302079796791077, "learning_rate": 9.9930488820083e-06, "loss": 0.4085, "step": 5661 }, { "epoch": 0.5756405042700284, "grad_norm": 0.3981251120567322, "learning_rate": 9.993030162817847e-06, "loss": 0.4481, "step": 5662 }, { "epoch": 0.5757421716144774, "grad_norm": 0.37108030915260315, "learning_rate": 9.993011418473694e-06, "loss": 0.3775, "step": 5663 }, { "epoch": 0.5758438389589264, "grad_norm": 0.40554550290107727, "learning_rate": 9.992992648975935e-06, "loss": 0.4527, "step": 5664 }, { "epoch": 0.5759455063033754, "grad_norm": 0.40674668550491333, "learning_rate": 9.992973854324666e-06, "loss": 0.4544, "step": 5665 }, { "epoch": 0.5760471736478243, "grad_norm": 0.34069859981536865, "learning_rate": 9.992955034519982e-06, "loss": 0.3903, "step": 5666 }, { "epoch": 0.5761488409922733, "grad_norm": 0.3794933259487152, "learning_rate": 9.992936189561976e-06, "loss": 0.4, "step": 5667 }, { "epoch": 0.5762505083367222, "grad_norm": 0.419024795293808, "learning_rate": 9.992917319450745e-06, "loss": 0.4361, "step": 5668 }, { "epoch": 0.5763521756811713, "grad_norm": 0.3770343065261841, "learning_rate": 9.992898424186382e-06, "loss": 0.4033, "step": 5669 }, { "epoch": 0.5764538430256202, "grad_norm": 0.38748496770858765, "learning_rate": 9.992879503768984e-06, "loss": 0.4025, "step": 5670 }, { "epoch": 0.5765555103700691, "grad_norm": 0.37975168228149414, "learning_rate": 9.992860558198645e-06, "loss": 0.4193, "step": 5671 }, { "epoch": 0.5766571777145181, "grad_norm": 0.3825085163116455, "learning_rate": 9.992841587475461e-06, "loss": 0.4033, "step": 5672 }, { "epoch": 0.576758845058967, "grad_norm": 0.33920687437057495, "learning_rate": 9.992822591599527e-06, "loss": 0.3843, "step": 5673 }, { "epoch": 0.5768605124034161, "grad_norm": 0.36620327830314636, "learning_rate": 9.99280357057094e-06, "loss": 0.4188, "step": 5674 }, { "epoch": 0.576962179747865, "grad_norm": 0.3663368225097656, "learning_rate": 9.992784524389797e-06, "loss": 0.3934, "step": 5675 }, { "epoch": 0.577063847092314, "grad_norm": 0.359037846326828, "learning_rate": 9.992765453056188e-06, "loss": 0.3952, "step": 5676 }, { "epoch": 0.5771655144367629, "grad_norm": 0.37747183442115784, "learning_rate": 9.992746356570216e-06, "loss": 0.3693, "step": 5677 }, { "epoch": 0.5772671817812118, "grad_norm": 0.3787693977355957, "learning_rate": 9.992727234931973e-06, "loss": 0.4219, "step": 5678 }, { "epoch": 0.5773688491256609, "grad_norm": 0.4046002924442291, "learning_rate": 9.992708088141558e-06, "loss": 0.4052, "step": 5679 }, { "epoch": 0.5774705164701098, "grad_norm": 0.3635280430316925, "learning_rate": 9.992688916199065e-06, "loss": 0.3982, "step": 5680 }, { "epoch": 0.5775721838145588, "grad_norm": 0.36803868412971497, "learning_rate": 9.992669719104592e-06, "loss": 0.4337, "step": 5681 }, { "epoch": 0.5776738511590077, "grad_norm": 0.3757437467575073, "learning_rate": 9.992650496858234e-06, "loss": 0.4101, "step": 5682 }, { "epoch": 0.5777755185034567, "grad_norm": 0.34999769926071167, "learning_rate": 9.992631249460091e-06, "loss": 0.3928, "step": 5683 }, { "epoch": 0.5778771858479057, "grad_norm": 0.39365386962890625, "learning_rate": 9.992611976910257e-06, "loss": 0.4162, "step": 5684 }, { "epoch": 0.5779788531923546, "grad_norm": 0.4044620394706726, "learning_rate": 9.992592679208832e-06, "loss": 0.3859, "step": 5685 }, { "epoch": 0.5780805205368036, "grad_norm": 0.3540330231189728, "learning_rate": 9.99257335635591e-06, "loss": 0.4278, "step": 5686 }, { "epoch": 0.5781821878812525, "grad_norm": 0.37244167923927307, "learning_rate": 9.992554008351591e-06, "loss": 0.3721, "step": 5687 }, { "epoch": 0.5782838552257015, "grad_norm": 0.4023120701313019, "learning_rate": 9.992534635195971e-06, "loss": 0.4092, "step": 5688 }, { "epoch": 0.5783855225701505, "grad_norm": 0.39379313588142395, "learning_rate": 9.992515236889148e-06, "loss": 0.3888, "step": 5689 }, { "epoch": 0.5784871899145995, "grad_norm": 0.37224775552749634, "learning_rate": 9.992495813431219e-06, "loss": 0.4036, "step": 5690 }, { "epoch": 0.5785888572590484, "grad_norm": 0.3678138852119446, "learning_rate": 9.992476364822283e-06, "loss": 0.3891, "step": 5691 }, { "epoch": 0.5786905246034973, "grad_norm": 0.40043866634368896, "learning_rate": 9.992456891062436e-06, "loss": 0.4002, "step": 5692 }, { "epoch": 0.5787921919479463, "grad_norm": 0.3640805184841156, "learning_rate": 9.99243739215178e-06, "loss": 0.3893, "step": 5693 }, { "epoch": 0.5788938592923952, "grad_norm": 0.36462587118148804, "learning_rate": 9.99241786809041e-06, "loss": 0.3839, "step": 5694 }, { "epoch": 0.5789955266368443, "grad_norm": 0.3845444321632385, "learning_rate": 9.992398318878425e-06, "loss": 0.4127, "step": 5695 }, { "epoch": 0.5790971939812932, "grad_norm": 0.3982342481613159, "learning_rate": 9.992378744515922e-06, "loss": 0.3946, "step": 5696 }, { "epoch": 0.5791988613257422, "grad_norm": 0.38962242007255554, "learning_rate": 9.992359145003003e-06, "loss": 0.4202, "step": 5697 }, { "epoch": 0.5793005286701911, "grad_norm": 0.3826267421245575, "learning_rate": 9.992339520339764e-06, "loss": 0.3947, "step": 5698 }, { "epoch": 0.57940219601464, "grad_norm": 0.3984701335430145, "learning_rate": 9.992319870526307e-06, "loss": 0.392, "step": 5699 }, { "epoch": 0.5795038633590891, "grad_norm": 0.371210515499115, "learning_rate": 9.992300195562727e-06, "loss": 0.3696, "step": 5700 }, { "epoch": 0.579605530703538, "grad_norm": 0.35206061601638794, "learning_rate": 9.992280495449125e-06, "loss": 0.3844, "step": 5701 }, { "epoch": 0.579707198047987, "grad_norm": 0.3985733985900879, "learning_rate": 9.992260770185602e-06, "loss": 0.3985, "step": 5702 }, { "epoch": 0.5798088653924359, "grad_norm": 0.3785736560821533, "learning_rate": 9.992241019772252e-06, "loss": 0.4072, "step": 5703 }, { "epoch": 0.5799105327368849, "grad_norm": 0.35635945200920105, "learning_rate": 9.992221244209182e-06, "loss": 0.3871, "step": 5704 }, { "epoch": 0.5800122000813339, "grad_norm": 0.35163846611976624, "learning_rate": 9.992201443496485e-06, "loss": 0.4197, "step": 5705 }, { "epoch": 0.5801138674257829, "grad_norm": 0.38548511266708374, "learning_rate": 9.992181617634264e-06, "loss": 0.388, "step": 5706 }, { "epoch": 0.5802155347702318, "grad_norm": 0.35102370381355286, "learning_rate": 9.99216176662262e-06, "loss": 0.3596, "step": 5707 }, { "epoch": 0.5803172021146807, "grad_norm": 0.37255313992500305, "learning_rate": 9.99214189046165e-06, "loss": 0.3989, "step": 5708 }, { "epoch": 0.5804188694591297, "grad_norm": 0.36981528997421265, "learning_rate": 9.992121989151455e-06, "loss": 0.4287, "step": 5709 }, { "epoch": 0.5805205368035787, "grad_norm": 0.3764995336532593, "learning_rate": 9.992102062692137e-06, "loss": 0.4285, "step": 5710 }, { "epoch": 0.5806222041480277, "grad_norm": 0.36785009503364563, "learning_rate": 9.992082111083793e-06, "loss": 0.3768, "step": 5711 }, { "epoch": 0.5807238714924766, "grad_norm": 0.3835914134979248, "learning_rate": 9.992062134326528e-06, "loss": 0.4117, "step": 5712 }, { "epoch": 0.5808255388369256, "grad_norm": 0.3634195923805237, "learning_rate": 9.992042132420439e-06, "loss": 0.3976, "step": 5713 }, { "epoch": 0.5809272061813745, "grad_norm": 0.36291956901550293, "learning_rate": 9.992022105365629e-06, "loss": 0.3917, "step": 5714 }, { "epoch": 0.5810288735258236, "grad_norm": 0.3850342929363251, "learning_rate": 9.992002053162197e-06, "loss": 0.439, "step": 5715 }, { "epoch": 0.5811305408702725, "grad_norm": 0.3328167200088501, "learning_rate": 9.991981975810246e-06, "loss": 0.3798, "step": 5716 }, { "epoch": 0.5812322082147214, "grad_norm": 0.3540910482406616, "learning_rate": 9.991961873309875e-06, "loss": 0.4089, "step": 5717 }, { "epoch": 0.5813338755591704, "grad_norm": 0.3464222550392151, "learning_rate": 9.991941745661186e-06, "loss": 0.3924, "step": 5718 }, { "epoch": 0.5814355429036193, "grad_norm": 0.36709633469581604, "learning_rate": 9.991921592864281e-06, "loss": 0.4025, "step": 5719 }, { "epoch": 0.5815372102480684, "grad_norm": 0.3423864543437958, "learning_rate": 9.991901414919261e-06, "loss": 0.4111, "step": 5720 }, { "epoch": 0.5816388775925173, "grad_norm": 0.3755190372467041, "learning_rate": 9.99188121182623e-06, "loss": 0.4013, "step": 5721 }, { "epoch": 0.5817405449369663, "grad_norm": 0.36157041788101196, "learning_rate": 9.991860983585286e-06, "loss": 0.4173, "step": 5722 }, { "epoch": 0.5818422122814152, "grad_norm": 0.37190839648246765, "learning_rate": 9.991840730196532e-06, "loss": 0.4042, "step": 5723 }, { "epoch": 0.5819438796258641, "grad_norm": 0.3540525734424591, "learning_rate": 9.991820451660073e-06, "loss": 0.4035, "step": 5724 }, { "epoch": 0.5820455469703132, "grad_norm": 0.3833170235157013, "learning_rate": 9.991800147976007e-06, "loss": 0.392, "step": 5725 }, { "epoch": 0.5821472143147621, "grad_norm": 0.3932756185531616, "learning_rate": 9.991779819144439e-06, "loss": 0.4064, "step": 5726 }, { "epoch": 0.5822488816592111, "grad_norm": 0.3858078718185425, "learning_rate": 9.99175946516547e-06, "loss": 0.4444, "step": 5727 }, { "epoch": 0.58235054900366, "grad_norm": 0.3480064868927002, "learning_rate": 9.9917390860392e-06, "loss": 0.4194, "step": 5728 }, { "epoch": 0.582452216348109, "grad_norm": 0.4592145085334778, "learning_rate": 9.991718681765739e-06, "loss": 0.4018, "step": 5729 }, { "epoch": 0.582553883692558, "grad_norm": 0.3755744695663452, "learning_rate": 9.991698252345185e-06, "loss": 0.414, "step": 5730 }, { "epoch": 0.582655551037007, "grad_norm": 0.38143691420555115, "learning_rate": 9.991677797777641e-06, "loss": 0.3921, "step": 5731 }, { "epoch": 0.5827572183814559, "grad_norm": 0.3976876735687256, "learning_rate": 9.99165731806321e-06, "loss": 0.3996, "step": 5732 }, { "epoch": 0.5828588857259048, "grad_norm": 0.4184112548828125, "learning_rate": 9.991636813201995e-06, "loss": 0.4224, "step": 5733 }, { "epoch": 0.5829605530703538, "grad_norm": 0.3618507385253906, "learning_rate": 9.9916162831941e-06, "loss": 0.3809, "step": 5734 }, { "epoch": 0.5830622204148027, "grad_norm": 0.367552250623703, "learning_rate": 9.991595728039628e-06, "loss": 0.4042, "step": 5735 }, { "epoch": 0.5831638877592518, "grad_norm": 0.41302070021629333, "learning_rate": 9.991575147738686e-06, "loss": 0.3745, "step": 5736 }, { "epoch": 0.5832655551037007, "grad_norm": 0.3807887136936188, "learning_rate": 9.991554542291372e-06, "loss": 0.4218, "step": 5737 }, { "epoch": 0.5833672224481496, "grad_norm": 0.36026424169540405, "learning_rate": 9.991533911697793e-06, "loss": 0.3913, "step": 5738 }, { "epoch": 0.5834688897925986, "grad_norm": 0.3700700104236603, "learning_rate": 9.991513255958052e-06, "loss": 0.3961, "step": 5739 }, { "epoch": 0.5835705571370475, "grad_norm": 0.3496048152446747, "learning_rate": 9.991492575072253e-06, "loss": 0.3652, "step": 5740 }, { "epoch": 0.5836722244814966, "grad_norm": 0.3650016486644745, "learning_rate": 9.991471869040501e-06, "loss": 0.4115, "step": 5741 }, { "epoch": 0.5837738918259455, "grad_norm": 0.3692920506000519, "learning_rate": 9.9914511378629e-06, "loss": 0.3801, "step": 5742 }, { "epoch": 0.5838755591703945, "grad_norm": 0.35310664772987366, "learning_rate": 9.991430381539555e-06, "loss": 0.4192, "step": 5743 }, { "epoch": 0.5839772265148434, "grad_norm": 0.34339794516563416, "learning_rate": 9.991409600070568e-06, "loss": 0.3885, "step": 5744 }, { "epoch": 0.5840788938592923, "grad_norm": 0.3541375398635864, "learning_rate": 9.991388793456048e-06, "loss": 0.3873, "step": 5745 }, { "epoch": 0.5841805612037414, "grad_norm": 0.40173304080963135, "learning_rate": 9.991367961696096e-06, "loss": 0.4276, "step": 5746 }, { "epoch": 0.5842822285481903, "grad_norm": 0.3762510418891907, "learning_rate": 9.991347104790819e-06, "loss": 0.3928, "step": 5747 }, { "epoch": 0.5843838958926393, "grad_norm": 0.388578325510025, "learning_rate": 9.99132622274032e-06, "loss": 0.4482, "step": 5748 }, { "epoch": 0.5844855632370882, "grad_norm": 0.36667823791503906, "learning_rate": 9.991305315544705e-06, "loss": 0.3865, "step": 5749 }, { "epoch": 0.5845872305815372, "grad_norm": 0.41648000478744507, "learning_rate": 9.991284383204083e-06, "loss": 0.4126, "step": 5750 }, { "epoch": 0.5846888979259862, "grad_norm": 0.3893551826477051, "learning_rate": 9.991263425718555e-06, "loss": 0.3469, "step": 5751 }, { "epoch": 0.5847905652704352, "grad_norm": 0.3969678580760956, "learning_rate": 9.991242443088227e-06, "loss": 0.4074, "step": 5752 }, { "epoch": 0.5848922326148841, "grad_norm": 0.34209805727005005, "learning_rate": 9.991221435313207e-06, "loss": 0.3864, "step": 5753 }, { "epoch": 0.584993899959333, "grad_norm": 0.400407075881958, "learning_rate": 9.9912004023936e-06, "loss": 0.4133, "step": 5754 }, { "epoch": 0.585095567303782, "grad_norm": 0.3566748797893524, "learning_rate": 9.991179344329509e-06, "loss": 0.4481, "step": 5755 }, { "epoch": 0.585197234648231, "grad_norm": 0.353840708732605, "learning_rate": 9.991158261121044e-06, "loss": 0.3797, "step": 5756 }, { "epoch": 0.58529890199268, "grad_norm": 0.3726671040058136, "learning_rate": 9.99113715276831e-06, "loss": 0.3924, "step": 5757 }, { "epoch": 0.5854005693371289, "grad_norm": 0.3601699471473694, "learning_rate": 9.991116019271414e-06, "loss": 0.4265, "step": 5758 }, { "epoch": 0.5855022366815779, "grad_norm": 0.3577135503292084, "learning_rate": 9.991094860630459e-06, "loss": 0.4059, "step": 5759 }, { "epoch": 0.5856039040260268, "grad_norm": 0.3924664855003357, "learning_rate": 9.991073676845555e-06, "loss": 0.4019, "step": 5760 }, { "epoch": 0.5857055713704759, "grad_norm": 0.37554970383644104, "learning_rate": 9.99105246791681e-06, "loss": 0.4189, "step": 5761 }, { "epoch": 0.5858072387149248, "grad_norm": 0.35464099049568176, "learning_rate": 9.991031233844327e-06, "loss": 0.4001, "step": 5762 }, { "epoch": 0.5859089060593737, "grad_norm": 0.37912237644195557, "learning_rate": 9.991009974628214e-06, "loss": 0.3959, "step": 5763 }, { "epoch": 0.5860105734038227, "grad_norm": 0.39311352372169495, "learning_rate": 9.99098869026858e-06, "loss": 0.4018, "step": 5764 }, { "epoch": 0.5861122407482716, "grad_norm": 0.3599385619163513, "learning_rate": 9.990967380765531e-06, "loss": 0.4263, "step": 5765 }, { "epoch": 0.5862139080927207, "grad_norm": 0.35321274399757385, "learning_rate": 9.990946046119174e-06, "loss": 0.3965, "step": 5766 }, { "epoch": 0.5863155754371696, "grad_norm": 0.3493109345436096, "learning_rate": 9.990924686329616e-06, "loss": 0.3941, "step": 5767 }, { "epoch": 0.5864172427816186, "grad_norm": 0.3818613290786743, "learning_rate": 9.990903301396968e-06, "loss": 0.3888, "step": 5768 }, { "epoch": 0.5865189101260675, "grad_norm": 0.32735347747802734, "learning_rate": 9.990881891321333e-06, "loss": 0.4076, "step": 5769 }, { "epoch": 0.5866205774705164, "grad_norm": 0.35623472929000854, "learning_rate": 9.990860456102822e-06, "loss": 0.4054, "step": 5770 }, { "epoch": 0.5867222448149655, "grad_norm": 0.40734556317329407, "learning_rate": 9.990838995741541e-06, "loss": 0.4182, "step": 5771 }, { "epoch": 0.5868239121594144, "grad_norm": 0.385586142539978, "learning_rate": 9.9908175102376e-06, "loss": 0.4259, "step": 5772 }, { "epoch": 0.5869255795038634, "grad_norm": 0.3538523316383362, "learning_rate": 9.990795999591107e-06, "loss": 0.4114, "step": 5773 }, { "epoch": 0.5870272468483123, "grad_norm": 0.4367408752441406, "learning_rate": 9.990774463802169e-06, "loss": 0.4048, "step": 5774 }, { "epoch": 0.5871289141927613, "grad_norm": 0.379515141248703, "learning_rate": 9.990752902870895e-06, "loss": 0.3922, "step": 5775 }, { "epoch": 0.5872305815372102, "grad_norm": 0.33551469445228577, "learning_rate": 9.990731316797392e-06, "loss": 0.4158, "step": 5776 }, { "epoch": 0.5873322488816592, "grad_norm": 0.391812264919281, "learning_rate": 9.990709705581774e-06, "loss": 0.4367, "step": 5777 }, { "epoch": 0.5874339162261082, "grad_norm": 0.36452481150627136, "learning_rate": 9.990688069224147e-06, "loss": 0.4514, "step": 5778 }, { "epoch": 0.5875355835705571, "grad_norm": 0.421895831823349, "learning_rate": 9.990666407724616e-06, "loss": 0.4085, "step": 5779 }, { "epoch": 0.5876372509150061, "grad_norm": 0.3713633418083191, "learning_rate": 9.990644721083296e-06, "loss": 0.4373, "step": 5780 }, { "epoch": 0.587738918259455, "grad_norm": 0.366200715303421, "learning_rate": 9.990623009300291e-06, "loss": 0.3931, "step": 5781 }, { "epoch": 0.5878405856039041, "grad_norm": 0.4189659655094147, "learning_rate": 9.990601272375718e-06, "loss": 0.3919, "step": 5782 }, { "epoch": 0.587942252948353, "grad_norm": 0.3460742235183716, "learning_rate": 9.990579510309676e-06, "loss": 0.3995, "step": 5783 }, { "epoch": 0.588043920292802, "grad_norm": 0.3712138235569, "learning_rate": 9.990557723102286e-06, "loss": 0.3828, "step": 5784 }, { "epoch": 0.5881455876372509, "grad_norm": 0.3269917666912079, "learning_rate": 9.99053591075365e-06, "loss": 0.38, "step": 5785 }, { "epoch": 0.5882472549816998, "grad_norm": 0.393940806388855, "learning_rate": 9.990514073263878e-06, "loss": 0.3848, "step": 5786 }, { "epoch": 0.5883489223261489, "grad_norm": 0.3572569191455841, "learning_rate": 9.990492210633084e-06, "loss": 0.4224, "step": 5787 }, { "epoch": 0.5884505896705978, "grad_norm": 0.3703691065311432, "learning_rate": 9.990470322861375e-06, "loss": 0.4352, "step": 5788 }, { "epoch": 0.5885522570150468, "grad_norm": 0.36338046193122864, "learning_rate": 9.990448409948862e-06, "loss": 0.4216, "step": 5789 }, { "epoch": 0.5886539243594957, "grad_norm": 0.3935108482837677, "learning_rate": 9.990426471895658e-06, "loss": 0.426, "step": 5790 }, { "epoch": 0.5887555917039446, "grad_norm": 0.3861265480518341, "learning_rate": 9.99040450870187e-06, "loss": 0.3942, "step": 5791 }, { "epoch": 0.5888572590483937, "grad_norm": 0.33506855368614197, "learning_rate": 9.990382520367613e-06, "loss": 0.4008, "step": 5792 }, { "epoch": 0.5889589263928426, "grad_norm": 0.4152861535549164, "learning_rate": 9.99036050689299e-06, "loss": 0.4032, "step": 5793 }, { "epoch": 0.5890605937372916, "grad_norm": 0.3900994658470154, "learning_rate": 9.990338468278118e-06, "loss": 0.3845, "step": 5794 }, { "epoch": 0.5891622610817405, "grad_norm": 0.33031702041625977, "learning_rate": 9.990316404523109e-06, "loss": 0.4175, "step": 5795 }, { "epoch": 0.5892639284261895, "grad_norm": 0.4106873571872711, "learning_rate": 9.99029431562807e-06, "loss": 0.4093, "step": 5796 }, { "epoch": 0.5893655957706385, "grad_norm": 0.41300278902053833, "learning_rate": 9.990272201593115e-06, "loss": 0.4103, "step": 5797 }, { "epoch": 0.5894672631150875, "grad_norm": 0.36148279905319214, "learning_rate": 9.990250062418352e-06, "loss": 0.3931, "step": 5798 }, { "epoch": 0.5895689304595364, "grad_norm": 0.39838531613349915, "learning_rate": 9.990227898103898e-06, "loss": 0.4165, "step": 5799 }, { "epoch": 0.5896705978039853, "grad_norm": 0.5251750946044922, "learning_rate": 9.990205708649861e-06, "loss": 0.4055, "step": 5800 }, { "epoch": 0.5897722651484343, "grad_norm": 0.39142119884490967, "learning_rate": 9.990183494056352e-06, "loss": 0.4098, "step": 5801 }, { "epoch": 0.5898739324928833, "grad_norm": 0.3518460988998413, "learning_rate": 9.990161254323487e-06, "loss": 0.3792, "step": 5802 }, { "epoch": 0.5899755998373323, "grad_norm": 0.4144924283027649, "learning_rate": 9.990138989451373e-06, "loss": 0.3833, "step": 5803 }, { "epoch": 0.5900772671817812, "grad_norm": 0.3903249502182007, "learning_rate": 9.990116699440126e-06, "loss": 0.3971, "step": 5804 }, { "epoch": 0.5901789345262302, "grad_norm": 0.37379685044288635, "learning_rate": 9.990094384289857e-06, "loss": 0.4007, "step": 5805 }, { "epoch": 0.5902806018706791, "grad_norm": 0.3766538202762604, "learning_rate": 9.990072044000678e-06, "loss": 0.3878, "step": 5806 }, { "epoch": 0.5903822692151282, "grad_norm": 0.3593254089355469, "learning_rate": 9.9900496785727e-06, "loss": 0.42, "step": 5807 }, { "epoch": 0.5904839365595771, "grad_norm": 0.3757516145706177, "learning_rate": 9.99002728800604e-06, "loss": 0.4044, "step": 5808 }, { "epoch": 0.590585603904026, "grad_norm": 0.3797999322414398, "learning_rate": 9.990004872300808e-06, "loss": 0.4257, "step": 5809 }, { "epoch": 0.590687271248475, "grad_norm": 0.4232180714607239, "learning_rate": 9.989982431457118e-06, "loss": 0.4266, "step": 5810 }, { "epoch": 0.5907889385929239, "grad_norm": 0.3527495265007019, "learning_rate": 9.989959965475081e-06, "loss": 0.4164, "step": 5811 }, { "epoch": 0.590890605937373, "grad_norm": 0.39816024899482727, "learning_rate": 9.989937474354809e-06, "loss": 0.4082, "step": 5812 }, { "epoch": 0.5909922732818219, "grad_norm": 0.39662966132164, "learning_rate": 9.989914958096421e-06, "loss": 0.4526, "step": 5813 }, { "epoch": 0.5910939406262709, "grad_norm": 0.3651044964790344, "learning_rate": 9.989892416700027e-06, "loss": 0.3983, "step": 5814 }, { "epoch": 0.5911956079707198, "grad_norm": 0.3709346354007721, "learning_rate": 9.98986985016574e-06, "loss": 0.4123, "step": 5815 }, { "epoch": 0.5912972753151687, "grad_norm": 0.36341217160224915, "learning_rate": 9.989847258493675e-06, "loss": 0.3967, "step": 5816 }, { "epoch": 0.5913989426596177, "grad_norm": 0.35047540068626404, "learning_rate": 9.989824641683946e-06, "loss": 0.3641, "step": 5817 }, { "epoch": 0.5915006100040667, "grad_norm": 0.3721065819263458, "learning_rate": 9.989801999736665e-06, "loss": 0.417, "step": 5818 }, { "epoch": 0.5916022773485157, "grad_norm": 0.3709726929664612, "learning_rate": 9.989779332651947e-06, "loss": 0.4148, "step": 5819 }, { "epoch": 0.5917039446929646, "grad_norm": 0.3614822030067444, "learning_rate": 9.989756640429909e-06, "loss": 0.3737, "step": 5820 }, { "epoch": 0.5918056120374136, "grad_norm": 0.3553166389465332, "learning_rate": 9.98973392307066e-06, "loss": 0.377, "step": 5821 }, { "epoch": 0.5919072793818625, "grad_norm": 0.3678581118583679, "learning_rate": 9.989711180574318e-06, "loss": 0.3968, "step": 5822 }, { "epoch": 0.5920089467263115, "grad_norm": 0.3353995978832245, "learning_rate": 9.989688412940997e-06, "loss": 0.4081, "step": 5823 }, { "epoch": 0.5921106140707605, "grad_norm": 0.3225756585597992, "learning_rate": 9.989665620170811e-06, "loss": 0.3677, "step": 5824 }, { "epoch": 0.5922122814152094, "grad_norm": 0.352171391248703, "learning_rate": 9.989642802263876e-06, "loss": 0.4035, "step": 5825 }, { "epoch": 0.5923139487596584, "grad_norm": 0.3545815944671631, "learning_rate": 9.989619959220307e-06, "loss": 0.415, "step": 5826 }, { "epoch": 0.5924156161041073, "grad_norm": 0.38199472427368164, "learning_rate": 9.989597091040218e-06, "loss": 0.3989, "step": 5827 }, { "epoch": 0.5925172834485564, "grad_norm": 0.3648289740085602, "learning_rate": 9.989574197723723e-06, "loss": 0.379, "step": 5828 }, { "epoch": 0.5926189507930053, "grad_norm": 0.37822526693344116, "learning_rate": 9.98955127927094e-06, "loss": 0.4281, "step": 5829 }, { "epoch": 0.5927206181374542, "grad_norm": 0.3684717118740082, "learning_rate": 9.989528335681984e-06, "loss": 0.4434, "step": 5830 }, { "epoch": 0.5928222854819032, "grad_norm": 0.37641608715057373, "learning_rate": 9.989505366956969e-06, "loss": 0.4071, "step": 5831 }, { "epoch": 0.5929239528263521, "grad_norm": 0.3816588521003723, "learning_rate": 9.989482373096011e-06, "loss": 0.4143, "step": 5832 }, { "epoch": 0.5930256201708012, "grad_norm": 0.3693827986717224, "learning_rate": 9.989459354099228e-06, "loss": 0.386, "step": 5833 }, { "epoch": 0.5931272875152501, "grad_norm": 0.3339409828186035, "learning_rate": 9.989436309966733e-06, "loss": 0.4207, "step": 5834 }, { "epoch": 0.5932289548596991, "grad_norm": 0.330124169588089, "learning_rate": 9.989413240698646e-06, "loss": 0.4187, "step": 5835 }, { "epoch": 0.593330622204148, "grad_norm": 0.37194275856018066, "learning_rate": 9.98939014629508e-06, "loss": 0.4355, "step": 5836 }, { "epoch": 0.593432289548597, "grad_norm": 0.3501317501068115, "learning_rate": 9.989367026756149e-06, "loss": 0.3879, "step": 5837 }, { "epoch": 0.593533956893046, "grad_norm": 0.359249472618103, "learning_rate": 9.989343882081975e-06, "loss": 0.386, "step": 5838 }, { "epoch": 0.5936356242374949, "grad_norm": 0.3688179850578308, "learning_rate": 9.989320712272673e-06, "loss": 0.3822, "step": 5839 }, { "epoch": 0.5937372915819439, "grad_norm": 0.3762000501155853, "learning_rate": 9.989297517328359e-06, "loss": 0.4024, "step": 5840 }, { "epoch": 0.5938389589263928, "grad_norm": 0.33827322721481323, "learning_rate": 9.989274297249147e-06, "loss": 0.368, "step": 5841 }, { "epoch": 0.5939406262708418, "grad_norm": 0.36448124051094055, "learning_rate": 9.989251052035159e-06, "loss": 0.4024, "step": 5842 }, { "epoch": 0.5940422936152908, "grad_norm": 0.37104615569114685, "learning_rate": 9.989227781686508e-06, "loss": 0.3657, "step": 5843 }, { "epoch": 0.5941439609597398, "grad_norm": 0.3238832652568817, "learning_rate": 9.989204486203314e-06, "loss": 0.3757, "step": 5844 }, { "epoch": 0.5942456283041887, "grad_norm": 0.3439924418926239, "learning_rate": 9.989181165585696e-06, "loss": 0.3792, "step": 5845 }, { "epoch": 0.5943472956486376, "grad_norm": 0.39078187942504883, "learning_rate": 9.989157819833765e-06, "loss": 0.4582, "step": 5846 }, { "epoch": 0.5944489629930866, "grad_norm": 0.3355739116668701, "learning_rate": 9.989134448947644e-06, "loss": 0.3895, "step": 5847 }, { "epoch": 0.5945506303375356, "grad_norm": 0.36419764161109924, "learning_rate": 9.989111052927449e-06, "loss": 0.4341, "step": 5848 }, { "epoch": 0.5946522976819846, "grad_norm": 0.4081875681877136, "learning_rate": 9.989087631773298e-06, "loss": 0.3951, "step": 5849 }, { "epoch": 0.5947539650264335, "grad_norm": 0.38757753372192383, "learning_rate": 9.989064185485312e-06, "loss": 0.4108, "step": 5850 }, { "epoch": 0.5948556323708825, "grad_norm": 0.3515257239341736, "learning_rate": 9.989040714063603e-06, "loss": 0.4199, "step": 5851 }, { "epoch": 0.5949572997153314, "grad_norm": 0.38297292590141296, "learning_rate": 9.989017217508291e-06, "loss": 0.3842, "step": 5852 }, { "epoch": 0.5950589670597805, "grad_norm": 0.37483319640159607, "learning_rate": 9.9889936958195e-06, "loss": 0.3912, "step": 5853 }, { "epoch": 0.5951606344042294, "grad_norm": 0.351775586605072, "learning_rate": 9.98897014899734e-06, "loss": 0.3801, "step": 5854 }, { "epoch": 0.5952623017486783, "grad_norm": 0.3712601959705353, "learning_rate": 9.988946577041938e-06, "loss": 0.3907, "step": 5855 }, { "epoch": 0.5953639690931273, "grad_norm": 0.3647216260433197, "learning_rate": 9.988922979953407e-06, "loss": 0.3988, "step": 5856 }, { "epoch": 0.5954656364375762, "grad_norm": 0.36489027738571167, "learning_rate": 9.988899357731867e-06, "loss": 0.3625, "step": 5857 }, { "epoch": 0.5955673037820252, "grad_norm": 0.3469138443470001, "learning_rate": 9.988875710377439e-06, "loss": 0.4111, "step": 5858 }, { "epoch": 0.5956689711264742, "grad_norm": 0.3726671040058136, "learning_rate": 9.988852037890239e-06, "loss": 0.4054, "step": 5859 }, { "epoch": 0.5957706384709232, "grad_norm": 0.3647293746471405, "learning_rate": 9.988828340270389e-06, "loss": 0.4184, "step": 5860 }, { "epoch": 0.5958723058153721, "grad_norm": 0.3440771698951721, "learning_rate": 9.988804617518009e-06, "loss": 0.3888, "step": 5861 }, { "epoch": 0.595973973159821, "grad_norm": 0.34695321321487427, "learning_rate": 9.988780869633215e-06, "loss": 0.4182, "step": 5862 }, { "epoch": 0.59607564050427, "grad_norm": 0.3582749366760254, "learning_rate": 9.988757096616128e-06, "loss": 0.3938, "step": 5863 }, { "epoch": 0.596177307848719, "grad_norm": 0.3283177316188812, "learning_rate": 9.988733298466869e-06, "loss": 0.4424, "step": 5864 }, { "epoch": 0.596278975193168, "grad_norm": 0.398613840341568, "learning_rate": 9.988709475185556e-06, "loss": 0.4199, "step": 5865 }, { "epoch": 0.5963806425376169, "grad_norm": 0.3650723397731781, "learning_rate": 9.988685626772313e-06, "loss": 0.4045, "step": 5866 }, { "epoch": 0.5964823098820659, "grad_norm": 0.37575697898864746, "learning_rate": 9.988661753227253e-06, "loss": 0.3755, "step": 5867 }, { "epoch": 0.5965839772265148, "grad_norm": 0.36546844244003296, "learning_rate": 9.988637854550504e-06, "loss": 0.4296, "step": 5868 }, { "epoch": 0.5966856445709638, "grad_norm": 0.3614453375339508, "learning_rate": 9.988613930742181e-06, "loss": 0.433, "step": 5869 }, { "epoch": 0.5967873119154128, "grad_norm": 0.34576377272605896, "learning_rate": 9.988589981802407e-06, "loss": 0.3933, "step": 5870 }, { "epoch": 0.5968889792598617, "grad_norm": 0.36033907532691956, "learning_rate": 9.988566007731303e-06, "loss": 0.3456, "step": 5871 }, { "epoch": 0.5969906466043107, "grad_norm": 0.3477466404438019, "learning_rate": 9.988542008528989e-06, "loss": 0.3972, "step": 5872 }, { "epoch": 0.5970923139487596, "grad_norm": 0.3765092194080353, "learning_rate": 9.988517984195585e-06, "loss": 0.4667, "step": 5873 }, { "epoch": 0.5971939812932087, "grad_norm": 0.3559654951095581, "learning_rate": 9.988493934731213e-06, "loss": 0.4146, "step": 5874 }, { "epoch": 0.5972956486376576, "grad_norm": 0.3934946656227112, "learning_rate": 9.988469860135996e-06, "loss": 0.4116, "step": 5875 }, { "epoch": 0.5973973159821065, "grad_norm": 0.38624197244644165, "learning_rate": 9.988445760410051e-06, "loss": 0.3988, "step": 5876 }, { "epoch": 0.5974989833265555, "grad_norm": 0.4241539537906647, "learning_rate": 9.988421635553503e-06, "loss": 0.381, "step": 5877 }, { "epoch": 0.5976006506710044, "grad_norm": 0.40731504559516907, "learning_rate": 9.98839748556647e-06, "loss": 0.4476, "step": 5878 }, { "epoch": 0.5977023180154535, "grad_norm": 0.3948383629322052, "learning_rate": 9.988373310449078e-06, "loss": 0.3607, "step": 5879 }, { "epoch": 0.5978039853599024, "grad_norm": 0.38978052139282227, "learning_rate": 9.988349110201446e-06, "loss": 0.4032, "step": 5880 }, { "epoch": 0.5979056527043514, "grad_norm": 0.35169318318367004, "learning_rate": 9.988324884823696e-06, "loss": 0.3579, "step": 5881 }, { "epoch": 0.5980073200488003, "grad_norm": 0.3752216696739197, "learning_rate": 9.988300634315951e-06, "loss": 0.4308, "step": 5882 }, { "epoch": 0.5981089873932492, "grad_norm": 0.42525333166122437, "learning_rate": 9.988276358678333e-06, "loss": 0.4202, "step": 5883 }, { "epoch": 0.5982106547376983, "grad_norm": 0.33930328488349915, "learning_rate": 9.988252057910966e-06, "loss": 0.4339, "step": 5884 }, { "epoch": 0.5983123220821472, "grad_norm": 0.438506543636322, "learning_rate": 9.988227732013968e-06, "loss": 0.4098, "step": 5885 }, { "epoch": 0.5984139894265962, "grad_norm": 0.3923192620277405, "learning_rate": 9.988203380987465e-06, "loss": 0.3682, "step": 5886 }, { "epoch": 0.5985156567710451, "grad_norm": 0.34947776794433594, "learning_rate": 9.988179004831577e-06, "loss": 0.3755, "step": 5887 }, { "epoch": 0.5986173241154941, "grad_norm": 0.360808789730072, "learning_rate": 9.988154603546431e-06, "loss": 0.3822, "step": 5888 }, { "epoch": 0.5987189914599431, "grad_norm": 0.34019291400909424, "learning_rate": 9.988130177132145e-06, "loss": 0.4038, "step": 5889 }, { "epoch": 0.5988206588043921, "grad_norm": 0.36655741930007935, "learning_rate": 9.988105725588847e-06, "loss": 0.3867, "step": 5890 }, { "epoch": 0.598922326148841, "grad_norm": 0.3984968364238739, "learning_rate": 9.988081248916657e-06, "loss": 0.4143, "step": 5891 }, { "epoch": 0.5990239934932899, "grad_norm": 0.3815101981163025, "learning_rate": 9.988056747115699e-06, "loss": 0.4335, "step": 5892 }, { "epoch": 0.5991256608377389, "grad_norm": 0.4045764207839966, "learning_rate": 9.988032220186095e-06, "loss": 0.4162, "step": 5893 }, { "epoch": 0.5992273281821879, "grad_norm": 0.3916245698928833, "learning_rate": 9.988007668127973e-06, "loss": 0.4045, "step": 5894 }, { "epoch": 0.5993289955266369, "grad_norm": 0.3281383812427521, "learning_rate": 9.98798309094145e-06, "loss": 0.3783, "step": 5895 }, { "epoch": 0.5994306628710858, "grad_norm": 0.4446006417274475, "learning_rate": 9.987958488626656e-06, "loss": 0.4402, "step": 5896 }, { "epoch": 0.5995323302155348, "grad_norm": 0.3838367462158203, "learning_rate": 9.987933861183712e-06, "loss": 0.4038, "step": 5897 }, { "epoch": 0.5996339975599837, "grad_norm": 0.33732157945632935, "learning_rate": 9.987909208612742e-06, "loss": 0.3902, "step": 5898 }, { "epoch": 0.5997356649044326, "grad_norm": 0.3876080811023712, "learning_rate": 9.987884530913872e-06, "loss": 0.388, "step": 5899 }, { "epoch": 0.5998373322488817, "grad_norm": 0.38685691356658936, "learning_rate": 9.987859828087223e-06, "loss": 0.3883, "step": 5900 }, { "epoch": 0.5999389995933306, "grad_norm": 0.40129104256629944, "learning_rate": 9.987835100132923e-06, "loss": 0.4028, "step": 5901 }, { "epoch": 0.6000406669377796, "grad_norm": 0.33154574036598206, "learning_rate": 9.987810347051095e-06, "loss": 0.3954, "step": 5902 }, { "epoch": 0.6001423342822285, "grad_norm": 0.35759902000427246, "learning_rate": 9.987785568841864e-06, "loss": 0.4094, "step": 5903 }, { "epoch": 0.6002440016266775, "grad_norm": 0.3861733078956604, "learning_rate": 9.987760765505354e-06, "loss": 0.4105, "step": 5904 }, { "epoch": 0.6003456689711265, "grad_norm": 0.3569572865962982, "learning_rate": 9.98773593704169e-06, "loss": 0.4082, "step": 5905 }, { "epoch": 0.6004473363155755, "grad_norm": 0.3789040148258209, "learning_rate": 9.987711083451e-06, "loss": 0.4003, "step": 5906 }, { "epoch": 0.6005490036600244, "grad_norm": 0.40283340215682983, "learning_rate": 9.987686204733404e-06, "loss": 0.3963, "step": 5907 }, { "epoch": 0.6006506710044733, "grad_norm": 0.3667590022087097, "learning_rate": 9.987661300889031e-06, "loss": 0.3775, "step": 5908 }, { "epoch": 0.6007523383489223, "grad_norm": 0.40230730175971985, "learning_rate": 9.987636371918006e-06, "loss": 0.4024, "step": 5909 }, { "epoch": 0.6008540056933713, "grad_norm": 0.36423957347869873, "learning_rate": 9.987611417820452e-06, "loss": 0.3971, "step": 5910 }, { "epoch": 0.6009556730378203, "grad_norm": 0.3770765960216522, "learning_rate": 9.9875864385965e-06, "loss": 0.4241, "step": 5911 }, { "epoch": 0.6010573403822692, "grad_norm": 0.390628457069397, "learning_rate": 9.987561434246273e-06, "loss": 0.3677, "step": 5912 }, { "epoch": 0.6011590077267182, "grad_norm": 0.3744092583656311, "learning_rate": 9.987536404769895e-06, "loss": 0.4249, "step": 5913 }, { "epoch": 0.6012606750711671, "grad_norm": 0.3482321500778198, "learning_rate": 9.987511350167494e-06, "loss": 0.3762, "step": 5914 }, { "epoch": 0.6013623424156161, "grad_norm": 0.37538573145866394, "learning_rate": 9.987486270439196e-06, "loss": 0.4088, "step": 5915 }, { "epoch": 0.6014640097600651, "grad_norm": 0.4670482277870178, "learning_rate": 9.987461165585128e-06, "loss": 0.4189, "step": 5916 }, { "epoch": 0.601565677104514, "grad_norm": 0.3874708414077759, "learning_rate": 9.987436035605415e-06, "loss": 0.4292, "step": 5917 }, { "epoch": 0.601667344448963, "grad_norm": 0.35537204146385193, "learning_rate": 9.987410880500186e-06, "loss": 0.3797, "step": 5918 }, { "epoch": 0.6017690117934119, "grad_norm": 0.34854602813720703, "learning_rate": 9.987385700269565e-06, "loss": 0.4086, "step": 5919 }, { "epoch": 0.601870679137861, "grad_norm": 0.3852713406085968, "learning_rate": 9.98736049491368e-06, "loss": 0.412, "step": 5920 }, { "epoch": 0.6019723464823099, "grad_norm": 0.37059473991394043, "learning_rate": 9.987335264432659e-06, "loss": 0.4074, "step": 5921 }, { "epoch": 0.6020740138267588, "grad_norm": 0.34801411628723145, "learning_rate": 9.987310008826624e-06, "loss": 0.3982, "step": 5922 }, { "epoch": 0.6021756811712078, "grad_norm": 0.3719651401042938, "learning_rate": 9.987284728095713e-06, "loss": 0.4272, "step": 5923 }, { "epoch": 0.6022773485156567, "grad_norm": 0.35698309540748596, "learning_rate": 9.98725942224004e-06, "loss": 0.3903, "step": 5924 }, { "epoch": 0.6023790158601058, "grad_norm": 0.3981022238731384, "learning_rate": 9.987234091259744e-06, "loss": 0.4086, "step": 5925 }, { "epoch": 0.6024806832045547, "grad_norm": 0.3274787664413452, "learning_rate": 9.987208735154947e-06, "loss": 0.3697, "step": 5926 }, { "epoch": 0.6025823505490037, "grad_norm": 0.37897923588752747, "learning_rate": 9.987183353925776e-06, "loss": 0.4085, "step": 5927 }, { "epoch": 0.6026840178934526, "grad_norm": 0.34615686535835266, "learning_rate": 9.987157947572362e-06, "loss": 0.4164, "step": 5928 }, { "epoch": 0.6027856852379015, "grad_norm": 0.40771642327308655, "learning_rate": 9.98713251609483e-06, "loss": 0.406, "step": 5929 }, { "epoch": 0.6028873525823506, "grad_norm": 0.3909180164337158, "learning_rate": 9.987107059493311e-06, "loss": 0.4039, "step": 5930 }, { "epoch": 0.6029890199267995, "grad_norm": 0.39811480045318604, "learning_rate": 9.98708157776793e-06, "loss": 0.4328, "step": 5931 }, { "epoch": 0.6030906872712485, "grad_norm": 0.3902726471424103, "learning_rate": 9.987056070918819e-06, "loss": 0.4433, "step": 5932 }, { "epoch": 0.6031923546156974, "grad_norm": 0.3669617474079132, "learning_rate": 9.987030538946103e-06, "loss": 0.3864, "step": 5933 }, { "epoch": 0.6032940219601464, "grad_norm": 0.36824193596839905, "learning_rate": 9.987004981849913e-06, "loss": 0.3949, "step": 5934 }, { "epoch": 0.6033956893045954, "grad_norm": 0.3508731722831726, "learning_rate": 9.986979399630377e-06, "loss": 0.4186, "step": 5935 }, { "epoch": 0.6034973566490444, "grad_norm": 0.35125869512557983, "learning_rate": 9.986953792287623e-06, "loss": 0.4046, "step": 5936 }, { "epoch": 0.6035990239934933, "grad_norm": 0.32755962014198303, "learning_rate": 9.986928159821782e-06, "loss": 0.4, "step": 5937 }, { "epoch": 0.6037006913379422, "grad_norm": 0.4059489369392395, "learning_rate": 9.986902502232982e-06, "loss": 0.4316, "step": 5938 }, { "epoch": 0.6038023586823912, "grad_norm": 0.34774190187454224, "learning_rate": 9.986876819521351e-06, "loss": 0.4167, "step": 5939 }, { "epoch": 0.6039040260268401, "grad_norm": 0.3231297731399536, "learning_rate": 9.98685111168702e-06, "loss": 0.3717, "step": 5940 }, { "epoch": 0.6040056933712892, "grad_norm": 0.36845141649246216, "learning_rate": 9.986825378730119e-06, "loss": 0.409, "step": 5941 }, { "epoch": 0.6041073607157381, "grad_norm": 0.3252372443675995, "learning_rate": 9.986799620650776e-06, "loss": 0.4142, "step": 5942 }, { "epoch": 0.6042090280601871, "grad_norm": 0.35043424367904663, "learning_rate": 9.986773837449122e-06, "loss": 0.3937, "step": 5943 }, { "epoch": 0.604310695404636, "grad_norm": 0.3478587567806244, "learning_rate": 9.986748029125287e-06, "loss": 0.4043, "step": 5944 }, { "epoch": 0.6044123627490849, "grad_norm": 0.34802648425102234, "learning_rate": 9.9867221956794e-06, "loss": 0.4266, "step": 5945 }, { "epoch": 0.604514030093534, "grad_norm": 0.3256334662437439, "learning_rate": 9.986696337111593e-06, "loss": 0.4102, "step": 5946 }, { "epoch": 0.6046156974379829, "grad_norm": 0.36850693821907043, "learning_rate": 9.986670453421992e-06, "loss": 0.4212, "step": 5947 }, { "epoch": 0.6047173647824319, "grad_norm": 0.3816985785961151, "learning_rate": 9.986644544610732e-06, "loss": 0.3724, "step": 5948 }, { "epoch": 0.6048190321268808, "grad_norm": 0.33041778206825256, "learning_rate": 9.98661861067794e-06, "loss": 0.3975, "step": 5949 }, { "epoch": 0.6049206994713298, "grad_norm": 0.37100088596343994, "learning_rate": 9.98659265162375e-06, "loss": 0.3965, "step": 5950 }, { "epoch": 0.6050223668157788, "grad_norm": 0.3414044678211212, "learning_rate": 9.986566667448292e-06, "loss": 0.4033, "step": 5951 }, { "epoch": 0.6051240341602278, "grad_norm": 0.3457409739494324, "learning_rate": 9.986540658151696e-06, "loss": 0.4262, "step": 5952 }, { "epoch": 0.6052257015046767, "grad_norm": 0.34686508774757385, "learning_rate": 9.986514623734091e-06, "loss": 0.3899, "step": 5953 }, { "epoch": 0.6053273688491256, "grad_norm": 0.3271391987800598, "learning_rate": 9.986488564195613e-06, "loss": 0.3884, "step": 5954 }, { "epoch": 0.6054290361935746, "grad_norm": 0.37505486607551575, "learning_rate": 9.98646247953639e-06, "loss": 0.3956, "step": 5955 }, { "epoch": 0.6055307035380236, "grad_norm": 0.36421898007392883, "learning_rate": 9.986436369756551e-06, "loss": 0.4164, "step": 5956 }, { "epoch": 0.6056323708824726, "grad_norm": 0.38967466354370117, "learning_rate": 9.986410234856235e-06, "loss": 0.4177, "step": 5957 }, { "epoch": 0.6057340382269215, "grad_norm": 0.3769449293613434, "learning_rate": 9.986384074835567e-06, "loss": 0.4091, "step": 5958 }, { "epoch": 0.6058357055713705, "grad_norm": 0.3765537440776825, "learning_rate": 9.98635788969468e-06, "loss": 0.3841, "step": 5959 }, { "epoch": 0.6059373729158194, "grad_norm": 0.3960443139076233, "learning_rate": 9.986331679433707e-06, "loss": 0.4091, "step": 5960 }, { "epoch": 0.6060390402602684, "grad_norm": 0.3643449544906616, "learning_rate": 9.986305444052782e-06, "loss": 0.3773, "step": 5961 }, { "epoch": 0.6061407076047174, "grad_norm": 0.3832683861255646, "learning_rate": 9.986279183552033e-06, "loss": 0.4203, "step": 5962 }, { "epoch": 0.6062423749491663, "grad_norm": 0.36463668942451477, "learning_rate": 9.986252897931596e-06, "loss": 0.388, "step": 5963 }, { "epoch": 0.6063440422936153, "grad_norm": 0.3322597146034241, "learning_rate": 9.986226587191602e-06, "loss": 0.422, "step": 5964 }, { "epoch": 0.6064457096380642, "grad_norm": 0.3322649300098419, "learning_rate": 9.986200251332181e-06, "loss": 0.4078, "step": 5965 }, { "epoch": 0.6065473769825133, "grad_norm": 0.34657612442970276, "learning_rate": 9.98617389035347e-06, "loss": 0.4088, "step": 5966 }, { "epoch": 0.6066490443269622, "grad_norm": 0.3578338623046875, "learning_rate": 9.9861475042556e-06, "loss": 0.3741, "step": 5967 }, { "epoch": 0.6067507116714111, "grad_norm": 0.360230416059494, "learning_rate": 9.986121093038704e-06, "loss": 0.4057, "step": 5968 }, { "epoch": 0.6068523790158601, "grad_norm": 0.35369157791137695, "learning_rate": 9.986094656702912e-06, "loss": 0.3876, "step": 5969 }, { "epoch": 0.606954046360309, "grad_norm": 0.36195895075798035, "learning_rate": 9.986068195248362e-06, "loss": 0.4223, "step": 5970 }, { "epoch": 0.6070557137047581, "grad_norm": 0.3690353333950043, "learning_rate": 9.986041708675185e-06, "loss": 0.3868, "step": 5971 }, { "epoch": 0.607157381049207, "grad_norm": 0.3796399235725403, "learning_rate": 9.986015196983516e-06, "loss": 0.3874, "step": 5972 }, { "epoch": 0.607259048393656, "grad_norm": 0.37924036383628845, "learning_rate": 9.985988660173486e-06, "loss": 0.4245, "step": 5973 }, { "epoch": 0.6073607157381049, "grad_norm": 0.4365493655204773, "learning_rate": 9.98596209824523e-06, "loss": 0.4424, "step": 5974 }, { "epoch": 0.6074623830825538, "grad_norm": 0.3809219300746918, "learning_rate": 9.985935511198883e-06, "loss": 0.3881, "step": 5975 }, { "epoch": 0.6075640504270029, "grad_norm": 0.39882293343544006, "learning_rate": 9.985908899034576e-06, "loss": 0.3738, "step": 5976 }, { "epoch": 0.6076657177714518, "grad_norm": 0.41250863671302795, "learning_rate": 9.985882261752445e-06, "loss": 0.4142, "step": 5977 }, { "epoch": 0.6077673851159008, "grad_norm": 0.37182655930519104, "learning_rate": 9.985855599352625e-06, "loss": 0.4043, "step": 5978 }, { "epoch": 0.6078690524603497, "grad_norm": 0.37484481930732727, "learning_rate": 9.985828911835249e-06, "loss": 0.4033, "step": 5979 }, { "epoch": 0.6079707198047987, "grad_norm": 0.38640859723091125, "learning_rate": 9.985802199200452e-06, "loss": 0.4105, "step": 5980 }, { "epoch": 0.6080723871492476, "grad_norm": 0.3493005335330963, "learning_rate": 9.985775461448368e-06, "loss": 0.4379, "step": 5981 }, { "epoch": 0.6081740544936967, "grad_norm": 0.37765079736709595, "learning_rate": 9.985748698579132e-06, "loss": 0.3931, "step": 5982 }, { "epoch": 0.6082757218381456, "grad_norm": 0.36275309324264526, "learning_rate": 9.985721910592879e-06, "loss": 0.4273, "step": 5983 }, { "epoch": 0.6083773891825945, "grad_norm": 0.3656616508960724, "learning_rate": 9.985695097489743e-06, "loss": 0.3758, "step": 5984 }, { "epoch": 0.6084790565270435, "grad_norm": 0.3492248058319092, "learning_rate": 9.98566825926986e-06, "loss": 0.3728, "step": 5985 }, { "epoch": 0.6085807238714924, "grad_norm": 0.3715552091598511, "learning_rate": 9.985641395933366e-06, "loss": 0.3997, "step": 5986 }, { "epoch": 0.6086823912159415, "grad_norm": 0.3253456652164459, "learning_rate": 9.985614507480395e-06, "loss": 0.3766, "step": 5987 }, { "epoch": 0.6087840585603904, "grad_norm": 0.36242586374282837, "learning_rate": 9.985587593911084e-06, "loss": 0.3671, "step": 5988 }, { "epoch": 0.6088857259048394, "grad_norm": 0.3705151081085205, "learning_rate": 9.985560655225566e-06, "loss": 0.3971, "step": 5989 }, { "epoch": 0.6089873932492883, "grad_norm": 0.4053839445114136, "learning_rate": 9.985533691423979e-06, "loss": 0.4268, "step": 5990 }, { "epoch": 0.6090890605937372, "grad_norm": 0.46529480814933777, "learning_rate": 9.985506702506457e-06, "loss": 0.4116, "step": 5991 }, { "epoch": 0.6091907279381863, "grad_norm": 0.3469810485839844, "learning_rate": 9.985479688473137e-06, "loss": 0.3727, "step": 5992 }, { "epoch": 0.6092923952826352, "grad_norm": 0.3555385172367096, "learning_rate": 9.985452649324156e-06, "loss": 0.388, "step": 5993 }, { "epoch": 0.6093940626270842, "grad_norm": 0.5212783217430115, "learning_rate": 9.985425585059649e-06, "loss": 0.4099, "step": 5994 }, { "epoch": 0.6094957299715331, "grad_norm": 0.4388402998447418, "learning_rate": 9.985398495679753e-06, "loss": 0.3992, "step": 5995 }, { "epoch": 0.6095973973159821, "grad_norm": 0.4037195146083832, "learning_rate": 9.985371381184603e-06, "loss": 0.3944, "step": 5996 }, { "epoch": 0.6096990646604311, "grad_norm": 0.4333571791648865, "learning_rate": 9.985344241574338e-06, "loss": 0.4033, "step": 5997 }, { "epoch": 0.60980073200488, "grad_norm": 0.45936480164527893, "learning_rate": 9.985317076849091e-06, "loss": 0.422, "step": 5998 }, { "epoch": 0.609902399349329, "grad_norm": 0.3920799493789673, "learning_rate": 9.985289887009005e-06, "loss": 0.3893, "step": 5999 }, { "epoch": 0.6100040666937779, "grad_norm": 0.411149799823761, "learning_rate": 9.98526267205421e-06, "loss": 0.4275, "step": 6000 }, { "epoch": 0.6101057340382269, "grad_norm": 0.37419381737709045, "learning_rate": 9.985235431984848e-06, "loss": 0.3724, "step": 6001 }, { "epoch": 0.6102074013826759, "grad_norm": 0.3953026235103607, "learning_rate": 9.985208166801053e-06, "loss": 0.4042, "step": 6002 }, { "epoch": 0.6103090687271249, "grad_norm": 0.37568631768226624, "learning_rate": 9.985180876502964e-06, "loss": 0.393, "step": 6003 }, { "epoch": 0.6104107360715738, "grad_norm": 0.35291653871536255, "learning_rate": 9.985153561090722e-06, "loss": 0.3723, "step": 6004 }, { "epoch": 0.6105124034160228, "grad_norm": 0.4751870930194855, "learning_rate": 9.985126220564457e-06, "loss": 0.4339, "step": 6005 }, { "epoch": 0.6106140707604717, "grad_norm": 0.36492833495140076, "learning_rate": 9.985098854924313e-06, "loss": 0.385, "step": 6006 }, { "epoch": 0.6107157381049207, "grad_norm": 0.4659980833530426, "learning_rate": 9.985071464170424e-06, "loss": 0.425, "step": 6007 }, { "epoch": 0.6108174054493697, "grad_norm": 0.4103666841983795, "learning_rate": 9.98504404830293e-06, "loss": 0.3787, "step": 6008 }, { "epoch": 0.6109190727938186, "grad_norm": 0.3997429311275482, "learning_rate": 9.985016607321968e-06, "loss": 0.3778, "step": 6009 }, { "epoch": 0.6110207401382676, "grad_norm": 0.41749241948127747, "learning_rate": 9.984989141227679e-06, "loss": 0.402, "step": 6010 }, { "epoch": 0.6111224074827165, "grad_norm": 0.37883538007736206, "learning_rate": 9.984961650020199e-06, "loss": 0.4277, "step": 6011 }, { "epoch": 0.6112240748271656, "grad_norm": 0.38775014877319336, "learning_rate": 9.984934133699666e-06, "loss": 0.4309, "step": 6012 }, { "epoch": 0.6113257421716145, "grad_norm": 0.38352879881858826, "learning_rate": 9.98490659226622e-06, "loss": 0.3921, "step": 6013 }, { "epoch": 0.6114274095160634, "grad_norm": 0.342177152633667, "learning_rate": 9.984879025719999e-06, "loss": 0.3954, "step": 6014 }, { "epoch": 0.6115290768605124, "grad_norm": 0.3594283163547516, "learning_rate": 9.984851434061142e-06, "loss": 0.3904, "step": 6015 }, { "epoch": 0.6116307442049613, "grad_norm": 0.3975273370742798, "learning_rate": 9.984823817289788e-06, "loss": 0.4015, "step": 6016 }, { "epoch": 0.6117324115494104, "grad_norm": 0.34377479553222656, "learning_rate": 9.984796175406075e-06, "loss": 0.4144, "step": 6017 }, { "epoch": 0.6118340788938593, "grad_norm": 0.37122035026550293, "learning_rate": 9.984768508410145e-06, "loss": 0.3748, "step": 6018 }, { "epoch": 0.6119357462383083, "grad_norm": 0.3705699145793915, "learning_rate": 9.984740816302135e-06, "loss": 0.3827, "step": 6019 }, { "epoch": 0.6120374135827572, "grad_norm": 0.3887990713119507, "learning_rate": 9.984713099082186e-06, "loss": 0.3903, "step": 6020 }, { "epoch": 0.6121390809272061, "grad_norm": 0.39000001549720764, "learning_rate": 9.984685356750437e-06, "loss": 0.3768, "step": 6021 }, { "epoch": 0.6122407482716552, "grad_norm": 0.318062961101532, "learning_rate": 9.984657589307027e-06, "loss": 0.3775, "step": 6022 }, { "epoch": 0.6123424156161041, "grad_norm": 0.35145696997642517, "learning_rate": 9.984629796752096e-06, "loss": 0.3819, "step": 6023 }, { "epoch": 0.6124440829605531, "grad_norm": 0.3893960118293762, "learning_rate": 9.984601979085786e-06, "loss": 0.3892, "step": 6024 }, { "epoch": 0.612545750305002, "grad_norm": 0.3597557842731476, "learning_rate": 9.984574136308236e-06, "loss": 0.4043, "step": 6025 }, { "epoch": 0.612647417649451, "grad_norm": 0.362845242023468, "learning_rate": 9.984546268419586e-06, "loss": 0.3884, "step": 6026 }, { "epoch": 0.6127490849938999, "grad_norm": 0.3759978711605072, "learning_rate": 9.984518375419974e-06, "loss": 0.3722, "step": 6027 }, { "epoch": 0.612850752338349, "grad_norm": 0.38472631573677063, "learning_rate": 9.984490457309546e-06, "loss": 0.3991, "step": 6028 }, { "epoch": 0.6129524196827979, "grad_norm": 0.4050799012184143, "learning_rate": 9.984462514088438e-06, "loss": 0.4494, "step": 6029 }, { "epoch": 0.6130540870272468, "grad_norm": 0.39856261014938354, "learning_rate": 9.984434545756793e-06, "loss": 0.4106, "step": 6030 }, { "epoch": 0.6131557543716958, "grad_norm": 0.4007652997970581, "learning_rate": 9.98440655231475e-06, "loss": 0.3963, "step": 6031 }, { "epoch": 0.6132574217161447, "grad_norm": 0.3770444393157959, "learning_rate": 9.984378533762454e-06, "loss": 0.3592, "step": 6032 }, { "epoch": 0.6133590890605938, "grad_norm": 0.3825739026069641, "learning_rate": 9.98435049010004e-06, "loss": 0.4004, "step": 6033 }, { "epoch": 0.6134607564050427, "grad_norm": 0.35879355669021606, "learning_rate": 9.984322421327655e-06, "loss": 0.4111, "step": 6034 }, { "epoch": 0.6135624237494917, "grad_norm": 0.40492504835128784, "learning_rate": 9.984294327445437e-06, "loss": 0.3837, "step": 6035 }, { "epoch": 0.6136640910939406, "grad_norm": 0.3719651997089386, "learning_rate": 9.984266208453528e-06, "loss": 0.4005, "step": 6036 }, { "epoch": 0.6137657584383895, "grad_norm": 0.3890497088432312, "learning_rate": 9.984238064352072e-06, "loss": 0.3753, "step": 6037 }, { "epoch": 0.6138674257828386, "grad_norm": 0.3543199896812439, "learning_rate": 9.984209895141208e-06, "loss": 0.404, "step": 6038 }, { "epoch": 0.6139690931272875, "grad_norm": 0.38828620314598083, "learning_rate": 9.98418170082108e-06, "loss": 0.4093, "step": 6039 }, { "epoch": 0.6140707604717365, "grad_norm": 0.37940579652786255, "learning_rate": 9.984153481391827e-06, "loss": 0.3872, "step": 6040 }, { "epoch": 0.6141724278161854, "grad_norm": 0.31884288787841797, "learning_rate": 9.984125236853595e-06, "loss": 0.3933, "step": 6041 }, { "epoch": 0.6142740951606344, "grad_norm": 0.37512627243995667, "learning_rate": 9.984096967206525e-06, "loss": 0.415, "step": 6042 }, { "epoch": 0.6143757625050834, "grad_norm": 0.3738005757331848, "learning_rate": 9.984068672450757e-06, "loss": 0.3883, "step": 6043 }, { "epoch": 0.6144774298495324, "grad_norm": 0.3575802147388458, "learning_rate": 9.984040352586437e-06, "loss": 0.3834, "step": 6044 }, { "epoch": 0.6145790971939813, "grad_norm": 0.3317949175834656, "learning_rate": 9.984012007613704e-06, "loss": 0.4112, "step": 6045 }, { "epoch": 0.6146807645384302, "grad_norm": 0.3786429464817047, "learning_rate": 9.983983637532705e-06, "loss": 0.4175, "step": 6046 }, { "epoch": 0.6147824318828792, "grad_norm": 0.37930646538734436, "learning_rate": 9.98395524234358e-06, "loss": 0.4181, "step": 6047 }, { "epoch": 0.6148840992273282, "grad_norm": 0.359269917011261, "learning_rate": 9.983926822046472e-06, "loss": 0.4225, "step": 6048 }, { "epoch": 0.6149857665717772, "grad_norm": 0.3729499578475952, "learning_rate": 9.983898376641527e-06, "loss": 0.4011, "step": 6049 }, { "epoch": 0.6150874339162261, "grad_norm": 0.3787233233451843, "learning_rate": 9.983869906128884e-06, "loss": 0.4163, "step": 6050 }, { "epoch": 0.615189101260675, "grad_norm": 0.3966221809387207, "learning_rate": 9.98384141050869e-06, "loss": 0.414, "step": 6051 }, { "epoch": 0.615290768605124, "grad_norm": 0.36509066820144653, "learning_rate": 9.983812889781088e-06, "loss": 0.4214, "step": 6052 }, { "epoch": 0.615392435949573, "grad_norm": 0.35350167751312256, "learning_rate": 9.983784343946221e-06, "loss": 0.4072, "step": 6053 }, { "epoch": 0.615494103294022, "grad_norm": 0.4524416923522949, "learning_rate": 9.98375577300423e-06, "loss": 0.4282, "step": 6054 }, { "epoch": 0.6155957706384709, "grad_norm": 0.33680105209350586, "learning_rate": 9.983727176955265e-06, "loss": 0.3914, "step": 6055 }, { "epoch": 0.6156974379829199, "grad_norm": 0.3308264911174774, "learning_rate": 9.983698555799466e-06, "loss": 0.4368, "step": 6056 }, { "epoch": 0.6157991053273688, "grad_norm": 0.34896326065063477, "learning_rate": 9.983669909536976e-06, "loss": 0.39, "step": 6057 }, { "epoch": 0.6159007726718179, "grad_norm": 0.33903488516807556, "learning_rate": 9.983641238167944e-06, "loss": 0.3829, "step": 6058 }, { "epoch": 0.6160024400162668, "grad_norm": 0.32251477241516113, "learning_rate": 9.983612541692511e-06, "loss": 0.3844, "step": 6059 }, { "epoch": 0.6161041073607157, "grad_norm": 0.3587211072444916, "learning_rate": 9.983583820110822e-06, "loss": 0.3729, "step": 6060 }, { "epoch": 0.6162057747051647, "grad_norm": 0.33543452620506287, "learning_rate": 9.983555073423022e-06, "loss": 0.4187, "step": 6061 }, { "epoch": 0.6163074420496136, "grad_norm": 0.33043214678764343, "learning_rate": 9.983526301629255e-06, "loss": 0.4044, "step": 6062 }, { "epoch": 0.6164091093940627, "grad_norm": 0.33270591497421265, "learning_rate": 9.98349750472967e-06, "loss": 0.4057, "step": 6063 }, { "epoch": 0.6165107767385116, "grad_norm": 0.32173997163772583, "learning_rate": 9.983468682724406e-06, "loss": 0.4026, "step": 6064 }, { "epoch": 0.6166124440829606, "grad_norm": 0.34509849548339844, "learning_rate": 9.983439835613612e-06, "loss": 0.3812, "step": 6065 }, { "epoch": 0.6167141114274095, "grad_norm": 0.40724003314971924, "learning_rate": 9.983410963397431e-06, "loss": 0.4112, "step": 6066 }, { "epoch": 0.6168157787718584, "grad_norm": 0.37157806754112244, "learning_rate": 9.983382066076011e-06, "loss": 0.4098, "step": 6067 }, { "epoch": 0.6169174461163074, "grad_norm": 0.3999866843223572, "learning_rate": 9.983353143649495e-06, "loss": 0.4204, "step": 6068 }, { "epoch": 0.6170191134607564, "grad_norm": 0.414438933134079, "learning_rate": 9.983324196118031e-06, "loss": 0.397, "step": 6069 }, { "epoch": 0.6171207808052054, "grad_norm": 0.43204638361930847, "learning_rate": 9.983295223481763e-06, "loss": 0.4346, "step": 6070 }, { "epoch": 0.6172224481496543, "grad_norm": 0.39680835604667664, "learning_rate": 9.98326622574084e-06, "loss": 0.3658, "step": 6071 }, { "epoch": 0.6173241154941033, "grad_norm": 0.4141336977481842, "learning_rate": 9.983237202895405e-06, "loss": 0.3754, "step": 6072 }, { "epoch": 0.6174257828385522, "grad_norm": 0.40669572353363037, "learning_rate": 9.983208154945606e-06, "loss": 0.3808, "step": 6073 }, { "epoch": 0.6175274501830013, "grad_norm": 0.3813398778438568, "learning_rate": 9.983179081891586e-06, "loss": 0.4095, "step": 6074 }, { "epoch": 0.6176291175274502, "grad_norm": 0.36412250995635986, "learning_rate": 9.983149983733496e-06, "loss": 0.3908, "step": 6075 }, { "epoch": 0.6177307848718991, "grad_norm": 0.39484915137290955, "learning_rate": 9.98312086047148e-06, "loss": 0.3907, "step": 6076 }, { "epoch": 0.6178324522163481, "grad_norm": 0.342289537191391, "learning_rate": 9.983091712105686e-06, "loss": 0.3813, "step": 6077 }, { "epoch": 0.617934119560797, "grad_norm": 0.37837353348731995, "learning_rate": 9.98306253863626e-06, "loss": 0.3879, "step": 6078 }, { "epoch": 0.6180357869052461, "grad_norm": 0.4155801236629486, "learning_rate": 9.983033340063346e-06, "loss": 0.4278, "step": 6079 }, { "epoch": 0.618137454249695, "grad_norm": 0.35426896810531616, "learning_rate": 9.983004116387098e-06, "loss": 0.4118, "step": 6080 }, { "epoch": 0.618239121594144, "grad_norm": 0.3232283592224121, "learning_rate": 9.982974867607658e-06, "loss": 0.4126, "step": 6081 }, { "epoch": 0.6183407889385929, "grad_norm": 0.37592223286628723, "learning_rate": 9.982945593725175e-06, "loss": 0.4155, "step": 6082 }, { "epoch": 0.6184424562830418, "grad_norm": 0.3775980472564697, "learning_rate": 9.982916294739796e-06, "loss": 0.3997, "step": 6083 }, { "epoch": 0.6185441236274909, "grad_norm": 0.3703463077545166, "learning_rate": 9.982886970651669e-06, "loss": 0.4427, "step": 6084 }, { "epoch": 0.6186457909719398, "grad_norm": 0.3368549644947052, "learning_rate": 9.982857621460941e-06, "loss": 0.3961, "step": 6085 }, { "epoch": 0.6187474583163888, "grad_norm": 0.34478598833084106, "learning_rate": 9.982828247167762e-06, "loss": 0.4211, "step": 6086 }, { "epoch": 0.6188491256608377, "grad_norm": 0.38505861163139343, "learning_rate": 9.982798847772276e-06, "loss": 0.3387, "step": 6087 }, { "epoch": 0.6189507930052867, "grad_norm": 0.36964452266693115, "learning_rate": 9.982769423274634e-06, "loss": 0.4, "step": 6088 }, { "epoch": 0.6190524603497357, "grad_norm": 0.3385515511035919, "learning_rate": 9.982739973674986e-06, "loss": 0.428, "step": 6089 }, { "epoch": 0.6191541276941847, "grad_norm": 0.39200493693351746, "learning_rate": 9.982710498973476e-06, "loss": 0.4463, "step": 6090 }, { "epoch": 0.6192557950386336, "grad_norm": 0.3557828366756439, "learning_rate": 9.982680999170255e-06, "loss": 0.37, "step": 6091 }, { "epoch": 0.6193574623830825, "grad_norm": 0.3529227375984192, "learning_rate": 9.982651474265473e-06, "loss": 0.4041, "step": 6092 }, { "epoch": 0.6194591297275315, "grad_norm": 0.34656745195388794, "learning_rate": 9.982621924259275e-06, "loss": 0.3943, "step": 6093 }, { "epoch": 0.6195607970719805, "grad_norm": 0.35288408398628235, "learning_rate": 9.982592349151812e-06, "loss": 0.4005, "step": 6094 }, { "epoch": 0.6196624644164295, "grad_norm": 0.37876778841018677, "learning_rate": 9.982562748943231e-06, "loss": 0.3993, "step": 6095 }, { "epoch": 0.6197641317608784, "grad_norm": 0.33390653133392334, "learning_rate": 9.982533123633686e-06, "loss": 0.4143, "step": 6096 }, { "epoch": 0.6198657991053274, "grad_norm": 0.3351753354072571, "learning_rate": 9.982503473223321e-06, "loss": 0.4331, "step": 6097 }, { "epoch": 0.6199674664497763, "grad_norm": 0.3421652317047119, "learning_rate": 9.98247379771229e-06, "loss": 0.3955, "step": 6098 }, { "epoch": 0.6200691337942253, "grad_norm": 0.3326736390590668, "learning_rate": 9.982444097100738e-06, "loss": 0.3985, "step": 6099 }, { "epoch": 0.6201708011386743, "grad_norm": 0.35597413778305054, "learning_rate": 9.982414371388817e-06, "loss": 0.3862, "step": 6100 }, { "epoch": 0.6202724684831232, "grad_norm": 0.35527148842811584, "learning_rate": 9.982384620576677e-06, "loss": 0.401, "step": 6101 }, { "epoch": 0.6203741358275722, "grad_norm": 0.3920542895793915, "learning_rate": 9.982354844664467e-06, "loss": 0.3951, "step": 6102 }, { "epoch": 0.6204758031720211, "grad_norm": 0.3629273474216461, "learning_rate": 9.982325043652337e-06, "loss": 0.4405, "step": 6103 }, { "epoch": 0.6205774705164702, "grad_norm": 0.36107391119003296, "learning_rate": 9.982295217540437e-06, "loss": 0.3982, "step": 6104 }, { "epoch": 0.6206791378609191, "grad_norm": 0.38048380613327026, "learning_rate": 9.982265366328917e-06, "loss": 0.3849, "step": 6105 }, { "epoch": 0.620780805205368, "grad_norm": 0.36562493443489075, "learning_rate": 9.982235490017931e-06, "loss": 0.3846, "step": 6106 }, { "epoch": 0.620882472549817, "grad_norm": 0.3745977282524109, "learning_rate": 9.982205588607624e-06, "loss": 0.4006, "step": 6107 }, { "epoch": 0.6209841398942659, "grad_norm": 0.33066847920417786, "learning_rate": 9.98217566209815e-06, "loss": 0.3881, "step": 6108 }, { "epoch": 0.6210858072387149, "grad_norm": 0.36995983123779297, "learning_rate": 9.98214571048966e-06, "loss": 0.3936, "step": 6109 }, { "epoch": 0.6211874745831639, "grad_norm": 0.3576115071773529, "learning_rate": 9.982115733782303e-06, "loss": 0.3749, "step": 6110 }, { "epoch": 0.6212891419276129, "grad_norm": 0.3693684935569763, "learning_rate": 9.98208573197623e-06, "loss": 0.3825, "step": 6111 }, { "epoch": 0.6213908092720618, "grad_norm": 0.3702501356601715, "learning_rate": 9.982055705071593e-06, "loss": 0.3638, "step": 6112 }, { "epoch": 0.6214924766165107, "grad_norm": 0.4027341306209564, "learning_rate": 9.982025653068545e-06, "loss": 0.4061, "step": 6113 }, { "epoch": 0.6215941439609597, "grad_norm": 0.36381596326828003, "learning_rate": 9.981995575967235e-06, "loss": 0.4113, "step": 6114 }, { "epoch": 0.6216958113054087, "grad_norm": 0.3705875277519226, "learning_rate": 9.981965473767814e-06, "loss": 0.3422, "step": 6115 }, { "epoch": 0.6217974786498577, "grad_norm": 0.38672128319740295, "learning_rate": 9.981935346470436e-06, "loss": 0.4358, "step": 6116 }, { "epoch": 0.6218991459943066, "grad_norm": 0.3980149030685425, "learning_rate": 9.981905194075252e-06, "loss": 0.3731, "step": 6117 }, { "epoch": 0.6220008133387556, "grad_norm": 0.3908030092716217, "learning_rate": 9.981875016582411e-06, "loss": 0.4203, "step": 6118 }, { "epoch": 0.6221024806832045, "grad_norm": 0.36934247612953186, "learning_rate": 9.981844813992071e-06, "loss": 0.3882, "step": 6119 }, { "epoch": 0.6222041480276536, "grad_norm": 0.4123242199420929, "learning_rate": 9.98181458630438e-06, "loss": 0.3978, "step": 6120 }, { "epoch": 0.6223058153721025, "grad_norm": 0.459290087223053, "learning_rate": 9.981784333519488e-06, "loss": 0.4115, "step": 6121 }, { "epoch": 0.6224074827165514, "grad_norm": 0.37675175070762634, "learning_rate": 9.981754055637553e-06, "loss": 0.427, "step": 6122 }, { "epoch": 0.6225091500610004, "grad_norm": 0.40219971537590027, "learning_rate": 9.981723752658726e-06, "loss": 0.3992, "step": 6123 }, { "epoch": 0.6226108174054493, "grad_norm": 0.4453699588775635, "learning_rate": 9.981693424583155e-06, "loss": 0.4114, "step": 6124 }, { "epoch": 0.6227124847498984, "grad_norm": 0.39277103543281555, "learning_rate": 9.981663071410998e-06, "loss": 0.3782, "step": 6125 }, { "epoch": 0.6228141520943473, "grad_norm": 0.38899022340774536, "learning_rate": 9.981632693142408e-06, "loss": 0.4339, "step": 6126 }, { "epoch": 0.6229158194387963, "grad_norm": 0.42666876316070557, "learning_rate": 9.981602289777535e-06, "loss": 0.4133, "step": 6127 }, { "epoch": 0.6230174867832452, "grad_norm": 0.42732977867126465, "learning_rate": 9.981571861316532e-06, "loss": 0.3831, "step": 6128 }, { "epoch": 0.6231191541276941, "grad_norm": 0.37655556201934814, "learning_rate": 9.981541407759556e-06, "loss": 0.3752, "step": 6129 }, { "epoch": 0.6232208214721432, "grad_norm": 0.416623592376709, "learning_rate": 9.981510929106757e-06, "loss": 0.3906, "step": 6130 }, { "epoch": 0.6233224888165921, "grad_norm": 0.34618765115737915, "learning_rate": 9.98148042535829e-06, "loss": 0.3827, "step": 6131 }, { "epoch": 0.6234241561610411, "grad_norm": 0.382302850484848, "learning_rate": 9.981449896514308e-06, "loss": 0.4046, "step": 6132 }, { "epoch": 0.62352582350549, "grad_norm": 0.33678779006004333, "learning_rate": 9.981419342574965e-06, "loss": 0.3839, "step": 6133 }, { "epoch": 0.623627490849939, "grad_norm": 0.3974808156490326, "learning_rate": 9.981388763540416e-06, "loss": 0.4074, "step": 6134 }, { "epoch": 0.623729158194388, "grad_norm": 0.3909759521484375, "learning_rate": 9.981358159410814e-06, "loss": 0.4044, "step": 6135 }, { "epoch": 0.623830825538837, "grad_norm": 0.36904093623161316, "learning_rate": 9.981327530186314e-06, "loss": 0.3751, "step": 6136 }, { "epoch": 0.6239324928832859, "grad_norm": 0.3860967755317688, "learning_rate": 9.981296875867068e-06, "loss": 0.4089, "step": 6137 }, { "epoch": 0.6240341602277348, "grad_norm": 0.3859291970729828, "learning_rate": 9.981266196453233e-06, "loss": 0.4032, "step": 6138 }, { "epoch": 0.6241358275721838, "grad_norm": 0.40385720133781433, "learning_rate": 9.981235491944963e-06, "loss": 0.4337, "step": 6139 }, { "epoch": 0.6242374949166328, "grad_norm": 0.3573744297027588, "learning_rate": 9.981204762342412e-06, "loss": 0.433, "step": 6140 }, { "epoch": 0.6243391622610818, "grad_norm": 0.35561618208885193, "learning_rate": 9.981174007645735e-06, "loss": 0.4065, "step": 6141 }, { "epoch": 0.6244408296055307, "grad_norm": 0.38744834065437317, "learning_rate": 9.981143227855088e-06, "loss": 0.3753, "step": 6142 }, { "epoch": 0.6245424969499797, "grad_norm": 0.34229230880737305, "learning_rate": 9.981112422970623e-06, "loss": 0.4091, "step": 6143 }, { "epoch": 0.6246441642944286, "grad_norm": 0.33521226048469543, "learning_rate": 9.9810815929925e-06, "loss": 0.3894, "step": 6144 }, { "epoch": 0.6247458316388776, "grad_norm": 0.36644741892814636, "learning_rate": 9.981050737920868e-06, "loss": 0.3728, "step": 6145 }, { "epoch": 0.6248474989833266, "grad_norm": 0.3394868075847626, "learning_rate": 9.98101985775589e-06, "loss": 0.3754, "step": 6146 }, { "epoch": 0.6249491663277755, "grad_norm": 0.3756827712059021, "learning_rate": 9.980988952497714e-06, "loss": 0.3865, "step": 6147 }, { "epoch": 0.6250508336722245, "grad_norm": 0.34588706493377686, "learning_rate": 9.980958022146501e-06, "loss": 0.3875, "step": 6148 }, { "epoch": 0.6251525010166734, "grad_norm": 0.4065995514392853, "learning_rate": 9.980927066702406e-06, "loss": 0.4108, "step": 6149 }, { "epoch": 0.6252541683611224, "grad_norm": 0.42898625135421753, "learning_rate": 9.980896086165584e-06, "loss": 0.4066, "step": 6150 }, { "epoch": 0.6253558357055714, "grad_norm": 0.3658815920352936, "learning_rate": 9.980865080536191e-06, "loss": 0.42, "step": 6151 }, { "epoch": 0.6254575030500203, "grad_norm": 0.3806862533092499, "learning_rate": 9.980834049814381e-06, "loss": 0.3907, "step": 6152 }, { "epoch": 0.6255591703944693, "grad_norm": 0.39059779047966003, "learning_rate": 9.980802994000316e-06, "loss": 0.3982, "step": 6153 }, { "epoch": 0.6256608377389182, "grad_norm": 0.38217878341674805, "learning_rate": 9.980771913094148e-06, "loss": 0.4003, "step": 6154 }, { "epoch": 0.6257625050833672, "grad_norm": 0.3458539843559265, "learning_rate": 9.980740807096034e-06, "loss": 0.4041, "step": 6155 }, { "epoch": 0.6258641724278162, "grad_norm": 0.33675631880760193, "learning_rate": 9.980709676006132e-06, "loss": 0.3997, "step": 6156 }, { "epoch": 0.6259658397722652, "grad_norm": 0.36894330382347107, "learning_rate": 9.980678519824598e-06, "loss": 0.3895, "step": 6157 }, { "epoch": 0.6260675071167141, "grad_norm": 0.3577384650707245, "learning_rate": 9.980647338551589e-06, "loss": 0.427, "step": 6158 }, { "epoch": 0.626169174461163, "grad_norm": 0.38543328642845154, "learning_rate": 9.980616132187261e-06, "loss": 0.3825, "step": 6159 }, { "epoch": 0.626270841805612, "grad_norm": 0.3777797818183899, "learning_rate": 9.980584900731775e-06, "loss": 0.4159, "step": 6160 }, { "epoch": 0.626372509150061, "grad_norm": 0.3125631809234619, "learning_rate": 9.980553644185286e-06, "loss": 0.3881, "step": 6161 }, { "epoch": 0.62647417649451, "grad_norm": 0.412722647190094, "learning_rate": 9.980522362547949e-06, "loss": 0.388, "step": 6162 }, { "epoch": 0.6265758438389589, "grad_norm": 0.36808478832244873, "learning_rate": 9.980491055819926e-06, "loss": 0.3986, "step": 6163 }, { "epoch": 0.6266775111834079, "grad_norm": 0.3511715829372406, "learning_rate": 9.980459724001373e-06, "loss": 0.3878, "step": 6164 }, { "epoch": 0.6267791785278568, "grad_norm": 0.34967902302742004, "learning_rate": 9.980428367092445e-06, "loss": 0.4269, "step": 6165 }, { "epoch": 0.6268808458723059, "grad_norm": 0.323969304561615, "learning_rate": 9.980396985093303e-06, "loss": 0.3935, "step": 6166 }, { "epoch": 0.6269825132167548, "grad_norm": 0.35309627652168274, "learning_rate": 9.980365578004105e-06, "loss": 0.3905, "step": 6167 }, { "epoch": 0.6270841805612037, "grad_norm": 0.3468165099620819, "learning_rate": 9.98033414582501e-06, "loss": 0.4139, "step": 6168 }, { "epoch": 0.6271858479056527, "grad_norm": 0.35871636867523193, "learning_rate": 9.980302688556173e-06, "loss": 0.4334, "step": 6169 }, { "epoch": 0.6272875152501016, "grad_norm": 0.37510332465171814, "learning_rate": 9.980271206197756e-06, "loss": 0.3785, "step": 6170 }, { "epoch": 0.6273891825945507, "grad_norm": 0.40101325511932373, "learning_rate": 9.980239698749916e-06, "loss": 0.3942, "step": 6171 }, { "epoch": 0.6274908499389996, "grad_norm": 0.3563325107097626, "learning_rate": 9.980208166212811e-06, "loss": 0.3548, "step": 6172 }, { "epoch": 0.6275925172834486, "grad_norm": 0.4033886194229126, "learning_rate": 9.980176608586602e-06, "loss": 0.4023, "step": 6173 }, { "epoch": 0.6276941846278975, "grad_norm": 0.3686668276786804, "learning_rate": 9.980145025871447e-06, "loss": 0.3748, "step": 6174 }, { "epoch": 0.6277958519723464, "grad_norm": 0.3631550669670105, "learning_rate": 9.980113418067502e-06, "loss": 0.3961, "step": 6175 }, { "epoch": 0.6278975193167955, "grad_norm": 0.41253477334976196, "learning_rate": 9.980081785174932e-06, "loss": 0.4318, "step": 6176 }, { "epoch": 0.6279991866612444, "grad_norm": 0.3749055862426758, "learning_rate": 9.980050127193892e-06, "loss": 0.409, "step": 6177 }, { "epoch": 0.6281008540056934, "grad_norm": 0.37337273359298706, "learning_rate": 9.980018444124542e-06, "loss": 0.3834, "step": 6178 }, { "epoch": 0.6282025213501423, "grad_norm": 0.3507190942764282, "learning_rate": 9.979986735967044e-06, "loss": 0.417, "step": 6179 }, { "epoch": 0.6283041886945913, "grad_norm": 0.3609163463115692, "learning_rate": 9.979955002721557e-06, "loss": 0.3956, "step": 6180 }, { "epoch": 0.6284058560390403, "grad_norm": 0.37433016300201416, "learning_rate": 9.979923244388238e-06, "loss": 0.3832, "step": 6181 }, { "epoch": 0.6285075233834893, "grad_norm": 0.3493233323097229, "learning_rate": 9.97989146096725e-06, "loss": 0.3885, "step": 6182 }, { "epoch": 0.6286091907279382, "grad_norm": 0.35118216276168823, "learning_rate": 9.979859652458753e-06, "loss": 0.3759, "step": 6183 }, { "epoch": 0.6287108580723871, "grad_norm": 0.35911035537719727, "learning_rate": 9.979827818862906e-06, "loss": 0.3791, "step": 6184 }, { "epoch": 0.6288125254168361, "grad_norm": 0.3648625612258911, "learning_rate": 9.97979596017987e-06, "loss": 0.3977, "step": 6185 }, { "epoch": 0.6289141927612851, "grad_norm": 0.35724684596061707, "learning_rate": 9.979764076409804e-06, "loss": 0.396, "step": 6186 }, { "epoch": 0.6290158601057341, "grad_norm": 0.4014815092086792, "learning_rate": 9.979732167552871e-06, "loss": 0.4052, "step": 6187 }, { "epoch": 0.629117527450183, "grad_norm": 0.35339924693107605, "learning_rate": 9.97970023360923e-06, "loss": 0.3833, "step": 6188 }, { "epoch": 0.629219194794632, "grad_norm": 0.4191749393939972, "learning_rate": 9.979668274579045e-06, "loss": 0.4409, "step": 6189 }, { "epoch": 0.6293208621390809, "grad_norm": 0.40157410502433777, "learning_rate": 9.979636290462472e-06, "loss": 0.4022, "step": 6190 }, { "epoch": 0.6294225294835298, "grad_norm": 0.3480910658836365, "learning_rate": 9.979604281259676e-06, "loss": 0.3847, "step": 6191 }, { "epoch": 0.6295241968279789, "grad_norm": 0.339584618806839, "learning_rate": 9.979572246970816e-06, "loss": 0.3749, "step": 6192 }, { "epoch": 0.6296258641724278, "grad_norm": 0.3672187924385071, "learning_rate": 9.979540187596055e-06, "loss": 0.384, "step": 6193 }, { "epoch": 0.6297275315168768, "grad_norm": 0.3661254644393921, "learning_rate": 9.979508103135553e-06, "loss": 0.3923, "step": 6194 }, { "epoch": 0.6298291988613257, "grad_norm": 0.36375606060028076, "learning_rate": 9.979475993589474e-06, "loss": 0.403, "step": 6195 }, { "epoch": 0.6299308662057747, "grad_norm": 0.37343958020210266, "learning_rate": 9.979443858957978e-06, "loss": 0.4121, "step": 6196 }, { "epoch": 0.6300325335502237, "grad_norm": 0.3649011552333832, "learning_rate": 9.979411699241226e-06, "loss": 0.4386, "step": 6197 }, { "epoch": 0.6301342008946726, "grad_norm": 0.3967228829860687, "learning_rate": 9.979379514439383e-06, "loss": 0.4238, "step": 6198 }, { "epoch": 0.6302358682391216, "grad_norm": 0.3825851380825043, "learning_rate": 9.979347304552607e-06, "loss": 0.4361, "step": 6199 }, { "epoch": 0.6303375355835705, "grad_norm": 0.3401142358779907, "learning_rate": 9.979315069581064e-06, "loss": 0.4025, "step": 6200 }, { "epoch": 0.6304392029280195, "grad_norm": 0.4332196116447449, "learning_rate": 9.979282809524916e-06, "loss": 0.3955, "step": 6201 }, { "epoch": 0.6305408702724685, "grad_norm": 0.3623380661010742, "learning_rate": 9.979250524384321e-06, "loss": 0.4006, "step": 6202 }, { "epoch": 0.6306425376169175, "grad_norm": 0.3862922489643097, "learning_rate": 9.979218214159447e-06, "loss": 0.4041, "step": 6203 }, { "epoch": 0.6307442049613664, "grad_norm": 0.3448061943054199, "learning_rate": 9.979185878850454e-06, "loss": 0.3744, "step": 6204 }, { "epoch": 0.6308458723058153, "grad_norm": 0.3651982843875885, "learning_rate": 9.979153518457509e-06, "loss": 0.4653, "step": 6205 }, { "epoch": 0.6309475396502643, "grad_norm": 0.3463814854621887, "learning_rate": 9.97912113298077e-06, "loss": 0.3955, "step": 6206 }, { "epoch": 0.6310492069947133, "grad_norm": 0.3738979995250702, "learning_rate": 9.9790887224204e-06, "loss": 0.4201, "step": 6207 }, { "epoch": 0.6311508743391623, "grad_norm": 0.385539174079895, "learning_rate": 9.979056286776566e-06, "loss": 0.3934, "step": 6208 }, { "epoch": 0.6312525416836112, "grad_norm": 0.3743748068809509, "learning_rate": 9.97902382604943e-06, "loss": 0.4142, "step": 6209 }, { "epoch": 0.6313542090280602, "grad_norm": 0.4332244396209717, "learning_rate": 9.978991340239155e-06, "loss": 0.3982, "step": 6210 }, { "epoch": 0.6314558763725091, "grad_norm": 0.4071626365184784, "learning_rate": 9.978958829345904e-06, "loss": 0.4038, "step": 6211 }, { "epoch": 0.6315575437169582, "grad_norm": 0.3341491222381592, "learning_rate": 9.978926293369842e-06, "loss": 0.3792, "step": 6212 }, { "epoch": 0.6316592110614071, "grad_norm": 0.3746196925640106, "learning_rate": 9.978893732311133e-06, "loss": 0.3602, "step": 6213 }, { "epoch": 0.631760878405856, "grad_norm": 0.3885353207588196, "learning_rate": 9.97886114616994e-06, "loss": 0.3791, "step": 6214 }, { "epoch": 0.631862545750305, "grad_norm": 0.35743939876556396, "learning_rate": 9.978828534946428e-06, "loss": 0.3823, "step": 6215 }, { "epoch": 0.6319642130947539, "grad_norm": 0.35648974776268005, "learning_rate": 9.97879589864076e-06, "loss": 0.4142, "step": 6216 }, { "epoch": 0.632065880439203, "grad_norm": 0.37885525822639465, "learning_rate": 9.978763237253102e-06, "loss": 0.4077, "step": 6217 }, { "epoch": 0.6321675477836519, "grad_norm": 0.39139270782470703, "learning_rate": 9.978730550783618e-06, "loss": 0.3898, "step": 6218 }, { "epoch": 0.6322692151281009, "grad_norm": 0.3900725543498993, "learning_rate": 9.978697839232472e-06, "loss": 0.4152, "step": 6219 }, { "epoch": 0.6323708824725498, "grad_norm": 0.35167303681373596, "learning_rate": 9.978665102599831e-06, "loss": 0.4019, "step": 6220 }, { "epoch": 0.6324725498169987, "grad_norm": 0.3976067304611206, "learning_rate": 9.978632340885857e-06, "loss": 0.396, "step": 6221 }, { "epoch": 0.6325742171614478, "grad_norm": 0.3611083924770355, "learning_rate": 9.978599554090718e-06, "loss": 0.3721, "step": 6222 }, { "epoch": 0.6326758845058967, "grad_norm": 0.3445274233818054, "learning_rate": 9.978566742214575e-06, "loss": 0.4037, "step": 6223 }, { "epoch": 0.6327775518503457, "grad_norm": 0.379387229681015, "learning_rate": 9.978533905257597e-06, "loss": 0.4456, "step": 6224 }, { "epoch": 0.6328792191947946, "grad_norm": 0.41676339507102966, "learning_rate": 9.97850104321995e-06, "loss": 0.4001, "step": 6225 }, { "epoch": 0.6329808865392436, "grad_norm": 0.36150991916656494, "learning_rate": 9.978468156101794e-06, "loss": 0.4247, "step": 6226 }, { "epoch": 0.6330825538836926, "grad_norm": 0.3783532679080963, "learning_rate": 9.978435243903301e-06, "loss": 0.4123, "step": 6227 }, { "epoch": 0.6331842212281416, "grad_norm": 0.38458728790283203, "learning_rate": 9.978402306624636e-06, "loss": 0.4205, "step": 6228 }, { "epoch": 0.6332858885725905, "grad_norm": 0.3680011034011841, "learning_rate": 9.97836934426596e-06, "loss": 0.3878, "step": 6229 }, { "epoch": 0.6333875559170394, "grad_norm": 0.36980700492858887, "learning_rate": 9.978336356827444e-06, "loss": 0.4135, "step": 6230 }, { "epoch": 0.6334892232614884, "grad_norm": 0.3868509829044342, "learning_rate": 9.978303344309253e-06, "loss": 0.3597, "step": 6231 }, { "epoch": 0.6335908906059373, "grad_norm": 0.39901798963546753, "learning_rate": 9.97827030671155e-06, "loss": 0.3706, "step": 6232 }, { "epoch": 0.6336925579503864, "grad_norm": 0.33517324924468994, "learning_rate": 9.978237244034507e-06, "loss": 0.3726, "step": 6233 }, { "epoch": 0.6337942252948353, "grad_norm": 0.3812389373779297, "learning_rate": 9.978204156278287e-06, "loss": 0.4371, "step": 6234 }, { "epoch": 0.6338958926392843, "grad_norm": 0.4742456078529358, "learning_rate": 9.978171043443057e-06, "loss": 0.4413, "step": 6235 }, { "epoch": 0.6339975599837332, "grad_norm": 0.38428232073783875, "learning_rate": 9.978137905528985e-06, "loss": 0.3773, "step": 6236 }, { "epoch": 0.6340992273281821, "grad_norm": 0.3607318103313446, "learning_rate": 9.978104742536237e-06, "loss": 0.4111, "step": 6237 }, { "epoch": 0.6342008946726312, "grad_norm": 0.3571888506412506, "learning_rate": 9.978071554464978e-06, "loss": 0.354, "step": 6238 }, { "epoch": 0.6343025620170801, "grad_norm": 0.4399878978729248, "learning_rate": 9.978038341315381e-06, "loss": 0.4099, "step": 6239 }, { "epoch": 0.6344042293615291, "grad_norm": 0.3202328085899353, "learning_rate": 9.97800510308761e-06, "loss": 0.4016, "step": 6240 }, { "epoch": 0.634505896705978, "grad_norm": 0.43166959285736084, "learning_rate": 9.977971839781828e-06, "loss": 0.3703, "step": 6241 }, { "epoch": 0.634607564050427, "grad_norm": 0.4221067726612091, "learning_rate": 9.97793855139821e-06, "loss": 0.406, "step": 6242 }, { "epoch": 0.634709231394876, "grad_norm": 0.3758079707622528, "learning_rate": 9.97790523793692e-06, "loss": 0.4276, "step": 6243 }, { "epoch": 0.634810898739325, "grad_norm": 0.3951112926006317, "learning_rate": 9.977871899398125e-06, "loss": 0.4257, "step": 6244 }, { "epoch": 0.6349125660837739, "grad_norm": 0.4640159606933594, "learning_rate": 9.977838535781997e-06, "loss": 0.3877, "step": 6245 }, { "epoch": 0.6350142334282228, "grad_norm": 0.37857571244239807, "learning_rate": 9.9778051470887e-06, "loss": 0.4229, "step": 6246 }, { "epoch": 0.6351159007726718, "grad_norm": 0.3693452477455139, "learning_rate": 9.977771733318405e-06, "loss": 0.3913, "step": 6247 }, { "epoch": 0.6352175681171208, "grad_norm": 0.39371737837791443, "learning_rate": 9.977738294471275e-06, "loss": 0.3946, "step": 6248 }, { "epoch": 0.6353192354615698, "grad_norm": 0.3641490042209625, "learning_rate": 9.977704830547486e-06, "loss": 0.4063, "step": 6249 }, { "epoch": 0.6354209028060187, "grad_norm": 0.38278087973594666, "learning_rate": 9.977671341547201e-06, "loss": 0.3766, "step": 6250 }, { "epoch": 0.6355225701504676, "grad_norm": 0.4174720346927643, "learning_rate": 9.977637827470591e-06, "loss": 0.4251, "step": 6251 }, { "epoch": 0.6356242374949166, "grad_norm": 0.3442617952823639, "learning_rate": 9.977604288317827e-06, "loss": 0.3974, "step": 6252 }, { "epoch": 0.6357259048393656, "grad_norm": 0.35364624857902527, "learning_rate": 9.977570724089072e-06, "loss": 0.424, "step": 6253 }, { "epoch": 0.6358275721838146, "grad_norm": 0.3562037944793701, "learning_rate": 9.9775371347845e-06, "loss": 0.3998, "step": 6254 }, { "epoch": 0.6359292395282635, "grad_norm": 0.34080183506011963, "learning_rate": 9.977503520404278e-06, "loss": 0.3953, "step": 6255 }, { "epoch": 0.6360309068727125, "grad_norm": 0.33483776450157166, "learning_rate": 9.977469880948578e-06, "loss": 0.4039, "step": 6256 }, { "epoch": 0.6361325742171614, "grad_norm": 0.3782627582550049, "learning_rate": 9.977436216417566e-06, "loss": 0.372, "step": 6257 }, { "epoch": 0.6362342415616105, "grad_norm": 0.3497771620750427, "learning_rate": 9.977402526811412e-06, "loss": 0.397, "step": 6258 }, { "epoch": 0.6363359089060594, "grad_norm": 0.32696419954299927, "learning_rate": 9.97736881213029e-06, "loss": 0.3979, "step": 6259 }, { "epoch": 0.6364375762505083, "grad_norm": 0.33760055899620056, "learning_rate": 9.977335072374365e-06, "loss": 0.3956, "step": 6260 }, { "epoch": 0.6365392435949573, "grad_norm": 0.3552282154560089, "learning_rate": 9.977301307543809e-06, "loss": 0.438, "step": 6261 }, { "epoch": 0.6366409109394062, "grad_norm": 0.3257615268230438, "learning_rate": 9.977267517638792e-06, "loss": 0.3896, "step": 6262 }, { "epoch": 0.6367425782838553, "grad_norm": 0.3178052604198456, "learning_rate": 9.977233702659483e-06, "loss": 0.4089, "step": 6263 }, { "epoch": 0.6368442456283042, "grad_norm": 0.3323442339897156, "learning_rate": 9.977199862606053e-06, "loss": 0.4014, "step": 6264 }, { "epoch": 0.6369459129727532, "grad_norm": 0.35047125816345215, "learning_rate": 9.977165997478675e-06, "loss": 0.3957, "step": 6265 }, { "epoch": 0.6370475803172021, "grad_norm": 0.34143856167793274, "learning_rate": 9.977132107277516e-06, "loss": 0.386, "step": 6266 }, { "epoch": 0.637149247661651, "grad_norm": 0.3557368814945221, "learning_rate": 9.97709819200275e-06, "loss": 0.4249, "step": 6267 }, { "epoch": 0.6372509150061001, "grad_norm": 0.3608747124671936, "learning_rate": 9.977064251654542e-06, "loss": 0.4072, "step": 6268 }, { "epoch": 0.637352582350549, "grad_norm": 0.3378283679485321, "learning_rate": 9.977030286233072e-06, "loss": 0.4002, "step": 6269 }, { "epoch": 0.637454249694998, "grad_norm": 0.33413904905319214, "learning_rate": 9.976996295738503e-06, "loss": 0.4077, "step": 6270 }, { "epoch": 0.6375559170394469, "grad_norm": 0.36248862743377686, "learning_rate": 9.97696228017101e-06, "loss": 0.3851, "step": 6271 }, { "epoch": 0.6376575843838959, "grad_norm": 0.37379321455955505, "learning_rate": 9.976928239530762e-06, "loss": 0.3976, "step": 6272 }, { "epoch": 0.6377592517283448, "grad_norm": 0.3652868866920471, "learning_rate": 9.976894173817935e-06, "loss": 0.4226, "step": 6273 }, { "epoch": 0.6378609190727939, "grad_norm": 0.3674124479293823, "learning_rate": 9.976860083032698e-06, "loss": 0.4118, "step": 6274 }, { "epoch": 0.6379625864172428, "grad_norm": 0.4405622184276581, "learning_rate": 9.97682596717522e-06, "loss": 0.412, "step": 6275 }, { "epoch": 0.6380642537616917, "grad_norm": 0.38785314559936523, "learning_rate": 9.976791826245677e-06, "loss": 0.381, "step": 6276 }, { "epoch": 0.6381659211061407, "grad_norm": 0.39457574486732483, "learning_rate": 9.97675766024424e-06, "loss": 0.4097, "step": 6277 }, { "epoch": 0.6382675884505896, "grad_norm": 0.3939996361732483, "learning_rate": 9.97672346917108e-06, "loss": 0.3913, "step": 6278 }, { "epoch": 0.6383692557950387, "grad_norm": 0.368177205324173, "learning_rate": 9.97668925302637e-06, "loss": 0.3799, "step": 6279 }, { "epoch": 0.6384709231394876, "grad_norm": 0.36308756470680237, "learning_rate": 9.976655011810283e-06, "loss": 0.4061, "step": 6280 }, { "epoch": 0.6385725904839366, "grad_norm": 0.3665761649608612, "learning_rate": 9.976620745522989e-06, "loss": 0.407, "step": 6281 }, { "epoch": 0.6386742578283855, "grad_norm": 0.3549325466156006, "learning_rate": 9.976586454164662e-06, "loss": 0.4206, "step": 6282 }, { "epoch": 0.6387759251728344, "grad_norm": 0.35041338205337524, "learning_rate": 9.976552137735476e-06, "loss": 0.4109, "step": 6283 }, { "epoch": 0.6388775925172835, "grad_norm": 0.33828553557395935, "learning_rate": 9.976517796235604e-06, "loss": 0.4217, "step": 6284 }, { "epoch": 0.6389792598617324, "grad_norm": 0.38442713022232056, "learning_rate": 9.976483429665217e-06, "loss": 0.4047, "step": 6285 }, { "epoch": 0.6390809272061814, "grad_norm": 0.3470490574836731, "learning_rate": 9.976449038024489e-06, "loss": 0.4103, "step": 6286 }, { "epoch": 0.6391825945506303, "grad_norm": 0.3363296389579773, "learning_rate": 9.976414621313593e-06, "loss": 0.4001, "step": 6287 }, { "epoch": 0.6392842618950793, "grad_norm": 0.37502408027648926, "learning_rate": 9.976380179532703e-06, "loss": 0.3958, "step": 6288 }, { "epoch": 0.6393859292395283, "grad_norm": 0.3373505771160126, "learning_rate": 9.976345712681991e-06, "loss": 0.3877, "step": 6289 }, { "epoch": 0.6394875965839772, "grad_norm": 0.3412869870662689, "learning_rate": 9.976311220761634e-06, "loss": 0.4227, "step": 6290 }, { "epoch": 0.6395892639284262, "grad_norm": 0.34551313519477844, "learning_rate": 9.976276703771804e-06, "loss": 0.4166, "step": 6291 }, { "epoch": 0.6396909312728751, "grad_norm": 0.33574211597442627, "learning_rate": 9.976242161712672e-06, "loss": 0.4365, "step": 6292 }, { "epoch": 0.6397925986173241, "grad_norm": 0.31828656792640686, "learning_rate": 9.976207594584417e-06, "loss": 0.3629, "step": 6293 }, { "epoch": 0.6398942659617731, "grad_norm": 0.33863574266433716, "learning_rate": 9.976173002387209e-06, "loss": 0.3918, "step": 6294 }, { "epoch": 0.6399959333062221, "grad_norm": 0.33239883184432983, "learning_rate": 9.976138385121224e-06, "loss": 0.4406, "step": 6295 }, { "epoch": 0.640097600650671, "grad_norm": 0.36734092235565186, "learning_rate": 9.976103742786636e-06, "loss": 0.3725, "step": 6296 }, { "epoch": 0.64019926799512, "grad_norm": 0.33103951811790466, "learning_rate": 9.976069075383621e-06, "loss": 0.3911, "step": 6297 }, { "epoch": 0.6403009353395689, "grad_norm": 0.3729177713394165, "learning_rate": 9.976034382912352e-06, "loss": 0.3974, "step": 6298 }, { "epoch": 0.6404026026840179, "grad_norm": 0.34699615836143494, "learning_rate": 9.975999665373004e-06, "loss": 0.4242, "step": 6299 }, { "epoch": 0.6405042700284669, "grad_norm": 0.35311493277549744, "learning_rate": 9.975964922765754e-06, "loss": 0.3769, "step": 6300 }, { "epoch": 0.6406059373729158, "grad_norm": 0.35642868280410767, "learning_rate": 9.975930155090775e-06, "loss": 0.3932, "step": 6301 }, { "epoch": 0.6407076047173648, "grad_norm": 0.3203379213809967, "learning_rate": 9.975895362348239e-06, "loss": 0.3788, "step": 6302 }, { "epoch": 0.6408092720618137, "grad_norm": 0.3469259738922119, "learning_rate": 9.975860544538326e-06, "loss": 0.3947, "step": 6303 }, { "epoch": 0.6409109394062628, "grad_norm": 0.3078117370605469, "learning_rate": 9.975825701661211e-06, "loss": 0.39, "step": 6304 }, { "epoch": 0.6410126067507117, "grad_norm": 0.40377429127693176, "learning_rate": 9.975790833717069e-06, "loss": 0.3788, "step": 6305 }, { "epoch": 0.6411142740951606, "grad_norm": 0.4235888123512268, "learning_rate": 9.975755940706074e-06, "loss": 0.3605, "step": 6306 }, { "epoch": 0.6412159414396096, "grad_norm": 0.32204991579055786, "learning_rate": 9.975721022628405e-06, "loss": 0.3851, "step": 6307 }, { "epoch": 0.6413176087840585, "grad_norm": 0.4173498749732971, "learning_rate": 9.975686079484232e-06, "loss": 0.4098, "step": 6308 }, { "epoch": 0.6414192761285076, "grad_norm": 0.42287659645080566, "learning_rate": 9.975651111273738e-06, "loss": 0.4023, "step": 6309 }, { "epoch": 0.6415209434729565, "grad_norm": 0.3498070240020752, "learning_rate": 9.975616117997096e-06, "loss": 0.4246, "step": 6310 }, { "epoch": 0.6416226108174055, "grad_norm": 0.33993348479270935, "learning_rate": 9.975581099654481e-06, "loss": 0.3715, "step": 6311 }, { "epoch": 0.6417242781618544, "grad_norm": 0.37223565578460693, "learning_rate": 9.975546056246071e-06, "loss": 0.4029, "step": 6312 }, { "epoch": 0.6418259455063033, "grad_norm": 0.36965054273605347, "learning_rate": 9.975510987772044e-06, "loss": 0.4006, "step": 6313 }, { "epoch": 0.6419276128507524, "grad_norm": 0.3861196041107178, "learning_rate": 9.975475894232574e-06, "loss": 0.4128, "step": 6314 }, { "epoch": 0.6420292801952013, "grad_norm": 0.3555426597595215, "learning_rate": 9.975440775627838e-06, "loss": 0.388, "step": 6315 }, { "epoch": 0.6421309475396503, "grad_norm": 0.38078099489212036, "learning_rate": 9.975405631958015e-06, "loss": 0.3847, "step": 6316 }, { "epoch": 0.6422326148840992, "grad_norm": 0.3726940453052521, "learning_rate": 9.97537046322328e-06, "loss": 0.4227, "step": 6317 }, { "epoch": 0.6423342822285482, "grad_norm": 0.38166573643684387, "learning_rate": 9.975335269423811e-06, "loss": 0.4002, "step": 6318 }, { "epoch": 0.6424359495729971, "grad_norm": 0.34981220960617065, "learning_rate": 9.975300050559785e-06, "loss": 0.4381, "step": 6319 }, { "epoch": 0.6425376169174462, "grad_norm": 0.36659520864486694, "learning_rate": 9.975264806631379e-06, "loss": 0.3758, "step": 6320 }, { "epoch": 0.6426392842618951, "grad_norm": 0.3507065176963806, "learning_rate": 9.975229537638773e-06, "loss": 0.3924, "step": 6321 }, { "epoch": 0.642740951606344, "grad_norm": 0.3173876702785492, "learning_rate": 9.975194243582143e-06, "loss": 0.3841, "step": 6322 }, { "epoch": 0.642842618950793, "grad_norm": 0.3498629033565521, "learning_rate": 9.975158924461665e-06, "loss": 0.3859, "step": 6323 }, { "epoch": 0.6429442862952419, "grad_norm": 0.32153239846229553, "learning_rate": 9.975123580277518e-06, "loss": 0.3986, "step": 6324 }, { "epoch": 0.643045953639691, "grad_norm": 0.35411354899406433, "learning_rate": 9.975088211029884e-06, "loss": 0.4033, "step": 6325 }, { "epoch": 0.6431476209841399, "grad_norm": 0.33154723048210144, "learning_rate": 9.975052816718935e-06, "loss": 0.4426, "step": 6326 }, { "epoch": 0.6432492883285889, "grad_norm": 0.3760357201099396, "learning_rate": 9.975017397344853e-06, "loss": 0.4142, "step": 6327 }, { "epoch": 0.6433509556730378, "grad_norm": 0.34585586190223694, "learning_rate": 9.974981952907817e-06, "loss": 0.3799, "step": 6328 }, { "epoch": 0.6434526230174867, "grad_norm": 0.365315705537796, "learning_rate": 9.974946483408002e-06, "loss": 0.4222, "step": 6329 }, { "epoch": 0.6435542903619358, "grad_norm": 0.35401737689971924, "learning_rate": 9.97491098884559e-06, "loss": 0.4109, "step": 6330 }, { "epoch": 0.6436559577063847, "grad_norm": 0.3388572037220001, "learning_rate": 9.974875469220759e-06, "loss": 0.4038, "step": 6331 }, { "epoch": 0.6437576250508337, "grad_norm": 0.36286136507987976, "learning_rate": 9.974839924533685e-06, "loss": 0.4012, "step": 6332 }, { "epoch": 0.6438592923952826, "grad_norm": 0.32779622077941895, "learning_rate": 9.974804354784553e-06, "loss": 0.3932, "step": 6333 }, { "epoch": 0.6439609597397316, "grad_norm": 0.36207810044288635, "learning_rate": 9.974768759973538e-06, "loss": 0.405, "step": 6334 }, { "epoch": 0.6440626270841806, "grad_norm": 0.3352052867412567, "learning_rate": 9.974733140100818e-06, "loss": 0.3926, "step": 6335 }, { "epoch": 0.6441642944286295, "grad_norm": 0.34267717599868774, "learning_rate": 9.974697495166577e-06, "loss": 0.386, "step": 6336 }, { "epoch": 0.6442659617730785, "grad_norm": 0.31799712777137756, "learning_rate": 9.974661825170993e-06, "loss": 0.3872, "step": 6337 }, { "epoch": 0.6443676291175274, "grad_norm": 0.41054195165634155, "learning_rate": 9.974626130114243e-06, "loss": 0.4178, "step": 6338 }, { "epoch": 0.6444692964619764, "grad_norm": 0.31435057520866394, "learning_rate": 9.974590409996508e-06, "loss": 0.3878, "step": 6339 }, { "epoch": 0.6445709638064254, "grad_norm": 0.362383097410202, "learning_rate": 9.97455466481797e-06, "loss": 0.3902, "step": 6340 }, { "epoch": 0.6446726311508744, "grad_norm": 0.3552999794483185, "learning_rate": 9.974518894578807e-06, "loss": 0.4115, "step": 6341 }, { "epoch": 0.6447742984953233, "grad_norm": 0.38162896037101746, "learning_rate": 9.974483099279201e-06, "loss": 0.3869, "step": 6342 }, { "epoch": 0.6448759658397722, "grad_norm": 0.32505446672439575, "learning_rate": 9.974447278919331e-06, "loss": 0.3855, "step": 6343 }, { "epoch": 0.6449776331842212, "grad_norm": 0.3774759769439697, "learning_rate": 9.974411433499378e-06, "loss": 0.4177, "step": 6344 }, { "epoch": 0.6450793005286702, "grad_norm": 0.32891127467155457, "learning_rate": 9.974375563019521e-06, "loss": 0.3814, "step": 6345 }, { "epoch": 0.6451809678731192, "grad_norm": 0.34531694650650024, "learning_rate": 9.974339667479942e-06, "loss": 0.3714, "step": 6346 }, { "epoch": 0.6452826352175681, "grad_norm": 0.32749149203300476, "learning_rate": 9.974303746880822e-06, "loss": 0.3846, "step": 6347 }, { "epoch": 0.6453843025620171, "grad_norm": 0.3743855953216553, "learning_rate": 9.974267801222344e-06, "loss": 0.4209, "step": 6348 }, { "epoch": 0.645485969906466, "grad_norm": 0.4089248776435852, "learning_rate": 9.974231830504683e-06, "loss": 0.3992, "step": 6349 }, { "epoch": 0.6455876372509151, "grad_norm": 0.3337291181087494, "learning_rate": 9.974195834728028e-06, "loss": 0.3881, "step": 6350 }, { "epoch": 0.645689304595364, "grad_norm": 0.36859554052352905, "learning_rate": 9.974159813892554e-06, "loss": 0.3824, "step": 6351 }, { "epoch": 0.6457909719398129, "grad_norm": 0.38755735754966736, "learning_rate": 9.974123767998445e-06, "loss": 0.3957, "step": 6352 }, { "epoch": 0.6458926392842619, "grad_norm": 0.3640406131744385, "learning_rate": 9.974087697045885e-06, "loss": 0.3891, "step": 6353 }, { "epoch": 0.6459943066287108, "grad_norm": 0.3554825186729431, "learning_rate": 9.974051601035051e-06, "loss": 0.3867, "step": 6354 }, { "epoch": 0.6460959739731599, "grad_norm": 0.41200703382492065, "learning_rate": 9.974015479966126e-06, "loss": 0.4305, "step": 6355 }, { "epoch": 0.6461976413176088, "grad_norm": 0.38852086663246155, "learning_rate": 9.973979333839294e-06, "loss": 0.402, "step": 6356 }, { "epoch": 0.6462993086620578, "grad_norm": 0.3858267664909363, "learning_rate": 9.973943162654738e-06, "loss": 0.4171, "step": 6357 }, { "epoch": 0.6464009760065067, "grad_norm": 0.32148823142051697, "learning_rate": 9.973906966412637e-06, "loss": 0.3898, "step": 6358 }, { "epoch": 0.6465026433509556, "grad_norm": 0.32169008255004883, "learning_rate": 9.973870745113175e-06, "loss": 0.3708, "step": 6359 }, { "epoch": 0.6466043106954046, "grad_norm": 0.36658817529678345, "learning_rate": 9.973834498756532e-06, "loss": 0.4189, "step": 6360 }, { "epoch": 0.6467059780398536, "grad_norm": 0.389545738697052, "learning_rate": 9.973798227342895e-06, "loss": 0.4378, "step": 6361 }, { "epoch": 0.6468076453843026, "grad_norm": 0.3382219672203064, "learning_rate": 9.973761930872444e-06, "loss": 0.3926, "step": 6362 }, { "epoch": 0.6469093127287515, "grad_norm": 0.3342868387699127, "learning_rate": 9.973725609345362e-06, "loss": 0.4017, "step": 6363 }, { "epoch": 0.6470109800732005, "grad_norm": 0.3648550510406494, "learning_rate": 9.973689262761832e-06, "loss": 0.3941, "step": 6364 }, { "epoch": 0.6471126474176494, "grad_norm": 0.3115599453449249, "learning_rate": 9.973652891122038e-06, "loss": 0.382, "step": 6365 }, { "epoch": 0.6472143147620985, "grad_norm": 0.37556907534599304, "learning_rate": 9.973616494426163e-06, "loss": 0.4104, "step": 6366 }, { "epoch": 0.6473159821065474, "grad_norm": 0.32148846983909607, "learning_rate": 9.973580072674391e-06, "loss": 0.3723, "step": 6367 }, { "epoch": 0.6474176494509963, "grad_norm": 0.3759932816028595, "learning_rate": 9.973543625866901e-06, "loss": 0.4154, "step": 6368 }, { "epoch": 0.6475193167954453, "grad_norm": 0.34181299805641174, "learning_rate": 9.973507154003883e-06, "loss": 0.3883, "step": 6369 }, { "epoch": 0.6476209841398942, "grad_norm": 0.3427775204181671, "learning_rate": 9.973470657085517e-06, "loss": 0.371, "step": 6370 }, { "epoch": 0.6477226514843433, "grad_norm": 0.328730970621109, "learning_rate": 9.973434135111989e-06, "loss": 0.4157, "step": 6371 }, { "epoch": 0.6478243188287922, "grad_norm": 0.351392924785614, "learning_rate": 9.97339758808348e-06, "loss": 0.4272, "step": 6372 }, { "epoch": 0.6479259861732412, "grad_norm": 0.3970756232738495, "learning_rate": 9.973361016000176e-06, "loss": 0.3898, "step": 6373 }, { "epoch": 0.6480276535176901, "grad_norm": 0.3528008759021759, "learning_rate": 9.973324418862262e-06, "loss": 0.4345, "step": 6374 }, { "epoch": 0.648129320862139, "grad_norm": 0.3321060836315155, "learning_rate": 9.973287796669921e-06, "loss": 0.4135, "step": 6375 }, { "epoch": 0.6482309882065881, "grad_norm": 0.3728143870830536, "learning_rate": 9.973251149423338e-06, "loss": 0.4151, "step": 6376 }, { "epoch": 0.648332655551037, "grad_norm": 0.3428548276424408, "learning_rate": 9.973214477122697e-06, "loss": 0.3973, "step": 6377 }, { "epoch": 0.648434322895486, "grad_norm": 0.3578060567378998, "learning_rate": 9.973177779768184e-06, "loss": 0.4077, "step": 6378 }, { "epoch": 0.6485359902399349, "grad_norm": 0.34721824526786804, "learning_rate": 9.973141057359984e-06, "loss": 0.401, "step": 6379 }, { "epoch": 0.6486376575843839, "grad_norm": 0.3848171532154083, "learning_rate": 9.973104309898278e-06, "loss": 0.3972, "step": 6380 }, { "epoch": 0.6487393249288329, "grad_norm": 0.3669479191303253, "learning_rate": 9.973067537383256e-06, "loss": 0.3783, "step": 6381 }, { "epoch": 0.6488409922732818, "grad_norm": 0.3760349452495575, "learning_rate": 9.973030739815101e-06, "loss": 0.3849, "step": 6382 }, { "epoch": 0.6489426596177308, "grad_norm": 0.404228538274765, "learning_rate": 9.972993917194001e-06, "loss": 0.4361, "step": 6383 }, { "epoch": 0.6490443269621797, "grad_norm": 0.3288978338241577, "learning_rate": 9.972957069520138e-06, "loss": 0.3987, "step": 6384 }, { "epoch": 0.6491459943066287, "grad_norm": 0.4034995436668396, "learning_rate": 9.972920196793698e-06, "loss": 0.4127, "step": 6385 }, { "epoch": 0.6492476616510777, "grad_norm": 0.3534364104270935, "learning_rate": 9.97288329901487e-06, "loss": 0.4035, "step": 6386 }, { "epoch": 0.6493493289955267, "grad_norm": 0.35928112268447876, "learning_rate": 9.972846376183835e-06, "loss": 0.3776, "step": 6387 }, { "epoch": 0.6494509963399756, "grad_norm": 0.3478681147098541, "learning_rate": 9.972809428300785e-06, "loss": 0.4141, "step": 6388 }, { "epoch": 0.6495526636844245, "grad_norm": 0.3242185711860657, "learning_rate": 9.9727724553659e-06, "loss": 0.3682, "step": 6389 }, { "epoch": 0.6496543310288735, "grad_norm": 0.35941460728645325, "learning_rate": 9.97273545737937e-06, "loss": 0.4059, "step": 6390 }, { "epoch": 0.6497559983733225, "grad_norm": 0.340923935174942, "learning_rate": 9.972698434341382e-06, "loss": 0.3747, "step": 6391 }, { "epoch": 0.6498576657177715, "grad_norm": 0.3461742699146271, "learning_rate": 9.972661386252118e-06, "loss": 0.3673, "step": 6392 }, { "epoch": 0.6499593330622204, "grad_norm": 0.3839455842971802, "learning_rate": 9.972624313111769e-06, "loss": 0.4158, "step": 6393 }, { "epoch": 0.6500610004066694, "grad_norm": 0.35782909393310547, "learning_rate": 9.972587214920522e-06, "loss": 0.383, "step": 6394 }, { "epoch": 0.6501626677511183, "grad_norm": 0.3661976158618927, "learning_rate": 9.97255009167856e-06, "loss": 0.4324, "step": 6395 }, { "epoch": 0.6502643350955674, "grad_norm": 0.3510184586048126, "learning_rate": 9.972512943386072e-06, "loss": 0.4002, "step": 6396 }, { "epoch": 0.6503660024400163, "grad_norm": 0.35025298595428467, "learning_rate": 9.972475770043248e-06, "loss": 0.393, "step": 6397 }, { "epoch": 0.6504676697844652, "grad_norm": 0.3333870768547058, "learning_rate": 9.972438571650271e-06, "loss": 0.4019, "step": 6398 }, { "epoch": 0.6505693371289142, "grad_norm": 0.3464075028896332, "learning_rate": 9.97240134820733e-06, "loss": 0.4075, "step": 6399 }, { "epoch": 0.6506710044733631, "grad_norm": 0.3617277443408966, "learning_rate": 9.972364099714614e-06, "loss": 0.4067, "step": 6400 }, { "epoch": 0.6507726718178121, "grad_norm": 0.33979490399360657, "learning_rate": 9.972326826172307e-06, "loss": 0.3979, "step": 6401 }, { "epoch": 0.6508743391622611, "grad_norm": 0.3374578058719635, "learning_rate": 9.972289527580601e-06, "loss": 0.3908, "step": 6402 }, { "epoch": 0.6509760065067101, "grad_norm": 0.3608293831348419, "learning_rate": 9.97225220393968e-06, "loss": 0.4006, "step": 6403 }, { "epoch": 0.651077673851159, "grad_norm": 0.3343573212623596, "learning_rate": 9.972214855249737e-06, "loss": 0.3773, "step": 6404 }, { "epoch": 0.6511793411956079, "grad_norm": 0.3714580237865448, "learning_rate": 9.972177481510956e-06, "loss": 0.4269, "step": 6405 }, { "epoch": 0.6512810085400569, "grad_norm": 0.3516184389591217, "learning_rate": 9.972140082723525e-06, "loss": 0.4396, "step": 6406 }, { "epoch": 0.6513826758845059, "grad_norm": 0.3378244936466217, "learning_rate": 9.972102658887635e-06, "loss": 0.4055, "step": 6407 }, { "epoch": 0.6514843432289549, "grad_norm": 0.37598666548728943, "learning_rate": 9.972065210003472e-06, "loss": 0.3799, "step": 6408 }, { "epoch": 0.6515860105734038, "grad_norm": 0.3411756157875061, "learning_rate": 9.972027736071229e-06, "loss": 0.3621, "step": 6409 }, { "epoch": 0.6516876779178528, "grad_norm": 0.3653741180896759, "learning_rate": 9.97199023709109e-06, "loss": 0.4132, "step": 6410 }, { "epoch": 0.6517893452623017, "grad_norm": 0.35731297731399536, "learning_rate": 9.971952713063244e-06, "loss": 0.391, "step": 6411 }, { "epoch": 0.6518910126067508, "grad_norm": 0.3696269392967224, "learning_rate": 9.971915163987884e-06, "loss": 0.3881, "step": 6412 }, { "epoch": 0.6519926799511997, "grad_norm": 0.36359885334968567, "learning_rate": 9.971877589865195e-06, "loss": 0.383, "step": 6413 }, { "epoch": 0.6520943472956486, "grad_norm": 0.32351186871528625, "learning_rate": 9.97183999069537e-06, "loss": 0.3964, "step": 6414 }, { "epoch": 0.6521960146400976, "grad_norm": 0.3492833077907562, "learning_rate": 9.971802366478595e-06, "loss": 0.4125, "step": 6415 }, { "epoch": 0.6522976819845465, "grad_norm": 0.36668550968170166, "learning_rate": 9.971764717215063e-06, "loss": 0.3977, "step": 6416 }, { "epoch": 0.6523993493289956, "grad_norm": 0.36046814918518066, "learning_rate": 9.971727042904962e-06, "loss": 0.3892, "step": 6417 }, { "epoch": 0.6525010166734445, "grad_norm": 0.37879982590675354, "learning_rate": 9.97168934354848e-06, "loss": 0.4175, "step": 6418 }, { "epoch": 0.6526026840178935, "grad_norm": 0.33444419503211975, "learning_rate": 9.97165161914581e-06, "loss": 0.4001, "step": 6419 }, { "epoch": 0.6527043513623424, "grad_norm": 0.3352876305580139, "learning_rate": 9.971613869697138e-06, "loss": 0.3924, "step": 6420 }, { "epoch": 0.6528060187067913, "grad_norm": 0.3472079932689667, "learning_rate": 9.97157609520266e-06, "loss": 0.4171, "step": 6421 }, { "epoch": 0.6529076860512404, "grad_norm": 0.3507058620452881, "learning_rate": 9.971538295662561e-06, "loss": 0.3931, "step": 6422 }, { "epoch": 0.6530093533956893, "grad_norm": 0.3508952856063843, "learning_rate": 9.971500471077034e-06, "loss": 0.392, "step": 6423 }, { "epoch": 0.6531110207401383, "grad_norm": 0.35753676295280457, "learning_rate": 9.97146262144627e-06, "loss": 0.3838, "step": 6424 }, { "epoch": 0.6532126880845872, "grad_norm": 0.35137519240379333, "learning_rate": 9.971424746770457e-06, "loss": 0.4031, "step": 6425 }, { "epoch": 0.6533143554290362, "grad_norm": 0.3505471646785736, "learning_rate": 9.971386847049789e-06, "loss": 0.3876, "step": 6426 }, { "epoch": 0.6534160227734852, "grad_norm": 0.36154061555862427, "learning_rate": 9.971348922284454e-06, "loss": 0.3952, "step": 6427 }, { "epoch": 0.6535176901179341, "grad_norm": 0.3815355896949768, "learning_rate": 9.971310972474645e-06, "loss": 0.4248, "step": 6428 }, { "epoch": 0.6536193574623831, "grad_norm": 0.3781321346759796, "learning_rate": 9.971272997620554e-06, "loss": 0.4088, "step": 6429 }, { "epoch": 0.653721024806832, "grad_norm": 0.3413349688053131, "learning_rate": 9.97123499772237e-06, "loss": 0.3725, "step": 6430 }, { "epoch": 0.653822692151281, "grad_norm": 0.39564651250839233, "learning_rate": 9.971196972780287e-06, "loss": 0.4009, "step": 6431 }, { "epoch": 0.65392435949573, "grad_norm": 0.34854093194007874, "learning_rate": 9.971158922794492e-06, "loss": 0.3787, "step": 6432 }, { "epoch": 0.654026026840179, "grad_norm": 0.34427976608276367, "learning_rate": 9.971120847765182e-06, "loss": 0.403, "step": 6433 }, { "epoch": 0.6541276941846279, "grad_norm": 0.4007054567337036, "learning_rate": 9.971082747692545e-06, "loss": 0.3601, "step": 6434 }, { "epoch": 0.6542293615290768, "grad_norm": 0.3424345552921295, "learning_rate": 9.971044622576774e-06, "loss": 0.3929, "step": 6435 }, { "epoch": 0.6543310288735258, "grad_norm": 0.3550408184528351, "learning_rate": 9.971006472418064e-06, "loss": 0.3754, "step": 6436 }, { "epoch": 0.6544326962179748, "grad_norm": 0.3588477075099945, "learning_rate": 9.970968297216603e-06, "loss": 0.384, "step": 6437 }, { "epoch": 0.6545343635624238, "grad_norm": 0.3712683320045471, "learning_rate": 9.970930096972584e-06, "loss": 0.4168, "step": 6438 }, { "epoch": 0.6546360309068727, "grad_norm": 0.3301350176334381, "learning_rate": 9.970891871686202e-06, "loss": 0.4083, "step": 6439 }, { "epoch": 0.6547376982513217, "grad_norm": 0.35669752955436707, "learning_rate": 9.970853621357649e-06, "loss": 0.4118, "step": 6440 }, { "epoch": 0.6548393655957706, "grad_norm": 0.39323559403419495, "learning_rate": 9.970815345987115e-06, "loss": 0.4251, "step": 6441 }, { "epoch": 0.6549410329402195, "grad_norm": 0.3624630272388458, "learning_rate": 9.970777045574794e-06, "loss": 0.385, "step": 6442 }, { "epoch": 0.6550427002846686, "grad_norm": 0.3580881655216217, "learning_rate": 9.970738720120881e-06, "loss": 0.3701, "step": 6443 }, { "epoch": 0.6551443676291175, "grad_norm": 0.39267221093177795, "learning_rate": 9.970700369625565e-06, "loss": 0.3866, "step": 6444 }, { "epoch": 0.6552460349735665, "grad_norm": 0.3332422077655792, "learning_rate": 9.970661994089045e-06, "loss": 0.368, "step": 6445 }, { "epoch": 0.6553477023180154, "grad_norm": 0.380829393863678, "learning_rate": 9.97062359351151e-06, "loss": 0.4415, "step": 6446 }, { "epoch": 0.6554493696624644, "grad_norm": 0.36086615920066833, "learning_rate": 9.970585167893155e-06, "loss": 0.3948, "step": 6447 }, { "epoch": 0.6555510370069134, "grad_norm": 0.41217729449272156, "learning_rate": 9.97054671723417e-06, "loss": 0.4065, "step": 6448 }, { "epoch": 0.6556527043513624, "grad_norm": 0.4001385569572449, "learning_rate": 9.970508241534756e-06, "loss": 0.4128, "step": 6449 }, { "epoch": 0.6557543716958113, "grad_norm": 0.3473961651325226, "learning_rate": 9.970469740795101e-06, "loss": 0.4014, "step": 6450 }, { "epoch": 0.6558560390402602, "grad_norm": 0.36866769194602966, "learning_rate": 9.9704312150154e-06, "loss": 0.3877, "step": 6451 }, { "epoch": 0.6559577063847092, "grad_norm": 0.42954060435295105, "learning_rate": 9.97039266419585e-06, "loss": 0.4177, "step": 6452 }, { "epoch": 0.6560593737291582, "grad_norm": 0.31232860684394836, "learning_rate": 9.97035408833664e-06, "loss": 0.4291, "step": 6453 }, { "epoch": 0.6561610410736072, "grad_norm": 0.3478686809539795, "learning_rate": 9.970315487437969e-06, "loss": 0.425, "step": 6454 }, { "epoch": 0.6562627084180561, "grad_norm": 0.4425963759422302, "learning_rate": 9.970276861500027e-06, "loss": 0.3959, "step": 6455 }, { "epoch": 0.6563643757625051, "grad_norm": 0.3733263611793518, "learning_rate": 9.970238210523014e-06, "loss": 0.3834, "step": 6456 }, { "epoch": 0.656466043106954, "grad_norm": 0.3304051160812378, "learning_rate": 9.970199534507121e-06, "loss": 0.371, "step": 6457 }, { "epoch": 0.656567710451403, "grad_norm": 0.3596805930137634, "learning_rate": 9.970160833452545e-06, "loss": 0.3902, "step": 6458 }, { "epoch": 0.656669377795852, "grad_norm": 0.39418986439704895, "learning_rate": 9.97012210735948e-06, "loss": 0.4166, "step": 6459 }, { "epoch": 0.6567710451403009, "grad_norm": 0.37700483202934265, "learning_rate": 9.97008335622812e-06, "loss": 0.4166, "step": 6460 }, { "epoch": 0.6568727124847499, "grad_norm": 0.3510833978652954, "learning_rate": 9.97004458005866e-06, "loss": 0.3913, "step": 6461 }, { "epoch": 0.6569743798291988, "grad_norm": 0.3718253970146179, "learning_rate": 9.970005778851298e-06, "loss": 0.4337, "step": 6462 }, { "epoch": 0.6570760471736479, "grad_norm": 0.350323349237442, "learning_rate": 9.969966952606227e-06, "loss": 0.3772, "step": 6463 }, { "epoch": 0.6571777145180968, "grad_norm": 0.3443329930305481, "learning_rate": 9.969928101323644e-06, "loss": 0.3868, "step": 6464 }, { "epoch": 0.6572793818625458, "grad_norm": 0.37441369891166687, "learning_rate": 9.969889225003744e-06, "loss": 0.3713, "step": 6465 }, { "epoch": 0.6573810492069947, "grad_norm": 0.3828420341014862, "learning_rate": 9.969850323646724e-06, "loss": 0.3948, "step": 6466 }, { "epoch": 0.6574827165514436, "grad_norm": 0.3387364447116852, "learning_rate": 9.969811397252778e-06, "loss": 0.409, "step": 6467 }, { "epoch": 0.6575843838958927, "grad_norm": 0.37470248341560364, "learning_rate": 9.969772445822103e-06, "loss": 0.4014, "step": 6468 }, { "epoch": 0.6576860512403416, "grad_norm": 0.39523157477378845, "learning_rate": 9.969733469354896e-06, "loss": 0.3629, "step": 6469 }, { "epoch": 0.6577877185847906, "grad_norm": 0.3590283691883087, "learning_rate": 9.969694467851354e-06, "loss": 0.3818, "step": 6470 }, { "epoch": 0.6578893859292395, "grad_norm": 0.3547772169113159, "learning_rate": 9.969655441311671e-06, "loss": 0.3986, "step": 6471 }, { "epoch": 0.6579910532736885, "grad_norm": 0.368694007396698, "learning_rate": 9.969616389736045e-06, "loss": 0.4189, "step": 6472 }, { "epoch": 0.6580927206181375, "grad_norm": 0.34574687480926514, "learning_rate": 9.969577313124673e-06, "loss": 0.4162, "step": 6473 }, { "epoch": 0.6581943879625864, "grad_norm": 0.3854506015777588, "learning_rate": 9.96953821147775e-06, "loss": 0.3985, "step": 6474 }, { "epoch": 0.6582960553070354, "grad_norm": 0.3573242127895355, "learning_rate": 9.969499084795475e-06, "loss": 0.4246, "step": 6475 }, { "epoch": 0.6583977226514843, "grad_norm": 0.36489802598953247, "learning_rate": 9.969459933078045e-06, "loss": 0.4326, "step": 6476 }, { "epoch": 0.6584993899959333, "grad_norm": 0.373167484998703, "learning_rate": 9.969420756325657e-06, "loss": 0.397, "step": 6477 }, { "epoch": 0.6586010573403823, "grad_norm": 0.38278087973594666, "learning_rate": 9.96938155453851e-06, "loss": 0.3963, "step": 6478 }, { "epoch": 0.6587027246848313, "grad_norm": 0.3642929792404175, "learning_rate": 9.969342327716796e-06, "loss": 0.4004, "step": 6479 }, { "epoch": 0.6588043920292802, "grad_norm": 0.403598815202713, "learning_rate": 9.969303075860717e-06, "loss": 0.409, "step": 6480 }, { "epoch": 0.6589060593737291, "grad_norm": 0.41587182879447937, "learning_rate": 9.969263798970472e-06, "loss": 0.4026, "step": 6481 }, { "epoch": 0.6590077267181781, "grad_norm": 0.4044373631477356, "learning_rate": 9.969224497046255e-06, "loss": 0.3767, "step": 6482 }, { "epoch": 0.659109394062627, "grad_norm": 0.4250517189502716, "learning_rate": 9.969185170088266e-06, "loss": 0.4296, "step": 6483 }, { "epoch": 0.6592110614070761, "grad_norm": 0.4376981854438782, "learning_rate": 9.969145818096703e-06, "loss": 0.3997, "step": 6484 }, { "epoch": 0.659312728751525, "grad_norm": 0.44865602254867554, "learning_rate": 9.969106441071766e-06, "loss": 0.4011, "step": 6485 }, { "epoch": 0.659414396095974, "grad_norm": 0.4021470844745636, "learning_rate": 9.96906703901365e-06, "loss": 0.3854, "step": 6486 }, { "epoch": 0.6595160634404229, "grad_norm": 0.405600905418396, "learning_rate": 9.969027611922555e-06, "loss": 0.4233, "step": 6487 }, { "epoch": 0.6596177307848718, "grad_norm": 0.4239184558391571, "learning_rate": 9.96898815979868e-06, "loss": 0.3897, "step": 6488 }, { "epoch": 0.6597193981293209, "grad_norm": 0.35682055354118347, "learning_rate": 9.968948682642223e-06, "loss": 0.4047, "step": 6489 }, { "epoch": 0.6598210654737698, "grad_norm": 0.391137957572937, "learning_rate": 9.968909180453385e-06, "loss": 0.4317, "step": 6490 }, { "epoch": 0.6599227328182188, "grad_norm": 0.44489508867263794, "learning_rate": 9.968869653232362e-06, "loss": 0.4082, "step": 6491 }, { "epoch": 0.6600244001626677, "grad_norm": 0.36724793910980225, "learning_rate": 9.968830100979354e-06, "loss": 0.3888, "step": 6492 }, { "epoch": 0.6601260675071167, "grad_norm": 0.41886773705482483, "learning_rate": 9.968790523694563e-06, "loss": 0.4408, "step": 6493 }, { "epoch": 0.6602277348515657, "grad_norm": 0.41451719403266907, "learning_rate": 9.968750921378183e-06, "loss": 0.3797, "step": 6494 }, { "epoch": 0.6603294021960147, "grad_norm": 0.37209999561309814, "learning_rate": 9.968711294030418e-06, "loss": 0.3972, "step": 6495 }, { "epoch": 0.6604310695404636, "grad_norm": 0.37083178758621216, "learning_rate": 9.968671641651466e-06, "loss": 0.39, "step": 6496 }, { "epoch": 0.6605327368849125, "grad_norm": 0.4143437147140503, "learning_rate": 9.968631964241529e-06, "loss": 0.3869, "step": 6497 }, { "epoch": 0.6606344042293615, "grad_norm": 0.40276655554771423, "learning_rate": 9.968592261800802e-06, "loss": 0.4278, "step": 6498 }, { "epoch": 0.6607360715738105, "grad_norm": 0.3830623924732208, "learning_rate": 9.96855253432949e-06, "loss": 0.3981, "step": 6499 }, { "epoch": 0.6608377389182595, "grad_norm": 0.386135071516037, "learning_rate": 9.968512781827791e-06, "loss": 0.3871, "step": 6500 }, { "epoch": 0.6609394062627084, "grad_norm": 0.34513553977012634, "learning_rate": 9.968473004295904e-06, "loss": 0.403, "step": 6501 }, { "epoch": 0.6610410736071574, "grad_norm": 0.4341296851634979, "learning_rate": 9.968433201734032e-06, "loss": 0.4194, "step": 6502 }, { "epoch": 0.6611427409516063, "grad_norm": 0.3494955897331238, "learning_rate": 9.968393374142373e-06, "loss": 0.3922, "step": 6503 }, { "epoch": 0.6612444082960554, "grad_norm": 0.3449326455593109, "learning_rate": 9.968353521521127e-06, "loss": 0.4037, "step": 6504 }, { "epoch": 0.6613460756405043, "grad_norm": 0.37791189551353455, "learning_rate": 9.9683136438705e-06, "loss": 0.3781, "step": 6505 }, { "epoch": 0.6614477429849532, "grad_norm": 0.3499926030635834, "learning_rate": 9.968273741190689e-06, "loss": 0.4018, "step": 6506 }, { "epoch": 0.6615494103294022, "grad_norm": 0.33864301443099976, "learning_rate": 9.968233813481895e-06, "loss": 0.4038, "step": 6507 }, { "epoch": 0.6616510776738511, "grad_norm": 0.3266954720020294, "learning_rate": 9.96819386074432e-06, "loss": 0.4056, "step": 6508 }, { "epoch": 0.6617527450183002, "grad_norm": 0.33839425444602966, "learning_rate": 9.968153882978165e-06, "loss": 0.3893, "step": 6509 }, { "epoch": 0.6618544123627491, "grad_norm": 0.3497769832611084, "learning_rate": 9.968113880183629e-06, "loss": 0.4169, "step": 6510 }, { "epoch": 0.661956079707198, "grad_norm": 0.34565985202789307, "learning_rate": 9.968073852360918e-06, "loss": 0.3699, "step": 6511 }, { "epoch": 0.662057747051647, "grad_norm": 0.3651254177093506, "learning_rate": 9.968033799510234e-06, "loss": 0.3772, "step": 6512 }, { "epoch": 0.6621594143960959, "grad_norm": 0.356292724609375, "learning_rate": 9.967993721631772e-06, "loss": 0.3749, "step": 6513 }, { "epoch": 0.662261081740545, "grad_norm": 0.30239349603652954, "learning_rate": 9.96795361872574e-06, "loss": 0.3933, "step": 6514 }, { "epoch": 0.6623627490849939, "grad_norm": 0.3714100420475006, "learning_rate": 9.967913490792339e-06, "loss": 0.3809, "step": 6515 }, { "epoch": 0.6624644164294429, "grad_norm": 0.39225825667381287, "learning_rate": 9.967873337831769e-06, "loss": 0.4094, "step": 6516 }, { "epoch": 0.6625660837738918, "grad_norm": 0.3387155532836914, "learning_rate": 9.967833159844233e-06, "loss": 0.3824, "step": 6517 }, { "epoch": 0.6626677511183408, "grad_norm": 0.33319658041000366, "learning_rate": 9.967792956829936e-06, "loss": 0.402, "step": 6518 }, { "epoch": 0.6627694184627898, "grad_norm": 0.37454262375831604, "learning_rate": 9.967752728789077e-06, "loss": 0.4159, "step": 6519 }, { "epoch": 0.6628710858072387, "grad_norm": 0.40704554319381714, "learning_rate": 9.967712475721863e-06, "loss": 0.3929, "step": 6520 }, { "epoch": 0.6629727531516877, "grad_norm": 0.3894556164741516, "learning_rate": 9.96767219762849e-06, "loss": 0.4246, "step": 6521 }, { "epoch": 0.6630744204961366, "grad_norm": 0.3589736223220825, "learning_rate": 9.967631894509168e-06, "loss": 0.382, "step": 6522 }, { "epoch": 0.6631760878405856, "grad_norm": 0.3961525559425354, "learning_rate": 9.967591566364097e-06, "loss": 0.4505, "step": 6523 }, { "epoch": 0.6632777551850345, "grad_norm": 0.3726254105567932, "learning_rate": 9.967551213193478e-06, "loss": 0.3703, "step": 6524 }, { "epoch": 0.6633794225294836, "grad_norm": 0.3852754831314087, "learning_rate": 9.967510834997518e-06, "loss": 0.3723, "step": 6525 }, { "epoch": 0.6634810898739325, "grad_norm": 0.3745759129524231, "learning_rate": 9.967470431776419e-06, "loss": 0.4087, "step": 6526 }, { "epoch": 0.6635827572183814, "grad_norm": 0.36657312512397766, "learning_rate": 9.967430003530385e-06, "loss": 0.368, "step": 6527 }, { "epoch": 0.6636844245628304, "grad_norm": 0.321811318397522, "learning_rate": 9.967389550259618e-06, "loss": 0.3806, "step": 6528 }, { "epoch": 0.6637860919072793, "grad_norm": 0.3587932884693146, "learning_rate": 9.967349071964323e-06, "loss": 0.446, "step": 6529 }, { "epoch": 0.6638877592517284, "grad_norm": 0.33498233556747437, "learning_rate": 9.967308568644704e-06, "loss": 0.4006, "step": 6530 }, { "epoch": 0.6639894265961773, "grad_norm": 0.3773192763328552, "learning_rate": 9.967268040300963e-06, "loss": 0.3665, "step": 6531 }, { "epoch": 0.6640910939406263, "grad_norm": 0.36567309498786926, "learning_rate": 9.96722748693331e-06, "loss": 0.4012, "step": 6532 }, { "epoch": 0.6641927612850752, "grad_norm": 0.36139893531799316, "learning_rate": 9.967186908541942e-06, "loss": 0.3937, "step": 6533 }, { "epoch": 0.6642944286295241, "grad_norm": 0.3380800187587738, "learning_rate": 9.967146305127067e-06, "loss": 0.3824, "step": 6534 }, { "epoch": 0.6643960959739732, "grad_norm": 0.34799692034721375, "learning_rate": 9.96710567668889e-06, "loss": 0.3947, "step": 6535 }, { "epoch": 0.6644977633184221, "grad_norm": 0.36465218663215637, "learning_rate": 9.967065023227616e-06, "loss": 0.4165, "step": 6536 }, { "epoch": 0.6645994306628711, "grad_norm": 0.3688034415245056, "learning_rate": 9.967024344743448e-06, "loss": 0.4355, "step": 6537 }, { "epoch": 0.66470109800732, "grad_norm": 0.3641163408756256, "learning_rate": 9.966983641236592e-06, "loss": 0.4214, "step": 6538 }, { "epoch": 0.664802765351769, "grad_norm": 0.39988094568252563, "learning_rate": 9.966942912707251e-06, "loss": 0.3838, "step": 6539 }, { "epoch": 0.664904432696218, "grad_norm": 0.35301244258880615, "learning_rate": 9.966902159155634e-06, "loss": 0.401, "step": 6540 }, { "epoch": 0.665006100040667, "grad_norm": 0.37583738565444946, "learning_rate": 9.966861380581943e-06, "loss": 0.4127, "step": 6541 }, { "epoch": 0.6651077673851159, "grad_norm": 0.4099099040031433, "learning_rate": 9.966820576986384e-06, "loss": 0.3908, "step": 6542 }, { "epoch": 0.6652094347295648, "grad_norm": 0.34584054350852966, "learning_rate": 9.966779748369166e-06, "loss": 0.3869, "step": 6543 }, { "epoch": 0.6653111020740138, "grad_norm": 0.35908856987953186, "learning_rate": 9.96673889473049e-06, "loss": 0.4087, "step": 6544 }, { "epoch": 0.6654127694184628, "grad_norm": 0.41831275820732117, "learning_rate": 9.966698016070563e-06, "loss": 0.3807, "step": 6545 }, { "epoch": 0.6655144367629118, "grad_norm": 0.42312753200531006, "learning_rate": 9.966657112389593e-06, "loss": 0.3849, "step": 6546 }, { "epoch": 0.6656161041073607, "grad_norm": 0.38422131538391113, "learning_rate": 9.966616183687784e-06, "loss": 0.4467, "step": 6547 }, { "epoch": 0.6657177714518097, "grad_norm": 0.39552950859069824, "learning_rate": 9.966575229965343e-06, "loss": 0.3624, "step": 6548 }, { "epoch": 0.6658194387962586, "grad_norm": 0.382544606924057, "learning_rate": 9.966534251222477e-06, "loss": 0.3911, "step": 6549 }, { "epoch": 0.6659211061407077, "grad_norm": 0.35634058713912964, "learning_rate": 9.96649324745939e-06, "loss": 0.406, "step": 6550 }, { "epoch": 0.6660227734851566, "grad_norm": 0.370991587638855, "learning_rate": 9.966452218676293e-06, "loss": 0.3908, "step": 6551 }, { "epoch": 0.6661244408296055, "grad_norm": 0.4001990258693695, "learning_rate": 9.966411164873388e-06, "loss": 0.3584, "step": 6552 }, { "epoch": 0.6662261081740545, "grad_norm": 0.42295655608177185, "learning_rate": 9.966370086050883e-06, "loss": 0.4097, "step": 6553 }, { "epoch": 0.6663277755185034, "grad_norm": 0.3223782777786255, "learning_rate": 9.966328982208989e-06, "loss": 0.3727, "step": 6554 }, { "epoch": 0.6664294428629525, "grad_norm": 0.37756869196891785, "learning_rate": 9.966287853347907e-06, "loss": 0.403, "step": 6555 }, { "epoch": 0.6665311102074014, "grad_norm": 0.4053451120853424, "learning_rate": 9.966246699467848e-06, "loss": 0.3842, "step": 6556 }, { "epoch": 0.6666327775518504, "grad_norm": 0.3813483715057373, "learning_rate": 9.966205520569017e-06, "loss": 0.3758, "step": 6557 }, { "epoch": 0.6667344448962993, "grad_norm": 0.3984403908252716, "learning_rate": 9.966164316651623e-06, "loss": 0.4169, "step": 6558 }, { "epoch": 0.6668361122407482, "grad_norm": 0.42087677121162415, "learning_rate": 9.966123087715875e-06, "loss": 0.4023, "step": 6559 }, { "epoch": 0.6669377795851973, "grad_norm": 0.4012666642665863, "learning_rate": 9.966081833761976e-06, "loss": 0.4158, "step": 6560 }, { "epoch": 0.6670394469296462, "grad_norm": 0.3851780593395233, "learning_rate": 9.96604055479014e-06, "loss": 0.4394, "step": 6561 }, { "epoch": 0.6671411142740952, "grad_norm": 0.3360409140586853, "learning_rate": 9.96599925080057e-06, "loss": 0.3998, "step": 6562 }, { "epoch": 0.6672427816185441, "grad_norm": 0.33342206478118896, "learning_rate": 9.965957921793476e-06, "loss": 0.3719, "step": 6563 }, { "epoch": 0.667344448962993, "grad_norm": 0.34009039402008057, "learning_rate": 9.965916567769067e-06, "loss": 0.3899, "step": 6564 }, { "epoch": 0.667446116307442, "grad_norm": 0.3974757790565491, "learning_rate": 9.965875188727549e-06, "loss": 0.3695, "step": 6565 }, { "epoch": 0.667547783651891, "grad_norm": 0.3518950343132019, "learning_rate": 9.965833784669134e-06, "loss": 0.3723, "step": 6566 }, { "epoch": 0.66764945099634, "grad_norm": 0.3558104634284973, "learning_rate": 9.965792355594027e-06, "loss": 0.3966, "step": 6567 }, { "epoch": 0.6677511183407889, "grad_norm": 0.377996027469635, "learning_rate": 9.965750901502438e-06, "loss": 0.3844, "step": 6568 }, { "epoch": 0.6678527856852379, "grad_norm": 0.38304221630096436, "learning_rate": 9.965709422394577e-06, "loss": 0.4085, "step": 6569 }, { "epoch": 0.6679544530296868, "grad_norm": 0.36210477352142334, "learning_rate": 9.965667918270652e-06, "loss": 0.3742, "step": 6570 }, { "epoch": 0.6680561203741359, "grad_norm": 0.3580884635448456, "learning_rate": 9.96562638913087e-06, "loss": 0.3994, "step": 6571 }, { "epoch": 0.6681577877185848, "grad_norm": 0.33383694291114807, "learning_rate": 9.965584834975443e-06, "loss": 0.3848, "step": 6572 }, { "epoch": 0.6682594550630337, "grad_norm": 0.34489327669143677, "learning_rate": 9.965543255804581e-06, "loss": 0.392, "step": 6573 }, { "epoch": 0.6683611224074827, "grad_norm": 0.39211875200271606, "learning_rate": 9.965501651618491e-06, "loss": 0.4171, "step": 6574 }, { "epoch": 0.6684627897519316, "grad_norm": 0.3367624580860138, "learning_rate": 9.965460022417385e-06, "loss": 0.3966, "step": 6575 }, { "epoch": 0.6685644570963807, "grad_norm": 0.3267633616924286, "learning_rate": 9.96541836820147e-06, "loss": 0.3924, "step": 6576 }, { "epoch": 0.6686661244408296, "grad_norm": 0.34802311658859253, "learning_rate": 9.965376688970959e-06, "loss": 0.3874, "step": 6577 }, { "epoch": 0.6687677917852786, "grad_norm": 0.376811683177948, "learning_rate": 9.965334984726058e-06, "loss": 0.3992, "step": 6578 }, { "epoch": 0.6688694591297275, "grad_norm": 0.32798701524734497, "learning_rate": 9.96529325546698e-06, "loss": 0.3713, "step": 6579 }, { "epoch": 0.6689711264741764, "grad_norm": 0.32967516779899597, "learning_rate": 9.965251501193934e-06, "loss": 0.377, "step": 6580 }, { "epoch": 0.6690727938186255, "grad_norm": 0.371543824672699, "learning_rate": 9.965209721907132e-06, "loss": 0.3962, "step": 6581 }, { "epoch": 0.6691744611630744, "grad_norm": 0.382409930229187, "learning_rate": 9.965167917606783e-06, "loss": 0.3776, "step": 6582 }, { "epoch": 0.6692761285075234, "grad_norm": 0.3201240599155426, "learning_rate": 9.965126088293098e-06, "loss": 0.407, "step": 6583 }, { "epoch": 0.6693777958519723, "grad_norm": 0.3568606972694397, "learning_rate": 9.965084233966287e-06, "loss": 0.382, "step": 6584 }, { "epoch": 0.6694794631964213, "grad_norm": 0.4029388427734375, "learning_rate": 9.965042354626563e-06, "loss": 0.3959, "step": 6585 }, { "epoch": 0.6695811305408703, "grad_norm": 0.3639025390148163, "learning_rate": 9.965000450274133e-06, "loss": 0.4029, "step": 6586 }, { "epoch": 0.6696827978853193, "grad_norm": 0.34800484776496887, "learning_rate": 9.964958520909213e-06, "loss": 0.3864, "step": 6587 }, { "epoch": 0.6697844652297682, "grad_norm": 0.35691168904304504, "learning_rate": 9.96491656653201e-06, "loss": 0.3736, "step": 6588 }, { "epoch": 0.6698861325742171, "grad_norm": 0.35878780484199524, "learning_rate": 9.964874587142738e-06, "loss": 0.4271, "step": 6589 }, { "epoch": 0.6699877999186661, "grad_norm": 0.3660775423049927, "learning_rate": 9.964832582741608e-06, "loss": 0.3864, "step": 6590 }, { "epoch": 0.6700894672631151, "grad_norm": 0.3551021218299866, "learning_rate": 9.96479055332883e-06, "loss": 0.3845, "step": 6591 }, { "epoch": 0.6701911346075641, "grad_norm": 0.3379300832748413, "learning_rate": 9.96474849890462e-06, "loss": 0.3669, "step": 6592 }, { "epoch": 0.670292801952013, "grad_norm": 0.40290239453315735, "learning_rate": 9.964706419469184e-06, "loss": 0.4127, "step": 6593 }, { "epoch": 0.670394469296462, "grad_norm": 0.35503068566322327, "learning_rate": 9.964664315022737e-06, "loss": 0.4179, "step": 6594 }, { "epoch": 0.6704961366409109, "grad_norm": 0.3212084472179413, "learning_rate": 9.964622185565493e-06, "loss": 0.4207, "step": 6595 }, { "epoch": 0.67059780398536, "grad_norm": 0.36857107281684875, "learning_rate": 9.96458003109766e-06, "loss": 0.3796, "step": 6596 }, { "epoch": 0.6706994713298089, "grad_norm": 0.33609193563461304, "learning_rate": 9.964537851619453e-06, "loss": 0.398, "step": 6597 }, { "epoch": 0.6708011386742578, "grad_norm": 0.33123335242271423, "learning_rate": 9.964495647131085e-06, "loss": 0.4025, "step": 6598 }, { "epoch": 0.6709028060187068, "grad_norm": 0.3131926953792572, "learning_rate": 9.964453417632767e-06, "loss": 0.3927, "step": 6599 }, { "epoch": 0.6710044733631557, "grad_norm": 0.33508771657943726, "learning_rate": 9.964411163124713e-06, "loss": 0.4034, "step": 6600 }, { "epoch": 0.6711061407076048, "grad_norm": 0.3516365587711334, "learning_rate": 9.964368883607133e-06, "loss": 0.3777, "step": 6601 }, { "epoch": 0.6712078080520537, "grad_norm": 0.33139804005622864, "learning_rate": 9.964326579080246e-06, "loss": 0.3493, "step": 6602 }, { "epoch": 0.6713094753965027, "grad_norm": 0.34747955203056335, "learning_rate": 9.96428424954426e-06, "loss": 0.3855, "step": 6603 }, { "epoch": 0.6714111427409516, "grad_norm": 0.33376583456993103, "learning_rate": 9.964241894999389e-06, "loss": 0.4168, "step": 6604 }, { "epoch": 0.6715128100854005, "grad_norm": 0.3423861861228943, "learning_rate": 9.964199515445846e-06, "loss": 0.3957, "step": 6605 }, { "epoch": 0.6716144774298495, "grad_norm": 0.3504312038421631, "learning_rate": 9.964157110883847e-06, "loss": 0.3788, "step": 6606 }, { "epoch": 0.6717161447742985, "grad_norm": 0.36965009570121765, "learning_rate": 9.964114681313604e-06, "loss": 0.412, "step": 6607 }, { "epoch": 0.6718178121187475, "grad_norm": 0.3769089877605438, "learning_rate": 9.964072226735331e-06, "loss": 0.3672, "step": 6608 }, { "epoch": 0.6719194794631964, "grad_norm": 0.3393612205982208, "learning_rate": 9.964029747149242e-06, "loss": 0.4258, "step": 6609 }, { "epoch": 0.6720211468076454, "grad_norm": 0.32496559619903564, "learning_rate": 9.963987242555551e-06, "loss": 0.3649, "step": 6610 }, { "epoch": 0.6721228141520943, "grad_norm": 0.3251422941684723, "learning_rate": 9.963944712954469e-06, "loss": 0.3786, "step": 6611 }, { "epoch": 0.6722244814965433, "grad_norm": 0.36156079173088074, "learning_rate": 9.963902158346216e-06, "loss": 0.4005, "step": 6612 }, { "epoch": 0.6723261488409923, "grad_norm": 0.3067032992839813, "learning_rate": 9.963859578731003e-06, "loss": 0.359, "step": 6613 }, { "epoch": 0.6724278161854412, "grad_norm": 0.31792816519737244, "learning_rate": 9.963816974109046e-06, "loss": 0.3811, "step": 6614 }, { "epoch": 0.6725294835298902, "grad_norm": 0.3543793559074402, "learning_rate": 9.963774344480556e-06, "loss": 0.4044, "step": 6615 }, { "epoch": 0.6726311508743391, "grad_norm": 0.3572746217250824, "learning_rate": 9.963731689845752e-06, "loss": 0.393, "step": 6616 }, { "epoch": 0.6727328182187882, "grad_norm": 0.33455517888069153, "learning_rate": 9.963689010204849e-06, "loss": 0.3813, "step": 6617 }, { "epoch": 0.6728344855632371, "grad_norm": 0.3379508852958679, "learning_rate": 9.963646305558059e-06, "loss": 0.3803, "step": 6618 }, { "epoch": 0.672936152907686, "grad_norm": 0.34123286604881287, "learning_rate": 9.963603575905597e-06, "loss": 0.4153, "step": 6619 }, { "epoch": 0.673037820252135, "grad_norm": 0.3514072299003601, "learning_rate": 9.963560821247682e-06, "loss": 0.4236, "step": 6620 }, { "epoch": 0.6731394875965839, "grad_norm": 0.32388970255851746, "learning_rate": 9.963518041584525e-06, "loss": 0.385, "step": 6621 }, { "epoch": 0.673241154941033, "grad_norm": 0.3509320020675659, "learning_rate": 9.963475236916344e-06, "loss": 0.4053, "step": 6622 }, { "epoch": 0.6733428222854819, "grad_norm": 0.32674068212509155, "learning_rate": 9.963432407243356e-06, "loss": 0.3979, "step": 6623 }, { "epoch": 0.6734444896299309, "grad_norm": 0.32116127014160156, "learning_rate": 9.963389552565774e-06, "loss": 0.3644, "step": 6624 }, { "epoch": 0.6735461569743798, "grad_norm": 0.3209798336029053, "learning_rate": 9.963346672883813e-06, "loss": 0.3983, "step": 6625 }, { "epoch": 0.6736478243188287, "grad_norm": 0.34691697359085083, "learning_rate": 9.963303768197693e-06, "loss": 0.4098, "step": 6626 }, { "epoch": 0.6737494916632778, "grad_norm": 0.3311839997768402, "learning_rate": 9.963260838507629e-06, "loss": 0.3649, "step": 6627 }, { "epoch": 0.6738511590077267, "grad_norm": 0.3462188243865967, "learning_rate": 9.963217883813833e-06, "loss": 0.3849, "step": 6628 }, { "epoch": 0.6739528263521757, "grad_norm": 0.322549045085907, "learning_rate": 9.963174904116529e-06, "loss": 0.3901, "step": 6629 }, { "epoch": 0.6740544936966246, "grad_norm": 0.35758069157600403, "learning_rate": 9.963131899415927e-06, "loss": 0.41, "step": 6630 }, { "epoch": 0.6741561610410736, "grad_norm": 0.3448197841644287, "learning_rate": 9.963088869712246e-06, "loss": 0.4271, "step": 6631 }, { "epoch": 0.6742578283855226, "grad_norm": 0.3187347948551178, "learning_rate": 9.963045815005702e-06, "loss": 0.3595, "step": 6632 }, { "epoch": 0.6743594957299716, "grad_norm": 0.34580567479133606, "learning_rate": 9.963002735296514e-06, "loss": 0.408, "step": 6633 }, { "epoch": 0.6744611630744205, "grad_norm": 0.3210185468196869, "learning_rate": 9.962959630584896e-06, "loss": 0.3739, "step": 6634 }, { "epoch": 0.6745628304188694, "grad_norm": 0.3282828629016876, "learning_rate": 9.962916500871068e-06, "loss": 0.4008, "step": 6635 }, { "epoch": 0.6746644977633184, "grad_norm": 0.33045610785484314, "learning_rate": 9.962873346155245e-06, "loss": 0.3779, "step": 6636 }, { "epoch": 0.6747661651077674, "grad_norm": 0.3273613154888153, "learning_rate": 9.962830166437647e-06, "loss": 0.3781, "step": 6637 }, { "epoch": 0.6748678324522164, "grad_norm": 0.3607975244522095, "learning_rate": 9.96278696171849e-06, "loss": 0.4047, "step": 6638 }, { "epoch": 0.6749694997966653, "grad_norm": 0.3748500943183899, "learning_rate": 9.96274373199799e-06, "loss": 0.3973, "step": 6639 }, { "epoch": 0.6750711671411143, "grad_norm": 0.3638502359390259, "learning_rate": 9.962700477276368e-06, "loss": 0.409, "step": 6640 }, { "epoch": 0.6751728344855632, "grad_norm": 0.3086812496185303, "learning_rate": 9.962657197553838e-06, "loss": 0.3954, "step": 6641 }, { "epoch": 0.6752745018300123, "grad_norm": 0.35669398307800293, "learning_rate": 9.962613892830622e-06, "loss": 0.4042, "step": 6642 }, { "epoch": 0.6753761691744612, "grad_norm": 0.3819800019264221, "learning_rate": 9.962570563106937e-06, "loss": 0.3976, "step": 6643 }, { "epoch": 0.6754778365189101, "grad_norm": 0.31591159105300903, "learning_rate": 9.962527208382999e-06, "loss": 0.4077, "step": 6644 }, { "epoch": 0.6755795038633591, "grad_norm": 0.35785260796546936, "learning_rate": 9.962483828659029e-06, "loss": 0.3965, "step": 6645 }, { "epoch": 0.675681171207808, "grad_norm": 0.3544965386390686, "learning_rate": 9.962440423935248e-06, "loss": 0.3946, "step": 6646 }, { "epoch": 0.6757828385522571, "grad_norm": 0.3385149836540222, "learning_rate": 9.962396994211866e-06, "loss": 0.3751, "step": 6647 }, { "epoch": 0.675884505896706, "grad_norm": 0.3491494357585907, "learning_rate": 9.96235353948911e-06, "loss": 0.3981, "step": 6648 }, { "epoch": 0.675986173241155, "grad_norm": 0.3327602446079254, "learning_rate": 9.962310059767195e-06, "loss": 0.3842, "step": 6649 }, { "epoch": 0.6760878405856039, "grad_norm": 0.3526260256767273, "learning_rate": 9.962266555046343e-06, "loss": 0.3962, "step": 6650 }, { "epoch": 0.6761895079300528, "grad_norm": 0.41016921401023865, "learning_rate": 9.96222302532677e-06, "loss": 0.3937, "step": 6651 }, { "epoch": 0.6762911752745018, "grad_norm": 0.39127597212791443, "learning_rate": 9.962179470608697e-06, "loss": 0.4372, "step": 6652 }, { "epoch": 0.6763928426189508, "grad_norm": 0.36485710740089417, "learning_rate": 9.962135890892342e-06, "loss": 0.375, "step": 6653 }, { "epoch": 0.6764945099633998, "grad_norm": 0.37354815006256104, "learning_rate": 9.962092286177927e-06, "loss": 0.39, "step": 6654 }, { "epoch": 0.6765961773078487, "grad_norm": 0.35092926025390625, "learning_rate": 9.96204865646567e-06, "loss": 0.3961, "step": 6655 }, { "epoch": 0.6766978446522977, "grad_norm": 0.3515363037586212, "learning_rate": 9.96200500175579e-06, "loss": 0.3842, "step": 6656 }, { "epoch": 0.6767995119967466, "grad_norm": 0.32834962010383606, "learning_rate": 9.961961322048508e-06, "loss": 0.38, "step": 6657 }, { "epoch": 0.6769011793411956, "grad_norm": 0.3425200283527374, "learning_rate": 9.961917617344046e-06, "loss": 0.3857, "step": 6658 }, { "epoch": 0.6770028466856446, "grad_norm": 0.35622742772102356, "learning_rate": 9.96187388764262e-06, "loss": 0.3861, "step": 6659 }, { "epoch": 0.6771045140300935, "grad_norm": 0.37621089816093445, "learning_rate": 9.961830132944454e-06, "loss": 0.4059, "step": 6660 }, { "epoch": 0.6772061813745425, "grad_norm": 0.3258618116378784, "learning_rate": 9.961786353249766e-06, "loss": 0.3997, "step": 6661 }, { "epoch": 0.6773078487189914, "grad_norm": 0.3552229702472687, "learning_rate": 9.961742548558777e-06, "loss": 0.4052, "step": 6662 }, { "epoch": 0.6774095160634405, "grad_norm": 0.36757394671440125, "learning_rate": 9.961698718871708e-06, "loss": 0.4075, "step": 6663 }, { "epoch": 0.6775111834078894, "grad_norm": 0.3458164930343628, "learning_rate": 9.96165486418878e-06, "loss": 0.3864, "step": 6664 }, { "epoch": 0.6776128507523383, "grad_norm": 0.3645486831665039, "learning_rate": 9.961610984510217e-06, "loss": 0.4299, "step": 6665 }, { "epoch": 0.6777145180967873, "grad_norm": 0.36362698674201965, "learning_rate": 9.961567079836234e-06, "loss": 0.3594, "step": 6666 }, { "epoch": 0.6778161854412362, "grad_norm": 0.3626842498779297, "learning_rate": 9.961523150167057e-06, "loss": 0.4113, "step": 6667 }, { "epoch": 0.6779178527856853, "grad_norm": 0.32880422472953796, "learning_rate": 9.961479195502904e-06, "loss": 0.4089, "step": 6668 }, { "epoch": 0.6780195201301342, "grad_norm": 0.3485690653324127, "learning_rate": 9.961435215843997e-06, "loss": 0.3986, "step": 6669 }, { "epoch": 0.6781211874745832, "grad_norm": 0.3434402644634247, "learning_rate": 9.961391211190558e-06, "loss": 0.3749, "step": 6670 }, { "epoch": 0.6782228548190321, "grad_norm": 0.3464532196521759, "learning_rate": 9.96134718154281e-06, "loss": 0.3866, "step": 6671 }, { "epoch": 0.678324522163481, "grad_norm": 0.3341672718524933, "learning_rate": 9.961303126900976e-06, "loss": 0.3958, "step": 6672 }, { "epoch": 0.6784261895079301, "grad_norm": 0.3185313940048218, "learning_rate": 9.961259047265272e-06, "loss": 0.3818, "step": 6673 }, { "epoch": 0.678527856852379, "grad_norm": 0.32670339941978455, "learning_rate": 9.961214942635927e-06, "loss": 0.3871, "step": 6674 }, { "epoch": 0.678629524196828, "grad_norm": 0.32374972105026245, "learning_rate": 9.96117081301316e-06, "loss": 0.4573, "step": 6675 }, { "epoch": 0.6787311915412769, "grad_norm": 0.3717581033706665, "learning_rate": 9.961126658397192e-06, "loss": 0.3784, "step": 6676 }, { "epoch": 0.6788328588857259, "grad_norm": 0.3295235335826874, "learning_rate": 9.961082478788246e-06, "loss": 0.3888, "step": 6677 }, { "epoch": 0.6789345262301749, "grad_norm": 0.34224843978881836, "learning_rate": 9.961038274186548e-06, "loss": 0.4006, "step": 6678 }, { "epoch": 0.6790361935746239, "grad_norm": 0.36380136013031006, "learning_rate": 9.960994044592316e-06, "loss": 0.4095, "step": 6679 }, { "epoch": 0.6791378609190728, "grad_norm": 0.3610650897026062, "learning_rate": 9.960949790005775e-06, "loss": 0.4143, "step": 6680 }, { "epoch": 0.6792395282635217, "grad_norm": 0.33261048793792725, "learning_rate": 9.96090551042715e-06, "loss": 0.4118, "step": 6681 }, { "epoch": 0.6793411956079707, "grad_norm": 0.32106831669807434, "learning_rate": 9.96086120585666e-06, "loss": 0.3928, "step": 6682 }, { "epoch": 0.6794428629524197, "grad_norm": 0.3556630611419678, "learning_rate": 9.96081687629453e-06, "loss": 0.4391, "step": 6683 }, { "epoch": 0.6795445302968687, "grad_norm": 0.374846875667572, "learning_rate": 9.960772521740984e-06, "loss": 0.4239, "step": 6684 }, { "epoch": 0.6796461976413176, "grad_norm": 0.3481954038143158, "learning_rate": 9.960728142196245e-06, "loss": 0.381, "step": 6685 }, { "epoch": 0.6797478649857666, "grad_norm": 0.3405681848526001, "learning_rate": 9.960683737660537e-06, "loss": 0.4086, "step": 6686 }, { "epoch": 0.6798495323302155, "grad_norm": 0.3160792589187622, "learning_rate": 9.960639308134083e-06, "loss": 0.3921, "step": 6687 }, { "epoch": 0.6799511996746646, "grad_norm": 0.36657264828681946, "learning_rate": 9.960594853617107e-06, "loss": 0.3708, "step": 6688 }, { "epoch": 0.6800528670191135, "grad_norm": 0.38392284512519836, "learning_rate": 9.960550374109832e-06, "loss": 0.3895, "step": 6689 }, { "epoch": 0.6801545343635624, "grad_norm": 0.34898263216018677, "learning_rate": 9.960505869612483e-06, "loss": 0.3805, "step": 6690 }, { "epoch": 0.6802562017080114, "grad_norm": 0.324773907661438, "learning_rate": 9.960461340125284e-06, "loss": 0.4169, "step": 6691 }, { "epoch": 0.6803578690524603, "grad_norm": 0.3457101583480835, "learning_rate": 9.960416785648462e-06, "loss": 0.3982, "step": 6692 }, { "epoch": 0.6804595363969093, "grad_norm": 0.3750990033149719, "learning_rate": 9.960372206182236e-06, "loss": 0.3842, "step": 6693 }, { "epoch": 0.6805612037413583, "grad_norm": 0.4009673595428467, "learning_rate": 9.960327601726836e-06, "loss": 0.4099, "step": 6694 }, { "epoch": 0.6806628710858073, "grad_norm": 0.3332962691783905, "learning_rate": 9.960282972282482e-06, "loss": 0.4022, "step": 6695 }, { "epoch": 0.6807645384302562, "grad_norm": 0.349004328250885, "learning_rate": 9.960238317849404e-06, "loss": 0.3989, "step": 6696 }, { "epoch": 0.6808662057747051, "grad_norm": 0.4290679693222046, "learning_rate": 9.960193638427822e-06, "loss": 0.4056, "step": 6697 }, { "epoch": 0.6809678731191541, "grad_norm": 0.35255807638168335, "learning_rate": 9.960148934017963e-06, "loss": 0.3978, "step": 6698 }, { "epoch": 0.6810695404636031, "grad_norm": 0.39320024847984314, "learning_rate": 9.960104204620054e-06, "loss": 0.4529, "step": 6699 }, { "epoch": 0.6811712078080521, "grad_norm": 0.3506864905357361, "learning_rate": 9.960059450234318e-06, "loss": 0.3754, "step": 6700 }, { "epoch": 0.681272875152501, "grad_norm": 0.339402437210083, "learning_rate": 9.96001467086098e-06, "loss": 0.3891, "step": 6701 }, { "epoch": 0.68137454249695, "grad_norm": 0.40897807478904724, "learning_rate": 9.959969866500268e-06, "loss": 0.3747, "step": 6702 }, { "epoch": 0.6814762098413989, "grad_norm": 0.3489638864994049, "learning_rate": 9.959925037152406e-06, "loss": 0.4046, "step": 6703 }, { "epoch": 0.681577877185848, "grad_norm": 0.34691375494003296, "learning_rate": 9.959880182817621e-06, "loss": 0.3893, "step": 6704 }, { "epoch": 0.6816795445302969, "grad_norm": 0.3838978707790375, "learning_rate": 9.959835303496137e-06, "loss": 0.4007, "step": 6705 }, { "epoch": 0.6817812118747458, "grad_norm": 0.4033968448638916, "learning_rate": 9.959790399188183e-06, "loss": 0.3959, "step": 6706 }, { "epoch": 0.6818828792191948, "grad_norm": 0.37622231245040894, "learning_rate": 9.959745469893982e-06, "loss": 0.4116, "step": 6707 }, { "epoch": 0.6819845465636437, "grad_norm": 0.35236749053001404, "learning_rate": 9.959700515613763e-06, "loss": 0.4079, "step": 6708 }, { "epoch": 0.6820862139080928, "grad_norm": 0.38833531737327576, "learning_rate": 9.959655536347751e-06, "loss": 0.4105, "step": 6709 }, { "epoch": 0.6821878812525417, "grad_norm": 0.3704622685909271, "learning_rate": 9.959610532096173e-06, "loss": 0.3801, "step": 6710 }, { "epoch": 0.6822895485969906, "grad_norm": 0.36290982365608215, "learning_rate": 9.959565502859256e-06, "loss": 0.4257, "step": 6711 }, { "epoch": 0.6823912159414396, "grad_norm": 0.3588355481624603, "learning_rate": 9.959520448637226e-06, "loss": 0.4263, "step": 6712 }, { "epoch": 0.6824928832858885, "grad_norm": 0.3231407403945923, "learning_rate": 9.959475369430311e-06, "loss": 0.4257, "step": 6713 }, { "epoch": 0.6825945506303376, "grad_norm": 0.3798182010650635, "learning_rate": 9.959430265238737e-06, "loss": 0.3932, "step": 6714 }, { "epoch": 0.6826962179747865, "grad_norm": 0.3595742881298065, "learning_rate": 9.959385136062733e-06, "loss": 0.3867, "step": 6715 }, { "epoch": 0.6827978853192355, "grad_norm": 0.3238006830215454, "learning_rate": 9.959339981902525e-06, "loss": 0.4028, "step": 6716 }, { "epoch": 0.6828995526636844, "grad_norm": 0.3591814339160919, "learning_rate": 9.95929480275834e-06, "loss": 0.3998, "step": 6717 }, { "epoch": 0.6830012200081333, "grad_norm": 0.4067552983760834, "learning_rate": 9.959249598630407e-06, "loss": 0.3975, "step": 6718 }, { "epoch": 0.6831028873525824, "grad_norm": 0.3508594036102295, "learning_rate": 9.959204369518954e-06, "loss": 0.4145, "step": 6719 }, { "epoch": 0.6832045546970313, "grad_norm": 0.3248653709888458, "learning_rate": 9.959159115424205e-06, "loss": 0.3916, "step": 6720 }, { "epoch": 0.6833062220414803, "grad_norm": 0.35578295588493347, "learning_rate": 9.959113836346393e-06, "loss": 0.3952, "step": 6721 }, { "epoch": 0.6834078893859292, "grad_norm": 0.34336695075035095, "learning_rate": 9.959068532285743e-06, "loss": 0.3746, "step": 6722 }, { "epoch": 0.6835095567303782, "grad_norm": 0.38250061869621277, "learning_rate": 9.959023203242485e-06, "loss": 0.3976, "step": 6723 }, { "epoch": 0.6836112240748272, "grad_norm": 0.37231647968292236, "learning_rate": 9.958977849216845e-06, "loss": 0.4283, "step": 6724 }, { "epoch": 0.6837128914192762, "grad_norm": 0.369119256734848, "learning_rate": 9.958932470209054e-06, "loss": 0.4006, "step": 6725 }, { "epoch": 0.6838145587637251, "grad_norm": 0.34352409839630127, "learning_rate": 9.95888706621934e-06, "loss": 0.3933, "step": 6726 }, { "epoch": 0.683916226108174, "grad_norm": 0.4049857258796692, "learning_rate": 9.958841637247932e-06, "loss": 0.4031, "step": 6727 }, { "epoch": 0.684017893452623, "grad_norm": 0.3374619483947754, "learning_rate": 9.958796183295056e-06, "loss": 0.4114, "step": 6728 }, { "epoch": 0.684119560797072, "grad_norm": 0.3070622980594635, "learning_rate": 9.958750704360945e-06, "loss": 0.3766, "step": 6729 }, { "epoch": 0.684221228141521, "grad_norm": 0.3769659399986267, "learning_rate": 9.958705200445826e-06, "loss": 0.377, "step": 6730 }, { "epoch": 0.6843228954859699, "grad_norm": 0.366242378950119, "learning_rate": 9.958659671549928e-06, "loss": 0.3984, "step": 6731 }, { "epoch": 0.6844245628304189, "grad_norm": 0.34117087721824646, "learning_rate": 9.958614117673481e-06, "loss": 0.3837, "step": 6732 }, { "epoch": 0.6845262301748678, "grad_norm": 0.3560315668582916, "learning_rate": 9.958568538816715e-06, "loss": 0.451, "step": 6733 }, { "epoch": 0.6846278975193167, "grad_norm": 0.4112151265144348, "learning_rate": 9.95852293497986e-06, "loss": 0.434, "step": 6734 }, { "epoch": 0.6847295648637658, "grad_norm": 0.34608709812164307, "learning_rate": 9.958477306163144e-06, "loss": 0.4134, "step": 6735 }, { "epoch": 0.6848312322082147, "grad_norm": 0.3440212607383728, "learning_rate": 9.958431652366796e-06, "loss": 0.3796, "step": 6736 }, { "epoch": 0.6849328995526637, "grad_norm": 0.32964327931404114, "learning_rate": 9.958385973591049e-06, "loss": 0.3365, "step": 6737 }, { "epoch": 0.6850345668971126, "grad_norm": 0.3844527006149292, "learning_rate": 9.958340269836134e-06, "loss": 0.4308, "step": 6738 }, { "epoch": 0.6851362342415616, "grad_norm": 0.3684391677379608, "learning_rate": 9.958294541102276e-06, "loss": 0.441, "step": 6739 }, { "epoch": 0.6852379015860106, "grad_norm": 0.3473619520664215, "learning_rate": 9.958248787389709e-06, "loss": 0.3802, "step": 6740 }, { "epoch": 0.6853395689304596, "grad_norm": 0.32744932174682617, "learning_rate": 9.958203008698661e-06, "loss": 0.3563, "step": 6741 }, { "epoch": 0.6854412362749085, "grad_norm": 0.3520199656486511, "learning_rate": 9.958157205029368e-06, "loss": 0.3812, "step": 6742 }, { "epoch": 0.6855429036193574, "grad_norm": 0.33098477125167847, "learning_rate": 9.958111376382055e-06, "loss": 0.3763, "step": 6743 }, { "epoch": 0.6856445709638064, "grad_norm": 0.3371514081954956, "learning_rate": 9.958065522756957e-06, "loss": 0.3878, "step": 6744 }, { "epoch": 0.6857462383082554, "grad_norm": 0.32728448510169983, "learning_rate": 9.958019644154301e-06, "loss": 0.4011, "step": 6745 }, { "epoch": 0.6858479056527044, "grad_norm": 0.35311421751976013, "learning_rate": 9.95797374057432e-06, "loss": 0.4372, "step": 6746 }, { "epoch": 0.6859495729971533, "grad_norm": 0.3686210513114929, "learning_rate": 9.957927812017248e-06, "loss": 0.4034, "step": 6747 }, { "epoch": 0.6860512403416023, "grad_norm": 0.3145209848880768, "learning_rate": 9.957881858483311e-06, "loss": 0.4128, "step": 6748 }, { "epoch": 0.6861529076860512, "grad_norm": 0.32777872681617737, "learning_rate": 9.957835879972746e-06, "loss": 0.3962, "step": 6749 }, { "epoch": 0.6862545750305002, "grad_norm": 0.407532274723053, "learning_rate": 9.957789876485782e-06, "loss": 0.435, "step": 6750 }, { "epoch": 0.6863562423749492, "grad_norm": 0.3547765612602234, "learning_rate": 9.957743848022648e-06, "loss": 0.398, "step": 6751 }, { "epoch": 0.6864579097193981, "grad_norm": 0.4075333774089813, "learning_rate": 9.957697794583581e-06, "loss": 0.3788, "step": 6752 }, { "epoch": 0.6865595770638471, "grad_norm": 0.3628944158554077, "learning_rate": 9.95765171616881e-06, "loss": 0.3898, "step": 6753 }, { "epoch": 0.686661244408296, "grad_norm": 0.35395702719688416, "learning_rate": 9.957605612778567e-06, "loss": 0.3947, "step": 6754 }, { "epoch": 0.6867629117527451, "grad_norm": 0.3537406921386719, "learning_rate": 9.957559484413086e-06, "loss": 0.4275, "step": 6755 }, { "epoch": 0.686864579097194, "grad_norm": 0.33016878366470337, "learning_rate": 9.957513331072598e-06, "loss": 0.3618, "step": 6756 }, { "epoch": 0.686966246441643, "grad_norm": 0.3243880569934845, "learning_rate": 9.957467152757336e-06, "loss": 0.4159, "step": 6757 }, { "epoch": 0.6870679137860919, "grad_norm": 0.36937177181243896, "learning_rate": 9.957420949467534e-06, "loss": 0.4135, "step": 6758 }, { "epoch": 0.6871695811305408, "grad_norm": 0.3260558247566223, "learning_rate": 9.95737472120342e-06, "loss": 0.4018, "step": 6759 }, { "epoch": 0.6872712484749899, "grad_norm": 0.3222740590572357, "learning_rate": 9.957328467965233e-06, "loss": 0.3865, "step": 6760 }, { "epoch": 0.6873729158194388, "grad_norm": 0.3193528950214386, "learning_rate": 9.957282189753203e-06, "loss": 0.377, "step": 6761 }, { "epoch": 0.6874745831638878, "grad_norm": 0.3200908601284027, "learning_rate": 9.957235886567562e-06, "loss": 0.4015, "step": 6762 }, { "epoch": 0.6875762505083367, "grad_norm": 0.3633936047554016, "learning_rate": 9.957189558408546e-06, "loss": 0.4063, "step": 6763 }, { "epoch": 0.6876779178527856, "grad_norm": 0.32463374733924866, "learning_rate": 9.957143205276386e-06, "loss": 0.3637, "step": 6764 }, { "epoch": 0.6877795851972347, "grad_norm": 0.38922014832496643, "learning_rate": 9.957096827171316e-06, "loss": 0.3803, "step": 6765 }, { "epoch": 0.6878812525416836, "grad_norm": 0.3620227873325348, "learning_rate": 9.957050424093572e-06, "loss": 0.4271, "step": 6766 }, { "epoch": 0.6879829198861326, "grad_norm": 0.35945454239845276, "learning_rate": 9.957003996043384e-06, "loss": 0.3846, "step": 6767 }, { "epoch": 0.6880845872305815, "grad_norm": 0.39030343294143677, "learning_rate": 9.95695754302099e-06, "loss": 0.4167, "step": 6768 }, { "epoch": 0.6881862545750305, "grad_norm": 0.3632575273513794, "learning_rate": 9.95691106502662e-06, "loss": 0.3973, "step": 6769 }, { "epoch": 0.6882879219194795, "grad_norm": 0.35084766149520874, "learning_rate": 9.95686456206051e-06, "loss": 0.4095, "step": 6770 }, { "epoch": 0.6883895892639285, "grad_norm": 0.35062384605407715, "learning_rate": 9.956818034122895e-06, "loss": 0.4243, "step": 6771 }, { "epoch": 0.6884912566083774, "grad_norm": 0.3570843040943146, "learning_rate": 9.95677148121401e-06, "loss": 0.3838, "step": 6772 }, { "epoch": 0.6885929239528263, "grad_norm": 0.32896509766578674, "learning_rate": 9.956724903334087e-06, "loss": 0.4017, "step": 6773 }, { "epoch": 0.6886945912972753, "grad_norm": 0.35567706823349, "learning_rate": 9.956678300483361e-06, "loss": 0.384, "step": 6774 }, { "epoch": 0.6887962586417242, "grad_norm": 0.349053293466568, "learning_rate": 9.956631672662067e-06, "loss": 0.376, "step": 6775 }, { "epoch": 0.6888979259861733, "grad_norm": 0.31282755732536316, "learning_rate": 9.956585019870442e-06, "loss": 0.3942, "step": 6776 }, { "epoch": 0.6889995933306222, "grad_norm": 0.3094870448112488, "learning_rate": 9.95653834210872e-06, "loss": 0.3623, "step": 6777 }, { "epoch": 0.6891012606750712, "grad_norm": 0.32787469029426575, "learning_rate": 9.956491639377136e-06, "loss": 0.3792, "step": 6778 }, { "epoch": 0.6892029280195201, "grad_norm": 0.34666672348976135, "learning_rate": 9.956444911675924e-06, "loss": 0.4027, "step": 6779 }, { "epoch": 0.689304595363969, "grad_norm": 0.3471797704696655, "learning_rate": 9.95639815900532e-06, "loss": 0.4123, "step": 6780 }, { "epoch": 0.6894062627084181, "grad_norm": 0.329755574464798, "learning_rate": 9.956351381365559e-06, "loss": 0.4046, "step": 6781 }, { "epoch": 0.689507930052867, "grad_norm": 0.3677501380443573, "learning_rate": 9.956304578756878e-06, "loss": 0.4079, "step": 6782 }, { "epoch": 0.689609597397316, "grad_norm": 0.3502892255783081, "learning_rate": 9.956257751179515e-06, "loss": 0.4101, "step": 6783 }, { "epoch": 0.6897112647417649, "grad_norm": 0.32222065329551697, "learning_rate": 9.9562108986337e-06, "loss": 0.3582, "step": 6784 }, { "epoch": 0.6898129320862139, "grad_norm": 0.36352866888046265, "learning_rate": 9.956164021119673e-06, "loss": 0.3697, "step": 6785 }, { "epoch": 0.6899145994306629, "grad_norm": 0.34020277857780457, "learning_rate": 9.956117118637672e-06, "loss": 0.4074, "step": 6786 }, { "epoch": 0.6900162667751119, "grad_norm": 0.3285907506942749, "learning_rate": 9.956070191187927e-06, "loss": 0.3456, "step": 6787 }, { "epoch": 0.6901179341195608, "grad_norm": 0.34101805090904236, "learning_rate": 9.95602323877068e-06, "loss": 0.3951, "step": 6788 }, { "epoch": 0.6902196014640097, "grad_norm": 0.32925090193748474, "learning_rate": 9.955976261386167e-06, "loss": 0.3652, "step": 6789 }, { "epoch": 0.6903212688084587, "grad_norm": 0.3548751175403595, "learning_rate": 9.955929259034622e-06, "loss": 0.4411, "step": 6790 }, { "epoch": 0.6904229361529077, "grad_norm": 0.3511233627796173, "learning_rate": 9.955882231716283e-06, "loss": 0.3895, "step": 6791 }, { "epoch": 0.6905246034973567, "grad_norm": 0.2936131954193115, "learning_rate": 9.955835179431385e-06, "loss": 0.4091, "step": 6792 }, { "epoch": 0.6906262708418056, "grad_norm": 0.3205375373363495, "learning_rate": 9.95578810218017e-06, "loss": 0.3708, "step": 6793 }, { "epoch": 0.6907279381862546, "grad_norm": 0.374394029378891, "learning_rate": 9.95574099996287e-06, "loss": 0.4014, "step": 6794 }, { "epoch": 0.6908296055307035, "grad_norm": 0.3074425458908081, "learning_rate": 9.955693872779728e-06, "loss": 0.3891, "step": 6795 }, { "epoch": 0.6909312728751525, "grad_norm": 0.3319406807422638, "learning_rate": 9.955646720630975e-06, "loss": 0.4315, "step": 6796 }, { "epoch": 0.6910329402196015, "grad_norm": 0.33953168988227844, "learning_rate": 9.955599543516852e-06, "loss": 0.3738, "step": 6797 }, { "epoch": 0.6911346075640504, "grad_norm": 0.34540125727653503, "learning_rate": 9.955552341437596e-06, "loss": 0.4, "step": 6798 }, { "epoch": 0.6912362749084994, "grad_norm": 0.31721875071525574, "learning_rate": 9.955505114393445e-06, "loss": 0.3879, "step": 6799 }, { "epoch": 0.6913379422529483, "grad_norm": 0.33717232942581177, "learning_rate": 9.955457862384637e-06, "loss": 0.4187, "step": 6800 }, { "epoch": 0.6914396095973974, "grad_norm": 0.36350592970848083, "learning_rate": 9.955410585411411e-06, "loss": 0.4025, "step": 6801 }, { "epoch": 0.6915412769418463, "grad_norm": 0.3860205113887787, "learning_rate": 9.955363283474003e-06, "loss": 0.4366, "step": 6802 }, { "epoch": 0.6916429442862952, "grad_norm": 0.34534701704978943, "learning_rate": 9.955315956572651e-06, "loss": 0.4041, "step": 6803 }, { "epoch": 0.6917446116307442, "grad_norm": 0.3606525659561157, "learning_rate": 9.955268604707597e-06, "loss": 0.3783, "step": 6804 }, { "epoch": 0.6918462789751931, "grad_norm": 0.3305945098400116, "learning_rate": 9.955221227879075e-06, "loss": 0.3856, "step": 6805 }, { "epoch": 0.6919479463196422, "grad_norm": 0.36782997846603394, "learning_rate": 9.955173826087327e-06, "loss": 0.4207, "step": 6806 }, { "epoch": 0.6920496136640911, "grad_norm": 0.3546763062477112, "learning_rate": 9.955126399332594e-06, "loss": 0.3955, "step": 6807 }, { "epoch": 0.6921512810085401, "grad_norm": 0.3661334812641144, "learning_rate": 9.955078947615107e-06, "loss": 0.4322, "step": 6808 }, { "epoch": 0.692252948352989, "grad_norm": 0.34425830841064453, "learning_rate": 9.955031470935113e-06, "loss": 0.3875, "step": 6809 }, { "epoch": 0.692354615697438, "grad_norm": 0.3319881856441498, "learning_rate": 9.954983969292847e-06, "loss": 0.3816, "step": 6810 }, { "epoch": 0.692456283041887, "grad_norm": 0.4117811322212219, "learning_rate": 9.954936442688548e-06, "loss": 0.4075, "step": 6811 }, { "epoch": 0.6925579503863359, "grad_norm": 0.3795207440853119, "learning_rate": 9.954888891122456e-06, "loss": 0.3774, "step": 6812 }, { "epoch": 0.6926596177307849, "grad_norm": 0.3153873383998871, "learning_rate": 9.954841314594813e-06, "loss": 0.397, "step": 6813 }, { "epoch": 0.6927612850752338, "grad_norm": 0.3490866720676422, "learning_rate": 9.954793713105857e-06, "loss": 0.4039, "step": 6814 }, { "epoch": 0.6928629524196828, "grad_norm": 0.3252056837081909, "learning_rate": 9.954746086655828e-06, "loss": 0.4013, "step": 6815 }, { "epoch": 0.6929646197641317, "grad_norm": 0.35214173793792725, "learning_rate": 9.954698435244965e-06, "loss": 0.425, "step": 6816 }, { "epoch": 0.6930662871085808, "grad_norm": 0.3719384968280792, "learning_rate": 9.954650758873509e-06, "loss": 0.4007, "step": 6817 }, { "epoch": 0.6931679544530297, "grad_norm": 0.38056379556655884, "learning_rate": 9.954603057541699e-06, "loss": 0.3823, "step": 6818 }, { "epoch": 0.6932696217974786, "grad_norm": 0.32906582951545715, "learning_rate": 9.954555331249778e-06, "loss": 0.4173, "step": 6819 }, { "epoch": 0.6933712891419276, "grad_norm": 0.3642027676105499, "learning_rate": 9.954507579997982e-06, "loss": 0.3937, "step": 6820 }, { "epoch": 0.6934729564863765, "grad_norm": 0.37472447752952576, "learning_rate": 9.954459803786556e-06, "loss": 0.4031, "step": 6821 }, { "epoch": 0.6935746238308256, "grad_norm": 0.33247795701026917, "learning_rate": 9.954412002615738e-06, "loss": 0.3934, "step": 6822 }, { "epoch": 0.6936762911752745, "grad_norm": 0.34092798829078674, "learning_rate": 9.954364176485771e-06, "loss": 0.3905, "step": 6823 }, { "epoch": 0.6937779585197235, "grad_norm": 0.34962937235832214, "learning_rate": 9.954316325396893e-06, "loss": 0.4319, "step": 6824 }, { "epoch": 0.6938796258641724, "grad_norm": 0.31936323642730713, "learning_rate": 9.954268449349349e-06, "loss": 0.4238, "step": 6825 }, { "epoch": 0.6939812932086213, "grad_norm": 0.31359151005744934, "learning_rate": 9.954220548343375e-06, "loss": 0.364, "step": 6826 }, { "epoch": 0.6940829605530704, "grad_norm": 0.3482224643230438, "learning_rate": 9.954172622379217e-06, "loss": 0.4064, "step": 6827 }, { "epoch": 0.6941846278975193, "grad_norm": 0.3371761441230774, "learning_rate": 9.954124671457114e-06, "loss": 0.3763, "step": 6828 }, { "epoch": 0.6942862952419683, "grad_norm": 0.30920982360839844, "learning_rate": 9.954076695577308e-06, "loss": 0.3838, "step": 6829 }, { "epoch": 0.6943879625864172, "grad_norm": 0.37257102131843567, "learning_rate": 9.954028694740042e-06, "loss": 0.4152, "step": 6830 }, { "epoch": 0.6944896299308662, "grad_norm": 0.40105539560317993, "learning_rate": 9.953980668945556e-06, "loss": 0.3878, "step": 6831 }, { "epoch": 0.6945912972753152, "grad_norm": 0.3842484951019287, "learning_rate": 9.953932618194092e-06, "loss": 0.4116, "step": 6832 }, { "epoch": 0.6946929646197642, "grad_norm": 0.3257441222667694, "learning_rate": 9.953884542485893e-06, "loss": 0.3827, "step": 6833 }, { "epoch": 0.6947946319642131, "grad_norm": 0.35486146807670593, "learning_rate": 9.9538364418212e-06, "loss": 0.3968, "step": 6834 }, { "epoch": 0.694896299308662, "grad_norm": 0.4057086408138275, "learning_rate": 9.953788316200259e-06, "loss": 0.3884, "step": 6835 }, { "epoch": 0.694997966653111, "grad_norm": 0.3901863694190979, "learning_rate": 9.953740165623307e-06, "loss": 0.4421, "step": 6836 }, { "epoch": 0.69509963399756, "grad_norm": 0.3844054937362671, "learning_rate": 9.953691990090591e-06, "loss": 0.4103, "step": 6837 }, { "epoch": 0.695201301342009, "grad_norm": 0.38692808151245117, "learning_rate": 9.953643789602352e-06, "loss": 0.3955, "step": 6838 }, { "epoch": 0.6953029686864579, "grad_norm": 0.32896658778190613, "learning_rate": 9.95359556415883e-06, "loss": 0.3772, "step": 6839 }, { "epoch": 0.6954046360309069, "grad_norm": 0.37389153242111206, "learning_rate": 9.953547313760273e-06, "loss": 0.3917, "step": 6840 }, { "epoch": 0.6955063033753558, "grad_norm": 0.3705350458621979, "learning_rate": 9.953499038406921e-06, "loss": 0.3713, "step": 6841 }, { "epoch": 0.6956079707198048, "grad_norm": 0.3669911026954651, "learning_rate": 9.95345073809902e-06, "loss": 0.4289, "step": 6842 }, { "epoch": 0.6957096380642538, "grad_norm": 0.32652518153190613, "learning_rate": 9.953402412836809e-06, "loss": 0.4157, "step": 6843 }, { "epoch": 0.6958113054087027, "grad_norm": 0.3346405029296875, "learning_rate": 9.953354062620535e-06, "loss": 0.379, "step": 6844 }, { "epoch": 0.6959129727531517, "grad_norm": 0.37802308797836304, "learning_rate": 9.953305687450438e-06, "loss": 0.3869, "step": 6845 }, { "epoch": 0.6960146400976006, "grad_norm": 0.37098178267478943, "learning_rate": 9.953257287326768e-06, "loss": 0.4055, "step": 6846 }, { "epoch": 0.6961163074420497, "grad_norm": 0.32954031229019165, "learning_rate": 9.953208862249762e-06, "loss": 0.377, "step": 6847 }, { "epoch": 0.6962179747864986, "grad_norm": 0.32465660572052, "learning_rate": 9.953160412219667e-06, "loss": 0.3923, "step": 6848 }, { "epoch": 0.6963196421309475, "grad_norm": 0.32451504468917847, "learning_rate": 9.953111937236728e-06, "loss": 0.4126, "step": 6849 }, { "epoch": 0.6964213094753965, "grad_norm": 0.3475878834724426, "learning_rate": 9.953063437301186e-06, "loss": 0.3758, "step": 6850 }, { "epoch": 0.6965229768198454, "grad_norm": 0.36876389384269714, "learning_rate": 9.95301491241329e-06, "loss": 0.4194, "step": 6851 }, { "epoch": 0.6966246441642945, "grad_norm": 0.342647522687912, "learning_rate": 9.952966362573281e-06, "loss": 0.4003, "step": 6852 }, { "epoch": 0.6967263115087434, "grad_norm": 0.34880343079566956, "learning_rate": 9.952917787781404e-06, "loss": 0.3808, "step": 6853 }, { "epoch": 0.6968279788531924, "grad_norm": 0.3600543141365051, "learning_rate": 9.952869188037905e-06, "loss": 0.3853, "step": 6854 }, { "epoch": 0.6969296461976413, "grad_norm": 0.32817602157592773, "learning_rate": 9.952820563343026e-06, "loss": 0.3977, "step": 6855 }, { "epoch": 0.6970313135420902, "grad_norm": 0.3353709280490875, "learning_rate": 9.952771913697017e-06, "loss": 0.4022, "step": 6856 }, { "epoch": 0.6971329808865392, "grad_norm": 0.3779595196247101, "learning_rate": 9.952723239100116e-06, "loss": 0.3784, "step": 6857 }, { "epoch": 0.6972346482309882, "grad_norm": 0.36389121413230896, "learning_rate": 9.952674539552575e-06, "loss": 0.394, "step": 6858 }, { "epoch": 0.6973363155754372, "grad_norm": 0.314680278301239, "learning_rate": 9.952625815054637e-06, "loss": 0.3745, "step": 6859 }, { "epoch": 0.6974379829198861, "grad_norm": 0.3633080720901489, "learning_rate": 9.952577065606548e-06, "loss": 0.4018, "step": 6860 }, { "epoch": 0.6975396502643351, "grad_norm": 0.3619922697544098, "learning_rate": 9.95252829120855e-06, "loss": 0.3787, "step": 6861 }, { "epoch": 0.697641317608784, "grad_norm": 0.33870837092399597, "learning_rate": 9.952479491860893e-06, "loss": 0.3897, "step": 6862 }, { "epoch": 0.6977429849532331, "grad_norm": 0.3869813084602356, "learning_rate": 9.95243066756382e-06, "loss": 0.4232, "step": 6863 }, { "epoch": 0.697844652297682, "grad_norm": 0.37939751148223877, "learning_rate": 9.95238181831758e-06, "loss": 0.3924, "step": 6864 }, { "epoch": 0.6979463196421309, "grad_norm": 0.32226818799972534, "learning_rate": 9.952332944122415e-06, "loss": 0.414, "step": 6865 }, { "epoch": 0.6980479869865799, "grad_norm": 0.36998820304870605, "learning_rate": 9.952284044978575e-06, "loss": 0.3965, "step": 6866 }, { "epoch": 0.6981496543310288, "grad_norm": 0.35027700662612915, "learning_rate": 9.952235120886304e-06, "loss": 0.4029, "step": 6867 }, { "epoch": 0.6982513216754779, "grad_norm": 0.33521178364753723, "learning_rate": 9.95218617184585e-06, "loss": 0.4342, "step": 6868 }, { "epoch": 0.6983529890199268, "grad_norm": 0.35041338205337524, "learning_rate": 9.95213719785746e-06, "loss": 0.4108, "step": 6869 }, { "epoch": 0.6984546563643758, "grad_norm": 0.33709725737571716, "learning_rate": 9.952088198921378e-06, "loss": 0.3724, "step": 6870 }, { "epoch": 0.6985563237088247, "grad_norm": 0.3616487979888916, "learning_rate": 9.952039175037851e-06, "loss": 0.4124, "step": 6871 }, { "epoch": 0.6986579910532736, "grad_norm": 0.3524283468723297, "learning_rate": 9.95199012620713e-06, "loss": 0.3898, "step": 6872 }, { "epoch": 0.6987596583977227, "grad_norm": 0.345017671585083, "learning_rate": 9.95194105242946e-06, "loss": 0.3837, "step": 6873 }, { "epoch": 0.6988613257421716, "grad_norm": 0.3508434295654297, "learning_rate": 9.951891953705084e-06, "loss": 0.3671, "step": 6874 }, { "epoch": 0.6989629930866206, "grad_norm": 0.317596435546875, "learning_rate": 9.951842830034256e-06, "loss": 0.4115, "step": 6875 }, { "epoch": 0.6990646604310695, "grad_norm": 0.32626602053642273, "learning_rate": 9.95179368141722e-06, "loss": 0.3836, "step": 6876 }, { "epoch": 0.6991663277755185, "grad_norm": 0.35228848457336426, "learning_rate": 9.951744507854225e-06, "loss": 0.3513, "step": 6877 }, { "epoch": 0.6992679951199675, "grad_norm": 0.35286131501197815, "learning_rate": 9.951695309345517e-06, "loss": 0.3814, "step": 6878 }, { "epoch": 0.6993696624644165, "grad_norm": 0.3463935852050781, "learning_rate": 9.951646085891344e-06, "loss": 0.4129, "step": 6879 }, { "epoch": 0.6994713298088654, "grad_norm": 0.34781086444854736, "learning_rate": 9.951596837491955e-06, "loss": 0.3858, "step": 6880 }, { "epoch": 0.6995729971533143, "grad_norm": 0.357767790555954, "learning_rate": 9.951547564147599e-06, "loss": 0.3957, "step": 6881 }, { "epoch": 0.6996746644977633, "grad_norm": 0.32474592328071594, "learning_rate": 9.951498265858523e-06, "loss": 0.4165, "step": 6882 }, { "epoch": 0.6997763318422123, "grad_norm": 0.3321569859981537, "learning_rate": 9.951448942624973e-06, "loss": 0.3905, "step": 6883 }, { "epoch": 0.6998779991866613, "grad_norm": 0.3063189685344696, "learning_rate": 9.951399594447202e-06, "loss": 0.4218, "step": 6884 }, { "epoch": 0.6999796665311102, "grad_norm": 0.392886221408844, "learning_rate": 9.951350221325456e-06, "loss": 0.397, "step": 6885 }, { "epoch": 0.7000813338755592, "grad_norm": 0.3552248775959015, "learning_rate": 9.951300823259986e-06, "loss": 0.4012, "step": 6886 }, { "epoch": 0.7001830012200081, "grad_norm": 0.3280228078365326, "learning_rate": 9.951251400251037e-06, "loss": 0.395, "step": 6887 }, { "epoch": 0.7002846685644571, "grad_norm": 0.35866162180900574, "learning_rate": 9.95120195229886e-06, "loss": 0.3845, "step": 6888 }, { "epoch": 0.7003863359089061, "grad_norm": 0.3572283089160919, "learning_rate": 9.951152479403703e-06, "loss": 0.4127, "step": 6889 }, { "epoch": 0.700488003253355, "grad_norm": 0.3283812999725342, "learning_rate": 9.951102981565819e-06, "loss": 0.3817, "step": 6890 }, { "epoch": 0.700589670597804, "grad_norm": 0.34216901659965515, "learning_rate": 9.951053458785453e-06, "loss": 0.3773, "step": 6891 }, { "epoch": 0.7006913379422529, "grad_norm": 0.3668705224990845, "learning_rate": 9.951003911062858e-06, "loss": 0.403, "step": 6892 }, { "epoch": 0.700793005286702, "grad_norm": 0.32890135049819946, "learning_rate": 9.95095433839828e-06, "loss": 0.4251, "step": 6893 }, { "epoch": 0.7008946726311509, "grad_norm": 0.3385959267616272, "learning_rate": 9.95090474079197e-06, "loss": 0.3843, "step": 6894 }, { "epoch": 0.7009963399755998, "grad_norm": 0.31802597641944885, "learning_rate": 9.950855118244182e-06, "loss": 0.3951, "step": 6895 }, { "epoch": 0.7010980073200488, "grad_norm": 0.35003721714019775, "learning_rate": 9.950805470755159e-06, "loss": 0.4009, "step": 6896 }, { "epoch": 0.7011996746644977, "grad_norm": 0.37556037306785583, "learning_rate": 9.950755798325156e-06, "loss": 0.3976, "step": 6897 }, { "epoch": 0.7013013420089467, "grad_norm": 0.3109815716743469, "learning_rate": 9.950706100954422e-06, "loss": 0.4054, "step": 6898 }, { "epoch": 0.7014030093533957, "grad_norm": 0.3360980153083801, "learning_rate": 9.950656378643207e-06, "loss": 0.4051, "step": 6899 }, { "epoch": 0.7015046766978447, "grad_norm": 0.3736903965473175, "learning_rate": 9.95060663139176e-06, "loss": 0.4365, "step": 6900 }, { "epoch": 0.7016063440422936, "grad_norm": 0.31496986746788025, "learning_rate": 9.950556859200336e-06, "loss": 0.4151, "step": 6901 }, { "epoch": 0.7017080113867425, "grad_norm": 0.31289902329444885, "learning_rate": 9.95050706206918e-06, "loss": 0.4004, "step": 6902 }, { "epoch": 0.7018096787311915, "grad_norm": 0.3432866334915161, "learning_rate": 9.950457239998547e-06, "loss": 0.4151, "step": 6903 }, { "epoch": 0.7019113460756405, "grad_norm": 0.3731837272644043, "learning_rate": 9.950407392988688e-06, "loss": 0.4069, "step": 6904 }, { "epoch": 0.7020130134200895, "grad_norm": 0.3301020860671997, "learning_rate": 9.950357521039852e-06, "loss": 0.4235, "step": 6905 }, { "epoch": 0.7021146807645384, "grad_norm": 0.34714892506599426, "learning_rate": 9.950307624152291e-06, "loss": 0.4267, "step": 6906 }, { "epoch": 0.7022163481089874, "grad_norm": 0.3454902470111847, "learning_rate": 9.950257702326256e-06, "loss": 0.3714, "step": 6907 }, { "epoch": 0.7023180154534363, "grad_norm": 0.35527297854423523, "learning_rate": 9.950207755562e-06, "loss": 0.4194, "step": 6908 }, { "epoch": 0.7024196827978854, "grad_norm": 0.3327149450778961, "learning_rate": 9.950157783859774e-06, "loss": 0.3848, "step": 6909 }, { "epoch": 0.7025213501423343, "grad_norm": 0.3603944182395935, "learning_rate": 9.95010778721983e-06, "loss": 0.3702, "step": 6910 }, { "epoch": 0.7026230174867832, "grad_norm": 0.3596110939979553, "learning_rate": 9.950057765642418e-06, "loss": 0.4138, "step": 6911 }, { "epoch": 0.7027246848312322, "grad_norm": 0.34945836663246155, "learning_rate": 9.95000771912779e-06, "loss": 0.3959, "step": 6912 }, { "epoch": 0.7028263521756811, "grad_norm": 0.335131973028183, "learning_rate": 9.949957647676201e-06, "loss": 0.3784, "step": 6913 }, { "epoch": 0.7029280195201302, "grad_norm": 0.3484671115875244, "learning_rate": 9.949907551287902e-06, "loss": 0.3927, "step": 6914 }, { "epoch": 0.7030296868645791, "grad_norm": 0.3511849045753479, "learning_rate": 9.949857429963145e-06, "loss": 0.3806, "step": 6915 }, { "epoch": 0.7031313542090281, "grad_norm": 0.3222753703594208, "learning_rate": 9.949807283702181e-06, "loss": 0.3682, "step": 6916 }, { "epoch": 0.703233021553477, "grad_norm": 0.37510886788368225, "learning_rate": 9.949757112505267e-06, "loss": 0.4115, "step": 6917 }, { "epoch": 0.7033346888979259, "grad_norm": 0.3446873724460602, "learning_rate": 9.94970691637265e-06, "loss": 0.3763, "step": 6918 }, { "epoch": 0.703436356242375, "grad_norm": 0.34430864453315735, "learning_rate": 9.949656695304588e-06, "loss": 0.4338, "step": 6919 }, { "epoch": 0.7035380235868239, "grad_norm": 0.32937198877334595, "learning_rate": 9.94960644930133e-06, "loss": 0.405, "step": 6920 }, { "epoch": 0.7036396909312729, "grad_norm": 0.3803786635398865, "learning_rate": 9.949556178363133e-06, "loss": 0.3943, "step": 6921 }, { "epoch": 0.7037413582757218, "grad_norm": 0.3663828670978546, "learning_rate": 9.949505882490247e-06, "loss": 0.4039, "step": 6922 }, { "epoch": 0.7038430256201708, "grad_norm": 0.40078434348106384, "learning_rate": 9.949455561682926e-06, "loss": 0.408, "step": 6923 }, { "epoch": 0.7039446929646198, "grad_norm": 0.3447287082672119, "learning_rate": 9.949405215941423e-06, "loss": 0.4064, "step": 6924 }, { "epoch": 0.7040463603090688, "grad_norm": 0.339124858379364, "learning_rate": 9.949354845265995e-06, "loss": 0.3696, "step": 6925 }, { "epoch": 0.7041480276535177, "grad_norm": 0.3446577191352844, "learning_rate": 9.949304449656894e-06, "loss": 0.4116, "step": 6926 }, { "epoch": 0.7042496949979666, "grad_norm": 0.3520548641681671, "learning_rate": 9.94925402911437e-06, "loss": 0.39, "step": 6927 }, { "epoch": 0.7043513623424156, "grad_norm": 0.33252230286598206, "learning_rate": 9.949203583638682e-06, "loss": 0.4088, "step": 6928 }, { "epoch": 0.7044530296868646, "grad_norm": 0.36601629853248596, "learning_rate": 9.949153113230083e-06, "loss": 0.4225, "step": 6929 }, { "epoch": 0.7045546970313136, "grad_norm": 0.32677626609802246, "learning_rate": 9.949102617888827e-06, "loss": 0.3828, "step": 6930 }, { "epoch": 0.7046563643757625, "grad_norm": 0.3514757454395294, "learning_rate": 9.949052097615168e-06, "loss": 0.3766, "step": 6931 }, { "epoch": 0.7047580317202115, "grad_norm": 0.34812411665916443, "learning_rate": 9.94900155240936e-06, "loss": 0.4272, "step": 6932 }, { "epoch": 0.7048596990646604, "grad_norm": 0.34527119994163513, "learning_rate": 9.94895098227166e-06, "loss": 0.3729, "step": 6933 }, { "epoch": 0.7049613664091094, "grad_norm": 0.3362717628479004, "learning_rate": 9.948900387202318e-06, "loss": 0.3963, "step": 6934 }, { "epoch": 0.7050630337535584, "grad_norm": 0.31972190737724304, "learning_rate": 9.948849767201594e-06, "loss": 0.351, "step": 6935 }, { "epoch": 0.7051647010980073, "grad_norm": 0.3512125313282013, "learning_rate": 9.948799122269739e-06, "loss": 0.4312, "step": 6936 }, { "epoch": 0.7052663684424563, "grad_norm": 0.34845468401908875, "learning_rate": 9.94874845240701e-06, "loss": 0.3567, "step": 6937 }, { "epoch": 0.7053680357869052, "grad_norm": 0.3487107753753662, "learning_rate": 9.948697757613666e-06, "loss": 0.3857, "step": 6938 }, { "epoch": 0.7054697031313543, "grad_norm": 0.33836817741394043, "learning_rate": 9.948647037889956e-06, "loss": 0.3884, "step": 6939 }, { "epoch": 0.7055713704758032, "grad_norm": 0.34634992480278015, "learning_rate": 9.948596293236137e-06, "loss": 0.3663, "step": 6940 }, { "epoch": 0.7056730378202521, "grad_norm": 0.35670310258865356, "learning_rate": 9.94854552365247e-06, "loss": 0.4143, "step": 6941 }, { "epoch": 0.7057747051647011, "grad_norm": 0.32959961891174316, "learning_rate": 9.948494729139202e-06, "loss": 0.3808, "step": 6942 }, { "epoch": 0.70587637250915, "grad_norm": 0.31477299332618713, "learning_rate": 9.948443909696594e-06, "loss": 0.412, "step": 6943 }, { "epoch": 0.705978039853599, "grad_norm": 0.3386000394821167, "learning_rate": 9.948393065324904e-06, "loss": 0.4461, "step": 6944 }, { "epoch": 0.706079707198048, "grad_norm": 0.3468603491783142, "learning_rate": 9.948342196024384e-06, "loss": 0.3617, "step": 6945 }, { "epoch": 0.706181374542497, "grad_norm": 0.3628236949443817, "learning_rate": 9.948291301795293e-06, "loss": 0.3968, "step": 6946 }, { "epoch": 0.7062830418869459, "grad_norm": 0.3324694335460663, "learning_rate": 9.948240382637886e-06, "loss": 0.3796, "step": 6947 }, { "epoch": 0.7063847092313948, "grad_norm": 0.3987928330898285, "learning_rate": 9.948189438552419e-06, "loss": 0.4266, "step": 6948 }, { "epoch": 0.7064863765758438, "grad_norm": 0.37571677565574646, "learning_rate": 9.948138469539151e-06, "loss": 0.3869, "step": 6949 }, { "epoch": 0.7065880439202928, "grad_norm": 0.3712340295314789, "learning_rate": 9.948087475598336e-06, "loss": 0.3585, "step": 6950 }, { "epoch": 0.7066897112647418, "grad_norm": 0.36471235752105713, "learning_rate": 9.948036456730232e-06, "loss": 0.3645, "step": 6951 }, { "epoch": 0.7067913786091907, "grad_norm": 0.35835954546928406, "learning_rate": 9.947985412935097e-06, "loss": 0.3761, "step": 6952 }, { "epoch": 0.7068930459536397, "grad_norm": 0.3527417778968811, "learning_rate": 9.947934344213186e-06, "loss": 0.3847, "step": 6953 }, { "epoch": 0.7069947132980886, "grad_norm": 0.34342268109321594, "learning_rate": 9.94788325056476e-06, "loss": 0.3783, "step": 6954 }, { "epoch": 0.7070963806425377, "grad_norm": 0.39867115020751953, "learning_rate": 9.947832131990072e-06, "loss": 0.4182, "step": 6955 }, { "epoch": 0.7071980479869866, "grad_norm": 0.3306059241294861, "learning_rate": 9.94778098848938e-06, "loss": 0.372, "step": 6956 }, { "epoch": 0.7072997153314355, "grad_norm": 0.3708733022212982, "learning_rate": 9.947729820062946e-06, "loss": 0.4114, "step": 6957 }, { "epoch": 0.7074013826758845, "grad_norm": 0.3447243571281433, "learning_rate": 9.947678626711026e-06, "loss": 0.3886, "step": 6958 }, { "epoch": 0.7075030500203334, "grad_norm": 0.32013869285583496, "learning_rate": 9.947627408433873e-06, "loss": 0.4029, "step": 6959 }, { "epoch": 0.7076047173647825, "grad_norm": 0.38079187273979187, "learning_rate": 9.947576165231751e-06, "loss": 0.4215, "step": 6960 }, { "epoch": 0.7077063847092314, "grad_norm": 0.3827497065067291, "learning_rate": 9.947524897104917e-06, "loss": 0.4256, "step": 6961 }, { "epoch": 0.7078080520536804, "grad_norm": 0.3589165508747101, "learning_rate": 9.947473604053625e-06, "loss": 0.3952, "step": 6962 }, { "epoch": 0.7079097193981293, "grad_norm": 0.3810463547706604, "learning_rate": 9.947422286078142e-06, "loss": 0.3958, "step": 6963 }, { "epoch": 0.7080113867425782, "grad_norm": 0.34180977940559387, "learning_rate": 9.947370943178715e-06, "loss": 0.3877, "step": 6964 }, { "epoch": 0.7081130540870273, "grad_norm": 0.31641072034835815, "learning_rate": 9.947319575355612e-06, "loss": 0.3795, "step": 6965 }, { "epoch": 0.7082147214314762, "grad_norm": 0.39100298285484314, "learning_rate": 9.947268182609088e-06, "loss": 0.4173, "step": 6966 }, { "epoch": 0.7083163887759252, "grad_norm": 0.36137858033180237, "learning_rate": 9.947216764939404e-06, "loss": 0.398, "step": 6967 }, { "epoch": 0.7084180561203741, "grad_norm": 0.353174090385437, "learning_rate": 9.947165322346817e-06, "loss": 0.4184, "step": 6968 }, { "epoch": 0.7085197234648231, "grad_norm": 0.33817824721336365, "learning_rate": 9.947113854831587e-06, "loss": 0.4039, "step": 6969 }, { "epoch": 0.7086213908092721, "grad_norm": 0.3287970721721649, "learning_rate": 9.94706236239397e-06, "loss": 0.3923, "step": 6970 }, { "epoch": 0.708723058153721, "grad_norm": 0.3559485077857971, "learning_rate": 9.947010845034233e-06, "loss": 0.3637, "step": 6971 }, { "epoch": 0.70882472549817, "grad_norm": 0.3508860468864441, "learning_rate": 9.946959302752628e-06, "loss": 0.4286, "step": 6972 }, { "epoch": 0.7089263928426189, "grad_norm": 0.3423560857772827, "learning_rate": 9.946907735549419e-06, "loss": 0.4314, "step": 6973 }, { "epoch": 0.7090280601870679, "grad_norm": 0.32223019003868103, "learning_rate": 9.946856143424863e-06, "loss": 0.3968, "step": 6974 }, { "epoch": 0.7091297275315169, "grad_norm": 0.35596752166748047, "learning_rate": 9.946804526379222e-06, "loss": 0.391, "step": 6975 }, { "epoch": 0.7092313948759659, "grad_norm": 0.3639313876628876, "learning_rate": 9.946752884412757e-06, "loss": 0.4239, "step": 6976 }, { "epoch": 0.7093330622204148, "grad_norm": 0.3363986909389496, "learning_rate": 9.946701217525723e-06, "loss": 0.4289, "step": 6977 }, { "epoch": 0.7094347295648638, "grad_norm": 0.3460281789302826, "learning_rate": 9.946649525718386e-06, "loss": 0.3605, "step": 6978 }, { "epoch": 0.7095363969093127, "grad_norm": 0.38379916548728943, "learning_rate": 9.946597808991004e-06, "loss": 0.3975, "step": 6979 }, { "epoch": 0.7096380642537617, "grad_norm": 0.3464890718460083, "learning_rate": 9.946546067343837e-06, "loss": 0.3823, "step": 6980 }, { "epoch": 0.7097397315982107, "grad_norm": 0.36077865958213806, "learning_rate": 9.946494300777148e-06, "loss": 0.4132, "step": 6981 }, { "epoch": 0.7098413989426596, "grad_norm": 0.3433682322502136, "learning_rate": 9.946442509291194e-06, "loss": 0.4106, "step": 6982 }, { "epoch": 0.7099430662871086, "grad_norm": 0.3397206962108612, "learning_rate": 9.94639069288624e-06, "loss": 0.3781, "step": 6983 }, { "epoch": 0.7100447336315575, "grad_norm": 0.3624405264854431, "learning_rate": 9.946338851562545e-06, "loss": 0.3837, "step": 6984 }, { "epoch": 0.7101464009760065, "grad_norm": 0.37178292870521545, "learning_rate": 9.946286985320368e-06, "loss": 0.3972, "step": 6985 }, { "epoch": 0.7102480683204555, "grad_norm": 0.3489464521408081, "learning_rate": 9.946235094159974e-06, "loss": 0.3862, "step": 6986 }, { "epoch": 0.7103497356649044, "grad_norm": 0.3364352583885193, "learning_rate": 9.946183178081625e-06, "loss": 0.3925, "step": 6987 }, { "epoch": 0.7104514030093534, "grad_norm": 0.40130460262298584, "learning_rate": 9.946131237085578e-06, "loss": 0.4179, "step": 6988 }, { "epoch": 0.7105530703538023, "grad_norm": 0.34098124504089355, "learning_rate": 9.946079271172098e-06, "loss": 0.4023, "step": 6989 }, { "epoch": 0.7106547376982513, "grad_norm": 0.36996132135391235, "learning_rate": 9.946027280341446e-06, "loss": 0.4024, "step": 6990 }, { "epoch": 0.7107564050427003, "grad_norm": 0.37194669246673584, "learning_rate": 9.945975264593883e-06, "loss": 0.4, "step": 6991 }, { "epoch": 0.7108580723871493, "grad_norm": 0.34243717789649963, "learning_rate": 9.945923223929674e-06, "loss": 0.4221, "step": 6992 }, { "epoch": 0.7109597397315982, "grad_norm": 0.32701292634010315, "learning_rate": 9.945871158349079e-06, "loss": 0.3744, "step": 6993 }, { "epoch": 0.7110614070760471, "grad_norm": 0.38126078248023987, "learning_rate": 9.945819067852358e-06, "loss": 0.4043, "step": 6994 }, { "epoch": 0.7111630744204961, "grad_norm": 0.3358471691608429, "learning_rate": 9.945766952439778e-06, "loss": 0.4277, "step": 6995 }, { "epoch": 0.7112647417649451, "grad_norm": 0.3758552074432373, "learning_rate": 9.945714812111597e-06, "loss": 0.379, "step": 6996 }, { "epoch": 0.7113664091093941, "grad_norm": 0.3731415867805481, "learning_rate": 9.945662646868082e-06, "loss": 0.4028, "step": 6997 }, { "epoch": 0.711468076453843, "grad_norm": 0.35555076599121094, "learning_rate": 9.945610456709495e-06, "loss": 0.3838, "step": 6998 }, { "epoch": 0.711569743798292, "grad_norm": 0.398082435131073, "learning_rate": 9.945558241636094e-06, "loss": 0.3776, "step": 6999 }, { "epoch": 0.7116714111427409, "grad_norm": 0.3422527015209198, "learning_rate": 9.945506001648151e-06, "loss": 0.3874, "step": 7000 }, { "epoch": 0.71177307848719, "grad_norm": 0.34549152851104736, "learning_rate": 9.94545373674592e-06, "loss": 0.4228, "step": 7001 }, { "epoch": 0.7118747458316389, "grad_norm": 0.33961784839630127, "learning_rate": 9.94540144692967e-06, "loss": 0.3907, "step": 7002 }, { "epoch": 0.7119764131760878, "grad_norm": 0.3466697931289673, "learning_rate": 9.945349132199663e-06, "loss": 0.3673, "step": 7003 }, { "epoch": 0.7120780805205368, "grad_norm": 0.3501182794570923, "learning_rate": 9.94529679255616e-06, "loss": 0.3874, "step": 7004 }, { "epoch": 0.7121797478649857, "grad_norm": 0.3219864070415497, "learning_rate": 9.945244427999429e-06, "loss": 0.4589, "step": 7005 }, { "epoch": 0.7122814152094348, "grad_norm": 0.3258596658706665, "learning_rate": 9.94519203852973e-06, "loss": 0.366, "step": 7006 }, { "epoch": 0.7123830825538837, "grad_norm": 0.35186660289764404, "learning_rate": 9.94513962414733e-06, "loss": 0.3738, "step": 7007 }, { "epoch": 0.7124847498983327, "grad_norm": 0.38278308510780334, "learning_rate": 9.945087184852493e-06, "loss": 0.404, "step": 7008 }, { "epoch": 0.7125864172427816, "grad_norm": 0.3527015149593353, "learning_rate": 9.945034720645479e-06, "loss": 0.3934, "step": 7009 }, { "epoch": 0.7126880845872305, "grad_norm": 0.32465028762817383, "learning_rate": 9.944982231526557e-06, "loss": 0.3551, "step": 7010 }, { "epoch": 0.7127897519316796, "grad_norm": 0.4229940176010132, "learning_rate": 9.94492971749599e-06, "loss": 0.451, "step": 7011 }, { "epoch": 0.7128914192761285, "grad_norm": 0.3226792812347412, "learning_rate": 9.944877178554041e-06, "loss": 0.3628, "step": 7012 }, { "epoch": 0.7129930866205775, "grad_norm": 0.35426652431488037, "learning_rate": 9.944824614700977e-06, "loss": 0.4017, "step": 7013 }, { "epoch": 0.7130947539650264, "grad_norm": 0.3504330813884735, "learning_rate": 9.944772025937059e-06, "loss": 0.4065, "step": 7014 }, { "epoch": 0.7131964213094754, "grad_norm": 0.3783782720565796, "learning_rate": 9.944719412262559e-06, "loss": 0.3716, "step": 7015 }, { "epoch": 0.7132980886539244, "grad_norm": 0.36189574003219604, "learning_rate": 9.944666773677734e-06, "loss": 0.4063, "step": 7016 }, { "epoch": 0.7133997559983734, "grad_norm": 0.30613091588020325, "learning_rate": 9.944614110182853e-06, "loss": 0.3932, "step": 7017 }, { "epoch": 0.7135014233428223, "grad_norm": 0.3585731089115143, "learning_rate": 9.944561421778182e-06, "loss": 0.392, "step": 7018 }, { "epoch": 0.7136030906872712, "grad_norm": 0.3474397659301758, "learning_rate": 9.944508708463986e-06, "loss": 0.409, "step": 7019 }, { "epoch": 0.7137047580317202, "grad_norm": 0.34918200969696045, "learning_rate": 9.944455970240532e-06, "loss": 0.4142, "step": 7020 }, { "epoch": 0.7138064253761692, "grad_norm": 0.35072869062423706, "learning_rate": 9.94440320710808e-06, "loss": 0.3949, "step": 7021 }, { "epoch": 0.7139080927206182, "grad_norm": 0.34903383255004883, "learning_rate": 9.944350419066902e-06, "loss": 0.3936, "step": 7022 }, { "epoch": 0.7140097600650671, "grad_norm": 0.43449661135673523, "learning_rate": 9.94429760611726e-06, "loss": 0.4311, "step": 7023 }, { "epoch": 0.714111427409516, "grad_norm": 0.3407173752784729, "learning_rate": 9.944244768259423e-06, "loss": 0.3918, "step": 7024 }, { "epoch": 0.714213094753965, "grad_norm": 0.318715363740921, "learning_rate": 9.944191905493654e-06, "loss": 0.4125, "step": 7025 }, { "epoch": 0.7143147620984139, "grad_norm": 0.43103867769241333, "learning_rate": 9.944139017820222e-06, "loss": 0.4049, "step": 7026 }, { "epoch": 0.714416429442863, "grad_norm": 0.3297014832496643, "learning_rate": 9.944086105239394e-06, "loss": 0.4107, "step": 7027 }, { "epoch": 0.7145180967873119, "grad_norm": 0.3387553095817566, "learning_rate": 9.944033167751435e-06, "loss": 0.3903, "step": 7028 }, { "epoch": 0.7146197641317609, "grad_norm": 0.39020413160324097, "learning_rate": 9.94398020535661e-06, "loss": 0.4022, "step": 7029 }, { "epoch": 0.7147214314762098, "grad_norm": 0.337687611579895, "learning_rate": 9.943927218055187e-06, "loss": 0.3947, "step": 7030 }, { "epoch": 0.7148230988206588, "grad_norm": 0.3738633692264557, "learning_rate": 9.943874205847436e-06, "loss": 0.3987, "step": 7031 }, { "epoch": 0.7149247661651078, "grad_norm": 0.3236652910709381, "learning_rate": 9.94382116873362e-06, "loss": 0.4083, "step": 7032 }, { "epoch": 0.7150264335095567, "grad_norm": 0.33265671133995056, "learning_rate": 9.943768106714009e-06, "loss": 0.342, "step": 7033 }, { "epoch": 0.7151281008540057, "grad_norm": 0.35300883650779724, "learning_rate": 9.943715019788867e-06, "loss": 0.4039, "step": 7034 }, { "epoch": 0.7152297681984546, "grad_norm": 0.365654319524765, "learning_rate": 9.943661907958466e-06, "loss": 0.4013, "step": 7035 }, { "epoch": 0.7153314355429036, "grad_norm": 0.3623886704444885, "learning_rate": 9.94360877122307e-06, "loss": 0.3824, "step": 7036 }, { "epoch": 0.7154331028873526, "grad_norm": 0.31151053309440613, "learning_rate": 9.943555609582947e-06, "loss": 0.4046, "step": 7037 }, { "epoch": 0.7155347702318016, "grad_norm": 0.3185712993144989, "learning_rate": 9.943502423038367e-06, "loss": 0.3695, "step": 7038 }, { "epoch": 0.7156364375762505, "grad_norm": 0.34374743700027466, "learning_rate": 9.943449211589595e-06, "loss": 0.3989, "step": 7039 }, { "epoch": 0.7157381049206994, "grad_norm": 0.3532874286174774, "learning_rate": 9.943395975236903e-06, "loss": 0.4052, "step": 7040 }, { "epoch": 0.7158397722651484, "grad_norm": 0.32830744981765747, "learning_rate": 9.943342713980555e-06, "loss": 0.3855, "step": 7041 }, { "epoch": 0.7159414396095974, "grad_norm": 0.3347032070159912, "learning_rate": 9.94328942782082e-06, "loss": 0.3944, "step": 7042 }, { "epoch": 0.7160431069540464, "grad_norm": 0.34394901990890503, "learning_rate": 9.943236116757971e-06, "loss": 0.3727, "step": 7043 }, { "epoch": 0.7161447742984953, "grad_norm": 0.34025734663009644, "learning_rate": 9.94318278079227e-06, "loss": 0.4055, "step": 7044 }, { "epoch": 0.7162464416429443, "grad_norm": 0.34552377462387085, "learning_rate": 9.943129419923989e-06, "loss": 0.3844, "step": 7045 }, { "epoch": 0.7163481089873932, "grad_norm": 0.33458003401756287, "learning_rate": 9.943076034153397e-06, "loss": 0.3951, "step": 7046 }, { "epoch": 0.7164497763318423, "grad_norm": 0.3831862211227417, "learning_rate": 9.943022623480764e-06, "loss": 0.4505, "step": 7047 }, { "epoch": 0.7165514436762912, "grad_norm": 0.32131072878837585, "learning_rate": 9.942969187906358e-06, "loss": 0.3529, "step": 7048 }, { "epoch": 0.7166531110207401, "grad_norm": 0.3871023654937744, "learning_rate": 9.942915727430447e-06, "loss": 0.4036, "step": 7049 }, { "epoch": 0.7167547783651891, "grad_norm": 0.36466601490974426, "learning_rate": 9.9428622420533e-06, "loss": 0.3934, "step": 7050 }, { "epoch": 0.716856445709638, "grad_norm": 0.3270837366580963, "learning_rate": 9.942808731775188e-06, "loss": 0.3905, "step": 7051 }, { "epoch": 0.7169581130540871, "grad_norm": 0.33946582674980164, "learning_rate": 9.942755196596382e-06, "loss": 0.3822, "step": 7052 }, { "epoch": 0.717059780398536, "grad_norm": 0.33373793959617615, "learning_rate": 9.942701636517148e-06, "loss": 0.3895, "step": 7053 }, { "epoch": 0.717161447742985, "grad_norm": 0.31402045488357544, "learning_rate": 9.942648051537759e-06, "loss": 0.395, "step": 7054 }, { "epoch": 0.7172631150874339, "grad_norm": 0.31982943415641785, "learning_rate": 9.942594441658483e-06, "loss": 0.3792, "step": 7055 }, { "epoch": 0.7173647824318828, "grad_norm": 0.33642712235450745, "learning_rate": 9.94254080687959e-06, "loss": 0.4065, "step": 7056 }, { "epoch": 0.7174664497763319, "grad_norm": 0.34464070200920105, "learning_rate": 9.942487147201353e-06, "loss": 0.3898, "step": 7057 }, { "epoch": 0.7175681171207808, "grad_norm": 0.3213998079299927, "learning_rate": 9.94243346262404e-06, "loss": 0.4012, "step": 7058 }, { "epoch": 0.7176697844652298, "grad_norm": 0.3381228446960449, "learning_rate": 9.94237975314792e-06, "loss": 0.4113, "step": 7059 }, { "epoch": 0.7177714518096787, "grad_norm": 0.3575740158557892, "learning_rate": 9.942326018773265e-06, "loss": 0.4131, "step": 7060 }, { "epoch": 0.7178731191541277, "grad_norm": 0.3419772982597351, "learning_rate": 9.942272259500347e-06, "loss": 0.3962, "step": 7061 }, { "epoch": 0.7179747864985767, "grad_norm": 0.3403554856777191, "learning_rate": 9.942218475329435e-06, "loss": 0.4069, "step": 7062 }, { "epoch": 0.7180764538430257, "grad_norm": 0.36129847168922424, "learning_rate": 9.942164666260803e-06, "loss": 0.347, "step": 7063 }, { "epoch": 0.7181781211874746, "grad_norm": 0.34635215997695923, "learning_rate": 9.942110832294718e-06, "loss": 0.4353, "step": 7064 }, { "epoch": 0.7182797885319235, "grad_norm": 0.3549838364124298, "learning_rate": 9.942056973431453e-06, "loss": 0.4001, "step": 7065 }, { "epoch": 0.7183814558763725, "grad_norm": 0.36764195561408997, "learning_rate": 9.94200308967128e-06, "loss": 0.3864, "step": 7066 }, { "epoch": 0.7184831232208214, "grad_norm": 0.38914933800697327, "learning_rate": 9.941949181014469e-06, "loss": 0.4, "step": 7067 }, { "epoch": 0.7185847905652705, "grad_norm": 0.3327857255935669, "learning_rate": 9.941895247461296e-06, "loss": 0.4262, "step": 7068 }, { "epoch": 0.7186864579097194, "grad_norm": 0.3205123543739319, "learning_rate": 9.941841289012025e-06, "loss": 0.3795, "step": 7069 }, { "epoch": 0.7187881252541684, "grad_norm": 0.355976402759552, "learning_rate": 9.941787305666933e-06, "loss": 0.3821, "step": 7070 }, { "epoch": 0.7188897925986173, "grad_norm": 0.319501668214798, "learning_rate": 9.94173329742629e-06, "loss": 0.4266, "step": 7071 }, { "epoch": 0.7189914599430662, "grad_norm": 0.3652936816215515, "learning_rate": 9.94167926429037e-06, "loss": 0.3622, "step": 7072 }, { "epoch": 0.7190931272875153, "grad_norm": 0.32570913434028625, "learning_rate": 9.941625206259446e-06, "loss": 0.3789, "step": 7073 }, { "epoch": 0.7191947946319642, "grad_norm": 0.3614753782749176, "learning_rate": 9.941571123333786e-06, "loss": 0.3771, "step": 7074 }, { "epoch": 0.7192964619764132, "grad_norm": 0.35177677869796753, "learning_rate": 9.941517015513667e-06, "loss": 0.3898, "step": 7075 }, { "epoch": 0.7193981293208621, "grad_norm": 0.360809862613678, "learning_rate": 9.941462882799358e-06, "loss": 0.3977, "step": 7076 }, { "epoch": 0.719499796665311, "grad_norm": 0.3974573314189911, "learning_rate": 9.941408725191133e-06, "loss": 0.3672, "step": 7077 }, { "epoch": 0.7196014640097601, "grad_norm": 0.35634955763816833, "learning_rate": 9.941354542689266e-06, "loss": 0.3967, "step": 7078 }, { "epoch": 0.719703131354209, "grad_norm": 0.40179675817489624, "learning_rate": 9.941300335294029e-06, "loss": 0.4236, "step": 7079 }, { "epoch": 0.719804798698658, "grad_norm": 0.3582768738269806, "learning_rate": 9.941246103005694e-06, "loss": 0.4391, "step": 7080 }, { "epoch": 0.7199064660431069, "grad_norm": 0.3870300352573395, "learning_rate": 9.941191845824537e-06, "loss": 0.3929, "step": 7081 }, { "epoch": 0.7200081333875559, "grad_norm": 0.4288327097892761, "learning_rate": 9.941137563750828e-06, "loss": 0.427, "step": 7082 }, { "epoch": 0.7201098007320049, "grad_norm": 0.3621866703033447, "learning_rate": 9.941083256784845e-06, "loss": 0.4024, "step": 7083 }, { "epoch": 0.7202114680764539, "grad_norm": 0.3621918261051178, "learning_rate": 9.941028924926856e-06, "loss": 0.3906, "step": 7084 }, { "epoch": 0.7203131354209028, "grad_norm": 0.3791273832321167, "learning_rate": 9.940974568177137e-06, "loss": 0.4033, "step": 7085 }, { "epoch": 0.7204148027653517, "grad_norm": 0.3435521721839905, "learning_rate": 9.940920186535965e-06, "loss": 0.379, "step": 7086 }, { "epoch": 0.7205164701098007, "grad_norm": 0.379747211933136, "learning_rate": 9.94086578000361e-06, "loss": 0.4011, "step": 7087 }, { "epoch": 0.7206181374542497, "grad_norm": 0.3698000907897949, "learning_rate": 9.940811348580346e-06, "loss": 0.3818, "step": 7088 }, { "epoch": 0.7207198047986987, "grad_norm": 0.36919543147087097, "learning_rate": 9.940756892266449e-06, "loss": 0.3835, "step": 7089 }, { "epoch": 0.7208214721431476, "grad_norm": 0.39441215991973877, "learning_rate": 9.940702411062196e-06, "loss": 0.4137, "step": 7090 }, { "epoch": 0.7209231394875966, "grad_norm": 0.3711793124675751, "learning_rate": 9.940647904967856e-06, "loss": 0.3968, "step": 7091 }, { "epoch": 0.7210248068320455, "grad_norm": 0.3570955693721771, "learning_rate": 9.940593373983705e-06, "loss": 0.3756, "step": 7092 }, { "epoch": 0.7211264741764946, "grad_norm": 0.3595816195011139, "learning_rate": 9.94053881811002e-06, "loss": 0.3885, "step": 7093 }, { "epoch": 0.7212281415209435, "grad_norm": 0.3466241955757141, "learning_rate": 9.940484237347074e-06, "loss": 0.4134, "step": 7094 }, { "epoch": 0.7213298088653924, "grad_norm": 0.33185139298439026, "learning_rate": 9.94042963169514e-06, "loss": 0.3699, "step": 7095 }, { "epoch": 0.7214314762098414, "grad_norm": 0.38757067918777466, "learning_rate": 9.9403750011545e-06, "loss": 0.3928, "step": 7096 }, { "epoch": 0.7215331435542903, "grad_norm": 0.31258469820022583, "learning_rate": 9.940320345725424e-06, "loss": 0.3776, "step": 7097 }, { "epoch": 0.7216348108987394, "grad_norm": 0.32748177647590637, "learning_rate": 9.940265665408185e-06, "loss": 0.3727, "step": 7098 }, { "epoch": 0.7217364782431883, "grad_norm": 0.37063097953796387, "learning_rate": 9.940210960203064e-06, "loss": 0.3974, "step": 7099 }, { "epoch": 0.7218381455876373, "grad_norm": 0.3346923589706421, "learning_rate": 9.940156230110334e-06, "loss": 0.3828, "step": 7100 }, { "epoch": 0.7219398129320862, "grad_norm": 0.3466201722621918, "learning_rate": 9.94010147513027e-06, "loss": 0.3804, "step": 7101 }, { "epoch": 0.7220414802765351, "grad_norm": 0.35441821813583374, "learning_rate": 9.940046695263148e-06, "loss": 0.4005, "step": 7102 }, { "epoch": 0.7221431476209842, "grad_norm": 0.3514367640018463, "learning_rate": 9.939991890509246e-06, "loss": 0.3749, "step": 7103 }, { "epoch": 0.7222448149654331, "grad_norm": 0.38037484884262085, "learning_rate": 9.93993706086884e-06, "loss": 0.4508, "step": 7104 }, { "epoch": 0.7223464823098821, "grad_norm": 0.33915144205093384, "learning_rate": 9.939882206342203e-06, "loss": 0.4083, "step": 7105 }, { "epoch": 0.722448149654331, "grad_norm": 0.33304592967033386, "learning_rate": 9.939827326929615e-06, "loss": 0.3839, "step": 7106 }, { "epoch": 0.72254981699878, "grad_norm": 0.3341015875339508, "learning_rate": 9.939772422631349e-06, "loss": 0.427, "step": 7107 }, { "epoch": 0.7226514843432289, "grad_norm": 0.34995195269584656, "learning_rate": 9.939717493447684e-06, "loss": 0.3857, "step": 7108 }, { "epoch": 0.722753151687678, "grad_norm": 0.31787487864494324, "learning_rate": 9.939662539378895e-06, "loss": 0.3888, "step": 7109 }, { "epoch": 0.7228548190321269, "grad_norm": 0.3182162344455719, "learning_rate": 9.939607560425261e-06, "loss": 0.3853, "step": 7110 }, { "epoch": 0.7229564863765758, "grad_norm": 0.31510189175605774, "learning_rate": 9.93955255658706e-06, "loss": 0.3916, "step": 7111 }, { "epoch": 0.7230581537210248, "grad_norm": 0.321139931678772, "learning_rate": 9.939497527864565e-06, "loss": 0.401, "step": 7112 }, { "epoch": 0.7231598210654737, "grad_norm": 0.35070154070854187, "learning_rate": 9.939442474258055e-06, "loss": 0.3963, "step": 7113 }, { "epoch": 0.7232614884099228, "grad_norm": 0.335675448179245, "learning_rate": 9.939387395767808e-06, "loss": 0.3715, "step": 7114 }, { "epoch": 0.7233631557543717, "grad_norm": 0.33930233120918274, "learning_rate": 9.939332292394102e-06, "loss": 0.3684, "step": 7115 }, { "epoch": 0.7234648230988207, "grad_norm": 0.33496788144111633, "learning_rate": 9.939277164137213e-06, "loss": 0.3906, "step": 7116 }, { "epoch": 0.7235664904432696, "grad_norm": 0.34313860535621643, "learning_rate": 9.939222010997418e-06, "loss": 0.3872, "step": 7117 }, { "epoch": 0.7236681577877185, "grad_norm": 0.3253977596759796, "learning_rate": 9.939166832974997e-06, "loss": 0.4091, "step": 7118 }, { "epoch": 0.7237698251321676, "grad_norm": 0.33335310220718384, "learning_rate": 9.939111630070228e-06, "loss": 0.3711, "step": 7119 }, { "epoch": 0.7238714924766165, "grad_norm": 0.3428157567977905, "learning_rate": 9.93905640228339e-06, "loss": 0.4028, "step": 7120 }, { "epoch": 0.7239731598210655, "grad_norm": 0.31243404746055603, "learning_rate": 9.939001149614757e-06, "loss": 0.3548, "step": 7121 }, { "epoch": 0.7240748271655144, "grad_norm": 0.3353498578071594, "learning_rate": 9.93894587206461e-06, "loss": 0.4084, "step": 7122 }, { "epoch": 0.7241764945099634, "grad_norm": 0.3792893886566162, "learning_rate": 9.938890569633228e-06, "loss": 0.3814, "step": 7123 }, { "epoch": 0.7242781618544124, "grad_norm": 0.33158332109451294, "learning_rate": 9.93883524232089e-06, "loss": 0.3685, "step": 7124 }, { "epoch": 0.7243798291988613, "grad_norm": 0.3640766143798828, "learning_rate": 9.938779890127872e-06, "loss": 0.4084, "step": 7125 }, { "epoch": 0.7244814965433103, "grad_norm": 0.3892062306404114, "learning_rate": 9.938724513054454e-06, "loss": 0.3846, "step": 7126 }, { "epoch": 0.7245831638877592, "grad_norm": 0.33475083112716675, "learning_rate": 9.938669111100918e-06, "loss": 0.3967, "step": 7127 }, { "epoch": 0.7246848312322082, "grad_norm": 0.3201891779899597, "learning_rate": 9.938613684267539e-06, "loss": 0.4024, "step": 7128 }, { "epoch": 0.7247864985766572, "grad_norm": 0.35146474838256836, "learning_rate": 9.938558232554597e-06, "loss": 0.4004, "step": 7129 }, { "epoch": 0.7248881659211062, "grad_norm": 0.34761542081832886, "learning_rate": 9.938502755962374e-06, "loss": 0.3703, "step": 7130 }, { "epoch": 0.7249898332655551, "grad_norm": 0.32921579480171204, "learning_rate": 9.938447254491145e-06, "loss": 0.4083, "step": 7131 }, { "epoch": 0.725091500610004, "grad_norm": 0.3599958121776581, "learning_rate": 9.938391728141195e-06, "loss": 0.434, "step": 7132 }, { "epoch": 0.725193167954453, "grad_norm": 0.37913021445274353, "learning_rate": 9.938336176912802e-06, "loss": 0.395, "step": 7133 }, { "epoch": 0.725294835298902, "grad_norm": 0.39461463689804077, "learning_rate": 9.938280600806243e-06, "loss": 0.4116, "step": 7134 }, { "epoch": 0.725396502643351, "grad_norm": 0.31456804275512695, "learning_rate": 9.938224999821799e-06, "loss": 0.3776, "step": 7135 }, { "epoch": 0.7254981699877999, "grad_norm": 0.36339664459228516, "learning_rate": 9.938169373959752e-06, "loss": 0.4441, "step": 7136 }, { "epoch": 0.7255998373322489, "grad_norm": 0.39064887166023254, "learning_rate": 9.938113723220382e-06, "loss": 0.4102, "step": 7137 }, { "epoch": 0.7257015046766978, "grad_norm": 0.3436189591884613, "learning_rate": 9.938058047603967e-06, "loss": 0.3813, "step": 7138 }, { "epoch": 0.7258031720211469, "grad_norm": 0.37797510623931885, "learning_rate": 9.938002347110789e-06, "loss": 0.4075, "step": 7139 }, { "epoch": 0.7259048393655958, "grad_norm": 0.3562437891960144, "learning_rate": 9.93794662174113e-06, "loss": 0.3936, "step": 7140 }, { "epoch": 0.7260065067100447, "grad_norm": 0.33974114060401917, "learning_rate": 9.937890871495268e-06, "loss": 0.3652, "step": 7141 }, { "epoch": 0.7261081740544937, "grad_norm": 0.400656133890152, "learning_rate": 9.937835096373487e-06, "loss": 0.3989, "step": 7142 }, { "epoch": 0.7262098413989426, "grad_norm": 0.3441004455089569, "learning_rate": 9.937779296376065e-06, "loss": 0.3924, "step": 7143 }, { "epoch": 0.7263115087433917, "grad_norm": 0.3397640287876129, "learning_rate": 9.937723471503285e-06, "loss": 0.3832, "step": 7144 }, { "epoch": 0.7264131760878406, "grad_norm": 0.34441065788269043, "learning_rate": 9.937667621755425e-06, "loss": 0.3625, "step": 7145 }, { "epoch": 0.7265148434322896, "grad_norm": 0.31404197216033936, "learning_rate": 9.937611747132771e-06, "loss": 0.4153, "step": 7146 }, { "epoch": 0.7266165107767385, "grad_norm": 0.3463251292705536, "learning_rate": 9.937555847635602e-06, "loss": 0.3875, "step": 7147 }, { "epoch": 0.7267181781211874, "grad_norm": 0.33500319719314575, "learning_rate": 9.937499923264201e-06, "loss": 0.4053, "step": 7148 }, { "epoch": 0.7268198454656364, "grad_norm": 0.3223843574523926, "learning_rate": 9.937443974018848e-06, "loss": 0.4129, "step": 7149 }, { "epoch": 0.7269215128100854, "grad_norm": 0.36541229486465454, "learning_rate": 9.937387999899826e-06, "loss": 0.3597, "step": 7150 }, { "epoch": 0.7270231801545344, "grad_norm": 0.3313375413417816, "learning_rate": 9.937332000907416e-06, "loss": 0.4072, "step": 7151 }, { "epoch": 0.7271248474989833, "grad_norm": 0.3368203043937683, "learning_rate": 9.9372759770419e-06, "loss": 0.3836, "step": 7152 }, { "epoch": 0.7272265148434323, "grad_norm": 0.3445909023284912, "learning_rate": 9.937219928303562e-06, "loss": 0.3879, "step": 7153 }, { "epoch": 0.7273281821878812, "grad_norm": 0.3350103497505188, "learning_rate": 9.937163854692682e-06, "loss": 0.4032, "step": 7154 }, { "epoch": 0.7274298495323303, "grad_norm": 0.35123011469841003, "learning_rate": 9.937107756209546e-06, "loss": 0.3911, "step": 7155 }, { "epoch": 0.7275315168767792, "grad_norm": 0.35528290271759033, "learning_rate": 9.937051632854432e-06, "loss": 0.3934, "step": 7156 }, { "epoch": 0.7276331842212281, "grad_norm": 0.35575100779533386, "learning_rate": 9.936995484627627e-06, "loss": 0.4012, "step": 7157 }, { "epoch": 0.7277348515656771, "grad_norm": 0.35122933983802795, "learning_rate": 9.93693931152941e-06, "loss": 0.392, "step": 7158 }, { "epoch": 0.727836518910126, "grad_norm": 0.3692682087421417, "learning_rate": 9.936883113560067e-06, "loss": 0.4007, "step": 7159 }, { "epoch": 0.7279381862545751, "grad_norm": 0.34558945894241333, "learning_rate": 9.93682689071988e-06, "loss": 0.4028, "step": 7160 }, { "epoch": 0.728039853599024, "grad_norm": 0.3322213590145111, "learning_rate": 9.936770643009133e-06, "loss": 0.3789, "step": 7161 }, { "epoch": 0.728141520943473, "grad_norm": 0.40123802423477173, "learning_rate": 9.936714370428107e-06, "loss": 0.3938, "step": 7162 }, { "epoch": 0.7282431882879219, "grad_norm": 0.3336865305900574, "learning_rate": 9.936658072977089e-06, "loss": 0.3695, "step": 7163 }, { "epoch": 0.7283448556323708, "grad_norm": 0.3407234251499176, "learning_rate": 9.93660175065636e-06, "loss": 0.3859, "step": 7164 }, { "epoch": 0.7284465229768199, "grad_norm": 0.3065173327922821, "learning_rate": 9.936545403466205e-06, "loss": 0.3957, "step": 7165 }, { "epoch": 0.7285481903212688, "grad_norm": 0.33463746309280396, "learning_rate": 9.936489031406906e-06, "loss": 0.4118, "step": 7166 }, { "epoch": 0.7286498576657178, "grad_norm": 0.36000367999076843, "learning_rate": 9.936432634478748e-06, "loss": 0.363, "step": 7167 }, { "epoch": 0.7287515250101667, "grad_norm": 0.33943092823028564, "learning_rate": 9.936376212682017e-06, "loss": 0.3817, "step": 7168 }, { "epoch": 0.7288531923546157, "grad_norm": 0.35371318459510803, "learning_rate": 9.936319766016996e-06, "loss": 0.3796, "step": 7169 }, { "epoch": 0.7289548596990647, "grad_norm": 0.3747350573539734, "learning_rate": 9.936263294483968e-06, "loss": 0.4285, "step": 7170 }, { "epoch": 0.7290565270435136, "grad_norm": 0.38714149594306946, "learning_rate": 9.93620679808322e-06, "loss": 0.396, "step": 7171 }, { "epoch": 0.7291581943879626, "grad_norm": 0.3588159680366516, "learning_rate": 9.936150276815032e-06, "loss": 0.3715, "step": 7172 }, { "epoch": 0.7292598617324115, "grad_norm": 0.4012594521045685, "learning_rate": 9.936093730679694e-06, "loss": 0.4229, "step": 7173 }, { "epoch": 0.7293615290768605, "grad_norm": 0.3250674605369568, "learning_rate": 9.93603715967749e-06, "loss": 0.3736, "step": 7174 }, { "epoch": 0.7294631964213095, "grad_norm": 0.38753241300582886, "learning_rate": 9.935980563808702e-06, "loss": 0.4205, "step": 7175 }, { "epoch": 0.7295648637657585, "grad_norm": 0.40430834889411926, "learning_rate": 9.935923943073617e-06, "loss": 0.4203, "step": 7176 }, { "epoch": 0.7296665311102074, "grad_norm": 0.3526008427143097, "learning_rate": 9.935867297472522e-06, "loss": 0.3781, "step": 7177 }, { "epoch": 0.7297681984546563, "grad_norm": 0.4176056683063507, "learning_rate": 9.935810627005698e-06, "loss": 0.3944, "step": 7178 }, { "epoch": 0.7298698657991053, "grad_norm": 0.3760218322277069, "learning_rate": 9.935753931673435e-06, "loss": 0.3896, "step": 7179 }, { "epoch": 0.7299715331435543, "grad_norm": 0.3802199065685272, "learning_rate": 9.935697211476014e-06, "loss": 0.3877, "step": 7180 }, { "epoch": 0.7300732004880033, "grad_norm": 0.37623968720436096, "learning_rate": 9.935640466413725e-06, "loss": 0.3951, "step": 7181 }, { "epoch": 0.7301748678324522, "grad_norm": 0.3282391428947449, "learning_rate": 9.935583696486853e-06, "loss": 0.3838, "step": 7182 }, { "epoch": 0.7302765351769012, "grad_norm": 0.39710086584091187, "learning_rate": 9.935526901695683e-06, "loss": 0.4268, "step": 7183 }, { "epoch": 0.7303782025213501, "grad_norm": 0.3429011106491089, "learning_rate": 9.9354700820405e-06, "loss": 0.4116, "step": 7184 }, { "epoch": 0.7304798698657992, "grad_norm": 0.34201696515083313, "learning_rate": 9.935413237521591e-06, "loss": 0.4225, "step": 7185 }, { "epoch": 0.7305815372102481, "grad_norm": 0.3823084831237793, "learning_rate": 9.935356368139246e-06, "loss": 0.3737, "step": 7186 }, { "epoch": 0.730683204554697, "grad_norm": 0.34189751744270325, "learning_rate": 9.935299473893746e-06, "loss": 0.404, "step": 7187 }, { "epoch": 0.730784871899146, "grad_norm": 0.3947100043296814, "learning_rate": 9.935242554785382e-06, "loss": 0.4423, "step": 7188 }, { "epoch": 0.7308865392435949, "grad_norm": 0.376081645488739, "learning_rate": 9.935185610814437e-06, "loss": 0.4109, "step": 7189 }, { "epoch": 0.7309882065880439, "grad_norm": 0.33121705055236816, "learning_rate": 9.9351286419812e-06, "loss": 0.391, "step": 7190 }, { "epoch": 0.7310898739324929, "grad_norm": 0.33912214636802673, "learning_rate": 9.935071648285958e-06, "loss": 0.3855, "step": 7191 }, { "epoch": 0.7311915412769419, "grad_norm": 0.41981789469718933, "learning_rate": 9.935014629728998e-06, "loss": 0.3768, "step": 7192 }, { "epoch": 0.7312932086213908, "grad_norm": 0.32730814814567566, "learning_rate": 9.934957586310607e-06, "loss": 0.3661, "step": 7193 }, { "epoch": 0.7313948759658397, "grad_norm": 0.34992748498916626, "learning_rate": 9.934900518031072e-06, "loss": 0.384, "step": 7194 }, { "epoch": 0.7314965433102887, "grad_norm": 0.3747546374797821, "learning_rate": 9.93484342489068e-06, "loss": 0.4077, "step": 7195 }, { "epoch": 0.7315982106547377, "grad_norm": 0.36007291078567505, "learning_rate": 9.93478630688972e-06, "loss": 0.4203, "step": 7196 }, { "epoch": 0.7316998779991867, "grad_norm": 0.32503587007522583, "learning_rate": 9.93472916402848e-06, "loss": 0.3759, "step": 7197 }, { "epoch": 0.7318015453436356, "grad_norm": 0.3475700616836548, "learning_rate": 9.934671996307248e-06, "loss": 0.4191, "step": 7198 }, { "epoch": 0.7319032126880846, "grad_norm": 0.3613230586051941, "learning_rate": 9.93461480372631e-06, "loss": 0.3931, "step": 7199 }, { "epoch": 0.7320048800325335, "grad_norm": 0.33495959639549255, "learning_rate": 9.934557586285956e-06, "loss": 0.3852, "step": 7200 }, { "epoch": 0.7321065473769826, "grad_norm": 0.33644720911979675, "learning_rate": 9.934500343986473e-06, "loss": 0.398, "step": 7201 }, { "epoch": 0.7322082147214315, "grad_norm": 0.36196810007095337, "learning_rate": 9.934443076828148e-06, "loss": 0.4152, "step": 7202 }, { "epoch": 0.7323098820658804, "grad_norm": 0.3527570366859436, "learning_rate": 9.934385784811274e-06, "loss": 0.3841, "step": 7203 }, { "epoch": 0.7324115494103294, "grad_norm": 0.3397977948188782, "learning_rate": 9.934328467936136e-06, "loss": 0.4117, "step": 7204 }, { "epoch": 0.7325132167547783, "grad_norm": 0.33963310718536377, "learning_rate": 9.934271126203025e-06, "loss": 0.3931, "step": 7205 }, { "epoch": 0.7326148840992274, "grad_norm": 0.32958295941352844, "learning_rate": 9.934213759612226e-06, "loss": 0.4032, "step": 7206 }, { "epoch": 0.7327165514436763, "grad_norm": 0.3376585841178894, "learning_rate": 9.934156368164033e-06, "loss": 0.396, "step": 7207 }, { "epoch": 0.7328182187881253, "grad_norm": 0.35615816712379456, "learning_rate": 9.934098951858733e-06, "loss": 0.411, "step": 7208 }, { "epoch": 0.7329198861325742, "grad_norm": 0.36259540915489197, "learning_rate": 9.934041510696612e-06, "loss": 0.4047, "step": 7209 }, { "epoch": 0.7330215534770231, "grad_norm": 0.30966895818710327, "learning_rate": 9.933984044677965e-06, "loss": 0.3691, "step": 7210 }, { "epoch": 0.7331232208214722, "grad_norm": 0.32848045229911804, "learning_rate": 9.93392655380308e-06, "loss": 0.389, "step": 7211 }, { "epoch": 0.7332248881659211, "grad_norm": 0.3564067780971527, "learning_rate": 9.933869038072242e-06, "loss": 0.4156, "step": 7212 }, { "epoch": 0.7333265555103701, "grad_norm": 0.3578697144985199, "learning_rate": 9.933811497485746e-06, "loss": 0.3897, "step": 7213 }, { "epoch": 0.733428222854819, "grad_norm": 0.3247033953666687, "learning_rate": 9.93375393204388e-06, "loss": 0.3851, "step": 7214 }, { "epoch": 0.733529890199268, "grad_norm": 0.3326911926269531, "learning_rate": 9.933696341746935e-06, "loss": 0.3768, "step": 7215 }, { "epoch": 0.733631557543717, "grad_norm": 0.32711341977119446, "learning_rate": 9.9336387265952e-06, "loss": 0.3929, "step": 7216 }, { "epoch": 0.733733224888166, "grad_norm": 0.3371213972568512, "learning_rate": 9.933581086588963e-06, "loss": 0.394, "step": 7217 }, { "epoch": 0.7338348922326149, "grad_norm": 0.3342376947402954, "learning_rate": 9.93352342172852e-06, "loss": 0.3673, "step": 7218 }, { "epoch": 0.7339365595770638, "grad_norm": 0.3210850656032562, "learning_rate": 9.933465732014157e-06, "loss": 0.4192, "step": 7219 }, { "epoch": 0.7340382269215128, "grad_norm": 0.36690935492515564, "learning_rate": 9.933408017446167e-06, "loss": 0.4309, "step": 7220 }, { "epoch": 0.7341398942659618, "grad_norm": 0.3420347273349762, "learning_rate": 9.933350278024838e-06, "loss": 0.3769, "step": 7221 }, { "epoch": 0.7342415616104108, "grad_norm": 0.31234079599380493, "learning_rate": 9.933292513750463e-06, "loss": 0.3776, "step": 7222 }, { "epoch": 0.7343432289548597, "grad_norm": 0.3364858329296112, "learning_rate": 9.933234724623333e-06, "loss": 0.4022, "step": 7223 }, { "epoch": 0.7344448962993086, "grad_norm": 0.33434394001960754, "learning_rate": 9.93317691064374e-06, "loss": 0.4072, "step": 7224 }, { "epoch": 0.7345465636437576, "grad_norm": 0.34000515937805176, "learning_rate": 9.933119071811973e-06, "loss": 0.3923, "step": 7225 }, { "epoch": 0.7346482309882066, "grad_norm": 0.3780350387096405, "learning_rate": 9.933061208128324e-06, "loss": 0.3929, "step": 7226 }, { "epoch": 0.7347498983326556, "grad_norm": 0.3613113462924957, "learning_rate": 9.933003319593086e-06, "loss": 0.3882, "step": 7227 }, { "epoch": 0.7348515656771045, "grad_norm": 0.3231028914451599, "learning_rate": 9.932945406206548e-06, "loss": 0.393, "step": 7228 }, { "epoch": 0.7349532330215535, "grad_norm": 0.3328721225261688, "learning_rate": 9.932887467969005e-06, "loss": 0.3877, "step": 7229 }, { "epoch": 0.7350549003660024, "grad_norm": 0.37185046076774597, "learning_rate": 9.932829504880748e-06, "loss": 0.37, "step": 7230 }, { "epoch": 0.7351565677104515, "grad_norm": 0.3476461172103882, "learning_rate": 9.932771516942066e-06, "loss": 0.3902, "step": 7231 }, { "epoch": 0.7352582350549004, "grad_norm": 0.3353292644023895, "learning_rate": 9.932713504153252e-06, "loss": 0.3866, "step": 7232 }, { "epoch": 0.7353599023993493, "grad_norm": 0.34100985527038574, "learning_rate": 9.932655466514603e-06, "loss": 0.3892, "step": 7233 }, { "epoch": 0.7354615697437983, "grad_norm": 0.3477371335029602, "learning_rate": 9.932597404026407e-06, "loss": 0.3785, "step": 7234 }, { "epoch": 0.7355632370882472, "grad_norm": 0.33493342995643616, "learning_rate": 9.932539316688956e-06, "loss": 0.3982, "step": 7235 }, { "epoch": 0.7356649044326962, "grad_norm": 0.3658131957054138, "learning_rate": 9.932481204502546e-06, "loss": 0.4106, "step": 7236 }, { "epoch": 0.7357665717771452, "grad_norm": 0.3721277117729187, "learning_rate": 9.932423067467466e-06, "loss": 0.4009, "step": 7237 }, { "epoch": 0.7358682391215942, "grad_norm": 0.34916722774505615, "learning_rate": 9.932364905584011e-06, "loss": 0.3931, "step": 7238 }, { "epoch": 0.7359699064660431, "grad_norm": 0.34898999333381653, "learning_rate": 9.932306718852474e-06, "loss": 0.3935, "step": 7239 }, { "epoch": 0.736071573810492, "grad_norm": 0.34906619787216187, "learning_rate": 9.932248507273149e-06, "loss": 0.3538, "step": 7240 }, { "epoch": 0.736173241154941, "grad_norm": 0.3730435371398926, "learning_rate": 9.932190270846327e-06, "loss": 0.3774, "step": 7241 }, { "epoch": 0.73627490849939, "grad_norm": 0.3844445049762726, "learning_rate": 9.932132009572301e-06, "loss": 0.4139, "step": 7242 }, { "epoch": 0.736376575843839, "grad_norm": 0.34425756335258484, "learning_rate": 9.93207372345137e-06, "loss": 0.3694, "step": 7243 }, { "epoch": 0.7364782431882879, "grad_norm": 0.36479318141937256, "learning_rate": 9.93201541248382e-06, "loss": 0.385, "step": 7244 }, { "epoch": 0.7365799105327369, "grad_norm": 0.36527499556541443, "learning_rate": 9.93195707666995e-06, "loss": 0.3733, "step": 7245 }, { "epoch": 0.7366815778771858, "grad_norm": 0.3472275733947754, "learning_rate": 9.93189871601005e-06, "loss": 0.4081, "step": 7246 }, { "epoch": 0.7367832452216349, "grad_norm": 0.3677486777305603, "learning_rate": 9.931840330504419e-06, "loss": 0.3828, "step": 7247 }, { "epoch": 0.7368849125660838, "grad_norm": 0.3379504680633545, "learning_rate": 9.931781920153348e-06, "loss": 0.3929, "step": 7248 }, { "epoch": 0.7369865799105327, "grad_norm": 0.34524521231651306, "learning_rate": 9.931723484957131e-06, "loss": 0.3686, "step": 7249 }, { "epoch": 0.7370882472549817, "grad_norm": 0.33898183703422546, "learning_rate": 9.931665024916065e-06, "loss": 0.386, "step": 7250 }, { "epoch": 0.7371899145994306, "grad_norm": 0.3532172441482544, "learning_rate": 9.93160654003044e-06, "loss": 0.3851, "step": 7251 }, { "epoch": 0.7372915819438797, "grad_norm": 0.3855576813220978, "learning_rate": 9.931548030300553e-06, "loss": 0.4139, "step": 7252 }, { "epoch": 0.7373932492883286, "grad_norm": 0.3237709701061249, "learning_rate": 9.931489495726701e-06, "loss": 0.4005, "step": 7253 }, { "epoch": 0.7374949166327776, "grad_norm": 0.3499559462070465, "learning_rate": 9.931430936309176e-06, "loss": 0.3736, "step": 7254 }, { "epoch": 0.7375965839772265, "grad_norm": 0.3904954791069031, "learning_rate": 9.931372352048273e-06, "loss": 0.3896, "step": 7255 }, { "epoch": 0.7376982513216754, "grad_norm": 0.3256974220275879, "learning_rate": 9.931313742944289e-06, "loss": 0.3589, "step": 7256 }, { "epoch": 0.7377999186661245, "grad_norm": 0.3723755180835724, "learning_rate": 9.931255108997517e-06, "loss": 0.3683, "step": 7257 }, { "epoch": 0.7379015860105734, "grad_norm": 0.36607620120048523, "learning_rate": 9.931196450208254e-06, "loss": 0.3666, "step": 7258 }, { "epoch": 0.7380032533550224, "grad_norm": 0.3730389475822449, "learning_rate": 9.931137766576796e-06, "loss": 0.4004, "step": 7259 }, { "epoch": 0.7381049206994713, "grad_norm": 0.37920430302619934, "learning_rate": 9.931079058103436e-06, "loss": 0.393, "step": 7260 }, { "epoch": 0.7382065880439203, "grad_norm": 0.3256927728652954, "learning_rate": 9.931020324788473e-06, "loss": 0.385, "step": 7261 }, { "epoch": 0.7383082553883693, "grad_norm": 0.3864113688468933, "learning_rate": 9.930961566632203e-06, "loss": 0.3919, "step": 7262 }, { "epoch": 0.7384099227328182, "grad_norm": 0.33318087458610535, "learning_rate": 9.930902783634917e-06, "loss": 0.3865, "step": 7263 }, { "epoch": 0.7385115900772672, "grad_norm": 0.3211784064769745, "learning_rate": 9.930843975796916e-06, "loss": 0.3947, "step": 7264 }, { "epoch": 0.7386132574217161, "grad_norm": 0.3391818404197693, "learning_rate": 9.930785143118496e-06, "loss": 0.3823, "step": 7265 }, { "epoch": 0.7387149247661651, "grad_norm": 0.3650593161582947, "learning_rate": 9.93072628559995e-06, "loss": 0.3954, "step": 7266 }, { "epoch": 0.7388165921106141, "grad_norm": 0.3281420171260834, "learning_rate": 9.930667403241579e-06, "loss": 0.4022, "step": 7267 }, { "epoch": 0.7389182594550631, "grad_norm": 0.36157122254371643, "learning_rate": 9.930608496043674e-06, "loss": 0.421, "step": 7268 }, { "epoch": 0.739019926799512, "grad_norm": 0.3623455762863159, "learning_rate": 9.930549564006539e-06, "loss": 0.3987, "step": 7269 }, { "epoch": 0.739121594143961, "grad_norm": 0.32810935378074646, "learning_rate": 9.930490607130465e-06, "loss": 0.3979, "step": 7270 }, { "epoch": 0.7392232614884099, "grad_norm": 0.38455721735954285, "learning_rate": 9.93043162541575e-06, "loss": 0.4071, "step": 7271 }, { "epoch": 0.7393249288328589, "grad_norm": 0.35870271921157837, "learning_rate": 9.930372618862694e-06, "loss": 0.3869, "step": 7272 }, { "epoch": 0.7394265961773079, "grad_norm": 0.3380841612815857, "learning_rate": 9.930313587471592e-06, "loss": 0.402, "step": 7273 }, { "epoch": 0.7395282635217568, "grad_norm": 0.3569449782371521, "learning_rate": 9.93025453124274e-06, "loss": 0.4095, "step": 7274 }, { "epoch": 0.7396299308662058, "grad_norm": 0.34107181429862976, "learning_rate": 9.930195450176439e-06, "loss": 0.4164, "step": 7275 }, { "epoch": 0.7397315982106547, "grad_norm": 0.3381873369216919, "learning_rate": 9.930136344272984e-06, "loss": 0.4043, "step": 7276 }, { "epoch": 0.7398332655551036, "grad_norm": 0.35917580127716064, "learning_rate": 9.930077213532676e-06, "loss": 0.3631, "step": 7277 }, { "epoch": 0.7399349328995527, "grad_norm": 0.3590680956840515, "learning_rate": 9.930018057955808e-06, "loss": 0.4091, "step": 7278 }, { "epoch": 0.7400366002440016, "grad_norm": 0.3113015294075012, "learning_rate": 9.929958877542681e-06, "loss": 0.4128, "step": 7279 }, { "epoch": 0.7401382675884506, "grad_norm": 0.3793167173862457, "learning_rate": 9.929899672293594e-06, "loss": 0.4607, "step": 7280 }, { "epoch": 0.7402399349328995, "grad_norm": 0.35009661316871643, "learning_rate": 9.929840442208843e-06, "loss": 0.3853, "step": 7281 }, { "epoch": 0.7403416022773485, "grad_norm": 0.3289245665073395, "learning_rate": 9.929781187288728e-06, "loss": 0.37, "step": 7282 }, { "epoch": 0.7404432696217975, "grad_norm": 0.3450219929218292, "learning_rate": 9.929721907533544e-06, "loss": 0.4086, "step": 7283 }, { "epoch": 0.7405449369662465, "grad_norm": 0.36904177069664, "learning_rate": 9.929662602943597e-06, "loss": 0.3776, "step": 7284 }, { "epoch": 0.7406466043106954, "grad_norm": 0.3183918297290802, "learning_rate": 9.929603273519179e-06, "loss": 0.3873, "step": 7285 }, { "epoch": 0.7407482716551443, "grad_norm": 0.34991273283958435, "learning_rate": 9.929543919260592e-06, "loss": 0.3966, "step": 7286 }, { "epoch": 0.7408499389995933, "grad_norm": 0.3307282328605652, "learning_rate": 9.929484540168133e-06, "loss": 0.3818, "step": 7287 }, { "epoch": 0.7409516063440423, "grad_norm": 0.38023266196250916, "learning_rate": 9.9294251362421e-06, "loss": 0.4354, "step": 7288 }, { "epoch": 0.7410532736884913, "grad_norm": 0.32513320446014404, "learning_rate": 9.929365707482798e-06, "loss": 0.3554, "step": 7289 }, { "epoch": 0.7411549410329402, "grad_norm": 0.33179110288619995, "learning_rate": 9.929306253890524e-06, "loss": 0.4012, "step": 7290 }, { "epoch": 0.7412566083773892, "grad_norm": 0.3286225199699402, "learning_rate": 9.929246775465576e-06, "loss": 0.3757, "step": 7291 }, { "epoch": 0.7413582757218381, "grad_norm": 0.32320740818977356, "learning_rate": 9.929187272208252e-06, "loss": 0.4135, "step": 7292 }, { "epoch": 0.7414599430662872, "grad_norm": 0.31874024868011475, "learning_rate": 9.929127744118854e-06, "loss": 0.4132, "step": 7293 }, { "epoch": 0.7415616104107361, "grad_norm": 0.33224958181381226, "learning_rate": 9.929068191197683e-06, "loss": 0.4012, "step": 7294 }, { "epoch": 0.741663277755185, "grad_norm": 0.334399551153183, "learning_rate": 9.929008613445036e-06, "loss": 0.4031, "step": 7295 }, { "epoch": 0.741764945099634, "grad_norm": 0.30860796570777893, "learning_rate": 9.928949010861217e-06, "loss": 0.4024, "step": 7296 }, { "epoch": 0.7418666124440829, "grad_norm": 0.33545932173728943, "learning_rate": 9.928889383446522e-06, "loss": 0.3848, "step": 7297 }, { "epoch": 0.741968279788532, "grad_norm": 0.3655296564102173, "learning_rate": 9.928829731201258e-06, "loss": 0.3819, "step": 7298 }, { "epoch": 0.7420699471329809, "grad_norm": 0.34208953380584717, "learning_rate": 9.928770054125715e-06, "loss": 0.4114, "step": 7299 }, { "epoch": 0.7421716144774299, "grad_norm": 0.3224583566188812, "learning_rate": 9.928710352220204e-06, "loss": 0.401, "step": 7300 }, { "epoch": 0.7422732818218788, "grad_norm": 0.34495508670806885, "learning_rate": 9.92865062548502e-06, "loss": 0.4124, "step": 7301 }, { "epoch": 0.7423749491663277, "grad_norm": 0.34037861227989197, "learning_rate": 9.928590873920466e-06, "loss": 0.4078, "step": 7302 }, { "epoch": 0.7424766165107768, "grad_norm": 0.582964301109314, "learning_rate": 9.928531097526843e-06, "loss": 0.4076, "step": 7303 }, { "epoch": 0.7425782838552257, "grad_norm": 0.3639208674430847, "learning_rate": 9.92847129630445e-06, "loss": 0.4095, "step": 7304 }, { "epoch": 0.7426799511996747, "grad_norm": 0.32539474964141846, "learning_rate": 9.92841147025359e-06, "loss": 0.3995, "step": 7305 }, { "epoch": 0.7427816185441236, "grad_norm": 0.33495455980300903, "learning_rate": 9.928351619374566e-06, "loss": 0.3727, "step": 7306 }, { "epoch": 0.7428832858885726, "grad_norm": 0.3387826383113861, "learning_rate": 9.928291743667675e-06, "loss": 0.3606, "step": 7307 }, { "epoch": 0.7429849532330216, "grad_norm": 0.3185983896255493, "learning_rate": 9.928231843133222e-06, "loss": 0.3993, "step": 7308 }, { "epoch": 0.7430866205774705, "grad_norm": 0.3574348986148834, "learning_rate": 9.92817191777151e-06, "loss": 0.4154, "step": 7309 }, { "epoch": 0.7431882879219195, "grad_norm": 0.3320039212703705, "learning_rate": 9.928111967582836e-06, "loss": 0.4164, "step": 7310 }, { "epoch": 0.7432899552663684, "grad_norm": 0.3483724892139435, "learning_rate": 9.928051992567507e-06, "loss": 0.4302, "step": 7311 }, { "epoch": 0.7433916226108174, "grad_norm": 0.35352376103401184, "learning_rate": 9.927991992725824e-06, "loss": 0.43, "step": 7312 }, { "epoch": 0.7434932899552664, "grad_norm": 0.3327808976173401, "learning_rate": 9.927931968058087e-06, "loss": 0.4007, "step": 7313 }, { "epoch": 0.7435949572997154, "grad_norm": 0.3403376042842865, "learning_rate": 9.9278719185646e-06, "loss": 0.4541, "step": 7314 }, { "epoch": 0.7436966246441643, "grad_norm": 0.32486552000045776, "learning_rate": 9.927811844245665e-06, "loss": 0.3922, "step": 7315 }, { "epoch": 0.7437982919886132, "grad_norm": 0.3617769479751587, "learning_rate": 9.927751745101586e-06, "loss": 0.3779, "step": 7316 }, { "epoch": 0.7438999593330622, "grad_norm": 0.3477579355239868, "learning_rate": 9.927691621132664e-06, "loss": 0.3844, "step": 7317 }, { "epoch": 0.7440016266775111, "grad_norm": 0.3445749878883362, "learning_rate": 9.927631472339201e-06, "loss": 0.4186, "step": 7318 }, { "epoch": 0.7441032940219602, "grad_norm": 0.3612663447856903, "learning_rate": 9.927571298721504e-06, "loss": 0.4291, "step": 7319 }, { "epoch": 0.7442049613664091, "grad_norm": 0.3685816526412964, "learning_rate": 9.927511100279874e-06, "loss": 0.4187, "step": 7320 }, { "epoch": 0.7443066287108581, "grad_norm": 0.3272557854652405, "learning_rate": 9.927450877014613e-06, "loss": 0.402, "step": 7321 }, { "epoch": 0.744408296055307, "grad_norm": 0.32054561376571655, "learning_rate": 9.927390628926026e-06, "loss": 0.374, "step": 7322 }, { "epoch": 0.744509963399756, "grad_norm": 0.33474642038345337, "learning_rate": 9.927330356014415e-06, "loss": 0.3888, "step": 7323 }, { "epoch": 0.744611630744205, "grad_norm": 0.35799962282180786, "learning_rate": 9.927270058280085e-06, "loss": 0.3806, "step": 7324 }, { "epoch": 0.7447132980886539, "grad_norm": 0.32371556758880615, "learning_rate": 9.927209735723338e-06, "loss": 0.3935, "step": 7325 }, { "epoch": 0.7448149654331029, "grad_norm": 0.3701343238353729, "learning_rate": 9.927149388344482e-06, "loss": 0.4223, "step": 7326 }, { "epoch": 0.7449166327775518, "grad_norm": 0.37366122007369995, "learning_rate": 9.927089016143816e-06, "loss": 0.3969, "step": 7327 }, { "epoch": 0.7450183001220008, "grad_norm": 0.3432758152484894, "learning_rate": 9.927028619121648e-06, "loss": 0.3693, "step": 7328 }, { "epoch": 0.7451199674664498, "grad_norm": 0.34699708223342896, "learning_rate": 9.926968197278279e-06, "loss": 0.3652, "step": 7329 }, { "epoch": 0.7452216348108988, "grad_norm": 0.3494661748409271, "learning_rate": 9.926907750614017e-06, "loss": 0.401, "step": 7330 }, { "epoch": 0.7453233021553477, "grad_norm": 0.3367727994918823, "learning_rate": 9.926847279129164e-06, "loss": 0.3725, "step": 7331 }, { "epoch": 0.7454249694997966, "grad_norm": 0.3378113806247711, "learning_rate": 9.926786782824024e-06, "loss": 0.404, "step": 7332 }, { "epoch": 0.7455266368442456, "grad_norm": 0.3166194558143616, "learning_rate": 9.926726261698903e-06, "loss": 0.3578, "step": 7333 }, { "epoch": 0.7456283041886946, "grad_norm": 0.31444117426872253, "learning_rate": 9.926665715754106e-06, "loss": 0.385, "step": 7334 }, { "epoch": 0.7457299715331436, "grad_norm": 0.3433433175086975, "learning_rate": 9.926605144989941e-06, "loss": 0.4018, "step": 7335 }, { "epoch": 0.7458316388775925, "grad_norm": 0.3217852711677551, "learning_rate": 9.926544549406706e-06, "loss": 0.3906, "step": 7336 }, { "epoch": 0.7459333062220415, "grad_norm": 0.29336249828338623, "learning_rate": 9.92648392900471e-06, "loss": 0.4084, "step": 7337 }, { "epoch": 0.7460349735664904, "grad_norm": 0.33328863978385925, "learning_rate": 9.926423283784262e-06, "loss": 0.41, "step": 7338 }, { "epoch": 0.7461366409109395, "grad_norm": 0.32949915528297424, "learning_rate": 9.926362613745662e-06, "loss": 0.3915, "step": 7339 }, { "epoch": 0.7462383082553884, "grad_norm": 0.33903974294662476, "learning_rate": 9.926301918889218e-06, "loss": 0.3848, "step": 7340 }, { "epoch": 0.7463399755998373, "grad_norm": 0.34164664149284363, "learning_rate": 9.926241199215238e-06, "loss": 0.4181, "step": 7341 }, { "epoch": 0.7464416429442863, "grad_norm": 0.34645405411720276, "learning_rate": 9.926180454724024e-06, "loss": 0.3935, "step": 7342 }, { "epoch": 0.7465433102887352, "grad_norm": 0.3311060070991516, "learning_rate": 9.926119685415883e-06, "loss": 0.4023, "step": 7343 }, { "epoch": 0.7466449776331843, "grad_norm": 0.33490607142448425, "learning_rate": 9.92605889129112e-06, "loss": 0.374, "step": 7344 }, { "epoch": 0.7467466449776332, "grad_norm": 0.34696000814437866, "learning_rate": 9.925998072350045e-06, "loss": 0.4128, "step": 7345 }, { "epoch": 0.7468483123220822, "grad_norm": 0.35574331879615784, "learning_rate": 9.925937228592963e-06, "loss": 0.3864, "step": 7346 }, { "epoch": 0.7469499796665311, "grad_norm": 0.31538015604019165, "learning_rate": 9.925876360020179e-06, "loss": 0.404, "step": 7347 }, { "epoch": 0.74705164701098, "grad_norm": 0.3314487934112549, "learning_rate": 9.925815466631999e-06, "loss": 0.3978, "step": 7348 }, { "epoch": 0.7471533143554291, "grad_norm": 0.3183685541152954, "learning_rate": 9.925754548428732e-06, "loss": 0.3787, "step": 7349 }, { "epoch": 0.747254981699878, "grad_norm": 0.3178585171699524, "learning_rate": 9.925693605410685e-06, "loss": 0.4016, "step": 7350 }, { "epoch": 0.747356649044327, "grad_norm": 0.32670295238494873, "learning_rate": 9.925632637578164e-06, "loss": 0.3844, "step": 7351 }, { "epoch": 0.7474583163887759, "grad_norm": 0.3483871519565582, "learning_rate": 9.925571644931475e-06, "loss": 0.3788, "step": 7352 }, { "epoch": 0.7475599837332249, "grad_norm": 0.3173201382160187, "learning_rate": 9.925510627470929e-06, "loss": 0.3822, "step": 7353 }, { "epoch": 0.7476616510776739, "grad_norm": 0.36083728075027466, "learning_rate": 9.925449585196829e-06, "loss": 0.447, "step": 7354 }, { "epoch": 0.7477633184221228, "grad_norm": 0.3336627185344696, "learning_rate": 9.925388518109485e-06, "loss": 0.4352, "step": 7355 }, { "epoch": 0.7478649857665718, "grad_norm": 0.3499580919742584, "learning_rate": 9.925327426209203e-06, "loss": 0.3613, "step": 7356 }, { "epoch": 0.7479666531110207, "grad_norm": 0.35919100046157837, "learning_rate": 9.925266309496295e-06, "loss": 0.3958, "step": 7357 }, { "epoch": 0.7480683204554697, "grad_norm": 0.37301596999168396, "learning_rate": 9.925205167971063e-06, "loss": 0.4361, "step": 7358 }, { "epoch": 0.7481699877999186, "grad_norm": 0.3411368131637573, "learning_rate": 9.925144001633818e-06, "loss": 0.354, "step": 7359 }, { "epoch": 0.7482716551443677, "grad_norm": 0.43874427676200867, "learning_rate": 9.925082810484868e-06, "loss": 0.4122, "step": 7360 }, { "epoch": 0.7483733224888166, "grad_norm": 0.3211832642555237, "learning_rate": 9.925021594524523e-06, "loss": 0.3933, "step": 7361 }, { "epoch": 0.7484749898332655, "grad_norm": 0.3393495976924896, "learning_rate": 9.924960353753086e-06, "loss": 0.3999, "step": 7362 }, { "epoch": 0.7485766571777145, "grad_norm": 0.377748966217041, "learning_rate": 9.924899088170873e-06, "loss": 0.3904, "step": 7363 }, { "epoch": 0.7486783245221634, "grad_norm": 0.327440083026886, "learning_rate": 9.924837797778186e-06, "loss": 0.3694, "step": 7364 }, { "epoch": 0.7487799918666125, "grad_norm": 0.3672334849834442, "learning_rate": 9.924776482575337e-06, "loss": 0.3726, "step": 7365 }, { "epoch": 0.7488816592110614, "grad_norm": 0.32718390226364136, "learning_rate": 9.924715142562636e-06, "loss": 0.3861, "step": 7366 }, { "epoch": 0.7489833265555104, "grad_norm": 0.3389245867729187, "learning_rate": 9.92465377774039e-06, "loss": 0.4216, "step": 7367 }, { "epoch": 0.7490849938999593, "grad_norm": 0.3701116442680359, "learning_rate": 9.924592388108907e-06, "loss": 0.3856, "step": 7368 }, { "epoch": 0.7491866612444082, "grad_norm": 0.3369100093841553, "learning_rate": 9.924530973668498e-06, "loss": 0.4036, "step": 7369 }, { "epoch": 0.7492883285888573, "grad_norm": 0.36943739652633667, "learning_rate": 9.924469534419473e-06, "loss": 0.4016, "step": 7370 }, { "epoch": 0.7493899959333062, "grad_norm": 0.4367247521877289, "learning_rate": 9.92440807036214e-06, "loss": 0.4036, "step": 7371 }, { "epoch": 0.7494916632777552, "grad_norm": 0.31073683500289917, "learning_rate": 9.92434658149681e-06, "loss": 0.4142, "step": 7372 }, { "epoch": 0.7495933306222041, "grad_norm": 0.34882214665412903, "learning_rate": 9.924285067823793e-06, "loss": 0.4266, "step": 7373 }, { "epoch": 0.7496949979666531, "grad_norm": 0.4271591603755951, "learning_rate": 9.924223529343396e-06, "loss": 0.3937, "step": 7374 }, { "epoch": 0.7497966653111021, "grad_norm": 0.34761250019073486, "learning_rate": 9.924161966055932e-06, "loss": 0.3808, "step": 7375 }, { "epoch": 0.7498983326555511, "grad_norm": 0.37543100118637085, "learning_rate": 9.92410037796171e-06, "loss": 0.3938, "step": 7376 }, { "epoch": 0.75, "grad_norm": 0.3634949028491974, "learning_rate": 9.924038765061042e-06, "loss": 0.3736, "step": 7377 }, { "epoch": 0.7501016673444489, "grad_norm": 0.34108245372772217, "learning_rate": 9.923977127354233e-06, "loss": 0.3594, "step": 7378 }, { "epoch": 0.7502033346888979, "grad_norm": 0.3630181550979614, "learning_rate": 9.923915464841601e-06, "loss": 0.4258, "step": 7379 }, { "epoch": 0.7503050020333469, "grad_norm": 0.3660162389278412, "learning_rate": 9.923853777523451e-06, "loss": 0.39, "step": 7380 }, { "epoch": 0.7504066693777959, "grad_norm": 0.3715139627456665, "learning_rate": 9.923792065400098e-06, "loss": 0.3775, "step": 7381 }, { "epoch": 0.7505083367222448, "grad_norm": 0.3489576578140259, "learning_rate": 9.923730328471848e-06, "loss": 0.4155, "step": 7382 }, { "epoch": 0.7506100040666938, "grad_norm": 0.4932250380516052, "learning_rate": 9.923668566739017e-06, "loss": 0.3887, "step": 7383 }, { "epoch": 0.7507116714111427, "grad_norm": 0.3892301023006439, "learning_rate": 9.923606780201913e-06, "loss": 0.3848, "step": 7384 }, { "epoch": 0.7508133387555918, "grad_norm": 0.3300042748451233, "learning_rate": 9.923544968860847e-06, "loss": 0.3907, "step": 7385 }, { "epoch": 0.7509150061000407, "grad_norm": 0.39500629901885986, "learning_rate": 9.923483132716132e-06, "loss": 0.3919, "step": 7386 }, { "epoch": 0.7510166734444896, "grad_norm": 0.37804487347602844, "learning_rate": 9.92342127176808e-06, "loss": 0.3884, "step": 7387 }, { "epoch": 0.7511183407889386, "grad_norm": 0.3263232707977295, "learning_rate": 9.923359386017e-06, "loss": 0.4088, "step": 7388 }, { "epoch": 0.7512200081333875, "grad_norm": 0.3220060169696808, "learning_rate": 9.923297475463207e-06, "loss": 0.4285, "step": 7389 }, { "epoch": 0.7513216754778366, "grad_norm": 0.33276745676994324, "learning_rate": 9.92323554010701e-06, "loss": 0.3866, "step": 7390 }, { "epoch": 0.7514233428222855, "grad_norm": 0.37144988775253296, "learning_rate": 9.923173579948724e-06, "loss": 0.4166, "step": 7391 }, { "epoch": 0.7515250101667345, "grad_norm": 0.31459754705429077, "learning_rate": 9.923111594988656e-06, "loss": 0.3866, "step": 7392 }, { "epoch": 0.7516266775111834, "grad_norm": 0.33848121762275696, "learning_rate": 9.923049585227125e-06, "loss": 0.4108, "step": 7393 }, { "epoch": 0.7517283448556323, "grad_norm": 0.35592731833457947, "learning_rate": 9.92298755066444e-06, "loss": 0.3721, "step": 7394 }, { "epoch": 0.7518300122000814, "grad_norm": 0.3194354176521301, "learning_rate": 9.922925491300912e-06, "loss": 0.3909, "step": 7395 }, { "epoch": 0.7519316795445303, "grad_norm": 0.36158204078674316, "learning_rate": 9.922863407136855e-06, "loss": 0.4156, "step": 7396 }, { "epoch": 0.7520333468889793, "grad_norm": 0.3517443537712097, "learning_rate": 9.922801298172584e-06, "loss": 0.3668, "step": 7397 }, { "epoch": 0.7521350142334282, "grad_norm": 0.3457930386066437, "learning_rate": 9.922739164408408e-06, "loss": 0.3951, "step": 7398 }, { "epoch": 0.7522366815778772, "grad_norm": 0.34641966223716736, "learning_rate": 9.922677005844644e-06, "loss": 0.3654, "step": 7399 }, { "epoch": 0.7523383489223261, "grad_norm": 0.3148312270641327, "learning_rate": 9.9226148224816e-06, "loss": 0.3304, "step": 7400 }, { "epoch": 0.7524400162667751, "grad_norm": 0.38037869334220886, "learning_rate": 9.922552614319594e-06, "loss": 0.3708, "step": 7401 }, { "epoch": 0.7525416836112241, "grad_norm": 0.39111077785491943, "learning_rate": 9.922490381358939e-06, "loss": 0.4246, "step": 7402 }, { "epoch": 0.752643350955673, "grad_norm": 0.35922378301620483, "learning_rate": 9.922428123599946e-06, "loss": 0.4017, "step": 7403 }, { "epoch": 0.752745018300122, "grad_norm": 0.37553930282592773, "learning_rate": 9.92236584104293e-06, "loss": 0.4011, "step": 7404 }, { "epoch": 0.7528466856445709, "grad_norm": 0.3723903298377991, "learning_rate": 9.922303533688204e-06, "loss": 0.3952, "step": 7405 }, { "epoch": 0.75294835298902, "grad_norm": 0.34647518396377563, "learning_rate": 9.922241201536082e-06, "loss": 0.3935, "step": 7406 }, { "epoch": 0.7530500203334689, "grad_norm": 0.37120485305786133, "learning_rate": 9.92217884458688e-06, "loss": 0.3775, "step": 7407 }, { "epoch": 0.7531516876779178, "grad_norm": 0.3245297372341156, "learning_rate": 9.92211646284091e-06, "loss": 0.3746, "step": 7408 }, { "epoch": 0.7532533550223668, "grad_norm": 0.3772847652435303, "learning_rate": 9.922054056298487e-06, "loss": 0.3657, "step": 7409 }, { "epoch": 0.7533550223668157, "grad_norm": 0.3851510286331177, "learning_rate": 9.921991624959926e-06, "loss": 0.4056, "step": 7410 }, { "epoch": 0.7534566897112648, "grad_norm": 0.3329410254955292, "learning_rate": 9.92192916882554e-06, "loss": 0.3821, "step": 7411 }, { "epoch": 0.7535583570557137, "grad_norm": 0.41849663853645325, "learning_rate": 9.921866687895646e-06, "loss": 0.3705, "step": 7412 }, { "epoch": 0.7536600244001627, "grad_norm": 0.38215675950050354, "learning_rate": 9.921804182170554e-06, "loss": 0.3577, "step": 7413 }, { "epoch": 0.7537616917446116, "grad_norm": 0.33848533034324646, "learning_rate": 9.921741651650585e-06, "loss": 0.3839, "step": 7414 }, { "epoch": 0.7538633590890605, "grad_norm": 0.37436607480049133, "learning_rate": 9.92167909633605e-06, "loss": 0.4011, "step": 7415 }, { "epoch": 0.7539650264335096, "grad_norm": 0.4085700511932373, "learning_rate": 9.921616516227266e-06, "loss": 0.4297, "step": 7416 }, { "epoch": 0.7540666937779585, "grad_norm": 0.3762089014053345, "learning_rate": 9.921553911324547e-06, "loss": 0.3883, "step": 7417 }, { "epoch": 0.7541683611224075, "grad_norm": 0.33918192982673645, "learning_rate": 9.921491281628208e-06, "loss": 0.3878, "step": 7418 }, { "epoch": 0.7542700284668564, "grad_norm": 0.36676570773124695, "learning_rate": 9.921428627138567e-06, "loss": 0.3943, "step": 7419 }, { "epoch": 0.7543716958113054, "grad_norm": 0.3632349967956543, "learning_rate": 9.921365947855937e-06, "loss": 0.4471, "step": 7420 }, { "epoch": 0.7544733631557544, "grad_norm": 0.33841514587402344, "learning_rate": 9.921303243780635e-06, "loss": 0.3718, "step": 7421 }, { "epoch": 0.7545750305002034, "grad_norm": 0.3159526586532593, "learning_rate": 9.921240514912977e-06, "loss": 0.396, "step": 7422 }, { "epoch": 0.7546766978446523, "grad_norm": 0.3835263252258301, "learning_rate": 9.92117776125328e-06, "loss": 0.376, "step": 7423 }, { "epoch": 0.7547783651891012, "grad_norm": 0.39964330196380615, "learning_rate": 9.921114982801858e-06, "loss": 0.4404, "step": 7424 }, { "epoch": 0.7548800325335502, "grad_norm": 0.3440991938114166, "learning_rate": 9.921052179559027e-06, "loss": 0.3782, "step": 7425 }, { "epoch": 0.7549816998779992, "grad_norm": 0.4000453054904938, "learning_rate": 9.920989351525106e-06, "loss": 0.3906, "step": 7426 }, { "epoch": 0.7550833672224482, "grad_norm": 0.3750167787075043, "learning_rate": 9.92092649870041e-06, "loss": 0.4281, "step": 7427 }, { "epoch": 0.7551850345668971, "grad_norm": 0.3626638650894165, "learning_rate": 9.920863621085254e-06, "loss": 0.3969, "step": 7428 }, { "epoch": 0.7552867019113461, "grad_norm": 0.34827783703804016, "learning_rate": 9.920800718679958e-06, "loss": 0.3812, "step": 7429 }, { "epoch": 0.755388369255795, "grad_norm": 0.3233191668987274, "learning_rate": 9.920737791484838e-06, "loss": 0.3671, "step": 7430 }, { "epoch": 0.755490036600244, "grad_norm": 0.34562796354293823, "learning_rate": 9.920674839500209e-06, "loss": 0.366, "step": 7431 }, { "epoch": 0.755591703944693, "grad_norm": 0.3406445384025574, "learning_rate": 9.920611862726389e-06, "loss": 0.4345, "step": 7432 }, { "epoch": 0.7556933712891419, "grad_norm": 0.373545378446579, "learning_rate": 9.920548861163697e-06, "loss": 0.3843, "step": 7433 }, { "epoch": 0.7557950386335909, "grad_norm": 0.3094812035560608, "learning_rate": 9.920485834812449e-06, "loss": 0.426, "step": 7434 }, { "epoch": 0.7558967059780398, "grad_norm": 0.33660319447517395, "learning_rate": 9.920422783672963e-06, "loss": 0.377, "step": 7435 }, { "epoch": 0.7559983733224889, "grad_norm": 0.3221777081489563, "learning_rate": 9.920359707745555e-06, "loss": 0.3904, "step": 7436 }, { "epoch": 0.7561000406669378, "grad_norm": 0.3419148921966553, "learning_rate": 9.920296607030544e-06, "loss": 0.3969, "step": 7437 }, { "epoch": 0.7562017080113868, "grad_norm": 0.3201550543308258, "learning_rate": 9.92023348152825e-06, "loss": 0.3803, "step": 7438 }, { "epoch": 0.7563033753558357, "grad_norm": 0.35940104722976685, "learning_rate": 9.920170331238988e-06, "loss": 0.3939, "step": 7439 }, { "epoch": 0.7564050427002846, "grad_norm": 0.34537431597709656, "learning_rate": 9.920107156163076e-06, "loss": 0.4, "step": 7440 }, { "epoch": 0.7565067100447336, "grad_norm": 0.35491564869880676, "learning_rate": 9.920043956300834e-06, "loss": 0.4268, "step": 7441 }, { "epoch": 0.7566083773891826, "grad_norm": 0.33730337023735046, "learning_rate": 9.919980731652579e-06, "loss": 0.4381, "step": 7442 }, { "epoch": 0.7567100447336316, "grad_norm": 0.33401474356651306, "learning_rate": 9.919917482218631e-06, "loss": 0.3819, "step": 7443 }, { "epoch": 0.7568117120780805, "grad_norm": 0.381360799074173, "learning_rate": 9.919854207999308e-06, "loss": 0.4022, "step": 7444 }, { "epoch": 0.7569133794225295, "grad_norm": 0.34243133664131165, "learning_rate": 9.919790908994929e-06, "loss": 0.3681, "step": 7445 }, { "epoch": 0.7570150467669784, "grad_norm": 0.3179168999195099, "learning_rate": 9.919727585205812e-06, "loss": 0.3634, "step": 7446 }, { "epoch": 0.7571167141114274, "grad_norm": 0.3824864625930786, "learning_rate": 9.919664236632277e-06, "loss": 0.3857, "step": 7447 }, { "epoch": 0.7572183814558764, "grad_norm": 0.3353639245033264, "learning_rate": 9.91960086327464e-06, "loss": 0.3938, "step": 7448 }, { "epoch": 0.7573200488003253, "grad_norm": 0.3800714313983917, "learning_rate": 9.919537465133227e-06, "loss": 0.4011, "step": 7449 }, { "epoch": 0.7574217161447743, "grad_norm": 0.34354597330093384, "learning_rate": 9.91947404220835e-06, "loss": 0.3761, "step": 7450 }, { "epoch": 0.7575233834892232, "grad_norm": 0.3505958020687103, "learning_rate": 9.919410594500334e-06, "loss": 0.4057, "step": 7451 }, { "epoch": 0.7576250508336723, "grad_norm": 0.34453657269477844, "learning_rate": 9.919347122009497e-06, "loss": 0.3912, "step": 7452 }, { "epoch": 0.7577267181781212, "grad_norm": 0.34772422909736633, "learning_rate": 9.919283624736155e-06, "loss": 0.3855, "step": 7453 }, { "epoch": 0.7578283855225701, "grad_norm": 0.3390767276287079, "learning_rate": 9.919220102680635e-06, "loss": 0.3598, "step": 7454 }, { "epoch": 0.7579300528670191, "grad_norm": 0.33300480246543884, "learning_rate": 9.91915655584325e-06, "loss": 0.4334, "step": 7455 }, { "epoch": 0.758031720211468, "grad_norm": 0.3421586751937866, "learning_rate": 9.919092984224325e-06, "loss": 0.4156, "step": 7456 }, { "epoch": 0.7581333875559171, "grad_norm": 0.3560235798358917, "learning_rate": 9.919029387824178e-06, "loss": 0.3855, "step": 7457 }, { "epoch": 0.758235054900366, "grad_norm": 0.34223243594169617, "learning_rate": 9.91896576664313e-06, "loss": 0.3841, "step": 7458 }, { "epoch": 0.758336722244815, "grad_norm": 0.3272906541824341, "learning_rate": 9.918902120681502e-06, "loss": 0.3736, "step": 7459 }, { "epoch": 0.7584383895892639, "grad_norm": 0.3274691104888916, "learning_rate": 9.918838449939612e-06, "loss": 0.3852, "step": 7460 }, { "epoch": 0.7585400569337128, "grad_norm": 0.3247416317462921, "learning_rate": 9.918774754417783e-06, "loss": 0.4375, "step": 7461 }, { "epoch": 0.7586417242781619, "grad_norm": 0.37632107734680176, "learning_rate": 9.918711034116338e-06, "loss": 0.3822, "step": 7462 }, { "epoch": 0.7587433916226108, "grad_norm": 0.37295666337013245, "learning_rate": 9.918647289035594e-06, "loss": 0.4077, "step": 7463 }, { "epoch": 0.7588450589670598, "grad_norm": 0.3263910710811615, "learning_rate": 9.918583519175876e-06, "loss": 0.3839, "step": 7464 }, { "epoch": 0.7589467263115087, "grad_norm": 0.3620527982711792, "learning_rate": 9.918519724537499e-06, "loss": 0.3544, "step": 7465 }, { "epoch": 0.7590483936559577, "grad_norm": 0.3184884488582611, "learning_rate": 9.918455905120792e-06, "loss": 0.3952, "step": 7466 }, { "epoch": 0.7591500610004067, "grad_norm": 0.36497625708580017, "learning_rate": 9.918392060926071e-06, "loss": 0.3741, "step": 7467 }, { "epoch": 0.7592517283448557, "grad_norm": 0.3235774636268616, "learning_rate": 9.91832819195366e-06, "loss": 0.4283, "step": 7468 }, { "epoch": 0.7593533956893046, "grad_norm": 0.40603092312812805, "learning_rate": 9.918264298203881e-06, "loss": 0.447, "step": 7469 }, { "epoch": 0.7594550630337535, "grad_norm": 0.339323490858078, "learning_rate": 9.918200379677055e-06, "loss": 0.4002, "step": 7470 }, { "epoch": 0.7595567303782025, "grad_norm": 0.34214261174201965, "learning_rate": 9.918136436373505e-06, "loss": 0.3991, "step": 7471 }, { "epoch": 0.7596583977226515, "grad_norm": 0.31559857726097107, "learning_rate": 9.91807246829355e-06, "loss": 0.4041, "step": 7472 }, { "epoch": 0.7597600650671005, "grad_norm": 0.4134816527366638, "learning_rate": 9.918008475437515e-06, "loss": 0.4259, "step": 7473 }, { "epoch": 0.7598617324115494, "grad_norm": 0.33435970544815063, "learning_rate": 9.917944457805723e-06, "loss": 0.3864, "step": 7474 }, { "epoch": 0.7599633997559984, "grad_norm": 0.3780481219291687, "learning_rate": 9.917880415398497e-06, "loss": 0.3683, "step": 7475 }, { "epoch": 0.7600650671004473, "grad_norm": 0.3525865077972412, "learning_rate": 9.917816348216157e-06, "loss": 0.4116, "step": 7476 }, { "epoch": 0.7601667344448964, "grad_norm": 0.3604346215724945, "learning_rate": 9.917752256259026e-06, "loss": 0.3764, "step": 7477 }, { "epoch": 0.7602684017893453, "grad_norm": 0.4022163152694702, "learning_rate": 9.917688139527428e-06, "loss": 0.3889, "step": 7478 }, { "epoch": 0.7603700691337942, "grad_norm": 0.37023353576660156, "learning_rate": 9.917623998021685e-06, "loss": 0.3583, "step": 7479 }, { "epoch": 0.7604717364782432, "grad_norm": 0.38534680008888245, "learning_rate": 9.917559831742123e-06, "loss": 0.3574, "step": 7480 }, { "epoch": 0.7605734038226921, "grad_norm": 0.3534843623638153, "learning_rate": 9.917495640689063e-06, "loss": 0.3951, "step": 7481 }, { "epoch": 0.7606750711671411, "grad_norm": 0.4022429883480072, "learning_rate": 9.917431424862826e-06, "loss": 0.3888, "step": 7482 }, { "epoch": 0.7607767385115901, "grad_norm": 0.36936527490615845, "learning_rate": 9.917367184263741e-06, "loss": 0.3898, "step": 7483 }, { "epoch": 0.760878405856039, "grad_norm": 0.32464781403541565, "learning_rate": 9.917302918892127e-06, "loss": 0.4046, "step": 7484 }, { "epoch": 0.760980073200488, "grad_norm": 0.36404043436050415, "learning_rate": 9.917238628748311e-06, "loss": 0.3577, "step": 7485 }, { "epoch": 0.7610817405449369, "grad_norm": 0.40749263763427734, "learning_rate": 9.917174313832613e-06, "loss": 0.4059, "step": 7486 }, { "epoch": 0.7611834078893859, "grad_norm": 0.3139622211456299, "learning_rate": 9.91710997414536e-06, "loss": 0.4108, "step": 7487 }, { "epoch": 0.7612850752338349, "grad_norm": 0.4056891202926636, "learning_rate": 9.917045609686877e-06, "loss": 0.3785, "step": 7488 }, { "epoch": 0.7613867425782839, "grad_norm": 0.32418376207351685, "learning_rate": 9.916981220457485e-06, "loss": 0.3897, "step": 7489 }, { "epoch": 0.7614884099227328, "grad_norm": 0.343161016702652, "learning_rate": 9.916916806457511e-06, "loss": 0.4023, "step": 7490 }, { "epoch": 0.7615900772671818, "grad_norm": 0.3422084450721741, "learning_rate": 9.916852367687277e-06, "loss": 0.3947, "step": 7491 }, { "epoch": 0.7616917446116307, "grad_norm": 0.33739617466926575, "learning_rate": 9.91678790414711e-06, "loss": 0.4159, "step": 7492 }, { "epoch": 0.7617934119560797, "grad_norm": 0.3391704261302948, "learning_rate": 9.916723415837335e-06, "loss": 0.4293, "step": 7493 }, { "epoch": 0.7618950793005287, "grad_norm": 0.3199510872364044, "learning_rate": 9.916658902758275e-06, "loss": 0.4047, "step": 7494 }, { "epoch": 0.7619967466449776, "grad_norm": 0.33994564414024353, "learning_rate": 9.916594364910257e-06, "loss": 0.4041, "step": 7495 }, { "epoch": 0.7620984139894266, "grad_norm": 0.3463327884674072, "learning_rate": 9.916529802293604e-06, "loss": 0.394, "step": 7496 }, { "epoch": 0.7622000813338755, "grad_norm": 0.3315559923648834, "learning_rate": 9.916465214908642e-06, "loss": 0.3941, "step": 7497 }, { "epoch": 0.7623017486783246, "grad_norm": 0.32431676983833313, "learning_rate": 9.916400602755696e-06, "loss": 0.4006, "step": 7498 }, { "epoch": 0.7624034160227735, "grad_norm": 0.3518267869949341, "learning_rate": 9.916335965835092e-06, "loss": 0.4036, "step": 7499 }, { "epoch": 0.7625050833672224, "grad_norm": 0.3170645236968994, "learning_rate": 9.916271304147159e-06, "loss": 0.3944, "step": 7500 }, { "epoch": 0.7626067507116714, "grad_norm": 0.29122793674468994, "learning_rate": 9.916206617692215e-06, "loss": 0.3489, "step": 7501 }, { "epoch": 0.7627084180561203, "grad_norm": 0.3111083507537842, "learning_rate": 9.916141906470593e-06, "loss": 0.382, "step": 7502 }, { "epoch": 0.7628100854005694, "grad_norm": 0.3265747129917145, "learning_rate": 9.916077170482616e-06, "loss": 0.3957, "step": 7503 }, { "epoch": 0.7629117527450183, "grad_norm": 0.3289235532283783, "learning_rate": 9.916012409728611e-06, "loss": 0.3635, "step": 7504 }, { "epoch": 0.7630134200894673, "grad_norm": 0.3244902193546295, "learning_rate": 9.915947624208902e-06, "loss": 0.3902, "step": 7505 }, { "epoch": 0.7631150874339162, "grad_norm": 0.3401889204978943, "learning_rate": 9.915882813923817e-06, "loss": 0.3849, "step": 7506 }, { "epoch": 0.7632167547783651, "grad_norm": 0.32900312542915344, "learning_rate": 9.915817978873684e-06, "loss": 0.3775, "step": 7507 }, { "epoch": 0.7633184221228142, "grad_norm": 0.3312739431858063, "learning_rate": 9.915753119058828e-06, "loss": 0.3749, "step": 7508 }, { "epoch": 0.7634200894672631, "grad_norm": 0.3313884437084198, "learning_rate": 9.915688234479575e-06, "loss": 0.4198, "step": 7509 }, { "epoch": 0.7635217568117121, "grad_norm": 0.37381604313850403, "learning_rate": 9.915623325136253e-06, "loss": 0.4069, "step": 7510 }, { "epoch": 0.763623424156161, "grad_norm": 0.35143327713012695, "learning_rate": 9.91555839102919e-06, "loss": 0.414, "step": 7511 }, { "epoch": 0.76372509150061, "grad_norm": 0.3689301311969757, "learning_rate": 9.915493432158711e-06, "loss": 0.3832, "step": 7512 }, { "epoch": 0.763826758845059, "grad_norm": 0.32460319995880127, "learning_rate": 9.915428448525143e-06, "loss": 0.3407, "step": 7513 }, { "epoch": 0.763928426189508, "grad_norm": 0.3530625104904175, "learning_rate": 9.915363440128818e-06, "loss": 0.4036, "step": 7514 }, { "epoch": 0.7640300935339569, "grad_norm": 0.35261139273643494, "learning_rate": 9.915298406970057e-06, "loss": 0.378, "step": 7515 }, { "epoch": 0.7641317608784058, "grad_norm": 0.35243046283721924, "learning_rate": 9.91523334904919e-06, "loss": 0.3639, "step": 7516 }, { "epoch": 0.7642334282228548, "grad_norm": 0.32980844378471375, "learning_rate": 9.915168266366548e-06, "loss": 0.3868, "step": 7517 }, { "epoch": 0.7643350955673038, "grad_norm": 0.32614707946777344, "learning_rate": 9.915103158922457e-06, "loss": 0.3976, "step": 7518 }, { "epoch": 0.7644367629117528, "grad_norm": 0.3389614522457123, "learning_rate": 9.915038026717242e-06, "loss": 0.3977, "step": 7519 }, { "epoch": 0.7645384302562017, "grad_norm": 0.35056954622268677, "learning_rate": 9.914972869751234e-06, "loss": 0.3763, "step": 7520 }, { "epoch": 0.7646400976006507, "grad_norm": 0.33843380212783813, "learning_rate": 9.914907688024761e-06, "loss": 0.3805, "step": 7521 }, { "epoch": 0.7647417649450996, "grad_norm": 0.3738213777542114, "learning_rate": 9.914842481538151e-06, "loss": 0.4108, "step": 7522 }, { "epoch": 0.7648434322895485, "grad_norm": 0.35404694080352783, "learning_rate": 9.914777250291731e-06, "loss": 0.3953, "step": 7523 }, { "epoch": 0.7649450996339976, "grad_norm": 0.31405967473983765, "learning_rate": 9.914711994285832e-06, "loss": 0.3921, "step": 7524 }, { "epoch": 0.7650467669784465, "grad_norm": 0.3663533926010132, "learning_rate": 9.914646713520784e-06, "loss": 0.3711, "step": 7525 }, { "epoch": 0.7651484343228955, "grad_norm": 0.33987829089164734, "learning_rate": 9.914581407996912e-06, "loss": 0.4002, "step": 7526 }, { "epoch": 0.7652501016673444, "grad_norm": 0.310888409614563, "learning_rate": 9.914516077714546e-06, "loss": 0.4151, "step": 7527 }, { "epoch": 0.7653517690117934, "grad_norm": 0.3239732086658478, "learning_rate": 9.914450722674017e-06, "loss": 0.3781, "step": 7528 }, { "epoch": 0.7654534363562424, "grad_norm": 0.33915975689888, "learning_rate": 9.914385342875652e-06, "loss": 0.3812, "step": 7529 }, { "epoch": 0.7655551037006914, "grad_norm": 0.33597415685653687, "learning_rate": 9.914319938319783e-06, "loss": 0.3659, "step": 7530 }, { "epoch": 0.7656567710451403, "grad_norm": 0.3356285095214844, "learning_rate": 9.914254509006738e-06, "loss": 0.382, "step": 7531 }, { "epoch": 0.7657584383895892, "grad_norm": 0.3740748465061188, "learning_rate": 9.914189054936845e-06, "loss": 0.3953, "step": 7532 }, { "epoch": 0.7658601057340382, "grad_norm": 0.3225221037864685, "learning_rate": 9.914123576110437e-06, "loss": 0.4099, "step": 7533 }, { "epoch": 0.7659617730784872, "grad_norm": 0.3241710960865021, "learning_rate": 9.91405807252784e-06, "loss": 0.3608, "step": 7534 }, { "epoch": 0.7660634404229362, "grad_norm": 0.34894320368766785, "learning_rate": 9.913992544189388e-06, "loss": 0.4088, "step": 7535 }, { "epoch": 0.7661651077673851, "grad_norm": 0.38782376050949097, "learning_rate": 9.91392699109541e-06, "loss": 0.3993, "step": 7536 }, { "epoch": 0.766266775111834, "grad_norm": 0.32729095220565796, "learning_rate": 9.913861413246235e-06, "loss": 0.356, "step": 7537 }, { "epoch": 0.766368442456283, "grad_norm": 0.32121720910072327, "learning_rate": 9.913795810642192e-06, "loss": 0.3937, "step": 7538 }, { "epoch": 0.766470109800732, "grad_norm": 0.3235696852207184, "learning_rate": 9.913730183283615e-06, "loss": 0.4232, "step": 7539 }, { "epoch": 0.766571777145181, "grad_norm": 0.33100807666778564, "learning_rate": 9.913664531170832e-06, "loss": 0.3842, "step": 7540 }, { "epoch": 0.7666734444896299, "grad_norm": 0.3296375274658203, "learning_rate": 9.913598854304177e-06, "loss": 0.3648, "step": 7541 }, { "epoch": 0.7667751118340789, "grad_norm": 0.3137072026729584, "learning_rate": 9.913533152683977e-06, "loss": 0.3961, "step": 7542 }, { "epoch": 0.7668767791785278, "grad_norm": 0.3300429880619049, "learning_rate": 9.913467426310563e-06, "loss": 0.3699, "step": 7543 }, { "epoch": 0.7669784465229769, "grad_norm": 0.3256242275238037, "learning_rate": 9.91340167518427e-06, "loss": 0.4144, "step": 7544 }, { "epoch": 0.7670801138674258, "grad_norm": 0.3210481107234955, "learning_rate": 9.913335899305427e-06, "loss": 0.3925, "step": 7545 }, { "epoch": 0.7671817812118747, "grad_norm": 0.31233036518096924, "learning_rate": 9.913270098674366e-06, "loss": 0.3589, "step": 7546 }, { "epoch": 0.7672834485563237, "grad_norm": 0.3398962616920471, "learning_rate": 9.913204273291416e-06, "loss": 0.4126, "step": 7547 }, { "epoch": 0.7673851159007726, "grad_norm": 0.35844746232032776, "learning_rate": 9.91313842315691e-06, "loss": 0.4018, "step": 7548 }, { "epoch": 0.7674867832452217, "grad_norm": 0.3297092616558075, "learning_rate": 9.913072548271181e-06, "loss": 0.3863, "step": 7549 }, { "epoch": 0.7675884505896706, "grad_norm": 0.3264983594417572, "learning_rate": 9.913006648634561e-06, "loss": 0.404, "step": 7550 }, { "epoch": 0.7676901179341196, "grad_norm": 0.4023820161819458, "learning_rate": 9.912940724247379e-06, "loss": 0.4063, "step": 7551 }, { "epoch": 0.7677917852785685, "grad_norm": 0.35490795969963074, "learning_rate": 9.91287477510997e-06, "loss": 0.4197, "step": 7552 }, { "epoch": 0.7678934526230174, "grad_norm": 0.3438240587711334, "learning_rate": 9.912808801222666e-06, "loss": 0.3897, "step": 7553 }, { "epoch": 0.7679951199674665, "grad_norm": 0.3917396664619446, "learning_rate": 9.9127428025858e-06, "loss": 0.41, "step": 7554 }, { "epoch": 0.7680967873119154, "grad_norm": 0.36265286803245544, "learning_rate": 9.912676779199701e-06, "loss": 0.3873, "step": 7555 }, { "epoch": 0.7681984546563644, "grad_norm": 0.3720817565917969, "learning_rate": 9.912610731064704e-06, "loss": 0.4147, "step": 7556 }, { "epoch": 0.7683001220008133, "grad_norm": 0.3837028443813324, "learning_rate": 9.912544658181142e-06, "loss": 0.4016, "step": 7557 }, { "epoch": 0.7684017893452623, "grad_norm": 0.36307868361473083, "learning_rate": 9.912478560549347e-06, "loss": 0.4171, "step": 7558 }, { "epoch": 0.7685034566897113, "grad_norm": 0.3645457327365875, "learning_rate": 9.912412438169654e-06, "loss": 0.3857, "step": 7559 }, { "epoch": 0.7686051240341603, "grad_norm": 0.3637436032295227, "learning_rate": 9.912346291042393e-06, "loss": 0.3658, "step": 7560 }, { "epoch": 0.7687067913786092, "grad_norm": 0.33258554339408875, "learning_rate": 9.9122801191679e-06, "loss": 0.3948, "step": 7561 }, { "epoch": 0.7688084587230581, "grad_norm": 0.33303430676460266, "learning_rate": 9.912213922546506e-06, "loss": 0.37, "step": 7562 }, { "epoch": 0.7689101260675071, "grad_norm": 0.3593302071094513, "learning_rate": 9.912147701178546e-06, "loss": 0.4435, "step": 7563 }, { "epoch": 0.7690117934119561, "grad_norm": 0.327877402305603, "learning_rate": 9.912081455064352e-06, "loss": 0.3544, "step": 7564 }, { "epoch": 0.7691134607564051, "grad_norm": 0.3401902914047241, "learning_rate": 9.91201518420426e-06, "loss": 0.4056, "step": 7565 }, { "epoch": 0.769215128100854, "grad_norm": 0.3023121953010559, "learning_rate": 9.911948888598603e-06, "loss": 0.3734, "step": 7566 }, { "epoch": 0.769316795445303, "grad_norm": 0.31411585211753845, "learning_rate": 9.911882568247714e-06, "loss": 0.4312, "step": 7567 }, { "epoch": 0.7694184627897519, "grad_norm": 0.35334086418151855, "learning_rate": 9.911816223151929e-06, "loss": 0.3772, "step": 7568 }, { "epoch": 0.7695201301342008, "grad_norm": 0.3324936628341675, "learning_rate": 9.91174985331158e-06, "loss": 0.4031, "step": 7569 }, { "epoch": 0.7696217974786499, "grad_norm": 0.35275182127952576, "learning_rate": 9.911683458727002e-06, "loss": 0.421, "step": 7570 }, { "epoch": 0.7697234648230988, "grad_norm": 0.35355785489082336, "learning_rate": 9.911617039398531e-06, "loss": 0.381, "step": 7571 }, { "epoch": 0.7698251321675478, "grad_norm": 0.3128677010536194, "learning_rate": 9.9115505953265e-06, "loss": 0.3617, "step": 7572 }, { "epoch": 0.7699267995119967, "grad_norm": 0.3142351508140564, "learning_rate": 9.911484126511245e-06, "loss": 0.385, "step": 7573 }, { "epoch": 0.7700284668564457, "grad_norm": 0.32564565539360046, "learning_rate": 9.9114176329531e-06, "loss": 0.3741, "step": 7574 }, { "epoch": 0.7701301342008947, "grad_norm": 0.3193749189376831, "learning_rate": 9.911351114652398e-06, "loss": 0.3692, "step": 7575 }, { "epoch": 0.7702318015453437, "grad_norm": 0.3571982979774475, "learning_rate": 9.911284571609479e-06, "loss": 0.4045, "step": 7576 }, { "epoch": 0.7703334688897926, "grad_norm": 0.3364763557910919, "learning_rate": 9.911218003824674e-06, "loss": 0.3731, "step": 7577 }, { "epoch": 0.7704351362342415, "grad_norm": 0.3146052658557892, "learning_rate": 9.911151411298322e-06, "loss": 0.394, "step": 7578 }, { "epoch": 0.7705368035786905, "grad_norm": 0.3431857228279114, "learning_rate": 9.911084794030754e-06, "loss": 0.3778, "step": 7579 }, { "epoch": 0.7706384709231395, "grad_norm": 0.3249731957912445, "learning_rate": 9.911018152022307e-06, "loss": 0.4287, "step": 7580 }, { "epoch": 0.7707401382675885, "grad_norm": 0.36151814460754395, "learning_rate": 9.910951485273319e-06, "loss": 0.3996, "step": 7581 }, { "epoch": 0.7708418056120374, "grad_norm": 0.32479017972946167, "learning_rate": 9.910884793784124e-06, "loss": 0.4195, "step": 7582 }, { "epoch": 0.7709434729564864, "grad_norm": 0.36071208119392395, "learning_rate": 9.91081807755506e-06, "loss": 0.4269, "step": 7583 }, { "epoch": 0.7710451403009353, "grad_norm": 0.3359319865703583, "learning_rate": 9.910751336586459e-06, "loss": 0.3934, "step": 7584 }, { "epoch": 0.7711468076453843, "grad_norm": 0.3446105718612671, "learning_rate": 9.91068457087866e-06, "loss": 0.3696, "step": 7585 }, { "epoch": 0.7712484749898333, "grad_norm": 0.3674194812774658, "learning_rate": 9.910617780432001e-06, "loss": 0.377, "step": 7586 }, { "epoch": 0.7713501423342822, "grad_norm": 0.32685476541519165, "learning_rate": 9.910550965246816e-06, "loss": 0.364, "step": 7587 }, { "epoch": 0.7714518096787312, "grad_norm": 0.36717674136161804, "learning_rate": 9.910484125323441e-06, "loss": 0.3782, "step": 7588 }, { "epoch": 0.7715534770231801, "grad_norm": 0.3540191054344177, "learning_rate": 9.910417260662215e-06, "loss": 0.3785, "step": 7589 }, { "epoch": 0.7716551443676292, "grad_norm": 0.3328976035118103, "learning_rate": 9.910350371263474e-06, "loss": 0.4213, "step": 7590 }, { "epoch": 0.7717568117120781, "grad_norm": 0.3132631480693817, "learning_rate": 9.910283457127555e-06, "loss": 0.3922, "step": 7591 }, { "epoch": 0.771858479056527, "grad_norm": 0.35684314370155334, "learning_rate": 9.910216518254794e-06, "loss": 0.3803, "step": 7592 }, { "epoch": 0.771960146400976, "grad_norm": 0.3243463933467865, "learning_rate": 9.910149554645527e-06, "loss": 0.4197, "step": 7593 }, { "epoch": 0.7720618137454249, "grad_norm": 0.3466610014438629, "learning_rate": 9.910082566300097e-06, "loss": 0.4208, "step": 7594 }, { "epoch": 0.772163481089874, "grad_norm": 0.3319666385650635, "learning_rate": 9.910015553218835e-06, "loss": 0.3866, "step": 7595 }, { "epoch": 0.7722651484343229, "grad_norm": 0.31322741508483887, "learning_rate": 9.909948515402084e-06, "loss": 0.3865, "step": 7596 }, { "epoch": 0.7723668157787719, "grad_norm": 0.3618566691875458, "learning_rate": 9.909881452850176e-06, "loss": 0.4211, "step": 7597 }, { "epoch": 0.7724684831232208, "grad_norm": 0.3301522731781006, "learning_rate": 9.909814365563455e-06, "loss": 0.3793, "step": 7598 }, { "epoch": 0.7725701504676697, "grad_norm": 0.34721314907073975, "learning_rate": 9.909747253542255e-06, "loss": 0.3709, "step": 7599 }, { "epoch": 0.7726718178121188, "grad_norm": 0.3653176426887512, "learning_rate": 9.909680116786914e-06, "loss": 0.4022, "step": 7600 }, { "epoch": 0.7727734851565677, "grad_norm": 0.3316728174686432, "learning_rate": 9.909612955297772e-06, "loss": 0.4005, "step": 7601 }, { "epoch": 0.7728751525010167, "grad_norm": 0.3228774666786194, "learning_rate": 9.909545769075166e-06, "loss": 0.4104, "step": 7602 }, { "epoch": 0.7729768198454656, "grad_norm": 0.3646668791770935, "learning_rate": 9.909478558119437e-06, "loss": 0.3823, "step": 7603 }, { "epoch": 0.7730784871899146, "grad_norm": 0.3572915494441986, "learning_rate": 9.90941132243092e-06, "loss": 0.3891, "step": 7604 }, { "epoch": 0.7731801545343636, "grad_norm": 0.3167334794998169, "learning_rate": 9.909344062009956e-06, "loss": 0.3931, "step": 7605 }, { "epoch": 0.7732818218788126, "grad_norm": 0.34573766589164734, "learning_rate": 9.909276776856884e-06, "loss": 0.375, "step": 7606 }, { "epoch": 0.7733834892232615, "grad_norm": 0.35305944085121155, "learning_rate": 9.90920946697204e-06, "loss": 0.3713, "step": 7607 }, { "epoch": 0.7734851565677104, "grad_norm": 0.3089078962802887, "learning_rate": 9.909142132355769e-06, "loss": 0.3374, "step": 7608 }, { "epoch": 0.7735868239121594, "grad_norm": 0.3470257818698883, "learning_rate": 9.909074773008402e-06, "loss": 0.3686, "step": 7609 }, { "epoch": 0.7736884912566083, "grad_norm": 0.39953282475471497, "learning_rate": 9.909007388930286e-06, "loss": 0.4004, "step": 7610 }, { "epoch": 0.7737901586010574, "grad_norm": 0.3303986191749573, "learning_rate": 9.908939980121757e-06, "loss": 0.3732, "step": 7611 }, { "epoch": 0.7738918259455063, "grad_norm": 0.37347742915153503, "learning_rate": 9.908872546583155e-06, "loss": 0.3904, "step": 7612 }, { "epoch": 0.7739934932899553, "grad_norm": 0.4027969539165497, "learning_rate": 9.908805088314817e-06, "loss": 0.378, "step": 7613 }, { "epoch": 0.7740951606344042, "grad_norm": 0.327680766582489, "learning_rate": 9.90873760531709e-06, "loss": 0.3864, "step": 7614 }, { "epoch": 0.7741968279788531, "grad_norm": 0.3364129364490509, "learning_rate": 9.908670097590305e-06, "loss": 0.4013, "step": 7615 }, { "epoch": 0.7742984953233022, "grad_norm": 0.3798793852329254, "learning_rate": 9.908602565134808e-06, "loss": 0.4188, "step": 7616 }, { "epoch": 0.7744001626677511, "grad_norm": 0.3521152138710022, "learning_rate": 9.90853500795094e-06, "loss": 0.3638, "step": 7617 }, { "epoch": 0.7745018300122001, "grad_norm": 0.3305310904979706, "learning_rate": 9.908467426039038e-06, "loss": 0.3902, "step": 7618 }, { "epoch": 0.774603497356649, "grad_norm": 0.3515644967556, "learning_rate": 9.908399819399441e-06, "loss": 0.3949, "step": 7619 }, { "epoch": 0.774705164701098, "grad_norm": 0.3852595090866089, "learning_rate": 9.908332188032494e-06, "loss": 0.3845, "step": 7620 }, { "epoch": 0.774806832045547, "grad_norm": 0.348614364862442, "learning_rate": 9.908264531938537e-06, "loss": 0.3798, "step": 7621 }, { "epoch": 0.774908499389996, "grad_norm": 0.33100709319114685, "learning_rate": 9.908196851117908e-06, "loss": 0.4038, "step": 7622 }, { "epoch": 0.7750101667344449, "grad_norm": 0.3806977868080139, "learning_rate": 9.90812914557095e-06, "loss": 0.3851, "step": 7623 }, { "epoch": 0.7751118340788938, "grad_norm": 0.35393041372299194, "learning_rate": 9.908061415298004e-06, "loss": 0.4001, "step": 7624 }, { "epoch": 0.7752135014233428, "grad_norm": 0.3307688534259796, "learning_rate": 9.907993660299411e-06, "loss": 0.4032, "step": 7625 }, { "epoch": 0.7753151687677918, "grad_norm": 0.33912378549575806, "learning_rate": 9.90792588057551e-06, "loss": 0.3728, "step": 7626 }, { "epoch": 0.7754168361122408, "grad_norm": 0.3561944365501404, "learning_rate": 9.907858076126648e-06, "loss": 0.3879, "step": 7627 }, { "epoch": 0.7755185034566897, "grad_norm": 0.3585686683654785, "learning_rate": 9.907790246953162e-06, "loss": 0.3888, "step": 7628 }, { "epoch": 0.7756201708011387, "grad_norm": 0.3692440688610077, "learning_rate": 9.907722393055394e-06, "loss": 0.4352, "step": 7629 }, { "epoch": 0.7757218381455876, "grad_norm": 0.3558693528175354, "learning_rate": 9.90765451443369e-06, "loss": 0.3877, "step": 7630 }, { "epoch": 0.7758235054900366, "grad_norm": 0.345933198928833, "learning_rate": 9.907586611088386e-06, "loss": 0.3753, "step": 7631 }, { "epoch": 0.7759251728344856, "grad_norm": 0.34582895040512085, "learning_rate": 9.907518683019827e-06, "loss": 0.3735, "step": 7632 }, { "epoch": 0.7760268401789345, "grad_norm": 0.3744267225265503, "learning_rate": 9.907450730228357e-06, "loss": 0.3939, "step": 7633 }, { "epoch": 0.7761285075233835, "grad_norm": 0.3186644911766052, "learning_rate": 9.907382752714314e-06, "loss": 0.3789, "step": 7634 }, { "epoch": 0.7762301748678324, "grad_norm": 0.36625179648399353, "learning_rate": 9.907314750478043e-06, "loss": 0.4289, "step": 7635 }, { "epoch": 0.7763318422122815, "grad_norm": 0.3666931688785553, "learning_rate": 9.907246723519888e-06, "loss": 0.3932, "step": 7636 }, { "epoch": 0.7764335095567304, "grad_norm": 0.34410375356674194, "learning_rate": 9.90717867184019e-06, "loss": 0.4293, "step": 7637 }, { "epoch": 0.7765351769011793, "grad_norm": 0.3429229259490967, "learning_rate": 9.90711059543929e-06, "loss": 0.3591, "step": 7638 }, { "epoch": 0.7766368442456283, "grad_norm": 0.330959677696228, "learning_rate": 9.907042494317535e-06, "loss": 0.3683, "step": 7639 }, { "epoch": 0.7767385115900772, "grad_norm": 0.3527780771255493, "learning_rate": 9.906974368475265e-06, "loss": 0.4412, "step": 7640 }, { "epoch": 0.7768401789345263, "grad_norm": 0.31799355149269104, "learning_rate": 9.906906217912825e-06, "loss": 0.3923, "step": 7641 }, { "epoch": 0.7769418462789752, "grad_norm": 0.3209884762763977, "learning_rate": 9.906838042630557e-06, "loss": 0.3983, "step": 7642 }, { "epoch": 0.7770435136234242, "grad_norm": 0.3299640715122223, "learning_rate": 9.906769842628804e-06, "loss": 0.4095, "step": 7643 }, { "epoch": 0.7771451809678731, "grad_norm": 0.34333667159080505, "learning_rate": 9.906701617907912e-06, "loss": 0.394, "step": 7644 }, { "epoch": 0.777246848312322, "grad_norm": 0.32508212327957153, "learning_rate": 9.906633368468224e-06, "loss": 0.3841, "step": 7645 }, { "epoch": 0.7773485156567711, "grad_norm": 0.34856075048446655, "learning_rate": 9.906565094310082e-06, "loss": 0.3706, "step": 7646 }, { "epoch": 0.77745018300122, "grad_norm": 0.35288575291633606, "learning_rate": 9.906496795433831e-06, "loss": 0.3723, "step": 7647 }, { "epoch": 0.777551850345669, "grad_norm": 0.3317580819129944, "learning_rate": 9.906428471839816e-06, "loss": 0.3743, "step": 7648 }, { "epoch": 0.7776535176901179, "grad_norm": 0.34335455298423767, "learning_rate": 9.906360123528377e-06, "loss": 0.3892, "step": 7649 }, { "epoch": 0.7777551850345669, "grad_norm": 0.3272826671600342, "learning_rate": 9.906291750499866e-06, "loss": 0.3702, "step": 7650 }, { "epoch": 0.7778568523790158, "grad_norm": 0.366328626871109, "learning_rate": 9.906223352754621e-06, "loss": 0.4124, "step": 7651 }, { "epoch": 0.7779585197234649, "grad_norm": 0.305154412984848, "learning_rate": 9.906154930292989e-06, "loss": 0.3603, "step": 7652 }, { "epoch": 0.7780601870679138, "grad_norm": 0.3095966875553131, "learning_rate": 9.906086483115314e-06, "loss": 0.3877, "step": 7653 }, { "epoch": 0.7781618544123627, "grad_norm": 0.33724144101142883, "learning_rate": 9.90601801122194e-06, "loss": 0.3636, "step": 7654 }, { "epoch": 0.7782635217568117, "grad_norm": 0.3791618347167969, "learning_rate": 9.905949514613216e-06, "loss": 0.4019, "step": 7655 }, { "epoch": 0.7783651891012606, "grad_norm": 0.33177244663238525, "learning_rate": 9.905880993289482e-06, "loss": 0.3856, "step": 7656 }, { "epoch": 0.7784668564457097, "grad_norm": 0.33468812704086304, "learning_rate": 9.905812447251086e-06, "loss": 0.4199, "step": 7657 }, { "epoch": 0.7785685237901586, "grad_norm": 0.36444422602653503, "learning_rate": 9.905743876498372e-06, "loss": 0.3908, "step": 7658 }, { "epoch": 0.7786701911346076, "grad_norm": 0.3469654321670532, "learning_rate": 9.905675281031689e-06, "loss": 0.3569, "step": 7659 }, { "epoch": 0.7787718584790565, "grad_norm": 0.31847286224365234, "learning_rate": 9.905606660851377e-06, "loss": 0.3877, "step": 7660 }, { "epoch": 0.7788735258235054, "grad_norm": 0.32933568954467773, "learning_rate": 9.905538015957785e-06, "loss": 0.3925, "step": 7661 }, { "epoch": 0.7789751931679545, "grad_norm": 0.4090394675731659, "learning_rate": 9.905469346351258e-06, "loss": 0.4157, "step": 7662 }, { "epoch": 0.7790768605124034, "grad_norm": 0.3370627462863922, "learning_rate": 9.905400652032143e-06, "loss": 0.3685, "step": 7663 }, { "epoch": 0.7791785278568524, "grad_norm": 0.3050197958946228, "learning_rate": 9.905331933000784e-06, "loss": 0.3788, "step": 7664 }, { "epoch": 0.7792801952013013, "grad_norm": 0.36872947216033936, "learning_rate": 9.905263189257529e-06, "loss": 0.4008, "step": 7665 }, { "epoch": 0.7793818625457503, "grad_norm": 0.3933965265750885, "learning_rate": 9.905194420802722e-06, "loss": 0.4408, "step": 7666 }, { "epoch": 0.7794835298901993, "grad_norm": 0.35016313195228577, "learning_rate": 9.905125627636714e-06, "loss": 0.3941, "step": 7667 }, { "epoch": 0.7795851972346483, "grad_norm": 0.32370734214782715, "learning_rate": 9.905056809759846e-06, "loss": 0.3776, "step": 7668 }, { "epoch": 0.7796868645790972, "grad_norm": 0.36415550112724304, "learning_rate": 9.90498796717247e-06, "loss": 0.4065, "step": 7669 }, { "epoch": 0.7797885319235461, "grad_norm": 0.32373693585395813, "learning_rate": 9.90491909987493e-06, "loss": 0.4057, "step": 7670 }, { "epoch": 0.7798901992679951, "grad_norm": 0.3299018144607544, "learning_rate": 9.904850207867571e-06, "loss": 0.4149, "step": 7671 }, { "epoch": 0.7799918666124441, "grad_norm": 0.3205963373184204, "learning_rate": 9.904781291150744e-06, "loss": 0.4077, "step": 7672 }, { "epoch": 0.7800935339568931, "grad_norm": 0.3172449767589569, "learning_rate": 9.904712349724793e-06, "loss": 0.399, "step": 7673 }, { "epoch": 0.780195201301342, "grad_norm": 0.3267742693424225, "learning_rate": 9.904643383590067e-06, "loss": 0.3801, "step": 7674 }, { "epoch": 0.780296868645791, "grad_norm": 0.34062567353248596, "learning_rate": 9.904574392746915e-06, "loss": 0.3855, "step": 7675 }, { "epoch": 0.7803985359902399, "grad_norm": 0.3453430235385895, "learning_rate": 9.90450537719568e-06, "loss": 0.3835, "step": 7676 }, { "epoch": 0.780500203334689, "grad_norm": 0.30564916133880615, "learning_rate": 9.904436336936716e-06, "loss": 0.3951, "step": 7677 }, { "epoch": 0.7806018706791379, "grad_norm": 0.31393927335739136, "learning_rate": 9.904367271970364e-06, "loss": 0.3555, "step": 7678 }, { "epoch": 0.7807035380235868, "grad_norm": 0.30967047810554504, "learning_rate": 9.904298182296976e-06, "loss": 0.3914, "step": 7679 }, { "epoch": 0.7808052053680358, "grad_norm": 0.32912707328796387, "learning_rate": 9.9042290679169e-06, "loss": 0.4016, "step": 7680 }, { "epoch": 0.7809068727124847, "grad_norm": 0.3169373869895935, "learning_rate": 9.904159928830484e-06, "loss": 0.373, "step": 7681 }, { "epoch": 0.7810085400569338, "grad_norm": 0.3307797908782959, "learning_rate": 9.904090765038074e-06, "loss": 0.4147, "step": 7682 }, { "epoch": 0.7811102074013827, "grad_norm": 0.3266405761241913, "learning_rate": 9.90402157654002e-06, "loss": 0.3777, "step": 7683 }, { "epoch": 0.7812118747458316, "grad_norm": 0.3269374370574951, "learning_rate": 9.903952363336672e-06, "loss": 0.3651, "step": 7684 }, { "epoch": 0.7813135420902806, "grad_norm": 0.3105173707008362, "learning_rate": 9.903883125428377e-06, "loss": 0.3836, "step": 7685 }, { "epoch": 0.7814152094347295, "grad_norm": 0.37348321080207825, "learning_rate": 9.903813862815484e-06, "loss": 0.4009, "step": 7686 }, { "epoch": 0.7815168767791786, "grad_norm": 0.352935254573822, "learning_rate": 9.90374457549834e-06, "loss": 0.3595, "step": 7687 }, { "epoch": 0.7816185441236275, "grad_norm": 0.31523868441581726, "learning_rate": 9.903675263477299e-06, "loss": 0.3911, "step": 7688 }, { "epoch": 0.7817202114680765, "grad_norm": 0.3412283957004547, "learning_rate": 9.903605926752707e-06, "loss": 0.3861, "step": 7689 }, { "epoch": 0.7818218788125254, "grad_norm": 0.3105359673500061, "learning_rate": 9.903536565324911e-06, "loss": 0.4126, "step": 7690 }, { "epoch": 0.7819235461569743, "grad_norm": 0.30531570315361023, "learning_rate": 9.903467179194266e-06, "loss": 0.3926, "step": 7691 }, { "epoch": 0.7820252135014233, "grad_norm": 0.36766767501831055, "learning_rate": 9.903397768361118e-06, "loss": 0.4134, "step": 7692 }, { "epoch": 0.7821268808458723, "grad_norm": 0.3430837094783783, "learning_rate": 9.903328332825817e-06, "loss": 0.3796, "step": 7693 }, { "epoch": 0.7822285481903213, "grad_norm": 0.3309554159641266, "learning_rate": 9.903258872588713e-06, "loss": 0.3861, "step": 7694 }, { "epoch": 0.7823302155347702, "grad_norm": 0.36011144518852234, "learning_rate": 9.903189387650156e-06, "loss": 0.3761, "step": 7695 }, { "epoch": 0.7824318828792192, "grad_norm": 0.30426904559135437, "learning_rate": 9.903119878010496e-06, "loss": 0.3836, "step": 7696 }, { "epoch": 0.7825335502236681, "grad_norm": 0.3535192608833313, "learning_rate": 9.903050343670083e-06, "loss": 0.4004, "step": 7697 }, { "epoch": 0.7826352175681172, "grad_norm": 0.37322092056274414, "learning_rate": 9.902980784629269e-06, "loss": 0.3926, "step": 7698 }, { "epoch": 0.7827368849125661, "grad_norm": 0.3473508954048157, "learning_rate": 9.902911200888402e-06, "loss": 0.4263, "step": 7699 }, { "epoch": 0.782838552257015, "grad_norm": 0.34604784846305847, "learning_rate": 9.902841592447832e-06, "loss": 0.3929, "step": 7700 }, { "epoch": 0.782940219601464, "grad_norm": 0.35158443450927734, "learning_rate": 9.902771959307912e-06, "loss": 0.3929, "step": 7701 }, { "epoch": 0.7830418869459129, "grad_norm": 0.3305371403694153, "learning_rate": 9.902702301468992e-06, "loss": 0.3814, "step": 7702 }, { "epoch": 0.783143554290362, "grad_norm": 0.30959272384643555, "learning_rate": 9.902632618931423e-06, "loss": 0.3763, "step": 7703 }, { "epoch": 0.7832452216348109, "grad_norm": 0.32671165466308594, "learning_rate": 9.902562911695556e-06, "loss": 0.3922, "step": 7704 }, { "epoch": 0.7833468889792599, "grad_norm": 0.30288198590278625, "learning_rate": 9.902493179761743e-06, "loss": 0.3818, "step": 7705 }, { "epoch": 0.7834485563237088, "grad_norm": 0.32906240224838257, "learning_rate": 9.902423423130333e-06, "loss": 0.3812, "step": 7706 }, { "epoch": 0.7835502236681577, "grad_norm": 0.3686071038246155, "learning_rate": 9.902353641801679e-06, "loss": 0.4245, "step": 7707 }, { "epoch": 0.7836518910126068, "grad_norm": 0.3131965696811676, "learning_rate": 9.902283835776132e-06, "loss": 0.3702, "step": 7708 }, { "epoch": 0.7837535583570557, "grad_norm": 0.3259749412536621, "learning_rate": 9.902214005054045e-06, "loss": 0.3935, "step": 7709 }, { "epoch": 0.7838552257015047, "grad_norm": 0.34025514125823975, "learning_rate": 9.902144149635768e-06, "loss": 0.3794, "step": 7710 }, { "epoch": 0.7839568930459536, "grad_norm": 0.32579469680786133, "learning_rate": 9.902074269521654e-06, "loss": 0.3649, "step": 7711 }, { "epoch": 0.7840585603904026, "grad_norm": 0.3489760756492615, "learning_rate": 9.902004364712055e-06, "loss": 0.3884, "step": 7712 }, { "epoch": 0.7841602277348516, "grad_norm": 0.3215063810348511, "learning_rate": 9.901934435207323e-06, "loss": 0.4224, "step": 7713 }, { "epoch": 0.7842618950793006, "grad_norm": 0.34084394574165344, "learning_rate": 9.90186448100781e-06, "loss": 0.4188, "step": 7714 }, { "epoch": 0.7843635624237495, "grad_norm": 0.32980597019195557, "learning_rate": 9.901794502113868e-06, "loss": 0.3571, "step": 7715 }, { "epoch": 0.7844652297681984, "grad_norm": 0.3304222822189331, "learning_rate": 9.901724498525852e-06, "loss": 0.3732, "step": 7716 }, { "epoch": 0.7845668971126474, "grad_norm": 0.34786662459373474, "learning_rate": 9.901654470244111e-06, "loss": 0.4032, "step": 7717 }, { "epoch": 0.7846685644570964, "grad_norm": 0.3270036280155182, "learning_rate": 9.901584417268999e-06, "loss": 0.4107, "step": 7718 }, { "epoch": 0.7847702318015454, "grad_norm": 0.3446330428123474, "learning_rate": 9.90151433960087e-06, "loss": 0.3858, "step": 7719 }, { "epoch": 0.7848718991459943, "grad_norm": 0.3105587661266327, "learning_rate": 9.901444237240078e-06, "loss": 0.4017, "step": 7720 }, { "epoch": 0.7849735664904433, "grad_norm": 0.30599915981292725, "learning_rate": 9.901374110186974e-06, "loss": 0.3808, "step": 7721 }, { "epoch": 0.7850752338348922, "grad_norm": 0.3164029121398926, "learning_rate": 9.901303958441913e-06, "loss": 0.4235, "step": 7722 }, { "epoch": 0.7851769011793412, "grad_norm": 0.34464511275291443, "learning_rate": 9.901233782005245e-06, "loss": 0.3873, "step": 7723 }, { "epoch": 0.7852785685237902, "grad_norm": 0.32935184240341187, "learning_rate": 9.901163580877326e-06, "loss": 0.4012, "step": 7724 }, { "epoch": 0.7853802358682391, "grad_norm": 0.3321022093296051, "learning_rate": 9.90109335505851e-06, "loss": 0.3924, "step": 7725 }, { "epoch": 0.7854819032126881, "grad_norm": 0.33179721236228943, "learning_rate": 9.901023104549151e-06, "loss": 0.3875, "step": 7726 }, { "epoch": 0.785583570557137, "grad_norm": 0.3133828341960907, "learning_rate": 9.900952829349604e-06, "loss": 0.3681, "step": 7727 }, { "epoch": 0.7856852379015861, "grad_norm": 0.33878085017204285, "learning_rate": 9.90088252946022e-06, "loss": 0.3586, "step": 7728 }, { "epoch": 0.785786905246035, "grad_norm": 0.33563438057899475, "learning_rate": 9.900812204881354e-06, "loss": 0.4299, "step": 7729 }, { "epoch": 0.785888572590484, "grad_norm": 0.3176177442073822, "learning_rate": 9.900741855613361e-06, "loss": 0.3835, "step": 7730 }, { "epoch": 0.7859902399349329, "grad_norm": 0.37186118960380554, "learning_rate": 9.900671481656594e-06, "loss": 0.3958, "step": 7731 }, { "epoch": 0.7860919072793818, "grad_norm": 0.3451867401599884, "learning_rate": 9.90060108301141e-06, "loss": 0.3782, "step": 7732 }, { "epoch": 0.7861935746238308, "grad_norm": 0.361615389585495, "learning_rate": 9.900530659678162e-06, "loss": 0.4119, "step": 7733 }, { "epoch": 0.7862952419682798, "grad_norm": 0.327407568693161, "learning_rate": 9.900460211657203e-06, "loss": 0.4151, "step": 7734 }, { "epoch": 0.7863969093127288, "grad_norm": 0.35479623079299927, "learning_rate": 9.900389738948893e-06, "loss": 0.4031, "step": 7735 }, { "epoch": 0.7864985766571777, "grad_norm": 0.35850951075553894, "learning_rate": 9.900319241553582e-06, "loss": 0.3923, "step": 7736 }, { "epoch": 0.7866002440016266, "grad_norm": 0.33374762535095215, "learning_rate": 9.900248719471629e-06, "loss": 0.3898, "step": 7737 }, { "epoch": 0.7867019113460756, "grad_norm": 0.3625493347644806, "learning_rate": 9.900178172703386e-06, "loss": 0.4175, "step": 7738 }, { "epoch": 0.7868035786905246, "grad_norm": 0.36005091667175293, "learning_rate": 9.900107601249212e-06, "loss": 0.3963, "step": 7739 }, { "epoch": 0.7869052460349736, "grad_norm": 0.32873040437698364, "learning_rate": 9.900037005109458e-06, "loss": 0.3822, "step": 7740 }, { "epoch": 0.7870069133794225, "grad_norm": 0.35837575793266296, "learning_rate": 9.899966384284483e-06, "loss": 0.3883, "step": 7741 }, { "epoch": 0.7871085807238715, "grad_norm": 0.34593668580055237, "learning_rate": 9.899895738774642e-06, "loss": 0.3685, "step": 7742 }, { "epoch": 0.7872102480683204, "grad_norm": 0.3259241580963135, "learning_rate": 9.899825068580288e-06, "loss": 0.3665, "step": 7743 }, { "epoch": 0.7873119154127695, "grad_norm": 0.3300454020500183, "learning_rate": 9.899754373701782e-06, "loss": 0.4038, "step": 7744 }, { "epoch": 0.7874135827572184, "grad_norm": 0.33459362387657166, "learning_rate": 9.899683654139479e-06, "loss": 0.3825, "step": 7745 }, { "epoch": 0.7875152501016673, "grad_norm": 0.37241190671920776, "learning_rate": 9.899612909893733e-06, "loss": 0.4023, "step": 7746 }, { "epoch": 0.7876169174461163, "grad_norm": 0.31370043754577637, "learning_rate": 9.899542140964899e-06, "loss": 0.3973, "step": 7747 }, { "epoch": 0.7877185847905652, "grad_norm": 0.3437141180038452, "learning_rate": 9.899471347353339e-06, "loss": 0.435, "step": 7748 }, { "epoch": 0.7878202521350143, "grad_norm": 0.360132098197937, "learning_rate": 9.899400529059407e-06, "loss": 0.39, "step": 7749 }, { "epoch": 0.7879219194794632, "grad_norm": 0.3794894814491272, "learning_rate": 9.899329686083456e-06, "loss": 0.4223, "step": 7750 }, { "epoch": 0.7880235868239122, "grad_norm": 0.33223599195480347, "learning_rate": 9.89925881842585e-06, "loss": 0.3974, "step": 7751 }, { "epoch": 0.7881252541683611, "grad_norm": 0.3082202672958374, "learning_rate": 9.89918792608694e-06, "loss": 0.4091, "step": 7752 }, { "epoch": 0.78822692151281, "grad_norm": 0.3433893918991089, "learning_rate": 9.899117009067086e-06, "loss": 0.3724, "step": 7753 }, { "epoch": 0.7883285888572591, "grad_norm": 0.34174302220344543, "learning_rate": 9.899046067366645e-06, "loss": 0.427, "step": 7754 }, { "epoch": 0.788430256201708, "grad_norm": 0.3129817247390747, "learning_rate": 9.898975100985975e-06, "loss": 0.4274, "step": 7755 }, { "epoch": 0.788531923546157, "grad_norm": 0.33782854676246643, "learning_rate": 9.89890410992543e-06, "loss": 0.3796, "step": 7756 }, { "epoch": 0.7886335908906059, "grad_norm": 0.3552441895008087, "learning_rate": 9.898833094185374e-06, "loss": 0.4223, "step": 7757 }, { "epoch": 0.7887352582350549, "grad_norm": 0.29360833764076233, "learning_rate": 9.898762053766159e-06, "loss": 0.3913, "step": 7758 }, { "epoch": 0.7888369255795039, "grad_norm": 0.33276116847991943, "learning_rate": 9.898690988668145e-06, "loss": 0.3808, "step": 7759 }, { "epoch": 0.7889385929239529, "grad_norm": 0.3797301650047302, "learning_rate": 9.898619898891688e-06, "loss": 0.3844, "step": 7760 }, { "epoch": 0.7890402602684018, "grad_norm": 0.34600910544395447, "learning_rate": 9.898548784437151e-06, "loss": 0.4004, "step": 7761 }, { "epoch": 0.7891419276128507, "grad_norm": 0.3337354063987732, "learning_rate": 9.898477645304887e-06, "loss": 0.4024, "step": 7762 }, { "epoch": 0.7892435949572997, "grad_norm": 0.3555909991264343, "learning_rate": 9.898406481495259e-06, "loss": 0.3787, "step": 7763 }, { "epoch": 0.7893452623017487, "grad_norm": 0.3547232151031494, "learning_rate": 9.898335293008623e-06, "loss": 0.3861, "step": 7764 }, { "epoch": 0.7894469296461977, "grad_norm": 0.39493271708488464, "learning_rate": 9.898264079845335e-06, "loss": 0.4058, "step": 7765 }, { "epoch": 0.7895485969906466, "grad_norm": 0.33043980598449707, "learning_rate": 9.898192842005759e-06, "loss": 0.3734, "step": 7766 }, { "epoch": 0.7896502643350956, "grad_norm": 0.352231502532959, "learning_rate": 9.89812157949025e-06, "loss": 0.4033, "step": 7767 }, { "epoch": 0.7897519316795445, "grad_norm": 0.3334774076938629, "learning_rate": 9.89805029229917e-06, "loss": 0.3937, "step": 7768 }, { "epoch": 0.7898535990239935, "grad_norm": 0.33475741744041443, "learning_rate": 9.897978980432877e-06, "loss": 0.3902, "step": 7769 }, { "epoch": 0.7899552663684425, "grad_norm": 0.3392573893070221, "learning_rate": 9.897907643891727e-06, "loss": 0.3509, "step": 7770 }, { "epoch": 0.7900569337128914, "grad_norm": 0.3566048741340637, "learning_rate": 9.897836282676084e-06, "loss": 0.3929, "step": 7771 }, { "epoch": 0.7901586010573404, "grad_norm": 0.34173551201820374, "learning_rate": 9.897764896786306e-06, "loss": 0.3841, "step": 7772 }, { "epoch": 0.7902602684017893, "grad_norm": 0.33554762601852417, "learning_rate": 9.897693486222752e-06, "loss": 0.3879, "step": 7773 }, { "epoch": 0.7903619357462383, "grad_norm": 0.3413026034832001, "learning_rate": 9.897622050985783e-06, "loss": 0.3772, "step": 7774 }, { "epoch": 0.7904636030906873, "grad_norm": 0.40104731917381287, "learning_rate": 9.897550591075757e-06, "loss": 0.4163, "step": 7775 }, { "epoch": 0.7905652704351362, "grad_norm": 0.29521989822387695, "learning_rate": 9.897479106493035e-06, "loss": 0.3634, "step": 7776 }, { "epoch": 0.7906669377795852, "grad_norm": 0.3584235608577728, "learning_rate": 9.897407597237976e-06, "loss": 0.406, "step": 7777 }, { "epoch": 0.7907686051240341, "grad_norm": 0.34744641184806824, "learning_rate": 9.897336063310943e-06, "loss": 0.3837, "step": 7778 }, { "epoch": 0.7908702724684831, "grad_norm": 0.3573223054409027, "learning_rate": 9.897264504712294e-06, "loss": 0.3831, "step": 7779 }, { "epoch": 0.7909719398129321, "grad_norm": 0.30075281858444214, "learning_rate": 9.89719292144239e-06, "loss": 0.3755, "step": 7780 }, { "epoch": 0.7910736071573811, "grad_norm": 0.3352319598197937, "learning_rate": 9.897121313501593e-06, "loss": 0.3877, "step": 7781 }, { "epoch": 0.79117527450183, "grad_norm": 0.3668484091758728, "learning_rate": 9.897049680890261e-06, "loss": 0.3852, "step": 7782 }, { "epoch": 0.791276941846279, "grad_norm": 0.34158486127853394, "learning_rate": 9.896978023608757e-06, "loss": 0.3713, "step": 7783 }, { "epoch": 0.7913786091907279, "grad_norm": 0.3257020115852356, "learning_rate": 9.896906341657442e-06, "loss": 0.3909, "step": 7784 }, { "epoch": 0.7914802765351769, "grad_norm": 0.3384263515472412, "learning_rate": 9.896834635036675e-06, "loss": 0.3678, "step": 7785 }, { "epoch": 0.7915819438796259, "grad_norm": 0.3581353724002838, "learning_rate": 9.89676290374682e-06, "loss": 0.4009, "step": 7786 }, { "epoch": 0.7916836112240748, "grad_norm": 0.3512103855609894, "learning_rate": 9.896691147788238e-06, "loss": 0.4287, "step": 7787 }, { "epoch": 0.7917852785685238, "grad_norm": 0.3533158004283905, "learning_rate": 9.896619367161288e-06, "loss": 0.3715, "step": 7788 }, { "epoch": 0.7918869459129727, "grad_norm": 0.34755560755729675, "learning_rate": 9.896547561866334e-06, "loss": 0.397, "step": 7789 }, { "epoch": 0.7919886132574218, "grad_norm": 0.31367596983909607, "learning_rate": 9.896475731903737e-06, "loss": 0.3861, "step": 7790 }, { "epoch": 0.7920902806018707, "grad_norm": 0.2999679744243622, "learning_rate": 9.896403877273859e-06, "loss": 0.3629, "step": 7791 }, { "epoch": 0.7921919479463196, "grad_norm": 0.3592380881309509, "learning_rate": 9.89633199797706e-06, "loss": 0.4353, "step": 7792 }, { "epoch": 0.7922936152907686, "grad_norm": 0.3434077799320221, "learning_rate": 9.896260094013706e-06, "loss": 0.3764, "step": 7793 }, { "epoch": 0.7923952826352175, "grad_norm": 0.31395918130874634, "learning_rate": 9.896188165384156e-06, "loss": 0.3675, "step": 7794 }, { "epoch": 0.7924969499796666, "grad_norm": 0.31497108936309814, "learning_rate": 9.896116212088775e-06, "loss": 0.4007, "step": 7795 }, { "epoch": 0.7925986173241155, "grad_norm": 0.38812410831451416, "learning_rate": 9.896044234127923e-06, "loss": 0.3946, "step": 7796 }, { "epoch": 0.7927002846685645, "grad_norm": 0.3655039668083191, "learning_rate": 9.895972231501963e-06, "loss": 0.4084, "step": 7797 }, { "epoch": 0.7928019520130134, "grad_norm": 0.3624495267868042, "learning_rate": 9.89590020421126e-06, "loss": 0.3942, "step": 7798 }, { "epoch": 0.7929036193574623, "grad_norm": 0.35730040073394775, "learning_rate": 9.895828152256174e-06, "loss": 0.4121, "step": 7799 }, { "epoch": 0.7930052867019114, "grad_norm": 0.38357219099998474, "learning_rate": 9.89575607563707e-06, "loss": 0.4171, "step": 7800 }, { "epoch": 0.7931069540463603, "grad_norm": 0.3409837782382965, "learning_rate": 9.89568397435431e-06, "loss": 0.3756, "step": 7801 }, { "epoch": 0.7932086213908093, "grad_norm": 0.35419392585754395, "learning_rate": 9.895611848408257e-06, "loss": 0.3773, "step": 7802 }, { "epoch": 0.7933102887352582, "grad_norm": 0.3583783209323883, "learning_rate": 9.895539697799275e-06, "loss": 0.4006, "step": 7803 }, { "epoch": 0.7934119560797072, "grad_norm": 0.29956695437431335, "learning_rate": 9.895467522527727e-06, "loss": 0.3535, "step": 7804 }, { "epoch": 0.7935136234241562, "grad_norm": 0.34368011355400085, "learning_rate": 9.895395322593978e-06, "loss": 0.4107, "step": 7805 }, { "epoch": 0.7936152907686052, "grad_norm": 0.33985766768455505, "learning_rate": 9.895323097998391e-06, "loss": 0.3763, "step": 7806 }, { "epoch": 0.7937169581130541, "grad_norm": 0.312963604927063, "learning_rate": 9.89525084874133e-06, "loss": 0.3804, "step": 7807 }, { "epoch": 0.793818625457503, "grad_norm": 0.31822288036346436, "learning_rate": 9.895178574823156e-06, "loss": 0.4142, "step": 7808 }, { "epoch": 0.793920292801952, "grad_norm": 0.37287771701812744, "learning_rate": 9.895106276244237e-06, "loss": 0.4042, "step": 7809 }, { "epoch": 0.794021960146401, "grad_norm": 0.32631415128707886, "learning_rate": 9.895033953004936e-06, "loss": 0.3856, "step": 7810 }, { "epoch": 0.79412362749085, "grad_norm": 0.3540448248386383, "learning_rate": 9.894961605105617e-06, "loss": 0.391, "step": 7811 }, { "epoch": 0.7942252948352989, "grad_norm": 0.35156214237213135, "learning_rate": 9.894889232546644e-06, "loss": 0.4091, "step": 7812 }, { "epoch": 0.7943269621797479, "grad_norm": 0.3484170734882355, "learning_rate": 9.894816835328383e-06, "loss": 0.385, "step": 7813 }, { "epoch": 0.7944286295241968, "grad_norm": 0.33951449394226074, "learning_rate": 9.894744413451199e-06, "loss": 0.3986, "step": 7814 }, { "epoch": 0.7945302968686457, "grad_norm": 0.32487940788269043, "learning_rate": 9.894671966915454e-06, "loss": 0.4002, "step": 7815 }, { "epoch": 0.7946319642130948, "grad_norm": 0.33775633573532104, "learning_rate": 9.894599495721515e-06, "loss": 0.395, "step": 7816 }, { "epoch": 0.7947336315575437, "grad_norm": 0.33250656723976135, "learning_rate": 9.894526999869747e-06, "loss": 0.3977, "step": 7817 }, { "epoch": 0.7948352989019927, "grad_norm": 0.3374146521091461, "learning_rate": 9.894454479360516e-06, "loss": 0.3835, "step": 7818 }, { "epoch": 0.7949369662464416, "grad_norm": 0.31521254777908325, "learning_rate": 9.894381934194185e-06, "loss": 0.3641, "step": 7819 }, { "epoch": 0.7950386335908906, "grad_norm": 0.3418862223625183, "learning_rate": 9.894309364371122e-06, "loss": 0.4083, "step": 7820 }, { "epoch": 0.7951403009353396, "grad_norm": 0.3680911958217621, "learning_rate": 9.894236769891693e-06, "loss": 0.3842, "step": 7821 }, { "epoch": 0.7952419682797885, "grad_norm": 0.3540850877761841, "learning_rate": 9.894164150756258e-06, "loss": 0.3592, "step": 7822 }, { "epoch": 0.7953436356242375, "grad_norm": 0.33367079496383667, "learning_rate": 9.89409150696519e-06, "loss": 0.3838, "step": 7823 }, { "epoch": 0.7954453029686864, "grad_norm": 0.3387928307056427, "learning_rate": 9.894018838518851e-06, "loss": 0.3661, "step": 7824 }, { "epoch": 0.7955469703131354, "grad_norm": 0.3090212345123291, "learning_rate": 9.89394614541761e-06, "loss": 0.4037, "step": 7825 }, { "epoch": 0.7956486376575844, "grad_norm": 0.337740033864975, "learning_rate": 9.893873427661829e-06, "loss": 0.4018, "step": 7826 }, { "epoch": 0.7957503050020334, "grad_norm": 0.36712583899497986, "learning_rate": 9.893800685251877e-06, "loss": 0.4028, "step": 7827 }, { "epoch": 0.7958519723464823, "grad_norm": 0.32098525762557983, "learning_rate": 9.893727918188121e-06, "loss": 0.3792, "step": 7828 }, { "epoch": 0.7959536396909312, "grad_norm": 0.32717594504356384, "learning_rate": 9.893655126470926e-06, "loss": 0.3664, "step": 7829 }, { "epoch": 0.7960553070353802, "grad_norm": 0.3784734010696411, "learning_rate": 9.89358231010066e-06, "loss": 0.4037, "step": 7830 }, { "epoch": 0.7961569743798292, "grad_norm": 0.32579290866851807, "learning_rate": 9.893509469077689e-06, "loss": 0.3649, "step": 7831 }, { "epoch": 0.7962586417242782, "grad_norm": 0.36335209012031555, "learning_rate": 9.89343660340238e-06, "loss": 0.3753, "step": 7832 }, { "epoch": 0.7963603090687271, "grad_norm": 0.3210713267326355, "learning_rate": 9.8933637130751e-06, "loss": 0.3855, "step": 7833 }, { "epoch": 0.7964619764131761, "grad_norm": 0.30896633863449097, "learning_rate": 9.893290798096217e-06, "loss": 0.3956, "step": 7834 }, { "epoch": 0.796563643757625, "grad_norm": 0.31221598386764526, "learning_rate": 9.893217858466098e-06, "loss": 0.3973, "step": 7835 }, { "epoch": 0.7966653111020741, "grad_norm": 0.3494676351547241, "learning_rate": 9.893144894185109e-06, "loss": 0.3529, "step": 7836 }, { "epoch": 0.796766978446523, "grad_norm": 0.315715491771698, "learning_rate": 9.893071905253619e-06, "loss": 0.3624, "step": 7837 }, { "epoch": 0.7968686457909719, "grad_norm": 0.3575512170791626, "learning_rate": 9.892998891671996e-06, "loss": 0.4268, "step": 7838 }, { "epoch": 0.7969703131354209, "grad_norm": 0.35773640871047974, "learning_rate": 9.892925853440609e-06, "loss": 0.3987, "step": 7839 }, { "epoch": 0.7970719804798698, "grad_norm": 0.34088748693466187, "learning_rate": 9.89285279055982e-06, "loss": 0.4244, "step": 7840 }, { "epoch": 0.7971736478243189, "grad_norm": 0.3420836627483368, "learning_rate": 9.892779703030006e-06, "loss": 0.3812, "step": 7841 }, { "epoch": 0.7972753151687678, "grad_norm": 0.33726033568382263, "learning_rate": 9.892706590851528e-06, "loss": 0.3802, "step": 7842 }, { "epoch": 0.7973769825132168, "grad_norm": 0.324989378452301, "learning_rate": 9.892633454024758e-06, "loss": 0.3677, "step": 7843 }, { "epoch": 0.7974786498576657, "grad_norm": 0.3652493357658386, "learning_rate": 9.892560292550064e-06, "loss": 0.3879, "step": 7844 }, { "epoch": 0.7975803172021146, "grad_norm": 0.3179037570953369, "learning_rate": 9.892487106427811e-06, "loss": 0.3859, "step": 7845 }, { "epoch": 0.7976819845465637, "grad_norm": 0.3468206226825714, "learning_rate": 9.892413895658372e-06, "loss": 0.4088, "step": 7846 }, { "epoch": 0.7977836518910126, "grad_norm": 0.3634389042854309, "learning_rate": 9.892340660242117e-06, "loss": 0.3947, "step": 7847 }, { "epoch": 0.7978853192354616, "grad_norm": 0.34175223112106323, "learning_rate": 9.89226740017941e-06, "loss": 0.4126, "step": 7848 }, { "epoch": 0.7979869865799105, "grad_norm": 0.3206469714641571, "learning_rate": 9.892194115470622e-06, "loss": 0.3872, "step": 7849 }, { "epoch": 0.7980886539243595, "grad_norm": 0.31499308347702026, "learning_rate": 9.892120806116124e-06, "loss": 0.3709, "step": 7850 }, { "epoch": 0.7981903212688085, "grad_norm": 0.3232589066028595, "learning_rate": 9.892047472116281e-06, "loss": 0.3537, "step": 7851 }, { "epoch": 0.7982919886132575, "grad_norm": 0.33233484625816345, "learning_rate": 9.891974113471468e-06, "loss": 0.4098, "step": 7852 }, { "epoch": 0.7983936559577064, "grad_norm": 0.3377993702888489, "learning_rate": 9.891900730182051e-06, "loss": 0.3556, "step": 7853 }, { "epoch": 0.7984953233021553, "grad_norm": 0.31983858346939087, "learning_rate": 9.891827322248402e-06, "loss": 0.3876, "step": 7854 }, { "epoch": 0.7985969906466043, "grad_norm": 0.3496147096157074, "learning_rate": 9.891753889670887e-06, "loss": 0.372, "step": 7855 }, { "epoch": 0.7986986579910533, "grad_norm": 0.34934088587760925, "learning_rate": 9.89168043244988e-06, "loss": 0.3747, "step": 7856 }, { "epoch": 0.7988003253355023, "grad_norm": 0.3601694405078888, "learning_rate": 9.89160695058575e-06, "loss": 0.3888, "step": 7857 }, { "epoch": 0.7989019926799512, "grad_norm": 0.3281809687614441, "learning_rate": 9.891533444078866e-06, "loss": 0.3805, "step": 7858 }, { "epoch": 0.7990036600244002, "grad_norm": 0.32229140400886536, "learning_rate": 9.891459912929598e-06, "loss": 0.3851, "step": 7859 }, { "epoch": 0.7991053273688491, "grad_norm": 0.3585785925388336, "learning_rate": 9.891386357138318e-06, "loss": 0.3941, "step": 7860 }, { "epoch": 0.799206994713298, "grad_norm": 0.3628758192062378, "learning_rate": 9.891312776705397e-06, "loss": 0.3985, "step": 7861 }, { "epoch": 0.7993086620577471, "grad_norm": 0.3560221493244171, "learning_rate": 9.891239171631202e-06, "loss": 0.3828, "step": 7862 }, { "epoch": 0.799410329402196, "grad_norm": 0.32823890447616577, "learning_rate": 9.891165541916108e-06, "loss": 0.3913, "step": 7863 }, { "epoch": 0.799511996746645, "grad_norm": 0.32448863983154297, "learning_rate": 9.891091887560485e-06, "loss": 0.39, "step": 7864 }, { "epoch": 0.7996136640910939, "grad_norm": 0.3138785660266876, "learning_rate": 9.891018208564702e-06, "loss": 0.3968, "step": 7865 }, { "epoch": 0.7997153314355429, "grad_norm": 0.3307025730609894, "learning_rate": 9.890944504929133e-06, "loss": 0.386, "step": 7866 }, { "epoch": 0.7998169987799919, "grad_norm": 0.30698201060295105, "learning_rate": 9.890870776654147e-06, "loss": 0.3695, "step": 7867 }, { "epoch": 0.7999186661244408, "grad_norm": 0.3137752115726471, "learning_rate": 9.890797023740116e-06, "loss": 0.4004, "step": 7868 }, { "epoch": 0.8000203334688898, "grad_norm": 0.3481585681438446, "learning_rate": 9.890723246187412e-06, "loss": 0.363, "step": 7869 }, { "epoch": 0.8001220008133387, "grad_norm": 0.35501256585121155, "learning_rate": 9.890649443996408e-06, "loss": 0.4152, "step": 7870 }, { "epoch": 0.8002236681577877, "grad_norm": 0.31763404607772827, "learning_rate": 9.890575617167472e-06, "loss": 0.3792, "step": 7871 }, { "epoch": 0.8003253355022367, "grad_norm": 0.3576979339122772, "learning_rate": 9.89050176570098e-06, "loss": 0.3863, "step": 7872 }, { "epoch": 0.8004270028466857, "grad_norm": 0.3490922451019287, "learning_rate": 9.8904278895973e-06, "loss": 0.3963, "step": 7873 }, { "epoch": 0.8005286701911346, "grad_norm": 0.32854676246643066, "learning_rate": 9.890353988856807e-06, "loss": 0.3981, "step": 7874 }, { "epoch": 0.8006303375355835, "grad_norm": 0.33823567628860474, "learning_rate": 9.890280063479874e-06, "loss": 0.4001, "step": 7875 }, { "epoch": 0.8007320048800325, "grad_norm": 0.3860185742378235, "learning_rate": 9.890206113466872e-06, "loss": 0.3916, "step": 7876 }, { "epoch": 0.8008336722244815, "grad_norm": 0.3321220874786377, "learning_rate": 9.890132138818174e-06, "loss": 0.3628, "step": 7877 }, { "epoch": 0.8009353395689305, "grad_norm": 0.33859023451805115, "learning_rate": 9.89005813953415e-06, "loss": 0.3928, "step": 7878 }, { "epoch": 0.8010370069133794, "grad_norm": 0.3595622181892395, "learning_rate": 9.889984115615179e-06, "loss": 0.3927, "step": 7879 }, { "epoch": 0.8011386742578284, "grad_norm": 0.31248438358306885, "learning_rate": 9.889910067061627e-06, "loss": 0.3781, "step": 7880 }, { "epoch": 0.8012403416022773, "grad_norm": 0.3358707129955292, "learning_rate": 9.889835993873871e-06, "loss": 0.4003, "step": 7881 }, { "epoch": 0.8013420089467264, "grad_norm": 0.3199557662010193, "learning_rate": 9.889761896052284e-06, "loss": 0.3835, "step": 7882 }, { "epoch": 0.8014436762911753, "grad_norm": 0.3448142111301422, "learning_rate": 9.889687773597238e-06, "loss": 0.3698, "step": 7883 }, { "epoch": 0.8015453436356242, "grad_norm": 0.32218965888023376, "learning_rate": 9.889613626509107e-06, "loss": 0.4097, "step": 7884 }, { "epoch": 0.8016470109800732, "grad_norm": 0.3164898753166199, "learning_rate": 9.889539454788263e-06, "loss": 0.3979, "step": 7885 }, { "epoch": 0.8017486783245221, "grad_norm": 0.34135717153549194, "learning_rate": 9.889465258435083e-06, "loss": 0.4011, "step": 7886 }, { "epoch": 0.8018503456689712, "grad_norm": 0.33737054467201233, "learning_rate": 9.88939103744994e-06, "loss": 0.4139, "step": 7887 }, { "epoch": 0.8019520130134201, "grad_norm": 0.31418442726135254, "learning_rate": 9.889316791833205e-06, "loss": 0.4038, "step": 7888 }, { "epoch": 0.8020536803578691, "grad_norm": 0.31130918860435486, "learning_rate": 9.889242521585254e-06, "loss": 0.3729, "step": 7889 }, { "epoch": 0.802155347702318, "grad_norm": 0.3323245942592621, "learning_rate": 9.88916822670646e-06, "loss": 0.4027, "step": 7890 }, { "epoch": 0.8022570150467669, "grad_norm": 0.3121635317802429, "learning_rate": 9.8890939071972e-06, "loss": 0.3681, "step": 7891 }, { "epoch": 0.802358682391216, "grad_norm": 0.35314393043518066, "learning_rate": 9.889019563057847e-06, "loss": 0.4214, "step": 7892 }, { "epoch": 0.8024603497356649, "grad_norm": 0.3156280815601349, "learning_rate": 9.888945194288774e-06, "loss": 0.3843, "step": 7893 }, { "epoch": 0.8025620170801139, "grad_norm": 0.3025280833244324, "learning_rate": 9.888870800890357e-06, "loss": 0.386, "step": 7894 }, { "epoch": 0.8026636844245628, "grad_norm": 0.35282137989997864, "learning_rate": 9.888796382862971e-06, "loss": 0.4098, "step": 7895 }, { "epoch": 0.8027653517690118, "grad_norm": 0.35249608755111694, "learning_rate": 9.888721940206991e-06, "loss": 0.3908, "step": 7896 }, { "epoch": 0.8028670191134608, "grad_norm": 0.32867884635925293, "learning_rate": 9.888647472922791e-06, "loss": 0.4229, "step": 7897 }, { "epoch": 0.8029686864579098, "grad_norm": 0.3195907771587372, "learning_rate": 9.888572981010748e-06, "loss": 0.3837, "step": 7898 }, { "epoch": 0.8030703538023587, "grad_norm": 0.3341180086135864, "learning_rate": 9.888498464471235e-06, "loss": 0.4261, "step": 7899 }, { "epoch": 0.8031720211468076, "grad_norm": 0.3164460062980652, "learning_rate": 9.888423923304628e-06, "loss": 0.4028, "step": 7900 }, { "epoch": 0.8032736884912566, "grad_norm": 0.33413565158843994, "learning_rate": 9.888349357511305e-06, "loss": 0.4007, "step": 7901 }, { "epoch": 0.8033753558357055, "grad_norm": 0.34398123621940613, "learning_rate": 9.888274767091637e-06, "loss": 0.3951, "step": 7902 }, { "epoch": 0.8034770231801546, "grad_norm": 0.3320082128047943, "learning_rate": 9.888200152046003e-06, "loss": 0.4132, "step": 7903 }, { "epoch": 0.8035786905246035, "grad_norm": 0.2969749867916107, "learning_rate": 9.88812551237478e-06, "loss": 0.3843, "step": 7904 }, { "epoch": 0.8036803578690525, "grad_norm": 0.3566212058067322, "learning_rate": 9.88805084807834e-06, "loss": 0.3937, "step": 7905 }, { "epoch": 0.8037820252135014, "grad_norm": 0.3604562282562256, "learning_rate": 9.887976159157062e-06, "loss": 0.3674, "step": 7906 }, { "epoch": 0.8038836925579503, "grad_norm": 0.3180265724658966, "learning_rate": 9.887901445611321e-06, "loss": 0.3903, "step": 7907 }, { "epoch": 0.8039853599023994, "grad_norm": 0.3073981702327728, "learning_rate": 9.887826707441497e-06, "loss": 0.4071, "step": 7908 }, { "epoch": 0.8040870272468483, "grad_norm": 0.3355531096458435, "learning_rate": 9.887751944647961e-06, "loss": 0.3735, "step": 7909 }, { "epoch": 0.8041886945912973, "grad_norm": 0.30997294187545776, "learning_rate": 9.887677157231093e-06, "loss": 0.3649, "step": 7910 }, { "epoch": 0.8042903619357462, "grad_norm": 0.3472186028957367, "learning_rate": 9.887602345191268e-06, "loss": 0.3971, "step": 7911 }, { "epoch": 0.8043920292801952, "grad_norm": 0.346344530582428, "learning_rate": 9.887527508528865e-06, "loss": 0.3949, "step": 7912 }, { "epoch": 0.8044936966246442, "grad_norm": 0.3147668242454529, "learning_rate": 9.88745264724426e-06, "loss": 0.4112, "step": 7913 }, { "epoch": 0.8045953639690931, "grad_norm": 0.30627772212028503, "learning_rate": 9.887377761337828e-06, "loss": 0.4032, "step": 7914 }, { "epoch": 0.8046970313135421, "grad_norm": 0.35575276613235474, "learning_rate": 9.887302850809952e-06, "loss": 0.3832, "step": 7915 }, { "epoch": 0.804798698657991, "grad_norm": 0.3124625086784363, "learning_rate": 9.887227915661001e-06, "loss": 0.3927, "step": 7916 }, { "epoch": 0.80490036600244, "grad_norm": 0.32014551758766174, "learning_rate": 9.887152955891361e-06, "loss": 0.4035, "step": 7917 }, { "epoch": 0.805002033346889, "grad_norm": 0.3050377368927002, "learning_rate": 9.887077971501404e-06, "loss": 0.4167, "step": 7918 }, { "epoch": 0.805103700691338, "grad_norm": 0.28150197863578796, "learning_rate": 9.88700296249151e-06, "loss": 0.3839, "step": 7919 }, { "epoch": 0.8052053680357869, "grad_norm": 0.3253463804721832, "learning_rate": 9.886927928862057e-06, "loss": 0.344, "step": 7920 }, { "epoch": 0.8053070353802358, "grad_norm": 0.34030500054359436, "learning_rate": 9.886852870613422e-06, "loss": 0.4502, "step": 7921 }, { "epoch": 0.8054087027246848, "grad_norm": 0.32719457149505615, "learning_rate": 9.886777787745983e-06, "loss": 0.3932, "step": 7922 }, { "epoch": 0.8055103700691338, "grad_norm": 0.34814828634262085, "learning_rate": 9.886702680260119e-06, "loss": 0.3958, "step": 7923 }, { "epoch": 0.8056120374135828, "grad_norm": 0.3302978277206421, "learning_rate": 9.886627548156206e-06, "loss": 0.4178, "step": 7924 }, { "epoch": 0.8057137047580317, "grad_norm": 0.3038853108882904, "learning_rate": 9.886552391434628e-06, "loss": 0.3675, "step": 7925 }, { "epoch": 0.8058153721024807, "grad_norm": 0.3381441831588745, "learning_rate": 9.88647721009576e-06, "loss": 0.4342, "step": 7926 }, { "epoch": 0.8059170394469296, "grad_norm": 0.31701967120170593, "learning_rate": 9.886402004139978e-06, "loss": 0.387, "step": 7927 }, { "epoch": 0.8060187067913787, "grad_norm": 0.3337151110172272, "learning_rate": 9.886326773567667e-06, "loss": 0.3765, "step": 7928 }, { "epoch": 0.8061203741358276, "grad_norm": 0.3279070556163788, "learning_rate": 9.886251518379202e-06, "loss": 0.4034, "step": 7929 }, { "epoch": 0.8062220414802765, "grad_norm": 0.32639721035957336, "learning_rate": 9.886176238574962e-06, "loss": 0.4342, "step": 7930 }, { "epoch": 0.8063237088247255, "grad_norm": 0.315277636051178, "learning_rate": 9.88610093415533e-06, "loss": 0.3928, "step": 7931 }, { "epoch": 0.8064253761691744, "grad_norm": 0.3084906041622162, "learning_rate": 9.88602560512068e-06, "loss": 0.3834, "step": 7932 }, { "epoch": 0.8065270435136235, "grad_norm": 0.33454617857933044, "learning_rate": 9.885950251471395e-06, "loss": 0.4079, "step": 7933 }, { "epoch": 0.8066287108580724, "grad_norm": 0.31424763798713684, "learning_rate": 9.885874873207853e-06, "loss": 0.4223, "step": 7934 }, { "epoch": 0.8067303782025214, "grad_norm": 0.3244735300540924, "learning_rate": 9.885799470330435e-06, "loss": 0.3921, "step": 7935 }, { "epoch": 0.8068320455469703, "grad_norm": 0.3184172809123993, "learning_rate": 9.885724042839521e-06, "loss": 0.4106, "step": 7936 }, { "epoch": 0.8069337128914192, "grad_norm": 0.3058209717273712, "learning_rate": 9.885648590735489e-06, "loss": 0.3958, "step": 7937 }, { "epoch": 0.8070353802358683, "grad_norm": 0.324420303106308, "learning_rate": 9.885573114018722e-06, "loss": 0.3527, "step": 7938 }, { "epoch": 0.8071370475803172, "grad_norm": 0.33947718143463135, "learning_rate": 9.885497612689597e-06, "loss": 0.3889, "step": 7939 }, { "epoch": 0.8072387149247662, "grad_norm": 0.3118625283241272, "learning_rate": 9.885422086748497e-06, "loss": 0.3671, "step": 7940 }, { "epoch": 0.8073403822692151, "grad_norm": 0.305239737033844, "learning_rate": 9.8853465361958e-06, "loss": 0.3807, "step": 7941 }, { "epoch": 0.8074420496136641, "grad_norm": 0.3282815217971802, "learning_rate": 9.885270961031889e-06, "loss": 0.3726, "step": 7942 }, { "epoch": 0.807543716958113, "grad_norm": 0.32477307319641113, "learning_rate": 9.885195361257144e-06, "loss": 0.3761, "step": 7943 }, { "epoch": 0.807645384302562, "grad_norm": 0.343803346157074, "learning_rate": 9.885119736871947e-06, "loss": 0.3808, "step": 7944 }, { "epoch": 0.807747051647011, "grad_norm": 0.30071356892585754, "learning_rate": 9.885044087876676e-06, "loss": 0.3891, "step": 7945 }, { "epoch": 0.8078487189914599, "grad_norm": 0.3118046820163727, "learning_rate": 9.884968414271714e-06, "loss": 0.3792, "step": 7946 }, { "epoch": 0.8079503863359089, "grad_norm": 0.33513253927230835, "learning_rate": 9.884892716057443e-06, "loss": 0.3728, "step": 7947 }, { "epoch": 0.8080520536803578, "grad_norm": 0.3596290051937103, "learning_rate": 9.884816993234242e-06, "loss": 0.4415, "step": 7948 }, { "epoch": 0.8081537210248069, "grad_norm": 0.38166600465774536, "learning_rate": 9.884741245802494e-06, "loss": 0.4039, "step": 7949 }, { "epoch": 0.8082553883692558, "grad_norm": 0.32009485363960266, "learning_rate": 9.88466547376258e-06, "loss": 0.4111, "step": 7950 }, { "epoch": 0.8083570557137048, "grad_norm": 0.33358657360076904, "learning_rate": 9.884589677114884e-06, "loss": 0.4028, "step": 7951 }, { "epoch": 0.8084587230581537, "grad_norm": 0.3472106456756592, "learning_rate": 9.884513855859785e-06, "loss": 0.3894, "step": 7952 }, { "epoch": 0.8085603904026026, "grad_norm": 0.3448287844657898, "learning_rate": 9.884438009997666e-06, "loss": 0.3647, "step": 7953 }, { "epoch": 0.8086620577470517, "grad_norm": 0.36276066303253174, "learning_rate": 9.884362139528907e-06, "loss": 0.3669, "step": 7954 }, { "epoch": 0.8087637250915006, "grad_norm": 0.35920432209968567, "learning_rate": 9.884286244453894e-06, "loss": 0.3835, "step": 7955 }, { "epoch": 0.8088653924359496, "grad_norm": 0.32682275772094727, "learning_rate": 9.884210324773008e-06, "loss": 0.4114, "step": 7956 }, { "epoch": 0.8089670597803985, "grad_norm": 0.3424680233001709, "learning_rate": 9.88413438048663e-06, "loss": 0.3895, "step": 7957 }, { "epoch": 0.8090687271248475, "grad_norm": 0.37720414996147156, "learning_rate": 9.884058411595145e-06, "loss": 0.4032, "step": 7958 }, { "epoch": 0.8091703944692965, "grad_norm": 0.33205798268318176, "learning_rate": 9.883982418098934e-06, "loss": 0.3858, "step": 7959 }, { "epoch": 0.8092720618137454, "grad_norm": 0.3768315613269806, "learning_rate": 9.883906399998379e-06, "loss": 0.3975, "step": 7960 }, { "epoch": 0.8093737291581944, "grad_norm": 0.3598148822784424, "learning_rate": 9.883830357293865e-06, "loss": 0.4049, "step": 7961 }, { "epoch": 0.8094753965026433, "grad_norm": 0.370365709066391, "learning_rate": 9.883754289985774e-06, "loss": 0.4208, "step": 7962 }, { "epoch": 0.8095770638470923, "grad_norm": 0.3315984904766083, "learning_rate": 9.883678198074489e-06, "loss": 0.3775, "step": 7963 }, { "epoch": 0.8096787311915413, "grad_norm": 0.35600292682647705, "learning_rate": 9.883602081560394e-06, "loss": 0.4002, "step": 7964 }, { "epoch": 0.8097803985359903, "grad_norm": 0.3446742594242096, "learning_rate": 9.883525940443874e-06, "loss": 0.3957, "step": 7965 }, { "epoch": 0.8098820658804392, "grad_norm": 0.315754771232605, "learning_rate": 9.883449774725309e-06, "loss": 0.3902, "step": 7966 }, { "epoch": 0.8099837332248881, "grad_norm": 0.3842071294784546, "learning_rate": 9.883373584405085e-06, "loss": 0.4028, "step": 7967 }, { "epoch": 0.8100854005693371, "grad_norm": 0.35408884286880493, "learning_rate": 9.883297369483584e-06, "loss": 0.3822, "step": 7968 }, { "epoch": 0.8101870679137861, "grad_norm": 0.3421882092952728, "learning_rate": 9.883221129961193e-06, "loss": 0.4493, "step": 7969 }, { "epoch": 0.8102887352582351, "grad_norm": 0.4076947867870331, "learning_rate": 9.883144865838294e-06, "loss": 0.3526, "step": 7970 }, { "epoch": 0.810390402602684, "grad_norm": 0.34752047061920166, "learning_rate": 9.883068577115271e-06, "loss": 0.3786, "step": 7971 }, { "epoch": 0.810492069947133, "grad_norm": 0.34324124455451965, "learning_rate": 9.88299226379251e-06, "loss": 0.3984, "step": 7972 }, { "epoch": 0.8105937372915819, "grad_norm": 0.39426982402801514, "learning_rate": 9.882915925870394e-06, "loss": 0.3875, "step": 7973 }, { "epoch": 0.810695404636031, "grad_norm": 0.32152611017227173, "learning_rate": 9.882839563349307e-06, "loss": 0.4152, "step": 7974 }, { "epoch": 0.8107970719804799, "grad_norm": 0.3316305875778198, "learning_rate": 9.882763176229636e-06, "loss": 0.3932, "step": 7975 }, { "epoch": 0.8108987393249288, "grad_norm": 0.3328568935394287, "learning_rate": 9.882686764511765e-06, "loss": 0.3897, "step": 7976 }, { "epoch": 0.8110004066693778, "grad_norm": 0.32853594422340393, "learning_rate": 9.882610328196077e-06, "loss": 0.4073, "step": 7977 }, { "epoch": 0.8111020740138267, "grad_norm": 0.3245250880718231, "learning_rate": 9.882533867282958e-06, "loss": 0.4011, "step": 7978 }, { "epoch": 0.8112037413582758, "grad_norm": 0.308900386095047, "learning_rate": 9.882457381772794e-06, "loss": 0.4078, "step": 7979 }, { "epoch": 0.8113054087027247, "grad_norm": 0.2878873646259308, "learning_rate": 9.88238087166597e-06, "loss": 0.3723, "step": 7980 }, { "epoch": 0.8114070760471737, "grad_norm": 0.3183572590351105, "learning_rate": 9.88230433696287e-06, "loss": 0.4117, "step": 7981 }, { "epoch": 0.8115087433916226, "grad_norm": 0.29665592312812805, "learning_rate": 9.882227777663884e-06, "loss": 0.4212, "step": 7982 }, { "epoch": 0.8116104107360715, "grad_norm": 0.33895209431648254, "learning_rate": 9.882151193769394e-06, "loss": 0.4188, "step": 7983 }, { "epoch": 0.8117120780805205, "grad_norm": 0.30058684945106506, "learning_rate": 9.882074585279785e-06, "loss": 0.3876, "step": 7984 }, { "epoch": 0.8118137454249695, "grad_norm": 0.2991819977760315, "learning_rate": 9.881997952195445e-06, "loss": 0.401, "step": 7985 }, { "epoch": 0.8119154127694185, "grad_norm": 0.35189923644065857, "learning_rate": 9.88192129451676e-06, "loss": 0.4028, "step": 7986 }, { "epoch": 0.8120170801138674, "grad_norm": 0.3475339710712433, "learning_rate": 9.881844612244115e-06, "loss": 0.3702, "step": 7987 }, { "epoch": 0.8121187474583164, "grad_norm": 0.2936496436595917, "learning_rate": 9.881767905377897e-06, "loss": 0.3793, "step": 7988 }, { "epoch": 0.8122204148027653, "grad_norm": 0.3053706884384155, "learning_rate": 9.881691173918493e-06, "loss": 0.3658, "step": 7989 }, { "epoch": 0.8123220821472144, "grad_norm": 0.32815271615982056, "learning_rate": 9.881614417866288e-06, "loss": 0.3674, "step": 7990 }, { "epoch": 0.8124237494916633, "grad_norm": 0.30520039796829224, "learning_rate": 9.88153763722167e-06, "loss": 0.4238, "step": 7991 }, { "epoch": 0.8125254168361122, "grad_norm": 0.3214031457901001, "learning_rate": 9.881460831985027e-06, "loss": 0.3828, "step": 7992 }, { "epoch": 0.8126270841805612, "grad_norm": 0.31357812881469727, "learning_rate": 9.88138400215674e-06, "loss": 0.4197, "step": 7993 }, { "epoch": 0.8127287515250101, "grad_norm": 0.31539925932884216, "learning_rate": 9.881307147737205e-06, "loss": 0.3424, "step": 7994 }, { "epoch": 0.8128304188694592, "grad_norm": 0.34838956594467163, "learning_rate": 9.881230268726803e-06, "loss": 0.3995, "step": 7995 }, { "epoch": 0.8129320862139081, "grad_norm": 0.33359256386756897, "learning_rate": 9.881153365125921e-06, "loss": 0.3891, "step": 7996 }, { "epoch": 0.813033753558357, "grad_norm": 0.3559210002422333, "learning_rate": 9.881076436934951e-06, "loss": 0.3953, "step": 7997 }, { "epoch": 0.813135420902806, "grad_norm": 0.3370758593082428, "learning_rate": 9.880999484154276e-06, "loss": 0.3581, "step": 7998 }, { "epoch": 0.8132370882472549, "grad_norm": 0.35045307874679565, "learning_rate": 9.880922506784285e-06, "loss": 0.4074, "step": 7999 }, { "epoch": 0.813338755591704, "grad_norm": 0.34185531735420227, "learning_rate": 9.880845504825369e-06, "loss": 0.3984, "step": 8000 }, { "epoch": 0.8134404229361529, "grad_norm": 0.32515329122543335, "learning_rate": 9.88076847827791e-06, "loss": 0.3788, "step": 8001 }, { "epoch": 0.8135420902806019, "grad_norm": 0.29107901453971863, "learning_rate": 9.880691427142302e-06, "loss": 0.3701, "step": 8002 }, { "epoch": 0.8136437576250508, "grad_norm": 0.3049781322479248, "learning_rate": 9.88061435141893e-06, "loss": 0.394, "step": 8003 }, { "epoch": 0.8137454249694998, "grad_norm": 0.32086697220802307, "learning_rate": 9.880537251108182e-06, "loss": 0.4012, "step": 8004 }, { "epoch": 0.8138470923139488, "grad_norm": 0.3201914131641388, "learning_rate": 9.880460126210447e-06, "loss": 0.3879, "step": 8005 }, { "epoch": 0.8139487596583977, "grad_norm": 0.3207003176212311, "learning_rate": 9.880382976726114e-06, "loss": 0.3828, "step": 8006 }, { "epoch": 0.8140504270028467, "grad_norm": 0.32774579524993896, "learning_rate": 9.880305802655573e-06, "loss": 0.3896, "step": 8007 }, { "epoch": 0.8141520943472956, "grad_norm": 0.33763983845710754, "learning_rate": 9.880228603999209e-06, "loss": 0.4089, "step": 8008 }, { "epoch": 0.8142537616917446, "grad_norm": 0.3322318494319916, "learning_rate": 9.880151380757412e-06, "loss": 0.384, "step": 8009 }, { "epoch": 0.8143554290361936, "grad_norm": 0.31719857454299927, "learning_rate": 9.880074132930574e-06, "loss": 0.3705, "step": 8010 }, { "epoch": 0.8144570963806426, "grad_norm": 0.3184854984283447, "learning_rate": 9.879996860519082e-06, "loss": 0.4033, "step": 8011 }, { "epoch": 0.8145587637250915, "grad_norm": 0.36125174164772034, "learning_rate": 9.879919563523324e-06, "loss": 0.3827, "step": 8012 }, { "epoch": 0.8146604310695404, "grad_norm": 0.3373394012451172, "learning_rate": 9.879842241943692e-06, "loss": 0.405, "step": 8013 }, { "epoch": 0.8147620984139894, "grad_norm": 0.31318992376327515, "learning_rate": 9.879764895780574e-06, "loss": 0.3861, "step": 8014 }, { "epoch": 0.8148637657584384, "grad_norm": 0.30441802740097046, "learning_rate": 9.87968752503436e-06, "loss": 0.4108, "step": 8015 }, { "epoch": 0.8149654331028874, "grad_norm": 0.3343072533607483, "learning_rate": 9.879610129705442e-06, "loss": 0.376, "step": 8016 }, { "epoch": 0.8150671004473363, "grad_norm": 0.33902114629745483, "learning_rate": 9.879532709794206e-06, "loss": 0.4026, "step": 8017 }, { "epoch": 0.8151687677917853, "grad_norm": 0.3371962308883667, "learning_rate": 9.879455265301044e-06, "loss": 0.3992, "step": 8018 }, { "epoch": 0.8152704351362342, "grad_norm": 0.33174940943717957, "learning_rate": 9.879377796226345e-06, "loss": 0.41, "step": 8019 }, { "epoch": 0.8153721024806833, "grad_norm": 0.33555471897125244, "learning_rate": 9.8793003025705e-06, "loss": 0.4002, "step": 8020 }, { "epoch": 0.8154737698251322, "grad_norm": 0.3675406277179718, "learning_rate": 9.879222784333902e-06, "loss": 0.3811, "step": 8021 }, { "epoch": 0.8155754371695811, "grad_norm": 0.3119078576564789, "learning_rate": 9.879145241516937e-06, "loss": 0.3975, "step": 8022 }, { "epoch": 0.8156771045140301, "grad_norm": 0.3203306794166565, "learning_rate": 9.879067674119999e-06, "loss": 0.3855, "step": 8023 }, { "epoch": 0.815778771858479, "grad_norm": 0.3682684302330017, "learning_rate": 9.878990082143476e-06, "loss": 0.3962, "step": 8024 }, { "epoch": 0.815880439202928, "grad_norm": 0.34319406747817993, "learning_rate": 9.878912465587762e-06, "loss": 0.3924, "step": 8025 }, { "epoch": 0.815982106547377, "grad_norm": 0.3148445785045624, "learning_rate": 9.878834824453245e-06, "loss": 0.3758, "step": 8026 }, { "epoch": 0.816083773891826, "grad_norm": 0.37236183881759644, "learning_rate": 9.878757158740317e-06, "loss": 0.4174, "step": 8027 }, { "epoch": 0.8161854412362749, "grad_norm": 0.3397116959095001, "learning_rate": 9.878679468449373e-06, "loss": 0.3585, "step": 8028 }, { "epoch": 0.8162871085807238, "grad_norm": 0.3526744246482849, "learning_rate": 9.878601753580798e-06, "loss": 0.4099, "step": 8029 }, { "epoch": 0.8163887759251728, "grad_norm": 0.3418833911418915, "learning_rate": 9.878524014134989e-06, "loss": 0.3549, "step": 8030 }, { "epoch": 0.8164904432696218, "grad_norm": 0.3493071496486664, "learning_rate": 9.878446250112334e-06, "loss": 0.3979, "step": 8031 }, { "epoch": 0.8165921106140708, "grad_norm": 0.3437661826610565, "learning_rate": 9.878368461513226e-06, "loss": 0.3715, "step": 8032 }, { "epoch": 0.8166937779585197, "grad_norm": 0.38069799542427063, "learning_rate": 9.878290648338057e-06, "loss": 0.4174, "step": 8033 }, { "epoch": 0.8167954453029687, "grad_norm": 0.3511786162853241, "learning_rate": 9.87821281058722e-06, "loss": 0.3925, "step": 8034 }, { "epoch": 0.8168971126474176, "grad_norm": 0.3652227818965912, "learning_rate": 9.878134948261105e-06, "loss": 0.3712, "step": 8035 }, { "epoch": 0.8169987799918667, "grad_norm": 0.33215975761413574, "learning_rate": 9.878057061360105e-06, "loss": 0.3823, "step": 8036 }, { "epoch": 0.8171004473363156, "grad_norm": 0.3393070101737976, "learning_rate": 9.877979149884613e-06, "loss": 0.3866, "step": 8037 }, { "epoch": 0.8172021146807645, "grad_norm": 0.3755098581314087, "learning_rate": 9.877901213835021e-06, "loss": 0.4039, "step": 8038 }, { "epoch": 0.8173037820252135, "grad_norm": 0.33121994137763977, "learning_rate": 9.877823253211723e-06, "loss": 0.3539, "step": 8039 }, { "epoch": 0.8174054493696624, "grad_norm": 0.3638864755630493, "learning_rate": 9.87774526801511e-06, "loss": 0.3986, "step": 8040 }, { "epoch": 0.8175071167141115, "grad_norm": 0.3305375874042511, "learning_rate": 9.877667258245575e-06, "loss": 0.3849, "step": 8041 }, { "epoch": 0.8176087840585604, "grad_norm": 0.3196479380130768, "learning_rate": 9.877589223903514e-06, "loss": 0.3878, "step": 8042 }, { "epoch": 0.8177104514030094, "grad_norm": 0.37835630774497986, "learning_rate": 9.877511164989316e-06, "loss": 0.3782, "step": 8043 }, { "epoch": 0.8178121187474583, "grad_norm": 0.3648827373981476, "learning_rate": 9.877433081503374e-06, "loss": 0.398, "step": 8044 }, { "epoch": 0.8179137860919072, "grad_norm": 0.37421974539756775, "learning_rate": 9.877354973446085e-06, "loss": 0.3956, "step": 8045 }, { "epoch": 0.8180154534363563, "grad_norm": 0.3750481605529785, "learning_rate": 9.87727684081784e-06, "loss": 0.3878, "step": 8046 }, { "epoch": 0.8181171207808052, "grad_norm": 0.3611009120941162, "learning_rate": 9.877198683619033e-06, "loss": 0.385, "step": 8047 }, { "epoch": 0.8182187881252542, "grad_norm": 0.3709877133369446, "learning_rate": 9.877120501850059e-06, "loss": 0.3971, "step": 8048 }, { "epoch": 0.8183204554697031, "grad_norm": 0.3571978211402893, "learning_rate": 9.87704229551131e-06, "loss": 0.396, "step": 8049 }, { "epoch": 0.818422122814152, "grad_norm": 0.35688474774360657, "learning_rate": 9.876964064603181e-06, "loss": 0.3625, "step": 8050 }, { "epoch": 0.8185237901586011, "grad_norm": 0.3554329574108124, "learning_rate": 9.876885809126067e-06, "loss": 0.3954, "step": 8051 }, { "epoch": 0.81862545750305, "grad_norm": 0.3584110736846924, "learning_rate": 9.876807529080361e-06, "loss": 0.4034, "step": 8052 }, { "epoch": 0.818727124847499, "grad_norm": 0.3010902404785156, "learning_rate": 9.876729224466458e-06, "loss": 0.3833, "step": 8053 }, { "epoch": 0.8188287921919479, "grad_norm": 0.396791011095047, "learning_rate": 9.87665089528475e-06, "loss": 0.3685, "step": 8054 }, { "epoch": 0.8189304595363969, "grad_norm": 0.3622184991836548, "learning_rate": 9.876572541535636e-06, "loss": 0.3867, "step": 8055 }, { "epoch": 0.8190321268808459, "grad_norm": 0.33586594462394714, "learning_rate": 9.876494163219506e-06, "loss": 0.3937, "step": 8056 }, { "epoch": 0.8191337942252949, "grad_norm": 0.3512127101421356, "learning_rate": 9.876415760336759e-06, "loss": 0.3786, "step": 8057 }, { "epoch": 0.8192354615697438, "grad_norm": 0.3433915972709656, "learning_rate": 9.876337332887787e-06, "loss": 0.3736, "step": 8058 }, { "epoch": 0.8193371289141927, "grad_norm": 0.358040988445282, "learning_rate": 9.876258880872988e-06, "loss": 0.3787, "step": 8059 }, { "epoch": 0.8194387962586417, "grad_norm": 0.3325608968734741, "learning_rate": 9.876180404292755e-06, "loss": 0.4322, "step": 8060 }, { "epoch": 0.8195404636030907, "grad_norm": 0.3638547658920288, "learning_rate": 9.876101903147482e-06, "loss": 0.4214, "step": 8061 }, { "epoch": 0.8196421309475397, "grad_norm": 0.3452768623828888, "learning_rate": 9.876023377437569e-06, "loss": 0.3735, "step": 8062 }, { "epoch": 0.8197437982919886, "grad_norm": 0.3263104259967804, "learning_rate": 9.875944827163408e-06, "loss": 0.3886, "step": 8063 }, { "epoch": 0.8198454656364376, "grad_norm": 0.35427337884902954, "learning_rate": 9.875866252325394e-06, "loss": 0.4058, "step": 8064 }, { "epoch": 0.8199471329808865, "grad_norm": 0.35637450218200684, "learning_rate": 9.875787652923927e-06, "loss": 0.422, "step": 8065 }, { "epoch": 0.8200488003253354, "grad_norm": 0.32623931765556335, "learning_rate": 9.8757090289594e-06, "loss": 0.4346, "step": 8066 }, { "epoch": 0.8201504676697845, "grad_norm": 0.3208605945110321, "learning_rate": 9.875630380432208e-06, "loss": 0.3775, "step": 8067 }, { "epoch": 0.8202521350142334, "grad_norm": 0.3355322778224945, "learning_rate": 9.87555170734275e-06, "loss": 0.3754, "step": 8068 }, { "epoch": 0.8203538023586824, "grad_norm": 0.3478057384490967, "learning_rate": 9.87547300969142e-06, "loss": 0.3847, "step": 8069 }, { "epoch": 0.8204554697031313, "grad_norm": 0.3273508548736572, "learning_rate": 9.875394287478617e-06, "loss": 0.3934, "step": 8070 }, { "epoch": 0.8205571370475803, "grad_norm": 0.3554733097553253, "learning_rate": 9.875315540704735e-06, "loss": 0.3995, "step": 8071 }, { "epoch": 0.8206588043920293, "grad_norm": 0.32989323139190674, "learning_rate": 9.875236769370174e-06, "loss": 0.363, "step": 8072 }, { "epoch": 0.8207604717364783, "grad_norm": 0.3106248676776886, "learning_rate": 9.875157973475326e-06, "loss": 0.4049, "step": 8073 }, { "epoch": 0.8208621390809272, "grad_norm": 0.30455270409584045, "learning_rate": 9.875079153020592e-06, "loss": 0.3569, "step": 8074 }, { "epoch": 0.8209638064253761, "grad_norm": 0.3595879375934601, "learning_rate": 9.875000308006366e-06, "loss": 0.3713, "step": 8075 }, { "epoch": 0.8210654737698251, "grad_norm": 0.32825201749801636, "learning_rate": 9.874921438433049e-06, "loss": 0.4354, "step": 8076 }, { "epoch": 0.8211671411142741, "grad_norm": 0.30209219455718994, "learning_rate": 9.874842544301035e-06, "loss": 0.3741, "step": 8077 }, { "epoch": 0.8212688084587231, "grad_norm": 0.3202773630619049, "learning_rate": 9.874763625610723e-06, "loss": 0.3899, "step": 8078 }, { "epoch": 0.821370475803172, "grad_norm": 0.3234156668186188, "learning_rate": 9.874684682362512e-06, "loss": 0.4007, "step": 8079 }, { "epoch": 0.821472143147621, "grad_norm": 0.3119722306728363, "learning_rate": 9.874605714556797e-06, "loss": 0.4002, "step": 8080 }, { "epoch": 0.8215738104920699, "grad_norm": 0.3177829384803772, "learning_rate": 9.874526722193975e-06, "loss": 0.3934, "step": 8081 }, { "epoch": 0.821675477836519, "grad_norm": 0.30031242966651917, "learning_rate": 9.874447705274448e-06, "loss": 0.3922, "step": 8082 }, { "epoch": 0.8217771451809679, "grad_norm": 0.3395388424396515, "learning_rate": 9.87436866379861e-06, "loss": 0.3881, "step": 8083 }, { "epoch": 0.8218788125254168, "grad_norm": 0.3303139805793762, "learning_rate": 9.874289597766863e-06, "loss": 0.3799, "step": 8084 }, { "epoch": 0.8219804798698658, "grad_norm": 0.35217729210853577, "learning_rate": 9.874210507179602e-06, "loss": 0.3896, "step": 8085 }, { "epoch": 0.8220821472143147, "grad_norm": 0.3178246021270752, "learning_rate": 9.874131392037227e-06, "loss": 0.3908, "step": 8086 }, { "epoch": 0.8221838145587638, "grad_norm": 0.3051474988460541, "learning_rate": 9.874052252340135e-06, "loss": 0.4023, "step": 8087 }, { "epoch": 0.8222854819032127, "grad_norm": 0.32666388154029846, "learning_rate": 9.873973088088728e-06, "loss": 0.3715, "step": 8088 }, { "epoch": 0.8223871492476617, "grad_norm": 0.32943645119667053, "learning_rate": 9.873893899283402e-06, "loss": 0.3708, "step": 8089 }, { "epoch": 0.8224888165921106, "grad_norm": 0.3457503020763397, "learning_rate": 9.873814685924557e-06, "loss": 0.3823, "step": 8090 }, { "epoch": 0.8225904839365595, "grad_norm": 0.3706231713294983, "learning_rate": 9.873735448012593e-06, "loss": 0.3829, "step": 8091 }, { "epoch": 0.8226921512810086, "grad_norm": 0.3580299913883209, "learning_rate": 9.873656185547906e-06, "loss": 0.4055, "step": 8092 }, { "epoch": 0.8227938186254575, "grad_norm": 0.330992728471756, "learning_rate": 9.8735768985309e-06, "loss": 0.3892, "step": 8093 }, { "epoch": 0.8228954859699065, "grad_norm": 0.2971092760562897, "learning_rate": 9.873497586961968e-06, "loss": 0.3987, "step": 8094 }, { "epoch": 0.8229971533143554, "grad_norm": 0.33113154768943787, "learning_rate": 9.873418250841516e-06, "loss": 0.3949, "step": 8095 }, { "epoch": 0.8230988206588044, "grad_norm": 0.3553510308265686, "learning_rate": 9.87333889016994e-06, "loss": 0.3796, "step": 8096 }, { "epoch": 0.8232004880032534, "grad_norm": 0.3367886245250702, "learning_rate": 9.873259504947638e-06, "loss": 0.3815, "step": 8097 }, { "epoch": 0.8233021553477023, "grad_norm": 0.34782329201698303, "learning_rate": 9.873180095175017e-06, "loss": 0.3733, "step": 8098 }, { "epoch": 0.8234038226921513, "grad_norm": 0.3505535423755646, "learning_rate": 9.87310066085247e-06, "loss": 0.3994, "step": 8099 }, { "epoch": 0.8235054900366002, "grad_norm": 0.34367835521698, "learning_rate": 9.873021201980401e-06, "loss": 0.4282, "step": 8100 }, { "epoch": 0.8236071573810492, "grad_norm": 0.3297463655471802, "learning_rate": 9.87294171855921e-06, "loss": 0.3933, "step": 8101 }, { "epoch": 0.8237088247254982, "grad_norm": 0.3151320815086365, "learning_rate": 9.872862210589295e-06, "loss": 0.3919, "step": 8102 }, { "epoch": 0.8238104920699472, "grad_norm": 0.32449617981910706, "learning_rate": 9.87278267807106e-06, "loss": 0.3954, "step": 8103 }, { "epoch": 0.8239121594143961, "grad_norm": 0.3774630129337311, "learning_rate": 9.872703121004904e-06, "loss": 0.3904, "step": 8104 }, { "epoch": 0.824013826758845, "grad_norm": 0.30315151810646057, "learning_rate": 9.872623539391225e-06, "loss": 0.3857, "step": 8105 }, { "epoch": 0.824115494103294, "grad_norm": 0.3240470588207245, "learning_rate": 9.872543933230427e-06, "loss": 0.4004, "step": 8106 }, { "epoch": 0.8242171614477429, "grad_norm": 0.35821613669395447, "learning_rate": 9.872464302522913e-06, "loss": 0.366, "step": 8107 }, { "epoch": 0.824318828792192, "grad_norm": 0.3305937349796295, "learning_rate": 9.87238464726908e-06, "loss": 0.3763, "step": 8108 }, { "epoch": 0.8244204961366409, "grad_norm": 0.3213087320327759, "learning_rate": 9.872304967469331e-06, "loss": 0.3812, "step": 8109 }, { "epoch": 0.8245221634810899, "grad_norm": 0.34290575981140137, "learning_rate": 9.872225263124067e-06, "loss": 0.4303, "step": 8110 }, { "epoch": 0.8246238308255388, "grad_norm": 0.3091408610343933, "learning_rate": 9.87214553423369e-06, "loss": 0.3831, "step": 8111 }, { "epoch": 0.8247254981699877, "grad_norm": 0.3493843078613281, "learning_rate": 9.872065780798604e-06, "loss": 0.3642, "step": 8112 }, { "epoch": 0.8248271655144368, "grad_norm": 0.31286755204200745, "learning_rate": 9.871986002819205e-06, "loss": 0.392, "step": 8113 }, { "epoch": 0.8249288328588857, "grad_norm": 0.32655173540115356, "learning_rate": 9.8719062002959e-06, "loss": 0.4097, "step": 8114 }, { "epoch": 0.8250305002033347, "grad_norm": 0.3183063864707947, "learning_rate": 9.87182637322909e-06, "loss": 0.3439, "step": 8115 }, { "epoch": 0.8251321675477836, "grad_norm": 0.3495127260684967, "learning_rate": 9.871746521619174e-06, "loss": 0.428, "step": 8116 }, { "epoch": 0.8252338348922326, "grad_norm": 0.3290683627128601, "learning_rate": 9.87166664546656e-06, "loss": 0.3662, "step": 8117 }, { "epoch": 0.8253355022366816, "grad_norm": 0.36328306794166565, "learning_rate": 9.871586744771644e-06, "loss": 0.4251, "step": 8118 }, { "epoch": 0.8254371695811306, "grad_norm": 0.3337772488594055, "learning_rate": 9.871506819534834e-06, "loss": 0.3599, "step": 8119 }, { "epoch": 0.8255388369255795, "grad_norm": 0.3200501799583435, "learning_rate": 9.871426869756528e-06, "loss": 0.4091, "step": 8120 }, { "epoch": 0.8256405042700284, "grad_norm": 0.36259347200393677, "learning_rate": 9.871346895437134e-06, "loss": 0.3974, "step": 8121 }, { "epoch": 0.8257421716144774, "grad_norm": 0.3492749333381653, "learning_rate": 9.87126689657705e-06, "loss": 0.3788, "step": 8122 }, { "epoch": 0.8258438389589264, "grad_norm": 0.34095892310142517, "learning_rate": 9.871186873176682e-06, "loss": 0.3692, "step": 8123 }, { "epoch": 0.8259455063033754, "grad_norm": 0.33747589588165283, "learning_rate": 9.87110682523643e-06, "loss": 0.3966, "step": 8124 }, { "epoch": 0.8260471736478243, "grad_norm": 0.3228128254413605, "learning_rate": 9.871026752756702e-06, "loss": 0.3781, "step": 8125 }, { "epoch": 0.8261488409922733, "grad_norm": 0.3675323724746704, "learning_rate": 9.870946655737896e-06, "loss": 0.4179, "step": 8126 }, { "epoch": 0.8262505083367222, "grad_norm": 0.3198178708553314, "learning_rate": 9.870866534180421e-06, "loss": 0.3934, "step": 8127 }, { "epoch": 0.8263521756811713, "grad_norm": 0.3181969225406647, "learning_rate": 9.870786388084676e-06, "loss": 0.3796, "step": 8128 }, { "epoch": 0.8264538430256202, "grad_norm": 0.34423041343688965, "learning_rate": 9.870706217451069e-06, "loss": 0.3672, "step": 8129 }, { "epoch": 0.8265555103700691, "grad_norm": 0.32134684920310974, "learning_rate": 9.870626022279998e-06, "loss": 0.4212, "step": 8130 }, { "epoch": 0.8266571777145181, "grad_norm": 0.30498969554901123, "learning_rate": 9.870545802571874e-06, "loss": 0.3664, "step": 8131 }, { "epoch": 0.826758845058967, "grad_norm": 0.3257138133049011, "learning_rate": 9.870465558327096e-06, "loss": 0.3998, "step": 8132 }, { "epoch": 0.8268605124034161, "grad_norm": 0.3505941927433014, "learning_rate": 9.87038528954607e-06, "loss": 0.3896, "step": 8133 }, { "epoch": 0.826962179747865, "grad_norm": 0.3477044105529785, "learning_rate": 9.8703049962292e-06, "loss": 0.3967, "step": 8134 }, { "epoch": 0.827063847092314, "grad_norm": 0.36484989523887634, "learning_rate": 9.870224678376891e-06, "loss": 0.3809, "step": 8135 }, { "epoch": 0.8271655144367629, "grad_norm": 0.35272157192230225, "learning_rate": 9.870144335989546e-06, "loss": 0.377, "step": 8136 }, { "epoch": 0.8272671817812118, "grad_norm": 0.3453657925128937, "learning_rate": 9.870063969067574e-06, "loss": 0.3729, "step": 8137 }, { "epoch": 0.8273688491256609, "grad_norm": 0.3689870834350586, "learning_rate": 9.869983577611374e-06, "loss": 0.3796, "step": 8138 }, { "epoch": 0.8274705164701098, "grad_norm": 0.33818235993385315, "learning_rate": 9.869903161621357e-06, "loss": 0.3843, "step": 8139 }, { "epoch": 0.8275721838145588, "grad_norm": 0.3253532648086548, "learning_rate": 9.869822721097923e-06, "loss": 0.4, "step": 8140 }, { "epoch": 0.8276738511590077, "grad_norm": 0.34603455662727356, "learning_rate": 9.869742256041481e-06, "loss": 0.3567, "step": 8141 }, { "epoch": 0.8277755185034567, "grad_norm": 0.354414165019989, "learning_rate": 9.869661766452434e-06, "loss": 0.4062, "step": 8142 }, { "epoch": 0.8278771858479057, "grad_norm": 0.32778120040893555, "learning_rate": 9.869581252331189e-06, "loss": 0.3923, "step": 8143 }, { "epoch": 0.8279788531923546, "grad_norm": 0.3458411395549774, "learning_rate": 9.86950071367815e-06, "loss": 0.399, "step": 8144 }, { "epoch": 0.8280805205368036, "grad_norm": 0.33050715923309326, "learning_rate": 9.869420150493723e-06, "loss": 0.3893, "step": 8145 }, { "epoch": 0.8281821878812525, "grad_norm": 0.333810418844223, "learning_rate": 9.869339562778315e-06, "loss": 0.3866, "step": 8146 }, { "epoch": 0.8282838552257015, "grad_norm": 0.3152066767215729, "learning_rate": 9.869258950532332e-06, "loss": 0.3738, "step": 8147 }, { "epoch": 0.8283855225701505, "grad_norm": 0.35882654786109924, "learning_rate": 9.869178313756178e-06, "loss": 0.3678, "step": 8148 }, { "epoch": 0.8284871899145995, "grad_norm": 0.34459349513053894, "learning_rate": 9.869097652450262e-06, "loss": 0.4056, "step": 8149 }, { "epoch": 0.8285888572590484, "grad_norm": 0.34798336029052734, "learning_rate": 9.869016966614989e-06, "loss": 0.3802, "step": 8150 }, { "epoch": 0.8286905246034973, "grad_norm": 0.3513503670692444, "learning_rate": 9.868936256250765e-06, "loss": 0.3841, "step": 8151 }, { "epoch": 0.8287921919479463, "grad_norm": 0.33140042424201965, "learning_rate": 9.868855521357998e-06, "loss": 0.3507, "step": 8152 }, { "epoch": 0.8288938592923952, "grad_norm": 0.29935991764068604, "learning_rate": 9.868774761937095e-06, "loss": 0.3878, "step": 8153 }, { "epoch": 0.8289955266368443, "grad_norm": 0.3486490547657013, "learning_rate": 9.86869397798846e-06, "loss": 0.3706, "step": 8154 }, { "epoch": 0.8290971939812932, "grad_norm": 0.34087231755256653, "learning_rate": 9.868613169512502e-06, "loss": 0.3499, "step": 8155 }, { "epoch": 0.8291988613257422, "grad_norm": 0.3185849189758301, "learning_rate": 9.868532336509627e-06, "loss": 0.3614, "step": 8156 }, { "epoch": 0.8293005286701911, "grad_norm": 0.312938928604126, "learning_rate": 9.868451478980243e-06, "loss": 0.3754, "step": 8157 }, { "epoch": 0.82940219601464, "grad_norm": 0.3738931715488434, "learning_rate": 9.868370596924758e-06, "loss": 0.4163, "step": 8158 }, { "epoch": 0.8295038633590891, "grad_norm": 0.3189704716205597, "learning_rate": 9.86828969034358e-06, "loss": 0.3957, "step": 8159 }, { "epoch": 0.829605530703538, "grad_norm": 0.3472193777561188, "learning_rate": 9.868208759237115e-06, "loss": 0.4079, "step": 8160 }, { "epoch": 0.829707198047987, "grad_norm": 0.3311985731124878, "learning_rate": 9.86812780360577e-06, "loss": 0.379, "step": 8161 }, { "epoch": 0.8298088653924359, "grad_norm": 0.31078192591667175, "learning_rate": 9.868046823449954e-06, "loss": 0.417, "step": 8162 }, { "epoch": 0.8299105327368849, "grad_norm": 0.3211420774459839, "learning_rate": 9.867965818770075e-06, "loss": 0.3925, "step": 8163 }, { "epoch": 0.8300122000813339, "grad_norm": 0.3511503040790558, "learning_rate": 9.867884789566541e-06, "loss": 0.3819, "step": 8164 }, { "epoch": 0.8301138674257829, "grad_norm": 0.32725974917411804, "learning_rate": 9.867803735839763e-06, "loss": 0.3671, "step": 8165 }, { "epoch": 0.8302155347702318, "grad_norm": 0.34646478295326233, "learning_rate": 9.867722657590145e-06, "loss": 0.3919, "step": 8166 }, { "epoch": 0.8303172021146807, "grad_norm": 0.34248340129852295, "learning_rate": 9.867641554818094e-06, "loss": 0.4164, "step": 8167 }, { "epoch": 0.8304188694591297, "grad_norm": 0.3279661238193512, "learning_rate": 9.867560427524026e-06, "loss": 0.345, "step": 8168 }, { "epoch": 0.8305205368035787, "grad_norm": 0.3246005177497864, "learning_rate": 9.867479275708343e-06, "loss": 0.4043, "step": 8169 }, { "epoch": 0.8306222041480277, "grad_norm": 0.30497369170188904, "learning_rate": 9.867398099371455e-06, "loss": 0.3642, "step": 8170 }, { "epoch": 0.8307238714924766, "grad_norm": 0.32246094942092896, "learning_rate": 9.867316898513773e-06, "loss": 0.3778, "step": 8171 }, { "epoch": 0.8308255388369256, "grad_norm": 0.3249947428703308, "learning_rate": 9.867235673135706e-06, "loss": 0.3997, "step": 8172 }, { "epoch": 0.8309272061813745, "grad_norm": 0.31444019079208374, "learning_rate": 9.867154423237661e-06, "loss": 0.4022, "step": 8173 }, { "epoch": 0.8310288735258236, "grad_norm": 0.32186323404312134, "learning_rate": 9.86707314882005e-06, "loss": 0.3867, "step": 8174 }, { "epoch": 0.8311305408702725, "grad_norm": 0.33350324630737305, "learning_rate": 9.86699184988328e-06, "loss": 0.4031, "step": 8175 }, { "epoch": 0.8312322082147214, "grad_norm": 0.3067808449268341, "learning_rate": 9.866910526427762e-06, "loss": 0.3742, "step": 8176 }, { "epoch": 0.8313338755591704, "grad_norm": 0.33902814984321594, "learning_rate": 9.866829178453905e-06, "loss": 0.3884, "step": 8177 }, { "epoch": 0.8314355429036193, "grad_norm": 0.31090492010116577, "learning_rate": 9.86674780596212e-06, "loss": 0.3943, "step": 8178 }, { "epoch": 0.8315372102480684, "grad_norm": 0.339962363243103, "learning_rate": 9.866666408952815e-06, "loss": 0.427, "step": 8179 }, { "epoch": 0.8316388775925173, "grad_norm": 0.3497622311115265, "learning_rate": 9.866584987426403e-06, "loss": 0.3938, "step": 8180 }, { "epoch": 0.8317405449369663, "grad_norm": 0.31669145822525024, "learning_rate": 9.86650354138329e-06, "loss": 0.4125, "step": 8181 }, { "epoch": 0.8318422122814152, "grad_norm": 0.35922154784202576, "learning_rate": 9.866422070823889e-06, "loss": 0.3639, "step": 8182 }, { "epoch": 0.8319438796258641, "grad_norm": 0.3242591917514801, "learning_rate": 9.866340575748611e-06, "loss": 0.3699, "step": 8183 }, { "epoch": 0.8320455469703132, "grad_norm": 0.3228142559528351, "learning_rate": 9.866259056157865e-06, "loss": 0.4225, "step": 8184 }, { "epoch": 0.8321472143147621, "grad_norm": 0.367963969707489, "learning_rate": 9.866177512052063e-06, "loss": 0.4079, "step": 8185 }, { "epoch": 0.8322488816592111, "grad_norm": 0.34203407168388367, "learning_rate": 9.866095943431615e-06, "loss": 0.409, "step": 8186 }, { "epoch": 0.83235054900366, "grad_norm": 0.33986297249794006, "learning_rate": 9.866014350296931e-06, "loss": 0.4193, "step": 8187 }, { "epoch": 0.832452216348109, "grad_norm": 0.35999059677124023, "learning_rate": 9.865932732648424e-06, "loss": 0.3728, "step": 8188 }, { "epoch": 0.832553883692558, "grad_norm": 0.35721951723098755, "learning_rate": 9.865851090486504e-06, "loss": 0.3875, "step": 8189 }, { "epoch": 0.832655551037007, "grad_norm": 0.32323983311653137, "learning_rate": 9.865769423811581e-06, "loss": 0.372, "step": 8190 }, { "epoch": 0.8327572183814559, "grad_norm": 0.41947081685066223, "learning_rate": 9.86568773262407e-06, "loss": 0.3794, "step": 8191 }, { "epoch": 0.8328588857259048, "grad_norm": 0.3556876480579376, "learning_rate": 9.865606016924381e-06, "loss": 0.4213, "step": 8192 }, { "epoch": 0.8329605530703538, "grad_norm": 0.33969318866729736, "learning_rate": 9.865524276712924e-06, "loss": 0.4138, "step": 8193 }, { "epoch": 0.8330622204148027, "grad_norm": 0.35254257917404175, "learning_rate": 9.86544251199011e-06, "loss": 0.4002, "step": 8194 }, { "epoch": 0.8331638877592518, "grad_norm": 0.3907731771469116, "learning_rate": 9.865360722756356e-06, "loss": 0.4106, "step": 8195 }, { "epoch": 0.8332655551037007, "grad_norm": 0.3780561685562134, "learning_rate": 9.86527890901207e-06, "loss": 0.3721, "step": 8196 }, { "epoch": 0.8333672224481496, "grad_norm": 0.34549543261528015, "learning_rate": 9.865197070757663e-06, "loss": 0.3891, "step": 8197 }, { "epoch": 0.8334688897925986, "grad_norm": 0.3534890413284302, "learning_rate": 9.865115207993552e-06, "loss": 0.4246, "step": 8198 }, { "epoch": 0.8335705571370475, "grad_norm": 0.3461741805076599, "learning_rate": 9.865033320720145e-06, "loss": 0.3861, "step": 8199 }, { "epoch": 0.8336722244814966, "grad_norm": 0.38541263341903687, "learning_rate": 9.864951408937858e-06, "loss": 0.4323, "step": 8200 }, { "epoch": 0.8337738918259455, "grad_norm": 0.3427666127681732, "learning_rate": 9.8648694726471e-06, "loss": 0.3892, "step": 8201 }, { "epoch": 0.8338755591703945, "grad_norm": 0.3429504930973053, "learning_rate": 9.864787511848287e-06, "loss": 0.3927, "step": 8202 }, { "epoch": 0.8339772265148434, "grad_norm": 0.3511442244052887, "learning_rate": 9.864705526541829e-06, "loss": 0.3788, "step": 8203 }, { "epoch": 0.8340788938592923, "grad_norm": 0.3743118941783905, "learning_rate": 9.864623516728141e-06, "loss": 0.3728, "step": 8204 }, { "epoch": 0.8341805612037414, "grad_norm": 0.3774007558822632, "learning_rate": 9.864541482407638e-06, "loss": 0.4243, "step": 8205 }, { "epoch": 0.8342822285481903, "grad_norm": 0.4113233983516693, "learning_rate": 9.864459423580728e-06, "loss": 0.4062, "step": 8206 }, { "epoch": 0.8343838958926393, "grad_norm": 0.3384026288986206, "learning_rate": 9.864377340247828e-06, "loss": 0.4171, "step": 8207 }, { "epoch": 0.8344855632370882, "grad_norm": 0.356950044631958, "learning_rate": 9.864295232409352e-06, "loss": 0.4215, "step": 8208 }, { "epoch": 0.8345872305815372, "grad_norm": 0.359702467918396, "learning_rate": 9.864213100065711e-06, "loss": 0.3538, "step": 8209 }, { "epoch": 0.8346888979259862, "grad_norm": 0.35723719000816345, "learning_rate": 9.864130943217323e-06, "loss": 0.4293, "step": 8210 }, { "epoch": 0.8347905652704352, "grad_norm": 0.3472226560115814, "learning_rate": 9.864048761864598e-06, "loss": 0.422, "step": 8211 }, { "epoch": 0.8348922326148841, "grad_norm": 0.32218697667121887, "learning_rate": 9.86396655600795e-06, "loss": 0.3653, "step": 8212 }, { "epoch": 0.834993899959333, "grad_norm": 0.34738874435424805, "learning_rate": 9.863884325647795e-06, "loss": 0.3845, "step": 8213 }, { "epoch": 0.835095567303782, "grad_norm": 0.30686473846435547, "learning_rate": 9.863802070784547e-06, "loss": 0.3618, "step": 8214 }, { "epoch": 0.835197234648231, "grad_norm": 0.30201125144958496, "learning_rate": 9.86371979141862e-06, "loss": 0.3861, "step": 8215 }, { "epoch": 0.83529890199268, "grad_norm": 0.34481948614120483, "learning_rate": 9.863637487550427e-06, "loss": 0.3955, "step": 8216 }, { "epoch": 0.8354005693371289, "grad_norm": 0.3245750069618225, "learning_rate": 9.863555159180386e-06, "loss": 0.3966, "step": 8217 }, { "epoch": 0.8355022366815779, "grad_norm": 0.3149603009223938, "learning_rate": 9.863472806308909e-06, "loss": 0.4399, "step": 8218 }, { "epoch": 0.8356039040260268, "grad_norm": 0.3341767489910126, "learning_rate": 9.863390428936413e-06, "loss": 0.411, "step": 8219 }, { "epoch": 0.8357055713704759, "grad_norm": 0.35796916484832764, "learning_rate": 9.86330802706331e-06, "loss": 0.4139, "step": 8220 }, { "epoch": 0.8358072387149248, "grad_norm": 0.31825804710388184, "learning_rate": 9.863225600690019e-06, "loss": 0.3686, "step": 8221 }, { "epoch": 0.8359089060593737, "grad_norm": 0.35024142265319824, "learning_rate": 9.863143149816952e-06, "loss": 0.4, "step": 8222 }, { "epoch": 0.8360105734038227, "grad_norm": 0.3646654486656189, "learning_rate": 9.863060674444525e-06, "loss": 0.3886, "step": 8223 }, { "epoch": 0.8361122407482716, "grad_norm": 0.3833068907260895, "learning_rate": 9.862978174573153e-06, "loss": 0.3749, "step": 8224 }, { "epoch": 0.8362139080927207, "grad_norm": 0.3599184453487396, "learning_rate": 9.862895650203254e-06, "loss": 0.3893, "step": 8225 }, { "epoch": 0.8363155754371696, "grad_norm": 0.31452620029449463, "learning_rate": 9.862813101335243e-06, "loss": 0.4234, "step": 8226 }, { "epoch": 0.8364172427816186, "grad_norm": 0.3426227867603302, "learning_rate": 9.862730527969532e-06, "loss": 0.3941, "step": 8227 }, { "epoch": 0.8365189101260675, "grad_norm": 0.33869674801826477, "learning_rate": 9.862647930106543e-06, "loss": 0.3908, "step": 8228 }, { "epoch": 0.8366205774705164, "grad_norm": 0.31200647354125977, "learning_rate": 9.862565307746689e-06, "loss": 0.3769, "step": 8229 }, { "epoch": 0.8367222448149655, "grad_norm": 0.34871217608451843, "learning_rate": 9.862482660890386e-06, "loss": 0.4009, "step": 8230 }, { "epoch": 0.8368239121594144, "grad_norm": 0.31409355998039246, "learning_rate": 9.862399989538051e-06, "loss": 0.3593, "step": 8231 }, { "epoch": 0.8369255795038634, "grad_norm": 0.312389999628067, "learning_rate": 9.862317293690101e-06, "loss": 0.3813, "step": 8232 }, { "epoch": 0.8370272468483123, "grad_norm": 0.33539295196533203, "learning_rate": 9.86223457334695e-06, "loss": 0.3808, "step": 8233 }, { "epoch": 0.8371289141927613, "grad_norm": 0.3693644404411316, "learning_rate": 9.862151828509019e-06, "loss": 0.4076, "step": 8234 }, { "epoch": 0.8372305815372102, "grad_norm": 0.33392131328582764, "learning_rate": 9.862069059176722e-06, "loss": 0.4082, "step": 8235 }, { "epoch": 0.8373322488816592, "grad_norm": 0.3554646670818329, "learning_rate": 9.861986265350475e-06, "loss": 0.3864, "step": 8236 }, { "epoch": 0.8374339162261082, "grad_norm": 0.36607757210731506, "learning_rate": 9.861903447030699e-06, "loss": 0.4131, "step": 8237 }, { "epoch": 0.8375355835705571, "grad_norm": 0.345145583152771, "learning_rate": 9.861820604217806e-06, "loss": 0.3678, "step": 8238 }, { "epoch": 0.8376372509150061, "grad_norm": 0.3329654633998871, "learning_rate": 9.861737736912219e-06, "loss": 0.3881, "step": 8239 }, { "epoch": 0.837738918259455, "grad_norm": 0.36213165521621704, "learning_rate": 9.86165484511435e-06, "loss": 0.3771, "step": 8240 }, { "epoch": 0.8378405856039041, "grad_norm": 0.3359174430370331, "learning_rate": 9.86157192882462e-06, "loss": 0.3674, "step": 8241 }, { "epoch": 0.837942252948353, "grad_norm": 0.336335152387619, "learning_rate": 9.861488988043446e-06, "loss": 0.4122, "step": 8242 }, { "epoch": 0.838043920292802, "grad_norm": 0.3273993134498596, "learning_rate": 9.861406022771246e-06, "loss": 0.3912, "step": 8243 }, { "epoch": 0.8381455876372509, "grad_norm": 0.32019537687301636, "learning_rate": 9.861323033008438e-06, "loss": 0.384, "step": 8244 }, { "epoch": 0.8382472549816998, "grad_norm": 0.3169011175632477, "learning_rate": 9.86124001875544e-06, "loss": 0.3619, "step": 8245 }, { "epoch": 0.8383489223261489, "grad_norm": 0.30624037981033325, "learning_rate": 9.861156980012669e-06, "loss": 0.367, "step": 8246 }, { "epoch": 0.8384505896705978, "grad_norm": 0.3339965343475342, "learning_rate": 9.861073916780544e-06, "loss": 0.3766, "step": 8247 }, { "epoch": 0.8385522570150468, "grad_norm": 0.3313756585121155, "learning_rate": 9.860990829059484e-06, "loss": 0.3997, "step": 8248 }, { "epoch": 0.8386539243594957, "grad_norm": 0.3454819321632385, "learning_rate": 9.860907716849907e-06, "loss": 0.375, "step": 8249 }, { "epoch": 0.8387555917039446, "grad_norm": 0.3728641867637634, "learning_rate": 9.860824580152235e-06, "loss": 0.3818, "step": 8250 }, { "epoch": 0.8388572590483937, "grad_norm": 0.3584546744823456, "learning_rate": 9.860741418966881e-06, "loss": 0.4158, "step": 8251 }, { "epoch": 0.8389589263928426, "grad_norm": 0.32301825284957886, "learning_rate": 9.860658233294268e-06, "loss": 0.3887, "step": 8252 }, { "epoch": 0.8390605937372916, "grad_norm": 0.3345527946949005, "learning_rate": 9.860575023134813e-06, "loss": 0.3996, "step": 8253 }, { "epoch": 0.8391622610817405, "grad_norm": 0.3473026752471924, "learning_rate": 9.860491788488937e-06, "loss": 0.3707, "step": 8254 }, { "epoch": 0.8392639284261895, "grad_norm": 0.374128133058548, "learning_rate": 9.860408529357057e-06, "loss": 0.3989, "step": 8255 }, { "epoch": 0.8393655957706385, "grad_norm": 0.3264268934726715, "learning_rate": 9.860325245739596e-06, "loss": 0.3717, "step": 8256 }, { "epoch": 0.8394672631150875, "grad_norm": 0.32389163970947266, "learning_rate": 9.86024193763697e-06, "loss": 0.3614, "step": 8257 }, { "epoch": 0.8395689304595364, "grad_norm": 0.3265949487686157, "learning_rate": 9.8601586050496e-06, "loss": 0.3706, "step": 8258 }, { "epoch": 0.8396705978039853, "grad_norm": 0.35182181000709534, "learning_rate": 9.860075247977907e-06, "loss": 0.41, "step": 8259 }, { "epoch": 0.8397722651484343, "grad_norm": 0.3081290125846863, "learning_rate": 9.859991866422308e-06, "loss": 0.3746, "step": 8260 }, { "epoch": 0.8398739324928833, "grad_norm": 0.31124913692474365, "learning_rate": 9.859908460383226e-06, "loss": 0.3839, "step": 8261 }, { "epoch": 0.8399755998373323, "grad_norm": 0.3598564565181732, "learning_rate": 9.859825029861081e-06, "loss": 0.405, "step": 8262 }, { "epoch": 0.8400772671817812, "grad_norm": 0.35098645091056824, "learning_rate": 9.859741574856291e-06, "loss": 0.3661, "step": 8263 }, { "epoch": 0.8401789345262302, "grad_norm": 0.3170781433582306, "learning_rate": 9.859658095369278e-06, "loss": 0.4065, "step": 8264 }, { "epoch": 0.8402806018706791, "grad_norm": 0.3415317237377167, "learning_rate": 9.859574591400462e-06, "loss": 0.4, "step": 8265 }, { "epoch": 0.8403822692151282, "grad_norm": 0.3498072326183319, "learning_rate": 9.859491062950265e-06, "loss": 0.4224, "step": 8266 }, { "epoch": 0.8404839365595771, "grad_norm": 0.36198583245277405, "learning_rate": 9.859407510019105e-06, "loss": 0.399, "step": 8267 }, { "epoch": 0.840585603904026, "grad_norm": 0.3747679591178894, "learning_rate": 9.859323932607407e-06, "loss": 0.4358, "step": 8268 }, { "epoch": 0.840687271248475, "grad_norm": 0.32487189769744873, "learning_rate": 9.85924033071559e-06, "loss": 0.3777, "step": 8269 }, { "epoch": 0.8407889385929239, "grad_norm": 0.3648340702056885, "learning_rate": 9.859156704344072e-06, "loss": 0.3978, "step": 8270 }, { "epoch": 0.840890605937373, "grad_norm": 0.3222644031047821, "learning_rate": 9.85907305349328e-06, "loss": 0.3782, "step": 8271 }, { "epoch": 0.8409922732818219, "grad_norm": 0.3310381770133972, "learning_rate": 9.858989378163631e-06, "loss": 0.3768, "step": 8272 }, { "epoch": 0.8410939406262709, "grad_norm": 0.3413781225681305, "learning_rate": 9.85890567835555e-06, "loss": 0.3818, "step": 8273 }, { "epoch": 0.8411956079707198, "grad_norm": 0.3659532368183136, "learning_rate": 9.858821954069455e-06, "loss": 0.4024, "step": 8274 }, { "epoch": 0.8412972753151687, "grad_norm": 0.3581346571445465, "learning_rate": 9.858738205305772e-06, "loss": 0.342, "step": 8275 }, { "epoch": 0.8413989426596177, "grad_norm": 0.332618772983551, "learning_rate": 9.858654432064918e-06, "loss": 0.3664, "step": 8276 }, { "epoch": 0.8415006100040667, "grad_norm": 0.35216301679611206, "learning_rate": 9.858570634347318e-06, "loss": 0.4105, "step": 8277 }, { "epoch": 0.8416022773485157, "grad_norm": 0.355831116437912, "learning_rate": 9.858486812153393e-06, "loss": 0.398, "step": 8278 }, { "epoch": 0.8417039446929646, "grad_norm": 0.384353905916214, "learning_rate": 9.858402965483569e-06, "loss": 0.4123, "step": 8279 }, { "epoch": 0.8418056120374136, "grad_norm": 0.33993804454803467, "learning_rate": 9.858319094338263e-06, "loss": 0.3696, "step": 8280 }, { "epoch": 0.8419072793818625, "grad_norm": 0.3320707380771637, "learning_rate": 9.858235198717901e-06, "loss": 0.388, "step": 8281 }, { "epoch": 0.8420089467263115, "grad_norm": 0.33788618445396423, "learning_rate": 9.858151278622903e-06, "loss": 0.4101, "step": 8282 }, { "epoch": 0.8421106140707605, "grad_norm": 0.3607540428638458, "learning_rate": 9.858067334053695e-06, "loss": 0.3879, "step": 8283 }, { "epoch": 0.8422122814152094, "grad_norm": 0.3724248707294464, "learning_rate": 9.857983365010696e-06, "loss": 0.3978, "step": 8284 }, { "epoch": 0.8423139487596584, "grad_norm": 0.3237457871437073, "learning_rate": 9.857899371494336e-06, "loss": 0.3864, "step": 8285 }, { "epoch": 0.8424156161041073, "grad_norm": 0.3323773145675659, "learning_rate": 9.85781535350503e-06, "loss": 0.3765, "step": 8286 }, { "epoch": 0.8425172834485564, "grad_norm": 0.3711622953414917, "learning_rate": 9.857731311043204e-06, "loss": 0.3831, "step": 8287 }, { "epoch": 0.8426189507930053, "grad_norm": 0.300533652305603, "learning_rate": 9.857647244109283e-06, "loss": 0.3959, "step": 8288 }, { "epoch": 0.8427206181374542, "grad_norm": 0.31539517641067505, "learning_rate": 9.857563152703688e-06, "loss": 0.3844, "step": 8289 }, { "epoch": 0.8428222854819032, "grad_norm": 0.319237619638443, "learning_rate": 9.857479036826847e-06, "loss": 0.3886, "step": 8290 }, { "epoch": 0.8429239528263521, "grad_norm": 0.31181764602661133, "learning_rate": 9.857394896479178e-06, "loss": 0.4046, "step": 8291 }, { "epoch": 0.8430256201708012, "grad_norm": 0.31029748916625977, "learning_rate": 9.857310731661109e-06, "loss": 0.3819, "step": 8292 }, { "epoch": 0.8431272875152501, "grad_norm": 0.3287447690963745, "learning_rate": 9.857226542373064e-06, "loss": 0.3878, "step": 8293 }, { "epoch": 0.8432289548596991, "grad_norm": 0.3632493317127228, "learning_rate": 9.857142328615465e-06, "loss": 0.3806, "step": 8294 }, { "epoch": 0.843330622204148, "grad_norm": 0.3419611155986786, "learning_rate": 9.857058090388738e-06, "loss": 0.3558, "step": 8295 }, { "epoch": 0.843432289548597, "grad_norm": 0.28560274839401245, "learning_rate": 9.856973827693306e-06, "loss": 0.3722, "step": 8296 }, { "epoch": 0.843533956893046, "grad_norm": 0.4672812521457672, "learning_rate": 9.856889540529593e-06, "loss": 0.4175, "step": 8297 }, { "epoch": 0.8436356242374949, "grad_norm": 0.31064093112945557, "learning_rate": 9.856805228898025e-06, "loss": 0.3985, "step": 8298 }, { "epoch": 0.8437372915819439, "grad_norm": 0.3463605046272278, "learning_rate": 9.856720892799028e-06, "loss": 0.4153, "step": 8299 }, { "epoch": 0.8438389589263928, "grad_norm": 0.33191758394241333, "learning_rate": 9.856636532233024e-06, "loss": 0.3756, "step": 8300 }, { "epoch": 0.8439406262708418, "grad_norm": 0.32913652062416077, "learning_rate": 9.85655214720044e-06, "loss": 0.3813, "step": 8301 }, { "epoch": 0.8440422936152908, "grad_norm": 0.3341576159000397, "learning_rate": 9.8564677377017e-06, "loss": 0.3937, "step": 8302 }, { "epoch": 0.8441439609597398, "grad_norm": 0.31882330775260925, "learning_rate": 9.856383303737229e-06, "loss": 0.3808, "step": 8303 }, { "epoch": 0.8442456283041887, "grad_norm": 0.30827680230140686, "learning_rate": 9.856298845307453e-06, "loss": 0.415, "step": 8304 }, { "epoch": 0.8443472956486376, "grad_norm": 0.3127191364765167, "learning_rate": 9.856214362412798e-06, "loss": 0.4065, "step": 8305 }, { "epoch": 0.8444489629930866, "grad_norm": 0.3251956105232239, "learning_rate": 9.856129855053688e-06, "loss": 0.3999, "step": 8306 }, { "epoch": 0.8445506303375356, "grad_norm": 0.30189359188079834, "learning_rate": 9.856045323230553e-06, "loss": 0.4007, "step": 8307 }, { "epoch": 0.8446522976819846, "grad_norm": 0.31956663727760315, "learning_rate": 9.855960766943813e-06, "loss": 0.4203, "step": 8308 }, { "epoch": 0.8447539650264335, "grad_norm": 0.33117419481277466, "learning_rate": 9.855876186193896e-06, "loss": 0.3721, "step": 8309 }, { "epoch": 0.8448556323708825, "grad_norm": 0.3425159156322479, "learning_rate": 9.85579158098123e-06, "loss": 0.3992, "step": 8310 }, { "epoch": 0.8449572997153314, "grad_norm": 0.30978554487228394, "learning_rate": 9.85570695130624e-06, "loss": 0.3758, "step": 8311 }, { "epoch": 0.8450589670597805, "grad_norm": 0.3166256248950958, "learning_rate": 9.855622297169352e-06, "loss": 0.4346, "step": 8312 }, { "epoch": 0.8451606344042294, "grad_norm": 0.3306095004081726, "learning_rate": 9.855537618570992e-06, "loss": 0.3873, "step": 8313 }, { "epoch": 0.8452623017486783, "grad_norm": 0.33765026926994324, "learning_rate": 9.855452915511588e-06, "loss": 0.4113, "step": 8314 }, { "epoch": 0.8453639690931273, "grad_norm": 0.32496771216392517, "learning_rate": 9.855368187991566e-06, "loss": 0.4162, "step": 8315 }, { "epoch": 0.8454656364375762, "grad_norm": 0.34427013993263245, "learning_rate": 9.855283436011354e-06, "loss": 0.4276, "step": 8316 }, { "epoch": 0.8455673037820252, "grad_norm": 0.33860597014427185, "learning_rate": 9.855198659571376e-06, "loss": 0.3656, "step": 8317 }, { "epoch": 0.8456689711264742, "grad_norm": 0.3130514621734619, "learning_rate": 9.855113858672061e-06, "loss": 0.391, "step": 8318 }, { "epoch": 0.8457706384709232, "grad_norm": 0.3450978398323059, "learning_rate": 9.855029033313837e-06, "loss": 0.3623, "step": 8319 }, { "epoch": 0.8458723058153721, "grad_norm": 0.3159649074077606, "learning_rate": 9.85494418349713e-06, "loss": 0.3715, "step": 8320 }, { "epoch": 0.845973973159821, "grad_norm": 0.3201683461666107, "learning_rate": 9.854859309222368e-06, "loss": 0.357, "step": 8321 }, { "epoch": 0.84607564050427, "grad_norm": 0.33239132165908813, "learning_rate": 9.854774410489979e-06, "loss": 0.3813, "step": 8322 }, { "epoch": 0.846177307848719, "grad_norm": 0.32950738072395325, "learning_rate": 9.854689487300391e-06, "loss": 0.3869, "step": 8323 }, { "epoch": 0.846278975193168, "grad_norm": 0.30927956104278564, "learning_rate": 9.85460453965403e-06, "loss": 0.3718, "step": 8324 }, { "epoch": 0.8463806425376169, "grad_norm": 0.3199562430381775, "learning_rate": 9.854519567551323e-06, "loss": 0.3906, "step": 8325 }, { "epoch": 0.8464823098820659, "grad_norm": 0.36399605870246887, "learning_rate": 9.854434570992702e-06, "loss": 0.4014, "step": 8326 }, { "epoch": 0.8465839772265148, "grad_norm": 0.3356614410877228, "learning_rate": 9.854349549978593e-06, "loss": 0.3975, "step": 8327 }, { "epoch": 0.8466856445709638, "grad_norm": 0.31902429461479187, "learning_rate": 9.854264504509425e-06, "loss": 0.3687, "step": 8328 }, { "epoch": 0.8467873119154128, "grad_norm": 0.3754357099533081, "learning_rate": 9.854179434585626e-06, "loss": 0.3666, "step": 8329 }, { "epoch": 0.8468889792598617, "grad_norm": 0.30798688530921936, "learning_rate": 9.854094340207623e-06, "loss": 0.3839, "step": 8330 }, { "epoch": 0.8469906466043107, "grad_norm": 0.3054943382740021, "learning_rate": 9.85400922137585e-06, "loss": 0.4221, "step": 8331 }, { "epoch": 0.8470923139487596, "grad_norm": 0.3635063171386719, "learning_rate": 9.853924078090727e-06, "loss": 0.3657, "step": 8332 }, { "epoch": 0.8471939812932087, "grad_norm": 0.3899398446083069, "learning_rate": 9.853838910352691e-06, "loss": 0.4025, "step": 8333 }, { "epoch": 0.8472956486376576, "grad_norm": 0.32607731223106384, "learning_rate": 9.853753718162167e-06, "loss": 0.3848, "step": 8334 }, { "epoch": 0.8473973159821065, "grad_norm": 0.3439719080924988, "learning_rate": 9.853668501519586e-06, "loss": 0.3923, "step": 8335 }, { "epoch": 0.8474989833265555, "grad_norm": 0.3554455041885376, "learning_rate": 9.853583260425377e-06, "loss": 0.3596, "step": 8336 }, { "epoch": 0.8476006506710044, "grad_norm": 0.36344611644744873, "learning_rate": 9.853497994879966e-06, "loss": 0.3963, "step": 8337 }, { "epoch": 0.8477023180154535, "grad_norm": 0.33121049404144287, "learning_rate": 9.853412704883789e-06, "loss": 0.3815, "step": 8338 }, { "epoch": 0.8478039853599024, "grad_norm": 0.3767484724521637, "learning_rate": 9.85332739043727e-06, "loss": 0.4246, "step": 8339 }, { "epoch": 0.8479056527043514, "grad_norm": 0.3383992612361908, "learning_rate": 9.85324205154084e-06, "loss": 0.3934, "step": 8340 }, { "epoch": 0.8480073200488003, "grad_norm": 0.3319839835166931, "learning_rate": 9.853156688194934e-06, "loss": 0.375, "step": 8341 }, { "epoch": 0.8481089873932492, "grad_norm": 0.32051101326942444, "learning_rate": 9.853071300399975e-06, "loss": 0.3804, "step": 8342 }, { "epoch": 0.8482106547376983, "grad_norm": 0.34333863854408264, "learning_rate": 9.852985888156396e-06, "loss": 0.4007, "step": 8343 }, { "epoch": 0.8483123220821472, "grad_norm": 0.37617379426956177, "learning_rate": 9.852900451464626e-06, "loss": 0.3689, "step": 8344 }, { "epoch": 0.8484139894265962, "grad_norm": 0.3463064134120941, "learning_rate": 9.8528149903251e-06, "loss": 0.3619, "step": 8345 }, { "epoch": 0.8485156567710451, "grad_norm": 0.3216269910335541, "learning_rate": 9.852729504738243e-06, "loss": 0.3871, "step": 8346 }, { "epoch": 0.8486173241154941, "grad_norm": 0.3304673135280609, "learning_rate": 9.852643994704489e-06, "loss": 0.3581, "step": 8347 }, { "epoch": 0.8487189914599431, "grad_norm": 0.3090662956237793, "learning_rate": 9.852558460224267e-06, "loss": 0.379, "step": 8348 }, { "epoch": 0.8488206588043921, "grad_norm": 0.3124483525753021, "learning_rate": 9.852472901298009e-06, "loss": 0.39, "step": 8349 }, { "epoch": 0.848922326148841, "grad_norm": 0.3145671486854553, "learning_rate": 9.852387317926146e-06, "loss": 0.414, "step": 8350 }, { "epoch": 0.8490239934932899, "grad_norm": 0.2850518226623535, "learning_rate": 9.852301710109109e-06, "loss": 0.3883, "step": 8351 }, { "epoch": 0.8491256608377389, "grad_norm": 0.3139042556285858, "learning_rate": 9.852216077847328e-06, "loss": 0.3758, "step": 8352 }, { "epoch": 0.8492273281821879, "grad_norm": 0.2938995063304901, "learning_rate": 9.852130421141236e-06, "loss": 0.384, "step": 8353 }, { "epoch": 0.8493289955266369, "grad_norm": 0.31721094250679016, "learning_rate": 9.852044739991264e-06, "loss": 0.3713, "step": 8354 }, { "epoch": 0.8494306628710858, "grad_norm": 0.30908286571502686, "learning_rate": 9.851959034397843e-06, "loss": 0.3971, "step": 8355 }, { "epoch": 0.8495323302155348, "grad_norm": 0.3120766282081604, "learning_rate": 9.851873304361405e-06, "loss": 0.414, "step": 8356 }, { "epoch": 0.8496339975599837, "grad_norm": 0.32929444313049316, "learning_rate": 9.851787549882384e-06, "loss": 0.3994, "step": 8357 }, { "epoch": 0.8497356649044326, "grad_norm": 0.29964491724967957, "learning_rate": 9.85170177096121e-06, "loss": 0.4232, "step": 8358 }, { "epoch": 0.8498373322488817, "grad_norm": 0.3262071907520294, "learning_rate": 9.851615967598316e-06, "loss": 0.4457, "step": 8359 }, { "epoch": 0.8499389995933306, "grad_norm": 0.31936922669410706, "learning_rate": 9.851530139794132e-06, "loss": 0.365, "step": 8360 }, { "epoch": 0.8500406669377796, "grad_norm": 0.3519129455089569, "learning_rate": 9.851444287549092e-06, "loss": 0.3782, "step": 8361 }, { "epoch": 0.8501423342822285, "grad_norm": 0.3213324248790741, "learning_rate": 9.85135841086363e-06, "loss": 0.392, "step": 8362 }, { "epoch": 0.8502440016266775, "grad_norm": 0.2929277718067169, "learning_rate": 9.851272509738177e-06, "loss": 0.3583, "step": 8363 }, { "epoch": 0.8503456689711265, "grad_norm": 0.33538904786109924, "learning_rate": 9.851186584173165e-06, "loss": 0.3686, "step": 8364 }, { "epoch": 0.8504473363155755, "grad_norm": 0.3455994129180908, "learning_rate": 9.851100634169027e-06, "loss": 0.3882, "step": 8365 }, { "epoch": 0.8505490036600244, "grad_norm": 0.28261086344718933, "learning_rate": 9.8510146597262e-06, "loss": 0.3445, "step": 8366 }, { "epoch": 0.8506506710044733, "grad_norm": 0.3211216628551483, "learning_rate": 9.850928660845111e-06, "loss": 0.3853, "step": 8367 }, { "epoch": 0.8507523383489223, "grad_norm": 0.33847835659980774, "learning_rate": 9.850842637526197e-06, "loss": 0.3966, "step": 8368 }, { "epoch": 0.8508540056933713, "grad_norm": 0.3238575756549835, "learning_rate": 9.85075658976989e-06, "loss": 0.375, "step": 8369 }, { "epoch": 0.8509556730378203, "grad_norm": 0.3093055784702301, "learning_rate": 9.850670517576625e-06, "loss": 0.3806, "step": 8370 }, { "epoch": 0.8510573403822692, "grad_norm": 0.3297022581100464, "learning_rate": 9.850584420946835e-06, "loss": 0.3905, "step": 8371 }, { "epoch": 0.8511590077267182, "grad_norm": 0.32397201657295227, "learning_rate": 9.85049829988095e-06, "loss": 0.3748, "step": 8372 }, { "epoch": 0.8512606750711671, "grad_norm": 0.32420244812965393, "learning_rate": 9.850412154379411e-06, "loss": 0.3728, "step": 8373 }, { "epoch": 0.8513623424156161, "grad_norm": 0.3292601406574249, "learning_rate": 9.850325984442648e-06, "loss": 0.3573, "step": 8374 }, { "epoch": 0.8514640097600651, "grad_norm": 0.33317938446998596, "learning_rate": 9.850239790071094e-06, "loss": 0.3943, "step": 8375 }, { "epoch": 0.851565677104514, "grad_norm": 0.34024208784103394, "learning_rate": 9.850153571265184e-06, "loss": 0.3691, "step": 8376 }, { "epoch": 0.851667344448963, "grad_norm": 0.30721205472946167, "learning_rate": 9.850067328025354e-06, "loss": 0.3809, "step": 8377 }, { "epoch": 0.8517690117934119, "grad_norm": 0.32331010699272156, "learning_rate": 9.849981060352038e-06, "loss": 0.4011, "step": 8378 }, { "epoch": 0.851870679137861, "grad_norm": 0.3659365475177765, "learning_rate": 9.849894768245668e-06, "loss": 0.3841, "step": 8379 }, { "epoch": 0.8519723464823099, "grad_norm": 0.3459642827510834, "learning_rate": 9.84980845170668e-06, "loss": 0.3681, "step": 8380 }, { "epoch": 0.8520740138267588, "grad_norm": 0.3234075605869293, "learning_rate": 9.849722110735514e-06, "loss": 0.3867, "step": 8381 }, { "epoch": 0.8521756811712078, "grad_norm": 0.34724685549736023, "learning_rate": 9.849635745332597e-06, "loss": 0.3653, "step": 8382 }, { "epoch": 0.8522773485156567, "grad_norm": 0.3269725739955902, "learning_rate": 9.84954935549837e-06, "loss": 0.3995, "step": 8383 }, { "epoch": 0.8523790158601058, "grad_norm": 0.3578585088253021, "learning_rate": 9.849462941233262e-06, "loss": 0.3874, "step": 8384 }, { "epoch": 0.8524806832045547, "grad_norm": 0.3463672995567322, "learning_rate": 9.849376502537714e-06, "loss": 0.391, "step": 8385 }, { "epoch": 0.8525823505490037, "grad_norm": 0.3499906063079834, "learning_rate": 9.84929003941216e-06, "loss": 0.3823, "step": 8386 }, { "epoch": 0.8526840178934526, "grad_norm": 0.329921156167984, "learning_rate": 9.849203551857034e-06, "loss": 0.3902, "step": 8387 }, { "epoch": 0.8527856852379015, "grad_norm": 0.30727627873420715, "learning_rate": 9.849117039872775e-06, "loss": 0.372, "step": 8388 }, { "epoch": 0.8528873525823506, "grad_norm": 0.33568647503852844, "learning_rate": 9.849030503459815e-06, "loss": 0.3915, "step": 8389 }, { "epoch": 0.8529890199267995, "grad_norm": 0.34780654311180115, "learning_rate": 9.848943942618592e-06, "loss": 0.4199, "step": 8390 }, { "epoch": 0.8530906872712485, "grad_norm": 0.3155103325843811, "learning_rate": 9.848857357349542e-06, "loss": 0.4045, "step": 8391 }, { "epoch": 0.8531923546156974, "grad_norm": 0.3457314968109131, "learning_rate": 9.8487707476531e-06, "loss": 0.4026, "step": 8392 }, { "epoch": 0.8532940219601464, "grad_norm": 0.3227718770503998, "learning_rate": 9.848684113529705e-06, "loss": 0.4083, "step": 8393 }, { "epoch": 0.8533956893045954, "grad_norm": 0.3156420886516571, "learning_rate": 9.84859745497979e-06, "loss": 0.4373, "step": 8394 }, { "epoch": 0.8534973566490444, "grad_norm": 0.32443317770957947, "learning_rate": 9.848510772003795e-06, "loss": 0.4125, "step": 8395 }, { "epoch": 0.8535990239934933, "grad_norm": 0.3311973214149475, "learning_rate": 9.848424064602152e-06, "loss": 0.4376, "step": 8396 }, { "epoch": 0.8537006913379422, "grad_norm": 0.31041601300239563, "learning_rate": 9.848337332775303e-06, "loss": 0.3905, "step": 8397 }, { "epoch": 0.8538023586823912, "grad_norm": 0.34559059143066406, "learning_rate": 9.848250576523682e-06, "loss": 0.3744, "step": 8398 }, { "epoch": 0.8539040260268401, "grad_norm": 0.3255808353424072, "learning_rate": 9.848163795847728e-06, "loss": 0.3701, "step": 8399 }, { "epoch": 0.8540056933712892, "grad_norm": 0.3217732608318329, "learning_rate": 9.848076990747875e-06, "loss": 0.3922, "step": 8400 }, { "epoch": 0.8541073607157381, "grad_norm": 0.32348015904426575, "learning_rate": 9.847990161224562e-06, "loss": 0.3968, "step": 8401 }, { "epoch": 0.8542090280601871, "grad_norm": 0.303591251373291, "learning_rate": 9.847903307278228e-06, "loss": 0.3802, "step": 8402 }, { "epoch": 0.854310695404636, "grad_norm": 0.3037789762020111, "learning_rate": 9.847816428909308e-06, "loss": 0.4004, "step": 8403 }, { "epoch": 0.8544123627490849, "grad_norm": 0.3572944402694702, "learning_rate": 9.847729526118242e-06, "loss": 0.3881, "step": 8404 }, { "epoch": 0.854514030093534, "grad_norm": 0.3101639449596405, "learning_rate": 9.847642598905466e-06, "loss": 0.3882, "step": 8405 }, { "epoch": 0.8546156974379829, "grad_norm": 0.319577157497406, "learning_rate": 9.847555647271418e-06, "loss": 0.3829, "step": 8406 }, { "epoch": 0.8547173647824319, "grad_norm": 0.35796278715133667, "learning_rate": 9.847468671216539e-06, "loss": 0.3726, "step": 8407 }, { "epoch": 0.8548190321268808, "grad_norm": 0.30030208826065063, "learning_rate": 9.847381670741262e-06, "loss": 0.4037, "step": 8408 }, { "epoch": 0.8549206994713298, "grad_norm": 0.3328496813774109, "learning_rate": 9.847294645846029e-06, "loss": 0.4194, "step": 8409 }, { "epoch": 0.8550223668157788, "grad_norm": 0.3178492784500122, "learning_rate": 9.847207596531278e-06, "loss": 0.3928, "step": 8410 }, { "epoch": 0.8551240341602278, "grad_norm": 0.31879013776779175, "learning_rate": 9.847120522797447e-06, "loss": 0.3626, "step": 8411 }, { "epoch": 0.8552257015046767, "grad_norm": 0.3302992284297943, "learning_rate": 9.847033424644973e-06, "loss": 0.4042, "step": 8412 }, { "epoch": 0.8553273688491256, "grad_norm": 0.35185757279396057, "learning_rate": 9.846946302074298e-06, "loss": 0.3872, "step": 8413 }, { "epoch": 0.8554290361935746, "grad_norm": 0.33261924982070923, "learning_rate": 9.84685915508586e-06, "loss": 0.3916, "step": 8414 }, { "epoch": 0.8555307035380236, "grad_norm": 0.34620967507362366, "learning_rate": 9.846771983680096e-06, "loss": 0.3874, "step": 8415 }, { "epoch": 0.8556323708824726, "grad_norm": 0.3276161253452301, "learning_rate": 9.846684787857446e-06, "loss": 0.3475, "step": 8416 }, { "epoch": 0.8557340382269215, "grad_norm": 0.31936800479888916, "learning_rate": 9.846597567618352e-06, "loss": 0.3861, "step": 8417 }, { "epoch": 0.8558357055713705, "grad_norm": 0.3323214054107666, "learning_rate": 9.84651032296325e-06, "loss": 0.3814, "step": 8418 }, { "epoch": 0.8559373729158194, "grad_norm": 0.3688594698905945, "learning_rate": 9.84642305389258e-06, "loss": 0.4108, "step": 8419 }, { "epoch": 0.8560390402602684, "grad_norm": 0.35041090846061707, "learning_rate": 9.846335760406783e-06, "loss": 0.3781, "step": 8420 }, { "epoch": 0.8561407076047174, "grad_norm": 0.332474946975708, "learning_rate": 9.846248442506297e-06, "loss": 0.3534, "step": 8421 }, { "epoch": 0.8562423749491663, "grad_norm": 0.3585069477558136, "learning_rate": 9.846161100191564e-06, "loss": 0.3923, "step": 8422 }, { "epoch": 0.8563440422936153, "grad_norm": 0.3474225699901581, "learning_rate": 9.846073733463024e-06, "loss": 0.3866, "step": 8423 }, { "epoch": 0.8564457096380642, "grad_norm": 0.36906254291534424, "learning_rate": 9.845986342321114e-06, "loss": 0.4362, "step": 8424 }, { "epoch": 0.8565473769825133, "grad_norm": 0.3164553642272949, "learning_rate": 9.845898926766279e-06, "loss": 0.4008, "step": 8425 }, { "epoch": 0.8566490443269622, "grad_norm": 0.3357463777065277, "learning_rate": 9.845811486798956e-06, "loss": 0.3575, "step": 8426 }, { "epoch": 0.8567507116714111, "grad_norm": 0.33254021406173706, "learning_rate": 9.845724022419585e-06, "loss": 0.3703, "step": 8427 }, { "epoch": 0.8568523790158601, "grad_norm": 0.31216806173324585, "learning_rate": 9.845636533628608e-06, "loss": 0.378, "step": 8428 }, { "epoch": 0.856954046360309, "grad_norm": 0.3492407500743866, "learning_rate": 9.845549020426465e-06, "loss": 0.408, "step": 8429 }, { "epoch": 0.8570557137047581, "grad_norm": 0.34541937708854675, "learning_rate": 9.8454614828136e-06, "loss": 0.3906, "step": 8430 }, { "epoch": 0.857157381049207, "grad_norm": 0.31254449486732483, "learning_rate": 9.84537392079045e-06, "loss": 0.4139, "step": 8431 }, { "epoch": 0.857259048393656, "grad_norm": 0.36099693179130554, "learning_rate": 9.845286334357457e-06, "loss": 0.4108, "step": 8432 }, { "epoch": 0.8573607157381049, "grad_norm": 0.3400419354438782, "learning_rate": 9.845198723515065e-06, "loss": 0.3933, "step": 8433 }, { "epoch": 0.8574623830825538, "grad_norm": 0.3414430618286133, "learning_rate": 9.845111088263711e-06, "loss": 0.3862, "step": 8434 }, { "epoch": 0.8575640504270029, "grad_norm": 0.3047906458377838, "learning_rate": 9.84502342860384e-06, "loss": 0.4003, "step": 8435 }, { "epoch": 0.8576657177714518, "grad_norm": 0.3218674659729004, "learning_rate": 9.844935744535893e-06, "loss": 0.3939, "step": 8436 }, { "epoch": 0.8577673851159008, "grad_norm": 0.31951114535331726, "learning_rate": 9.844848036060309e-06, "loss": 0.3756, "step": 8437 }, { "epoch": 0.8578690524603497, "grad_norm": 0.3534933030605316, "learning_rate": 9.844760303177534e-06, "loss": 0.4134, "step": 8438 }, { "epoch": 0.8579707198047987, "grad_norm": 0.34324294328689575, "learning_rate": 9.844672545888005e-06, "loss": 0.4157, "step": 8439 }, { "epoch": 0.8580723871492476, "grad_norm": 0.34001484513282776, "learning_rate": 9.844584764192168e-06, "loss": 0.3669, "step": 8440 }, { "epoch": 0.8581740544936967, "grad_norm": 0.31713661551475525, "learning_rate": 9.844496958090465e-06, "loss": 0.3996, "step": 8441 }, { "epoch": 0.8582757218381456, "grad_norm": 0.35331425070762634, "learning_rate": 9.844409127583338e-06, "loss": 0.3768, "step": 8442 }, { "epoch": 0.8583773891825945, "grad_norm": 0.40453705191612244, "learning_rate": 9.84432127267123e-06, "loss": 0.4201, "step": 8443 }, { "epoch": 0.8584790565270435, "grad_norm": 0.33387064933776855, "learning_rate": 9.84423339335458e-06, "loss": 0.403, "step": 8444 }, { "epoch": 0.8585807238714924, "grad_norm": 0.3688717484474182, "learning_rate": 9.844145489633835e-06, "loss": 0.3793, "step": 8445 }, { "epoch": 0.8586823912159415, "grad_norm": 0.4035572111606598, "learning_rate": 9.844057561509435e-06, "loss": 0.4181, "step": 8446 }, { "epoch": 0.8587840585603904, "grad_norm": 0.3263297975063324, "learning_rate": 9.843969608981825e-06, "loss": 0.3925, "step": 8447 }, { "epoch": 0.8588857259048394, "grad_norm": 0.3211163878440857, "learning_rate": 9.843881632051447e-06, "loss": 0.3696, "step": 8448 }, { "epoch": 0.8589873932492883, "grad_norm": 0.352711021900177, "learning_rate": 9.843793630718745e-06, "loss": 0.3648, "step": 8449 }, { "epoch": 0.8590890605937372, "grad_norm": 0.3588755428791046, "learning_rate": 9.843705604984162e-06, "loss": 0.4288, "step": 8450 }, { "epoch": 0.8591907279381863, "grad_norm": 0.31655773520469666, "learning_rate": 9.84361755484814e-06, "loss": 0.382, "step": 8451 }, { "epoch": 0.8592923952826352, "grad_norm": 0.34916916489601135, "learning_rate": 9.843529480311123e-06, "loss": 0.376, "step": 8452 }, { "epoch": 0.8593940626270842, "grad_norm": 0.3718419075012207, "learning_rate": 9.843441381373557e-06, "loss": 0.4258, "step": 8453 }, { "epoch": 0.8594957299715331, "grad_norm": 0.3473544120788574, "learning_rate": 9.843353258035883e-06, "loss": 0.3568, "step": 8454 }, { "epoch": 0.8595973973159821, "grad_norm": 0.32242611050605774, "learning_rate": 9.843265110298547e-06, "loss": 0.3899, "step": 8455 }, { "epoch": 0.8596990646604311, "grad_norm": 0.3184034824371338, "learning_rate": 9.843176938161993e-06, "loss": 0.3557, "step": 8456 }, { "epoch": 0.85980073200488, "grad_norm": 0.3933192491531372, "learning_rate": 9.843088741626666e-06, "loss": 0.4127, "step": 8457 }, { "epoch": 0.859902399349329, "grad_norm": 0.32279470562934875, "learning_rate": 9.843000520693005e-06, "loss": 0.4022, "step": 8458 }, { "epoch": 0.8600040666937779, "grad_norm": 0.317175954580307, "learning_rate": 9.842912275361461e-06, "loss": 0.404, "step": 8459 }, { "epoch": 0.8601057340382269, "grad_norm": 0.35299041867256165, "learning_rate": 9.842824005632475e-06, "loss": 0.3761, "step": 8460 }, { "epoch": 0.8602074013826759, "grad_norm": 0.3192364573478699, "learning_rate": 9.842735711506492e-06, "loss": 0.397, "step": 8461 }, { "epoch": 0.8603090687271249, "grad_norm": 0.30952194333076477, "learning_rate": 9.842647392983958e-06, "loss": 0.3818, "step": 8462 }, { "epoch": 0.8604107360715738, "grad_norm": 0.30859267711639404, "learning_rate": 9.842559050065316e-06, "loss": 0.4076, "step": 8463 }, { "epoch": 0.8605124034160228, "grad_norm": 0.34314507246017456, "learning_rate": 9.842470682751013e-06, "loss": 0.3801, "step": 8464 }, { "epoch": 0.8606140707604717, "grad_norm": 0.3343682885169983, "learning_rate": 9.842382291041495e-06, "loss": 0.398, "step": 8465 }, { "epoch": 0.8607157381049207, "grad_norm": 0.33105969429016113, "learning_rate": 9.842293874937204e-06, "loss": 0.3749, "step": 8466 }, { "epoch": 0.8608174054493697, "grad_norm": 0.32860055565834045, "learning_rate": 9.842205434438588e-06, "loss": 0.3998, "step": 8467 }, { "epoch": 0.8609190727938186, "grad_norm": 0.3555898070335388, "learning_rate": 9.84211696954609e-06, "loss": 0.3843, "step": 8468 }, { "epoch": 0.8610207401382676, "grad_norm": 0.3269003927707672, "learning_rate": 9.84202848026016e-06, "loss": 0.39, "step": 8469 }, { "epoch": 0.8611224074827165, "grad_norm": 0.3465923070907593, "learning_rate": 9.84193996658124e-06, "loss": 0.3702, "step": 8470 }, { "epoch": 0.8612240748271656, "grad_norm": 0.35749053955078125, "learning_rate": 9.841851428509776e-06, "loss": 0.375, "step": 8471 }, { "epoch": 0.8613257421716145, "grad_norm": 0.3303922414779663, "learning_rate": 9.841762866046217e-06, "loss": 0.4065, "step": 8472 }, { "epoch": 0.8614274095160634, "grad_norm": 0.40980708599090576, "learning_rate": 9.841674279191004e-06, "loss": 0.3913, "step": 8473 }, { "epoch": 0.8615290768605124, "grad_norm": 0.37015238404273987, "learning_rate": 9.841585667944588e-06, "loss": 0.3573, "step": 8474 }, { "epoch": 0.8616307442049613, "grad_norm": 0.3333839476108551, "learning_rate": 9.841497032307416e-06, "loss": 0.3656, "step": 8475 }, { "epoch": 0.8617324115494104, "grad_norm": 0.3322244882583618, "learning_rate": 9.84140837227993e-06, "loss": 0.3385, "step": 8476 }, { "epoch": 0.8618340788938593, "grad_norm": 0.354448139667511, "learning_rate": 9.84131968786258e-06, "loss": 0.4307, "step": 8477 }, { "epoch": 0.8619357462383083, "grad_norm": 0.34442585706710815, "learning_rate": 9.841230979055814e-06, "loss": 0.3823, "step": 8478 }, { "epoch": 0.8620374135827572, "grad_norm": 0.328971803188324, "learning_rate": 9.841142245860074e-06, "loss": 0.3869, "step": 8479 }, { "epoch": 0.8621390809272061, "grad_norm": 0.34446343779563904, "learning_rate": 9.841053488275811e-06, "loss": 0.3866, "step": 8480 }, { "epoch": 0.8622407482716552, "grad_norm": 0.33908388018608093, "learning_rate": 9.84096470630347e-06, "loss": 0.4014, "step": 8481 }, { "epoch": 0.8623424156161041, "grad_norm": 0.34174689650535583, "learning_rate": 9.8408758999435e-06, "loss": 0.4199, "step": 8482 }, { "epoch": 0.8624440829605531, "grad_norm": 0.3109284043312073, "learning_rate": 9.840787069196348e-06, "loss": 0.3954, "step": 8483 }, { "epoch": 0.862545750305002, "grad_norm": 0.38481202721595764, "learning_rate": 9.840698214062461e-06, "loss": 0.4107, "step": 8484 }, { "epoch": 0.862647417649451, "grad_norm": 0.3288211226463318, "learning_rate": 9.840609334542286e-06, "loss": 0.4315, "step": 8485 }, { "epoch": 0.8627490849938999, "grad_norm": 0.32001474499702454, "learning_rate": 9.840520430636272e-06, "loss": 0.3728, "step": 8486 }, { "epoch": 0.862850752338349, "grad_norm": 0.33852291107177734, "learning_rate": 9.840431502344864e-06, "loss": 0.4283, "step": 8487 }, { "epoch": 0.8629524196827979, "grad_norm": 0.32061702013015747, "learning_rate": 9.840342549668515e-06, "loss": 0.3606, "step": 8488 }, { "epoch": 0.8630540870272468, "grad_norm": 0.2875301241874695, "learning_rate": 9.84025357260767e-06, "loss": 0.3874, "step": 8489 }, { "epoch": 0.8631557543716958, "grad_norm": 0.345090389251709, "learning_rate": 9.840164571162777e-06, "loss": 0.3653, "step": 8490 }, { "epoch": 0.8632574217161447, "grad_norm": 0.31160596013069153, "learning_rate": 9.840075545334286e-06, "loss": 0.3891, "step": 8491 }, { "epoch": 0.8633590890605938, "grad_norm": 0.34473010897636414, "learning_rate": 9.839986495122646e-06, "loss": 0.3859, "step": 8492 }, { "epoch": 0.8634607564050427, "grad_norm": 0.3337050974369049, "learning_rate": 9.839897420528301e-06, "loss": 0.3903, "step": 8493 }, { "epoch": 0.8635624237494917, "grad_norm": 0.37435007095336914, "learning_rate": 9.839808321551705e-06, "loss": 0.3808, "step": 8494 }, { "epoch": 0.8636640910939406, "grad_norm": 0.30755507946014404, "learning_rate": 9.839719198193304e-06, "loss": 0.3732, "step": 8495 }, { "epoch": 0.8637657584383895, "grad_norm": 0.3067948520183563, "learning_rate": 9.839630050453548e-06, "loss": 0.3606, "step": 8496 }, { "epoch": 0.8638674257828386, "grad_norm": 0.32599443197250366, "learning_rate": 9.839540878332887e-06, "loss": 0.3751, "step": 8497 }, { "epoch": 0.8639690931272875, "grad_norm": 0.33092692494392395, "learning_rate": 9.839451681831767e-06, "loss": 0.4334, "step": 8498 }, { "epoch": 0.8640707604717365, "grad_norm": 0.35159942507743835, "learning_rate": 9.839362460950641e-06, "loss": 0.3928, "step": 8499 }, { "epoch": 0.8641724278161854, "grad_norm": 0.3578519821166992, "learning_rate": 9.839273215689956e-06, "loss": 0.4197, "step": 8500 }, { "epoch": 0.8642740951606344, "grad_norm": 0.3449890911579132, "learning_rate": 9.839183946050164e-06, "loss": 0.3716, "step": 8501 }, { "epoch": 0.8643757625050834, "grad_norm": 0.4179060459136963, "learning_rate": 9.839094652031711e-06, "loss": 0.3621, "step": 8502 }, { "epoch": 0.8644774298495324, "grad_norm": 0.3496111333370209, "learning_rate": 9.83900533363505e-06, "loss": 0.3962, "step": 8503 }, { "epoch": 0.8645790971939813, "grad_norm": 0.3164990544319153, "learning_rate": 9.838915990860632e-06, "loss": 0.397, "step": 8504 }, { "epoch": 0.8646807645384302, "grad_norm": 0.3316049873828888, "learning_rate": 9.838826623708903e-06, "loss": 0.3938, "step": 8505 }, { "epoch": 0.8647824318828792, "grad_norm": 0.3421737849712372, "learning_rate": 9.838737232180315e-06, "loss": 0.3938, "step": 8506 }, { "epoch": 0.8648840992273282, "grad_norm": 0.3238045275211334, "learning_rate": 9.83864781627532e-06, "loss": 0.3709, "step": 8507 }, { "epoch": 0.8649857665717772, "grad_norm": 0.3118017017841339, "learning_rate": 9.838558375994365e-06, "loss": 0.4011, "step": 8508 }, { "epoch": 0.8650874339162261, "grad_norm": 0.31374815106391907, "learning_rate": 9.838468911337906e-06, "loss": 0.396, "step": 8509 }, { "epoch": 0.865189101260675, "grad_norm": 0.33527159690856934, "learning_rate": 9.838379422306389e-06, "loss": 0.3979, "step": 8510 }, { "epoch": 0.865290768605124, "grad_norm": 0.30185040831565857, "learning_rate": 9.838289908900264e-06, "loss": 0.3797, "step": 8511 }, { "epoch": 0.865392435949573, "grad_norm": 0.3000108301639557, "learning_rate": 9.838200371119988e-06, "loss": 0.367, "step": 8512 }, { "epoch": 0.865494103294022, "grad_norm": 0.34733402729034424, "learning_rate": 9.838110808966006e-06, "loss": 0.3968, "step": 8513 }, { "epoch": 0.8655957706384709, "grad_norm": 0.3162219822406769, "learning_rate": 9.838021222438772e-06, "loss": 0.3715, "step": 8514 }, { "epoch": 0.8656974379829199, "grad_norm": 0.32684552669525146, "learning_rate": 9.837931611538736e-06, "loss": 0.3749, "step": 8515 }, { "epoch": 0.8657991053273688, "grad_norm": 0.3219580352306366, "learning_rate": 9.837841976266351e-06, "loss": 0.3767, "step": 8516 }, { "epoch": 0.8659007726718179, "grad_norm": 0.3373170793056488, "learning_rate": 9.837752316622067e-06, "loss": 0.3793, "step": 8517 }, { "epoch": 0.8660024400162668, "grad_norm": 0.34882107377052307, "learning_rate": 9.837662632606338e-06, "loss": 0.4165, "step": 8518 }, { "epoch": 0.8661041073607157, "grad_norm": 0.29741793870925903, "learning_rate": 9.837572924219615e-06, "loss": 0.4012, "step": 8519 }, { "epoch": 0.8662057747051647, "grad_norm": 0.36228570342063904, "learning_rate": 9.837483191462345e-06, "loss": 0.3891, "step": 8520 }, { "epoch": 0.8663074420496136, "grad_norm": 0.33725541830062866, "learning_rate": 9.837393434334988e-06, "loss": 0.3637, "step": 8521 }, { "epoch": 0.8664091093940627, "grad_norm": 0.3245500326156616, "learning_rate": 9.83730365283799e-06, "loss": 0.4049, "step": 8522 }, { "epoch": 0.8665107767385116, "grad_norm": 0.35868459939956665, "learning_rate": 9.837213846971809e-06, "loss": 0.4012, "step": 8523 }, { "epoch": 0.8666124440829606, "grad_norm": 0.3037653863430023, "learning_rate": 9.837124016736892e-06, "loss": 0.3601, "step": 8524 }, { "epoch": 0.8667141114274095, "grad_norm": 0.3397928774356842, "learning_rate": 9.837034162133692e-06, "loss": 0.4443, "step": 8525 }, { "epoch": 0.8668157787718584, "grad_norm": 0.3258824348449707, "learning_rate": 9.836944283162666e-06, "loss": 0.4032, "step": 8526 }, { "epoch": 0.8669174461163074, "grad_norm": 0.3271580934524536, "learning_rate": 9.836854379824263e-06, "loss": 0.4034, "step": 8527 }, { "epoch": 0.8670191134607564, "grad_norm": 0.33255454897880554, "learning_rate": 9.836764452118937e-06, "loss": 0.3649, "step": 8528 }, { "epoch": 0.8671207808052054, "grad_norm": 0.31942352652549744, "learning_rate": 9.836674500047141e-06, "loss": 0.3994, "step": 8529 }, { "epoch": 0.8672224481496543, "grad_norm": 0.3628474771976471, "learning_rate": 9.83658452360933e-06, "loss": 0.425, "step": 8530 }, { "epoch": 0.8673241154941033, "grad_norm": 0.31408369541168213, "learning_rate": 9.836494522805954e-06, "loss": 0.4004, "step": 8531 }, { "epoch": 0.8674257828385522, "grad_norm": 0.3077984154224396, "learning_rate": 9.836404497637468e-06, "loss": 0.3678, "step": 8532 }, { "epoch": 0.8675274501830013, "grad_norm": 0.36785194277763367, "learning_rate": 9.836314448104325e-06, "loss": 0.4459, "step": 8533 }, { "epoch": 0.8676291175274502, "grad_norm": 0.30257079005241394, "learning_rate": 9.836224374206979e-06, "loss": 0.3651, "step": 8534 }, { "epoch": 0.8677307848718991, "grad_norm": 0.3268560469150543, "learning_rate": 9.836134275945886e-06, "loss": 0.3751, "step": 8535 }, { "epoch": 0.8678324522163481, "grad_norm": 0.321042001247406, "learning_rate": 9.836044153321495e-06, "loss": 0.3989, "step": 8536 }, { "epoch": 0.867934119560797, "grad_norm": 0.31176483631134033, "learning_rate": 9.835954006334264e-06, "loss": 0.4074, "step": 8537 }, { "epoch": 0.8680357869052461, "grad_norm": 0.32227322459220886, "learning_rate": 9.835863834984646e-06, "loss": 0.4122, "step": 8538 }, { "epoch": 0.868137454249695, "grad_norm": 0.3188454806804657, "learning_rate": 9.835773639273097e-06, "loss": 0.3782, "step": 8539 }, { "epoch": 0.868239121594144, "grad_norm": 0.315939724445343, "learning_rate": 9.835683419200066e-06, "loss": 0.4021, "step": 8540 }, { "epoch": 0.8683407889385929, "grad_norm": 0.31105661392211914, "learning_rate": 9.835593174766013e-06, "loss": 0.335, "step": 8541 }, { "epoch": 0.8684424562830418, "grad_norm": 0.3205728232860565, "learning_rate": 9.83550290597139e-06, "loss": 0.3794, "step": 8542 }, { "epoch": 0.8685441236274909, "grad_norm": 0.3301471173763275, "learning_rate": 9.835412612816652e-06, "loss": 0.3935, "step": 8543 }, { "epoch": 0.8686457909719398, "grad_norm": 0.28554415702819824, "learning_rate": 9.835322295302256e-06, "loss": 0.3866, "step": 8544 }, { "epoch": 0.8687474583163888, "grad_norm": 0.2900037169456482, "learning_rate": 9.835231953428654e-06, "loss": 0.3899, "step": 8545 }, { "epoch": 0.8688491256608377, "grad_norm": 0.3167026937007904, "learning_rate": 9.835141587196304e-06, "loss": 0.394, "step": 8546 }, { "epoch": 0.8689507930052867, "grad_norm": 0.330372154712677, "learning_rate": 9.835051196605659e-06, "loss": 0.3862, "step": 8547 }, { "epoch": 0.8690524603497357, "grad_norm": 0.3236386775970459, "learning_rate": 9.834960781657175e-06, "loss": 0.3969, "step": 8548 }, { "epoch": 0.8691541276941847, "grad_norm": 0.3300824761390686, "learning_rate": 9.834870342351306e-06, "loss": 0.4197, "step": 8549 }, { "epoch": 0.8692557950386336, "grad_norm": 0.3328384757041931, "learning_rate": 9.83477987868851e-06, "loss": 0.3963, "step": 8550 }, { "epoch": 0.8693574623830825, "grad_norm": 0.3172514736652374, "learning_rate": 9.834689390669243e-06, "loss": 0.3654, "step": 8551 }, { "epoch": 0.8694591297275315, "grad_norm": 0.28764063119888306, "learning_rate": 9.834598878293959e-06, "loss": 0.393, "step": 8552 }, { "epoch": 0.8695607970719805, "grad_norm": 0.3402693569660187, "learning_rate": 9.834508341563114e-06, "loss": 0.3994, "step": 8553 }, { "epoch": 0.8696624644164295, "grad_norm": 0.33551090955734253, "learning_rate": 9.834417780477167e-06, "loss": 0.3958, "step": 8554 }, { "epoch": 0.8697641317608784, "grad_norm": 0.3035794496536255, "learning_rate": 9.83432719503657e-06, "loss": 0.3888, "step": 8555 }, { "epoch": 0.8698657991053274, "grad_norm": 0.3211766183376312, "learning_rate": 9.834236585241784e-06, "loss": 0.3772, "step": 8556 }, { "epoch": 0.8699674664497763, "grad_norm": 0.32135653495788574, "learning_rate": 9.834145951093258e-06, "loss": 0.384, "step": 8557 }, { "epoch": 0.8700691337942253, "grad_norm": 0.30744263529777527, "learning_rate": 9.834055292591458e-06, "loss": 0.3794, "step": 8558 }, { "epoch": 0.8701708011386743, "grad_norm": 0.3610224425792694, "learning_rate": 9.833964609736834e-06, "loss": 0.3593, "step": 8559 }, { "epoch": 0.8702724684831232, "grad_norm": 0.31689780950546265, "learning_rate": 9.833873902529847e-06, "loss": 0.3768, "step": 8560 }, { "epoch": 0.8703741358275722, "grad_norm": 0.3576159179210663, "learning_rate": 9.83378317097095e-06, "loss": 0.4139, "step": 8561 }, { "epoch": 0.8704758031720211, "grad_norm": 0.30831632018089294, "learning_rate": 9.833692415060603e-06, "loss": 0.3791, "step": 8562 }, { "epoch": 0.8705774705164702, "grad_norm": 0.3727136552333832, "learning_rate": 9.833601634799262e-06, "loss": 0.3766, "step": 8563 }, { "epoch": 0.8706791378609191, "grad_norm": 0.3516496419906616, "learning_rate": 9.833510830187384e-06, "loss": 0.3652, "step": 8564 }, { "epoch": 0.870780805205368, "grad_norm": 0.3284151554107666, "learning_rate": 9.833420001225427e-06, "loss": 0.4076, "step": 8565 }, { "epoch": 0.870882472549817, "grad_norm": 0.3379290699958801, "learning_rate": 9.83332914791385e-06, "loss": 0.3853, "step": 8566 }, { "epoch": 0.8709841398942659, "grad_norm": 0.41289228200912476, "learning_rate": 9.833238270253109e-06, "loss": 0.3745, "step": 8567 }, { "epoch": 0.8710858072387149, "grad_norm": 0.32362696528434753, "learning_rate": 9.83314736824366e-06, "loss": 0.4098, "step": 8568 }, { "epoch": 0.8711874745831639, "grad_norm": 0.3531155288219452, "learning_rate": 9.833056441885966e-06, "loss": 0.4207, "step": 8569 }, { "epoch": 0.8712891419276129, "grad_norm": 0.32823359966278076, "learning_rate": 9.832965491180482e-06, "loss": 0.3805, "step": 8570 }, { "epoch": 0.8713908092720618, "grad_norm": 0.3283582329750061, "learning_rate": 9.832874516127664e-06, "loss": 0.3576, "step": 8571 }, { "epoch": 0.8714924766165107, "grad_norm": 0.3140107989311218, "learning_rate": 9.832783516727974e-06, "loss": 0.3999, "step": 8572 }, { "epoch": 0.8715941439609597, "grad_norm": 0.3469901978969574, "learning_rate": 9.832692492981869e-06, "loss": 0.3988, "step": 8573 }, { "epoch": 0.8716958113054087, "grad_norm": 0.3809543550014496, "learning_rate": 9.832601444889808e-06, "loss": 0.3893, "step": 8574 }, { "epoch": 0.8717974786498577, "grad_norm": 0.3276805281639099, "learning_rate": 9.832510372452249e-06, "loss": 0.4097, "step": 8575 }, { "epoch": 0.8718991459943066, "grad_norm": 0.31343361735343933, "learning_rate": 9.83241927566965e-06, "loss": 0.3518, "step": 8576 }, { "epoch": 0.8720008133387556, "grad_norm": 0.32836490869522095, "learning_rate": 9.832328154542473e-06, "loss": 0.3916, "step": 8577 }, { "epoch": 0.8721024806832045, "grad_norm": 0.3260240852832794, "learning_rate": 9.832237009071176e-06, "loss": 0.3891, "step": 8578 }, { "epoch": 0.8722041480276536, "grad_norm": 0.30532151460647583, "learning_rate": 9.832145839256214e-06, "loss": 0.3856, "step": 8579 }, { "epoch": 0.8723058153721025, "grad_norm": 0.2878988981246948, "learning_rate": 9.83205464509805e-06, "loss": 0.3704, "step": 8580 }, { "epoch": 0.8724074827165514, "grad_norm": 0.3243614137172699, "learning_rate": 9.831963426597145e-06, "loss": 0.3502, "step": 8581 }, { "epoch": 0.8725091500610004, "grad_norm": 0.3272594213485718, "learning_rate": 9.831872183753957e-06, "loss": 0.3943, "step": 8582 }, { "epoch": 0.8726108174054493, "grad_norm": 0.3350695073604584, "learning_rate": 9.831780916568943e-06, "loss": 0.3646, "step": 8583 }, { "epoch": 0.8727124847498984, "grad_norm": 0.3070450723171234, "learning_rate": 9.831689625042565e-06, "loss": 0.408, "step": 8584 }, { "epoch": 0.8728141520943473, "grad_norm": 0.32343539595603943, "learning_rate": 9.831598309175285e-06, "loss": 0.4105, "step": 8585 }, { "epoch": 0.8729158194387963, "grad_norm": 0.33023497462272644, "learning_rate": 9.831506968967559e-06, "loss": 0.4139, "step": 8586 }, { "epoch": 0.8730174867832452, "grad_norm": 0.3054482042789459, "learning_rate": 9.83141560441985e-06, "loss": 0.3961, "step": 8587 }, { "epoch": 0.8731191541276941, "grad_norm": 0.3537997603416443, "learning_rate": 9.831324215532617e-06, "loss": 0.4107, "step": 8588 }, { "epoch": 0.8732208214721432, "grad_norm": 0.3048126697540283, "learning_rate": 9.831232802306319e-06, "loss": 0.4109, "step": 8589 }, { "epoch": 0.8733224888165921, "grad_norm": 0.2920853793621063, "learning_rate": 9.83114136474142e-06, "loss": 0.3792, "step": 8590 }, { "epoch": 0.8734241561610411, "grad_norm": 0.3209601640701294, "learning_rate": 9.831049902838378e-06, "loss": 0.4323, "step": 8591 }, { "epoch": 0.87352582350549, "grad_norm": 0.300248384475708, "learning_rate": 9.830958416597656e-06, "loss": 0.3767, "step": 8592 }, { "epoch": 0.873627490849939, "grad_norm": 0.3313238024711609, "learning_rate": 9.830866906019712e-06, "loss": 0.3689, "step": 8593 }, { "epoch": 0.873729158194388, "grad_norm": 0.3135312497615814, "learning_rate": 9.83077537110501e-06, "loss": 0.3765, "step": 8594 }, { "epoch": 0.873830825538837, "grad_norm": 0.308903306722641, "learning_rate": 9.830683811854008e-06, "loss": 0.3568, "step": 8595 }, { "epoch": 0.8739324928832859, "grad_norm": 0.32130104303359985, "learning_rate": 9.83059222826717e-06, "loss": 0.3937, "step": 8596 }, { "epoch": 0.8740341602277348, "grad_norm": 0.31303027272224426, "learning_rate": 9.830500620344955e-06, "loss": 0.3884, "step": 8597 }, { "epoch": 0.8741358275721838, "grad_norm": 0.312884122133255, "learning_rate": 9.830408988087828e-06, "loss": 0.3733, "step": 8598 }, { "epoch": 0.8742374949166328, "grad_norm": 0.3339879810810089, "learning_rate": 9.830317331496247e-06, "loss": 0.3703, "step": 8599 }, { "epoch": 0.8743391622610818, "grad_norm": 0.3085101544857025, "learning_rate": 9.830225650570675e-06, "loss": 0.3814, "step": 8600 }, { "epoch": 0.8744408296055307, "grad_norm": 0.2784089744091034, "learning_rate": 9.830133945311576e-06, "loss": 0.3443, "step": 8601 }, { "epoch": 0.8745424969499797, "grad_norm": 0.34349310398101807, "learning_rate": 9.830042215719407e-06, "loss": 0.3668, "step": 8602 }, { "epoch": 0.8746441642944286, "grad_norm": 0.3114061653614044, "learning_rate": 9.829950461794635e-06, "loss": 0.3879, "step": 8603 }, { "epoch": 0.8747458316388776, "grad_norm": 0.28218355774879456, "learning_rate": 9.82985868353772e-06, "loss": 0.3603, "step": 8604 }, { "epoch": 0.8748474989833266, "grad_norm": 0.30617600679397583, "learning_rate": 9.829766880949125e-06, "loss": 0.3812, "step": 8605 }, { "epoch": 0.8749491663277755, "grad_norm": 0.31878986954689026, "learning_rate": 9.829675054029311e-06, "loss": 0.3828, "step": 8606 }, { "epoch": 0.8750508336722245, "grad_norm": 0.3311171531677246, "learning_rate": 9.829583202778742e-06, "loss": 0.3837, "step": 8607 }, { "epoch": 0.8751525010166734, "grad_norm": 0.3251890242099762, "learning_rate": 9.829491327197882e-06, "loss": 0.3749, "step": 8608 }, { "epoch": 0.8752541683611224, "grad_norm": 0.3369819223880768, "learning_rate": 9.82939942728719e-06, "loss": 0.37, "step": 8609 }, { "epoch": 0.8753558357055714, "grad_norm": 0.32853415608406067, "learning_rate": 9.829307503047135e-06, "loss": 0.3674, "step": 8610 }, { "epoch": 0.8754575030500203, "grad_norm": 0.35084378719329834, "learning_rate": 9.829215554478172e-06, "loss": 0.3576, "step": 8611 }, { "epoch": 0.8755591703944693, "grad_norm": 0.32338908314704895, "learning_rate": 9.82912358158077e-06, "loss": 0.4104, "step": 8612 }, { "epoch": 0.8756608377389182, "grad_norm": 0.3203957974910736, "learning_rate": 9.829031584355393e-06, "loss": 0.3725, "step": 8613 }, { "epoch": 0.8757625050833672, "grad_norm": 0.36088496446609497, "learning_rate": 9.828939562802502e-06, "loss": 0.3747, "step": 8614 }, { "epoch": 0.8758641724278162, "grad_norm": 0.3050801455974579, "learning_rate": 9.82884751692256e-06, "loss": 0.3693, "step": 8615 }, { "epoch": 0.8759658397722652, "grad_norm": 0.33762431144714355, "learning_rate": 9.828755446716032e-06, "loss": 0.3988, "step": 8616 }, { "epoch": 0.8760675071167141, "grad_norm": 0.3207441568374634, "learning_rate": 9.82866335218338e-06, "loss": 0.3576, "step": 8617 }, { "epoch": 0.876169174461163, "grad_norm": 0.347171813249588, "learning_rate": 9.828571233325073e-06, "loss": 0.416, "step": 8618 }, { "epoch": 0.876270841805612, "grad_norm": 0.3452550172805786, "learning_rate": 9.828479090141568e-06, "loss": 0.3813, "step": 8619 }, { "epoch": 0.876372509150061, "grad_norm": 0.3338868319988251, "learning_rate": 9.828386922633335e-06, "loss": 0.3868, "step": 8620 }, { "epoch": 0.87647417649451, "grad_norm": 0.3017929792404175, "learning_rate": 9.828294730800835e-06, "loss": 0.4007, "step": 8621 }, { "epoch": 0.8765758438389589, "grad_norm": 0.3166244626045227, "learning_rate": 9.828202514644534e-06, "loss": 0.3738, "step": 8622 }, { "epoch": 0.8766775111834079, "grad_norm": 0.30224642157554626, "learning_rate": 9.828110274164896e-06, "loss": 0.4027, "step": 8623 }, { "epoch": 0.8767791785278568, "grad_norm": 0.2964169979095459, "learning_rate": 9.828018009362386e-06, "loss": 0.3812, "step": 8624 }, { "epoch": 0.8768808458723059, "grad_norm": 0.3408784568309784, "learning_rate": 9.827925720237468e-06, "loss": 0.3901, "step": 8625 }, { "epoch": 0.8769825132167548, "grad_norm": 0.3221625089645386, "learning_rate": 9.827833406790608e-06, "loss": 0.3927, "step": 8626 }, { "epoch": 0.8770841805612037, "grad_norm": 0.3136245310306549, "learning_rate": 9.82774106902227e-06, "loss": 0.3899, "step": 8627 }, { "epoch": 0.8771858479056527, "grad_norm": 0.3517056703567505, "learning_rate": 9.82764870693292e-06, "loss": 0.4195, "step": 8628 }, { "epoch": 0.8772875152501016, "grad_norm": 0.33147263526916504, "learning_rate": 9.827556320523024e-06, "loss": 0.4086, "step": 8629 }, { "epoch": 0.8773891825945507, "grad_norm": 0.38040587306022644, "learning_rate": 9.827463909793045e-06, "loss": 0.4049, "step": 8630 }, { "epoch": 0.8774908499389996, "grad_norm": 0.3352098762989044, "learning_rate": 9.82737147474345e-06, "loss": 0.3884, "step": 8631 }, { "epoch": 0.8775925172834486, "grad_norm": 0.3163328468799591, "learning_rate": 9.827279015374706e-06, "loss": 0.434, "step": 8632 }, { "epoch": 0.8776941846278975, "grad_norm": 0.3194078505039215, "learning_rate": 9.827186531687277e-06, "loss": 0.3866, "step": 8633 }, { "epoch": 0.8777958519723464, "grad_norm": 0.3147849440574646, "learning_rate": 9.82709402368163e-06, "loss": 0.3753, "step": 8634 }, { "epoch": 0.8778975193167955, "grad_norm": 0.3040098249912262, "learning_rate": 9.827001491358228e-06, "loss": 0.3867, "step": 8635 }, { "epoch": 0.8779991866612444, "grad_norm": 0.34242939949035645, "learning_rate": 9.826908934717541e-06, "loss": 0.4066, "step": 8636 }, { "epoch": 0.8781008540056934, "grad_norm": 0.30325642228126526, "learning_rate": 9.826816353760035e-06, "loss": 0.4016, "step": 8637 }, { "epoch": 0.8782025213501423, "grad_norm": 0.3274676203727722, "learning_rate": 9.826723748486173e-06, "loss": 0.3621, "step": 8638 }, { "epoch": 0.8783041886945913, "grad_norm": 0.34347259998321533, "learning_rate": 9.826631118896426e-06, "loss": 0.375, "step": 8639 }, { "epoch": 0.8784058560390403, "grad_norm": 0.32800814509391785, "learning_rate": 9.826538464991258e-06, "loss": 0.3537, "step": 8640 }, { "epoch": 0.8785075233834893, "grad_norm": 0.3696078360080719, "learning_rate": 9.826445786771135e-06, "loss": 0.4367, "step": 8641 }, { "epoch": 0.8786091907279382, "grad_norm": 0.3239368200302124, "learning_rate": 9.826353084236525e-06, "loss": 0.4112, "step": 8642 }, { "epoch": 0.8787108580723871, "grad_norm": 0.3212175965309143, "learning_rate": 9.826260357387895e-06, "loss": 0.3659, "step": 8643 }, { "epoch": 0.8788125254168361, "grad_norm": 0.3404349088668823, "learning_rate": 9.826167606225714e-06, "loss": 0.3532, "step": 8644 }, { "epoch": 0.8789141927612851, "grad_norm": 0.31462162733078003, "learning_rate": 9.826074830750446e-06, "loss": 0.4073, "step": 8645 }, { "epoch": 0.8790158601057341, "grad_norm": 0.3375280499458313, "learning_rate": 9.82598203096256e-06, "loss": 0.3858, "step": 8646 }, { "epoch": 0.879117527450183, "grad_norm": 0.35096868872642517, "learning_rate": 9.825889206862524e-06, "loss": 0.3944, "step": 8647 }, { "epoch": 0.879219194794632, "grad_norm": 0.30843961238861084, "learning_rate": 9.825796358450805e-06, "loss": 0.3889, "step": 8648 }, { "epoch": 0.8793208621390809, "grad_norm": 0.31694263219833374, "learning_rate": 9.82570348572787e-06, "loss": 0.4245, "step": 8649 }, { "epoch": 0.8794225294835298, "grad_norm": 0.3403390049934387, "learning_rate": 9.825610588694187e-06, "loss": 0.3958, "step": 8650 }, { "epoch": 0.8795241968279789, "grad_norm": 0.29213714599609375, "learning_rate": 9.825517667350226e-06, "loss": 0.3504, "step": 8651 }, { "epoch": 0.8796258641724278, "grad_norm": 0.33627545833587646, "learning_rate": 9.825424721696454e-06, "loss": 0.4174, "step": 8652 }, { "epoch": 0.8797275315168768, "grad_norm": 0.3322984576225281, "learning_rate": 9.825331751733339e-06, "loss": 0.3832, "step": 8653 }, { "epoch": 0.8798291988613257, "grad_norm": 0.3036121428012848, "learning_rate": 9.825238757461347e-06, "loss": 0.3838, "step": 8654 }, { "epoch": 0.8799308662057747, "grad_norm": 0.3105931282043457, "learning_rate": 9.82514573888095e-06, "loss": 0.362, "step": 8655 }, { "epoch": 0.8800325335502237, "grad_norm": 0.3365042507648468, "learning_rate": 9.825052695992616e-06, "loss": 0.3954, "step": 8656 }, { "epoch": 0.8801342008946726, "grad_norm": 0.3028351664543152, "learning_rate": 9.824959628796814e-06, "loss": 0.3846, "step": 8657 }, { "epoch": 0.8802358682391216, "grad_norm": 0.31034141778945923, "learning_rate": 9.824866537294008e-06, "loss": 0.4147, "step": 8658 }, { "epoch": 0.8803375355835705, "grad_norm": 0.3105849325656891, "learning_rate": 9.824773421484674e-06, "loss": 0.3887, "step": 8659 }, { "epoch": 0.8804392029280195, "grad_norm": 0.3047090172767639, "learning_rate": 9.82468028136928e-06, "loss": 0.3645, "step": 8660 }, { "epoch": 0.8805408702724685, "grad_norm": 0.32362839579582214, "learning_rate": 9.82458711694829e-06, "loss": 0.3855, "step": 8661 }, { "epoch": 0.8806425376169175, "grad_norm": 0.3196902573108673, "learning_rate": 9.824493928222178e-06, "loss": 0.3791, "step": 8662 }, { "epoch": 0.8807442049613664, "grad_norm": 0.3386913537979126, "learning_rate": 9.82440071519141e-06, "loss": 0.4087, "step": 8663 }, { "epoch": 0.8808458723058153, "grad_norm": 0.33066028356552124, "learning_rate": 9.82430747785646e-06, "loss": 0.3972, "step": 8664 }, { "epoch": 0.8809475396502643, "grad_norm": 0.31780222058296204, "learning_rate": 9.824214216217796e-06, "loss": 0.3843, "step": 8665 }, { "epoch": 0.8810492069947133, "grad_norm": 0.29101061820983887, "learning_rate": 9.824120930275885e-06, "loss": 0.403, "step": 8666 }, { "epoch": 0.8811508743391623, "grad_norm": 0.3338131606578827, "learning_rate": 9.8240276200312e-06, "loss": 0.3656, "step": 8667 }, { "epoch": 0.8812525416836112, "grad_norm": 0.3308289349079132, "learning_rate": 9.82393428548421e-06, "loss": 0.3694, "step": 8668 }, { "epoch": 0.8813542090280602, "grad_norm": 0.32944440841674805, "learning_rate": 9.823840926635386e-06, "loss": 0.4023, "step": 8669 }, { "epoch": 0.8814558763725091, "grad_norm": 0.30790361762046814, "learning_rate": 9.823747543485198e-06, "loss": 0.3794, "step": 8670 }, { "epoch": 0.8815575437169582, "grad_norm": 0.30244171619415283, "learning_rate": 9.823654136034116e-06, "loss": 0.3796, "step": 8671 }, { "epoch": 0.8816592110614071, "grad_norm": 0.3329784870147705, "learning_rate": 9.82356070428261e-06, "loss": 0.3742, "step": 8672 }, { "epoch": 0.881760878405856, "grad_norm": 0.3090263903141022, "learning_rate": 9.823467248231152e-06, "loss": 0.3894, "step": 8673 }, { "epoch": 0.881862545750305, "grad_norm": 0.3282395601272583, "learning_rate": 9.823373767880211e-06, "loss": 0.3809, "step": 8674 }, { "epoch": 0.8819642130947539, "grad_norm": 0.31691479682922363, "learning_rate": 9.823280263230259e-06, "loss": 0.3708, "step": 8675 }, { "epoch": 0.882065880439203, "grad_norm": 0.3321987986564636, "learning_rate": 9.823186734281769e-06, "loss": 0.4131, "step": 8676 }, { "epoch": 0.8821675477836519, "grad_norm": 0.29918646812438965, "learning_rate": 9.82309318103521e-06, "loss": 0.3745, "step": 8677 }, { "epoch": 0.8822692151281009, "grad_norm": 0.3080039620399475, "learning_rate": 9.822999603491054e-06, "loss": 0.3736, "step": 8678 }, { "epoch": 0.8823708824725498, "grad_norm": 0.3531706631183624, "learning_rate": 9.822906001649771e-06, "loss": 0.4096, "step": 8679 }, { "epoch": 0.8824725498169987, "grad_norm": 0.3488772511482239, "learning_rate": 9.822812375511835e-06, "loss": 0.3955, "step": 8680 }, { "epoch": 0.8825742171614478, "grad_norm": 0.29999011754989624, "learning_rate": 9.822718725077715e-06, "loss": 0.3945, "step": 8681 }, { "epoch": 0.8826758845058967, "grad_norm": 0.36184000968933105, "learning_rate": 9.822625050347886e-06, "loss": 0.3474, "step": 8682 }, { "epoch": 0.8827775518503457, "grad_norm": 0.3733122646808624, "learning_rate": 9.822531351322815e-06, "loss": 0.4042, "step": 8683 }, { "epoch": 0.8828792191947946, "grad_norm": 0.3377378582954407, "learning_rate": 9.822437628002979e-06, "loss": 0.4088, "step": 8684 }, { "epoch": 0.8829808865392436, "grad_norm": 0.3276716470718384, "learning_rate": 9.822343880388849e-06, "loss": 0.3972, "step": 8685 }, { "epoch": 0.8830825538836926, "grad_norm": 0.3237382471561432, "learning_rate": 9.822250108480896e-06, "loss": 0.3871, "step": 8686 }, { "epoch": 0.8831842212281416, "grad_norm": 0.3358270525932312, "learning_rate": 9.82215631227959e-06, "loss": 0.3998, "step": 8687 }, { "epoch": 0.8832858885725905, "grad_norm": 0.35829782485961914, "learning_rate": 9.82206249178541e-06, "loss": 0.3725, "step": 8688 }, { "epoch": 0.8833875559170394, "grad_norm": 0.3305685520172119, "learning_rate": 9.821968646998823e-06, "loss": 0.3655, "step": 8689 }, { "epoch": 0.8834892232614884, "grad_norm": 0.3522397577762604, "learning_rate": 9.821874777920306e-06, "loss": 0.4078, "step": 8690 }, { "epoch": 0.8835908906059373, "grad_norm": 0.3039129376411438, "learning_rate": 9.821780884550327e-06, "loss": 0.3848, "step": 8691 }, { "epoch": 0.8836925579503864, "grad_norm": 0.33986368775367737, "learning_rate": 9.821686966889364e-06, "loss": 0.3623, "step": 8692 }, { "epoch": 0.8837942252948353, "grad_norm": 0.3924529254436493, "learning_rate": 9.821593024937887e-06, "loss": 0.3619, "step": 8693 }, { "epoch": 0.8838958926392843, "grad_norm": 0.34636473655700684, "learning_rate": 9.82149905869637e-06, "loss": 0.3927, "step": 8694 }, { "epoch": 0.8839975599837332, "grad_norm": 0.34896665811538696, "learning_rate": 9.821405068165286e-06, "loss": 0.4268, "step": 8695 }, { "epoch": 0.8840992273281821, "grad_norm": 0.3829537332057953, "learning_rate": 9.82131105334511e-06, "loss": 0.3913, "step": 8696 }, { "epoch": 0.8842008946726312, "grad_norm": 0.35056161880493164, "learning_rate": 9.821217014236315e-06, "loss": 0.3922, "step": 8697 }, { "epoch": 0.8843025620170801, "grad_norm": 0.341030478477478, "learning_rate": 9.821122950839373e-06, "loss": 0.3766, "step": 8698 }, { "epoch": 0.8844042293615291, "grad_norm": 0.3330562114715576, "learning_rate": 9.82102886315476e-06, "loss": 0.3834, "step": 8699 }, { "epoch": 0.884505896705978, "grad_norm": 0.3361847996711731, "learning_rate": 9.820934751182949e-06, "loss": 0.3612, "step": 8700 }, { "epoch": 0.884607564050427, "grad_norm": 0.2999083995819092, "learning_rate": 9.820840614924414e-06, "loss": 0.3919, "step": 8701 }, { "epoch": 0.884709231394876, "grad_norm": 0.3549557328224182, "learning_rate": 9.82074645437963e-06, "loss": 0.4024, "step": 8702 }, { "epoch": 0.884810898739325, "grad_norm": 0.33251604437828064, "learning_rate": 9.820652269549072e-06, "loss": 0.4112, "step": 8703 }, { "epoch": 0.8849125660837739, "grad_norm": 0.31116703152656555, "learning_rate": 9.820558060433211e-06, "loss": 0.3976, "step": 8704 }, { "epoch": 0.8850142334282228, "grad_norm": 0.3032827377319336, "learning_rate": 9.820463827032526e-06, "loss": 0.3666, "step": 8705 }, { "epoch": 0.8851159007726718, "grad_norm": 0.3671509325504303, "learning_rate": 9.82036956934749e-06, "loss": 0.3877, "step": 8706 }, { "epoch": 0.8852175681171208, "grad_norm": 0.31236377358436584, "learning_rate": 9.820275287378577e-06, "loss": 0.4182, "step": 8707 }, { "epoch": 0.8853192354615698, "grad_norm": 0.29589730501174927, "learning_rate": 9.820180981126262e-06, "loss": 0.4072, "step": 8708 }, { "epoch": 0.8854209028060187, "grad_norm": 0.33414191007614136, "learning_rate": 9.82008665059102e-06, "loss": 0.4199, "step": 8709 }, { "epoch": 0.8855225701504676, "grad_norm": 0.31161749362945557, "learning_rate": 9.81999229577333e-06, "loss": 0.4086, "step": 8710 }, { "epoch": 0.8856242374949166, "grad_norm": 0.3167436718940735, "learning_rate": 9.819897916673663e-06, "loss": 0.3805, "step": 8711 }, { "epoch": 0.8857259048393656, "grad_norm": 0.3387070298194885, "learning_rate": 9.819803513292495e-06, "loss": 0.387, "step": 8712 }, { "epoch": 0.8858275721838146, "grad_norm": 0.30193808674812317, "learning_rate": 9.819709085630301e-06, "loss": 0.3801, "step": 8713 }, { "epoch": 0.8859292395282635, "grad_norm": 0.33072522282600403, "learning_rate": 9.819614633687559e-06, "loss": 0.3823, "step": 8714 }, { "epoch": 0.8860309068727125, "grad_norm": 0.30555787682533264, "learning_rate": 9.819520157464744e-06, "loss": 0.3832, "step": 8715 }, { "epoch": 0.8861325742171614, "grad_norm": 0.3374650180339813, "learning_rate": 9.819425656962331e-06, "loss": 0.4016, "step": 8716 }, { "epoch": 0.8862342415616105, "grad_norm": 0.3076587915420532, "learning_rate": 9.819331132180798e-06, "loss": 0.3931, "step": 8717 }, { "epoch": 0.8863359089060594, "grad_norm": 0.38385868072509766, "learning_rate": 9.81923658312062e-06, "loss": 0.4242, "step": 8718 }, { "epoch": 0.8864375762505083, "grad_norm": 0.3085598945617676, "learning_rate": 9.819142009782272e-06, "loss": 0.3785, "step": 8719 }, { "epoch": 0.8865392435949573, "grad_norm": 0.3555978536605835, "learning_rate": 9.819047412166233e-06, "loss": 0.4327, "step": 8720 }, { "epoch": 0.8866409109394062, "grad_norm": 0.4168723225593567, "learning_rate": 9.818952790272977e-06, "loss": 0.3811, "step": 8721 }, { "epoch": 0.8867425782838553, "grad_norm": 0.32818740606307983, "learning_rate": 9.818858144102983e-06, "loss": 0.4144, "step": 8722 }, { "epoch": 0.8868442456283042, "grad_norm": 0.3149678409099579, "learning_rate": 9.818763473656727e-06, "loss": 0.3694, "step": 8723 }, { "epoch": 0.8869459129727532, "grad_norm": 0.3894604742527008, "learning_rate": 9.818668778934685e-06, "loss": 0.3998, "step": 8724 }, { "epoch": 0.8870475803172021, "grad_norm": 0.4032038748264313, "learning_rate": 9.818574059937335e-06, "loss": 0.3956, "step": 8725 }, { "epoch": 0.887149247661651, "grad_norm": 0.32108747959136963, "learning_rate": 9.818479316665155e-06, "loss": 0.3496, "step": 8726 }, { "epoch": 0.8872509150061001, "grad_norm": 0.37765586376190186, "learning_rate": 9.81838454911862e-06, "loss": 0.3853, "step": 8727 }, { "epoch": 0.887352582350549, "grad_norm": 0.36051106452941895, "learning_rate": 9.81828975729821e-06, "loss": 0.3689, "step": 8728 }, { "epoch": 0.887454249694998, "grad_norm": 0.34278231859207153, "learning_rate": 9.8181949412044e-06, "loss": 0.3807, "step": 8729 }, { "epoch": 0.8875559170394469, "grad_norm": 0.32236596941947937, "learning_rate": 9.818100100837668e-06, "loss": 0.3845, "step": 8730 }, { "epoch": 0.8876575843838959, "grad_norm": 0.33324894309043884, "learning_rate": 9.818005236198493e-06, "loss": 0.3862, "step": 8731 }, { "epoch": 0.8877592517283448, "grad_norm": 0.31003299355506897, "learning_rate": 9.817910347287354e-06, "loss": 0.3647, "step": 8732 }, { "epoch": 0.8878609190727939, "grad_norm": 0.3414062559604645, "learning_rate": 9.817815434104726e-06, "loss": 0.415, "step": 8733 }, { "epoch": 0.8879625864172428, "grad_norm": 0.2729800045490265, "learning_rate": 9.817720496651091e-06, "loss": 0.3696, "step": 8734 }, { "epoch": 0.8880642537616917, "grad_norm": 0.33771049976348877, "learning_rate": 9.817625534926923e-06, "loss": 0.4734, "step": 8735 }, { "epoch": 0.8881659211061407, "grad_norm": 0.3121461868286133, "learning_rate": 9.817530548932703e-06, "loss": 0.3865, "step": 8736 }, { "epoch": 0.8882675884505896, "grad_norm": 0.3338399827480316, "learning_rate": 9.81743553866891e-06, "loss": 0.4264, "step": 8737 }, { "epoch": 0.8883692557950387, "grad_norm": 0.32448798418045044, "learning_rate": 9.81734050413602e-06, "loss": 0.4019, "step": 8738 }, { "epoch": 0.8884709231394876, "grad_norm": 0.32728636264801025, "learning_rate": 9.817245445334514e-06, "loss": 0.3967, "step": 8739 }, { "epoch": 0.8885725904839366, "grad_norm": 0.29134368896484375, "learning_rate": 9.81715036226487e-06, "loss": 0.3707, "step": 8740 }, { "epoch": 0.8886742578283855, "grad_norm": 0.32515981793403625, "learning_rate": 9.817055254927567e-06, "loss": 0.3681, "step": 8741 }, { "epoch": 0.8887759251728344, "grad_norm": 0.3302648067474365, "learning_rate": 9.816960123323084e-06, "loss": 0.4242, "step": 8742 }, { "epoch": 0.8888775925172835, "grad_norm": 0.34882840514183044, "learning_rate": 9.816864967451902e-06, "loss": 0.4131, "step": 8743 }, { "epoch": 0.8889792598617324, "grad_norm": 0.32764819264411926, "learning_rate": 9.816769787314498e-06, "loss": 0.4063, "step": 8744 }, { "epoch": 0.8890809272061814, "grad_norm": 0.31645578145980835, "learning_rate": 9.816674582911351e-06, "loss": 0.3893, "step": 8745 }, { "epoch": 0.8891825945506303, "grad_norm": 0.3337680399417877, "learning_rate": 9.816579354242946e-06, "loss": 0.3768, "step": 8746 }, { "epoch": 0.8892842618950793, "grad_norm": 0.308470219373703, "learning_rate": 9.816484101309756e-06, "loss": 0.4066, "step": 8747 }, { "epoch": 0.8893859292395283, "grad_norm": 0.318411648273468, "learning_rate": 9.816388824112265e-06, "loss": 0.3789, "step": 8748 }, { "epoch": 0.8894875965839772, "grad_norm": 0.3098335862159729, "learning_rate": 9.81629352265095e-06, "loss": 0.3946, "step": 8749 }, { "epoch": 0.8895892639284262, "grad_norm": 0.3406359851360321, "learning_rate": 9.816198196926294e-06, "loss": 0.4117, "step": 8750 }, { "epoch": 0.8896909312728751, "grad_norm": 0.3160037696361542, "learning_rate": 9.816102846938775e-06, "loss": 0.4127, "step": 8751 }, { "epoch": 0.8897925986173241, "grad_norm": 0.3245948553085327, "learning_rate": 9.816007472688874e-06, "loss": 0.4005, "step": 8752 }, { "epoch": 0.8898942659617731, "grad_norm": 0.3606381118297577, "learning_rate": 9.815912074177074e-06, "loss": 0.3685, "step": 8753 }, { "epoch": 0.8899959333062221, "grad_norm": 0.3218511939048767, "learning_rate": 9.815816651403851e-06, "loss": 0.402, "step": 8754 }, { "epoch": 0.890097600650671, "grad_norm": 0.33647260069847107, "learning_rate": 9.81572120436969e-06, "loss": 0.4228, "step": 8755 }, { "epoch": 0.89019926799512, "grad_norm": 0.31894001364707947, "learning_rate": 9.815625733075069e-06, "loss": 0.3781, "step": 8756 }, { "epoch": 0.8903009353395689, "grad_norm": 0.36874109506607056, "learning_rate": 9.81553023752047e-06, "loss": 0.4062, "step": 8757 }, { "epoch": 0.8904026026840179, "grad_norm": 0.33313459157943726, "learning_rate": 9.815434717706374e-06, "loss": 0.4039, "step": 8758 }, { "epoch": 0.8905042700284669, "grad_norm": 0.3227536082267761, "learning_rate": 9.815339173633263e-06, "loss": 0.376, "step": 8759 }, { "epoch": 0.8906059373729158, "grad_norm": 0.31749269366264343, "learning_rate": 9.815243605301616e-06, "loss": 0.3712, "step": 8760 }, { "epoch": 0.8907076047173648, "grad_norm": 0.3918147087097168, "learning_rate": 9.815148012711916e-06, "loss": 0.4047, "step": 8761 }, { "epoch": 0.8908092720618137, "grad_norm": 0.32127004861831665, "learning_rate": 9.815052395864647e-06, "loss": 0.3631, "step": 8762 }, { "epoch": 0.8909109394062628, "grad_norm": 0.34255650639533997, "learning_rate": 9.814956754760285e-06, "loss": 0.4055, "step": 8763 }, { "epoch": 0.8910126067507117, "grad_norm": 0.3327397406101227, "learning_rate": 9.814861089399316e-06, "loss": 0.4119, "step": 8764 }, { "epoch": 0.8911142740951606, "grad_norm": 0.37151220440864563, "learning_rate": 9.814765399782223e-06, "loss": 0.3962, "step": 8765 }, { "epoch": 0.8912159414396096, "grad_norm": 0.3247855007648468, "learning_rate": 9.814669685909486e-06, "loss": 0.3893, "step": 8766 }, { "epoch": 0.8913176087840585, "grad_norm": 0.3213185966014862, "learning_rate": 9.814573947781586e-06, "loss": 0.3714, "step": 8767 }, { "epoch": 0.8914192761285076, "grad_norm": 0.3103685677051544, "learning_rate": 9.814478185399006e-06, "loss": 0.4124, "step": 8768 }, { "epoch": 0.8915209434729565, "grad_norm": 0.32105016708374023, "learning_rate": 9.81438239876223e-06, "loss": 0.3898, "step": 8769 }, { "epoch": 0.8916226108174055, "grad_norm": 0.3564230799674988, "learning_rate": 9.81428658787174e-06, "loss": 0.3655, "step": 8770 }, { "epoch": 0.8917242781618544, "grad_norm": 0.300002783536911, "learning_rate": 9.814190752728017e-06, "loss": 0.3822, "step": 8771 }, { "epoch": 0.8918259455063033, "grad_norm": 0.32472673058509827, "learning_rate": 9.814094893331545e-06, "loss": 0.3784, "step": 8772 }, { "epoch": 0.8919276128507524, "grad_norm": 0.29767948389053345, "learning_rate": 9.813999009682808e-06, "loss": 0.3969, "step": 8773 }, { "epoch": 0.8920292801952013, "grad_norm": 0.2995266020298004, "learning_rate": 9.813903101782287e-06, "loss": 0.3692, "step": 8774 }, { "epoch": 0.8921309475396503, "grad_norm": 0.30787020921707153, "learning_rate": 9.813807169630465e-06, "loss": 0.3927, "step": 8775 }, { "epoch": 0.8922326148840992, "grad_norm": 0.3173048198223114, "learning_rate": 9.81371121322783e-06, "loss": 0.3767, "step": 8776 }, { "epoch": 0.8923342822285482, "grad_norm": 0.29068055748939514, "learning_rate": 9.813615232574859e-06, "loss": 0.4095, "step": 8777 }, { "epoch": 0.8924359495729971, "grad_norm": 0.30524930357933044, "learning_rate": 9.81351922767204e-06, "loss": 0.3912, "step": 8778 }, { "epoch": 0.8925376169174462, "grad_norm": 0.3014414310455322, "learning_rate": 9.813423198519853e-06, "loss": 0.39, "step": 8779 }, { "epoch": 0.8926392842618951, "grad_norm": 0.3375043570995331, "learning_rate": 9.813327145118786e-06, "loss": 0.401, "step": 8780 }, { "epoch": 0.892740951606344, "grad_norm": 0.31829380989074707, "learning_rate": 9.813231067469318e-06, "loss": 0.4018, "step": 8781 }, { "epoch": 0.892842618950793, "grad_norm": 0.3082747161388397, "learning_rate": 9.813134965571938e-06, "loss": 0.4153, "step": 8782 }, { "epoch": 0.8929442862952419, "grad_norm": 0.32163599133491516, "learning_rate": 9.813038839427126e-06, "loss": 0.3823, "step": 8783 }, { "epoch": 0.893045953639691, "grad_norm": 0.3173368573188782, "learning_rate": 9.81294268903537e-06, "loss": 0.3613, "step": 8784 }, { "epoch": 0.8931476209841399, "grad_norm": 0.3229883909225464, "learning_rate": 9.812846514397152e-06, "loss": 0.3938, "step": 8785 }, { "epoch": 0.8932492883285889, "grad_norm": 0.3593546748161316, "learning_rate": 9.812750315512956e-06, "loss": 0.3744, "step": 8786 }, { "epoch": 0.8933509556730378, "grad_norm": 0.3753787875175476, "learning_rate": 9.812654092383269e-06, "loss": 0.4124, "step": 8787 }, { "epoch": 0.8934526230174867, "grad_norm": 0.30754607915878296, "learning_rate": 9.812557845008573e-06, "loss": 0.3764, "step": 8788 }, { "epoch": 0.8935542903619358, "grad_norm": 0.3531032204627991, "learning_rate": 9.812461573389353e-06, "loss": 0.3992, "step": 8789 }, { "epoch": 0.8936559577063847, "grad_norm": 0.3619566261768341, "learning_rate": 9.812365277526099e-06, "loss": 0.3562, "step": 8790 }, { "epoch": 0.8937576250508337, "grad_norm": 0.29489582777023315, "learning_rate": 9.812268957419289e-06, "loss": 0.4142, "step": 8791 }, { "epoch": 0.8938592923952826, "grad_norm": 0.3551121950149536, "learning_rate": 9.812172613069412e-06, "loss": 0.3946, "step": 8792 }, { "epoch": 0.8939609597397316, "grad_norm": 0.32642054557800293, "learning_rate": 9.812076244476954e-06, "loss": 0.3898, "step": 8793 }, { "epoch": 0.8940626270841806, "grad_norm": 0.34118708968162537, "learning_rate": 9.811979851642398e-06, "loss": 0.3896, "step": 8794 }, { "epoch": 0.8941642944286295, "grad_norm": 0.3377388119697571, "learning_rate": 9.811883434566234e-06, "loss": 0.3563, "step": 8795 }, { "epoch": 0.8942659617730785, "grad_norm": 0.3426927328109741, "learning_rate": 9.811786993248943e-06, "loss": 0.379, "step": 8796 }, { "epoch": 0.8943676291175274, "grad_norm": 0.3400607705116272, "learning_rate": 9.811690527691012e-06, "loss": 0.3804, "step": 8797 }, { "epoch": 0.8944692964619764, "grad_norm": 0.344777375459671, "learning_rate": 9.811594037892928e-06, "loss": 0.3735, "step": 8798 }, { "epoch": 0.8945709638064254, "grad_norm": 0.3933458924293518, "learning_rate": 9.811497523855178e-06, "loss": 0.3809, "step": 8799 }, { "epoch": 0.8946726311508744, "grad_norm": 0.3141176104545593, "learning_rate": 9.811400985578244e-06, "loss": 0.4196, "step": 8800 }, { "epoch": 0.8947742984953233, "grad_norm": 0.33777448534965515, "learning_rate": 9.811304423062618e-06, "loss": 0.3928, "step": 8801 }, { "epoch": 0.8948759658397722, "grad_norm": 0.3316791355609894, "learning_rate": 9.811207836308781e-06, "loss": 0.3586, "step": 8802 }, { "epoch": 0.8949776331842212, "grad_norm": 0.29190266132354736, "learning_rate": 9.811111225317225e-06, "loss": 0.3858, "step": 8803 }, { "epoch": 0.8950793005286702, "grad_norm": 0.34140756726264954, "learning_rate": 9.811014590088433e-06, "loss": 0.4443, "step": 8804 }, { "epoch": 0.8951809678731192, "grad_norm": 0.31442713737487793, "learning_rate": 9.810917930622891e-06, "loss": 0.4047, "step": 8805 }, { "epoch": 0.8952826352175681, "grad_norm": 0.3226609230041504, "learning_rate": 9.81082124692109e-06, "loss": 0.3534, "step": 8806 }, { "epoch": 0.8953843025620171, "grad_norm": 0.3024536967277527, "learning_rate": 9.810724538983513e-06, "loss": 0.3651, "step": 8807 }, { "epoch": 0.895485969906466, "grad_norm": 0.3002566695213318, "learning_rate": 9.81062780681065e-06, "loss": 0.3836, "step": 8808 }, { "epoch": 0.8955876372509151, "grad_norm": 0.3362593650817871, "learning_rate": 9.810531050402987e-06, "loss": 0.3893, "step": 8809 }, { "epoch": 0.895689304595364, "grad_norm": 0.33951041102409363, "learning_rate": 9.810434269761011e-06, "loss": 0.3745, "step": 8810 }, { "epoch": 0.8957909719398129, "grad_norm": 0.3125609755516052, "learning_rate": 9.810337464885212e-06, "loss": 0.3885, "step": 8811 }, { "epoch": 0.8958926392842619, "grad_norm": 0.37021633982658386, "learning_rate": 9.810240635776075e-06, "loss": 0.4286, "step": 8812 }, { "epoch": 0.8959943066287108, "grad_norm": 0.3397156000137329, "learning_rate": 9.810143782434088e-06, "loss": 0.3976, "step": 8813 }, { "epoch": 0.8960959739731599, "grad_norm": 0.3178732991218567, "learning_rate": 9.810046904859741e-06, "loss": 0.4009, "step": 8814 }, { "epoch": 0.8961976413176088, "grad_norm": 0.3374946415424347, "learning_rate": 9.80995000305352e-06, "loss": 0.3799, "step": 8815 }, { "epoch": 0.8962993086620578, "grad_norm": 0.33806970715522766, "learning_rate": 9.809853077015914e-06, "loss": 0.4263, "step": 8816 }, { "epoch": 0.8964009760065067, "grad_norm": 0.3338205814361572, "learning_rate": 9.809756126747412e-06, "loss": 0.38, "step": 8817 }, { "epoch": 0.8965026433509556, "grad_norm": 0.34196534752845764, "learning_rate": 9.8096591522485e-06, "loss": 0.4079, "step": 8818 }, { "epoch": 0.8966043106954046, "grad_norm": 0.30856063961982727, "learning_rate": 9.80956215351967e-06, "loss": 0.3841, "step": 8819 }, { "epoch": 0.8967059780398536, "grad_norm": 0.3446991741657257, "learning_rate": 9.809465130561407e-06, "loss": 0.3874, "step": 8820 }, { "epoch": 0.8968076453843026, "grad_norm": 0.36282768845558167, "learning_rate": 9.809368083374204e-06, "loss": 0.3722, "step": 8821 }, { "epoch": 0.8969093127287515, "grad_norm": 0.327900767326355, "learning_rate": 9.809271011958545e-06, "loss": 0.3663, "step": 8822 }, { "epoch": 0.8970109800732005, "grad_norm": 0.31447771191596985, "learning_rate": 9.809173916314922e-06, "loss": 0.3789, "step": 8823 }, { "epoch": 0.8971126474176494, "grad_norm": 0.3688371181488037, "learning_rate": 9.809076796443825e-06, "loss": 0.3905, "step": 8824 }, { "epoch": 0.8972143147620985, "grad_norm": 0.31556347012519836, "learning_rate": 9.808979652345741e-06, "loss": 0.3771, "step": 8825 }, { "epoch": 0.8973159821065474, "grad_norm": 0.33874577283859253, "learning_rate": 9.808882484021161e-06, "loss": 0.3905, "step": 8826 }, { "epoch": 0.8974176494509963, "grad_norm": 0.3075782060623169, "learning_rate": 9.808785291470574e-06, "loss": 0.387, "step": 8827 }, { "epoch": 0.8975193167954453, "grad_norm": 0.3107069730758667, "learning_rate": 9.808688074694467e-06, "loss": 0.3975, "step": 8828 }, { "epoch": 0.8976209841398942, "grad_norm": 0.3107544779777527, "learning_rate": 9.808590833693333e-06, "loss": 0.3602, "step": 8829 }, { "epoch": 0.8977226514843433, "grad_norm": 0.3323974609375, "learning_rate": 9.808493568467664e-06, "loss": 0.3814, "step": 8830 }, { "epoch": 0.8978243188287922, "grad_norm": 0.3295201361179352, "learning_rate": 9.808396279017943e-06, "loss": 0.3822, "step": 8831 }, { "epoch": 0.8979259861732412, "grad_norm": 0.33557194471359253, "learning_rate": 9.808298965344666e-06, "loss": 0.3995, "step": 8832 }, { "epoch": 0.8980276535176901, "grad_norm": 0.32805299758911133, "learning_rate": 9.808201627448322e-06, "loss": 0.4084, "step": 8833 }, { "epoch": 0.898129320862139, "grad_norm": 0.31081917881965637, "learning_rate": 9.8081042653294e-06, "loss": 0.4225, "step": 8834 }, { "epoch": 0.8982309882065881, "grad_norm": 0.29466843605041504, "learning_rate": 9.808006878988391e-06, "loss": 0.3992, "step": 8835 }, { "epoch": 0.898332655551037, "grad_norm": 0.325441837310791, "learning_rate": 9.807909468425787e-06, "loss": 0.4154, "step": 8836 }, { "epoch": 0.898434322895486, "grad_norm": 0.33883917331695557, "learning_rate": 9.807812033642078e-06, "loss": 0.3844, "step": 8837 }, { "epoch": 0.8985359902399349, "grad_norm": 0.3445585072040558, "learning_rate": 9.807714574637754e-06, "loss": 0.4511, "step": 8838 }, { "epoch": 0.8986376575843839, "grad_norm": 0.2867007255554199, "learning_rate": 9.807617091413306e-06, "loss": 0.3499, "step": 8839 }, { "epoch": 0.8987393249288329, "grad_norm": 0.3481675088405609, "learning_rate": 9.807519583969224e-06, "loss": 0.3981, "step": 8840 }, { "epoch": 0.8988409922732818, "grad_norm": 0.3318790793418884, "learning_rate": 9.807422052306004e-06, "loss": 0.3921, "step": 8841 }, { "epoch": 0.8989426596177308, "grad_norm": 0.33503222465515137, "learning_rate": 9.807324496424132e-06, "loss": 0.4387, "step": 8842 }, { "epoch": 0.8990443269621797, "grad_norm": 0.326415091753006, "learning_rate": 9.807226916324102e-06, "loss": 0.3687, "step": 8843 }, { "epoch": 0.8991459943066287, "grad_norm": 0.32877811789512634, "learning_rate": 9.807129312006406e-06, "loss": 0.4193, "step": 8844 }, { "epoch": 0.8992476616510777, "grad_norm": 0.3238472044467926, "learning_rate": 9.807031683471533e-06, "loss": 0.3919, "step": 8845 }, { "epoch": 0.8993493289955267, "grad_norm": 0.37354519963264465, "learning_rate": 9.80693403071998e-06, "loss": 0.3898, "step": 8846 }, { "epoch": 0.8994509963399756, "grad_norm": 0.3431360423564911, "learning_rate": 9.806836353752232e-06, "loss": 0.3945, "step": 8847 }, { "epoch": 0.8995526636844245, "grad_norm": 0.3240785300731659, "learning_rate": 9.806738652568787e-06, "loss": 0.3762, "step": 8848 }, { "epoch": 0.8996543310288735, "grad_norm": 0.3380415439605713, "learning_rate": 9.806640927170134e-06, "loss": 0.3842, "step": 8849 }, { "epoch": 0.8997559983733225, "grad_norm": 0.35157737135887146, "learning_rate": 9.806543177556767e-06, "loss": 0.4142, "step": 8850 }, { "epoch": 0.8998576657177715, "grad_norm": 0.3333185911178589, "learning_rate": 9.806445403729177e-06, "loss": 0.3707, "step": 8851 }, { "epoch": 0.8999593330622204, "grad_norm": 0.3440156877040863, "learning_rate": 9.806347605687856e-06, "loss": 0.4153, "step": 8852 }, { "epoch": 0.9000610004066694, "grad_norm": 0.31844696402549744, "learning_rate": 9.8062497834333e-06, "loss": 0.3923, "step": 8853 }, { "epoch": 0.9001626677511183, "grad_norm": 0.31132063269615173, "learning_rate": 9.806151936966e-06, "loss": 0.3721, "step": 8854 }, { "epoch": 0.9002643350955674, "grad_norm": 0.3241539001464844, "learning_rate": 9.806054066286447e-06, "loss": 0.3828, "step": 8855 }, { "epoch": 0.9003660024400163, "grad_norm": 0.307618111371994, "learning_rate": 9.805956171395137e-06, "loss": 0.3447, "step": 8856 }, { "epoch": 0.9004676697844652, "grad_norm": 0.34900522232055664, "learning_rate": 9.80585825229256e-06, "loss": 0.3811, "step": 8857 }, { "epoch": 0.9005693371289142, "grad_norm": 0.3464067578315735, "learning_rate": 9.805760308979212e-06, "loss": 0.3465, "step": 8858 }, { "epoch": 0.9006710044733631, "grad_norm": 0.3590549826622009, "learning_rate": 9.805662341455587e-06, "loss": 0.4226, "step": 8859 }, { "epoch": 0.9007726718178121, "grad_norm": 0.35172927379608154, "learning_rate": 9.805564349722177e-06, "loss": 0.3789, "step": 8860 }, { "epoch": 0.9008743391622611, "grad_norm": 0.3922243118286133, "learning_rate": 9.805466333779474e-06, "loss": 0.3819, "step": 8861 }, { "epoch": 0.9009760065067101, "grad_norm": 0.33112818002700806, "learning_rate": 9.805368293627974e-06, "loss": 0.389, "step": 8862 }, { "epoch": 0.901077673851159, "grad_norm": 0.3631896376609802, "learning_rate": 9.805270229268172e-06, "loss": 0.4156, "step": 8863 }, { "epoch": 0.9011793411956079, "grad_norm": 0.3015486001968384, "learning_rate": 9.805172140700559e-06, "loss": 0.3775, "step": 8864 }, { "epoch": 0.9012810085400569, "grad_norm": 0.38533923029899597, "learning_rate": 9.805074027925632e-06, "loss": 0.403, "step": 8865 }, { "epoch": 0.9013826758845059, "grad_norm": 0.37850990891456604, "learning_rate": 9.804975890943882e-06, "loss": 0.4008, "step": 8866 }, { "epoch": 0.9014843432289549, "grad_norm": 0.3034813106060028, "learning_rate": 9.804877729755807e-06, "loss": 0.3564, "step": 8867 }, { "epoch": 0.9015860105734038, "grad_norm": 0.3547946810722351, "learning_rate": 9.804779544361899e-06, "loss": 0.4165, "step": 8868 }, { "epoch": 0.9016876779178528, "grad_norm": 0.3316470980644226, "learning_rate": 9.804681334762653e-06, "loss": 0.3869, "step": 8869 }, { "epoch": 0.9017893452623017, "grad_norm": 0.3226429224014282, "learning_rate": 9.804583100958566e-06, "loss": 0.4041, "step": 8870 }, { "epoch": 0.9018910126067508, "grad_norm": 0.32847124338150024, "learning_rate": 9.80448484295013e-06, "loss": 0.3833, "step": 8871 }, { "epoch": 0.9019926799511997, "grad_norm": 0.3160274624824524, "learning_rate": 9.80438656073784e-06, "loss": 0.3532, "step": 8872 }, { "epoch": 0.9020943472956486, "grad_norm": 0.3326874077320099, "learning_rate": 9.804288254322194e-06, "loss": 0.4018, "step": 8873 }, { "epoch": 0.9021960146400976, "grad_norm": 0.32808127999305725, "learning_rate": 9.804189923703685e-06, "loss": 0.3829, "step": 8874 }, { "epoch": 0.9022976819845465, "grad_norm": 0.3158084750175476, "learning_rate": 9.804091568882808e-06, "loss": 0.4268, "step": 8875 }, { "epoch": 0.9023993493289956, "grad_norm": 0.3214947581291199, "learning_rate": 9.803993189860059e-06, "loss": 0.381, "step": 8876 }, { "epoch": 0.9025010166734445, "grad_norm": 0.31853774189949036, "learning_rate": 9.803894786635935e-06, "loss": 0.4178, "step": 8877 }, { "epoch": 0.9026026840178935, "grad_norm": 0.32362353801727295, "learning_rate": 9.803796359210932e-06, "loss": 0.4014, "step": 8878 }, { "epoch": 0.9027043513623424, "grad_norm": 0.33149629831314087, "learning_rate": 9.803697907585541e-06, "loss": 0.4042, "step": 8879 }, { "epoch": 0.9028060187067913, "grad_norm": 0.302864134311676, "learning_rate": 9.803599431760264e-06, "loss": 0.39, "step": 8880 }, { "epoch": 0.9029076860512404, "grad_norm": 0.30410128831863403, "learning_rate": 9.803500931735593e-06, "loss": 0.3984, "step": 8881 }, { "epoch": 0.9030093533956893, "grad_norm": 0.37358349561691284, "learning_rate": 9.803402407512027e-06, "loss": 0.4308, "step": 8882 }, { "epoch": 0.9031110207401383, "grad_norm": 0.3141413629055023, "learning_rate": 9.803303859090061e-06, "loss": 0.3818, "step": 8883 }, { "epoch": 0.9032126880845872, "grad_norm": 0.2995740473270416, "learning_rate": 9.803205286470192e-06, "loss": 0.3765, "step": 8884 }, { "epoch": 0.9033143554290362, "grad_norm": 0.30740368366241455, "learning_rate": 9.803106689652915e-06, "loss": 0.38, "step": 8885 }, { "epoch": 0.9034160227734852, "grad_norm": 0.3382231891155243, "learning_rate": 9.803008068638728e-06, "loss": 0.3783, "step": 8886 }, { "epoch": 0.9035176901179341, "grad_norm": 0.32229775190353394, "learning_rate": 9.802909423428128e-06, "loss": 0.3735, "step": 8887 }, { "epoch": 0.9036193574623831, "grad_norm": 0.3175095319747925, "learning_rate": 9.802810754021612e-06, "loss": 0.4182, "step": 8888 }, { "epoch": 0.903721024806832, "grad_norm": 0.36159855127334595, "learning_rate": 9.802712060419676e-06, "loss": 0.3783, "step": 8889 }, { "epoch": 0.903822692151281, "grad_norm": 0.362691730260849, "learning_rate": 9.80261334262282e-06, "loss": 0.3887, "step": 8890 }, { "epoch": 0.90392435949573, "grad_norm": 0.30324697494506836, "learning_rate": 9.802514600631537e-06, "loss": 0.3715, "step": 8891 }, { "epoch": 0.904026026840179, "grad_norm": 0.31702733039855957, "learning_rate": 9.802415834446328e-06, "loss": 0.3765, "step": 8892 }, { "epoch": 0.9041276941846279, "grad_norm": 0.331317663192749, "learning_rate": 9.802317044067688e-06, "loss": 0.4083, "step": 8893 }, { "epoch": 0.9042293615290768, "grad_norm": 0.32817569375038147, "learning_rate": 9.802218229496116e-06, "loss": 0.3695, "step": 8894 }, { "epoch": 0.9043310288735258, "grad_norm": 0.30855312943458557, "learning_rate": 9.802119390732112e-06, "loss": 0.366, "step": 8895 }, { "epoch": 0.9044326962179748, "grad_norm": 0.33710673451423645, "learning_rate": 9.80202052777617e-06, "loss": 0.3893, "step": 8896 }, { "epoch": 0.9045343635624238, "grad_norm": 0.31571879982948303, "learning_rate": 9.801921640628788e-06, "loss": 0.4082, "step": 8897 }, { "epoch": 0.9046360309068727, "grad_norm": 0.3102761507034302, "learning_rate": 9.801822729290468e-06, "loss": 0.389, "step": 8898 }, { "epoch": 0.9047376982513217, "grad_norm": 0.32220590114593506, "learning_rate": 9.801723793761706e-06, "loss": 0.3975, "step": 8899 }, { "epoch": 0.9048393655957706, "grad_norm": 0.31822386384010315, "learning_rate": 9.801624834043001e-06, "loss": 0.4037, "step": 8900 }, { "epoch": 0.9049410329402195, "grad_norm": 0.32979848980903625, "learning_rate": 9.801525850134849e-06, "loss": 0.3956, "step": 8901 }, { "epoch": 0.9050427002846686, "grad_norm": 0.29527735710144043, "learning_rate": 9.801426842037754e-06, "loss": 0.4061, "step": 8902 }, { "epoch": 0.9051443676291175, "grad_norm": 0.3054259717464447, "learning_rate": 9.801327809752211e-06, "loss": 0.3925, "step": 8903 }, { "epoch": 0.9052460349735665, "grad_norm": 0.33931922912597656, "learning_rate": 9.801228753278718e-06, "loss": 0.3906, "step": 8904 }, { "epoch": 0.9053477023180154, "grad_norm": 0.30364295840263367, "learning_rate": 9.801129672617775e-06, "loss": 0.352, "step": 8905 }, { "epoch": 0.9054493696624644, "grad_norm": 0.30324864387512207, "learning_rate": 9.801030567769883e-06, "loss": 0.3625, "step": 8906 }, { "epoch": 0.9055510370069134, "grad_norm": 0.31896859407424927, "learning_rate": 9.80093143873554e-06, "loss": 0.3771, "step": 8907 }, { "epoch": 0.9056527043513624, "grad_norm": 0.3731214702129364, "learning_rate": 9.800832285515244e-06, "loss": 0.3909, "step": 8908 }, { "epoch": 0.9057543716958113, "grad_norm": 0.3276749849319458, "learning_rate": 9.800733108109496e-06, "loss": 0.3772, "step": 8909 }, { "epoch": 0.9058560390402602, "grad_norm": 0.34407973289489746, "learning_rate": 9.800633906518796e-06, "loss": 0.4012, "step": 8910 }, { "epoch": 0.9059577063847092, "grad_norm": 0.36742356419563293, "learning_rate": 9.800534680743642e-06, "loss": 0.4075, "step": 8911 }, { "epoch": 0.9060593737291582, "grad_norm": 0.30091604590415955, "learning_rate": 9.800435430784537e-06, "loss": 0.3769, "step": 8912 }, { "epoch": 0.9061610410736072, "grad_norm": 0.31867358088493347, "learning_rate": 9.80033615664198e-06, "loss": 0.3598, "step": 8913 }, { "epoch": 0.9062627084180561, "grad_norm": 0.32671427726745605, "learning_rate": 9.800236858316468e-06, "loss": 0.3872, "step": 8914 }, { "epoch": 0.9063643757625051, "grad_norm": 0.32897505164146423, "learning_rate": 9.800137535808503e-06, "loss": 0.4315, "step": 8915 }, { "epoch": 0.906466043106954, "grad_norm": 0.32492595911026, "learning_rate": 9.800038189118586e-06, "loss": 0.3741, "step": 8916 }, { "epoch": 0.906567710451403, "grad_norm": 0.31125032901763916, "learning_rate": 9.799938818247218e-06, "loss": 0.4006, "step": 8917 }, { "epoch": 0.906669377795852, "grad_norm": 0.3018983006477356, "learning_rate": 9.799839423194898e-06, "loss": 0.395, "step": 8918 }, { "epoch": 0.9067710451403009, "grad_norm": 0.32039013504981995, "learning_rate": 9.799740003962129e-06, "loss": 0.3994, "step": 8919 }, { "epoch": 0.9068727124847499, "grad_norm": 0.33597058057785034, "learning_rate": 9.799640560549411e-06, "loss": 0.3764, "step": 8920 }, { "epoch": 0.9069743798291988, "grad_norm": 0.2999859154224396, "learning_rate": 9.799541092957243e-06, "loss": 0.389, "step": 8921 }, { "epoch": 0.9070760471736479, "grad_norm": 0.32498395442962646, "learning_rate": 9.799441601186127e-06, "loss": 0.3684, "step": 8922 }, { "epoch": 0.9071777145180968, "grad_norm": 0.32179078459739685, "learning_rate": 9.799342085236566e-06, "loss": 0.3818, "step": 8923 }, { "epoch": 0.9072793818625458, "grad_norm": 0.3214123845100403, "learning_rate": 9.79924254510906e-06, "loss": 0.3776, "step": 8924 }, { "epoch": 0.9073810492069947, "grad_norm": 0.30544736981391907, "learning_rate": 9.799142980804109e-06, "loss": 0.4241, "step": 8925 }, { "epoch": 0.9074827165514436, "grad_norm": 0.2959558665752411, "learning_rate": 9.799043392322219e-06, "loss": 0.3886, "step": 8926 }, { "epoch": 0.9075843838958927, "grad_norm": 0.3135080337524414, "learning_rate": 9.798943779663885e-06, "loss": 0.3655, "step": 8927 }, { "epoch": 0.9076860512403416, "grad_norm": 0.30458375811576843, "learning_rate": 9.798844142829615e-06, "loss": 0.3773, "step": 8928 }, { "epoch": 0.9077877185847906, "grad_norm": 0.3072386085987091, "learning_rate": 9.798744481819908e-06, "loss": 0.3783, "step": 8929 }, { "epoch": 0.9078893859292395, "grad_norm": 0.30259940028190613, "learning_rate": 9.798644796635268e-06, "loss": 0.3659, "step": 8930 }, { "epoch": 0.9079910532736885, "grad_norm": 0.3360186517238617, "learning_rate": 9.798545087276194e-06, "loss": 0.4166, "step": 8931 }, { "epoch": 0.9080927206181375, "grad_norm": 0.30142179131507874, "learning_rate": 9.79844535374319e-06, "loss": 0.3842, "step": 8932 }, { "epoch": 0.9081943879625864, "grad_norm": 0.31557410955429077, "learning_rate": 9.79834559603676e-06, "loss": 0.3693, "step": 8933 }, { "epoch": 0.9082960553070354, "grad_norm": 0.35102158784866333, "learning_rate": 9.798245814157405e-06, "loss": 0.3675, "step": 8934 }, { "epoch": 0.9083977226514843, "grad_norm": 0.31239941716194153, "learning_rate": 9.798146008105629e-06, "loss": 0.3713, "step": 8935 }, { "epoch": 0.9084993899959333, "grad_norm": 0.30083030462265015, "learning_rate": 9.798046177881931e-06, "loss": 0.3805, "step": 8936 }, { "epoch": 0.9086010573403823, "grad_norm": 0.32311367988586426, "learning_rate": 9.797946323486818e-06, "loss": 0.3934, "step": 8937 }, { "epoch": 0.9087027246848313, "grad_norm": 0.3081755042076111, "learning_rate": 9.797846444920792e-06, "loss": 0.4152, "step": 8938 }, { "epoch": 0.9088043920292802, "grad_norm": 0.3066410720348358, "learning_rate": 9.797746542184355e-06, "loss": 0.369, "step": 8939 }, { "epoch": 0.9089060593737291, "grad_norm": 0.28678274154663086, "learning_rate": 9.79764661527801e-06, "loss": 0.4017, "step": 8940 }, { "epoch": 0.9090077267181781, "grad_norm": 0.30143874883651733, "learning_rate": 9.797546664202263e-06, "loss": 0.3823, "step": 8941 }, { "epoch": 0.909109394062627, "grad_norm": 0.3079633116722107, "learning_rate": 9.797446688957615e-06, "loss": 0.3746, "step": 8942 }, { "epoch": 0.9092110614070761, "grad_norm": 0.2971521019935608, "learning_rate": 9.797346689544572e-06, "loss": 0.3887, "step": 8943 }, { "epoch": 0.909312728751525, "grad_norm": 0.31116074323654175, "learning_rate": 9.797246665963635e-06, "loss": 0.3751, "step": 8944 }, { "epoch": 0.909414396095974, "grad_norm": 0.31987228989601135, "learning_rate": 9.79714661821531e-06, "loss": 0.4344, "step": 8945 }, { "epoch": 0.9095160634404229, "grad_norm": 0.319824755191803, "learning_rate": 9.7970465463001e-06, "loss": 0.4051, "step": 8946 }, { "epoch": 0.9096177307848718, "grad_norm": 0.3195517361164093, "learning_rate": 9.79694645021851e-06, "loss": 0.4189, "step": 8947 }, { "epoch": 0.9097193981293209, "grad_norm": 0.33403828740119934, "learning_rate": 9.796846329971043e-06, "loss": 0.3828, "step": 8948 }, { "epoch": 0.9098210654737698, "grad_norm": 0.30165910720825195, "learning_rate": 9.796746185558203e-06, "loss": 0.3851, "step": 8949 }, { "epoch": 0.9099227328182188, "grad_norm": 0.309641033411026, "learning_rate": 9.796646016980498e-06, "loss": 0.3947, "step": 8950 }, { "epoch": 0.9100244001626677, "grad_norm": 0.3237028121948242, "learning_rate": 9.79654582423843e-06, "loss": 0.3617, "step": 8951 }, { "epoch": 0.9101260675071167, "grad_norm": 0.3380339741706848, "learning_rate": 9.796445607332503e-06, "loss": 0.4136, "step": 8952 }, { "epoch": 0.9102277348515657, "grad_norm": 0.30374160408973694, "learning_rate": 9.796345366263222e-06, "loss": 0.3884, "step": 8953 }, { "epoch": 0.9103294021960147, "grad_norm": 0.3393263518810272, "learning_rate": 9.796245101031094e-06, "loss": 0.4166, "step": 8954 }, { "epoch": 0.9104310695404636, "grad_norm": 0.31229668855667114, "learning_rate": 9.796144811636622e-06, "loss": 0.3819, "step": 8955 }, { "epoch": 0.9105327368849125, "grad_norm": 0.3217916786670685, "learning_rate": 9.796044498080314e-06, "loss": 0.3854, "step": 8956 }, { "epoch": 0.9106344042293615, "grad_norm": 0.3299560248851776, "learning_rate": 9.795944160362672e-06, "loss": 0.4092, "step": 8957 }, { "epoch": 0.9107360715738105, "grad_norm": 0.326630175113678, "learning_rate": 9.795843798484203e-06, "loss": 0.3768, "step": 8958 }, { "epoch": 0.9108377389182595, "grad_norm": 0.3916861116886139, "learning_rate": 9.795743412445414e-06, "loss": 0.3852, "step": 8959 }, { "epoch": 0.9109394062627084, "grad_norm": 0.3055201768875122, "learning_rate": 9.795643002246808e-06, "loss": 0.3758, "step": 8960 }, { "epoch": 0.9110410736071574, "grad_norm": 0.3102886378765106, "learning_rate": 9.795542567888894e-06, "loss": 0.3775, "step": 8961 }, { "epoch": 0.9111427409516063, "grad_norm": 0.35458728671073914, "learning_rate": 9.795442109372175e-06, "loss": 0.3819, "step": 8962 }, { "epoch": 0.9112444082960554, "grad_norm": 0.3183174431324005, "learning_rate": 9.795341626697156e-06, "loss": 0.3975, "step": 8963 }, { "epoch": 0.9113460756405043, "grad_norm": 0.29922357201576233, "learning_rate": 9.795241119864348e-06, "loss": 0.3814, "step": 8964 }, { "epoch": 0.9114477429849532, "grad_norm": 0.3145752251148224, "learning_rate": 9.795140588874254e-06, "loss": 0.3972, "step": 8965 }, { "epoch": 0.9115494103294022, "grad_norm": 0.32001781463623047, "learning_rate": 9.795040033727383e-06, "loss": 0.3758, "step": 8966 }, { "epoch": 0.9116510776738511, "grad_norm": 0.3459325134754181, "learning_rate": 9.794939454424237e-06, "loss": 0.3843, "step": 8967 }, { "epoch": 0.9117527450183002, "grad_norm": 0.2829683721065521, "learning_rate": 9.794838850965327e-06, "loss": 0.4056, "step": 8968 }, { "epoch": 0.9118544123627491, "grad_norm": 0.3458747863769531, "learning_rate": 9.794738223351159e-06, "loss": 0.3818, "step": 8969 }, { "epoch": 0.911956079707198, "grad_norm": 0.3059339225292206, "learning_rate": 9.794637571582238e-06, "loss": 0.3794, "step": 8970 }, { "epoch": 0.912057747051647, "grad_norm": 0.2998068630695343, "learning_rate": 9.794536895659072e-06, "loss": 0.3627, "step": 8971 }, { "epoch": 0.9121594143960959, "grad_norm": 0.3271520137786865, "learning_rate": 9.794436195582168e-06, "loss": 0.4029, "step": 8972 }, { "epoch": 0.912261081740545, "grad_norm": 0.31763729453086853, "learning_rate": 9.794335471352034e-06, "loss": 0.3991, "step": 8973 }, { "epoch": 0.9123627490849939, "grad_norm": 0.3113107681274414, "learning_rate": 9.79423472296918e-06, "loss": 0.3733, "step": 8974 }, { "epoch": 0.9124644164294429, "grad_norm": 0.34594351053237915, "learning_rate": 9.794133950434108e-06, "loss": 0.3838, "step": 8975 }, { "epoch": 0.9125660837738918, "grad_norm": 0.30881890654563904, "learning_rate": 9.794033153747329e-06, "loss": 0.3602, "step": 8976 }, { "epoch": 0.9126677511183408, "grad_norm": 0.29011985659599304, "learning_rate": 9.793932332909351e-06, "loss": 0.4, "step": 8977 }, { "epoch": 0.9127694184627898, "grad_norm": 0.3244478702545166, "learning_rate": 9.79383148792068e-06, "loss": 0.3895, "step": 8978 }, { "epoch": 0.9128710858072387, "grad_norm": 0.38309529423713684, "learning_rate": 9.793730618781825e-06, "loss": 0.3994, "step": 8979 }, { "epoch": 0.9129727531516877, "grad_norm": 0.30795231461524963, "learning_rate": 9.793629725493296e-06, "loss": 0.3823, "step": 8980 }, { "epoch": 0.9130744204961366, "grad_norm": 0.3212387263774872, "learning_rate": 9.793528808055598e-06, "loss": 0.4174, "step": 8981 }, { "epoch": 0.9131760878405856, "grad_norm": 0.3354787826538086, "learning_rate": 9.79342786646924e-06, "loss": 0.4161, "step": 8982 }, { "epoch": 0.9132777551850345, "grad_norm": 0.35225746035575867, "learning_rate": 9.793326900734734e-06, "loss": 0.3804, "step": 8983 }, { "epoch": 0.9133794225294836, "grad_norm": 0.3833875358104706, "learning_rate": 9.793225910852587e-06, "loss": 0.3768, "step": 8984 }, { "epoch": 0.9134810898739325, "grad_norm": 0.35715919733047485, "learning_rate": 9.793124896823302e-06, "loss": 0.3761, "step": 8985 }, { "epoch": 0.9135827572183814, "grad_norm": 0.35839030146598816, "learning_rate": 9.793023858647397e-06, "loss": 0.3891, "step": 8986 }, { "epoch": 0.9136844245628304, "grad_norm": 0.3591454327106476, "learning_rate": 9.792922796325377e-06, "loss": 0.4069, "step": 8987 }, { "epoch": 0.9137860919072793, "grad_norm": 0.35807549953460693, "learning_rate": 9.79282170985775e-06, "loss": 0.341, "step": 8988 }, { "epoch": 0.9138877592517284, "grad_norm": 0.3291541635990143, "learning_rate": 9.792720599245024e-06, "loss": 0.388, "step": 8989 }, { "epoch": 0.9139894265961773, "grad_norm": 0.3343711793422699, "learning_rate": 9.792619464487712e-06, "loss": 0.3758, "step": 8990 }, { "epoch": 0.9140910939406263, "grad_norm": 0.33628594875335693, "learning_rate": 9.792518305586323e-06, "loss": 0.3861, "step": 8991 }, { "epoch": 0.9141927612850752, "grad_norm": 0.32969820499420166, "learning_rate": 9.792417122541364e-06, "loss": 0.3929, "step": 8992 }, { "epoch": 0.9142944286295241, "grad_norm": 0.35579463839530945, "learning_rate": 9.792315915353348e-06, "loss": 0.3794, "step": 8993 }, { "epoch": 0.9143960959739732, "grad_norm": 0.340155690908432, "learning_rate": 9.792214684022782e-06, "loss": 0.3857, "step": 8994 }, { "epoch": 0.9144977633184221, "grad_norm": 0.3163162171840668, "learning_rate": 9.792113428550178e-06, "loss": 0.3873, "step": 8995 }, { "epoch": 0.9145994306628711, "grad_norm": 0.32240036129951477, "learning_rate": 9.792012148936046e-06, "loss": 0.3893, "step": 8996 }, { "epoch": 0.91470109800732, "grad_norm": 0.34856119751930237, "learning_rate": 9.791910845180894e-06, "loss": 0.399, "step": 8997 }, { "epoch": 0.914802765351769, "grad_norm": 0.29130589962005615, "learning_rate": 9.791809517285236e-06, "loss": 0.4128, "step": 8998 }, { "epoch": 0.914904432696218, "grad_norm": 0.31336694955825806, "learning_rate": 9.791708165249579e-06, "loss": 0.3769, "step": 8999 }, { "epoch": 0.915006100040667, "grad_norm": 0.35401561856269836, "learning_rate": 9.791606789074434e-06, "loss": 0.414, "step": 9000 }, { "epoch": 0.9151077673851159, "grad_norm": 0.3215486705303192, "learning_rate": 9.791505388760314e-06, "loss": 0.3977, "step": 9001 }, { "epoch": 0.9152094347295648, "grad_norm": 0.3217526376247406, "learning_rate": 9.791403964307729e-06, "loss": 0.4011, "step": 9002 }, { "epoch": 0.9153111020740138, "grad_norm": 0.314718633890152, "learning_rate": 9.791302515717187e-06, "loss": 0.3744, "step": 9003 }, { "epoch": 0.9154127694184628, "grad_norm": 0.33381497859954834, "learning_rate": 9.791201042989204e-06, "loss": 0.3709, "step": 9004 }, { "epoch": 0.9155144367629118, "grad_norm": 0.3053431510925293, "learning_rate": 9.791099546124287e-06, "loss": 0.3986, "step": 9005 }, { "epoch": 0.9156161041073607, "grad_norm": 0.3082848787307739, "learning_rate": 9.79099802512295e-06, "loss": 0.3624, "step": 9006 }, { "epoch": 0.9157177714518097, "grad_norm": 0.35558032989501953, "learning_rate": 9.790896479985703e-06, "loss": 0.3949, "step": 9007 }, { "epoch": 0.9158194387962586, "grad_norm": 0.3157140016555786, "learning_rate": 9.790794910713058e-06, "loss": 0.3705, "step": 9008 }, { "epoch": 0.9159211061407077, "grad_norm": 0.3205019235610962, "learning_rate": 9.790693317305527e-06, "loss": 0.3973, "step": 9009 }, { "epoch": 0.9160227734851566, "grad_norm": 0.3441831171512604, "learning_rate": 9.790591699763623e-06, "loss": 0.3845, "step": 9010 }, { "epoch": 0.9161244408296055, "grad_norm": 0.3416849970817566, "learning_rate": 9.790490058087854e-06, "loss": 0.4147, "step": 9011 }, { "epoch": 0.9162261081740545, "grad_norm": 0.35886430740356445, "learning_rate": 9.790388392278736e-06, "loss": 0.3881, "step": 9012 }, { "epoch": 0.9163277755185034, "grad_norm": 0.2992231845855713, "learning_rate": 9.790286702336777e-06, "loss": 0.3889, "step": 9013 }, { "epoch": 0.9164294428629525, "grad_norm": 0.3130582273006439, "learning_rate": 9.790184988262494e-06, "loss": 0.4145, "step": 9014 }, { "epoch": 0.9165311102074014, "grad_norm": 0.35233744978904724, "learning_rate": 9.790083250056398e-06, "loss": 0.4225, "step": 9015 }, { "epoch": 0.9166327775518504, "grad_norm": 0.3220705986022949, "learning_rate": 9.789981487719e-06, "loss": 0.3501, "step": 9016 }, { "epoch": 0.9167344448962993, "grad_norm": 0.30303457379341125, "learning_rate": 9.789879701250815e-06, "loss": 0.395, "step": 9017 }, { "epoch": 0.9168361122407482, "grad_norm": 0.3096753656864166, "learning_rate": 9.789777890652352e-06, "loss": 0.394, "step": 9018 }, { "epoch": 0.9169377795851973, "grad_norm": 0.3407573401927948, "learning_rate": 9.789676055924127e-06, "loss": 0.3708, "step": 9019 }, { "epoch": 0.9170394469296462, "grad_norm": 0.36306822299957275, "learning_rate": 9.789574197066653e-06, "loss": 0.3679, "step": 9020 }, { "epoch": 0.9171411142740952, "grad_norm": 0.3406561017036438, "learning_rate": 9.789472314080442e-06, "loss": 0.3934, "step": 9021 }, { "epoch": 0.9172427816185441, "grad_norm": 0.3334965407848358, "learning_rate": 9.789370406966008e-06, "loss": 0.3846, "step": 9022 }, { "epoch": 0.917344448962993, "grad_norm": 0.33395054936408997, "learning_rate": 9.789268475723863e-06, "loss": 0.3876, "step": 9023 }, { "epoch": 0.917446116307442, "grad_norm": 0.31996530294418335, "learning_rate": 9.789166520354523e-06, "loss": 0.3734, "step": 9024 }, { "epoch": 0.917547783651891, "grad_norm": 0.3278038501739502, "learning_rate": 9.789064540858498e-06, "loss": 0.3643, "step": 9025 }, { "epoch": 0.91764945099634, "grad_norm": 0.3221594989299774, "learning_rate": 9.788962537236307e-06, "loss": 0.4055, "step": 9026 }, { "epoch": 0.9177511183407889, "grad_norm": 0.336086630821228, "learning_rate": 9.788860509488459e-06, "loss": 0.4125, "step": 9027 }, { "epoch": 0.9178527856852379, "grad_norm": 0.315070241689682, "learning_rate": 9.788758457615467e-06, "loss": 0.381, "step": 9028 }, { "epoch": 0.9179544530296868, "grad_norm": 0.31542009115219116, "learning_rate": 9.788656381617851e-06, "loss": 0.3928, "step": 9029 }, { "epoch": 0.9180561203741359, "grad_norm": 0.30793118476867676, "learning_rate": 9.788554281496122e-06, "loss": 0.3636, "step": 9030 }, { "epoch": 0.9181577877185848, "grad_norm": 0.32039421796798706, "learning_rate": 9.788452157250794e-06, "loss": 0.3508, "step": 9031 }, { "epoch": 0.9182594550630337, "grad_norm": 0.3758614957332611, "learning_rate": 9.788350008882383e-06, "loss": 0.4501, "step": 9032 }, { "epoch": 0.9183611224074827, "grad_norm": 0.29998278617858887, "learning_rate": 9.7882478363914e-06, "loss": 0.3788, "step": 9033 }, { "epoch": 0.9184627897519316, "grad_norm": 0.340027391910553, "learning_rate": 9.788145639778364e-06, "loss": 0.3762, "step": 9034 }, { "epoch": 0.9185644570963807, "grad_norm": 0.4239652752876282, "learning_rate": 9.788043419043788e-06, "loss": 0.3843, "step": 9035 }, { "epoch": 0.9186661244408296, "grad_norm": 0.35423335433006287, "learning_rate": 9.787941174188187e-06, "loss": 0.4141, "step": 9036 }, { "epoch": 0.9187677917852786, "grad_norm": 0.30033808946609497, "learning_rate": 9.787838905212076e-06, "loss": 0.34, "step": 9037 }, { "epoch": 0.9188694591297275, "grad_norm": 0.3347458243370056, "learning_rate": 9.78773661211597e-06, "loss": 0.377, "step": 9038 }, { "epoch": 0.9189711264741764, "grad_norm": 0.37340906262397766, "learning_rate": 9.787634294900386e-06, "loss": 0.4006, "step": 9039 }, { "epoch": 0.9190727938186255, "grad_norm": 0.3086467385292053, "learning_rate": 9.787531953565838e-06, "loss": 0.4073, "step": 9040 }, { "epoch": 0.9191744611630744, "grad_norm": 0.33752790093421936, "learning_rate": 9.78742958811284e-06, "loss": 0.3891, "step": 9041 }, { "epoch": 0.9192761285075234, "grad_norm": 0.34194329380989075, "learning_rate": 9.787327198541911e-06, "loss": 0.3835, "step": 9042 }, { "epoch": 0.9193777958519723, "grad_norm": 0.31861206889152527, "learning_rate": 9.787224784853565e-06, "loss": 0.4025, "step": 9043 }, { "epoch": 0.9194794631964213, "grad_norm": 0.3056691586971283, "learning_rate": 9.787122347048318e-06, "loss": 0.3683, "step": 9044 }, { "epoch": 0.9195811305408703, "grad_norm": 0.3165362775325775, "learning_rate": 9.787019885126687e-06, "loss": 0.35, "step": 9045 }, { "epoch": 0.9196827978853193, "grad_norm": 0.32377853989601135, "learning_rate": 9.786917399089186e-06, "loss": 0.4025, "step": 9046 }, { "epoch": 0.9197844652297682, "grad_norm": 0.3150225281715393, "learning_rate": 9.786814888936334e-06, "loss": 0.3954, "step": 9047 }, { "epoch": 0.9198861325742171, "grad_norm": 0.33476054668426514, "learning_rate": 9.786712354668643e-06, "loss": 0.3949, "step": 9048 }, { "epoch": 0.9199877999186661, "grad_norm": 0.30801257491111755, "learning_rate": 9.786609796286636e-06, "loss": 0.3839, "step": 9049 }, { "epoch": 0.9200894672631151, "grad_norm": 0.31099817156791687, "learning_rate": 9.786507213790826e-06, "loss": 0.4029, "step": 9050 }, { "epoch": 0.9201911346075641, "grad_norm": 0.2881886065006256, "learning_rate": 9.786404607181727e-06, "loss": 0.3895, "step": 9051 }, { "epoch": 0.920292801952013, "grad_norm": 0.3101288974285126, "learning_rate": 9.786301976459862e-06, "loss": 0.3997, "step": 9052 }, { "epoch": 0.920394469296462, "grad_norm": 0.32954704761505127, "learning_rate": 9.786199321625744e-06, "loss": 0.3988, "step": 9053 }, { "epoch": 0.9204961366409109, "grad_norm": 0.3182373344898224, "learning_rate": 9.786096642679893e-06, "loss": 0.4421, "step": 9054 }, { "epoch": 0.92059780398536, "grad_norm": 0.34440478682518005, "learning_rate": 9.785993939622822e-06, "loss": 0.3988, "step": 9055 }, { "epoch": 0.9206994713298089, "grad_norm": 0.28941723704338074, "learning_rate": 9.785891212455053e-06, "loss": 0.3833, "step": 9056 }, { "epoch": 0.9208011386742578, "grad_norm": 0.332173615694046, "learning_rate": 9.7857884611771e-06, "loss": 0.4044, "step": 9057 }, { "epoch": 0.9209028060187068, "grad_norm": 0.3660939037799835, "learning_rate": 9.785685685789483e-06, "loss": 0.409, "step": 9058 }, { "epoch": 0.9210044733631557, "grad_norm": 0.3182024657726288, "learning_rate": 9.785582886292717e-06, "loss": 0.3738, "step": 9059 }, { "epoch": 0.9211061407076048, "grad_norm": 0.3125143051147461, "learning_rate": 9.785480062687323e-06, "loss": 0.3902, "step": 9060 }, { "epoch": 0.9212078080520537, "grad_norm": 0.33955419063568115, "learning_rate": 9.785377214973816e-06, "loss": 0.4263, "step": 9061 }, { "epoch": 0.9213094753965027, "grad_norm": 0.33342674374580383, "learning_rate": 9.785274343152718e-06, "loss": 0.3811, "step": 9062 }, { "epoch": 0.9214111427409516, "grad_norm": 0.31391674280166626, "learning_rate": 9.785171447224544e-06, "loss": 0.3597, "step": 9063 }, { "epoch": 0.9215128100854005, "grad_norm": 0.30691036581993103, "learning_rate": 9.785068527189815e-06, "loss": 0.3818, "step": 9064 }, { "epoch": 0.9216144774298495, "grad_norm": 0.2981070876121521, "learning_rate": 9.784965583049045e-06, "loss": 0.4129, "step": 9065 }, { "epoch": 0.9217161447742985, "grad_norm": 0.32664868235588074, "learning_rate": 9.784862614802757e-06, "loss": 0.4089, "step": 9066 }, { "epoch": 0.9218178121187475, "grad_norm": 0.3054378628730774, "learning_rate": 9.784759622451468e-06, "loss": 0.3544, "step": 9067 }, { "epoch": 0.9219194794631964, "grad_norm": 0.3122243583202362, "learning_rate": 9.784656605995698e-06, "loss": 0.3915, "step": 9068 }, { "epoch": 0.9220211468076454, "grad_norm": 0.39021310210227966, "learning_rate": 9.784553565435964e-06, "loss": 0.4421, "step": 9069 }, { "epoch": 0.9221228141520943, "grad_norm": 0.31665778160095215, "learning_rate": 9.784450500772786e-06, "loss": 0.3873, "step": 9070 }, { "epoch": 0.9222244814965433, "grad_norm": 0.34132272005081177, "learning_rate": 9.784347412006682e-06, "loss": 0.4109, "step": 9071 }, { "epoch": 0.9223261488409923, "grad_norm": 0.3035793900489807, "learning_rate": 9.784244299138177e-06, "loss": 0.3806, "step": 9072 }, { "epoch": 0.9224278161854412, "grad_norm": 0.352448046207428, "learning_rate": 9.784141162167782e-06, "loss": 0.4058, "step": 9073 }, { "epoch": 0.9225294835298902, "grad_norm": 0.33151495456695557, "learning_rate": 9.784038001096022e-06, "loss": 0.3965, "step": 9074 }, { "epoch": 0.9226311508743391, "grad_norm": 0.331594318151474, "learning_rate": 9.783934815923415e-06, "loss": 0.3617, "step": 9075 }, { "epoch": 0.9227328182187882, "grad_norm": 0.35204124450683594, "learning_rate": 9.783831606650483e-06, "loss": 0.3979, "step": 9076 }, { "epoch": 0.9228344855632371, "grad_norm": 0.34767091274261475, "learning_rate": 9.783728373277742e-06, "loss": 0.371, "step": 9077 }, { "epoch": 0.922936152907686, "grad_norm": 0.33311381936073303, "learning_rate": 9.783625115805715e-06, "loss": 0.4069, "step": 9078 }, { "epoch": 0.923037820252135, "grad_norm": 0.32066959142684937, "learning_rate": 9.783521834234923e-06, "loss": 0.4003, "step": 9079 }, { "epoch": 0.9231394875965839, "grad_norm": 0.3824211061000824, "learning_rate": 9.783418528565882e-06, "loss": 0.3981, "step": 9080 }, { "epoch": 0.923241154941033, "grad_norm": 0.32490161061286926, "learning_rate": 9.783315198799117e-06, "loss": 0.4114, "step": 9081 }, { "epoch": 0.9233428222854819, "grad_norm": 0.3100561499595642, "learning_rate": 9.783211844935146e-06, "loss": 0.4357, "step": 9082 }, { "epoch": 0.9234444896299309, "grad_norm": 0.31921038031578064, "learning_rate": 9.783108466974491e-06, "loss": 0.4045, "step": 9083 }, { "epoch": 0.9235461569743798, "grad_norm": 0.3431953191757202, "learning_rate": 9.783005064917672e-06, "loss": 0.3934, "step": 9084 }, { "epoch": 0.9236478243188287, "grad_norm": 0.296192467212677, "learning_rate": 9.78290163876521e-06, "loss": 0.3732, "step": 9085 }, { "epoch": 0.9237494916632778, "grad_norm": 0.3187159597873688, "learning_rate": 9.782798188517626e-06, "loss": 0.3862, "step": 9086 }, { "epoch": 0.9238511590077267, "grad_norm": 0.3468327224254608, "learning_rate": 9.782694714175442e-06, "loss": 0.3854, "step": 9087 }, { "epoch": 0.9239528263521757, "grad_norm": 0.319110631942749, "learning_rate": 9.782591215739177e-06, "loss": 0.3941, "step": 9088 }, { "epoch": 0.9240544936966246, "grad_norm": 0.3046635389328003, "learning_rate": 9.782487693209354e-06, "loss": 0.3681, "step": 9089 }, { "epoch": 0.9241561610410736, "grad_norm": 0.30852362513542175, "learning_rate": 9.782384146586496e-06, "loss": 0.3842, "step": 9090 }, { "epoch": 0.9242578283855226, "grad_norm": 0.3744698166847229, "learning_rate": 9.782280575871124e-06, "loss": 0.4026, "step": 9091 }, { "epoch": 0.9243594957299716, "grad_norm": 0.30416139960289, "learning_rate": 9.782176981063755e-06, "loss": 0.3738, "step": 9092 }, { "epoch": 0.9244611630744205, "grad_norm": 0.3586055338382721, "learning_rate": 9.782073362164919e-06, "loss": 0.3833, "step": 9093 }, { "epoch": 0.9245628304188694, "grad_norm": 0.34636521339416504, "learning_rate": 9.781969719175132e-06, "loss": 0.4007, "step": 9094 }, { "epoch": 0.9246644977633184, "grad_norm": 0.3524506688117981, "learning_rate": 9.781866052094917e-06, "loss": 0.3641, "step": 9095 }, { "epoch": 0.9247661651077674, "grad_norm": 0.3117632567882538, "learning_rate": 9.781762360924797e-06, "loss": 0.386, "step": 9096 }, { "epoch": 0.9248678324522164, "grad_norm": 0.3444679379463196, "learning_rate": 9.781658645665297e-06, "loss": 0.3521, "step": 9097 }, { "epoch": 0.9249694997966653, "grad_norm": 0.37642210721969604, "learning_rate": 9.781554906316936e-06, "loss": 0.3779, "step": 9098 }, { "epoch": 0.9250711671411143, "grad_norm": 0.34985101222991943, "learning_rate": 9.781451142880239e-06, "loss": 0.3918, "step": 9099 }, { "epoch": 0.9251728344855632, "grad_norm": 0.31058016419410706, "learning_rate": 9.781347355355723e-06, "loss": 0.3711, "step": 9100 }, { "epoch": 0.9252745018300123, "grad_norm": 0.35501545667648315, "learning_rate": 9.781243543743918e-06, "loss": 0.379, "step": 9101 }, { "epoch": 0.9253761691744612, "grad_norm": 0.34721338748931885, "learning_rate": 9.781139708045346e-06, "loss": 0.3986, "step": 9102 }, { "epoch": 0.9254778365189101, "grad_norm": 0.34367072582244873, "learning_rate": 9.781035848260526e-06, "loss": 0.3925, "step": 9103 }, { "epoch": 0.9255795038633591, "grad_norm": 0.296949565410614, "learning_rate": 9.780931964389983e-06, "loss": 0.3469, "step": 9104 }, { "epoch": 0.925681171207808, "grad_norm": 0.29397502541542053, "learning_rate": 9.780828056434244e-06, "loss": 0.3885, "step": 9105 }, { "epoch": 0.9257828385522571, "grad_norm": 0.3464430868625641, "learning_rate": 9.780724124393827e-06, "loss": 0.3973, "step": 9106 }, { "epoch": 0.925884505896706, "grad_norm": 0.3206426203250885, "learning_rate": 9.780620168269257e-06, "loss": 0.421, "step": 9107 }, { "epoch": 0.925986173241155, "grad_norm": 0.33910611271858215, "learning_rate": 9.78051618806106e-06, "loss": 0.3622, "step": 9108 }, { "epoch": 0.9260878405856039, "grad_norm": 0.34402036666870117, "learning_rate": 9.78041218376976e-06, "loss": 0.3794, "step": 9109 }, { "epoch": 0.9261895079300528, "grad_norm": 0.31808236241340637, "learning_rate": 9.780308155395877e-06, "loss": 0.3681, "step": 9110 }, { "epoch": 0.9262911752745018, "grad_norm": 0.3139359951019287, "learning_rate": 9.780204102939939e-06, "loss": 0.3789, "step": 9111 }, { "epoch": 0.9263928426189508, "grad_norm": 0.3257865309715271, "learning_rate": 9.780100026402467e-06, "loss": 0.3986, "step": 9112 }, { "epoch": 0.9264945099633998, "grad_norm": 0.3528623878955841, "learning_rate": 9.779995925783988e-06, "loss": 0.3802, "step": 9113 }, { "epoch": 0.9265961773078487, "grad_norm": 0.3315512537956238, "learning_rate": 9.779891801085025e-06, "loss": 0.4068, "step": 9114 }, { "epoch": 0.9266978446522977, "grad_norm": 0.32921749353408813, "learning_rate": 9.779787652306104e-06, "loss": 0.3779, "step": 9115 }, { "epoch": 0.9267995119967466, "grad_norm": 0.326968789100647, "learning_rate": 9.779683479447747e-06, "loss": 0.3842, "step": 9116 }, { "epoch": 0.9269011793411956, "grad_norm": 0.3263116776943207, "learning_rate": 9.779579282510483e-06, "loss": 0.4184, "step": 9117 }, { "epoch": 0.9270028466856446, "grad_norm": 0.2960870862007141, "learning_rate": 9.779475061494833e-06, "loss": 0.3625, "step": 9118 }, { "epoch": 0.9271045140300935, "grad_norm": 0.3433523178100586, "learning_rate": 9.779370816401322e-06, "loss": 0.4015, "step": 9119 }, { "epoch": 0.9272061813745425, "grad_norm": 0.3516335189342499, "learning_rate": 9.779266547230479e-06, "loss": 0.3944, "step": 9120 }, { "epoch": 0.9273078487189914, "grad_norm": 0.32177749276161194, "learning_rate": 9.779162253982826e-06, "loss": 0.3971, "step": 9121 }, { "epoch": 0.9274095160634405, "grad_norm": 0.32210567593574524, "learning_rate": 9.779057936658887e-06, "loss": 0.3878, "step": 9122 }, { "epoch": 0.9275111834078894, "grad_norm": 0.3235931694507599, "learning_rate": 9.778953595259192e-06, "loss": 0.4017, "step": 9123 }, { "epoch": 0.9276128507523383, "grad_norm": 0.3261714279651642, "learning_rate": 9.778849229784264e-06, "loss": 0.3735, "step": 9124 }, { "epoch": 0.9277145180967873, "grad_norm": 0.32056373357772827, "learning_rate": 9.77874484023463e-06, "loss": 0.3662, "step": 9125 }, { "epoch": 0.9278161854412362, "grad_norm": 0.35188236832618713, "learning_rate": 9.778640426610815e-06, "loss": 0.3745, "step": 9126 }, { "epoch": 0.9279178527856853, "grad_norm": 0.3187849223613739, "learning_rate": 9.778535988913345e-06, "loss": 0.4039, "step": 9127 }, { "epoch": 0.9280195201301342, "grad_norm": 0.3066483438014984, "learning_rate": 9.778431527142746e-06, "loss": 0.381, "step": 9128 }, { "epoch": 0.9281211874745832, "grad_norm": 0.313477486371994, "learning_rate": 9.778327041299544e-06, "loss": 0.3944, "step": 9129 }, { "epoch": 0.9282228548190321, "grad_norm": 0.33250463008880615, "learning_rate": 9.778222531384265e-06, "loss": 0.4103, "step": 9130 }, { "epoch": 0.928324522163481, "grad_norm": 0.34131181240081787, "learning_rate": 9.778117997397438e-06, "loss": 0.3682, "step": 9131 }, { "epoch": 0.9284261895079301, "grad_norm": 0.32419097423553467, "learning_rate": 9.778013439339589e-06, "loss": 0.3758, "step": 9132 }, { "epoch": 0.928527856852379, "grad_norm": 0.326801598072052, "learning_rate": 9.777908857211243e-06, "loss": 0.3562, "step": 9133 }, { "epoch": 0.928629524196828, "grad_norm": 0.33857065439224243, "learning_rate": 9.777804251012926e-06, "loss": 0.3736, "step": 9134 }, { "epoch": 0.9287311915412769, "grad_norm": 0.32288259267807007, "learning_rate": 9.777699620745167e-06, "loss": 0.4074, "step": 9135 }, { "epoch": 0.9288328588857259, "grad_norm": 0.3005062937736511, "learning_rate": 9.777594966408493e-06, "loss": 0.4015, "step": 9136 }, { "epoch": 0.9289345262301749, "grad_norm": 0.34589919447898865, "learning_rate": 9.77749028800343e-06, "loss": 0.3942, "step": 9137 }, { "epoch": 0.9290361935746239, "grad_norm": 0.3159516453742981, "learning_rate": 9.777385585530508e-06, "loss": 0.366, "step": 9138 }, { "epoch": 0.9291378609190728, "grad_norm": 0.35713571310043335, "learning_rate": 9.777280858990253e-06, "loss": 0.3868, "step": 9139 }, { "epoch": 0.9292395282635217, "grad_norm": 0.3423199951648712, "learning_rate": 9.777176108383191e-06, "loss": 0.392, "step": 9140 }, { "epoch": 0.9293411956079707, "grad_norm": 0.3082961440086365, "learning_rate": 9.777071333709852e-06, "loss": 0.3598, "step": 9141 }, { "epoch": 0.9294428629524197, "grad_norm": 0.3620491325855255, "learning_rate": 9.776966534970761e-06, "loss": 0.3391, "step": 9142 }, { "epoch": 0.9295445302968687, "grad_norm": 0.3326514661312103, "learning_rate": 9.776861712166448e-06, "loss": 0.3791, "step": 9143 }, { "epoch": 0.9296461976413176, "grad_norm": 0.31505733728408813, "learning_rate": 9.776756865297443e-06, "loss": 0.3953, "step": 9144 }, { "epoch": 0.9297478649857666, "grad_norm": 0.2925020456314087, "learning_rate": 9.77665199436427e-06, "loss": 0.3603, "step": 9145 }, { "epoch": 0.9298495323302155, "grad_norm": 0.36734238266944885, "learning_rate": 9.77654709936746e-06, "loss": 0.4217, "step": 9146 }, { "epoch": 0.9299511996746646, "grad_norm": 0.35235917568206787, "learning_rate": 9.776442180307541e-06, "loss": 0.3864, "step": 9147 }, { "epoch": 0.9300528670191135, "grad_norm": 0.34434476494789124, "learning_rate": 9.776337237185042e-06, "loss": 0.3624, "step": 9148 }, { "epoch": 0.9301545343635624, "grad_norm": 0.38514286279678345, "learning_rate": 9.77623227000049e-06, "loss": 0.3981, "step": 9149 }, { "epoch": 0.9302562017080114, "grad_norm": 0.41403573751449585, "learning_rate": 9.776127278754414e-06, "loss": 0.3968, "step": 9150 }, { "epoch": 0.9303578690524603, "grad_norm": 0.3174709677696228, "learning_rate": 9.776022263447346e-06, "loss": 0.3856, "step": 9151 }, { "epoch": 0.9304595363969093, "grad_norm": 0.3334386944770813, "learning_rate": 9.775917224079812e-06, "loss": 0.3836, "step": 9152 }, { "epoch": 0.9305612037413583, "grad_norm": 0.35259196162223816, "learning_rate": 9.775812160652341e-06, "loss": 0.399, "step": 9153 }, { "epoch": 0.9306628710858073, "grad_norm": 0.3236255943775177, "learning_rate": 9.775707073165464e-06, "loss": 0.3858, "step": 9154 }, { "epoch": 0.9307645384302562, "grad_norm": 0.3083132803440094, "learning_rate": 9.77560196161971e-06, "loss": 0.3858, "step": 9155 }, { "epoch": 0.9308662057747051, "grad_norm": 0.3225055932998657, "learning_rate": 9.775496826015607e-06, "loss": 0.3946, "step": 9156 }, { "epoch": 0.9309678731191541, "grad_norm": 0.3596912920475006, "learning_rate": 9.775391666353686e-06, "loss": 0.4083, "step": 9157 }, { "epoch": 0.9310695404636031, "grad_norm": 0.3163512945175171, "learning_rate": 9.775286482634479e-06, "loss": 0.4155, "step": 9158 }, { "epoch": 0.9311712078080521, "grad_norm": 0.3236643671989441, "learning_rate": 9.77518127485851e-06, "loss": 0.3716, "step": 9159 }, { "epoch": 0.931272875152501, "grad_norm": 0.34191951155662537, "learning_rate": 9.775076043026314e-06, "loss": 0.3873, "step": 9160 }, { "epoch": 0.93137454249695, "grad_norm": 0.3531752824783325, "learning_rate": 9.774970787138421e-06, "loss": 0.3936, "step": 9161 }, { "epoch": 0.9314762098413989, "grad_norm": 0.33154061436653137, "learning_rate": 9.774865507195358e-06, "loss": 0.3887, "step": 9162 }, { "epoch": 0.931577877185848, "grad_norm": 0.33914250135421753, "learning_rate": 9.774760203197657e-06, "loss": 0.3765, "step": 9163 }, { "epoch": 0.9316795445302969, "grad_norm": 0.3141995072364807, "learning_rate": 9.774654875145851e-06, "loss": 0.3954, "step": 9164 }, { "epoch": 0.9317812118747458, "grad_norm": 0.30907320976257324, "learning_rate": 9.774549523040467e-06, "loss": 0.4087, "step": 9165 }, { "epoch": 0.9318828792191948, "grad_norm": 0.2966752052307129, "learning_rate": 9.774444146882036e-06, "loss": 0.3783, "step": 9166 }, { "epoch": 0.9319845465636437, "grad_norm": 0.3335716724395752, "learning_rate": 9.774338746671091e-06, "loss": 0.378, "step": 9167 }, { "epoch": 0.9320862139080928, "grad_norm": 0.31704092025756836, "learning_rate": 9.774233322408162e-06, "loss": 0.4095, "step": 9168 }, { "epoch": 0.9321878812525417, "grad_norm": 0.3286114037036896, "learning_rate": 9.77412787409378e-06, "loss": 0.4135, "step": 9169 }, { "epoch": 0.9322895485969906, "grad_norm": 0.3280808925628662, "learning_rate": 9.774022401728477e-06, "loss": 0.3774, "step": 9170 }, { "epoch": 0.9323912159414396, "grad_norm": 0.339497447013855, "learning_rate": 9.773916905312783e-06, "loss": 0.4396, "step": 9171 }, { "epoch": 0.9324928832858885, "grad_norm": 0.3200896978378296, "learning_rate": 9.773811384847229e-06, "loss": 0.374, "step": 9172 }, { "epoch": 0.9325945506303376, "grad_norm": 0.31480535864830017, "learning_rate": 9.77370584033235e-06, "loss": 0.3784, "step": 9173 }, { "epoch": 0.9326962179747865, "grad_norm": 0.3287980854511261, "learning_rate": 9.773600271768674e-06, "loss": 0.4085, "step": 9174 }, { "epoch": 0.9327978853192355, "grad_norm": 0.3431384563446045, "learning_rate": 9.773494679156734e-06, "loss": 0.3802, "step": 9175 }, { "epoch": 0.9328995526636844, "grad_norm": 0.3002326488494873, "learning_rate": 9.773389062497062e-06, "loss": 0.3615, "step": 9176 }, { "epoch": 0.9330012200081333, "grad_norm": 0.321348637342453, "learning_rate": 9.773283421790192e-06, "loss": 0.3609, "step": 9177 }, { "epoch": 0.9331028873525824, "grad_norm": 0.34404754638671875, "learning_rate": 9.773177757036652e-06, "loss": 0.3961, "step": 9178 }, { "epoch": 0.9332045546970313, "grad_norm": 0.3221170902252197, "learning_rate": 9.773072068236979e-06, "loss": 0.3764, "step": 9179 }, { "epoch": 0.9333062220414803, "grad_norm": 0.29203131794929504, "learning_rate": 9.772966355391702e-06, "loss": 0.3558, "step": 9180 }, { "epoch": 0.9334078893859292, "grad_norm": 0.3236519992351532, "learning_rate": 9.772860618501354e-06, "loss": 0.4218, "step": 9181 }, { "epoch": 0.9335095567303782, "grad_norm": 0.4034336507320404, "learning_rate": 9.77275485756647e-06, "loss": 0.3934, "step": 9182 }, { "epoch": 0.9336112240748272, "grad_norm": 0.32443132996559143, "learning_rate": 9.77264907258758e-06, "loss": 0.3897, "step": 9183 }, { "epoch": 0.9337128914192762, "grad_norm": 0.3197472393512726, "learning_rate": 9.772543263565219e-06, "loss": 0.3648, "step": 9184 }, { "epoch": 0.9338145587637251, "grad_norm": 0.29073551297187805, "learning_rate": 9.772437430499917e-06, "loss": 0.3622, "step": 9185 }, { "epoch": 0.933916226108174, "grad_norm": 0.32107409834861755, "learning_rate": 9.772331573392212e-06, "loss": 0.4072, "step": 9186 }, { "epoch": 0.934017893452623, "grad_norm": 0.312491774559021, "learning_rate": 9.772225692242634e-06, "loss": 0.379, "step": 9187 }, { "epoch": 0.934119560797072, "grad_norm": 0.3245563805103302, "learning_rate": 9.772119787051716e-06, "loss": 0.4138, "step": 9188 }, { "epoch": 0.934221228141521, "grad_norm": 0.30993351340293884, "learning_rate": 9.772013857819993e-06, "loss": 0.4246, "step": 9189 }, { "epoch": 0.9343228954859699, "grad_norm": 0.31287941336631775, "learning_rate": 9.771907904547998e-06, "loss": 0.3676, "step": 9190 }, { "epoch": 0.9344245628304189, "grad_norm": 0.3357836902141571, "learning_rate": 9.771801927236266e-06, "loss": 0.3824, "step": 9191 }, { "epoch": 0.9345262301748678, "grad_norm": 0.31251442432403564, "learning_rate": 9.77169592588533e-06, "loss": 0.3713, "step": 9192 }, { "epoch": 0.9346278975193167, "grad_norm": 0.35024163126945496, "learning_rate": 9.771589900495723e-06, "loss": 0.3946, "step": 9193 }, { "epoch": 0.9347295648637658, "grad_norm": 0.3504972755908966, "learning_rate": 9.77148385106798e-06, "loss": 0.3713, "step": 9194 }, { "epoch": 0.9348312322082147, "grad_norm": 0.33247560262680054, "learning_rate": 9.771377777602635e-06, "loss": 0.4138, "step": 9195 }, { "epoch": 0.9349328995526637, "grad_norm": 0.33461010456085205, "learning_rate": 9.771271680100222e-06, "loss": 0.3825, "step": 9196 }, { "epoch": 0.9350345668971126, "grad_norm": 0.33946508169174194, "learning_rate": 9.771165558561277e-06, "loss": 0.3797, "step": 9197 }, { "epoch": 0.9351362342415616, "grad_norm": 0.3093417286872864, "learning_rate": 9.771059412986334e-06, "loss": 0.355, "step": 9198 }, { "epoch": 0.9352379015860106, "grad_norm": 0.31255319714546204, "learning_rate": 9.770953243375929e-06, "loss": 0.3983, "step": 9199 }, { "epoch": 0.9353395689304596, "grad_norm": 0.3153252899646759, "learning_rate": 9.770847049730592e-06, "loss": 0.3851, "step": 9200 }, { "epoch": 0.9354412362749085, "grad_norm": 0.3042182922363281, "learning_rate": 9.770740832050863e-06, "loss": 0.3622, "step": 9201 }, { "epoch": 0.9355429036193574, "grad_norm": 0.3149341642856598, "learning_rate": 9.770634590337275e-06, "loss": 0.3775, "step": 9202 }, { "epoch": 0.9356445709638064, "grad_norm": 0.3226032257080078, "learning_rate": 9.770528324590365e-06, "loss": 0.3663, "step": 9203 }, { "epoch": 0.9357462383082554, "grad_norm": 0.3600975275039673, "learning_rate": 9.770422034810664e-06, "loss": 0.4169, "step": 9204 }, { "epoch": 0.9358479056527044, "grad_norm": 0.2938815951347351, "learning_rate": 9.770315720998713e-06, "loss": 0.3702, "step": 9205 }, { "epoch": 0.9359495729971533, "grad_norm": 0.3332999348640442, "learning_rate": 9.770209383155045e-06, "loss": 0.3881, "step": 9206 }, { "epoch": 0.9360512403416023, "grad_norm": 0.3012370765209198, "learning_rate": 9.770103021280193e-06, "loss": 0.3936, "step": 9207 }, { "epoch": 0.9361529076860512, "grad_norm": 0.3184336721897125, "learning_rate": 9.7699966353747e-06, "loss": 0.3553, "step": 9208 }, { "epoch": 0.9362545750305002, "grad_norm": 0.28879639506340027, "learning_rate": 9.769890225439093e-06, "loss": 0.3557, "step": 9209 }, { "epoch": 0.9363562423749492, "grad_norm": 0.3360322117805481, "learning_rate": 9.769783791473914e-06, "loss": 0.3814, "step": 9210 }, { "epoch": 0.9364579097193981, "grad_norm": 0.29394567012786865, "learning_rate": 9.7696773334797e-06, "loss": 0.3636, "step": 9211 }, { "epoch": 0.9365595770638471, "grad_norm": 0.2925311326980591, "learning_rate": 9.769570851456982e-06, "loss": 0.3945, "step": 9212 }, { "epoch": 0.936661244408296, "grad_norm": 0.3209652006626129, "learning_rate": 9.7694643454063e-06, "loss": 0.4108, "step": 9213 }, { "epoch": 0.9367629117527451, "grad_norm": 0.3290860652923584, "learning_rate": 9.769357815328191e-06, "loss": 0.3606, "step": 9214 }, { "epoch": 0.936864579097194, "grad_norm": 0.3105616271495819, "learning_rate": 9.76925126122319e-06, "loss": 0.4058, "step": 9215 }, { "epoch": 0.936966246441643, "grad_norm": 0.32808712124824524, "learning_rate": 9.769144683091836e-06, "loss": 0.3784, "step": 9216 }, { "epoch": 0.9370679137860919, "grad_norm": 0.3148565888404846, "learning_rate": 9.769038080934664e-06, "loss": 0.389, "step": 9217 }, { "epoch": 0.9371695811305408, "grad_norm": 0.29425179958343506, "learning_rate": 9.76893145475221e-06, "loss": 0.3801, "step": 9218 }, { "epoch": 0.9372712484749899, "grad_norm": 0.3177850842475891, "learning_rate": 9.768824804545013e-06, "loss": 0.3511, "step": 9219 }, { "epoch": 0.9373729158194388, "grad_norm": 0.35070961713790894, "learning_rate": 9.768718130313611e-06, "loss": 0.3937, "step": 9220 }, { "epoch": 0.9374745831638878, "grad_norm": 0.29290610551834106, "learning_rate": 9.768611432058538e-06, "loss": 0.3803, "step": 9221 }, { "epoch": 0.9375762505083367, "grad_norm": 0.33808064460754395, "learning_rate": 9.768504709780336e-06, "loss": 0.418, "step": 9222 }, { "epoch": 0.9376779178527856, "grad_norm": 0.32457858324050903, "learning_rate": 9.768397963479538e-06, "loss": 0.3815, "step": 9223 }, { "epoch": 0.9377795851972347, "grad_norm": 0.3117550313472748, "learning_rate": 9.768291193156687e-06, "loss": 0.3941, "step": 9224 }, { "epoch": 0.9378812525416836, "grad_norm": 0.2982501983642578, "learning_rate": 9.768184398812318e-06, "loss": 0.3883, "step": 9225 }, { "epoch": 0.9379829198861326, "grad_norm": 0.2937878668308258, "learning_rate": 9.768077580446967e-06, "loss": 0.3677, "step": 9226 }, { "epoch": 0.9380845872305815, "grad_norm": 0.31857624650001526, "learning_rate": 9.767970738061176e-06, "loss": 0.3757, "step": 9227 }, { "epoch": 0.9381862545750305, "grad_norm": 0.308779239654541, "learning_rate": 9.76786387165548e-06, "loss": 0.4148, "step": 9228 }, { "epoch": 0.9382879219194795, "grad_norm": 0.303013414144516, "learning_rate": 9.767756981230421e-06, "loss": 0.3678, "step": 9229 }, { "epoch": 0.9383895892639285, "grad_norm": 0.3360164165496826, "learning_rate": 9.767650066786533e-06, "loss": 0.3736, "step": 9230 }, { "epoch": 0.9384912566083774, "grad_norm": 0.33219149708747864, "learning_rate": 9.767543128324358e-06, "loss": 0.4187, "step": 9231 }, { "epoch": 0.9385929239528263, "grad_norm": 0.31960543990135193, "learning_rate": 9.767436165844434e-06, "loss": 0.3433, "step": 9232 }, { "epoch": 0.9386945912972753, "grad_norm": 0.3411983251571655, "learning_rate": 9.767329179347298e-06, "loss": 0.4397, "step": 9233 }, { "epoch": 0.9387962586417242, "grad_norm": 0.3370826542377472, "learning_rate": 9.767222168833492e-06, "loss": 0.3845, "step": 9234 }, { "epoch": 0.9388979259861733, "grad_norm": 0.34289491176605225, "learning_rate": 9.767115134303553e-06, "loss": 0.4089, "step": 9235 }, { "epoch": 0.9389995933306222, "grad_norm": 0.35815587639808655, "learning_rate": 9.767008075758022e-06, "loss": 0.3921, "step": 9236 }, { "epoch": 0.9391012606750712, "grad_norm": 0.30439576506614685, "learning_rate": 9.766900993197435e-06, "loss": 0.3784, "step": 9237 }, { "epoch": 0.9392029280195201, "grad_norm": 0.31712788343429565, "learning_rate": 9.766793886622334e-06, "loss": 0.387, "step": 9238 }, { "epoch": 0.939304595363969, "grad_norm": 0.329618901014328, "learning_rate": 9.76668675603326e-06, "loss": 0.3625, "step": 9239 }, { "epoch": 0.9394062627084181, "grad_norm": 0.3448764383792877, "learning_rate": 9.76657960143075e-06, "loss": 0.3954, "step": 9240 }, { "epoch": 0.939507930052867, "grad_norm": 0.32767367362976074, "learning_rate": 9.766472422815344e-06, "loss": 0.4026, "step": 9241 }, { "epoch": 0.939609597397316, "grad_norm": 0.31647852063179016, "learning_rate": 9.766365220187583e-06, "loss": 0.3999, "step": 9242 }, { "epoch": 0.9397112647417649, "grad_norm": 0.3151889741420746, "learning_rate": 9.766257993548008e-06, "loss": 0.3963, "step": 9243 }, { "epoch": 0.9398129320862139, "grad_norm": 0.3165428042411804, "learning_rate": 9.766150742897156e-06, "loss": 0.3913, "step": 9244 }, { "epoch": 0.9399145994306629, "grad_norm": 0.3199245035648346, "learning_rate": 9.766043468235568e-06, "loss": 0.3998, "step": 9245 }, { "epoch": 0.9400162667751119, "grad_norm": 0.33721643686294556, "learning_rate": 9.765936169563788e-06, "loss": 0.4042, "step": 9246 }, { "epoch": 0.9401179341195608, "grad_norm": 0.31620699167251587, "learning_rate": 9.765828846882354e-06, "loss": 0.3994, "step": 9247 }, { "epoch": 0.9402196014640097, "grad_norm": 0.3082786798477173, "learning_rate": 9.765721500191806e-06, "loss": 0.4091, "step": 9248 }, { "epoch": 0.9403212688084587, "grad_norm": 0.33752816915512085, "learning_rate": 9.765614129492686e-06, "loss": 0.3651, "step": 9249 }, { "epoch": 0.9404229361529077, "grad_norm": 0.3193666636943817, "learning_rate": 9.765506734785533e-06, "loss": 0.3895, "step": 9250 }, { "epoch": 0.9405246034973567, "grad_norm": 0.30227774381637573, "learning_rate": 9.765399316070892e-06, "loss": 0.3722, "step": 9251 }, { "epoch": 0.9406262708418056, "grad_norm": 0.3672599792480469, "learning_rate": 9.7652918733493e-06, "loss": 0.3808, "step": 9252 }, { "epoch": 0.9407279381862546, "grad_norm": 0.3090479373931885, "learning_rate": 9.765184406621301e-06, "loss": 0.3916, "step": 9253 }, { "epoch": 0.9408296055307035, "grad_norm": 0.29457810521125793, "learning_rate": 9.765076915887434e-06, "loss": 0.3663, "step": 9254 }, { "epoch": 0.9409312728751525, "grad_norm": 0.3102820813655853, "learning_rate": 9.764969401148243e-06, "loss": 0.3757, "step": 9255 }, { "epoch": 0.9410329402196015, "grad_norm": 0.34172800183296204, "learning_rate": 9.76486186240427e-06, "loss": 0.3738, "step": 9256 }, { "epoch": 0.9411346075640504, "grad_norm": 0.3196687698364258, "learning_rate": 9.764754299656051e-06, "loss": 0.39, "step": 9257 }, { "epoch": 0.9412362749084994, "grad_norm": 0.3272842466831207, "learning_rate": 9.764646712904135e-06, "loss": 0.4495, "step": 9258 }, { "epoch": 0.9413379422529483, "grad_norm": 0.339009553194046, "learning_rate": 9.764539102149061e-06, "loss": 0.3685, "step": 9259 }, { "epoch": 0.9414396095973974, "grad_norm": 0.35919076204299927, "learning_rate": 9.764431467391372e-06, "loss": 0.3796, "step": 9260 }, { "epoch": 0.9415412769418463, "grad_norm": 0.37836432456970215, "learning_rate": 9.764323808631608e-06, "loss": 0.415, "step": 9261 }, { "epoch": 0.9416429442862952, "grad_norm": 0.3621119558811188, "learning_rate": 9.764216125870312e-06, "loss": 0.3811, "step": 9262 }, { "epoch": 0.9417446116307442, "grad_norm": 0.32235008478164673, "learning_rate": 9.76410841910803e-06, "loss": 0.3984, "step": 9263 }, { "epoch": 0.9418462789751931, "grad_norm": 0.4107087254524231, "learning_rate": 9.764000688345299e-06, "loss": 0.4202, "step": 9264 }, { "epoch": 0.9419479463196422, "grad_norm": 0.3203967213630676, "learning_rate": 9.763892933582667e-06, "loss": 0.3533, "step": 9265 }, { "epoch": 0.9420496136640911, "grad_norm": 0.33977270126342773, "learning_rate": 9.763785154820674e-06, "loss": 0.3652, "step": 9266 }, { "epoch": 0.9421512810085401, "grad_norm": 0.34201040863990784, "learning_rate": 9.763677352059861e-06, "loss": 0.371, "step": 9267 }, { "epoch": 0.942252948352989, "grad_norm": 0.3402392566204071, "learning_rate": 9.763569525300775e-06, "loss": 0.4089, "step": 9268 }, { "epoch": 0.942354615697438, "grad_norm": 0.33652639389038086, "learning_rate": 9.76346167454396e-06, "loss": 0.3949, "step": 9269 }, { "epoch": 0.942456283041887, "grad_norm": 0.3302376866340637, "learning_rate": 9.763353799789954e-06, "loss": 0.4072, "step": 9270 }, { "epoch": 0.9425579503863359, "grad_norm": 0.2986519932746887, "learning_rate": 9.763245901039304e-06, "loss": 0.3728, "step": 9271 }, { "epoch": 0.9426596177307849, "grad_norm": 0.34117570519447327, "learning_rate": 9.763137978292555e-06, "loss": 0.3895, "step": 9272 }, { "epoch": 0.9427612850752338, "grad_norm": 0.32812806963920593, "learning_rate": 9.763030031550248e-06, "loss": 0.3937, "step": 9273 }, { "epoch": 0.9428629524196828, "grad_norm": 0.3281995952129364, "learning_rate": 9.762922060812926e-06, "loss": 0.364, "step": 9274 }, { "epoch": 0.9429646197641317, "grad_norm": 0.3225947320461273, "learning_rate": 9.762814066081137e-06, "loss": 0.395, "step": 9275 }, { "epoch": 0.9430662871085808, "grad_norm": 0.3124593496322632, "learning_rate": 9.76270604735542e-06, "loss": 0.3709, "step": 9276 }, { "epoch": 0.9431679544530297, "grad_norm": 0.3352351486682892, "learning_rate": 9.762598004636323e-06, "loss": 0.3653, "step": 9277 }, { "epoch": 0.9432696217974786, "grad_norm": 0.3027610778808594, "learning_rate": 9.762489937924389e-06, "loss": 0.3831, "step": 9278 }, { "epoch": 0.9433712891419276, "grad_norm": 0.31522703170776367, "learning_rate": 9.762381847220163e-06, "loss": 0.3424, "step": 9279 }, { "epoch": 0.9434729564863765, "grad_norm": 0.3294794261455536, "learning_rate": 9.762273732524189e-06, "loss": 0.4003, "step": 9280 }, { "epoch": 0.9435746238308256, "grad_norm": 0.3361586332321167, "learning_rate": 9.76216559383701e-06, "loss": 0.37, "step": 9281 }, { "epoch": 0.9436762911752745, "grad_norm": 0.3045675456523895, "learning_rate": 9.762057431159174e-06, "loss": 0.3826, "step": 9282 }, { "epoch": 0.9437779585197235, "grad_norm": 0.3321824371814728, "learning_rate": 9.761949244491222e-06, "loss": 0.3909, "step": 9283 }, { "epoch": 0.9438796258641724, "grad_norm": 0.3382357954978943, "learning_rate": 9.761841033833704e-06, "loss": 0.4119, "step": 9284 }, { "epoch": 0.9439812932086213, "grad_norm": 0.35841068625450134, "learning_rate": 9.76173279918716e-06, "loss": 0.4284, "step": 9285 }, { "epoch": 0.9440829605530704, "grad_norm": 0.34072983264923096, "learning_rate": 9.76162454055214e-06, "loss": 0.3809, "step": 9286 }, { "epoch": 0.9441846278975193, "grad_norm": 0.34024131298065186, "learning_rate": 9.761516257929186e-06, "loss": 0.4016, "step": 9287 }, { "epoch": 0.9442862952419683, "grad_norm": 0.330597460269928, "learning_rate": 9.761407951318846e-06, "loss": 0.3968, "step": 9288 }, { "epoch": 0.9443879625864172, "grad_norm": 0.35459521412849426, "learning_rate": 9.761299620721663e-06, "loss": 0.3935, "step": 9289 }, { "epoch": 0.9444896299308662, "grad_norm": 0.2923009693622589, "learning_rate": 9.761191266138183e-06, "loss": 0.3741, "step": 9290 }, { "epoch": 0.9445912972753152, "grad_norm": 0.2996520698070526, "learning_rate": 9.761082887568955e-06, "loss": 0.3691, "step": 9291 }, { "epoch": 0.9446929646197642, "grad_norm": 0.3084099292755127, "learning_rate": 9.760974485014522e-06, "loss": 0.385, "step": 9292 }, { "epoch": 0.9447946319642131, "grad_norm": 0.31811603903770447, "learning_rate": 9.76086605847543e-06, "loss": 0.3725, "step": 9293 }, { "epoch": 0.944896299308662, "grad_norm": 0.31906935572624207, "learning_rate": 9.760757607952227e-06, "loss": 0.3909, "step": 9294 }, { "epoch": 0.944997966653111, "grad_norm": 0.31070661544799805, "learning_rate": 9.760649133445458e-06, "loss": 0.3617, "step": 9295 }, { "epoch": 0.94509963399756, "grad_norm": 0.30961790680885315, "learning_rate": 9.76054063495567e-06, "loss": 0.4125, "step": 9296 }, { "epoch": 0.945201301342009, "grad_norm": 0.31139522790908813, "learning_rate": 9.76043211248341e-06, "loss": 0.4208, "step": 9297 }, { "epoch": 0.9453029686864579, "grad_norm": 0.33663222193717957, "learning_rate": 9.760323566029223e-06, "loss": 0.3719, "step": 9298 }, { "epoch": 0.9454046360309069, "grad_norm": 0.3188166618347168, "learning_rate": 9.76021499559366e-06, "loss": 0.3903, "step": 9299 }, { "epoch": 0.9455063033753558, "grad_norm": 0.3042771816253662, "learning_rate": 9.760106401177263e-06, "loss": 0.4095, "step": 9300 }, { "epoch": 0.9456079707198048, "grad_norm": 0.28294384479522705, "learning_rate": 9.759997782780582e-06, "loss": 0.3726, "step": 9301 }, { "epoch": 0.9457096380642538, "grad_norm": 0.30930617451667786, "learning_rate": 9.759889140404162e-06, "loss": 0.376, "step": 9302 }, { "epoch": 0.9458113054087027, "grad_norm": 0.30484631657600403, "learning_rate": 9.759780474048553e-06, "loss": 0.36, "step": 9303 }, { "epoch": 0.9459129727531517, "grad_norm": 0.32016992568969727, "learning_rate": 9.7596717837143e-06, "loss": 0.3597, "step": 9304 }, { "epoch": 0.9460146400976006, "grad_norm": 0.33171749114990234, "learning_rate": 9.759563069401952e-06, "loss": 0.386, "step": 9305 }, { "epoch": 0.9461163074420497, "grad_norm": 0.28821566700935364, "learning_rate": 9.759454331112056e-06, "loss": 0.3847, "step": 9306 }, { "epoch": 0.9462179747864986, "grad_norm": 0.3739508390426636, "learning_rate": 9.75934556884516e-06, "loss": 0.4131, "step": 9307 }, { "epoch": 0.9463196421309475, "grad_norm": 0.33034661412239075, "learning_rate": 9.759236782601814e-06, "loss": 0.386, "step": 9308 }, { "epoch": 0.9464213094753965, "grad_norm": 0.3116007149219513, "learning_rate": 9.759127972382561e-06, "loss": 0.3954, "step": 9309 }, { "epoch": 0.9465229768198454, "grad_norm": 0.3146355152130127, "learning_rate": 9.759019138187955e-06, "loss": 0.4004, "step": 9310 }, { "epoch": 0.9466246441642945, "grad_norm": 0.31777524948120117, "learning_rate": 9.75891028001854e-06, "loss": 0.4079, "step": 9311 }, { "epoch": 0.9467263115087434, "grad_norm": 0.2927410900592804, "learning_rate": 9.758801397874867e-06, "loss": 0.3827, "step": 9312 }, { "epoch": 0.9468279788531924, "grad_norm": 0.3137059807777405, "learning_rate": 9.758692491757481e-06, "loss": 0.3831, "step": 9313 }, { "epoch": 0.9469296461976413, "grad_norm": 0.32933175563812256, "learning_rate": 9.758583561666934e-06, "loss": 0.3513, "step": 9314 }, { "epoch": 0.9470313135420902, "grad_norm": 0.3092595636844635, "learning_rate": 9.758474607603776e-06, "loss": 0.3935, "step": 9315 }, { "epoch": 0.9471329808865392, "grad_norm": 0.3161822557449341, "learning_rate": 9.758365629568552e-06, "loss": 0.4232, "step": 9316 }, { "epoch": 0.9472346482309882, "grad_norm": 0.344154953956604, "learning_rate": 9.758256627561812e-06, "loss": 0.3509, "step": 9317 }, { "epoch": 0.9473363155754372, "grad_norm": 0.31915101408958435, "learning_rate": 9.758147601584106e-06, "loss": 0.3902, "step": 9318 }, { "epoch": 0.9474379829198861, "grad_norm": 0.3041531443595886, "learning_rate": 9.758038551635985e-06, "loss": 0.4114, "step": 9319 }, { "epoch": 0.9475396502643351, "grad_norm": 0.30729156732559204, "learning_rate": 9.757929477717996e-06, "loss": 0.3983, "step": 9320 }, { "epoch": 0.947641317608784, "grad_norm": 0.3271055817604065, "learning_rate": 9.757820379830686e-06, "loss": 0.3707, "step": 9321 }, { "epoch": 0.9477429849532331, "grad_norm": 0.31525346636772156, "learning_rate": 9.75771125797461e-06, "loss": 0.3649, "step": 9322 }, { "epoch": 0.947844652297682, "grad_norm": 0.31999534368515015, "learning_rate": 9.757602112150317e-06, "loss": 0.4097, "step": 9323 }, { "epoch": 0.9479463196421309, "grad_norm": 0.33059269189834595, "learning_rate": 9.757492942358353e-06, "loss": 0.3758, "step": 9324 }, { "epoch": 0.9480479869865799, "grad_norm": 0.34293046593666077, "learning_rate": 9.75738374859927e-06, "loss": 0.3507, "step": 9325 }, { "epoch": 0.9481496543310288, "grad_norm": 0.3156643509864807, "learning_rate": 9.757274530873618e-06, "loss": 0.3783, "step": 9326 }, { "epoch": 0.9482513216754779, "grad_norm": 0.31459885835647583, "learning_rate": 9.757165289181949e-06, "loss": 0.4122, "step": 9327 }, { "epoch": 0.9483529890199268, "grad_norm": 0.33760154247283936, "learning_rate": 9.757056023524808e-06, "loss": 0.3753, "step": 9328 }, { "epoch": 0.9484546563643758, "grad_norm": 0.3198468089103699, "learning_rate": 9.756946733902755e-06, "loss": 0.3494, "step": 9329 }, { "epoch": 0.9485563237088247, "grad_norm": 0.35173487663269043, "learning_rate": 9.75683742031633e-06, "loss": 0.4279, "step": 9330 }, { "epoch": 0.9486579910532736, "grad_norm": 0.310377836227417, "learning_rate": 9.75672808276609e-06, "loss": 0.3747, "step": 9331 }, { "epoch": 0.9487596583977227, "grad_norm": 0.31837210059165955, "learning_rate": 9.756618721252585e-06, "loss": 0.3787, "step": 9332 }, { "epoch": 0.9488613257421716, "grad_norm": 0.3486345112323761, "learning_rate": 9.756509335776364e-06, "loss": 0.3959, "step": 9333 }, { "epoch": 0.9489629930866206, "grad_norm": 0.28192561864852905, "learning_rate": 9.75639992633798e-06, "loss": 0.3682, "step": 9334 }, { "epoch": 0.9490646604310695, "grad_norm": 0.32260221242904663, "learning_rate": 9.756290492937984e-06, "loss": 0.435, "step": 9335 }, { "epoch": 0.9491663277755185, "grad_norm": 0.32132768630981445, "learning_rate": 9.756181035576927e-06, "loss": 0.3724, "step": 9336 }, { "epoch": 0.9492679951199675, "grad_norm": 0.3336917459964752, "learning_rate": 9.756071554255359e-06, "loss": 0.3807, "step": 9337 }, { "epoch": 0.9493696624644165, "grad_norm": 0.3211337625980377, "learning_rate": 9.755962048973833e-06, "loss": 0.4009, "step": 9338 }, { "epoch": 0.9494713298088654, "grad_norm": 0.29741400480270386, "learning_rate": 9.7558525197329e-06, "loss": 0.3896, "step": 9339 }, { "epoch": 0.9495729971533143, "grad_norm": 0.3025676906108856, "learning_rate": 9.755742966533113e-06, "loss": 0.3942, "step": 9340 }, { "epoch": 0.9496746644977633, "grad_norm": 0.32608455419540405, "learning_rate": 9.755633389375024e-06, "loss": 0.3872, "step": 9341 }, { "epoch": 0.9497763318422123, "grad_norm": 0.3190976083278656, "learning_rate": 9.75552378825918e-06, "loss": 0.3852, "step": 9342 }, { "epoch": 0.9498779991866613, "grad_norm": 0.2833960950374603, "learning_rate": 9.75541416318614e-06, "loss": 0.3867, "step": 9343 }, { "epoch": 0.9499796665311102, "grad_norm": 0.32056495547294617, "learning_rate": 9.755304514156455e-06, "loss": 0.4046, "step": 9344 }, { "epoch": 0.9500813338755592, "grad_norm": 0.3403708040714264, "learning_rate": 9.755194841170674e-06, "loss": 0.4109, "step": 9345 }, { "epoch": 0.9501830012200081, "grad_norm": 0.327292263507843, "learning_rate": 9.755085144229353e-06, "loss": 0.4275, "step": 9346 }, { "epoch": 0.9502846685644571, "grad_norm": 0.36324867606163025, "learning_rate": 9.754975423333042e-06, "loss": 0.4084, "step": 9347 }, { "epoch": 0.9503863359089061, "grad_norm": 0.3513815701007843, "learning_rate": 9.754865678482294e-06, "loss": 0.4007, "step": 9348 }, { "epoch": 0.950488003253355, "grad_norm": 0.2967403829097748, "learning_rate": 9.754755909677663e-06, "loss": 0.3863, "step": 9349 }, { "epoch": 0.950589670597804, "grad_norm": 0.3279682397842407, "learning_rate": 9.754646116919702e-06, "loss": 0.3665, "step": 9350 }, { "epoch": 0.9506913379422529, "grad_norm": 0.36182457208633423, "learning_rate": 9.754536300208963e-06, "loss": 0.3926, "step": 9351 }, { "epoch": 0.950793005286702, "grad_norm": 0.2893187999725342, "learning_rate": 9.754426459546001e-06, "loss": 0.3821, "step": 9352 }, { "epoch": 0.9508946726311509, "grad_norm": 0.3470836579799652, "learning_rate": 9.754316594931368e-06, "loss": 0.4576, "step": 9353 }, { "epoch": 0.9509963399755998, "grad_norm": 0.3064196705818176, "learning_rate": 9.754206706365618e-06, "loss": 0.3993, "step": 9354 }, { "epoch": 0.9510980073200488, "grad_norm": 0.30329030752182007, "learning_rate": 9.754096793849303e-06, "loss": 0.3685, "step": 9355 }, { "epoch": 0.9511996746644977, "grad_norm": 0.30933836102485657, "learning_rate": 9.75398685738298e-06, "loss": 0.3893, "step": 9356 }, { "epoch": 0.9513013420089467, "grad_norm": 0.3318711519241333, "learning_rate": 9.7538768969672e-06, "loss": 0.4044, "step": 9357 }, { "epoch": 0.9514030093533957, "grad_norm": 0.32809728384017944, "learning_rate": 9.753766912602518e-06, "loss": 0.3839, "step": 9358 }, { "epoch": 0.9515046766978447, "grad_norm": 0.33438023924827576, "learning_rate": 9.753656904289489e-06, "loss": 0.3925, "step": 9359 }, { "epoch": 0.9516063440422936, "grad_norm": 0.29209476709365845, "learning_rate": 9.753546872028666e-06, "loss": 0.4081, "step": 9360 }, { "epoch": 0.9517080113867425, "grad_norm": 0.3341488540172577, "learning_rate": 9.753436815820602e-06, "loss": 0.4036, "step": 9361 }, { "epoch": 0.9518096787311915, "grad_norm": 0.3246075510978699, "learning_rate": 9.753326735665853e-06, "loss": 0.3733, "step": 9362 }, { "epoch": 0.9519113460756405, "grad_norm": 0.32463183999061584, "learning_rate": 9.753216631564974e-06, "loss": 0.3941, "step": 9363 }, { "epoch": 0.9520130134200895, "grad_norm": 0.31796741485595703, "learning_rate": 9.753106503518518e-06, "loss": 0.3839, "step": 9364 }, { "epoch": 0.9521146807645384, "grad_norm": 0.35716915130615234, "learning_rate": 9.752996351527044e-06, "loss": 0.4037, "step": 9365 }, { "epoch": 0.9522163481089874, "grad_norm": 0.33048638701438904, "learning_rate": 9.7528861755911e-06, "loss": 0.3583, "step": 9366 }, { "epoch": 0.9523180154534363, "grad_norm": 0.3104676902294159, "learning_rate": 9.752775975711249e-06, "loss": 0.3611, "step": 9367 }, { "epoch": 0.9524196827978854, "grad_norm": 0.3652971088886261, "learning_rate": 9.75266575188804e-06, "loss": 0.3569, "step": 9368 }, { "epoch": 0.9525213501423343, "grad_norm": 0.35574957728385925, "learning_rate": 9.75255550412203e-06, "loss": 0.3835, "step": 9369 }, { "epoch": 0.9526230174867832, "grad_norm": 0.3123045563697815, "learning_rate": 9.752445232413774e-06, "loss": 0.3576, "step": 9370 }, { "epoch": 0.9527246848312322, "grad_norm": 0.32971784472465515, "learning_rate": 9.75233493676383e-06, "loss": 0.377, "step": 9371 }, { "epoch": 0.9528263521756811, "grad_norm": 0.3247165381908417, "learning_rate": 9.752224617172753e-06, "loss": 0.3894, "step": 9372 }, { "epoch": 0.9529280195201302, "grad_norm": 0.3140277564525604, "learning_rate": 9.752114273641093e-06, "loss": 0.3378, "step": 9373 }, { "epoch": 0.9530296868645791, "grad_norm": 0.33221569657325745, "learning_rate": 9.752003906169415e-06, "loss": 0.3876, "step": 9374 }, { "epoch": 0.9531313542090281, "grad_norm": 0.3249284029006958, "learning_rate": 9.751893514758268e-06, "loss": 0.4042, "step": 9375 }, { "epoch": 0.953233021553477, "grad_norm": 0.31770819425582886, "learning_rate": 9.751783099408212e-06, "loss": 0.3909, "step": 9376 }, { "epoch": 0.9533346888979259, "grad_norm": 0.31749844551086426, "learning_rate": 9.751672660119801e-06, "loss": 0.3779, "step": 9377 }, { "epoch": 0.953436356242375, "grad_norm": 0.32274213433265686, "learning_rate": 9.751562196893594e-06, "loss": 0.3893, "step": 9378 }, { "epoch": 0.9535380235868239, "grad_norm": 0.3238515257835388, "learning_rate": 9.751451709730146e-06, "loss": 0.3866, "step": 9379 }, { "epoch": 0.9536396909312729, "grad_norm": 0.35856881737709045, "learning_rate": 9.75134119863001e-06, "loss": 0.3892, "step": 9380 }, { "epoch": 0.9537413582757218, "grad_norm": 0.3418983817100525, "learning_rate": 9.751230663593749e-06, "loss": 0.4054, "step": 9381 }, { "epoch": 0.9538430256201708, "grad_norm": 0.3633604347705841, "learning_rate": 9.751120104621917e-06, "loss": 0.3948, "step": 9382 }, { "epoch": 0.9539446929646198, "grad_norm": 0.3474491536617279, "learning_rate": 9.75100952171507e-06, "loss": 0.3968, "step": 9383 }, { "epoch": 0.9540463603090688, "grad_norm": 0.31641826033592224, "learning_rate": 9.750898914873767e-06, "loss": 0.4089, "step": 9384 }, { "epoch": 0.9541480276535177, "grad_norm": 0.339884877204895, "learning_rate": 9.750788284098564e-06, "loss": 0.3945, "step": 9385 }, { "epoch": 0.9542496949979666, "grad_norm": 0.35991519689559937, "learning_rate": 9.750677629390017e-06, "loss": 0.3928, "step": 9386 }, { "epoch": 0.9543513623424156, "grad_norm": 0.3267268240451813, "learning_rate": 9.750566950748687e-06, "loss": 0.3972, "step": 9387 }, { "epoch": 0.9544530296868646, "grad_norm": 0.3158368766307831, "learning_rate": 9.750456248175128e-06, "loss": 0.3606, "step": 9388 }, { "epoch": 0.9545546970313136, "grad_norm": 0.3242937922477722, "learning_rate": 9.750345521669901e-06, "loss": 0.3573, "step": 9389 }, { "epoch": 0.9546563643757625, "grad_norm": 0.32084134221076965, "learning_rate": 9.750234771233561e-06, "loss": 0.3836, "step": 9390 }, { "epoch": 0.9547580317202115, "grad_norm": 0.28108492493629456, "learning_rate": 9.750123996866667e-06, "loss": 0.3833, "step": 9391 }, { "epoch": 0.9548596990646604, "grad_norm": 0.3237813711166382, "learning_rate": 9.750013198569778e-06, "loss": 0.4053, "step": 9392 }, { "epoch": 0.9549613664091094, "grad_norm": 0.3156077563762665, "learning_rate": 9.74990237634345e-06, "loss": 0.4033, "step": 9393 }, { "epoch": 0.9550630337535584, "grad_norm": 0.3106790781021118, "learning_rate": 9.749791530188244e-06, "loss": 0.3933, "step": 9394 }, { "epoch": 0.9551647010980073, "grad_norm": 0.3034692704677582, "learning_rate": 9.749680660104715e-06, "loss": 0.3613, "step": 9395 }, { "epoch": 0.9552663684424563, "grad_norm": 0.3060033321380615, "learning_rate": 9.749569766093426e-06, "loss": 0.3993, "step": 9396 }, { "epoch": 0.9553680357869052, "grad_norm": 0.31383129954338074, "learning_rate": 9.749458848154931e-06, "loss": 0.42, "step": 9397 }, { "epoch": 0.9554697031313543, "grad_norm": 0.3323477506637573, "learning_rate": 9.749347906289792e-06, "loss": 0.4026, "step": 9398 }, { "epoch": 0.9555713704758032, "grad_norm": 0.2907595634460449, "learning_rate": 9.749236940498567e-06, "loss": 0.3797, "step": 9399 }, { "epoch": 0.9556730378202521, "grad_norm": 0.3280189633369446, "learning_rate": 9.749125950781815e-06, "loss": 0.3961, "step": 9400 }, { "epoch": 0.9557747051647011, "grad_norm": 0.3390052020549774, "learning_rate": 9.749014937140096e-06, "loss": 0.3951, "step": 9401 }, { "epoch": 0.95587637250915, "grad_norm": 0.3189302682876587, "learning_rate": 9.748903899573965e-06, "loss": 0.3753, "step": 9402 }, { "epoch": 0.955978039853599, "grad_norm": 0.3433730900287628, "learning_rate": 9.748792838083987e-06, "loss": 0.3722, "step": 9403 }, { "epoch": 0.956079707198048, "grad_norm": 0.31491920351982117, "learning_rate": 9.748681752670719e-06, "loss": 0.4115, "step": 9404 }, { "epoch": 0.956181374542497, "grad_norm": 0.32947298884391785, "learning_rate": 9.74857064333472e-06, "loss": 0.4013, "step": 9405 }, { "epoch": 0.9562830418869459, "grad_norm": 0.3325786590576172, "learning_rate": 9.748459510076552e-06, "loss": 0.371, "step": 9406 }, { "epoch": 0.9563847092313948, "grad_norm": 0.323008269071579, "learning_rate": 9.748348352896773e-06, "loss": 0.3715, "step": 9407 }, { "epoch": 0.9564863765758438, "grad_norm": 0.3244336247444153, "learning_rate": 9.748237171795943e-06, "loss": 0.382, "step": 9408 }, { "epoch": 0.9565880439202928, "grad_norm": 0.30828073620796204, "learning_rate": 9.748125966774621e-06, "loss": 0.3563, "step": 9409 }, { "epoch": 0.9566897112647418, "grad_norm": 0.3105379045009613, "learning_rate": 9.74801473783337e-06, "loss": 0.3547, "step": 9410 }, { "epoch": 0.9567913786091907, "grad_norm": 0.30736884474754333, "learning_rate": 9.747903484972751e-06, "loss": 0.3606, "step": 9411 }, { "epoch": 0.9568930459536397, "grad_norm": 0.3402230739593506, "learning_rate": 9.74779220819332e-06, "loss": 0.3643, "step": 9412 }, { "epoch": 0.9569947132980886, "grad_norm": 0.29747310280799866, "learning_rate": 9.74768090749564e-06, "loss": 0.3753, "step": 9413 }, { "epoch": 0.9570963806425377, "grad_norm": 0.3159017860889435, "learning_rate": 9.747569582880274e-06, "loss": 0.3732, "step": 9414 }, { "epoch": 0.9571980479869866, "grad_norm": 0.3381446599960327, "learning_rate": 9.747458234347778e-06, "loss": 0.3903, "step": 9415 }, { "epoch": 0.9572997153314355, "grad_norm": 0.33295246958732605, "learning_rate": 9.747346861898718e-06, "loss": 0.3805, "step": 9416 }, { "epoch": 0.9574013826758845, "grad_norm": 0.29365819692611694, "learning_rate": 9.747235465533651e-06, "loss": 0.3676, "step": 9417 }, { "epoch": 0.9575030500203334, "grad_norm": 0.30038896203041077, "learning_rate": 9.747124045253141e-06, "loss": 0.3864, "step": 9418 }, { "epoch": 0.9576047173647825, "grad_norm": 0.3504304885864258, "learning_rate": 9.747012601057747e-06, "loss": 0.3766, "step": 9419 }, { "epoch": 0.9577063847092314, "grad_norm": 0.325273722410202, "learning_rate": 9.746901132948032e-06, "loss": 0.3885, "step": 9420 }, { "epoch": 0.9578080520536804, "grad_norm": 0.3223854899406433, "learning_rate": 9.746789640924557e-06, "loss": 0.3842, "step": 9421 }, { "epoch": 0.9579097193981293, "grad_norm": 0.3129887878894806, "learning_rate": 9.746678124987884e-06, "loss": 0.3848, "step": 9422 }, { "epoch": 0.9580113867425782, "grad_norm": 0.3538753390312195, "learning_rate": 9.746566585138576e-06, "loss": 0.421, "step": 9423 }, { "epoch": 0.9581130540870273, "grad_norm": 0.31870004534721375, "learning_rate": 9.74645502137719e-06, "loss": 0.3727, "step": 9424 }, { "epoch": 0.9582147214314762, "grad_norm": 0.31766942143440247, "learning_rate": 9.746343433704296e-06, "loss": 0.3856, "step": 9425 }, { "epoch": 0.9583163887759252, "grad_norm": 0.31531816720962524, "learning_rate": 9.746231822120448e-06, "loss": 0.365, "step": 9426 }, { "epoch": 0.9584180561203741, "grad_norm": 0.36730965971946716, "learning_rate": 9.746120186626213e-06, "loss": 0.4046, "step": 9427 }, { "epoch": 0.9585197234648231, "grad_norm": 0.3245594799518585, "learning_rate": 9.746008527222153e-06, "loss": 0.3996, "step": 9428 }, { "epoch": 0.9586213908092721, "grad_norm": 0.29844191670417786, "learning_rate": 9.745896843908829e-06, "loss": 0.3856, "step": 9429 }, { "epoch": 0.958723058153721, "grad_norm": 0.30358147621154785, "learning_rate": 9.745785136686806e-06, "loss": 0.3674, "step": 9430 }, { "epoch": 0.95882472549817, "grad_norm": 0.3396075367927551, "learning_rate": 9.745673405556644e-06, "loss": 0.3801, "step": 9431 }, { "epoch": 0.9589263928426189, "grad_norm": 0.3030127286911011, "learning_rate": 9.745561650518907e-06, "loss": 0.3619, "step": 9432 }, { "epoch": 0.9590280601870679, "grad_norm": 0.3059757947921753, "learning_rate": 9.745449871574157e-06, "loss": 0.3798, "step": 9433 }, { "epoch": 0.9591297275315169, "grad_norm": 0.28741857409477234, "learning_rate": 9.74533806872296e-06, "loss": 0.4074, "step": 9434 }, { "epoch": 0.9592313948759659, "grad_norm": 0.29077374935150146, "learning_rate": 9.745226241965877e-06, "loss": 0.4572, "step": 9435 }, { "epoch": 0.9593330622204148, "grad_norm": 0.3280205726623535, "learning_rate": 9.745114391303473e-06, "loss": 0.4084, "step": 9436 }, { "epoch": 0.9594347295648638, "grad_norm": 0.2836388647556305, "learning_rate": 9.74500251673631e-06, "loss": 0.3704, "step": 9437 }, { "epoch": 0.9595363969093127, "grad_norm": 0.3026672303676605, "learning_rate": 9.74489061826495e-06, "loss": 0.389, "step": 9438 }, { "epoch": 0.9596380642537617, "grad_norm": 0.3148046135902405, "learning_rate": 9.74477869588996e-06, "loss": 0.4142, "step": 9439 }, { "epoch": 0.9597397315982107, "grad_norm": 0.29486745595932007, "learning_rate": 9.744666749611902e-06, "loss": 0.4074, "step": 9440 }, { "epoch": 0.9598413989426596, "grad_norm": 0.30561545491218567, "learning_rate": 9.744554779431342e-06, "loss": 0.4123, "step": 9441 }, { "epoch": 0.9599430662871086, "grad_norm": 0.32050496339797974, "learning_rate": 9.744442785348841e-06, "loss": 0.3762, "step": 9442 }, { "epoch": 0.9600447336315575, "grad_norm": 0.3394029140472412, "learning_rate": 9.744330767364965e-06, "loss": 0.3946, "step": 9443 }, { "epoch": 0.9601464009760065, "grad_norm": 0.3039279282093048, "learning_rate": 9.744218725480279e-06, "loss": 0.3916, "step": 9444 }, { "epoch": 0.9602480683204555, "grad_norm": 0.3224914073944092, "learning_rate": 9.744106659695345e-06, "loss": 0.3689, "step": 9445 }, { "epoch": 0.9603497356649044, "grad_norm": 0.2835688889026642, "learning_rate": 9.74399457001073e-06, "loss": 0.3728, "step": 9446 }, { "epoch": 0.9604514030093534, "grad_norm": 0.2902989983558655, "learning_rate": 9.743882456426996e-06, "loss": 0.399, "step": 9447 }, { "epoch": 0.9605530703538023, "grad_norm": 0.3420191705226898, "learning_rate": 9.74377031894471e-06, "loss": 0.3907, "step": 9448 }, { "epoch": 0.9606547376982513, "grad_norm": 0.2787429988384247, "learning_rate": 9.743658157564437e-06, "loss": 0.3726, "step": 9449 }, { "epoch": 0.9607564050427003, "grad_norm": 0.3139103651046753, "learning_rate": 9.743545972286742e-06, "loss": 0.3865, "step": 9450 }, { "epoch": 0.9608580723871493, "grad_norm": 0.32219037413597107, "learning_rate": 9.743433763112188e-06, "loss": 0.3791, "step": 9451 }, { "epoch": 0.9609597397315982, "grad_norm": 0.32547104358673096, "learning_rate": 9.743321530041344e-06, "loss": 0.3763, "step": 9452 }, { "epoch": 0.9610614070760471, "grad_norm": 0.2979939877986908, "learning_rate": 9.743209273074774e-06, "loss": 0.3389, "step": 9453 }, { "epoch": 0.9611630744204961, "grad_norm": 0.30668625235557556, "learning_rate": 9.74309699221304e-06, "loss": 0.3866, "step": 9454 }, { "epoch": 0.9612647417649451, "grad_norm": 0.3187551200389862, "learning_rate": 9.742984687456711e-06, "loss": 0.3754, "step": 9455 }, { "epoch": 0.9613664091093941, "grad_norm": 0.2910953462123871, "learning_rate": 9.742872358806354e-06, "loss": 0.3647, "step": 9456 }, { "epoch": 0.961468076453843, "grad_norm": 0.3316482901573181, "learning_rate": 9.742760006262533e-06, "loss": 0.3832, "step": 9457 }, { "epoch": 0.961569743798292, "grad_norm": 0.31189414858818054, "learning_rate": 9.742647629825813e-06, "loss": 0.3859, "step": 9458 }, { "epoch": 0.9616714111427409, "grad_norm": 0.3171228766441345, "learning_rate": 9.742535229496761e-06, "loss": 0.4409, "step": 9459 }, { "epoch": 0.96177307848719, "grad_norm": 0.3154910206794739, "learning_rate": 9.742422805275944e-06, "loss": 0.3887, "step": 9460 }, { "epoch": 0.9618747458316389, "grad_norm": 0.3039776086807251, "learning_rate": 9.742310357163928e-06, "loss": 0.3941, "step": 9461 }, { "epoch": 0.9619764131760878, "grad_norm": 0.3385378122329712, "learning_rate": 9.74219788516128e-06, "loss": 0.4146, "step": 9462 }, { "epoch": 0.9620780805205368, "grad_norm": 0.3215848505496979, "learning_rate": 9.742085389268565e-06, "loss": 0.3791, "step": 9463 }, { "epoch": 0.9621797478649857, "grad_norm": 0.29675501585006714, "learning_rate": 9.74197286948635e-06, "loss": 0.373, "step": 9464 }, { "epoch": 0.9622814152094348, "grad_norm": 0.27147164940834045, "learning_rate": 9.741860325815204e-06, "loss": 0.4035, "step": 9465 }, { "epoch": 0.9623830825538837, "grad_norm": 0.319978803396225, "learning_rate": 9.741747758255692e-06, "loss": 0.3748, "step": 9466 }, { "epoch": 0.9624847498983327, "grad_norm": 0.31014856696128845, "learning_rate": 9.741635166808381e-06, "loss": 0.3482, "step": 9467 }, { "epoch": 0.9625864172427816, "grad_norm": 0.323263943195343, "learning_rate": 9.74152255147384e-06, "loss": 0.4098, "step": 9468 }, { "epoch": 0.9626880845872305, "grad_norm": 0.36078858375549316, "learning_rate": 9.741409912252635e-06, "loss": 0.3958, "step": 9469 }, { "epoch": 0.9627897519316796, "grad_norm": 0.2981296479701996, "learning_rate": 9.741297249145332e-06, "loss": 0.3709, "step": 9470 }, { "epoch": 0.9628914192761285, "grad_norm": 0.29374077916145325, "learning_rate": 9.741184562152501e-06, "loss": 0.3707, "step": 9471 }, { "epoch": 0.9629930866205775, "grad_norm": 0.3311518728733063, "learning_rate": 9.74107185127471e-06, "loss": 0.3897, "step": 9472 }, { "epoch": 0.9630947539650264, "grad_norm": 0.32181572914123535, "learning_rate": 9.740959116512524e-06, "loss": 0.4046, "step": 9473 }, { "epoch": 0.9631964213094754, "grad_norm": 0.29436010122299194, "learning_rate": 9.740846357866515e-06, "loss": 0.3829, "step": 9474 }, { "epoch": 0.9632980886539244, "grad_norm": 0.30882999300956726, "learning_rate": 9.740733575337246e-06, "loss": 0.4083, "step": 9475 }, { "epoch": 0.9633997559983734, "grad_norm": 0.3067151606082916, "learning_rate": 9.740620768925289e-06, "loss": 0.3808, "step": 9476 }, { "epoch": 0.9635014233428223, "grad_norm": 0.3385072350502014, "learning_rate": 9.740507938631212e-06, "loss": 0.4107, "step": 9477 }, { "epoch": 0.9636030906872712, "grad_norm": 0.30609220266342163, "learning_rate": 9.740395084455582e-06, "loss": 0.3731, "step": 9478 }, { "epoch": 0.9637047580317202, "grad_norm": 0.3029637336730957, "learning_rate": 9.740282206398968e-06, "loss": 0.3729, "step": 9479 }, { "epoch": 0.9638064253761692, "grad_norm": 0.3074740469455719, "learning_rate": 9.740169304461938e-06, "loss": 0.3762, "step": 9480 }, { "epoch": 0.9639080927206182, "grad_norm": 0.3131153881549835, "learning_rate": 9.740056378645062e-06, "loss": 0.3814, "step": 9481 }, { "epoch": 0.9640097600650671, "grad_norm": 0.31489917635917664, "learning_rate": 9.739943428948908e-06, "loss": 0.4106, "step": 9482 }, { "epoch": 0.964111427409516, "grad_norm": 0.3123965859413147, "learning_rate": 9.739830455374045e-06, "loss": 0.4403, "step": 9483 }, { "epoch": 0.964213094753965, "grad_norm": 0.304178923368454, "learning_rate": 9.739717457921044e-06, "loss": 0.379, "step": 9484 }, { "epoch": 0.9643147620984139, "grad_norm": 0.34927091002464294, "learning_rate": 9.739604436590471e-06, "loss": 0.3882, "step": 9485 }, { "epoch": 0.964416429442863, "grad_norm": 0.30607759952545166, "learning_rate": 9.739491391382897e-06, "loss": 0.3783, "step": 9486 }, { "epoch": 0.9645180967873119, "grad_norm": 0.3160981237888336, "learning_rate": 9.739378322298893e-06, "loss": 0.3984, "step": 9487 }, { "epoch": 0.9646197641317609, "grad_norm": 0.33975452184677124, "learning_rate": 9.739265229339028e-06, "loss": 0.4013, "step": 9488 }, { "epoch": 0.9647214314762098, "grad_norm": 0.3219572603702545, "learning_rate": 9.739152112503868e-06, "loss": 0.4018, "step": 9489 }, { "epoch": 0.9648230988206588, "grad_norm": 0.2967930734157562, "learning_rate": 9.73903897179399e-06, "loss": 0.3734, "step": 9490 }, { "epoch": 0.9649247661651078, "grad_norm": 0.29662638902664185, "learning_rate": 9.738925807209958e-06, "loss": 0.3987, "step": 9491 }, { "epoch": 0.9650264335095567, "grad_norm": 0.311778724193573, "learning_rate": 9.738812618752342e-06, "loss": 0.3542, "step": 9492 }, { "epoch": 0.9651281008540057, "grad_norm": 0.3542109429836273, "learning_rate": 9.738699406421717e-06, "loss": 0.3892, "step": 9493 }, { "epoch": 0.9652297681984546, "grad_norm": 0.29969608783721924, "learning_rate": 9.738586170218649e-06, "loss": 0.426, "step": 9494 }, { "epoch": 0.9653314355429036, "grad_norm": 0.3129812479019165, "learning_rate": 9.73847291014371e-06, "loss": 0.3762, "step": 9495 }, { "epoch": 0.9654331028873526, "grad_norm": 0.3602316379547119, "learning_rate": 9.738359626197471e-06, "loss": 0.3625, "step": 9496 }, { "epoch": 0.9655347702318016, "grad_norm": 0.2915186882019043, "learning_rate": 9.738246318380501e-06, "loss": 0.3882, "step": 9497 }, { "epoch": 0.9656364375762505, "grad_norm": 0.31460654735565186, "learning_rate": 9.738132986693374e-06, "loss": 0.3828, "step": 9498 }, { "epoch": 0.9657381049206994, "grad_norm": 0.3439030945301056, "learning_rate": 9.738019631136657e-06, "loss": 0.3963, "step": 9499 }, { "epoch": 0.9658397722651484, "grad_norm": 0.2945864796638489, "learning_rate": 9.737906251710923e-06, "loss": 0.3746, "step": 9500 }, { "epoch": 0.9659414396095974, "grad_norm": 0.30362600088119507, "learning_rate": 9.737792848416745e-06, "loss": 0.3892, "step": 9501 }, { "epoch": 0.9660431069540464, "grad_norm": 0.2974526286125183, "learning_rate": 9.737679421254692e-06, "loss": 0.372, "step": 9502 }, { "epoch": 0.9661447742984953, "grad_norm": 0.3145897388458252, "learning_rate": 9.737565970225335e-06, "loss": 0.3705, "step": 9503 }, { "epoch": 0.9662464416429443, "grad_norm": 0.3103581666946411, "learning_rate": 9.737452495329246e-06, "loss": 0.3913, "step": 9504 }, { "epoch": 0.9663481089873932, "grad_norm": 0.32655322551727295, "learning_rate": 9.737338996566998e-06, "loss": 0.3908, "step": 9505 }, { "epoch": 0.9664497763318423, "grad_norm": 0.30539342761039734, "learning_rate": 9.737225473939163e-06, "loss": 0.3759, "step": 9506 }, { "epoch": 0.9665514436762912, "grad_norm": 0.2803161144256592, "learning_rate": 9.73711192744631e-06, "loss": 0.3814, "step": 9507 }, { "epoch": 0.9666531110207401, "grad_norm": 0.2861683666706085, "learning_rate": 9.736998357089014e-06, "loss": 0.3638, "step": 9508 }, { "epoch": 0.9667547783651891, "grad_norm": 0.29981666803359985, "learning_rate": 9.736884762867847e-06, "loss": 0.4026, "step": 9509 }, { "epoch": 0.966856445709638, "grad_norm": 0.3463799059391022, "learning_rate": 9.736771144783378e-06, "loss": 0.4042, "step": 9510 }, { "epoch": 0.9669581130540871, "grad_norm": 0.3077075779438019, "learning_rate": 9.736657502836181e-06, "loss": 0.4097, "step": 9511 }, { "epoch": 0.967059780398536, "grad_norm": 0.31399914622306824, "learning_rate": 9.73654383702683e-06, "loss": 0.3548, "step": 9512 }, { "epoch": 0.967161447742985, "grad_norm": 0.28966251015663147, "learning_rate": 9.736430147355899e-06, "loss": 0.369, "step": 9513 }, { "epoch": 0.9672631150874339, "grad_norm": 0.33258312940597534, "learning_rate": 9.736316433823956e-06, "loss": 0.4245, "step": 9514 }, { "epoch": 0.9673647824318828, "grad_norm": 0.3377875089645386, "learning_rate": 9.736202696431577e-06, "loss": 0.3686, "step": 9515 }, { "epoch": 0.9674664497763319, "grad_norm": 0.32704901695251465, "learning_rate": 9.736088935179335e-06, "loss": 0.4022, "step": 9516 }, { "epoch": 0.9675681171207808, "grad_norm": 0.3742475211620331, "learning_rate": 9.735975150067802e-06, "loss": 0.3873, "step": 9517 }, { "epoch": 0.9676697844652298, "grad_norm": 0.3315330445766449, "learning_rate": 9.73586134109755e-06, "loss": 0.3538, "step": 9518 }, { "epoch": 0.9677714518096787, "grad_norm": 0.292660117149353, "learning_rate": 9.735747508269155e-06, "loss": 0.3999, "step": 9519 }, { "epoch": 0.9678731191541277, "grad_norm": 0.3154953122138977, "learning_rate": 9.73563365158319e-06, "loss": 0.4277, "step": 9520 }, { "epoch": 0.9679747864985767, "grad_norm": 0.32330530881881714, "learning_rate": 9.735519771040228e-06, "loss": 0.3511, "step": 9521 }, { "epoch": 0.9680764538430257, "grad_norm": 0.32913070917129517, "learning_rate": 9.735405866640844e-06, "loss": 0.3887, "step": 9522 }, { "epoch": 0.9681781211874746, "grad_norm": 0.32953041791915894, "learning_rate": 9.735291938385607e-06, "loss": 0.3712, "step": 9523 }, { "epoch": 0.9682797885319235, "grad_norm": 0.30702316761016846, "learning_rate": 9.735177986275098e-06, "loss": 0.3741, "step": 9524 }, { "epoch": 0.9683814558763725, "grad_norm": 0.3137650787830353, "learning_rate": 9.735064010309884e-06, "loss": 0.3687, "step": 9525 }, { "epoch": 0.9684831232208214, "grad_norm": 0.3279253840446472, "learning_rate": 9.734950010490548e-06, "loss": 0.3802, "step": 9526 }, { "epoch": 0.9685847905652705, "grad_norm": 0.31114619970321655, "learning_rate": 9.734835986817654e-06, "loss": 0.4099, "step": 9527 }, { "epoch": 0.9686864579097194, "grad_norm": 0.3024636209011078, "learning_rate": 9.734721939291784e-06, "loss": 0.38, "step": 9528 }, { "epoch": 0.9687881252541684, "grad_norm": 0.34670311212539673, "learning_rate": 9.73460786791351e-06, "loss": 0.3855, "step": 9529 }, { "epoch": 0.9688897925986173, "grad_norm": 0.32524269819259644, "learning_rate": 9.734493772683404e-06, "loss": 0.3766, "step": 9530 }, { "epoch": 0.9689914599430662, "grad_norm": 0.35222649574279785, "learning_rate": 9.734379653602046e-06, "loss": 0.4128, "step": 9531 }, { "epoch": 0.9690931272875153, "grad_norm": 0.33050987124443054, "learning_rate": 9.73426551067001e-06, "loss": 0.4062, "step": 9532 }, { "epoch": 0.9691947946319642, "grad_norm": 0.2985708713531494, "learning_rate": 9.734151343887865e-06, "loss": 0.3972, "step": 9533 }, { "epoch": 0.9692964619764132, "grad_norm": 0.33572813868522644, "learning_rate": 9.734037153256193e-06, "loss": 0.4123, "step": 9534 }, { "epoch": 0.9693981293208621, "grad_norm": 0.3712209463119507, "learning_rate": 9.733922938775567e-06, "loss": 0.4064, "step": 9535 }, { "epoch": 0.969499796665311, "grad_norm": 0.3233555257320404, "learning_rate": 9.733808700446562e-06, "loss": 0.3812, "step": 9536 }, { "epoch": 0.9696014640097601, "grad_norm": 0.3324925899505615, "learning_rate": 9.733694438269755e-06, "loss": 0.3945, "step": 9537 }, { "epoch": 0.969703131354209, "grad_norm": 0.3410065472126007, "learning_rate": 9.733580152245717e-06, "loss": 0.3722, "step": 9538 }, { "epoch": 0.969804798698658, "grad_norm": 0.3219832181930542, "learning_rate": 9.73346584237503e-06, "loss": 0.3588, "step": 9539 }, { "epoch": 0.9699064660431069, "grad_norm": 0.31277626752853394, "learning_rate": 9.733351508658264e-06, "loss": 0.362, "step": 9540 }, { "epoch": 0.9700081333875559, "grad_norm": 0.33100053668022156, "learning_rate": 9.733237151096e-06, "loss": 0.3763, "step": 9541 }, { "epoch": 0.9701098007320049, "grad_norm": 0.32312268018722534, "learning_rate": 9.733122769688812e-06, "loss": 0.3748, "step": 9542 }, { "epoch": 0.9702114680764539, "grad_norm": 0.31790563464164734, "learning_rate": 9.733008364437276e-06, "loss": 0.4353, "step": 9543 }, { "epoch": 0.9703131354209028, "grad_norm": 0.33598387241363525, "learning_rate": 9.732893935341969e-06, "loss": 0.3842, "step": 9544 }, { "epoch": 0.9704148027653517, "grad_norm": 0.3249610662460327, "learning_rate": 9.732779482403466e-06, "loss": 0.3934, "step": 9545 }, { "epoch": 0.9705164701098007, "grad_norm": 0.3108679950237274, "learning_rate": 9.732665005622346e-06, "loss": 0.3736, "step": 9546 }, { "epoch": 0.9706181374542497, "grad_norm": 0.34178847074508667, "learning_rate": 9.732550504999184e-06, "loss": 0.3835, "step": 9547 }, { "epoch": 0.9707198047986987, "grad_norm": 0.33033129572868347, "learning_rate": 9.732435980534556e-06, "loss": 0.3825, "step": 9548 }, { "epoch": 0.9708214721431476, "grad_norm": 0.31303995847702026, "learning_rate": 9.732321432229042e-06, "loss": 0.3447, "step": 9549 }, { "epoch": 0.9709231394875966, "grad_norm": 0.3645290434360504, "learning_rate": 9.732206860083216e-06, "loss": 0.346, "step": 9550 }, { "epoch": 0.9710248068320455, "grad_norm": 0.3269103467464447, "learning_rate": 9.732092264097657e-06, "loss": 0.3971, "step": 9551 }, { "epoch": 0.9711264741764946, "grad_norm": 0.34358730912208557, "learning_rate": 9.731977644272941e-06, "loss": 0.3867, "step": 9552 }, { "epoch": 0.9712281415209435, "grad_norm": 0.3712248206138611, "learning_rate": 9.731863000609646e-06, "loss": 0.3672, "step": 9553 }, { "epoch": 0.9713298088653924, "grad_norm": 0.39536336064338684, "learning_rate": 9.73174833310835e-06, "loss": 0.3817, "step": 9554 }, { "epoch": 0.9714314762098414, "grad_norm": 0.32612982392311096, "learning_rate": 9.73163364176963e-06, "loss": 0.4105, "step": 9555 }, { "epoch": 0.9715331435542903, "grad_norm": 0.3498980402946472, "learning_rate": 9.731518926594065e-06, "loss": 0.4144, "step": 9556 }, { "epoch": 0.9716348108987394, "grad_norm": 0.3669610321521759, "learning_rate": 9.731404187582232e-06, "loss": 0.4004, "step": 9557 }, { "epoch": 0.9717364782431883, "grad_norm": 0.33336201310157776, "learning_rate": 9.731289424734708e-06, "loss": 0.4098, "step": 9558 }, { "epoch": 0.9718381455876373, "grad_norm": 0.34042322635650635, "learning_rate": 9.731174638052072e-06, "loss": 0.3994, "step": 9559 }, { "epoch": 0.9719398129320862, "grad_norm": 0.32173725962638855, "learning_rate": 9.731059827534903e-06, "loss": 0.3945, "step": 9560 }, { "epoch": 0.9720414802765351, "grad_norm": 0.3226785957813263, "learning_rate": 9.73094499318378e-06, "loss": 0.4084, "step": 9561 }, { "epoch": 0.9721431476209842, "grad_norm": 0.2930792272090912, "learning_rate": 9.730830134999278e-06, "loss": 0.3957, "step": 9562 }, { "epoch": 0.9722448149654331, "grad_norm": 0.30529820919036865, "learning_rate": 9.730715252981981e-06, "loss": 0.3952, "step": 9563 }, { "epoch": 0.9723464823098821, "grad_norm": 0.3188919126987457, "learning_rate": 9.730600347132463e-06, "loss": 0.4133, "step": 9564 }, { "epoch": 0.972448149654331, "grad_norm": 0.28732213377952576, "learning_rate": 9.730485417451304e-06, "loss": 0.3681, "step": 9565 }, { "epoch": 0.97254981699878, "grad_norm": 0.31055107712745667, "learning_rate": 9.730370463939085e-06, "loss": 0.3828, "step": 9566 }, { "epoch": 0.9726514843432289, "grad_norm": 0.321749210357666, "learning_rate": 9.730255486596384e-06, "loss": 0.3964, "step": 9567 }, { "epoch": 0.972753151687678, "grad_norm": 0.3180566132068634, "learning_rate": 9.730140485423779e-06, "loss": 0.4081, "step": 9568 }, { "epoch": 0.9728548190321269, "grad_norm": 0.3297918438911438, "learning_rate": 9.73002546042185e-06, "loss": 0.3775, "step": 9569 }, { "epoch": 0.9729564863765758, "grad_norm": 0.3216856122016907, "learning_rate": 9.729910411591177e-06, "loss": 0.3764, "step": 9570 }, { "epoch": 0.9730581537210248, "grad_norm": 0.3327368199825287, "learning_rate": 9.72979533893234e-06, "loss": 0.3881, "step": 9571 }, { "epoch": 0.9731598210654737, "grad_norm": 0.32057327032089233, "learning_rate": 9.729680242445917e-06, "loss": 0.362, "step": 9572 }, { "epoch": 0.9732614884099228, "grad_norm": 0.3523515462875366, "learning_rate": 9.72956512213249e-06, "loss": 0.3921, "step": 9573 }, { "epoch": 0.9733631557543717, "grad_norm": 0.31163841485977173, "learning_rate": 9.729449977992636e-06, "loss": 0.3985, "step": 9574 }, { "epoch": 0.9734648230988207, "grad_norm": 0.3088366985321045, "learning_rate": 9.729334810026938e-06, "loss": 0.4028, "step": 9575 }, { "epoch": 0.9735664904432696, "grad_norm": 0.3837038576602936, "learning_rate": 9.729219618235977e-06, "loss": 0.3937, "step": 9576 }, { "epoch": 0.9736681577877185, "grad_norm": 0.34065884351730347, "learning_rate": 9.729104402620329e-06, "loss": 0.3843, "step": 9577 }, { "epoch": 0.9737698251321676, "grad_norm": 0.313300758600235, "learning_rate": 9.728989163180578e-06, "loss": 0.4142, "step": 9578 }, { "epoch": 0.9738714924766165, "grad_norm": 0.3390788733959198, "learning_rate": 9.728873899917302e-06, "loss": 0.3676, "step": 9579 }, { "epoch": 0.9739731598210655, "grad_norm": 0.32610589265823364, "learning_rate": 9.728758612831084e-06, "loss": 0.3994, "step": 9580 }, { "epoch": 0.9740748271655144, "grad_norm": 0.3072904944419861, "learning_rate": 9.728643301922503e-06, "loss": 0.3595, "step": 9581 }, { "epoch": 0.9741764945099634, "grad_norm": 0.31872427463531494, "learning_rate": 9.728527967192142e-06, "loss": 0.4026, "step": 9582 }, { "epoch": 0.9742781618544124, "grad_norm": 0.29191166162490845, "learning_rate": 9.72841260864058e-06, "loss": 0.3436, "step": 9583 }, { "epoch": 0.9743798291988613, "grad_norm": 0.33403217792510986, "learning_rate": 9.728297226268399e-06, "loss": 0.3976, "step": 9584 }, { "epoch": 0.9744814965433103, "grad_norm": 0.3117140531539917, "learning_rate": 9.72818182007618e-06, "loss": 0.3732, "step": 9585 }, { "epoch": 0.9745831638877592, "grad_norm": 0.3208903968334198, "learning_rate": 9.728066390064506e-06, "loss": 0.3875, "step": 9586 }, { "epoch": 0.9746848312322082, "grad_norm": 0.2932240962982178, "learning_rate": 9.727950936233957e-06, "loss": 0.3653, "step": 9587 }, { "epoch": 0.9747864985766572, "grad_norm": 0.3390159606933594, "learning_rate": 9.727835458585113e-06, "loss": 0.3816, "step": 9588 }, { "epoch": 0.9748881659211062, "grad_norm": 0.3770570158958435, "learning_rate": 9.727719957118557e-06, "loss": 0.4164, "step": 9589 }, { "epoch": 0.9749898332655551, "grad_norm": 0.29903605580329895, "learning_rate": 9.727604431834873e-06, "loss": 0.3764, "step": 9590 }, { "epoch": 0.975091500610004, "grad_norm": 0.3194028437137604, "learning_rate": 9.72748888273464e-06, "loss": 0.3683, "step": 9591 }, { "epoch": 0.975193167954453, "grad_norm": 0.33835989236831665, "learning_rate": 9.727373309818444e-06, "loss": 0.3977, "step": 9592 }, { "epoch": 0.975294835298902, "grad_norm": 0.3206464946269989, "learning_rate": 9.727257713086862e-06, "loss": 0.3899, "step": 9593 }, { "epoch": 0.975396502643351, "grad_norm": 0.3268936276435852, "learning_rate": 9.727142092540481e-06, "loss": 0.3911, "step": 9594 }, { "epoch": 0.9754981699877999, "grad_norm": 0.28539568185806274, "learning_rate": 9.727026448179879e-06, "loss": 0.406, "step": 9595 }, { "epoch": 0.9755998373322489, "grad_norm": 0.33336254954338074, "learning_rate": 9.726910780005643e-06, "loss": 0.3894, "step": 9596 }, { "epoch": 0.9757015046766978, "grad_norm": 0.32401347160339355, "learning_rate": 9.726795088018352e-06, "loss": 0.3611, "step": 9597 }, { "epoch": 0.9758031720211469, "grad_norm": 0.33546707034111023, "learning_rate": 9.726679372218592e-06, "loss": 0.3889, "step": 9598 }, { "epoch": 0.9759048393655958, "grad_norm": 0.30437830090522766, "learning_rate": 9.726563632606946e-06, "loss": 0.3875, "step": 9599 }, { "epoch": 0.9760065067100447, "grad_norm": 0.3439030051231384, "learning_rate": 9.726447869183993e-06, "loss": 0.3846, "step": 9600 }, { "epoch": 0.9761081740544937, "grad_norm": 0.3340173065662384, "learning_rate": 9.72633208195032e-06, "loss": 0.3965, "step": 9601 }, { "epoch": 0.9762098413989426, "grad_norm": 0.33529552817344666, "learning_rate": 9.726216270906509e-06, "loss": 0.3719, "step": 9602 }, { "epoch": 0.9763115087433917, "grad_norm": 0.3584260940551758, "learning_rate": 9.726100436053144e-06, "loss": 0.4022, "step": 9603 }, { "epoch": 0.9764131760878406, "grad_norm": 0.3456677496433258, "learning_rate": 9.725984577390807e-06, "loss": 0.3918, "step": 9604 }, { "epoch": 0.9765148434322896, "grad_norm": 0.2964259088039398, "learning_rate": 9.725868694920084e-06, "loss": 0.4249, "step": 9605 }, { "epoch": 0.9766165107767385, "grad_norm": 0.3847790062427521, "learning_rate": 9.725752788641557e-06, "loss": 0.3859, "step": 9606 }, { "epoch": 0.9767181781211874, "grad_norm": 0.31214988231658936, "learning_rate": 9.725636858555811e-06, "loss": 0.3724, "step": 9607 }, { "epoch": 0.9768198454656364, "grad_norm": 0.3236638903617859, "learning_rate": 9.725520904663427e-06, "loss": 0.4237, "step": 9608 }, { "epoch": 0.9769215128100854, "grad_norm": 0.31521517038345337, "learning_rate": 9.725404926964995e-06, "loss": 0.4024, "step": 9609 }, { "epoch": 0.9770231801545344, "grad_norm": 0.2965063750743866, "learning_rate": 9.725288925461094e-06, "loss": 0.3783, "step": 9610 }, { "epoch": 0.9771248474989833, "grad_norm": 0.33279940485954285, "learning_rate": 9.725172900152311e-06, "loss": 0.4234, "step": 9611 }, { "epoch": 0.9772265148434323, "grad_norm": 0.3009551763534546, "learning_rate": 9.72505685103923e-06, "loss": 0.3839, "step": 9612 }, { "epoch": 0.9773281821878812, "grad_norm": 0.312929630279541, "learning_rate": 9.724940778122435e-06, "loss": 0.3984, "step": 9613 }, { "epoch": 0.9774298495323303, "grad_norm": 0.29880398511886597, "learning_rate": 9.724824681402511e-06, "loss": 0.4001, "step": 9614 }, { "epoch": 0.9775315168767792, "grad_norm": 0.28969547152519226, "learning_rate": 9.724708560880043e-06, "loss": 0.3673, "step": 9615 }, { "epoch": 0.9776331842212281, "grad_norm": 0.3063180148601532, "learning_rate": 9.724592416555616e-06, "loss": 0.3972, "step": 9616 }, { "epoch": 0.9777348515656771, "grad_norm": 0.3205675780773163, "learning_rate": 9.724476248429816e-06, "loss": 0.4093, "step": 9617 }, { "epoch": 0.977836518910126, "grad_norm": 0.28282999992370605, "learning_rate": 9.724360056503225e-06, "loss": 0.3908, "step": 9618 }, { "epoch": 0.9779381862545751, "grad_norm": 0.3234935402870178, "learning_rate": 9.724243840776433e-06, "loss": 0.3526, "step": 9619 }, { "epoch": 0.978039853599024, "grad_norm": 0.2990066707134247, "learning_rate": 9.724127601250025e-06, "loss": 0.3783, "step": 9620 }, { "epoch": 0.978141520943473, "grad_norm": 0.3135583698749542, "learning_rate": 9.724011337924581e-06, "loss": 0.409, "step": 9621 }, { "epoch": 0.9782431882879219, "grad_norm": 0.3001324534416199, "learning_rate": 9.72389505080069e-06, "loss": 0.411, "step": 9622 }, { "epoch": 0.9783448556323708, "grad_norm": 0.30842325091362, "learning_rate": 9.723778739878942e-06, "loss": 0.4, "step": 9623 }, { "epoch": 0.9784465229768199, "grad_norm": 0.323011577129364, "learning_rate": 9.723662405159917e-06, "loss": 0.4187, "step": 9624 }, { "epoch": 0.9785481903212688, "grad_norm": 0.2894400656223297, "learning_rate": 9.723546046644204e-06, "loss": 0.3599, "step": 9625 }, { "epoch": 0.9786498576657178, "grad_norm": 0.2958424389362335, "learning_rate": 9.72342966433239e-06, "loss": 0.378, "step": 9626 }, { "epoch": 0.9787515250101667, "grad_norm": 0.3359357714653015, "learning_rate": 9.723313258225056e-06, "loss": 0.4023, "step": 9627 }, { "epoch": 0.9788531923546157, "grad_norm": 0.30937469005584717, "learning_rate": 9.723196828322795e-06, "loss": 0.3475, "step": 9628 }, { "epoch": 0.9789548596990647, "grad_norm": 0.31142672896385193, "learning_rate": 9.72308037462619e-06, "loss": 0.407, "step": 9629 }, { "epoch": 0.9790565270435136, "grad_norm": 0.33296000957489014, "learning_rate": 9.722963897135829e-06, "loss": 0.3703, "step": 9630 }, { "epoch": 0.9791581943879626, "grad_norm": 0.30860233306884766, "learning_rate": 9.722847395852298e-06, "loss": 0.3875, "step": 9631 }, { "epoch": 0.9792598617324115, "grad_norm": 0.3141127824783325, "learning_rate": 9.722730870776183e-06, "loss": 0.3957, "step": 9632 }, { "epoch": 0.9793615290768605, "grad_norm": 0.30765268206596375, "learning_rate": 9.722614321908074e-06, "loss": 0.3893, "step": 9633 }, { "epoch": 0.9794631964213095, "grad_norm": 0.3321448862552643, "learning_rate": 9.722497749248556e-06, "loss": 0.3665, "step": 9634 }, { "epoch": 0.9795648637657585, "grad_norm": 0.32313480973243713, "learning_rate": 9.722381152798215e-06, "loss": 0.3693, "step": 9635 }, { "epoch": 0.9796665311102074, "grad_norm": 0.3189506232738495, "learning_rate": 9.722264532557642e-06, "loss": 0.3797, "step": 9636 }, { "epoch": 0.9797681984546563, "grad_norm": 0.36478832364082336, "learning_rate": 9.722147888527421e-06, "loss": 0.3813, "step": 9637 }, { "epoch": 0.9798698657991053, "grad_norm": 0.2961902320384979, "learning_rate": 9.722031220708142e-06, "loss": 0.3678, "step": 9638 }, { "epoch": 0.9799715331435543, "grad_norm": 0.3258848786354065, "learning_rate": 9.72191452910039e-06, "loss": 0.3692, "step": 9639 }, { "epoch": 0.9800732004880033, "grad_norm": 0.3194546401500702, "learning_rate": 9.721797813704756e-06, "loss": 0.3736, "step": 9640 }, { "epoch": 0.9801748678324522, "grad_norm": 0.34289270639419556, "learning_rate": 9.721681074521827e-06, "loss": 0.3937, "step": 9641 }, { "epoch": 0.9802765351769012, "grad_norm": 0.3097810745239258, "learning_rate": 9.721564311552191e-06, "loss": 0.3837, "step": 9642 }, { "epoch": 0.9803782025213501, "grad_norm": 0.2993449568748474, "learning_rate": 9.721447524796435e-06, "loss": 0.3777, "step": 9643 }, { "epoch": 0.9804798698657992, "grad_norm": 0.2890838384628296, "learning_rate": 9.721330714255149e-06, "loss": 0.3684, "step": 9644 }, { "epoch": 0.9805815372102481, "grad_norm": 0.3191799521446228, "learning_rate": 9.721213879928922e-06, "loss": 0.3518, "step": 9645 }, { "epoch": 0.980683204554697, "grad_norm": 0.2777199149131775, "learning_rate": 9.721097021818338e-06, "loss": 0.3568, "step": 9646 }, { "epoch": 0.980784871899146, "grad_norm": 0.29854264855384827, "learning_rate": 9.720980139923992e-06, "loss": 0.3609, "step": 9647 }, { "epoch": 0.9808865392435949, "grad_norm": 0.31194865703582764, "learning_rate": 9.72086323424647e-06, "loss": 0.3823, "step": 9648 }, { "epoch": 0.9809882065880439, "grad_norm": 0.3210117816925049, "learning_rate": 9.720746304786358e-06, "loss": 0.3769, "step": 9649 }, { "epoch": 0.9810898739324929, "grad_norm": 0.3109578490257263, "learning_rate": 9.720629351544251e-06, "loss": 0.3506, "step": 9650 }, { "epoch": 0.9811915412769419, "grad_norm": 0.3086821436882019, "learning_rate": 9.720512374520734e-06, "loss": 0.4014, "step": 9651 }, { "epoch": 0.9812932086213908, "grad_norm": 0.31508535146713257, "learning_rate": 9.720395373716399e-06, "loss": 0.3856, "step": 9652 }, { "epoch": 0.9813948759658397, "grad_norm": 0.31539544463157654, "learning_rate": 9.720278349131831e-06, "loss": 0.4105, "step": 9653 }, { "epoch": 0.9814965433102887, "grad_norm": 0.31642138957977295, "learning_rate": 9.720161300767624e-06, "loss": 0.3849, "step": 9654 }, { "epoch": 0.9815982106547377, "grad_norm": 0.30355969071388245, "learning_rate": 9.720044228624366e-06, "loss": 0.3875, "step": 9655 }, { "epoch": 0.9816998779991867, "grad_norm": 0.3330683410167694, "learning_rate": 9.719927132702646e-06, "loss": 0.3783, "step": 9656 }, { "epoch": 0.9818015453436356, "grad_norm": 0.31076595187187195, "learning_rate": 9.719810013003056e-06, "loss": 0.3901, "step": 9657 }, { "epoch": 0.9819032126880846, "grad_norm": 0.29289019107818604, "learning_rate": 9.719692869526184e-06, "loss": 0.4014, "step": 9658 }, { "epoch": 0.9820048800325335, "grad_norm": 0.3258219361305237, "learning_rate": 9.71957570227262e-06, "loss": 0.3941, "step": 9659 }, { "epoch": 0.9821065473769826, "grad_norm": 0.30555450916290283, "learning_rate": 9.719458511242957e-06, "loss": 0.3673, "step": 9660 }, { "epoch": 0.9822082147214315, "grad_norm": 0.3371909260749817, "learning_rate": 9.719341296437783e-06, "loss": 0.3578, "step": 9661 }, { "epoch": 0.9823098820658804, "grad_norm": 0.31738150119781494, "learning_rate": 9.719224057857687e-06, "loss": 0.3837, "step": 9662 }, { "epoch": 0.9824115494103294, "grad_norm": 0.33465275168418884, "learning_rate": 9.719106795503265e-06, "loss": 0.3685, "step": 9663 }, { "epoch": 0.9825132167547783, "grad_norm": 0.2990998923778534, "learning_rate": 9.718989509375102e-06, "loss": 0.3684, "step": 9664 }, { "epoch": 0.9826148840992274, "grad_norm": 0.3217144012451172, "learning_rate": 9.718872199473792e-06, "loss": 0.3451, "step": 9665 }, { "epoch": 0.9827165514436763, "grad_norm": 0.3172640800476074, "learning_rate": 9.718754865799926e-06, "loss": 0.3821, "step": 9666 }, { "epoch": 0.9828182187881253, "grad_norm": 0.3369431793689728, "learning_rate": 9.718637508354094e-06, "loss": 0.3411, "step": 9667 }, { "epoch": 0.9829198861325742, "grad_norm": 0.30998724699020386, "learning_rate": 9.718520127136887e-06, "loss": 0.3341, "step": 9668 }, { "epoch": 0.9830215534770231, "grad_norm": 0.3173619210720062, "learning_rate": 9.718402722148897e-06, "loss": 0.3906, "step": 9669 }, { "epoch": 0.9831232208214722, "grad_norm": 0.35735633969306946, "learning_rate": 9.718285293390714e-06, "loss": 0.3828, "step": 9670 }, { "epoch": 0.9832248881659211, "grad_norm": 0.36654695868492126, "learning_rate": 9.718167840862933e-06, "loss": 0.3673, "step": 9671 }, { "epoch": 0.9833265555103701, "grad_norm": 0.3611278533935547, "learning_rate": 9.718050364566144e-06, "loss": 0.3935, "step": 9672 }, { "epoch": 0.983428222854819, "grad_norm": 0.3071514666080475, "learning_rate": 9.717932864500938e-06, "loss": 0.3799, "step": 9673 }, { "epoch": 0.983529890199268, "grad_norm": 0.367715448141098, "learning_rate": 9.717815340667906e-06, "loss": 0.3817, "step": 9674 }, { "epoch": 0.983631557543717, "grad_norm": 0.4103834927082062, "learning_rate": 9.717697793067642e-06, "loss": 0.4024, "step": 9675 }, { "epoch": 0.983733224888166, "grad_norm": 0.29215142130851746, "learning_rate": 9.717580221700737e-06, "loss": 0.3458, "step": 9676 }, { "epoch": 0.9838348922326149, "grad_norm": 0.3417019546031952, "learning_rate": 9.717462626567787e-06, "loss": 0.404, "step": 9677 }, { "epoch": 0.9839365595770638, "grad_norm": 0.3413337171077728, "learning_rate": 9.71734500766938e-06, "loss": 0.3704, "step": 9678 }, { "epoch": 0.9840382269215128, "grad_norm": 0.2791375517845154, "learning_rate": 9.717227365006107e-06, "loss": 0.394, "step": 9679 }, { "epoch": 0.9841398942659618, "grad_norm": 0.3198421001434326, "learning_rate": 9.717109698578565e-06, "loss": 0.3708, "step": 9680 }, { "epoch": 0.9842415616104108, "grad_norm": 0.3456294536590576, "learning_rate": 9.716992008387346e-06, "loss": 0.378, "step": 9681 }, { "epoch": 0.9843432289548597, "grad_norm": 0.32632946968078613, "learning_rate": 9.716874294433042e-06, "loss": 0.3807, "step": 9682 }, { "epoch": 0.9844448962993086, "grad_norm": 0.34275272488594055, "learning_rate": 9.716756556716246e-06, "loss": 0.3618, "step": 9683 }, { "epoch": 0.9845465636437576, "grad_norm": 0.32428649067878723, "learning_rate": 9.716638795237552e-06, "loss": 0.3835, "step": 9684 }, { "epoch": 0.9846482309882066, "grad_norm": 0.30373361706733704, "learning_rate": 9.716521009997552e-06, "loss": 0.3777, "step": 9685 }, { "epoch": 0.9847498983326556, "grad_norm": 0.34021884202957153, "learning_rate": 9.71640320099684e-06, "loss": 0.3977, "step": 9686 }, { "epoch": 0.9848515656771045, "grad_norm": 0.31667038798332214, "learning_rate": 9.71628536823601e-06, "loss": 0.3993, "step": 9687 }, { "epoch": 0.9849532330215535, "grad_norm": 0.35608088970184326, "learning_rate": 9.716167511715655e-06, "loss": 0.4107, "step": 9688 }, { "epoch": 0.9850549003660024, "grad_norm": 0.36261123418807983, "learning_rate": 9.716049631436368e-06, "loss": 0.4078, "step": 9689 }, { "epoch": 0.9851565677104515, "grad_norm": 0.304892361164093, "learning_rate": 9.715931727398743e-06, "loss": 0.3972, "step": 9690 }, { "epoch": 0.9852582350549004, "grad_norm": 0.2973848879337311, "learning_rate": 9.715813799603376e-06, "loss": 0.3781, "step": 9691 }, { "epoch": 0.9853599023993493, "grad_norm": 0.3025570809841156, "learning_rate": 9.71569584805086e-06, "loss": 0.3825, "step": 9692 }, { "epoch": 0.9854615697437983, "grad_norm": 0.34150099754333496, "learning_rate": 9.715577872741787e-06, "loss": 0.388, "step": 9693 }, { "epoch": 0.9855632370882472, "grad_norm": 0.31812813878059387, "learning_rate": 9.715459873676756e-06, "loss": 0.3926, "step": 9694 }, { "epoch": 0.9856649044326962, "grad_norm": 0.2921503186225891, "learning_rate": 9.715341850856356e-06, "loss": 0.3776, "step": 9695 }, { "epoch": 0.9857665717771452, "grad_norm": 0.2834872603416443, "learning_rate": 9.715223804281186e-06, "loss": 0.3789, "step": 9696 }, { "epoch": 0.9858682391215942, "grad_norm": 0.332319051027298, "learning_rate": 9.71510573395184e-06, "loss": 0.4, "step": 9697 }, { "epoch": 0.9859699064660431, "grad_norm": 0.3339053690433502, "learning_rate": 9.714987639868911e-06, "loss": 0.3857, "step": 9698 }, { "epoch": 0.986071573810492, "grad_norm": 0.2902987599372864, "learning_rate": 9.714869522032993e-06, "loss": 0.3977, "step": 9699 }, { "epoch": 0.986173241154941, "grad_norm": 0.2973673939704895, "learning_rate": 9.714751380444685e-06, "loss": 0.3829, "step": 9700 }, { "epoch": 0.98627490849939, "grad_norm": 0.2948368787765503, "learning_rate": 9.714633215104578e-06, "loss": 0.3655, "step": 9701 }, { "epoch": 0.986376575843839, "grad_norm": 0.28642794489860535, "learning_rate": 9.71451502601327e-06, "loss": 0.3688, "step": 9702 }, { "epoch": 0.9864782431882879, "grad_norm": 0.2955739200115204, "learning_rate": 9.714396813171355e-06, "loss": 0.3662, "step": 9703 }, { "epoch": 0.9865799105327369, "grad_norm": 0.3408322334289551, "learning_rate": 9.714278576579431e-06, "loss": 0.4057, "step": 9704 }, { "epoch": 0.9866815778771858, "grad_norm": 0.31090274453163147, "learning_rate": 9.71416031623809e-06, "loss": 0.3979, "step": 9705 }, { "epoch": 0.9867832452216349, "grad_norm": 0.3351286053657532, "learning_rate": 9.714042032147931e-06, "loss": 0.3749, "step": 9706 }, { "epoch": 0.9868849125660838, "grad_norm": 0.30934518575668335, "learning_rate": 9.713923724309549e-06, "loss": 0.3914, "step": 9707 }, { "epoch": 0.9869865799105327, "grad_norm": 0.3079646825790405, "learning_rate": 9.713805392723537e-06, "loss": 0.3703, "step": 9708 }, { "epoch": 0.9870882472549817, "grad_norm": 0.3280855417251587, "learning_rate": 9.713687037390495e-06, "loss": 0.4337, "step": 9709 }, { "epoch": 0.9871899145994306, "grad_norm": 0.32946890592575073, "learning_rate": 9.713568658311017e-06, "loss": 0.3607, "step": 9710 }, { "epoch": 0.9872915819438797, "grad_norm": 0.307549387216568, "learning_rate": 9.713450255485702e-06, "loss": 0.3724, "step": 9711 }, { "epoch": 0.9873932492883286, "grad_norm": 0.3431527614593506, "learning_rate": 9.713331828915143e-06, "loss": 0.4021, "step": 9712 }, { "epoch": 0.9874949166327776, "grad_norm": 0.3190855383872986, "learning_rate": 9.713213378599939e-06, "loss": 0.3882, "step": 9713 }, { "epoch": 0.9875965839772265, "grad_norm": 0.36994099617004395, "learning_rate": 9.713094904540686e-06, "loss": 0.3988, "step": 9714 }, { "epoch": 0.9876982513216754, "grad_norm": 0.3297271132469177, "learning_rate": 9.712976406737982e-06, "loss": 0.4096, "step": 9715 }, { "epoch": 0.9877999186661245, "grad_norm": 0.2934090793132782, "learning_rate": 9.712857885192422e-06, "loss": 0.365, "step": 9716 }, { "epoch": 0.9879015860105734, "grad_norm": 0.33360350131988525, "learning_rate": 9.712739339904602e-06, "loss": 0.39, "step": 9717 }, { "epoch": 0.9880032533550224, "grad_norm": 0.3302399516105652, "learning_rate": 9.712620770875123e-06, "loss": 0.3744, "step": 9718 }, { "epoch": 0.9881049206994713, "grad_norm": 0.3285580575466156, "learning_rate": 9.71250217810458e-06, "loss": 0.4057, "step": 9719 }, { "epoch": 0.9882065880439203, "grad_norm": 0.3238225281238556, "learning_rate": 9.712383561593571e-06, "loss": 0.3861, "step": 9720 }, { "epoch": 0.9883082553883693, "grad_norm": 0.3475322723388672, "learning_rate": 9.712264921342694e-06, "loss": 0.3729, "step": 9721 }, { "epoch": 0.9884099227328182, "grad_norm": 0.3143652677536011, "learning_rate": 9.712146257352545e-06, "loss": 0.384, "step": 9722 }, { "epoch": 0.9885115900772672, "grad_norm": 0.3258199989795685, "learning_rate": 9.712027569623724e-06, "loss": 0.3752, "step": 9723 }, { "epoch": 0.9886132574217161, "grad_norm": 0.2920733392238617, "learning_rate": 9.711908858156827e-06, "loss": 0.3582, "step": 9724 }, { "epoch": 0.9887149247661651, "grad_norm": 0.2847578227519989, "learning_rate": 9.711790122952453e-06, "loss": 0.3743, "step": 9725 }, { "epoch": 0.9888165921106141, "grad_norm": 0.3061349093914032, "learning_rate": 9.7116713640112e-06, "loss": 0.3835, "step": 9726 }, { "epoch": 0.9889182594550631, "grad_norm": 0.2970201373100281, "learning_rate": 9.711552581333668e-06, "loss": 0.4032, "step": 9727 }, { "epoch": 0.989019926799512, "grad_norm": 0.28830191493034363, "learning_rate": 9.711433774920452e-06, "loss": 0.3621, "step": 9728 }, { "epoch": 0.989121594143961, "grad_norm": 0.30519282817840576, "learning_rate": 9.711314944772153e-06, "loss": 0.3939, "step": 9729 }, { "epoch": 0.9892232614884099, "grad_norm": 0.29862186312675476, "learning_rate": 9.711196090889368e-06, "loss": 0.3789, "step": 9730 }, { "epoch": 0.9893249288328589, "grad_norm": 0.31034383177757263, "learning_rate": 9.711077213272697e-06, "loss": 0.3834, "step": 9731 }, { "epoch": 0.9894265961773079, "grad_norm": 0.2953374981880188, "learning_rate": 9.71095831192274e-06, "loss": 0.4115, "step": 9732 }, { "epoch": 0.9895282635217568, "grad_norm": 0.3087966740131378, "learning_rate": 9.710839386840093e-06, "loss": 0.3551, "step": 9733 }, { "epoch": 0.9896299308662058, "grad_norm": 0.3275272250175476, "learning_rate": 9.710720438025357e-06, "loss": 0.403, "step": 9734 }, { "epoch": 0.9897315982106547, "grad_norm": 0.3493707776069641, "learning_rate": 9.71060146547913e-06, "loss": 0.3973, "step": 9735 }, { "epoch": 0.9898332655551036, "grad_norm": 0.3012937009334564, "learning_rate": 9.710482469202014e-06, "loss": 0.3794, "step": 9736 }, { "epoch": 0.9899349328995527, "grad_norm": 0.3219594955444336, "learning_rate": 9.710363449194606e-06, "loss": 0.3947, "step": 9737 }, { "epoch": 0.9900366002440016, "grad_norm": 0.31134480237960815, "learning_rate": 9.710244405457505e-06, "loss": 0.3956, "step": 9738 }, { "epoch": 0.9901382675884506, "grad_norm": 0.31724998354911804, "learning_rate": 9.710125337991313e-06, "loss": 0.4165, "step": 9739 }, { "epoch": 0.9902399349328995, "grad_norm": 0.31614649295806885, "learning_rate": 9.71000624679663e-06, "loss": 0.3594, "step": 9740 }, { "epoch": 0.9903416022773485, "grad_norm": 0.32021573185920715, "learning_rate": 9.709887131874054e-06, "loss": 0.3482, "step": 9741 }, { "epoch": 0.9904432696217975, "grad_norm": 0.3279944658279419, "learning_rate": 9.709767993224184e-06, "loss": 0.3871, "step": 9742 }, { "epoch": 0.9905449369662465, "grad_norm": 0.3097282946109772, "learning_rate": 9.709648830847624e-06, "loss": 0.3588, "step": 9743 }, { "epoch": 0.9906466043106954, "grad_norm": 0.316419392824173, "learning_rate": 9.709529644744973e-06, "loss": 0.4241, "step": 9744 }, { "epoch": 0.9907482716551443, "grad_norm": 0.3544919192790985, "learning_rate": 9.70941043491683e-06, "loss": 0.3975, "step": 9745 }, { "epoch": 0.9908499389995933, "grad_norm": 0.34427186846733093, "learning_rate": 9.709291201363795e-06, "loss": 0.4224, "step": 9746 }, { "epoch": 0.9909516063440423, "grad_norm": 0.31313055753707886, "learning_rate": 9.709171944086471e-06, "loss": 0.4111, "step": 9747 }, { "epoch": 0.9910532736884913, "grad_norm": 0.33443716168403625, "learning_rate": 9.709052663085459e-06, "loss": 0.382, "step": 9748 }, { "epoch": 0.9911549410329402, "grad_norm": 0.31249403953552246, "learning_rate": 9.708933358361356e-06, "loss": 0.3617, "step": 9749 }, { "epoch": 0.9912566083773892, "grad_norm": 0.3760821223258972, "learning_rate": 9.708814029914769e-06, "loss": 0.4132, "step": 9750 }, { "epoch": 0.9913582757218381, "grad_norm": 0.33690083026885986, "learning_rate": 9.708694677746294e-06, "loss": 0.3962, "step": 9751 }, { "epoch": 0.9914599430662872, "grad_norm": 0.34727010130882263, "learning_rate": 9.708575301856536e-06, "loss": 0.4095, "step": 9752 }, { "epoch": 0.9915616104107361, "grad_norm": 0.32608702778816223, "learning_rate": 9.708455902246093e-06, "loss": 0.3761, "step": 9753 }, { "epoch": 0.991663277755185, "grad_norm": 0.33376583456993103, "learning_rate": 9.708336478915567e-06, "loss": 0.3837, "step": 9754 }, { "epoch": 0.991764945099634, "grad_norm": 0.29763126373291016, "learning_rate": 9.708217031865562e-06, "loss": 0.3742, "step": 9755 }, { "epoch": 0.9918666124440829, "grad_norm": 0.3423607647418976, "learning_rate": 9.708097561096679e-06, "loss": 0.4139, "step": 9756 }, { "epoch": 0.991968279788532, "grad_norm": 0.30974626541137695, "learning_rate": 9.707978066609518e-06, "loss": 0.3889, "step": 9757 }, { "epoch": 0.9920699471329809, "grad_norm": 0.3335466682910919, "learning_rate": 9.707858548404683e-06, "loss": 0.3824, "step": 9758 }, { "epoch": 0.9921716144774299, "grad_norm": 0.3044785261154175, "learning_rate": 9.707739006482775e-06, "loss": 0.3975, "step": 9759 }, { "epoch": 0.9922732818218788, "grad_norm": 0.29672303795814514, "learning_rate": 9.707619440844396e-06, "loss": 0.3639, "step": 9760 }, { "epoch": 0.9923749491663277, "grad_norm": 0.3331323564052582, "learning_rate": 9.70749985149015e-06, "loss": 0.4244, "step": 9761 }, { "epoch": 0.9924766165107768, "grad_norm": 0.2960849106311798, "learning_rate": 9.707380238420638e-06, "loss": 0.3453, "step": 9762 }, { "epoch": 0.9925782838552257, "grad_norm": 0.28195080161094666, "learning_rate": 9.707260601636464e-06, "loss": 0.3691, "step": 9763 }, { "epoch": 0.9926799511996747, "grad_norm": 0.33552172780036926, "learning_rate": 9.707140941138228e-06, "loss": 0.3919, "step": 9764 }, { "epoch": 0.9927816185441236, "grad_norm": 0.310366690158844, "learning_rate": 9.707021256926536e-06, "loss": 0.3668, "step": 9765 }, { "epoch": 0.9928832858885726, "grad_norm": 0.3209300935268402, "learning_rate": 9.70690154900199e-06, "loss": 0.4431, "step": 9766 }, { "epoch": 0.9929849532330216, "grad_norm": 0.29878562688827515, "learning_rate": 9.706781817365191e-06, "loss": 0.3733, "step": 9767 }, { "epoch": 0.9930866205774705, "grad_norm": 0.2862066924571991, "learning_rate": 9.706662062016745e-06, "loss": 0.3585, "step": 9768 }, { "epoch": 0.9931882879219195, "grad_norm": 0.2757642865180969, "learning_rate": 9.706542282957252e-06, "loss": 0.3608, "step": 9769 }, { "epoch": 0.9932899552663684, "grad_norm": 0.3319334387779236, "learning_rate": 9.706422480187319e-06, "loss": 0.4146, "step": 9770 }, { "epoch": 0.9933916226108174, "grad_norm": 0.2997480034828186, "learning_rate": 9.70630265370755e-06, "loss": 0.4186, "step": 9771 }, { "epoch": 0.9934932899552664, "grad_norm": 0.3104003369808197, "learning_rate": 9.706182803518542e-06, "loss": 0.382, "step": 9772 }, { "epoch": 0.9935949572997154, "grad_norm": 0.3151113986968994, "learning_rate": 9.706062929620905e-06, "loss": 0.35, "step": 9773 }, { "epoch": 0.9936966246441643, "grad_norm": 0.3293851315975189, "learning_rate": 9.705943032015244e-06, "loss": 0.3656, "step": 9774 }, { "epoch": 0.9937982919886132, "grad_norm": 0.3000261187553406, "learning_rate": 9.705823110702157e-06, "loss": 0.3893, "step": 9775 }, { "epoch": 0.9938999593330622, "grad_norm": 0.30048903822898865, "learning_rate": 9.705703165682252e-06, "loss": 0.3939, "step": 9776 }, { "epoch": 0.9940016266775111, "grad_norm": 0.3274964392185211, "learning_rate": 9.705583196956134e-06, "loss": 0.3614, "step": 9777 }, { "epoch": 0.9941032940219602, "grad_norm": 0.32143232226371765, "learning_rate": 9.705463204524405e-06, "loss": 0.3744, "step": 9778 }, { "epoch": 0.9942049613664091, "grad_norm": 0.2983279228210449, "learning_rate": 9.705343188387672e-06, "loss": 0.3795, "step": 9779 }, { "epoch": 0.9943066287108581, "grad_norm": 0.33942532539367676, "learning_rate": 9.705223148546536e-06, "loss": 0.3767, "step": 9780 }, { "epoch": 0.994408296055307, "grad_norm": 0.386698842048645, "learning_rate": 9.705103085001606e-06, "loss": 0.4273, "step": 9781 }, { "epoch": 0.994509963399756, "grad_norm": 0.3087505102157593, "learning_rate": 9.704982997753484e-06, "loss": 0.424, "step": 9782 }, { "epoch": 0.994611630744205, "grad_norm": 0.33173152804374695, "learning_rate": 9.704862886802776e-06, "loss": 0.3875, "step": 9783 }, { "epoch": 0.9947132980886539, "grad_norm": 0.3636951744556427, "learning_rate": 9.704742752150085e-06, "loss": 0.4041, "step": 9784 }, { "epoch": 0.9948149654331029, "grad_norm": 0.36554619669914246, "learning_rate": 9.70462259379602e-06, "loss": 0.4101, "step": 9785 }, { "epoch": 0.9949166327775518, "grad_norm": 0.3102218806743622, "learning_rate": 9.704502411741183e-06, "loss": 0.3658, "step": 9786 }, { "epoch": 0.9950183001220008, "grad_norm": 0.3091849982738495, "learning_rate": 9.704382205986182e-06, "loss": 0.4003, "step": 9787 }, { "epoch": 0.9951199674664498, "grad_norm": 0.33411306142807007, "learning_rate": 9.704261976531621e-06, "loss": 0.3752, "step": 9788 }, { "epoch": 0.9952216348108988, "grad_norm": 0.32457470893859863, "learning_rate": 9.704141723378107e-06, "loss": 0.371, "step": 9789 }, { "epoch": 0.9953233021553477, "grad_norm": 0.33767297863960266, "learning_rate": 9.704021446526244e-06, "loss": 0.3905, "step": 9790 }, { "epoch": 0.9954249694997966, "grad_norm": 0.32105475664138794, "learning_rate": 9.703901145976638e-06, "loss": 0.3671, "step": 9791 }, { "epoch": 0.9955266368442456, "grad_norm": 0.3413381576538086, "learning_rate": 9.703780821729898e-06, "loss": 0.3875, "step": 9792 }, { "epoch": 0.9956283041886946, "grad_norm": 0.3518904149532318, "learning_rate": 9.703660473786626e-06, "loss": 0.4006, "step": 9793 }, { "epoch": 0.9957299715331436, "grad_norm": 0.3205040991306305, "learning_rate": 9.70354010214743e-06, "loss": 0.3914, "step": 9794 }, { "epoch": 0.9958316388775925, "grad_norm": 0.303054541349411, "learning_rate": 9.703419706812917e-06, "loss": 0.3807, "step": 9795 }, { "epoch": 0.9959333062220415, "grad_norm": 0.3485208749771118, "learning_rate": 9.703299287783696e-06, "loss": 0.406, "step": 9796 }, { "epoch": 0.9960349735664904, "grad_norm": 0.3491160273551941, "learning_rate": 9.703178845060369e-06, "loss": 0.3894, "step": 9797 }, { "epoch": 0.9961366409109395, "grad_norm": 0.3303183317184448, "learning_rate": 9.703058378643543e-06, "loss": 0.3528, "step": 9798 }, { "epoch": 0.9962383082553884, "grad_norm": 0.32641515135765076, "learning_rate": 9.702937888533827e-06, "loss": 0.3855, "step": 9799 }, { "epoch": 0.9963399755998373, "grad_norm": 0.3170565068721771, "learning_rate": 9.70281737473183e-06, "loss": 0.3795, "step": 9800 }, { "epoch": 0.9964416429442863, "grad_norm": 0.33506545424461365, "learning_rate": 9.702696837238155e-06, "loss": 0.3695, "step": 9801 }, { "epoch": 0.9965433102887352, "grad_norm": 0.2984747886657715, "learning_rate": 9.70257627605341e-06, "loss": 0.3762, "step": 9802 }, { "epoch": 0.9966449776331843, "grad_norm": 0.3123699724674225, "learning_rate": 9.702455691178205e-06, "loss": 0.3612, "step": 9803 }, { "epoch": 0.9967466449776332, "grad_norm": 0.3038407564163208, "learning_rate": 9.702335082613144e-06, "loss": 0.4053, "step": 9804 }, { "epoch": 0.9968483123220822, "grad_norm": 0.34448549151420593, "learning_rate": 9.702214450358836e-06, "loss": 0.3977, "step": 9805 }, { "epoch": 0.9969499796665311, "grad_norm": 0.3223365843296051, "learning_rate": 9.702093794415891e-06, "loss": 0.4121, "step": 9806 }, { "epoch": 0.99705164701098, "grad_norm": 0.31043562293052673, "learning_rate": 9.701973114784913e-06, "loss": 0.3713, "step": 9807 }, { "epoch": 0.9971533143554291, "grad_norm": 0.3335932493209839, "learning_rate": 9.701852411466513e-06, "loss": 0.3869, "step": 9808 }, { "epoch": 0.997254981699878, "grad_norm": 0.30133482813835144, "learning_rate": 9.701731684461297e-06, "loss": 0.3907, "step": 9809 }, { "epoch": 0.997356649044327, "grad_norm": 0.3227856755256653, "learning_rate": 9.701610933769876e-06, "loss": 0.3763, "step": 9810 }, { "epoch": 0.9974583163887759, "grad_norm": 0.31209588050842285, "learning_rate": 9.701490159392855e-06, "loss": 0.391, "step": 9811 }, { "epoch": 0.9975599837332249, "grad_norm": 0.2966468930244446, "learning_rate": 9.701369361330844e-06, "loss": 0.4255, "step": 9812 }, { "epoch": 0.9976616510776739, "grad_norm": 0.29793810844421387, "learning_rate": 9.70124853958445e-06, "loss": 0.3671, "step": 9813 }, { "epoch": 0.9977633184221228, "grad_norm": 0.32197901606559753, "learning_rate": 9.701127694154285e-06, "loss": 0.3875, "step": 9814 }, { "epoch": 0.9978649857665718, "grad_norm": 0.3244114816188812, "learning_rate": 9.701006825040955e-06, "loss": 0.3707, "step": 9815 }, { "epoch": 0.9979666531110207, "grad_norm": 0.3111218810081482, "learning_rate": 9.70088593224507e-06, "loss": 0.4175, "step": 9816 }, { "epoch": 0.9980683204554697, "grad_norm": 0.33797553181648254, "learning_rate": 9.70076501576724e-06, "loss": 0.412, "step": 9817 }, { "epoch": 0.9981699877999186, "grad_norm": 0.33194616436958313, "learning_rate": 9.70064407560807e-06, "loss": 0.3695, "step": 9818 }, { "epoch": 0.9982716551443677, "grad_norm": 0.32991519570350647, "learning_rate": 9.700523111768174e-06, "loss": 0.415, "step": 9819 }, { "epoch": 0.9983733224888166, "grad_norm": 0.34293133020401, "learning_rate": 9.700402124248161e-06, "loss": 0.3986, "step": 9820 }, { "epoch": 0.9984749898332655, "grad_norm": 0.313963383436203, "learning_rate": 9.700281113048636e-06, "loss": 0.3593, "step": 9821 }, { "epoch": 0.9985766571777145, "grad_norm": 0.31402459740638733, "learning_rate": 9.700160078170214e-06, "loss": 0.3727, "step": 9822 }, { "epoch": 0.9986783245221634, "grad_norm": 0.3039301931858063, "learning_rate": 9.7000390196135e-06, "loss": 0.3713, "step": 9823 }, { "epoch": 0.9987799918666125, "grad_norm": 0.31738874316215515, "learning_rate": 9.69991793737911e-06, "loss": 0.3961, "step": 9824 }, { "epoch": 0.9988816592110614, "grad_norm": 0.2820795774459839, "learning_rate": 9.699796831467647e-06, "loss": 0.3688, "step": 9825 }, { "epoch": 0.9989833265555104, "grad_norm": 0.3284406065940857, "learning_rate": 9.699675701879726e-06, "loss": 0.3726, "step": 9826 }, { "epoch": 0.9990849938999593, "grad_norm": 0.3153286874294281, "learning_rate": 9.699554548615954e-06, "loss": 0.3984, "step": 9827 }, { "epoch": 0.9991866612444082, "grad_norm": 0.3191172480583191, "learning_rate": 9.699433371676945e-06, "loss": 0.3727, "step": 9828 }, { "epoch": 0.9992883285888573, "grad_norm": 0.3060797452926636, "learning_rate": 9.699312171063306e-06, "loss": 0.3706, "step": 9829 }, { "epoch": 0.9993899959333062, "grad_norm": 0.2815815210342407, "learning_rate": 9.69919094677565e-06, "loss": 0.3721, "step": 9830 }, { "epoch": 0.9994916632777552, "grad_norm": 0.3112279176712036, "learning_rate": 9.699069698814587e-06, "loss": 0.3914, "step": 9831 }, { "epoch": 0.9995933306222041, "grad_norm": 0.28654786944389343, "learning_rate": 9.698948427180727e-06, "loss": 0.3644, "step": 9832 }, { "epoch": 0.9996949979666531, "grad_norm": 0.29567810893058777, "learning_rate": 9.698827131874681e-06, "loss": 0.4217, "step": 9833 }, { "epoch": 0.9997966653111021, "grad_norm": 0.2817235291004181, "learning_rate": 9.698705812897062e-06, "loss": 0.3754, "step": 9834 }, { "epoch": 0.9998983326555511, "grad_norm": 0.29943937063217163, "learning_rate": 9.698584470248478e-06, "loss": 0.3664, "step": 9835 }, { "epoch": 1.0, "grad_norm": 0.32173219323158264, "learning_rate": 9.698463103929542e-06, "loss": 0.3609, "step": 9836 }, { "epoch": 1.000101667344449, "grad_norm": 0.3155473470687866, "learning_rate": 9.698341713940866e-06, "loss": 0.3578, "step": 9837 }, { "epoch": 1.0002033346888979, "grad_norm": 0.31428083777427673, "learning_rate": 9.69822030028306e-06, "loss": 0.3765, "step": 9838 }, { "epoch": 1.0003050020333468, "grad_norm": 0.3478449583053589, "learning_rate": 9.698098862956739e-06, "loss": 0.4076, "step": 9839 }, { "epoch": 1.0004066693777958, "grad_norm": 0.29836344718933105, "learning_rate": 9.69797740196251e-06, "loss": 0.349, "step": 9840 }, { "epoch": 1.0005083367222447, "grad_norm": 0.3144470751285553, "learning_rate": 9.697855917300989e-06, "loss": 0.3416, "step": 9841 }, { "epoch": 1.0006100040666939, "grad_norm": 0.31470853090286255, "learning_rate": 9.697734408972786e-06, "loss": 0.3377, "step": 9842 }, { "epoch": 1.0007116714111428, "grad_norm": 0.3144121468067169, "learning_rate": 9.697612876978513e-06, "loss": 0.3776, "step": 9843 }, { "epoch": 1.0008133387555918, "grad_norm": 0.29499995708465576, "learning_rate": 9.697491321318781e-06, "loss": 0.3654, "step": 9844 }, { "epoch": 1.0009150061000407, "grad_norm": 0.29489272832870483, "learning_rate": 9.697369741994207e-06, "loss": 0.3692, "step": 9845 }, { "epoch": 1.0010166734444896, "grad_norm": 0.31793662905693054, "learning_rate": 9.697248139005398e-06, "loss": 0.3532, "step": 9846 }, { "epoch": 1.0011183407889386, "grad_norm": 0.3304246664047241, "learning_rate": 9.69712651235297e-06, "loss": 0.3529, "step": 9847 }, { "epoch": 1.0012200081333875, "grad_norm": 0.292602002620697, "learning_rate": 9.697004862037536e-06, "loss": 0.3709, "step": 9848 }, { "epoch": 1.0013216754778365, "grad_norm": 0.2973501980304718, "learning_rate": 9.696883188059707e-06, "loss": 0.3667, "step": 9849 }, { "epoch": 1.0014233428222854, "grad_norm": 0.30470582842826843, "learning_rate": 9.696761490420096e-06, "loss": 0.3795, "step": 9850 }, { "epoch": 1.0015250101667343, "grad_norm": 0.3067902624607086, "learning_rate": 9.696639769119318e-06, "loss": 0.3641, "step": 9851 }, { "epoch": 1.0016266775111835, "grad_norm": 0.30101627111434937, "learning_rate": 9.696518024157983e-06, "loss": 0.3724, "step": 9852 }, { "epoch": 1.0017283448556324, "grad_norm": 0.2867058217525482, "learning_rate": 9.696396255536706e-06, "loss": 0.3576, "step": 9853 }, { "epoch": 1.0018300122000814, "grad_norm": 0.313523530960083, "learning_rate": 9.696274463256103e-06, "loss": 0.3644, "step": 9854 }, { "epoch": 1.0019316795445303, "grad_norm": 0.3038922846317291, "learning_rate": 9.696152647316784e-06, "loss": 0.3698, "step": 9855 }, { "epoch": 1.0020333468889793, "grad_norm": 0.3022801876068115, "learning_rate": 9.696030807719364e-06, "loss": 0.3722, "step": 9856 }, { "epoch": 1.0021350142334282, "grad_norm": 0.3239785432815552, "learning_rate": 9.695908944464457e-06, "loss": 0.3751, "step": 9857 }, { "epoch": 1.0022366815778772, "grad_norm": 0.32028648257255554, "learning_rate": 9.695787057552678e-06, "loss": 0.3557, "step": 9858 }, { "epoch": 1.002338348922326, "grad_norm": 0.29945483803749084, "learning_rate": 9.695665146984637e-06, "loss": 0.38, "step": 9859 }, { "epoch": 1.002440016266775, "grad_norm": 0.2912735044956207, "learning_rate": 9.695543212760951e-06, "loss": 0.3497, "step": 9860 }, { "epoch": 1.002541683611224, "grad_norm": 0.3123404085636139, "learning_rate": 9.695421254882235e-06, "loss": 0.3699, "step": 9861 }, { "epoch": 1.0026433509556731, "grad_norm": 0.3044359087944031, "learning_rate": 9.695299273349103e-06, "loss": 0.3701, "step": 9862 }, { "epoch": 1.002745018300122, "grad_norm": 0.3037288784980774, "learning_rate": 9.69517726816217e-06, "loss": 0.3549, "step": 9863 }, { "epoch": 1.002846685644571, "grad_norm": 0.3088265061378479, "learning_rate": 9.69505523932205e-06, "loss": 0.3651, "step": 9864 }, { "epoch": 1.00294835298902, "grad_norm": 0.30644848942756653, "learning_rate": 9.694933186829354e-06, "loss": 0.3857, "step": 9865 }, { "epoch": 1.003050020333469, "grad_norm": 0.31528282165527344, "learning_rate": 9.694811110684703e-06, "loss": 0.3889, "step": 9866 }, { "epoch": 1.0031516876779178, "grad_norm": 0.317909836769104, "learning_rate": 9.694689010888707e-06, "loss": 0.3566, "step": 9867 }, { "epoch": 1.0032533550223668, "grad_norm": 0.2898562252521515, "learning_rate": 9.694566887441984e-06, "loss": 0.3921, "step": 9868 }, { "epoch": 1.0033550223668157, "grad_norm": 0.31348589062690735, "learning_rate": 9.69444474034515e-06, "loss": 0.3571, "step": 9869 }, { "epoch": 1.0034566897112647, "grad_norm": 0.32160982489585876, "learning_rate": 9.69432256959882e-06, "loss": 0.402, "step": 9870 }, { "epoch": 1.0035583570557136, "grad_norm": 0.3020179271697998, "learning_rate": 9.694200375203606e-06, "loss": 0.3658, "step": 9871 }, { "epoch": 1.0036600244001628, "grad_norm": 0.34996265172958374, "learning_rate": 9.694078157160127e-06, "loss": 0.4048, "step": 9872 }, { "epoch": 1.0037616917446117, "grad_norm": 0.32773101329803467, "learning_rate": 9.693955915468997e-06, "loss": 0.3877, "step": 9873 }, { "epoch": 1.0038633590890607, "grad_norm": 0.32753786444664, "learning_rate": 9.693833650130834e-06, "loss": 0.3506, "step": 9874 }, { "epoch": 1.0039650264335096, "grad_norm": 0.36570578813552856, "learning_rate": 9.693711361146251e-06, "loss": 0.3883, "step": 9875 }, { "epoch": 1.0040666937779585, "grad_norm": 0.3012138903141022, "learning_rate": 9.693589048515867e-06, "loss": 0.3699, "step": 9876 }, { "epoch": 1.0041683611224075, "grad_norm": 0.330609530210495, "learning_rate": 9.693466712240296e-06, "loss": 0.369, "step": 9877 }, { "epoch": 1.0042700284668564, "grad_norm": 0.29993563890457153, "learning_rate": 9.693344352320156e-06, "loss": 0.3722, "step": 9878 }, { "epoch": 1.0043716958113054, "grad_norm": 0.318753182888031, "learning_rate": 9.69322196875606e-06, "loss": 0.3553, "step": 9879 }, { "epoch": 1.0044733631557543, "grad_norm": 0.28130412101745605, "learning_rate": 9.693099561548628e-06, "loss": 0.3656, "step": 9880 }, { "epoch": 1.0045750305002032, "grad_norm": 0.3448873460292816, "learning_rate": 9.692977130698477e-06, "loss": 0.3787, "step": 9881 }, { "epoch": 1.0046766978446522, "grad_norm": 0.3154756724834442, "learning_rate": 9.692854676206221e-06, "loss": 0.3894, "step": 9882 }, { "epoch": 1.0047783651891014, "grad_norm": 0.27855193614959717, "learning_rate": 9.692732198072479e-06, "loss": 0.353, "step": 9883 }, { "epoch": 1.0048800325335503, "grad_norm": 0.316112756729126, "learning_rate": 9.692609696297868e-06, "loss": 0.3388, "step": 9884 }, { "epoch": 1.0049816998779992, "grad_norm": 0.30732595920562744, "learning_rate": 9.692487170883002e-06, "loss": 0.3543, "step": 9885 }, { "epoch": 1.0050833672224482, "grad_norm": 0.3114015460014343, "learning_rate": 9.692364621828503e-06, "loss": 0.369, "step": 9886 }, { "epoch": 1.0051850345668971, "grad_norm": 0.3039001226425171, "learning_rate": 9.692242049134985e-06, "loss": 0.3744, "step": 9887 }, { "epoch": 1.005286701911346, "grad_norm": 0.36677494645118713, "learning_rate": 9.692119452803068e-06, "loss": 0.3696, "step": 9888 }, { "epoch": 1.005388369255795, "grad_norm": 0.3465687930583954, "learning_rate": 9.691996832833369e-06, "loss": 0.3992, "step": 9889 }, { "epoch": 1.005490036600244, "grad_norm": 0.32583120465278625, "learning_rate": 9.691874189226501e-06, "loss": 0.3599, "step": 9890 }, { "epoch": 1.0055917039446929, "grad_norm": 0.3031204640865326, "learning_rate": 9.69175152198309e-06, "loss": 0.3732, "step": 9891 }, { "epoch": 1.0056933712891418, "grad_norm": 0.31366488337516785, "learning_rate": 9.691628831103748e-06, "loss": 0.3927, "step": 9892 }, { "epoch": 1.005795038633591, "grad_norm": 0.31446149945259094, "learning_rate": 9.691506116589093e-06, "loss": 0.329, "step": 9893 }, { "epoch": 1.00589670597804, "grad_norm": 0.32479429244995117, "learning_rate": 9.691383378439749e-06, "loss": 0.3515, "step": 9894 }, { "epoch": 1.0059983733224889, "grad_norm": 0.29496702551841736, "learning_rate": 9.691260616656327e-06, "loss": 0.3684, "step": 9895 }, { "epoch": 1.0061000406669378, "grad_norm": 0.3305833637714386, "learning_rate": 9.69113783123945e-06, "loss": 0.3289, "step": 9896 }, { "epoch": 1.0062017080113868, "grad_norm": 0.345760315656662, "learning_rate": 9.691015022189736e-06, "loss": 0.3735, "step": 9897 }, { "epoch": 1.0063033753558357, "grad_norm": 0.29143330454826355, "learning_rate": 9.690892189507802e-06, "loss": 0.3627, "step": 9898 }, { "epoch": 1.0064050427002846, "grad_norm": 0.3373127579689026, "learning_rate": 9.690769333194268e-06, "loss": 0.3751, "step": 9899 }, { "epoch": 1.0065067100447336, "grad_norm": 0.3138255476951599, "learning_rate": 9.690646453249753e-06, "loss": 0.3637, "step": 9900 }, { "epoch": 1.0066083773891825, "grad_norm": 0.29414892196655273, "learning_rate": 9.690523549674876e-06, "loss": 0.3267, "step": 9901 }, { "epoch": 1.0067100447336315, "grad_norm": 0.33836355805397034, "learning_rate": 9.690400622470257e-06, "loss": 0.3525, "step": 9902 }, { "epoch": 1.0068117120780806, "grad_norm": 0.32083258032798767, "learning_rate": 9.690277671636512e-06, "loss": 0.3961, "step": 9903 }, { "epoch": 1.0069133794225296, "grad_norm": 0.3191449046134949, "learning_rate": 9.690154697174265e-06, "loss": 0.3742, "step": 9904 }, { "epoch": 1.0070150467669785, "grad_norm": 0.34405311942100525, "learning_rate": 9.690031699084132e-06, "loss": 0.3721, "step": 9905 }, { "epoch": 1.0071167141114274, "grad_norm": 0.3126887083053589, "learning_rate": 9.689908677366734e-06, "loss": 0.3969, "step": 9906 }, { "epoch": 1.0072183814558764, "grad_norm": 0.31925955414772034, "learning_rate": 9.68978563202269e-06, "loss": 0.3711, "step": 9907 }, { "epoch": 1.0073200488003253, "grad_norm": 0.3157893419265747, "learning_rate": 9.689662563052621e-06, "loss": 0.3903, "step": 9908 }, { "epoch": 1.0074217161447743, "grad_norm": 0.3349095582962036, "learning_rate": 9.689539470457148e-06, "loss": 0.3903, "step": 9909 }, { "epoch": 1.0075233834892232, "grad_norm": 0.3477099537849426, "learning_rate": 9.689416354236886e-06, "loss": 0.3793, "step": 9910 }, { "epoch": 1.0076250508336722, "grad_norm": 0.3095589876174927, "learning_rate": 9.689293214392462e-06, "loss": 0.3455, "step": 9911 }, { "epoch": 1.007726718178121, "grad_norm": 0.31769901514053345, "learning_rate": 9.689170050924491e-06, "loss": 0.3481, "step": 9912 }, { "epoch": 1.0078283855225703, "grad_norm": 0.335477739572525, "learning_rate": 9.689046863833597e-06, "loss": 0.3821, "step": 9913 }, { "epoch": 1.0079300528670192, "grad_norm": 0.2925763428211212, "learning_rate": 9.6889236531204e-06, "loss": 0.3458, "step": 9914 }, { "epoch": 1.0080317202114681, "grad_norm": 0.3048999607563019, "learning_rate": 9.688800418785518e-06, "loss": 0.3852, "step": 9915 }, { "epoch": 1.008133387555917, "grad_norm": 0.32285943627357483, "learning_rate": 9.688677160829574e-06, "loss": 0.3626, "step": 9916 }, { "epoch": 1.008235054900366, "grad_norm": 0.3168417513370514, "learning_rate": 9.688553879253188e-06, "loss": 0.3699, "step": 9917 }, { "epoch": 1.008336722244815, "grad_norm": 0.30704981088638306, "learning_rate": 9.688430574056984e-06, "loss": 0.3437, "step": 9918 }, { "epoch": 1.008438389589264, "grad_norm": 0.32990744709968567, "learning_rate": 9.688307245241579e-06, "loss": 0.3993, "step": 9919 }, { "epoch": 1.0085400569337128, "grad_norm": 0.3002309799194336, "learning_rate": 9.688183892807597e-06, "loss": 0.3877, "step": 9920 }, { "epoch": 1.0086417242781618, "grad_norm": 0.31801638007164, "learning_rate": 9.688060516755657e-06, "loss": 0.3755, "step": 9921 }, { "epoch": 1.0087433916226107, "grad_norm": 0.3214177191257477, "learning_rate": 9.687937117086385e-06, "loss": 0.3937, "step": 9922 }, { "epoch": 1.0088450589670597, "grad_norm": 0.31037625670433044, "learning_rate": 9.687813693800399e-06, "loss": 0.3456, "step": 9923 }, { "epoch": 1.0089467263115088, "grad_norm": 0.3053118884563446, "learning_rate": 9.68769024689832e-06, "loss": 0.3468, "step": 9924 }, { "epoch": 1.0090483936559578, "grad_norm": 0.33882370591163635, "learning_rate": 9.687566776380773e-06, "loss": 0.3732, "step": 9925 }, { "epoch": 1.0091500610004067, "grad_norm": 0.3096214234828949, "learning_rate": 9.687443282248377e-06, "loss": 0.4065, "step": 9926 }, { "epoch": 1.0092517283448557, "grad_norm": 0.3110421299934387, "learning_rate": 9.687319764501757e-06, "loss": 0.3563, "step": 9927 }, { "epoch": 1.0093533956893046, "grad_norm": 0.34128761291503906, "learning_rate": 9.687196223141535e-06, "loss": 0.3872, "step": 9928 }, { "epoch": 1.0094550630337535, "grad_norm": 0.3627068102359772, "learning_rate": 9.687072658168331e-06, "loss": 0.3652, "step": 9929 }, { "epoch": 1.0095567303782025, "grad_norm": 0.3033406734466553, "learning_rate": 9.686949069582767e-06, "loss": 0.3384, "step": 9930 }, { "epoch": 1.0096583977226514, "grad_norm": 0.33393746614456177, "learning_rate": 9.686825457385469e-06, "loss": 0.3751, "step": 9931 }, { "epoch": 1.0097600650671004, "grad_norm": 0.32678818702697754, "learning_rate": 9.686701821577059e-06, "loss": 0.356, "step": 9932 }, { "epoch": 1.0098617324115493, "grad_norm": 0.32083818316459656, "learning_rate": 9.686578162158157e-06, "loss": 0.3667, "step": 9933 }, { "epoch": 1.0099633997559985, "grad_norm": 0.3235529065132141, "learning_rate": 9.68645447912939e-06, "loss": 0.3511, "step": 9934 }, { "epoch": 1.0100650671004474, "grad_norm": 0.3391556739807129, "learning_rate": 9.68633077249138e-06, "loss": 0.3652, "step": 9935 }, { "epoch": 1.0101667344448964, "grad_norm": 0.33213648200035095, "learning_rate": 9.686207042244746e-06, "loss": 0.3848, "step": 9936 }, { "epoch": 1.0102684017893453, "grad_norm": 0.27948322892189026, "learning_rate": 9.686083288390116e-06, "loss": 0.3474, "step": 9937 }, { "epoch": 1.0103700691337942, "grad_norm": 0.33789798617362976, "learning_rate": 9.685959510928112e-06, "loss": 0.3651, "step": 9938 }, { "epoch": 1.0104717364782432, "grad_norm": 0.3163256347179413, "learning_rate": 9.685835709859357e-06, "loss": 0.3285, "step": 9939 }, { "epoch": 1.0105734038226921, "grad_norm": 0.2938360273838043, "learning_rate": 9.685711885184475e-06, "loss": 0.3467, "step": 9940 }, { "epoch": 1.010675071167141, "grad_norm": 0.3103830814361572, "learning_rate": 9.68558803690409e-06, "loss": 0.3304, "step": 9941 }, { "epoch": 1.01077673851159, "grad_norm": 0.3257819712162018, "learning_rate": 9.685464165018827e-06, "loss": 0.3747, "step": 9942 }, { "epoch": 1.010878405856039, "grad_norm": 0.33485010266304016, "learning_rate": 9.685340269529309e-06, "loss": 0.4207, "step": 9943 }, { "epoch": 1.010980073200488, "grad_norm": 0.3041277527809143, "learning_rate": 9.68521635043616e-06, "loss": 0.3762, "step": 9944 }, { "epoch": 1.011081740544937, "grad_norm": 0.34236255288124084, "learning_rate": 9.685092407740002e-06, "loss": 0.3558, "step": 9945 }, { "epoch": 1.011183407889386, "grad_norm": 0.3022623360157013, "learning_rate": 9.684968441441465e-06, "loss": 0.3607, "step": 9946 }, { "epoch": 1.011285075233835, "grad_norm": 0.2973406910896301, "learning_rate": 9.684844451541168e-06, "loss": 0.3817, "step": 9947 }, { "epoch": 1.0113867425782839, "grad_norm": 0.31042933464050293, "learning_rate": 9.684720438039742e-06, "loss": 0.3616, "step": 9948 }, { "epoch": 1.0114884099227328, "grad_norm": 0.3165550231933594, "learning_rate": 9.684596400937802e-06, "loss": 0.3995, "step": 9949 }, { "epoch": 1.0115900772671818, "grad_norm": 0.2868378460407257, "learning_rate": 9.684472340235983e-06, "loss": 0.3644, "step": 9950 }, { "epoch": 1.0116917446116307, "grad_norm": 0.3186265528202057, "learning_rate": 9.684348255934904e-06, "loss": 0.3251, "step": 9951 }, { "epoch": 1.0117934119560796, "grad_norm": 0.30855247378349304, "learning_rate": 9.684224148035191e-06, "loss": 0.3604, "step": 9952 }, { "epoch": 1.0118950793005286, "grad_norm": 0.29642513394355774, "learning_rate": 9.68410001653747e-06, "loss": 0.3695, "step": 9953 }, { "epoch": 1.0119967466449777, "grad_norm": 0.33374279737472534, "learning_rate": 9.683975861442366e-06, "loss": 0.3651, "step": 9954 }, { "epoch": 1.0120984139894267, "grad_norm": 0.3176023066043854, "learning_rate": 9.683851682750505e-06, "loss": 0.3976, "step": 9955 }, { "epoch": 1.0122000813338756, "grad_norm": 0.30639833211898804, "learning_rate": 9.683727480462513e-06, "loss": 0.3873, "step": 9956 }, { "epoch": 1.0123017486783246, "grad_norm": 0.31258898973464966, "learning_rate": 9.683603254579015e-06, "loss": 0.3839, "step": 9957 }, { "epoch": 1.0124034160227735, "grad_norm": 0.344745010137558, "learning_rate": 9.683479005100635e-06, "loss": 0.3517, "step": 9958 }, { "epoch": 1.0125050833672224, "grad_norm": 0.3334054946899414, "learning_rate": 9.683354732028003e-06, "loss": 0.3807, "step": 9959 }, { "epoch": 1.0126067507116714, "grad_norm": 0.35644611716270447, "learning_rate": 9.683230435361743e-06, "loss": 0.3475, "step": 9960 }, { "epoch": 1.0127084180561203, "grad_norm": 0.3585447072982788, "learning_rate": 9.683106115102479e-06, "loss": 0.3801, "step": 9961 }, { "epoch": 1.0128100854005693, "grad_norm": 0.3079334795475006, "learning_rate": 9.68298177125084e-06, "loss": 0.3504, "step": 9962 }, { "epoch": 1.0129117527450182, "grad_norm": 0.327559232711792, "learning_rate": 9.682857403807453e-06, "loss": 0.3567, "step": 9963 }, { "epoch": 1.0130134200894672, "grad_norm": 0.3362731337547302, "learning_rate": 9.682733012772942e-06, "loss": 0.3671, "step": 9964 }, { "epoch": 1.0131150874339163, "grad_norm": 0.3058450520038605, "learning_rate": 9.682608598147933e-06, "loss": 0.3582, "step": 9965 }, { "epoch": 1.0132167547783653, "grad_norm": 0.2934683561325073, "learning_rate": 9.682484159933058e-06, "loss": 0.3543, "step": 9966 }, { "epoch": 1.0133184221228142, "grad_norm": 0.3220813572406769, "learning_rate": 9.682359698128938e-06, "loss": 0.3413, "step": 9967 }, { "epoch": 1.0134200894672631, "grad_norm": 0.34302985668182373, "learning_rate": 9.682235212736203e-06, "loss": 0.3735, "step": 9968 }, { "epoch": 1.013521756811712, "grad_norm": 0.2806081175804138, "learning_rate": 9.682110703755482e-06, "loss": 0.3388, "step": 9969 }, { "epoch": 1.013623424156161, "grad_norm": 0.30407753586769104, "learning_rate": 9.681986171187398e-06, "loss": 0.3436, "step": 9970 }, { "epoch": 1.01372509150061, "grad_norm": 0.32898691296577454, "learning_rate": 9.681861615032581e-06, "loss": 0.3539, "step": 9971 }, { "epoch": 1.013826758845059, "grad_norm": 0.30660751461982727, "learning_rate": 9.681737035291657e-06, "loss": 0.3398, "step": 9972 }, { "epoch": 1.0139284261895078, "grad_norm": 0.29531827569007874, "learning_rate": 9.681612431965255e-06, "loss": 0.4104, "step": 9973 }, { "epoch": 1.0140300935339568, "grad_norm": 0.33886831998825073, "learning_rate": 9.681487805054001e-06, "loss": 0.3715, "step": 9974 }, { "epoch": 1.014131760878406, "grad_norm": 0.30279386043548584, "learning_rate": 9.681363154558524e-06, "loss": 0.3693, "step": 9975 }, { "epoch": 1.014233428222855, "grad_norm": 0.3370250165462494, "learning_rate": 9.681238480479453e-06, "loss": 0.3897, "step": 9976 }, { "epoch": 1.0143350955673038, "grad_norm": 0.29213112592697144, "learning_rate": 9.681113782817414e-06, "loss": 0.344, "step": 9977 }, { "epoch": 1.0144367629117528, "grad_norm": 0.32993611693382263, "learning_rate": 9.680989061573037e-06, "loss": 0.3683, "step": 9978 }, { "epoch": 1.0145384302562017, "grad_norm": 0.3507004976272583, "learning_rate": 9.68086431674695e-06, "loss": 0.3634, "step": 9979 }, { "epoch": 1.0146400976006507, "grad_norm": 0.2944674789905548, "learning_rate": 9.680739548339779e-06, "loss": 0.3937, "step": 9980 }, { "epoch": 1.0147417649450996, "grad_norm": 0.32370370626449585, "learning_rate": 9.680614756352156e-06, "loss": 0.3756, "step": 9981 }, { "epoch": 1.0148434322895485, "grad_norm": 0.3076234757900238, "learning_rate": 9.680489940784707e-06, "loss": 0.3602, "step": 9982 }, { "epoch": 1.0149450996339975, "grad_norm": 0.2996390461921692, "learning_rate": 9.680365101638062e-06, "loss": 0.3656, "step": 9983 }, { "epoch": 1.0150467669784464, "grad_norm": 0.31716188788414, "learning_rate": 9.680240238912853e-06, "loss": 0.3783, "step": 9984 }, { "epoch": 1.0151484343228956, "grad_norm": 0.29540663957595825, "learning_rate": 9.680115352609702e-06, "loss": 0.3571, "step": 9985 }, { "epoch": 1.0152501016673445, "grad_norm": 0.2840299606323242, "learning_rate": 9.679990442729244e-06, "loss": 0.414, "step": 9986 }, { "epoch": 1.0153517690117935, "grad_norm": 0.3380902111530304, "learning_rate": 9.679865509272106e-06, "loss": 0.3505, "step": 9987 }, { "epoch": 1.0154534363562424, "grad_norm": 0.30552273988723755, "learning_rate": 9.679740552238916e-06, "loss": 0.3625, "step": 9988 }, { "epoch": 1.0155551037006914, "grad_norm": 0.31331178545951843, "learning_rate": 9.679615571630307e-06, "loss": 0.3944, "step": 9989 }, { "epoch": 1.0156567710451403, "grad_norm": 0.3061487674713135, "learning_rate": 9.679490567446906e-06, "loss": 0.3138, "step": 9990 }, { "epoch": 1.0157584383895892, "grad_norm": 0.30157825350761414, "learning_rate": 9.679365539689345e-06, "loss": 0.3881, "step": 9991 }, { "epoch": 1.0158601057340382, "grad_norm": 0.2851502001285553, "learning_rate": 9.67924048835825e-06, "loss": 0.3643, "step": 9992 }, { "epoch": 1.0159617730784871, "grad_norm": 0.3218517005443573, "learning_rate": 9.679115413454254e-06, "loss": 0.3338, "step": 9993 }, { "epoch": 1.016063440422936, "grad_norm": 0.31783613562583923, "learning_rate": 9.678990314977988e-06, "loss": 0.3717, "step": 9994 }, { "epoch": 1.0161651077673852, "grad_norm": 0.2960266172885895, "learning_rate": 9.678865192930081e-06, "loss": 0.3514, "step": 9995 }, { "epoch": 1.0162667751118342, "grad_norm": 0.3369567096233368, "learning_rate": 9.678740047311161e-06, "loss": 0.396, "step": 9996 }, { "epoch": 1.016368442456283, "grad_norm": 0.3227294087409973, "learning_rate": 9.678614878121861e-06, "loss": 0.3855, "step": 9997 }, { "epoch": 1.016470109800732, "grad_norm": 0.3175198435783386, "learning_rate": 9.67848968536281e-06, "loss": 0.3859, "step": 9998 }, { "epoch": 1.016571777145181, "grad_norm": 0.3118188679218292, "learning_rate": 9.678364469034642e-06, "loss": 0.3813, "step": 9999 }, { "epoch": 1.01667344448963, "grad_norm": 0.2942180335521698, "learning_rate": 9.678239229137984e-06, "loss": 0.346, "step": 10000 }, { "epoch": 1.0167751118340789, "grad_norm": 0.32644784450531006, "learning_rate": 9.67811396567347e-06, "loss": 0.3811, "step": 10001 }, { "epoch": 1.0168767791785278, "grad_norm": 0.307552307844162, "learning_rate": 9.677988678641728e-06, "loss": 0.3256, "step": 10002 }, { "epoch": 1.0169784465229768, "grad_norm": 0.3173882067203522, "learning_rate": 9.677863368043391e-06, "loss": 0.3554, "step": 10003 }, { "epoch": 1.0170801138674257, "grad_norm": 0.3217884302139282, "learning_rate": 9.677738033879091e-06, "loss": 0.3831, "step": 10004 }, { "epoch": 1.0171817812118746, "grad_norm": 0.3078806400299072, "learning_rate": 9.677612676149457e-06, "loss": 0.3293, "step": 10005 }, { "epoch": 1.0172834485563238, "grad_norm": 0.33084291219711304, "learning_rate": 9.677487294855123e-06, "loss": 0.3966, "step": 10006 }, { "epoch": 1.0173851159007727, "grad_norm": 0.3296714127063751, "learning_rate": 9.67736188999672e-06, "loss": 0.3785, "step": 10007 }, { "epoch": 1.0174867832452217, "grad_norm": 0.3238377869129181, "learning_rate": 9.677236461574878e-06, "loss": 0.3786, "step": 10008 }, { "epoch": 1.0175884505896706, "grad_norm": 0.30143702030181885, "learning_rate": 9.677111009590232e-06, "loss": 0.3717, "step": 10009 }, { "epoch": 1.0176901179341196, "grad_norm": 0.3347584903240204, "learning_rate": 9.67698553404341e-06, "loss": 0.3582, "step": 10010 }, { "epoch": 1.0177917852785685, "grad_norm": 0.32636094093322754, "learning_rate": 9.676860034935047e-06, "loss": 0.3683, "step": 10011 }, { "epoch": 1.0178934526230174, "grad_norm": 0.3108583390712738, "learning_rate": 9.676734512265775e-06, "loss": 0.359, "step": 10012 }, { "epoch": 1.0179951199674664, "grad_norm": 0.3539266884326935, "learning_rate": 9.676608966036225e-06, "loss": 0.3664, "step": 10013 }, { "epoch": 1.0180967873119153, "grad_norm": 0.3184036612510681, "learning_rate": 9.676483396247032e-06, "loss": 0.3534, "step": 10014 }, { "epoch": 1.0181984546563643, "grad_norm": 0.31310343742370605, "learning_rate": 9.676357802898826e-06, "loss": 0.3645, "step": 10015 }, { "epoch": 1.0183001220008134, "grad_norm": 0.35169219970703125, "learning_rate": 9.67623218599224e-06, "loss": 0.3888, "step": 10016 }, { "epoch": 1.0184017893452624, "grad_norm": 0.33235007524490356, "learning_rate": 9.676106545527907e-06, "loss": 0.374, "step": 10017 }, { "epoch": 1.0185034566897113, "grad_norm": 0.2933652997016907, "learning_rate": 9.675980881506462e-06, "loss": 0.3784, "step": 10018 }, { "epoch": 1.0186051240341603, "grad_norm": 0.3220110237598419, "learning_rate": 9.675855193928536e-06, "loss": 0.3689, "step": 10019 }, { "epoch": 1.0187067913786092, "grad_norm": 0.3070542812347412, "learning_rate": 9.675729482794762e-06, "loss": 0.3494, "step": 10020 }, { "epoch": 1.0188084587230581, "grad_norm": 0.3426586091518402, "learning_rate": 9.675603748105775e-06, "loss": 0.347, "step": 10021 }, { "epoch": 1.018910126067507, "grad_norm": 0.3173580467700958, "learning_rate": 9.675477989862207e-06, "loss": 0.3773, "step": 10022 }, { "epoch": 1.019011793411956, "grad_norm": 0.32383355498313904, "learning_rate": 9.675352208064693e-06, "loss": 0.3609, "step": 10023 }, { "epoch": 1.019113460756405, "grad_norm": 0.3227500021457672, "learning_rate": 9.675226402713864e-06, "loss": 0.3852, "step": 10024 }, { "epoch": 1.019215128100854, "grad_norm": 0.28819459676742554, "learning_rate": 9.675100573810356e-06, "loss": 0.3525, "step": 10025 }, { "epoch": 1.019316795445303, "grad_norm": 0.30321642756462097, "learning_rate": 9.674974721354803e-06, "loss": 0.3693, "step": 10026 }, { "epoch": 1.019418462789752, "grad_norm": 0.29611775279045105, "learning_rate": 9.674848845347837e-06, "loss": 0.3643, "step": 10027 }, { "epoch": 1.019520130134201, "grad_norm": 0.3230546712875366, "learning_rate": 9.674722945790093e-06, "loss": 0.4128, "step": 10028 }, { "epoch": 1.01962179747865, "grad_norm": 0.32726916670799255, "learning_rate": 9.674597022682208e-06, "loss": 0.371, "step": 10029 }, { "epoch": 1.0197234648230988, "grad_norm": 0.3030080497264862, "learning_rate": 9.674471076024813e-06, "loss": 0.3504, "step": 10030 }, { "epoch": 1.0198251321675478, "grad_norm": 0.31259676814079285, "learning_rate": 9.674345105818543e-06, "loss": 0.3778, "step": 10031 }, { "epoch": 1.0199267995119967, "grad_norm": 0.31284835934638977, "learning_rate": 9.674219112064034e-06, "loss": 0.3242, "step": 10032 }, { "epoch": 1.0200284668564457, "grad_norm": 0.32864388823509216, "learning_rate": 9.674093094761918e-06, "loss": 0.3858, "step": 10033 }, { "epoch": 1.0201301342008946, "grad_norm": 0.3164671063423157, "learning_rate": 9.673967053912835e-06, "loss": 0.3723, "step": 10034 }, { "epoch": 1.0202318015453435, "grad_norm": 0.31814080476760864, "learning_rate": 9.673840989517414e-06, "loss": 0.3716, "step": 10035 }, { "epoch": 1.0203334688897927, "grad_norm": 0.33034563064575195, "learning_rate": 9.673714901576292e-06, "loss": 0.3672, "step": 10036 }, { "epoch": 1.0204351362342416, "grad_norm": 0.29932108521461487, "learning_rate": 9.673588790090108e-06, "loss": 0.3621, "step": 10037 }, { "epoch": 1.0205368035786906, "grad_norm": 0.3168347179889679, "learning_rate": 9.673462655059493e-06, "loss": 0.3574, "step": 10038 }, { "epoch": 1.0206384709231395, "grad_norm": 0.30270838737487793, "learning_rate": 9.673336496485084e-06, "loss": 0.3844, "step": 10039 }, { "epoch": 1.0207401382675885, "grad_norm": 0.2925713062286377, "learning_rate": 9.673210314367515e-06, "loss": 0.3526, "step": 10040 }, { "epoch": 1.0208418056120374, "grad_norm": 0.28394684195518494, "learning_rate": 9.673084108707424e-06, "loss": 0.3465, "step": 10041 }, { "epoch": 1.0209434729564864, "grad_norm": 0.29971984028816223, "learning_rate": 9.672957879505445e-06, "loss": 0.4382, "step": 10042 }, { "epoch": 1.0210451403009353, "grad_norm": 0.3217235803604126, "learning_rate": 9.672831626762215e-06, "loss": 0.3631, "step": 10043 }, { "epoch": 1.0211468076453842, "grad_norm": 0.29741689562797546, "learning_rate": 9.672705350478371e-06, "loss": 0.3558, "step": 10044 }, { "epoch": 1.0212484749898332, "grad_norm": 0.3253050744533539, "learning_rate": 9.672579050654548e-06, "loss": 0.3694, "step": 10045 }, { "epoch": 1.0213501423342821, "grad_norm": 0.32268619537353516, "learning_rate": 9.672452727291379e-06, "loss": 0.4013, "step": 10046 }, { "epoch": 1.0214518096787313, "grad_norm": 0.32919853925704956, "learning_rate": 9.672326380389505e-06, "loss": 0.3704, "step": 10047 }, { "epoch": 1.0215534770231802, "grad_norm": 0.32024946808815, "learning_rate": 9.672200009949561e-06, "loss": 0.3587, "step": 10048 }, { "epoch": 1.0216551443676292, "grad_norm": 0.29216256737709045, "learning_rate": 9.672073615972184e-06, "loss": 0.347, "step": 10049 }, { "epoch": 1.021756811712078, "grad_norm": 0.3995313346385956, "learning_rate": 9.671947198458012e-06, "loss": 0.3997, "step": 10050 }, { "epoch": 1.021858479056527, "grad_norm": 0.34252122044563293, "learning_rate": 9.671820757407677e-06, "loss": 0.372, "step": 10051 }, { "epoch": 1.021960146400976, "grad_norm": 0.27666306495666504, "learning_rate": 9.671694292821822e-06, "loss": 0.3419, "step": 10052 }, { "epoch": 1.022061813745425, "grad_norm": 0.3631207048892975, "learning_rate": 9.67156780470108e-06, "loss": 0.3848, "step": 10053 }, { "epoch": 1.0221634810898739, "grad_norm": 0.35627254843711853, "learning_rate": 9.67144129304609e-06, "loss": 0.3828, "step": 10054 }, { "epoch": 1.0222651484343228, "grad_norm": 0.2844802737236023, "learning_rate": 9.671314757857488e-06, "loss": 0.3627, "step": 10055 }, { "epoch": 1.0223668157787718, "grad_norm": 0.29219168424606323, "learning_rate": 9.671188199135913e-06, "loss": 0.369, "step": 10056 }, { "epoch": 1.022468483123221, "grad_norm": 0.37007516622543335, "learning_rate": 9.671061616882003e-06, "loss": 0.3789, "step": 10057 }, { "epoch": 1.0225701504676699, "grad_norm": 0.318307101726532, "learning_rate": 9.670935011096392e-06, "loss": 0.3627, "step": 10058 }, { "epoch": 1.0226718178121188, "grad_norm": 0.3168559968471527, "learning_rate": 9.670808381779722e-06, "loss": 0.3853, "step": 10059 }, { "epoch": 1.0227734851565677, "grad_norm": 0.3095965087413788, "learning_rate": 9.67068172893263e-06, "loss": 0.3605, "step": 10060 }, { "epoch": 1.0228751525010167, "grad_norm": 0.3961120545864105, "learning_rate": 9.670555052555752e-06, "loss": 0.38, "step": 10061 }, { "epoch": 1.0229768198454656, "grad_norm": 0.3011695146560669, "learning_rate": 9.670428352649728e-06, "loss": 0.3767, "step": 10062 }, { "epoch": 1.0230784871899146, "grad_norm": 0.34073391556739807, "learning_rate": 9.670301629215195e-06, "loss": 0.3583, "step": 10063 }, { "epoch": 1.0231801545343635, "grad_norm": 0.32037225365638733, "learning_rate": 9.670174882252794e-06, "loss": 0.3986, "step": 10064 }, { "epoch": 1.0232818218788124, "grad_norm": 0.3375820815563202, "learning_rate": 9.67004811176316e-06, "loss": 0.3878, "step": 10065 }, { "epoch": 1.0233834892232614, "grad_norm": 0.3156124949455261, "learning_rate": 9.669921317746935e-06, "loss": 0.3394, "step": 10066 }, { "epoch": 1.0234851565677106, "grad_norm": 0.3255791962146759, "learning_rate": 9.669794500204755e-06, "loss": 0.4164, "step": 10067 }, { "epoch": 1.0235868239121595, "grad_norm": 0.33043742179870605, "learning_rate": 9.669667659137261e-06, "loss": 0.3887, "step": 10068 }, { "epoch": 1.0236884912566084, "grad_norm": 0.31652647256851196, "learning_rate": 9.66954079454509e-06, "loss": 0.3462, "step": 10069 }, { "epoch": 1.0237901586010574, "grad_norm": 0.3334122896194458, "learning_rate": 9.669413906428881e-06, "loss": 0.3727, "step": 10070 }, { "epoch": 1.0238918259455063, "grad_norm": 0.3445793390274048, "learning_rate": 9.669286994789277e-06, "loss": 0.3744, "step": 10071 }, { "epoch": 1.0239934932899553, "grad_norm": 0.3280565142631531, "learning_rate": 9.669160059626914e-06, "loss": 0.3861, "step": 10072 }, { "epoch": 1.0240951606344042, "grad_norm": 0.3217945694923401, "learning_rate": 9.669033100942431e-06, "loss": 0.3689, "step": 10073 }, { "epoch": 1.0241968279788531, "grad_norm": 0.32474425435066223, "learning_rate": 9.66890611873647e-06, "loss": 0.3969, "step": 10074 }, { "epoch": 1.024298495323302, "grad_norm": 0.3301372826099396, "learning_rate": 9.66877911300967e-06, "loss": 0.3729, "step": 10075 }, { "epoch": 1.024400162667751, "grad_norm": 0.31269562244415283, "learning_rate": 9.668652083762668e-06, "loss": 0.3441, "step": 10076 }, { "epoch": 1.0245018300122002, "grad_norm": 0.3002341091632843, "learning_rate": 9.668525030996108e-06, "loss": 0.3613, "step": 10077 }, { "epoch": 1.0246034973566491, "grad_norm": 0.32186976075172424, "learning_rate": 9.668397954710629e-06, "loss": 0.3914, "step": 10078 }, { "epoch": 1.024705164701098, "grad_norm": 0.35437673330307007, "learning_rate": 9.668270854906869e-06, "loss": 0.369, "step": 10079 }, { "epoch": 1.024806832045547, "grad_norm": 0.31596508622169495, "learning_rate": 9.66814373158547e-06, "loss": 0.328, "step": 10080 }, { "epoch": 1.024908499389996, "grad_norm": 0.32247358560562134, "learning_rate": 9.668016584747073e-06, "loss": 0.3736, "step": 10081 }, { "epoch": 1.025010166734445, "grad_norm": 0.3385099172592163, "learning_rate": 9.667889414392317e-06, "loss": 0.3698, "step": 10082 }, { "epoch": 1.0251118340788938, "grad_norm": 0.3350273668766022, "learning_rate": 9.667762220521843e-06, "loss": 0.4243, "step": 10083 }, { "epoch": 1.0252135014233428, "grad_norm": 0.34045127034187317, "learning_rate": 9.667635003136291e-06, "loss": 0.3809, "step": 10084 }, { "epoch": 1.0253151687677917, "grad_norm": 0.3709607720375061, "learning_rate": 9.667507762236305e-06, "loss": 0.3998, "step": 10085 }, { "epoch": 1.0254168361122407, "grad_norm": 0.32710182666778564, "learning_rate": 9.667380497822526e-06, "loss": 0.3525, "step": 10086 }, { "epoch": 1.0255185034566896, "grad_norm": 0.335897296667099, "learning_rate": 9.667253209895589e-06, "loss": 0.3481, "step": 10087 }, { "epoch": 1.0256201708011388, "grad_norm": 0.35670527815818787, "learning_rate": 9.667125898456142e-06, "loss": 0.3554, "step": 10088 }, { "epoch": 1.0257218381455877, "grad_norm": 0.3251339793205261, "learning_rate": 9.666998563504823e-06, "loss": 0.3688, "step": 10089 }, { "epoch": 1.0258235054900366, "grad_norm": 0.33776676654815674, "learning_rate": 9.666871205042275e-06, "loss": 0.3546, "step": 10090 }, { "epoch": 1.0259251728344856, "grad_norm": 0.36793145537376404, "learning_rate": 9.666743823069138e-06, "loss": 0.3598, "step": 10091 }, { "epoch": 1.0260268401789345, "grad_norm": 0.39545243978500366, "learning_rate": 9.666616417586054e-06, "loss": 0.3668, "step": 10092 }, { "epoch": 1.0261285075233835, "grad_norm": 0.3252497613430023, "learning_rate": 9.666488988593668e-06, "loss": 0.3447, "step": 10093 }, { "epoch": 1.0262301748678324, "grad_norm": 0.32368552684783936, "learning_rate": 9.666361536092617e-06, "loss": 0.3546, "step": 10094 }, { "epoch": 1.0263318422122814, "grad_norm": 0.3203696012496948, "learning_rate": 9.666234060083547e-06, "loss": 0.391, "step": 10095 }, { "epoch": 1.0264335095567303, "grad_norm": 0.37869444489479065, "learning_rate": 9.666106560567098e-06, "loss": 0.3875, "step": 10096 }, { "epoch": 1.0265351769011792, "grad_norm": 0.34030529856681824, "learning_rate": 9.665979037543913e-06, "loss": 0.3403, "step": 10097 }, { "epoch": 1.0266368442456284, "grad_norm": 0.307600736618042, "learning_rate": 9.665851491014635e-06, "loss": 0.3451, "step": 10098 }, { "epoch": 1.0267385115900773, "grad_norm": 0.3587414622306824, "learning_rate": 9.665723920979904e-06, "loss": 0.3733, "step": 10099 }, { "epoch": 1.0268401789345263, "grad_norm": 0.3052920997142792, "learning_rate": 9.665596327440366e-06, "loss": 0.3599, "step": 10100 }, { "epoch": 1.0269418462789752, "grad_norm": 0.29757750034332275, "learning_rate": 9.665468710396663e-06, "loss": 0.3711, "step": 10101 }, { "epoch": 1.0270435136234242, "grad_norm": 0.3716311752796173, "learning_rate": 9.665341069849436e-06, "loss": 0.3684, "step": 10102 }, { "epoch": 1.027145180967873, "grad_norm": 0.3036899268627167, "learning_rate": 9.66521340579933e-06, "loss": 0.369, "step": 10103 }, { "epoch": 1.027246848312322, "grad_norm": 0.3002358376979828, "learning_rate": 9.665085718246989e-06, "loss": 0.358, "step": 10104 }, { "epoch": 1.027348515656771, "grad_norm": 0.3461650311946869, "learning_rate": 9.664958007193053e-06, "loss": 0.3701, "step": 10105 }, { "epoch": 1.02745018300122, "grad_norm": 0.3105649948120117, "learning_rate": 9.664830272638167e-06, "loss": 0.3509, "step": 10106 }, { "epoch": 1.0275518503456689, "grad_norm": 0.3402528464794159, "learning_rate": 9.664702514582976e-06, "loss": 0.3815, "step": 10107 }, { "epoch": 1.027653517690118, "grad_norm": 0.3041984438896179, "learning_rate": 9.664574733028121e-06, "loss": 0.3865, "step": 10108 }, { "epoch": 1.027755185034567, "grad_norm": 0.2915915548801422, "learning_rate": 9.664446927974248e-06, "loss": 0.3686, "step": 10109 }, { "epoch": 1.027856852379016, "grad_norm": 0.33471986651420593, "learning_rate": 9.664319099421997e-06, "loss": 0.3476, "step": 10110 }, { "epoch": 1.0279585197234649, "grad_norm": 0.33119744062423706, "learning_rate": 9.664191247372017e-06, "loss": 0.3845, "step": 10111 }, { "epoch": 1.0280601870679138, "grad_norm": 0.3121975362300873, "learning_rate": 9.66406337182495e-06, "loss": 0.3339, "step": 10112 }, { "epoch": 1.0281618544123627, "grad_norm": 0.308904230594635, "learning_rate": 9.663935472781441e-06, "loss": 0.3482, "step": 10113 }, { "epoch": 1.0282635217568117, "grad_norm": 0.2874875068664551, "learning_rate": 9.663807550242131e-06, "loss": 0.3595, "step": 10114 }, { "epoch": 1.0283651891012606, "grad_norm": 0.3114194869995117, "learning_rate": 9.663679604207668e-06, "loss": 0.3618, "step": 10115 }, { "epoch": 1.0284668564457096, "grad_norm": 0.31452327966690063, "learning_rate": 9.663551634678697e-06, "loss": 0.3626, "step": 10116 }, { "epoch": 1.0285685237901585, "grad_norm": 0.32017505168914795, "learning_rate": 9.663423641655858e-06, "loss": 0.3956, "step": 10117 }, { "epoch": 1.0286701911346077, "grad_norm": 0.3048692047595978, "learning_rate": 9.663295625139801e-06, "loss": 0.3743, "step": 10118 }, { "epoch": 1.0287718584790566, "grad_norm": 0.3412081301212311, "learning_rate": 9.663167585131169e-06, "loss": 0.3753, "step": 10119 }, { "epoch": 1.0288735258235056, "grad_norm": 0.33707094192504883, "learning_rate": 9.663039521630604e-06, "loss": 0.326, "step": 10120 }, { "epoch": 1.0289751931679545, "grad_norm": 0.33309653401374817, "learning_rate": 9.662911434638757e-06, "loss": 0.3909, "step": 10121 }, { "epoch": 1.0290768605124034, "grad_norm": 0.3336617052555084, "learning_rate": 9.66278332415627e-06, "loss": 0.3346, "step": 10122 }, { "epoch": 1.0291785278568524, "grad_norm": 0.32893499732017517, "learning_rate": 9.662655190183788e-06, "loss": 0.354, "step": 10123 }, { "epoch": 1.0292801952013013, "grad_norm": 0.30045658349990845, "learning_rate": 9.662527032721958e-06, "loss": 0.373, "step": 10124 }, { "epoch": 1.0293818625457503, "grad_norm": 0.30652984976768494, "learning_rate": 9.662398851771423e-06, "loss": 0.3777, "step": 10125 }, { "epoch": 1.0294835298901992, "grad_norm": 0.3454684019088745, "learning_rate": 9.662270647332833e-06, "loss": 0.3968, "step": 10126 }, { "epoch": 1.0295851972346481, "grad_norm": 0.3184354305267334, "learning_rate": 9.662142419406827e-06, "loss": 0.374, "step": 10127 }, { "epoch": 1.029686864579097, "grad_norm": 0.35567089915275574, "learning_rate": 9.66201416799406e-06, "loss": 0.3832, "step": 10128 }, { "epoch": 1.0297885319235462, "grad_norm": 0.3439140021800995, "learning_rate": 9.661885893095172e-06, "loss": 0.3749, "step": 10129 }, { "epoch": 1.0298901992679952, "grad_norm": 0.33397576212882996, "learning_rate": 9.66175759471081e-06, "loss": 0.3687, "step": 10130 }, { "epoch": 1.0299918666124441, "grad_norm": 0.30783241987228394, "learning_rate": 9.661629272841622e-06, "loss": 0.3777, "step": 10131 }, { "epoch": 1.030093533956893, "grad_norm": 0.36126676201820374, "learning_rate": 9.661500927488254e-06, "loss": 0.3709, "step": 10132 }, { "epoch": 1.030195201301342, "grad_norm": 0.3416525721549988, "learning_rate": 9.66137255865135e-06, "loss": 0.3612, "step": 10133 }, { "epoch": 1.030296868645791, "grad_norm": 0.29029595851898193, "learning_rate": 9.66124416633156e-06, "loss": 0.3344, "step": 10134 }, { "epoch": 1.03039853599024, "grad_norm": 0.3330966830253601, "learning_rate": 9.661115750529529e-06, "loss": 0.4109, "step": 10135 }, { "epoch": 1.0305002033346888, "grad_norm": 0.36465975642204285, "learning_rate": 9.660987311245906e-06, "loss": 0.3594, "step": 10136 }, { "epoch": 1.0306018706791378, "grad_norm": 0.31915920972824097, "learning_rate": 9.660858848481336e-06, "loss": 0.3724, "step": 10137 }, { "epoch": 1.0307035380235867, "grad_norm": 0.3160700798034668, "learning_rate": 9.660730362236465e-06, "loss": 0.3517, "step": 10138 }, { "epoch": 1.0308052053680359, "grad_norm": 0.34214070439338684, "learning_rate": 9.660601852511944e-06, "loss": 0.3403, "step": 10139 }, { "epoch": 1.0309068727124848, "grad_norm": 0.33661139011383057, "learning_rate": 9.660473319308417e-06, "loss": 0.3344, "step": 10140 }, { "epoch": 1.0310085400569338, "grad_norm": 0.3532175123691559, "learning_rate": 9.660344762626534e-06, "loss": 0.3646, "step": 10141 }, { "epoch": 1.0311102074013827, "grad_norm": 0.3332034945487976, "learning_rate": 9.660216182466941e-06, "loss": 0.3261, "step": 10142 }, { "epoch": 1.0312118747458316, "grad_norm": 0.36206039786338806, "learning_rate": 9.660087578830287e-06, "loss": 0.3727, "step": 10143 }, { "epoch": 1.0313135420902806, "grad_norm": 0.32620614767074585, "learning_rate": 9.659958951717218e-06, "loss": 0.3641, "step": 10144 }, { "epoch": 1.0314152094347295, "grad_norm": 0.32032498717308044, "learning_rate": 9.659830301128384e-06, "loss": 0.3682, "step": 10145 }, { "epoch": 1.0315168767791785, "grad_norm": 0.3314984440803528, "learning_rate": 9.659701627064433e-06, "loss": 0.3653, "step": 10146 }, { "epoch": 1.0316185441236274, "grad_norm": 0.40110066533088684, "learning_rate": 9.659572929526011e-06, "loss": 0.3762, "step": 10147 }, { "epoch": 1.0317202114680764, "grad_norm": 0.36083880066871643, "learning_rate": 9.659444208513768e-06, "loss": 0.3548, "step": 10148 }, { "epoch": 1.0318218788125255, "grad_norm": 0.3012582063674927, "learning_rate": 9.659315464028352e-06, "loss": 0.3777, "step": 10149 }, { "epoch": 1.0319235461569745, "grad_norm": 0.4171901047229767, "learning_rate": 9.659186696070413e-06, "loss": 0.399, "step": 10150 }, { "epoch": 1.0320252135014234, "grad_norm": 0.34501445293426514, "learning_rate": 9.6590579046406e-06, "loss": 0.3396, "step": 10151 }, { "epoch": 1.0321268808458723, "grad_norm": 0.3120477795600891, "learning_rate": 9.658929089739557e-06, "loss": 0.3842, "step": 10152 }, { "epoch": 1.0322285481903213, "grad_norm": 0.32581591606140137, "learning_rate": 9.658800251367938e-06, "loss": 0.3875, "step": 10153 }, { "epoch": 1.0323302155347702, "grad_norm": 0.3215571939945221, "learning_rate": 9.65867138952639e-06, "loss": 0.3555, "step": 10154 }, { "epoch": 1.0324318828792192, "grad_norm": 0.329448938369751, "learning_rate": 9.658542504215565e-06, "loss": 0.3688, "step": 10155 }, { "epoch": 1.032533550223668, "grad_norm": 0.3099195659160614, "learning_rate": 9.658413595436108e-06, "loss": 0.3745, "step": 10156 }, { "epoch": 1.032635217568117, "grad_norm": 0.316556841135025, "learning_rate": 9.658284663188671e-06, "loss": 0.3436, "step": 10157 }, { "epoch": 1.032736884912566, "grad_norm": 0.31305596232414246, "learning_rate": 9.658155707473902e-06, "loss": 0.3831, "step": 10158 }, { "epoch": 1.0328385522570152, "grad_norm": 0.31208232045173645, "learning_rate": 9.658026728292451e-06, "loss": 0.3362, "step": 10159 }, { "epoch": 1.032940219601464, "grad_norm": 0.29775965213775635, "learning_rate": 9.657897725644969e-06, "loss": 0.3221, "step": 10160 }, { "epoch": 1.033041886945913, "grad_norm": 0.3265398442745209, "learning_rate": 9.657768699532105e-06, "loss": 0.3435, "step": 10161 }, { "epoch": 1.033143554290362, "grad_norm": 0.30843743681907654, "learning_rate": 9.657639649954509e-06, "loss": 0.3538, "step": 10162 }, { "epoch": 1.033245221634811, "grad_norm": 0.311038076877594, "learning_rate": 9.657510576912832e-06, "loss": 0.3617, "step": 10163 }, { "epoch": 1.0333468889792599, "grad_norm": 0.3499146103858948, "learning_rate": 9.657381480407724e-06, "loss": 0.3819, "step": 10164 }, { "epoch": 1.0334485563237088, "grad_norm": 0.2942982316017151, "learning_rate": 9.657252360439833e-06, "loss": 0.3841, "step": 10165 }, { "epoch": 1.0335502236681577, "grad_norm": 0.31847769021987915, "learning_rate": 9.657123217009811e-06, "loss": 0.3322, "step": 10166 }, { "epoch": 1.0336518910126067, "grad_norm": 0.35253360867500305, "learning_rate": 9.65699405011831e-06, "loss": 0.3753, "step": 10167 }, { "epoch": 1.0337535583570556, "grad_norm": 0.3156750202178955, "learning_rate": 9.656864859765982e-06, "loss": 0.3755, "step": 10168 }, { "epoch": 1.0338552257015046, "grad_norm": 0.3397139310836792, "learning_rate": 9.656735645953473e-06, "loss": 0.3441, "step": 10169 }, { "epoch": 1.0339568930459537, "grad_norm": 0.3435101807117462, "learning_rate": 9.656606408681437e-06, "loss": 0.3432, "step": 10170 }, { "epoch": 1.0340585603904027, "grad_norm": 0.30025598406791687, "learning_rate": 9.656477147950523e-06, "loss": 0.36, "step": 10171 }, { "epoch": 1.0341602277348516, "grad_norm": 0.31893011927604675, "learning_rate": 9.656347863761387e-06, "loss": 0.3956, "step": 10172 }, { "epoch": 1.0342618950793006, "grad_norm": 0.32258927822113037, "learning_rate": 9.656218556114674e-06, "loss": 0.3825, "step": 10173 }, { "epoch": 1.0343635624237495, "grad_norm": 0.3121667504310608, "learning_rate": 9.656089225011038e-06, "loss": 0.3904, "step": 10174 }, { "epoch": 1.0344652297681984, "grad_norm": 0.3282186985015869, "learning_rate": 9.655959870451133e-06, "loss": 0.3756, "step": 10175 }, { "epoch": 1.0345668971126474, "grad_norm": 0.33366209268569946, "learning_rate": 9.655830492435608e-06, "loss": 0.3632, "step": 10176 }, { "epoch": 1.0346685644570963, "grad_norm": 0.3073767125606537, "learning_rate": 9.655701090965115e-06, "loss": 0.3662, "step": 10177 }, { "epoch": 1.0347702318015453, "grad_norm": 0.3121197521686554, "learning_rate": 9.655571666040306e-06, "loss": 0.3496, "step": 10178 }, { "epoch": 1.0348718991459942, "grad_norm": 0.3318181335926056, "learning_rate": 9.655442217661835e-06, "loss": 0.3585, "step": 10179 }, { "epoch": 1.0349735664904434, "grad_norm": 0.34605562686920166, "learning_rate": 9.65531274583035e-06, "loss": 0.3757, "step": 10180 }, { "epoch": 1.0350752338348923, "grad_norm": 0.33273574709892273, "learning_rate": 9.655183250546507e-06, "loss": 0.3734, "step": 10181 }, { "epoch": 1.0351769011793412, "grad_norm": 0.3111441433429718, "learning_rate": 9.655053731810957e-06, "loss": 0.3648, "step": 10182 }, { "epoch": 1.0352785685237902, "grad_norm": 0.33597421646118164, "learning_rate": 9.654924189624353e-06, "loss": 0.3424, "step": 10183 }, { "epoch": 1.0353802358682391, "grad_norm": 0.333151251077652, "learning_rate": 9.654794623987347e-06, "loss": 0.4038, "step": 10184 }, { "epoch": 1.035481903212688, "grad_norm": 0.2950874865055084, "learning_rate": 9.654665034900592e-06, "loss": 0.3449, "step": 10185 }, { "epoch": 1.035583570557137, "grad_norm": 0.3130517899990082, "learning_rate": 9.654535422364739e-06, "loss": 0.3858, "step": 10186 }, { "epoch": 1.035685237901586, "grad_norm": 0.31986311078071594, "learning_rate": 9.654405786380442e-06, "loss": 0.3665, "step": 10187 }, { "epoch": 1.035786905246035, "grad_norm": 0.3094917833805084, "learning_rate": 9.654276126948358e-06, "loss": 0.3494, "step": 10188 }, { "epoch": 1.0358885725904838, "grad_norm": 0.29154154658317566, "learning_rate": 9.654146444069134e-06, "loss": 0.3302, "step": 10189 }, { "epoch": 1.035990239934933, "grad_norm": 0.313271701335907, "learning_rate": 9.654016737743428e-06, "loss": 0.3477, "step": 10190 }, { "epoch": 1.036091907279382, "grad_norm": 0.3115355968475342, "learning_rate": 9.653887007971892e-06, "loss": 0.3852, "step": 10191 }, { "epoch": 1.0361935746238309, "grad_norm": 0.3252883553504944, "learning_rate": 9.653757254755179e-06, "loss": 0.3468, "step": 10192 }, { "epoch": 1.0362952419682798, "grad_norm": 0.3132166266441345, "learning_rate": 9.653627478093941e-06, "loss": 0.3498, "step": 10193 }, { "epoch": 1.0363969093127288, "grad_norm": 0.36975690722465515, "learning_rate": 9.653497677988834e-06, "loss": 0.3902, "step": 10194 }, { "epoch": 1.0364985766571777, "grad_norm": 0.29553258419036865, "learning_rate": 9.653367854440512e-06, "loss": 0.3406, "step": 10195 }, { "epoch": 1.0366002440016266, "grad_norm": 0.3199729025363922, "learning_rate": 9.65323800744963e-06, "loss": 0.333, "step": 10196 }, { "epoch": 1.0367019113460756, "grad_norm": 0.3553116023540497, "learning_rate": 9.65310813701684e-06, "loss": 0.3932, "step": 10197 }, { "epoch": 1.0368035786905245, "grad_norm": 0.30522266030311584, "learning_rate": 9.652978243142797e-06, "loss": 0.3426, "step": 10198 }, { "epoch": 1.0369052460349735, "grad_norm": 0.3061025142669678, "learning_rate": 9.652848325828153e-06, "loss": 0.3739, "step": 10199 }, { "epoch": 1.0370069133794226, "grad_norm": 0.30496856570243835, "learning_rate": 9.652718385073567e-06, "loss": 0.3817, "step": 10200 }, { "epoch": 1.0371085807238716, "grad_norm": 0.2884228527545929, "learning_rate": 9.65258842087969e-06, "loss": 0.3715, "step": 10201 }, { "epoch": 1.0372102480683205, "grad_norm": 0.33321619033813477, "learning_rate": 9.652458433247181e-06, "loss": 0.3907, "step": 10202 }, { "epoch": 1.0373119154127695, "grad_norm": 0.3020811378955841, "learning_rate": 9.65232842217669e-06, "loss": 0.3611, "step": 10203 }, { "epoch": 1.0374135827572184, "grad_norm": 0.2790374457836151, "learning_rate": 9.652198387668874e-06, "loss": 0.3372, "step": 10204 }, { "epoch": 1.0375152501016673, "grad_norm": 0.30907905101776123, "learning_rate": 9.65206832972439e-06, "loss": 0.3758, "step": 10205 }, { "epoch": 1.0376169174461163, "grad_norm": 0.3007216155529022, "learning_rate": 9.651938248343889e-06, "loss": 0.3634, "step": 10206 }, { "epoch": 1.0377185847905652, "grad_norm": 0.31115803122520447, "learning_rate": 9.651808143528029e-06, "loss": 0.3708, "step": 10207 }, { "epoch": 1.0378202521350142, "grad_norm": 0.30286771059036255, "learning_rate": 9.651678015277463e-06, "loss": 0.3639, "step": 10208 }, { "epoch": 1.037921919479463, "grad_norm": 0.3223646283149719, "learning_rate": 9.651547863592852e-06, "loss": 0.3753, "step": 10209 }, { "epoch": 1.038023586823912, "grad_norm": 0.3267996907234192, "learning_rate": 9.651417688474846e-06, "loss": 0.3867, "step": 10210 }, { "epoch": 1.0381252541683612, "grad_norm": 0.3102564513683319, "learning_rate": 9.651287489924105e-06, "loss": 0.3682, "step": 10211 }, { "epoch": 1.0382269215128102, "grad_norm": 0.28596827387809753, "learning_rate": 9.651157267941282e-06, "loss": 0.3791, "step": 10212 }, { "epoch": 1.038328588857259, "grad_norm": 0.2973189651966095, "learning_rate": 9.651027022527033e-06, "loss": 0.3616, "step": 10213 }, { "epoch": 1.038430256201708, "grad_norm": 0.3201874792575836, "learning_rate": 9.650896753682017e-06, "loss": 0.3679, "step": 10214 }, { "epoch": 1.038531923546157, "grad_norm": 0.29936131834983826, "learning_rate": 9.650766461406888e-06, "loss": 0.3176, "step": 10215 }, { "epoch": 1.038633590890606, "grad_norm": 0.28991392254829407, "learning_rate": 9.650636145702302e-06, "loss": 0.3486, "step": 10216 }, { "epoch": 1.0387352582350549, "grad_norm": 0.3162079155445099, "learning_rate": 9.650505806568915e-06, "loss": 0.3558, "step": 10217 }, { "epoch": 1.0388369255795038, "grad_norm": 0.2861080765724182, "learning_rate": 9.650375444007386e-06, "loss": 0.3511, "step": 10218 }, { "epoch": 1.0389385929239527, "grad_norm": 0.3041958212852478, "learning_rate": 9.650245058018373e-06, "loss": 0.3466, "step": 10219 }, { "epoch": 1.0390402602684017, "grad_norm": 0.29906636476516724, "learning_rate": 9.650114648602526e-06, "loss": 0.3558, "step": 10220 }, { "epoch": 1.0391419276128508, "grad_norm": 0.31461405754089355, "learning_rate": 9.64998421576051e-06, "loss": 0.379, "step": 10221 }, { "epoch": 1.0392435949572998, "grad_norm": 0.31021565198898315, "learning_rate": 9.649853759492977e-06, "loss": 0.3873, "step": 10222 }, { "epoch": 1.0393452623017487, "grad_norm": 0.2894037067890167, "learning_rate": 9.649723279800586e-06, "loss": 0.3576, "step": 10223 }, { "epoch": 1.0394469296461977, "grad_norm": 0.2832872271537781, "learning_rate": 9.649592776683994e-06, "loss": 0.3282, "step": 10224 }, { "epoch": 1.0395485969906466, "grad_norm": 0.326327919960022, "learning_rate": 9.64946225014386e-06, "loss": 0.3723, "step": 10225 }, { "epoch": 1.0396502643350956, "grad_norm": 0.3260743021965027, "learning_rate": 9.649331700180837e-06, "loss": 0.384, "step": 10226 }, { "epoch": 1.0397519316795445, "grad_norm": 0.277680367231369, "learning_rate": 9.64920112679559e-06, "loss": 0.3287, "step": 10227 }, { "epoch": 1.0398535990239934, "grad_norm": 0.3069435954093933, "learning_rate": 9.64907052998877e-06, "loss": 0.349, "step": 10228 }, { "epoch": 1.0399552663684424, "grad_norm": 0.3024321496486664, "learning_rate": 9.648939909761036e-06, "loss": 0.368, "step": 10229 }, { "epoch": 1.0400569337128913, "grad_norm": 0.31584078073501587, "learning_rate": 9.648809266113049e-06, "loss": 0.3506, "step": 10230 }, { "epoch": 1.0401586010573405, "grad_norm": 0.28837329149246216, "learning_rate": 9.648678599045464e-06, "loss": 0.3498, "step": 10231 }, { "epoch": 1.0402602684017894, "grad_norm": 0.2985040545463562, "learning_rate": 9.648547908558943e-06, "loss": 0.3747, "step": 10232 }, { "epoch": 1.0403619357462384, "grad_norm": 0.3107675313949585, "learning_rate": 9.648417194654143e-06, "loss": 0.3855, "step": 10233 }, { "epoch": 1.0404636030906873, "grad_norm": 0.31564798951148987, "learning_rate": 9.64828645733172e-06, "loss": 0.3792, "step": 10234 }, { "epoch": 1.0405652704351362, "grad_norm": 0.28360357880592346, "learning_rate": 9.648155696592336e-06, "loss": 0.3329, "step": 10235 }, { "epoch": 1.0406669377795852, "grad_norm": 0.28056222200393677, "learning_rate": 9.648024912436646e-06, "loss": 0.3715, "step": 10236 }, { "epoch": 1.0407686051240341, "grad_norm": 0.2961462140083313, "learning_rate": 9.647894104865311e-06, "loss": 0.3435, "step": 10237 }, { "epoch": 1.040870272468483, "grad_norm": 0.33513081073760986, "learning_rate": 9.647763273878993e-06, "loss": 0.3895, "step": 10238 }, { "epoch": 1.040971939812932, "grad_norm": 0.3168034255504608, "learning_rate": 9.647632419478345e-06, "loss": 0.3798, "step": 10239 }, { "epoch": 1.041073607157381, "grad_norm": 0.30960628390312195, "learning_rate": 9.647501541664032e-06, "loss": 0.3556, "step": 10240 }, { "epoch": 1.0411752745018301, "grad_norm": 0.3553866744041443, "learning_rate": 9.647370640436709e-06, "loss": 0.3846, "step": 10241 }, { "epoch": 1.041276941846279, "grad_norm": 0.32566437125205994, "learning_rate": 9.647239715797038e-06, "loss": 0.3362, "step": 10242 }, { "epoch": 1.041378609190728, "grad_norm": 0.3033275902271271, "learning_rate": 9.647108767745676e-06, "loss": 0.3643, "step": 10243 }, { "epoch": 1.041480276535177, "grad_norm": 0.31532183289527893, "learning_rate": 9.646977796283286e-06, "loss": 0.399, "step": 10244 }, { "epoch": 1.0415819438796259, "grad_norm": 0.32834696769714355, "learning_rate": 9.646846801410526e-06, "loss": 0.364, "step": 10245 }, { "epoch": 1.0416836112240748, "grad_norm": 0.32066255807876587, "learning_rate": 9.646715783128055e-06, "loss": 0.3642, "step": 10246 }, { "epoch": 1.0417852785685238, "grad_norm": 0.31637993454933167, "learning_rate": 9.646584741436536e-06, "loss": 0.3599, "step": 10247 }, { "epoch": 1.0418869459129727, "grad_norm": 0.339935302734375, "learning_rate": 9.646453676336624e-06, "loss": 0.3959, "step": 10248 }, { "epoch": 1.0419886132574216, "grad_norm": 0.3471735119819641, "learning_rate": 9.646322587828986e-06, "loss": 0.3551, "step": 10249 }, { "epoch": 1.0420902806018706, "grad_norm": 0.3130565285682678, "learning_rate": 9.646191475914277e-06, "loss": 0.3871, "step": 10250 }, { "epoch": 1.0421919479463195, "grad_norm": 0.3071085512638092, "learning_rate": 9.646060340593159e-06, "loss": 0.3706, "step": 10251 }, { "epoch": 1.0422936152907687, "grad_norm": 0.3020602762699127, "learning_rate": 9.645929181866295e-06, "loss": 0.35, "step": 10252 }, { "epoch": 1.0423952826352176, "grad_norm": 0.3222511410713196, "learning_rate": 9.645797999734343e-06, "loss": 0.3729, "step": 10253 }, { "epoch": 1.0424969499796666, "grad_norm": 0.2878749966621399, "learning_rate": 9.645666794197965e-06, "loss": 0.3813, "step": 10254 }, { "epoch": 1.0425986173241155, "grad_norm": 0.2771991789340973, "learning_rate": 9.64553556525782e-06, "loss": 0.3633, "step": 10255 }, { "epoch": 1.0427002846685645, "grad_norm": 0.3233530521392822, "learning_rate": 9.645404312914572e-06, "loss": 0.3231, "step": 10256 }, { "epoch": 1.0428019520130134, "grad_norm": 0.30833661556243896, "learning_rate": 9.645273037168882e-06, "loss": 0.3638, "step": 10257 }, { "epoch": 1.0429036193574623, "grad_norm": 0.27765172719955444, "learning_rate": 9.645141738021409e-06, "loss": 0.3791, "step": 10258 }, { "epoch": 1.0430052867019113, "grad_norm": 0.3126852512359619, "learning_rate": 9.645010415472816e-06, "loss": 0.3677, "step": 10259 }, { "epoch": 1.0431069540463602, "grad_norm": 0.30356404185295105, "learning_rate": 9.644879069523765e-06, "loss": 0.385, "step": 10260 }, { "epoch": 1.0432086213908092, "grad_norm": 0.30819061398506165, "learning_rate": 9.644747700174917e-06, "loss": 0.3476, "step": 10261 }, { "epoch": 1.0433102887352583, "grad_norm": 0.3021852374076843, "learning_rate": 9.644616307426933e-06, "loss": 0.3679, "step": 10262 }, { "epoch": 1.0434119560797073, "grad_norm": 0.3525620400905609, "learning_rate": 9.644484891280475e-06, "loss": 0.3546, "step": 10263 }, { "epoch": 1.0435136234241562, "grad_norm": 0.32585665583610535, "learning_rate": 9.644353451736208e-06, "loss": 0.3769, "step": 10264 }, { "epoch": 1.0436152907686052, "grad_norm": 0.3004000186920166, "learning_rate": 9.644221988794791e-06, "loss": 0.3382, "step": 10265 }, { "epoch": 1.043716958113054, "grad_norm": 0.31634747982025146, "learning_rate": 9.644090502456887e-06, "loss": 0.3876, "step": 10266 }, { "epoch": 1.043818625457503, "grad_norm": 0.31853920221328735, "learning_rate": 9.643958992723158e-06, "loss": 0.3736, "step": 10267 }, { "epoch": 1.043920292801952, "grad_norm": 0.3121902048587799, "learning_rate": 9.643827459594268e-06, "loss": 0.3458, "step": 10268 }, { "epoch": 1.044021960146401, "grad_norm": 0.357164591550827, "learning_rate": 9.643695903070879e-06, "loss": 0.3764, "step": 10269 }, { "epoch": 1.0441236274908499, "grad_norm": 0.3178443908691406, "learning_rate": 9.643564323153654e-06, "loss": 0.4212, "step": 10270 }, { "epoch": 1.0442252948352988, "grad_norm": 0.33624839782714844, "learning_rate": 9.643432719843253e-06, "loss": 0.3501, "step": 10271 }, { "epoch": 1.044326962179748, "grad_norm": 0.3434141278266907, "learning_rate": 9.643301093140345e-06, "loss": 0.3853, "step": 10272 }, { "epoch": 1.044428629524197, "grad_norm": 0.28302422165870667, "learning_rate": 9.643169443045587e-06, "loss": 0.3555, "step": 10273 }, { "epoch": 1.0445302968686458, "grad_norm": 0.32258474826812744, "learning_rate": 9.643037769559645e-06, "loss": 0.3745, "step": 10274 }, { "epoch": 1.0446319642130948, "grad_norm": 0.3454027473926544, "learning_rate": 9.642906072683183e-06, "loss": 0.4008, "step": 10275 }, { "epoch": 1.0447336315575437, "grad_norm": 0.32166460156440735, "learning_rate": 9.642774352416863e-06, "loss": 0.4013, "step": 10276 }, { "epoch": 1.0448352989019927, "grad_norm": 0.30798935890197754, "learning_rate": 9.642642608761349e-06, "loss": 0.3968, "step": 10277 }, { "epoch": 1.0449369662464416, "grad_norm": 0.3037533760070801, "learning_rate": 9.642510841717306e-06, "loss": 0.3315, "step": 10278 }, { "epoch": 1.0450386335908906, "grad_norm": 0.3179766535758972, "learning_rate": 9.642379051285394e-06, "loss": 0.3702, "step": 10279 }, { "epoch": 1.0451403009353395, "grad_norm": 0.30456244945526123, "learning_rate": 9.64224723746628e-06, "loss": 0.3652, "step": 10280 }, { "epoch": 1.0452419682797884, "grad_norm": 0.310386598110199, "learning_rate": 9.64211540026063e-06, "loss": 0.4062, "step": 10281 }, { "epoch": 1.0453436356242376, "grad_norm": 0.30679023265838623, "learning_rate": 9.641983539669104e-06, "loss": 0.3415, "step": 10282 }, { "epoch": 1.0454453029686865, "grad_norm": 0.3225063681602478, "learning_rate": 9.64185165569237e-06, "loss": 0.3939, "step": 10283 }, { "epoch": 1.0455469703131355, "grad_norm": 0.3035506308078766, "learning_rate": 9.641719748331088e-06, "loss": 0.392, "step": 10284 }, { "epoch": 1.0456486376575844, "grad_norm": 0.3070827126502991, "learning_rate": 9.641587817585925e-06, "loss": 0.3504, "step": 10285 }, { "epoch": 1.0457503050020334, "grad_norm": 0.30992409586906433, "learning_rate": 9.641455863457547e-06, "loss": 0.3738, "step": 10286 }, { "epoch": 1.0458519723464823, "grad_norm": 0.30634453892707825, "learning_rate": 9.641323885946617e-06, "loss": 0.3808, "step": 10287 }, { "epoch": 1.0459536396909312, "grad_norm": 0.32074931263923645, "learning_rate": 9.6411918850538e-06, "loss": 0.3602, "step": 10288 }, { "epoch": 1.0460553070353802, "grad_norm": 0.32111093401908875, "learning_rate": 9.64105986077976e-06, "loss": 0.3477, "step": 10289 }, { "epoch": 1.0461569743798291, "grad_norm": 0.31990164518356323, "learning_rate": 9.640927813125165e-06, "loss": 0.4223, "step": 10290 }, { "epoch": 1.046258641724278, "grad_norm": 0.3316501975059509, "learning_rate": 9.640795742090678e-06, "loss": 0.3517, "step": 10291 }, { "epoch": 1.046360309068727, "grad_norm": 0.3352372348308563, "learning_rate": 9.640663647676965e-06, "loss": 0.3656, "step": 10292 }, { "epoch": 1.0464619764131762, "grad_norm": 0.3049285411834717, "learning_rate": 9.640531529884691e-06, "loss": 0.358, "step": 10293 }, { "epoch": 1.0465636437576251, "grad_norm": 0.2958003282546997, "learning_rate": 9.640399388714521e-06, "loss": 0.3648, "step": 10294 }, { "epoch": 1.046665311102074, "grad_norm": 0.2918272912502289, "learning_rate": 9.640267224167123e-06, "loss": 0.3507, "step": 10295 }, { "epoch": 1.046766978446523, "grad_norm": 0.33228614926338196, "learning_rate": 9.640135036243161e-06, "loss": 0.3693, "step": 10296 }, { "epoch": 1.046868645790972, "grad_norm": 0.3348384201526642, "learning_rate": 9.640002824943302e-06, "loss": 0.3749, "step": 10297 }, { "epoch": 1.0469703131354209, "grad_norm": 0.29872065782546997, "learning_rate": 9.639870590268211e-06, "loss": 0.3671, "step": 10298 }, { "epoch": 1.0470719804798698, "grad_norm": 0.3036920130252838, "learning_rate": 9.639738332218554e-06, "loss": 0.3555, "step": 10299 }, { "epoch": 1.0471736478243188, "grad_norm": 0.3064829111099243, "learning_rate": 9.639606050794999e-06, "loss": 0.3475, "step": 10300 }, { "epoch": 1.0472753151687677, "grad_norm": 0.3274347186088562, "learning_rate": 9.639473745998211e-06, "loss": 0.3686, "step": 10301 }, { "epoch": 1.0473769825132166, "grad_norm": 0.31439751386642456, "learning_rate": 9.639341417828856e-06, "loss": 0.351, "step": 10302 }, { "epoch": 1.0474786498576658, "grad_norm": 0.3066692054271698, "learning_rate": 9.639209066287601e-06, "loss": 0.3989, "step": 10303 }, { "epoch": 1.0475803172021148, "grad_norm": 0.3153320550918579, "learning_rate": 9.639076691375114e-06, "loss": 0.3765, "step": 10304 }, { "epoch": 1.0476819845465637, "grad_norm": 0.3037431240081787, "learning_rate": 9.63894429309206e-06, "loss": 0.3586, "step": 10305 }, { "epoch": 1.0477836518910126, "grad_norm": 0.3140444755554199, "learning_rate": 9.638811871439109e-06, "loss": 0.35, "step": 10306 }, { "epoch": 1.0478853192354616, "grad_norm": 0.30409449338912964, "learning_rate": 9.638679426416925e-06, "loss": 0.3527, "step": 10307 }, { "epoch": 1.0479869865799105, "grad_norm": 0.34503138065338135, "learning_rate": 9.638546958026176e-06, "loss": 0.3689, "step": 10308 }, { "epoch": 1.0480886539243595, "grad_norm": 0.3235795199871063, "learning_rate": 9.638414466267532e-06, "loss": 0.3785, "step": 10309 }, { "epoch": 1.0481903212688084, "grad_norm": 0.311896950006485, "learning_rate": 9.638281951141655e-06, "loss": 0.337, "step": 10310 }, { "epoch": 1.0482919886132573, "grad_norm": 0.2885192930698395, "learning_rate": 9.638149412649218e-06, "loss": 0.3557, "step": 10311 }, { "epoch": 1.0483936559577063, "grad_norm": 0.32820960879325867, "learning_rate": 9.638016850790885e-06, "loss": 0.3605, "step": 10312 }, { "epoch": 1.0484953233021554, "grad_norm": 0.3083552420139313, "learning_rate": 9.637884265567325e-06, "loss": 0.3364, "step": 10313 }, { "epoch": 1.0485969906466044, "grad_norm": 0.298883855342865, "learning_rate": 9.637751656979208e-06, "loss": 0.3657, "step": 10314 }, { "epoch": 1.0486986579910533, "grad_norm": 0.3536345660686493, "learning_rate": 9.637619025027199e-06, "loss": 0.4131, "step": 10315 }, { "epoch": 1.0488003253355023, "grad_norm": 0.315499871969223, "learning_rate": 9.637486369711967e-06, "loss": 0.3577, "step": 10316 }, { "epoch": 1.0489019926799512, "grad_norm": 0.3201928734779358, "learning_rate": 9.637353691034183e-06, "loss": 0.3666, "step": 10317 }, { "epoch": 1.0490036600244002, "grad_norm": 0.33176884055137634, "learning_rate": 9.637220988994512e-06, "loss": 0.3558, "step": 10318 }, { "epoch": 1.049105327368849, "grad_norm": 0.3210235834121704, "learning_rate": 9.637088263593623e-06, "loss": 0.3869, "step": 10319 }, { "epoch": 1.049206994713298, "grad_norm": 0.3067542016506195, "learning_rate": 9.636955514832185e-06, "loss": 0.3487, "step": 10320 }, { "epoch": 1.049308662057747, "grad_norm": 0.30384165048599243, "learning_rate": 9.636822742710868e-06, "loss": 0.3663, "step": 10321 }, { "epoch": 1.049410329402196, "grad_norm": 0.30393916368484497, "learning_rate": 9.636689947230338e-06, "loss": 0.3619, "step": 10322 }, { "epoch": 1.049511996746645, "grad_norm": 0.3422098457813263, "learning_rate": 9.636557128391268e-06, "loss": 0.3873, "step": 10323 }, { "epoch": 1.049613664091094, "grad_norm": 0.3137698769569397, "learning_rate": 9.636424286194324e-06, "loss": 0.3563, "step": 10324 }, { "epoch": 1.049715331435543, "grad_norm": 0.2923548221588135, "learning_rate": 9.636291420640177e-06, "loss": 0.3976, "step": 10325 }, { "epoch": 1.049816998779992, "grad_norm": 0.32085874676704407, "learning_rate": 9.636158531729496e-06, "loss": 0.3827, "step": 10326 }, { "epoch": 1.0499186661244408, "grad_norm": 0.2935948967933655, "learning_rate": 9.636025619462947e-06, "loss": 0.363, "step": 10327 }, { "epoch": 1.0500203334688898, "grad_norm": 0.32805243134498596, "learning_rate": 9.635892683841206e-06, "loss": 0.3697, "step": 10328 }, { "epoch": 1.0501220008133387, "grad_norm": 0.30247625708580017, "learning_rate": 9.635759724864938e-06, "loss": 0.3487, "step": 10329 }, { "epoch": 1.0502236681577877, "grad_norm": 0.2800043821334839, "learning_rate": 9.635626742534814e-06, "loss": 0.3858, "step": 10330 }, { "epoch": 1.0503253355022366, "grad_norm": 0.30154672265052795, "learning_rate": 9.635493736851505e-06, "loss": 0.3624, "step": 10331 }, { "epoch": 1.0504270028466856, "grad_norm": 0.3210388422012329, "learning_rate": 9.63536070781568e-06, "loss": 0.3832, "step": 10332 }, { "epoch": 1.0505286701911345, "grad_norm": 0.30039289593696594, "learning_rate": 9.635227655428007e-06, "loss": 0.3978, "step": 10333 }, { "epoch": 1.0506303375355837, "grad_norm": 0.29514968395233154, "learning_rate": 9.63509457968916e-06, "loss": 0.3511, "step": 10334 }, { "epoch": 1.0507320048800326, "grad_norm": 0.31573599576950073, "learning_rate": 9.63496148059981e-06, "loss": 0.3546, "step": 10335 }, { "epoch": 1.0508336722244815, "grad_norm": 0.2994120717048645, "learning_rate": 9.634828358160622e-06, "loss": 0.3566, "step": 10336 }, { "epoch": 1.0509353395689305, "grad_norm": 0.2915385961532593, "learning_rate": 9.634695212372272e-06, "loss": 0.3563, "step": 10337 }, { "epoch": 1.0510370069133794, "grad_norm": 0.29842591285705566, "learning_rate": 9.63456204323543e-06, "loss": 0.3537, "step": 10338 }, { "epoch": 1.0511386742578284, "grad_norm": 0.31893014907836914, "learning_rate": 9.634428850750763e-06, "loss": 0.4092, "step": 10339 }, { "epoch": 1.0512403416022773, "grad_norm": 0.28451070189476013, "learning_rate": 9.634295634918946e-06, "loss": 0.3996, "step": 10340 }, { "epoch": 1.0513420089467262, "grad_norm": 0.3653852939605713, "learning_rate": 9.63416239574065e-06, "loss": 0.3533, "step": 10341 }, { "epoch": 1.0514436762911752, "grad_norm": 0.35853007435798645, "learning_rate": 9.634029133216545e-06, "loss": 0.3567, "step": 10342 }, { "epoch": 1.0515453436356241, "grad_norm": 0.31838276982307434, "learning_rate": 9.6338958473473e-06, "loss": 0.3936, "step": 10343 }, { "epoch": 1.0516470109800733, "grad_norm": 0.33441630005836487, "learning_rate": 9.633762538133591e-06, "loss": 0.3881, "step": 10344 }, { "epoch": 1.0517486783245222, "grad_norm": 0.33895885944366455, "learning_rate": 9.633629205576087e-06, "loss": 0.36, "step": 10345 }, { "epoch": 1.0518503456689712, "grad_norm": 0.30950435996055603, "learning_rate": 9.633495849675461e-06, "loss": 0.3241, "step": 10346 }, { "epoch": 1.0519520130134201, "grad_norm": 0.31075015664100647, "learning_rate": 9.633362470432383e-06, "loss": 0.3515, "step": 10347 }, { "epoch": 1.052053680357869, "grad_norm": 0.3684864342212677, "learning_rate": 9.633229067847526e-06, "loss": 0.4043, "step": 10348 }, { "epoch": 1.052155347702318, "grad_norm": 0.30182376503944397, "learning_rate": 9.633095641921562e-06, "loss": 0.3666, "step": 10349 }, { "epoch": 1.052257015046767, "grad_norm": 0.3542443513870239, "learning_rate": 9.632962192655164e-06, "loss": 0.3911, "step": 10350 }, { "epoch": 1.0523586823912159, "grad_norm": 0.32425257563591003, "learning_rate": 9.632828720049002e-06, "loss": 0.3842, "step": 10351 }, { "epoch": 1.0524603497356648, "grad_norm": 0.3294447064399719, "learning_rate": 9.63269522410375e-06, "loss": 0.388, "step": 10352 }, { "epoch": 1.0525620170801138, "grad_norm": 0.33455657958984375, "learning_rate": 9.632561704820081e-06, "loss": 0.3379, "step": 10353 }, { "epoch": 1.052663684424563, "grad_norm": 0.3337298333644867, "learning_rate": 9.632428162198669e-06, "loss": 0.3685, "step": 10354 }, { "epoch": 1.0527653517690119, "grad_norm": 0.2944452166557312, "learning_rate": 9.63229459624018e-06, "loss": 0.3716, "step": 10355 }, { "epoch": 1.0528670191134608, "grad_norm": 0.34580090641975403, "learning_rate": 9.632161006945295e-06, "loss": 0.3732, "step": 10356 }, { "epoch": 1.0529686864579098, "grad_norm": 0.29556363821029663, "learning_rate": 9.632027394314683e-06, "loss": 0.3395, "step": 10357 }, { "epoch": 1.0530703538023587, "grad_norm": 0.308805912733078, "learning_rate": 9.631893758349019e-06, "loss": 0.3781, "step": 10358 }, { "epoch": 1.0531720211468076, "grad_norm": 0.301139771938324, "learning_rate": 9.631760099048972e-06, "loss": 0.3332, "step": 10359 }, { "epoch": 1.0532736884912566, "grad_norm": 0.29364416003227234, "learning_rate": 9.631626416415221e-06, "loss": 0.358, "step": 10360 }, { "epoch": 1.0533753558357055, "grad_norm": 0.298014372587204, "learning_rate": 9.631492710448436e-06, "loss": 0.3447, "step": 10361 }, { "epoch": 1.0534770231801545, "grad_norm": 0.3265191614627838, "learning_rate": 9.63135898114929e-06, "loss": 0.3423, "step": 10362 }, { "epoch": 1.0535786905246034, "grad_norm": 0.33457377552986145, "learning_rate": 9.63122522851846e-06, "loss": 0.3677, "step": 10363 }, { "epoch": 1.0536803578690526, "grad_norm": 0.31275805830955505, "learning_rate": 9.631091452556616e-06, "loss": 0.3585, "step": 10364 }, { "epoch": 1.0537820252135015, "grad_norm": 0.31461015343666077, "learning_rate": 9.630957653264434e-06, "loss": 0.3485, "step": 10365 }, { "epoch": 1.0538836925579504, "grad_norm": 0.3366874158382416, "learning_rate": 9.630823830642588e-06, "loss": 0.3735, "step": 10366 }, { "epoch": 1.0539853599023994, "grad_norm": 0.3020881116390228, "learning_rate": 9.630689984691753e-06, "loss": 0.3452, "step": 10367 }, { "epoch": 1.0540870272468483, "grad_norm": 0.34770697355270386, "learning_rate": 9.630556115412602e-06, "loss": 0.3903, "step": 10368 }, { "epoch": 1.0541886945912973, "grad_norm": 0.2941596806049347, "learning_rate": 9.63042222280581e-06, "loss": 0.3625, "step": 10369 }, { "epoch": 1.0542903619357462, "grad_norm": 0.30616632103919983, "learning_rate": 9.630288306872051e-06, "loss": 0.38, "step": 10370 }, { "epoch": 1.0543920292801952, "grad_norm": 0.31927114725112915, "learning_rate": 9.630154367612e-06, "loss": 0.3499, "step": 10371 }, { "epoch": 1.054493696624644, "grad_norm": 0.2947242259979248, "learning_rate": 9.63002040502633e-06, "loss": 0.3704, "step": 10372 }, { "epoch": 1.054595363969093, "grad_norm": 0.30559468269348145, "learning_rate": 9.629886419115718e-06, "loss": 0.3796, "step": 10373 }, { "epoch": 1.054697031313542, "grad_norm": 0.30963584780693054, "learning_rate": 9.62975240988084e-06, "loss": 0.3692, "step": 10374 }, { "epoch": 1.0547986986579911, "grad_norm": 0.2911827564239502, "learning_rate": 9.629618377322367e-06, "loss": 0.3325, "step": 10375 }, { "epoch": 1.05490036600244, "grad_norm": 0.289075642824173, "learning_rate": 9.629484321440978e-06, "loss": 0.3645, "step": 10376 }, { "epoch": 1.055002033346889, "grad_norm": 0.2805875539779663, "learning_rate": 9.629350242237348e-06, "loss": 0.3469, "step": 10377 }, { "epoch": 1.055103700691338, "grad_norm": 0.3204960525035858, "learning_rate": 9.629216139712152e-06, "loss": 0.3708, "step": 10378 }, { "epoch": 1.055205368035787, "grad_norm": 0.32668110728263855, "learning_rate": 9.629082013866063e-06, "loss": 0.3733, "step": 10379 }, { "epoch": 1.0553070353802358, "grad_norm": 0.3202175199985504, "learning_rate": 9.62894786469976e-06, "loss": 0.378, "step": 10380 }, { "epoch": 1.0554087027246848, "grad_norm": 0.30126258730888367, "learning_rate": 9.62881369221392e-06, "loss": 0.3354, "step": 10381 }, { "epoch": 1.0555103700691337, "grad_norm": 0.2971212863922119, "learning_rate": 9.628679496409213e-06, "loss": 0.3769, "step": 10382 }, { "epoch": 1.0556120374135827, "grad_norm": 0.31539860367774963, "learning_rate": 9.62854527728632e-06, "loss": 0.3644, "step": 10383 }, { "epoch": 1.0557137047580318, "grad_norm": 0.3250461220741272, "learning_rate": 9.628411034845915e-06, "loss": 0.3597, "step": 10384 }, { "epoch": 1.0558153721024808, "grad_norm": 0.2951416075229645, "learning_rate": 9.628276769088676e-06, "loss": 0.3493, "step": 10385 }, { "epoch": 1.0559170394469297, "grad_norm": 0.3072455823421478, "learning_rate": 9.628142480015277e-06, "loss": 0.3601, "step": 10386 }, { "epoch": 1.0560187067913787, "grad_norm": 0.34173062443733215, "learning_rate": 9.628008167626397e-06, "loss": 0.3657, "step": 10387 }, { "epoch": 1.0561203741358276, "grad_norm": 0.3056153357028961, "learning_rate": 9.627873831922714e-06, "loss": 0.3599, "step": 10388 }, { "epoch": 1.0562220414802765, "grad_norm": 0.3205762505531311, "learning_rate": 9.6277394729049e-06, "loss": 0.3552, "step": 10389 }, { "epoch": 1.0563237088247255, "grad_norm": 0.28790193796157837, "learning_rate": 9.627605090573634e-06, "loss": 0.357, "step": 10390 }, { "epoch": 1.0564253761691744, "grad_norm": 0.3168993890285492, "learning_rate": 9.627470684929595e-06, "loss": 0.3578, "step": 10391 }, { "epoch": 1.0565270435136234, "grad_norm": 0.29516372084617615, "learning_rate": 9.627336255973457e-06, "loss": 0.348, "step": 10392 }, { "epoch": 1.0566287108580723, "grad_norm": 0.3167271018028259, "learning_rate": 9.627201803705898e-06, "loss": 0.3659, "step": 10393 }, { "epoch": 1.0567303782025212, "grad_norm": 0.3288189172744751, "learning_rate": 9.627067328127597e-06, "loss": 0.369, "step": 10394 }, { "epoch": 1.0568320455469704, "grad_norm": 0.275594562292099, "learning_rate": 9.62693282923923e-06, "loss": 0.3656, "step": 10395 }, { "epoch": 1.0569337128914194, "grad_norm": 0.28381815552711487, "learning_rate": 9.626798307041473e-06, "loss": 0.366, "step": 10396 }, { "epoch": 1.0570353802358683, "grad_norm": 0.2779788076877594, "learning_rate": 9.62666376153501e-06, "loss": 0.3754, "step": 10397 }, { "epoch": 1.0571370475803172, "grad_norm": 0.2894209921360016, "learning_rate": 9.62652919272051e-06, "loss": 0.3686, "step": 10398 }, { "epoch": 1.0572387149247662, "grad_norm": 0.32573309540748596, "learning_rate": 9.626394600598659e-06, "loss": 0.3582, "step": 10399 }, { "epoch": 1.0573403822692151, "grad_norm": 0.2994888424873352, "learning_rate": 9.626259985170129e-06, "loss": 0.379, "step": 10400 }, { "epoch": 1.057442049613664, "grad_norm": 0.30956631898880005, "learning_rate": 9.626125346435602e-06, "loss": 0.4022, "step": 10401 }, { "epoch": 1.057543716958113, "grad_norm": 0.3026430010795593, "learning_rate": 9.625990684395754e-06, "loss": 0.3824, "step": 10402 }, { "epoch": 1.057645384302562, "grad_norm": 0.3142949938774109, "learning_rate": 9.625855999051264e-06, "loss": 0.374, "step": 10403 }, { "epoch": 1.0577470516470109, "grad_norm": 0.2843567430973053, "learning_rate": 9.62572129040281e-06, "loss": 0.3476, "step": 10404 }, { "epoch": 1.05784871899146, "grad_norm": 0.3061203360557556, "learning_rate": 9.625586558451073e-06, "loss": 0.3616, "step": 10405 }, { "epoch": 1.057950386335909, "grad_norm": 0.3068850338459015, "learning_rate": 9.62545180319673e-06, "loss": 0.3568, "step": 10406 }, { "epoch": 1.058052053680358, "grad_norm": 0.3075626492500305, "learning_rate": 9.625317024640458e-06, "loss": 0.3757, "step": 10407 }, { "epoch": 1.0581537210248069, "grad_norm": 0.3364868462085724, "learning_rate": 9.625182222782941e-06, "loss": 0.3708, "step": 10408 }, { "epoch": 1.0582553883692558, "grad_norm": 0.32031014561653137, "learning_rate": 9.625047397624854e-06, "loss": 0.3702, "step": 10409 }, { "epoch": 1.0583570557137048, "grad_norm": 0.29390954971313477, "learning_rate": 9.624912549166875e-06, "loss": 0.3666, "step": 10410 }, { "epoch": 1.0584587230581537, "grad_norm": 0.3194800317287445, "learning_rate": 9.624777677409689e-06, "loss": 0.3824, "step": 10411 }, { "epoch": 1.0585603904026026, "grad_norm": 0.3016112148761749, "learning_rate": 9.624642782353971e-06, "loss": 0.3799, "step": 10412 }, { "epoch": 1.0586620577470516, "grad_norm": 0.31827306747436523, "learning_rate": 9.624507864000401e-06, "loss": 0.3797, "step": 10413 }, { "epoch": 1.0587637250915005, "grad_norm": 0.3080366253852844, "learning_rate": 9.62437292234966e-06, "loss": 0.336, "step": 10414 }, { "epoch": 1.0588653924359495, "grad_norm": 0.2935435175895691, "learning_rate": 9.624237957402428e-06, "loss": 0.3616, "step": 10415 }, { "epoch": 1.0589670597803986, "grad_norm": 0.3206942677497864, "learning_rate": 9.624102969159381e-06, "loss": 0.3782, "step": 10416 }, { "epoch": 1.0590687271248476, "grad_norm": 0.318839967250824, "learning_rate": 9.623967957621206e-06, "loss": 0.3916, "step": 10417 }, { "epoch": 1.0591703944692965, "grad_norm": 0.3189624845981598, "learning_rate": 9.623832922788576e-06, "loss": 0.378, "step": 10418 }, { "epoch": 1.0592720618137454, "grad_norm": 0.30405884981155396, "learning_rate": 9.623697864662175e-06, "loss": 0.3624, "step": 10419 }, { "epoch": 1.0593737291581944, "grad_norm": 0.302569180727005, "learning_rate": 9.623562783242684e-06, "loss": 0.3673, "step": 10420 }, { "epoch": 1.0594753965026433, "grad_norm": 0.3217090368270874, "learning_rate": 9.623427678530783e-06, "loss": 0.3726, "step": 10421 }, { "epoch": 1.0595770638470923, "grad_norm": 0.30385640263557434, "learning_rate": 9.62329255052715e-06, "loss": 0.3534, "step": 10422 }, { "epoch": 1.0596787311915412, "grad_norm": 0.3259389102458954, "learning_rate": 9.62315739923247e-06, "loss": 0.3617, "step": 10423 }, { "epoch": 1.0597803985359902, "grad_norm": 0.3176208436489105, "learning_rate": 9.62302222464742e-06, "loss": 0.3699, "step": 10424 }, { "epoch": 1.0598820658804393, "grad_norm": 0.36456313729286194, "learning_rate": 9.622887026772682e-06, "loss": 0.3648, "step": 10425 }, { "epoch": 1.0599837332248883, "grad_norm": 0.322696715593338, "learning_rate": 9.62275180560894e-06, "loss": 0.3595, "step": 10426 }, { "epoch": 1.0600854005693372, "grad_norm": 0.2873244285583496, "learning_rate": 9.622616561156872e-06, "loss": 0.3919, "step": 10427 }, { "epoch": 1.0601870679137861, "grad_norm": 0.34512388706207275, "learning_rate": 9.622481293417159e-06, "loss": 0.4026, "step": 10428 }, { "epoch": 1.060288735258235, "grad_norm": 0.33417078852653503, "learning_rate": 9.622346002390486e-06, "loss": 0.3493, "step": 10429 }, { "epoch": 1.060390402602684, "grad_norm": 0.2970033288002014, "learning_rate": 9.622210688077531e-06, "loss": 0.3637, "step": 10430 }, { "epoch": 1.060492069947133, "grad_norm": 0.2985493838787079, "learning_rate": 9.622075350478976e-06, "loss": 0.3617, "step": 10431 }, { "epoch": 1.060593737291582, "grad_norm": 0.3373793363571167, "learning_rate": 9.621939989595506e-06, "loss": 0.3615, "step": 10432 }, { "epoch": 1.0606954046360308, "grad_norm": 0.29923781752586365, "learning_rate": 9.6218046054278e-06, "loss": 0.3588, "step": 10433 }, { "epoch": 1.0607970719804798, "grad_norm": 0.3462122976779938, "learning_rate": 9.62166919797654e-06, "loss": 0.3909, "step": 10434 }, { "epoch": 1.0608987393249287, "grad_norm": 0.3030585050582886, "learning_rate": 9.621533767242408e-06, "loss": 0.3847, "step": 10435 }, { "epoch": 1.061000406669378, "grad_norm": 0.31878095865249634, "learning_rate": 9.62139831322609e-06, "loss": 0.36, "step": 10436 }, { "epoch": 1.0611020740138268, "grad_norm": 0.32090967893600464, "learning_rate": 9.621262835928262e-06, "loss": 0.3707, "step": 10437 }, { "epoch": 1.0612037413582758, "grad_norm": 0.3222772181034088, "learning_rate": 9.621127335349611e-06, "loss": 0.3596, "step": 10438 }, { "epoch": 1.0613054087027247, "grad_norm": 0.2859407365322113, "learning_rate": 9.62099181149082e-06, "loss": 0.3395, "step": 10439 }, { "epoch": 1.0614070760471737, "grad_norm": 0.32067206501960754, "learning_rate": 9.62085626435257e-06, "loss": 0.3558, "step": 10440 }, { "epoch": 1.0615087433916226, "grad_norm": 0.3337027132511139, "learning_rate": 9.620720693935544e-06, "loss": 0.3759, "step": 10441 }, { "epoch": 1.0616104107360715, "grad_norm": 0.32515525817871094, "learning_rate": 9.620585100240425e-06, "loss": 0.3914, "step": 10442 }, { "epoch": 1.0617120780805205, "grad_norm": 0.37102317810058594, "learning_rate": 9.620449483267896e-06, "loss": 0.4203, "step": 10443 }, { "epoch": 1.0618137454249694, "grad_norm": 0.31286507844924927, "learning_rate": 9.62031384301864e-06, "loss": 0.3743, "step": 10444 }, { "epoch": 1.0619154127694184, "grad_norm": 0.30013248324394226, "learning_rate": 9.620178179493342e-06, "loss": 0.391, "step": 10445 }, { "epoch": 1.0620170801138675, "grad_norm": 0.33447155356407166, "learning_rate": 9.620042492692685e-06, "loss": 0.3618, "step": 10446 }, { "epoch": 1.0621187474583165, "grad_norm": 0.3061428666114807, "learning_rate": 9.619906782617351e-06, "loss": 0.3422, "step": 10447 }, { "epoch": 1.0622204148027654, "grad_norm": 0.3156841993331909, "learning_rate": 9.619771049268023e-06, "loss": 0.3839, "step": 10448 }, { "epoch": 1.0623220821472144, "grad_norm": 0.3055494427680969, "learning_rate": 9.619635292645388e-06, "loss": 0.3443, "step": 10449 }, { "epoch": 1.0624237494916633, "grad_norm": 0.29760318994522095, "learning_rate": 9.619499512750128e-06, "loss": 0.3655, "step": 10450 }, { "epoch": 1.0625254168361122, "grad_norm": 0.31658557057380676, "learning_rate": 9.619363709582927e-06, "loss": 0.3662, "step": 10451 }, { "epoch": 1.0626270841805612, "grad_norm": 0.2836202383041382, "learning_rate": 9.619227883144469e-06, "loss": 0.374, "step": 10452 }, { "epoch": 1.0627287515250101, "grad_norm": 0.2975849211215973, "learning_rate": 9.61909203343544e-06, "loss": 0.3716, "step": 10453 }, { "epoch": 1.062830418869459, "grad_norm": 0.30294105410575867, "learning_rate": 9.618956160456521e-06, "loss": 0.3432, "step": 10454 }, { "epoch": 1.062932086213908, "grad_norm": 0.30556926131248474, "learning_rate": 9.618820264208399e-06, "loss": 0.349, "step": 10455 }, { "epoch": 1.063033753558357, "grad_norm": 0.3051479160785675, "learning_rate": 9.61868434469176e-06, "loss": 0.3445, "step": 10456 }, { "epoch": 1.063135420902806, "grad_norm": 0.30912071466445923, "learning_rate": 9.618548401907285e-06, "loss": 0.3687, "step": 10457 }, { "epoch": 1.063237088247255, "grad_norm": 0.29079458117485046, "learning_rate": 9.61841243585566e-06, "loss": 0.3884, "step": 10458 }, { "epoch": 1.063338755591704, "grad_norm": 0.3049955666065216, "learning_rate": 9.618276446537574e-06, "loss": 0.3812, "step": 10459 }, { "epoch": 1.063440422936153, "grad_norm": 0.3092912435531616, "learning_rate": 9.618140433953705e-06, "loss": 0.3467, "step": 10460 }, { "epoch": 1.0635420902806019, "grad_norm": 0.30833467841148376, "learning_rate": 9.618004398104743e-06, "loss": 0.3507, "step": 10461 }, { "epoch": 1.0636437576250508, "grad_norm": 0.3053591251373291, "learning_rate": 9.617868338991373e-06, "loss": 0.3537, "step": 10462 }, { "epoch": 1.0637454249694998, "grad_norm": 0.35368070006370544, "learning_rate": 9.61773225661428e-06, "loss": 0.4216, "step": 10463 }, { "epoch": 1.0638470923139487, "grad_norm": 0.3179129958152771, "learning_rate": 9.617596150974148e-06, "loss": 0.3968, "step": 10464 }, { "epoch": 1.0639487596583976, "grad_norm": 0.3110136091709137, "learning_rate": 9.617460022071666e-06, "loss": 0.36, "step": 10465 }, { "epoch": 1.0640504270028468, "grad_norm": 0.299734890460968, "learning_rate": 9.617323869907514e-06, "loss": 0.3151, "step": 10466 }, { "epoch": 1.0641520943472957, "grad_norm": 0.3324494957923889, "learning_rate": 9.617187694482384e-06, "loss": 0.3773, "step": 10467 }, { "epoch": 1.0642537616917447, "grad_norm": 0.36179375648498535, "learning_rate": 9.61705149579696e-06, "loss": 0.379, "step": 10468 }, { "epoch": 1.0643554290361936, "grad_norm": 0.30774205923080444, "learning_rate": 9.616915273851927e-06, "loss": 0.3989, "step": 10469 }, { "epoch": 1.0644570963806426, "grad_norm": 0.3413814902305603, "learning_rate": 9.616779028647972e-06, "loss": 0.3715, "step": 10470 }, { "epoch": 1.0645587637250915, "grad_norm": 0.353054404258728, "learning_rate": 9.616642760185782e-06, "loss": 0.3676, "step": 10471 }, { "epoch": 1.0646604310695404, "grad_norm": 0.3387831747531891, "learning_rate": 9.616506468466042e-06, "loss": 0.3779, "step": 10472 }, { "epoch": 1.0647620984139894, "grad_norm": 0.3292776644229889, "learning_rate": 9.616370153489439e-06, "loss": 0.3489, "step": 10473 }, { "epoch": 1.0648637657584383, "grad_norm": 0.3517090380191803, "learning_rate": 9.616233815256662e-06, "loss": 0.3962, "step": 10474 }, { "epoch": 1.0649654331028873, "grad_norm": 0.30608150362968445, "learning_rate": 9.616097453768394e-06, "loss": 0.345, "step": 10475 }, { "epoch": 1.0650671004473362, "grad_norm": 0.31485655903816223, "learning_rate": 9.615961069025325e-06, "loss": 0.3871, "step": 10476 }, { "epoch": 1.0651687677917854, "grad_norm": 0.3073820173740387, "learning_rate": 9.615824661028142e-06, "loss": 0.4026, "step": 10477 }, { "epoch": 1.0652704351362343, "grad_norm": 0.3409285247325897, "learning_rate": 9.61568822977753e-06, "loss": 0.3943, "step": 10478 }, { "epoch": 1.0653721024806833, "grad_norm": 0.34124648571014404, "learning_rate": 9.615551775274177e-06, "loss": 0.3818, "step": 10479 }, { "epoch": 1.0654737698251322, "grad_norm": 0.34286272525787354, "learning_rate": 9.615415297518773e-06, "loss": 0.3732, "step": 10480 }, { "epoch": 1.0655754371695811, "grad_norm": 0.30346736311912537, "learning_rate": 9.615278796512002e-06, "loss": 0.4042, "step": 10481 }, { "epoch": 1.06567710451403, "grad_norm": 0.3046504557132721, "learning_rate": 9.615142272254553e-06, "loss": 0.3464, "step": 10482 }, { "epoch": 1.065778771858479, "grad_norm": 0.3397293984889984, "learning_rate": 9.615005724747115e-06, "loss": 0.3823, "step": 10483 }, { "epoch": 1.065880439202928, "grad_norm": 0.3207055926322937, "learning_rate": 9.614869153990373e-06, "loss": 0.3661, "step": 10484 }, { "epoch": 1.065982106547377, "grad_norm": 0.30096235871315, "learning_rate": 9.614732559985019e-06, "loss": 0.3541, "step": 10485 }, { "epoch": 1.0660837738918258, "grad_norm": 0.30102258920669556, "learning_rate": 9.614595942731738e-06, "loss": 0.3729, "step": 10486 }, { "epoch": 1.066185441236275, "grad_norm": 0.2915705442428589, "learning_rate": 9.61445930223122e-06, "loss": 0.3587, "step": 10487 }, { "epoch": 1.066287108580724, "grad_norm": 0.3071000874042511, "learning_rate": 9.614322638484151e-06, "loss": 0.3575, "step": 10488 }, { "epoch": 1.066388775925173, "grad_norm": 0.30589014291763306, "learning_rate": 9.614185951491223e-06, "loss": 0.3699, "step": 10489 }, { "epoch": 1.0664904432696218, "grad_norm": 0.29970601201057434, "learning_rate": 9.614049241253122e-06, "loss": 0.3677, "step": 10490 }, { "epoch": 1.0665921106140708, "grad_norm": 0.3085009753704071, "learning_rate": 9.613912507770537e-06, "loss": 0.3847, "step": 10491 }, { "epoch": 1.0666937779585197, "grad_norm": 0.29551368951797485, "learning_rate": 9.613775751044156e-06, "loss": 0.3504, "step": 10492 }, { "epoch": 1.0667954453029687, "grad_norm": 0.2982047498226166, "learning_rate": 9.61363897107467e-06, "loss": 0.3481, "step": 10493 }, { "epoch": 1.0668971126474176, "grad_norm": 0.2811069190502167, "learning_rate": 9.61350216786277e-06, "loss": 0.3244, "step": 10494 }, { "epoch": 1.0669987799918665, "grad_norm": 0.31234210729599, "learning_rate": 9.61336534140914e-06, "loss": 0.3616, "step": 10495 }, { "epoch": 1.0671004473363155, "grad_norm": 0.34845346212387085, "learning_rate": 9.613228491714472e-06, "loss": 0.3981, "step": 10496 }, { "epoch": 1.0672021146807644, "grad_norm": 0.3042949438095093, "learning_rate": 9.613091618779453e-06, "loss": 0.3467, "step": 10497 }, { "epoch": 1.0673037820252136, "grad_norm": 0.3069767951965332, "learning_rate": 9.612954722604778e-06, "loss": 0.3314, "step": 10498 }, { "epoch": 1.0674054493696625, "grad_norm": 0.33995985984802246, "learning_rate": 9.612817803191132e-06, "loss": 0.3681, "step": 10499 }, { "epoch": 1.0675071167141115, "grad_norm": 0.32327786087989807, "learning_rate": 9.612680860539207e-06, "loss": 0.3776, "step": 10500 }, { "epoch": 1.0676087840585604, "grad_norm": 0.28362801671028137, "learning_rate": 9.612543894649691e-06, "loss": 0.3701, "step": 10501 }, { "epoch": 1.0677104514030094, "grad_norm": 0.3327774405479431, "learning_rate": 9.612406905523275e-06, "loss": 0.3724, "step": 10502 }, { "epoch": 1.0678121187474583, "grad_norm": 0.31739985942840576, "learning_rate": 9.612269893160648e-06, "loss": 0.3407, "step": 10503 }, { "epoch": 1.0679137860919072, "grad_norm": 0.31552544236183167, "learning_rate": 9.612132857562503e-06, "loss": 0.4022, "step": 10504 }, { "epoch": 1.0680154534363562, "grad_norm": 0.3545110523700714, "learning_rate": 9.611995798729528e-06, "loss": 0.371, "step": 10505 }, { "epoch": 1.0681171207808051, "grad_norm": 0.32111406326293945, "learning_rate": 9.611858716662414e-06, "loss": 0.4131, "step": 10506 }, { "epoch": 1.0682187881252543, "grad_norm": 0.3055398762226105, "learning_rate": 9.611721611361853e-06, "loss": 0.3582, "step": 10507 }, { "epoch": 1.0683204554697032, "grad_norm": 0.3282454013824463, "learning_rate": 9.611584482828534e-06, "loss": 0.3619, "step": 10508 }, { "epoch": 1.0684221228141522, "grad_norm": 0.3534533381462097, "learning_rate": 9.611447331063147e-06, "loss": 0.3731, "step": 10509 }, { "epoch": 1.068523790158601, "grad_norm": 0.30566972494125366, "learning_rate": 9.611310156066384e-06, "loss": 0.3462, "step": 10510 }, { "epoch": 1.06862545750305, "grad_norm": 0.27895814180374146, "learning_rate": 9.611172957838937e-06, "loss": 0.3475, "step": 10511 }, { "epoch": 1.068727124847499, "grad_norm": 0.32218092679977417, "learning_rate": 9.611035736381497e-06, "loss": 0.3737, "step": 10512 }, { "epoch": 1.068828792191948, "grad_norm": 0.316911906003952, "learning_rate": 9.610898491694753e-06, "loss": 0.356, "step": 10513 }, { "epoch": 1.0689304595363969, "grad_norm": 0.3125443458557129, "learning_rate": 9.6107612237794e-06, "loss": 0.3671, "step": 10514 }, { "epoch": 1.0690321268808458, "grad_norm": 0.2904821038246155, "learning_rate": 9.610623932636127e-06, "loss": 0.3505, "step": 10515 }, { "epoch": 1.0691337942252948, "grad_norm": 0.2900893986225128, "learning_rate": 9.610486618265627e-06, "loss": 0.3473, "step": 10516 }, { "epoch": 1.0692354615697437, "grad_norm": 0.29884764552116394, "learning_rate": 9.61034928066859e-06, "loss": 0.3555, "step": 10517 }, { "epoch": 1.0693371289141929, "grad_norm": 0.32796263694763184, "learning_rate": 9.610211919845709e-06, "loss": 0.3902, "step": 10518 }, { "epoch": 1.0694387962586418, "grad_norm": 0.3019964098930359, "learning_rate": 9.610074535797676e-06, "loss": 0.3944, "step": 10519 }, { "epoch": 1.0695404636030907, "grad_norm": 0.31329572200775146, "learning_rate": 9.609937128525183e-06, "loss": 0.3742, "step": 10520 }, { "epoch": 1.0696421309475397, "grad_norm": 0.2905118763446808, "learning_rate": 9.609799698028923e-06, "loss": 0.3754, "step": 10521 }, { "epoch": 1.0697437982919886, "grad_norm": 0.3031884431838989, "learning_rate": 9.609662244309586e-06, "loss": 0.362, "step": 10522 }, { "epoch": 1.0698454656364376, "grad_norm": 0.2901557385921478, "learning_rate": 9.609524767367868e-06, "loss": 0.3655, "step": 10523 }, { "epoch": 1.0699471329808865, "grad_norm": 0.295436829328537, "learning_rate": 9.609387267204459e-06, "loss": 0.3522, "step": 10524 }, { "epoch": 1.0700488003253354, "grad_norm": 0.2972766160964966, "learning_rate": 9.609249743820052e-06, "loss": 0.3669, "step": 10525 }, { "epoch": 1.0701504676697844, "grad_norm": 0.2832503020763397, "learning_rate": 9.60911219721534e-06, "loss": 0.3657, "step": 10526 }, { "epoch": 1.0702521350142333, "grad_norm": 0.3128155469894409, "learning_rate": 9.608974627391015e-06, "loss": 0.3464, "step": 10527 }, { "epoch": 1.0703538023586825, "grad_norm": 0.31339722871780396, "learning_rate": 9.608837034347774e-06, "loss": 0.3762, "step": 10528 }, { "epoch": 1.0704554697031314, "grad_norm": 0.3014134466648102, "learning_rate": 9.608699418086305e-06, "loss": 0.3972, "step": 10529 }, { "epoch": 1.0705571370475804, "grad_norm": 0.3070560097694397, "learning_rate": 9.608561778607303e-06, "loss": 0.3926, "step": 10530 }, { "epoch": 1.0706588043920293, "grad_norm": 0.3122102916240692, "learning_rate": 9.608424115911463e-06, "loss": 0.3759, "step": 10531 }, { "epoch": 1.0707604717364783, "grad_norm": 0.32425808906555176, "learning_rate": 9.608286429999477e-06, "loss": 0.3604, "step": 10532 }, { "epoch": 1.0708621390809272, "grad_norm": 0.3081396818161011, "learning_rate": 9.60814872087204e-06, "loss": 0.3668, "step": 10533 }, { "epoch": 1.0709638064253761, "grad_norm": 0.2923581600189209, "learning_rate": 9.608010988529843e-06, "loss": 0.3878, "step": 10534 }, { "epoch": 1.071065473769825, "grad_norm": 0.3192266821861267, "learning_rate": 9.607873232973584e-06, "loss": 0.3963, "step": 10535 }, { "epoch": 1.071167141114274, "grad_norm": 0.3501167595386505, "learning_rate": 9.607735454203952e-06, "loss": 0.3443, "step": 10536 }, { "epoch": 1.071268808458723, "grad_norm": 0.3040258288383484, "learning_rate": 9.607597652221647e-06, "loss": 0.3343, "step": 10537 }, { "epoch": 1.071370475803172, "grad_norm": 0.32015013694763184, "learning_rate": 9.607459827027357e-06, "loss": 0.3612, "step": 10538 }, { "epoch": 1.071472143147621, "grad_norm": 0.34808966517448425, "learning_rate": 9.60732197862178e-06, "loss": 0.3514, "step": 10539 }, { "epoch": 1.07157381049207, "grad_norm": 0.2977031171321869, "learning_rate": 9.607184107005611e-06, "loss": 0.341, "step": 10540 }, { "epoch": 1.071675477836519, "grad_norm": 0.32753464579582214, "learning_rate": 9.607046212179543e-06, "loss": 0.3409, "step": 10541 }, { "epoch": 1.071777145180968, "grad_norm": 0.31997478008270264, "learning_rate": 9.606908294144271e-06, "loss": 0.3861, "step": 10542 }, { "epoch": 1.0718788125254168, "grad_norm": 0.3174724876880646, "learning_rate": 9.60677035290049e-06, "loss": 0.372, "step": 10543 }, { "epoch": 1.0719804798698658, "grad_norm": 0.31717604398727417, "learning_rate": 9.606632388448893e-06, "loss": 0.3372, "step": 10544 }, { "epoch": 1.0720821472143147, "grad_norm": 0.33491870760917664, "learning_rate": 9.606494400790177e-06, "loss": 0.3886, "step": 10545 }, { "epoch": 1.0721838145587637, "grad_norm": 0.3066249489784241, "learning_rate": 9.606356389925037e-06, "loss": 0.341, "step": 10546 }, { "epoch": 1.0722854819032126, "grad_norm": 0.3134155571460724, "learning_rate": 9.60621835585417e-06, "loss": 0.3602, "step": 10547 }, { "epoch": 1.0723871492476618, "grad_norm": 0.31494778394699097, "learning_rate": 9.606080298578268e-06, "loss": 0.3589, "step": 10548 }, { "epoch": 1.0724888165921107, "grad_norm": 0.3001648187637329, "learning_rate": 9.605942218098028e-06, "loss": 0.3481, "step": 10549 }, { "epoch": 1.0725904839365596, "grad_norm": 0.2921580374240875, "learning_rate": 9.605804114414146e-06, "loss": 0.3761, "step": 10550 }, { "epoch": 1.0726921512810086, "grad_norm": 0.3218623995780945, "learning_rate": 9.605665987527317e-06, "loss": 0.3584, "step": 10551 }, { "epoch": 1.0727938186254575, "grad_norm": 0.30280715227127075, "learning_rate": 9.605527837438238e-06, "loss": 0.3612, "step": 10552 }, { "epoch": 1.0728954859699065, "grad_norm": 0.2897326350212097, "learning_rate": 9.605389664147603e-06, "loss": 0.355, "step": 10553 }, { "epoch": 1.0729971533143554, "grad_norm": 0.29312801361083984, "learning_rate": 9.605251467656111e-06, "loss": 0.3454, "step": 10554 }, { "epoch": 1.0730988206588044, "grad_norm": 0.2859320640563965, "learning_rate": 9.605113247964455e-06, "loss": 0.3297, "step": 10555 }, { "epoch": 1.0732004880032533, "grad_norm": 0.3404652178287506, "learning_rate": 9.604975005073333e-06, "loss": 0.3644, "step": 10556 }, { "epoch": 1.0733021553477022, "grad_norm": 0.3010406196117401, "learning_rate": 9.604836738983443e-06, "loss": 0.3499, "step": 10557 }, { "epoch": 1.0734038226921512, "grad_norm": 0.3076721131801605, "learning_rate": 9.604698449695478e-06, "loss": 0.3786, "step": 10558 }, { "epoch": 1.0735054900366003, "grad_norm": 0.3236182630062103, "learning_rate": 9.604560137210137e-06, "loss": 0.3873, "step": 10559 }, { "epoch": 1.0736071573810493, "grad_norm": 0.3177799880504608, "learning_rate": 9.604421801528116e-06, "loss": 0.3768, "step": 10560 }, { "epoch": 1.0737088247254982, "grad_norm": 0.318484365940094, "learning_rate": 9.604283442650112e-06, "loss": 0.366, "step": 10561 }, { "epoch": 1.0738104920699472, "grad_norm": 0.31874576210975647, "learning_rate": 9.604145060576821e-06, "loss": 0.3968, "step": 10562 }, { "epoch": 1.073912159414396, "grad_norm": 0.3168003261089325, "learning_rate": 9.604006655308943e-06, "loss": 0.3718, "step": 10563 }, { "epoch": 1.074013826758845, "grad_norm": 0.29697224497795105, "learning_rate": 9.603868226847174e-06, "loss": 0.3667, "step": 10564 }, { "epoch": 1.074115494103294, "grad_norm": 0.3279592990875244, "learning_rate": 9.60372977519221e-06, "loss": 0.3803, "step": 10565 }, { "epoch": 1.074217161447743, "grad_norm": 0.27886009216308594, "learning_rate": 9.60359130034475e-06, "loss": 0.3527, "step": 10566 }, { "epoch": 1.0743188287921919, "grad_norm": 0.34023237228393555, "learning_rate": 9.60345280230549e-06, "loss": 0.3721, "step": 10567 }, { "epoch": 1.0744204961366408, "grad_norm": 0.3396610617637634, "learning_rate": 9.603314281075129e-06, "loss": 0.3259, "step": 10568 }, { "epoch": 1.07452216348109, "grad_norm": 0.30649253726005554, "learning_rate": 9.603175736654366e-06, "loss": 0.3989, "step": 10569 }, { "epoch": 1.074623830825539, "grad_norm": 0.318579763174057, "learning_rate": 9.603037169043896e-06, "loss": 0.4143, "step": 10570 }, { "epoch": 1.0747254981699879, "grad_norm": 0.2969835102558136, "learning_rate": 9.602898578244418e-06, "loss": 0.3435, "step": 10571 }, { "epoch": 1.0748271655144368, "grad_norm": 0.31362324953079224, "learning_rate": 9.602759964256632e-06, "loss": 0.3707, "step": 10572 }, { "epoch": 1.0749288328588857, "grad_norm": 0.33528932929039, "learning_rate": 9.602621327081235e-06, "loss": 0.3666, "step": 10573 }, { "epoch": 1.0750305002033347, "grad_norm": 0.3225119411945343, "learning_rate": 9.602482666718926e-06, "loss": 0.3874, "step": 10574 }, { "epoch": 1.0751321675477836, "grad_norm": 0.33625224232673645, "learning_rate": 9.6023439831704e-06, "loss": 0.3603, "step": 10575 }, { "epoch": 1.0752338348922326, "grad_norm": 0.31875160336494446, "learning_rate": 9.602205276436363e-06, "loss": 0.3668, "step": 10576 }, { "epoch": 1.0753355022366815, "grad_norm": 0.31504756212234497, "learning_rate": 9.602066546517507e-06, "loss": 0.338, "step": 10577 }, { "epoch": 1.0754371695811304, "grad_norm": 0.3258790075778961, "learning_rate": 9.601927793414534e-06, "loss": 0.3859, "step": 10578 }, { "epoch": 1.0755388369255794, "grad_norm": 0.33433449268341064, "learning_rate": 9.601789017128143e-06, "loss": 0.3579, "step": 10579 }, { "epoch": 1.0756405042700286, "grad_norm": 0.30618909001350403, "learning_rate": 9.60165021765903e-06, "loss": 0.3989, "step": 10580 }, { "epoch": 1.0757421716144775, "grad_norm": 0.3355284035205841, "learning_rate": 9.6015113950079e-06, "loss": 0.4076, "step": 10581 }, { "epoch": 1.0758438389589264, "grad_norm": 0.3302136957645416, "learning_rate": 9.601372549175447e-06, "loss": 0.3699, "step": 10582 }, { "epoch": 1.0759455063033754, "grad_norm": 0.3102051913738251, "learning_rate": 9.601233680162374e-06, "loss": 0.3577, "step": 10583 }, { "epoch": 1.0760471736478243, "grad_norm": 0.3081949055194855, "learning_rate": 9.601094787969378e-06, "loss": 0.371, "step": 10584 }, { "epoch": 1.0761488409922733, "grad_norm": 0.298010915517807, "learning_rate": 9.60095587259716e-06, "loss": 0.3523, "step": 10585 }, { "epoch": 1.0762505083367222, "grad_norm": 0.3088156580924988, "learning_rate": 9.60081693404642e-06, "loss": 0.3866, "step": 10586 }, { "epoch": 1.0763521756811711, "grad_norm": 0.2957010269165039, "learning_rate": 9.600677972317856e-06, "loss": 0.3766, "step": 10587 }, { "epoch": 1.07645384302562, "grad_norm": 0.3281523585319519, "learning_rate": 9.600538987412171e-06, "loss": 0.3755, "step": 10588 }, { "epoch": 1.0765555103700692, "grad_norm": 0.3260763883590698, "learning_rate": 9.600399979330064e-06, "loss": 0.3852, "step": 10589 }, { "epoch": 1.0766571777145182, "grad_norm": 0.30364421010017395, "learning_rate": 9.600260948072235e-06, "loss": 0.3604, "step": 10590 }, { "epoch": 1.0767588450589671, "grad_norm": 0.2945837080478668, "learning_rate": 9.600121893639384e-06, "loss": 0.3682, "step": 10591 }, { "epoch": 1.076860512403416, "grad_norm": 0.29014909267425537, "learning_rate": 9.599982816032214e-06, "loss": 0.3508, "step": 10592 }, { "epoch": 1.076962179747865, "grad_norm": 0.3154824376106262, "learning_rate": 9.599843715251423e-06, "loss": 0.3509, "step": 10593 }, { "epoch": 1.077063847092314, "grad_norm": 0.31254130601882935, "learning_rate": 9.59970459129771e-06, "loss": 0.362, "step": 10594 }, { "epoch": 1.077165514436763, "grad_norm": 0.28828802704811096, "learning_rate": 9.599565444171782e-06, "loss": 0.3479, "step": 10595 }, { "epoch": 1.0772671817812118, "grad_norm": 0.32462096214294434, "learning_rate": 9.599426273874334e-06, "loss": 0.4222, "step": 10596 }, { "epoch": 1.0773688491256608, "grad_norm": 0.3082740008831024, "learning_rate": 9.599287080406069e-06, "loss": 0.3651, "step": 10597 }, { "epoch": 1.0774705164701097, "grad_norm": 0.3147006332874298, "learning_rate": 9.599147863767689e-06, "loss": 0.3863, "step": 10598 }, { "epoch": 1.0775721838145587, "grad_norm": 0.2998131513595581, "learning_rate": 9.599008623959896e-06, "loss": 0.3931, "step": 10599 }, { "epoch": 1.0776738511590078, "grad_norm": 0.3079794645309448, "learning_rate": 9.59886936098339e-06, "loss": 0.386, "step": 10600 }, { "epoch": 1.0777755185034568, "grad_norm": 0.3168978691101074, "learning_rate": 9.598730074838872e-06, "loss": 0.368, "step": 10601 }, { "epoch": 1.0778771858479057, "grad_norm": 0.29054930806159973, "learning_rate": 9.598590765527045e-06, "loss": 0.3501, "step": 10602 }, { "epoch": 1.0779788531923546, "grad_norm": 0.30747202038764954, "learning_rate": 9.598451433048612e-06, "loss": 0.3818, "step": 10603 }, { "epoch": 1.0780805205368036, "grad_norm": 0.2943679988384247, "learning_rate": 9.598312077404273e-06, "loss": 0.3678, "step": 10604 }, { "epoch": 1.0781821878812525, "grad_norm": 0.3278126120567322, "learning_rate": 9.59817269859473e-06, "loss": 0.3468, "step": 10605 }, { "epoch": 1.0782838552257015, "grad_norm": 0.325642466545105, "learning_rate": 9.598033296620686e-06, "loss": 0.3514, "step": 10606 }, { "epoch": 1.0783855225701504, "grad_norm": 0.2837677299976349, "learning_rate": 9.597893871482842e-06, "loss": 0.3628, "step": 10607 }, { "epoch": 1.0784871899145994, "grad_norm": 0.3084336817264557, "learning_rate": 9.597754423181901e-06, "loss": 0.3674, "step": 10608 }, { "epoch": 1.0785888572590483, "grad_norm": 0.33240920305252075, "learning_rate": 9.597614951718568e-06, "loss": 0.3771, "step": 10609 }, { "epoch": 1.0786905246034975, "grad_norm": 0.3117263913154602, "learning_rate": 9.597475457093541e-06, "loss": 0.3593, "step": 10610 }, { "epoch": 1.0787921919479464, "grad_norm": 0.279927521944046, "learning_rate": 9.597335939307527e-06, "loss": 0.3577, "step": 10611 }, { "epoch": 1.0788938592923953, "grad_norm": 0.3068515658378601, "learning_rate": 9.597196398361226e-06, "loss": 0.371, "step": 10612 }, { "epoch": 1.0789955266368443, "grad_norm": 0.30379265546798706, "learning_rate": 9.597056834255343e-06, "loss": 0.3744, "step": 10613 }, { "epoch": 1.0790971939812932, "grad_norm": 0.2774225175380707, "learning_rate": 9.596917246990578e-06, "loss": 0.3484, "step": 10614 }, { "epoch": 1.0791988613257422, "grad_norm": 0.31389105319976807, "learning_rate": 9.596777636567638e-06, "loss": 0.3459, "step": 10615 }, { "epoch": 1.079300528670191, "grad_norm": 0.30667805671691895, "learning_rate": 9.596638002987224e-06, "loss": 0.3605, "step": 10616 }, { "epoch": 1.07940219601464, "grad_norm": 0.3033447861671448, "learning_rate": 9.596498346250042e-06, "loss": 0.3907, "step": 10617 }, { "epoch": 1.079503863359089, "grad_norm": 0.3277949392795563, "learning_rate": 9.596358666356791e-06, "loss": 0.3893, "step": 10618 }, { "epoch": 1.079605530703538, "grad_norm": 0.30926817655563354, "learning_rate": 9.59621896330818e-06, "loss": 0.3544, "step": 10619 }, { "epoch": 1.0797071980479869, "grad_norm": 0.3229009509086609, "learning_rate": 9.596079237104907e-06, "loss": 0.3982, "step": 10620 }, { "epoch": 1.079808865392436, "grad_norm": 0.29706132411956787, "learning_rate": 9.59593948774768e-06, "loss": 0.3727, "step": 10621 }, { "epoch": 1.079910532736885, "grad_norm": 0.33397629857063293, "learning_rate": 9.595799715237204e-06, "loss": 0.3714, "step": 10622 }, { "epoch": 1.080012200081334, "grad_norm": 0.29197192192077637, "learning_rate": 9.595659919574179e-06, "loss": 0.3555, "step": 10623 }, { "epoch": 1.0801138674257829, "grad_norm": 0.2806859314441681, "learning_rate": 9.595520100759312e-06, "loss": 0.3678, "step": 10624 }, { "epoch": 1.0802155347702318, "grad_norm": 0.2971765697002411, "learning_rate": 9.595380258793307e-06, "loss": 0.3222, "step": 10625 }, { "epoch": 1.0803172021146807, "grad_norm": 0.2973950207233429, "learning_rate": 9.595240393676869e-06, "loss": 0.3975, "step": 10626 }, { "epoch": 1.0804188694591297, "grad_norm": 0.292203426361084, "learning_rate": 9.5951005054107e-06, "loss": 0.3569, "step": 10627 }, { "epoch": 1.0805205368035786, "grad_norm": 0.3251028060913086, "learning_rate": 9.594960593995508e-06, "loss": 0.3486, "step": 10628 }, { "epoch": 1.0806222041480276, "grad_norm": 0.2830139994621277, "learning_rate": 9.594820659431997e-06, "loss": 0.3888, "step": 10629 }, { "epoch": 1.0807238714924767, "grad_norm": 0.3411802351474762, "learning_rate": 9.594680701720869e-06, "loss": 0.3778, "step": 10630 }, { "epoch": 1.0808255388369257, "grad_norm": 0.33156636357307434, "learning_rate": 9.594540720862833e-06, "loss": 0.3452, "step": 10631 }, { "epoch": 1.0809272061813746, "grad_norm": 0.3602869212627411, "learning_rate": 9.594400716858592e-06, "loss": 0.3473, "step": 10632 }, { "epoch": 1.0810288735258236, "grad_norm": 0.3371613323688507, "learning_rate": 9.594260689708854e-06, "loss": 0.3665, "step": 10633 }, { "epoch": 1.0811305408702725, "grad_norm": 0.3509971499443054, "learning_rate": 9.594120639414321e-06, "loss": 0.3633, "step": 10634 }, { "epoch": 1.0812322082147214, "grad_norm": 0.3360998332500458, "learning_rate": 9.5939805659757e-06, "loss": 0.3851, "step": 10635 }, { "epoch": 1.0813338755591704, "grad_norm": 0.2960473895072937, "learning_rate": 9.593840469393696e-06, "loss": 0.3621, "step": 10636 }, { "epoch": 1.0814355429036193, "grad_norm": 0.36432135105133057, "learning_rate": 9.593700349669017e-06, "loss": 0.3762, "step": 10637 }, { "epoch": 1.0815372102480683, "grad_norm": 0.31488269567489624, "learning_rate": 9.593560206802365e-06, "loss": 0.3294, "step": 10638 }, { "epoch": 1.0816388775925172, "grad_norm": 0.31877148151397705, "learning_rate": 9.59342004079445e-06, "loss": 0.3587, "step": 10639 }, { "epoch": 1.0817405449369661, "grad_norm": 0.3090878427028656, "learning_rate": 9.593279851645976e-06, "loss": 0.3611, "step": 10640 }, { "epoch": 1.0818422122814153, "grad_norm": 0.32857462763786316, "learning_rate": 9.59313963935765e-06, "loss": 0.3549, "step": 10641 }, { "epoch": 1.0819438796258642, "grad_norm": 0.35380661487579346, "learning_rate": 9.592999403930178e-06, "loss": 0.35, "step": 10642 }, { "epoch": 1.0820455469703132, "grad_norm": 0.3117124140262604, "learning_rate": 9.592859145364266e-06, "loss": 0.3872, "step": 10643 }, { "epoch": 1.0821472143147621, "grad_norm": 0.3045297861099243, "learning_rate": 9.592718863660622e-06, "loss": 0.3698, "step": 10644 }, { "epoch": 1.082248881659211, "grad_norm": 0.3275218605995178, "learning_rate": 9.59257855881995e-06, "loss": 0.3553, "step": 10645 }, { "epoch": 1.08235054900366, "grad_norm": 0.32503703236579895, "learning_rate": 9.59243823084296e-06, "loss": 0.3848, "step": 10646 }, { "epoch": 1.082452216348109, "grad_norm": 0.3096332252025604, "learning_rate": 9.592297879730357e-06, "loss": 0.3977, "step": 10647 }, { "epoch": 1.082553883692558, "grad_norm": 0.30312126874923706, "learning_rate": 9.59215750548285e-06, "loss": 0.3937, "step": 10648 }, { "epoch": 1.0826555510370068, "grad_norm": 0.33699485659599304, "learning_rate": 9.592017108101142e-06, "loss": 0.3609, "step": 10649 }, { "epoch": 1.0827572183814558, "grad_norm": 0.2873876988887787, "learning_rate": 9.591876687585944e-06, "loss": 0.3479, "step": 10650 }, { "epoch": 1.082858885725905, "grad_norm": 0.2897213399410248, "learning_rate": 9.591736243937962e-06, "loss": 0.3798, "step": 10651 }, { "epoch": 1.0829605530703539, "grad_norm": 0.2980472445487976, "learning_rate": 9.591595777157906e-06, "loss": 0.3673, "step": 10652 }, { "epoch": 1.0830622204148028, "grad_norm": 0.3156791627407074, "learning_rate": 9.591455287246479e-06, "loss": 0.3537, "step": 10653 }, { "epoch": 1.0831638877592518, "grad_norm": 0.31138408184051514, "learning_rate": 9.591314774204393e-06, "loss": 0.3827, "step": 10654 }, { "epoch": 1.0832655551037007, "grad_norm": 0.29447582364082336, "learning_rate": 9.591174238032355e-06, "loss": 0.3458, "step": 10655 }, { "epoch": 1.0833672224481496, "grad_norm": 0.340060830116272, "learning_rate": 9.59103367873107e-06, "loss": 0.3901, "step": 10656 }, { "epoch": 1.0834688897925986, "grad_norm": 0.31415945291519165, "learning_rate": 9.59089309630125e-06, "loss": 0.3563, "step": 10657 }, { "epoch": 1.0835705571370475, "grad_norm": 0.3038463294506073, "learning_rate": 9.590752490743602e-06, "loss": 0.344, "step": 10658 }, { "epoch": 1.0836722244814965, "grad_norm": 0.2887347936630249, "learning_rate": 9.590611862058832e-06, "loss": 0.3925, "step": 10659 }, { "epoch": 1.0837738918259454, "grad_norm": 0.3426070213317871, "learning_rate": 9.590471210247652e-06, "loss": 0.3565, "step": 10660 }, { "epoch": 1.0838755591703944, "grad_norm": 0.3168833553791046, "learning_rate": 9.590330535310768e-06, "loss": 0.3781, "step": 10661 }, { "epoch": 1.0839772265148435, "grad_norm": 0.2983405888080597, "learning_rate": 9.590189837248888e-06, "loss": 0.3679, "step": 10662 }, { "epoch": 1.0840788938592925, "grad_norm": 0.3021412193775177, "learning_rate": 9.590049116062725e-06, "loss": 0.3514, "step": 10663 }, { "epoch": 1.0841805612037414, "grad_norm": 0.2826647162437439, "learning_rate": 9.589908371752985e-06, "loss": 0.353, "step": 10664 }, { "epoch": 1.0842822285481903, "grad_norm": 0.3026074469089508, "learning_rate": 9.589767604320377e-06, "loss": 0.3634, "step": 10665 }, { "epoch": 1.0843838958926393, "grad_norm": 0.3295261859893799, "learning_rate": 9.58962681376561e-06, "loss": 0.3757, "step": 10666 }, { "epoch": 1.0844855632370882, "grad_norm": 0.3117871582508087, "learning_rate": 9.589486000089394e-06, "loss": 0.3671, "step": 10667 }, { "epoch": 1.0845872305815372, "grad_norm": 0.29098567366600037, "learning_rate": 9.589345163292438e-06, "loss": 0.3623, "step": 10668 }, { "epoch": 1.084688897925986, "grad_norm": 0.31197425723075867, "learning_rate": 9.589204303375452e-06, "loss": 0.3439, "step": 10669 }, { "epoch": 1.084790565270435, "grad_norm": 0.303411066532135, "learning_rate": 9.589063420339146e-06, "loss": 0.4241, "step": 10670 }, { "epoch": 1.0848922326148842, "grad_norm": 0.28678035736083984, "learning_rate": 9.588922514184228e-06, "loss": 0.35, "step": 10671 }, { "epoch": 1.0849938999593332, "grad_norm": 0.2935176491737366, "learning_rate": 9.588781584911409e-06, "loss": 0.3484, "step": 10672 }, { "epoch": 1.085095567303782, "grad_norm": 0.3323821723461151, "learning_rate": 9.5886406325214e-06, "loss": 0.3822, "step": 10673 }, { "epoch": 1.085197234648231, "grad_norm": 0.2902909815311432, "learning_rate": 9.588499657014907e-06, "loss": 0.3433, "step": 10674 }, { "epoch": 1.08529890199268, "grad_norm": 0.3160462975502014, "learning_rate": 9.588358658392646e-06, "loss": 0.3687, "step": 10675 }, { "epoch": 1.085400569337129, "grad_norm": 0.3004998564720154, "learning_rate": 9.588217636655324e-06, "loss": 0.3919, "step": 10676 }, { "epoch": 1.0855022366815779, "grad_norm": 0.295286625623703, "learning_rate": 9.588076591803651e-06, "loss": 0.3581, "step": 10677 }, { "epoch": 1.0856039040260268, "grad_norm": 0.2981101870536804, "learning_rate": 9.587935523838339e-06, "loss": 0.3626, "step": 10678 }, { "epoch": 1.0857055713704757, "grad_norm": 0.3346812427043915, "learning_rate": 9.587794432760099e-06, "loss": 0.3736, "step": 10679 }, { "epoch": 1.0858072387149247, "grad_norm": 0.3207184672355652, "learning_rate": 9.587653318569638e-06, "loss": 0.3766, "step": 10680 }, { "epoch": 1.0859089060593736, "grad_norm": 0.35621321201324463, "learning_rate": 9.587512181267672e-06, "loss": 0.3842, "step": 10681 }, { "epoch": 1.0860105734038228, "grad_norm": 0.31904399394989014, "learning_rate": 9.58737102085491e-06, "loss": 0.3764, "step": 10682 }, { "epoch": 1.0861122407482717, "grad_norm": 0.3119509518146515, "learning_rate": 9.587229837332061e-06, "loss": 0.3682, "step": 10683 }, { "epoch": 1.0862139080927207, "grad_norm": 0.32576799392700195, "learning_rate": 9.587088630699839e-06, "loss": 0.3567, "step": 10684 }, { "epoch": 1.0863155754371696, "grad_norm": 0.2947612404823303, "learning_rate": 9.586947400958956e-06, "loss": 0.3509, "step": 10685 }, { "epoch": 1.0864172427816186, "grad_norm": 0.3069773316383362, "learning_rate": 9.58680614811012e-06, "loss": 0.3551, "step": 10686 }, { "epoch": 1.0865189101260675, "grad_norm": 0.3155510425567627, "learning_rate": 9.586664872154046e-06, "loss": 0.3586, "step": 10687 }, { "epoch": 1.0866205774705164, "grad_norm": 0.303806334733963, "learning_rate": 9.586523573091443e-06, "loss": 0.3476, "step": 10688 }, { "epoch": 1.0867222448149654, "grad_norm": 0.2973417639732361, "learning_rate": 9.586382250923025e-06, "loss": 0.3449, "step": 10689 }, { "epoch": 1.0868239121594143, "grad_norm": 0.34633269906044006, "learning_rate": 9.586240905649503e-06, "loss": 0.3685, "step": 10690 }, { "epoch": 1.0869255795038633, "grad_norm": 0.31844455003738403, "learning_rate": 9.586099537271591e-06, "loss": 0.3826, "step": 10691 }, { "epoch": 1.0870272468483124, "grad_norm": 0.31458982825279236, "learning_rate": 9.585958145789997e-06, "loss": 0.3807, "step": 10692 }, { "epoch": 1.0871289141927614, "grad_norm": 0.31385380029678345, "learning_rate": 9.585816731205438e-06, "loss": 0.37, "step": 10693 }, { "epoch": 1.0872305815372103, "grad_norm": 0.3358323872089386, "learning_rate": 9.585675293518621e-06, "loss": 0.3615, "step": 10694 }, { "epoch": 1.0873322488816592, "grad_norm": 0.31687986850738525, "learning_rate": 9.585533832730263e-06, "loss": 0.3693, "step": 10695 }, { "epoch": 1.0874339162261082, "grad_norm": 0.298384428024292, "learning_rate": 9.585392348841076e-06, "loss": 0.3498, "step": 10696 }, { "epoch": 1.0875355835705571, "grad_norm": 0.31275674700737, "learning_rate": 9.585250841851771e-06, "loss": 0.3794, "step": 10697 }, { "epoch": 1.087637250915006, "grad_norm": 0.29260000586509705, "learning_rate": 9.585109311763062e-06, "loss": 0.3542, "step": 10698 }, { "epoch": 1.087738918259455, "grad_norm": 0.29276782274246216, "learning_rate": 9.584967758575662e-06, "loss": 0.3855, "step": 10699 }, { "epoch": 1.087840585603904, "grad_norm": 0.31551241874694824, "learning_rate": 9.584826182290284e-06, "loss": 0.3656, "step": 10700 }, { "epoch": 1.087942252948353, "grad_norm": 0.3073161244392395, "learning_rate": 9.58468458290764e-06, "loss": 0.4035, "step": 10701 }, { "epoch": 1.0880439202928018, "grad_norm": 0.298833966255188, "learning_rate": 9.584542960428447e-06, "loss": 0.3404, "step": 10702 }, { "epoch": 1.088145587637251, "grad_norm": 0.30593591928482056, "learning_rate": 9.584401314853416e-06, "loss": 0.3725, "step": 10703 }, { "epoch": 1.0882472549817, "grad_norm": 0.3343714773654938, "learning_rate": 9.584259646183258e-06, "loss": 0.3419, "step": 10704 }, { "epoch": 1.0883489223261489, "grad_norm": 0.2983039319515228, "learning_rate": 9.58411795441869e-06, "loss": 0.391, "step": 10705 }, { "epoch": 1.0884505896705978, "grad_norm": 0.3069632947444916, "learning_rate": 9.583976239560426e-06, "loss": 0.3888, "step": 10706 }, { "epoch": 1.0885522570150468, "grad_norm": 0.315216064453125, "learning_rate": 9.583834501609179e-06, "loss": 0.3606, "step": 10707 }, { "epoch": 1.0886539243594957, "grad_norm": 0.3053050935268402, "learning_rate": 9.583692740565664e-06, "loss": 0.4007, "step": 10708 }, { "epoch": 1.0887555917039446, "grad_norm": 0.2932926118373871, "learning_rate": 9.583550956430592e-06, "loss": 0.3615, "step": 10709 }, { "epoch": 1.0888572590483936, "grad_norm": 0.31730782985687256, "learning_rate": 9.583409149204681e-06, "loss": 0.3681, "step": 10710 }, { "epoch": 1.0889589263928425, "grad_norm": 0.33830296993255615, "learning_rate": 9.583267318888643e-06, "loss": 0.3484, "step": 10711 }, { "epoch": 1.0890605937372917, "grad_norm": 0.3143828809261322, "learning_rate": 9.583125465483193e-06, "loss": 0.3883, "step": 10712 }, { "epoch": 1.0891622610817406, "grad_norm": 0.3139471113681793, "learning_rate": 9.582983588989047e-06, "loss": 0.3619, "step": 10713 }, { "epoch": 1.0892639284261896, "grad_norm": 0.3228834867477417, "learning_rate": 9.58284168940692e-06, "loss": 0.3587, "step": 10714 }, { "epoch": 1.0893655957706385, "grad_norm": 0.29572972655296326, "learning_rate": 9.582699766737525e-06, "loss": 0.3611, "step": 10715 }, { "epoch": 1.0894672631150875, "grad_norm": 0.3392777144908905, "learning_rate": 9.582557820981576e-06, "loss": 0.4461, "step": 10716 }, { "epoch": 1.0895689304595364, "grad_norm": 0.3095693588256836, "learning_rate": 9.58241585213979e-06, "loss": 0.3421, "step": 10717 }, { "epoch": 1.0896705978039853, "grad_norm": 0.2814876139163971, "learning_rate": 9.582273860212882e-06, "loss": 0.3457, "step": 10718 }, { "epoch": 1.0897722651484343, "grad_norm": 0.31366461515426636, "learning_rate": 9.58213184520157e-06, "loss": 0.3587, "step": 10719 }, { "epoch": 1.0898739324928832, "grad_norm": 0.3106045126914978, "learning_rate": 9.581989807106563e-06, "loss": 0.3811, "step": 10720 }, { "epoch": 1.0899755998373322, "grad_norm": 0.2899973392486572, "learning_rate": 9.581847745928581e-06, "loss": 0.3266, "step": 10721 }, { "epoch": 1.090077267181781, "grad_norm": 0.31068480014801025, "learning_rate": 9.58170566166834e-06, "loss": 0.3868, "step": 10722 }, { "epoch": 1.0901789345262303, "grad_norm": 0.3408255875110626, "learning_rate": 9.581563554326554e-06, "loss": 0.3847, "step": 10723 }, { "epoch": 1.0902806018706792, "grad_norm": 0.3193292021751404, "learning_rate": 9.581421423903939e-06, "loss": 0.3619, "step": 10724 }, { "epoch": 1.0903822692151282, "grad_norm": 0.3364901542663574, "learning_rate": 9.581279270401213e-06, "loss": 0.3591, "step": 10725 }, { "epoch": 1.090483936559577, "grad_norm": 0.33099672198295593, "learning_rate": 9.581137093819089e-06, "loss": 0.401, "step": 10726 }, { "epoch": 1.090585603904026, "grad_norm": 0.30004504323005676, "learning_rate": 9.580994894158286e-06, "loss": 0.3598, "step": 10727 }, { "epoch": 1.090687271248475, "grad_norm": 0.3627922832965851, "learning_rate": 9.580852671419521e-06, "loss": 0.3552, "step": 10728 }, { "epoch": 1.090788938592924, "grad_norm": 0.31378233432769775, "learning_rate": 9.580710425603506e-06, "loss": 0.3448, "step": 10729 }, { "epoch": 1.0908906059373729, "grad_norm": 0.3005169928073883, "learning_rate": 9.580568156710962e-06, "loss": 0.3913, "step": 10730 }, { "epoch": 1.0909922732818218, "grad_norm": 0.3577496409416199, "learning_rate": 9.580425864742603e-06, "loss": 0.3827, "step": 10731 }, { "epoch": 1.0910939406262707, "grad_norm": 0.3175920844078064, "learning_rate": 9.580283549699147e-06, "loss": 0.3482, "step": 10732 }, { "epoch": 1.09119560797072, "grad_norm": 0.29504862427711487, "learning_rate": 9.580141211581312e-06, "loss": 0.3382, "step": 10733 }, { "epoch": 1.0912972753151688, "grad_norm": 0.3244789242744446, "learning_rate": 9.579998850389813e-06, "loss": 0.3405, "step": 10734 }, { "epoch": 1.0913989426596178, "grad_norm": 0.3417532444000244, "learning_rate": 9.579856466125369e-06, "loss": 0.3608, "step": 10735 }, { "epoch": 1.0915006100040667, "grad_norm": 0.30959072709083557, "learning_rate": 9.579714058788697e-06, "loss": 0.3661, "step": 10736 }, { "epoch": 1.0916022773485157, "grad_norm": 0.3382580876350403, "learning_rate": 9.579571628380512e-06, "loss": 0.3705, "step": 10737 }, { "epoch": 1.0917039446929646, "grad_norm": 0.3722907304763794, "learning_rate": 9.579429174901534e-06, "loss": 0.4136, "step": 10738 }, { "epoch": 1.0918056120374136, "grad_norm": 0.30491629242897034, "learning_rate": 9.579286698352481e-06, "loss": 0.3802, "step": 10739 }, { "epoch": 1.0919072793818625, "grad_norm": 0.32511430978775024, "learning_rate": 9.57914419873407e-06, "loss": 0.3659, "step": 10740 }, { "epoch": 1.0920089467263114, "grad_norm": 0.36883169412612915, "learning_rate": 9.579001676047018e-06, "loss": 0.361, "step": 10741 }, { "epoch": 1.0921106140707604, "grad_norm": 0.3238970935344696, "learning_rate": 9.578859130292041e-06, "loss": 0.3723, "step": 10742 }, { "epoch": 1.0922122814152093, "grad_norm": 0.34180599451065063, "learning_rate": 9.578716561469864e-06, "loss": 0.4141, "step": 10743 }, { "epoch": 1.0923139487596585, "grad_norm": 0.3416772484779358, "learning_rate": 9.5785739695812e-06, "loss": 0.3891, "step": 10744 }, { "epoch": 1.0924156161041074, "grad_norm": 0.29181453585624695, "learning_rate": 9.578431354626767e-06, "loss": 0.3533, "step": 10745 }, { "epoch": 1.0925172834485564, "grad_norm": 0.31278878450393677, "learning_rate": 9.578288716607287e-06, "loss": 0.3372, "step": 10746 }, { "epoch": 1.0926189507930053, "grad_norm": 0.3290766477584839, "learning_rate": 9.578146055523474e-06, "loss": 0.3573, "step": 10747 }, { "epoch": 1.0927206181374542, "grad_norm": 0.3149983882904053, "learning_rate": 9.57800337137605e-06, "loss": 0.3585, "step": 10748 }, { "epoch": 1.0928222854819032, "grad_norm": 0.2994785010814667, "learning_rate": 9.577860664165733e-06, "loss": 0.3421, "step": 10749 }, { "epoch": 1.0929239528263521, "grad_norm": 0.29020363092422485, "learning_rate": 9.577717933893243e-06, "loss": 0.3734, "step": 10750 }, { "epoch": 1.093025620170801, "grad_norm": 0.3166849613189697, "learning_rate": 9.577575180559297e-06, "loss": 0.3724, "step": 10751 }, { "epoch": 1.09312728751525, "grad_norm": 0.3097691535949707, "learning_rate": 9.577432404164615e-06, "loss": 0.3888, "step": 10752 }, { "epoch": 1.0932289548596992, "grad_norm": 0.34369155764579773, "learning_rate": 9.577289604709915e-06, "loss": 0.3745, "step": 10753 }, { "epoch": 1.0933306222041481, "grad_norm": 0.31707021594047546, "learning_rate": 9.577146782195919e-06, "loss": 0.3889, "step": 10754 }, { "epoch": 1.093432289548597, "grad_norm": 0.30600613355636597, "learning_rate": 9.577003936623346e-06, "loss": 0.3788, "step": 10755 }, { "epoch": 1.093533956893046, "grad_norm": 0.34663283824920654, "learning_rate": 9.576861067992913e-06, "loss": 0.3332, "step": 10756 }, { "epoch": 1.093635624237495, "grad_norm": 0.3378225564956665, "learning_rate": 9.576718176305343e-06, "loss": 0.3588, "step": 10757 }, { "epoch": 1.0937372915819439, "grad_norm": 0.30036216974258423, "learning_rate": 9.576575261561354e-06, "loss": 0.3771, "step": 10758 }, { "epoch": 1.0938389589263928, "grad_norm": 0.29613494873046875, "learning_rate": 9.576432323761666e-06, "loss": 0.3584, "step": 10759 }, { "epoch": 1.0939406262708418, "grad_norm": 0.3486926555633545, "learning_rate": 9.576289362906999e-06, "loss": 0.3848, "step": 10760 }, { "epoch": 1.0940422936152907, "grad_norm": 0.32705098390579224, "learning_rate": 9.576146378998074e-06, "loss": 0.348, "step": 10761 }, { "epoch": 1.0941439609597396, "grad_norm": 0.31256455183029175, "learning_rate": 9.576003372035613e-06, "loss": 0.4232, "step": 10762 }, { "epoch": 1.0942456283041886, "grad_norm": 0.346284955739975, "learning_rate": 9.575860342020333e-06, "loss": 0.3984, "step": 10763 }, { "epoch": 1.0943472956486378, "grad_norm": 0.3054157793521881, "learning_rate": 9.575717288952956e-06, "loss": 0.3802, "step": 10764 }, { "epoch": 1.0944489629930867, "grad_norm": 0.2927788197994232, "learning_rate": 9.575574212834202e-06, "loss": 0.3552, "step": 10765 }, { "epoch": 1.0945506303375356, "grad_norm": 0.327273964881897, "learning_rate": 9.575431113664792e-06, "loss": 0.3861, "step": 10766 }, { "epoch": 1.0946522976819846, "grad_norm": 0.29469966888427734, "learning_rate": 9.57528799144545e-06, "loss": 0.3376, "step": 10767 }, { "epoch": 1.0947539650264335, "grad_norm": 0.2951279580593109, "learning_rate": 9.575144846176893e-06, "loss": 0.3106, "step": 10768 }, { "epoch": 1.0948556323708825, "grad_norm": 0.30554142594337463, "learning_rate": 9.575001677859842e-06, "loss": 0.3767, "step": 10769 }, { "epoch": 1.0949572997153314, "grad_norm": 0.3371458947658539, "learning_rate": 9.574858486495022e-06, "loss": 0.3769, "step": 10770 }, { "epoch": 1.0950589670597803, "grad_norm": 0.269241601228714, "learning_rate": 9.57471527208315e-06, "loss": 0.3324, "step": 10771 }, { "epoch": 1.0951606344042293, "grad_norm": 0.2894285321235657, "learning_rate": 9.574572034624952e-06, "loss": 0.3709, "step": 10772 }, { "epoch": 1.0952623017486782, "grad_norm": 0.3174765110015869, "learning_rate": 9.574428774121145e-06, "loss": 0.3372, "step": 10773 }, { "epoch": 1.0953639690931274, "grad_norm": 0.3182668387889862, "learning_rate": 9.574285490572453e-06, "loss": 0.377, "step": 10774 }, { "epoch": 1.0954656364375763, "grad_norm": 0.3229849934577942, "learning_rate": 9.574142183979598e-06, "loss": 0.3893, "step": 10775 }, { "epoch": 1.0955673037820253, "grad_norm": 0.3058300018310547, "learning_rate": 9.573998854343302e-06, "loss": 0.352, "step": 10776 }, { "epoch": 1.0956689711264742, "grad_norm": 0.33778607845306396, "learning_rate": 9.573855501664287e-06, "loss": 0.3434, "step": 10777 }, { "epoch": 1.0957706384709232, "grad_norm": 0.32184478640556335, "learning_rate": 9.573712125943275e-06, "loss": 0.3874, "step": 10778 }, { "epoch": 1.095872305815372, "grad_norm": 0.29559090733528137, "learning_rate": 9.573568727180989e-06, "loss": 0.359, "step": 10779 }, { "epoch": 1.095973973159821, "grad_norm": 0.31324172019958496, "learning_rate": 9.573425305378148e-06, "loss": 0.3593, "step": 10780 }, { "epoch": 1.09607564050427, "grad_norm": 0.31079766154289246, "learning_rate": 9.573281860535479e-06, "loss": 0.3842, "step": 10781 }, { "epoch": 1.096177307848719, "grad_norm": 0.30805858969688416, "learning_rate": 9.573138392653702e-06, "loss": 0.3439, "step": 10782 }, { "epoch": 1.0962789751931679, "grad_norm": 0.3104117810726166, "learning_rate": 9.57299490173354e-06, "loss": 0.3785, "step": 10783 }, { "epoch": 1.0963806425376168, "grad_norm": 0.32012665271759033, "learning_rate": 9.572851387775719e-06, "loss": 0.3581, "step": 10784 }, { "epoch": 1.096482309882066, "grad_norm": 0.2938648462295532, "learning_rate": 9.572707850780958e-06, "loss": 0.3373, "step": 10785 }, { "epoch": 1.096583977226515, "grad_norm": 0.30845192074775696, "learning_rate": 9.57256429074998e-06, "loss": 0.327, "step": 10786 }, { "epoch": 1.0966856445709638, "grad_norm": 0.33068180084228516, "learning_rate": 9.572420707683511e-06, "loss": 0.3696, "step": 10787 }, { "epoch": 1.0967873119154128, "grad_norm": 0.3452167809009552, "learning_rate": 9.572277101582273e-06, "loss": 0.365, "step": 10788 }, { "epoch": 1.0968889792598617, "grad_norm": 0.31905877590179443, "learning_rate": 9.57213347244699e-06, "loss": 0.3745, "step": 10789 }, { "epoch": 1.0969906466043107, "grad_norm": 0.31321921944618225, "learning_rate": 9.571989820278384e-06, "loss": 0.3907, "step": 10790 }, { "epoch": 1.0970923139487596, "grad_norm": 0.3464565575122833, "learning_rate": 9.57184614507718e-06, "loss": 0.3826, "step": 10791 }, { "epoch": 1.0971939812932086, "grad_norm": 0.3316923677921295, "learning_rate": 9.571702446844102e-06, "loss": 0.4083, "step": 10792 }, { "epoch": 1.0972956486376575, "grad_norm": 0.31181174516677856, "learning_rate": 9.571558725579873e-06, "loss": 0.3677, "step": 10793 }, { "epoch": 1.0973973159821067, "grad_norm": 0.32086801528930664, "learning_rate": 9.57141498128522e-06, "loss": 0.3659, "step": 10794 }, { "epoch": 1.0974989833265556, "grad_norm": 0.300374299287796, "learning_rate": 9.57127121396086e-06, "loss": 0.3548, "step": 10795 }, { "epoch": 1.0976006506710045, "grad_norm": 0.34667229652404785, "learning_rate": 9.571127423607524e-06, "loss": 0.3917, "step": 10796 }, { "epoch": 1.0977023180154535, "grad_norm": 0.31011855602264404, "learning_rate": 9.570983610225936e-06, "loss": 0.368, "step": 10797 }, { "epoch": 1.0978039853599024, "grad_norm": 0.29666614532470703, "learning_rate": 9.570839773816817e-06, "loss": 0.3749, "step": 10798 }, { "epoch": 1.0979056527043514, "grad_norm": 0.29285892844200134, "learning_rate": 9.570695914380894e-06, "loss": 0.3874, "step": 10799 }, { "epoch": 1.0980073200488003, "grad_norm": 0.2922459542751312, "learning_rate": 9.570552031918891e-06, "loss": 0.407, "step": 10800 }, { "epoch": 1.0981089873932492, "grad_norm": 0.30198317766189575, "learning_rate": 9.570408126431534e-06, "loss": 0.4186, "step": 10801 }, { "epoch": 1.0982106547376982, "grad_norm": 0.2949986159801483, "learning_rate": 9.570264197919545e-06, "loss": 0.4009, "step": 10802 }, { "epoch": 1.0983123220821471, "grad_norm": 0.2982371747493744, "learning_rate": 9.570120246383653e-06, "loss": 0.3637, "step": 10803 }, { "epoch": 1.098413989426596, "grad_norm": 0.2867472767829895, "learning_rate": 9.56997627182458e-06, "loss": 0.3761, "step": 10804 }, { "epoch": 1.0985156567710452, "grad_norm": 0.30275920033454895, "learning_rate": 9.569832274243051e-06, "loss": 0.3712, "step": 10805 }, { "epoch": 1.0986173241154942, "grad_norm": 0.29033708572387695, "learning_rate": 9.569688253639796e-06, "loss": 0.3436, "step": 10806 }, { "epoch": 1.0987189914599431, "grad_norm": 0.2949819266796112, "learning_rate": 9.569544210015535e-06, "loss": 0.3782, "step": 10807 }, { "epoch": 1.098820658804392, "grad_norm": 0.34240174293518066, "learning_rate": 9.569400143370997e-06, "loss": 0.3761, "step": 10808 }, { "epoch": 1.098922326148841, "grad_norm": 0.30645981431007385, "learning_rate": 9.569256053706908e-06, "loss": 0.4035, "step": 10809 }, { "epoch": 1.09902399349329, "grad_norm": 0.282014936208725, "learning_rate": 9.569111941023993e-06, "loss": 0.3492, "step": 10810 }, { "epoch": 1.0991256608377389, "grad_norm": 0.3579345941543579, "learning_rate": 9.568967805322977e-06, "loss": 0.3819, "step": 10811 }, { "epoch": 1.0992273281821878, "grad_norm": 0.3434174954891205, "learning_rate": 9.568823646604588e-06, "loss": 0.3538, "step": 10812 }, { "epoch": 1.0993289955266368, "grad_norm": 0.28935736417770386, "learning_rate": 9.568679464869548e-06, "loss": 0.3419, "step": 10813 }, { "epoch": 1.0994306628710857, "grad_norm": 0.34435155987739563, "learning_rate": 9.56853526011859e-06, "loss": 0.3719, "step": 10814 }, { "epoch": 1.0995323302155349, "grad_norm": 0.3051263391971588, "learning_rate": 9.568391032352435e-06, "loss": 0.3644, "step": 10815 }, { "epoch": 1.0996339975599838, "grad_norm": 0.3033975064754486, "learning_rate": 9.568246781571812e-06, "loss": 0.3476, "step": 10816 }, { "epoch": 1.0997356649044328, "grad_norm": 0.34967976808547974, "learning_rate": 9.568102507777447e-06, "loss": 0.3852, "step": 10817 }, { "epoch": 1.0998373322488817, "grad_norm": 0.331814706325531, "learning_rate": 9.567958210970068e-06, "loss": 0.3495, "step": 10818 }, { "epoch": 1.0999389995933306, "grad_norm": 0.29184356331825256, "learning_rate": 9.567813891150401e-06, "loss": 0.3527, "step": 10819 }, { "epoch": 1.1000406669377796, "grad_norm": 0.32963666319847107, "learning_rate": 9.567669548319174e-06, "loss": 0.4006, "step": 10820 }, { "epoch": 1.1001423342822285, "grad_norm": 0.3504234254360199, "learning_rate": 9.56752518247711e-06, "loss": 0.3789, "step": 10821 }, { "epoch": 1.1002440016266775, "grad_norm": 0.31260448694229126, "learning_rate": 9.567380793624943e-06, "loss": 0.3487, "step": 10822 }, { "epoch": 1.1003456689711264, "grad_norm": 0.31636059284210205, "learning_rate": 9.567236381763396e-06, "loss": 0.3926, "step": 10823 }, { "epoch": 1.1004473363155753, "grad_norm": 0.291011244058609, "learning_rate": 9.567091946893196e-06, "loss": 0.3816, "step": 10824 }, { "epoch": 1.1005490036600243, "grad_norm": 0.30056238174438477, "learning_rate": 9.566947489015073e-06, "loss": 0.3984, "step": 10825 }, { "epoch": 1.1006506710044734, "grad_norm": 0.30690211057662964, "learning_rate": 9.566803008129754e-06, "loss": 0.3759, "step": 10826 }, { "epoch": 1.1007523383489224, "grad_norm": 0.3101646900177002, "learning_rate": 9.566658504237966e-06, "loss": 0.3485, "step": 10827 }, { "epoch": 1.1008540056933713, "grad_norm": 0.29866454005241394, "learning_rate": 9.56651397734044e-06, "loss": 0.3485, "step": 10828 }, { "epoch": 1.1009556730378203, "grad_norm": 0.31697601079940796, "learning_rate": 9.5663694274379e-06, "loss": 0.3863, "step": 10829 }, { "epoch": 1.1010573403822692, "grad_norm": 0.3073936998844147, "learning_rate": 9.566224854531075e-06, "loss": 0.3875, "step": 10830 }, { "epoch": 1.1011590077267182, "grad_norm": 0.28543078899383545, "learning_rate": 9.566080258620696e-06, "loss": 0.3896, "step": 10831 }, { "epoch": 1.101260675071167, "grad_norm": 0.29417774081230164, "learning_rate": 9.565935639707488e-06, "loss": 0.3702, "step": 10832 }, { "epoch": 1.101362342415616, "grad_norm": 0.3162936866283417, "learning_rate": 9.565790997792184e-06, "loss": 0.4048, "step": 10833 }, { "epoch": 1.101464009760065, "grad_norm": 0.32806771993637085, "learning_rate": 9.565646332875506e-06, "loss": 0.3898, "step": 10834 }, { "epoch": 1.1015656771045141, "grad_norm": 0.27646908164024353, "learning_rate": 9.56550164495819e-06, "loss": 0.3553, "step": 10835 }, { "epoch": 1.101667344448963, "grad_norm": 0.3048360049724579, "learning_rate": 9.56535693404096e-06, "loss": 0.3382, "step": 10836 }, { "epoch": 1.101769011793412, "grad_norm": 0.31377482414245605, "learning_rate": 9.565212200124547e-06, "loss": 0.3793, "step": 10837 }, { "epoch": 1.101870679137861, "grad_norm": 0.29013559222221375, "learning_rate": 9.565067443209682e-06, "loss": 0.3817, "step": 10838 }, { "epoch": 1.10197234648231, "grad_norm": 0.2818470299243927, "learning_rate": 9.564922663297089e-06, "loss": 0.3773, "step": 10839 }, { "epoch": 1.1020740138267588, "grad_norm": 0.2930527925491333, "learning_rate": 9.564777860387502e-06, "loss": 0.3774, "step": 10840 }, { "epoch": 1.1021756811712078, "grad_norm": 0.3114740550518036, "learning_rate": 9.564633034481648e-06, "loss": 0.3578, "step": 10841 }, { "epoch": 1.1022773485156567, "grad_norm": 0.31896334886550903, "learning_rate": 9.564488185580259e-06, "loss": 0.3358, "step": 10842 }, { "epoch": 1.1023790158601057, "grad_norm": 0.32490506768226624, "learning_rate": 9.564343313684061e-06, "loss": 0.3586, "step": 10843 }, { "epoch": 1.1024806832045546, "grad_norm": 0.31020766496658325, "learning_rate": 9.564198418793786e-06, "loss": 0.3648, "step": 10844 }, { "epoch": 1.1025823505490036, "grad_norm": 0.2818000018596649, "learning_rate": 9.564053500910167e-06, "loss": 0.3416, "step": 10845 }, { "epoch": 1.1026840178934527, "grad_norm": 0.3159385621547699, "learning_rate": 9.563908560033928e-06, "loss": 0.358, "step": 10846 }, { "epoch": 1.1027856852379017, "grad_norm": 0.29030871391296387, "learning_rate": 9.563763596165804e-06, "loss": 0.3576, "step": 10847 }, { "epoch": 1.1028873525823506, "grad_norm": 0.35574230551719666, "learning_rate": 9.563618609306522e-06, "loss": 0.3752, "step": 10848 }, { "epoch": 1.1029890199267995, "grad_norm": 0.2999277412891388, "learning_rate": 9.563473599456816e-06, "loss": 0.3827, "step": 10849 }, { "epoch": 1.1030906872712485, "grad_norm": 0.3148461580276489, "learning_rate": 9.563328566617413e-06, "loss": 0.3629, "step": 10850 }, { "epoch": 1.1031923546156974, "grad_norm": 0.34842783212661743, "learning_rate": 9.563183510789045e-06, "loss": 0.3836, "step": 10851 }, { "epoch": 1.1032940219601464, "grad_norm": 0.2864961624145508, "learning_rate": 9.563038431972445e-06, "loss": 0.366, "step": 10852 }, { "epoch": 1.1033956893045953, "grad_norm": 0.326330304145813, "learning_rate": 9.562893330168339e-06, "loss": 0.365, "step": 10853 }, { "epoch": 1.1034973566490442, "grad_norm": 0.2972567677497864, "learning_rate": 9.562748205377462e-06, "loss": 0.3927, "step": 10854 }, { "epoch": 1.1035990239934932, "grad_norm": 0.31336748600006104, "learning_rate": 9.562603057600543e-06, "loss": 0.4183, "step": 10855 }, { "epoch": 1.1037006913379424, "grad_norm": 0.2977539598941803, "learning_rate": 9.562457886838315e-06, "loss": 0.3661, "step": 10856 }, { "epoch": 1.1038023586823913, "grad_norm": 0.29112306237220764, "learning_rate": 9.562312693091507e-06, "loss": 0.3792, "step": 10857 }, { "epoch": 1.1039040260268402, "grad_norm": 0.3053835332393646, "learning_rate": 9.562167476360853e-06, "loss": 0.3894, "step": 10858 }, { "epoch": 1.1040056933712892, "grad_norm": 0.31459829211235046, "learning_rate": 9.562022236647083e-06, "loss": 0.3907, "step": 10859 }, { "epoch": 1.1041073607157381, "grad_norm": 0.31819602847099304, "learning_rate": 9.56187697395093e-06, "loss": 0.385, "step": 10860 }, { "epoch": 1.104209028060187, "grad_norm": 0.29448604583740234, "learning_rate": 9.561731688273123e-06, "loss": 0.3381, "step": 10861 }, { "epoch": 1.104310695404636, "grad_norm": 0.2708092927932739, "learning_rate": 9.561586379614399e-06, "loss": 0.3509, "step": 10862 }, { "epoch": 1.104412362749085, "grad_norm": 0.29607170820236206, "learning_rate": 9.561441047975484e-06, "loss": 0.3695, "step": 10863 }, { "epoch": 1.1045140300935339, "grad_norm": 0.3193551301956177, "learning_rate": 9.561295693357114e-06, "loss": 0.3936, "step": 10864 }, { "epoch": 1.1046156974379828, "grad_norm": 0.3229978084564209, "learning_rate": 9.56115031576002e-06, "loss": 0.3972, "step": 10865 }, { "epoch": 1.1047173647824318, "grad_norm": 0.2991206645965576, "learning_rate": 9.561004915184933e-06, "loss": 0.3659, "step": 10866 }, { "epoch": 1.104819032126881, "grad_norm": 0.3292764723300934, "learning_rate": 9.560859491632588e-06, "loss": 0.3659, "step": 10867 }, { "epoch": 1.1049206994713299, "grad_norm": 0.32719555497169495, "learning_rate": 9.560714045103718e-06, "loss": 0.3963, "step": 10868 }, { "epoch": 1.1050223668157788, "grad_norm": 0.3067421317100525, "learning_rate": 9.560568575599054e-06, "loss": 0.3551, "step": 10869 }, { "epoch": 1.1051240341602278, "grad_norm": 0.3228209316730499, "learning_rate": 9.560423083119328e-06, "loss": 0.355, "step": 10870 }, { "epoch": 1.1052257015046767, "grad_norm": 0.3663511574268341, "learning_rate": 9.560277567665275e-06, "loss": 0.3781, "step": 10871 }, { "epoch": 1.1053273688491256, "grad_norm": 0.2873240113258362, "learning_rate": 9.560132029237629e-06, "loss": 0.3563, "step": 10872 }, { "epoch": 1.1054290361935746, "grad_norm": 0.2858010530471802, "learning_rate": 9.559986467837119e-06, "loss": 0.3561, "step": 10873 }, { "epoch": 1.1055307035380235, "grad_norm": 0.31384190917015076, "learning_rate": 9.559840883464482e-06, "loss": 0.369, "step": 10874 }, { "epoch": 1.1056323708824725, "grad_norm": 0.3084913194179535, "learning_rate": 9.559695276120451e-06, "loss": 0.3867, "step": 10875 }, { "epoch": 1.1057340382269216, "grad_norm": 0.32179510593414307, "learning_rate": 9.559549645805757e-06, "loss": 0.339, "step": 10876 }, { "epoch": 1.1058357055713706, "grad_norm": 0.3085464537143707, "learning_rate": 9.559403992521137e-06, "loss": 0.388, "step": 10877 }, { "epoch": 1.1059373729158195, "grad_norm": 0.3162544369697571, "learning_rate": 9.559258316267322e-06, "loss": 0.3867, "step": 10878 }, { "epoch": 1.1060390402602684, "grad_norm": 0.2691120207309723, "learning_rate": 9.559112617045048e-06, "loss": 0.3675, "step": 10879 }, { "epoch": 1.1061407076047174, "grad_norm": 0.2786441445350647, "learning_rate": 9.558966894855047e-06, "loss": 0.3314, "step": 10880 }, { "epoch": 1.1062423749491663, "grad_norm": 0.2940187454223633, "learning_rate": 9.558821149698054e-06, "loss": 0.379, "step": 10881 }, { "epoch": 1.1063440422936153, "grad_norm": 0.2936048209667206, "learning_rate": 9.558675381574804e-06, "loss": 0.3821, "step": 10882 }, { "epoch": 1.1064457096380642, "grad_norm": 0.280904620885849, "learning_rate": 9.55852959048603e-06, "loss": 0.3681, "step": 10883 }, { "epoch": 1.1065473769825132, "grad_norm": 0.3258283734321594, "learning_rate": 9.55838377643247e-06, "loss": 0.3617, "step": 10884 }, { "epoch": 1.106649044326962, "grad_norm": 0.303507536649704, "learning_rate": 9.558237939414852e-06, "loss": 0.3779, "step": 10885 }, { "epoch": 1.106750711671411, "grad_norm": 0.315277099609375, "learning_rate": 9.558092079433917e-06, "loss": 0.3553, "step": 10886 }, { "epoch": 1.1068523790158602, "grad_norm": 0.3350936770439148, "learning_rate": 9.557946196490396e-06, "loss": 0.4037, "step": 10887 }, { "epoch": 1.1069540463603091, "grad_norm": 0.31083908677101135, "learning_rate": 9.557800290585027e-06, "loss": 0.3846, "step": 10888 }, { "epoch": 1.107055713704758, "grad_norm": 0.2850189208984375, "learning_rate": 9.55765436171854e-06, "loss": 0.3899, "step": 10889 }, { "epoch": 1.107157381049207, "grad_norm": 0.31088465452194214, "learning_rate": 9.557508409891676e-06, "loss": 0.3612, "step": 10890 }, { "epoch": 1.107259048393656, "grad_norm": 0.3135281205177307, "learning_rate": 9.557362435105167e-06, "loss": 0.3493, "step": 10891 }, { "epoch": 1.107360715738105, "grad_norm": 0.3115536868572235, "learning_rate": 9.55721643735975e-06, "loss": 0.3641, "step": 10892 }, { "epoch": 1.1074623830825538, "grad_norm": 0.3053872585296631, "learning_rate": 9.557070416656157e-06, "loss": 0.3976, "step": 10893 }, { "epoch": 1.1075640504270028, "grad_norm": 0.28481608629226685, "learning_rate": 9.55692437299513e-06, "loss": 0.3644, "step": 10894 }, { "epoch": 1.1076657177714517, "grad_norm": 0.2908771336078644, "learning_rate": 9.556778306377397e-06, "loss": 0.3558, "step": 10895 }, { "epoch": 1.1077673851159007, "grad_norm": 0.32065001130104065, "learning_rate": 9.556632216803699e-06, "loss": 0.3744, "step": 10896 }, { "epoch": 1.1078690524603498, "grad_norm": 0.2866840064525604, "learning_rate": 9.556486104274771e-06, "loss": 0.3788, "step": 10897 }, { "epoch": 1.1079707198047988, "grad_norm": 0.3354032039642334, "learning_rate": 9.556339968791349e-06, "loss": 0.3869, "step": 10898 }, { "epoch": 1.1080723871492477, "grad_norm": 0.3011781871318817, "learning_rate": 9.556193810354168e-06, "loss": 0.3485, "step": 10899 }, { "epoch": 1.1081740544936967, "grad_norm": 0.32230204343795776, "learning_rate": 9.556047628963968e-06, "loss": 0.3768, "step": 10900 }, { "epoch": 1.1082757218381456, "grad_norm": 0.3028518855571747, "learning_rate": 9.55590142462148e-06, "loss": 0.3559, "step": 10901 }, { "epoch": 1.1083773891825945, "grad_norm": 0.334870845079422, "learning_rate": 9.555755197327444e-06, "loss": 0.3706, "step": 10902 }, { "epoch": 1.1084790565270435, "grad_norm": 0.33306995034217834, "learning_rate": 9.555608947082596e-06, "loss": 0.3762, "step": 10903 }, { "epoch": 1.1085807238714924, "grad_norm": 0.3034571707248688, "learning_rate": 9.555462673887673e-06, "loss": 0.4056, "step": 10904 }, { "epoch": 1.1086823912159414, "grad_norm": 0.3207843005657196, "learning_rate": 9.555316377743411e-06, "loss": 0.3605, "step": 10905 }, { "epoch": 1.1087840585603903, "grad_norm": 0.3245638310909271, "learning_rate": 9.55517005865055e-06, "loss": 0.3752, "step": 10906 }, { "epoch": 1.1088857259048392, "grad_norm": 0.2954135835170746, "learning_rate": 9.555023716609821e-06, "loss": 0.3574, "step": 10907 }, { "epoch": 1.1089873932492884, "grad_norm": 0.2995108366012573, "learning_rate": 9.554877351621966e-06, "loss": 0.3274, "step": 10908 }, { "epoch": 1.1090890605937374, "grad_norm": 0.3523721396923065, "learning_rate": 9.554730963687723e-06, "loss": 0.3709, "step": 10909 }, { "epoch": 1.1091907279381863, "grad_norm": 0.30385762453079224, "learning_rate": 9.554584552807826e-06, "loss": 0.3619, "step": 10910 }, { "epoch": 1.1092923952826352, "grad_norm": 0.309607595205307, "learning_rate": 9.554438118983016e-06, "loss": 0.3737, "step": 10911 }, { "epoch": 1.1093940626270842, "grad_norm": 0.3105561137199402, "learning_rate": 9.554291662214027e-06, "loss": 0.38, "step": 10912 }, { "epoch": 1.1094957299715331, "grad_norm": 0.3148573637008667, "learning_rate": 9.5541451825016e-06, "loss": 0.3812, "step": 10913 }, { "epoch": 1.109597397315982, "grad_norm": 0.3242894411087036, "learning_rate": 9.553998679846472e-06, "loss": 0.3678, "step": 10914 }, { "epoch": 1.109699064660431, "grad_norm": 0.33374419808387756, "learning_rate": 9.553852154249381e-06, "loss": 0.3691, "step": 10915 }, { "epoch": 1.10980073200488, "grad_norm": 0.3115719258785248, "learning_rate": 9.553705605711065e-06, "loss": 0.3848, "step": 10916 }, { "epoch": 1.109902399349329, "grad_norm": 0.3282032012939453, "learning_rate": 9.553559034232263e-06, "loss": 0.3693, "step": 10917 }, { "epoch": 1.110004066693778, "grad_norm": 0.30599430203437805, "learning_rate": 9.553412439813711e-06, "loss": 0.3477, "step": 10918 }, { "epoch": 1.110105734038227, "grad_norm": 0.3188909888267517, "learning_rate": 9.553265822456149e-06, "loss": 0.3315, "step": 10919 }, { "epoch": 1.110207401382676, "grad_norm": 0.2960016429424286, "learning_rate": 9.553119182160317e-06, "loss": 0.3663, "step": 10920 }, { "epoch": 1.1103090687271249, "grad_norm": 0.36242374777793884, "learning_rate": 9.552972518926953e-06, "loss": 0.3629, "step": 10921 }, { "epoch": 1.1104107360715738, "grad_norm": 0.30928313732147217, "learning_rate": 9.552825832756794e-06, "loss": 0.3781, "step": 10922 }, { "epoch": 1.1105124034160228, "grad_norm": 0.3472477197647095, "learning_rate": 9.552679123650581e-06, "loss": 0.3667, "step": 10923 }, { "epoch": 1.1106140707604717, "grad_norm": 0.3452147841453552, "learning_rate": 9.552532391609052e-06, "loss": 0.3944, "step": 10924 }, { "epoch": 1.1107157381049206, "grad_norm": 0.3300861418247223, "learning_rate": 9.552385636632947e-06, "loss": 0.3452, "step": 10925 }, { "epoch": 1.1108174054493696, "grad_norm": 0.32823824882507324, "learning_rate": 9.552238858723005e-06, "loss": 0.3876, "step": 10926 }, { "epoch": 1.1109190727938185, "grad_norm": 0.32458430528640747, "learning_rate": 9.552092057879965e-06, "loss": 0.3764, "step": 10927 }, { "epoch": 1.1110207401382677, "grad_norm": 0.31190499663352966, "learning_rate": 9.551945234104567e-06, "loss": 0.3559, "step": 10928 }, { "epoch": 1.1111224074827166, "grad_norm": 0.329497754573822, "learning_rate": 9.551798387397552e-06, "loss": 0.3543, "step": 10929 }, { "epoch": 1.1112240748271656, "grad_norm": 0.35977962613105774, "learning_rate": 9.551651517759656e-06, "loss": 0.3733, "step": 10930 }, { "epoch": 1.1113257421716145, "grad_norm": 0.3238544464111328, "learning_rate": 9.551504625191623e-06, "loss": 0.3786, "step": 10931 }, { "epoch": 1.1114274095160634, "grad_norm": 0.31166133284568787, "learning_rate": 9.551357709694192e-06, "loss": 0.382, "step": 10932 }, { "epoch": 1.1115290768605124, "grad_norm": 0.291522741317749, "learning_rate": 9.551210771268102e-06, "loss": 0.3766, "step": 10933 }, { "epoch": 1.1116307442049613, "grad_norm": 0.303755521774292, "learning_rate": 9.551063809914091e-06, "loss": 0.3559, "step": 10934 }, { "epoch": 1.1117324115494103, "grad_norm": 0.2984112799167633, "learning_rate": 9.550916825632906e-06, "loss": 0.3368, "step": 10935 }, { "epoch": 1.1118340788938592, "grad_norm": 0.2855149209499359, "learning_rate": 9.55076981842528e-06, "loss": 0.377, "step": 10936 }, { "epoch": 1.1119357462383082, "grad_norm": 0.3208326995372772, "learning_rate": 9.55062278829196e-06, "loss": 0.3312, "step": 10937 }, { "epoch": 1.1120374135827573, "grad_norm": 0.3085063397884369, "learning_rate": 9.550475735233683e-06, "loss": 0.3631, "step": 10938 }, { "epoch": 1.1121390809272063, "grad_norm": 0.33776116371154785, "learning_rate": 9.550328659251188e-06, "loss": 0.3683, "step": 10939 }, { "epoch": 1.1122407482716552, "grad_norm": 0.35698312520980835, "learning_rate": 9.550181560345222e-06, "loss": 0.3983, "step": 10940 }, { "epoch": 1.1123424156161041, "grad_norm": 0.3750365972518921, "learning_rate": 9.550034438516522e-06, "loss": 0.373, "step": 10941 }, { "epoch": 1.112444082960553, "grad_norm": 0.30570054054260254, "learning_rate": 9.54988729376583e-06, "loss": 0.3526, "step": 10942 }, { "epoch": 1.112545750305002, "grad_norm": 0.29354748129844666, "learning_rate": 9.549740126093885e-06, "loss": 0.3531, "step": 10943 }, { "epoch": 1.112647417649451, "grad_norm": 0.37415510416030884, "learning_rate": 9.549592935501431e-06, "loss": 0.3541, "step": 10944 }, { "epoch": 1.1127490849939, "grad_norm": 0.3533170819282532, "learning_rate": 9.54944572198921e-06, "loss": 0.3804, "step": 10945 }, { "epoch": 1.1128507523383488, "grad_norm": 0.3050874173641205, "learning_rate": 9.549298485557962e-06, "loss": 0.3646, "step": 10946 }, { "epoch": 1.1129524196827978, "grad_norm": 0.301760196685791, "learning_rate": 9.549151226208428e-06, "loss": 0.3865, "step": 10947 }, { "epoch": 1.113054087027247, "grad_norm": 0.2898676097393036, "learning_rate": 9.549003943941353e-06, "loss": 0.3283, "step": 10948 }, { "epoch": 1.113155754371696, "grad_norm": 0.34928199648857117, "learning_rate": 9.548856638757477e-06, "loss": 0.3791, "step": 10949 }, { "epoch": 1.1132574217161448, "grad_norm": 0.3125622868537903, "learning_rate": 9.548709310657543e-06, "loss": 0.3512, "step": 10950 }, { "epoch": 1.1133590890605938, "grad_norm": 0.28925061225891113, "learning_rate": 9.54856195964229e-06, "loss": 0.3699, "step": 10951 }, { "epoch": 1.1134607564050427, "grad_norm": 0.37440094351768494, "learning_rate": 9.548414585712465e-06, "loss": 0.3867, "step": 10952 }, { "epoch": 1.1135624237494917, "grad_norm": 0.329847514629364, "learning_rate": 9.548267188868806e-06, "loss": 0.3439, "step": 10953 }, { "epoch": 1.1136640910939406, "grad_norm": 0.3166523873806, "learning_rate": 9.548119769112058e-06, "loss": 0.3875, "step": 10954 }, { "epoch": 1.1137657584383895, "grad_norm": 0.3146543502807617, "learning_rate": 9.547972326442965e-06, "loss": 0.3336, "step": 10955 }, { "epoch": 1.1138674257828385, "grad_norm": 0.2972991466522217, "learning_rate": 9.547824860862267e-06, "loss": 0.3596, "step": 10956 }, { "epoch": 1.1139690931272874, "grad_norm": 0.3146865963935852, "learning_rate": 9.547677372370709e-06, "loss": 0.3956, "step": 10957 }, { "epoch": 1.1140707604717366, "grad_norm": 0.3172794282436371, "learning_rate": 9.547529860969034e-06, "loss": 0.3839, "step": 10958 }, { "epoch": 1.1141724278161855, "grad_norm": 0.3088330030441284, "learning_rate": 9.547382326657982e-06, "loss": 0.3773, "step": 10959 }, { "epoch": 1.1142740951606345, "grad_norm": 0.35249394178390503, "learning_rate": 9.547234769438299e-06, "loss": 0.3533, "step": 10960 }, { "epoch": 1.1143757625050834, "grad_norm": 0.3290766775608063, "learning_rate": 9.547087189310729e-06, "loss": 0.3692, "step": 10961 }, { "epoch": 1.1144774298495324, "grad_norm": 0.29710498452186584, "learning_rate": 9.546939586276013e-06, "loss": 0.3927, "step": 10962 }, { "epoch": 1.1145790971939813, "grad_norm": 0.36511969566345215, "learning_rate": 9.546791960334895e-06, "loss": 0.37, "step": 10963 }, { "epoch": 1.1146807645384302, "grad_norm": 0.3848116993904114, "learning_rate": 9.546644311488123e-06, "loss": 0.3928, "step": 10964 }, { "epoch": 1.1147824318828792, "grad_norm": 0.3180059790611267, "learning_rate": 9.546496639736434e-06, "loss": 0.3588, "step": 10965 }, { "epoch": 1.1148840992273281, "grad_norm": 0.33623042702674866, "learning_rate": 9.546348945080577e-06, "loss": 0.403, "step": 10966 }, { "epoch": 1.114985766571777, "grad_norm": 0.41286173462867737, "learning_rate": 9.546201227521294e-06, "loss": 0.4288, "step": 10967 }, { "epoch": 1.115087433916226, "grad_norm": 0.33388662338256836, "learning_rate": 9.54605348705933e-06, "loss": 0.3749, "step": 10968 }, { "epoch": 1.1151891012606752, "grad_norm": 0.32296496629714966, "learning_rate": 9.545905723695428e-06, "loss": 0.3284, "step": 10969 }, { "epoch": 1.115290768605124, "grad_norm": 0.38237661123275757, "learning_rate": 9.545757937430333e-06, "loss": 0.3564, "step": 10970 }, { "epoch": 1.115392435949573, "grad_norm": 0.333941787481308, "learning_rate": 9.545610128264791e-06, "loss": 0.3872, "step": 10971 }, { "epoch": 1.115494103294022, "grad_norm": 0.35726505517959595, "learning_rate": 9.545462296199546e-06, "loss": 0.3733, "step": 10972 }, { "epoch": 1.115595770638471, "grad_norm": 0.3330729603767395, "learning_rate": 9.54531444123534e-06, "loss": 0.3817, "step": 10973 }, { "epoch": 1.1156974379829199, "grad_norm": 0.3371885418891907, "learning_rate": 9.545166563372921e-06, "loss": 0.3836, "step": 10974 }, { "epoch": 1.1157991053273688, "grad_norm": 0.30299949645996094, "learning_rate": 9.545018662613033e-06, "loss": 0.3602, "step": 10975 }, { "epoch": 1.1159007726718178, "grad_norm": 0.32749661803245544, "learning_rate": 9.54487073895642e-06, "loss": 0.3803, "step": 10976 }, { "epoch": 1.1160024400162667, "grad_norm": 0.3012917637825012, "learning_rate": 9.54472279240383e-06, "loss": 0.361, "step": 10977 }, { "epoch": 1.1161041073607156, "grad_norm": 0.296175479888916, "learning_rate": 9.544574822956006e-06, "loss": 0.3504, "step": 10978 }, { "epoch": 1.1162057747051648, "grad_norm": 0.3052285611629486, "learning_rate": 9.544426830613694e-06, "loss": 0.38, "step": 10979 }, { "epoch": 1.1163074420496137, "grad_norm": 0.3007153272628784, "learning_rate": 9.54427881537764e-06, "loss": 0.3571, "step": 10980 }, { "epoch": 1.1164091093940627, "grad_norm": 0.31178855895996094, "learning_rate": 9.544130777248589e-06, "loss": 0.3645, "step": 10981 }, { "epoch": 1.1165107767385116, "grad_norm": 0.32187744975090027, "learning_rate": 9.543982716227285e-06, "loss": 0.3457, "step": 10982 }, { "epoch": 1.1166124440829606, "grad_norm": 0.3257071375846863, "learning_rate": 9.543834632314477e-06, "loss": 0.3186, "step": 10983 }, { "epoch": 1.1167141114274095, "grad_norm": 0.3020622730255127, "learning_rate": 9.54368652551091e-06, "loss": 0.3798, "step": 10984 }, { "epoch": 1.1168157787718584, "grad_norm": 0.292483389377594, "learning_rate": 9.54353839581733e-06, "loss": 0.3501, "step": 10985 }, { "epoch": 1.1169174461163074, "grad_norm": 0.29874131083488464, "learning_rate": 9.543390243234484e-06, "loss": 0.3398, "step": 10986 }, { "epoch": 1.1170191134607563, "grad_norm": 0.3509334921836853, "learning_rate": 9.543242067763117e-06, "loss": 0.4222, "step": 10987 }, { "epoch": 1.1171207808052053, "grad_norm": 0.2828713655471802, "learning_rate": 9.543093869403976e-06, "loss": 0.4127, "step": 10988 }, { "epoch": 1.1172224481496544, "grad_norm": 0.31136849522590637, "learning_rate": 9.542945648157808e-06, "loss": 0.3543, "step": 10989 }, { "epoch": 1.1173241154941034, "grad_norm": 0.31650424003601074, "learning_rate": 9.54279740402536e-06, "loss": 0.3398, "step": 10990 }, { "epoch": 1.1174257828385523, "grad_norm": 0.3065088391304016, "learning_rate": 9.542649137007377e-06, "loss": 0.3685, "step": 10991 }, { "epoch": 1.1175274501830013, "grad_norm": 0.2884950339794159, "learning_rate": 9.542500847104608e-06, "loss": 0.3721, "step": 10992 }, { "epoch": 1.1176291175274502, "grad_norm": 0.30010607838630676, "learning_rate": 9.542352534317801e-06, "loss": 0.3791, "step": 10993 }, { "epoch": 1.1177307848718991, "grad_norm": 0.3209371268749237, "learning_rate": 9.542204198647697e-06, "loss": 0.3743, "step": 10994 }, { "epoch": 1.117832452216348, "grad_norm": 0.28509077429771423, "learning_rate": 9.542055840095051e-06, "loss": 0.3782, "step": 10995 }, { "epoch": 1.117934119560797, "grad_norm": 0.2777270972728729, "learning_rate": 9.541907458660606e-06, "loss": 0.3598, "step": 10996 }, { "epoch": 1.118035786905246, "grad_norm": 0.317672461271286, "learning_rate": 9.54175905434511e-06, "loss": 0.3586, "step": 10997 }, { "epoch": 1.118137454249695, "grad_norm": 0.3261633515357971, "learning_rate": 9.541610627149312e-06, "loss": 0.3475, "step": 10998 }, { "epoch": 1.118239121594144, "grad_norm": 0.3117451071739197, "learning_rate": 9.541462177073958e-06, "loss": 0.3594, "step": 10999 }, { "epoch": 1.118340788938593, "grad_norm": 0.2984040677547455, "learning_rate": 9.541313704119799e-06, "loss": 0.385, "step": 11000 }, { "epoch": 1.118442456283042, "grad_norm": 0.3274063467979431, "learning_rate": 9.541165208287579e-06, "loss": 0.3833, "step": 11001 }, { "epoch": 1.118544123627491, "grad_norm": 0.338408499956131, "learning_rate": 9.541016689578048e-06, "loss": 0.3732, "step": 11002 }, { "epoch": 1.1186457909719398, "grad_norm": 0.2960709035396576, "learning_rate": 9.540868147991953e-06, "loss": 0.3342, "step": 11003 }, { "epoch": 1.1187474583163888, "grad_norm": 0.3078896999359131, "learning_rate": 9.540719583530044e-06, "loss": 0.353, "step": 11004 }, { "epoch": 1.1188491256608377, "grad_norm": 0.3347996175289154, "learning_rate": 9.54057099619307e-06, "loss": 0.3573, "step": 11005 }, { "epoch": 1.1189507930052867, "grad_norm": 0.31165897846221924, "learning_rate": 9.540422385981778e-06, "loss": 0.4158, "step": 11006 }, { "epoch": 1.1190524603497356, "grad_norm": 0.32630664110183716, "learning_rate": 9.540273752896916e-06, "loss": 0.3788, "step": 11007 }, { "epoch": 1.1191541276941845, "grad_norm": 0.33043551445007324, "learning_rate": 9.540125096939235e-06, "loss": 0.3796, "step": 11008 }, { "epoch": 1.1192557950386335, "grad_norm": 0.2873748540878296, "learning_rate": 9.53997641810948e-06, "loss": 0.3924, "step": 11009 }, { "epoch": 1.1193574623830826, "grad_norm": 0.2948121428489685, "learning_rate": 9.539827716408405e-06, "loss": 0.3689, "step": 11010 }, { "epoch": 1.1194591297275316, "grad_norm": 0.33033284544944763, "learning_rate": 9.539678991836758e-06, "loss": 0.3576, "step": 11011 }, { "epoch": 1.1195607970719805, "grad_norm": 0.31395500898361206, "learning_rate": 9.539530244395285e-06, "loss": 0.3731, "step": 11012 }, { "epoch": 1.1196624644164295, "grad_norm": 0.31578683853149414, "learning_rate": 9.539381474084737e-06, "loss": 0.4126, "step": 11013 }, { "epoch": 1.1197641317608784, "grad_norm": 0.34920474886894226, "learning_rate": 9.539232680905865e-06, "loss": 0.403, "step": 11014 }, { "epoch": 1.1198657991053274, "grad_norm": 0.3280321955680847, "learning_rate": 9.539083864859416e-06, "loss": 0.3771, "step": 11015 }, { "epoch": 1.1199674664497763, "grad_norm": 0.34245193004608154, "learning_rate": 9.538935025946144e-06, "loss": 0.3781, "step": 11016 }, { "epoch": 1.1200691337942252, "grad_norm": 0.32954487204551697, "learning_rate": 9.538786164166794e-06, "loss": 0.3496, "step": 11017 }, { "epoch": 1.1201708011386742, "grad_norm": 0.34610381722450256, "learning_rate": 9.538637279522118e-06, "loss": 0.3752, "step": 11018 }, { "epoch": 1.1202724684831231, "grad_norm": 0.3002362847328186, "learning_rate": 9.538488372012865e-06, "loss": 0.3678, "step": 11019 }, { "epoch": 1.1203741358275723, "grad_norm": 0.3697541058063507, "learning_rate": 9.538339441639789e-06, "loss": 0.3647, "step": 11020 }, { "epoch": 1.1204758031720212, "grad_norm": 0.3263556659221649, "learning_rate": 9.538190488403633e-06, "loss": 0.379, "step": 11021 }, { "epoch": 1.1205774705164702, "grad_norm": 0.3112527132034302, "learning_rate": 9.538041512305154e-06, "loss": 0.379, "step": 11022 }, { "epoch": 1.120679137860919, "grad_norm": 0.31032055616378784, "learning_rate": 9.537892513345102e-06, "loss": 0.3839, "step": 11023 }, { "epoch": 1.120780805205368, "grad_norm": 0.31907686591148376, "learning_rate": 9.537743491524223e-06, "loss": 0.3751, "step": 11024 }, { "epoch": 1.120882472549817, "grad_norm": 0.3369174599647522, "learning_rate": 9.537594446843272e-06, "loss": 0.3797, "step": 11025 }, { "epoch": 1.120984139894266, "grad_norm": 0.29650717973709106, "learning_rate": 9.537445379302998e-06, "loss": 0.3404, "step": 11026 }, { "epoch": 1.1210858072387149, "grad_norm": 0.31284114718437195, "learning_rate": 9.537296288904155e-06, "loss": 0.3611, "step": 11027 }, { "epoch": 1.1211874745831638, "grad_norm": 0.31207573413848877, "learning_rate": 9.537147175647488e-06, "loss": 0.3583, "step": 11028 }, { "epoch": 1.1212891419276128, "grad_norm": 0.34274789690971375, "learning_rate": 9.536998039533753e-06, "loss": 0.3783, "step": 11029 }, { "epoch": 1.121390809272062, "grad_norm": 0.3016389310359955, "learning_rate": 9.5368488805637e-06, "loss": 0.3412, "step": 11030 }, { "epoch": 1.1214924766165109, "grad_norm": 0.315219908952713, "learning_rate": 9.536699698738082e-06, "loss": 0.3623, "step": 11031 }, { "epoch": 1.1215941439609598, "grad_norm": 0.3407328128814697, "learning_rate": 9.536550494057646e-06, "loss": 0.3523, "step": 11032 }, { "epoch": 1.1216958113054087, "grad_norm": 0.3219884932041168, "learning_rate": 9.536401266523149e-06, "loss": 0.3583, "step": 11033 }, { "epoch": 1.1217974786498577, "grad_norm": 0.3177954852581024, "learning_rate": 9.536252016135338e-06, "loss": 0.37, "step": 11034 }, { "epoch": 1.1218991459943066, "grad_norm": 0.31325000524520874, "learning_rate": 9.53610274289497e-06, "loss": 0.3909, "step": 11035 }, { "epoch": 1.1220008133387556, "grad_norm": 0.3160380423069, "learning_rate": 9.535953446802795e-06, "loss": 0.3501, "step": 11036 }, { "epoch": 1.1221024806832045, "grad_norm": 0.3093348443508148, "learning_rate": 9.535804127859563e-06, "loss": 0.3393, "step": 11037 }, { "epoch": 1.1222041480276534, "grad_norm": 0.32531821727752686, "learning_rate": 9.535654786066026e-06, "loss": 0.3372, "step": 11038 }, { "epoch": 1.1223058153721024, "grad_norm": 0.2800118923187256, "learning_rate": 9.53550542142294e-06, "loss": 0.3578, "step": 11039 }, { "epoch": 1.1224074827165516, "grad_norm": 0.3120347857475281, "learning_rate": 9.535356033931056e-06, "loss": 0.3811, "step": 11040 }, { "epoch": 1.1225091500610005, "grad_norm": 0.3623972237110138, "learning_rate": 9.535206623591124e-06, "loss": 0.3783, "step": 11041 }, { "epoch": 1.1226108174054494, "grad_norm": 0.3104676902294159, "learning_rate": 9.5350571904039e-06, "loss": 0.3962, "step": 11042 }, { "epoch": 1.1227124847498984, "grad_norm": 0.2852402329444885, "learning_rate": 9.534907734370135e-06, "loss": 0.3515, "step": 11043 }, { "epoch": 1.1228141520943473, "grad_norm": 0.33702364563941956, "learning_rate": 9.534758255490584e-06, "loss": 0.3903, "step": 11044 }, { "epoch": 1.1229158194387963, "grad_norm": 0.3233661353588104, "learning_rate": 9.534608753765997e-06, "loss": 0.3551, "step": 11045 }, { "epoch": 1.1230174867832452, "grad_norm": 0.3394690454006195, "learning_rate": 9.534459229197128e-06, "loss": 0.3387, "step": 11046 }, { "epoch": 1.1231191541276941, "grad_norm": 0.311160683631897, "learning_rate": 9.534309681784733e-06, "loss": 0.3721, "step": 11047 }, { "epoch": 1.123220821472143, "grad_norm": 0.27516457438468933, "learning_rate": 9.534160111529562e-06, "loss": 0.3801, "step": 11048 }, { "epoch": 1.123322488816592, "grad_norm": 0.30160757899284363, "learning_rate": 9.534010518432371e-06, "loss": 0.3571, "step": 11049 }, { "epoch": 1.123424156161041, "grad_norm": 0.2994702160358429, "learning_rate": 9.533860902493911e-06, "loss": 0.3503, "step": 11050 }, { "epoch": 1.1235258235054901, "grad_norm": 0.2844141125679016, "learning_rate": 9.533711263714938e-06, "loss": 0.3533, "step": 11051 }, { "epoch": 1.123627490849939, "grad_norm": 0.3175264894962311, "learning_rate": 9.533561602096204e-06, "loss": 0.3598, "step": 11052 }, { "epoch": 1.123729158194388, "grad_norm": 0.29432693123817444, "learning_rate": 9.533411917638465e-06, "loss": 0.3392, "step": 11053 }, { "epoch": 1.123830825538837, "grad_norm": 0.29464849829673767, "learning_rate": 9.533262210342476e-06, "loss": 0.3531, "step": 11054 }, { "epoch": 1.123932492883286, "grad_norm": 0.33486849069595337, "learning_rate": 9.533112480208987e-06, "loss": 0.3374, "step": 11055 }, { "epoch": 1.1240341602277348, "grad_norm": 0.3053351938724518, "learning_rate": 9.532962727238754e-06, "loss": 0.3396, "step": 11056 }, { "epoch": 1.1241358275721838, "grad_norm": 0.3094004988670349, "learning_rate": 9.532812951432534e-06, "loss": 0.3562, "step": 11057 }, { "epoch": 1.1242374949166327, "grad_norm": 0.2787741422653198, "learning_rate": 9.532663152791077e-06, "loss": 0.3804, "step": 11058 }, { "epoch": 1.1243391622610817, "grad_norm": 0.30962637066841125, "learning_rate": 9.532513331315141e-06, "loss": 0.363, "step": 11059 }, { "epoch": 1.1244408296055306, "grad_norm": 0.3393332064151764, "learning_rate": 9.53236348700548e-06, "loss": 0.3798, "step": 11060 }, { "epoch": 1.1245424969499798, "grad_norm": 0.29956746101379395, "learning_rate": 9.532213619862848e-06, "loss": 0.3835, "step": 11061 }, { "epoch": 1.1246441642944287, "grad_norm": 0.2877293527126312, "learning_rate": 9.532063729888003e-06, "loss": 0.365, "step": 11062 }, { "epoch": 1.1247458316388776, "grad_norm": 0.29070112109184265, "learning_rate": 9.531913817081698e-06, "loss": 0.3623, "step": 11063 }, { "epoch": 1.1248474989833266, "grad_norm": 0.30370405316352844, "learning_rate": 9.531763881444687e-06, "loss": 0.4058, "step": 11064 }, { "epoch": 1.1249491663277755, "grad_norm": 0.3126365840435028, "learning_rate": 9.531613922977726e-06, "loss": 0.3583, "step": 11065 }, { "epoch": 1.1250508336722245, "grad_norm": 0.29842454195022583, "learning_rate": 9.531463941681572e-06, "loss": 0.3602, "step": 11066 }, { "epoch": 1.1251525010166734, "grad_norm": 0.2883148491382599, "learning_rate": 9.531313937556978e-06, "loss": 0.3891, "step": 11067 }, { "epoch": 1.1252541683611224, "grad_norm": 0.2665531635284424, "learning_rate": 9.531163910604701e-06, "loss": 0.3515, "step": 11068 }, { "epoch": 1.1253558357055713, "grad_norm": 0.314393013715744, "learning_rate": 9.5310138608255e-06, "loss": 0.3499, "step": 11069 }, { "epoch": 1.1254575030500202, "grad_norm": 0.3023664057254791, "learning_rate": 9.530863788220125e-06, "loss": 0.3504, "step": 11070 }, { "epoch": 1.1255591703944692, "grad_norm": 0.2799139618873596, "learning_rate": 9.530713692789336e-06, "loss": 0.3573, "step": 11071 }, { "epoch": 1.1256608377389183, "grad_norm": 0.2647399306297302, "learning_rate": 9.530563574533888e-06, "loss": 0.3796, "step": 11072 }, { "epoch": 1.1257625050833673, "grad_norm": 0.3179563283920288, "learning_rate": 9.530413433454537e-06, "loss": 0.3672, "step": 11073 }, { "epoch": 1.1258641724278162, "grad_norm": 0.3124328553676605, "learning_rate": 9.53026326955204e-06, "loss": 0.3626, "step": 11074 }, { "epoch": 1.1259658397722652, "grad_norm": 0.28299468755722046, "learning_rate": 9.530113082827155e-06, "loss": 0.3701, "step": 11075 }, { "epoch": 1.126067507116714, "grad_norm": 0.3234691917896271, "learning_rate": 9.529962873280634e-06, "loss": 0.3902, "step": 11076 }, { "epoch": 1.126169174461163, "grad_norm": 0.32975655794143677, "learning_rate": 9.529812640913239e-06, "loss": 0.3827, "step": 11077 }, { "epoch": 1.126270841805612, "grad_norm": 0.31295499205589294, "learning_rate": 9.529662385725722e-06, "loss": 0.3703, "step": 11078 }, { "epoch": 1.126372509150061, "grad_norm": 0.33639344573020935, "learning_rate": 9.529512107718844e-06, "loss": 0.3817, "step": 11079 }, { "epoch": 1.1264741764945099, "grad_norm": 0.33122825622558594, "learning_rate": 9.529361806893358e-06, "loss": 0.3923, "step": 11080 }, { "epoch": 1.126575843838959, "grad_norm": 0.31904131174087524, "learning_rate": 9.529211483250027e-06, "loss": 0.3764, "step": 11081 }, { "epoch": 1.126677511183408, "grad_norm": 0.31939664483070374, "learning_rate": 9.529061136789602e-06, "loss": 0.3617, "step": 11082 }, { "epoch": 1.126779178527857, "grad_norm": 0.3513946235179901, "learning_rate": 9.528910767512844e-06, "loss": 0.3905, "step": 11083 }, { "epoch": 1.1268808458723059, "grad_norm": 0.3191199004650116, "learning_rate": 9.528760375420511e-06, "loss": 0.3633, "step": 11084 }, { "epoch": 1.1269825132167548, "grad_norm": 0.2892557978630066, "learning_rate": 9.528609960513358e-06, "loss": 0.3845, "step": 11085 }, { "epoch": 1.1270841805612037, "grad_norm": 0.3794821500778198, "learning_rate": 9.528459522792145e-06, "loss": 0.376, "step": 11086 }, { "epoch": 1.1271858479056527, "grad_norm": 0.32961249351501465, "learning_rate": 9.528309062257628e-06, "loss": 0.3994, "step": 11087 }, { "epoch": 1.1272875152501016, "grad_norm": 0.31191709637641907, "learning_rate": 9.528158578910566e-06, "loss": 0.35, "step": 11088 }, { "epoch": 1.1273891825945506, "grad_norm": 0.29780539870262146, "learning_rate": 9.52800807275172e-06, "loss": 0.3411, "step": 11089 }, { "epoch": 1.1274908499389995, "grad_norm": 0.31441450119018555, "learning_rate": 9.527857543781842e-06, "loss": 0.3401, "step": 11090 }, { "epoch": 1.1275925172834484, "grad_norm": 0.31110233068466187, "learning_rate": 9.527706992001694e-06, "loss": 0.367, "step": 11091 }, { "epoch": 1.1276941846278976, "grad_norm": 0.32770347595214844, "learning_rate": 9.527556417412037e-06, "loss": 0.3579, "step": 11092 }, { "epoch": 1.1277958519723466, "grad_norm": 0.2835678458213806, "learning_rate": 9.527405820013625e-06, "loss": 0.3735, "step": 11093 }, { "epoch": 1.1278975193167955, "grad_norm": 0.2960623502731323, "learning_rate": 9.527255199807217e-06, "loss": 0.3827, "step": 11094 }, { "epoch": 1.1279991866612444, "grad_norm": 0.31832966208457947, "learning_rate": 9.527104556793576e-06, "loss": 0.3881, "step": 11095 }, { "epoch": 1.1281008540056934, "grad_norm": 0.2911260724067688, "learning_rate": 9.526953890973456e-06, "loss": 0.3763, "step": 11096 }, { "epoch": 1.1282025213501423, "grad_norm": 0.3273514211177826, "learning_rate": 9.52680320234762e-06, "loss": 0.3569, "step": 11097 }, { "epoch": 1.1283041886945913, "grad_norm": 0.3112177550792694, "learning_rate": 9.526652490916822e-06, "loss": 0.3712, "step": 11098 }, { "epoch": 1.1284058560390402, "grad_norm": 0.29852890968322754, "learning_rate": 9.526501756681825e-06, "loss": 0.3545, "step": 11099 }, { "epoch": 1.1285075233834891, "grad_norm": 0.29867711663246155, "learning_rate": 9.52635099964339e-06, "loss": 0.3537, "step": 11100 }, { "epoch": 1.1286091907279383, "grad_norm": 0.3145667612552643, "learning_rate": 9.526200219802274e-06, "loss": 0.3612, "step": 11101 }, { "epoch": 1.1287108580723872, "grad_norm": 0.3311488628387451, "learning_rate": 9.526049417159235e-06, "loss": 0.3806, "step": 11102 }, { "epoch": 1.1288125254168362, "grad_norm": 0.3053450882434845, "learning_rate": 9.525898591715034e-06, "loss": 0.3787, "step": 11103 }, { "epoch": 1.1289141927612851, "grad_norm": 0.28862494230270386, "learning_rate": 9.525747743470435e-06, "loss": 0.3604, "step": 11104 }, { "epoch": 1.129015860105734, "grad_norm": 0.3117036819458008, "learning_rate": 9.525596872426192e-06, "loss": 0.4012, "step": 11105 }, { "epoch": 1.129117527450183, "grad_norm": 0.291634202003479, "learning_rate": 9.525445978583068e-06, "loss": 0.3798, "step": 11106 }, { "epoch": 1.129219194794632, "grad_norm": 0.29770511388778687, "learning_rate": 9.525295061941822e-06, "loss": 0.3725, "step": 11107 }, { "epoch": 1.129320862139081, "grad_norm": 0.34940969944000244, "learning_rate": 9.525144122503214e-06, "loss": 0.3822, "step": 11108 }, { "epoch": 1.1294225294835298, "grad_norm": 0.3091282248497009, "learning_rate": 9.524993160268007e-06, "loss": 0.3473, "step": 11109 }, { "epoch": 1.1295241968279788, "grad_norm": 0.30228886008262634, "learning_rate": 9.524842175236958e-06, "loss": 0.3714, "step": 11110 }, { "epoch": 1.1296258641724277, "grad_norm": 0.3024267554283142, "learning_rate": 9.524691167410832e-06, "loss": 0.3653, "step": 11111 }, { "epoch": 1.1297275315168767, "grad_norm": 0.33094581961631775, "learning_rate": 9.524540136790386e-06, "loss": 0.3683, "step": 11112 }, { "epoch": 1.1298291988613258, "grad_norm": 0.3203200697898865, "learning_rate": 9.524389083376382e-06, "loss": 0.3908, "step": 11113 }, { "epoch": 1.1299308662057748, "grad_norm": 0.2829848527908325, "learning_rate": 9.52423800716958e-06, "loss": 0.3883, "step": 11114 }, { "epoch": 1.1300325335502237, "grad_norm": 0.324879914522171, "learning_rate": 9.524086908170743e-06, "loss": 0.3578, "step": 11115 }, { "epoch": 1.1301342008946726, "grad_norm": 0.3255222737789154, "learning_rate": 9.52393578638063e-06, "loss": 0.3813, "step": 11116 }, { "epoch": 1.1302358682391216, "grad_norm": 0.3402361273765564, "learning_rate": 9.523784641800006e-06, "loss": 0.3881, "step": 11117 }, { "epoch": 1.1303375355835705, "grad_norm": 0.36103904247283936, "learning_rate": 9.523633474429629e-06, "loss": 0.3756, "step": 11118 }, { "epoch": 1.1304392029280195, "grad_norm": 0.26736146211624146, "learning_rate": 9.52348228427026e-06, "loss": 0.3805, "step": 11119 }, { "epoch": 1.1305408702724684, "grad_norm": 0.29452916979789734, "learning_rate": 9.523331071322663e-06, "loss": 0.3731, "step": 11120 }, { "epoch": 1.1306425376169174, "grad_norm": 0.3154144585132599, "learning_rate": 9.523179835587599e-06, "loss": 0.3777, "step": 11121 }, { "epoch": 1.1307442049613665, "grad_norm": 0.33643943071365356, "learning_rate": 9.523028577065832e-06, "loss": 0.343, "step": 11122 }, { "epoch": 1.1308458723058155, "grad_norm": 0.3276948034763336, "learning_rate": 9.52287729575812e-06, "loss": 0.3614, "step": 11123 }, { "epoch": 1.1309475396502644, "grad_norm": 0.2846727967262268, "learning_rate": 9.522725991665225e-06, "loss": 0.3535, "step": 11124 }, { "epoch": 1.1310492069947133, "grad_norm": 0.32851728796958923, "learning_rate": 9.522574664787915e-06, "loss": 0.3449, "step": 11125 }, { "epoch": 1.1311508743391623, "grad_norm": 0.340142160654068, "learning_rate": 9.522423315126944e-06, "loss": 0.3838, "step": 11126 }, { "epoch": 1.1312525416836112, "grad_norm": 0.3313775956630707, "learning_rate": 9.522271942683083e-06, "loss": 0.3988, "step": 11127 }, { "epoch": 1.1313542090280602, "grad_norm": 0.33748316764831543, "learning_rate": 9.52212054745709e-06, "loss": 0.3583, "step": 11128 }, { "epoch": 1.131455876372509, "grad_norm": 0.2883918285369873, "learning_rate": 9.521969129449728e-06, "loss": 0.3435, "step": 11129 }, { "epoch": 1.131557543716958, "grad_norm": 0.3277811110019684, "learning_rate": 9.52181768866176e-06, "loss": 0.3717, "step": 11130 }, { "epoch": 1.131659211061407, "grad_norm": 0.332759827375412, "learning_rate": 9.521666225093948e-06, "loss": 0.353, "step": 11131 }, { "epoch": 1.131760878405856, "grad_norm": 0.3071424663066864, "learning_rate": 9.521514738747058e-06, "loss": 0.355, "step": 11132 }, { "epoch": 1.131862545750305, "grad_norm": 0.28165242075920105, "learning_rate": 9.521363229621849e-06, "loss": 0.3804, "step": 11133 }, { "epoch": 1.131964213094754, "grad_norm": 0.3489331007003784, "learning_rate": 9.521211697719089e-06, "loss": 0.3662, "step": 11134 }, { "epoch": 1.132065880439203, "grad_norm": 0.3224635422229767, "learning_rate": 9.521060143039536e-06, "loss": 0.3755, "step": 11135 }, { "epoch": 1.132167547783652, "grad_norm": 0.303378164768219, "learning_rate": 9.520908565583959e-06, "loss": 0.3684, "step": 11136 }, { "epoch": 1.1322692151281009, "grad_norm": 0.3104605972766876, "learning_rate": 9.520756965353118e-06, "loss": 0.3429, "step": 11137 }, { "epoch": 1.1323708824725498, "grad_norm": 0.3193381428718567, "learning_rate": 9.520605342347777e-06, "loss": 0.3162, "step": 11138 }, { "epoch": 1.1324725498169987, "grad_norm": 0.3275246322154999, "learning_rate": 9.5204536965687e-06, "loss": 0.3764, "step": 11139 }, { "epoch": 1.1325742171614477, "grad_norm": 0.28620705008506775, "learning_rate": 9.520302028016653e-06, "loss": 0.3471, "step": 11140 }, { "epoch": 1.1326758845058966, "grad_norm": 0.29698264598846436, "learning_rate": 9.520150336692399e-06, "loss": 0.3598, "step": 11141 }, { "epoch": 1.1327775518503458, "grad_norm": 0.32648414373397827, "learning_rate": 9.5199986225967e-06, "loss": 0.332, "step": 11142 }, { "epoch": 1.1328792191947947, "grad_norm": 0.3241899013519287, "learning_rate": 9.519846885730324e-06, "loss": 0.3698, "step": 11143 }, { "epoch": 1.1329808865392437, "grad_norm": 0.3175070881843567, "learning_rate": 9.519695126094033e-06, "loss": 0.3862, "step": 11144 }, { "epoch": 1.1330825538836926, "grad_norm": 0.3089113235473633, "learning_rate": 9.51954334368859e-06, "loss": 0.3339, "step": 11145 }, { "epoch": 1.1331842212281416, "grad_norm": 0.2968105971813202, "learning_rate": 9.519391538514763e-06, "loss": 0.3447, "step": 11146 }, { "epoch": 1.1332858885725905, "grad_norm": 0.3279678225517273, "learning_rate": 9.519239710573316e-06, "loss": 0.3848, "step": 11147 }, { "epoch": 1.1333875559170394, "grad_norm": 0.30168616771698, "learning_rate": 9.519087859865013e-06, "loss": 0.3564, "step": 11148 }, { "epoch": 1.1334892232614884, "grad_norm": 0.299856960773468, "learning_rate": 9.518935986390618e-06, "loss": 0.3466, "step": 11149 }, { "epoch": 1.1335908906059373, "grad_norm": 0.2900083065032959, "learning_rate": 9.518784090150898e-06, "loss": 0.3721, "step": 11150 }, { "epoch": 1.1336925579503863, "grad_norm": 0.3134945333003998, "learning_rate": 9.51863217114662e-06, "loss": 0.3755, "step": 11151 }, { "epoch": 1.1337942252948352, "grad_norm": 0.31025630235671997, "learning_rate": 9.518480229378544e-06, "loss": 0.3501, "step": 11152 }, { "epoch": 1.1338958926392841, "grad_norm": 0.3174611032009125, "learning_rate": 9.51832826484744e-06, "loss": 0.3829, "step": 11153 }, { "epoch": 1.1339975599837333, "grad_norm": 0.2991313934326172, "learning_rate": 9.51817627755407e-06, "loss": 0.3471, "step": 11154 }, { "epoch": 1.1340992273281822, "grad_norm": 0.296947181224823, "learning_rate": 9.518024267499203e-06, "loss": 0.3608, "step": 11155 }, { "epoch": 1.1342008946726312, "grad_norm": 0.3026360273361206, "learning_rate": 9.517872234683603e-06, "loss": 0.3647, "step": 11156 }, { "epoch": 1.1343025620170801, "grad_norm": 0.3146514296531677, "learning_rate": 9.517720179108036e-06, "loss": 0.3605, "step": 11157 }, { "epoch": 1.134404229361529, "grad_norm": 0.29310178756713867, "learning_rate": 9.517568100773269e-06, "loss": 0.3676, "step": 11158 }, { "epoch": 1.134505896705978, "grad_norm": 0.3103073835372925, "learning_rate": 9.517415999680069e-06, "loss": 0.3702, "step": 11159 }, { "epoch": 1.134607564050427, "grad_norm": 0.29489538073539734, "learning_rate": 9.517263875829198e-06, "loss": 0.3302, "step": 11160 }, { "epoch": 1.134709231394876, "grad_norm": 0.31477445363998413, "learning_rate": 9.517111729221426e-06, "loss": 0.3772, "step": 11161 }, { "epoch": 1.1348108987393248, "grad_norm": 0.3297240734100342, "learning_rate": 9.516959559857518e-06, "loss": 0.3383, "step": 11162 }, { "epoch": 1.134912566083774, "grad_norm": 0.2910999357700348, "learning_rate": 9.516807367738242e-06, "loss": 0.3699, "step": 11163 }, { "epoch": 1.135014233428223, "grad_norm": 0.315079003572464, "learning_rate": 9.516655152864363e-06, "loss": 0.3712, "step": 11164 }, { "epoch": 1.1351159007726719, "grad_norm": 0.30856603384017944, "learning_rate": 9.51650291523665e-06, "loss": 0.3537, "step": 11165 }, { "epoch": 1.1352175681171208, "grad_norm": 0.29224076867103577, "learning_rate": 9.516350654855867e-06, "loss": 0.3336, "step": 11166 }, { "epoch": 1.1353192354615698, "grad_norm": 0.3081386685371399, "learning_rate": 9.516198371722784e-06, "loss": 0.343, "step": 11167 }, { "epoch": 1.1354209028060187, "grad_norm": 0.3102785050868988, "learning_rate": 9.516046065838166e-06, "loss": 0.3555, "step": 11168 }, { "epoch": 1.1355225701504676, "grad_norm": 0.3429521322250366, "learning_rate": 9.515893737202782e-06, "loss": 0.4175, "step": 11169 }, { "epoch": 1.1356242374949166, "grad_norm": 0.3324129283428192, "learning_rate": 9.515741385817397e-06, "loss": 0.3716, "step": 11170 }, { "epoch": 1.1357259048393655, "grad_norm": 0.3211437165737152, "learning_rate": 9.51558901168278e-06, "loss": 0.3746, "step": 11171 }, { "epoch": 1.1358275721838145, "grad_norm": 0.2874375283718109, "learning_rate": 9.5154366147997e-06, "loss": 0.3469, "step": 11172 }, { "epoch": 1.1359292395282634, "grad_norm": 0.29107043147087097, "learning_rate": 9.515284195168923e-06, "loss": 0.3515, "step": 11173 }, { "epoch": 1.1360309068727126, "grad_norm": 0.3096775412559509, "learning_rate": 9.515131752791217e-06, "loss": 0.3825, "step": 11174 }, { "epoch": 1.1361325742171615, "grad_norm": 0.29739436507225037, "learning_rate": 9.51497928766735e-06, "loss": 0.3515, "step": 11175 }, { "epoch": 1.1362342415616105, "grad_norm": 0.3447307050228119, "learning_rate": 9.51482679979809e-06, "loss": 0.3693, "step": 11176 }, { "epoch": 1.1363359089060594, "grad_norm": 0.30229538679122925, "learning_rate": 9.514674289184207e-06, "loss": 0.3678, "step": 11177 }, { "epoch": 1.1364375762505083, "grad_norm": 0.2988038957118988, "learning_rate": 9.514521755826467e-06, "loss": 0.3704, "step": 11178 }, { "epoch": 1.1365392435949573, "grad_norm": 0.29783016443252563, "learning_rate": 9.514369199725638e-06, "loss": 0.3827, "step": 11179 }, { "epoch": 1.1366409109394062, "grad_norm": 0.32477959990501404, "learning_rate": 9.51421662088249e-06, "loss": 0.344, "step": 11180 }, { "epoch": 1.1367425782838552, "grad_norm": 0.3058205544948578, "learning_rate": 9.514064019297793e-06, "loss": 0.3749, "step": 11181 }, { "epoch": 1.136844245628304, "grad_norm": 0.26472702622413635, "learning_rate": 9.513911394972313e-06, "loss": 0.3403, "step": 11182 }, { "epoch": 1.1369459129727533, "grad_norm": 0.31326836347579956, "learning_rate": 9.51375874790682e-06, "loss": 0.4002, "step": 11183 }, { "epoch": 1.1370475803172022, "grad_norm": 0.3123836815357208, "learning_rate": 9.513606078102084e-06, "loss": 0.3642, "step": 11184 }, { "epoch": 1.1371492476616512, "grad_norm": 0.31066733598709106, "learning_rate": 9.513453385558873e-06, "loss": 0.3912, "step": 11185 }, { "epoch": 1.1372509150061, "grad_norm": 0.32577401399612427, "learning_rate": 9.513300670277957e-06, "loss": 0.3813, "step": 11186 }, { "epoch": 1.137352582350549, "grad_norm": 0.3218252658843994, "learning_rate": 9.513147932260102e-06, "loss": 0.3723, "step": 11187 }, { "epoch": 1.137454249694998, "grad_norm": 0.30005866289138794, "learning_rate": 9.512995171506082e-06, "loss": 0.3978, "step": 11188 }, { "epoch": 1.137555917039447, "grad_norm": 0.32735544443130493, "learning_rate": 9.512842388016665e-06, "loss": 0.3679, "step": 11189 }, { "epoch": 1.1376575843838959, "grad_norm": 0.30395063757896423, "learning_rate": 9.51268958179262e-06, "loss": 0.3551, "step": 11190 }, { "epoch": 1.1377592517283448, "grad_norm": 0.30921971797943115, "learning_rate": 9.512536752834717e-06, "loss": 0.3628, "step": 11191 }, { "epoch": 1.1378609190727937, "grad_norm": 0.3214145004749298, "learning_rate": 9.512383901143727e-06, "loss": 0.4109, "step": 11192 }, { "epoch": 1.1379625864172427, "grad_norm": 0.31141725182533264, "learning_rate": 9.51223102672042e-06, "loss": 0.3745, "step": 11193 }, { "epoch": 1.1380642537616916, "grad_norm": 0.3308009207248688, "learning_rate": 9.512078129565563e-06, "loss": 0.3806, "step": 11194 }, { "epoch": 1.1381659211061408, "grad_norm": 0.30212703347206116, "learning_rate": 9.511925209679931e-06, "loss": 0.3403, "step": 11195 }, { "epoch": 1.1382675884505897, "grad_norm": 0.3000239431858063, "learning_rate": 9.51177226706429e-06, "loss": 0.3438, "step": 11196 }, { "epoch": 1.1383692557950387, "grad_norm": 0.31676381826400757, "learning_rate": 9.511619301719413e-06, "loss": 0.3505, "step": 11197 }, { "epoch": 1.1384709231394876, "grad_norm": 0.32312747836112976, "learning_rate": 9.51146631364607e-06, "loss": 0.3898, "step": 11198 }, { "epoch": 1.1385725904839366, "grad_norm": 0.29705730080604553, "learning_rate": 9.511313302845033e-06, "loss": 0.3576, "step": 11199 }, { "epoch": 1.1386742578283855, "grad_norm": 0.3159266710281372, "learning_rate": 9.51116026931707e-06, "loss": 0.4163, "step": 11200 }, { "epoch": 1.1387759251728344, "grad_norm": 0.3049735426902771, "learning_rate": 9.511007213062956e-06, "loss": 0.3851, "step": 11201 }, { "epoch": 1.1388775925172834, "grad_norm": 0.3011501431465149, "learning_rate": 9.510854134083457e-06, "loss": 0.3466, "step": 11202 }, { "epoch": 1.1389792598617323, "grad_norm": 0.31670519709587097, "learning_rate": 9.510701032379349e-06, "loss": 0.3722, "step": 11203 }, { "epoch": 1.1390809272061815, "grad_norm": 0.3162612318992615, "learning_rate": 9.510547907951398e-06, "loss": 0.3731, "step": 11204 }, { "epoch": 1.1391825945506304, "grad_norm": 0.3194710612297058, "learning_rate": 9.510394760800382e-06, "loss": 0.3823, "step": 11205 }, { "epoch": 1.1392842618950794, "grad_norm": 0.31410810351371765, "learning_rate": 9.510241590927067e-06, "loss": 0.3533, "step": 11206 }, { "epoch": 1.1393859292395283, "grad_norm": 0.2955547869205475, "learning_rate": 9.510088398332227e-06, "loss": 0.3757, "step": 11207 }, { "epoch": 1.1394875965839772, "grad_norm": 0.3119167983531952, "learning_rate": 9.509935183016633e-06, "loss": 0.378, "step": 11208 }, { "epoch": 1.1395892639284262, "grad_norm": 0.32971474528312683, "learning_rate": 9.509781944981059e-06, "loss": 0.362, "step": 11209 }, { "epoch": 1.1396909312728751, "grad_norm": 0.3334042429924011, "learning_rate": 9.509628684226274e-06, "loss": 0.3741, "step": 11210 }, { "epoch": 1.139792598617324, "grad_norm": 0.2998878061771393, "learning_rate": 9.50947540075305e-06, "loss": 0.3819, "step": 11211 }, { "epoch": 1.139894265961773, "grad_norm": 0.33652159571647644, "learning_rate": 9.509322094562163e-06, "loss": 0.3423, "step": 11212 }, { "epoch": 1.139995933306222, "grad_norm": 0.3321003317832947, "learning_rate": 9.509168765654382e-06, "loss": 0.3653, "step": 11213 }, { "epoch": 1.140097600650671, "grad_norm": 0.3445090651512146, "learning_rate": 9.50901541403048e-06, "loss": 0.3794, "step": 11214 }, { "epoch": 1.14019926799512, "grad_norm": 0.3261967599391937, "learning_rate": 9.50886203969123e-06, "loss": 0.4108, "step": 11215 }, { "epoch": 1.140300935339569, "grad_norm": 0.305894136428833, "learning_rate": 9.508708642637405e-06, "loss": 0.3375, "step": 11216 }, { "epoch": 1.140402602684018, "grad_norm": 0.31641194224357605, "learning_rate": 9.508555222869775e-06, "loss": 0.3309, "step": 11217 }, { "epoch": 1.1405042700284669, "grad_norm": 0.31339144706726074, "learning_rate": 9.508401780389118e-06, "loss": 0.3737, "step": 11218 }, { "epoch": 1.1406059373729158, "grad_norm": 0.3340717554092407, "learning_rate": 9.508248315196202e-06, "loss": 0.3849, "step": 11219 }, { "epoch": 1.1407076047173648, "grad_norm": 0.297850638628006, "learning_rate": 9.508094827291804e-06, "loss": 0.3525, "step": 11220 }, { "epoch": 1.1408092720618137, "grad_norm": 0.3196490705013275, "learning_rate": 9.507941316676696e-06, "loss": 0.385, "step": 11221 }, { "epoch": 1.1409109394062626, "grad_norm": 0.29900699853897095, "learning_rate": 9.50778778335165e-06, "loss": 0.3893, "step": 11222 }, { "epoch": 1.1410126067507116, "grad_norm": 0.29792213439941406, "learning_rate": 9.507634227317439e-06, "loss": 0.3639, "step": 11223 }, { "epoch": 1.1411142740951608, "grad_norm": 0.3144516348838806, "learning_rate": 9.50748064857484e-06, "loss": 0.3778, "step": 11224 }, { "epoch": 1.1412159414396097, "grad_norm": 0.30970120429992676, "learning_rate": 9.507327047124624e-06, "loss": 0.365, "step": 11225 }, { "epoch": 1.1413176087840586, "grad_norm": 0.3059038519859314, "learning_rate": 9.507173422967565e-06, "loss": 0.4015, "step": 11226 }, { "epoch": 1.1414192761285076, "grad_norm": 0.2779884934425354, "learning_rate": 9.507019776104439e-06, "loss": 0.3443, "step": 11227 }, { "epoch": 1.1415209434729565, "grad_norm": 0.29277893900871277, "learning_rate": 9.506866106536015e-06, "loss": 0.375, "step": 11228 }, { "epoch": 1.1416226108174055, "grad_norm": 0.3085065186023712, "learning_rate": 9.506712414263074e-06, "loss": 0.3649, "step": 11229 }, { "epoch": 1.1417242781618544, "grad_norm": 0.3098442256450653, "learning_rate": 9.506558699286386e-06, "loss": 0.3921, "step": 11230 }, { "epoch": 1.1418259455063033, "grad_norm": 0.30778589844703674, "learning_rate": 9.506404961606724e-06, "loss": 0.3767, "step": 11231 }, { "epoch": 1.1419276128507523, "grad_norm": 0.32842764258384705, "learning_rate": 9.506251201224867e-06, "loss": 0.4014, "step": 11232 }, { "epoch": 1.1420292801952012, "grad_norm": 0.2838367223739624, "learning_rate": 9.506097418141586e-06, "loss": 0.3309, "step": 11233 }, { "epoch": 1.1421309475396502, "grad_norm": 0.3137137293815613, "learning_rate": 9.505943612357657e-06, "loss": 0.3672, "step": 11234 }, { "epoch": 1.142232614884099, "grad_norm": 0.3217919170856476, "learning_rate": 9.505789783873857e-06, "loss": 0.3397, "step": 11235 }, { "epoch": 1.1423342822285483, "grad_norm": 0.33633720874786377, "learning_rate": 9.505635932690958e-06, "loss": 0.3956, "step": 11236 }, { "epoch": 1.1424359495729972, "grad_norm": 0.285374253988266, "learning_rate": 9.505482058809735e-06, "loss": 0.3925, "step": 11237 }, { "epoch": 1.1425376169174462, "grad_norm": 0.32325440645217896, "learning_rate": 9.505328162230964e-06, "loss": 0.3556, "step": 11238 }, { "epoch": 1.142639284261895, "grad_norm": 0.3242413103580475, "learning_rate": 9.50517424295542e-06, "loss": 0.3761, "step": 11239 }, { "epoch": 1.142740951606344, "grad_norm": 0.31587544083595276, "learning_rate": 9.505020300983879e-06, "loss": 0.3776, "step": 11240 }, { "epoch": 1.142842618950793, "grad_norm": 0.29534947872161865, "learning_rate": 9.504866336317118e-06, "loss": 0.3778, "step": 11241 }, { "epoch": 1.142944286295242, "grad_norm": 0.32063621282577515, "learning_rate": 9.504712348955909e-06, "loss": 0.3646, "step": 11242 }, { "epoch": 1.1430459536396909, "grad_norm": 0.3015235662460327, "learning_rate": 9.50455833890103e-06, "loss": 0.4006, "step": 11243 }, { "epoch": 1.1431476209841398, "grad_norm": 0.3088458180427551, "learning_rate": 9.504404306153256e-06, "loss": 0.375, "step": 11244 }, { "epoch": 1.143249288328589, "grad_norm": 0.33112671971321106, "learning_rate": 9.504250250713365e-06, "loss": 0.3481, "step": 11245 }, { "epoch": 1.143350955673038, "grad_norm": 0.3012170195579529, "learning_rate": 9.504096172582131e-06, "loss": 0.3566, "step": 11246 }, { "epoch": 1.1434526230174868, "grad_norm": 0.3009195625782013, "learning_rate": 9.50394207176033e-06, "loss": 0.3798, "step": 11247 }, { "epoch": 1.1435542903619358, "grad_norm": 0.32451504468917847, "learning_rate": 9.50378794824874e-06, "loss": 0.4042, "step": 11248 }, { "epoch": 1.1436559577063847, "grad_norm": 0.30747294425964355, "learning_rate": 9.503633802048137e-06, "loss": 0.347, "step": 11249 }, { "epoch": 1.1437576250508337, "grad_norm": 0.29711925983428955, "learning_rate": 9.503479633159295e-06, "loss": 0.3772, "step": 11250 }, { "epoch": 1.1438592923952826, "grad_norm": 0.3215942680835724, "learning_rate": 9.503325441582996e-06, "loss": 0.385, "step": 11251 }, { "epoch": 1.1439609597397316, "grad_norm": 0.3132496476173401, "learning_rate": 9.503171227320012e-06, "loss": 0.3751, "step": 11252 }, { "epoch": 1.1440626270841805, "grad_norm": 0.33614128828048706, "learning_rate": 9.503016990371121e-06, "loss": 0.337, "step": 11253 }, { "epoch": 1.1441642944286294, "grad_norm": 0.30073878169059753, "learning_rate": 9.502862730737102e-06, "loss": 0.3678, "step": 11254 }, { "epoch": 1.1442659617730784, "grad_norm": 0.28419598937034607, "learning_rate": 9.502708448418729e-06, "loss": 0.3499, "step": 11255 }, { "epoch": 1.1443676291175275, "grad_norm": 0.34024161100387573, "learning_rate": 9.50255414341678e-06, "loss": 0.3931, "step": 11256 }, { "epoch": 1.1444692964619765, "grad_norm": 0.3209191560745239, "learning_rate": 9.502399815732035e-06, "loss": 0.3777, "step": 11257 }, { "epoch": 1.1445709638064254, "grad_norm": 0.29146432876586914, "learning_rate": 9.50224546536527e-06, "loss": 0.3681, "step": 11258 }, { "epoch": 1.1446726311508744, "grad_norm": 0.2880949079990387, "learning_rate": 9.50209109231726e-06, "loss": 0.3536, "step": 11259 }, { "epoch": 1.1447742984953233, "grad_norm": 0.30794164538383484, "learning_rate": 9.501936696588788e-06, "loss": 0.3767, "step": 11260 }, { "epoch": 1.1448759658397722, "grad_norm": 0.3051852583885193, "learning_rate": 9.501782278180626e-06, "loss": 0.3512, "step": 11261 }, { "epoch": 1.1449776331842212, "grad_norm": 0.3242776691913605, "learning_rate": 9.501627837093557e-06, "loss": 0.3652, "step": 11262 }, { "epoch": 1.1450793005286701, "grad_norm": 0.28236156702041626, "learning_rate": 9.501473373328355e-06, "loss": 0.3603, "step": 11263 }, { "epoch": 1.145180967873119, "grad_norm": 0.29682019352912903, "learning_rate": 9.5013188868858e-06, "loss": 0.3839, "step": 11264 }, { "epoch": 1.1452826352175682, "grad_norm": 0.29521575570106506, "learning_rate": 9.50116437776667e-06, "loss": 0.349, "step": 11265 }, { "epoch": 1.1453843025620172, "grad_norm": 0.28395816683769226, "learning_rate": 9.501009845971744e-06, "loss": 0.3699, "step": 11266 }, { "epoch": 1.1454859699064661, "grad_norm": 0.3023372292518616, "learning_rate": 9.5008552915018e-06, "loss": 0.366, "step": 11267 }, { "epoch": 1.145587637250915, "grad_norm": 0.2995915710926056, "learning_rate": 9.500700714357618e-06, "loss": 0.3901, "step": 11268 }, { "epoch": 1.145689304595364, "grad_norm": 0.30371376872062683, "learning_rate": 9.500546114539972e-06, "loss": 0.3535, "step": 11269 }, { "epoch": 1.145790971939813, "grad_norm": 0.31990063190460205, "learning_rate": 9.500391492049647e-06, "loss": 0.342, "step": 11270 }, { "epoch": 1.1458926392842619, "grad_norm": 0.3242615759372711, "learning_rate": 9.500236846887418e-06, "loss": 0.3714, "step": 11271 }, { "epoch": 1.1459943066287108, "grad_norm": 0.3099132180213928, "learning_rate": 9.500082179054066e-06, "loss": 0.3547, "step": 11272 }, { "epoch": 1.1460959739731598, "grad_norm": 0.28851118683815, "learning_rate": 9.499927488550368e-06, "loss": 0.3685, "step": 11273 }, { "epoch": 1.1461976413176087, "grad_norm": 0.299661785364151, "learning_rate": 9.499772775377106e-06, "loss": 0.36, "step": 11274 }, { "epoch": 1.1462993086620576, "grad_norm": 0.3218839466571808, "learning_rate": 9.499618039535057e-06, "loss": 0.3666, "step": 11275 }, { "epoch": 1.1464009760065066, "grad_norm": 0.3097834289073944, "learning_rate": 9.499463281025003e-06, "loss": 0.3294, "step": 11276 }, { "epoch": 1.1465026433509558, "grad_norm": 0.2753795087337494, "learning_rate": 9.499308499847721e-06, "loss": 0.3722, "step": 11277 }, { "epoch": 1.1466043106954047, "grad_norm": 0.29842111468315125, "learning_rate": 9.499153696003993e-06, "loss": 0.3532, "step": 11278 }, { "epoch": 1.1467059780398536, "grad_norm": 0.314314603805542, "learning_rate": 9.498998869494597e-06, "loss": 0.3668, "step": 11279 }, { "epoch": 1.1468076453843026, "grad_norm": 0.34840869903564453, "learning_rate": 9.498844020320314e-06, "loss": 0.3391, "step": 11280 }, { "epoch": 1.1469093127287515, "grad_norm": 0.3132863938808441, "learning_rate": 9.498689148481924e-06, "loss": 0.3715, "step": 11281 }, { "epoch": 1.1470109800732005, "grad_norm": 0.3077402114868164, "learning_rate": 9.498534253980207e-06, "loss": 0.3324, "step": 11282 }, { "epoch": 1.1471126474176494, "grad_norm": 0.3058275282382965, "learning_rate": 9.498379336815943e-06, "loss": 0.3903, "step": 11283 }, { "epoch": 1.1472143147620983, "grad_norm": 0.3268934190273285, "learning_rate": 9.498224396989913e-06, "loss": 0.3568, "step": 11284 }, { "epoch": 1.1473159821065473, "grad_norm": 0.305524617433548, "learning_rate": 9.498069434502897e-06, "loss": 0.4121, "step": 11285 }, { "epoch": 1.1474176494509964, "grad_norm": 0.29207688570022583, "learning_rate": 9.497914449355679e-06, "loss": 0.3507, "step": 11286 }, { "epoch": 1.1475193167954454, "grad_norm": 0.32304900884628296, "learning_rate": 9.497759441549035e-06, "loss": 0.3526, "step": 11287 }, { "epoch": 1.1476209841398943, "grad_norm": 0.30531683564186096, "learning_rate": 9.497604411083746e-06, "loss": 0.3741, "step": 11288 }, { "epoch": 1.1477226514843433, "grad_norm": 0.34298673272132874, "learning_rate": 9.497449357960596e-06, "loss": 0.4399, "step": 11289 }, { "epoch": 1.1478243188287922, "grad_norm": 0.2990489900112152, "learning_rate": 9.497294282180365e-06, "loss": 0.3466, "step": 11290 }, { "epoch": 1.1479259861732412, "grad_norm": 0.35784536600112915, "learning_rate": 9.497139183743836e-06, "loss": 0.3935, "step": 11291 }, { "epoch": 1.14802765351769, "grad_norm": 0.34611234068870544, "learning_rate": 9.496984062651787e-06, "loss": 0.3695, "step": 11292 }, { "epoch": 1.148129320862139, "grad_norm": 0.305277943611145, "learning_rate": 9.496828918905e-06, "loss": 0.3733, "step": 11293 }, { "epoch": 1.148230988206588, "grad_norm": 0.32096895575523376, "learning_rate": 9.496673752504258e-06, "loss": 0.3807, "step": 11294 }, { "epoch": 1.148332655551037, "grad_norm": 0.3153409957885742, "learning_rate": 9.496518563450342e-06, "loss": 0.3478, "step": 11295 }, { "epoch": 1.1484343228954859, "grad_norm": 0.3008897304534912, "learning_rate": 9.496363351744035e-06, "loss": 0.3753, "step": 11296 }, { "epoch": 1.148535990239935, "grad_norm": 0.32541319727897644, "learning_rate": 9.496208117386116e-06, "loss": 0.3735, "step": 11297 }, { "epoch": 1.148637657584384, "grad_norm": 0.32019951939582825, "learning_rate": 9.496052860377371e-06, "loss": 0.4071, "step": 11298 }, { "epoch": 1.148739324928833, "grad_norm": 0.30075690150260925, "learning_rate": 9.49589758071858e-06, "loss": 0.3633, "step": 11299 }, { "epoch": 1.1488409922732818, "grad_norm": 0.3052433729171753, "learning_rate": 9.495742278410525e-06, "loss": 0.3666, "step": 11300 }, { "epoch": 1.1489426596177308, "grad_norm": 0.3392356336116791, "learning_rate": 9.495586953453988e-06, "loss": 0.3656, "step": 11301 }, { "epoch": 1.1490443269621797, "grad_norm": 0.29130780696868896, "learning_rate": 9.495431605849754e-06, "loss": 0.3441, "step": 11302 }, { "epoch": 1.1491459943066287, "grad_norm": 0.3138572573661804, "learning_rate": 9.495276235598602e-06, "loss": 0.37, "step": 11303 }, { "epoch": 1.1492476616510776, "grad_norm": 0.3184949457645416, "learning_rate": 9.495120842701317e-06, "loss": 0.4039, "step": 11304 }, { "epoch": 1.1493493289955266, "grad_norm": 0.27434471249580383, "learning_rate": 9.494965427158682e-06, "loss": 0.3773, "step": 11305 }, { "epoch": 1.1494509963399757, "grad_norm": 0.29853466153144836, "learning_rate": 9.494809988971479e-06, "loss": 0.4007, "step": 11306 }, { "epoch": 1.1495526636844247, "grad_norm": 0.328606516122818, "learning_rate": 9.49465452814049e-06, "loss": 0.382, "step": 11307 }, { "epoch": 1.1496543310288736, "grad_norm": 0.32673150300979614, "learning_rate": 9.494499044666501e-06, "loss": 0.3762, "step": 11308 }, { "epoch": 1.1497559983733225, "grad_norm": 0.3227528929710388, "learning_rate": 9.494343538550294e-06, "loss": 0.3836, "step": 11309 }, { "epoch": 1.1498576657177715, "grad_norm": 0.33198678493499756, "learning_rate": 9.494188009792653e-06, "loss": 0.3906, "step": 11310 }, { "epoch": 1.1499593330622204, "grad_norm": 0.2995099723339081, "learning_rate": 9.49403245839436e-06, "loss": 0.338, "step": 11311 }, { "epoch": 1.1500610004066694, "grad_norm": 0.30184343457221985, "learning_rate": 9.493876884356199e-06, "loss": 0.3477, "step": 11312 }, { "epoch": 1.1501626677511183, "grad_norm": 0.3096465468406677, "learning_rate": 9.493721287678955e-06, "loss": 0.3845, "step": 11313 }, { "epoch": 1.1502643350955672, "grad_norm": 0.33818548917770386, "learning_rate": 9.49356566836341e-06, "loss": 0.3822, "step": 11314 }, { "epoch": 1.1503660024400162, "grad_norm": 0.3128049671649933, "learning_rate": 9.49341002641035e-06, "loss": 0.3596, "step": 11315 }, { "epoch": 1.1504676697844651, "grad_norm": 0.29905790090560913, "learning_rate": 9.493254361820557e-06, "loss": 0.3509, "step": 11316 }, { "epoch": 1.150569337128914, "grad_norm": 0.3270752429962158, "learning_rate": 9.493098674594817e-06, "loss": 0.3337, "step": 11317 }, { "epoch": 1.1506710044733632, "grad_norm": 0.30369943380355835, "learning_rate": 9.492942964733913e-06, "loss": 0.3764, "step": 11318 }, { "epoch": 1.1507726718178122, "grad_norm": 0.33985820412635803, "learning_rate": 9.492787232238632e-06, "loss": 0.3849, "step": 11319 }, { "epoch": 1.1508743391622611, "grad_norm": 0.32650959491729736, "learning_rate": 9.492631477109755e-06, "loss": 0.356, "step": 11320 }, { "epoch": 1.15097600650671, "grad_norm": 0.3083748519420624, "learning_rate": 9.492475699348066e-06, "loss": 0.3685, "step": 11321 }, { "epoch": 1.151077673851159, "grad_norm": 0.2986988127231598, "learning_rate": 9.492319898954357e-06, "loss": 0.3598, "step": 11322 }, { "epoch": 1.151179341195608, "grad_norm": 0.3521662950515747, "learning_rate": 9.492164075929404e-06, "loss": 0.4047, "step": 11323 }, { "epoch": 1.1512810085400569, "grad_norm": 0.335125207901001, "learning_rate": 9.492008230273998e-06, "loss": 0.3591, "step": 11324 }, { "epoch": 1.1513826758845058, "grad_norm": 0.2854629456996918, "learning_rate": 9.49185236198892e-06, "loss": 0.3341, "step": 11325 }, { "epoch": 1.1514843432289548, "grad_norm": 0.2901638150215149, "learning_rate": 9.491696471074958e-06, "loss": 0.3813, "step": 11326 }, { "epoch": 1.151586010573404, "grad_norm": 0.3111639618873596, "learning_rate": 9.491540557532898e-06, "loss": 0.3894, "step": 11327 }, { "epoch": 1.1516876779178529, "grad_norm": 0.30171987414360046, "learning_rate": 9.491384621363522e-06, "loss": 0.3431, "step": 11328 }, { "epoch": 1.1517893452623018, "grad_norm": 0.33056798577308655, "learning_rate": 9.49122866256762e-06, "loss": 0.3796, "step": 11329 }, { "epoch": 1.1518910126067508, "grad_norm": 0.2969246804714203, "learning_rate": 9.491072681145973e-06, "loss": 0.3917, "step": 11330 }, { "epoch": 1.1519926799511997, "grad_norm": 0.3032691776752472, "learning_rate": 9.490916677099369e-06, "loss": 0.3735, "step": 11331 }, { "epoch": 1.1520943472956486, "grad_norm": 0.2829267084598541, "learning_rate": 9.490760650428597e-06, "loss": 0.3679, "step": 11332 }, { "epoch": 1.1521960146400976, "grad_norm": 0.31128573417663574, "learning_rate": 9.490604601134437e-06, "loss": 0.3764, "step": 11333 }, { "epoch": 1.1522976819845465, "grad_norm": 0.2958182096481323, "learning_rate": 9.490448529217679e-06, "loss": 0.3602, "step": 11334 }, { "epoch": 1.1523993493289955, "grad_norm": 0.28876185417175293, "learning_rate": 9.490292434679109e-06, "loss": 0.3636, "step": 11335 }, { "epoch": 1.1525010166734444, "grad_norm": 0.29317474365234375, "learning_rate": 9.490136317519512e-06, "loss": 0.4059, "step": 11336 }, { "epoch": 1.1526026840178933, "grad_norm": 0.31966057419776917, "learning_rate": 9.489980177739675e-06, "loss": 0.3899, "step": 11337 }, { "epoch": 1.1527043513623425, "grad_norm": 0.29520440101623535, "learning_rate": 9.489824015340386e-06, "loss": 0.3784, "step": 11338 }, { "epoch": 1.1528060187067914, "grad_norm": 0.29642078280448914, "learning_rate": 9.489667830322429e-06, "loss": 0.4107, "step": 11339 }, { "epoch": 1.1529076860512404, "grad_norm": 0.30154237151145935, "learning_rate": 9.489511622686594e-06, "loss": 0.3632, "step": 11340 }, { "epoch": 1.1530093533956893, "grad_norm": 0.29950156807899475, "learning_rate": 9.489355392433666e-06, "loss": 0.3531, "step": 11341 }, { "epoch": 1.1531110207401383, "grad_norm": 0.3000846207141876, "learning_rate": 9.48919913956443e-06, "loss": 0.3444, "step": 11342 }, { "epoch": 1.1532126880845872, "grad_norm": 0.3261875808238983, "learning_rate": 9.489042864079678e-06, "loss": 0.3491, "step": 11343 }, { "epoch": 1.1533143554290362, "grad_norm": 0.28386345505714417, "learning_rate": 9.488886565980195e-06, "loss": 0.3472, "step": 11344 }, { "epoch": 1.153416022773485, "grad_norm": 0.29869258403778076, "learning_rate": 9.488730245266767e-06, "loss": 0.3711, "step": 11345 }, { "epoch": 1.153517690117934, "grad_norm": 0.29045718908309937, "learning_rate": 9.488573901940183e-06, "loss": 0.3829, "step": 11346 }, { "epoch": 1.1536193574623832, "grad_norm": 0.30607903003692627, "learning_rate": 9.488417536001231e-06, "loss": 0.3779, "step": 11347 }, { "epoch": 1.1537210248068321, "grad_norm": 0.2780137360095978, "learning_rate": 9.488261147450696e-06, "loss": 0.3952, "step": 11348 }, { "epoch": 1.153822692151281, "grad_norm": 0.2813732624053955, "learning_rate": 9.48810473628937e-06, "loss": 0.3543, "step": 11349 }, { "epoch": 1.15392435949573, "grad_norm": 0.2838769853115082, "learning_rate": 9.48794830251804e-06, "loss": 0.3635, "step": 11350 }, { "epoch": 1.154026026840179, "grad_norm": 0.29775339365005493, "learning_rate": 9.48779184613749e-06, "loss": 0.3497, "step": 11351 }, { "epoch": 1.154127694184628, "grad_norm": 0.32529571652412415, "learning_rate": 9.487635367148512e-06, "loss": 0.3925, "step": 11352 }, { "epoch": 1.1542293615290768, "grad_norm": 0.2884801924228668, "learning_rate": 9.487478865551896e-06, "loss": 0.3658, "step": 11353 }, { "epoch": 1.1543310288735258, "grad_norm": 0.3057778477668762, "learning_rate": 9.487322341348425e-06, "loss": 0.3718, "step": 11354 }, { "epoch": 1.1544326962179747, "grad_norm": 0.303087055683136, "learning_rate": 9.48716579453889e-06, "loss": 0.3667, "step": 11355 }, { "epoch": 1.1545343635624237, "grad_norm": 0.2800057530403137, "learning_rate": 9.487009225124082e-06, "loss": 0.3616, "step": 11356 }, { "epoch": 1.1546360309068726, "grad_norm": 0.3103761076927185, "learning_rate": 9.486852633104787e-06, "loss": 0.3914, "step": 11357 }, { "epoch": 1.1547376982513216, "grad_norm": 0.2972928583621979, "learning_rate": 9.486696018481794e-06, "loss": 0.3301, "step": 11358 }, { "epoch": 1.1548393655957707, "grad_norm": 0.3205670416355133, "learning_rate": 9.486539381255896e-06, "loss": 0.3518, "step": 11359 }, { "epoch": 1.1549410329402197, "grad_norm": 0.2913191020488739, "learning_rate": 9.486382721427875e-06, "loss": 0.3591, "step": 11360 }, { "epoch": 1.1550427002846686, "grad_norm": 0.3045635223388672, "learning_rate": 9.486226038998525e-06, "loss": 0.34, "step": 11361 }, { "epoch": 1.1551443676291175, "grad_norm": 0.3301455080509186, "learning_rate": 9.486069333968634e-06, "loss": 0.394, "step": 11362 }, { "epoch": 1.1552460349735665, "grad_norm": 0.3130773603916168, "learning_rate": 9.485912606338992e-06, "loss": 0.351, "step": 11363 }, { "epoch": 1.1553477023180154, "grad_norm": 0.31313544511795044, "learning_rate": 9.485755856110388e-06, "loss": 0.3756, "step": 11364 }, { "epoch": 1.1554493696624644, "grad_norm": 0.31480342149734497, "learning_rate": 9.485599083283613e-06, "loss": 0.3692, "step": 11365 }, { "epoch": 1.1555510370069133, "grad_norm": 0.33251699805259705, "learning_rate": 9.485442287859455e-06, "loss": 0.3661, "step": 11366 }, { "epoch": 1.1556527043513622, "grad_norm": 0.33082306385040283, "learning_rate": 9.485285469838706e-06, "loss": 0.3605, "step": 11367 }, { "epoch": 1.1557543716958114, "grad_norm": 0.3028743863105774, "learning_rate": 9.485128629222153e-06, "loss": 0.3525, "step": 11368 }, { "epoch": 1.1558560390402604, "grad_norm": 0.2989121675491333, "learning_rate": 9.484971766010587e-06, "loss": 0.3516, "step": 11369 }, { "epoch": 1.1559577063847093, "grad_norm": 0.31127798557281494, "learning_rate": 9.484814880204802e-06, "loss": 0.3377, "step": 11370 }, { "epoch": 1.1560593737291582, "grad_norm": 0.3275124728679657, "learning_rate": 9.484657971805583e-06, "loss": 0.356, "step": 11371 }, { "epoch": 1.1561610410736072, "grad_norm": 0.27404364943504333, "learning_rate": 9.484501040813723e-06, "loss": 0.3351, "step": 11372 }, { "epoch": 1.1562627084180561, "grad_norm": 0.30326953530311584, "learning_rate": 9.484344087230015e-06, "loss": 0.3991, "step": 11373 }, { "epoch": 1.156364375762505, "grad_norm": 0.3219137191772461, "learning_rate": 9.484187111055244e-06, "loss": 0.3716, "step": 11374 }, { "epoch": 1.156466043106954, "grad_norm": 0.3165101110935211, "learning_rate": 9.484030112290205e-06, "loss": 0.3752, "step": 11375 }, { "epoch": 1.156567710451403, "grad_norm": 0.28647372126579285, "learning_rate": 9.483873090935687e-06, "loss": 0.368, "step": 11376 }, { "epoch": 1.1566693777958519, "grad_norm": 0.30374664068222046, "learning_rate": 9.483716046992484e-06, "loss": 0.3723, "step": 11377 }, { "epoch": 1.1567710451403008, "grad_norm": 0.310405433177948, "learning_rate": 9.483558980461384e-06, "loss": 0.3458, "step": 11378 }, { "epoch": 1.15687271248475, "grad_norm": 0.287066787481308, "learning_rate": 9.48340189134318e-06, "loss": 0.3976, "step": 11379 }, { "epoch": 1.156974379829199, "grad_norm": 0.2926928699016571, "learning_rate": 9.48324477963866e-06, "loss": 0.402, "step": 11380 }, { "epoch": 1.1570760471736479, "grad_norm": 0.30347898602485657, "learning_rate": 9.48308764534862e-06, "loss": 0.373, "step": 11381 }, { "epoch": 1.1571777145180968, "grad_norm": 0.31169718503952026, "learning_rate": 9.482930488473852e-06, "loss": 0.3582, "step": 11382 }, { "epoch": 1.1572793818625458, "grad_norm": 0.3079773485660553, "learning_rate": 9.482773309015144e-06, "loss": 0.3789, "step": 11383 }, { "epoch": 1.1573810492069947, "grad_norm": 0.2884025573730469, "learning_rate": 9.482616106973289e-06, "loss": 0.3566, "step": 11384 }, { "epoch": 1.1574827165514436, "grad_norm": 0.3059389889240265, "learning_rate": 9.482458882349078e-06, "loss": 0.3558, "step": 11385 }, { "epoch": 1.1575843838958926, "grad_norm": 0.2847989499568939, "learning_rate": 9.482301635143305e-06, "loss": 0.354, "step": 11386 }, { "epoch": 1.1576860512403415, "grad_norm": 0.3084351420402527, "learning_rate": 9.482144365356761e-06, "loss": 0.3654, "step": 11387 }, { "epoch": 1.1577877185847907, "grad_norm": 0.29933634400367737, "learning_rate": 9.48198707299024e-06, "loss": 0.3874, "step": 11388 }, { "epoch": 1.1578893859292396, "grad_norm": 0.33313050866127014, "learning_rate": 9.481829758044534e-06, "loss": 0.3842, "step": 11389 }, { "epoch": 1.1579910532736886, "grad_norm": 0.2795921564102173, "learning_rate": 9.481672420520433e-06, "loss": 0.3518, "step": 11390 }, { "epoch": 1.1580927206181375, "grad_norm": 0.280531644821167, "learning_rate": 9.481515060418731e-06, "loss": 0.3476, "step": 11391 }, { "epoch": 1.1581943879625864, "grad_norm": 0.2931165099143982, "learning_rate": 9.481357677740222e-06, "loss": 0.3595, "step": 11392 }, { "epoch": 1.1582960553070354, "grad_norm": 0.29577863216400146, "learning_rate": 9.4812002724857e-06, "loss": 0.3994, "step": 11393 }, { "epoch": 1.1583977226514843, "grad_norm": 0.30338340997695923, "learning_rate": 9.481042844655952e-06, "loss": 0.3844, "step": 11394 }, { "epoch": 1.1584993899959333, "grad_norm": 0.2933691740036011, "learning_rate": 9.480885394251778e-06, "loss": 0.3777, "step": 11395 }, { "epoch": 1.1586010573403822, "grad_norm": 0.3107280433177948, "learning_rate": 9.480727921273967e-06, "loss": 0.3342, "step": 11396 }, { "epoch": 1.1587027246848312, "grad_norm": 0.30481454730033875, "learning_rate": 9.480570425723315e-06, "loss": 0.3655, "step": 11397 }, { "epoch": 1.15880439202928, "grad_norm": 0.30463239550590515, "learning_rate": 9.480412907600612e-06, "loss": 0.3554, "step": 11398 }, { "epoch": 1.158906059373729, "grad_norm": 0.3052676022052765, "learning_rate": 9.480255366906655e-06, "loss": 0.3705, "step": 11399 }, { "epoch": 1.1590077267181782, "grad_norm": 0.2900103032588959, "learning_rate": 9.480097803642234e-06, "loss": 0.3511, "step": 11400 }, { "epoch": 1.1591093940626271, "grad_norm": 0.2792089581489563, "learning_rate": 9.479940217808146e-06, "loss": 0.3559, "step": 11401 }, { "epoch": 1.159211061407076, "grad_norm": 0.2965574562549591, "learning_rate": 9.479782609405183e-06, "loss": 0.3515, "step": 11402 }, { "epoch": 1.159312728751525, "grad_norm": 0.318434476852417, "learning_rate": 9.479624978434141e-06, "loss": 0.3699, "step": 11403 }, { "epoch": 1.159414396095974, "grad_norm": 0.3084542751312256, "learning_rate": 9.479467324895814e-06, "loss": 0.3397, "step": 11404 }, { "epoch": 1.159516063440423, "grad_norm": 0.2919600307941437, "learning_rate": 9.479309648790994e-06, "loss": 0.3563, "step": 11405 }, { "epoch": 1.1596177307848718, "grad_norm": 0.3050766885280609, "learning_rate": 9.479151950120475e-06, "loss": 0.3723, "step": 11406 }, { "epoch": 1.1597193981293208, "grad_norm": 0.33458465337753296, "learning_rate": 9.478994228885054e-06, "loss": 0.4168, "step": 11407 }, { "epoch": 1.1598210654737697, "grad_norm": 0.3052816092967987, "learning_rate": 9.478836485085525e-06, "loss": 0.3715, "step": 11408 }, { "epoch": 1.159922732818219, "grad_norm": 0.33387550711631775, "learning_rate": 9.478678718722681e-06, "loss": 0.3736, "step": 11409 }, { "epoch": 1.1600244001626678, "grad_norm": 0.3528677821159363, "learning_rate": 9.47852092979732e-06, "loss": 0.3318, "step": 11410 }, { "epoch": 1.1601260675071168, "grad_norm": 0.2944738268852234, "learning_rate": 9.478363118310233e-06, "loss": 0.3741, "step": 11411 }, { "epoch": 1.1602277348515657, "grad_norm": 0.3204349875450134, "learning_rate": 9.478205284262216e-06, "loss": 0.372, "step": 11412 }, { "epoch": 1.1603294021960147, "grad_norm": 0.33201727271080017, "learning_rate": 9.478047427654067e-06, "loss": 0.3822, "step": 11413 }, { "epoch": 1.1604310695404636, "grad_norm": 0.30836206674575806, "learning_rate": 9.477889548486579e-06, "loss": 0.3766, "step": 11414 }, { "epoch": 1.1605327368849125, "grad_norm": 0.32217633724212646, "learning_rate": 9.477731646760546e-06, "loss": 0.4106, "step": 11415 }, { "epoch": 1.1606344042293615, "grad_norm": 0.3085397481918335, "learning_rate": 9.477573722476764e-06, "loss": 0.3706, "step": 11416 }, { "epoch": 1.1607360715738104, "grad_norm": 0.3111379146575928, "learning_rate": 9.477415775636033e-06, "loss": 0.3734, "step": 11417 }, { "epoch": 1.1608377389182594, "grad_norm": 0.30303725600242615, "learning_rate": 9.477257806239145e-06, "loss": 0.4157, "step": 11418 }, { "epoch": 1.1609394062627083, "grad_norm": 0.31670278310775757, "learning_rate": 9.477099814286894e-06, "loss": 0.3314, "step": 11419 }, { "epoch": 1.1610410736071575, "grad_norm": 0.292726069688797, "learning_rate": 9.476941799780079e-06, "loss": 0.3704, "step": 11420 }, { "epoch": 1.1611427409516064, "grad_norm": 0.26196327805519104, "learning_rate": 9.476783762719495e-06, "loss": 0.3617, "step": 11421 }, { "epoch": 1.1612444082960554, "grad_norm": 0.2790718376636505, "learning_rate": 9.476625703105939e-06, "loss": 0.3574, "step": 11422 }, { "epoch": 1.1613460756405043, "grad_norm": 0.3191690444946289, "learning_rate": 9.476467620940205e-06, "loss": 0.3955, "step": 11423 }, { "epoch": 1.1614477429849532, "grad_norm": 0.31485357880592346, "learning_rate": 9.476309516223092e-06, "loss": 0.3588, "step": 11424 }, { "epoch": 1.1615494103294022, "grad_norm": 0.30562445521354675, "learning_rate": 9.476151388955395e-06, "loss": 0.3492, "step": 11425 }, { "epoch": 1.1616510776738511, "grad_norm": 0.29860469698905945, "learning_rate": 9.475993239137912e-06, "loss": 0.3461, "step": 11426 }, { "epoch": 1.1617527450183, "grad_norm": 0.30187976360321045, "learning_rate": 9.475835066771437e-06, "loss": 0.3933, "step": 11427 }, { "epoch": 1.161854412362749, "grad_norm": 0.3285617530345917, "learning_rate": 9.47567687185677e-06, "loss": 0.3407, "step": 11428 }, { "epoch": 1.1619560797071982, "grad_norm": 0.3132869005203247, "learning_rate": 9.475518654394706e-06, "loss": 0.3897, "step": 11429 }, { "epoch": 1.162057747051647, "grad_norm": 0.3181891441345215, "learning_rate": 9.475360414386043e-06, "loss": 0.3704, "step": 11430 }, { "epoch": 1.162159414396096, "grad_norm": 0.3199860751628876, "learning_rate": 9.475202151831577e-06, "loss": 0.3941, "step": 11431 }, { "epoch": 1.162261081740545, "grad_norm": 0.3241737484931946, "learning_rate": 9.475043866732107e-06, "loss": 0.3616, "step": 11432 }, { "epoch": 1.162362749084994, "grad_norm": 0.28577497601509094, "learning_rate": 9.474885559088429e-06, "loss": 0.3667, "step": 11433 }, { "epoch": 1.1624644164294429, "grad_norm": 0.3209310472011566, "learning_rate": 9.47472722890134e-06, "loss": 0.3742, "step": 11434 }, { "epoch": 1.1625660837738918, "grad_norm": 0.29342448711395264, "learning_rate": 9.47456887617164e-06, "loss": 0.3432, "step": 11435 }, { "epoch": 1.1626677511183408, "grad_norm": 0.32434359192848206, "learning_rate": 9.474410500900124e-06, "loss": 0.3795, "step": 11436 }, { "epoch": 1.1627694184627897, "grad_norm": 0.2985239028930664, "learning_rate": 9.474252103087591e-06, "loss": 0.3858, "step": 11437 }, { "epoch": 1.1628710858072386, "grad_norm": 0.28158095479011536, "learning_rate": 9.474093682734841e-06, "loss": 0.3788, "step": 11438 }, { "epoch": 1.1629727531516876, "grad_norm": 0.29749739170074463, "learning_rate": 9.47393523984267e-06, "loss": 0.3731, "step": 11439 }, { "epoch": 1.1630744204961365, "grad_norm": 0.28332391381263733, "learning_rate": 9.473776774411874e-06, "loss": 0.376, "step": 11440 }, { "epoch": 1.1631760878405857, "grad_norm": 0.3230383098125458, "learning_rate": 9.473618286443256e-06, "loss": 0.3578, "step": 11441 }, { "epoch": 1.1632777551850346, "grad_norm": 0.3100258708000183, "learning_rate": 9.47345977593761e-06, "loss": 0.3809, "step": 11442 }, { "epoch": 1.1633794225294836, "grad_norm": 0.3109044134616852, "learning_rate": 9.47330124289574e-06, "loss": 0.3641, "step": 11443 }, { "epoch": 1.1634810898739325, "grad_norm": 0.3180657625198364, "learning_rate": 9.47314268731844e-06, "loss": 0.3277, "step": 11444 }, { "epoch": 1.1635827572183814, "grad_norm": 0.3257911503314972, "learning_rate": 9.47298410920651e-06, "loss": 0.3208, "step": 11445 }, { "epoch": 1.1636844245628304, "grad_norm": 0.3310930132865906, "learning_rate": 9.472825508560748e-06, "loss": 0.3654, "step": 11446 }, { "epoch": 1.1637860919072793, "grad_norm": 0.323148638010025, "learning_rate": 9.472666885381954e-06, "loss": 0.3849, "step": 11447 }, { "epoch": 1.1638877592517283, "grad_norm": 0.3186066150665283, "learning_rate": 9.472508239670928e-06, "loss": 0.3109, "step": 11448 }, { "epoch": 1.1639894265961772, "grad_norm": 0.3355943560600281, "learning_rate": 9.472349571428467e-06, "loss": 0.3384, "step": 11449 }, { "epoch": 1.1640910939406264, "grad_norm": 0.3155287504196167, "learning_rate": 9.472190880655374e-06, "loss": 0.3838, "step": 11450 }, { "epoch": 1.1641927612850753, "grad_norm": 0.3079984486103058, "learning_rate": 9.472032167352446e-06, "loss": 0.3326, "step": 11451 }, { "epoch": 1.1642944286295243, "grad_norm": 0.3549392521381378, "learning_rate": 9.47187343152048e-06, "loss": 0.3693, "step": 11452 }, { "epoch": 1.1643960959739732, "grad_norm": 0.3043487071990967, "learning_rate": 9.471714673160281e-06, "loss": 0.3268, "step": 11453 }, { "epoch": 1.1644977633184221, "grad_norm": 0.3107861876487732, "learning_rate": 9.471555892272644e-06, "loss": 0.3697, "step": 11454 }, { "epoch": 1.164599430662871, "grad_norm": 0.27048543095588684, "learning_rate": 9.471397088858371e-06, "loss": 0.3584, "step": 11455 }, { "epoch": 1.16470109800732, "grad_norm": 0.3063165545463562, "learning_rate": 9.471238262918263e-06, "loss": 0.3401, "step": 11456 }, { "epoch": 1.164802765351769, "grad_norm": 0.3148036301136017, "learning_rate": 9.47107941445312e-06, "loss": 0.3474, "step": 11457 }, { "epoch": 1.164904432696218, "grad_norm": 0.28631749749183655, "learning_rate": 9.470920543463741e-06, "loss": 0.3655, "step": 11458 }, { "epoch": 1.1650061000406668, "grad_norm": 0.2989809215068817, "learning_rate": 9.470761649950926e-06, "loss": 0.4045, "step": 11459 }, { "epoch": 1.1651077673851158, "grad_norm": 0.2977702021598816, "learning_rate": 9.470602733915478e-06, "loss": 0.3617, "step": 11460 }, { "epoch": 1.165209434729565, "grad_norm": 0.32170289754867554, "learning_rate": 9.470443795358192e-06, "loss": 0.3758, "step": 11461 }, { "epoch": 1.165311102074014, "grad_norm": 0.2915477156639099, "learning_rate": 9.470284834279876e-06, "loss": 0.3385, "step": 11462 }, { "epoch": 1.1654127694184628, "grad_norm": 0.3073212504386902, "learning_rate": 9.470125850681326e-06, "loss": 0.3848, "step": 11463 }, { "epoch": 1.1655144367629118, "grad_norm": 0.28255799412727356, "learning_rate": 9.469966844563346e-06, "loss": 0.3337, "step": 11464 }, { "epoch": 1.1656161041073607, "grad_norm": 0.29607516527175903, "learning_rate": 9.469807815926732e-06, "loss": 0.405, "step": 11465 }, { "epoch": 1.1657177714518097, "grad_norm": 0.28352370858192444, "learning_rate": 9.46964876477229e-06, "loss": 0.3635, "step": 11466 }, { "epoch": 1.1658194387962586, "grad_norm": 0.3020259141921997, "learning_rate": 9.46948969110082e-06, "loss": 0.3536, "step": 11467 }, { "epoch": 1.1659211061407075, "grad_norm": 0.3583443760871887, "learning_rate": 9.469330594913124e-06, "loss": 0.3936, "step": 11468 }, { "epoch": 1.1660227734851565, "grad_norm": 0.3072604835033417, "learning_rate": 9.469171476210002e-06, "loss": 0.3874, "step": 11469 }, { "epoch": 1.1661244408296056, "grad_norm": 0.3150164484977722, "learning_rate": 9.469012334992254e-06, "loss": 0.3936, "step": 11470 }, { "epoch": 1.1662261081740546, "grad_norm": 0.3126640021800995, "learning_rate": 9.468853171260685e-06, "loss": 0.3657, "step": 11471 }, { "epoch": 1.1663277755185035, "grad_norm": 0.2784152626991272, "learning_rate": 9.468693985016097e-06, "loss": 0.3859, "step": 11472 }, { "epoch": 1.1664294428629525, "grad_norm": 0.3152649402618408, "learning_rate": 9.46853477625929e-06, "loss": 0.3915, "step": 11473 }, { "epoch": 1.1665311102074014, "grad_norm": 0.3220801055431366, "learning_rate": 9.468375544991067e-06, "loss": 0.3688, "step": 11474 }, { "epoch": 1.1666327775518504, "grad_norm": 0.3271262049674988, "learning_rate": 9.468216291212229e-06, "loss": 0.398, "step": 11475 }, { "epoch": 1.1667344448962993, "grad_norm": 0.3045859932899475, "learning_rate": 9.468057014923578e-06, "loss": 0.3899, "step": 11476 }, { "epoch": 1.1668361122407482, "grad_norm": 0.2957451641559601, "learning_rate": 9.46789771612592e-06, "loss": 0.3465, "step": 11477 }, { "epoch": 1.1669377795851972, "grad_norm": 0.29994428157806396, "learning_rate": 9.467738394820054e-06, "loss": 0.377, "step": 11478 }, { "epoch": 1.1670394469296461, "grad_norm": 0.314611554145813, "learning_rate": 9.467579051006784e-06, "loss": 0.3439, "step": 11479 }, { "epoch": 1.167141114274095, "grad_norm": 0.3029038608074188, "learning_rate": 9.46741968468691e-06, "loss": 0.3499, "step": 11480 }, { "epoch": 1.167242781618544, "grad_norm": 0.30311280488967896, "learning_rate": 9.46726029586124e-06, "loss": 0.3379, "step": 11481 }, { "epoch": 1.1673444489629932, "grad_norm": 0.28610801696777344, "learning_rate": 9.467100884530574e-06, "loss": 0.3822, "step": 11482 }, { "epoch": 1.167446116307442, "grad_norm": 0.2925962507724762, "learning_rate": 9.466941450695714e-06, "loss": 0.3448, "step": 11483 }, { "epoch": 1.167547783651891, "grad_norm": 0.3279688060283661, "learning_rate": 9.466781994357466e-06, "loss": 0.3586, "step": 11484 }, { "epoch": 1.16764945099634, "grad_norm": 0.3045162856578827, "learning_rate": 9.46662251551663e-06, "loss": 0.3646, "step": 11485 }, { "epoch": 1.167751118340789, "grad_norm": 0.29809609055519104, "learning_rate": 9.466463014174013e-06, "loss": 0.3635, "step": 11486 }, { "epoch": 1.1678527856852379, "grad_norm": 0.3144721984863281, "learning_rate": 9.466303490330415e-06, "loss": 0.3895, "step": 11487 }, { "epoch": 1.1679544530296868, "grad_norm": 0.30306586623191833, "learning_rate": 9.466143943986642e-06, "loss": 0.3545, "step": 11488 }, { "epoch": 1.1680561203741358, "grad_norm": 0.2744971513748169, "learning_rate": 9.465984375143498e-06, "loss": 0.3634, "step": 11489 }, { "epoch": 1.1681577877185847, "grad_norm": 0.30496853590011597, "learning_rate": 9.465824783801786e-06, "loss": 0.3818, "step": 11490 }, { "epoch": 1.1682594550630339, "grad_norm": 0.30381321907043457, "learning_rate": 9.46566516996231e-06, "loss": 0.3465, "step": 11491 }, { "epoch": 1.1683611224074828, "grad_norm": 0.29180672764778137, "learning_rate": 9.465505533625873e-06, "loss": 0.3435, "step": 11492 }, { "epoch": 1.1684627897519317, "grad_norm": 0.2986195981502533, "learning_rate": 9.46534587479328e-06, "loss": 0.3444, "step": 11493 }, { "epoch": 1.1685644570963807, "grad_norm": 0.34661826491355896, "learning_rate": 9.465186193465336e-06, "loss": 0.374, "step": 11494 }, { "epoch": 1.1686661244408296, "grad_norm": 0.29238295555114746, "learning_rate": 9.465026489642846e-06, "loss": 0.3336, "step": 11495 }, { "epoch": 1.1687677917852786, "grad_norm": 0.32290053367614746, "learning_rate": 9.464866763326614e-06, "loss": 0.3512, "step": 11496 }, { "epoch": 1.1688694591297275, "grad_norm": 0.2933572828769684, "learning_rate": 9.464707014517442e-06, "loss": 0.3344, "step": 11497 }, { "epoch": 1.1689711264741764, "grad_norm": 0.3046276569366455, "learning_rate": 9.464547243216138e-06, "loss": 0.372, "step": 11498 }, { "epoch": 1.1690727938186254, "grad_norm": 0.33890363574028015, "learning_rate": 9.464387449423506e-06, "loss": 0.3951, "step": 11499 }, { "epoch": 1.1691744611630743, "grad_norm": 0.31136107444763184, "learning_rate": 9.464227633140352e-06, "loss": 0.3577, "step": 11500 }, { "epoch": 1.1692761285075233, "grad_norm": 0.32568231225013733, "learning_rate": 9.464067794367479e-06, "loss": 0.3708, "step": 11501 }, { "epoch": 1.1693777958519724, "grad_norm": 0.3032433092594147, "learning_rate": 9.463907933105694e-06, "loss": 0.3463, "step": 11502 }, { "epoch": 1.1694794631964214, "grad_norm": 0.34852680563926697, "learning_rate": 9.4637480493558e-06, "loss": 0.3393, "step": 11503 }, { "epoch": 1.1695811305408703, "grad_norm": 0.26646947860717773, "learning_rate": 9.463588143118608e-06, "loss": 0.3562, "step": 11504 }, { "epoch": 1.1696827978853193, "grad_norm": 0.29624971747398376, "learning_rate": 9.463428214394916e-06, "loss": 0.3432, "step": 11505 }, { "epoch": 1.1697844652297682, "grad_norm": 0.3521418571472168, "learning_rate": 9.463268263185534e-06, "loss": 0.3509, "step": 11506 }, { "epoch": 1.1698861325742171, "grad_norm": 0.31872326135635376, "learning_rate": 9.463108289491268e-06, "loss": 0.3355, "step": 11507 }, { "epoch": 1.169987799918666, "grad_norm": 0.3082786500453949, "learning_rate": 9.462948293312922e-06, "loss": 0.342, "step": 11508 }, { "epoch": 1.170089467263115, "grad_norm": 0.3450118899345398, "learning_rate": 9.462788274651303e-06, "loss": 0.3533, "step": 11509 }, { "epoch": 1.170191134607564, "grad_norm": 0.32160648703575134, "learning_rate": 9.462628233507217e-06, "loss": 0.3659, "step": 11510 }, { "epoch": 1.1702928019520131, "grad_norm": 0.28491902351379395, "learning_rate": 9.462468169881472e-06, "loss": 0.3656, "step": 11511 }, { "epoch": 1.170394469296462, "grad_norm": 0.35196545720100403, "learning_rate": 9.462308083774871e-06, "loss": 0.3712, "step": 11512 }, { "epoch": 1.170496136640911, "grad_norm": 0.30242669582366943, "learning_rate": 9.462147975188223e-06, "loss": 0.3666, "step": 11513 }, { "epoch": 1.17059780398536, "grad_norm": 0.2892402410507202, "learning_rate": 9.461987844122335e-06, "loss": 0.4386, "step": 11514 }, { "epoch": 1.170699471329809, "grad_norm": 0.3140158951282501, "learning_rate": 9.461827690578012e-06, "loss": 0.3781, "step": 11515 }, { "epoch": 1.1708011386742578, "grad_norm": 0.3356620669364929, "learning_rate": 9.46166751455606e-06, "loss": 0.3624, "step": 11516 }, { "epoch": 1.1709028060187068, "grad_norm": 0.29723018407821655, "learning_rate": 9.461507316057288e-06, "loss": 0.3523, "step": 11517 }, { "epoch": 1.1710044733631557, "grad_norm": 0.3292672336101532, "learning_rate": 9.461347095082503e-06, "loss": 0.3854, "step": 11518 }, { "epoch": 1.1711061407076047, "grad_norm": 0.307336688041687, "learning_rate": 9.46118685163251e-06, "loss": 0.356, "step": 11519 }, { "epoch": 1.1712078080520536, "grad_norm": 0.3118605315685272, "learning_rate": 9.46102658570812e-06, "loss": 0.3321, "step": 11520 }, { "epoch": 1.1713094753965025, "grad_norm": 0.27972882986068726, "learning_rate": 9.460866297310138e-06, "loss": 0.3826, "step": 11521 }, { "epoch": 1.1714111427409515, "grad_norm": 0.2940123677253723, "learning_rate": 9.46070598643937e-06, "loss": 0.4114, "step": 11522 }, { "epoch": 1.1715128100854006, "grad_norm": 0.308502197265625, "learning_rate": 9.460545653096626e-06, "loss": 0.3505, "step": 11523 }, { "epoch": 1.1716144774298496, "grad_norm": 0.33928850293159485, "learning_rate": 9.460385297282712e-06, "loss": 0.4027, "step": 11524 }, { "epoch": 1.1717161447742985, "grad_norm": 0.30633410811424255, "learning_rate": 9.460224918998439e-06, "loss": 0.3909, "step": 11525 }, { "epoch": 1.1718178121187475, "grad_norm": 0.31408023834228516, "learning_rate": 9.460064518244611e-06, "loss": 0.3511, "step": 11526 }, { "epoch": 1.1719194794631964, "grad_norm": 0.34000593423843384, "learning_rate": 9.459904095022038e-06, "loss": 0.372, "step": 11527 }, { "epoch": 1.1720211468076454, "grad_norm": 0.3100394904613495, "learning_rate": 9.45974364933153e-06, "loss": 0.3916, "step": 11528 }, { "epoch": 1.1721228141520943, "grad_norm": 0.31413066387176514, "learning_rate": 9.459583181173892e-06, "loss": 0.4243, "step": 11529 }, { "epoch": 1.1722244814965432, "grad_norm": 0.3041127622127533, "learning_rate": 9.459422690549932e-06, "loss": 0.3288, "step": 11530 }, { "epoch": 1.1723261488409922, "grad_norm": 0.2964649200439453, "learning_rate": 9.459262177460462e-06, "loss": 0.3593, "step": 11531 }, { "epoch": 1.1724278161854413, "grad_norm": 0.3046327829360962, "learning_rate": 9.459101641906289e-06, "loss": 0.3469, "step": 11532 }, { "epoch": 1.1725294835298903, "grad_norm": 0.29848217964172363, "learning_rate": 9.458941083888221e-06, "loss": 0.3877, "step": 11533 }, { "epoch": 1.1726311508743392, "grad_norm": 0.3315858244895935, "learning_rate": 9.458780503407068e-06, "loss": 0.3438, "step": 11534 }, { "epoch": 1.1727328182187882, "grad_norm": 0.31768399477005005, "learning_rate": 9.458619900463636e-06, "loss": 0.3706, "step": 11535 }, { "epoch": 1.172834485563237, "grad_norm": 0.30445244908332825, "learning_rate": 9.45845927505874e-06, "loss": 0.3862, "step": 11536 }, { "epoch": 1.172936152907686, "grad_norm": 0.30951160192489624, "learning_rate": 9.458298627193183e-06, "loss": 0.389, "step": 11537 }, { "epoch": 1.173037820252135, "grad_norm": 0.33822301030158997, "learning_rate": 9.458137956867779e-06, "loss": 0.3415, "step": 11538 }, { "epoch": 1.173139487596584, "grad_norm": 0.3199053406715393, "learning_rate": 9.457977264083333e-06, "loss": 0.4185, "step": 11539 }, { "epoch": 1.1732411549410329, "grad_norm": 0.2863314747810364, "learning_rate": 9.45781654884066e-06, "loss": 0.3812, "step": 11540 }, { "epoch": 1.1733428222854818, "grad_norm": 0.30374041199684143, "learning_rate": 9.457655811140564e-06, "loss": 0.3995, "step": 11541 }, { "epoch": 1.1734444896299308, "grad_norm": 0.28599879145622253, "learning_rate": 9.457495050983859e-06, "loss": 0.3786, "step": 11542 }, { "epoch": 1.17354615697438, "grad_norm": 0.2996971011161804, "learning_rate": 9.457334268371352e-06, "loss": 0.358, "step": 11543 }, { "epoch": 1.1736478243188289, "grad_norm": 0.28331515192985535, "learning_rate": 9.457173463303855e-06, "loss": 0.3756, "step": 11544 }, { "epoch": 1.1737494916632778, "grad_norm": 0.2990296483039856, "learning_rate": 9.457012635782177e-06, "loss": 0.3783, "step": 11545 }, { "epoch": 1.1738511590077267, "grad_norm": 0.33692264556884766, "learning_rate": 9.456851785807127e-06, "loss": 0.3524, "step": 11546 }, { "epoch": 1.1739528263521757, "grad_norm": 0.30998045206069946, "learning_rate": 9.456690913379518e-06, "loss": 0.3586, "step": 11547 }, { "epoch": 1.1740544936966246, "grad_norm": 0.3037378191947937, "learning_rate": 9.45653001850016e-06, "loss": 0.3598, "step": 11548 }, { "epoch": 1.1741561610410736, "grad_norm": 0.32108813524246216, "learning_rate": 9.456369101169863e-06, "loss": 0.3883, "step": 11549 }, { "epoch": 1.1742578283855225, "grad_norm": 0.27310872077941895, "learning_rate": 9.456208161389436e-06, "loss": 0.3607, "step": 11550 }, { "epoch": 1.1743594957299714, "grad_norm": 0.30923715233802795, "learning_rate": 9.456047199159693e-06, "loss": 0.346, "step": 11551 }, { "epoch": 1.1744611630744206, "grad_norm": 0.3058222532272339, "learning_rate": 9.455886214481441e-06, "loss": 0.3469, "step": 11552 }, { "epoch": 1.1745628304188696, "grad_norm": 0.3132108449935913, "learning_rate": 9.455725207355493e-06, "loss": 0.377, "step": 11553 }, { "epoch": 1.1746644977633185, "grad_norm": 0.2970792353153229, "learning_rate": 9.455564177782662e-06, "loss": 0.4053, "step": 11554 }, { "epoch": 1.1747661651077674, "grad_norm": 0.30247026681900024, "learning_rate": 9.455403125763758e-06, "loss": 0.3685, "step": 11555 }, { "epoch": 1.1748678324522164, "grad_norm": 0.2858445346355438, "learning_rate": 9.45524205129959e-06, "loss": 0.3695, "step": 11556 }, { "epoch": 1.1749694997966653, "grad_norm": 0.2938213050365448, "learning_rate": 9.455080954390972e-06, "loss": 0.3775, "step": 11557 }, { "epoch": 1.1750711671411143, "grad_norm": 0.2997974455356598, "learning_rate": 9.454919835038715e-06, "loss": 0.3898, "step": 11558 }, { "epoch": 1.1751728344855632, "grad_norm": 0.3112221956253052, "learning_rate": 9.454758693243631e-06, "loss": 0.3685, "step": 11559 }, { "epoch": 1.1752745018300121, "grad_norm": 0.294075071811676, "learning_rate": 9.45459752900653e-06, "loss": 0.3574, "step": 11560 }, { "epoch": 1.175376169174461, "grad_norm": 0.2770339250564575, "learning_rate": 9.454436342328226e-06, "loss": 0.3714, "step": 11561 }, { "epoch": 1.17547783651891, "grad_norm": 0.2837502658367157, "learning_rate": 9.45427513320953e-06, "loss": 0.3613, "step": 11562 }, { "epoch": 1.175579503863359, "grad_norm": 0.30639079213142395, "learning_rate": 9.454113901651253e-06, "loss": 0.3444, "step": 11563 }, { "epoch": 1.1756811712078081, "grad_norm": 0.30948811769485474, "learning_rate": 9.453952647654212e-06, "loss": 0.3688, "step": 11564 }, { "epoch": 1.175782838552257, "grad_norm": 0.29054608941078186, "learning_rate": 9.453791371219214e-06, "loss": 0.3844, "step": 11565 }, { "epoch": 1.175884505896706, "grad_norm": 0.2817371189594269, "learning_rate": 9.453630072347074e-06, "loss": 0.3355, "step": 11566 }, { "epoch": 1.175986173241155, "grad_norm": 0.3300437331199646, "learning_rate": 9.453468751038604e-06, "loss": 0.3671, "step": 11567 }, { "epoch": 1.176087840585604, "grad_norm": 0.28349024057388306, "learning_rate": 9.453307407294616e-06, "loss": 0.3742, "step": 11568 }, { "epoch": 1.1761895079300528, "grad_norm": 0.29650625586509705, "learning_rate": 9.453146041115923e-06, "loss": 0.3536, "step": 11569 }, { "epoch": 1.1762911752745018, "grad_norm": 0.3112664520740509, "learning_rate": 9.452984652503339e-06, "loss": 0.3677, "step": 11570 }, { "epoch": 1.1763928426189507, "grad_norm": 0.34152543544769287, "learning_rate": 9.452823241457678e-06, "loss": 0.3927, "step": 11571 }, { "epoch": 1.1764945099633997, "grad_norm": 0.28663137555122375, "learning_rate": 9.452661807979751e-06, "loss": 0.3577, "step": 11572 }, { "epoch": 1.1765961773078488, "grad_norm": 0.30491921305656433, "learning_rate": 9.45250035207037e-06, "loss": 0.3627, "step": 11573 }, { "epoch": 1.1766978446522978, "grad_norm": 0.2912597060203552, "learning_rate": 9.452338873730352e-06, "loss": 0.3441, "step": 11574 }, { "epoch": 1.1767995119967467, "grad_norm": 0.2781059443950653, "learning_rate": 9.452177372960509e-06, "loss": 0.3467, "step": 11575 }, { "epoch": 1.1769011793411956, "grad_norm": 0.2994270324707031, "learning_rate": 9.452015849761653e-06, "loss": 0.3684, "step": 11576 }, { "epoch": 1.1770028466856446, "grad_norm": 0.3165968060493469, "learning_rate": 9.4518543041346e-06, "loss": 0.3683, "step": 11577 }, { "epoch": 1.1771045140300935, "grad_norm": 0.28733518719673157, "learning_rate": 9.451692736080164e-06, "loss": 0.3668, "step": 11578 }, { "epoch": 1.1772061813745425, "grad_norm": 0.2943083643913269, "learning_rate": 9.451531145599157e-06, "loss": 0.3547, "step": 11579 }, { "epoch": 1.1773078487189914, "grad_norm": 0.28349587321281433, "learning_rate": 9.451369532692394e-06, "loss": 0.3654, "step": 11580 }, { "epoch": 1.1774095160634404, "grad_norm": 0.29557597637176514, "learning_rate": 9.451207897360688e-06, "loss": 0.3558, "step": 11581 }, { "epoch": 1.1775111834078893, "grad_norm": 0.2758186161518097, "learning_rate": 9.451046239604856e-06, "loss": 0.3519, "step": 11582 }, { "epoch": 1.1776128507523382, "grad_norm": 0.28109824657440186, "learning_rate": 9.450884559425711e-06, "loss": 0.3485, "step": 11583 }, { "epoch": 1.1777145180967874, "grad_norm": 0.3247903287410736, "learning_rate": 9.450722856824066e-06, "loss": 0.3588, "step": 11584 }, { "epoch": 1.1778161854412363, "grad_norm": 0.3030831515789032, "learning_rate": 9.450561131800738e-06, "loss": 0.3427, "step": 11585 }, { "epoch": 1.1779178527856853, "grad_norm": 0.2856365442276001, "learning_rate": 9.450399384356539e-06, "loss": 0.3675, "step": 11586 }, { "epoch": 1.1780195201301342, "grad_norm": 0.2935337424278259, "learning_rate": 9.450237614492285e-06, "loss": 0.358, "step": 11587 }, { "epoch": 1.1781211874745832, "grad_norm": 0.3509039878845215, "learning_rate": 9.450075822208793e-06, "loss": 0.4187, "step": 11588 }, { "epoch": 1.178222854819032, "grad_norm": 0.290696918964386, "learning_rate": 9.449914007506878e-06, "loss": 0.3622, "step": 11589 }, { "epoch": 1.178324522163481, "grad_norm": 0.2862076461315155, "learning_rate": 9.449752170387351e-06, "loss": 0.3902, "step": 11590 }, { "epoch": 1.17842618950793, "grad_norm": 0.29266682267189026, "learning_rate": 9.449590310851032e-06, "loss": 0.3324, "step": 11591 }, { "epoch": 1.178527856852379, "grad_norm": 0.32088255882263184, "learning_rate": 9.449428428898732e-06, "loss": 0.357, "step": 11592 }, { "epoch": 1.178629524196828, "grad_norm": 0.327633261680603, "learning_rate": 9.449266524531271e-06, "loss": 0.3495, "step": 11593 }, { "epoch": 1.178731191541277, "grad_norm": 0.2781321108341217, "learning_rate": 9.449104597749462e-06, "loss": 0.3581, "step": 11594 }, { "epoch": 1.178832858885726, "grad_norm": 0.31642860174179077, "learning_rate": 9.448942648554122e-06, "loss": 0.403, "step": 11595 }, { "epoch": 1.178934526230175, "grad_norm": 0.30953964591026306, "learning_rate": 9.448780676946064e-06, "loss": 0.3692, "step": 11596 }, { "epoch": 1.1790361935746239, "grad_norm": 0.2916091978549957, "learning_rate": 9.448618682926108e-06, "loss": 0.3835, "step": 11597 }, { "epoch": 1.1791378609190728, "grad_norm": 0.3119176924228668, "learning_rate": 9.448456666495067e-06, "loss": 0.3721, "step": 11598 }, { "epoch": 1.1792395282635217, "grad_norm": 0.3194739818572998, "learning_rate": 9.44829462765376e-06, "loss": 0.3545, "step": 11599 }, { "epoch": 1.1793411956079707, "grad_norm": 0.29660919308662415, "learning_rate": 9.448132566403001e-06, "loss": 0.3649, "step": 11600 }, { "epoch": 1.1794428629524196, "grad_norm": 0.31625938415527344, "learning_rate": 9.447970482743607e-06, "loss": 0.3886, "step": 11601 }, { "epoch": 1.1795445302968686, "grad_norm": 0.3136926591396332, "learning_rate": 9.447808376676395e-06, "loss": 0.344, "step": 11602 }, { "epoch": 1.1796461976413175, "grad_norm": 0.3048457205295563, "learning_rate": 9.44764624820218e-06, "loss": 0.3797, "step": 11603 }, { "epoch": 1.1797478649857664, "grad_norm": 0.3224453330039978, "learning_rate": 9.447484097321782e-06, "loss": 0.3799, "step": 11604 }, { "epoch": 1.1798495323302156, "grad_norm": 0.30172067880630493, "learning_rate": 9.447321924036015e-06, "loss": 0.403, "step": 11605 }, { "epoch": 1.1799511996746646, "grad_norm": 0.3212781846523285, "learning_rate": 9.447159728345697e-06, "loss": 0.372, "step": 11606 }, { "epoch": 1.1800528670191135, "grad_norm": 0.3268807828426361, "learning_rate": 9.446997510251646e-06, "loss": 0.3385, "step": 11607 }, { "epoch": 1.1801545343635624, "grad_norm": 0.2873787581920624, "learning_rate": 9.446835269754677e-06, "loss": 0.3385, "step": 11608 }, { "epoch": 1.1802562017080114, "grad_norm": 0.31101858615875244, "learning_rate": 9.44667300685561e-06, "loss": 0.3455, "step": 11609 }, { "epoch": 1.1803578690524603, "grad_norm": 0.2915058732032776, "learning_rate": 9.44651072155526e-06, "loss": 0.3753, "step": 11610 }, { "epoch": 1.1804595363969093, "grad_norm": 0.32970139384269714, "learning_rate": 9.446348413854448e-06, "loss": 0.3566, "step": 11611 }, { "epoch": 1.1805612037413582, "grad_norm": 0.27598392963409424, "learning_rate": 9.446186083753985e-06, "loss": 0.3449, "step": 11612 }, { "epoch": 1.1806628710858071, "grad_norm": 0.3154236078262329, "learning_rate": 9.446023731254696e-06, "loss": 0.3762, "step": 11613 }, { "epoch": 1.1807645384302563, "grad_norm": 0.30949610471725464, "learning_rate": 9.445861356357395e-06, "loss": 0.3419, "step": 11614 }, { "epoch": 1.1808662057747052, "grad_norm": 0.29305967688560486, "learning_rate": 9.445698959062902e-06, "loss": 0.3382, "step": 11615 }, { "epoch": 1.1809678731191542, "grad_norm": 0.32815787196159363, "learning_rate": 9.445536539372034e-06, "loss": 0.3683, "step": 11616 }, { "epoch": 1.1810695404636031, "grad_norm": 0.3033996522426605, "learning_rate": 9.445374097285608e-06, "loss": 0.3603, "step": 11617 }, { "epoch": 1.181171207808052, "grad_norm": 0.2891389727592468, "learning_rate": 9.445211632804445e-06, "loss": 0.3786, "step": 11618 }, { "epoch": 1.181272875152501, "grad_norm": 0.35198190808296204, "learning_rate": 9.445049145929361e-06, "loss": 0.3649, "step": 11619 }, { "epoch": 1.18137454249695, "grad_norm": 0.29212215542793274, "learning_rate": 9.444886636661176e-06, "loss": 0.3666, "step": 11620 }, { "epoch": 1.181476209841399, "grad_norm": 0.29641246795654297, "learning_rate": 9.444724105000708e-06, "loss": 0.3692, "step": 11621 }, { "epoch": 1.1815778771858478, "grad_norm": 0.3130495250225067, "learning_rate": 9.444561550948778e-06, "loss": 0.3662, "step": 11622 }, { "epoch": 1.1816795445302968, "grad_norm": 0.2864047884941101, "learning_rate": 9.444398974506202e-06, "loss": 0.359, "step": 11623 }, { "epoch": 1.1817812118747457, "grad_norm": 0.3104359209537506, "learning_rate": 9.444236375673802e-06, "loss": 0.3798, "step": 11624 }, { "epoch": 1.1818828792191949, "grad_norm": 0.30364543199539185, "learning_rate": 9.444073754452391e-06, "loss": 0.3719, "step": 11625 }, { "epoch": 1.1819845465636438, "grad_norm": 0.2964743673801422, "learning_rate": 9.443911110842795e-06, "loss": 0.3826, "step": 11626 }, { "epoch": 1.1820862139080928, "grad_norm": 0.3163430094718933, "learning_rate": 9.443748444845831e-06, "loss": 0.3726, "step": 11627 }, { "epoch": 1.1821878812525417, "grad_norm": 0.3122285008430481, "learning_rate": 9.443585756462319e-06, "loss": 0.3711, "step": 11628 }, { "epoch": 1.1822895485969906, "grad_norm": 0.3100660443305969, "learning_rate": 9.443423045693077e-06, "loss": 0.3436, "step": 11629 }, { "epoch": 1.1823912159414396, "grad_norm": 0.29797857999801636, "learning_rate": 9.443260312538925e-06, "loss": 0.3463, "step": 11630 }, { "epoch": 1.1824928832858885, "grad_norm": 0.30576109886169434, "learning_rate": 9.443097557000685e-06, "loss": 0.3728, "step": 11631 }, { "epoch": 1.1825945506303375, "grad_norm": 0.3096979558467865, "learning_rate": 9.442934779079174e-06, "loss": 0.3759, "step": 11632 }, { "epoch": 1.1826962179747864, "grad_norm": 0.31022438406944275, "learning_rate": 9.442771978775213e-06, "loss": 0.3688, "step": 11633 }, { "epoch": 1.1827978853192356, "grad_norm": 0.30210548639297485, "learning_rate": 9.442609156089623e-06, "loss": 0.3831, "step": 11634 }, { "epoch": 1.1828995526636845, "grad_norm": 0.3059389293193817, "learning_rate": 9.442446311023226e-06, "loss": 0.3836, "step": 11635 }, { "epoch": 1.1830012200081335, "grad_norm": 0.3082640767097473, "learning_rate": 9.442283443576837e-06, "loss": 0.3598, "step": 11636 }, { "epoch": 1.1831028873525824, "grad_norm": 0.2783670127391815, "learning_rate": 9.442120553751281e-06, "loss": 0.3456, "step": 11637 }, { "epoch": 1.1832045546970313, "grad_norm": 0.2877557575702667, "learning_rate": 9.441957641547378e-06, "loss": 0.3303, "step": 11638 }, { "epoch": 1.1833062220414803, "grad_norm": 0.2917274534702301, "learning_rate": 9.441794706965948e-06, "loss": 0.3344, "step": 11639 }, { "epoch": 1.1834078893859292, "grad_norm": 0.31071949005126953, "learning_rate": 9.441631750007811e-06, "loss": 0.361, "step": 11640 }, { "epoch": 1.1835095567303782, "grad_norm": 0.3063974976539612, "learning_rate": 9.441468770673788e-06, "loss": 0.3725, "step": 11641 }, { "epoch": 1.183611224074827, "grad_norm": 0.2953355312347412, "learning_rate": 9.441305768964701e-06, "loss": 0.3676, "step": 11642 }, { "epoch": 1.183712891419276, "grad_norm": 0.3057916760444641, "learning_rate": 9.441142744881372e-06, "loss": 0.3906, "step": 11643 }, { "epoch": 1.183814558763725, "grad_norm": 0.2888805866241455, "learning_rate": 9.44097969842462e-06, "loss": 0.368, "step": 11644 }, { "epoch": 1.183916226108174, "grad_norm": 0.3001554608345032, "learning_rate": 9.440816629595268e-06, "loss": 0.3834, "step": 11645 }, { "epoch": 1.184017893452623, "grad_norm": 0.3053721487522125, "learning_rate": 9.440653538394137e-06, "loss": 0.392, "step": 11646 }, { "epoch": 1.184119560797072, "grad_norm": 0.3038465082645416, "learning_rate": 9.440490424822051e-06, "loss": 0.3572, "step": 11647 }, { "epoch": 1.184221228141521, "grad_norm": 0.3068102300167084, "learning_rate": 9.440327288879827e-06, "loss": 0.4024, "step": 11648 }, { "epoch": 1.18432289548597, "grad_norm": 0.2968759536743164, "learning_rate": 9.44016413056829e-06, "loss": 0.3618, "step": 11649 }, { "epoch": 1.1844245628304189, "grad_norm": 0.30336225032806396, "learning_rate": 9.440000949888262e-06, "loss": 0.3616, "step": 11650 }, { "epoch": 1.1845262301748678, "grad_norm": 0.3172636032104492, "learning_rate": 9.439837746840563e-06, "loss": 0.3644, "step": 11651 }, { "epoch": 1.1846278975193167, "grad_norm": 0.31075015664100647, "learning_rate": 9.439674521426016e-06, "loss": 0.361, "step": 11652 }, { "epoch": 1.1847295648637657, "grad_norm": 0.29289084672927856, "learning_rate": 9.439511273645446e-06, "loss": 0.3833, "step": 11653 }, { "epoch": 1.1848312322082148, "grad_norm": 0.2944355905056, "learning_rate": 9.439348003499672e-06, "loss": 0.3336, "step": 11654 }, { "epoch": 1.1849328995526638, "grad_norm": 0.29213377833366394, "learning_rate": 9.439184710989517e-06, "loss": 0.4002, "step": 11655 }, { "epoch": 1.1850345668971127, "grad_norm": 0.3012233376502991, "learning_rate": 9.439021396115806e-06, "loss": 0.3466, "step": 11656 }, { "epoch": 1.1851362342415617, "grad_norm": 0.30808985233306885, "learning_rate": 9.43885805887936e-06, "loss": 0.3548, "step": 11657 }, { "epoch": 1.1852379015860106, "grad_norm": 0.3031025826931, "learning_rate": 9.438694699281e-06, "loss": 0.3644, "step": 11658 }, { "epoch": 1.1853395689304596, "grad_norm": 0.29458338022232056, "learning_rate": 9.43853131732155e-06, "loss": 0.3504, "step": 11659 }, { "epoch": 1.1854412362749085, "grad_norm": 0.28506147861480713, "learning_rate": 9.438367913001837e-06, "loss": 0.3522, "step": 11660 }, { "epoch": 1.1855429036193574, "grad_norm": 0.3016749620437622, "learning_rate": 9.43820448632268e-06, "loss": 0.355, "step": 11661 }, { "epoch": 1.1856445709638064, "grad_norm": 0.3087240755558014, "learning_rate": 9.438041037284904e-06, "loss": 0.403, "step": 11662 }, { "epoch": 1.1857462383082553, "grad_norm": 0.3033910393714905, "learning_rate": 9.437877565889333e-06, "loss": 0.4011, "step": 11663 }, { "epoch": 1.1858479056527043, "grad_norm": 0.3116852641105652, "learning_rate": 9.437714072136786e-06, "loss": 0.3361, "step": 11664 }, { "epoch": 1.1859495729971532, "grad_norm": 0.29742616415023804, "learning_rate": 9.437550556028093e-06, "loss": 0.3814, "step": 11665 }, { "epoch": 1.1860512403416024, "grad_norm": 0.29521816968917847, "learning_rate": 9.437387017564073e-06, "loss": 0.3783, "step": 11666 }, { "epoch": 1.1861529076860513, "grad_norm": 0.3276427090167999, "learning_rate": 9.437223456745552e-06, "loss": 0.3788, "step": 11667 }, { "epoch": 1.1862545750305002, "grad_norm": 0.3034530580043793, "learning_rate": 9.437059873573355e-06, "loss": 0.3728, "step": 11668 }, { "epoch": 1.1863562423749492, "grad_norm": 0.2975657880306244, "learning_rate": 9.436896268048303e-06, "loss": 0.3693, "step": 11669 }, { "epoch": 1.1864579097193981, "grad_norm": 0.32269883155822754, "learning_rate": 9.436732640171225e-06, "loss": 0.3759, "step": 11670 }, { "epoch": 1.186559577063847, "grad_norm": 0.3486010730266571, "learning_rate": 9.43656898994294e-06, "loss": 0.383, "step": 11671 }, { "epoch": 1.186661244408296, "grad_norm": 0.27807801961898804, "learning_rate": 9.436405317364274e-06, "loss": 0.3667, "step": 11672 }, { "epoch": 1.186762911752745, "grad_norm": 0.29344213008880615, "learning_rate": 9.436241622436053e-06, "loss": 0.3452, "step": 11673 }, { "epoch": 1.186864579097194, "grad_norm": 0.28780415654182434, "learning_rate": 9.4360779051591e-06, "loss": 0.3718, "step": 11674 }, { "epoch": 1.186966246441643, "grad_norm": 0.319004088640213, "learning_rate": 9.435914165534243e-06, "loss": 0.3986, "step": 11675 }, { "epoch": 1.187067913786092, "grad_norm": 0.3111567199230194, "learning_rate": 9.435750403562302e-06, "loss": 0.3581, "step": 11676 }, { "epoch": 1.187169581130541, "grad_norm": 0.28849536180496216, "learning_rate": 9.435586619244107e-06, "loss": 0.3499, "step": 11677 }, { "epoch": 1.1872712484749899, "grad_norm": 0.3232800364494324, "learning_rate": 9.43542281258048e-06, "loss": 0.3773, "step": 11678 }, { "epoch": 1.1873729158194388, "grad_norm": 0.29476410150527954, "learning_rate": 9.435258983572247e-06, "loss": 0.3302, "step": 11679 }, { "epoch": 1.1874745831638878, "grad_norm": 0.3194718360900879, "learning_rate": 9.435095132220232e-06, "loss": 0.367, "step": 11680 }, { "epoch": 1.1875762505083367, "grad_norm": 0.32823890447616577, "learning_rate": 9.434931258525262e-06, "loss": 0.3416, "step": 11681 }, { "epoch": 1.1876779178527856, "grad_norm": 0.30128949880599976, "learning_rate": 9.434767362488163e-06, "loss": 0.3381, "step": 11682 }, { "epoch": 1.1877795851972346, "grad_norm": 0.3293398320674896, "learning_rate": 9.43460344410976e-06, "loss": 0.397, "step": 11683 }, { "epoch": 1.1878812525416835, "grad_norm": 0.3089536130428314, "learning_rate": 9.434439503390878e-06, "loss": 0.3474, "step": 11684 }, { "epoch": 1.1879829198861325, "grad_norm": 0.2953673303127289, "learning_rate": 9.434275540332343e-06, "loss": 0.3981, "step": 11685 }, { "epoch": 1.1880845872305814, "grad_norm": 0.3032476305961609, "learning_rate": 9.434111554934982e-06, "loss": 0.3936, "step": 11686 }, { "epoch": 1.1881862545750306, "grad_norm": 0.2888466417789459, "learning_rate": 9.433947547199621e-06, "loss": 0.3653, "step": 11687 }, { "epoch": 1.1882879219194795, "grad_norm": 0.2883830666542053, "learning_rate": 9.433783517127087e-06, "loss": 0.3684, "step": 11688 }, { "epoch": 1.1883895892639285, "grad_norm": 0.34596461057662964, "learning_rate": 9.433619464718204e-06, "loss": 0.3987, "step": 11689 }, { "epoch": 1.1884912566083774, "grad_norm": 0.30750182271003723, "learning_rate": 9.433455389973798e-06, "loss": 0.3659, "step": 11690 }, { "epoch": 1.1885929239528263, "grad_norm": 0.2761344313621521, "learning_rate": 9.4332912928947e-06, "loss": 0.3296, "step": 11691 }, { "epoch": 1.1886945912972753, "grad_norm": 0.3025124967098236, "learning_rate": 9.433127173481733e-06, "loss": 0.3454, "step": 11692 }, { "epoch": 1.1887962586417242, "grad_norm": 0.30372747778892517, "learning_rate": 9.432963031735726e-06, "loss": 0.3651, "step": 11693 }, { "epoch": 1.1888979259861732, "grad_norm": 0.27841565012931824, "learning_rate": 9.432798867657503e-06, "loss": 0.3817, "step": 11694 }, { "epoch": 1.1889995933306223, "grad_norm": 0.2857264280319214, "learning_rate": 9.432634681247894e-06, "loss": 0.3536, "step": 11695 }, { "epoch": 1.1891012606750713, "grad_norm": 0.28354784846305847, "learning_rate": 9.432470472507724e-06, "loss": 0.3154, "step": 11696 }, { "epoch": 1.1892029280195202, "grad_norm": 0.3137284219264984, "learning_rate": 9.432306241437822e-06, "loss": 0.352, "step": 11697 }, { "epoch": 1.1893045953639692, "grad_norm": 0.29907935857772827, "learning_rate": 9.432141988039011e-06, "loss": 0.3607, "step": 11698 }, { "epoch": 1.189406262708418, "grad_norm": 0.2784575819969177, "learning_rate": 9.431977712312126e-06, "loss": 0.3981, "step": 11699 }, { "epoch": 1.189507930052867, "grad_norm": 0.28286775946617126, "learning_rate": 9.431813414257989e-06, "loss": 0.3672, "step": 11700 }, { "epoch": 1.189609597397316, "grad_norm": 0.3194156885147095, "learning_rate": 9.43164909387743e-06, "loss": 0.3623, "step": 11701 }, { "epoch": 1.189711264741765, "grad_norm": 0.28664326667785645, "learning_rate": 9.431484751171273e-06, "loss": 0.3754, "step": 11702 }, { "epoch": 1.1898129320862139, "grad_norm": 0.2999981641769409, "learning_rate": 9.431320386140352e-06, "loss": 0.3604, "step": 11703 }, { "epoch": 1.1899145994306628, "grad_norm": 0.30933162569999695, "learning_rate": 9.431155998785492e-06, "loss": 0.3537, "step": 11704 }, { "epoch": 1.1900162667751117, "grad_norm": 0.32619842886924744, "learning_rate": 9.43099158910752e-06, "loss": 0.3566, "step": 11705 }, { "epoch": 1.1901179341195607, "grad_norm": 0.2959655225276947, "learning_rate": 9.430827157107265e-06, "loss": 0.3679, "step": 11706 }, { "epoch": 1.1902196014640098, "grad_norm": 0.33359527587890625, "learning_rate": 9.430662702785556e-06, "loss": 0.3505, "step": 11707 }, { "epoch": 1.1903212688084588, "grad_norm": 0.3133382201194763, "learning_rate": 9.430498226143221e-06, "loss": 0.3813, "step": 11708 }, { "epoch": 1.1904229361529077, "grad_norm": 0.30608826875686646, "learning_rate": 9.43033372718109e-06, "loss": 0.3357, "step": 11709 }, { "epoch": 1.1905246034973567, "grad_norm": 0.3488026559352875, "learning_rate": 9.430169205899991e-06, "loss": 0.3876, "step": 11710 }, { "epoch": 1.1906262708418056, "grad_norm": 0.2946476340293884, "learning_rate": 9.430004662300749e-06, "loss": 0.3315, "step": 11711 }, { "epoch": 1.1907279381862546, "grad_norm": 0.28722283244132996, "learning_rate": 9.4298400963842e-06, "loss": 0.3896, "step": 11712 }, { "epoch": 1.1908296055307035, "grad_norm": 0.28584110736846924, "learning_rate": 9.429675508151166e-06, "loss": 0.3732, "step": 11713 }, { "epoch": 1.1909312728751524, "grad_norm": 0.29010769724845886, "learning_rate": 9.42951089760248e-06, "loss": 0.3692, "step": 11714 }, { "epoch": 1.1910329402196014, "grad_norm": 0.27956917881965637, "learning_rate": 9.429346264738972e-06, "loss": 0.3308, "step": 11715 }, { "epoch": 1.1911346075640505, "grad_norm": 0.2857516407966614, "learning_rate": 9.429181609561471e-06, "loss": 0.3409, "step": 11716 }, { "epoch": 1.1912362749084995, "grad_norm": 0.2873759865760803, "learning_rate": 9.429016932070804e-06, "loss": 0.4055, "step": 11717 }, { "epoch": 1.1913379422529484, "grad_norm": 0.3038843274116516, "learning_rate": 9.428852232267802e-06, "loss": 0.4078, "step": 11718 }, { "epoch": 1.1914396095973974, "grad_norm": 0.3042912185192108, "learning_rate": 9.428687510153297e-06, "loss": 0.3476, "step": 11719 }, { "epoch": 1.1915412769418463, "grad_norm": 0.29769647121429443, "learning_rate": 9.428522765728115e-06, "loss": 0.3832, "step": 11720 }, { "epoch": 1.1916429442862952, "grad_norm": 0.2809305191040039, "learning_rate": 9.428357998993087e-06, "loss": 0.3718, "step": 11721 }, { "epoch": 1.1917446116307442, "grad_norm": 0.29001691937446594, "learning_rate": 9.428193209949045e-06, "loss": 0.3556, "step": 11722 }, { "epoch": 1.1918462789751931, "grad_norm": 0.28176385164260864, "learning_rate": 9.428028398596819e-06, "loss": 0.3809, "step": 11723 }, { "epoch": 1.191947946319642, "grad_norm": 0.30717676877975464, "learning_rate": 9.427863564937236e-06, "loss": 0.3611, "step": 11724 }, { "epoch": 1.192049613664091, "grad_norm": 0.2958742082118988, "learning_rate": 9.427698708971128e-06, "loss": 0.3437, "step": 11725 }, { "epoch": 1.19215128100854, "grad_norm": 0.28876474499702454, "learning_rate": 9.427533830699327e-06, "loss": 0.3402, "step": 11726 }, { "epoch": 1.1922529483529891, "grad_norm": 0.28521329164505005, "learning_rate": 9.427368930122664e-06, "loss": 0.3532, "step": 11727 }, { "epoch": 1.192354615697438, "grad_norm": 0.3035869598388672, "learning_rate": 9.427204007241967e-06, "loss": 0.3666, "step": 11728 }, { "epoch": 1.192456283041887, "grad_norm": 0.29402831196784973, "learning_rate": 9.427039062058068e-06, "loss": 0.3729, "step": 11729 }, { "epoch": 1.192557950386336, "grad_norm": 0.30550265312194824, "learning_rate": 9.4268740945718e-06, "loss": 0.3382, "step": 11730 }, { "epoch": 1.1926596177307849, "grad_norm": 0.31035059690475464, "learning_rate": 9.426709104783992e-06, "loss": 0.3901, "step": 11731 }, { "epoch": 1.1927612850752338, "grad_norm": 0.3069576323032379, "learning_rate": 9.426544092695473e-06, "loss": 0.321, "step": 11732 }, { "epoch": 1.1928629524196828, "grad_norm": 0.3063557744026184, "learning_rate": 9.426379058307081e-06, "loss": 0.3477, "step": 11733 }, { "epoch": 1.1929646197641317, "grad_norm": 0.30821001529693604, "learning_rate": 9.426214001619638e-06, "loss": 0.3689, "step": 11734 }, { "epoch": 1.1930662871085806, "grad_norm": 0.30054712295532227, "learning_rate": 9.426048922633984e-06, "loss": 0.3704, "step": 11735 }, { "epoch": 1.1931679544530298, "grad_norm": 0.2840740382671356, "learning_rate": 9.425883821350948e-06, "loss": 0.3586, "step": 11736 }, { "epoch": 1.1932696217974788, "grad_norm": 0.2829468250274658, "learning_rate": 9.425718697771359e-06, "loss": 0.3691, "step": 11737 }, { "epoch": 1.1933712891419277, "grad_norm": 0.2847449779510498, "learning_rate": 9.425553551896051e-06, "loss": 0.3752, "step": 11738 }, { "epoch": 1.1934729564863766, "grad_norm": 0.3142753541469574, "learning_rate": 9.425388383725858e-06, "loss": 0.3578, "step": 11739 }, { "epoch": 1.1935746238308256, "grad_norm": 0.2771579921245575, "learning_rate": 9.425223193261608e-06, "loss": 0.3636, "step": 11740 }, { "epoch": 1.1936762911752745, "grad_norm": 0.2831900119781494, "learning_rate": 9.425057980504135e-06, "loss": 0.3697, "step": 11741 }, { "epoch": 1.1937779585197235, "grad_norm": 0.3068799376487732, "learning_rate": 9.424892745454273e-06, "loss": 0.3724, "step": 11742 }, { "epoch": 1.1938796258641724, "grad_norm": 0.2994384169578552, "learning_rate": 9.424727488112853e-06, "loss": 0.3533, "step": 11743 }, { "epoch": 1.1939812932086213, "grad_norm": 0.3259347379207611, "learning_rate": 9.424562208480706e-06, "loss": 0.3645, "step": 11744 }, { "epoch": 1.1940829605530703, "grad_norm": 0.28368908166885376, "learning_rate": 9.424396906558667e-06, "loss": 0.3352, "step": 11745 }, { "epoch": 1.1941846278975192, "grad_norm": 0.28019019961357117, "learning_rate": 9.424231582347565e-06, "loss": 0.3611, "step": 11746 }, { "epoch": 1.1942862952419682, "grad_norm": 0.29806190729141235, "learning_rate": 9.424066235848239e-06, "loss": 0.3589, "step": 11747 }, { "epoch": 1.1943879625864173, "grad_norm": 0.29862862825393677, "learning_rate": 9.423900867061517e-06, "loss": 0.3851, "step": 11748 }, { "epoch": 1.1944896299308663, "grad_norm": 0.27145272493362427, "learning_rate": 9.423735475988233e-06, "loss": 0.3995, "step": 11749 }, { "epoch": 1.1945912972753152, "grad_norm": 0.27829068899154663, "learning_rate": 9.423570062629223e-06, "loss": 0.3429, "step": 11750 }, { "epoch": 1.1946929646197642, "grad_norm": 0.2943591773509979, "learning_rate": 9.423404626985317e-06, "loss": 0.3563, "step": 11751 }, { "epoch": 1.194794631964213, "grad_norm": 0.2681278884410858, "learning_rate": 9.42323916905735e-06, "loss": 0.3595, "step": 11752 }, { "epoch": 1.194896299308662, "grad_norm": 0.30346783995628357, "learning_rate": 9.423073688846155e-06, "loss": 0.3997, "step": 11753 }, { "epoch": 1.194997966653111, "grad_norm": 0.3096378743648529, "learning_rate": 9.422908186352567e-06, "loss": 0.3658, "step": 11754 }, { "epoch": 1.19509963399756, "grad_norm": 0.29974180459976196, "learning_rate": 9.422742661577418e-06, "loss": 0.3649, "step": 11755 }, { "epoch": 1.1952013013420089, "grad_norm": 0.2996162176132202, "learning_rate": 9.422577114521542e-06, "loss": 0.3633, "step": 11756 }, { "epoch": 1.195302968686458, "grad_norm": 0.2981005012989044, "learning_rate": 9.422411545185773e-06, "loss": 0.364, "step": 11757 }, { "epoch": 1.195404636030907, "grad_norm": 0.3302764892578125, "learning_rate": 9.422245953570946e-06, "loss": 0.3678, "step": 11758 }, { "epoch": 1.195506303375356, "grad_norm": 0.32846155762672424, "learning_rate": 9.422080339677896e-06, "loss": 0.3897, "step": 11759 }, { "epoch": 1.1956079707198048, "grad_norm": 0.3018782436847687, "learning_rate": 9.421914703507455e-06, "loss": 0.3571, "step": 11760 }, { "epoch": 1.1957096380642538, "grad_norm": 0.2999889850616455, "learning_rate": 9.421749045060458e-06, "loss": 0.396, "step": 11761 }, { "epoch": 1.1958113054087027, "grad_norm": 0.2993874251842499, "learning_rate": 9.42158336433774e-06, "loss": 0.3872, "step": 11762 }, { "epoch": 1.1959129727531517, "grad_norm": 0.3449292480945587, "learning_rate": 9.421417661340136e-06, "loss": 0.3516, "step": 11763 }, { "epoch": 1.1960146400976006, "grad_norm": 0.31295251846313477, "learning_rate": 9.421251936068482e-06, "loss": 0.4048, "step": 11764 }, { "epoch": 1.1961163074420496, "grad_norm": 0.29065096378326416, "learning_rate": 9.42108618852361e-06, "loss": 0.3612, "step": 11765 }, { "epoch": 1.1962179747864985, "grad_norm": 0.3511277735233307, "learning_rate": 9.420920418706356e-06, "loss": 0.3634, "step": 11766 }, { "epoch": 1.1963196421309474, "grad_norm": 0.35936665534973145, "learning_rate": 9.420754626617556e-06, "loss": 0.3653, "step": 11767 }, { "epoch": 1.1964213094753966, "grad_norm": 0.31243059039115906, "learning_rate": 9.420588812258045e-06, "loss": 0.3467, "step": 11768 }, { "epoch": 1.1965229768198455, "grad_norm": 0.3089143633842468, "learning_rate": 9.420422975628658e-06, "loss": 0.364, "step": 11769 }, { "epoch": 1.1966246441642945, "grad_norm": 0.3454590439796448, "learning_rate": 9.420257116730231e-06, "loss": 0.4156, "step": 11770 }, { "epoch": 1.1967263115087434, "grad_norm": 0.2852253317832947, "learning_rate": 9.420091235563598e-06, "loss": 0.3806, "step": 11771 }, { "epoch": 1.1968279788531924, "grad_norm": 0.3033979833126068, "learning_rate": 9.419925332129595e-06, "loss": 0.3983, "step": 11772 }, { "epoch": 1.1969296461976413, "grad_norm": 0.311560720205307, "learning_rate": 9.419759406429061e-06, "loss": 0.3488, "step": 11773 }, { "epoch": 1.1970313135420902, "grad_norm": 0.3037337064743042, "learning_rate": 9.419593458462828e-06, "loss": 0.3588, "step": 11774 }, { "epoch": 1.1971329808865392, "grad_norm": 0.29520902037620544, "learning_rate": 9.419427488231734e-06, "loss": 0.3513, "step": 11775 }, { "epoch": 1.1972346482309881, "grad_norm": 0.29550454020500183, "learning_rate": 9.419261495736613e-06, "loss": 0.4038, "step": 11776 }, { "epoch": 1.1973363155754373, "grad_norm": 0.29715532064437866, "learning_rate": 9.419095480978303e-06, "loss": 0.361, "step": 11777 }, { "epoch": 1.1974379829198862, "grad_norm": 0.35555300116539, "learning_rate": 9.418929443957643e-06, "loss": 0.3889, "step": 11778 }, { "epoch": 1.1975396502643352, "grad_norm": 0.30120667815208435, "learning_rate": 9.418763384675465e-06, "loss": 0.3781, "step": 11779 }, { "epoch": 1.1976413176087841, "grad_norm": 0.29838377237319946, "learning_rate": 9.418597303132606e-06, "loss": 0.3871, "step": 11780 }, { "epoch": 1.197742984953233, "grad_norm": 0.2844332158565521, "learning_rate": 9.418431199329905e-06, "loss": 0.3779, "step": 11781 }, { "epoch": 1.197844652297682, "grad_norm": 0.2753586173057556, "learning_rate": 9.418265073268198e-06, "loss": 0.3642, "step": 11782 }, { "epoch": 1.197946319642131, "grad_norm": 0.2700783312320709, "learning_rate": 9.418098924948318e-06, "loss": 0.3762, "step": 11783 }, { "epoch": 1.1980479869865799, "grad_norm": 0.27074992656707764, "learning_rate": 9.417932754371108e-06, "loss": 0.3591, "step": 11784 }, { "epoch": 1.1981496543310288, "grad_norm": 0.2890259921550751, "learning_rate": 9.417766561537404e-06, "loss": 0.3587, "step": 11785 }, { "epoch": 1.1982513216754778, "grad_norm": 0.28686290979385376, "learning_rate": 9.41760034644804e-06, "loss": 0.3764, "step": 11786 }, { "epoch": 1.1983529890199267, "grad_norm": 0.27059412002563477, "learning_rate": 9.417434109103856e-06, "loss": 0.3864, "step": 11787 }, { "epoch": 1.1984546563643756, "grad_norm": 0.30586275458335876, "learning_rate": 9.417267849505689e-06, "loss": 0.391, "step": 11788 }, { "epoch": 1.1985563237088248, "grad_norm": 0.27288344502449036, "learning_rate": 9.417101567654376e-06, "loss": 0.3867, "step": 11789 }, { "epoch": 1.1986579910532738, "grad_norm": 0.3118334710597992, "learning_rate": 9.416935263550754e-06, "loss": 0.3626, "step": 11790 }, { "epoch": 1.1987596583977227, "grad_norm": 0.30110254883766174, "learning_rate": 9.416768937195663e-06, "loss": 0.3765, "step": 11791 }, { "epoch": 1.1988613257421716, "grad_norm": 0.26274916529655457, "learning_rate": 9.41660258858994e-06, "loss": 0.3377, "step": 11792 }, { "epoch": 1.1989629930866206, "grad_norm": 0.28671979904174805, "learning_rate": 9.416436217734422e-06, "loss": 0.3531, "step": 11793 }, { "epoch": 1.1990646604310695, "grad_norm": 0.2999805510044098, "learning_rate": 9.416269824629947e-06, "loss": 0.3472, "step": 11794 }, { "epoch": 1.1991663277755185, "grad_norm": 0.27486029267311096, "learning_rate": 9.416103409277355e-06, "loss": 0.3756, "step": 11795 }, { "epoch": 1.1992679951199674, "grad_norm": 0.3242053985595703, "learning_rate": 9.415936971677484e-06, "loss": 0.3638, "step": 11796 }, { "epoch": 1.1993696624644163, "grad_norm": 0.31459805369377136, "learning_rate": 9.41577051183117e-06, "loss": 0.3432, "step": 11797 }, { "epoch": 1.1994713298088655, "grad_norm": 0.3006092309951782, "learning_rate": 9.415604029739255e-06, "loss": 0.3943, "step": 11798 }, { "epoch": 1.1995729971533144, "grad_norm": 0.31327342987060547, "learning_rate": 9.415437525402575e-06, "loss": 0.3909, "step": 11799 }, { "epoch": 1.1996746644977634, "grad_norm": 0.3338366746902466, "learning_rate": 9.415270998821972e-06, "loss": 0.3467, "step": 11800 }, { "epoch": 1.1997763318422123, "grad_norm": 0.31958824396133423, "learning_rate": 9.415104449998283e-06, "loss": 0.3504, "step": 11801 }, { "epoch": 1.1998779991866613, "grad_norm": 0.2929531931877136, "learning_rate": 9.414937878932345e-06, "loss": 0.374, "step": 11802 }, { "epoch": 1.1999796665311102, "grad_norm": 0.3639238476753235, "learning_rate": 9.414771285625e-06, "loss": 0.3926, "step": 11803 }, { "epoch": 1.2000813338755592, "grad_norm": 0.30346763134002686, "learning_rate": 9.414604670077086e-06, "loss": 0.3353, "step": 11804 }, { "epoch": 1.200183001220008, "grad_norm": 0.3247007429599762, "learning_rate": 9.414438032289443e-06, "loss": 0.3921, "step": 11805 }, { "epoch": 1.200284668564457, "grad_norm": 0.29137662053108215, "learning_rate": 9.414271372262911e-06, "loss": 0.3566, "step": 11806 }, { "epoch": 1.200386335908906, "grad_norm": 0.329925000667572, "learning_rate": 9.414104689998327e-06, "loss": 0.3755, "step": 11807 }, { "epoch": 1.200488003253355, "grad_norm": 0.2877279222011566, "learning_rate": 9.413937985496535e-06, "loss": 0.3522, "step": 11808 }, { "epoch": 1.200589670597804, "grad_norm": 0.27767524123191833, "learning_rate": 9.41377125875837e-06, "loss": 0.3399, "step": 11809 }, { "epoch": 1.200691337942253, "grad_norm": 0.2786658704280853, "learning_rate": 9.413604509784675e-06, "loss": 0.3896, "step": 11810 }, { "epoch": 1.200793005286702, "grad_norm": 0.2919282019138336, "learning_rate": 9.41343773857629e-06, "loss": 0.3761, "step": 11811 }, { "epoch": 1.200894672631151, "grad_norm": 0.2912857234477997, "learning_rate": 9.413270945134055e-06, "loss": 0.3631, "step": 11812 }, { "epoch": 1.2009963399755998, "grad_norm": 0.281605064868927, "learning_rate": 9.413104129458807e-06, "loss": 0.36, "step": 11813 }, { "epoch": 1.2010980073200488, "grad_norm": 0.3100000321865082, "learning_rate": 9.412937291551392e-06, "loss": 0.3674, "step": 11814 }, { "epoch": 1.2011996746644977, "grad_norm": 0.27817901968955994, "learning_rate": 9.412770431412647e-06, "loss": 0.3253, "step": 11815 }, { "epoch": 1.2013013420089467, "grad_norm": 0.2962631583213806, "learning_rate": 9.412603549043413e-06, "loss": 0.3559, "step": 11816 }, { "epoch": 1.2014030093533956, "grad_norm": 0.3146103322505951, "learning_rate": 9.41243664444453e-06, "loss": 0.3672, "step": 11817 }, { "epoch": 1.2015046766978448, "grad_norm": 0.3011731505393982, "learning_rate": 9.41226971761684e-06, "loss": 0.365, "step": 11818 }, { "epoch": 1.2016063440422937, "grad_norm": 0.31738895177841187, "learning_rate": 9.412102768561184e-06, "loss": 0.3723, "step": 11819 }, { "epoch": 1.2017080113867427, "grad_norm": 0.29336056113243103, "learning_rate": 9.411935797278403e-06, "loss": 0.3402, "step": 11820 }, { "epoch": 1.2018096787311916, "grad_norm": 0.29740282893180847, "learning_rate": 9.411768803769338e-06, "loss": 0.3529, "step": 11821 }, { "epoch": 1.2019113460756405, "grad_norm": 0.33265867829322815, "learning_rate": 9.41160178803483e-06, "loss": 0.3926, "step": 11822 }, { "epoch": 1.2020130134200895, "grad_norm": 0.3012704849243164, "learning_rate": 9.411434750075719e-06, "loss": 0.3431, "step": 11823 }, { "epoch": 1.2021146807645384, "grad_norm": 0.3463961184024811, "learning_rate": 9.41126768989285e-06, "loss": 0.3842, "step": 11824 }, { "epoch": 1.2022163481089874, "grad_norm": 0.36988216638565063, "learning_rate": 9.411100607487063e-06, "loss": 0.3164, "step": 11825 }, { "epoch": 1.2023180154534363, "grad_norm": 0.30113843083381653, "learning_rate": 9.410933502859199e-06, "loss": 0.3965, "step": 11826 }, { "epoch": 1.2024196827978852, "grad_norm": 0.2972545921802521, "learning_rate": 9.410766376010099e-06, "loss": 0.3794, "step": 11827 }, { "epoch": 1.2025213501423342, "grad_norm": 0.3417249023914337, "learning_rate": 9.410599226940606e-06, "loss": 0.3453, "step": 11828 }, { "epoch": 1.2026230174867831, "grad_norm": 0.2929476797580719, "learning_rate": 9.410432055651564e-06, "loss": 0.3775, "step": 11829 }, { "epoch": 1.2027246848312323, "grad_norm": 0.29517310857772827, "learning_rate": 9.410264862143812e-06, "loss": 0.3434, "step": 11830 }, { "epoch": 1.2028263521756812, "grad_norm": 0.27737846970558167, "learning_rate": 9.410097646418195e-06, "loss": 0.3762, "step": 11831 }, { "epoch": 1.2029280195201302, "grad_norm": 0.303134024143219, "learning_rate": 9.409930408475552e-06, "loss": 0.3453, "step": 11832 }, { "epoch": 1.2030296868645791, "grad_norm": 0.27620530128479004, "learning_rate": 9.40976314831673e-06, "loss": 0.378, "step": 11833 }, { "epoch": 1.203131354209028, "grad_norm": 0.2721123993396759, "learning_rate": 9.409595865942567e-06, "loss": 0.3535, "step": 11834 }, { "epoch": 1.203233021553477, "grad_norm": 0.28367459774017334, "learning_rate": 9.409428561353908e-06, "loss": 0.3711, "step": 11835 }, { "epoch": 1.203334688897926, "grad_norm": 0.29778924584388733, "learning_rate": 9.409261234551597e-06, "loss": 0.3729, "step": 11836 }, { "epoch": 1.2034363562423749, "grad_norm": 0.2890763282775879, "learning_rate": 9.409093885536475e-06, "loss": 0.3601, "step": 11837 }, { "epoch": 1.2035380235868238, "grad_norm": 0.2936527729034424, "learning_rate": 9.408926514309386e-06, "loss": 0.3736, "step": 11838 }, { "epoch": 1.203639690931273, "grad_norm": 0.2977030873298645, "learning_rate": 9.408759120871173e-06, "loss": 0.3802, "step": 11839 }, { "epoch": 1.203741358275722, "grad_norm": 0.28553062677383423, "learning_rate": 9.40859170522268e-06, "loss": 0.3561, "step": 11840 }, { "epoch": 1.2038430256201709, "grad_norm": 0.2906327545642853, "learning_rate": 9.408424267364749e-06, "loss": 0.3318, "step": 11841 }, { "epoch": 1.2039446929646198, "grad_norm": 0.29309338331222534, "learning_rate": 9.408256807298224e-06, "loss": 0.3308, "step": 11842 }, { "epoch": 1.2040463603090688, "grad_norm": 0.35383954644203186, "learning_rate": 9.408089325023947e-06, "loss": 0.3505, "step": 11843 }, { "epoch": 1.2041480276535177, "grad_norm": 0.30416449904441833, "learning_rate": 9.407921820542767e-06, "loss": 0.3581, "step": 11844 }, { "epoch": 1.2042496949979666, "grad_norm": 0.3390044569969177, "learning_rate": 9.407754293855522e-06, "loss": 0.3641, "step": 11845 }, { "epoch": 1.2043513623424156, "grad_norm": 0.3364981710910797, "learning_rate": 9.40758674496306e-06, "loss": 0.3483, "step": 11846 }, { "epoch": 1.2044530296868645, "grad_norm": 0.32953134179115295, "learning_rate": 9.40741917386622e-06, "loss": 0.3509, "step": 11847 }, { "epoch": 1.2045546970313135, "grad_norm": 0.3192783296108246, "learning_rate": 9.407251580565853e-06, "loss": 0.3834, "step": 11848 }, { "epoch": 1.2046563643757624, "grad_norm": 0.32798758149147034, "learning_rate": 9.4070839650628e-06, "loss": 0.3509, "step": 11849 }, { "epoch": 1.2047580317202116, "grad_norm": 0.32910284399986267, "learning_rate": 9.406916327357902e-06, "loss": 0.3465, "step": 11850 }, { "epoch": 1.2048596990646605, "grad_norm": 0.30590489506721497, "learning_rate": 9.406748667452009e-06, "loss": 0.3796, "step": 11851 }, { "epoch": 1.2049613664091094, "grad_norm": 0.2771322727203369, "learning_rate": 9.406580985345963e-06, "loss": 0.3179, "step": 11852 }, { "epoch": 1.2050630337535584, "grad_norm": 0.30913034081459045, "learning_rate": 9.40641328104061e-06, "loss": 0.3729, "step": 11853 }, { "epoch": 1.2051647010980073, "grad_norm": 0.3124108612537384, "learning_rate": 9.406245554536793e-06, "loss": 0.3554, "step": 11854 }, { "epoch": 1.2052663684424563, "grad_norm": 0.29432663321495056, "learning_rate": 9.406077805835359e-06, "loss": 0.3965, "step": 11855 }, { "epoch": 1.2053680357869052, "grad_norm": 0.28915125131607056, "learning_rate": 9.405910034937151e-06, "loss": 0.3715, "step": 11856 }, { "epoch": 1.2054697031313542, "grad_norm": 0.3278309106826782, "learning_rate": 9.405742241843016e-06, "loss": 0.355, "step": 11857 }, { "epoch": 1.205571370475803, "grad_norm": 0.2853618860244751, "learning_rate": 9.405574426553799e-06, "loss": 0.3466, "step": 11858 }, { "epoch": 1.2056730378202523, "grad_norm": 0.2942383587360382, "learning_rate": 9.405406589070343e-06, "loss": 0.401, "step": 11859 }, { "epoch": 1.2057747051647012, "grad_norm": 0.30314934253692627, "learning_rate": 9.405238729393496e-06, "loss": 0.3719, "step": 11860 }, { "epoch": 1.2058763725091501, "grad_norm": 0.2973063588142395, "learning_rate": 9.405070847524105e-06, "loss": 0.3368, "step": 11861 }, { "epoch": 1.205978039853599, "grad_norm": 0.29813387989997864, "learning_rate": 9.404902943463012e-06, "loss": 0.3642, "step": 11862 }, { "epoch": 1.206079707198048, "grad_norm": 0.297528475522995, "learning_rate": 9.404735017211066e-06, "loss": 0.3842, "step": 11863 }, { "epoch": 1.206181374542497, "grad_norm": 0.30409976840019226, "learning_rate": 9.40456706876911e-06, "loss": 0.3446, "step": 11864 }, { "epoch": 1.206283041886946, "grad_norm": 0.30874383449554443, "learning_rate": 9.404399098137993e-06, "loss": 0.3311, "step": 11865 }, { "epoch": 1.2063847092313948, "grad_norm": 0.27736568450927734, "learning_rate": 9.404231105318559e-06, "loss": 0.3645, "step": 11866 }, { "epoch": 1.2064863765758438, "grad_norm": 0.2961141765117645, "learning_rate": 9.404063090311656e-06, "loss": 0.3594, "step": 11867 }, { "epoch": 1.2065880439202927, "grad_norm": 0.29732221364974976, "learning_rate": 9.403895053118132e-06, "loss": 0.3595, "step": 11868 }, { "epoch": 1.2066897112647417, "grad_norm": 0.29879212379455566, "learning_rate": 9.403726993738829e-06, "loss": 0.3851, "step": 11869 }, { "epoch": 1.2067913786091906, "grad_norm": 0.32379916310310364, "learning_rate": 9.403558912174595e-06, "loss": 0.4117, "step": 11870 }, { "epoch": 1.2068930459536398, "grad_norm": 0.2676982581615448, "learning_rate": 9.40339080842628e-06, "loss": 0.3595, "step": 11871 }, { "epoch": 1.2069947132980887, "grad_norm": 0.29787707328796387, "learning_rate": 9.403222682494726e-06, "loss": 0.3682, "step": 11872 }, { "epoch": 1.2070963806425377, "grad_norm": 0.30939599871635437, "learning_rate": 9.403054534380785e-06, "loss": 0.3846, "step": 11873 }, { "epoch": 1.2071980479869866, "grad_norm": 0.29522526264190674, "learning_rate": 9.402886364085299e-06, "loss": 0.3943, "step": 11874 }, { "epoch": 1.2072997153314355, "grad_norm": 0.2974189221858978, "learning_rate": 9.40271817160912e-06, "loss": 0.3772, "step": 11875 }, { "epoch": 1.2074013826758845, "grad_norm": 0.3082066476345062, "learning_rate": 9.402549956953091e-06, "loss": 0.3329, "step": 11876 }, { "epoch": 1.2075030500203334, "grad_norm": 0.29691365361213684, "learning_rate": 9.402381720118063e-06, "loss": 0.3732, "step": 11877 }, { "epoch": 1.2076047173647824, "grad_norm": 0.2979975640773773, "learning_rate": 9.402213461104882e-06, "loss": 0.3664, "step": 11878 }, { "epoch": 1.2077063847092313, "grad_norm": 0.279981404542923, "learning_rate": 9.402045179914396e-06, "loss": 0.3478, "step": 11879 }, { "epoch": 1.2078080520536805, "grad_norm": 0.26789364218711853, "learning_rate": 9.401876876547451e-06, "loss": 0.3651, "step": 11880 }, { "epoch": 1.2079097193981294, "grad_norm": 0.2854048013687134, "learning_rate": 9.401708551004897e-06, "loss": 0.3505, "step": 11881 }, { "epoch": 1.2080113867425784, "grad_norm": 0.2988613545894623, "learning_rate": 9.401540203287583e-06, "loss": 0.3845, "step": 11882 }, { "epoch": 1.2081130540870273, "grad_norm": 0.29357796907424927, "learning_rate": 9.401371833396354e-06, "loss": 0.3455, "step": 11883 }, { "epoch": 1.2082147214314762, "grad_norm": 0.29618948698043823, "learning_rate": 9.401203441332059e-06, "loss": 0.3614, "step": 11884 }, { "epoch": 1.2083163887759252, "grad_norm": 0.29650771617889404, "learning_rate": 9.401035027095548e-06, "loss": 0.3666, "step": 11885 }, { "epoch": 1.2084180561203741, "grad_norm": 0.293433278799057, "learning_rate": 9.400866590687667e-06, "loss": 0.3663, "step": 11886 }, { "epoch": 1.208519723464823, "grad_norm": 0.3080897033214569, "learning_rate": 9.400698132109266e-06, "loss": 0.3596, "step": 11887 }, { "epoch": 1.208621390809272, "grad_norm": 0.3078250586986542, "learning_rate": 9.400529651361196e-06, "loss": 0.3608, "step": 11888 }, { "epoch": 1.208723058153721, "grad_norm": 0.27829709649086, "learning_rate": 9.400361148444302e-06, "loss": 0.3497, "step": 11889 }, { "epoch": 1.2088247254981699, "grad_norm": 0.3053233325481415, "learning_rate": 9.400192623359434e-06, "loss": 0.3462, "step": 11890 }, { "epoch": 1.208926392842619, "grad_norm": 0.31016087532043457, "learning_rate": 9.40002407610744e-06, "loss": 0.384, "step": 11891 }, { "epoch": 1.209028060187068, "grad_norm": 0.2850283682346344, "learning_rate": 9.399855506689173e-06, "loss": 0.36, "step": 11892 }, { "epoch": 1.209129727531517, "grad_norm": 0.28596174716949463, "learning_rate": 9.399686915105478e-06, "loss": 0.3795, "step": 11893 }, { "epoch": 1.2092313948759659, "grad_norm": 0.3141528069972992, "learning_rate": 9.399518301357205e-06, "loss": 0.392, "step": 11894 }, { "epoch": 1.2093330622204148, "grad_norm": 0.2586805820465088, "learning_rate": 9.399349665445205e-06, "loss": 0.3557, "step": 11895 }, { "epoch": 1.2094347295648638, "grad_norm": 0.2913379669189453, "learning_rate": 9.399181007370328e-06, "loss": 0.373, "step": 11896 }, { "epoch": 1.2095363969093127, "grad_norm": 0.29400143027305603, "learning_rate": 9.399012327133422e-06, "loss": 0.3421, "step": 11897 }, { "epoch": 1.2096380642537616, "grad_norm": 0.28625449538230896, "learning_rate": 9.398843624735337e-06, "loss": 0.3556, "step": 11898 }, { "epoch": 1.2097397315982106, "grad_norm": 0.3008236885070801, "learning_rate": 9.398674900176923e-06, "loss": 0.329, "step": 11899 }, { "epoch": 1.2098413989426597, "grad_norm": 0.31972330808639526, "learning_rate": 9.398506153459029e-06, "loss": 0.3669, "step": 11900 }, { "epoch": 1.2099430662871087, "grad_norm": 0.3075094521045685, "learning_rate": 9.398337384582508e-06, "loss": 0.3716, "step": 11901 }, { "epoch": 1.2100447336315576, "grad_norm": 0.29266300797462463, "learning_rate": 9.398168593548207e-06, "loss": 0.3835, "step": 11902 }, { "epoch": 1.2101464009760066, "grad_norm": 0.2907944619655609, "learning_rate": 9.39799978035698e-06, "loss": 0.3831, "step": 11903 }, { "epoch": 1.2102480683204555, "grad_norm": 0.30526062846183777, "learning_rate": 9.397830945009672e-06, "loss": 0.398, "step": 11904 }, { "epoch": 1.2103497356649044, "grad_norm": 0.2920089662075043, "learning_rate": 9.39766208750714e-06, "loss": 0.3644, "step": 11905 }, { "epoch": 1.2104514030093534, "grad_norm": 0.31806913018226624, "learning_rate": 9.39749320785023e-06, "loss": 0.3698, "step": 11906 }, { "epoch": 1.2105530703538023, "grad_norm": 0.29741182923316956, "learning_rate": 9.397324306039795e-06, "loss": 0.3595, "step": 11907 }, { "epoch": 1.2106547376982513, "grad_norm": 0.2954758107662201, "learning_rate": 9.397155382076683e-06, "loss": 0.4011, "step": 11908 }, { "epoch": 1.2107564050427002, "grad_norm": 0.3025571405887604, "learning_rate": 9.396986435961747e-06, "loss": 0.3642, "step": 11909 }, { "epoch": 1.2108580723871492, "grad_norm": 0.31443333625793457, "learning_rate": 9.396817467695841e-06, "loss": 0.3887, "step": 11910 }, { "epoch": 1.210959739731598, "grad_norm": 0.30474814772605896, "learning_rate": 9.396648477279813e-06, "loss": 0.36, "step": 11911 }, { "epoch": 1.2110614070760473, "grad_norm": 0.29381075501441956, "learning_rate": 9.396479464714514e-06, "loss": 0.3494, "step": 11912 }, { "epoch": 1.2111630744204962, "grad_norm": 0.3165910243988037, "learning_rate": 9.396310430000796e-06, "loss": 0.3506, "step": 11913 }, { "epoch": 1.2112647417649451, "grad_norm": 0.31985238194465637, "learning_rate": 9.39614137313951e-06, "loss": 0.3508, "step": 11914 }, { "epoch": 1.211366409109394, "grad_norm": 0.2810612618923187, "learning_rate": 9.39597229413151e-06, "loss": 0.3634, "step": 11915 }, { "epoch": 1.211468076453843, "grad_norm": 0.29743435978889465, "learning_rate": 9.395803192977647e-06, "loss": 0.3401, "step": 11916 }, { "epoch": 1.211569743798292, "grad_norm": 0.34464550018310547, "learning_rate": 9.39563406967877e-06, "loss": 0.3916, "step": 11917 }, { "epoch": 1.211671411142741, "grad_norm": 0.3229938745498657, "learning_rate": 9.395464924235733e-06, "loss": 0.3657, "step": 11918 }, { "epoch": 1.2117730784871898, "grad_norm": 0.31230154633522034, "learning_rate": 9.39529575664939e-06, "loss": 0.3947, "step": 11919 }, { "epoch": 1.2118747458316388, "grad_norm": 0.316969633102417, "learning_rate": 9.395126566920589e-06, "loss": 0.3764, "step": 11920 }, { "epoch": 1.211976413176088, "grad_norm": 0.32027119398117065, "learning_rate": 9.394957355050187e-06, "loss": 0.3729, "step": 11921 }, { "epoch": 1.212078080520537, "grad_norm": 0.2777496576309204, "learning_rate": 9.394788121039034e-06, "loss": 0.3712, "step": 11922 }, { "epoch": 1.2121797478649858, "grad_norm": 0.30414527654647827, "learning_rate": 9.39461886488798e-06, "loss": 0.3635, "step": 11923 }, { "epoch": 1.2122814152094348, "grad_norm": 0.30849921703338623, "learning_rate": 9.394449586597885e-06, "loss": 0.3837, "step": 11924 }, { "epoch": 1.2123830825538837, "grad_norm": 0.2985389232635498, "learning_rate": 9.394280286169594e-06, "loss": 0.3567, "step": 11925 }, { "epoch": 1.2124847498983327, "grad_norm": 0.28943493962287903, "learning_rate": 9.394110963603965e-06, "loss": 0.341, "step": 11926 }, { "epoch": 1.2125864172427816, "grad_norm": 0.29067134857177734, "learning_rate": 9.393941618901849e-06, "loss": 0.3486, "step": 11927 }, { "epoch": 1.2126880845872305, "grad_norm": 0.29292067885398865, "learning_rate": 9.393772252064098e-06, "loss": 0.3805, "step": 11928 }, { "epoch": 1.2127897519316795, "grad_norm": 0.3140527307987213, "learning_rate": 9.393602863091567e-06, "loss": 0.3734, "step": 11929 }, { "epoch": 1.2128914192761284, "grad_norm": 0.3070138394832611, "learning_rate": 9.39343345198511e-06, "loss": 0.4084, "step": 11930 }, { "epoch": 1.2129930866205774, "grad_norm": 0.306639164686203, "learning_rate": 9.393264018745578e-06, "loss": 0.3765, "step": 11931 }, { "epoch": 1.2130947539650265, "grad_norm": 0.284956693649292, "learning_rate": 9.393094563373826e-06, "loss": 0.3243, "step": 11932 }, { "epoch": 1.2131964213094755, "grad_norm": 0.30465999245643616, "learning_rate": 9.392925085870707e-06, "loss": 0.3882, "step": 11933 }, { "epoch": 1.2132980886539244, "grad_norm": 0.3164204955101013, "learning_rate": 9.392755586237077e-06, "loss": 0.3725, "step": 11934 }, { "epoch": 1.2133997559983734, "grad_norm": 0.2737683057785034, "learning_rate": 9.392586064473787e-06, "loss": 0.3633, "step": 11935 }, { "epoch": 1.2135014233428223, "grad_norm": 0.3279033601284027, "learning_rate": 9.392416520581693e-06, "loss": 0.4211, "step": 11936 }, { "epoch": 1.2136030906872712, "grad_norm": 0.3347512483596802, "learning_rate": 9.392246954561648e-06, "loss": 0.3382, "step": 11937 }, { "epoch": 1.2137047580317202, "grad_norm": 0.30217841267585754, "learning_rate": 9.392077366414506e-06, "loss": 0.3621, "step": 11938 }, { "epoch": 1.2138064253761691, "grad_norm": 0.33810847997665405, "learning_rate": 9.391907756141123e-06, "loss": 0.3672, "step": 11939 }, { "epoch": 1.213908092720618, "grad_norm": 0.3010047972202301, "learning_rate": 9.391738123742353e-06, "loss": 0.3719, "step": 11940 }, { "epoch": 1.2140097600650672, "grad_norm": 0.31889867782592773, "learning_rate": 9.391568469219049e-06, "loss": 0.3516, "step": 11941 }, { "epoch": 1.2141114274095162, "grad_norm": 0.3144398629665375, "learning_rate": 9.391398792572067e-06, "loss": 0.3465, "step": 11942 }, { "epoch": 1.214213094753965, "grad_norm": 0.30634915828704834, "learning_rate": 9.39122909380226e-06, "loss": 0.3765, "step": 11943 }, { "epoch": 1.214314762098414, "grad_norm": 0.29401689767837524, "learning_rate": 9.391059372910485e-06, "loss": 0.3338, "step": 11944 }, { "epoch": 1.214416429442863, "grad_norm": 0.30111896991729736, "learning_rate": 9.390889629897596e-06, "loss": 0.3784, "step": 11945 }, { "epoch": 1.214518096787312, "grad_norm": 0.29164889454841614, "learning_rate": 9.39071986476445e-06, "loss": 0.3649, "step": 11946 }, { "epoch": 1.2146197641317609, "grad_norm": 0.3107396364212036, "learning_rate": 9.3905500775119e-06, "loss": 0.3799, "step": 11947 }, { "epoch": 1.2147214314762098, "grad_norm": 0.31674686074256897, "learning_rate": 9.390380268140801e-06, "loss": 0.4151, "step": 11948 }, { "epoch": 1.2148230988206588, "grad_norm": 0.30432048439979553, "learning_rate": 9.390210436652013e-06, "loss": 0.345, "step": 11949 }, { "epoch": 1.2149247661651077, "grad_norm": 0.2831929624080658, "learning_rate": 9.390040583046385e-06, "loss": 0.3586, "step": 11950 }, { "epoch": 1.2150264335095566, "grad_norm": 0.2858275771141052, "learning_rate": 9.389870707324776e-06, "loss": 0.367, "step": 11951 }, { "epoch": 1.2151281008540056, "grad_norm": 0.312429279088974, "learning_rate": 9.389700809488042e-06, "loss": 0.3341, "step": 11952 }, { "epoch": 1.2152297681984547, "grad_norm": 0.2909969091415405, "learning_rate": 9.389530889537038e-06, "loss": 0.3623, "step": 11953 }, { "epoch": 1.2153314355429037, "grad_norm": 0.2952195703983307, "learning_rate": 9.38936094747262e-06, "loss": 0.364, "step": 11954 }, { "epoch": 1.2154331028873526, "grad_norm": 0.28586292266845703, "learning_rate": 9.389190983295647e-06, "loss": 0.395, "step": 11955 }, { "epoch": 1.2155347702318016, "grad_norm": 0.27570652961730957, "learning_rate": 9.389020997006972e-06, "loss": 0.3456, "step": 11956 }, { "epoch": 1.2156364375762505, "grad_norm": 0.2863573133945465, "learning_rate": 9.388850988607451e-06, "loss": 0.3393, "step": 11957 }, { "epoch": 1.2157381049206994, "grad_norm": 0.29326456785202026, "learning_rate": 9.388680958097943e-06, "loss": 0.3522, "step": 11958 }, { "epoch": 1.2158397722651484, "grad_norm": 0.30112195014953613, "learning_rate": 9.388510905479302e-06, "loss": 0.362, "step": 11959 }, { "epoch": 1.2159414396095973, "grad_norm": 0.2899875044822693, "learning_rate": 9.388340830752387e-06, "loss": 0.3472, "step": 11960 }, { "epoch": 1.2160431069540463, "grad_norm": 0.3226422071456909, "learning_rate": 9.388170733918052e-06, "loss": 0.3364, "step": 11961 }, { "epoch": 1.2161447742984954, "grad_norm": 0.35769256949424744, "learning_rate": 9.388000614977157e-06, "loss": 0.3991, "step": 11962 }, { "epoch": 1.2162464416429444, "grad_norm": 0.31164756417274475, "learning_rate": 9.387830473930557e-06, "loss": 0.334, "step": 11963 }, { "epoch": 1.2163481089873933, "grad_norm": 0.31790393590927124, "learning_rate": 9.38766031077911e-06, "loss": 0.4011, "step": 11964 }, { "epoch": 1.2164497763318423, "grad_norm": 0.29454025626182556, "learning_rate": 9.387490125523675e-06, "loss": 0.3863, "step": 11965 }, { "epoch": 1.2165514436762912, "grad_norm": 0.32231536507606506, "learning_rate": 9.387319918165105e-06, "loss": 0.3526, "step": 11966 }, { "epoch": 1.2166531110207401, "grad_norm": 0.28786951303482056, "learning_rate": 9.38714968870426e-06, "loss": 0.3405, "step": 11967 }, { "epoch": 1.216754778365189, "grad_norm": 0.2876600921154022, "learning_rate": 9.386979437141997e-06, "loss": 0.3321, "step": 11968 }, { "epoch": 1.216856445709638, "grad_norm": 0.27565595507621765, "learning_rate": 9.386809163479176e-06, "loss": 0.3867, "step": 11969 }, { "epoch": 1.216958113054087, "grad_norm": 0.2948160469532013, "learning_rate": 9.386638867716651e-06, "loss": 0.3466, "step": 11970 }, { "epoch": 1.217059780398536, "grad_norm": 0.2884012758731842, "learning_rate": 9.386468549855283e-06, "loss": 0.3731, "step": 11971 }, { "epoch": 1.2171614477429848, "grad_norm": 0.29322534799575806, "learning_rate": 9.38629820989593e-06, "loss": 0.3508, "step": 11972 }, { "epoch": 1.217263115087434, "grad_norm": 0.3114473521709442, "learning_rate": 9.386127847839445e-06, "loss": 0.3893, "step": 11973 }, { "epoch": 1.217364782431883, "grad_norm": 0.2830516993999481, "learning_rate": 9.385957463686695e-06, "loss": 0.3676, "step": 11974 }, { "epoch": 1.217466449776332, "grad_norm": 0.31501123309135437, "learning_rate": 9.385787057438531e-06, "loss": 0.3734, "step": 11975 }, { "epoch": 1.2175681171207808, "grad_norm": 0.30031999945640564, "learning_rate": 9.385616629095814e-06, "loss": 0.3656, "step": 11976 }, { "epoch": 1.2176697844652298, "grad_norm": 0.3042227327823639, "learning_rate": 9.385446178659404e-06, "loss": 0.3802, "step": 11977 }, { "epoch": 1.2177714518096787, "grad_norm": 0.29749494791030884, "learning_rate": 9.385275706130156e-06, "loss": 0.351, "step": 11978 }, { "epoch": 1.2178731191541277, "grad_norm": 0.3051004409790039, "learning_rate": 9.385105211508931e-06, "loss": 0.3414, "step": 11979 }, { "epoch": 1.2179747864985766, "grad_norm": 0.29802101850509644, "learning_rate": 9.38493469479659e-06, "loss": 0.3919, "step": 11980 }, { "epoch": 1.2180764538430255, "grad_norm": 0.305661141872406, "learning_rate": 9.384764155993989e-06, "loss": 0.3459, "step": 11981 }, { "epoch": 1.2181781211874747, "grad_norm": 0.2927326560020447, "learning_rate": 9.38459359510199e-06, "loss": 0.3402, "step": 11982 }, { "epoch": 1.2182797885319236, "grad_norm": 0.2900833189487457, "learning_rate": 9.384423012121449e-06, "loss": 0.3025, "step": 11983 }, { "epoch": 1.2183814558763726, "grad_norm": 0.32354238629341125, "learning_rate": 9.384252407053226e-06, "loss": 0.3586, "step": 11984 }, { "epoch": 1.2184831232208215, "grad_norm": 0.30566084384918213, "learning_rate": 9.384081779898182e-06, "loss": 0.3155, "step": 11985 }, { "epoch": 1.2185847905652705, "grad_norm": 0.3176155984401703, "learning_rate": 9.383911130657175e-06, "loss": 0.3507, "step": 11986 }, { "epoch": 1.2186864579097194, "grad_norm": 0.2988308072090149, "learning_rate": 9.383740459331067e-06, "loss": 0.3752, "step": 11987 }, { "epoch": 1.2187881252541684, "grad_norm": 0.31424546241760254, "learning_rate": 9.383569765920714e-06, "loss": 0.3491, "step": 11988 }, { "epoch": 1.2188897925986173, "grad_norm": 0.3098785877227783, "learning_rate": 9.38339905042698e-06, "loss": 0.339, "step": 11989 }, { "epoch": 1.2189914599430662, "grad_norm": 0.30127620697021484, "learning_rate": 9.383228312850723e-06, "loss": 0.3889, "step": 11990 }, { "epoch": 1.2190931272875152, "grad_norm": 0.2992038130760193, "learning_rate": 9.383057553192806e-06, "loss": 0.3715, "step": 11991 }, { "epoch": 1.2191947946319641, "grad_norm": 0.3112688660621643, "learning_rate": 9.382886771454082e-06, "loss": 0.3774, "step": 11992 }, { "epoch": 1.219296461976413, "grad_norm": 0.30772435665130615, "learning_rate": 9.382715967635418e-06, "loss": 0.3639, "step": 11993 }, { "epoch": 1.2193981293208622, "grad_norm": 0.2947103977203369, "learning_rate": 9.382545141737672e-06, "loss": 0.3403, "step": 11994 }, { "epoch": 1.2194997966653112, "grad_norm": 0.3072727620601654, "learning_rate": 9.382374293761706e-06, "loss": 0.3587, "step": 11995 }, { "epoch": 1.21960146400976, "grad_norm": 0.3471298813819885, "learning_rate": 9.382203423708379e-06, "loss": 0.405, "step": 11996 }, { "epoch": 1.219703131354209, "grad_norm": 0.34823882579803467, "learning_rate": 9.382032531578554e-06, "loss": 0.3691, "step": 11997 }, { "epoch": 1.219804798698658, "grad_norm": 0.2905159294605255, "learning_rate": 9.381861617373087e-06, "loss": 0.3691, "step": 11998 }, { "epoch": 1.219906466043107, "grad_norm": 0.3217480182647705, "learning_rate": 9.381690681092845e-06, "loss": 0.3697, "step": 11999 }, { "epoch": 1.2200081333875559, "grad_norm": 0.34391433000564575, "learning_rate": 9.381519722738686e-06, "loss": 0.3786, "step": 12000 }, { "epoch": 1.2201098007320048, "grad_norm": 0.3091185986995697, "learning_rate": 9.381348742311473e-06, "loss": 0.3612, "step": 12001 }, { "epoch": 1.2202114680764538, "grad_norm": 0.2860622704029083, "learning_rate": 9.381177739812064e-06, "loss": 0.3686, "step": 12002 }, { "epoch": 1.220313135420903, "grad_norm": 0.31201449036598206, "learning_rate": 9.381006715241324e-06, "loss": 0.3449, "step": 12003 }, { "epoch": 1.2204148027653519, "grad_norm": 0.28801652789115906, "learning_rate": 9.380835668600113e-06, "loss": 0.367, "step": 12004 }, { "epoch": 1.2205164701098008, "grad_norm": 0.3137390911579132, "learning_rate": 9.380664599889294e-06, "loss": 0.3818, "step": 12005 }, { "epoch": 1.2206181374542497, "grad_norm": 0.2927893102169037, "learning_rate": 9.380493509109726e-06, "loss": 0.3633, "step": 12006 }, { "epoch": 1.2207198047986987, "grad_norm": 0.31042593717575073, "learning_rate": 9.380322396262273e-06, "loss": 0.3286, "step": 12007 }, { "epoch": 1.2208214721431476, "grad_norm": 0.2953857481479645, "learning_rate": 9.380151261347799e-06, "loss": 0.3646, "step": 12008 }, { "epoch": 1.2209231394875966, "grad_norm": 0.28184691071510315, "learning_rate": 9.379980104367161e-06, "loss": 0.3391, "step": 12009 }, { "epoch": 1.2210248068320455, "grad_norm": 0.3048556447029114, "learning_rate": 9.379808925321226e-06, "loss": 0.3861, "step": 12010 }, { "epoch": 1.2211264741764944, "grad_norm": 0.3144010603427887, "learning_rate": 9.379637724210853e-06, "loss": 0.4025, "step": 12011 }, { "epoch": 1.2212281415209434, "grad_norm": 0.29791009426116943, "learning_rate": 9.379466501036906e-06, "loss": 0.3531, "step": 12012 }, { "epoch": 1.2213298088653923, "grad_norm": 0.30807432532310486, "learning_rate": 9.379295255800248e-06, "loss": 0.3599, "step": 12013 }, { "epoch": 1.2214314762098415, "grad_norm": 0.3038339912891388, "learning_rate": 9.379123988501742e-06, "loss": 0.3637, "step": 12014 }, { "epoch": 1.2215331435542904, "grad_norm": 0.2956811487674713, "learning_rate": 9.378952699142248e-06, "loss": 0.3414, "step": 12015 }, { "epoch": 1.2216348108987394, "grad_norm": 0.32423731684684753, "learning_rate": 9.378781387722632e-06, "loss": 0.3453, "step": 12016 }, { "epoch": 1.2217364782431883, "grad_norm": 0.31635332107543945, "learning_rate": 9.378610054243755e-06, "loss": 0.3893, "step": 12017 }, { "epoch": 1.2218381455876373, "grad_norm": 0.2858276665210724, "learning_rate": 9.378438698706482e-06, "loss": 0.3662, "step": 12018 }, { "epoch": 1.2219398129320862, "grad_norm": 0.3116230368614197, "learning_rate": 9.378267321111675e-06, "loss": 0.3663, "step": 12019 }, { "epoch": 1.2220414802765351, "grad_norm": 0.3035045862197876, "learning_rate": 9.3780959214602e-06, "loss": 0.3731, "step": 12020 }, { "epoch": 1.222143147620984, "grad_norm": 0.30920982360839844, "learning_rate": 9.377924499752915e-06, "loss": 0.3659, "step": 12021 }, { "epoch": 1.222244814965433, "grad_norm": 0.29344287514686584, "learning_rate": 9.377753055990689e-06, "loss": 0.3724, "step": 12022 }, { "epoch": 1.2223464823098822, "grad_norm": 0.2864384055137634, "learning_rate": 9.377581590174381e-06, "loss": 0.3678, "step": 12023 }, { "epoch": 1.2224481496543311, "grad_norm": 0.31751731038093567, "learning_rate": 9.37741010230486e-06, "loss": 0.3497, "step": 12024 }, { "epoch": 1.22254981699878, "grad_norm": 0.2763083577156067, "learning_rate": 9.377238592382984e-06, "loss": 0.3517, "step": 12025 }, { "epoch": 1.222651484343229, "grad_norm": 0.28807711601257324, "learning_rate": 9.377067060409623e-06, "loss": 0.406, "step": 12026 }, { "epoch": 1.222753151687678, "grad_norm": 0.3027447462081909, "learning_rate": 9.376895506385637e-06, "loss": 0.3626, "step": 12027 }, { "epoch": 1.222854819032127, "grad_norm": 0.32528814673423767, "learning_rate": 9.37672393031189e-06, "loss": 0.3612, "step": 12028 }, { "epoch": 1.2229564863765758, "grad_norm": 0.2968291938304901, "learning_rate": 9.37655233218925e-06, "loss": 0.3724, "step": 12029 }, { "epoch": 1.2230581537210248, "grad_norm": 0.2926540970802307, "learning_rate": 9.37638071201858e-06, "loss": 0.3694, "step": 12030 }, { "epoch": 1.2231598210654737, "grad_norm": 0.31012243032455444, "learning_rate": 9.376209069800742e-06, "loss": 0.3269, "step": 12031 }, { "epoch": 1.2232614884099227, "grad_norm": 0.3268601894378662, "learning_rate": 9.376037405536604e-06, "loss": 0.3544, "step": 12032 }, { "epoch": 1.2233631557543716, "grad_norm": 0.3151634633541107, "learning_rate": 9.375865719227028e-06, "loss": 0.3991, "step": 12033 }, { "epoch": 1.2234648230988205, "grad_norm": 0.2568807601928711, "learning_rate": 9.375694010872882e-06, "loss": 0.3406, "step": 12034 }, { "epoch": 1.2235664904432697, "grad_norm": 0.3191046118736267, "learning_rate": 9.375522280475029e-06, "loss": 0.365, "step": 12035 }, { "epoch": 1.2236681577877186, "grad_norm": 0.3163207471370697, "learning_rate": 9.375350528034335e-06, "loss": 0.3599, "step": 12036 }, { "epoch": 1.2237698251321676, "grad_norm": 0.3106473386287689, "learning_rate": 9.375178753551663e-06, "loss": 0.3958, "step": 12037 }, { "epoch": 1.2238714924766165, "grad_norm": 0.29749536514282227, "learning_rate": 9.37500695702788e-06, "loss": 0.3843, "step": 12038 }, { "epoch": 1.2239731598210655, "grad_norm": 0.33242321014404297, "learning_rate": 9.374835138463855e-06, "loss": 0.3834, "step": 12039 }, { "epoch": 1.2240748271655144, "grad_norm": 0.34936606884002686, "learning_rate": 9.374663297860446e-06, "loss": 0.3855, "step": 12040 }, { "epoch": 1.2241764945099634, "grad_norm": 0.3013509213924408, "learning_rate": 9.374491435218527e-06, "loss": 0.3314, "step": 12041 }, { "epoch": 1.2242781618544123, "grad_norm": 0.29810404777526855, "learning_rate": 9.374319550538957e-06, "loss": 0.3438, "step": 12042 }, { "epoch": 1.2243798291988612, "grad_norm": 0.2876506745815277, "learning_rate": 9.374147643822605e-06, "loss": 0.3646, "step": 12043 }, { "epoch": 1.2244814965433104, "grad_norm": 0.2995201647281647, "learning_rate": 9.373975715070337e-06, "loss": 0.3623, "step": 12044 }, { "epoch": 1.2245831638877593, "grad_norm": 0.2879815101623535, "learning_rate": 9.373803764283019e-06, "loss": 0.3488, "step": 12045 }, { "epoch": 1.2246848312322083, "grad_norm": 0.2944205403327942, "learning_rate": 9.373631791461515e-06, "loss": 0.3841, "step": 12046 }, { "epoch": 1.2247864985766572, "grad_norm": 0.28728383779525757, "learning_rate": 9.373459796606696e-06, "loss": 0.3328, "step": 12047 }, { "epoch": 1.2248881659211062, "grad_norm": 0.2986229658126831, "learning_rate": 9.373287779719424e-06, "loss": 0.3943, "step": 12048 }, { "epoch": 1.224989833265555, "grad_norm": 0.3424080014228821, "learning_rate": 9.37311574080057e-06, "loss": 0.3812, "step": 12049 }, { "epoch": 1.225091500610004, "grad_norm": 0.3222413957118988, "learning_rate": 9.372943679850996e-06, "loss": 0.3635, "step": 12050 }, { "epoch": 1.225193167954453, "grad_norm": 0.2970484793186188, "learning_rate": 9.37277159687157e-06, "loss": 0.3576, "step": 12051 }, { "epoch": 1.225294835298902, "grad_norm": 0.3033316135406494, "learning_rate": 9.372599491863162e-06, "loss": 0.3488, "step": 12052 }, { "epoch": 1.2253965026433509, "grad_norm": 0.3040994703769684, "learning_rate": 9.372427364826636e-06, "loss": 0.3878, "step": 12053 }, { "epoch": 1.2254981699877998, "grad_norm": 0.28346824645996094, "learning_rate": 9.37225521576286e-06, "loss": 0.3631, "step": 12054 }, { "epoch": 1.225599837332249, "grad_norm": 0.32855236530303955, "learning_rate": 9.3720830446727e-06, "loss": 0.346, "step": 12055 }, { "epoch": 1.225701504676698, "grad_norm": 0.2984028160572052, "learning_rate": 9.371910851557026e-06, "loss": 0.3705, "step": 12056 }, { "epoch": 1.2258031720211469, "grad_norm": 0.2996376156806946, "learning_rate": 9.371738636416705e-06, "loss": 0.3357, "step": 12057 }, { "epoch": 1.2259048393655958, "grad_norm": 0.31462547183036804, "learning_rate": 9.371566399252602e-06, "loss": 0.3656, "step": 12058 }, { "epoch": 1.2260065067100447, "grad_norm": 0.33207055926322937, "learning_rate": 9.371394140065587e-06, "loss": 0.3807, "step": 12059 }, { "epoch": 1.2261081740544937, "grad_norm": 0.29738011956214905, "learning_rate": 9.371221858856526e-06, "loss": 0.3863, "step": 12060 }, { "epoch": 1.2262098413989426, "grad_norm": 0.31942951679229736, "learning_rate": 9.37104955562629e-06, "loss": 0.396, "step": 12061 }, { "epoch": 1.2263115087433916, "grad_norm": 0.325105220079422, "learning_rate": 9.370877230375744e-06, "loss": 0.354, "step": 12062 }, { "epoch": 1.2264131760878405, "grad_norm": 0.2983826696872711, "learning_rate": 9.370704883105757e-06, "loss": 0.3721, "step": 12063 }, { "epoch": 1.2265148434322897, "grad_norm": 0.3376481831073761, "learning_rate": 9.370532513817196e-06, "loss": 0.3734, "step": 12064 }, { "epoch": 1.2266165107767386, "grad_norm": 0.2968401312828064, "learning_rate": 9.370360122510934e-06, "loss": 0.3689, "step": 12065 }, { "epoch": 1.2267181781211876, "grad_norm": 0.30385398864746094, "learning_rate": 9.370187709187835e-06, "loss": 0.372, "step": 12066 }, { "epoch": 1.2268198454656365, "grad_norm": 0.3107796907424927, "learning_rate": 9.370015273848767e-06, "loss": 0.3454, "step": 12067 }, { "epoch": 1.2269215128100854, "grad_norm": 0.29038140177726746, "learning_rate": 9.369842816494602e-06, "loss": 0.3845, "step": 12068 }, { "epoch": 1.2270231801545344, "grad_norm": 0.30344104766845703, "learning_rate": 9.369670337126208e-06, "loss": 0.3541, "step": 12069 }, { "epoch": 1.2271248474989833, "grad_norm": 0.3316366672515869, "learning_rate": 9.369497835744452e-06, "loss": 0.3482, "step": 12070 }, { "epoch": 1.2272265148434323, "grad_norm": 0.28811028599739075, "learning_rate": 9.369325312350205e-06, "loss": 0.3699, "step": 12071 }, { "epoch": 1.2273281821878812, "grad_norm": 0.29407867789268494, "learning_rate": 9.369152766944336e-06, "loss": 0.3684, "step": 12072 }, { "epoch": 1.2274298495323301, "grad_norm": 0.3086165487766266, "learning_rate": 9.368980199527713e-06, "loss": 0.3706, "step": 12073 }, { "epoch": 1.227531516876779, "grad_norm": 0.2859644293785095, "learning_rate": 9.368807610101205e-06, "loss": 0.3841, "step": 12074 }, { "epoch": 1.227633184221228, "grad_norm": 0.2929965555667877, "learning_rate": 9.368634998665684e-06, "loss": 0.3496, "step": 12075 }, { "epoch": 1.2277348515656772, "grad_norm": 0.2768528461456299, "learning_rate": 9.368462365222016e-06, "loss": 0.3596, "step": 12076 }, { "epoch": 1.2278365189101261, "grad_norm": 0.28594788908958435, "learning_rate": 9.368289709771075e-06, "loss": 0.3995, "step": 12077 }, { "epoch": 1.227938186254575, "grad_norm": 0.2973772883415222, "learning_rate": 9.368117032313727e-06, "loss": 0.3611, "step": 12078 }, { "epoch": 1.228039853599024, "grad_norm": 0.33315905928611755, "learning_rate": 9.367944332850844e-06, "loss": 0.3844, "step": 12079 }, { "epoch": 1.228141520943473, "grad_norm": 0.2978695034980774, "learning_rate": 9.367771611383297e-06, "loss": 0.3807, "step": 12080 }, { "epoch": 1.228243188287922, "grad_norm": 0.3094593286514282, "learning_rate": 9.367598867911951e-06, "loss": 0.3649, "step": 12081 }, { "epoch": 1.2283448556323708, "grad_norm": 0.2920542359352112, "learning_rate": 9.367426102437682e-06, "loss": 0.3674, "step": 12082 }, { "epoch": 1.2284465229768198, "grad_norm": 0.27240151166915894, "learning_rate": 9.367253314961357e-06, "loss": 0.373, "step": 12083 }, { "epoch": 1.2285481903212687, "grad_norm": 0.2928042411804199, "learning_rate": 9.36708050548385e-06, "loss": 0.3649, "step": 12084 }, { "epoch": 1.2286498576657179, "grad_norm": 0.2980629801750183, "learning_rate": 9.366907674006026e-06, "loss": 0.391, "step": 12085 }, { "epoch": 1.2287515250101668, "grad_norm": 0.31375032663345337, "learning_rate": 9.366734820528762e-06, "loss": 0.3645, "step": 12086 }, { "epoch": 1.2288531923546158, "grad_norm": 0.286456823348999, "learning_rate": 9.366561945052924e-06, "loss": 0.3776, "step": 12087 }, { "epoch": 1.2289548596990647, "grad_norm": 0.30527302622795105, "learning_rate": 9.366389047579384e-06, "loss": 0.3776, "step": 12088 }, { "epoch": 1.2290565270435136, "grad_norm": 0.29278773069381714, "learning_rate": 9.366216128109016e-06, "loss": 0.3517, "step": 12089 }, { "epoch": 1.2291581943879626, "grad_norm": 0.2847849428653717, "learning_rate": 9.366043186642686e-06, "loss": 0.3353, "step": 12090 }, { "epoch": 1.2292598617324115, "grad_norm": 0.2981778085231781, "learning_rate": 9.36587022318127e-06, "loss": 0.3778, "step": 12091 }, { "epoch": 1.2293615290768605, "grad_norm": 0.3012315034866333, "learning_rate": 9.365697237725635e-06, "loss": 0.3506, "step": 12092 }, { "epoch": 1.2294631964213094, "grad_norm": 0.30447492003440857, "learning_rate": 9.365524230276657e-06, "loss": 0.338, "step": 12093 }, { "epoch": 1.2295648637657584, "grad_norm": 0.2816547751426697, "learning_rate": 9.365351200835204e-06, "loss": 0.3731, "step": 12094 }, { "epoch": 1.2296665311102073, "grad_norm": 0.3141991198062897, "learning_rate": 9.36517814940215e-06, "loss": 0.3752, "step": 12095 }, { "epoch": 1.2297681984546565, "grad_norm": 0.31500521302223206, "learning_rate": 9.365005075978365e-06, "loss": 0.3648, "step": 12096 }, { "epoch": 1.2298698657991054, "grad_norm": 0.304815411567688, "learning_rate": 9.364831980564722e-06, "loss": 0.4057, "step": 12097 }, { "epoch": 1.2299715331435543, "grad_norm": 0.2887342572212219, "learning_rate": 9.364658863162093e-06, "loss": 0.3668, "step": 12098 }, { "epoch": 1.2300732004880033, "grad_norm": 0.3028731048107147, "learning_rate": 9.36448572377135e-06, "loss": 0.3493, "step": 12099 }, { "epoch": 1.2301748678324522, "grad_norm": 0.318515807390213, "learning_rate": 9.364312562393365e-06, "loss": 0.382, "step": 12100 }, { "epoch": 1.2302765351769012, "grad_norm": 0.30529582500457764, "learning_rate": 9.36413937902901e-06, "loss": 0.3733, "step": 12101 }, { "epoch": 1.23037820252135, "grad_norm": 0.29220789670944214, "learning_rate": 9.363966173679158e-06, "loss": 0.3674, "step": 12102 }, { "epoch": 1.230479869865799, "grad_norm": 0.3140251636505127, "learning_rate": 9.36379294634468e-06, "loss": 0.4108, "step": 12103 }, { "epoch": 1.230581537210248, "grad_norm": 0.29170796275138855, "learning_rate": 9.363619697026453e-06, "loss": 0.34, "step": 12104 }, { "epoch": 1.2306832045546972, "grad_norm": 0.2961134910583496, "learning_rate": 9.363446425725344e-06, "loss": 0.3855, "step": 12105 }, { "epoch": 1.230784871899146, "grad_norm": 0.3031882643699646, "learning_rate": 9.363273132442231e-06, "loss": 0.371, "step": 12106 }, { "epoch": 1.230886539243595, "grad_norm": 0.307447612285614, "learning_rate": 9.363099817177984e-06, "loss": 0.3443, "step": 12107 }, { "epoch": 1.230988206588044, "grad_norm": 0.2752441465854645, "learning_rate": 9.362926479933478e-06, "loss": 0.3381, "step": 12108 }, { "epoch": 1.231089873932493, "grad_norm": 0.3374156951904297, "learning_rate": 9.362753120709584e-06, "loss": 0.3727, "step": 12109 }, { "epoch": 1.2311915412769419, "grad_norm": 0.3281579315662384, "learning_rate": 9.362579739507176e-06, "loss": 0.3706, "step": 12110 }, { "epoch": 1.2312932086213908, "grad_norm": 0.28903433680534363, "learning_rate": 9.36240633632713e-06, "loss": 0.3699, "step": 12111 }, { "epoch": 1.2313948759658397, "grad_norm": 0.3094984292984009, "learning_rate": 9.362232911170317e-06, "loss": 0.3591, "step": 12112 }, { "epoch": 1.2314965433102887, "grad_norm": 0.317596435546875, "learning_rate": 9.362059464037609e-06, "loss": 0.4027, "step": 12113 }, { "epoch": 1.2315982106547376, "grad_norm": 0.28374436497688293, "learning_rate": 9.361885994929885e-06, "loss": 0.3354, "step": 12114 }, { "epoch": 1.2316998779991866, "grad_norm": 0.3295365869998932, "learning_rate": 9.361712503848013e-06, "loss": 0.3858, "step": 12115 }, { "epoch": 1.2318015453436355, "grad_norm": 0.31647706031799316, "learning_rate": 9.36153899079287e-06, "loss": 0.3536, "step": 12116 }, { "epoch": 1.2319032126880847, "grad_norm": 0.29206329584121704, "learning_rate": 9.361365455765333e-06, "loss": 0.3387, "step": 12117 }, { "epoch": 1.2320048800325336, "grad_norm": 0.2976240813732147, "learning_rate": 9.361191898766272e-06, "loss": 0.3633, "step": 12118 }, { "epoch": 1.2321065473769826, "grad_norm": 0.307537704706192, "learning_rate": 9.361018319796562e-06, "loss": 0.3462, "step": 12119 }, { "epoch": 1.2322082147214315, "grad_norm": 0.3038957417011261, "learning_rate": 9.360844718857078e-06, "loss": 0.3737, "step": 12120 }, { "epoch": 1.2323098820658804, "grad_norm": 0.3019874691963196, "learning_rate": 9.360671095948696e-06, "loss": 0.3763, "step": 12121 }, { "epoch": 1.2324115494103294, "grad_norm": 0.28954797983169556, "learning_rate": 9.360497451072288e-06, "loss": 0.3609, "step": 12122 }, { "epoch": 1.2325132167547783, "grad_norm": 0.28390416502952576, "learning_rate": 9.36032378422873e-06, "loss": 0.3381, "step": 12123 }, { "epoch": 1.2326148840992273, "grad_norm": 0.3135640621185303, "learning_rate": 9.360150095418896e-06, "loss": 0.3611, "step": 12124 }, { "epoch": 1.2327165514436762, "grad_norm": 0.2810000479221344, "learning_rate": 9.359976384643665e-06, "loss": 0.3682, "step": 12125 }, { "epoch": 1.2328182187881254, "grad_norm": 0.28399980068206787, "learning_rate": 9.359802651903907e-06, "loss": 0.3594, "step": 12126 }, { "epoch": 1.2329198861325743, "grad_norm": 0.30490365624427795, "learning_rate": 9.359628897200499e-06, "loss": 0.3758, "step": 12127 }, { "epoch": 1.2330215534770232, "grad_norm": 0.2971251904964447, "learning_rate": 9.359455120534317e-06, "loss": 0.3379, "step": 12128 }, { "epoch": 1.2331232208214722, "grad_norm": 0.2837342619895935, "learning_rate": 9.359281321906237e-06, "loss": 0.3572, "step": 12129 }, { "epoch": 1.2332248881659211, "grad_norm": 0.33620595932006836, "learning_rate": 9.359107501317132e-06, "loss": 0.3645, "step": 12130 }, { "epoch": 1.23332655551037, "grad_norm": 0.30324694514274597, "learning_rate": 9.358933658767879e-06, "loss": 0.3711, "step": 12131 }, { "epoch": 1.233428222854819, "grad_norm": 0.28797242045402527, "learning_rate": 9.358759794259356e-06, "loss": 0.3708, "step": 12132 }, { "epoch": 1.233529890199268, "grad_norm": 0.29419392347335815, "learning_rate": 9.358585907792436e-06, "loss": 0.3762, "step": 12133 }, { "epoch": 1.233631557543717, "grad_norm": 0.29784831404685974, "learning_rate": 9.358411999367996e-06, "loss": 0.3812, "step": 12134 }, { "epoch": 1.2337332248881658, "grad_norm": 0.29507768154144287, "learning_rate": 9.358238068986912e-06, "loss": 0.3651, "step": 12135 }, { "epoch": 1.2338348922326148, "grad_norm": 0.3275938034057617, "learning_rate": 9.358064116650061e-06, "loss": 0.388, "step": 12136 }, { "epoch": 1.233936559577064, "grad_norm": 0.317982941865921, "learning_rate": 9.357890142358318e-06, "loss": 0.3842, "step": 12137 }, { "epoch": 1.2340382269215129, "grad_norm": 0.3175961971282959, "learning_rate": 9.35771614611256e-06, "loss": 0.3702, "step": 12138 }, { "epoch": 1.2341398942659618, "grad_norm": 0.34009963274002075, "learning_rate": 9.357542127913664e-06, "loss": 0.3683, "step": 12139 }, { "epoch": 1.2342415616104108, "grad_norm": 0.3304552137851715, "learning_rate": 9.357368087762505e-06, "loss": 0.3957, "step": 12140 }, { "epoch": 1.2343432289548597, "grad_norm": 0.2905226945877075, "learning_rate": 9.357194025659963e-06, "loss": 0.3449, "step": 12141 }, { "epoch": 1.2344448962993086, "grad_norm": 0.33019882440567017, "learning_rate": 9.357019941606912e-06, "loss": 0.3847, "step": 12142 }, { "epoch": 1.2345465636437576, "grad_norm": 0.2966926693916321, "learning_rate": 9.35684583560423e-06, "loss": 0.3953, "step": 12143 }, { "epoch": 1.2346482309882065, "grad_norm": 0.3147479295730591, "learning_rate": 9.356671707652794e-06, "loss": 0.3938, "step": 12144 }, { "epoch": 1.2347498983326555, "grad_norm": 0.3132949769496918, "learning_rate": 9.35649755775348e-06, "loss": 0.3559, "step": 12145 }, { "epoch": 1.2348515656771046, "grad_norm": 0.2983507812023163, "learning_rate": 9.356323385907169e-06, "loss": 0.3585, "step": 12146 }, { "epoch": 1.2349532330215536, "grad_norm": 0.33918488025665283, "learning_rate": 9.356149192114734e-06, "loss": 0.3661, "step": 12147 }, { "epoch": 1.2350549003660025, "grad_norm": 0.29074910283088684, "learning_rate": 9.355974976377056e-06, "loss": 0.3309, "step": 12148 }, { "epoch": 1.2351565677104515, "grad_norm": 0.2915228009223938, "learning_rate": 9.355800738695009e-06, "loss": 0.3493, "step": 12149 }, { "epoch": 1.2352582350549004, "grad_norm": 0.30410945415496826, "learning_rate": 9.355626479069475e-06, "loss": 0.3911, "step": 12150 }, { "epoch": 1.2353599023993493, "grad_norm": 0.3179700970649719, "learning_rate": 9.35545219750133e-06, "loss": 0.3668, "step": 12151 }, { "epoch": 1.2354615697437983, "grad_norm": 0.2857219874858856, "learning_rate": 9.35527789399145e-06, "loss": 0.3428, "step": 12152 }, { "epoch": 1.2355632370882472, "grad_norm": 0.31234368681907654, "learning_rate": 9.355103568540716e-06, "loss": 0.3736, "step": 12153 }, { "epoch": 1.2356649044326962, "grad_norm": 0.34605175256729126, "learning_rate": 9.354929221150006e-06, "loss": 0.3724, "step": 12154 }, { "epoch": 1.235766571777145, "grad_norm": 0.2912025451660156, "learning_rate": 9.354754851820196e-06, "loss": 0.3722, "step": 12155 }, { "epoch": 1.235868239121594, "grad_norm": 0.2841913402080536, "learning_rate": 9.354580460552168e-06, "loss": 0.3821, "step": 12156 }, { "epoch": 1.235969906466043, "grad_norm": 0.3058314323425293, "learning_rate": 9.354406047346795e-06, "loss": 0.3704, "step": 12157 }, { "epoch": 1.2360715738104922, "grad_norm": 0.2857547998428345, "learning_rate": 9.354231612204963e-06, "loss": 0.3748, "step": 12158 }, { "epoch": 1.236173241154941, "grad_norm": 0.27814462780952454, "learning_rate": 9.354057155127543e-06, "loss": 0.3634, "step": 12159 }, { "epoch": 1.23627490849939, "grad_norm": 0.26778310537338257, "learning_rate": 9.353882676115422e-06, "loss": 0.3311, "step": 12160 }, { "epoch": 1.236376575843839, "grad_norm": 0.28966525197029114, "learning_rate": 9.353708175169471e-06, "loss": 0.3241, "step": 12161 }, { "epoch": 1.236478243188288, "grad_norm": 0.30711954832077026, "learning_rate": 9.353533652290574e-06, "loss": 0.3838, "step": 12162 }, { "epoch": 1.2365799105327369, "grad_norm": 0.31437215209007263, "learning_rate": 9.35335910747961e-06, "loss": 0.3822, "step": 12163 }, { "epoch": 1.2366815778771858, "grad_norm": 0.29809796810150146, "learning_rate": 9.353184540737456e-06, "loss": 0.3531, "step": 12164 }, { "epoch": 1.2367832452216347, "grad_norm": 0.32843220233917236, "learning_rate": 9.353009952064993e-06, "loss": 0.3428, "step": 12165 }, { "epoch": 1.2368849125660837, "grad_norm": 0.3014228343963623, "learning_rate": 9.352835341463102e-06, "loss": 0.3783, "step": 12166 }, { "epoch": 1.2369865799105328, "grad_norm": 0.2953755259513855, "learning_rate": 9.35266070893266e-06, "loss": 0.3806, "step": 12167 }, { "epoch": 1.2370882472549818, "grad_norm": 0.2941674292087555, "learning_rate": 9.352486054474546e-06, "loss": 0.3806, "step": 12168 }, { "epoch": 1.2371899145994307, "grad_norm": 0.3166910409927368, "learning_rate": 9.352311378089643e-06, "loss": 0.3412, "step": 12169 }, { "epoch": 1.2372915819438797, "grad_norm": 0.30439335107803345, "learning_rate": 9.35213667977883e-06, "loss": 0.3669, "step": 12170 }, { "epoch": 1.2373932492883286, "grad_norm": 0.3233802616596222, "learning_rate": 9.351961959542986e-06, "loss": 0.3749, "step": 12171 }, { "epoch": 1.2374949166327776, "grad_norm": 0.31016579270362854, "learning_rate": 9.351787217382993e-06, "loss": 0.3571, "step": 12172 }, { "epoch": 1.2375965839772265, "grad_norm": 0.3186781406402588, "learning_rate": 9.351612453299727e-06, "loss": 0.3791, "step": 12173 }, { "epoch": 1.2376982513216754, "grad_norm": 0.2875148057937622, "learning_rate": 9.351437667294075e-06, "loss": 0.3552, "step": 12174 }, { "epoch": 1.2377999186661244, "grad_norm": 0.3450300693511963, "learning_rate": 9.351262859366912e-06, "loss": 0.3766, "step": 12175 }, { "epoch": 1.2379015860105733, "grad_norm": 0.29486820101737976, "learning_rate": 9.351088029519122e-06, "loss": 0.3282, "step": 12176 }, { "epoch": 1.2380032533550223, "grad_norm": 0.31143149733543396, "learning_rate": 9.350913177751585e-06, "loss": 0.3585, "step": 12177 }, { "epoch": 1.2381049206994714, "grad_norm": 0.3322404623031616, "learning_rate": 9.35073830406518e-06, "loss": 0.3884, "step": 12178 }, { "epoch": 1.2382065880439204, "grad_norm": 0.33987826108932495, "learning_rate": 9.350563408460789e-06, "loss": 0.3768, "step": 12179 }, { "epoch": 1.2383082553883693, "grad_norm": 0.29480400681495667, "learning_rate": 9.350388490939293e-06, "loss": 0.3701, "step": 12180 }, { "epoch": 1.2384099227328182, "grad_norm": 0.3227497637271881, "learning_rate": 9.350213551501574e-06, "loss": 0.3564, "step": 12181 }, { "epoch": 1.2385115900772672, "grad_norm": 0.3228188455104828, "learning_rate": 9.350038590148514e-06, "loss": 0.3568, "step": 12182 }, { "epoch": 1.2386132574217161, "grad_norm": 0.3012738525867462, "learning_rate": 9.349863606880993e-06, "loss": 0.3696, "step": 12183 }, { "epoch": 1.238714924766165, "grad_norm": 0.29743874073028564, "learning_rate": 9.349688601699893e-06, "loss": 0.3604, "step": 12184 }, { "epoch": 1.238816592110614, "grad_norm": 0.308338463306427, "learning_rate": 9.349513574606096e-06, "loss": 0.372, "step": 12185 }, { "epoch": 1.238918259455063, "grad_norm": 0.3098125159740448, "learning_rate": 9.349338525600482e-06, "loss": 0.3553, "step": 12186 }, { "epoch": 1.2390199267995121, "grad_norm": 0.28490591049194336, "learning_rate": 9.349163454683934e-06, "loss": 0.3391, "step": 12187 }, { "epoch": 1.239121594143961, "grad_norm": 0.2867714762687683, "learning_rate": 9.348988361857334e-06, "loss": 0.3561, "step": 12188 }, { "epoch": 1.23922326148841, "grad_norm": 0.31095144152641296, "learning_rate": 9.348813247121566e-06, "loss": 0.3755, "step": 12189 }, { "epoch": 1.239324928832859, "grad_norm": 0.30467474460601807, "learning_rate": 9.348638110477507e-06, "loss": 0.3739, "step": 12190 }, { "epoch": 1.2394265961773079, "grad_norm": 0.3198448121547699, "learning_rate": 9.348462951926045e-06, "loss": 0.393, "step": 12191 }, { "epoch": 1.2395282635217568, "grad_norm": 0.2907717823982239, "learning_rate": 9.348287771468058e-06, "loss": 0.4211, "step": 12192 }, { "epoch": 1.2396299308662058, "grad_norm": 0.3343709707260132, "learning_rate": 9.348112569104435e-06, "loss": 0.3483, "step": 12193 }, { "epoch": 1.2397315982106547, "grad_norm": 0.2939574122428894, "learning_rate": 9.34793734483605e-06, "loss": 0.3414, "step": 12194 }, { "epoch": 1.2398332655551036, "grad_norm": 0.2840871512889862, "learning_rate": 9.34776209866379e-06, "loss": 0.3622, "step": 12195 }, { "epoch": 1.2399349328995526, "grad_norm": 0.2985111176967621, "learning_rate": 9.34758683058854e-06, "loss": 0.347, "step": 12196 }, { "epoch": 1.2400366002440015, "grad_norm": 0.30319830775260925, "learning_rate": 9.347411540611177e-06, "loss": 0.3451, "step": 12197 }, { "epoch": 1.2401382675884505, "grad_norm": 0.3187038004398346, "learning_rate": 9.347236228732592e-06, "loss": 0.3709, "step": 12198 }, { "epoch": 1.2402399349328996, "grad_norm": 0.27279195189476013, "learning_rate": 9.347060894953663e-06, "loss": 0.3601, "step": 12199 }, { "epoch": 1.2403416022773486, "grad_norm": 0.30450332164764404, "learning_rate": 9.346885539275272e-06, "loss": 0.3702, "step": 12200 }, { "epoch": 1.2404432696217975, "grad_norm": 0.2949436604976654, "learning_rate": 9.346710161698307e-06, "loss": 0.3651, "step": 12201 }, { "epoch": 1.2405449369662465, "grad_norm": 0.28797468543052673, "learning_rate": 9.346534762223647e-06, "loss": 0.3527, "step": 12202 }, { "epoch": 1.2406466043106954, "grad_norm": 0.3013918697834015, "learning_rate": 9.346359340852179e-06, "loss": 0.3591, "step": 12203 }, { "epoch": 1.2407482716551443, "grad_norm": 0.2912670373916626, "learning_rate": 9.346183897584786e-06, "loss": 0.3663, "step": 12204 }, { "epoch": 1.2408499389995933, "grad_norm": 0.293128103017807, "learning_rate": 9.346008432422351e-06, "loss": 0.3958, "step": 12205 }, { "epoch": 1.2409516063440422, "grad_norm": 0.2938074469566345, "learning_rate": 9.345832945365756e-06, "loss": 0.3627, "step": 12206 }, { "epoch": 1.2410532736884912, "grad_norm": 0.3213559687137604, "learning_rate": 9.345657436415891e-06, "loss": 0.3733, "step": 12207 }, { "epoch": 1.2411549410329403, "grad_norm": 0.3316546678543091, "learning_rate": 9.345481905573634e-06, "loss": 0.3799, "step": 12208 }, { "epoch": 1.2412566083773893, "grad_norm": 0.2732566297054291, "learning_rate": 9.345306352839873e-06, "loss": 0.3629, "step": 12209 }, { "epoch": 1.2413582757218382, "grad_norm": 0.3257833421230316, "learning_rate": 9.345130778215491e-06, "loss": 0.3457, "step": 12210 }, { "epoch": 1.2414599430662872, "grad_norm": 0.32025304436683655, "learning_rate": 9.344955181701372e-06, "loss": 0.3751, "step": 12211 }, { "epoch": 1.241561610410736, "grad_norm": 0.3015875518321991, "learning_rate": 9.344779563298402e-06, "loss": 0.3686, "step": 12212 }, { "epoch": 1.241663277755185, "grad_norm": 0.3043143153190613, "learning_rate": 9.344603923007464e-06, "loss": 0.354, "step": 12213 }, { "epoch": 1.241764945099634, "grad_norm": 0.31621822714805603, "learning_rate": 9.344428260829445e-06, "loss": 0.3748, "step": 12214 }, { "epoch": 1.241866612444083, "grad_norm": 0.2806435227394104, "learning_rate": 9.344252576765229e-06, "loss": 0.3195, "step": 12215 }, { "epoch": 1.2419682797885319, "grad_norm": 0.3274634778499603, "learning_rate": 9.344076870815701e-06, "loss": 0.3772, "step": 12216 }, { "epoch": 1.2420699471329808, "grad_norm": 0.2939465343952179, "learning_rate": 9.343901142981746e-06, "loss": 0.3643, "step": 12217 }, { "epoch": 1.2421716144774297, "grad_norm": 0.2815069258213043, "learning_rate": 9.34372539326425e-06, "loss": 0.3665, "step": 12218 }, { "epoch": 1.242273281821879, "grad_norm": 0.32162997126579285, "learning_rate": 9.343549621664096e-06, "loss": 0.3757, "step": 12219 }, { "epoch": 1.2423749491663278, "grad_norm": 0.2773720622062683, "learning_rate": 9.34337382818217e-06, "loss": 0.3776, "step": 12220 }, { "epoch": 1.2424766165107768, "grad_norm": 0.2963666617870331, "learning_rate": 9.343198012819361e-06, "loss": 0.3531, "step": 12221 }, { "epoch": 1.2425782838552257, "grad_norm": 0.267709881067276, "learning_rate": 9.343022175576552e-06, "loss": 0.3533, "step": 12222 }, { "epoch": 1.2426799511996747, "grad_norm": 0.27907881140708923, "learning_rate": 9.34284631645463e-06, "loss": 0.3537, "step": 12223 }, { "epoch": 1.2427816185441236, "grad_norm": 0.27975329756736755, "learning_rate": 9.342670435454479e-06, "loss": 0.3473, "step": 12224 }, { "epoch": 1.2428832858885726, "grad_norm": 0.3015170693397522, "learning_rate": 9.342494532576986e-06, "loss": 0.3818, "step": 12225 }, { "epoch": 1.2429849532330215, "grad_norm": 0.3069239854812622, "learning_rate": 9.34231860782304e-06, "loss": 0.3436, "step": 12226 }, { "epoch": 1.2430866205774704, "grad_norm": 0.27612292766571045, "learning_rate": 9.342142661193524e-06, "loss": 0.3442, "step": 12227 }, { "epoch": 1.2431882879219196, "grad_norm": 0.28360864520072937, "learning_rate": 9.341966692689324e-06, "loss": 0.3417, "step": 12228 }, { "epoch": 1.2432899552663685, "grad_norm": 0.29891619086265564, "learning_rate": 9.341790702311327e-06, "loss": 0.359, "step": 12229 }, { "epoch": 1.2433916226108175, "grad_norm": 0.27113014459609985, "learning_rate": 9.34161469006042e-06, "loss": 0.3442, "step": 12230 }, { "epoch": 1.2434932899552664, "grad_norm": 0.31864193081855774, "learning_rate": 9.341438655937492e-06, "loss": 0.4016, "step": 12231 }, { "epoch": 1.2435949572997154, "grad_norm": 0.2958914041519165, "learning_rate": 9.341262599943429e-06, "loss": 0.3762, "step": 12232 }, { "epoch": 1.2436966246441643, "grad_norm": 0.2814188599586487, "learning_rate": 9.341086522079112e-06, "loss": 0.3634, "step": 12233 }, { "epoch": 1.2437982919886132, "grad_norm": 0.29997560381889343, "learning_rate": 9.340910422345437e-06, "loss": 0.4064, "step": 12234 }, { "epoch": 1.2438999593330622, "grad_norm": 0.27770093083381653, "learning_rate": 9.340734300743283e-06, "loss": 0.3677, "step": 12235 }, { "epoch": 1.2440016266775111, "grad_norm": 0.2830374240875244, "learning_rate": 9.340558157273546e-06, "loss": 0.3353, "step": 12236 }, { "epoch": 1.24410329402196, "grad_norm": 0.30800849199295044, "learning_rate": 9.340381991937104e-06, "loss": 0.3908, "step": 12237 }, { "epoch": 1.244204961366409, "grad_norm": 0.2948533296585083, "learning_rate": 9.34020580473485e-06, "loss": 0.3821, "step": 12238 }, { "epoch": 1.244306628710858, "grad_norm": 0.29824838042259216, "learning_rate": 9.340029595667671e-06, "loss": 0.3562, "step": 12239 }, { "epoch": 1.2444082960553071, "grad_norm": 0.308057963848114, "learning_rate": 9.339853364736454e-06, "loss": 0.3726, "step": 12240 }, { "epoch": 1.244509963399756, "grad_norm": 0.29930415749549866, "learning_rate": 9.339677111942089e-06, "loss": 0.3608, "step": 12241 }, { "epoch": 1.244611630744205, "grad_norm": 0.29426583647727966, "learning_rate": 9.33950083728546e-06, "loss": 0.3383, "step": 12242 }, { "epoch": 1.244713298088654, "grad_norm": 0.2842117249965668, "learning_rate": 9.339324540767456e-06, "loss": 0.3862, "step": 12243 }, { "epoch": 1.2448149654331029, "grad_norm": 0.2898666560649872, "learning_rate": 9.33914822238897e-06, "loss": 0.3594, "step": 12244 }, { "epoch": 1.2449166327775518, "grad_norm": 0.3281167447566986, "learning_rate": 9.338971882150883e-06, "loss": 0.3861, "step": 12245 }, { "epoch": 1.2450183001220008, "grad_norm": 0.30852949619293213, "learning_rate": 9.338795520054087e-06, "loss": 0.4028, "step": 12246 }, { "epoch": 1.2451199674664497, "grad_norm": 0.29753318428993225, "learning_rate": 9.338619136099471e-06, "loss": 0.3687, "step": 12247 }, { "epoch": 1.2452216348108986, "grad_norm": 0.2968134582042694, "learning_rate": 9.338442730287923e-06, "loss": 0.3812, "step": 12248 }, { "epoch": 1.2453233021553478, "grad_norm": 0.28589358925819397, "learning_rate": 9.33826630262033e-06, "loss": 0.3669, "step": 12249 }, { "epoch": 1.2454249694997968, "grad_norm": 0.3127034902572632, "learning_rate": 9.338089853097586e-06, "loss": 0.3721, "step": 12250 }, { "epoch": 1.2455266368442457, "grad_norm": 0.30575740337371826, "learning_rate": 9.337913381720571e-06, "loss": 0.3548, "step": 12251 }, { "epoch": 1.2456283041886946, "grad_norm": 0.2980025112628937, "learning_rate": 9.337736888490183e-06, "loss": 0.338, "step": 12252 }, { "epoch": 1.2457299715331436, "grad_norm": 0.3023500442504883, "learning_rate": 9.337560373407306e-06, "loss": 0.3694, "step": 12253 }, { "epoch": 1.2458316388775925, "grad_norm": 0.3101499676704407, "learning_rate": 9.337383836472832e-06, "loss": 0.3701, "step": 12254 }, { "epoch": 1.2459333062220415, "grad_norm": 0.3063661456108093, "learning_rate": 9.337207277687648e-06, "loss": 0.372, "step": 12255 }, { "epoch": 1.2460349735664904, "grad_norm": 0.31776028871536255, "learning_rate": 9.337030697052643e-06, "loss": 0.371, "step": 12256 }, { "epoch": 1.2461366409109393, "grad_norm": 0.29891693592071533, "learning_rate": 9.33685409456871e-06, "loss": 0.3804, "step": 12257 }, { "epoch": 1.2462383082553883, "grad_norm": 0.33244600892066956, "learning_rate": 9.336677470236734e-06, "loss": 0.3644, "step": 12258 }, { "epoch": 1.2463399755998372, "grad_norm": 0.29407206177711487, "learning_rate": 9.33650082405761e-06, "loss": 0.3727, "step": 12259 }, { "epoch": 1.2464416429442864, "grad_norm": 0.33347052335739136, "learning_rate": 9.336324156032225e-06, "loss": 0.3536, "step": 12260 }, { "epoch": 1.2465433102887353, "grad_norm": 0.32223230600357056, "learning_rate": 9.33614746616147e-06, "loss": 0.3448, "step": 12261 }, { "epoch": 1.2466449776331843, "grad_norm": 0.30823811888694763, "learning_rate": 9.335970754446234e-06, "loss": 0.3677, "step": 12262 }, { "epoch": 1.2467466449776332, "grad_norm": 0.3081035315990448, "learning_rate": 9.335794020887407e-06, "loss": 0.3354, "step": 12263 }, { "epoch": 1.2468483123220822, "grad_norm": 0.3221367597579956, "learning_rate": 9.33561726548588e-06, "loss": 0.3575, "step": 12264 }, { "epoch": 1.246949979666531, "grad_norm": 0.3013359308242798, "learning_rate": 9.335440488242544e-06, "loss": 0.3672, "step": 12265 }, { "epoch": 1.24705164701098, "grad_norm": 0.29187342524528503, "learning_rate": 9.335263689158289e-06, "loss": 0.3864, "step": 12266 }, { "epoch": 1.247153314355429, "grad_norm": 0.2952432334423065, "learning_rate": 9.335086868234006e-06, "loss": 0.386, "step": 12267 }, { "epoch": 1.247254981699878, "grad_norm": 0.3013889789581299, "learning_rate": 9.334910025470586e-06, "loss": 0.3806, "step": 12268 }, { "epoch": 1.247356649044327, "grad_norm": 0.2800385653972626, "learning_rate": 9.334733160868918e-06, "loss": 0.3588, "step": 12269 }, { "epoch": 1.247458316388776, "grad_norm": 0.29792869091033936, "learning_rate": 9.334556274429896e-06, "loss": 0.3801, "step": 12270 }, { "epoch": 1.247559983733225, "grad_norm": 0.29924139380455017, "learning_rate": 9.334379366154407e-06, "loss": 0.3709, "step": 12271 }, { "epoch": 1.247661651077674, "grad_norm": 0.2846781015396118, "learning_rate": 9.334202436043347e-06, "loss": 0.3645, "step": 12272 }, { "epoch": 1.2477633184221228, "grad_norm": 0.27681827545166016, "learning_rate": 9.334025484097604e-06, "loss": 0.36, "step": 12273 }, { "epoch": 1.2478649857665718, "grad_norm": 0.2974601686000824, "learning_rate": 9.33384851031807e-06, "loss": 0.3486, "step": 12274 }, { "epoch": 1.2479666531110207, "grad_norm": 0.3136777877807617, "learning_rate": 9.33367151470564e-06, "loss": 0.3562, "step": 12275 }, { "epoch": 1.2480683204554697, "grad_norm": 0.2715050280094147, "learning_rate": 9.3334944972612e-06, "loss": 0.3274, "step": 12276 }, { "epoch": 1.2481699877999186, "grad_norm": 0.29183506965637207, "learning_rate": 9.333317457985647e-06, "loss": 0.4058, "step": 12277 }, { "epoch": 1.2482716551443676, "grad_norm": 0.3124268054962158, "learning_rate": 9.333140396879867e-06, "loss": 0.3649, "step": 12278 }, { "epoch": 1.2483733224888165, "grad_norm": 0.29690247774124146, "learning_rate": 9.332963313944758e-06, "loss": 0.3577, "step": 12279 }, { "epoch": 1.2484749898332654, "grad_norm": 0.2890262305736542, "learning_rate": 9.332786209181208e-06, "loss": 0.3462, "step": 12280 }, { "epoch": 1.2485766571777146, "grad_norm": 0.28973355889320374, "learning_rate": 9.33260908259011e-06, "loss": 0.3391, "step": 12281 }, { "epoch": 1.2486783245221635, "grad_norm": 0.29126691818237305, "learning_rate": 9.332431934172362e-06, "loss": 0.352, "step": 12282 }, { "epoch": 1.2487799918666125, "grad_norm": 0.2719910442829132, "learning_rate": 9.332254763928847e-06, "loss": 0.3823, "step": 12283 }, { "epoch": 1.2488816592110614, "grad_norm": 0.3153685927391052, "learning_rate": 9.332077571860463e-06, "loss": 0.3816, "step": 12284 }, { "epoch": 1.2489833265555104, "grad_norm": 0.2686631977558136, "learning_rate": 9.331900357968102e-06, "loss": 0.3678, "step": 12285 }, { "epoch": 1.2490849938999593, "grad_norm": 0.2888405919075012, "learning_rate": 9.331723122252654e-06, "loss": 0.3817, "step": 12286 }, { "epoch": 1.2491866612444082, "grad_norm": 0.3049289286136627, "learning_rate": 9.331545864715018e-06, "loss": 0.3828, "step": 12287 }, { "epoch": 1.2492883285888572, "grad_norm": 0.30076563358306885, "learning_rate": 9.33136858535608e-06, "loss": 0.3282, "step": 12288 }, { "epoch": 1.2493899959333061, "grad_norm": 0.30514395236968994, "learning_rate": 9.331191284176738e-06, "loss": 0.3671, "step": 12289 }, { "epoch": 1.2494916632777553, "grad_norm": 0.31469929218292236, "learning_rate": 9.331013961177884e-06, "loss": 0.3594, "step": 12290 }, { "epoch": 1.2495933306222042, "grad_norm": 0.30587315559387207, "learning_rate": 9.33083661636041e-06, "loss": 0.3718, "step": 12291 }, { "epoch": 1.2496949979666532, "grad_norm": 0.287752240896225, "learning_rate": 9.330659249725211e-06, "loss": 0.3967, "step": 12292 }, { "epoch": 1.2497966653111021, "grad_norm": 0.2723963260650635, "learning_rate": 9.33048186127318e-06, "loss": 0.3355, "step": 12293 }, { "epoch": 1.249898332655551, "grad_norm": 0.2898714542388916, "learning_rate": 9.330304451005209e-06, "loss": 0.389, "step": 12294 }, { "epoch": 1.25, "grad_norm": 0.279166042804718, "learning_rate": 9.330127018922195e-06, "loss": 0.342, "step": 12295 }, { "epoch": 1.250101667344449, "grad_norm": 0.2836677134037018, "learning_rate": 9.329949565025028e-06, "loss": 0.3684, "step": 12296 }, { "epoch": 1.2502033346888979, "grad_norm": 0.2731153964996338, "learning_rate": 9.329772089314607e-06, "loss": 0.3568, "step": 12297 }, { "epoch": 1.2503050020333468, "grad_norm": 0.29016196727752686, "learning_rate": 9.32959459179182e-06, "loss": 0.3915, "step": 12298 }, { "epoch": 1.2504066693777958, "grad_norm": 0.2787299156188965, "learning_rate": 9.329417072457566e-06, "loss": 0.4086, "step": 12299 }, { "epoch": 1.2505083367222447, "grad_norm": 0.29116275906562805, "learning_rate": 9.329239531312736e-06, "loss": 0.3641, "step": 12300 }, { "epoch": 1.2506100040666936, "grad_norm": 0.283367782831192, "learning_rate": 9.329061968358228e-06, "loss": 0.3723, "step": 12301 }, { "epoch": 1.2507116714111428, "grad_norm": 0.31201136112213135, "learning_rate": 9.328884383594934e-06, "loss": 0.3864, "step": 12302 }, { "epoch": 1.2508133387555918, "grad_norm": 0.29273125529289246, "learning_rate": 9.328706777023749e-06, "loss": 0.3717, "step": 12303 }, { "epoch": 1.2509150061000407, "grad_norm": 0.2784252166748047, "learning_rate": 9.328529148645568e-06, "loss": 0.3618, "step": 12304 }, { "epoch": 1.2510166734444896, "grad_norm": 0.3672710061073303, "learning_rate": 9.328351498461286e-06, "loss": 0.3754, "step": 12305 }, { "epoch": 1.2511183407889386, "grad_norm": 0.28925052285194397, "learning_rate": 9.328173826471796e-06, "loss": 0.3107, "step": 12306 }, { "epoch": 1.2512200081333875, "grad_norm": 0.29046884179115295, "learning_rate": 9.327996132677996e-06, "loss": 0.3572, "step": 12307 }, { "epoch": 1.2513216754778365, "grad_norm": 0.30021536350250244, "learning_rate": 9.327818417080782e-06, "loss": 0.367, "step": 12308 }, { "epoch": 1.2514233428222854, "grad_norm": 0.31403589248657227, "learning_rate": 9.327640679681044e-06, "loss": 0.3551, "step": 12309 }, { "epoch": 1.2515250101667346, "grad_norm": 0.2861694395542145, "learning_rate": 9.327462920479683e-06, "loss": 0.3508, "step": 12310 }, { "epoch": 1.2516266775111835, "grad_norm": 0.3049652576446533, "learning_rate": 9.32728513947759e-06, "loss": 0.3869, "step": 12311 }, { "epoch": 1.2517283448556324, "grad_norm": 0.2884948253631592, "learning_rate": 9.327107336675664e-06, "loss": 0.3731, "step": 12312 }, { "epoch": 1.2518300122000814, "grad_norm": 0.2691248059272766, "learning_rate": 9.3269295120748e-06, "loss": 0.3254, "step": 12313 }, { "epoch": 1.2519316795445303, "grad_norm": 0.30170178413391113, "learning_rate": 9.326751665675893e-06, "loss": 0.3457, "step": 12314 }, { "epoch": 1.2520333468889793, "grad_norm": 0.30451449751853943, "learning_rate": 9.32657379747984e-06, "loss": 0.3566, "step": 12315 }, { "epoch": 1.2521350142334282, "grad_norm": 0.3362438678741455, "learning_rate": 9.326395907487536e-06, "loss": 0.3771, "step": 12316 }, { "epoch": 1.2522366815778772, "grad_norm": 0.31035923957824707, "learning_rate": 9.326217995699876e-06, "loss": 0.3899, "step": 12317 }, { "epoch": 1.252338348922326, "grad_norm": 0.29138442873954773, "learning_rate": 9.32604006211776e-06, "loss": 0.3885, "step": 12318 }, { "epoch": 1.252440016266775, "grad_norm": 0.2911439538002014, "learning_rate": 9.32586210674208e-06, "loss": 0.33, "step": 12319 }, { "epoch": 1.252541683611224, "grad_norm": 0.3645147979259491, "learning_rate": 9.325684129573736e-06, "loss": 0.3802, "step": 12320 }, { "epoch": 1.252643350955673, "grad_norm": 0.34244972467422485, "learning_rate": 9.325506130613624e-06, "loss": 0.3606, "step": 12321 }, { "epoch": 1.252745018300122, "grad_norm": 0.31603676080703735, "learning_rate": 9.325328109862638e-06, "loss": 0.3596, "step": 12322 }, { "epoch": 1.252846685644571, "grad_norm": 0.31331491470336914, "learning_rate": 9.325150067321679e-06, "loss": 0.3747, "step": 12323 }, { "epoch": 1.25294835298902, "grad_norm": 0.3262641429901123, "learning_rate": 9.32497200299164e-06, "loss": 0.4024, "step": 12324 }, { "epoch": 1.253050020333469, "grad_norm": 0.31370729207992554, "learning_rate": 9.324793916873421e-06, "loss": 0.3642, "step": 12325 }, { "epoch": 1.2531516876779178, "grad_norm": 0.29736292362213135, "learning_rate": 9.324615808967918e-06, "loss": 0.3515, "step": 12326 }, { "epoch": 1.2532533550223668, "grad_norm": 0.3101530969142914, "learning_rate": 9.324437679276029e-06, "loss": 0.3422, "step": 12327 }, { "epoch": 1.2533550223668157, "grad_norm": 0.3205987513065338, "learning_rate": 9.324259527798648e-06, "loss": 0.3583, "step": 12328 }, { "epoch": 1.2534566897112647, "grad_norm": 0.34223076701164246, "learning_rate": 9.324081354536676e-06, "loss": 0.3907, "step": 12329 }, { "epoch": 1.2535583570557138, "grad_norm": 0.3080088198184967, "learning_rate": 9.32390315949101e-06, "loss": 0.3796, "step": 12330 }, { "epoch": 1.2536600244001628, "grad_norm": 0.2884669005870819, "learning_rate": 9.32372494266255e-06, "loss": 0.3689, "step": 12331 }, { "epoch": 1.2537616917446117, "grad_norm": 0.2936096787452698, "learning_rate": 9.323546704052187e-06, "loss": 0.3503, "step": 12332 }, { "epoch": 1.2538633590890607, "grad_norm": 0.26038509607315063, "learning_rate": 9.323368443660825e-06, "loss": 0.3277, "step": 12333 }, { "epoch": 1.2539650264335096, "grad_norm": 0.2923482060432434, "learning_rate": 9.323190161489359e-06, "loss": 0.3606, "step": 12334 }, { "epoch": 1.2540666937779585, "grad_norm": 0.2834429144859314, "learning_rate": 9.32301185753869e-06, "loss": 0.3748, "step": 12335 }, { "epoch": 1.2541683611224075, "grad_norm": 0.28329747915267944, "learning_rate": 9.322833531809714e-06, "loss": 0.349, "step": 12336 }, { "epoch": 1.2542700284668564, "grad_norm": 0.3009120225906372, "learning_rate": 9.32265518430333e-06, "loss": 0.3649, "step": 12337 }, { "epoch": 1.2543716958113054, "grad_norm": 0.2796490490436554, "learning_rate": 9.322476815020437e-06, "loss": 0.3916, "step": 12338 }, { "epoch": 1.2544733631557543, "grad_norm": 0.311318039894104, "learning_rate": 9.322298423961932e-06, "loss": 0.3676, "step": 12339 }, { "epoch": 1.2545750305002032, "grad_norm": 0.3026916980743408, "learning_rate": 9.322120011128715e-06, "loss": 0.3411, "step": 12340 }, { "epoch": 1.2546766978446522, "grad_norm": 0.2924818992614746, "learning_rate": 9.321941576521685e-06, "loss": 0.3865, "step": 12341 }, { "epoch": 1.2547783651891011, "grad_norm": 0.29338350892066956, "learning_rate": 9.321763120141739e-06, "loss": 0.3634, "step": 12342 }, { "epoch": 1.2548800325335503, "grad_norm": 0.3064291179180145, "learning_rate": 9.321584641989778e-06, "loss": 0.3861, "step": 12343 }, { "epoch": 1.2549816998779992, "grad_norm": 0.2974635064601898, "learning_rate": 9.321406142066702e-06, "loss": 0.3662, "step": 12344 }, { "epoch": 1.2550833672224482, "grad_norm": 0.29910168051719666, "learning_rate": 9.321227620373407e-06, "loss": 0.3674, "step": 12345 }, { "epoch": 1.2551850345668971, "grad_norm": 0.27743491530418396, "learning_rate": 9.321049076910795e-06, "loss": 0.3738, "step": 12346 }, { "epoch": 1.255286701911346, "grad_norm": 0.29814809560775757, "learning_rate": 9.320870511679765e-06, "loss": 0.34, "step": 12347 }, { "epoch": 1.255388369255795, "grad_norm": 0.2988799810409546, "learning_rate": 9.320691924681214e-06, "loss": 0.357, "step": 12348 }, { "epoch": 1.255490036600244, "grad_norm": 0.29719510674476624, "learning_rate": 9.320513315916046e-06, "loss": 0.3763, "step": 12349 }, { "epoch": 1.2555917039446929, "grad_norm": 0.2834393084049225, "learning_rate": 9.320334685385157e-06, "loss": 0.3404, "step": 12350 }, { "epoch": 1.255693371289142, "grad_norm": 0.2934594452381134, "learning_rate": 9.32015603308945e-06, "loss": 0.412, "step": 12351 }, { "epoch": 1.255795038633591, "grad_norm": 0.3198230266571045, "learning_rate": 9.319977359029823e-06, "loss": 0.3737, "step": 12352 }, { "epoch": 1.25589670597804, "grad_norm": 0.30311426520347595, "learning_rate": 9.319798663207177e-06, "loss": 0.394, "step": 12353 }, { "epoch": 1.2559983733224889, "grad_norm": 0.3031705617904663, "learning_rate": 9.319619945622412e-06, "loss": 0.3937, "step": 12354 }, { "epoch": 1.2561000406669378, "grad_norm": 0.3146754503250122, "learning_rate": 9.319441206276427e-06, "loss": 0.408, "step": 12355 }, { "epoch": 1.2562017080113868, "grad_norm": 0.2701393663883209, "learning_rate": 9.319262445170126e-06, "loss": 0.3872, "step": 12356 }, { "epoch": 1.2563033753558357, "grad_norm": 0.32490110397338867, "learning_rate": 9.319083662304405e-06, "loss": 0.3435, "step": 12357 }, { "epoch": 1.2564050427002846, "grad_norm": 0.29889747500419617, "learning_rate": 9.318904857680167e-06, "loss": 0.3649, "step": 12358 }, { "epoch": 1.2565067100447336, "grad_norm": 0.2949789762496948, "learning_rate": 9.318726031298313e-06, "loss": 0.3766, "step": 12359 }, { "epoch": 1.2566083773891825, "grad_norm": 0.31357645988464355, "learning_rate": 9.318547183159742e-06, "loss": 0.3842, "step": 12360 }, { "epoch": 1.2567100447336315, "grad_norm": 0.2722896933555603, "learning_rate": 9.31836831326536e-06, "loss": 0.382, "step": 12361 }, { "epoch": 1.2568117120780804, "grad_norm": 0.287211537361145, "learning_rate": 9.318189421616062e-06, "loss": 0.3809, "step": 12362 }, { "epoch": 1.2569133794225296, "grad_norm": 0.2936016321182251, "learning_rate": 9.318010508212753e-06, "loss": 0.363, "step": 12363 }, { "epoch": 1.2570150467669785, "grad_norm": 0.2881653606891632, "learning_rate": 9.317831573056333e-06, "loss": 0.3484, "step": 12364 }, { "epoch": 1.2571167141114274, "grad_norm": 0.29437509179115295, "learning_rate": 9.317652616147703e-06, "loss": 0.4012, "step": 12365 }, { "epoch": 1.2572183814558764, "grad_norm": 0.3327329754829407, "learning_rate": 9.317473637487767e-06, "loss": 0.3583, "step": 12366 }, { "epoch": 1.2573200488003253, "grad_norm": 0.3180348575115204, "learning_rate": 9.317294637077424e-06, "loss": 0.3626, "step": 12367 }, { "epoch": 1.2574217161447743, "grad_norm": 0.3056853413581848, "learning_rate": 9.317115614917575e-06, "loss": 0.3619, "step": 12368 }, { "epoch": 1.2575233834892232, "grad_norm": 0.27620717883110046, "learning_rate": 9.316936571009124e-06, "loss": 0.3447, "step": 12369 }, { "epoch": 1.2576250508336722, "grad_norm": 0.28605523705482483, "learning_rate": 9.316757505352973e-06, "loss": 0.3618, "step": 12370 }, { "epoch": 1.2577267181781213, "grad_norm": 0.2989806830883026, "learning_rate": 9.316578417950024e-06, "loss": 0.3421, "step": 12371 }, { "epoch": 1.2578283855225703, "grad_norm": 0.31103163957595825, "learning_rate": 9.316399308801177e-06, "loss": 0.3311, "step": 12372 }, { "epoch": 1.2579300528670192, "grad_norm": 0.3207584023475647, "learning_rate": 9.316220177907338e-06, "loss": 0.357, "step": 12373 }, { "epoch": 1.2580317202114681, "grad_norm": 0.2748064398765564, "learning_rate": 9.316041025269406e-06, "loss": 0.3452, "step": 12374 }, { "epoch": 1.258133387555917, "grad_norm": 0.3016657531261444, "learning_rate": 9.315861850888285e-06, "loss": 0.3255, "step": 12375 }, { "epoch": 1.258235054900366, "grad_norm": 0.31374961137771606, "learning_rate": 9.31568265476488e-06, "loss": 0.3398, "step": 12376 }, { "epoch": 1.258336722244815, "grad_norm": 0.2759518027305603, "learning_rate": 9.315503436900088e-06, "loss": 0.3323, "step": 12377 }, { "epoch": 1.258438389589264, "grad_norm": 0.2800367772579193, "learning_rate": 9.315324197294817e-06, "loss": 0.3703, "step": 12378 }, { "epoch": 1.2585400569337128, "grad_norm": 0.3163146674633026, "learning_rate": 9.315144935949967e-06, "loss": 0.3932, "step": 12379 }, { "epoch": 1.2586417242781618, "grad_norm": 0.3322126567363739, "learning_rate": 9.314965652866444e-06, "loss": 0.3779, "step": 12380 }, { "epoch": 1.2587433916226107, "grad_norm": 0.30587080121040344, "learning_rate": 9.314786348045148e-06, "loss": 0.3532, "step": 12381 }, { "epoch": 1.2588450589670597, "grad_norm": 0.29182109236717224, "learning_rate": 9.314607021486983e-06, "loss": 0.3719, "step": 12382 }, { "epoch": 1.2589467263115086, "grad_norm": 0.3047805428504944, "learning_rate": 9.314427673192856e-06, "loss": 0.3902, "step": 12383 }, { "epoch": 1.2590483936559578, "grad_norm": 0.32580140233039856, "learning_rate": 9.314248303163666e-06, "loss": 0.3408, "step": 12384 }, { "epoch": 1.2591500610004067, "grad_norm": 0.3015638291835785, "learning_rate": 9.314068911400318e-06, "loss": 0.3812, "step": 12385 }, { "epoch": 1.2592517283448557, "grad_norm": 0.3143168091773987, "learning_rate": 9.313889497903716e-06, "loss": 0.383, "step": 12386 }, { "epoch": 1.2593533956893046, "grad_norm": 0.2869338095188141, "learning_rate": 9.313710062674764e-06, "loss": 0.3789, "step": 12387 }, { "epoch": 1.2594550630337535, "grad_norm": 0.31678396463394165, "learning_rate": 9.313530605714367e-06, "loss": 0.3884, "step": 12388 }, { "epoch": 1.2595567303782025, "grad_norm": 0.2990575432777405, "learning_rate": 9.313351127023426e-06, "loss": 0.343, "step": 12389 }, { "epoch": 1.2596583977226514, "grad_norm": 0.29861947894096375, "learning_rate": 9.313171626602848e-06, "loss": 0.3746, "step": 12390 }, { "epoch": 1.2597600650671004, "grad_norm": 0.2783950865268707, "learning_rate": 9.312992104453535e-06, "loss": 0.3458, "step": 12391 }, { "epoch": 1.2598617324115495, "grad_norm": 0.29127952456474304, "learning_rate": 9.312812560576394e-06, "loss": 0.3718, "step": 12392 }, { "epoch": 1.2599633997559985, "grad_norm": 0.2732832133769989, "learning_rate": 9.312632994972327e-06, "loss": 0.3838, "step": 12393 }, { "epoch": 1.2600650671004474, "grad_norm": 0.290416955947876, "learning_rate": 9.312453407642242e-06, "loss": 0.3837, "step": 12394 }, { "epoch": 1.2601667344448964, "grad_norm": 0.28145793080329895, "learning_rate": 9.312273798587042e-06, "loss": 0.3598, "step": 12395 }, { "epoch": 1.2602684017893453, "grad_norm": 0.3336395025253296, "learning_rate": 9.312094167807629e-06, "loss": 0.3703, "step": 12396 }, { "epoch": 1.2603700691337942, "grad_norm": 0.3067186176776886, "learning_rate": 9.31191451530491e-06, "loss": 0.3721, "step": 12397 }, { "epoch": 1.2604717364782432, "grad_norm": 0.29968059062957764, "learning_rate": 9.311734841079792e-06, "loss": 0.3639, "step": 12398 }, { "epoch": 1.2605734038226921, "grad_norm": 0.28331243991851807, "learning_rate": 9.311555145133177e-06, "loss": 0.3682, "step": 12399 }, { "epoch": 1.260675071167141, "grad_norm": 0.30197423696517944, "learning_rate": 9.311375427465973e-06, "loss": 0.4002, "step": 12400 }, { "epoch": 1.26077673851159, "grad_norm": 0.3144744634628296, "learning_rate": 9.311195688079084e-06, "loss": 0.3945, "step": 12401 }, { "epoch": 1.260878405856039, "grad_norm": 0.27586594223976135, "learning_rate": 9.311015926973415e-06, "loss": 0.3454, "step": 12402 }, { "epoch": 1.2609800732004879, "grad_norm": 0.2656427025794983, "learning_rate": 9.310836144149874e-06, "loss": 0.3454, "step": 12403 }, { "epoch": 1.261081740544937, "grad_norm": 0.287812739610672, "learning_rate": 9.310656339609363e-06, "loss": 0.354, "step": 12404 }, { "epoch": 1.261183407889386, "grad_norm": 0.2734392285346985, "learning_rate": 9.31047651335279e-06, "loss": 0.3484, "step": 12405 }, { "epoch": 1.261285075233835, "grad_norm": 0.27382785081863403, "learning_rate": 9.31029666538106e-06, "loss": 0.3443, "step": 12406 }, { "epoch": 1.2613867425782839, "grad_norm": 0.2787756025791168, "learning_rate": 9.310116795695081e-06, "loss": 0.3301, "step": 12407 }, { "epoch": 1.2614884099227328, "grad_norm": 0.31813207268714905, "learning_rate": 9.309936904295758e-06, "loss": 0.3863, "step": 12408 }, { "epoch": 1.2615900772671818, "grad_norm": 0.29483556747436523, "learning_rate": 9.309756991183997e-06, "loss": 0.3692, "step": 12409 }, { "epoch": 1.2616917446116307, "grad_norm": 0.26439160108566284, "learning_rate": 9.309577056360703e-06, "loss": 0.3288, "step": 12410 }, { "epoch": 1.2617934119560796, "grad_norm": 0.2813107371330261, "learning_rate": 9.309397099826785e-06, "loss": 0.3806, "step": 12411 }, { "epoch": 1.2618950793005288, "grad_norm": 0.3045046031475067, "learning_rate": 9.30921712158315e-06, "loss": 0.3913, "step": 12412 }, { "epoch": 1.2619967466449777, "grad_norm": 0.3190722167491913, "learning_rate": 9.3090371216307e-06, "loss": 0.388, "step": 12413 }, { "epoch": 1.2620984139894267, "grad_norm": 0.3012339770793915, "learning_rate": 9.308857099970346e-06, "loss": 0.3623, "step": 12414 }, { "epoch": 1.2622000813338756, "grad_norm": 0.3143984079360962, "learning_rate": 9.308677056602995e-06, "loss": 0.3533, "step": 12415 }, { "epoch": 1.2623017486783246, "grad_norm": 0.29918351769447327, "learning_rate": 9.308496991529553e-06, "loss": 0.3647, "step": 12416 }, { "epoch": 1.2624034160227735, "grad_norm": 0.28109070658683777, "learning_rate": 9.308316904750925e-06, "loss": 0.3521, "step": 12417 }, { "epoch": 1.2625050833672224, "grad_norm": 0.274277001619339, "learning_rate": 9.308136796268021e-06, "loss": 0.3224, "step": 12418 }, { "epoch": 1.2626067507116714, "grad_norm": 0.29003289341926575, "learning_rate": 9.307956666081749e-06, "loss": 0.3501, "step": 12419 }, { "epoch": 1.2627084180561203, "grad_norm": 0.26728811860084534, "learning_rate": 9.307776514193014e-06, "loss": 0.3614, "step": 12420 }, { "epoch": 1.2628100854005693, "grad_norm": 0.30572929978370667, "learning_rate": 9.307596340602725e-06, "loss": 0.3871, "step": 12421 }, { "epoch": 1.2629117527450182, "grad_norm": 0.30132558941841125, "learning_rate": 9.307416145311789e-06, "loss": 0.3794, "step": 12422 }, { "epoch": 1.2630134200894672, "grad_norm": 0.2919616997241974, "learning_rate": 9.307235928321114e-06, "loss": 0.3847, "step": 12423 }, { "epoch": 1.263115087433916, "grad_norm": 0.3037482798099518, "learning_rate": 9.307055689631608e-06, "loss": 0.3764, "step": 12424 }, { "epoch": 1.2632167547783653, "grad_norm": 0.2743070721626282, "learning_rate": 9.306875429244177e-06, "loss": 0.3193, "step": 12425 }, { "epoch": 1.2633184221228142, "grad_norm": 0.2736932635307312, "learning_rate": 9.306695147159735e-06, "loss": 0.3873, "step": 12426 }, { "epoch": 1.2634200894672631, "grad_norm": 0.29586854577064514, "learning_rate": 9.306514843379184e-06, "loss": 0.3864, "step": 12427 }, { "epoch": 1.263521756811712, "grad_norm": 0.288006067276001, "learning_rate": 9.306334517903434e-06, "loss": 0.3755, "step": 12428 }, { "epoch": 1.263623424156161, "grad_norm": 0.29211264848709106, "learning_rate": 9.306154170733395e-06, "loss": 0.3853, "step": 12429 }, { "epoch": 1.26372509150061, "grad_norm": 0.3016887903213501, "learning_rate": 9.305973801869975e-06, "loss": 0.368, "step": 12430 }, { "epoch": 1.263826758845059, "grad_norm": 0.3156229853630066, "learning_rate": 9.30579341131408e-06, "loss": 0.3467, "step": 12431 }, { "epoch": 1.2639284261895078, "grad_norm": 0.3122185170650482, "learning_rate": 9.305612999066624e-06, "loss": 0.3866, "step": 12432 }, { "epoch": 1.264030093533957, "grad_norm": 0.31154999136924744, "learning_rate": 9.305432565128512e-06, "loss": 0.3822, "step": 12433 }, { "epoch": 1.264131760878406, "grad_norm": 0.2920264005661011, "learning_rate": 9.305252109500653e-06, "loss": 0.3755, "step": 12434 }, { "epoch": 1.264233428222855, "grad_norm": 0.3198370337486267, "learning_rate": 9.305071632183959e-06, "loss": 0.3435, "step": 12435 }, { "epoch": 1.2643350955673038, "grad_norm": 0.30827558040618896, "learning_rate": 9.304891133179334e-06, "loss": 0.3722, "step": 12436 }, { "epoch": 1.2644367629117528, "grad_norm": 0.27014264464378357, "learning_rate": 9.304710612487693e-06, "loss": 0.3436, "step": 12437 }, { "epoch": 1.2645384302562017, "grad_norm": 0.3393188714981079, "learning_rate": 9.304530070109941e-06, "loss": 0.3353, "step": 12438 }, { "epoch": 1.2646400976006507, "grad_norm": 0.3220038115978241, "learning_rate": 9.30434950604699e-06, "loss": 0.3838, "step": 12439 }, { "epoch": 1.2647417649450996, "grad_norm": 0.309543639421463, "learning_rate": 9.304168920299751e-06, "loss": 0.4015, "step": 12440 }, { "epoch": 1.2648434322895485, "grad_norm": 0.32157355546951294, "learning_rate": 9.30398831286913e-06, "loss": 0.3911, "step": 12441 }, { "epoch": 1.2649450996339975, "grad_norm": 0.3437570035457611, "learning_rate": 9.303807683756039e-06, "loss": 0.3574, "step": 12442 }, { "epoch": 1.2650467669784464, "grad_norm": 0.30781951546669006, "learning_rate": 9.303627032961388e-06, "loss": 0.3651, "step": 12443 }, { "epoch": 1.2651484343228954, "grad_norm": 0.3392375409603119, "learning_rate": 9.303446360486086e-06, "loss": 0.3999, "step": 12444 }, { "epoch": 1.2652501016673445, "grad_norm": 0.3236834704875946, "learning_rate": 9.303265666331043e-06, "loss": 0.373, "step": 12445 }, { "epoch": 1.2653517690117935, "grad_norm": 0.329379677772522, "learning_rate": 9.303084950497172e-06, "loss": 0.3572, "step": 12446 }, { "epoch": 1.2654534363562424, "grad_norm": 0.321540504693985, "learning_rate": 9.302904212985382e-06, "loss": 0.3319, "step": 12447 }, { "epoch": 1.2655551037006914, "grad_norm": 0.2859354019165039, "learning_rate": 9.302723453796581e-06, "loss": 0.3629, "step": 12448 }, { "epoch": 1.2656567710451403, "grad_norm": 0.2984379529953003, "learning_rate": 9.302542672931682e-06, "loss": 0.3604, "step": 12449 }, { "epoch": 1.2657584383895892, "grad_norm": 0.33079007267951965, "learning_rate": 9.302361870391598e-06, "loss": 0.3791, "step": 12450 }, { "epoch": 1.2658601057340382, "grad_norm": 0.32639065384864807, "learning_rate": 9.302181046177235e-06, "loss": 0.4039, "step": 12451 }, { "epoch": 1.2659617730784871, "grad_norm": 0.2840607166290283, "learning_rate": 9.302000200289507e-06, "loss": 0.3444, "step": 12452 }, { "epoch": 1.2660634404229363, "grad_norm": 0.3280474841594696, "learning_rate": 9.301819332729322e-06, "loss": 0.389, "step": 12453 }, { "epoch": 1.2661651077673852, "grad_norm": 0.32036980986595154, "learning_rate": 9.301638443497598e-06, "loss": 0.3807, "step": 12454 }, { "epoch": 1.2662667751118342, "grad_norm": 0.305529922246933, "learning_rate": 9.301457532595238e-06, "loss": 0.3583, "step": 12455 }, { "epoch": 1.266368442456283, "grad_norm": 0.32822564244270325, "learning_rate": 9.301276600023159e-06, "loss": 0.3325, "step": 12456 }, { "epoch": 1.266470109800732, "grad_norm": 0.29300957918167114, "learning_rate": 9.30109564578227e-06, "loss": 0.3563, "step": 12457 }, { "epoch": 1.266571777145181, "grad_norm": 0.3053290843963623, "learning_rate": 9.300914669873483e-06, "loss": 0.38, "step": 12458 }, { "epoch": 1.26667344448963, "grad_norm": 0.3093035817146301, "learning_rate": 9.30073367229771e-06, "loss": 0.3659, "step": 12459 }, { "epoch": 1.2667751118340789, "grad_norm": 0.2903333902359009, "learning_rate": 9.300552653055864e-06, "loss": 0.3896, "step": 12460 }, { "epoch": 1.2668767791785278, "grad_norm": 0.2733229696750641, "learning_rate": 9.300371612148855e-06, "loss": 0.3406, "step": 12461 }, { "epoch": 1.2669784465229768, "grad_norm": 0.2981049120426178, "learning_rate": 9.300190549577594e-06, "loss": 0.3898, "step": 12462 }, { "epoch": 1.2670801138674257, "grad_norm": 0.29178595542907715, "learning_rate": 9.300009465342996e-06, "loss": 0.3796, "step": 12463 }, { "epoch": 1.2671817812118746, "grad_norm": 0.2757997512817383, "learning_rate": 9.299828359445973e-06, "loss": 0.3747, "step": 12464 }, { "epoch": 1.2672834485563236, "grad_norm": 0.298031210899353, "learning_rate": 9.299647231887436e-06, "loss": 0.3669, "step": 12465 }, { "epoch": 1.2673851159007727, "grad_norm": 0.300274521112442, "learning_rate": 9.299466082668298e-06, "loss": 0.3727, "step": 12466 }, { "epoch": 1.2674867832452217, "grad_norm": 0.2964463531970978, "learning_rate": 9.299284911789472e-06, "loss": 0.3489, "step": 12467 }, { "epoch": 1.2675884505896706, "grad_norm": 0.2792879045009613, "learning_rate": 9.29910371925187e-06, "loss": 0.3684, "step": 12468 }, { "epoch": 1.2676901179341196, "grad_norm": 0.30782708525657654, "learning_rate": 9.298922505056405e-06, "loss": 0.3665, "step": 12469 }, { "epoch": 1.2677917852785685, "grad_norm": 0.29384511709213257, "learning_rate": 9.298741269203991e-06, "loss": 0.3739, "step": 12470 }, { "epoch": 1.2678934526230174, "grad_norm": 0.31049880385398865, "learning_rate": 9.298560011695538e-06, "loss": 0.3322, "step": 12471 }, { "epoch": 1.2679951199674664, "grad_norm": 0.31410032510757446, "learning_rate": 9.298378732531962e-06, "loss": 0.3506, "step": 12472 }, { "epoch": 1.2680967873119153, "grad_norm": 0.2989375591278076, "learning_rate": 9.298197431714176e-06, "loss": 0.3416, "step": 12473 }, { "epoch": 1.2681984546563645, "grad_norm": 0.2867048680782318, "learning_rate": 9.298016109243091e-06, "loss": 0.3515, "step": 12474 }, { "epoch": 1.2683001220008134, "grad_norm": 0.31088513135910034, "learning_rate": 9.297834765119624e-06, "loss": 0.3642, "step": 12475 }, { "epoch": 1.2684017893452624, "grad_norm": 0.30610018968582153, "learning_rate": 9.297653399344686e-06, "loss": 0.3905, "step": 12476 }, { "epoch": 1.2685034566897113, "grad_norm": 0.284111350774765, "learning_rate": 9.297472011919192e-06, "loss": 0.3749, "step": 12477 }, { "epoch": 1.2686051240341603, "grad_norm": 0.2924377918243408, "learning_rate": 9.297290602844056e-06, "loss": 0.3844, "step": 12478 }, { "epoch": 1.2687067913786092, "grad_norm": 0.3332768380641937, "learning_rate": 9.297109172120189e-06, "loss": 0.4019, "step": 12479 }, { "epoch": 1.2688084587230581, "grad_norm": 0.28865569829940796, "learning_rate": 9.296927719748508e-06, "loss": 0.3326, "step": 12480 }, { "epoch": 1.268910126067507, "grad_norm": 0.28639402985572815, "learning_rate": 9.296746245729926e-06, "loss": 0.3635, "step": 12481 }, { "epoch": 1.269011793411956, "grad_norm": 0.2921423017978668, "learning_rate": 9.296564750065359e-06, "loss": 0.3685, "step": 12482 }, { "epoch": 1.269113460756405, "grad_norm": 0.32161858677864075, "learning_rate": 9.29638323275572e-06, "loss": 0.351, "step": 12483 }, { "epoch": 1.269215128100854, "grad_norm": 0.3071695864200592, "learning_rate": 9.296201693801921e-06, "loss": 0.3531, "step": 12484 }, { "epoch": 1.2693167954453028, "grad_norm": 0.3018084466457367, "learning_rate": 9.29602013320488e-06, "loss": 0.3631, "step": 12485 }, { "epoch": 1.269418462789752, "grad_norm": 0.27823126316070557, "learning_rate": 9.29583855096551e-06, "loss": 0.3319, "step": 12486 }, { "epoch": 1.269520130134201, "grad_norm": 0.3189076781272888, "learning_rate": 9.295656947084726e-06, "loss": 0.3594, "step": 12487 }, { "epoch": 1.26962179747865, "grad_norm": 0.31373608112335205, "learning_rate": 9.295475321563444e-06, "loss": 0.3447, "step": 12488 }, { "epoch": 1.2697234648230988, "grad_norm": 0.30835598707199097, "learning_rate": 9.295293674402577e-06, "loss": 0.3739, "step": 12489 }, { "epoch": 1.2698251321675478, "grad_norm": 0.27791836857795715, "learning_rate": 9.295112005603041e-06, "loss": 0.3449, "step": 12490 }, { "epoch": 1.2699267995119967, "grad_norm": 0.2987024784088135, "learning_rate": 9.294930315165754e-06, "loss": 0.3584, "step": 12491 }, { "epoch": 1.2700284668564457, "grad_norm": 0.3003759980201721, "learning_rate": 9.294748603091626e-06, "loss": 0.3428, "step": 12492 }, { "epoch": 1.2701301342008946, "grad_norm": 0.2988660931587219, "learning_rate": 9.294566869381577e-06, "loss": 0.383, "step": 12493 }, { "epoch": 1.2702318015453438, "grad_norm": 0.27340972423553467, "learning_rate": 9.29438511403652e-06, "loss": 0.3511, "step": 12494 }, { "epoch": 1.2703334688897927, "grad_norm": 0.2837582230567932, "learning_rate": 9.29420333705737e-06, "loss": 0.3603, "step": 12495 }, { "epoch": 1.2704351362342416, "grad_norm": 0.3062903583049774, "learning_rate": 9.294021538445047e-06, "loss": 0.3525, "step": 12496 }, { "epoch": 1.2705368035786906, "grad_norm": 0.27746322751045227, "learning_rate": 9.293839718200462e-06, "loss": 0.3162, "step": 12497 }, { "epoch": 1.2706384709231395, "grad_norm": 0.27938005328178406, "learning_rate": 9.293657876324532e-06, "loss": 0.3852, "step": 12498 }, { "epoch": 1.2707401382675885, "grad_norm": 0.314527690410614, "learning_rate": 9.293476012818174e-06, "loss": 0.4026, "step": 12499 }, { "epoch": 1.2708418056120374, "grad_norm": 0.3220672011375427, "learning_rate": 9.293294127682307e-06, "loss": 0.3516, "step": 12500 }, { "epoch": 1.2709434729564864, "grad_norm": 0.287199467420578, "learning_rate": 9.293112220917841e-06, "loss": 0.3595, "step": 12501 }, { "epoch": 1.2710451403009353, "grad_norm": 0.2896151840686798, "learning_rate": 9.292930292525697e-06, "loss": 0.3614, "step": 12502 }, { "epoch": 1.2711468076453842, "grad_norm": 0.2971341609954834, "learning_rate": 9.292748342506791e-06, "loss": 0.3654, "step": 12503 }, { "epoch": 1.2712484749898332, "grad_norm": 0.2699001133441925, "learning_rate": 9.292566370862038e-06, "loss": 0.3754, "step": 12504 }, { "epoch": 1.2713501423342821, "grad_norm": 0.2789750099182129, "learning_rate": 9.292384377592355e-06, "loss": 0.3606, "step": 12505 }, { "epoch": 1.271451809678731, "grad_norm": 0.31246402859687805, "learning_rate": 9.292202362698664e-06, "loss": 0.3864, "step": 12506 }, { "epoch": 1.2715534770231802, "grad_norm": 0.28277525305747986, "learning_rate": 9.292020326181873e-06, "loss": 0.3529, "step": 12507 }, { "epoch": 1.2716551443676292, "grad_norm": 0.3080175518989563, "learning_rate": 9.291838268042906e-06, "loss": 0.39, "step": 12508 }, { "epoch": 1.271756811712078, "grad_norm": 0.2887183427810669, "learning_rate": 9.291656188282675e-06, "loss": 0.3464, "step": 12509 }, { "epoch": 1.271858479056527, "grad_norm": 0.2929425835609436, "learning_rate": 9.291474086902102e-06, "loss": 0.3426, "step": 12510 }, { "epoch": 1.271960146400976, "grad_norm": 0.3128879964351654, "learning_rate": 9.291291963902102e-06, "loss": 0.3948, "step": 12511 }, { "epoch": 1.272061813745425, "grad_norm": 0.2846209406852722, "learning_rate": 9.291109819283592e-06, "loss": 0.3653, "step": 12512 }, { "epoch": 1.2721634810898739, "grad_norm": 0.2984684407711029, "learning_rate": 9.290927653047491e-06, "loss": 0.4084, "step": 12513 }, { "epoch": 1.2722651484343228, "grad_norm": 0.3061559200286865, "learning_rate": 9.290745465194717e-06, "loss": 0.3595, "step": 12514 }, { "epoch": 1.272366815778772, "grad_norm": 0.27261826395988464, "learning_rate": 9.290563255726187e-06, "loss": 0.3852, "step": 12515 }, { "epoch": 1.272468483123221, "grad_norm": 0.31553414463996887, "learning_rate": 9.290381024642817e-06, "loss": 0.3331, "step": 12516 }, { "epoch": 1.2725701504676699, "grad_norm": 0.2729276418685913, "learning_rate": 9.290198771945529e-06, "loss": 0.3641, "step": 12517 }, { "epoch": 1.2726718178121188, "grad_norm": 0.31224524974823, "learning_rate": 9.290016497635237e-06, "loss": 0.3941, "step": 12518 }, { "epoch": 1.2727734851565677, "grad_norm": 0.28215762972831726, "learning_rate": 9.289834201712863e-06, "loss": 0.3591, "step": 12519 }, { "epoch": 1.2728751525010167, "grad_norm": 0.2971286475658417, "learning_rate": 9.289651884179324e-06, "loss": 0.3463, "step": 12520 }, { "epoch": 1.2729768198454656, "grad_norm": 0.33802881836891174, "learning_rate": 9.289469545035537e-06, "loss": 0.3436, "step": 12521 }, { "epoch": 1.2730784871899146, "grad_norm": 0.2904939651489258, "learning_rate": 9.289287184282423e-06, "loss": 0.3827, "step": 12522 }, { "epoch": 1.2731801545343635, "grad_norm": 0.2970266342163086, "learning_rate": 9.289104801920899e-06, "loss": 0.354, "step": 12523 }, { "epoch": 1.2732818218788124, "grad_norm": 0.30013537406921387, "learning_rate": 9.288922397951883e-06, "loss": 0.3601, "step": 12524 }, { "epoch": 1.2733834892232614, "grad_norm": 0.3119872212409973, "learning_rate": 9.288739972376296e-06, "loss": 0.3589, "step": 12525 }, { "epoch": 1.2734851565677103, "grad_norm": 0.28535354137420654, "learning_rate": 9.288557525195056e-06, "loss": 0.3798, "step": 12526 }, { "epoch": 1.2735868239121595, "grad_norm": 0.275054007768631, "learning_rate": 9.288375056409083e-06, "loss": 0.3994, "step": 12527 }, { "epoch": 1.2736884912566084, "grad_norm": 0.3133619725704193, "learning_rate": 9.288192566019294e-06, "loss": 0.3783, "step": 12528 }, { "epoch": 1.2737901586010574, "grad_norm": 0.3164253830909729, "learning_rate": 9.288010054026613e-06, "loss": 0.3389, "step": 12529 }, { "epoch": 1.2738918259455063, "grad_norm": 0.2859901785850525, "learning_rate": 9.287827520431954e-06, "loss": 0.3949, "step": 12530 }, { "epoch": 1.2739934932899553, "grad_norm": 0.30594322085380554, "learning_rate": 9.287644965236239e-06, "loss": 0.3579, "step": 12531 }, { "epoch": 1.2740951606344042, "grad_norm": 0.3138011693954468, "learning_rate": 9.287462388440388e-06, "loss": 0.3772, "step": 12532 }, { "epoch": 1.2741968279788531, "grad_norm": 0.29949408769607544, "learning_rate": 9.28727979004532e-06, "loss": 0.3576, "step": 12533 }, { "epoch": 1.274298495323302, "grad_norm": 0.2996273934841156, "learning_rate": 9.287097170051955e-06, "loss": 0.3605, "step": 12534 }, { "epoch": 1.2744001626677512, "grad_norm": 0.29904013872146606, "learning_rate": 9.286914528461216e-06, "loss": 0.3465, "step": 12535 }, { "epoch": 1.2745018300122002, "grad_norm": 0.30677613615989685, "learning_rate": 9.286731865274017e-06, "loss": 0.4058, "step": 12536 }, { "epoch": 1.2746034973566491, "grad_norm": 0.2957547605037689, "learning_rate": 9.286549180491282e-06, "loss": 0.3445, "step": 12537 }, { "epoch": 1.274705164701098, "grad_norm": 0.2709047198295593, "learning_rate": 9.286366474113931e-06, "loss": 0.3142, "step": 12538 }, { "epoch": 1.274806832045547, "grad_norm": 0.3000176250934601, "learning_rate": 9.286183746142885e-06, "loss": 0.4283, "step": 12539 }, { "epoch": 1.274908499389996, "grad_norm": 0.30469396710395813, "learning_rate": 9.286000996579063e-06, "loss": 0.3996, "step": 12540 }, { "epoch": 1.275010166734445, "grad_norm": 0.28271812200546265, "learning_rate": 9.285818225423388e-06, "loss": 0.3936, "step": 12541 }, { "epoch": 1.2751118340788938, "grad_norm": 0.2847056984901428, "learning_rate": 9.285635432676778e-06, "loss": 0.3873, "step": 12542 }, { "epoch": 1.2752135014233428, "grad_norm": 0.2878175377845764, "learning_rate": 9.285452618340155e-06, "loss": 0.3692, "step": 12543 }, { "epoch": 1.2753151687677917, "grad_norm": 0.298035591840744, "learning_rate": 9.285269782414441e-06, "loss": 0.3651, "step": 12544 }, { "epoch": 1.2754168361122407, "grad_norm": 0.2936859130859375, "learning_rate": 9.285086924900554e-06, "loss": 0.3438, "step": 12545 }, { "epoch": 1.2755185034566896, "grad_norm": 0.28689223527908325, "learning_rate": 9.284904045799421e-06, "loss": 0.359, "step": 12546 }, { "epoch": 1.2756201708011385, "grad_norm": 0.3069321811199188, "learning_rate": 9.284721145111957e-06, "loss": 0.4124, "step": 12547 }, { "epoch": 1.2757218381455877, "grad_norm": 0.31000468134880066, "learning_rate": 9.284538222839088e-06, "loss": 0.3491, "step": 12548 }, { "epoch": 1.2758235054900366, "grad_norm": 0.2854805886745453, "learning_rate": 9.284355278981732e-06, "loss": 0.3428, "step": 12549 }, { "epoch": 1.2759251728344856, "grad_norm": 0.3020777702331543, "learning_rate": 9.284172313540811e-06, "loss": 0.3563, "step": 12550 }, { "epoch": 1.2760268401789345, "grad_norm": 0.3153565526008606, "learning_rate": 9.283989326517252e-06, "loss": 0.3589, "step": 12551 }, { "epoch": 1.2761285075233835, "grad_norm": 0.3215333819389343, "learning_rate": 9.283806317911969e-06, "loss": 0.3754, "step": 12552 }, { "epoch": 1.2762301748678324, "grad_norm": 0.2786870002746582, "learning_rate": 9.28362328772589e-06, "loss": 0.4038, "step": 12553 }, { "epoch": 1.2763318422122814, "grad_norm": 0.29663708806037903, "learning_rate": 9.283440235959933e-06, "loss": 0.3531, "step": 12554 }, { "epoch": 1.2764335095567305, "grad_norm": 0.2869502305984497, "learning_rate": 9.283257162615022e-06, "loss": 0.3817, "step": 12555 }, { "epoch": 1.2765351769011795, "grad_norm": 0.29477083683013916, "learning_rate": 9.283074067692081e-06, "loss": 0.3891, "step": 12556 }, { "epoch": 1.2766368442456284, "grad_norm": 0.2912463843822479, "learning_rate": 9.28289095119203e-06, "loss": 0.3939, "step": 12557 }, { "epoch": 1.2767385115900773, "grad_norm": 0.29146531224250793, "learning_rate": 9.282707813115792e-06, "loss": 0.3918, "step": 12558 }, { "epoch": 1.2768401789345263, "grad_norm": 0.2909714877605438, "learning_rate": 9.28252465346429e-06, "loss": 0.3971, "step": 12559 }, { "epoch": 1.2769418462789752, "grad_norm": 0.2878850996494293, "learning_rate": 9.282341472238445e-06, "loss": 0.3846, "step": 12560 }, { "epoch": 1.2770435136234242, "grad_norm": 0.29975390434265137, "learning_rate": 9.282158269439184e-06, "loss": 0.3445, "step": 12561 }, { "epoch": 1.277145180967873, "grad_norm": 0.3064850866794586, "learning_rate": 9.281975045067425e-06, "loss": 0.3561, "step": 12562 }, { "epoch": 1.277246848312322, "grad_norm": 0.28906798362731934, "learning_rate": 9.281791799124093e-06, "loss": 0.355, "step": 12563 }, { "epoch": 1.277348515656771, "grad_norm": 0.28195926547050476, "learning_rate": 9.281608531610114e-06, "loss": 0.362, "step": 12564 }, { "epoch": 1.27745018300122, "grad_norm": 0.3040331304073334, "learning_rate": 9.281425242526408e-06, "loss": 0.3615, "step": 12565 }, { "epoch": 1.2775518503456689, "grad_norm": 0.2841624617576599, "learning_rate": 9.281241931873897e-06, "loss": 0.3515, "step": 12566 }, { "epoch": 1.2776535176901178, "grad_norm": 0.29189345240592957, "learning_rate": 9.281058599653507e-06, "loss": 0.3877, "step": 12567 }, { "epoch": 1.277755185034567, "grad_norm": 0.2822672128677368, "learning_rate": 9.280875245866163e-06, "loss": 0.3637, "step": 12568 }, { "epoch": 1.277856852379016, "grad_norm": 0.29157567024230957, "learning_rate": 9.280691870512786e-06, "loss": 0.3726, "step": 12569 }, { "epoch": 1.2779585197234649, "grad_norm": 0.31539610028266907, "learning_rate": 9.2805084735943e-06, "loss": 0.3574, "step": 12570 }, { "epoch": 1.2780601870679138, "grad_norm": 0.2942371070384979, "learning_rate": 9.28032505511163e-06, "loss": 0.3557, "step": 12571 }, { "epoch": 1.2781618544123627, "grad_norm": 0.30115410685539246, "learning_rate": 9.280141615065699e-06, "loss": 0.3845, "step": 12572 }, { "epoch": 1.2782635217568117, "grad_norm": 0.3028239607810974, "learning_rate": 9.279958153457433e-06, "loss": 0.3664, "step": 12573 }, { "epoch": 1.2783651891012606, "grad_norm": 0.29939329624176025, "learning_rate": 9.279774670287753e-06, "loss": 0.343, "step": 12574 }, { "epoch": 1.2784668564457096, "grad_norm": 0.3165009915828705, "learning_rate": 9.279591165557586e-06, "loss": 0.4025, "step": 12575 }, { "epoch": 1.2785685237901587, "grad_norm": 0.3140961229801178, "learning_rate": 9.279407639267856e-06, "loss": 0.3814, "step": 12576 }, { "epoch": 1.2786701911346077, "grad_norm": 0.3065696358680725, "learning_rate": 9.279224091419488e-06, "loss": 0.3507, "step": 12577 }, { "epoch": 1.2787718584790566, "grad_norm": 0.3163911998271942, "learning_rate": 9.279040522013403e-06, "loss": 0.3707, "step": 12578 }, { "epoch": 1.2788735258235056, "grad_norm": 0.29184383153915405, "learning_rate": 9.278856931050531e-06, "loss": 0.3872, "step": 12579 }, { "epoch": 1.2789751931679545, "grad_norm": 0.2923681139945984, "learning_rate": 9.278673318531794e-06, "loss": 0.3808, "step": 12580 }, { "epoch": 1.2790768605124034, "grad_norm": 0.30450037121772766, "learning_rate": 9.278489684458118e-06, "loss": 0.3705, "step": 12581 }, { "epoch": 1.2791785278568524, "grad_norm": 0.2679387629032135, "learning_rate": 9.278306028830429e-06, "loss": 0.3635, "step": 12582 }, { "epoch": 1.2792801952013013, "grad_norm": 0.3037461042404175, "learning_rate": 9.27812235164965e-06, "loss": 0.3853, "step": 12583 }, { "epoch": 1.2793818625457503, "grad_norm": 0.3069087862968445, "learning_rate": 9.277938652916706e-06, "loss": 0.3996, "step": 12584 }, { "epoch": 1.2794835298901992, "grad_norm": 0.31284356117248535, "learning_rate": 9.277754932632523e-06, "loss": 0.3502, "step": 12585 }, { "epoch": 1.2795851972346481, "grad_norm": 0.31915560364723206, "learning_rate": 9.277571190798028e-06, "loss": 0.3924, "step": 12586 }, { "epoch": 1.279686864579097, "grad_norm": 0.3135499060153961, "learning_rate": 9.277387427414148e-06, "loss": 0.3409, "step": 12587 }, { "epoch": 1.279788531923546, "grad_norm": 0.28911519050598145, "learning_rate": 9.277203642481805e-06, "loss": 0.3469, "step": 12588 }, { "epoch": 1.2798901992679952, "grad_norm": 0.30488184094429016, "learning_rate": 9.277019836001927e-06, "loss": 0.3589, "step": 12589 }, { "epoch": 1.2799918666124441, "grad_norm": 0.3283770680427551, "learning_rate": 9.276836007975438e-06, "loss": 0.335, "step": 12590 }, { "epoch": 1.280093533956893, "grad_norm": 0.3031618297100067, "learning_rate": 9.276652158403268e-06, "loss": 0.3651, "step": 12591 }, { "epoch": 1.280195201301342, "grad_norm": 0.30837106704711914, "learning_rate": 9.27646828728634e-06, "loss": 0.3565, "step": 12592 }, { "epoch": 1.280296868645791, "grad_norm": 0.3469852805137634, "learning_rate": 9.276284394625578e-06, "loss": 0.3886, "step": 12593 }, { "epoch": 1.28039853599024, "grad_norm": 0.30042552947998047, "learning_rate": 9.276100480421915e-06, "loss": 0.3434, "step": 12594 }, { "epoch": 1.2805002033346888, "grad_norm": 0.29851841926574707, "learning_rate": 9.27591654467627e-06, "loss": 0.3449, "step": 12595 }, { "epoch": 1.280601870679138, "grad_norm": 0.3361906409263611, "learning_rate": 9.275732587389579e-06, "loss": 0.4197, "step": 12596 }, { "epoch": 1.280703538023587, "grad_norm": 0.3020058870315552, "learning_rate": 9.275548608562758e-06, "loss": 0.3652, "step": 12597 }, { "epoch": 1.2808052053680359, "grad_norm": 0.2871536910533905, "learning_rate": 9.275364608196743e-06, "loss": 0.3602, "step": 12598 }, { "epoch": 1.2809068727124848, "grad_norm": 0.2962985932826996, "learning_rate": 9.275180586292456e-06, "loss": 0.3922, "step": 12599 }, { "epoch": 1.2810085400569338, "grad_norm": 0.2909022867679596, "learning_rate": 9.274996542850825e-06, "loss": 0.3515, "step": 12600 }, { "epoch": 1.2811102074013827, "grad_norm": 0.2689923048019409, "learning_rate": 9.274812477872776e-06, "loss": 0.3735, "step": 12601 }, { "epoch": 1.2812118747458316, "grad_norm": 0.3080601692199707, "learning_rate": 9.27462839135924e-06, "loss": 0.3751, "step": 12602 }, { "epoch": 1.2813135420902806, "grad_norm": 0.2949630320072174, "learning_rate": 9.274444283311141e-06, "loss": 0.3082, "step": 12603 }, { "epoch": 1.2814152094347295, "grad_norm": 0.2856880724430084, "learning_rate": 9.274260153729406e-06, "loss": 0.3644, "step": 12604 }, { "epoch": 1.2815168767791785, "grad_norm": 0.275905966758728, "learning_rate": 9.274076002614964e-06, "loss": 0.3213, "step": 12605 }, { "epoch": 1.2816185441236274, "grad_norm": 0.3067518174648285, "learning_rate": 9.273891829968745e-06, "loss": 0.3554, "step": 12606 }, { "epoch": 1.2817202114680764, "grad_norm": 0.2831357419490814, "learning_rate": 9.273707635791673e-06, "loss": 0.3644, "step": 12607 }, { "epoch": 1.2818218788125253, "grad_norm": 0.2825385630130768, "learning_rate": 9.273523420084679e-06, "loss": 0.3679, "step": 12608 }, { "epoch": 1.2819235461569745, "grad_norm": 0.2823750972747803, "learning_rate": 9.273339182848689e-06, "loss": 0.3253, "step": 12609 }, { "epoch": 1.2820252135014234, "grad_norm": 0.2911403775215149, "learning_rate": 9.273154924084631e-06, "loss": 0.3737, "step": 12610 }, { "epoch": 1.2821268808458723, "grad_norm": 0.2891060709953308, "learning_rate": 9.272970643793435e-06, "loss": 0.3593, "step": 12611 }, { "epoch": 1.2822285481903213, "grad_norm": 0.2699737846851349, "learning_rate": 9.272786341976027e-06, "loss": 0.3502, "step": 12612 }, { "epoch": 1.2823302155347702, "grad_norm": 0.28481289744377136, "learning_rate": 9.272602018633338e-06, "loss": 0.3389, "step": 12613 }, { "epoch": 1.2824318828792192, "grad_norm": 0.3051776587963104, "learning_rate": 9.272417673766294e-06, "loss": 0.3744, "step": 12614 }, { "epoch": 1.282533550223668, "grad_norm": 0.288471519947052, "learning_rate": 9.272233307375827e-06, "loss": 0.3617, "step": 12615 }, { "epoch": 1.282635217568117, "grad_norm": 0.2705792486667633, "learning_rate": 9.272048919462862e-06, "loss": 0.361, "step": 12616 }, { "epoch": 1.2827368849125662, "grad_norm": 0.2880890965461731, "learning_rate": 9.27186451002833e-06, "loss": 0.3144, "step": 12617 }, { "epoch": 1.2828385522570152, "grad_norm": 0.3135104775428772, "learning_rate": 9.27168007907316e-06, "loss": 0.3885, "step": 12618 }, { "epoch": 1.282940219601464, "grad_norm": 0.30424764752388, "learning_rate": 9.271495626598281e-06, "loss": 0.3422, "step": 12619 }, { "epoch": 1.283041886945913, "grad_norm": 0.31175705790519714, "learning_rate": 9.271311152604623e-06, "loss": 0.3726, "step": 12620 }, { "epoch": 1.283143554290362, "grad_norm": 0.277756929397583, "learning_rate": 9.271126657093113e-06, "loss": 0.3572, "step": 12621 }, { "epoch": 1.283245221634811, "grad_norm": 0.27689144015312195, "learning_rate": 9.270942140064683e-06, "loss": 0.3513, "step": 12622 }, { "epoch": 1.2833468889792599, "grad_norm": 0.2925463616847992, "learning_rate": 9.27075760152026e-06, "loss": 0.3817, "step": 12623 }, { "epoch": 1.2834485563237088, "grad_norm": 0.28940385580062866, "learning_rate": 9.270573041460775e-06, "loss": 0.3622, "step": 12624 }, { "epoch": 1.2835502236681577, "grad_norm": 0.29054495692253113, "learning_rate": 9.27038845988716e-06, "loss": 0.3613, "step": 12625 }, { "epoch": 1.2836518910126067, "grad_norm": 0.3037237823009491, "learning_rate": 9.270203856800341e-06, "loss": 0.3792, "step": 12626 }, { "epoch": 1.2837535583570556, "grad_norm": 0.2860110402107239, "learning_rate": 9.270019232201251e-06, "loss": 0.3753, "step": 12627 }, { "epoch": 1.2838552257015046, "grad_norm": 0.27442529797554016, "learning_rate": 9.269834586090818e-06, "loss": 0.3466, "step": 12628 }, { "epoch": 1.2839568930459535, "grad_norm": 0.3188028335571289, "learning_rate": 9.269649918469972e-06, "loss": 0.3825, "step": 12629 }, { "epoch": 1.2840585603904027, "grad_norm": 0.2949405610561371, "learning_rate": 9.269465229339647e-06, "loss": 0.4037, "step": 12630 }, { "epoch": 1.2841602277348516, "grad_norm": 0.2754268944263458, "learning_rate": 9.269280518700767e-06, "loss": 0.3377, "step": 12631 }, { "epoch": 1.2842618950793006, "grad_norm": 0.3115817904472351, "learning_rate": 9.26909578655427e-06, "loss": 0.3686, "step": 12632 }, { "epoch": 1.2843635624237495, "grad_norm": 0.28638187050819397, "learning_rate": 9.26891103290108e-06, "loss": 0.3395, "step": 12633 }, { "epoch": 1.2844652297681984, "grad_norm": 0.29223692417144775, "learning_rate": 9.268726257742134e-06, "loss": 0.3519, "step": 12634 }, { "epoch": 1.2845668971126474, "grad_norm": 0.28625887632369995, "learning_rate": 9.268541461078356e-06, "loss": 0.3556, "step": 12635 }, { "epoch": 1.2846685644570963, "grad_norm": 0.2976813018321991, "learning_rate": 9.268356642910683e-06, "loss": 0.3635, "step": 12636 }, { "epoch": 1.2847702318015455, "grad_norm": 0.27870017290115356, "learning_rate": 9.268171803240042e-06, "loss": 0.3649, "step": 12637 }, { "epoch": 1.2848718991459944, "grad_norm": 0.30324992537498474, "learning_rate": 9.267986942067366e-06, "loss": 0.3968, "step": 12638 }, { "epoch": 1.2849735664904434, "grad_norm": 0.29320287704467773, "learning_rate": 9.267802059393586e-06, "loss": 0.377, "step": 12639 }, { "epoch": 1.2850752338348923, "grad_norm": 0.2861759066581726, "learning_rate": 9.267617155219635e-06, "loss": 0.3617, "step": 12640 }, { "epoch": 1.2851769011793412, "grad_norm": 0.31333115696907043, "learning_rate": 9.26743222954644e-06, "loss": 0.3781, "step": 12641 }, { "epoch": 1.2852785685237902, "grad_norm": 0.29372552037239075, "learning_rate": 9.267247282374938e-06, "loss": 0.3657, "step": 12642 }, { "epoch": 1.2853802358682391, "grad_norm": 0.2846826910972595, "learning_rate": 9.267062313706056e-06, "loss": 0.3801, "step": 12643 }, { "epoch": 1.285481903212688, "grad_norm": 0.31776365637779236, "learning_rate": 9.26687732354073e-06, "loss": 0.4128, "step": 12644 }, { "epoch": 1.285583570557137, "grad_norm": 0.2718215882778168, "learning_rate": 9.26669231187989e-06, "loss": 0.3394, "step": 12645 }, { "epoch": 1.285685237901586, "grad_norm": 0.293066143989563, "learning_rate": 9.266507278724468e-06, "loss": 0.3825, "step": 12646 }, { "epoch": 1.285786905246035, "grad_norm": 0.2826823592185974, "learning_rate": 9.266322224075395e-06, "loss": 0.3724, "step": 12647 }, { "epoch": 1.2858885725904838, "grad_norm": 0.2766471207141876, "learning_rate": 9.266137147933607e-06, "loss": 0.3355, "step": 12648 }, { "epoch": 1.2859902399349328, "grad_norm": 0.2793286144733429, "learning_rate": 9.265952050300031e-06, "loss": 0.3415, "step": 12649 }, { "epoch": 1.286091907279382, "grad_norm": 0.27863776683807373, "learning_rate": 9.265766931175603e-06, "loss": 0.3608, "step": 12650 }, { "epoch": 1.2861935746238309, "grad_norm": 0.28805655241012573, "learning_rate": 9.265581790561255e-06, "loss": 0.3538, "step": 12651 }, { "epoch": 1.2862952419682798, "grad_norm": 0.2605736255645752, "learning_rate": 9.265396628457922e-06, "loss": 0.3653, "step": 12652 }, { "epoch": 1.2863969093127288, "grad_norm": 0.2890007495880127, "learning_rate": 9.265211444866531e-06, "loss": 0.3962, "step": 12653 }, { "epoch": 1.2864985766571777, "grad_norm": 0.2967246472835541, "learning_rate": 9.26502623978802e-06, "loss": 0.3922, "step": 12654 }, { "epoch": 1.2866002440016266, "grad_norm": 0.27239900827407837, "learning_rate": 9.26484101322332e-06, "loss": 0.3352, "step": 12655 }, { "epoch": 1.2867019113460756, "grad_norm": 0.2971140146255493, "learning_rate": 9.264655765173366e-06, "loss": 0.3713, "step": 12656 }, { "epoch": 1.2868035786905245, "grad_norm": 0.2785007655620575, "learning_rate": 9.264470495639087e-06, "loss": 0.3752, "step": 12657 }, { "epoch": 1.2869052460349737, "grad_norm": 0.31183871626853943, "learning_rate": 9.26428520462142e-06, "loss": 0.4078, "step": 12658 }, { "epoch": 1.2870069133794226, "grad_norm": 0.2861023545265198, "learning_rate": 9.264099892121296e-06, "loss": 0.3248, "step": 12659 }, { "epoch": 1.2871085807238716, "grad_norm": 0.2761324644088745, "learning_rate": 9.263914558139653e-06, "loss": 0.3204, "step": 12660 }, { "epoch": 1.2872102480683205, "grad_norm": 0.2980622351169586, "learning_rate": 9.26372920267742e-06, "loss": 0.3868, "step": 12661 }, { "epoch": 1.2873119154127695, "grad_norm": 0.33641988039016724, "learning_rate": 9.263543825735533e-06, "loss": 0.3955, "step": 12662 }, { "epoch": 1.2874135827572184, "grad_norm": 0.2970842719078064, "learning_rate": 9.263358427314925e-06, "loss": 0.3445, "step": 12663 }, { "epoch": 1.2875152501016673, "grad_norm": 0.3156687617301941, "learning_rate": 9.26317300741653e-06, "loss": 0.3817, "step": 12664 }, { "epoch": 1.2876169174461163, "grad_norm": 0.31869086623191833, "learning_rate": 9.262987566041284e-06, "loss": 0.3959, "step": 12665 }, { "epoch": 1.2877185847905652, "grad_norm": 0.29144078493118286, "learning_rate": 9.262802103190118e-06, "loss": 0.3638, "step": 12666 }, { "epoch": 1.2878202521350142, "grad_norm": 0.2882169783115387, "learning_rate": 9.26261661886397e-06, "loss": 0.3678, "step": 12667 }, { "epoch": 1.287921919479463, "grad_norm": 0.30551812052726746, "learning_rate": 9.26243111306377e-06, "loss": 0.3639, "step": 12668 }, { "epoch": 1.288023586823912, "grad_norm": 0.2931220531463623, "learning_rate": 9.262245585790457e-06, "loss": 0.3618, "step": 12669 }, { "epoch": 1.288125254168361, "grad_norm": 0.28865042328834534, "learning_rate": 9.262060037044962e-06, "loss": 0.342, "step": 12670 }, { "epoch": 1.2882269215128102, "grad_norm": 0.317562073469162, "learning_rate": 9.261874466828222e-06, "loss": 0.3561, "step": 12671 }, { "epoch": 1.288328588857259, "grad_norm": 0.31515219807624817, "learning_rate": 9.261688875141173e-06, "loss": 0.3618, "step": 12672 }, { "epoch": 1.288430256201708, "grad_norm": 0.29949331283569336, "learning_rate": 9.261503261984745e-06, "loss": 0.382, "step": 12673 }, { "epoch": 1.288531923546157, "grad_norm": 0.3272647559642792, "learning_rate": 9.261317627359877e-06, "loss": 0.4038, "step": 12674 }, { "epoch": 1.288633590890606, "grad_norm": 0.2722317576408386, "learning_rate": 9.261131971267505e-06, "loss": 0.3397, "step": 12675 }, { "epoch": 1.2887352582350549, "grad_norm": 0.27575984597206116, "learning_rate": 9.260946293708562e-06, "loss": 0.3738, "step": 12676 }, { "epoch": 1.2888369255795038, "grad_norm": 0.3249792158603668, "learning_rate": 9.260760594683984e-06, "loss": 0.3838, "step": 12677 }, { "epoch": 1.288938592923953, "grad_norm": 0.3119722902774811, "learning_rate": 9.260574874194708e-06, "loss": 0.3854, "step": 12678 }, { "epoch": 1.289040260268402, "grad_norm": 0.30105558037757874, "learning_rate": 9.260389132241666e-06, "loss": 0.4074, "step": 12679 }, { "epoch": 1.2891419276128508, "grad_norm": 0.2826152741909027, "learning_rate": 9.260203368825797e-06, "loss": 0.3753, "step": 12680 }, { "epoch": 1.2892435949572998, "grad_norm": 0.3052076995372772, "learning_rate": 9.260017583948035e-06, "loss": 0.3579, "step": 12681 }, { "epoch": 1.2893452623017487, "grad_norm": 0.29184719920158386, "learning_rate": 9.259831777609318e-06, "loss": 0.4125, "step": 12682 }, { "epoch": 1.2894469296461977, "grad_norm": 0.304788738489151, "learning_rate": 9.25964594981058e-06, "loss": 0.3531, "step": 12683 }, { "epoch": 1.2895485969906466, "grad_norm": 0.30242919921875, "learning_rate": 9.259460100552759e-06, "loss": 0.3702, "step": 12684 }, { "epoch": 1.2896502643350956, "grad_norm": 0.3041332960128784, "learning_rate": 9.25927422983679e-06, "loss": 0.3677, "step": 12685 }, { "epoch": 1.2897519316795445, "grad_norm": 0.2935543656349182, "learning_rate": 9.25908833766361e-06, "loss": 0.3679, "step": 12686 }, { "epoch": 1.2898535990239934, "grad_norm": 0.29605740308761597, "learning_rate": 9.258902424034152e-06, "loss": 0.3551, "step": 12687 }, { "epoch": 1.2899552663684424, "grad_norm": 0.2635168135166168, "learning_rate": 9.258716488949358e-06, "loss": 0.3349, "step": 12688 }, { "epoch": 1.2900569337128913, "grad_norm": 0.2887948751449585, "learning_rate": 9.258530532410162e-06, "loss": 0.3545, "step": 12689 }, { "epoch": 1.2901586010573403, "grad_norm": 0.31397876143455505, "learning_rate": 9.258344554417502e-06, "loss": 0.4112, "step": 12690 }, { "epoch": 1.2902602684017894, "grad_norm": 0.2814549505710602, "learning_rate": 9.258158554972313e-06, "loss": 0.3482, "step": 12691 }, { "epoch": 1.2903619357462384, "grad_norm": 0.2850002348423004, "learning_rate": 9.257972534075534e-06, "loss": 0.3418, "step": 12692 }, { "epoch": 1.2904636030906873, "grad_norm": 0.30672264099121094, "learning_rate": 9.257786491728101e-06, "loss": 0.3627, "step": 12693 }, { "epoch": 1.2905652704351362, "grad_norm": 0.302899569272995, "learning_rate": 9.257600427930951e-06, "loss": 0.3632, "step": 12694 }, { "epoch": 1.2906669377795852, "grad_norm": 0.3070930242538452, "learning_rate": 9.257414342685021e-06, "loss": 0.3311, "step": 12695 }, { "epoch": 1.2907686051240341, "grad_norm": 0.27804842591285706, "learning_rate": 9.257228235991251e-06, "loss": 0.3317, "step": 12696 }, { "epoch": 1.290870272468483, "grad_norm": 0.28068000078201294, "learning_rate": 9.257042107850575e-06, "loss": 0.3632, "step": 12697 }, { "epoch": 1.290971939812932, "grad_norm": 0.3037705421447754, "learning_rate": 9.256855958263935e-06, "loss": 0.3574, "step": 12698 }, { "epoch": 1.2910736071573812, "grad_norm": 0.3066309690475464, "learning_rate": 9.256669787232265e-06, "loss": 0.3689, "step": 12699 }, { "epoch": 1.2911752745018301, "grad_norm": 0.31450337171554565, "learning_rate": 9.256483594756502e-06, "loss": 0.3586, "step": 12700 }, { "epoch": 1.291276941846279, "grad_norm": 0.29993101954460144, "learning_rate": 9.256297380837589e-06, "loss": 0.3691, "step": 12701 }, { "epoch": 1.291378609190728, "grad_norm": 0.3367871344089508, "learning_rate": 9.256111145476461e-06, "loss": 0.4061, "step": 12702 }, { "epoch": 1.291480276535177, "grad_norm": 0.32215356826782227, "learning_rate": 9.255924888674056e-06, "loss": 0.3812, "step": 12703 }, { "epoch": 1.2915819438796259, "grad_norm": 0.2909443974494934, "learning_rate": 9.255738610431311e-06, "loss": 0.3848, "step": 12704 }, { "epoch": 1.2916836112240748, "grad_norm": 0.2894515097141266, "learning_rate": 9.255552310749168e-06, "loss": 0.3495, "step": 12705 }, { "epoch": 1.2917852785685238, "grad_norm": 0.29291248321533203, "learning_rate": 9.255365989628564e-06, "loss": 0.3678, "step": 12706 }, { "epoch": 1.2918869459129727, "grad_norm": 0.3000923991203308, "learning_rate": 9.255179647070437e-06, "loss": 0.356, "step": 12707 }, { "epoch": 1.2919886132574216, "grad_norm": 0.3253907263278961, "learning_rate": 9.254993283075725e-06, "loss": 0.3706, "step": 12708 }, { "epoch": 1.2920902806018706, "grad_norm": 0.31351345777511597, "learning_rate": 9.25480689764537e-06, "loss": 0.3745, "step": 12709 }, { "epoch": 1.2921919479463195, "grad_norm": 0.3031923472881317, "learning_rate": 9.254620490780308e-06, "loss": 0.3901, "step": 12710 }, { "epoch": 1.2922936152907685, "grad_norm": 0.28108280897140503, "learning_rate": 9.254434062481479e-06, "loss": 0.3328, "step": 12711 }, { "epoch": 1.2923952826352176, "grad_norm": 0.28888651728630066, "learning_rate": 9.254247612749821e-06, "loss": 0.333, "step": 12712 }, { "epoch": 1.2924969499796666, "grad_norm": 0.28737515211105347, "learning_rate": 9.254061141586275e-06, "loss": 0.3429, "step": 12713 }, { "epoch": 1.2925986173241155, "grad_norm": 0.2930344343185425, "learning_rate": 9.25387464899178e-06, "loss": 0.3585, "step": 12714 }, { "epoch": 1.2927002846685645, "grad_norm": 0.2853774130344391, "learning_rate": 9.253688134967278e-06, "loss": 0.3814, "step": 12715 }, { "epoch": 1.2928019520130134, "grad_norm": 0.30581235885620117, "learning_rate": 9.253501599513703e-06, "loss": 0.3773, "step": 12716 }, { "epoch": 1.2929036193574623, "grad_norm": 0.29298681020736694, "learning_rate": 9.253315042631998e-06, "loss": 0.3856, "step": 12717 }, { "epoch": 1.2930052867019113, "grad_norm": 0.299776554107666, "learning_rate": 9.253128464323105e-06, "loss": 0.38, "step": 12718 }, { "epoch": 1.2931069540463604, "grad_norm": 0.2879783809185028, "learning_rate": 9.252941864587959e-06, "loss": 0.3342, "step": 12719 }, { "epoch": 1.2932086213908094, "grad_norm": 0.29292920231819153, "learning_rate": 9.252755243427502e-06, "loss": 0.3278, "step": 12720 }, { "epoch": 1.2933102887352583, "grad_norm": 0.30518820881843567, "learning_rate": 9.252568600842678e-06, "loss": 0.3467, "step": 12721 }, { "epoch": 1.2934119560797073, "grad_norm": 0.27291423082351685, "learning_rate": 9.252381936834421e-06, "loss": 0.358, "step": 12722 }, { "epoch": 1.2935136234241562, "grad_norm": 0.29820016026496887, "learning_rate": 9.252195251403676e-06, "loss": 0.3473, "step": 12723 }, { "epoch": 1.2936152907686052, "grad_norm": 0.3018701672554016, "learning_rate": 9.25200854455138e-06, "loss": 0.3799, "step": 12724 }, { "epoch": 1.293716958113054, "grad_norm": 0.2836188077926636, "learning_rate": 9.251821816278476e-06, "loss": 0.3473, "step": 12725 }, { "epoch": 1.293818625457503, "grad_norm": 0.271637499332428, "learning_rate": 9.251635066585904e-06, "loss": 0.3185, "step": 12726 }, { "epoch": 1.293920292801952, "grad_norm": 0.30140891671180725, "learning_rate": 9.251448295474607e-06, "loss": 0.3584, "step": 12727 }, { "epoch": 1.294021960146401, "grad_norm": 0.29694902896881104, "learning_rate": 9.251261502945522e-06, "loss": 0.3446, "step": 12728 }, { "epoch": 1.2941236274908499, "grad_norm": 0.32886803150177, "learning_rate": 9.251074688999592e-06, "loss": 0.3806, "step": 12729 }, { "epoch": 1.2942252948352988, "grad_norm": 0.26776957511901855, "learning_rate": 9.250887853637757e-06, "loss": 0.3503, "step": 12730 }, { "epoch": 1.2943269621797477, "grad_norm": 0.30700963735580444, "learning_rate": 9.25070099686096e-06, "loss": 0.3826, "step": 12731 }, { "epoch": 1.294428629524197, "grad_norm": 0.27101150155067444, "learning_rate": 9.250514118670142e-06, "loss": 0.3689, "step": 12732 }, { "epoch": 1.2945302968686458, "grad_norm": 0.26959073543548584, "learning_rate": 9.250327219066245e-06, "loss": 0.3431, "step": 12733 }, { "epoch": 1.2946319642130948, "grad_norm": 0.3019293546676636, "learning_rate": 9.250140298050207e-06, "loss": 0.3579, "step": 12734 }, { "epoch": 1.2947336315575437, "grad_norm": 0.30324500799179077, "learning_rate": 9.249953355622974e-06, "loss": 0.3608, "step": 12735 }, { "epoch": 1.2948352989019927, "grad_norm": 0.2828615605831146, "learning_rate": 9.249766391785484e-06, "loss": 0.3743, "step": 12736 }, { "epoch": 1.2949369662464416, "grad_norm": 0.2911299467086792, "learning_rate": 9.249579406538682e-06, "loss": 0.3679, "step": 12737 }, { "epoch": 1.2950386335908906, "grad_norm": 0.339876651763916, "learning_rate": 9.249392399883507e-06, "loss": 0.3711, "step": 12738 }, { "epoch": 1.2951403009353395, "grad_norm": 0.29732605814933777, "learning_rate": 9.249205371820906e-06, "loss": 0.3389, "step": 12739 }, { "epoch": 1.2952419682797887, "grad_norm": 0.2909485995769501, "learning_rate": 9.249018322351817e-06, "loss": 0.3562, "step": 12740 }, { "epoch": 1.2953436356242376, "grad_norm": 0.29600003361701965, "learning_rate": 9.248831251477182e-06, "loss": 0.3509, "step": 12741 }, { "epoch": 1.2954453029686865, "grad_norm": 0.2898816466331482, "learning_rate": 9.248644159197947e-06, "loss": 0.3524, "step": 12742 }, { "epoch": 1.2955469703131355, "grad_norm": 0.30897170305252075, "learning_rate": 9.248457045515051e-06, "loss": 0.3458, "step": 12743 }, { "epoch": 1.2956486376575844, "grad_norm": 0.2850162386894226, "learning_rate": 9.24826991042944e-06, "loss": 0.3351, "step": 12744 }, { "epoch": 1.2957503050020334, "grad_norm": 0.3021424412727356, "learning_rate": 9.248082753942052e-06, "loss": 0.3635, "step": 12745 }, { "epoch": 1.2958519723464823, "grad_norm": 0.308479905128479, "learning_rate": 9.247895576053834e-06, "loss": 0.3996, "step": 12746 }, { "epoch": 1.2959536396909312, "grad_norm": 0.2779332995414734, "learning_rate": 9.247708376765727e-06, "loss": 0.3542, "step": 12747 }, { "epoch": 1.2960553070353802, "grad_norm": 0.31240972876548767, "learning_rate": 9.247521156078673e-06, "loss": 0.3903, "step": 12748 }, { "epoch": 1.2961569743798291, "grad_norm": 0.2864086627960205, "learning_rate": 9.24733391399362e-06, "loss": 0.367, "step": 12749 }, { "epoch": 1.296258641724278, "grad_norm": 0.2984943091869354, "learning_rate": 9.247146650511508e-06, "loss": 0.3631, "step": 12750 }, { "epoch": 1.296360309068727, "grad_norm": 0.3004455864429474, "learning_rate": 9.246959365633277e-06, "loss": 0.3625, "step": 12751 }, { "epoch": 1.296461976413176, "grad_norm": 0.3051648437976837, "learning_rate": 9.246772059359877e-06, "loss": 0.345, "step": 12752 }, { "epoch": 1.2965636437576251, "grad_norm": 0.2824643850326538, "learning_rate": 9.246584731692247e-06, "loss": 0.3811, "step": 12753 }, { "epoch": 1.296665311102074, "grad_norm": 0.32244423031806946, "learning_rate": 9.246397382631334e-06, "loss": 0.3694, "step": 12754 }, { "epoch": 1.296766978446523, "grad_norm": 0.2987682819366455, "learning_rate": 9.246210012178078e-06, "loss": 0.3509, "step": 12755 }, { "epoch": 1.296868645790972, "grad_norm": 0.28525009751319885, "learning_rate": 9.246022620333426e-06, "loss": 0.3369, "step": 12756 }, { "epoch": 1.2969703131354209, "grad_norm": 0.30401551723480225, "learning_rate": 9.24583520709832e-06, "loss": 0.3504, "step": 12757 }, { "epoch": 1.2970719804798698, "grad_norm": 0.2890906035900116, "learning_rate": 9.245647772473708e-06, "loss": 0.342, "step": 12758 }, { "epoch": 1.2971736478243188, "grad_norm": 0.26554715633392334, "learning_rate": 9.245460316460529e-06, "loss": 0.387, "step": 12759 }, { "epoch": 1.297275315168768, "grad_norm": 0.2808889150619507, "learning_rate": 9.24527283905973e-06, "loss": 0.3446, "step": 12760 }, { "epoch": 1.2973769825132169, "grad_norm": 0.3155176639556885, "learning_rate": 9.245085340272255e-06, "loss": 0.3817, "step": 12761 }, { "epoch": 1.2974786498576658, "grad_norm": 0.33860623836517334, "learning_rate": 9.24489782009905e-06, "loss": 0.3701, "step": 12762 }, { "epoch": 1.2975803172021148, "grad_norm": 0.28788700699806213, "learning_rate": 9.244710278541058e-06, "loss": 0.3785, "step": 12763 }, { "epoch": 1.2976819845465637, "grad_norm": 0.2738306224346161, "learning_rate": 9.244522715599224e-06, "loss": 0.373, "step": 12764 }, { "epoch": 1.2977836518910126, "grad_norm": 0.3097994923591614, "learning_rate": 9.244335131274494e-06, "loss": 0.3821, "step": 12765 }, { "epoch": 1.2978853192354616, "grad_norm": 0.3020177483558655, "learning_rate": 9.244147525567813e-06, "loss": 0.3695, "step": 12766 }, { "epoch": 1.2979869865799105, "grad_norm": 0.30265292525291443, "learning_rate": 9.243959898480124e-06, "loss": 0.3878, "step": 12767 }, { "epoch": 1.2980886539243595, "grad_norm": 0.2976587116718292, "learning_rate": 9.243772250012372e-06, "loss": 0.3676, "step": 12768 }, { "epoch": 1.2981903212688084, "grad_norm": 0.2994072139263153, "learning_rate": 9.243584580165507e-06, "loss": 0.3471, "step": 12769 }, { "epoch": 1.2982919886132573, "grad_norm": 0.3225623369216919, "learning_rate": 9.243396888940471e-06, "loss": 0.348, "step": 12770 }, { "epoch": 1.2983936559577063, "grad_norm": 0.3274918794631958, "learning_rate": 9.243209176338207e-06, "loss": 0.3532, "step": 12771 }, { "epoch": 1.2984953233021552, "grad_norm": 0.2702167332172394, "learning_rate": 9.243021442359665e-06, "loss": 0.3544, "step": 12772 }, { "epoch": 1.2985969906466044, "grad_norm": 0.31813833117485046, "learning_rate": 9.24283368700579e-06, "loss": 0.3708, "step": 12773 }, { "epoch": 1.2986986579910533, "grad_norm": 0.3090766370296478, "learning_rate": 9.242645910277526e-06, "loss": 0.3649, "step": 12774 }, { "epoch": 1.2988003253355023, "grad_norm": 0.29012709856033325, "learning_rate": 9.242458112175821e-06, "loss": 0.3683, "step": 12775 }, { "epoch": 1.2989019926799512, "grad_norm": 0.3126906156539917, "learning_rate": 9.242270292701621e-06, "loss": 0.3892, "step": 12776 }, { "epoch": 1.2990036600244002, "grad_norm": 0.28280016779899597, "learning_rate": 9.24208245185587e-06, "loss": 0.3759, "step": 12777 }, { "epoch": 1.299105327368849, "grad_norm": 0.2945895791053772, "learning_rate": 9.241894589639515e-06, "loss": 0.3407, "step": 12778 }, { "epoch": 1.299206994713298, "grad_norm": 0.31282684206962585, "learning_rate": 9.241706706053505e-06, "loss": 0.3793, "step": 12779 }, { "epoch": 1.299308662057747, "grad_norm": 0.2854566276073456, "learning_rate": 9.241518801098783e-06, "loss": 0.3758, "step": 12780 }, { "epoch": 1.2994103294021961, "grad_norm": 0.31013569235801697, "learning_rate": 9.241330874776298e-06, "loss": 0.3628, "step": 12781 }, { "epoch": 1.299511996746645, "grad_norm": 0.32503148913383484, "learning_rate": 9.241142927086997e-06, "loss": 0.3724, "step": 12782 }, { "epoch": 1.299613664091094, "grad_norm": 0.3056611716747284, "learning_rate": 9.240954958031825e-06, "loss": 0.3868, "step": 12783 }, { "epoch": 1.299715331435543, "grad_norm": 0.30000025033950806, "learning_rate": 9.240766967611728e-06, "loss": 0.3728, "step": 12784 }, { "epoch": 1.299816998779992, "grad_norm": 0.3068498373031616, "learning_rate": 9.240578955827657e-06, "loss": 0.3476, "step": 12785 }, { "epoch": 1.2999186661244408, "grad_norm": 0.2811797559261322, "learning_rate": 9.240390922680555e-06, "loss": 0.3484, "step": 12786 }, { "epoch": 1.3000203334688898, "grad_norm": 0.2948254942893982, "learning_rate": 9.240202868171373e-06, "loss": 0.3698, "step": 12787 }, { "epoch": 1.3001220008133387, "grad_norm": 0.303281307220459, "learning_rate": 9.240014792301056e-06, "loss": 0.3573, "step": 12788 }, { "epoch": 1.3002236681577877, "grad_norm": 0.30476629734039307, "learning_rate": 9.239826695070551e-06, "loss": 0.3466, "step": 12789 }, { "epoch": 1.3003253355022366, "grad_norm": 0.29771700501441956, "learning_rate": 9.239638576480808e-06, "loss": 0.3516, "step": 12790 }, { "epoch": 1.3004270028466856, "grad_norm": 0.30458277463912964, "learning_rate": 9.239450436532773e-06, "loss": 0.3898, "step": 12791 }, { "epoch": 1.3005286701911345, "grad_norm": 0.2758307158946991, "learning_rate": 9.239262275227394e-06, "loss": 0.368, "step": 12792 }, { "epoch": 1.3006303375355834, "grad_norm": 0.294696569442749, "learning_rate": 9.23907409256562e-06, "loss": 0.3614, "step": 12793 }, { "epoch": 1.3007320048800326, "grad_norm": 0.31050971150398254, "learning_rate": 9.238885888548396e-06, "loss": 0.3515, "step": 12794 }, { "epoch": 1.3008336722244815, "grad_norm": 0.29356372356414795, "learning_rate": 9.238697663176674e-06, "loss": 0.3422, "step": 12795 }, { "epoch": 1.3009353395689305, "grad_norm": 0.2799536883831024, "learning_rate": 9.2385094164514e-06, "loss": 0.3573, "step": 12796 }, { "epoch": 1.3010370069133794, "grad_norm": 0.30515584349632263, "learning_rate": 9.238321148373523e-06, "loss": 0.3612, "step": 12797 }, { "epoch": 1.3011386742578284, "grad_norm": 0.3264712393283844, "learning_rate": 9.23813285894399e-06, "loss": 0.3544, "step": 12798 }, { "epoch": 1.3012403416022773, "grad_norm": 0.3359415829181671, "learning_rate": 9.237944548163754e-06, "loss": 0.4075, "step": 12799 }, { "epoch": 1.3013420089467262, "grad_norm": 0.3433261811733246, "learning_rate": 9.237756216033757e-06, "loss": 0.3793, "step": 12800 }, { "epoch": 1.3014436762911754, "grad_norm": 0.29264363646507263, "learning_rate": 9.237567862554952e-06, "loss": 0.3866, "step": 12801 }, { "epoch": 1.3015453436356244, "grad_norm": 0.29958391189575195, "learning_rate": 9.237379487728288e-06, "loss": 0.3879, "step": 12802 }, { "epoch": 1.3016470109800733, "grad_norm": 0.2994372248649597, "learning_rate": 9.237191091554713e-06, "loss": 0.3618, "step": 12803 }, { "epoch": 1.3017486783245222, "grad_norm": 0.2765255272388458, "learning_rate": 9.237002674035176e-06, "loss": 0.348, "step": 12804 }, { "epoch": 1.3018503456689712, "grad_norm": 0.2938484251499176, "learning_rate": 9.236814235170627e-06, "loss": 0.3428, "step": 12805 }, { "epoch": 1.3019520130134201, "grad_norm": 0.29518911242485046, "learning_rate": 9.236625774962011e-06, "loss": 0.3889, "step": 12806 }, { "epoch": 1.302053680357869, "grad_norm": 0.28408095240592957, "learning_rate": 9.236437293410285e-06, "loss": 0.3667, "step": 12807 }, { "epoch": 1.302155347702318, "grad_norm": 0.29761186242103577, "learning_rate": 9.236248790516393e-06, "loss": 0.3882, "step": 12808 }, { "epoch": 1.302257015046767, "grad_norm": 0.29249247908592224, "learning_rate": 9.236060266281289e-06, "loss": 0.371, "step": 12809 }, { "epoch": 1.3023586823912159, "grad_norm": 0.30300506949424744, "learning_rate": 9.235871720705916e-06, "loss": 0.4005, "step": 12810 }, { "epoch": 1.3024603497356648, "grad_norm": 0.2982597053050995, "learning_rate": 9.235683153791231e-06, "loss": 0.3859, "step": 12811 }, { "epoch": 1.3025620170801138, "grad_norm": 0.2597239911556244, "learning_rate": 9.23549456553818e-06, "loss": 0.365, "step": 12812 }, { "epoch": 1.3026636844245627, "grad_norm": 0.27950525283813477, "learning_rate": 9.235305955947713e-06, "loss": 0.3812, "step": 12813 }, { "epoch": 1.3027653517690119, "grad_norm": 0.3007309138774872, "learning_rate": 9.235117325020781e-06, "loss": 0.3578, "step": 12814 }, { "epoch": 1.3028670191134608, "grad_norm": 0.28188467025756836, "learning_rate": 9.234928672758336e-06, "loss": 0.3743, "step": 12815 }, { "epoch": 1.3029686864579098, "grad_norm": 0.2929272949695587, "learning_rate": 9.234739999161325e-06, "loss": 0.3522, "step": 12816 }, { "epoch": 1.3030703538023587, "grad_norm": 0.2986360192298889, "learning_rate": 9.234551304230701e-06, "loss": 0.3938, "step": 12817 }, { "epoch": 1.3031720211468076, "grad_norm": 0.3078341484069824, "learning_rate": 9.234362587967414e-06, "loss": 0.3526, "step": 12818 }, { "epoch": 1.3032736884912566, "grad_norm": 0.3324905335903168, "learning_rate": 9.234173850372414e-06, "loss": 0.3656, "step": 12819 }, { "epoch": 1.3033753558357055, "grad_norm": 0.29729634523391724, "learning_rate": 9.233985091446652e-06, "loss": 0.3509, "step": 12820 }, { "epoch": 1.3034770231801545, "grad_norm": 0.30918461084365845, "learning_rate": 9.23379631119108e-06, "loss": 0.3804, "step": 12821 }, { "epoch": 1.3035786905246036, "grad_norm": 0.344904363155365, "learning_rate": 9.233607509606648e-06, "loss": 0.3732, "step": 12822 }, { "epoch": 1.3036803578690526, "grad_norm": 0.2956813871860504, "learning_rate": 9.233418686694308e-06, "loss": 0.3569, "step": 12823 }, { "epoch": 1.3037820252135015, "grad_norm": 0.3057190179824829, "learning_rate": 9.23322984245501e-06, "loss": 0.3512, "step": 12824 }, { "epoch": 1.3038836925579504, "grad_norm": 0.30819278955459595, "learning_rate": 9.233040976889707e-06, "loss": 0.3423, "step": 12825 }, { "epoch": 1.3039853599023994, "grad_norm": 0.3016458749771118, "learning_rate": 9.232852089999348e-06, "loss": 0.3505, "step": 12826 }, { "epoch": 1.3040870272468483, "grad_norm": 0.28853410482406616, "learning_rate": 9.232663181784886e-06, "loss": 0.3756, "step": 12827 }, { "epoch": 1.3041886945912973, "grad_norm": 0.3125762641429901, "learning_rate": 9.232474252247275e-06, "loss": 0.3584, "step": 12828 }, { "epoch": 1.3042903619357462, "grad_norm": 0.3084246516227722, "learning_rate": 9.232285301387462e-06, "loss": 0.4343, "step": 12829 }, { "epoch": 1.3043920292801952, "grad_norm": 0.2993406355381012, "learning_rate": 9.232096329206402e-06, "loss": 0.3555, "step": 12830 }, { "epoch": 1.304493696624644, "grad_norm": 0.2977331578731537, "learning_rate": 9.231907335705047e-06, "loss": 0.3675, "step": 12831 }, { "epoch": 1.304595363969093, "grad_norm": 0.30087974667549133, "learning_rate": 9.231718320884349e-06, "loss": 0.3736, "step": 12832 }, { "epoch": 1.304697031313542, "grad_norm": 0.3177599012851715, "learning_rate": 9.231529284745257e-06, "loss": 0.3673, "step": 12833 }, { "epoch": 1.304798698657991, "grad_norm": 0.2796318829059601, "learning_rate": 9.231340227288728e-06, "loss": 0.3362, "step": 12834 }, { "epoch": 1.30490036600244, "grad_norm": 0.3402027487754822, "learning_rate": 9.231151148515713e-06, "loss": 0.3579, "step": 12835 }, { "epoch": 1.305002033346889, "grad_norm": 0.3106691539287567, "learning_rate": 9.230962048427164e-06, "loss": 0.3605, "step": 12836 }, { "epoch": 1.305103700691338, "grad_norm": 0.29818934202194214, "learning_rate": 9.230772927024032e-06, "loss": 0.3495, "step": 12837 }, { "epoch": 1.305205368035787, "grad_norm": 0.28696513175964355, "learning_rate": 9.230583784307274e-06, "loss": 0.3291, "step": 12838 }, { "epoch": 1.3053070353802358, "grad_norm": 0.3064005374908447, "learning_rate": 9.230394620277839e-06, "loss": 0.3587, "step": 12839 }, { "epoch": 1.3054087027246848, "grad_norm": 0.30506306886672974, "learning_rate": 9.23020543493668e-06, "loss": 0.3608, "step": 12840 }, { "epoch": 1.3055103700691337, "grad_norm": 0.3129926323890686, "learning_rate": 9.230016228284753e-06, "loss": 0.3658, "step": 12841 }, { "epoch": 1.305612037413583, "grad_norm": 0.27453187108039856, "learning_rate": 9.229827000323007e-06, "loss": 0.379, "step": 12842 }, { "epoch": 1.3057137047580318, "grad_norm": 0.2901052236557007, "learning_rate": 9.2296377510524e-06, "loss": 0.3789, "step": 12843 }, { "epoch": 1.3058153721024808, "grad_norm": 0.29301655292510986, "learning_rate": 9.229448480473883e-06, "loss": 0.3538, "step": 12844 }, { "epoch": 1.3059170394469297, "grad_norm": 0.27535516023635864, "learning_rate": 9.229259188588409e-06, "loss": 0.3374, "step": 12845 }, { "epoch": 1.3060187067913787, "grad_norm": 0.29302921891212463, "learning_rate": 9.229069875396933e-06, "loss": 0.3691, "step": 12846 }, { "epoch": 1.3061203741358276, "grad_norm": 0.28095632791519165, "learning_rate": 9.228880540900406e-06, "loss": 0.3288, "step": 12847 }, { "epoch": 1.3062220414802765, "grad_norm": 0.2721687853336334, "learning_rate": 9.228691185099785e-06, "loss": 0.3818, "step": 12848 }, { "epoch": 1.3063237088247255, "grad_norm": 0.2823489010334015, "learning_rate": 9.228501807996024e-06, "loss": 0.3395, "step": 12849 }, { "epoch": 1.3064253761691744, "grad_norm": 0.34018072485923767, "learning_rate": 9.228312409590073e-06, "loss": 0.3537, "step": 12850 }, { "epoch": 1.3065270435136234, "grad_norm": 0.3035721778869629, "learning_rate": 9.228122989882891e-06, "loss": 0.3523, "step": 12851 }, { "epoch": 1.3066287108580723, "grad_norm": 0.31892096996307373, "learning_rate": 9.22793354887543e-06, "loss": 0.3564, "step": 12852 }, { "epoch": 1.3067303782025212, "grad_norm": 0.3155500888824463, "learning_rate": 9.227744086568644e-06, "loss": 0.3558, "step": 12853 }, { "epoch": 1.3068320455469702, "grad_norm": 0.3080615699291229, "learning_rate": 9.227554602963487e-06, "loss": 0.3667, "step": 12854 }, { "epoch": 1.3069337128914194, "grad_norm": 0.2960961163043976, "learning_rate": 9.227365098060916e-06, "loss": 0.3255, "step": 12855 }, { "epoch": 1.3070353802358683, "grad_norm": 0.3118617534637451, "learning_rate": 9.227175571861884e-06, "loss": 0.3468, "step": 12856 }, { "epoch": 1.3071370475803172, "grad_norm": 0.28399214148521423, "learning_rate": 9.226986024367346e-06, "loss": 0.3536, "step": 12857 }, { "epoch": 1.3072387149247662, "grad_norm": 0.304475337266922, "learning_rate": 9.226796455578255e-06, "loss": 0.3552, "step": 12858 }, { "epoch": 1.3073403822692151, "grad_norm": 0.2834874987602234, "learning_rate": 9.226606865495569e-06, "loss": 0.3119, "step": 12859 }, { "epoch": 1.307442049613664, "grad_norm": 0.32578063011169434, "learning_rate": 9.226417254120244e-06, "loss": 0.3665, "step": 12860 }, { "epoch": 1.307543716958113, "grad_norm": 0.29455238580703735, "learning_rate": 9.226227621453232e-06, "loss": 0.3634, "step": 12861 }, { "epoch": 1.307645384302562, "grad_norm": 0.31872427463531494, "learning_rate": 9.226037967495488e-06, "loss": 0.3658, "step": 12862 }, { "epoch": 1.307747051647011, "grad_norm": 0.3080240786075592, "learning_rate": 9.225848292247972e-06, "loss": 0.404, "step": 12863 }, { "epoch": 1.30784871899146, "grad_norm": 0.28708648681640625, "learning_rate": 9.225658595711633e-06, "loss": 0.3597, "step": 12864 }, { "epoch": 1.307950386335909, "grad_norm": 0.2999286353588104, "learning_rate": 9.225468877887433e-06, "loss": 0.3711, "step": 12865 }, { "epoch": 1.308052053680358, "grad_norm": 0.3097676634788513, "learning_rate": 9.225279138776323e-06, "loss": 0.3517, "step": 12866 }, { "epoch": 1.3081537210248069, "grad_norm": 0.30040135979652405, "learning_rate": 9.225089378379261e-06, "loss": 0.4164, "step": 12867 }, { "epoch": 1.3082553883692558, "grad_norm": 0.2762987017631531, "learning_rate": 9.224899596697204e-06, "loss": 0.3405, "step": 12868 }, { "epoch": 1.3083570557137048, "grad_norm": 0.2850562632083893, "learning_rate": 9.224709793731107e-06, "loss": 0.3481, "step": 12869 }, { "epoch": 1.3084587230581537, "grad_norm": 0.2940986752510071, "learning_rate": 9.224519969481924e-06, "loss": 0.3602, "step": 12870 }, { "epoch": 1.3085603904026026, "grad_norm": 0.29947248101234436, "learning_rate": 9.224330123950615e-06, "loss": 0.3729, "step": 12871 }, { "epoch": 1.3086620577470516, "grad_norm": 0.3202880322933197, "learning_rate": 9.224140257138135e-06, "loss": 0.3915, "step": 12872 }, { "epoch": 1.3087637250915005, "grad_norm": 0.2904311716556549, "learning_rate": 9.22395036904544e-06, "loss": 0.3832, "step": 12873 }, { "epoch": 1.3088653924359495, "grad_norm": 0.29235708713531494, "learning_rate": 9.223760459673485e-06, "loss": 0.3672, "step": 12874 }, { "epoch": 1.3089670597803984, "grad_norm": 0.28316107392311096, "learning_rate": 9.223570529023228e-06, "loss": 0.3852, "step": 12875 }, { "epoch": 1.3090687271248476, "grad_norm": 0.2808661460876465, "learning_rate": 9.22338057709563e-06, "loss": 0.3376, "step": 12876 }, { "epoch": 1.3091703944692965, "grad_norm": 0.33529043197631836, "learning_rate": 9.223190603891643e-06, "loss": 0.3605, "step": 12877 }, { "epoch": 1.3092720618137454, "grad_norm": 0.29363590478897095, "learning_rate": 9.223000609412224e-06, "loss": 0.3528, "step": 12878 }, { "epoch": 1.3093737291581944, "grad_norm": 0.27796700596809387, "learning_rate": 9.222810593658332e-06, "loss": 0.3513, "step": 12879 }, { "epoch": 1.3094753965026433, "grad_norm": 0.29803571105003357, "learning_rate": 9.222620556630924e-06, "loss": 0.4114, "step": 12880 }, { "epoch": 1.3095770638470923, "grad_norm": 0.29880404472351074, "learning_rate": 9.222430498330957e-06, "loss": 0.3557, "step": 12881 }, { "epoch": 1.3096787311915412, "grad_norm": 0.2791113257408142, "learning_rate": 9.222240418759388e-06, "loss": 0.3791, "step": 12882 }, { "epoch": 1.3097803985359904, "grad_norm": 0.2889201045036316, "learning_rate": 9.222050317917177e-06, "loss": 0.3639, "step": 12883 }, { "epoch": 1.3098820658804393, "grad_norm": 0.2820950150489807, "learning_rate": 9.221860195805277e-06, "loss": 0.3806, "step": 12884 }, { "epoch": 1.3099837332248883, "grad_norm": 0.28335675597190857, "learning_rate": 9.221670052424651e-06, "loss": 0.3583, "step": 12885 }, { "epoch": 1.3100854005693372, "grad_norm": 0.27435460686683655, "learning_rate": 9.221479887776254e-06, "loss": 0.3499, "step": 12886 }, { "epoch": 1.3101870679137861, "grad_norm": 0.28444361686706543, "learning_rate": 9.221289701861042e-06, "loss": 0.3587, "step": 12887 }, { "epoch": 1.310288735258235, "grad_norm": 0.28307145833969116, "learning_rate": 9.22109949467998e-06, "loss": 0.3402, "step": 12888 }, { "epoch": 1.310390402602684, "grad_norm": 0.27146321535110474, "learning_rate": 9.220909266234019e-06, "loss": 0.3375, "step": 12889 }, { "epoch": 1.310492069947133, "grad_norm": 0.28602778911590576, "learning_rate": 9.22071901652412e-06, "loss": 0.3648, "step": 12890 }, { "epoch": 1.310593737291582, "grad_norm": 0.28755494952201843, "learning_rate": 9.220528745551242e-06, "loss": 0.3524, "step": 12891 }, { "epoch": 1.3106954046360308, "grad_norm": 0.27459239959716797, "learning_rate": 9.220338453316344e-06, "loss": 0.3612, "step": 12892 }, { "epoch": 1.3107970719804798, "grad_norm": 0.294540137052536, "learning_rate": 9.220148139820382e-06, "loss": 0.3686, "step": 12893 }, { "epoch": 1.3108987393249287, "grad_norm": 0.29606717824935913, "learning_rate": 9.219957805064319e-06, "loss": 0.3635, "step": 12894 }, { "epoch": 1.3110004066693777, "grad_norm": 0.2727319002151489, "learning_rate": 9.21976744904911e-06, "loss": 0.3397, "step": 12895 }, { "epoch": 1.3111020740138268, "grad_norm": 0.29285797476768494, "learning_rate": 9.219577071775715e-06, "loss": 0.3866, "step": 12896 }, { "epoch": 1.3112037413582758, "grad_norm": 0.28979313373565674, "learning_rate": 9.219386673245094e-06, "loss": 0.3867, "step": 12897 }, { "epoch": 1.3113054087027247, "grad_norm": 0.315616250038147, "learning_rate": 9.219196253458205e-06, "loss": 0.3445, "step": 12898 }, { "epoch": 1.3114070760471737, "grad_norm": 0.2858460545539856, "learning_rate": 9.219005812416008e-06, "loss": 0.3839, "step": 12899 }, { "epoch": 1.3115087433916226, "grad_norm": 0.3122733235359192, "learning_rate": 9.218815350119463e-06, "loss": 0.3563, "step": 12900 }, { "epoch": 1.3116104107360715, "grad_norm": 0.28220775723457336, "learning_rate": 9.21862486656953e-06, "loss": 0.3393, "step": 12901 }, { "epoch": 1.3117120780805205, "grad_norm": 0.29155004024505615, "learning_rate": 9.218434361767165e-06, "loss": 0.3767, "step": 12902 }, { "epoch": 1.3118137454249694, "grad_norm": 0.28503790497779846, "learning_rate": 9.218243835713331e-06, "loss": 0.3666, "step": 12903 }, { "epoch": 1.3119154127694186, "grad_norm": 0.3168448805809021, "learning_rate": 9.218053288408986e-06, "loss": 0.364, "step": 12904 }, { "epoch": 1.3120170801138675, "grad_norm": 0.3135705590248108, "learning_rate": 9.217862719855094e-06, "loss": 0.3695, "step": 12905 }, { "epoch": 1.3121187474583165, "grad_norm": 0.28020015358924866, "learning_rate": 9.217672130052609e-06, "loss": 0.3619, "step": 12906 }, { "epoch": 1.3122204148027654, "grad_norm": 0.2781907618045807, "learning_rate": 9.217481519002495e-06, "loss": 0.3552, "step": 12907 }, { "epoch": 1.3123220821472144, "grad_norm": 0.3079131841659546, "learning_rate": 9.21729088670571e-06, "loss": 0.3471, "step": 12908 }, { "epoch": 1.3124237494916633, "grad_norm": 0.2969326674938202, "learning_rate": 9.217100233163219e-06, "loss": 0.3587, "step": 12909 }, { "epoch": 1.3125254168361122, "grad_norm": 0.2925916016101837, "learning_rate": 9.216909558375977e-06, "loss": 0.336, "step": 12910 }, { "epoch": 1.3126270841805612, "grad_norm": 0.2768878638744354, "learning_rate": 9.216718862344947e-06, "loss": 0.3487, "step": 12911 }, { "epoch": 1.3127287515250101, "grad_norm": 0.3063093423843384, "learning_rate": 9.216528145071088e-06, "loss": 0.3647, "step": 12912 }, { "epoch": 1.312830418869459, "grad_norm": 0.29134586453437805, "learning_rate": 9.216337406555365e-06, "loss": 0.3604, "step": 12913 }, { "epoch": 1.312932086213908, "grad_norm": 0.3054860532283783, "learning_rate": 9.216146646798733e-06, "loss": 0.3902, "step": 12914 }, { "epoch": 1.313033753558357, "grad_norm": 0.2979033589363098, "learning_rate": 9.215955865802157e-06, "loss": 0.3664, "step": 12915 }, { "epoch": 1.3131354209028059, "grad_norm": 0.3108646273612976, "learning_rate": 9.215765063566599e-06, "loss": 0.336, "step": 12916 }, { "epoch": 1.313237088247255, "grad_norm": 0.2830895483493805, "learning_rate": 9.215574240093018e-06, "loss": 0.3535, "step": 12917 }, { "epoch": 1.313338755591704, "grad_norm": 0.2992279529571533, "learning_rate": 9.215383395382374e-06, "loss": 0.3924, "step": 12918 }, { "epoch": 1.313440422936153, "grad_norm": 0.32855212688446045, "learning_rate": 9.215192529435632e-06, "loss": 0.3623, "step": 12919 }, { "epoch": 1.3135420902806019, "grad_norm": 0.27739644050598145, "learning_rate": 9.215001642253752e-06, "loss": 0.356, "step": 12920 }, { "epoch": 1.3136437576250508, "grad_norm": 0.29765626788139343, "learning_rate": 9.214810733837693e-06, "loss": 0.3734, "step": 12921 }, { "epoch": 1.3137454249694998, "grad_norm": 0.3008922338485718, "learning_rate": 9.21461980418842e-06, "loss": 0.358, "step": 12922 }, { "epoch": 1.3138470923139487, "grad_norm": 0.2837320864200592, "learning_rate": 9.214428853306896e-06, "loss": 0.36, "step": 12923 }, { "epoch": 1.3139487596583979, "grad_norm": 0.30617791414260864, "learning_rate": 9.21423788119408e-06, "loss": 0.3779, "step": 12924 }, { "epoch": 1.3140504270028468, "grad_norm": 0.28274139761924744, "learning_rate": 9.214046887850934e-06, "loss": 0.3612, "step": 12925 }, { "epoch": 1.3141520943472957, "grad_norm": 0.28721699118614197, "learning_rate": 9.213855873278421e-06, "loss": 0.3502, "step": 12926 }, { "epoch": 1.3142537616917447, "grad_norm": 0.2810119390487671, "learning_rate": 9.213664837477504e-06, "loss": 0.357, "step": 12927 }, { "epoch": 1.3143554290361936, "grad_norm": 0.2926957309246063, "learning_rate": 9.213473780449144e-06, "loss": 0.3552, "step": 12928 }, { "epoch": 1.3144570963806426, "grad_norm": 0.27800217270851135, "learning_rate": 9.213282702194306e-06, "loss": 0.3434, "step": 12929 }, { "epoch": 1.3145587637250915, "grad_norm": 0.27670761942863464, "learning_rate": 9.213091602713951e-06, "loss": 0.3543, "step": 12930 }, { "epoch": 1.3146604310695404, "grad_norm": 0.2971634566783905, "learning_rate": 9.212900482009041e-06, "loss": 0.3574, "step": 12931 }, { "epoch": 1.3147620984139894, "grad_norm": 0.3213036060333252, "learning_rate": 9.212709340080539e-06, "loss": 0.3727, "step": 12932 }, { "epoch": 1.3148637657584383, "grad_norm": 0.2859221398830414, "learning_rate": 9.212518176929409e-06, "loss": 0.3604, "step": 12933 }, { "epoch": 1.3149654331028873, "grad_norm": 0.2825081944465637, "learning_rate": 9.212326992556612e-06, "loss": 0.3579, "step": 12934 }, { "epoch": 1.3150671004473362, "grad_norm": 0.33268091082572937, "learning_rate": 9.212135786963114e-06, "loss": 0.3669, "step": 12935 }, { "epoch": 1.3151687677917852, "grad_norm": 0.29362404346466064, "learning_rate": 9.211944560149879e-06, "loss": 0.324, "step": 12936 }, { "epoch": 1.3152704351362343, "grad_norm": 0.2963200807571411, "learning_rate": 9.211753312117865e-06, "loss": 0.3867, "step": 12937 }, { "epoch": 1.3153721024806833, "grad_norm": 0.29167279601097107, "learning_rate": 9.21156204286804e-06, "loss": 0.3593, "step": 12938 }, { "epoch": 1.3154737698251322, "grad_norm": 0.3231664299964905, "learning_rate": 9.211370752401367e-06, "loss": 0.375, "step": 12939 }, { "epoch": 1.3155754371695811, "grad_norm": 0.3280071020126343, "learning_rate": 9.211179440718807e-06, "loss": 0.3607, "step": 12940 }, { "epoch": 1.31567710451403, "grad_norm": 0.3077678680419922, "learning_rate": 9.210988107821327e-06, "loss": 0.3643, "step": 12941 }, { "epoch": 1.315778771858479, "grad_norm": 0.3002897799015045, "learning_rate": 9.21079675370989e-06, "loss": 0.3818, "step": 12942 }, { "epoch": 1.315880439202928, "grad_norm": 0.3000817596912384, "learning_rate": 9.210605378385457e-06, "loss": 0.3819, "step": 12943 }, { "epoch": 1.315982106547377, "grad_norm": 0.2780478000640869, "learning_rate": 9.210413981848998e-06, "loss": 0.3812, "step": 12944 }, { "epoch": 1.316083773891826, "grad_norm": 0.2809930741786957, "learning_rate": 9.210222564101472e-06, "loss": 0.3517, "step": 12945 }, { "epoch": 1.316185441236275, "grad_norm": 0.29824087023735046, "learning_rate": 9.210031125143846e-06, "loss": 0.3899, "step": 12946 }, { "epoch": 1.316287108580724, "grad_norm": 0.2944251000881195, "learning_rate": 9.209839664977082e-06, "loss": 0.3724, "step": 12947 }, { "epoch": 1.316388775925173, "grad_norm": 0.2912956774234772, "learning_rate": 9.209648183602149e-06, "loss": 0.3838, "step": 12948 }, { "epoch": 1.3164904432696218, "grad_norm": 0.27347254753112793, "learning_rate": 9.209456681020007e-06, "loss": 0.3533, "step": 12949 }, { "epoch": 1.3165921106140708, "grad_norm": 0.28110918402671814, "learning_rate": 9.209265157231623e-06, "loss": 0.3594, "step": 12950 }, { "epoch": 1.3166937779585197, "grad_norm": 0.321570485830307, "learning_rate": 9.209073612237961e-06, "loss": 0.3491, "step": 12951 }, { "epoch": 1.3167954453029687, "grad_norm": 0.28210902214050293, "learning_rate": 9.208882046039986e-06, "loss": 0.4031, "step": 12952 }, { "epoch": 1.3168971126474176, "grad_norm": 0.29082125425338745, "learning_rate": 9.208690458638666e-06, "loss": 0.3312, "step": 12953 }, { "epoch": 1.3169987799918665, "grad_norm": 0.29844316840171814, "learning_rate": 9.208498850034962e-06, "loss": 0.3377, "step": 12954 }, { "epoch": 1.3171004473363155, "grad_norm": 0.2766881287097931, "learning_rate": 9.208307220229839e-06, "loss": 0.3377, "step": 12955 }, { "epoch": 1.3172021146807644, "grad_norm": 0.28312769532203674, "learning_rate": 9.208115569224266e-06, "loss": 0.3821, "step": 12956 }, { "epoch": 1.3173037820252134, "grad_norm": 0.3301333785057068, "learning_rate": 9.207923897019206e-06, "loss": 0.3652, "step": 12957 }, { "epoch": 1.3174054493696625, "grad_norm": 0.29452407360076904, "learning_rate": 9.207732203615627e-06, "loss": 0.3413, "step": 12958 }, { "epoch": 1.3175071167141115, "grad_norm": 0.27160680294036865, "learning_rate": 9.207540489014493e-06, "loss": 0.3678, "step": 12959 }, { "epoch": 1.3176087840585604, "grad_norm": 0.29079341888427734, "learning_rate": 9.207348753216768e-06, "loss": 0.3412, "step": 12960 }, { "epoch": 1.3177104514030094, "grad_norm": 0.3273373544216156, "learning_rate": 9.20715699622342e-06, "loss": 0.374, "step": 12961 }, { "epoch": 1.3178121187474583, "grad_norm": 0.33998167514801025, "learning_rate": 9.206965218035416e-06, "loss": 0.396, "step": 12962 }, { "epoch": 1.3179137860919072, "grad_norm": 0.3097760081291199, "learning_rate": 9.206773418653721e-06, "loss": 0.3926, "step": 12963 }, { "epoch": 1.3180154534363562, "grad_norm": 0.2809465527534485, "learning_rate": 9.2065815980793e-06, "loss": 0.3771, "step": 12964 }, { "epoch": 1.3181171207808053, "grad_norm": 0.29016512632369995, "learning_rate": 9.206389756313122e-06, "loss": 0.35, "step": 12965 }, { "epoch": 1.3182187881252543, "grad_norm": 0.3180590271949768, "learning_rate": 9.206197893356151e-06, "loss": 0.361, "step": 12966 }, { "epoch": 1.3183204554697032, "grad_norm": 0.29804810881614685, "learning_rate": 9.206006009209355e-06, "loss": 0.37, "step": 12967 }, { "epoch": 1.3184221228141522, "grad_norm": 0.3013626039028168, "learning_rate": 9.205814103873701e-06, "loss": 0.3688, "step": 12968 }, { "epoch": 1.318523790158601, "grad_norm": 0.3057146370410919, "learning_rate": 9.205622177350153e-06, "loss": 0.4031, "step": 12969 }, { "epoch": 1.31862545750305, "grad_norm": 0.286426305770874, "learning_rate": 9.205430229639682e-06, "loss": 0.3577, "step": 12970 }, { "epoch": 1.318727124847499, "grad_norm": 0.2820485830307007, "learning_rate": 9.205238260743252e-06, "loss": 0.3651, "step": 12971 }, { "epoch": 1.318828792191948, "grad_norm": 0.3057822287082672, "learning_rate": 9.205046270661831e-06, "loss": 0.3808, "step": 12972 }, { "epoch": 1.3189304595363969, "grad_norm": 0.28764429688453674, "learning_rate": 9.204854259396386e-06, "loss": 0.3783, "step": 12973 }, { "epoch": 1.3190321268808458, "grad_norm": 0.298676073551178, "learning_rate": 9.204662226947882e-06, "loss": 0.373, "step": 12974 }, { "epoch": 1.3191337942252948, "grad_norm": 0.299180805683136, "learning_rate": 9.204470173317292e-06, "loss": 0.3702, "step": 12975 }, { "epoch": 1.3192354615697437, "grad_norm": 0.3033396899700165, "learning_rate": 9.20427809850558e-06, "loss": 0.3845, "step": 12976 }, { "epoch": 1.3193371289141926, "grad_norm": 0.30608102679252625, "learning_rate": 9.204086002513715e-06, "loss": 0.3801, "step": 12977 }, { "epoch": 1.3194387962586418, "grad_norm": 0.30487871170043945, "learning_rate": 9.203893885342661e-06, "loss": 0.3769, "step": 12978 }, { "epoch": 1.3195404636030907, "grad_norm": 0.29227757453918457, "learning_rate": 9.20370174699339e-06, "loss": 0.3596, "step": 12979 }, { "epoch": 1.3196421309475397, "grad_norm": 0.31377625465393066, "learning_rate": 9.203509587466869e-06, "loss": 0.3856, "step": 12980 }, { "epoch": 1.3197437982919886, "grad_norm": 0.27316299080848694, "learning_rate": 9.203317406764065e-06, "loss": 0.3822, "step": 12981 }, { "epoch": 1.3198454656364376, "grad_norm": 0.2981416881084442, "learning_rate": 9.203125204885947e-06, "loss": 0.375, "step": 12982 }, { "epoch": 1.3199471329808865, "grad_norm": 0.3014845550060272, "learning_rate": 9.202932981833483e-06, "loss": 0.3606, "step": 12983 }, { "epoch": 1.3200488003253354, "grad_norm": 0.2916781008243561, "learning_rate": 9.202740737607641e-06, "loss": 0.3679, "step": 12984 }, { "epoch": 1.3201504676697844, "grad_norm": 0.2937464416027069, "learning_rate": 9.20254847220939e-06, "loss": 0.3857, "step": 12985 }, { "epoch": 1.3202521350142336, "grad_norm": 0.29482409358024597, "learning_rate": 9.2023561856397e-06, "loss": 0.3638, "step": 12986 }, { "epoch": 1.3203538023586825, "grad_norm": 0.30849555134773254, "learning_rate": 9.202163877899536e-06, "loss": 0.3887, "step": 12987 }, { "epoch": 1.3204554697031314, "grad_norm": 0.2829645276069641, "learning_rate": 9.201971548989871e-06, "loss": 0.3658, "step": 12988 }, { "epoch": 1.3205571370475804, "grad_norm": 0.2765011489391327, "learning_rate": 9.20177919891167e-06, "loss": 0.3593, "step": 12989 }, { "epoch": 1.3206588043920293, "grad_norm": 0.2864892780780792, "learning_rate": 9.201586827665905e-06, "loss": 0.3686, "step": 12990 }, { "epoch": 1.3207604717364783, "grad_norm": 0.29711663722991943, "learning_rate": 9.201394435253545e-06, "loss": 0.3774, "step": 12991 }, { "epoch": 1.3208621390809272, "grad_norm": 0.28754886984825134, "learning_rate": 9.201202021675558e-06, "loss": 0.3684, "step": 12992 }, { "epoch": 1.3209638064253761, "grad_norm": 0.272932767868042, "learning_rate": 9.20100958693291e-06, "loss": 0.3633, "step": 12993 }, { "epoch": 1.321065473769825, "grad_norm": 0.31952524185180664, "learning_rate": 9.200817131026578e-06, "loss": 0.3652, "step": 12994 }, { "epoch": 1.321167141114274, "grad_norm": 0.27925243973731995, "learning_rate": 9.200624653957527e-06, "loss": 0.3442, "step": 12995 }, { "epoch": 1.321268808458723, "grad_norm": 0.2928663194179535, "learning_rate": 9.200432155726725e-06, "loss": 0.3949, "step": 12996 }, { "epoch": 1.321370475803172, "grad_norm": 0.30121946334838867, "learning_rate": 9.200239636335145e-06, "loss": 0.3819, "step": 12997 }, { "epoch": 1.3214721431476208, "grad_norm": 0.27917468547821045, "learning_rate": 9.200047095783756e-06, "loss": 0.3418, "step": 12998 }, { "epoch": 1.32157381049207, "grad_norm": 0.31340324878692627, "learning_rate": 9.199854534073529e-06, "loss": 0.341, "step": 12999 }, { "epoch": 1.321675477836519, "grad_norm": 0.267407089471817, "learning_rate": 9.199661951205433e-06, "loss": 0.3575, "step": 13000 }, { "epoch": 1.321777145180968, "grad_norm": 0.29484057426452637, "learning_rate": 9.199469347180435e-06, "loss": 0.3529, "step": 13001 }, { "epoch": 1.3218788125254168, "grad_norm": 0.31558990478515625, "learning_rate": 9.19927672199951e-06, "loss": 0.3585, "step": 13002 }, { "epoch": 1.3219804798698658, "grad_norm": 0.2675586938858032, "learning_rate": 9.199084075663628e-06, "loss": 0.3427, "step": 13003 }, { "epoch": 1.3220821472143147, "grad_norm": 0.3121856153011322, "learning_rate": 9.198891408173757e-06, "loss": 0.3783, "step": 13004 }, { "epoch": 1.3221838145587637, "grad_norm": 0.31870996952056885, "learning_rate": 9.19869871953087e-06, "loss": 0.3579, "step": 13005 }, { "epoch": 1.3222854819032128, "grad_norm": 0.3026132583618164, "learning_rate": 9.198506009735935e-06, "loss": 0.3891, "step": 13006 }, { "epoch": 1.3223871492476618, "grad_norm": 0.28276556730270386, "learning_rate": 9.198313278789926e-06, "loss": 0.3827, "step": 13007 }, { "epoch": 1.3224888165921107, "grad_norm": 0.2724732458591461, "learning_rate": 9.198120526693812e-06, "loss": 0.3313, "step": 13008 }, { "epoch": 1.3225904839365596, "grad_norm": 0.29360881447792053, "learning_rate": 9.197927753448564e-06, "loss": 0.3591, "step": 13009 }, { "epoch": 1.3226921512810086, "grad_norm": 0.3146187961101532, "learning_rate": 9.197734959055153e-06, "loss": 0.3635, "step": 13010 }, { "epoch": 1.3227938186254575, "grad_norm": 0.2851278483867645, "learning_rate": 9.197542143514552e-06, "loss": 0.378, "step": 13011 }, { "epoch": 1.3228954859699065, "grad_norm": 0.2824869453907013, "learning_rate": 9.19734930682773e-06, "loss": 0.3855, "step": 13012 }, { "epoch": 1.3229971533143554, "grad_norm": 0.2983466386795044, "learning_rate": 9.19715644899566e-06, "loss": 0.3664, "step": 13013 }, { "epoch": 1.3230988206588044, "grad_norm": 0.29163631796836853, "learning_rate": 9.196963570019315e-06, "loss": 0.3423, "step": 13014 }, { "epoch": 1.3232004880032533, "grad_norm": 0.28091439604759216, "learning_rate": 9.196770669899663e-06, "loss": 0.396, "step": 13015 }, { "epoch": 1.3233021553477022, "grad_norm": 0.3074396550655365, "learning_rate": 9.196577748637678e-06, "loss": 0.3881, "step": 13016 }, { "epoch": 1.3234038226921512, "grad_norm": 0.3042314648628235, "learning_rate": 9.196384806234332e-06, "loss": 0.3587, "step": 13017 }, { "epoch": 1.3235054900366001, "grad_norm": 0.27635982632637024, "learning_rate": 9.196191842690596e-06, "loss": 0.3588, "step": 13018 }, { "epoch": 1.3236071573810493, "grad_norm": 0.3063599467277527, "learning_rate": 9.195998858007442e-06, "loss": 0.3678, "step": 13019 }, { "epoch": 1.3237088247254982, "grad_norm": 0.30397140979766846, "learning_rate": 9.195805852185845e-06, "loss": 0.3473, "step": 13020 }, { "epoch": 1.3238104920699472, "grad_norm": 0.3142342269420624, "learning_rate": 9.195612825226774e-06, "loss": 0.3814, "step": 13021 }, { "epoch": 1.323912159414396, "grad_norm": 0.2815108299255371, "learning_rate": 9.195419777131203e-06, "loss": 0.3451, "step": 13022 }, { "epoch": 1.324013826758845, "grad_norm": 0.3027137517929077, "learning_rate": 9.195226707900102e-06, "loss": 0.3719, "step": 13023 }, { "epoch": 1.324115494103294, "grad_norm": 0.30333349108695984, "learning_rate": 9.195033617534448e-06, "loss": 0.3577, "step": 13024 }, { "epoch": 1.324217161447743, "grad_norm": 0.280020534992218, "learning_rate": 9.194840506035213e-06, "loss": 0.3785, "step": 13025 }, { "epoch": 1.3243188287921919, "grad_norm": 0.31506454944610596, "learning_rate": 9.194647373403365e-06, "loss": 0.3849, "step": 13026 }, { "epoch": 1.324420496136641, "grad_norm": 0.2797456681728363, "learning_rate": 9.19445421963988e-06, "loss": 0.3635, "step": 13027 }, { "epoch": 1.32452216348109, "grad_norm": 0.28765004873275757, "learning_rate": 9.194261044745733e-06, "loss": 0.3426, "step": 13028 }, { "epoch": 1.324623830825539, "grad_norm": 0.3225862979888916, "learning_rate": 9.194067848721897e-06, "loss": 0.4001, "step": 13029 }, { "epoch": 1.3247254981699879, "grad_norm": 0.26083970069885254, "learning_rate": 9.193874631569344e-06, "loss": 0.349, "step": 13030 }, { "epoch": 1.3248271655144368, "grad_norm": 0.2679082155227661, "learning_rate": 9.193681393289045e-06, "loss": 0.3587, "step": 13031 }, { "epoch": 1.3249288328588857, "grad_norm": 0.28475460410118103, "learning_rate": 9.193488133881977e-06, "loss": 0.3801, "step": 13032 }, { "epoch": 1.3250305002033347, "grad_norm": 0.2738172709941864, "learning_rate": 9.19329485334911e-06, "loss": 0.3504, "step": 13033 }, { "epoch": 1.3251321675477836, "grad_norm": 0.29564332962036133, "learning_rate": 9.193101551691423e-06, "loss": 0.3733, "step": 13034 }, { "epoch": 1.3252338348922326, "grad_norm": 0.2849465608596802, "learning_rate": 9.192908228909885e-06, "loss": 0.3699, "step": 13035 }, { "epoch": 1.3253355022366815, "grad_norm": 0.26535680890083313, "learning_rate": 9.19271488500547e-06, "loss": 0.347, "step": 13036 }, { "epoch": 1.3254371695811304, "grad_norm": 0.28298112750053406, "learning_rate": 9.192521519979159e-06, "loss": 0.3727, "step": 13037 }, { "epoch": 1.3255388369255794, "grad_norm": 0.3074210286140442, "learning_rate": 9.192328133831919e-06, "loss": 0.3563, "step": 13038 }, { "epoch": 1.3256405042700283, "grad_norm": 0.3118720054626465, "learning_rate": 9.192134726564722e-06, "loss": 0.3412, "step": 13039 }, { "epoch": 1.3257421716144775, "grad_norm": 0.2993200421333313, "learning_rate": 9.19194129817855e-06, "loss": 0.361, "step": 13040 }, { "epoch": 1.3258438389589264, "grad_norm": 0.28326863050460815, "learning_rate": 9.191747848674376e-06, "loss": 0.349, "step": 13041 }, { "epoch": 1.3259455063033754, "grad_norm": 0.2990299165248871, "learning_rate": 9.191554378053169e-06, "loss": 0.369, "step": 13042 }, { "epoch": 1.3260471736478243, "grad_norm": 0.29313138127326965, "learning_rate": 9.191360886315908e-06, "loss": 0.3581, "step": 13043 }, { "epoch": 1.3261488409922733, "grad_norm": 0.31951022148132324, "learning_rate": 9.191167373463566e-06, "loss": 0.3776, "step": 13044 }, { "epoch": 1.3262505083367222, "grad_norm": 0.29388704895973206, "learning_rate": 9.190973839497119e-06, "loss": 0.3572, "step": 13045 }, { "epoch": 1.3263521756811711, "grad_norm": 0.3067184090614319, "learning_rate": 9.190780284417543e-06, "loss": 0.3749, "step": 13046 }, { "epoch": 1.3264538430256203, "grad_norm": 0.31039345264434814, "learning_rate": 9.19058670822581e-06, "loss": 0.365, "step": 13047 }, { "epoch": 1.3265555103700692, "grad_norm": 0.3002910912036896, "learning_rate": 9.1903931109229e-06, "loss": 0.3772, "step": 13048 }, { "epoch": 1.3266571777145182, "grad_norm": 0.28468215465545654, "learning_rate": 9.190199492509783e-06, "loss": 0.3807, "step": 13049 }, { "epoch": 1.3267588450589671, "grad_norm": 0.3276577889919281, "learning_rate": 9.190005852987437e-06, "loss": 0.3382, "step": 13050 }, { "epoch": 1.326860512403416, "grad_norm": 0.28419575095176697, "learning_rate": 9.189812192356838e-06, "loss": 0.3604, "step": 13051 }, { "epoch": 1.326962179747865, "grad_norm": 0.27141961455345154, "learning_rate": 9.189618510618959e-06, "loss": 0.3516, "step": 13052 }, { "epoch": 1.327063847092314, "grad_norm": 0.31143301725387573, "learning_rate": 9.189424807774779e-06, "loss": 0.344, "step": 13053 }, { "epoch": 1.327165514436763, "grad_norm": 0.29385167360305786, "learning_rate": 9.189231083825272e-06, "loss": 0.3526, "step": 13054 }, { "epoch": 1.3272671817812118, "grad_norm": 0.2996138036251068, "learning_rate": 9.189037338771414e-06, "loss": 0.3867, "step": 13055 }, { "epoch": 1.3273688491256608, "grad_norm": 0.31848734617233276, "learning_rate": 9.188843572614183e-06, "loss": 0.3706, "step": 13056 }, { "epoch": 1.3274705164701097, "grad_norm": 0.29015934467315674, "learning_rate": 9.188649785354552e-06, "loss": 0.3635, "step": 13057 }, { "epoch": 1.3275721838145587, "grad_norm": 0.27183079719543457, "learning_rate": 9.188455976993499e-06, "loss": 0.3754, "step": 13058 }, { "epoch": 1.3276738511590076, "grad_norm": 0.3042788803577423, "learning_rate": 9.188262147532e-06, "loss": 0.3842, "step": 13059 }, { "epoch": 1.3277755185034568, "grad_norm": 0.28916630148887634, "learning_rate": 9.18806829697103e-06, "loss": 0.3543, "step": 13060 }, { "epoch": 1.3278771858479057, "grad_norm": 0.2850886881351471, "learning_rate": 9.187874425311571e-06, "loss": 0.3548, "step": 13061 }, { "epoch": 1.3279788531923546, "grad_norm": 0.2987445592880249, "learning_rate": 9.187680532554593e-06, "loss": 0.3524, "step": 13062 }, { "epoch": 1.3280805205368036, "grad_norm": 0.27617257833480835, "learning_rate": 9.187486618701077e-06, "loss": 0.3481, "step": 13063 }, { "epoch": 1.3281821878812525, "grad_norm": 0.27431783080101013, "learning_rate": 9.187292683751997e-06, "loss": 0.3583, "step": 13064 }, { "epoch": 1.3282838552257015, "grad_norm": 0.2978878319263458, "learning_rate": 9.187098727708333e-06, "loss": 0.3669, "step": 13065 }, { "epoch": 1.3283855225701504, "grad_norm": 0.2858758866786957, "learning_rate": 9.18690475057106e-06, "loss": 0.3842, "step": 13066 }, { "epoch": 1.3284871899145994, "grad_norm": 0.2953967750072479, "learning_rate": 9.186710752341156e-06, "loss": 0.3563, "step": 13067 }, { "epoch": 1.3285888572590485, "grad_norm": 0.27582311630249023, "learning_rate": 9.186516733019598e-06, "loss": 0.3519, "step": 13068 }, { "epoch": 1.3286905246034975, "grad_norm": 0.30268391966819763, "learning_rate": 9.186322692607363e-06, "loss": 0.359, "step": 13069 }, { "epoch": 1.3287921919479464, "grad_norm": 0.2881579101085663, "learning_rate": 9.18612863110543e-06, "loss": 0.4002, "step": 13070 }, { "epoch": 1.3288938592923953, "grad_norm": 0.28119832277297974, "learning_rate": 9.185934548514775e-06, "loss": 0.361, "step": 13071 }, { "epoch": 1.3289955266368443, "grad_norm": 0.30871036648750305, "learning_rate": 9.185740444836378e-06, "loss": 0.3588, "step": 13072 }, { "epoch": 1.3290971939812932, "grad_norm": 0.27533847093582153, "learning_rate": 9.185546320071214e-06, "loss": 0.3477, "step": 13073 }, { "epoch": 1.3291988613257422, "grad_norm": 0.30149000883102417, "learning_rate": 9.185352174220261e-06, "loss": 0.3769, "step": 13074 }, { "epoch": 1.329300528670191, "grad_norm": 0.3017546534538269, "learning_rate": 9.1851580072845e-06, "loss": 0.3939, "step": 13075 }, { "epoch": 1.32940219601464, "grad_norm": 0.2969115674495697, "learning_rate": 9.184963819264908e-06, "loss": 0.3433, "step": 13076 }, { "epoch": 1.329503863359089, "grad_norm": 0.2973248064517975, "learning_rate": 9.184769610162463e-06, "loss": 0.3756, "step": 13077 }, { "epoch": 1.329605530703538, "grad_norm": 0.28110942244529724, "learning_rate": 9.184575379978142e-06, "loss": 0.3482, "step": 13078 }, { "epoch": 1.3297071980479869, "grad_norm": 0.3140701651573181, "learning_rate": 9.184381128712925e-06, "loss": 0.3819, "step": 13079 }, { "epoch": 1.3298088653924358, "grad_norm": 0.29377350211143494, "learning_rate": 9.18418685636779e-06, "loss": 0.3696, "step": 13080 }, { "epoch": 1.329910532736885, "grad_norm": 0.3061273992061615, "learning_rate": 9.183992562943716e-06, "loss": 0.3574, "step": 13081 }, { "epoch": 1.330012200081334, "grad_norm": 0.27773720026016235, "learning_rate": 9.183798248441682e-06, "loss": 0.3724, "step": 13082 }, { "epoch": 1.3301138674257829, "grad_norm": 0.29527971148490906, "learning_rate": 9.183603912862667e-06, "loss": 0.3858, "step": 13083 }, { "epoch": 1.3302155347702318, "grad_norm": 0.3282037675380707, "learning_rate": 9.183409556207648e-06, "loss": 0.3547, "step": 13084 }, { "epoch": 1.3303172021146807, "grad_norm": 0.285174161195755, "learning_rate": 9.183215178477607e-06, "loss": 0.3838, "step": 13085 }, { "epoch": 1.3304188694591297, "grad_norm": 0.28331834077835083, "learning_rate": 9.183020779673521e-06, "loss": 0.3972, "step": 13086 }, { "epoch": 1.3305205368035786, "grad_norm": 0.30578944087028503, "learning_rate": 9.182826359796371e-06, "loss": 0.3854, "step": 13087 }, { "epoch": 1.3306222041480278, "grad_norm": 0.30102360248565674, "learning_rate": 9.182631918847137e-06, "loss": 0.393, "step": 13088 }, { "epoch": 1.3307238714924767, "grad_norm": 0.29282310605049133, "learning_rate": 9.182437456826796e-06, "loss": 0.3004, "step": 13089 }, { "epoch": 1.3308255388369257, "grad_norm": 0.28564655780792236, "learning_rate": 9.18224297373633e-06, "loss": 0.3381, "step": 13090 }, { "epoch": 1.3309272061813746, "grad_norm": 0.3181111216545105, "learning_rate": 9.182048469576717e-06, "loss": 0.3681, "step": 13091 }, { "epoch": 1.3310288735258236, "grad_norm": 0.2794477939605713, "learning_rate": 9.181853944348938e-06, "loss": 0.3678, "step": 13092 }, { "epoch": 1.3311305408702725, "grad_norm": 0.32772886753082275, "learning_rate": 9.181659398053972e-06, "loss": 0.3796, "step": 13093 }, { "epoch": 1.3312322082147214, "grad_norm": 0.31170880794525146, "learning_rate": 9.1814648306928e-06, "loss": 0.3686, "step": 13094 }, { "epoch": 1.3313338755591704, "grad_norm": 0.2846924662590027, "learning_rate": 9.181270242266403e-06, "loss": 0.341, "step": 13095 }, { "epoch": 1.3314355429036193, "grad_norm": 0.31031230092048645, "learning_rate": 9.181075632775757e-06, "loss": 0.3703, "step": 13096 }, { "epoch": 1.3315372102480683, "grad_norm": 0.27317047119140625, "learning_rate": 9.180881002221849e-06, "loss": 0.3452, "step": 13097 }, { "epoch": 1.3316388775925172, "grad_norm": 0.2812516987323761, "learning_rate": 9.180686350605655e-06, "loss": 0.3598, "step": 13098 }, { "epoch": 1.3317405449369661, "grad_norm": 0.28841832280158997, "learning_rate": 9.180491677928156e-06, "loss": 0.3607, "step": 13099 }, { "epoch": 1.331842212281415, "grad_norm": 0.2957531809806824, "learning_rate": 9.180296984190333e-06, "loss": 0.3653, "step": 13100 }, { "epoch": 1.3319438796258642, "grad_norm": 0.2951976954936981, "learning_rate": 9.180102269393168e-06, "loss": 0.3723, "step": 13101 }, { "epoch": 1.3320455469703132, "grad_norm": 0.30443274974823, "learning_rate": 9.17990753353764e-06, "loss": 0.3914, "step": 13102 }, { "epoch": 1.3321472143147621, "grad_norm": 0.3021535873413086, "learning_rate": 9.179712776624732e-06, "loss": 0.3877, "step": 13103 }, { "epoch": 1.332248881659211, "grad_norm": 0.2940797209739685, "learning_rate": 9.179517998655425e-06, "loss": 0.3433, "step": 13104 }, { "epoch": 1.33235054900366, "grad_norm": 0.2917134165763855, "learning_rate": 9.179323199630699e-06, "loss": 0.3689, "step": 13105 }, { "epoch": 1.332452216348109, "grad_norm": 0.3093563914299011, "learning_rate": 9.179128379551535e-06, "loss": 0.3892, "step": 13106 }, { "epoch": 1.332553883692558, "grad_norm": 0.31464236974716187, "learning_rate": 9.178933538418915e-06, "loss": 0.3787, "step": 13107 }, { "epoch": 1.3326555510370068, "grad_norm": 0.2961561381816864, "learning_rate": 9.178738676233823e-06, "loss": 0.3567, "step": 13108 }, { "epoch": 1.332757218381456, "grad_norm": 0.2979443371295929, "learning_rate": 9.178543792997237e-06, "loss": 0.3529, "step": 13109 }, { "epoch": 1.332858885725905, "grad_norm": 0.29335838556289673, "learning_rate": 9.17834888871014e-06, "loss": 0.3523, "step": 13110 }, { "epoch": 1.3329605530703539, "grad_norm": 0.28495416045188904, "learning_rate": 9.178153963373517e-06, "loss": 0.3666, "step": 13111 }, { "epoch": 1.3330622204148028, "grad_norm": 0.2972900867462158, "learning_rate": 9.177959016988346e-06, "loss": 0.3689, "step": 13112 }, { "epoch": 1.3331638877592518, "grad_norm": 0.27665746212005615, "learning_rate": 9.177764049555609e-06, "loss": 0.3398, "step": 13113 }, { "epoch": 1.3332655551037007, "grad_norm": 0.3061956465244293, "learning_rate": 9.17756906107629e-06, "loss": 0.3811, "step": 13114 }, { "epoch": 1.3333672224481496, "grad_norm": 0.3075033128261566, "learning_rate": 9.17737405155137e-06, "loss": 0.4147, "step": 13115 }, { "epoch": 1.3334688897925986, "grad_norm": 0.31453952193260193, "learning_rate": 9.177179020981835e-06, "loss": 0.355, "step": 13116 }, { "epoch": 1.3335705571370475, "grad_norm": 0.29973703622817993, "learning_rate": 9.176983969368661e-06, "loss": 0.3564, "step": 13117 }, { "epoch": 1.3336722244814965, "grad_norm": 0.2687602639198303, "learning_rate": 9.176788896712836e-06, "loss": 0.3353, "step": 13118 }, { "epoch": 1.3337738918259454, "grad_norm": 0.3029402792453766, "learning_rate": 9.176593803015342e-06, "loss": 0.3425, "step": 13119 }, { "epoch": 1.3338755591703944, "grad_norm": 0.2986351549625397, "learning_rate": 9.17639868827716e-06, "loss": 0.3937, "step": 13120 }, { "epoch": 1.3339772265148433, "grad_norm": 0.2966507375240326, "learning_rate": 9.176203552499275e-06, "loss": 0.372, "step": 13121 }, { "epoch": 1.3340788938592925, "grad_norm": 0.3101058900356293, "learning_rate": 9.176008395682668e-06, "loss": 0.3691, "step": 13122 }, { "epoch": 1.3341805612037414, "grad_norm": 0.3103129267692566, "learning_rate": 9.175813217828324e-06, "loss": 0.4135, "step": 13123 }, { "epoch": 1.3342822285481903, "grad_norm": 0.2832692265510559, "learning_rate": 9.175618018937224e-06, "loss": 0.3814, "step": 13124 }, { "epoch": 1.3343838958926393, "grad_norm": 0.31654903292655945, "learning_rate": 9.175422799010352e-06, "loss": 0.3568, "step": 13125 }, { "epoch": 1.3344855632370882, "grad_norm": 0.31129300594329834, "learning_rate": 9.175227558048694e-06, "loss": 0.35, "step": 13126 }, { "epoch": 1.3345872305815372, "grad_norm": 0.30233967304229736, "learning_rate": 9.175032296053231e-06, "loss": 0.3481, "step": 13127 }, { "epoch": 1.334688897925986, "grad_norm": 0.29990726709365845, "learning_rate": 9.17483701302495e-06, "loss": 0.3799, "step": 13128 }, { "epoch": 1.3347905652704353, "grad_norm": 0.2928588390350342, "learning_rate": 9.174641708964828e-06, "loss": 0.3955, "step": 13129 }, { "epoch": 1.3348922326148842, "grad_norm": 0.3317038118839264, "learning_rate": 9.174446383873853e-06, "loss": 0.382, "step": 13130 }, { "epoch": 1.3349938999593332, "grad_norm": 0.2880467474460602, "learning_rate": 9.174251037753013e-06, "loss": 0.3386, "step": 13131 }, { "epoch": 1.335095567303782, "grad_norm": 0.2846843898296356, "learning_rate": 9.174055670603285e-06, "loss": 0.3735, "step": 13132 }, { "epoch": 1.335197234648231, "grad_norm": 0.31515106558799744, "learning_rate": 9.173860282425658e-06, "loss": 0.3347, "step": 13133 }, { "epoch": 1.33529890199268, "grad_norm": 0.31222566962242126, "learning_rate": 9.173664873221115e-06, "loss": 0.3528, "step": 13134 }, { "epoch": 1.335400569337129, "grad_norm": 0.27145665884017944, "learning_rate": 9.173469442990638e-06, "loss": 0.3415, "step": 13135 }, { "epoch": 1.3355022366815779, "grad_norm": 0.28917962312698364, "learning_rate": 9.173273991735216e-06, "loss": 0.367, "step": 13136 }, { "epoch": 1.3356039040260268, "grad_norm": 0.31729352474212646, "learning_rate": 9.173078519455832e-06, "loss": 0.3765, "step": 13137 }, { "epoch": 1.3357055713704757, "grad_norm": 0.32203108072280884, "learning_rate": 9.172883026153469e-06, "loss": 0.3581, "step": 13138 }, { "epoch": 1.3358072387149247, "grad_norm": 0.31470784544944763, "learning_rate": 9.172687511829112e-06, "loss": 0.3642, "step": 13139 }, { "epoch": 1.3359089060593736, "grad_norm": 0.2758898437023163, "learning_rate": 9.172491976483747e-06, "loss": 0.3573, "step": 13140 }, { "epoch": 1.3360105734038226, "grad_norm": 0.29912495613098145, "learning_rate": 9.172296420118361e-06, "loss": 0.3654, "step": 13141 }, { "epoch": 1.3361122407482717, "grad_norm": 0.3419702649116516, "learning_rate": 9.172100842733935e-06, "loss": 0.4014, "step": 13142 }, { "epoch": 1.3362139080927207, "grad_norm": 0.3234859108924866, "learning_rate": 9.171905244331458e-06, "loss": 0.3551, "step": 13143 }, { "epoch": 1.3363155754371696, "grad_norm": 0.31160473823547363, "learning_rate": 9.171709624911911e-06, "loss": 0.3836, "step": 13144 }, { "epoch": 1.3364172427816186, "grad_norm": 0.30576014518737793, "learning_rate": 9.171513984476285e-06, "loss": 0.3692, "step": 13145 }, { "epoch": 1.3365189101260675, "grad_norm": 0.31850022077560425, "learning_rate": 9.171318323025561e-06, "loss": 0.364, "step": 13146 }, { "epoch": 1.3366205774705164, "grad_norm": 0.2974238097667694, "learning_rate": 9.171122640560727e-06, "loss": 0.3808, "step": 13147 }, { "epoch": 1.3367222448149654, "grad_norm": 0.2743024230003357, "learning_rate": 9.17092693708277e-06, "loss": 0.3433, "step": 13148 }, { "epoch": 1.3368239121594143, "grad_norm": 0.30270370841026306, "learning_rate": 9.170731212592674e-06, "loss": 0.3761, "step": 13149 }, { "epoch": 1.3369255795038635, "grad_norm": 0.2615249454975128, "learning_rate": 9.170535467091423e-06, "loss": 0.3314, "step": 13150 }, { "epoch": 1.3370272468483124, "grad_norm": 0.2771059572696686, "learning_rate": 9.170339700580008e-06, "loss": 0.3644, "step": 13151 }, { "epoch": 1.3371289141927614, "grad_norm": 0.2720606327056885, "learning_rate": 9.170143913059411e-06, "loss": 0.3398, "step": 13152 }, { "epoch": 1.3372305815372103, "grad_norm": 0.264626145362854, "learning_rate": 9.16994810453062e-06, "loss": 0.3604, "step": 13153 }, { "epoch": 1.3373322488816592, "grad_norm": 0.28897255659103394, "learning_rate": 9.169752274994622e-06, "loss": 0.335, "step": 13154 }, { "epoch": 1.3374339162261082, "grad_norm": 0.323552668094635, "learning_rate": 9.169556424452403e-06, "loss": 0.3628, "step": 13155 }, { "epoch": 1.3375355835705571, "grad_norm": 0.28599464893341064, "learning_rate": 9.169360552904949e-06, "loss": 0.3962, "step": 13156 }, { "epoch": 1.337637250915006, "grad_norm": 0.28226983547210693, "learning_rate": 9.169164660353249e-06, "loss": 0.3428, "step": 13157 }, { "epoch": 1.337738918259455, "grad_norm": 0.2720980942249298, "learning_rate": 9.168968746798286e-06, "loss": 0.3519, "step": 13158 }, { "epoch": 1.337840585603904, "grad_norm": 0.27008914947509766, "learning_rate": 9.16877281224105e-06, "loss": 0.3671, "step": 13159 }, { "epoch": 1.337942252948353, "grad_norm": 0.2657322287559509, "learning_rate": 9.168576856682526e-06, "loss": 0.3517, "step": 13160 }, { "epoch": 1.3380439202928018, "grad_norm": 0.28419551253318787, "learning_rate": 9.168380880123704e-06, "loss": 0.3619, "step": 13161 }, { "epoch": 1.3381455876372508, "grad_norm": 0.2939392626285553, "learning_rate": 9.168184882565569e-06, "loss": 0.3849, "step": 13162 }, { "epoch": 1.3382472549817, "grad_norm": 0.2871123254299164, "learning_rate": 9.16798886400911e-06, "loss": 0.3665, "step": 13163 }, { "epoch": 1.3383489223261489, "grad_norm": 0.2945374548435211, "learning_rate": 9.167792824455313e-06, "loss": 0.348, "step": 13164 }, { "epoch": 1.3384505896705978, "grad_norm": 0.3146193325519562, "learning_rate": 9.167596763905165e-06, "loss": 0.3696, "step": 13165 }, { "epoch": 1.3385522570150468, "grad_norm": 0.29686301946640015, "learning_rate": 9.167400682359657e-06, "loss": 0.3894, "step": 13166 }, { "epoch": 1.3386539243594957, "grad_norm": 0.28968900442123413, "learning_rate": 9.167204579819774e-06, "loss": 0.3879, "step": 13167 }, { "epoch": 1.3387555917039446, "grad_norm": 0.30963554978370667, "learning_rate": 9.167008456286504e-06, "loss": 0.3582, "step": 13168 }, { "epoch": 1.3388572590483936, "grad_norm": 0.26601725816726685, "learning_rate": 9.166812311760835e-06, "loss": 0.3439, "step": 13169 }, { "epoch": 1.3389589263928428, "grad_norm": 0.2972979247570038, "learning_rate": 9.166616146243757e-06, "loss": 0.3612, "step": 13170 }, { "epoch": 1.3390605937372917, "grad_norm": 0.29982301592826843, "learning_rate": 9.166419959736257e-06, "loss": 0.38, "step": 13171 }, { "epoch": 1.3391622610817406, "grad_norm": 0.3099140226840973, "learning_rate": 9.166223752239323e-06, "loss": 0.3775, "step": 13172 }, { "epoch": 1.3392639284261896, "grad_norm": 0.28558075428009033, "learning_rate": 9.166027523753941e-06, "loss": 0.3334, "step": 13173 }, { "epoch": 1.3393655957706385, "grad_norm": 0.2898348271846771, "learning_rate": 9.165831274281107e-06, "loss": 0.3651, "step": 13174 }, { "epoch": 1.3394672631150875, "grad_norm": 0.2864309549331665, "learning_rate": 9.165635003821802e-06, "loss": 0.3583, "step": 13175 }, { "epoch": 1.3395689304595364, "grad_norm": 0.2791792154312134, "learning_rate": 9.165438712377019e-06, "loss": 0.3594, "step": 13176 }, { "epoch": 1.3396705978039853, "grad_norm": 0.2674861252307892, "learning_rate": 9.165242399947746e-06, "loss": 0.3511, "step": 13177 }, { "epoch": 1.3397722651484343, "grad_norm": 0.3005771040916443, "learning_rate": 9.16504606653497e-06, "loss": 0.3507, "step": 13178 }, { "epoch": 1.3398739324928832, "grad_norm": 0.2842337191104889, "learning_rate": 9.164849712139684e-06, "loss": 0.3661, "step": 13179 }, { "epoch": 1.3399755998373322, "grad_norm": 0.298264741897583, "learning_rate": 9.164653336762875e-06, "loss": 0.3583, "step": 13180 }, { "epoch": 1.340077267181781, "grad_norm": 0.27576375007629395, "learning_rate": 9.164456940405529e-06, "loss": 0.3447, "step": 13181 }, { "epoch": 1.34017893452623, "grad_norm": 0.299412339925766, "learning_rate": 9.16426052306864e-06, "loss": 0.3669, "step": 13182 }, { "epoch": 1.3402806018706792, "grad_norm": 0.29943716526031494, "learning_rate": 9.164064084753198e-06, "loss": 0.3906, "step": 13183 }, { "epoch": 1.3403822692151282, "grad_norm": 0.323640376329422, "learning_rate": 9.163867625460187e-06, "loss": 0.3567, "step": 13184 }, { "epoch": 1.340483936559577, "grad_norm": 0.3036418855190277, "learning_rate": 9.163671145190604e-06, "loss": 0.3794, "step": 13185 }, { "epoch": 1.340585603904026, "grad_norm": 0.2906463146209717, "learning_rate": 9.163474643945433e-06, "loss": 0.3468, "step": 13186 }, { "epoch": 1.340687271248475, "grad_norm": 0.3058040142059326, "learning_rate": 9.163278121725667e-06, "loss": 0.3719, "step": 13187 }, { "epoch": 1.340788938592924, "grad_norm": 0.29559919238090515, "learning_rate": 9.163081578532295e-06, "loss": 0.3665, "step": 13188 }, { "epoch": 1.3408906059373729, "grad_norm": 0.25385168194770813, "learning_rate": 9.162885014366308e-06, "loss": 0.3596, "step": 13189 }, { "epoch": 1.3409922732818218, "grad_norm": 0.31215569376945496, "learning_rate": 9.162688429228694e-06, "loss": 0.3612, "step": 13190 }, { "epoch": 1.341093940626271, "grad_norm": 0.3085678815841675, "learning_rate": 9.162491823120446e-06, "loss": 0.3606, "step": 13191 }, { "epoch": 1.34119560797072, "grad_norm": 0.2958449721336365, "learning_rate": 9.162295196042552e-06, "loss": 0.3691, "step": 13192 }, { "epoch": 1.3412972753151688, "grad_norm": 0.29443037509918213, "learning_rate": 9.162098547996006e-06, "loss": 0.3447, "step": 13193 }, { "epoch": 1.3413989426596178, "grad_norm": 0.27520740032196045, "learning_rate": 9.161901878981796e-06, "loss": 0.3512, "step": 13194 }, { "epoch": 1.3415006100040667, "grad_norm": 0.2859935462474823, "learning_rate": 9.161705189000912e-06, "loss": 0.3558, "step": 13195 }, { "epoch": 1.3416022773485157, "grad_norm": 0.3089327812194824, "learning_rate": 9.161508478054347e-06, "loss": 0.3796, "step": 13196 }, { "epoch": 1.3417039446929646, "grad_norm": 0.2868349850177765, "learning_rate": 9.161311746143092e-06, "loss": 0.3423, "step": 13197 }, { "epoch": 1.3418056120374136, "grad_norm": 0.3135867416858673, "learning_rate": 9.161114993268137e-06, "loss": 0.3724, "step": 13198 }, { "epoch": 1.3419072793818625, "grad_norm": 0.27216392755508423, "learning_rate": 9.160918219430473e-06, "loss": 0.3544, "step": 13199 }, { "epoch": 1.3420089467263114, "grad_norm": 0.27960002422332764, "learning_rate": 9.160721424631092e-06, "loss": 0.3605, "step": 13200 }, { "epoch": 1.3421106140707604, "grad_norm": 0.3086352050304413, "learning_rate": 9.160524608870985e-06, "loss": 0.3512, "step": 13201 }, { "epoch": 1.3422122814152093, "grad_norm": 0.2810364365577698, "learning_rate": 9.160327772151143e-06, "loss": 0.3564, "step": 13202 }, { "epoch": 1.3423139487596583, "grad_norm": 0.2647712528705597, "learning_rate": 9.16013091447256e-06, "loss": 0.3509, "step": 13203 }, { "epoch": 1.3424156161041074, "grad_norm": 0.30138957500457764, "learning_rate": 9.159934035836224e-06, "loss": 0.3686, "step": 13204 }, { "epoch": 1.3425172834485564, "grad_norm": 0.28222060203552246, "learning_rate": 9.159737136243131e-06, "loss": 0.3467, "step": 13205 }, { "epoch": 1.3426189507930053, "grad_norm": 0.2940698266029358, "learning_rate": 9.159540215694269e-06, "loss": 0.357, "step": 13206 }, { "epoch": 1.3427206181374542, "grad_norm": 0.2992263436317444, "learning_rate": 9.159343274190633e-06, "loss": 0.3515, "step": 13207 }, { "epoch": 1.3428222854819032, "grad_norm": 0.2810186445713043, "learning_rate": 9.159146311733215e-06, "loss": 0.3531, "step": 13208 }, { "epoch": 1.3429239528263521, "grad_norm": 0.27417680621147156, "learning_rate": 9.158949328323005e-06, "loss": 0.3881, "step": 13209 }, { "epoch": 1.343025620170801, "grad_norm": 0.2974681258201599, "learning_rate": 9.158752323960996e-06, "loss": 0.3565, "step": 13210 }, { "epoch": 1.3431272875152502, "grad_norm": 0.31215521693229675, "learning_rate": 9.158555298648182e-06, "loss": 0.3626, "step": 13211 }, { "epoch": 1.3432289548596992, "grad_norm": 0.30834680795669556, "learning_rate": 9.158358252385554e-06, "loss": 0.3482, "step": 13212 }, { "epoch": 1.3433306222041481, "grad_norm": 0.28521448373794556, "learning_rate": 9.158161185174106e-06, "loss": 0.3735, "step": 13213 }, { "epoch": 1.343432289548597, "grad_norm": 0.29784896969795227, "learning_rate": 9.15796409701483e-06, "loss": 0.3298, "step": 13214 }, { "epoch": 1.343533956893046, "grad_norm": 0.29054492712020874, "learning_rate": 9.157766987908719e-06, "loss": 0.3489, "step": 13215 }, { "epoch": 1.343635624237495, "grad_norm": 0.316476434469223, "learning_rate": 9.157569857856765e-06, "loss": 0.3814, "step": 13216 }, { "epoch": 1.3437372915819439, "grad_norm": 0.2896936237812042, "learning_rate": 9.157372706859963e-06, "loss": 0.3751, "step": 13217 }, { "epoch": 1.3438389589263928, "grad_norm": 0.27813318371772766, "learning_rate": 9.157175534919305e-06, "loss": 0.3529, "step": 13218 }, { "epoch": 1.3439406262708418, "grad_norm": 0.281779944896698, "learning_rate": 9.156978342035784e-06, "loss": 0.3661, "step": 13219 }, { "epoch": 1.3440422936152907, "grad_norm": 0.2960670590400696, "learning_rate": 9.156781128210395e-06, "loss": 0.368, "step": 13220 }, { "epoch": 1.3441439609597396, "grad_norm": 0.2831512987613678, "learning_rate": 9.156583893444129e-06, "loss": 0.3506, "step": 13221 }, { "epoch": 1.3442456283041886, "grad_norm": 0.30506178736686707, "learning_rate": 9.156386637737982e-06, "loss": 0.318, "step": 13222 }, { "epoch": 1.3443472956486375, "grad_norm": 0.30435749888420105, "learning_rate": 9.156189361092946e-06, "loss": 0.3842, "step": 13223 }, { "epoch": 1.3444489629930867, "grad_norm": 0.29514726996421814, "learning_rate": 9.155992063510017e-06, "loss": 0.3659, "step": 13224 }, { "epoch": 1.3445506303375356, "grad_norm": 0.26693108677864075, "learning_rate": 9.155794744990187e-06, "loss": 0.3782, "step": 13225 }, { "epoch": 1.3446522976819846, "grad_norm": 0.31568774580955505, "learning_rate": 9.155597405534451e-06, "loss": 0.3816, "step": 13226 }, { "epoch": 1.3447539650264335, "grad_norm": 0.33912384510040283, "learning_rate": 9.155400045143802e-06, "loss": 0.4076, "step": 13227 }, { "epoch": 1.3448556323708825, "grad_norm": 0.29042360186576843, "learning_rate": 9.155202663819237e-06, "loss": 0.3647, "step": 13228 }, { "epoch": 1.3449572997153314, "grad_norm": 0.2746460735797882, "learning_rate": 9.155005261561746e-06, "loss": 0.3591, "step": 13229 }, { "epoch": 1.3450589670597803, "grad_norm": 0.30064091086387634, "learning_rate": 9.154807838372328e-06, "loss": 0.3588, "step": 13230 }, { "epoch": 1.3451606344042293, "grad_norm": 0.3151412308216095, "learning_rate": 9.154610394251974e-06, "loss": 0.3965, "step": 13231 }, { "epoch": 1.3452623017486784, "grad_norm": 0.3077469766139984, "learning_rate": 9.15441292920168e-06, "loss": 0.355, "step": 13232 }, { "epoch": 1.3453639690931274, "grad_norm": 0.3258016109466553, "learning_rate": 9.154215443222442e-06, "loss": 0.4034, "step": 13233 }, { "epoch": 1.3454656364375763, "grad_norm": 0.3106694519519806, "learning_rate": 9.154017936315252e-06, "loss": 0.3594, "step": 13234 }, { "epoch": 1.3455673037820253, "grad_norm": 0.30311641097068787, "learning_rate": 9.153820408481109e-06, "loss": 0.3477, "step": 13235 }, { "epoch": 1.3456689711264742, "grad_norm": 0.30500328540802, "learning_rate": 9.153622859721005e-06, "loss": 0.3394, "step": 13236 }, { "epoch": 1.3457706384709232, "grad_norm": 0.2817617952823639, "learning_rate": 9.153425290035936e-06, "loss": 0.3934, "step": 13237 }, { "epoch": 1.345872305815372, "grad_norm": 0.30668848752975464, "learning_rate": 9.153227699426897e-06, "loss": 0.3659, "step": 13238 }, { "epoch": 1.345973973159821, "grad_norm": 0.3057498335838318, "learning_rate": 9.153030087894883e-06, "loss": 0.373, "step": 13239 }, { "epoch": 1.34607564050427, "grad_norm": 0.2871512472629547, "learning_rate": 9.152832455440892e-06, "loss": 0.3691, "step": 13240 }, { "epoch": 1.346177307848719, "grad_norm": 0.31512483954429626, "learning_rate": 9.152634802065917e-06, "loss": 0.3656, "step": 13241 }, { "epoch": 1.3462789751931679, "grad_norm": 0.2903631329536438, "learning_rate": 9.152437127770956e-06, "loss": 0.3561, "step": 13242 }, { "epoch": 1.3463806425376168, "grad_norm": 0.2886486053466797, "learning_rate": 9.152239432557002e-06, "loss": 0.3524, "step": 13243 }, { "epoch": 1.3464823098820657, "grad_norm": 0.3282071053981781, "learning_rate": 9.152041716425053e-06, "loss": 0.3857, "step": 13244 }, { "epoch": 1.346583977226515, "grad_norm": 0.322868674993515, "learning_rate": 9.151843979376104e-06, "loss": 0.4098, "step": 13245 }, { "epoch": 1.3466856445709638, "grad_norm": 0.2937653064727783, "learning_rate": 9.151646221411153e-06, "loss": 0.3372, "step": 13246 }, { "epoch": 1.3467873119154128, "grad_norm": 0.29733502864837646, "learning_rate": 9.151448442531194e-06, "loss": 0.4101, "step": 13247 }, { "epoch": 1.3468889792598617, "grad_norm": 0.2959705591201782, "learning_rate": 9.151250642737223e-06, "loss": 0.3811, "step": 13248 }, { "epoch": 1.3469906466043107, "grad_norm": 0.301823228597641, "learning_rate": 9.15105282203024e-06, "loss": 0.3546, "step": 13249 }, { "epoch": 1.3470923139487596, "grad_norm": 0.29434335231781006, "learning_rate": 9.15085498041124e-06, "loss": 0.3614, "step": 13250 }, { "epoch": 1.3471939812932086, "grad_norm": 0.29629427194595337, "learning_rate": 9.150657117881217e-06, "loss": 0.348, "step": 13251 }, { "epoch": 1.3472956486376577, "grad_norm": 0.3112359941005707, "learning_rate": 9.150459234441169e-06, "loss": 0.3379, "step": 13252 }, { "epoch": 1.3473973159821067, "grad_norm": 0.2878469228744507, "learning_rate": 9.150261330092097e-06, "loss": 0.33, "step": 13253 }, { "epoch": 1.3474989833265556, "grad_norm": 0.3009991943836212, "learning_rate": 9.150063404834992e-06, "loss": 0.3767, "step": 13254 }, { "epoch": 1.3476006506710045, "grad_norm": 0.31061485409736633, "learning_rate": 9.149865458670853e-06, "loss": 0.3859, "step": 13255 }, { "epoch": 1.3477023180154535, "grad_norm": 0.28893694281578064, "learning_rate": 9.14966749160068e-06, "loss": 0.3704, "step": 13256 }, { "epoch": 1.3478039853599024, "grad_norm": 0.3163929283618927, "learning_rate": 9.149469503625468e-06, "loss": 0.3735, "step": 13257 }, { "epoch": 1.3479056527043514, "grad_norm": 0.2781926989555359, "learning_rate": 9.149271494746215e-06, "loss": 0.3985, "step": 13258 }, { "epoch": 1.3480073200488003, "grad_norm": 0.29324981570243835, "learning_rate": 9.149073464963916e-06, "loss": 0.3658, "step": 13259 }, { "epoch": 1.3481089873932492, "grad_norm": 0.29727548360824585, "learning_rate": 9.148875414279574e-06, "loss": 0.3417, "step": 13260 }, { "epoch": 1.3482106547376982, "grad_norm": 0.29958105087280273, "learning_rate": 9.14867734269418e-06, "loss": 0.3676, "step": 13261 }, { "epoch": 1.3483123220821471, "grad_norm": 0.30146583914756775, "learning_rate": 9.14847925020874e-06, "loss": 0.3563, "step": 13262 }, { "epoch": 1.348413989426596, "grad_norm": 0.29527392983436584, "learning_rate": 9.148281136824244e-06, "loss": 0.3984, "step": 13263 }, { "epoch": 1.348515656771045, "grad_norm": 0.2814216613769531, "learning_rate": 9.148083002541695e-06, "loss": 0.3695, "step": 13264 }, { "epoch": 1.3486173241154942, "grad_norm": 0.306588351726532, "learning_rate": 9.147884847362088e-06, "loss": 0.3542, "step": 13265 }, { "epoch": 1.3487189914599431, "grad_norm": 0.2996862232685089, "learning_rate": 9.147686671286423e-06, "loss": 0.348, "step": 13266 }, { "epoch": 1.348820658804392, "grad_norm": 0.2932059168815613, "learning_rate": 9.147488474315698e-06, "loss": 0.361, "step": 13267 }, { "epoch": 1.348922326148841, "grad_norm": 0.2956552505493164, "learning_rate": 9.147290256450913e-06, "loss": 0.3904, "step": 13268 }, { "epoch": 1.34902399349329, "grad_norm": 0.29269543290138245, "learning_rate": 9.147092017693064e-06, "loss": 0.3654, "step": 13269 }, { "epoch": 1.3491256608377389, "grad_norm": 0.27627211809158325, "learning_rate": 9.146893758043152e-06, "loss": 0.3687, "step": 13270 }, { "epoch": 1.3492273281821878, "grad_norm": 0.2925960123538971, "learning_rate": 9.146695477502176e-06, "loss": 0.3516, "step": 13271 }, { "epoch": 1.3493289955266368, "grad_norm": 0.3085261583328247, "learning_rate": 9.146497176071132e-06, "loss": 0.3695, "step": 13272 }, { "epoch": 1.349430662871086, "grad_norm": 0.2834555506706238, "learning_rate": 9.14629885375102e-06, "loss": 0.3975, "step": 13273 }, { "epoch": 1.3495323302155349, "grad_norm": 0.2857591509819031, "learning_rate": 9.14610051054284e-06, "loss": 0.3683, "step": 13274 }, { "epoch": 1.3496339975599838, "grad_norm": 0.30825215578079224, "learning_rate": 9.145902146447592e-06, "loss": 0.3393, "step": 13275 }, { "epoch": 1.3497356649044328, "grad_norm": 0.28080272674560547, "learning_rate": 9.145703761466273e-06, "loss": 0.3471, "step": 13276 }, { "epoch": 1.3498373322488817, "grad_norm": 0.3025134205818176, "learning_rate": 9.145505355599884e-06, "loss": 0.3594, "step": 13277 }, { "epoch": 1.3499389995933306, "grad_norm": 0.2830424904823303, "learning_rate": 9.145306928849423e-06, "loss": 0.3525, "step": 13278 }, { "epoch": 1.3500406669377796, "grad_norm": 0.31234511733055115, "learning_rate": 9.145108481215892e-06, "loss": 0.364, "step": 13279 }, { "epoch": 1.3501423342822285, "grad_norm": 0.31745243072509766, "learning_rate": 9.14491001270029e-06, "loss": 0.3798, "step": 13280 }, { "epoch": 1.3502440016266775, "grad_norm": 0.3111051023006439, "learning_rate": 9.144711523303615e-06, "loss": 0.3499, "step": 13281 }, { "epoch": 1.3503456689711264, "grad_norm": 0.29711002111434937, "learning_rate": 9.14451301302687e-06, "loss": 0.367, "step": 13282 }, { "epoch": 1.3504473363155753, "grad_norm": 0.2742379605770111, "learning_rate": 9.144314481871052e-06, "loss": 0.356, "step": 13283 }, { "epoch": 1.3505490036600243, "grad_norm": 0.3128224015235901, "learning_rate": 9.144115929837162e-06, "loss": 0.3885, "step": 13284 }, { "epoch": 1.3506506710044732, "grad_norm": 0.29004237055778503, "learning_rate": 9.143917356926201e-06, "loss": 0.3597, "step": 13285 }, { "epoch": 1.3507523383489224, "grad_norm": 0.2942812740802765, "learning_rate": 9.143718763139172e-06, "loss": 0.3517, "step": 13286 }, { "epoch": 1.3508540056933713, "grad_norm": 0.2950975000858307, "learning_rate": 9.143520148477069e-06, "loss": 0.3628, "step": 13287 }, { "epoch": 1.3509556730378203, "grad_norm": 0.29356202483177185, "learning_rate": 9.143321512940896e-06, "loss": 0.3604, "step": 13288 }, { "epoch": 1.3510573403822692, "grad_norm": 0.2972705364227295, "learning_rate": 9.143122856531657e-06, "loss": 0.3579, "step": 13289 }, { "epoch": 1.3511590077267182, "grad_norm": 0.283669114112854, "learning_rate": 9.142924179250348e-06, "loss": 0.4229, "step": 13290 }, { "epoch": 1.351260675071167, "grad_norm": 0.2762927711009979, "learning_rate": 9.14272548109797e-06, "loss": 0.3369, "step": 13291 }, { "epoch": 1.351362342415616, "grad_norm": 0.30174127221107483, "learning_rate": 9.142526762075526e-06, "loss": 0.3734, "step": 13292 }, { "epoch": 1.3514640097600652, "grad_norm": 0.2870791554450989, "learning_rate": 9.142328022184019e-06, "loss": 0.3585, "step": 13293 }, { "epoch": 1.3515656771045141, "grad_norm": 0.2931118309497833, "learning_rate": 9.142129261424445e-06, "loss": 0.3759, "step": 13294 }, { "epoch": 1.351667344448963, "grad_norm": 0.30981096625328064, "learning_rate": 9.14193047979781e-06, "loss": 0.3742, "step": 13295 }, { "epoch": 1.351769011793412, "grad_norm": 0.3214072585105896, "learning_rate": 9.141731677305113e-06, "loss": 0.3656, "step": 13296 }, { "epoch": 1.351870679137861, "grad_norm": 0.2913222014904022, "learning_rate": 9.141532853947355e-06, "loss": 0.3628, "step": 13297 }, { "epoch": 1.35197234648231, "grad_norm": 0.2825703024864197, "learning_rate": 9.14133400972554e-06, "loss": 0.352, "step": 13298 }, { "epoch": 1.3520740138267588, "grad_norm": 0.30188944935798645, "learning_rate": 9.141135144640668e-06, "loss": 0.3779, "step": 13299 }, { "epoch": 1.3521756811712078, "grad_norm": 0.29691174626350403, "learning_rate": 9.14093625869374e-06, "loss": 0.3545, "step": 13300 }, { "epoch": 1.3522773485156567, "grad_norm": 0.28421714901924133, "learning_rate": 9.14073735188576e-06, "loss": 0.3595, "step": 13301 }, { "epoch": 1.3523790158601057, "grad_norm": 0.2563052773475647, "learning_rate": 9.14053842421773e-06, "loss": 0.359, "step": 13302 }, { "epoch": 1.3524806832045546, "grad_norm": 0.27249762415885925, "learning_rate": 9.14033947569065e-06, "loss": 0.3276, "step": 13303 }, { "epoch": 1.3525823505490036, "grad_norm": 0.3003336489200592, "learning_rate": 9.140140506305525e-06, "loss": 0.3243, "step": 13304 }, { "epoch": 1.3526840178934525, "grad_norm": 0.29666340351104736, "learning_rate": 9.139941516063355e-06, "loss": 0.3904, "step": 13305 }, { "epoch": 1.3527856852379017, "grad_norm": 0.27187955379486084, "learning_rate": 9.139742504965144e-06, "loss": 0.367, "step": 13306 }, { "epoch": 1.3528873525823506, "grad_norm": 0.28756484389305115, "learning_rate": 9.139543473011894e-06, "loss": 0.3633, "step": 13307 }, { "epoch": 1.3529890199267995, "grad_norm": 0.28708416223526, "learning_rate": 9.139344420204608e-06, "loss": 0.3859, "step": 13308 }, { "epoch": 1.3530906872712485, "grad_norm": 0.3063371181488037, "learning_rate": 9.139145346544288e-06, "loss": 0.3533, "step": 13309 }, { "epoch": 1.3531923546156974, "grad_norm": 0.31659871339797974, "learning_rate": 9.138946252031937e-06, "loss": 0.3841, "step": 13310 }, { "epoch": 1.3532940219601464, "grad_norm": 0.2933480143547058, "learning_rate": 9.138747136668559e-06, "loss": 0.395, "step": 13311 }, { "epoch": 1.3533956893045953, "grad_norm": 0.3158952295780182, "learning_rate": 9.138548000455157e-06, "loss": 0.3632, "step": 13312 }, { "epoch": 1.3534973566490442, "grad_norm": 0.29936736822128296, "learning_rate": 9.138348843392733e-06, "loss": 0.3821, "step": 13313 }, { "epoch": 1.3535990239934934, "grad_norm": 0.3009646236896515, "learning_rate": 9.13814966548229e-06, "loss": 0.3935, "step": 13314 }, { "epoch": 1.3537006913379424, "grad_norm": 0.3065446615219116, "learning_rate": 9.137950466724832e-06, "loss": 0.3826, "step": 13315 }, { "epoch": 1.3538023586823913, "grad_norm": 0.2755054235458374, "learning_rate": 9.137751247121364e-06, "loss": 0.3549, "step": 13316 }, { "epoch": 1.3539040260268402, "grad_norm": 0.3040230870246887, "learning_rate": 9.137552006672889e-06, "loss": 0.3377, "step": 13317 }, { "epoch": 1.3540056933712892, "grad_norm": 0.29658523201942444, "learning_rate": 9.13735274538041e-06, "loss": 0.3813, "step": 13318 }, { "epoch": 1.3541073607157381, "grad_norm": 0.3129332959651947, "learning_rate": 9.13715346324493e-06, "loss": 0.3561, "step": 13319 }, { "epoch": 1.354209028060187, "grad_norm": 0.297145813703537, "learning_rate": 9.136954160267455e-06, "loss": 0.3473, "step": 13320 }, { "epoch": 1.354310695404636, "grad_norm": 0.2917502224445343, "learning_rate": 9.136754836448987e-06, "loss": 0.37, "step": 13321 }, { "epoch": 1.354412362749085, "grad_norm": 0.2933247685432434, "learning_rate": 9.136555491790533e-06, "loss": 0.3819, "step": 13322 }, { "epoch": 1.3545140300935339, "grad_norm": 0.28890424966812134, "learning_rate": 9.136356126293094e-06, "loss": 0.343, "step": 13323 }, { "epoch": 1.3546156974379828, "grad_norm": 0.2889709770679474, "learning_rate": 9.136156739957676e-06, "loss": 0.3964, "step": 13324 }, { "epoch": 1.3547173647824318, "grad_norm": 0.2945464551448822, "learning_rate": 9.135957332785283e-06, "loss": 0.3928, "step": 13325 }, { "epoch": 1.354819032126881, "grad_norm": 0.27391013503074646, "learning_rate": 9.13575790477692e-06, "loss": 0.3754, "step": 13326 }, { "epoch": 1.3549206994713299, "grad_norm": 0.31381848454475403, "learning_rate": 9.135558455933593e-06, "loss": 0.3978, "step": 13327 }, { "epoch": 1.3550223668157788, "grad_norm": 0.2906312644481659, "learning_rate": 9.135358986256304e-06, "loss": 0.3851, "step": 13328 }, { "epoch": 1.3551240341602278, "grad_norm": 0.29825472831726074, "learning_rate": 9.13515949574606e-06, "loss": 0.3542, "step": 13329 }, { "epoch": 1.3552257015046767, "grad_norm": 0.30775830149650574, "learning_rate": 9.134959984403862e-06, "loss": 0.3867, "step": 13330 }, { "epoch": 1.3553273688491256, "grad_norm": 0.28700876235961914, "learning_rate": 9.134760452230723e-06, "loss": 0.3974, "step": 13331 }, { "epoch": 1.3554290361935746, "grad_norm": 0.2950226366519928, "learning_rate": 9.134560899227639e-06, "loss": 0.3719, "step": 13332 }, { "epoch": 1.3555307035380235, "grad_norm": 0.3134092390537262, "learning_rate": 9.134361325395622e-06, "loss": 0.3948, "step": 13333 }, { "epoch": 1.3556323708824727, "grad_norm": 0.3038097620010376, "learning_rate": 9.134161730735674e-06, "loss": 0.3624, "step": 13334 }, { "epoch": 1.3557340382269216, "grad_norm": 0.31835654377937317, "learning_rate": 9.133962115248802e-06, "loss": 0.3598, "step": 13335 }, { "epoch": 1.3558357055713706, "grad_norm": 0.32407423853874207, "learning_rate": 9.133762478936013e-06, "loss": 0.3883, "step": 13336 }, { "epoch": 1.3559373729158195, "grad_norm": 0.28768596053123474, "learning_rate": 9.133562821798309e-06, "loss": 0.3915, "step": 13337 }, { "epoch": 1.3560390402602684, "grad_norm": 0.28598955273628235, "learning_rate": 9.133363143836698e-06, "loss": 0.3614, "step": 13338 }, { "epoch": 1.3561407076047174, "grad_norm": 0.28691422939300537, "learning_rate": 9.133163445052186e-06, "loss": 0.3746, "step": 13339 }, { "epoch": 1.3562423749491663, "grad_norm": 0.30458810925483704, "learning_rate": 9.13296372544578e-06, "loss": 0.3337, "step": 13340 }, { "epoch": 1.3563440422936153, "grad_norm": 0.27162966132164, "learning_rate": 9.132763985018482e-06, "loss": 0.3408, "step": 13341 }, { "epoch": 1.3564457096380642, "grad_norm": 0.2932698130607605, "learning_rate": 9.132564223771303e-06, "loss": 0.3524, "step": 13342 }, { "epoch": 1.3565473769825132, "grad_norm": 0.28280991315841675, "learning_rate": 9.132364441705248e-06, "loss": 0.3582, "step": 13343 }, { "epoch": 1.356649044326962, "grad_norm": 0.30827200412750244, "learning_rate": 9.132164638821322e-06, "loss": 0.3686, "step": 13344 }, { "epoch": 1.356750711671411, "grad_norm": 0.27716392278671265, "learning_rate": 9.131964815120534e-06, "loss": 0.3637, "step": 13345 }, { "epoch": 1.35685237901586, "grad_norm": 0.29217755794525146, "learning_rate": 9.131764970603886e-06, "loss": 0.3682, "step": 13346 }, { "epoch": 1.3569540463603091, "grad_norm": 0.28662750124931335, "learning_rate": 9.13156510527239e-06, "loss": 0.3987, "step": 13347 }, { "epoch": 1.357055713704758, "grad_norm": 0.28648602962493896, "learning_rate": 9.13136521912705e-06, "loss": 0.3603, "step": 13348 }, { "epoch": 1.357157381049207, "grad_norm": 0.2798246145248413, "learning_rate": 9.131165312168876e-06, "loss": 0.3683, "step": 13349 }, { "epoch": 1.357259048393656, "grad_norm": 0.3250039517879486, "learning_rate": 9.130965384398871e-06, "loss": 0.3899, "step": 13350 }, { "epoch": 1.357360715738105, "grad_norm": 0.3034401834011078, "learning_rate": 9.130765435818045e-06, "loss": 0.3753, "step": 13351 }, { "epoch": 1.3574623830825538, "grad_norm": 0.2844042181968689, "learning_rate": 9.130565466427403e-06, "loss": 0.3424, "step": 13352 }, { "epoch": 1.3575640504270028, "grad_norm": 0.3044893741607666, "learning_rate": 9.130365476227955e-06, "loss": 0.3668, "step": 13353 }, { "epoch": 1.3576657177714517, "grad_norm": 0.3135022819042206, "learning_rate": 9.130165465220707e-06, "loss": 0.3366, "step": 13354 }, { "epoch": 1.357767385115901, "grad_norm": 0.26768070459365845, "learning_rate": 9.129965433406667e-06, "loss": 0.3606, "step": 13355 }, { "epoch": 1.3578690524603498, "grad_norm": 0.32214248180389404, "learning_rate": 9.12976538078684e-06, "loss": 0.3656, "step": 13356 }, { "epoch": 1.3579707198047988, "grad_norm": 0.3323020935058594, "learning_rate": 9.12956530736224e-06, "loss": 0.3732, "step": 13357 }, { "epoch": 1.3580723871492477, "grad_norm": 0.29145491123199463, "learning_rate": 9.12936521313387e-06, "loss": 0.3787, "step": 13358 }, { "epoch": 1.3581740544936967, "grad_norm": 0.31359416246414185, "learning_rate": 9.12916509810274e-06, "loss": 0.3328, "step": 13359 }, { "epoch": 1.3582757218381456, "grad_norm": 0.3116520047187805, "learning_rate": 9.128964962269857e-06, "loss": 0.3556, "step": 13360 }, { "epoch": 1.3583773891825945, "grad_norm": 0.27089521288871765, "learning_rate": 9.128764805636227e-06, "loss": 0.3838, "step": 13361 }, { "epoch": 1.3584790565270435, "grad_norm": 0.3072035610675812, "learning_rate": 9.128564628202865e-06, "loss": 0.3441, "step": 13362 }, { "epoch": 1.3585807238714924, "grad_norm": 0.31046515703201294, "learning_rate": 9.128364429970774e-06, "loss": 0.3512, "step": 13363 }, { "epoch": 1.3586823912159414, "grad_norm": 0.2884821891784668, "learning_rate": 9.128164210940963e-06, "loss": 0.3744, "step": 13364 }, { "epoch": 1.3587840585603903, "grad_norm": 0.3116985857486725, "learning_rate": 9.127963971114445e-06, "loss": 0.3807, "step": 13365 }, { "epoch": 1.3588857259048392, "grad_norm": 0.29542645812034607, "learning_rate": 9.127763710492222e-06, "loss": 0.381, "step": 13366 }, { "epoch": 1.3589873932492884, "grad_norm": 0.2970981299877167, "learning_rate": 9.127563429075308e-06, "loss": 0.3794, "step": 13367 }, { "epoch": 1.3590890605937374, "grad_norm": 0.2883344888687134, "learning_rate": 9.127363126864711e-06, "loss": 0.3831, "step": 13368 }, { "epoch": 1.3591907279381863, "grad_norm": 0.2751977741718292, "learning_rate": 9.127162803861435e-06, "loss": 0.3597, "step": 13369 }, { "epoch": 1.3592923952826352, "grad_norm": 0.29320383071899414, "learning_rate": 9.126962460066498e-06, "loss": 0.3821, "step": 13370 }, { "epoch": 1.3593940626270842, "grad_norm": 0.3026982843875885, "learning_rate": 9.126762095480903e-06, "loss": 0.3532, "step": 13371 }, { "epoch": 1.3594957299715331, "grad_norm": 0.3108566701412201, "learning_rate": 9.126561710105662e-06, "loss": 0.3511, "step": 13372 }, { "epoch": 1.359597397315982, "grad_norm": 0.2753957509994507, "learning_rate": 9.126361303941784e-06, "loss": 0.366, "step": 13373 }, { "epoch": 1.359699064660431, "grad_norm": 0.2703802287578583, "learning_rate": 9.126160876990278e-06, "loss": 0.3689, "step": 13374 }, { "epoch": 1.3598007320048802, "grad_norm": 0.27232539653778076, "learning_rate": 9.125960429252155e-06, "loss": 0.3891, "step": 13375 }, { "epoch": 1.359902399349329, "grad_norm": 0.2915184497833252, "learning_rate": 9.125759960728422e-06, "loss": 0.3398, "step": 13376 }, { "epoch": 1.360004066693778, "grad_norm": 0.2982002794742584, "learning_rate": 9.12555947142009e-06, "loss": 0.3316, "step": 13377 }, { "epoch": 1.360105734038227, "grad_norm": 0.31622588634490967, "learning_rate": 9.125358961328172e-06, "loss": 0.39, "step": 13378 }, { "epoch": 1.360207401382676, "grad_norm": 0.2996746599674225, "learning_rate": 9.125158430453676e-06, "loss": 0.3785, "step": 13379 }, { "epoch": 1.3603090687271249, "grad_norm": 0.30372652411460876, "learning_rate": 9.124957878797612e-06, "loss": 0.3526, "step": 13380 }, { "epoch": 1.3604107360715738, "grad_norm": 0.28685975074768066, "learning_rate": 9.124757306360989e-06, "loss": 0.3792, "step": 13381 }, { "epoch": 1.3605124034160228, "grad_norm": 0.2861548662185669, "learning_rate": 9.12455671314482e-06, "loss": 0.361, "step": 13382 }, { "epoch": 1.3606140707604717, "grad_norm": 0.32195425033569336, "learning_rate": 9.124356099150114e-06, "loss": 0.3553, "step": 13383 }, { "epoch": 1.3607157381049206, "grad_norm": 0.28903883695602417, "learning_rate": 9.124155464377882e-06, "loss": 0.3513, "step": 13384 }, { "epoch": 1.3608174054493696, "grad_norm": 0.2892438769340515, "learning_rate": 9.123954808829136e-06, "loss": 0.3662, "step": 13385 }, { "epoch": 1.3609190727938185, "grad_norm": 0.2883368134498596, "learning_rate": 9.123754132504884e-06, "loss": 0.3563, "step": 13386 }, { "epoch": 1.3610207401382675, "grad_norm": 0.264182984828949, "learning_rate": 9.123553435406138e-06, "loss": 0.385, "step": 13387 }, { "epoch": 1.3611224074827166, "grad_norm": 0.28131139278411865, "learning_rate": 9.123352717533912e-06, "loss": 0.3629, "step": 13388 }, { "epoch": 1.3612240748271656, "grad_norm": 0.2765728235244751, "learning_rate": 9.123151978889215e-06, "loss": 0.3414, "step": 13389 }, { "epoch": 1.3613257421716145, "grad_norm": 0.2904873192310333, "learning_rate": 9.122951219473055e-06, "loss": 0.3526, "step": 13390 }, { "epoch": 1.3614274095160634, "grad_norm": 0.28257128596305847, "learning_rate": 9.12275043928645e-06, "loss": 0.3422, "step": 13391 }, { "epoch": 1.3615290768605124, "grad_norm": 0.27991440892219543, "learning_rate": 9.122549638330406e-06, "loss": 0.3791, "step": 13392 }, { "epoch": 1.3616307442049613, "grad_norm": 0.28984498977661133, "learning_rate": 9.122348816605937e-06, "loss": 0.3504, "step": 13393 }, { "epoch": 1.3617324115494103, "grad_norm": 0.3039630055427551, "learning_rate": 9.122147974114055e-06, "loss": 0.3512, "step": 13394 }, { "epoch": 1.3618340788938592, "grad_norm": 0.30596861243247986, "learning_rate": 9.121947110855769e-06, "loss": 0.3769, "step": 13395 }, { "epoch": 1.3619357462383084, "grad_norm": 0.2734508812427521, "learning_rate": 9.121746226832095e-06, "loss": 0.3608, "step": 13396 }, { "epoch": 1.3620374135827573, "grad_norm": 0.2880677878856659, "learning_rate": 9.121545322044043e-06, "loss": 0.3371, "step": 13397 }, { "epoch": 1.3621390809272063, "grad_norm": 0.30784961581230164, "learning_rate": 9.121344396492623e-06, "loss": 0.3532, "step": 13398 }, { "epoch": 1.3622407482716552, "grad_norm": 0.30053117871284485, "learning_rate": 9.121143450178852e-06, "loss": 0.3475, "step": 13399 }, { "epoch": 1.3623424156161041, "grad_norm": 0.2822206914424896, "learning_rate": 9.120942483103738e-06, "loss": 0.3778, "step": 13400 }, { "epoch": 1.362444082960553, "grad_norm": 0.2748214304447174, "learning_rate": 9.120741495268295e-06, "loss": 0.3634, "step": 13401 }, { "epoch": 1.362545750305002, "grad_norm": 0.290790319442749, "learning_rate": 9.120540486673537e-06, "loss": 0.3518, "step": 13402 }, { "epoch": 1.362647417649451, "grad_norm": 0.3019394874572754, "learning_rate": 9.120339457320476e-06, "loss": 0.3667, "step": 13403 }, { "epoch": 1.3627490849939, "grad_norm": 0.2738995850086212, "learning_rate": 9.12013840721012e-06, "loss": 0.359, "step": 13404 }, { "epoch": 1.3628507523383488, "grad_norm": 0.2885224521160126, "learning_rate": 9.119937336343488e-06, "loss": 0.3615, "step": 13405 }, { "epoch": 1.3629524196827978, "grad_norm": 0.29406118392944336, "learning_rate": 9.119736244721591e-06, "loss": 0.3855, "step": 13406 }, { "epoch": 1.3630540870272467, "grad_norm": 0.2721121311187744, "learning_rate": 9.119535132345443e-06, "loss": 0.3599, "step": 13407 }, { "epoch": 1.363155754371696, "grad_norm": 0.2902665436267853, "learning_rate": 9.119333999216053e-06, "loss": 0.3967, "step": 13408 }, { "epoch": 1.3632574217161448, "grad_norm": 0.2642844617366791, "learning_rate": 9.11913284533444e-06, "loss": 0.3352, "step": 13409 }, { "epoch": 1.3633590890605938, "grad_norm": 0.29263824224472046, "learning_rate": 9.118931670701613e-06, "loss": 0.3654, "step": 13410 }, { "epoch": 1.3634607564050427, "grad_norm": 0.2692534029483795, "learning_rate": 9.118730475318586e-06, "loss": 0.3685, "step": 13411 }, { "epoch": 1.3635624237494917, "grad_norm": 0.2861276865005493, "learning_rate": 9.118529259186376e-06, "loss": 0.352, "step": 13412 }, { "epoch": 1.3636640910939406, "grad_norm": 0.26962316036224365, "learning_rate": 9.118328022305993e-06, "loss": 0.3443, "step": 13413 }, { "epoch": 1.3637657584383895, "grad_norm": 0.28854262828826904, "learning_rate": 9.118126764678454e-06, "loss": 0.342, "step": 13414 }, { "epoch": 1.3638674257828385, "grad_norm": 0.2894086241722107, "learning_rate": 9.117925486304769e-06, "loss": 0.3608, "step": 13415 }, { "epoch": 1.3639690931272876, "grad_norm": 0.29380956292152405, "learning_rate": 9.117724187185955e-06, "loss": 0.4038, "step": 13416 }, { "epoch": 1.3640707604717366, "grad_norm": 0.29891031980514526, "learning_rate": 9.117522867323022e-06, "loss": 0.4174, "step": 13417 }, { "epoch": 1.3641724278161855, "grad_norm": 0.296734094619751, "learning_rate": 9.117321526716992e-06, "loss": 0.3834, "step": 13418 }, { "epoch": 1.3642740951606345, "grad_norm": 0.2810482680797577, "learning_rate": 9.11712016536887e-06, "loss": 0.3558, "step": 13419 }, { "epoch": 1.3643757625050834, "grad_norm": 0.3037430942058563, "learning_rate": 9.116918783279678e-06, "loss": 0.3766, "step": 13420 }, { "epoch": 1.3644774298495324, "grad_norm": 0.29296523332595825, "learning_rate": 9.116717380450427e-06, "loss": 0.3648, "step": 13421 }, { "epoch": 1.3645790971939813, "grad_norm": 0.28539448976516724, "learning_rate": 9.116515956882132e-06, "loss": 0.3628, "step": 13422 }, { "epoch": 1.3646807645384302, "grad_norm": 0.302633672952652, "learning_rate": 9.116314512575807e-06, "loss": 0.3827, "step": 13423 }, { "epoch": 1.3647824318828792, "grad_norm": 0.31226083636283875, "learning_rate": 9.116113047532468e-06, "loss": 0.369, "step": 13424 }, { "epoch": 1.3648840992273281, "grad_norm": 0.3035415709018707, "learning_rate": 9.115911561753132e-06, "loss": 0.3668, "step": 13425 }, { "epoch": 1.364985766571777, "grad_norm": 0.2929706573486328, "learning_rate": 9.115710055238809e-06, "loss": 0.328, "step": 13426 }, { "epoch": 1.365087433916226, "grad_norm": 0.2717874050140381, "learning_rate": 9.115508527990518e-06, "loss": 0.3305, "step": 13427 }, { "epoch": 1.365189101260675, "grad_norm": 0.30081629753112793, "learning_rate": 9.115306980009273e-06, "loss": 0.3716, "step": 13428 }, { "epoch": 1.365290768605124, "grad_norm": 0.2769300639629364, "learning_rate": 9.115105411296088e-06, "loss": 0.3697, "step": 13429 }, { "epoch": 1.365392435949573, "grad_norm": 0.33344611525535583, "learning_rate": 9.114903821851982e-06, "loss": 0.3807, "step": 13430 }, { "epoch": 1.365494103294022, "grad_norm": 0.30505380034446716, "learning_rate": 9.114702211677967e-06, "loss": 0.3568, "step": 13431 }, { "epoch": 1.365595770638471, "grad_norm": 0.2790379822254181, "learning_rate": 9.11450058077506e-06, "loss": 0.3739, "step": 13432 }, { "epoch": 1.3656974379829199, "grad_norm": 0.28846409916877747, "learning_rate": 9.114298929144277e-06, "loss": 0.397, "step": 13433 }, { "epoch": 1.3657991053273688, "grad_norm": 0.3085516393184662, "learning_rate": 9.114097256786632e-06, "loss": 0.376, "step": 13434 }, { "epoch": 1.3659007726718178, "grad_norm": 0.2932908833026886, "learning_rate": 9.113895563703146e-06, "loss": 0.3371, "step": 13435 }, { "epoch": 1.3660024400162667, "grad_norm": 1.454521656036377, "learning_rate": 9.113693849894829e-06, "loss": 0.382, "step": 13436 }, { "epoch": 1.3661041073607159, "grad_norm": 0.30829915404319763, "learning_rate": 9.1134921153627e-06, "loss": 0.3903, "step": 13437 }, { "epoch": 1.3662057747051648, "grad_norm": 0.2939242720603943, "learning_rate": 9.113290360107778e-06, "loss": 0.4053, "step": 13438 }, { "epoch": 1.3663074420496137, "grad_norm": 0.27721625566482544, "learning_rate": 9.113088584131074e-06, "loss": 0.3481, "step": 13439 }, { "epoch": 1.3664091093940627, "grad_norm": 0.27047255635261536, "learning_rate": 9.112886787433607e-06, "loss": 0.3774, "step": 13440 }, { "epoch": 1.3665107767385116, "grad_norm": 0.29083094000816345, "learning_rate": 9.112684970016396e-06, "loss": 0.3389, "step": 13441 }, { "epoch": 1.3666124440829606, "grad_norm": 0.275621622800827, "learning_rate": 9.112483131880455e-06, "loss": 0.3749, "step": 13442 }, { "epoch": 1.3667141114274095, "grad_norm": 0.30394247174263, "learning_rate": 9.1122812730268e-06, "loss": 0.3466, "step": 13443 }, { "epoch": 1.3668157787718584, "grad_norm": 0.29819798469543457, "learning_rate": 9.11207939345645e-06, "loss": 0.3926, "step": 13444 }, { "epoch": 1.3669174461163074, "grad_norm": 0.28049761056900024, "learning_rate": 9.111877493170421e-06, "loss": 0.3498, "step": 13445 }, { "epoch": 1.3670191134607563, "grad_norm": 0.3112313151359558, "learning_rate": 9.11167557216973e-06, "loss": 0.3663, "step": 13446 }, { "epoch": 1.3671207808052053, "grad_norm": 0.270951509475708, "learning_rate": 9.111473630455395e-06, "loss": 0.3341, "step": 13447 }, { "epoch": 1.3672224481496542, "grad_norm": 0.28672221302986145, "learning_rate": 9.111271668028432e-06, "loss": 0.3908, "step": 13448 }, { "epoch": 1.3673241154941034, "grad_norm": 0.29781052470207214, "learning_rate": 9.11106968488986e-06, "loss": 0.3443, "step": 13449 }, { "epoch": 1.3674257828385523, "grad_norm": 0.32430940866470337, "learning_rate": 9.110867681040696e-06, "loss": 0.3906, "step": 13450 }, { "epoch": 1.3675274501830013, "grad_norm": 0.2805151045322418, "learning_rate": 9.110665656481959e-06, "loss": 0.34, "step": 13451 }, { "epoch": 1.3676291175274502, "grad_norm": 0.2949606478214264, "learning_rate": 9.110463611214662e-06, "loss": 0.3256, "step": 13452 }, { "epoch": 1.3677307848718991, "grad_norm": 0.2715415060520172, "learning_rate": 9.110261545239828e-06, "loss": 0.3687, "step": 13453 }, { "epoch": 1.367832452216348, "grad_norm": 0.2822153866291046, "learning_rate": 9.110059458558473e-06, "loss": 0.3375, "step": 13454 }, { "epoch": 1.367934119560797, "grad_norm": 0.2905619740486145, "learning_rate": 9.109857351171615e-06, "loss": 0.408, "step": 13455 }, { "epoch": 1.368035786905246, "grad_norm": 0.31865713000297546, "learning_rate": 9.109655223080273e-06, "loss": 0.3391, "step": 13456 }, { "epoch": 1.3681374542496951, "grad_norm": 0.29363593459129333, "learning_rate": 9.109453074285463e-06, "loss": 0.3608, "step": 13457 }, { "epoch": 1.368239121594144, "grad_norm": 0.29900023341178894, "learning_rate": 9.109250904788206e-06, "loss": 0.3699, "step": 13458 }, { "epoch": 1.368340788938593, "grad_norm": 0.27018260955810547, "learning_rate": 9.109048714589519e-06, "loss": 0.3672, "step": 13459 }, { "epoch": 1.368442456283042, "grad_norm": 0.2868559658527374, "learning_rate": 9.108846503690421e-06, "loss": 0.3495, "step": 13460 }, { "epoch": 1.368544123627491, "grad_norm": 0.3320309519767761, "learning_rate": 9.108644272091933e-06, "loss": 0.3638, "step": 13461 }, { "epoch": 1.3686457909719398, "grad_norm": 0.30188894271850586, "learning_rate": 9.108442019795069e-06, "loss": 0.3649, "step": 13462 }, { "epoch": 1.3687474583163888, "grad_norm": 0.30339112877845764, "learning_rate": 9.10823974680085e-06, "loss": 0.3438, "step": 13463 }, { "epoch": 1.3688491256608377, "grad_norm": 0.3019682765007019, "learning_rate": 9.108037453110296e-06, "loss": 0.3824, "step": 13464 }, { "epoch": 1.3689507930052867, "grad_norm": 0.3016508221626282, "learning_rate": 9.107835138724427e-06, "loss": 0.3536, "step": 13465 }, { "epoch": 1.3690524603497356, "grad_norm": 0.30414244532585144, "learning_rate": 9.10763280364426e-06, "loss": 0.3649, "step": 13466 }, { "epoch": 1.3691541276941845, "grad_norm": 0.299162894487381, "learning_rate": 9.107430447870815e-06, "loss": 0.3499, "step": 13467 }, { "epoch": 1.3692557950386335, "grad_norm": 0.2942698001861572, "learning_rate": 9.10722807140511e-06, "loss": 0.3727, "step": 13468 }, { "epoch": 1.3693574623830824, "grad_norm": 0.3089086413383484, "learning_rate": 9.107025674248168e-06, "loss": 0.3765, "step": 13469 }, { "epoch": 1.3694591297275316, "grad_norm": 0.3125579357147217, "learning_rate": 9.106823256401007e-06, "loss": 0.3553, "step": 13470 }, { "epoch": 1.3695607970719805, "grad_norm": 0.26744887232780457, "learning_rate": 9.106620817864644e-06, "loss": 0.3193, "step": 13471 }, { "epoch": 1.3696624644164295, "grad_norm": 0.30912861227989197, "learning_rate": 9.106418358640103e-06, "loss": 0.3556, "step": 13472 }, { "epoch": 1.3697641317608784, "grad_norm": 0.27808693051338196, "learning_rate": 9.106215878728402e-06, "loss": 0.3819, "step": 13473 }, { "epoch": 1.3698657991053274, "grad_norm": 0.2848157584667206, "learning_rate": 9.10601337813056e-06, "loss": 0.3327, "step": 13474 }, { "epoch": 1.3699674664497763, "grad_norm": 0.27204108238220215, "learning_rate": 9.105810856847598e-06, "loss": 0.3581, "step": 13475 }, { "epoch": 1.3700691337942252, "grad_norm": 0.30211105942726135, "learning_rate": 9.105608314880539e-06, "loss": 0.3661, "step": 13476 }, { "epoch": 1.3701708011386742, "grad_norm": 0.2801683247089386, "learning_rate": 9.1054057522304e-06, "loss": 0.3434, "step": 13477 }, { "epoch": 1.3702724684831233, "grad_norm": 0.3161037862300873, "learning_rate": 9.105203168898202e-06, "loss": 0.3877, "step": 13478 }, { "epoch": 1.3703741358275723, "grad_norm": 0.2896254360675812, "learning_rate": 9.105000564884966e-06, "loss": 0.3618, "step": 13479 }, { "epoch": 1.3704758031720212, "grad_norm": 0.31490740180015564, "learning_rate": 9.104797940191713e-06, "loss": 0.3739, "step": 13480 }, { "epoch": 1.3705774705164702, "grad_norm": 0.2864968478679657, "learning_rate": 9.104595294819462e-06, "loss": 0.3466, "step": 13481 }, { "epoch": 1.370679137860919, "grad_norm": 0.29041069746017456, "learning_rate": 9.104392628769237e-06, "loss": 0.3642, "step": 13482 }, { "epoch": 1.370780805205368, "grad_norm": 0.30310747027397156, "learning_rate": 9.104189942042056e-06, "loss": 0.3571, "step": 13483 }, { "epoch": 1.370882472549817, "grad_norm": 0.292751282453537, "learning_rate": 9.103987234638942e-06, "loss": 0.3649, "step": 13484 }, { "epoch": 1.370984139894266, "grad_norm": 0.29235756397247314, "learning_rate": 9.103784506560916e-06, "loss": 0.3717, "step": 13485 }, { "epoch": 1.3710858072387149, "grad_norm": 0.3274747133255005, "learning_rate": 9.103581757808999e-06, "loss": 0.3848, "step": 13486 }, { "epoch": 1.3711874745831638, "grad_norm": 0.29314565658569336, "learning_rate": 9.103378988384211e-06, "loss": 0.3953, "step": 13487 }, { "epoch": 1.3712891419276128, "grad_norm": 0.2943720817565918, "learning_rate": 9.103176198287576e-06, "loss": 0.3587, "step": 13488 }, { "epoch": 1.3713908092720617, "grad_norm": 0.2917357385158539, "learning_rate": 9.102973387520115e-06, "loss": 0.3364, "step": 13489 }, { "epoch": 1.3714924766165109, "grad_norm": 0.309145987033844, "learning_rate": 9.102770556082847e-06, "loss": 0.3805, "step": 13490 }, { "epoch": 1.3715941439609598, "grad_norm": 0.30070316791534424, "learning_rate": 9.102567703976797e-06, "loss": 0.3929, "step": 13491 }, { "epoch": 1.3716958113054087, "grad_norm": 0.28541404008865356, "learning_rate": 9.102364831202985e-06, "loss": 0.3457, "step": 13492 }, { "epoch": 1.3717974786498577, "grad_norm": 0.3252742886543274, "learning_rate": 9.102161937762437e-06, "loss": 0.3959, "step": 13493 }, { "epoch": 1.3718991459943066, "grad_norm": 0.268618106842041, "learning_rate": 9.101959023656168e-06, "loss": 0.3676, "step": 13494 }, { "epoch": 1.3720008133387556, "grad_norm": 0.268155962228775, "learning_rate": 9.101756088885206e-06, "loss": 0.382, "step": 13495 }, { "epoch": 1.3721024806832045, "grad_norm": 0.27773013710975647, "learning_rate": 9.101553133450572e-06, "loss": 0.3625, "step": 13496 }, { "epoch": 1.3722041480276534, "grad_norm": 0.28262317180633545, "learning_rate": 9.101350157353287e-06, "loss": 0.3752, "step": 13497 }, { "epoch": 1.3723058153721026, "grad_norm": 0.28053364157676697, "learning_rate": 9.101147160594374e-06, "loss": 0.3403, "step": 13498 }, { "epoch": 1.3724074827165516, "grad_norm": 0.2930194139480591, "learning_rate": 9.10094414317486e-06, "loss": 0.3317, "step": 13499 }, { "epoch": 1.3725091500610005, "grad_norm": 0.28356921672821045, "learning_rate": 9.10074110509576e-06, "loss": 0.3673, "step": 13500 }, { "epoch": 1.3726108174054494, "grad_norm": 0.27778366208076477, "learning_rate": 9.1005380463581e-06, "loss": 0.3876, "step": 13501 }, { "epoch": 1.3727124847498984, "grad_norm": 0.27129971981048584, "learning_rate": 9.100334966962907e-06, "loss": 0.4032, "step": 13502 }, { "epoch": 1.3728141520943473, "grad_norm": 0.2754470705986023, "learning_rate": 9.100131866911199e-06, "loss": 0.3548, "step": 13503 }, { "epoch": 1.3729158194387963, "grad_norm": 0.2767046391963959, "learning_rate": 9.099928746204001e-06, "loss": 0.3803, "step": 13504 }, { "epoch": 1.3730174867832452, "grad_norm": 0.28678110241889954, "learning_rate": 9.099725604842338e-06, "loss": 0.3655, "step": 13505 }, { "epoch": 1.3731191541276941, "grad_norm": 0.2803490161895752, "learning_rate": 9.099522442827229e-06, "loss": 0.3288, "step": 13506 }, { "epoch": 1.373220821472143, "grad_norm": 0.28119271993637085, "learning_rate": 9.099319260159703e-06, "loss": 0.3888, "step": 13507 }, { "epoch": 1.373322488816592, "grad_norm": 0.2961014211177826, "learning_rate": 9.099116056840779e-06, "loss": 0.3709, "step": 13508 }, { "epoch": 1.373424156161041, "grad_norm": 0.2917582094669342, "learning_rate": 9.098912832871483e-06, "loss": 0.3525, "step": 13509 }, { "epoch": 1.37352582350549, "grad_norm": 0.2854457497596741, "learning_rate": 9.098709588252836e-06, "loss": 0.3683, "step": 13510 }, { "epoch": 1.373627490849939, "grad_norm": 0.2769344747066498, "learning_rate": 9.098506322985866e-06, "loss": 0.3784, "step": 13511 }, { "epoch": 1.373729158194388, "grad_norm": 0.27962976694107056, "learning_rate": 9.098303037071597e-06, "loss": 0.367, "step": 13512 }, { "epoch": 1.373830825538837, "grad_norm": 0.3122155964374542, "learning_rate": 9.098099730511047e-06, "loss": 0.3956, "step": 13513 }, { "epoch": 1.373932492883286, "grad_norm": 0.27576252818107605, "learning_rate": 9.097896403305248e-06, "loss": 0.3434, "step": 13514 }, { "epoch": 1.3740341602277348, "grad_norm": 0.2640286684036255, "learning_rate": 9.097693055455218e-06, "loss": 0.3399, "step": 13515 }, { "epoch": 1.3741358275721838, "grad_norm": 0.3133010268211365, "learning_rate": 9.097489686961986e-06, "loss": 0.37, "step": 13516 }, { "epoch": 1.3742374949166327, "grad_norm": 0.2940369248390198, "learning_rate": 9.097286297826574e-06, "loss": 0.3404, "step": 13517 }, { "epoch": 1.3743391622610817, "grad_norm": 0.2747483551502228, "learning_rate": 9.097082888050008e-06, "loss": 0.3562, "step": 13518 }, { "epoch": 1.3744408296055308, "grad_norm": 0.29409360885620117, "learning_rate": 9.096879457633313e-06, "loss": 0.3637, "step": 13519 }, { "epoch": 1.3745424969499798, "grad_norm": 0.31435737013816833, "learning_rate": 9.096676006577509e-06, "loss": 0.3656, "step": 13520 }, { "epoch": 1.3746441642944287, "grad_norm": 0.2867687940597534, "learning_rate": 9.096472534883629e-06, "loss": 0.3838, "step": 13521 }, { "epoch": 1.3747458316388776, "grad_norm": 0.2934643626213074, "learning_rate": 9.09626904255269e-06, "loss": 0.35, "step": 13522 }, { "epoch": 1.3748474989833266, "grad_norm": 0.3008875846862793, "learning_rate": 9.096065529585724e-06, "loss": 0.3468, "step": 13523 }, { "epoch": 1.3749491663277755, "grad_norm": 0.31778788566589355, "learning_rate": 9.095861995983752e-06, "loss": 0.3831, "step": 13524 }, { "epoch": 1.3750508336722245, "grad_norm": 0.33079272508621216, "learning_rate": 9.0956584417478e-06, "loss": 0.3622, "step": 13525 }, { "epoch": 1.3751525010166734, "grad_norm": 0.33512261509895325, "learning_rate": 9.095454866878896e-06, "loss": 0.3196, "step": 13526 }, { "epoch": 1.3752541683611224, "grad_norm": 0.285163551568985, "learning_rate": 9.095251271378062e-06, "loss": 0.359, "step": 13527 }, { "epoch": 1.3753558357055713, "grad_norm": 0.2805730998516083, "learning_rate": 9.095047655246325e-06, "loss": 0.3946, "step": 13528 }, { "epoch": 1.3754575030500202, "grad_norm": 0.28006380796432495, "learning_rate": 9.094844018484712e-06, "loss": 0.3461, "step": 13529 }, { "epoch": 1.3755591703944692, "grad_norm": 0.31945496797561646, "learning_rate": 9.094640361094249e-06, "loss": 0.4108, "step": 13530 }, { "epoch": 1.3756608377389183, "grad_norm": 0.3000941276550293, "learning_rate": 9.094436683075958e-06, "loss": 0.3766, "step": 13531 }, { "epoch": 1.3757625050833673, "grad_norm": 0.29340660572052, "learning_rate": 9.09423298443087e-06, "loss": 0.3506, "step": 13532 }, { "epoch": 1.3758641724278162, "grad_norm": 0.2873523533344269, "learning_rate": 9.094029265160009e-06, "loss": 0.3597, "step": 13533 }, { "epoch": 1.3759658397722652, "grad_norm": 0.3066848814487457, "learning_rate": 9.093825525264402e-06, "loss": 0.3158, "step": 13534 }, { "epoch": 1.376067507116714, "grad_norm": 0.2949431240558624, "learning_rate": 9.093621764745073e-06, "loss": 0.3422, "step": 13535 }, { "epoch": 1.376169174461163, "grad_norm": 0.2850896716117859, "learning_rate": 9.093417983603052e-06, "loss": 0.3761, "step": 13536 }, { "epoch": 1.376270841805612, "grad_norm": 0.2676527798175812, "learning_rate": 9.093214181839362e-06, "loss": 0.3128, "step": 13537 }, { "epoch": 1.376372509150061, "grad_norm": 0.2894245982170105, "learning_rate": 9.093010359455035e-06, "loss": 0.3838, "step": 13538 }, { "epoch": 1.37647417649451, "grad_norm": 0.29355213046073914, "learning_rate": 9.09280651645109e-06, "loss": 0.3522, "step": 13539 }, { "epoch": 1.376575843838959, "grad_norm": 0.2711121737957001, "learning_rate": 9.092602652828562e-06, "loss": 0.4032, "step": 13540 }, { "epoch": 1.376677511183408, "grad_norm": 0.2853803038597107, "learning_rate": 9.092398768588475e-06, "loss": 0.3478, "step": 13541 }, { "epoch": 1.376779178527857, "grad_norm": 0.2781423330307007, "learning_rate": 9.092194863731852e-06, "loss": 0.3591, "step": 13542 }, { "epoch": 1.3768808458723059, "grad_norm": 0.28173181414604187, "learning_rate": 9.091990938259725e-06, "loss": 0.3651, "step": 13543 }, { "epoch": 1.3769825132167548, "grad_norm": 0.2760010063648224, "learning_rate": 9.091786992173122e-06, "loss": 0.3653, "step": 13544 }, { "epoch": 1.3770841805612037, "grad_norm": 0.28803107142448425, "learning_rate": 9.091583025473065e-06, "loss": 0.3621, "step": 13545 }, { "epoch": 1.3771858479056527, "grad_norm": 0.2896180748939514, "learning_rate": 9.091379038160588e-06, "loss": 0.392, "step": 13546 }, { "epoch": 1.3772875152501016, "grad_norm": 0.2638944387435913, "learning_rate": 9.091175030236715e-06, "loss": 0.3754, "step": 13547 }, { "epoch": 1.3773891825945506, "grad_norm": 0.30535614490509033, "learning_rate": 9.090971001702474e-06, "loss": 0.3902, "step": 13548 }, { "epoch": 1.3774908499389995, "grad_norm": 0.2828676998615265, "learning_rate": 9.090766952558893e-06, "loss": 0.3677, "step": 13549 }, { "epoch": 1.3775925172834484, "grad_norm": 0.2688685655593872, "learning_rate": 9.090562882807001e-06, "loss": 0.3235, "step": 13550 }, { "epoch": 1.3776941846278974, "grad_norm": 0.2802278697490692, "learning_rate": 9.090358792447824e-06, "loss": 0.3602, "step": 13551 }, { "epoch": 1.3777958519723466, "grad_norm": 0.2795824706554413, "learning_rate": 9.090154681482393e-06, "loss": 0.4012, "step": 13552 }, { "epoch": 1.3778975193167955, "grad_norm": 0.28613001108169556, "learning_rate": 9.089950549911734e-06, "loss": 0.3755, "step": 13553 }, { "epoch": 1.3779991866612444, "grad_norm": 0.272326797246933, "learning_rate": 9.089746397736876e-06, "loss": 0.3529, "step": 13554 }, { "epoch": 1.3781008540056934, "grad_norm": 0.30056458711624146, "learning_rate": 9.089542224958847e-06, "loss": 0.3612, "step": 13555 }, { "epoch": 1.3782025213501423, "grad_norm": 0.3048170506954193, "learning_rate": 9.089338031578676e-06, "loss": 0.3533, "step": 13556 }, { "epoch": 1.3783041886945913, "grad_norm": 0.2965031862258911, "learning_rate": 9.089133817597393e-06, "loss": 0.3802, "step": 13557 }, { "epoch": 1.3784058560390402, "grad_norm": 0.2865940034389496, "learning_rate": 9.088929583016025e-06, "loss": 0.3533, "step": 13558 }, { "epoch": 1.3785075233834891, "grad_norm": 0.295360267162323, "learning_rate": 9.088725327835601e-06, "loss": 0.3479, "step": 13559 }, { "epoch": 1.3786091907279383, "grad_norm": 0.27844396233558655, "learning_rate": 9.088521052057151e-06, "loss": 0.3412, "step": 13560 }, { "epoch": 1.3787108580723872, "grad_norm": 0.30413195490837097, "learning_rate": 9.088316755681704e-06, "loss": 0.3627, "step": 13561 }, { "epoch": 1.3788125254168362, "grad_norm": 0.31838932633399963, "learning_rate": 9.088112438710288e-06, "loss": 0.3601, "step": 13562 }, { "epoch": 1.3789141927612851, "grad_norm": 0.31277209520339966, "learning_rate": 9.087908101143932e-06, "loss": 0.3468, "step": 13563 }, { "epoch": 1.379015860105734, "grad_norm": 0.3144155740737915, "learning_rate": 9.087703742983668e-06, "loss": 0.3771, "step": 13564 }, { "epoch": 1.379117527450183, "grad_norm": 0.3307541310787201, "learning_rate": 9.087499364230523e-06, "loss": 0.3785, "step": 13565 }, { "epoch": 1.379219194794632, "grad_norm": 0.28429996967315674, "learning_rate": 9.087294964885529e-06, "loss": 0.3498, "step": 13566 }, { "epoch": 1.379320862139081, "grad_norm": 0.2825121581554413, "learning_rate": 9.087090544949713e-06, "loss": 0.3663, "step": 13567 }, { "epoch": 1.3794225294835298, "grad_norm": 0.327322393655777, "learning_rate": 9.086886104424107e-06, "loss": 0.3779, "step": 13568 }, { "epoch": 1.3795241968279788, "grad_norm": 0.31568148732185364, "learning_rate": 9.086681643309738e-06, "loss": 0.3543, "step": 13569 }, { "epoch": 1.3796258641724277, "grad_norm": 0.3286254405975342, "learning_rate": 9.08647716160764e-06, "loss": 0.3662, "step": 13570 }, { "epoch": 1.3797275315168767, "grad_norm": 0.2857508659362793, "learning_rate": 9.08627265931884e-06, "loss": 0.3877, "step": 13571 }, { "epoch": 1.3798291988613258, "grad_norm": 0.2806303799152374, "learning_rate": 9.086068136444368e-06, "loss": 0.3687, "step": 13572 }, { "epoch": 1.3799308662057748, "grad_norm": 0.3134329617023468, "learning_rate": 9.085863592985258e-06, "loss": 0.3362, "step": 13573 }, { "epoch": 1.3800325335502237, "grad_norm": 0.3060431480407715, "learning_rate": 9.08565902894254e-06, "loss": 0.3638, "step": 13574 }, { "epoch": 1.3801342008946726, "grad_norm": 0.2982776165008545, "learning_rate": 9.085454444317238e-06, "loss": 0.3461, "step": 13575 }, { "epoch": 1.3802358682391216, "grad_norm": 0.2811644673347473, "learning_rate": 9.08524983911039e-06, "loss": 0.3732, "step": 13576 }, { "epoch": 1.3803375355835705, "grad_norm": 0.3370455503463745, "learning_rate": 9.085045213323025e-06, "loss": 0.3607, "step": 13577 }, { "epoch": 1.3804392029280195, "grad_norm": 0.2852924168109894, "learning_rate": 9.084840566956173e-06, "loss": 0.346, "step": 13578 }, { "epoch": 1.3805408702724684, "grad_norm": 0.26719579100608826, "learning_rate": 9.084635900010864e-06, "loss": 0.3837, "step": 13579 }, { "epoch": 1.3806425376169176, "grad_norm": 0.2803305983543396, "learning_rate": 9.084431212488129e-06, "loss": 0.3371, "step": 13580 }, { "epoch": 1.3807442049613665, "grad_norm": 0.303946316242218, "learning_rate": 9.084226504389e-06, "loss": 0.3785, "step": 13581 }, { "epoch": 1.3808458723058155, "grad_norm": 0.2876969873905182, "learning_rate": 9.08402177571451e-06, "loss": 0.3702, "step": 13582 }, { "epoch": 1.3809475396502644, "grad_norm": 0.2949196398258209, "learning_rate": 9.08381702646569e-06, "loss": 0.3561, "step": 13583 }, { "epoch": 1.3810492069947133, "grad_norm": 0.3052624762058258, "learning_rate": 9.083612256643566e-06, "loss": 0.3892, "step": 13584 }, { "epoch": 1.3811508743391623, "grad_norm": 0.31566232442855835, "learning_rate": 9.083407466249177e-06, "loss": 0.3958, "step": 13585 }, { "epoch": 1.3812525416836112, "grad_norm": 0.26931729912757874, "learning_rate": 9.083202655283552e-06, "loss": 0.3952, "step": 13586 }, { "epoch": 1.3813542090280602, "grad_norm": 0.30620691180229187, "learning_rate": 9.08299782374772e-06, "loss": 0.363, "step": 13587 }, { "epoch": 1.381455876372509, "grad_norm": 0.30194535851478577, "learning_rate": 9.082792971642717e-06, "loss": 0.3794, "step": 13588 }, { "epoch": 1.381557543716958, "grad_norm": 0.2861326336860657, "learning_rate": 9.082588098969573e-06, "loss": 0.3347, "step": 13589 }, { "epoch": 1.381659211061407, "grad_norm": 0.26959481835365295, "learning_rate": 9.082383205729319e-06, "loss": 0.3418, "step": 13590 }, { "epoch": 1.381760878405856, "grad_norm": 0.2996754050254822, "learning_rate": 9.082178291922989e-06, "loss": 0.3867, "step": 13591 }, { "epoch": 1.3818625457503049, "grad_norm": 0.3064047694206238, "learning_rate": 9.081973357551616e-06, "loss": 0.3813, "step": 13592 }, { "epoch": 1.381964213094754, "grad_norm": 0.2726278007030487, "learning_rate": 9.081768402616231e-06, "loss": 0.3511, "step": 13593 }, { "epoch": 1.382065880439203, "grad_norm": 0.3056618869304657, "learning_rate": 9.081563427117868e-06, "loss": 0.3572, "step": 13594 }, { "epoch": 1.382167547783652, "grad_norm": 0.2675185203552246, "learning_rate": 9.081358431057555e-06, "loss": 0.3494, "step": 13595 }, { "epoch": 1.3822692151281009, "grad_norm": 0.3182868957519531, "learning_rate": 9.08115341443633e-06, "loss": 0.3863, "step": 13596 }, { "epoch": 1.3823708824725498, "grad_norm": 0.30423668026924133, "learning_rate": 9.080948377255225e-06, "loss": 0.3764, "step": 13597 }, { "epoch": 1.3824725498169987, "grad_norm": 0.28881242871284485, "learning_rate": 9.08074331951527e-06, "loss": 0.3904, "step": 13598 }, { "epoch": 1.3825742171614477, "grad_norm": 0.2823579013347626, "learning_rate": 9.0805382412175e-06, "loss": 0.355, "step": 13599 }, { "epoch": 1.3826758845058966, "grad_norm": 0.30224326252937317, "learning_rate": 9.080333142362949e-06, "loss": 0.388, "step": 13600 }, { "epoch": 1.3827775518503458, "grad_norm": 0.2886711359024048, "learning_rate": 9.08012802295265e-06, "loss": 0.3586, "step": 13601 }, { "epoch": 1.3828792191947947, "grad_norm": 0.2839978039264679, "learning_rate": 9.079922882987634e-06, "loss": 0.3601, "step": 13602 }, { "epoch": 1.3829808865392437, "grad_norm": 0.26406168937683105, "learning_rate": 9.079717722468936e-06, "loss": 0.337, "step": 13603 }, { "epoch": 1.3830825538836926, "grad_norm": 0.2681461274623871, "learning_rate": 9.079512541397591e-06, "loss": 0.3638, "step": 13604 }, { "epoch": 1.3831842212281416, "grad_norm": 0.29566752910614014, "learning_rate": 9.07930733977463e-06, "loss": 0.3623, "step": 13605 }, { "epoch": 1.3832858885725905, "grad_norm": 0.27294376492500305, "learning_rate": 9.079102117601088e-06, "loss": 0.3409, "step": 13606 }, { "epoch": 1.3833875559170394, "grad_norm": 0.33131498098373413, "learning_rate": 9.078896874878001e-06, "loss": 0.3839, "step": 13607 }, { "epoch": 1.3834892232614884, "grad_norm": 0.2766190469264984, "learning_rate": 9.078691611606398e-06, "loss": 0.3584, "step": 13608 }, { "epoch": 1.3835908906059373, "grad_norm": 0.289520800113678, "learning_rate": 9.078486327787317e-06, "loss": 0.3926, "step": 13609 }, { "epoch": 1.3836925579503863, "grad_norm": 0.3234011232852936, "learning_rate": 9.078281023421791e-06, "loss": 0.3741, "step": 13610 }, { "epoch": 1.3837942252948352, "grad_norm": 0.2799794375896454, "learning_rate": 9.078075698510855e-06, "loss": 0.3591, "step": 13611 }, { "epoch": 1.3838958926392841, "grad_norm": 0.2613241970539093, "learning_rate": 9.077870353055543e-06, "loss": 0.3495, "step": 13612 }, { "epoch": 1.3839975599837333, "grad_norm": 0.292488694190979, "learning_rate": 9.077664987056886e-06, "loss": 0.3571, "step": 13613 }, { "epoch": 1.3840992273281822, "grad_norm": 0.2719994783401489, "learning_rate": 9.077459600515926e-06, "loss": 0.3475, "step": 13614 }, { "epoch": 1.3842008946726312, "grad_norm": 0.33591437339782715, "learning_rate": 9.077254193433692e-06, "loss": 0.4035, "step": 13615 }, { "epoch": 1.3843025620170801, "grad_norm": 0.28763696551322937, "learning_rate": 9.077048765811218e-06, "loss": 0.3774, "step": 13616 }, { "epoch": 1.384404229361529, "grad_norm": 0.27030301094055176, "learning_rate": 9.076843317649544e-06, "loss": 0.3324, "step": 13617 }, { "epoch": 1.384505896705978, "grad_norm": 0.28387251496315, "learning_rate": 9.076637848949702e-06, "loss": 0.3719, "step": 13618 }, { "epoch": 1.384607564050427, "grad_norm": 0.3563682436943054, "learning_rate": 9.076432359712725e-06, "loss": 0.3762, "step": 13619 }, { "epoch": 1.384709231394876, "grad_norm": 0.31106793880462646, "learning_rate": 9.076226849939653e-06, "loss": 0.3949, "step": 13620 }, { "epoch": 1.384810898739325, "grad_norm": 0.2895148694515228, "learning_rate": 9.076021319631518e-06, "loss": 0.3472, "step": 13621 }, { "epoch": 1.384912566083774, "grad_norm": 0.30206552147865295, "learning_rate": 9.075815768789356e-06, "loss": 0.382, "step": 13622 }, { "epoch": 1.385014233428223, "grad_norm": 0.3287025988101959, "learning_rate": 9.075610197414204e-06, "loss": 0.3676, "step": 13623 }, { "epoch": 1.3851159007726719, "grad_norm": 0.30351266264915466, "learning_rate": 9.075404605507095e-06, "loss": 0.338, "step": 13624 }, { "epoch": 1.3852175681171208, "grad_norm": 0.2962995171546936, "learning_rate": 9.075198993069066e-06, "loss": 0.3506, "step": 13625 }, { "epoch": 1.3853192354615698, "grad_norm": 0.3206002414226532, "learning_rate": 9.074993360101153e-06, "loss": 0.3864, "step": 13626 }, { "epoch": 1.3854209028060187, "grad_norm": 0.3190906047821045, "learning_rate": 9.074787706604391e-06, "loss": 0.3474, "step": 13627 }, { "epoch": 1.3855225701504676, "grad_norm": 0.2845425605773926, "learning_rate": 9.074582032579818e-06, "loss": 0.3624, "step": 13628 }, { "epoch": 1.3856242374949166, "grad_norm": 0.30562669038772583, "learning_rate": 9.074376338028469e-06, "loss": 0.3797, "step": 13629 }, { "epoch": 1.3857259048393655, "grad_norm": 0.2770639657974243, "learning_rate": 9.074170622951379e-06, "loss": 0.3625, "step": 13630 }, { "epoch": 1.3858275721838145, "grad_norm": 0.28900063037872314, "learning_rate": 9.073964887349588e-06, "loss": 0.3471, "step": 13631 }, { "epoch": 1.3859292395282634, "grad_norm": 0.27669185400009155, "learning_rate": 9.073759131224129e-06, "loss": 0.348, "step": 13632 }, { "epoch": 1.3860309068727124, "grad_norm": 0.3020532727241516, "learning_rate": 9.073553354576037e-06, "loss": 0.3655, "step": 13633 }, { "epoch": 1.3861325742171615, "grad_norm": 0.2917473018169403, "learning_rate": 9.073347557406355e-06, "loss": 0.3801, "step": 13634 }, { "epoch": 1.3862342415616105, "grad_norm": 0.2637861669063568, "learning_rate": 9.073141739716114e-06, "loss": 0.3785, "step": 13635 }, { "epoch": 1.3863359089060594, "grad_norm": 0.3055664598941803, "learning_rate": 9.072935901506353e-06, "loss": 0.3544, "step": 13636 }, { "epoch": 1.3864375762505083, "grad_norm": 0.2953885495662689, "learning_rate": 9.072730042778109e-06, "loss": 0.386, "step": 13637 }, { "epoch": 1.3865392435949573, "grad_norm": 0.27126678824424744, "learning_rate": 9.072524163532418e-06, "loss": 0.3699, "step": 13638 }, { "epoch": 1.3866409109394062, "grad_norm": 0.3053453862667084, "learning_rate": 9.07231826377032e-06, "loss": 0.3932, "step": 13639 }, { "epoch": 1.3867425782838552, "grad_norm": 0.2969793975353241, "learning_rate": 9.07211234349285e-06, "loss": 0.3805, "step": 13640 }, { "epoch": 1.386844245628304, "grad_norm": 0.28021669387817383, "learning_rate": 9.071906402701046e-06, "loss": 0.3502, "step": 13641 }, { "epoch": 1.3869459129727533, "grad_norm": 0.284049391746521, "learning_rate": 9.071700441395943e-06, "loss": 0.3353, "step": 13642 }, { "epoch": 1.3870475803172022, "grad_norm": 0.2950454354286194, "learning_rate": 9.071494459578581e-06, "loss": 0.3777, "step": 13643 }, { "epoch": 1.3871492476616512, "grad_norm": 0.2970392107963562, "learning_rate": 9.071288457249997e-06, "loss": 0.3695, "step": 13644 }, { "epoch": 1.3872509150061, "grad_norm": 0.30574852228164673, "learning_rate": 9.071082434411231e-06, "loss": 0.3534, "step": 13645 }, { "epoch": 1.387352582350549, "grad_norm": 0.2996707260608673, "learning_rate": 9.070876391063319e-06, "loss": 0.3681, "step": 13646 }, { "epoch": 1.387454249694998, "grad_norm": 0.27945539355278015, "learning_rate": 9.0706703272073e-06, "loss": 0.377, "step": 13647 }, { "epoch": 1.387555917039447, "grad_norm": 0.30148768424987793, "learning_rate": 9.070464242844211e-06, "loss": 0.3773, "step": 13648 }, { "epoch": 1.3876575843838959, "grad_norm": 0.30661916732788086, "learning_rate": 9.070258137975088e-06, "loss": 0.3258, "step": 13649 }, { "epoch": 1.3877592517283448, "grad_norm": 0.2845589220523834, "learning_rate": 9.070052012600974e-06, "loss": 0.3394, "step": 13650 }, { "epoch": 1.3878609190727937, "grad_norm": 0.2959079146385193, "learning_rate": 9.069845866722905e-06, "loss": 0.3714, "step": 13651 }, { "epoch": 1.3879625864172427, "grad_norm": 0.2920292615890503, "learning_rate": 9.069639700341918e-06, "loss": 0.3796, "step": 13652 }, { "epoch": 1.3880642537616916, "grad_norm": 0.2813778519630432, "learning_rate": 9.069433513459054e-06, "loss": 0.3571, "step": 13653 }, { "epoch": 1.3881659211061408, "grad_norm": 0.28899750113487244, "learning_rate": 9.069227306075352e-06, "loss": 0.3462, "step": 13654 }, { "epoch": 1.3882675884505897, "grad_norm": 0.3059414327144623, "learning_rate": 9.069021078191849e-06, "loss": 0.3579, "step": 13655 }, { "epoch": 1.3883692557950387, "grad_norm": 0.3503612279891968, "learning_rate": 9.068814829809585e-06, "loss": 0.3447, "step": 13656 }, { "epoch": 1.3884709231394876, "grad_norm": 0.25295379757881165, "learning_rate": 9.068608560929599e-06, "loss": 0.3607, "step": 13657 }, { "epoch": 1.3885725904839366, "grad_norm": 0.30649468302726746, "learning_rate": 9.06840227155293e-06, "loss": 0.3692, "step": 13658 }, { "epoch": 1.3886742578283855, "grad_norm": 0.322004109621048, "learning_rate": 9.068195961680615e-06, "loss": 0.3454, "step": 13659 }, { "epoch": 1.3887759251728344, "grad_norm": 0.3041904866695404, "learning_rate": 9.067989631313698e-06, "loss": 0.3504, "step": 13660 }, { "epoch": 1.3888775925172834, "grad_norm": 0.2820059359073639, "learning_rate": 9.067783280453215e-06, "loss": 0.3357, "step": 13661 }, { "epoch": 1.3889792598617325, "grad_norm": 0.2926635444164276, "learning_rate": 9.067576909100207e-06, "loss": 0.3515, "step": 13662 }, { "epoch": 1.3890809272061815, "grad_norm": 0.28779423236846924, "learning_rate": 9.067370517255713e-06, "loss": 0.3458, "step": 13663 }, { "epoch": 1.3891825945506304, "grad_norm": 0.26611340045928955, "learning_rate": 9.067164104920772e-06, "loss": 0.3816, "step": 13664 }, { "epoch": 1.3892842618950794, "grad_norm": 0.2921994924545288, "learning_rate": 9.066957672096426e-06, "loss": 0.3797, "step": 13665 }, { "epoch": 1.3893859292395283, "grad_norm": 0.30544859170913696, "learning_rate": 9.066751218783713e-06, "loss": 0.3497, "step": 13666 }, { "epoch": 1.3894875965839772, "grad_norm": 0.26578444242477417, "learning_rate": 9.066544744983674e-06, "loss": 0.371, "step": 13667 }, { "epoch": 1.3895892639284262, "grad_norm": 0.280405730009079, "learning_rate": 9.066338250697348e-06, "loss": 0.3758, "step": 13668 }, { "epoch": 1.3896909312728751, "grad_norm": 0.2824218273162842, "learning_rate": 9.066131735925778e-06, "loss": 0.3385, "step": 13669 }, { "epoch": 1.389792598617324, "grad_norm": 0.2895851135253906, "learning_rate": 9.065925200670002e-06, "loss": 0.3679, "step": 13670 }, { "epoch": 1.389894265961773, "grad_norm": 0.2838354706764221, "learning_rate": 9.06571864493106e-06, "loss": 0.3723, "step": 13671 }, { "epoch": 1.389995933306222, "grad_norm": 0.292120099067688, "learning_rate": 9.065512068709996e-06, "loss": 0.3776, "step": 13672 }, { "epoch": 1.390097600650671, "grad_norm": 0.3045247793197632, "learning_rate": 9.065305472007846e-06, "loss": 0.4096, "step": 13673 }, { "epoch": 1.3901992679951198, "grad_norm": 0.2834390699863434, "learning_rate": 9.065098854825654e-06, "loss": 0.3605, "step": 13674 }, { "epoch": 1.390300935339569, "grad_norm": 0.29989054799079895, "learning_rate": 9.064892217164458e-06, "loss": 0.3605, "step": 13675 }, { "epoch": 1.390402602684018, "grad_norm": 0.3077968657016754, "learning_rate": 9.064685559025303e-06, "loss": 0.3988, "step": 13676 }, { "epoch": 1.3905042700284669, "grad_norm": 0.2762892544269562, "learning_rate": 9.06447888040923e-06, "loss": 0.3489, "step": 13677 }, { "epoch": 1.3906059373729158, "grad_norm": 0.30850720405578613, "learning_rate": 9.064272181317274e-06, "loss": 0.3824, "step": 13678 }, { "epoch": 1.3907076047173648, "grad_norm": 0.3141951560974121, "learning_rate": 9.064065461750485e-06, "loss": 0.3524, "step": 13679 }, { "epoch": 1.3908092720618137, "grad_norm": 0.28672048449516296, "learning_rate": 9.063858721709896e-06, "loss": 0.3633, "step": 13680 }, { "epoch": 1.3909109394062626, "grad_norm": 0.289162814617157, "learning_rate": 9.063651961196553e-06, "loss": 0.3649, "step": 13681 }, { "epoch": 1.3910126067507116, "grad_norm": 0.27886444330215454, "learning_rate": 9.0634451802115e-06, "loss": 0.3598, "step": 13682 }, { "epoch": 1.3911142740951608, "grad_norm": 0.2997627258300781, "learning_rate": 9.063238378755774e-06, "loss": 0.3677, "step": 13683 }, { "epoch": 1.3912159414396097, "grad_norm": 0.2927698493003845, "learning_rate": 9.063031556830418e-06, "loss": 0.3605, "step": 13684 }, { "epoch": 1.3913176087840586, "grad_norm": 0.2847862243652344, "learning_rate": 9.062824714436476e-06, "loss": 0.3898, "step": 13685 }, { "epoch": 1.3914192761285076, "grad_norm": 0.2994919419288635, "learning_rate": 9.062617851574987e-06, "loss": 0.3519, "step": 13686 }, { "epoch": 1.3915209434729565, "grad_norm": 0.27445197105407715, "learning_rate": 9.062410968246995e-06, "loss": 0.3584, "step": 13687 }, { "epoch": 1.3916226108174055, "grad_norm": 0.27244019508361816, "learning_rate": 9.062204064453543e-06, "loss": 0.3565, "step": 13688 }, { "epoch": 1.3917242781618544, "grad_norm": 0.29588305950164795, "learning_rate": 9.061997140195671e-06, "loss": 0.3401, "step": 13689 }, { "epoch": 1.3918259455063033, "grad_norm": 0.269287645816803, "learning_rate": 9.061790195474424e-06, "loss": 0.3633, "step": 13690 }, { "epoch": 1.3919276128507523, "grad_norm": 0.27343061566352844, "learning_rate": 9.061583230290842e-06, "loss": 0.3437, "step": 13691 }, { "epoch": 1.3920292801952012, "grad_norm": 0.29812031984329224, "learning_rate": 9.06137624464597e-06, "loss": 0.3467, "step": 13692 }, { "epoch": 1.3921309475396502, "grad_norm": 0.32311201095581055, "learning_rate": 9.061169238540848e-06, "loss": 0.3623, "step": 13693 }, { "epoch": 1.392232614884099, "grad_norm": 0.28468605875968933, "learning_rate": 9.060962211976522e-06, "loss": 0.3522, "step": 13694 }, { "epoch": 1.3923342822285483, "grad_norm": 0.30921098589897156, "learning_rate": 9.060755164954032e-06, "loss": 0.379, "step": 13695 }, { "epoch": 1.3924359495729972, "grad_norm": 0.29905328154563904, "learning_rate": 9.060548097474424e-06, "loss": 0.3913, "step": 13696 }, { "epoch": 1.3925376169174462, "grad_norm": 0.30652087926864624, "learning_rate": 9.060341009538739e-06, "loss": 0.3758, "step": 13697 }, { "epoch": 1.392639284261895, "grad_norm": 0.32076793909072876, "learning_rate": 9.06013390114802e-06, "loss": 0.3436, "step": 13698 }, { "epoch": 1.392740951606344, "grad_norm": 0.2638712525367737, "learning_rate": 9.059926772303314e-06, "loss": 0.3363, "step": 13699 }, { "epoch": 1.392842618950793, "grad_norm": 0.3195091187953949, "learning_rate": 9.059719623005658e-06, "loss": 0.401, "step": 13700 }, { "epoch": 1.392944286295242, "grad_norm": 0.26998060941696167, "learning_rate": 9.0595124532561e-06, "loss": 0.3636, "step": 13701 }, { "epoch": 1.3930459536396909, "grad_norm": 0.28936952352523804, "learning_rate": 9.059305263055686e-06, "loss": 0.4187, "step": 13702 }, { "epoch": 1.39314762098414, "grad_norm": 0.2838385999202728, "learning_rate": 9.059098052405455e-06, "loss": 0.3805, "step": 13703 }, { "epoch": 1.393249288328589, "grad_norm": 0.2885594069957733, "learning_rate": 9.058890821306453e-06, "loss": 0.3898, "step": 13704 }, { "epoch": 1.393350955673038, "grad_norm": 0.2678275406360626, "learning_rate": 9.058683569759721e-06, "loss": 0.3866, "step": 13705 }, { "epoch": 1.3934526230174868, "grad_norm": 0.3033038377761841, "learning_rate": 9.058476297766307e-06, "loss": 0.3653, "step": 13706 }, { "epoch": 1.3935542903619358, "grad_norm": 0.28792619705200195, "learning_rate": 9.058269005327256e-06, "loss": 0.3544, "step": 13707 }, { "epoch": 1.3936559577063847, "grad_norm": 0.29764583706855774, "learning_rate": 9.058061692443607e-06, "loss": 0.3634, "step": 13708 }, { "epoch": 1.3937576250508337, "grad_norm": 0.27099210023880005, "learning_rate": 9.057854359116409e-06, "loss": 0.3636, "step": 13709 }, { "epoch": 1.3938592923952826, "grad_norm": 0.28621864318847656, "learning_rate": 9.057647005346705e-06, "loss": 0.3865, "step": 13710 }, { "epoch": 1.3939609597397316, "grad_norm": 0.30226144194602966, "learning_rate": 9.05743963113554e-06, "loss": 0.3552, "step": 13711 }, { "epoch": 1.3940626270841805, "grad_norm": 0.2988277077674866, "learning_rate": 9.05723223648396e-06, "loss": 0.3859, "step": 13712 }, { "epoch": 1.3941642944286294, "grad_norm": 0.2957635819911957, "learning_rate": 9.057024821393006e-06, "loss": 0.3475, "step": 13713 }, { "epoch": 1.3942659617730784, "grad_norm": 0.28358644247055054, "learning_rate": 9.056817385863724e-06, "loss": 0.3457, "step": 13714 }, { "epoch": 1.3943676291175273, "grad_norm": 0.29325124621391296, "learning_rate": 9.056609929897163e-06, "loss": 0.3786, "step": 13715 }, { "epoch": 1.3944692964619765, "grad_norm": 0.3153819143772125, "learning_rate": 9.056402453494361e-06, "loss": 0.3821, "step": 13716 }, { "epoch": 1.3945709638064254, "grad_norm": 0.3094702363014221, "learning_rate": 9.056194956656371e-06, "loss": 0.3665, "step": 13717 }, { "epoch": 1.3946726311508744, "grad_norm": 0.31627413630485535, "learning_rate": 9.055987439384234e-06, "loss": 0.3926, "step": 13718 }, { "epoch": 1.3947742984953233, "grad_norm": 0.31632035970687866, "learning_rate": 9.055779901678996e-06, "loss": 0.3771, "step": 13719 }, { "epoch": 1.3948759658397722, "grad_norm": 0.2816241383552551, "learning_rate": 9.055572343541702e-06, "loss": 0.3566, "step": 13720 }, { "epoch": 1.3949776331842212, "grad_norm": 0.31192582845687866, "learning_rate": 9.055364764973398e-06, "loss": 0.4155, "step": 13721 }, { "epoch": 1.3950793005286701, "grad_norm": 0.3153035044670105, "learning_rate": 9.055157165975132e-06, "loss": 0.3723, "step": 13722 }, { "epoch": 1.395180967873119, "grad_norm": 0.28897348046302795, "learning_rate": 9.054949546547946e-06, "loss": 0.3419, "step": 13723 }, { "epoch": 1.3952826352175682, "grad_norm": 0.32084980607032776, "learning_rate": 9.054741906692888e-06, "loss": 0.3818, "step": 13724 }, { "epoch": 1.3953843025620172, "grad_norm": 0.3072119653224945, "learning_rate": 9.054534246411004e-06, "loss": 0.3652, "step": 13725 }, { "epoch": 1.3954859699064661, "grad_norm": 0.28793156147003174, "learning_rate": 9.05432656570334e-06, "loss": 0.3712, "step": 13726 }, { "epoch": 1.395587637250915, "grad_norm": 0.28240254521369934, "learning_rate": 9.054118864570942e-06, "loss": 0.384, "step": 13727 }, { "epoch": 1.395689304595364, "grad_norm": 0.3061886131763458, "learning_rate": 9.053911143014857e-06, "loss": 0.3831, "step": 13728 }, { "epoch": 1.395790971939813, "grad_norm": 0.27708864212036133, "learning_rate": 9.053703401036132e-06, "loss": 0.3335, "step": 13729 }, { "epoch": 1.3958926392842619, "grad_norm": 0.3016543388366699, "learning_rate": 9.05349563863581e-06, "loss": 0.3763, "step": 13730 }, { "epoch": 1.3959943066287108, "grad_norm": 0.304322212934494, "learning_rate": 9.053287855814942e-06, "loss": 0.371, "step": 13731 }, { "epoch": 1.3960959739731598, "grad_norm": 0.2938549816608429, "learning_rate": 9.053080052574573e-06, "loss": 0.3542, "step": 13732 }, { "epoch": 1.3961976413176087, "grad_norm": 0.30726370215415955, "learning_rate": 9.05287222891575e-06, "loss": 0.359, "step": 13733 }, { "epoch": 1.3962993086620576, "grad_norm": 0.2914067506790161, "learning_rate": 9.052664384839518e-06, "loss": 0.3913, "step": 13734 }, { "epoch": 1.3964009760065066, "grad_norm": 0.31363779306411743, "learning_rate": 9.052456520346929e-06, "loss": 0.358, "step": 13735 }, { "epoch": 1.3965026433509558, "grad_norm": 0.2924712598323822, "learning_rate": 9.052248635439025e-06, "loss": 0.374, "step": 13736 }, { "epoch": 1.3966043106954047, "grad_norm": 0.3163607716560364, "learning_rate": 9.052040730116856e-06, "loss": 0.359, "step": 13737 }, { "epoch": 1.3967059780398536, "grad_norm": 0.3145264685153961, "learning_rate": 9.051832804381468e-06, "loss": 0.3314, "step": 13738 }, { "epoch": 1.3968076453843026, "grad_norm": 0.3272901773452759, "learning_rate": 9.05162485823391e-06, "loss": 0.394, "step": 13739 }, { "epoch": 1.3969093127287515, "grad_norm": 0.28162574768066406, "learning_rate": 9.051416891675229e-06, "loss": 0.3539, "step": 13740 }, { "epoch": 1.3970109800732005, "grad_norm": 0.31705814599990845, "learning_rate": 9.051208904706471e-06, "loss": 0.4073, "step": 13741 }, { "epoch": 1.3971126474176494, "grad_norm": 0.3516111969947815, "learning_rate": 9.051000897328685e-06, "loss": 0.3978, "step": 13742 }, { "epoch": 1.3972143147620983, "grad_norm": 0.3031356632709503, "learning_rate": 9.050792869542922e-06, "loss": 0.3548, "step": 13743 }, { "epoch": 1.3973159821065475, "grad_norm": 0.27430564165115356, "learning_rate": 9.050584821350225e-06, "loss": 0.3924, "step": 13744 }, { "epoch": 1.3974176494509964, "grad_norm": 0.30219754576683044, "learning_rate": 9.050376752751644e-06, "loss": 0.3753, "step": 13745 }, { "epoch": 1.3975193167954454, "grad_norm": 0.32030314207077026, "learning_rate": 9.050168663748229e-06, "loss": 0.3728, "step": 13746 }, { "epoch": 1.3976209841398943, "grad_norm": 0.2920369803905487, "learning_rate": 9.049960554341024e-06, "loss": 0.3522, "step": 13747 }, { "epoch": 1.3977226514843433, "grad_norm": 0.2670521140098572, "learning_rate": 9.049752424531083e-06, "loss": 0.3649, "step": 13748 }, { "epoch": 1.3978243188287922, "grad_norm": 0.3007553219795227, "learning_rate": 9.04954427431945e-06, "loss": 0.3932, "step": 13749 }, { "epoch": 1.3979259861732412, "grad_norm": 0.31393030285835266, "learning_rate": 9.049336103707177e-06, "loss": 0.3376, "step": 13750 }, { "epoch": 1.39802765351769, "grad_norm": 0.26453036069869995, "learning_rate": 9.049127912695309e-06, "loss": 0.3405, "step": 13751 }, { "epoch": 1.398129320862139, "grad_norm": 0.286478191614151, "learning_rate": 9.048919701284897e-06, "loss": 0.3577, "step": 13752 }, { "epoch": 1.398230988206588, "grad_norm": 0.3023682236671448, "learning_rate": 9.048711469476992e-06, "loss": 0.3643, "step": 13753 }, { "epoch": 1.398332655551037, "grad_norm": 0.31013819575309753, "learning_rate": 9.048503217272638e-06, "loss": 0.3765, "step": 13754 }, { "epoch": 1.3984343228954859, "grad_norm": 0.30540409684181213, "learning_rate": 9.048294944672888e-06, "loss": 0.3769, "step": 13755 }, { "epoch": 1.3985359902399348, "grad_norm": 0.29149502515792847, "learning_rate": 9.04808665167879e-06, "loss": 0.3692, "step": 13756 }, { "epoch": 1.398637657584384, "grad_norm": 0.2940913140773773, "learning_rate": 9.047878338291393e-06, "loss": 0.3668, "step": 13757 }, { "epoch": 1.398739324928833, "grad_norm": 0.2816545367240906, "learning_rate": 9.047670004511747e-06, "loss": 0.3467, "step": 13758 }, { "epoch": 1.3988409922732818, "grad_norm": 0.3040831983089447, "learning_rate": 9.047461650340903e-06, "loss": 0.3537, "step": 13759 }, { "epoch": 1.3989426596177308, "grad_norm": 0.2800772786140442, "learning_rate": 9.047253275779908e-06, "loss": 0.3735, "step": 13760 }, { "epoch": 1.3990443269621797, "grad_norm": 0.275097519159317, "learning_rate": 9.047044880829811e-06, "loss": 0.3615, "step": 13761 }, { "epoch": 1.3991459943066287, "grad_norm": 0.3221816420555115, "learning_rate": 9.046836465491667e-06, "loss": 0.4192, "step": 13762 }, { "epoch": 1.3992476616510776, "grad_norm": 0.3061220645904541, "learning_rate": 9.04662802976652e-06, "loss": 0.3601, "step": 13763 }, { "epoch": 1.3993493289955268, "grad_norm": 0.2859744429588318, "learning_rate": 9.046419573655422e-06, "loss": 0.3278, "step": 13764 }, { "epoch": 1.3994509963399757, "grad_norm": 0.29443585872650146, "learning_rate": 9.046211097159426e-06, "loss": 0.3951, "step": 13765 }, { "epoch": 1.3995526636844247, "grad_norm": 0.29083871841430664, "learning_rate": 9.04600260027958e-06, "loss": 0.374, "step": 13766 }, { "epoch": 1.3996543310288736, "grad_norm": 0.2944067120552063, "learning_rate": 9.045794083016933e-06, "loss": 0.3328, "step": 13767 }, { "epoch": 1.3997559983733225, "grad_norm": 0.3004275858402252, "learning_rate": 9.045585545372537e-06, "loss": 0.3606, "step": 13768 }, { "epoch": 1.3998576657177715, "grad_norm": 0.2834988236427307, "learning_rate": 9.045376987347442e-06, "loss": 0.3721, "step": 13769 }, { "epoch": 1.3999593330622204, "grad_norm": 0.29612359404563904, "learning_rate": 9.045168408942701e-06, "loss": 0.3868, "step": 13770 }, { "epoch": 1.4000610004066694, "grad_norm": 0.3144455552101135, "learning_rate": 9.044959810159362e-06, "loss": 0.3421, "step": 13771 }, { "epoch": 1.4001626677511183, "grad_norm": 0.271190345287323, "learning_rate": 9.044751190998475e-06, "loss": 0.3744, "step": 13772 }, { "epoch": 1.4002643350955672, "grad_norm": 0.2860531806945801, "learning_rate": 9.044542551461095e-06, "loss": 0.4057, "step": 13773 }, { "epoch": 1.4003660024400162, "grad_norm": 0.2700028717517853, "learning_rate": 9.044333891548269e-06, "loss": 0.3587, "step": 13774 }, { "epoch": 1.4004676697844651, "grad_norm": 0.28205054998397827, "learning_rate": 9.044125211261051e-06, "loss": 0.3229, "step": 13775 }, { "epoch": 1.400569337128914, "grad_norm": 0.2782636880874634, "learning_rate": 9.04391651060049e-06, "loss": 0.362, "step": 13776 }, { "epoch": 1.4006710044733632, "grad_norm": 0.27890491485595703, "learning_rate": 9.04370778956764e-06, "loss": 0.3649, "step": 13777 }, { "epoch": 1.4007726718178122, "grad_norm": 0.2775643765926361, "learning_rate": 9.043499048163552e-06, "loss": 0.3486, "step": 13778 }, { "epoch": 1.4008743391622611, "grad_norm": 0.2870390713214874, "learning_rate": 9.043290286389274e-06, "loss": 0.3778, "step": 13779 }, { "epoch": 1.40097600650671, "grad_norm": 0.27727657556533813, "learning_rate": 9.043081504245862e-06, "loss": 0.3372, "step": 13780 }, { "epoch": 1.401077673851159, "grad_norm": 0.2978925406932831, "learning_rate": 9.042872701734367e-06, "loss": 0.3416, "step": 13781 }, { "epoch": 1.401179341195608, "grad_norm": 0.29547762870788574, "learning_rate": 9.042663878855837e-06, "loss": 0.345, "step": 13782 }, { "epoch": 1.4012810085400569, "grad_norm": 0.29402220249176025, "learning_rate": 9.04245503561133e-06, "loss": 0.3903, "step": 13783 }, { "epoch": 1.4013826758845058, "grad_norm": 0.2988688051700592, "learning_rate": 9.042246172001894e-06, "loss": 0.3607, "step": 13784 }, { "epoch": 1.401484343228955, "grad_norm": 0.26649582386016846, "learning_rate": 9.042037288028582e-06, "loss": 0.3675, "step": 13785 }, { "epoch": 1.401586010573404, "grad_norm": 0.30522727966308594, "learning_rate": 9.041828383692447e-06, "loss": 0.3766, "step": 13786 }, { "epoch": 1.4016876779178529, "grad_norm": 0.29159897565841675, "learning_rate": 9.041619458994542e-06, "loss": 0.3783, "step": 13787 }, { "epoch": 1.4017893452623018, "grad_norm": 0.2900236248970032, "learning_rate": 9.041410513935918e-06, "loss": 0.3622, "step": 13788 }, { "epoch": 1.4018910126067508, "grad_norm": 0.26513999700546265, "learning_rate": 9.041201548517627e-06, "loss": 0.3368, "step": 13789 }, { "epoch": 1.4019926799511997, "grad_norm": 0.29480084776878357, "learning_rate": 9.040992562740725e-06, "loss": 0.3703, "step": 13790 }, { "epoch": 1.4020943472956486, "grad_norm": 0.2979432940483093, "learning_rate": 9.040783556606261e-06, "loss": 0.3366, "step": 13791 }, { "epoch": 1.4021960146400976, "grad_norm": 0.29469096660614014, "learning_rate": 9.040574530115292e-06, "loss": 0.3904, "step": 13792 }, { "epoch": 1.4022976819845465, "grad_norm": 0.29578348994255066, "learning_rate": 9.040365483268866e-06, "loss": 0.3781, "step": 13793 }, { "epoch": 1.4023993493289955, "grad_norm": 0.29463058710098267, "learning_rate": 9.04015641606804e-06, "loss": 0.3748, "step": 13794 }, { "epoch": 1.4025010166734444, "grad_norm": 0.2931957244873047, "learning_rate": 9.039947328513866e-06, "loss": 0.372, "step": 13795 }, { "epoch": 1.4026026840178933, "grad_norm": 0.2737646996974945, "learning_rate": 9.039738220607397e-06, "loss": 0.3737, "step": 13796 }, { "epoch": 1.4027043513623423, "grad_norm": 0.27921971678733826, "learning_rate": 9.039529092349688e-06, "loss": 0.3599, "step": 13797 }, { "epoch": 1.4028060187067914, "grad_norm": 0.29425832629203796, "learning_rate": 9.03931994374179e-06, "loss": 0.3608, "step": 13798 }, { "epoch": 1.4029076860512404, "grad_norm": 0.2825426459312439, "learning_rate": 9.039110774784759e-06, "loss": 0.3568, "step": 13799 }, { "epoch": 1.4030093533956893, "grad_norm": 0.2926291823387146, "learning_rate": 9.038901585479646e-06, "loss": 0.3558, "step": 13800 }, { "epoch": 1.4031110207401383, "grad_norm": 0.2977640628814697, "learning_rate": 9.03869237582751e-06, "loss": 0.3742, "step": 13801 }, { "epoch": 1.4032126880845872, "grad_norm": 0.3112207353115082, "learning_rate": 9.0384831458294e-06, "loss": 0.3834, "step": 13802 }, { "epoch": 1.4033143554290362, "grad_norm": 0.32202407717704773, "learning_rate": 9.03827389548637e-06, "loss": 0.35, "step": 13803 }, { "epoch": 1.403416022773485, "grad_norm": 0.30374354124069214, "learning_rate": 9.038064624799477e-06, "loss": 0.3408, "step": 13804 }, { "epoch": 1.4035176901179343, "grad_norm": 0.28359249234199524, "learning_rate": 9.037855333769774e-06, "loss": 0.3506, "step": 13805 }, { "epoch": 1.4036193574623832, "grad_norm": 0.32590416073799133, "learning_rate": 9.037646022398315e-06, "loss": 0.3678, "step": 13806 }, { "epoch": 1.4037210248068321, "grad_norm": 0.28651344776153564, "learning_rate": 9.037436690686155e-06, "loss": 0.3472, "step": 13807 }, { "epoch": 1.403822692151281, "grad_norm": 0.287071168422699, "learning_rate": 9.03722733863435e-06, "loss": 0.3533, "step": 13808 }, { "epoch": 1.40392435949573, "grad_norm": 0.30450189113616943, "learning_rate": 9.03701796624395e-06, "loss": 0.3827, "step": 13809 }, { "epoch": 1.404026026840179, "grad_norm": 0.3565133512020111, "learning_rate": 9.036808573516015e-06, "loss": 0.346, "step": 13810 }, { "epoch": 1.404127694184628, "grad_norm": 0.29238176345825195, "learning_rate": 9.036599160451598e-06, "loss": 0.38, "step": 13811 }, { "epoch": 1.4042293615290768, "grad_norm": 0.29450374841690063, "learning_rate": 9.036389727051752e-06, "loss": 0.3497, "step": 13812 }, { "epoch": 1.4043310288735258, "grad_norm": 0.31901779770851135, "learning_rate": 9.036180273317534e-06, "loss": 0.386, "step": 13813 }, { "epoch": 1.4044326962179747, "grad_norm": 0.33176565170288086, "learning_rate": 9.03597079925e-06, "loss": 0.3822, "step": 13814 }, { "epoch": 1.4045343635624237, "grad_norm": 0.3170605003833771, "learning_rate": 9.035761304850203e-06, "loss": 0.3379, "step": 13815 }, { "epoch": 1.4046360309068726, "grad_norm": 0.2975634038448334, "learning_rate": 9.0355517901192e-06, "loss": 0.3484, "step": 13816 }, { "epoch": 1.4047376982513216, "grad_norm": 0.3083333671092987, "learning_rate": 9.035342255058047e-06, "loss": 0.3896, "step": 13817 }, { "epoch": 1.4048393655957707, "grad_norm": 0.2987416684627533, "learning_rate": 9.035132699667798e-06, "loss": 0.3612, "step": 13818 }, { "epoch": 1.4049410329402197, "grad_norm": 0.2831325829029083, "learning_rate": 9.03492312394951e-06, "loss": 0.3744, "step": 13819 }, { "epoch": 1.4050427002846686, "grad_norm": 0.3101043999195099, "learning_rate": 9.034713527904237e-06, "loss": 0.3497, "step": 13820 }, { "epoch": 1.4051443676291175, "grad_norm": 0.31040841341018677, "learning_rate": 9.034503911533037e-06, "loss": 0.33, "step": 13821 }, { "epoch": 1.4052460349735665, "grad_norm": 0.29025644063949585, "learning_rate": 9.034294274836965e-06, "loss": 0.3456, "step": 13822 }, { "epoch": 1.4053477023180154, "grad_norm": 0.3017881512641907, "learning_rate": 9.034084617817077e-06, "loss": 0.3778, "step": 13823 }, { "epoch": 1.4054493696624644, "grad_norm": 0.31818822026252747, "learning_rate": 9.033874940474428e-06, "loss": 0.3428, "step": 13824 }, { "epoch": 1.4055510370069133, "grad_norm": 0.27507197856903076, "learning_rate": 9.033665242810077e-06, "loss": 0.3244, "step": 13825 }, { "epoch": 1.4056527043513625, "grad_norm": 0.32086122035980225, "learning_rate": 9.03345552482508e-06, "loss": 0.3664, "step": 13826 }, { "epoch": 1.4057543716958114, "grad_norm": 0.29129478335380554, "learning_rate": 9.03324578652049e-06, "loss": 0.3576, "step": 13827 }, { "epoch": 1.4058560390402604, "grad_norm": 0.3177645802497864, "learning_rate": 9.033036027897367e-06, "loss": 0.382, "step": 13828 }, { "epoch": 1.4059577063847093, "grad_norm": 0.2935453951358795, "learning_rate": 9.032826248956768e-06, "loss": 0.3794, "step": 13829 }, { "epoch": 1.4060593737291582, "grad_norm": 0.310029536485672, "learning_rate": 9.032616449699749e-06, "loss": 0.3697, "step": 13830 }, { "epoch": 1.4061610410736072, "grad_norm": 0.3056629002094269, "learning_rate": 9.032406630127366e-06, "loss": 0.3506, "step": 13831 }, { "epoch": 1.4062627084180561, "grad_norm": 0.2742340564727783, "learning_rate": 9.032196790240674e-06, "loss": 0.3493, "step": 13832 }, { "epoch": 1.406364375762505, "grad_norm": 0.2877868711948395, "learning_rate": 9.031986930040735e-06, "loss": 0.4119, "step": 13833 }, { "epoch": 1.406466043106954, "grad_norm": 0.28240421414375305, "learning_rate": 9.031777049528604e-06, "loss": 0.389, "step": 13834 }, { "epoch": 1.406567710451403, "grad_norm": 0.29146215319633484, "learning_rate": 9.03156714870534e-06, "loss": 0.3954, "step": 13835 }, { "epoch": 1.4066693777958519, "grad_norm": 0.2776392996311188, "learning_rate": 9.031357227571997e-06, "loss": 0.3611, "step": 13836 }, { "epoch": 1.4067710451403008, "grad_norm": 0.3031691908836365, "learning_rate": 9.031147286129633e-06, "loss": 0.3553, "step": 13837 }, { "epoch": 1.4068727124847498, "grad_norm": 0.28131693601608276, "learning_rate": 9.03093732437931e-06, "loss": 0.3386, "step": 13838 }, { "epoch": 1.406974379829199, "grad_norm": 0.29504549503326416, "learning_rate": 9.030727342322081e-06, "loss": 0.3581, "step": 13839 }, { "epoch": 1.4070760471736479, "grad_norm": 0.2826021909713745, "learning_rate": 9.030517339959005e-06, "loss": 0.3792, "step": 13840 }, { "epoch": 1.4071777145180968, "grad_norm": 0.2885349690914154, "learning_rate": 9.030307317291142e-06, "loss": 0.3558, "step": 13841 }, { "epoch": 1.4072793818625458, "grad_norm": 0.3215416967868805, "learning_rate": 9.030097274319547e-06, "loss": 0.33, "step": 13842 }, { "epoch": 1.4073810492069947, "grad_norm": 0.313488632440567, "learning_rate": 9.02988721104528e-06, "loss": 0.3627, "step": 13843 }, { "epoch": 1.4074827165514436, "grad_norm": 0.32434189319610596, "learning_rate": 9.029677127469398e-06, "loss": 0.3971, "step": 13844 }, { "epoch": 1.4075843838958926, "grad_norm": 0.2953447997570038, "learning_rate": 9.029467023592961e-06, "loss": 0.391, "step": 13845 }, { "epoch": 1.4076860512403417, "grad_norm": 0.27336856722831726, "learning_rate": 9.029256899417029e-06, "loss": 0.3346, "step": 13846 }, { "epoch": 1.4077877185847907, "grad_norm": 0.2957361936569214, "learning_rate": 9.029046754942655e-06, "loss": 0.3872, "step": 13847 }, { "epoch": 1.4078893859292396, "grad_norm": 0.27997899055480957, "learning_rate": 9.028836590170901e-06, "loss": 0.3742, "step": 13848 }, { "epoch": 1.4079910532736886, "grad_norm": 0.3110712170600891, "learning_rate": 9.028626405102827e-06, "loss": 0.3855, "step": 13849 }, { "epoch": 1.4080927206181375, "grad_norm": 0.29848769307136536, "learning_rate": 9.028416199739492e-06, "loss": 0.3522, "step": 13850 }, { "epoch": 1.4081943879625864, "grad_norm": 0.2898768484592438, "learning_rate": 9.028205974081951e-06, "loss": 0.3512, "step": 13851 }, { "epoch": 1.4082960553070354, "grad_norm": 0.31623873114585876, "learning_rate": 9.027995728131267e-06, "loss": 0.382, "step": 13852 }, { "epoch": 1.4083977226514843, "grad_norm": 0.28459984064102173, "learning_rate": 9.027785461888497e-06, "loss": 0.3734, "step": 13853 }, { "epoch": 1.4084993899959333, "grad_norm": 0.31170764565467834, "learning_rate": 9.0275751753547e-06, "loss": 0.3417, "step": 13854 }, { "epoch": 1.4086010573403822, "grad_norm": 0.31242379546165466, "learning_rate": 9.027364868530937e-06, "loss": 0.3749, "step": 13855 }, { "epoch": 1.4087027246848312, "grad_norm": 0.28095749020576477, "learning_rate": 9.027154541418269e-06, "loss": 0.3731, "step": 13856 }, { "epoch": 1.40880439202928, "grad_norm": 0.31341007351875305, "learning_rate": 9.02694419401775e-06, "loss": 0.3706, "step": 13857 }, { "epoch": 1.408906059373729, "grad_norm": 0.3358787000179291, "learning_rate": 9.026733826330445e-06, "loss": 0.3355, "step": 13858 }, { "epoch": 1.4090077267181782, "grad_norm": 0.28437018394470215, "learning_rate": 9.026523438357411e-06, "loss": 0.3647, "step": 13859 }, { "epoch": 1.4091093940626271, "grad_norm": 0.3310694396495819, "learning_rate": 9.02631303009971e-06, "loss": 0.3662, "step": 13860 }, { "epoch": 1.409211061407076, "grad_norm": 0.30735278129577637, "learning_rate": 9.0261026015584e-06, "loss": 0.3902, "step": 13861 }, { "epoch": 1.409312728751525, "grad_norm": 0.26643580198287964, "learning_rate": 9.02589215273454e-06, "loss": 0.3539, "step": 13862 }, { "epoch": 1.409414396095974, "grad_norm": 0.29497721791267395, "learning_rate": 9.025681683629196e-06, "loss": 0.3603, "step": 13863 }, { "epoch": 1.409516063440423, "grad_norm": 0.3079570531845093, "learning_rate": 9.025471194243423e-06, "loss": 0.3865, "step": 13864 }, { "epoch": 1.4096177307848718, "grad_norm": 0.2865557372570038, "learning_rate": 9.025260684578281e-06, "loss": 0.353, "step": 13865 }, { "epoch": 1.4097193981293208, "grad_norm": 0.2951294183731079, "learning_rate": 9.025050154634834e-06, "loss": 0.354, "step": 13866 }, { "epoch": 1.40982106547377, "grad_norm": 0.304736465215683, "learning_rate": 9.02483960441414e-06, "loss": 0.3515, "step": 13867 }, { "epoch": 1.409922732818219, "grad_norm": 0.2806017994880676, "learning_rate": 9.02462903391726e-06, "loss": 0.3317, "step": 13868 }, { "epoch": 1.4100244001626678, "grad_norm": 0.29369857907295227, "learning_rate": 9.024418443145255e-06, "loss": 0.3812, "step": 13869 }, { "epoch": 1.4101260675071168, "grad_norm": 0.3238607347011566, "learning_rate": 9.024207832099187e-06, "loss": 0.3677, "step": 13870 }, { "epoch": 1.4102277348515657, "grad_norm": 0.3253795802593231, "learning_rate": 9.023997200780118e-06, "loss": 0.3785, "step": 13871 }, { "epoch": 1.4103294021960147, "grad_norm": 0.2756910026073456, "learning_rate": 9.023786549189105e-06, "loss": 0.3325, "step": 13872 }, { "epoch": 1.4104310695404636, "grad_norm": 0.30927956104278564, "learning_rate": 9.023575877327214e-06, "loss": 0.3722, "step": 13873 }, { "epoch": 1.4105327368849125, "grad_norm": 0.2854200005531311, "learning_rate": 9.0233651851955e-06, "loss": 0.3789, "step": 13874 }, { "epoch": 1.4106344042293615, "grad_norm": 0.2960967719554901, "learning_rate": 9.02315447279503e-06, "loss": 0.3657, "step": 13875 }, { "epoch": 1.4107360715738104, "grad_norm": 0.32455629110336304, "learning_rate": 9.022943740126866e-06, "loss": 0.3305, "step": 13876 }, { "epoch": 1.4108377389182594, "grad_norm": 0.27888917922973633, "learning_rate": 9.022732987192065e-06, "loss": 0.3549, "step": 13877 }, { "epoch": 1.4109394062627083, "grad_norm": 0.29274940490722656, "learning_rate": 9.02252221399169e-06, "loss": 0.3668, "step": 13878 }, { "epoch": 1.4110410736071572, "grad_norm": 0.3194665312767029, "learning_rate": 9.022311420526808e-06, "loss": 0.3624, "step": 13879 }, { "epoch": 1.4111427409516064, "grad_norm": 0.3040440082550049, "learning_rate": 9.022100606798474e-06, "loss": 0.3448, "step": 13880 }, { "epoch": 1.4112444082960554, "grad_norm": 0.26695626974105835, "learning_rate": 9.021889772807754e-06, "loss": 0.3673, "step": 13881 }, { "epoch": 1.4113460756405043, "grad_norm": 0.28846046328544617, "learning_rate": 9.02167891855571e-06, "loss": 0.3441, "step": 13882 }, { "epoch": 1.4114477429849532, "grad_norm": 0.28463974595069885, "learning_rate": 9.021468044043402e-06, "loss": 0.3666, "step": 13883 }, { "epoch": 1.4115494103294022, "grad_norm": 0.30186688899993896, "learning_rate": 9.021257149271893e-06, "loss": 0.3564, "step": 13884 }, { "epoch": 1.4116510776738511, "grad_norm": 0.2805856168270111, "learning_rate": 9.021046234242247e-06, "loss": 0.3549, "step": 13885 }, { "epoch": 1.4117527450183, "grad_norm": 0.3026227355003357, "learning_rate": 9.020835298955524e-06, "loss": 0.3578, "step": 13886 }, { "epoch": 1.4118544123627492, "grad_norm": 0.28231778740882874, "learning_rate": 9.020624343412792e-06, "loss": 0.3487, "step": 13887 }, { "epoch": 1.4119560797071982, "grad_norm": 0.30944937467575073, "learning_rate": 9.020413367615106e-06, "loss": 0.3506, "step": 13888 }, { "epoch": 1.412057747051647, "grad_norm": 0.29056698083877563, "learning_rate": 9.020202371563536e-06, "loss": 0.3681, "step": 13889 }, { "epoch": 1.412159414396096, "grad_norm": 0.27741774916648865, "learning_rate": 9.019991355259139e-06, "loss": 0.357, "step": 13890 }, { "epoch": 1.412261081740545, "grad_norm": 0.28619226813316345, "learning_rate": 9.019780318702982e-06, "loss": 0.3886, "step": 13891 }, { "epoch": 1.412362749084994, "grad_norm": 0.31042373180389404, "learning_rate": 9.019569261896129e-06, "loss": 0.3625, "step": 13892 }, { "epoch": 1.4124644164294429, "grad_norm": 0.2746228277683258, "learning_rate": 9.019358184839639e-06, "loss": 0.3733, "step": 13893 }, { "epoch": 1.4125660837738918, "grad_norm": 0.27475112676620483, "learning_rate": 9.019147087534577e-06, "loss": 0.3636, "step": 13894 }, { "epoch": 1.4126677511183408, "grad_norm": 0.31072109937667847, "learning_rate": 9.018935969982007e-06, "loss": 0.3663, "step": 13895 }, { "epoch": 1.4127694184627897, "grad_norm": 0.3060942590236664, "learning_rate": 9.018724832182994e-06, "loss": 0.3477, "step": 13896 }, { "epoch": 1.4128710858072386, "grad_norm": 0.2798095643520355, "learning_rate": 9.018513674138602e-06, "loss": 0.4169, "step": 13897 }, { "epoch": 1.4129727531516876, "grad_norm": 0.3063764274120331, "learning_rate": 9.01830249584989e-06, "loss": 0.3691, "step": 13898 }, { "epoch": 1.4130744204961365, "grad_norm": 0.28632035851478577, "learning_rate": 9.018091297317926e-06, "loss": 0.3411, "step": 13899 }, { "epoch": 1.4131760878405857, "grad_norm": 0.3042771518230438, "learning_rate": 9.017880078543772e-06, "loss": 0.3822, "step": 13900 }, { "epoch": 1.4132777551850346, "grad_norm": 0.30151575803756714, "learning_rate": 9.017668839528495e-06, "loss": 0.3715, "step": 13901 }, { "epoch": 1.4133794225294836, "grad_norm": 0.26847097277641296, "learning_rate": 9.017457580273157e-06, "loss": 0.3547, "step": 13902 }, { "epoch": 1.4134810898739325, "grad_norm": 0.30590760707855225, "learning_rate": 9.017246300778821e-06, "loss": 0.3955, "step": 13903 }, { "epoch": 1.4135827572183814, "grad_norm": 0.2940349280834198, "learning_rate": 9.017035001046553e-06, "loss": 0.3375, "step": 13904 }, { "epoch": 1.4136844245628304, "grad_norm": 0.2673652172088623, "learning_rate": 9.016823681077418e-06, "loss": 0.3305, "step": 13905 }, { "epoch": 1.4137860919072793, "grad_norm": 0.2915661036968231, "learning_rate": 9.01661234087248e-06, "loss": 0.3506, "step": 13906 }, { "epoch": 1.4138877592517283, "grad_norm": 0.2875944972038269, "learning_rate": 9.016400980432802e-06, "loss": 0.3538, "step": 13907 }, { "epoch": 1.4139894265961774, "grad_norm": 0.297668993473053, "learning_rate": 9.01618959975945e-06, "loss": 0.3368, "step": 13908 }, { "epoch": 1.4140910939406264, "grad_norm": 0.30931931734085083, "learning_rate": 9.01597819885349e-06, "loss": 0.3461, "step": 13909 }, { "epoch": 1.4141927612850753, "grad_norm": 0.2700645625591278, "learning_rate": 9.015766777715987e-06, "loss": 0.3566, "step": 13910 }, { "epoch": 1.4142944286295243, "grad_norm": 0.28462210297584534, "learning_rate": 9.015555336348005e-06, "loss": 0.3371, "step": 13911 }, { "epoch": 1.4143960959739732, "grad_norm": 0.25981587171554565, "learning_rate": 9.015343874750609e-06, "loss": 0.3472, "step": 13912 }, { "epoch": 1.4144977633184221, "grad_norm": 0.2857198119163513, "learning_rate": 9.015132392924866e-06, "loss": 0.3409, "step": 13913 }, { "epoch": 1.414599430662871, "grad_norm": 0.2997093200683594, "learning_rate": 9.01492089087184e-06, "loss": 0.3454, "step": 13914 }, { "epoch": 1.41470109800732, "grad_norm": 0.28236326575279236, "learning_rate": 9.014709368592594e-06, "loss": 0.3561, "step": 13915 }, { "epoch": 1.414802765351769, "grad_norm": 0.28994572162628174, "learning_rate": 9.014497826088198e-06, "loss": 0.3702, "step": 13916 }, { "epoch": 1.414904432696218, "grad_norm": 0.2885160446166992, "learning_rate": 9.014286263359715e-06, "loss": 0.3937, "step": 13917 }, { "epoch": 1.4150061000406668, "grad_norm": 0.27816373109817505, "learning_rate": 9.014074680408213e-06, "loss": 0.3782, "step": 13918 }, { "epoch": 1.4151077673851158, "grad_norm": 0.26798081398010254, "learning_rate": 9.013863077234757e-06, "loss": 0.3532, "step": 13919 }, { "epoch": 1.4152094347295647, "grad_norm": 0.2855564057826996, "learning_rate": 9.013651453840411e-06, "loss": 0.3913, "step": 13920 }, { "epoch": 1.415311102074014, "grad_norm": 0.29149478673934937, "learning_rate": 9.013439810226246e-06, "loss": 0.3625, "step": 13921 }, { "epoch": 1.4154127694184628, "grad_norm": 0.2876308262348175, "learning_rate": 9.013228146393321e-06, "loss": 0.3748, "step": 13922 }, { "epoch": 1.4155144367629118, "grad_norm": 0.26768895983695984, "learning_rate": 9.013016462342708e-06, "loss": 0.3398, "step": 13923 }, { "epoch": 1.4156161041073607, "grad_norm": 0.282007098197937, "learning_rate": 9.012804758075472e-06, "loss": 0.3498, "step": 13924 }, { "epoch": 1.4157177714518097, "grad_norm": 0.3125799894332886, "learning_rate": 9.01259303359268e-06, "loss": 0.3528, "step": 13925 }, { "epoch": 1.4158194387962586, "grad_norm": 0.2851235270500183, "learning_rate": 9.012381288895396e-06, "loss": 0.3302, "step": 13926 }, { "epoch": 1.4159211061407075, "grad_norm": 0.3227390646934509, "learning_rate": 9.012169523984688e-06, "loss": 0.3533, "step": 13927 }, { "epoch": 1.4160227734851567, "grad_norm": 0.32025399804115295, "learning_rate": 9.011957738861626e-06, "loss": 0.4178, "step": 13928 }, { "epoch": 1.4161244408296056, "grad_norm": 0.3102930784225464, "learning_rate": 9.011745933527273e-06, "loss": 0.3564, "step": 13929 }, { "epoch": 1.4162261081740546, "grad_norm": 0.2788131535053253, "learning_rate": 9.011534107982697e-06, "loss": 0.3832, "step": 13930 }, { "epoch": 1.4163277755185035, "grad_norm": 0.28743281960487366, "learning_rate": 9.011322262228966e-06, "loss": 0.3498, "step": 13931 }, { "epoch": 1.4164294428629525, "grad_norm": 0.33546265959739685, "learning_rate": 9.011110396267146e-06, "loss": 0.3799, "step": 13932 }, { "epoch": 1.4165311102074014, "grad_norm": 0.2964100241661072, "learning_rate": 9.010898510098304e-06, "loss": 0.3549, "step": 13933 }, { "epoch": 1.4166327775518504, "grad_norm": 0.295740008354187, "learning_rate": 9.010686603723512e-06, "loss": 0.3936, "step": 13934 }, { "epoch": 1.4167344448962993, "grad_norm": 0.2708589434623718, "learning_rate": 9.010474677143832e-06, "loss": 0.355, "step": 13935 }, { "epoch": 1.4168361122407482, "grad_norm": 0.341344952583313, "learning_rate": 9.010262730360332e-06, "loss": 0.3994, "step": 13936 }, { "epoch": 1.4169377795851972, "grad_norm": 0.3052882254123688, "learning_rate": 9.010050763374083e-06, "loss": 0.3832, "step": 13937 }, { "epoch": 1.4170394469296461, "grad_norm": 0.29189765453338623, "learning_rate": 9.009838776186151e-06, "loss": 0.3774, "step": 13938 }, { "epoch": 1.417141114274095, "grad_norm": 0.3060451149940491, "learning_rate": 9.009626768797605e-06, "loss": 0.3519, "step": 13939 }, { "epoch": 1.417242781618544, "grad_norm": 0.3165317475795746, "learning_rate": 9.009414741209512e-06, "loss": 0.372, "step": 13940 }, { "epoch": 1.4173444489629932, "grad_norm": 0.2866528034210205, "learning_rate": 9.00920269342294e-06, "loss": 0.3306, "step": 13941 }, { "epoch": 1.417446116307442, "grad_norm": 0.2896970808506012, "learning_rate": 9.008990625438956e-06, "loss": 0.3521, "step": 13942 }, { "epoch": 1.417547783651891, "grad_norm": 0.2939721941947937, "learning_rate": 9.008778537258632e-06, "loss": 0.3526, "step": 13943 }, { "epoch": 1.41764945099634, "grad_norm": 0.2912589907646179, "learning_rate": 9.008566428883033e-06, "loss": 0.3372, "step": 13944 }, { "epoch": 1.417751118340789, "grad_norm": 0.29566916823387146, "learning_rate": 9.008354300313228e-06, "loss": 0.3518, "step": 13945 }, { "epoch": 1.4178527856852379, "grad_norm": 0.32887810468673706, "learning_rate": 9.00814215155029e-06, "loss": 0.3608, "step": 13946 }, { "epoch": 1.4179544530296868, "grad_norm": 0.31957313418388367, "learning_rate": 9.007929982595282e-06, "loss": 0.3564, "step": 13947 }, { "epoch": 1.4180561203741358, "grad_norm": 0.3176874816417694, "learning_rate": 9.007717793449275e-06, "loss": 0.366, "step": 13948 }, { "epoch": 1.418157787718585, "grad_norm": 0.34757164120674133, "learning_rate": 9.007505584113338e-06, "loss": 0.4063, "step": 13949 }, { "epoch": 1.4182594550630339, "grad_norm": 0.30245330929756165, "learning_rate": 9.007293354588543e-06, "loss": 0.3822, "step": 13950 }, { "epoch": 1.4183611224074828, "grad_norm": 0.2975718379020691, "learning_rate": 9.007081104875952e-06, "loss": 0.3738, "step": 13951 }, { "epoch": 1.4184627897519317, "grad_norm": 0.2865767180919647, "learning_rate": 9.00686883497664e-06, "loss": 0.3821, "step": 13952 }, { "epoch": 1.4185644570963807, "grad_norm": 0.3251478970050812, "learning_rate": 9.006656544891676e-06, "loss": 0.4198, "step": 13953 }, { "epoch": 1.4186661244408296, "grad_norm": 0.27471280097961426, "learning_rate": 9.006444234622129e-06, "loss": 0.379, "step": 13954 }, { "epoch": 1.4187677917852786, "grad_norm": 0.2722870111465454, "learning_rate": 9.006231904169067e-06, "loss": 0.3397, "step": 13955 }, { "epoch": 1.4188694591297275, "grad_norm": 0.2984824478626251, "learning_rate": 9.006019553533561e-06, "loss": 0.3673, "step": 13956 }, { "epoch": 1.4189711264741764, "grad_norm": 0.2801225483417511, "learning_rate": 9.00580718271668e-06, "loss": 0.3697, "step": 13957 }, { "epoch": 1.4190727938186254, "grad_norm": 0.3148947060108185, "learning_rate": 9.005594791719496e-06, "loss": 0.3498, "step": 13958 }, { "epoch": 1.4191744611630743, "grad_norm": 0.29702457785606384, "learning_rate": 9.005382380543076e-06, "loss": 0.3576, "step": 13959 }, { "epoch": 1.4192761285075233, "grad_norm": 0.2654799222946167, "learning_rate": 9.005169949188492e-06, "loss": 0.3516, "step": 13960 }, { "epoch": 1.4193777958519722, "grad_norm": 0.27366405725479126, "learning_rate": 9.004957497656812e-06, "loss": 0.3168, "step": 13961 }, { "epoch": 1.4194794631964214, "grad_norm": 0.27986958622932434, "learning_rate": 9.004745025949109e-06, "loss": 0.3336, "step": 13962 }, { "epoch": 1.4195811305408703, "grad_norm": 0.29235124588012695, "learning_rate": 9.004532534066453e-06, "loss": 0.3709, "step": 13963 }, { "epoch": 1.4196827978853193, "grad_norm": 0.28255560994148254, "learning_rate": 9.004320022009913e-06, "loss": 0.3805, "step": 13964 }, { "epoch": 1.4197844652297682, "grad_norm": 0.27677419781684875, "learning_rate": 9.00410748978056e-06, "loss": 0.3451, "step": 13965 }, { "epoch": 1.4198861325742171, "grad_norm": 0.27162688970565796, "learning_rate": 9.003894937379464e-06, "loss": 0.3309, "step": 13966 }, { "epoch": 1.419987799918666, "grad_norm": 0.2870756983757019, "learning_rate": 9.003682364807697e-06, "loss": 0.3702, "step": 13967 }, { "epoch": 1.420089467263115, "grad_norm": 0.2928277254104614, "learning_rate": 9.003469772066331e-06, "loss": 0.3744, "step": 13968 }, { "epoch": 1.4201911346075642, "grad_norm": 0.30717629194259644, "learning_rate": 9.003257159156435e-06, "loss": 0.3691, "step": 13969 }, { "epoch": 1.4202928019520131, "grad_norm": 0.27892202138900757, "learning_rate": 9.003044526079082e-06, "loss": 0.3751, "step": 13970 }, { "epoch": 1.420394469296462, "grad_norm": 0.2692219316959381, "learning_rate": 9.00283187283534e-06, "loss": 0.3435, "step": 13971 }, { "epoch": 1.420496136640911, "grad_norm": 0.3073844611644745, "learning_rate": 9.002619199426284e-06, "loss": 0.3953, "step": 13972 }, { "epoch": 1.42059780398536, "grad_norm": 0.3128539025783539, "learning_rate": 9.002406505852982e-06, "loss": 0.3918, "step": 13973 }, { "epoch": 1.420699471329809, "grad_norm": 0.28638726472854614, "learning_rate": 9.002193792116509e-06, "loss": 0.3711, "step": 13974 }, { "epoch": 1.4208011386742578, "grad_norm": 0.2632187306880951, "learning_rate": 9.001981058217934e-06, "loss": 0.3566, "step": 13975 }, { "epoch": 1.4209028060187068, "grad_norm": 0.29186373949050903, "learning_rate": 9.001768304158328e-06, "loss": 0.3569, "step": 13976 }, { "epoch": 1.4210044733631557, "grad_norm": 0.2906794846057892, "learning_rate": 9.001555529938766e-06, "loss": 0.3448, "step": 13977 }, { "epoch": 1.4211061407076047, "grad_norm": 0.2995149791240692, "learning_rate": 9.001342735560318e-06, "loss": 0.3862, "step": 13978 }, { "epoch": 1.4212078080520536, "grad_norm": 0.2904895544052124, "learning_rate": 9.001129921024055e-06, "loss": 0.3924, "step": 13979 }, { "epoch": 1.4213094753965025, "grad_norm": 0.29254117608070374, "learning_rate": 9.000917086331052e-06, "loss": 0.3872, "step": 13980 }, { "epoch": 1.4214111427409515, "grad_norm": 0.2827458381652832, "learning_rate": 9.000704231482377e-06, "loss": 0.3578, "step": 13981 }, { "epoch": 1.4215128100854006, "grad_norm": 0.29696500301361084, "learning_rate": 9.000491356479108e-06, "loss": 0.3826, "step": 13982 }, { "epoch": 1.4216144774298496, "grad_norm": 0.3060796558856964, "learning_rate": 9.000278461322311e-06, "loss": 0.3923, "step": 13983 }, { "epoch": 1.4217161447742985, "grad_norm": 0.3092261254787445, "learning_rate": 9.000065546013064e-06, "loss": 0.3488, "step": 13984 }, { "epoch": 1.4218178121187475, "grad_norm": 0.2845265865325928, "learning_rate": 8.999852610552436e-06, "loss": 0.3461, "step": 13985 }, { "epoch": 1.4219194794631964, "grad_norm": 0.3026736080646515, "learning_rate": 8.999639654941502e-06, "loss": 0.3631, "step": 13986 }, { "epoch": 1.4220211468076454, "grad_norm": 0.30771639943122864, "learning_rate": 8.999426679181334e-06, "loss": 0.3752, "step": 13987 }, { "epoch": 1.4221228141520943, "grad_norm": 0.297590970993042, "learning_rate": 8.999213683273004e-06, "loss": 0.3559, "step": 13988 }, { "epoch": 1.4222244814965432, "grad_norm": 0.34679070115089417, "learning_rate": 8.999000667217586e-06, "loss": 0.3756, "step": 13989 }, { "epoch": 1.4223261488409924, "grad_norm": 0.28713080286979675, "learning_rate": 8.998787631016153e-06, "loss": 0.3485, "step": 13990 }, { "epoch": 1.4224278161854413, "grad_norm": 0.2901906669139862, "learning_rate": 8.998574574669779e-06, "loss": 0.3933, "step": 13991 }, { "epoch": 1.4225294835298903, "grad_norm": 0.3057834506034851, "learning_rate": 8.998361498179535e-06, "loss": 0.3576, "step": 13992 }, { "epoch": 1.4226311508743392, "grad_norm": 0.30523166060447693, "learning_rate": 8.998148401546496e-06, "loss": 0.3904, "step": 13993 }, { "epoch": 1.4227328182187882, "grad_norm": 0.2850636839866638, "learning_rate": 8.997935284771736e-06, "loss": 0.3474, "step": 13994 }, { "epoch": 1.422834485563237, "grad_norm": 0.3110867142677307, "learning_rate": 8.997722147856329e-06, "loss": 0.3847, "step": 13995 }, { "epoch": 1.422936152907686, "grad_norm": 0.2835230529308319, "learning_rate": 8.997508990801347e-06, "loss": 0.3904, "step": 13996 }, { "epoch": 1.423037820252135, "grad_norm": 0.3010229468345642, "learning_rate": 8.997295813607864e-06, "loss": 0.398, "step": 13997 }, { "epoch": 1.423139487596584, "grad_norm": 0.2970642149448395, "learning_rate": 8.997082616276955e-06, "loss": 0.3697, "step": 13998 }, { "epoch": 1.4232411549410329, "grad_norm": 0.28119030594825745, "learning_rate": 8.996869398809695e-06, "loss": 0.3243, "step": 13999 }, { "epoch": 1.4233428222854818, "grad_norm": 0.29307684302330017, "learning_rate": 8.996656161207156e-06, "loss": 0.3555, "step": 14000 }, { "epoch": 1.4234444896299308, "grad_norm": 0.2832372784614563, "learning_rate": 8.996442903470413e-06, "loss": 0.3621, "step": 14001 }, { "epoch": 1.4235461569743797, "grad_norm": 0.3096049129962921, "learning_rate": 8.996229625600538e-06, "loss": 0.3701, "step": 14002 }, { "epoch": 1.4236478243188289, "grad_norm": 0.2790486216545105, "learning_rate": 8.99601632759861e-06, "loss": 0.3784, "step": 14003 }, { "epoch": 1.4237494916632778, "grad_norm": 0.29313012957572937, "learning_rate": 8.995803009465703e-06, "loss": 0.3685, "step": 14004 }, { "epoch": 1.4238511590077267, "grad_norm": 0.33432281017303467, "learning_rate": 8.995589671202887e-06, "loss": 0.3853, "step": 14005 }, { "epoch": 1.4239528263521757, "grad_norm": 0.3016054332256317, "learning_rate": 8.99537631281124e-06, "loss": 0.3675, "step": 14006 }, { "epoch": 1.4240544936966246, "grad_norm": 0.27977052330970764, "learning_rate": 8.99516293429184e-06, "loss": 0.3628, "step": 14007 }, { "epoch": 1.4241561610410736, "grad_norm": 0.29984715580940247, "learning_rate": 8.994949535645753e-06, "loss": 0.372, "step": 14008 }, { "epoch": 1.4242578283855225, "grad_norm": 0.28666213154792786, "learning_rate": 8.994736116874064e-06, "loss": 0.3699, "step": 14009 }, { "epoch": 1.4243594957299717, "grad_norm": 0.28897422552108765, "learning_rate": 8.994522677977841e-06, "loss": 0.3495, "step": 14010 }, { "epoch": 1.4244611630744206, "grad_norm": 0.2873377203941345, "learning_rate": 8.994309218958165e-06, "loss": 0.3361, "step": 14011 }, { "epoch": 1.4245628304188696, "grad_norm": 0.27627941966056824, "learning_rate": 8.994095739816107e-06, "loss": 0.3749, "step": 14012 }, { "epoch": 1.4246644977633185, "grad_norm": 0.2883191704750061, "learning_rate": 8.993882240552742e-06, "loss": 0.3813, "step": 14013 }, { "epoch": 1.4247661651077674, "grad_norm": 0.2743411064147949, "learning_rate": 8.99366872116915e-06, "loss": 0.3459, "step": 14014 }, { "epoch": 1.4248678324522164, "grad_norm": 0.2829272150993347, "learning_rate": 8.993455181666402e-06, "loss": 0.3543, "step": 14015 }, { "epoch": 1.4249694997966653, "grad_norm": 0.2643252909183502, "learning_rate": 8.993241622045576e-06, "loss": 0.3423, "step": 14016 }, { "epoch": 1.4250711671411143, "grad_norm": 0.3030247986316681, "learning_rate": 8.993028042307749e-06, "loss": 0.3702, "step": 14017 }, { "epoch": 1.4251728344855632, "grad_norm": 0.30137544870376587, "learning_rate": 8.992814442453995e-06, "loss": 0.357, "step": 14018 }, { "epoch": 1.4252745018300121, "grad_norm": 0.2862091362476349, "learning_rate": 8.992600822485391e-06, "loss": 0.3485, "step": 14019 }, { "epoch": 1.425376169174461, "grad_norm": 0.26488566398620605, "learning_rate": 8.992387182403012e-06, "loss": 0.375, "step": 14020 }, { "epoch": 1.42547783651891, "grad_norm": 0.27683326601982117, "learning_rate": 8.992173522207936e-06, "loss": 0.3999, "step": 14021 }, { "epoch": 1.425579503863359, "grad_norm": 0.3203153908252716, "learning_rate": 8.991959841901238e-06, "loss": 0.3723, "step": 14022 }, { "epoch": 1.4256811712078081, "grad_norm": 0.2658613920211792, "learning_rate": 8.991746141483994e-06, "loss": 0.3698, "step": 14023 }, { "epoch": 1.425782838552257, "grad_norm": 0.2973487079143524, "learning_rate": 8.991532420957283e-06, "loss": 0.3893, "step": 14024 }, { "epoch": 1.425884505896706, "grad_norm": 0.29151397943496704, "learning_rate": 8.991318680322181e-06, "loss": 0.3599, "step": 14025 }, { "epoch": 1.425986173241155, "grad_norm": 0.28464755415916443, "learning_rate": 8.991104919579761e-06, "loss": 0.3661, "step": 14026 }, { "epoch": 1.426087840585604, "grad_norm": 0.265788733959198, "learning_rate": 8.990891138731104e-06, "loss": 0.3635, "step": 14027 }, { "epoch": 1.4261895079300528, "grad_norm": 0.3095511496067047, "learning_rate": 8.990677337777287e-06, "loss": 0.3882, "step": 14028 }, { "epoch": 1.4262911752745018, "grad_norm": 0.28758394718170166, "learning_rate": 8.990463516719385e-06, "loss": 0.3541, "step": 14029 }, { "epoch": 1.4263928426189507, "grad_norm": 0.2791813611984253, "learning_rate": 8.990249675558477e-06, "loss": 0.3575, "step": 14030 }, { "epoch": 1.4264945099633999, "grad_norm": 0.28724417090415955, "learning_rate": 8.990035814295638e-06, "loss": 0.4051, "step": 14031 }, { "epoch": 1.4265961773078488, "grad_norm": 0.2845655083656311, "learning_rate": 8.989821932931948e-06, "loss": 0.3185, "step": 14032 }, { "epoch": 1.4266978446522978, "grad_norm": 0.27800148725509644, "learning_rate": 8.989608031468483e-06, "loss": 0.327, "step": 14033 }, { "epoch": 1.4267995119967467, "grad_norm": 0.2750993072986603, "learning_rate": 8.98939410990632e-06, "loss": 0.358, "step": 14034 }, { "epoch": 1.4269011793411956, "grad_norm": 0.28701695799827576, "learning_rate": 8.989180168246538e-06, "loss": 0.341, "step": 14035 }, { "epoch": 1.4270028466856446, "grad_norm": 0.2643585503101349, "learning_rate": 8.988966206490216e-06, "loss": 0.3426, "step": 14036 }, { "epoch": 1.4271045140300935, "grad_norm": 0.27084386348724365, "learning_rate": 8.988752224638426e-06, "loss": 0.3624, "step": 14037 }, { "epoch": 1.4272061813745425, "grad_norm": 0.28802719712257385, "learning_rate": 8.988538222692254e-06, "loss": 0.3447, "step": 14038 }, { "epoch": 1.4273078487189914, "grad_norm": 0.29907557368278503, "learning_rate": 8.988324200652773e-06, "loss": 0.4143, "step": 14039 }, { "epoch": 1.4274095160634404, "grad_norm": 0.2805064618587494, "learning_rate": 8.988110158521061e-06, "loss": 0.3504, "step": 14040 }, { "epoch": 1.4275111834078893, "grad_norm": 0.2705489695072174, "learning_rate": 8.9878960962982e-06, "loss": 0.3654, "step": 14041 }, { "epoch": 1.4276128507523382, "grad_norm": 0.29462674260139465, "learning_rate": 8.987682013985266e-06, "loss": 0.3618, "step": 14042 }, { "epoch": 1.4277145180967872, "grad_norm": 0.2675582468509674, "learning_rate": 8.987467911583336e-06, "loss": 0.3382, "step": 14043 }, { "epoch": 1.4278161854412363, "grad_norm": 0.2624863386154175, "learning_rate": 8.98725378909349e-06, "loss": 0.3559, "step": 14044 }, { "epoch": 1.4279178527856853, "grad_norm": 0.25558537244796753, "learning_rate": 8.987039646516808e-06, "loss": 0.3587, "step": 14045 }, { "epoch": 1.4280195201301342, "grad_norm": 0.26671862602233887, "learning_rate": 8.98682548385437e-06, "loss": 0.3603, "step": 14046 }, { "epoch": 1.4281211874745832, "grad_norm": 0.2743110954761505, "learning_rate": 8.98661130110725e-06, "loss": 0.3544, "step": 14047 }, { "epoch": 1.428222854819032, "grad_norm": 0.30099278688430786, "learning_rate": 8.98639709827653e-06, "loss": 0.3966, "step": 14048 }, { "epoch": 1.428324522163481, "grad_norm": 0.28100070357322693, "learning_rate": 8.986182875363287e-06, "loss": 0.3618, "step": 14049 }, { "epoch": 1.42842618950793, "grad_norm": 0.27205830812454224, "learning_rate": 8.985968632368604e-06, "loss": 0.3672, "step": 14050 }, { "epoch": 1.4285278568523792, "grad_norm": 0.30815979838371277, "learning_rate": 8.985754369293558e-06, "loss": 0.3692, "step": 14051 }, { "epoch": 1.428629524196828, "grad_norm": 0.28873100876808167, "learning_rate": 8.985540086139229e-06, "loss": 0.3682, "step": 14052 }, { "epoch": 1.428731191541277, "grad_norm": 0.2964334487915039, "learning_rate": 8.985325782906695e-06, "loss": 0.3543, "step": 14053 }, { "epoch": 1.428832858885726, "grad_norm": 0.28913256525993347, "learning_rate": 8.985111459597038e-06, "loss": 0.3558, "step": 14054 }, { "epoch": 1.428934526230175, "grad_norm": 0.31428056955337524, "learning_rate": 8.984897116211336e-06, "loss": 0.334, "step": 14055 }, { "epoch": 1.4290361935746239, "grad_norm": 0.2986087501049042, "learning_rate": 8.984682752750668e-06, "loss": 0.3412, "step": 14056 }, { "epoch": 1.4291378609190728, "grad_norm": 0.28710031509399414, "learning_rate": 8.984468369216118e-06, "loss": 0.3499, "step": 14057 }, { "epoch": 1.4292395282635217, "grad_norm": 0.30743393301963806, "learning_rate": 8.984253965608762e-06, "loss": 0.3787, "step": 14058 }, { "epoch": 1.4293411956079707, "grad_norm": 0.30730339884757996, "learning_rate": 8.98403954192968e-06, "loss": 0.3482, "step": 14059 }, { "epoch": 1.4294428629524196, "grad_norm": 0.273995041847229, "learning_rate": 8.983825098179955e-06, "loss": 0.3829, "step": 14060 }, { "epoch": 1.4295445302968686, "grad_norm": 0.32608693838119507, "learning_rate": 8.983610634360664e-06, "loss": 0.3637, "step": 14061 }, { "epoch": 1.4296461976413175, "grad_norm": 0.31585729122161865, "learning_rate": 8.983396150472891e-06, "loss": 0.3686, "step": 14062 }, { "epoch": 1.4297478649857664, "grad_norm": 0.2853771150112152, "learning_rate": 8.983181646517715e-06, "loss": 0.3577, "step": 14063 }, { "epoch": 1.4298495323302156, "grad_norm": 0.27065062522888184, "learning_rate": 8.982967122496216e-06, "loss": 0.3647, "step": 14064 }, { "epoch": 1.4299511996746646, "grad_norm": 0.2740245461463928, "learning_rate": 8.982752578409476e-06, "loss": 0.3437, "step": 14065 }, { "epoch": 1.4300528670191135, "grad_norm": 0.2833840847015381, "learning_rate": 8.982538014258573e-06, "loss": 0.3673, "step": 14066 }, { "epoch": 1.4301545343635624, "grad_norm": 0.32654041051864624, "learning_rate": 8.98232343004459e-06, "loss": 0.3621, "step": 14067 }, { "epoch": 1.4302562017080114, "grad_norm": 0.31036755442619324, "learning_rate": 8.982108825768609e-06, "loss": 0.3744, "step": 14068 }, { "epoch": 1.4303578690524603, "grad_norm": 0.27268195152282715, "learning_rate": 8.981894201431709e-06, "loss": 0.3218, "step": 14069 }, { "epoch": 1.4304595363969093, "grad_norm": 0.2742474675178528, "learning_rate": 8.981679557034973e-06, "loss": 0.3559, "step": 14070 }, { "epoch": 1.4305612037413582, "grad_norm": 0.3072464168071747, "learning_rate": 8.981464892579483e-06, "loss": 0.3849, "step": 14071 }, { "epoch": 1.4306628710858074, "grad_norm": 0.2994086444377899, "learning_rate": 8.981250208066316e-06, "loss": 0.3621, "step": 14072 }, { "epoch": 1.4307645384302563, "grad_norm": 0.27984127402305603, "learning_rate": 8.981035503496558e-06, "loss": 0.3281, "step": 14073 }, { "epoch": 1.4308662057747052, "grad_norm": 0.2734815180301666, "learning_rate": 8.980820778871291e-06, "loss": 0.3298, "step": 14074 }, { "epoch": 1.4309678731191542, "grad_norm": 0.30120599269866943, "learning_rate": 8.980606034191593e-06, "loss": 0.3556, "step": 14075 }, { "epoch": 1.4310695404636031, "grad_norm": 0.27469778060913086, "learning_rate": 8.980391269458546e-06, "loss": 0.3831, "step": 14076 }, { "epoch": 1.431171207808052, "grad_norm": 0.3018070459365845, "learning_rate": 8.980176484673237e-06, "loss": 0.3671, "step": 14077 }, { "epoch": 1.431272875152501, "grad_norm": 0.2763654887676239, "learning_rate": 8.979961679836743e-06, "loss": 0.3318, "step": 14078 }, { "epoch": 1.43137454249695, "grad_norm": 0.3051404058933258, "learning_rate": 8.97974685495015e-06, "loss": 0.3675, "step": 14079 }, { "epoch": 1.431476209841399, "grad_norm": 0.27724871039390564, "learning_rate": 8.979532010014535e-06, "loss": 0.3835, "step": 14080 }, { "epoch": 1.4315778771858478, "grad_norm": 0.30038145184516907, "learning_rate": 8.979317145030984e-06, "loss": 0.3586, "step": 14081 }, { "epoch": 1.4316795445302968, "grad_norm": 0.31360316276550293, "learning_rate": 8.979102260000579e-06, "loss": 0.4015, "step": 14082 }, { "epoch": 1.4317812118747457, "grad_norm": 0.2894275188446045, "learning_rate": 8.978887354924404e-06, "loss": 0.3523, "step": 14083 }, { "epoch": 1.4318828792191947, "grad_norm": 0.2913515567779541, "learning_rate": 8.97867242980354e-06, "loss": 0.389, "step": 14084 }, { "epoch": 1.4319845465636438, "grad_norm": 0.2902028560638428, "learning_rate": 8.978457484639067e-06, "loss": 0.3417, "step": 14085 }, { "epoch": 1.4320862139080928, "grad_norm": 0.3013366162776947, "learning_rate": 8.978242519432072e-06, "loss": 0.3537, "step": 14086 }, { "epoch": 1.4321878812525417, "grad_norm": 0.2709401845932007, "learning_rate": 8.978027534183637e-06, "loss": 0.3511, "step": 14087 }, { "epoch": 1.4322895485969906, "grad_norm": 0.28847914934158325, "learning_rate": 8.977812528894845e-06, "loss": 0.374, "step": 14088 }, { "epoch": 1.4323912159414396, "grad_norm": 0.2932470142841339, "learning_rate": 8.977597503566778e-06, "loss": 0.3664, "step": 14089 }, { "epoch": 1.4324928832858885, "grad_norm": 0.28668954968452454, "learning_rate": 8.97738245820052e-06, "loss": 0.3853, "step": 14090 }, { "epoch": 1.4325945506303375, "grad_norm": 0.30310317873954773, "learning_rate": 8.977167392797154e-06, "loss": 0.3615, "step": 14091 }, { "epoch": 1.4326962179747866, "grad_norm": 0.2799067497253418, "learning_rate": 8.976952307357766e-06, "loss": 0.3575, "step": 14092 }, { "epoch": 1.4327978853192356, "grad_norm": 0.31412404775619507, "learning_rate": 8.976737201883435e-06, "loss": 0.3594, "step": 14093 }, { "epoch": 1.4328995526636845, "grad_norm": 0.3006800711154938, "learning_rate": 8.976522076375248e-06, "loss": 0.3354, "step": 14094 }, { "epoch": 1.4330012200081335, "grad_norm": 0.2749447226524353, "learning_rate": 8.976306930834286e-06, "loss": 0.3561, "step": 14095 }, { "epoch": 1.4331028873525824, "grad_norm": 0.30494168400764465, "learning_rate": 8.976091765261637e-06, "loss": 0.3506, "step": 14096 }, { "epoch": 1.4332045546970313, "grad_norm": 0.27853357791900635, "learning_rate": 8.975876579658382e-06, "loss": 0.3214, "step": 14097 }, { "epoch": 1.4333062220414803, "grad_norm": 0.3009539246559143, "learning_rate": 8.975661374025607e-06, "loss": 0.3987, "step": 14098 }, { "epoch": 1.4334078893859292, "grad_norm": 0.2918476462364197, "learning_rate": 8.97544614836439e-06, "loss": 0.3265, "step": 14099 }, { "epoch": 1.4335095567303782, "grad_norm": 0.2771143317222595, "learning_rate": 8.975230902675824e-06, "loss": 0.3408, "step": 14100 }, { "epoch": 1.433611224074827, "grad_norm": 0.287995845079422, "learning_rate": 8.97501563696099e-06, "loss": 0.3858, "step": 14101 }, { "epoch": 1.433712891419276, "grad_norm": 0.2678889036178589, "learning_rate": 8.974800351220969e-06, "loss": 0.3262, "step": 14102 }, { "epoch": 1.433814558763725, "grad_norm": 0.28547346591949463, "learning_rate": 8.97458504545685e-06, "loss": 0.3144, "step": 14103 }, { "epoch": 1.433916226108174, "grad_norm": 0.27010437846183777, "learning_rate": 8.974369719669716e-06, "loss": 0.3352, "step": 14104 }, { "epoch": 1.434017893452623, "grad_norm": 0.2935531437397003, "learning_rate": 8.974154373860652e-06, "loss": 0.3826, "step": 14105 }, { "epoch": 1.434119560797072, "grad_norm": 0.3086070418357849, "learning_rate": 8.97393900803074e-06, "loss": 0.3706, "step": 14106 }, { "epoch": 1.434221228141521, "grad_norm": 0.3030029535293579, "learning_rate": 8.973723622181072e-06, "loss": 0.3997, "step": 14107 }, { "epoch": 1.43432289548597, "grad_norm": 0.2824310064315796, "learning_rate": 8.973508216312724e-06, "loss": 0.3638, "step": 14108 }, { "epoch": 1.4344245628304189, "grad_norm": 0.28292620182037354, "learning_rate": 8.973292790426788e-06, "loss": 0.3556, "step": 14109 }, { "epoch": 1.4345262301748678, "grad_norm": 0.2898242175579071, "learning_rate": 8.973077344524349e-06, "loss": 0.3647, "step": 14110 }, { "epoch": 1.4346278975193167, "grad_norm": 0.29498350620269775, "learning_rate": 8.972861878606488e-06, "loss": 0.3789, "step": 14111 }, { "epoch": 1.4347295648637657, "grad_norm": 0.29262790083885193, "learning_rate": 8.972646392674295e-06, "loss": 0.3962, "step": 14112 }, { "epoch": 1.4348312322082148, "grad_norm": 0.29864823818206787, "learning_rate": 8.972430886728851e-06, "loss": 0.344, "step": 14113 }, { "epoch": 1.4349328995526638, "grad_norm": 0.2739779055118561, "learning_rate": 8.972215360771246e-06, "loss": 0.386, "step": 14114 }, { "epoch": 1.4350345668971127, "grad_norm": 0.29307931661605835, "learning_rate": 8.971999814802563e-06, "loss": 0.3692, "step": 14115 }, { "epoch": 1.4351362342415617, "grad_norm": 0.27489784359931946, "learning_rate": 8.971784248823891e-06, "loss": 0.3557, "step": 14116 }, { "epoch": 1.4352379015860106, "grad_norm": 0.3033672273159027, "learning_rate": 8.971568662836312e-06, "loss": 0.3653, "step": 14117 }, { "epoch": 1.4353395689304596, "grad_norm": 0.2890854775905609, "learning_rate": 8.971353056840913e-06, "loss": 0.3414, "step": 14118 }, { "epoch": 1.4354412362749085, "grad_norm": 0.28273066878318787, "learning_rate": 8.971137430838783e-06, "loss": 0.3349, "step": 14119 }, { "epoch": 1.4355429036193574, "grad_norm": 0.31828010082244873, "learning_rate": 8.970921784831005e-06, "loss": 0.3701, "step": 14120 }, { "epoch": 1.4356445709638064, "grad_norm": 0.2988181710243225, "learning_rate": 8.970706118818667e-06, "loss": 0.3606, "step": 14121 }, { "epoch": 1.4357462383082553, "grad_norm": 0.2850673198699951, "learning_rate": 8.970490432802856e-06, "loss": 0.3562, "step": 14122 }, { "epoch": 1.4358479056527043, "grad_norm": 0.32829612493515015, "learning_rate": 8.970274726784655e-06, "loss": 0.3564, "step": 14123 }, { "epoch": 1.4359495729971532, "grad_norm": 0.2795920670032501, "learning_rate": 8.970059000765157e-06, "loss": 0.3728, "step": 14124 }, { "epoch": 1.4360512403416021, "grad_norm": 0.2693687975406647, "learning_rate": 8.969843254745443e-06, "loss": 0.3552, "step": 14125 }, { "epoch": 1.4361529076860513, "grad_norm": 0.28470903635025024, "learning_rate": 8.969627488726603e-06, "loss": 0.3609, "step": 14126 }, { "epoch": 1.4362545750305002, "grad_norm": 0.266053706407547, "learning_rate": 8.969411702709723e-06, "loss": 0.3323, "step": 14127 }, { "epoch": 1.4363562423749492, "grad_norm": 0.28835925459861755, "learning_rate": 8.969195896695888e-06, "loss": 0.3566, "step": 14128 }, { "epoch": 1.4364579097193981, "grad_norm": 0.293053537607193, "learning_rate": 8.96898007068619e-06, "loss": 0.3767, "step": 14129 }, { "epoch": 1.436559577063847, "grad_norm": 0.31601735949516296, "learning_rate": 8.968764224681713e-06, "loss": 0.4008, "step": 14130 }, { "epoch": 1.436661244408296, "grad_norm": 0.3150608539581299, "learning_rate": 8.968548358683544e-06, "loss": 0.3333, "step": 14131 }, { "epoch": 1.436762911752745, "grad_norm": 0.3218518793582916, "learning_rate": 8.968332472692772e-06, "loss": 0.3631, "step": 14132 }, { "epoch": 1.4368645790971941, "grad_norm": 0.30547788739204407, "learning_rate": 8.968116566710484e-06, "loss": 0.384, "step": 14133 }, { "epoch": 1.436966246441643, "grad_norm": 0.2668290138244629, "learning_rate": 8.967900640737767e-06, "loss": 0.3537, "step": 14134 }, { "epoch": 1.437067913786092, "grad_norm": 0.298292338848114, "learning_rate": 8.96768469477571e-06, "loss": 0.3598, "step": 14135 }, { "epoch": 1.437169581130541, "grad_norm": 0.31431692838668823, "learning_rate": 8.9674687288254e-06, "loss": 0.3771, "step": 14136 }, { "epoch": 1.4372712484749899, "grad_norm": 0.29455357789993286, "learning_rate": 8.967252742887929e-06, "loss": 0.3531, "step": 14137 }, { "epoch": 1.4373729158194388, "grad_norm": 0.3092800974845886, "learning_rate": 8.967036736964378e-06, "loss": 0.3463, "step": 14138 }, { "epoch": 1.4374745831638878, "grad_norm": 0.29390379786491394, "learning_rate": 8.96682071105584e-06, "loss": 0.4131, "step": 14139 }, { "epoch": 1.4375762505083367, "grad_norm": 0.28083959221839905, "learning_rate": 8.966604665163399e-06, "loss": 0.3427, "step": 14140 }, { "epoch": 1.4376779178527856, "grad_norm": 0.3026054799556732, "learning_rate": 8.966388599288148e-06, "loss": 0.3612, "step": 14141 }, { "epoch": 1.4377795851972346, "grad_norm": 0.3268221616744995, "learning_rate": 8.966172513431175e-06, "loss": 0.3604, "step": 14142 }, { "epoch": 1.4378812525416835, "grad_norm": 0.30807119607925415, "learning_rate": 8.965956407593565e-06, "loss": 0.3501, "step": 14143 }, { "epoch": 1.4379829198861325, "grad_norm": 0.3036346733570099, "learning_rate": 8.965740281776412e-06, "loss": 0.4043, "step": 14144 }, { "epoch": 1.4380845872305814, "grad_norm": 0.3304295241832733, "learning_rate": 8.9655241359808e-06, "loss": 0.3731, "step": 14145 }, { "epoch": 1.4381862545750306, "grad_norm": 0.29092368483543396, "learning_rate": 8.965307970207819e-06, "loss": 0.3642, "step": 14146 }, { "epoch": 1.4382879219194795, "grad_norm": 0.33529749512672424, "learning_rate": 8.965091784458561e-06, "loss": 0.3506, "step": 14147 }, { "epoch": 1.4383895892639285, "grad_norm": 0.2838336229324341, "learning_rate": 8.964875578734112e-06, "loss": 0.3515, "step": 14148 }, { "epoch": 1.4384912566083774, "grad_norm": 0.26202258467674255, "learning_rate": 8.964659353035562e-06, "loss": 0.3802, "step": 14149 }, { "epoch": 1.4385929239528263, "grad_norm": 0.3055957853794098, "learning_rate": 8.964443107364e-06, "loss": 0.3776, "step": 14150 }, { "epoch": 1.4386945912972753, "grad_norm": 0.31946027278900146, "learning_rate": 8.964226841720517e-06, "loss": 0.3694, "step": 14151 }, { "epoch": 1.4387962586417242, "grad_norm": 0.32051023840904236, "learning_rate": 8.964010556106198e-06, "loss": 0.3723, "step": 14152 }, { "epoch": 1.4388979259861732, "grad_norm": 0.2746521830558777, "learning_rate": 8.963794250522138e-06, "loss": 0.349, "step": 14153 }, { "epoch": 1.4389995933306223, "grad_norm": 0.2979527413845062, "learning_rate": 8.963577924969425e-06, "loss": 0.3652, "step": 14154 }, { "epoch": 1.4391012606750713, "grad_norm": 0.31356799602508545, "learning_rate": 8.963361579449149e-06, "loss": 0.3133, "step": 14155 }, { "epoch": 1.4392029280195202, "grad_norm": 0.30394935607910156, "learning_rate": 8.963145213962397e-06, "loss": 0.361, "step": 14156 }, { "epoch": 1.4393045953639692, "grad_norm": 0.3013184666633606, "learning_rate": 8.962928828510261e-06, "loss": 0.3866, "step": 14157 }, { "epoch": 1.439406262708418, "grad_norm": 0.3099268674850464, "learning_rate": 8.962712423093834e-06, "loss": 0.3568, "step": 14158 }, { "epoch": 1.439507930052867, "grad_norm": 0.3067801892757416, "learning_rate": 8.9624959977142e-06, "loss": 0.3795, "step": 14159 }, { "epoch": 1.439609597397316, "grad_norm": 0.28038591146469116, "learning_rate": 8.962279552372455e-06, "loss": 0.339, "step": 14160 }, { "epoch": 1.439711264741765, "grad_norm": 0.3037412762641907, "learning_rate": 8.962063087069687e-06, "loss": 0.3942, "step": 14161 }, { "epoch": 1.4398129320862139, "grad_norm": 0.28912439942359924, "learning_rate": 8.961846601806987e-06, "loss": 0.3709, "step": 14162 }, { "epoch": 1.4399145994306628, "grad_norm": 0.31010448932647705, "learning_rate": 8.961630096585445e-06, "loss": 0.3373, "step": 14163 }, { "epoch": 1.4400162667751117, "grad_norm": 0.31840360164642334, "learning_rate": 8.961413571406152e-06, "loss": 0.3918, "step": 14164 }, { "epoch": 1.4401179341195607, "grad_norm": 0.2904190421104431, "learning_rate": 8.961197026270198e-06, "loss": 0.3652, "step": 14165 }, { "epoch": 1.4402196014640096, "grad_norm": 0.29215335845947266, "learning_rate": 8.960980461178675e-06, "loss": 0.3675, "step": 14166 }, { "epoch": 1.4403212688084588, "grad_norm": 0.3093157112598419, "learning_rate": 8.960763876132674e-06, "loss": 0.3678, "step": 14167 }, { "epoch": 1.4404229361529077, "grad_norm": 0.2969922423362732, "learning_rate": 8.960547271133287e-06, "loss": 0.3343, "step": 14168 }, { "epoch": 1.4405246034973567, "grad_norm": 0.2904052138328552, "learning_rate": 8.960330646181603e-06, "loss": 0.3678, "step": 14169 }, { "epoch": 1.4406262708418056, "grad_norm": 0.26989656686782837, "learning_rate": 8.960114001278714e-06, "loss": 0.3754, "step": 14170 }, { "epoch": 1.4407279381862546, "grad_norm": 0.2984982132911682, "learning_rate": 8.959897336425712e-06, "loss": 0.3536, "step": 14171 }, { "epoch": 1.4408296055307035, "grad_norm": 0.27881574630737305, "learning_rate": 8.959680651623688e-06, "loss": 0.4007, "step": 14172 }, { "epoch": 1.4409312728751524, "grad_norm": 0.28652727603912354, "learning_rate": 8.959463946873734e-06, "loss": 0.3804, "step": 14173 }, { "epoch": 1.4410329402196016, "grad_norm": 0.2975572645664215, "learning_rate": 8.959247222176941e-06, "loss": 0.3914, "step": 14174 }, { "epoch": 1.4411346075640505, "grad_norm": 0.2854110896587372, "learning_rate": 8.959030477534403e-06, "loss": 0.3668, "step": 14175 }, { "epoch": 1.4412362749084995, "grad_norm": 0.31823185086250305, "learning_rate": 8.958813712947208e-06, "loss": 0.3959, "step": 14176 }, { "epoch": 1.4413379422529484, "grad_norm": 0.30955585837364197, "learning_rate": 8.958596928416452e-06, "loss": 0.3561, "step": 14177 }, { "epoch": 1.4414396095973974, "grad_norm": 0.2856405973434448, "learning_rate": 8.958380123943225e-06, "loss": 0.3476, "step": 14178 }, { "epoch": 1.4415412769418463, "grad_norm": 0.31011077761650085, "learning_rate": 8.95816329952862e-06, "loss": 0.3627, "step": 14179 }, { "epoch": 1.4416429442862952, "grad_norm": 0.2961975932121277, "learning_rate": 8.957946455173727e-06, "loss": 0.3559, "step": 14180 }, { "epoch": 1.4417446116307442, "grad_norm": 0.2948063910007477, "learning_rate": 8.957729590879642e-06, "loss": 0.3686, "step": 14181 }, { "epoch": 1.4418462789751931, "grad_norm": 0.3008435368537903, "learning_rate": 8.957512706647456e-06, "loss": 0.3457, "step": 14182 }, { "epoch": 1.441947946319642, "grad_norm": 0.2879594564437866, "learning_rate": 8.957295802478261e-06, "loss": 0.3493, "step": 14183 }, { "epoch": 1.442049613664091, "grad_norm": 0.3030175268650055, "learning_rate": 8.95707887837315e-06, "loss": 0.3736, "step": 14184 }, { "epoch": 1.44215128100854, "grad_norm": 0.3014112412929535, "learning_rate": 8.956861934333217e-06, "loss": 0.3444, "step": 14185 }, { "epoch": 1.442252948352989, "grad_norm": 0.29023218154907227, "learning_rate": 8.956644970359552e-06, "loss": 0.3711, "step": 14186 }, { "epoch": 1.442354615697438, "grad_norm": 0.29784470796585083, "learning_rate": 8.95642798645325e-06, "loss": 0.3378, "step": 14187 }, { "epoch": 1.442456283041887, "grad_norm": 0.33634817600250244, "learning_rate": 8.956210982615405e-06, "loss": 0.3792, "step": 14188 }, { "epoch": 1.442557950386336, "grad_norm": 0.3037894070148468, "learning_rate": 8.95599395884711e-06, "loss": 0.4197, "step": 14189 }, { "epoch": 1.4426596177307849, "grad_norm": 0.29551151394844055, "learning_rate": 8.955776915149457e-06, "loss": 0.3773, "step": 14190 }, { "epoch": 1.4427612850752338, "grad_norm": 0.2795877754688263, "learning_rate": 8.955559851523539e-06, "loss": 0.3759, "step": 14191 }, { "epoch": 1.4428629524196828, "grad_norm": 0.2970752418041229, "learning_rate": 8.95534276797045e-06, "loss": 0.345, "step": 14192 }, { "epoch": 1.4429646197641317, "grad_norm": 0.27572181820869446, "learning_rate": 8.955125664491286e-06, "loss": 0.3615, "step": 14193 }, { "epoch": 1.4430662871085806, "grad_norm": 0.2954244911670685, "learning_rate": 8.954908541087137e-06, "loss": 0.3585, "step": 14194 }, { "epoch": 1.4431679544530298, "grad_norm": 0.30469009280204773, "learning_rate": 8.954691397759099e-06, "loss": 0.3448, "step": 14195 }, { "epoch": 1.4432696217974788, "grad_norm": 0.2987014353275299, "learning_rate": 8.954474234508265e-06, "loss": 0.37, "step": 14196 }, { "epoch": 1.4433712891419277, "grad_norm": 0.3150749206542969, "learning_rate": 8.954257051335732e-06, "loss": 0.3866, "step": 14197 }, { "epoch": 1.4434729564863766, "grad_norm": 0.2987866699695587, "learning_rate": 8.954039848242591e-06, "loss": 0.3527, "step": 14198 }, { "epoch": 1.4435746238308256, "grad_norm": 0.3155508041381836, "learning_rate": 8.953822625229934e-06, "loss": 0.3449, "step": 14199 }, { "epoch": 1.4436762911752745, "grad_norm": 0.2989606261253357, "learning_rate": 8.953605382298862e-06, "loss": 0.3605, "step": 14200 }, { "epoch": 1.4437779585197235, "grad_norm": 0.27679795026779175, "learning_rate": 8.953388119450462e-06, "loss": 0.3737, "step": 14201 }, { "epoch": 1.4438796258641724, "grad_norm": 0.30196312069892883, "learning_rate": 8.953170836685834e-06, "loss": 0.3563, "step": 14202 }, { "epoch": 1.4439812932086213, "grad_norm": 0.312370628118515, "learning_rate": 8.95295353400607e-06, "loss": 0.3532, "step": 14203 }, { "epoch": 1.4440829605530703, "grad_norm": 0.28113314509391785, "learning_rate": 8.952736211412268e-06, "loss": 0.3533, "step": 14204 }, { "epoch": 1.4441846278975192, "grad_norm": 0.30791744589805603, "learning_rate": 8.952518868905517e-06, "loss": 0.3883, "step": 14205 }, { "epoch": 1.4442862952419682, "grad_norm": 0.34304454922676086, "learning_rate": 8.952301506486917e-06, "loss": 0.3782, "step": 14206 }, { "epoch": 1.444387962586417, "grad_norm": 0.31838974356651306, "learning_rate": 8.95208412415756e-06, "loss": 0.3779, "step": 14207 }, { "epoch": 1.4444896299308663, "grad_norm": 0.284702867269516, "learning_rate": 8.951866721918544e-06, "loss": 0.3612, "step": 14208 }, { "epoch": 1.4445912972753152, "grad_norm": 0.3061872124671936, "learning_rate": 8.951649299770962e-06, "loss": 0.3535, "step": 14209 }, { "epoch": 1.4446929646197642, "grad_norm": 0.2818219065666199, "learning_rate": 8.951431857715908e-06, "loss": 0.34, "step": 14210 }, { "epoch": 1.444794631964213, "grad_norm": 0.31697556376457214, "learning_rate": 8.951214395754482e-06, "loss": 0.371, "step": 14211 }, { "epoch": 1.444896299308662, "grad_norm": 0.29932913184165955, "learning_rate": 8.950996913887776e-06, "loss": 0.3559, "step": 14212 }, { "epoch": 1.444997966653111, "grad_norm": 0.3068314790725708, "learning_rate": 8.950779412116886e-06, "loss": 0.379, "step": 14213 }, { "epoch": 1.44509963399756, "grad_norm": 0.29366856813430786, "learning_rate": 8.950561890442908e-06, "loss": 0.3648, "step": 14214 }, { "epoch": 1.445201301342009, "grad_norm": 0.30227842926979065, "learning_rate": 8.950344348866938e-06, "loss": 0.3931, "step": 14215 }, { "epoch": 1.445302968686458, "grad_norm": 0.2927965223789215, "learning_rate": 8.950126787390071e-06, "loss": 0.3474, "step": 14216 }, { "epoch": 1.445404636030907, "grad_norm": 0.3032587766647339, "learning_rate": 8.949909206013407e-06, "loss": 0.3711, "step": 14217 }, { "epoch": 1.445506303375356, "grad_norm": 0.3051525056362152, "learning_rate": 8.949691604738036e-06, "loss": 0.3581, "step": 14218 }, { "epoch": 1.4456079707198048, "grad_norm": 0.3286685347557068, "learning_rate": 8.949473983565059e-06, "loss": 0.3641, "step": 14219 }, { "epoch": 1.4457096380642538, "grad_norm": 0.27678653597831726, "learning_rate": 8.94925634249557e-06, "loss": 0.371, "step": 14220 }, { "epoch": 1.4458113054087027, "grad_norm": 0.32733461260795593, "learning_rate": 8.949038681530666e-06, "loss": 0.3729, "step": 14221 }, { "epoch": 1.4459129727531517, "grad_norm": 0.3153105080127716, "learning_rate": 8.948821000671444e-06, "loss": 0.3623, "step": 14222 }, { "epoch": 1.4460146400976006, "grad_norm": 0.28530797362327576, "learning_rate": 8.948603299918999e-06, "loss": 0.3343, "step": 14223 }, { "epoch": 1.4461163074420496, "grad_norm": 0.2697601616382599, "learning_rate": 8.94838557927443e-06, "loss": 0.3304, "step": 14224 }, { "epoch": 1.4462179747864985, "grad_norm": 0.293253630399704, "learning_rate": 8.948167838738833e-06, "loss": 0.3747, "step": 14225 }, { "epoch": 1.4463196421309474, "grad_norm": 0.3043064773082733, "learning_rate": 8.947950078313305e-06, "loss": 0.3568, "step": 14226 }, { "epoch": 1.4464213094753964, "grad_norm": 0.3035450875759125, "learning_rate": 8.94773229799894e-06, "loss": 0.3586, "step": 14227 }, { "epoch": 1.4465229768198455, "grad_norm": 0.28182661533355713, "learning_rate": 8.94751449779684e-06, "loss": 0.4081, "step": 14228 }, { "epoch": 1.4466246441642945, "grad_norm": 0.3016781806945801, "learning_rate": 8.9472966777081e-06, "loss": 0.3782, "step": 14229 }, { "epoch": 1.4467263115087434, "grad_norm": 0.28533604741096497, "learning_rate": 8.947078837733817e-06, "loss": 0.3502, "step": 14230 }, { "epoch": 1.4468279788531924, "grad_norm": 0.28545260429382324, "learning_rate": 8.946860977875088e-06, "loss": 0.347, "step": 14231 }, { "epoch": 1.4469296461976413, "grad_norm": 0.2965518534183502, "learning_rate": 8.946643098133015e-06, "loss": 0.3903, "step": 14232 }, { "epoch": 1.4470313135420902, "grad_norm": 0.27884653210639954, "learning_rate": 8.946425198508688e-06, "loss": 0.3808, "step": 14233 }, { "epoch": 1.4471329808865392, "grad_norm": 0.2697690725326538, "learning_rate": 8.94620727900321e-06, "loss": 0.3533, "step": 14234 }, { "epoch": 1.4472346482309881, "grad_norm": 0.28931719064712524, "learning_rate": 8.945989339617676e-06, "loss": 0.3681, "step": 14235 }, { "epoch": 1.4473363155754373, "grad_norm": 0.2932288348674774, "learning_rate": 8.945771380353188e-06, "loss": 0.3554, "step": 14236 }, { "epoch": 1.4474379829198862, "grad_norm": 0.31276535987854004, "learning_rate": 8.94555340121084e-06, "loss": 0.3616, "step": 14237 }, { "epoch": 1.4475396502643352, "grad_norm": 0.3013092279434204, "learning_rate": 8.945335402191731e-06, "loss": 0.3925, "step": 14238 }, { "epoch": 1.4476413176087841, "grad_norm": 0.30855825543403625, "learning_rate": 8.94511738329696e-06, "loss": 0.3617, "step": 14239 }, { "epoch": 1.447742984953233, "grad_norm": 0.29388055205345154, "learning_rate": 8.944899344527626e-06, "loss": 0.3726, "step": 14240 }, { "epoch": 1.447844652297682, "grad_norm": 0.2770044803619385, "learning_rate": 8.944681285884827e-06, "loss": 0.3627, "step": 14241 }, { "epoch": 1.447946319642131, "grad_norm": 0.3125658631324768, "learning_rate": 8.94446320736966e-06, "loss": 0.3692, "step": 14242 }, { "epoch": 1.4480479869865799, "grad_norm": 0.2982155382633209, "learning_rate": 8.944245108983224e-06, "loss": 0.3665, "step": 14243 }, { "epoch": 1.4481496543310288, "grad_norm": 0.3095262944698334, "learning_rate": 8.94402699072662e-06, "loss": 0.4094, "step": 14244 }, { "epoch": 1.4482513216754778, "grad_norm": 0.2952789068222046, "learning_rate": 8.943808852600945e-06, "loss": 0.3298, "step": 14245 }, { "epoch": 1.4483529890199267, "grad_norm": 0.29802075028419495, "learning_rate": 8.943590694607298e-06, "loss": 0.354, "step": 14246 }, { "epoch": 1.4484546563643756, "grad_norm": 0.3023536801338196, "learning_rate": 8.943372516746778e-06, "loss": 0.3507, "step": 14247 }, { "epoch": 1.4485563237088246, "grad_norm": 0.2996566593647003, "learning_rate": 8.943154319020484e-06, "loss": 0.407, "step": 14248 }, { "epoch": 1.4486579910532738, "grad_norm": 0.29010966420173645, "learning_rate": 8.942936101429516e-06, "loss": 0.3587, "step": 14249 }, { "epoch": 1.4487596583977227, "grad_norm": 0.30503836274147034, "learning_rate": 8.942717863974973e-06, "loss": 0.3732, "step": 14250 }, { "epoch": 1.4488613257421716, "grad_norm": 0.28708308935165405, "learning_rate": 8.942499606657955e-06, "loss": 0.3697, "step": 14251 }, { "epoch": 1.4489629930866206, "grad_norm": 0.307243287563324, "learning_rate": 8.942281329479559e-06, "loss": 0.3805, "step": 14252 }, { "epoch": 1.4490646604310695, "grad_norm": 0.2751285135746002, "learning_rate": 8.942063032440888e-06, "loss": 0.3551, "step": 14253 }, { "epoch": 1.4491663277755185, "grad_norm": 0.28304219245910645, "learning_rate": 8.94184471554304e-06, "loss": 0.3566, "step": 14254 }, { "epoch": 1.4492679951199674, "grad_norm": 0.2755473256111145, "learning_rate": 8.941626378787115e-06, "loss": 0.3566, "step": 14255 }, { "epoch": 1.4493696624644166, "grad_norm": 0.279415488243103, "learning_rate": 8.941408022174212e-06, "loss": 0.3612, "step": 14256 }, { "epoch": 1.4494713298088655, "grad_norm": 0.2846960425376892, "learning_rate": 8.941189645705433e-06, "loss": 0.3618, "step": 14257 }, { "epoch": 1.4495729971533144, "grad_norm": 0.2758968472480774, "learning_rate": 8.940971249381879e-06, "loss": 0.397, "step": 14258 }, { "epoch": 1.4496746644977634, "grad_norm": 0.2877999246120453, "learning_rate": 8.940752833204644e-06, "loss": 0.4085, "step": 14259 }, { "epoch": 1.4497763318422123, "grad_norm": 0.2796575725078583, "learning_rate": 8.940534397174836e-06, "loss": 0.3534, "step": 14260 }, { "epoch": 1.4498779991866613, "grad_norm": 0.2902231514453888, "learning_rate": 8.940315941293551e-06, "loss": 0.3734, "step": 14261 }, { "epoch": 1.4499796665311102, "grad_norm": 0.29069575667381287, "learning_rate": 8.940097465561891e-06, "loss": 0.3662, "step": 14262 }, { "epoch": 1.4500813338755592, "grad_norm": 0.27437135577201843, "learning_rate": 8.939878969980956e-06, "loss": 0.3486, "step": 14263 }, { "epoch": 1.450183001220008, "grad_norm": 0.27816906571388245, "learning_rate": 8.939660454551846e-06, "loss": 0.3507, "step": 14264 }, { "epoch": 1.450284668564457, "grad_norm": 0.3020454943180084, "learning_rate": 8.939441919275664e-06, "loss": 0.4043, "step": 14265 }, { "epoch": 1.450386335908906, "grad_norm": 0.28278839588165283, "learning_rate": 8.939223364153512e-06, "loss": 0.351, "step": 14266 }, { "epoch": 1.450488003253355, "grad_norm": 0.26966941356658936, "learning_rate": 8.939004789186485e-06, "loss": 0.3715, "step": 14267 }, { "epoch": 1.4505896705978039, "grad_norm": 0.31734395027160645, "learning_rate": 8.93878619437569e-06, "loss": 0.3914, "step": 14268 }, { "epoch": 1.450691337942253, "grad_norm": 0.2759847640991211, "learning_rate": 8.938567579722225e-06, "loss": 0.3429, "step": 14269 }, { "epoch": 1.450793005286702, "grad_norm": 0.2819339334964752, "learning_rate": 8.938348945227193e-06, "loss": 0.3633, "step": 14270 }, { "epoch": 1.450894672631151, "grad_norm": 0.2651307284832001, "learning_rate": 8.938130290891696e-06, "loss": 0.3363, "step": 14271 }, { "epoch": 1.4509963399755998, "grad_norm": 0.27820640802383423, "learning_rate": 8.937911616716831e-06, "loss": 0.3792, "step": 14272 }, { "epoch": 1.4510980073200488, "grad_norm": 0.28347012400627136, "learning_rate": 8.937692922703707e-06, "loss": 0.3559, "step": 14273 }, { "epoch": 1.4511996746644977, "grad_norm": 0.2730003595352173, "learning_rate": 8.937474208853421e-06, "loss": 0.366, "step": 14274 }, { "epoch": 1.4513013420089467, "grad_norm": 0.2709350287914276, "learning_rate": 8.937255475167074e-06, "loss": 0.3533, "step": 14275 }, { "epoch": 1.4514030093533956, "grad_norm": 0.28773823380470276, "learning_rate": 8.93703672164577e-06, "loss": 0.3577, "step": 14276 }, { "epoch": 1.4515046766978448, "grad_norm": 0.30932819843292236, "learning_rate": 8.936817948290612e-06, "loss": 0.349, "step": 14277 }, { "epoch": 1.4516063440422937, "grad_norm": 0.2817602753639221, "learning_rate": 8.936599155102699e-06, "loss": 0.3255, "step": 14278 }, { "epoch": 1.4517080113867427, "grad_norm": 0.29394203424453735, "learning_rate": 8.936380342083136e-06, "loss": 0.3753, "step": 14279 }, { "epoch": 1.4518096787311916, "grad_norm": 0.30116263031959534, "learning_rate": 8.936161509233024e-06, "loss": 0.3652, "step": 14280 }, { "epoch": 1.4519113460756405, "grad_norm": 0.2825661599636078, "learning_rate": 8.935942656553466e-06, "loss": 0.3335, "step": 14281 }, { "epoch": 1.4520130134200895, "grad_norm": 0.2809261381626129, "learning_rate": 8.935723784045564e-06, "loss": 0.3651, "step": 14282 }, { "epoch": 1.4521146807645384, "grad_norm": 0.2960078716278076, "learning_rate": 8.93550489171042e-06, "loss": 0.3519, "step": 14283 }, { "epoch": 1.4522163481089874, "grad_norm": 0.28136199712753296, "learning_rate": 8.935285979549139e-06, "loss": 0.3533, "step": 14284 }, { "epoch": 1.4523180154534363, "grad_norm": 0.2933219075202942, "learning_rate": 8.935067047562823e-06, "loss": 0.3715, "step": 14285 }, { "epoch": 1.4524196827978852, "grad_norm": 0.2851262390613556, "learning_rate": 8.934848095752571e-06, "loss": 0.4044, "step": 14286 }, { "epoch": 1.4525213501423342, "grad_norm": 0.2830337882041931, "learning_rate": 8.934629124119492e-06, "loss": 0.3555, "step": 14287 }, { "epoch": 1.4526230174867831, "grad_norm": 0.27702760696411133, "learning_rate": 8.934410132664687e-06, "loss": 0.3888, "step": 14288 }, { "epoch": 1.452724684831232, "grad_norm": 0.28673624992370605, "learning_rate": 8.934191121389256e-06, "loss": 0.3758, "step": 14289 }, { "epoch": 1.4528263521756812, "grad_norm": 0.27528032660484314, "learning_rate": 8.933972090294308e-06, "loss": 0.3986, "step": 14290 }, { "epoch": 1.4529280195201302, "grad_norm": 0.2865503132343292, "learning_rate": 8.93375303938094e-06, "loss": 0.3556, "step": 14291 }, { "epoch": 1.4530296868645791, "grad_norm": 0.26585325598716736, "learning_rate": 8.93353396865026e-06, "loss": 0.3659, "step": 14292 }, { "epoch": 1.453131354209028, "grad_norm": 0.2988428771495819, "learning_rate": 8.933314878103374e-06, "loss": 0.386, "step": 14293 }, { "epoch": 1.453233021553477, "grad_norm": 0.29651716351509094, "learning_rate": 8.93309576774138e-06, "loss": 0.3654, "step": 14294 }, { "epoch": 1.453334688897926, "grad_norm": 0.25605735182762146, "learning_rate": 8.932876637565384e-06, "loss": 0.3784, "step": 14295 }, { "epoch": 1.4534363562423749, "grad_norm": 0.2708863615989685, "learning_rate": 8.93265748757649e-06, "loss": 0.393, "step": 14296 }, { "epoch": 1.453538023586824, "grad_norm": 0.28657880425453186, "learning_rate": 8.932438317775803e-06, "loss": 0.3621, "step": 14297 }, { "epoch": 1.453639690931273, "grad_norm": 0.28232038021087646, "learning_rate": 8.932219128164426e-06, "loss": 0.3362, "step": 14298 }, { "epoch": 1.453741358275722, "grad_norm": 0.29474589228630066, "learning_rate": 8.931999918743462e-06, "loss": 0.3655, "step": 14299 }, { "epoch": 1.4538430256201709, "grad_norm": 0.28067752718925476, "learning_rate": 8.931780689514018e-06, "loss": 0.3831, "step": 14300 }, { "epoch": 1.4539446929646198, "grad_norm": 0.26329052448272705, "learning_rate": 8.931561440477196e-06, "loss": 0.3661, "step": 14301 }, { "epoch": 1.4540463603090688, "grad_norm": 0.2814580798149109, "learning_rate": 8.931342171634103e-06, "loss": 0.3439, "step": 14302 }, { "epoch": 1.4541480276535177, "grad_norm": 0.2686627507209778, "learning_rate": 8.931122882985844e-06, "loss": 0.3541, "step": 14303 }, { "epoch": 1.4542496949979666, "grad_norm": 0.28341159224510193, "learning_rate": 8.93090357453352e-06, "loss": 0.3619, "step": 14304 }, { "epoch": 1.4543513623424156, "grad_norm": 0.2736961245536804, "learning_rate": 8.930684246278238e-06, "loss": 0.3597, "step": 14305 }, { "epoch": 1.4544530296868645, "grad_norm": 0.2842445373535156, "learning_rate": 8.930464898221103e-06, "loss": 0.3411, "step": 14306 }, { "epoch": 1.4545546970313135, "grad_norm": 0.27425602078437805, "learning_rate": 8.93024553036322e-06, "loss": 0.3575, "step": 14307 }, { "epoch": 1.4546563643757624, "grad_norm": 0.29238423705101013, "learning_rate": 8.930026142705694e-06, "loss": 0.3469, "step": 14308 }, { "epoch": 1.4547580317202113, "grad_norm": 0.296277791261673, "learning_rate": 8.92980673524963e-06, "loss": 0.3647, "step": 14309 }, { "epoch": 1.4548596990646605, "grad_norm": 0.28782129287719727, "learning_rate": 8.929587307996134e-06, "loss": 0.3627, "step": 14310 }, { "epoch": 1.4549613664091094, "grad_norm": 0.29958391189575195, "learning_rate": 8.92936786094631e-06, "loss": 0.3394, "step": 14311 }, { "epoch": 1.4550630337535584, "grad_norm": 0.2828466296195984, "learning_rate": 8.929148394101266e-06, "loss": 0.3626, "step": 14312 }, { "epoch": 1.4551647010980073, "grad_norm": 0.27422383427619934, "learning_rate": 8.928928907462104e-06, "loss": 0.3464, "step": 14313 }, { "epoch": 1.4552663684424563, "grad_norm": 0.2944139242172241, "learning_rate": 8.928709401029934e-06, "loss": 0.3641, "step": 14314 }, { "epoch": 1.4553680357869052, "grad_norm": 0.29200536012649536, "learning_rate": 8.928489874805857e-06, "loss": 0.3662, "step": 14315 }, { "epoch": 1.4554697031313542, "grad_norm": 0.28109410405158997, "learning_rate": 8.928270328790985e-06, "loss": 0.368, "step": 14316 }, { "epoch": 1.455571370475803, "grad_norm": 0.28431233763694763, "learning_rate": 8.928050762986418e-06, "loss": 0.3579, "step": 14317 }, { "epoch": 1.4556730378202523, "grad_norm": 0.2890966534614563, "learning_rate": 8.927831177393266e-06, "loss": 0.3722, "step": 14318 }, { "epoch": 1.4557747051647012, "grad_norm": 0.2863020598888397, "learning_rate": 8.927611572012633e-06, "loss": 0.3263, "step": 14319 }, { "epoch": 1.4558763725091501, "grad_norm": 0.29906558990478516, "learning_rate": 8.927391946845627e-06, "loss": 0.3489, "step": 14320 }, { "epoch": 1.455978039853599, "grad_norm": 0.2796570956707001, "learning_rate": 8.927172301893354e-06, "loss": 0.3631, "step": 14321 }, { "epoch": 1.456079707198048, "grad_norm": 0.2885550856590271, "learning_rate": 8.926952637156917e-06, "loss": 0.367, "step": 14322 }, { "epoch": 1.456181374542497, "grad_norm": 0.2850455343723297, "learning_rate": 8.92673295263743e-06, "loss": 0.348, "step": 14323 }, { "epoch": 1.456283041886946, "grad_norm": 0.2926580607891083, "learning_rate": 8.926513248335995e-06, "loss": 0.3769, "step": 14324 }, { "epoch": 1.4563847092313948, "grad_norm": 0.3020246624946594, "learning_rate": 8.926293524253718e-06, "loss": 0.3711, "step": 14325 }, { "epoch": 1.4564863765758438, "grad_norm": 0.2884562015533447, "learning_rate": 8.926073780391707e-06, "loss": 0.3755, "step": 14326 }, { "epoch": 1.4565880439202927, "grad_norm": 0.30483731627464294, "learning_rate": 8.92585401675107e-06, "loss": 0.3455, "step": 14327 }, { "epoch": 1.4566897112647417, "grad_norm": 0.2784259021282196, "learning_rate": 8.925634233332912e-06, "loss": 0.3592, "step": 14328 }, { "epoch": 1.4567913786091906, "grad_norm": 0.27782079577445984, "learning_rate": 8.925414430138344e-06, "loss": 0.322, "step": 14329 }, { "epoch": 1.4568930459536396, "grad_norm": 0.289363831281662, "learning_rate": 8.925194607168469e-06, "loss": 0.3479, "step": 14330 }, { "epoch": 1.4569947132980887, "grad_norm": 0.2967557907104492, "learning_rate": 8.924974764424396e-06, "loss": 0.3719, "step": 14331 }, { "epoch": 1.4570963806425377, "grad_norm": 0.2677423059940338, "learning_rate": 8.924754901907235e-06, "loss": 0.3303, "step": 14332 }, { "epoch": 1.4571980479869866, "grad_norm": 0.2989218831062317, "learning_rate": 8.924535019618091e-06, "loss": 0.3756, "step": 14333 }, { "epoch": 1.4572997153314355, "grad_norm": 0.294475793838501, "learning_rate": 8.924315117558071e-06, "loss": 0.373, "step": 14334 }, { "epoch": 1.4574013826758845, "grad_norm": 0.28302496671676636, "learning_rate": 8.924095195728283e-06, "loss": 0.3573, "step": 14335 }, { "epoch": 1.4575030500203334, "grad_norm": 0.2937608063220978, "learning_rate": 8.92387525412984e-06, "loss": 0.3517, "step": 14336 }, { "epoch": 1.4576047173647824, "grad_norm": 0.2997077405452728, "learning_rate": 8.923655292763842e-06, "loss": 0.3814, "step": 14337 }, { "epoch": 1.4577063847092315, "grad_norm": 0.29191169142723083, "learning_rate": 8.923435311631403e-06, "loss": 0.353, "step": 14338 }, { "epoch": 1.4578080520536805, "grad_norm": 0.3267126977443695, "learning_rate": 8.923215310733629e-06, "loss": 0.3912, "step": 14339 }, { "epoch": 1.4579097193981294, "grad_norm": 0.28477245569229126, "learning_rate": 8.922995290071627e-06, "loss": 0.3815, "step": 14340 }, { "epoch": 1.4580113867425784, "grad_norm": 0.2711406648159027, "learning_rate": 8.922775249646509e-06, "loss": 0.3646, "step": 14341 }, { "epoch": 1.4581130540870273, "grad_norm": 0.30422431230545044, "learning_rate": 8.92255518945938e-06, "loss": 0.3388, "step": 14342 }, { "epoch": 1.4582147214314762, "grad_norm": 0.30081284046173096, "learning_rate": 8.922335109511352e-06, "loss": 0.3694, "step": 14343 }, { "epoch": 1.4583163887759252, "grad_norm": 0.2950604557991028, "learning_rate": 8.922115009803529e-06, "loss": 0.3415, "step": 14344 }, { "epoch": 1.4584180561203741, "grad_norm": 0.31750163435935974, "learning_rate": 8.921894890337024e-06, "loss": 0.3943, "step": 14345 }, { "epoch": 1.458519723464823, "grad_norm": 0.3362430930137634, "learning_rate": 8.921674751112946e-06, "loss": 0.3519, "step": 14346 }, { "epoch": 1.458621390809272, "grad_norm": 0.2962852716445923, "learning_rate": 8.9214545921324e-06, "loss": 0.3472, "step": 14347 }, { "epoch": 1.458723058153721, "grad_norm": 0.28081005811691284, "learning_rate": 8.9212344133965e-06, "loss": 0.3706, "step": 14348 }, { "epoch": 1.4588247254981699, "grad_norm": 0.3117392063140869, "learning_rate": 8.921014214906351e-06, "loss": 0.3494, "step": 14349 }, { "epoch": 1.4589263928426188, "grad_norm": 0.2964818775653839, "learning_rate": 8.920793996663064e-06, "loss": 0.3637, "step": 14350 }, { "epoch": 1.459028060187068, "grad_norm": 0.288843035697937, "learning_rate": 8.92057375866775e-06, "loss": 0.3482, "step": 14351 }, { "epoch": 1.459129727531517, "grad_norm": 0.31423938274383545, "learning_rate": 8.920353500921517e-06, "loss": 0.3694, "step": 14352 }, { "epoch": 1.4592313948759659, "grad_norm": 0.31045374274253845, "learning_rate": 8.920133223425475e-06, "loss": 0.3744, "step": 14353 }, { "epoch": 1.4593330622204148, "grad_norm": 0.2869940400123596, "learning_rate": 8.919912926180731e-06, "loss": 0.383, "step": 14354 }, { "epoch": 1.4594347295648638, "grad_norm": 0.3014403283596039, "learning_rate": 8.9196926091884e-06, "loss": 0.3661, "step": 14355 }, { "epoch": 1.4595363969093127, "grad_norm": 0.2875054180622101, "learning_rate": 8.919472272449587e-06, "loss": 0.3871, "step": 14356 }, { "epoch": 1.4596380642537616, "grad_norm": 0.2780150771141052, "learning_rate": 8.919251915965405e-06, "loss": 0.3435, "step": 14357 }, { "epoch": 1.4597397315982106, "grad_norm": 0.282764732837677, "learning_rate": 8.919031539736965e-06, "loss": 0.3357, "step": 14358 }, { "epoch": 1.4598413989426597, "grad_norm": 0.32057225704193115, "learning_rate": 8.918811143765373e-06, "loss": 0.3747, "step": 14359 }, { "epoch": 1.4599430662871087, "grad_norm": 0.3091498017311096, "learning_rate": 8.918590728051741e-06, "loss": 0.3756, "step": 14360 }, { "epoch": 1.4600447336315576, "grad_norm": 0.27013280987739563, "learning_rate": 8.918370292597183e-06, "loss": 0.3574, "step": 14361 }, { "epoch": 1.4601464009760066, "grad_norm": 0.2730168104171753, "learning_rate": 8.918149837402805e-06, "loss": 0.36, "step": 14362 }, { "epoch": 1.4602480683204555, "grad_norm": 0.31401968002319336, "learning_rate": 8.917929362469719e-06, "loss": 0.3529, "step": 14363 }, { "epoch": 1.4603497356649044, "grad_norm": 0.2873683273792267, "learning_rate": 8.917708867799035e-06, "loss": 0.3575, "step": 14364 }, { "epoch": 1.4604514030093534, "grad_norm": 0.28536534309387207, "learning_rate": 8.917488353391867e-06, "loss": 0.3567, "step": 14365 }, { "epoch": 1.4605530703538023, "grad_norm": 0.28016138076782227, "learning_rate": 8.917267819249323e-06, "loss": 0.3518, "step": 14366 }, { "epoch": 1.4606547376982513, "grad_norm": 0.2842016816139221, "learning_rate": 8.917047265372513e-06, "loss": 0.3523, "step": 14367 }, { "epoch": 1.4607564050427002, "grad_norm": 0.3016507923603058, "learning_rate": 8.916826691762552e-06, "loss": 0.3625, "step": 14368 }, { "epoch": 1.4608580723871492, "grad_norm": 0.28263071179389954, "learning_rate": 8.916606098420548e-06, "loss": 0.3508, "step": 14369 }, { "epoch": 1.460959739731598, "grad_norm": 0.30992719531059265, "learning_rate": 8.916385485347611e-06, "loss": 0.3952, "step": 14370 }, { "epoch": 1.461061407076047, "grad_norm": 0.30185365676879883, "learning_rate": 8.916164852544858e-06, "loss": 0.3307, "step": 14371 }, { "epoch": 1.4611630744204962, "grad_norm": 0.2840033769607544, "learning_rate": 8.915944200013396e-06, "loss": 0.3528, "step": 14372 }, { "epoch": 1.4612647417649451, "grad_norm": 0.3050205707550049, "learning_rate": 8.915723527754337e-06, "loss": 0.4052, "step": 14373 }, { "epoch": 1.461366409109394, "grad_norm": 0.31079885363578796, "learning_rate": 8.915502835768791e-06, "loss": 0.3433, "step": 14374 }, { "epoch": 1.461468076453843, "grad_norm": 0.320548415184021, "learning_rate": 8.915282124057877e-06, "loss": 0.359, "step": 14375 }, { "epoch": 1.461569743798292, "grad_norm": 0.29628750681877136, "learning_rate": 8.9150613926227e-06, "loss": 0.3546, "step": 14376 }, { "epoch": 1.461671411142741, "grad_norm": 0.2843707799911499, "learning_rate": 8.914840641464374e-06, "loss": 0.3559, "step": 14377 }, { "epoch": 1.4617730784871898, "grad_norm": 0.294071763753891, "learning_rate": 8.914619870584012e-06, "loss": 0.3844, "step": 14378 }, { "epoch": 1.461874745831639, "grad_norm": 0.27632641792297363, "learning_rate": 8.914399079982724e-06, "loss": 0.3541, "step": 14379 }, { "epoch": 1.461976413176088, "grad_norm": 0.3163352310657501, "learning_rate": 8.914178269661624e-06, "loss": 0.3564, "step": 14380 }, { "epoch": 1.462078080520537, "grad_norm": 0.2995322644710541, "learning_rate": 8.913957439621825e-06, "loss": 0.3406, "step": 14381 }, { "epoch": 1.4621797478649858, "grad_norm": 0.2879803478717804, "learning_rate": 8.913736589864436e-06, "loss": 0.3635, "step": 14382 }, { "epoch": 1.4622814152094348, "grad_norm": 0.2875989079475403, "learning_rate": 8.913515720390576e-06, "loss": 0.3533, "step": 14383 }, { "epoch": 1.4623830825538837, "grad_norm": 0.3013477325439453, "learning_rate": 8.91329483120135e-06, "loss": 0.3498, "step": 14384 }, { "epoch": 1.4624847498983327, "grad_norm": 0.33517980575561523, "learning_rate": 8.913073922297876e-06, "loss": 0.3967, "step": 14385 }, { "epoch": 1.4625864172427816, "grad_norm": 0.2911851108074188, "learning_rate": 8.912852993681267e-06, "loss": 0.3477, "step": 14386 }, { "epoch": 1.4626880845872305, "grad_norm": 0.2857772409915924, "learning_rate": 8.912632045352633e-06, "loss": 0.3886, "step": 14387 }, { "epoch": 1.4627897519316795, "grad_norm": 0.31277015805244446, "learning_rate": 8.912411077313088e-06, "loss": 0.3733, "step": 14388 }, { "epoch": 1.4628914192761284, "grad_norm": 0.29232946038246155, "learning_rate": 8.912190089563746e-06, "loss": 0.3576, "step": 14389 }, { "epoch": 1.4629930866205774, "grad_norm": 0.3205896317958832, "learning_rate": 8.911969082105721e-06, "loss": 0.4117, "step": 14390 }, { "epoch": 1.4630947539650263, "grad_norm": 0.2895199954509735, "learning_rate": 8.911748054940125e-06, "loss": 0.3756, "step": 14391 }, { "epoch": 1.4631964213094755, "grad_norm": 0.30220603942871094, "learning_rate": 8.911527008068072e-06, "loss": 0.3302, "step": 14392 }, { "epoch": 1.4632980886539244, "grad_norm": 0.2879564166069031, "learning_rate": 8.911305941490674e-06, "loss": 0.3923, "step": 14393 }, { "epoch": 1.4633997559983734, "grad_norm": 0.322444349527359, "learning_rate": 8.911084855209048e-06, "loss": 0.3811, "step": 14394 }, { "epoch": 1.4635014233428223, "grad_norm": 0.3124940097332001, "learning_rate": 8.910863749224305e-06, "loss": 0.355, "step": 14395 }, { "epoch": 1.4636030906872712, "grad_norm": 0.3167836666107178, "learning_rate": 8.91064262353756e-06, "loss": 0.3646, "step": 14396 }, { "epoch": 1.4637047580317202, "grad_norm": 0.2929312288761139, "learning_rate": 8.910421478149927e-06, "loss": 0.388, "step": 14397 }, { "epoch": 1.4638064253761691, "grad_norm": 0.26618459820747375, "learning_rate": 8.91020031306252e-06, "loss": 0.342, "step": 14398 }, { "epoch": 1.463908092720618, "grad_norm": 0.30812910199165344, "learning_rate": 8.909979128276452e-06, "loss": 0.3582, "step": 14399 }, { "epoch": 1.4640097600650672, "grad_norm": 0.32735660672187805, "learning_rate": 8.90975792379284e-06, "loss": 0.385, "step": 14400 }, { "epoch": 1.4641114274095162, "grad_norm": 0.27005094289779663, "learning_rate": 8.909536699612796e-06, "loss": 0.3701, "step": 14401 }, { "epoch": 1.464213094753965, "grad_norm": 0.2917141914367676, "learning_rate": 8.909315455737433e-06, "loss": 0.3713, "step": 14402 }, { "epoch": 1.464314762098414, "grad_norm": 0.315298855304718, "learning_rate": 8.90909419216787e-06, "loss": 0.3851, "step": 14403 }, { "epoch": 1.464416429442863, "grad_norm": 0.3107275664806366, "learning_rate": 8.90887290890522e-06, "loss": 0.3798, "step": 14404 }, { "epoch": 1.464518096787312, "grad_norm": 0.2902098596096039, "learning_rate": 8.908651605950595e-06, "loss": 0.3357, "step": 14405 }, { "epoch": 1.4646197641317609, "grad_norm": 0.31064245104789734, "learning_rate": 8.908430283305112e-06, "loss": 0.3657, "step": 14406 }, { "epoch": 1.4647214314762098, "grad_norm": 0.2922241687774658, "learning_rate": 8.908208940969888e-06, "loss": 0.4089, "step": 14407 }, { "epoch": 1.4648230988206588, "grad_norm": 0.2881573736667633, "learning_rate": 8.907987578946036e-06, "loss": 0.3915, "step": 14408 }, { "epoch": 1.4649247661651077, "grad_norm": 0.2805407643318176, "learning_rate": 8.907766197234671e-06, "loss": 0.3799, "step": 14409 }, { "epoch": 1.4650264335095566, "grad_norm": 0.2747337818145752, "learning_rate": 8.907544795836906e-06, "loss": 0.3488, "step": 14410 }, { "epoch": 1.4651281008540056, "grad_norm": 0.2944588363170624, "learning_rate": 8.907323374753862e-06, "loss": 0.3832, "step": 14411 }, { "epoch": 1.4652297681984545, "grad_norm": 0.2948090732097626, "learning_rate": 8.90710193398665e-06, "loss": 0.3953, "step": 14412 }, { "epoch": 1.4653314355429037, "grad_norm": 0.2786891460418701, "learning_rate": 8.906880473536388e-06, "loss": 0.3569, "step": 14413 }, { "epoch": 1.4654331028873526, "grad_norm": 0.2855495512485504, "learning_rate": 8.906658993404188e-06, "loss": 0.3821, "step": 14414 }, { "epoch": 1.4655347702318016, "grad_norm": 0.3140861690044403, "learning_rate": 8.906437493591172e-06, "loss": 0.3657, "step": 14415 }, { "epoch": 1.4656364375762505, "grad_norm": 0.2860969603061676, "learning_rate": 8.906215974098449e-06, "loss": 0.3846, "step": 14416 }, { "epoch": 1.4657381049206994, "grad_norm": 0.2688308358192444, "learning_rate": 8.905994434927142e-06, "loss": 0.3913, "step": 14417 }, { "epoch": 1.4658397722651484, "grad_norm": 0.2726898193359375, "learning_rate": 8.90577287607836e-06, "loss": 0.3642, "step": 14418 }, { "epoch": 1.4659414396095973, "grad_norm": 0.26992759108543396, "learning_rate": 8.905551297553222e-06, "loss": 0.3468, "step": 14419 }, { "epoch": 1.4660431069540465, "grad_norm": 0.2658892869949341, "learning_rate": 8.905329699352846e-06, "loss": 0.3385, "step": 14420 }, { "epoch": 1.4661447742984954, "grad_norm": 0.26549771428108215, "learning_rate": 8.905108081478346e-06, "loss": 0.399, "step": 14421 }, { "epoch": 1.4662464416429444, "grad_norm": 0.3105332851409912, "learning_rate": 8.904886443930842e-06, "loss": 0.3385, "step": 14422 }, { "epoch": 1.4663481089873933, "grad_norm": 0.28356727957725525, "learning_rate": 8.904664786711446e-06, "loss": 0.3484, "step": 14423 }, { "epoch": 1.4664497763318423, "grad_norm": 0.28849321603775024, "learning_rate": 8.904443109821277e-06, "loss": 0.3594, "step": 14424 }, { "epoch": 1.4665514436762912, "grad_norm": 0.3028770983219147, "learning_rate": 8.904221413261452e-06, "loss": 0.3727, "step": 14425 }, { "epoch": 1.4666531110207401, "grad_norm": 0.29401054978370667, "learning_rate": 8.903999697033086e-06, "loss": 0.3673, "step": 14426 }, { "epoch": 1.466754778365189, "grad_norm": 0.289083868265152, "learning_rate": 8.903777961137299e-06, "loss": 0.3747, "step": 14427 }, { "epoch": 1.466856445709638, "grad_norm": 0.29965275526046753, "learning_rate": 8.903556205575205e-06, "loss": 0.3399, "step": 14428 }, { "epoch": 1.466958113054087, "grad_norm": 0.320297509431839, "learning_rate": 8.903334430347923e-06, "loss": 0.389, "step": 14429 }, { "epoch": 1.467059780398536, "grad_norm": 0.2708207964897156, "learning_rate": 8.90311263545657e-06, "loss": 0.3379, "step": 14430 }, { "epoch": 1.4671614477429848, "grad_norm": 0.28847557306289673, "learning_rate": 8.902890820902262e-06, "loss": 0.4177, "step": 14431 }, { "epoch": 1.4672631150874338, "grad_norm": 0.3358694612979889, "learning_rate": 8.902668986686118e-06, "loss": 0.3577, "step": 14432 }, { "epoch": 1.467364782431883, "grad_norm": 0.3422568142414093, "learning_rate": 8.902447132809255e-06, "loss": 0.3585, "step": 14433 }, { "epoch": 1.467466449776332, "grad_norm": 0.29193204641342163, "learning_rate": 8.902225259272792e-06, "loss": 0.382, "step": 14434 }, { "epoch": 1.4675681171207808, "grad_norm": 0.3180067539215088, "learning_rate": 8.902003366077845e-06, "loss": 0.3772, "step": 14435 }, { "epoch": 1.4676697844652298, "grad_norm": 0.32252630591392517, "learning_rate": 8.90178145322553e-06, "loss": 0.3716, "step": 14436 }, { "epoch": 1.4677714518096787, "grad_norm": 0.30168578028678894, "learning_rate": 8.90155952071697e-06, "loss": 0.3676, "step": 14437 }, { "epoch": 1.4678731191541277, "grad_norm": 0.2751029431819916, "learning_rate": 8.901337568553278e-06, "loss": 0.3789, "step": 14438 }, { "epoch": 1.4679747864985766, "grad_norm": 0.28177064657211304, "learning_rate": 8.901115596735577e-06, "loss": 0.3639, "step": 14439 }, { "epoch": 1.4680764538430255, "grad_norm": 0.31272441148757935, "learning_rate": 8.900893605264981e-06, "loss": 0.358, "step": 14440 }, { "epoch": 1.4681781211874747, "grad_norm": 0.28964507579803467, "learning_rate": 8.900671594142611e-06, "loss": 0.3696, "step": 14441 }, { "epoch": 1.4682797885319236, "grad_norm": 0.29551705718040466, "learning_rate": 8.900449563369583e-06, "loss": 0.3693, "step": 14442 }, { "epoch": 1.4683814558763726, "grad_norm": 0.28675222396850586, "learning_rate": 8.900227512947019e-06, "loss": 0.3696, "step": 14443 }, { "epoch": 1.4684831232208215, "grad_norm": 0.29944607615470886, "learning_rate": 8.900005442876035e-06, "loss": 0.3768, "step": 14444 }, { "epoch": 1.4685847905652705, "grad_norm": 0.28775984048843384, "learning_rate": 8.899783353157749e-06, "loss": 0.3414, "step": 14445 }, { "epoch": 1.4686864579097194, "grad_norm": 0.2673867642879486, "learning_rate": 8.89956124379328e-06, "loss": 0.3516, "step": 14446 }, { "epoch": 1.4687881252541684, "grad_norm": 0.2696041166782379, "learning_rate": 8.899339114783752e-06, "loss": 0.3718, "step": 14447 }, { "epoch": 1.4688897925986173, "grad_norm": 0.2684459388256073, "learning_rate": 8.899116966130277e-06, "loss": 0.3538, "step": 14448 }, { "epoch": 1.4689914599430662, "grad_norm": 0.2859574258327484, "learning_rate": 8.89889479783398e-06, "loss": 0.3632, "step": 14449 }, { "epoch": 1.4690931272875152, "grad_norm": 0.2667829096317291, "learning_rate": 8.898672609895975e-06, "loss": 0.3257, "step": 14450 }, { "epoch": 1.4691947946319641, "grad_norm": 0.2754509449005127, "learning_rate": 8.898450402317385e-06, "loss": 0.344, "step": 14451 }, { "epoch": 1.469296461976413, "grad_norm": 0.2818847894668579, "learning_rate": 8.898228175099329e-06, "loss": 0.3721, "step": 14452 }, { "epoch": 1.469398129320862, "grad_norm": 0.2682322561740875, "learning_rate": 8.898005928242926e-06, "loss": 0.3224, "step": 14453 }, { "epoch": 1.4694997966653112, "grad_norm": 0.28575658798217773, "learning_rate": 8.897783661749293e-06, "loss": 0.3363, "step": 14454 }, { "epoch": 1.46960146400976, "grad_norm": 0.288133442401886, "learning_rate": 8.897561375619555e-06, "loss": 0.3538, "step": 14455 }, { "epoch": 1.469703131354209, "grad_norm": 0.2565400004386902, "learning_rate": 8.897339069854826e-06, "loss": 0.349, "step": 14456 }, { "epoch": 1.469804798698658, "grad_norm": 0.2895434498786926, "learning_rate": 8.897116744456231e-06, "loss": 0.3982, "step": 14457 }, { "epoch": 1.469906466043107, "grad_norm": 0.27801141142845154, "learning_rate": 8.896894399424888e-06, "loss": 0.3479, "step": 14458 }, { "epoch": 1.4700081333875559, "grad_norm": 0.278582364320755, "learning_rate": 8.896672034761916e-06, "loss": 0.3426, "step": 14459 }, { "epoch": 1.4701098007320048, "grad_norm": 0.2868203818798065, "learning_rate": 8.896449650468438e-06, "loss": 0.3509, "step": 14460 }, { "epoch": 1.470211468076454, "grad_norm": 0.29124972224235535, "learning_rate": 8.89622724654557e-06, "loss": 0.358, "step": 14461 }, { "epoch": 1.470313135420903, "grad_norm": 0.2919718623161316, "learning_rate": 8.896004822994437e-06, "loss": 0.3495, "step": 14462 }, { "epoch": 1.4704148027653519, "grad_norm": 0.27896901965141296, "learning_rate": 8.895782379816158e-06, "loss": 0.3596, "step": 14463 }, { "epoch": 1.4705164701098008, "grad_norm": 0.2807033061981201, "learning_rate": 8.895559917011851e-06, "loss": 0.3851, "step": 14464 }, { "epoch": 1.4706181374542497, "grad_norm": 0.2631434500217438, "learning_rate": 8.895337434582641e-06, "loss": 0.335, "step": 14465 }, { "epoch": 1.4707198047986987, "grad_norm": 0.28398603200912476, "learning_rate": 8.895114932529645e-06, "loss": 0.364, "step": 14466 }, { "epoch": 1.4708214721431476, "grad_norm": 0.28618478775024414, "learning_rate": 8.894892410853987e-06, "loss": 0.3675, "step": 14467 }, { "epoch": 1.4709231394875966, "grad_norm": 0.29461658000946045, "learning_rate": 8.894669869556785e-06, "loss": 0.3929, "step": 14468 }, { "epoch": 1.4710248068320455, "grad_norm": 0.30531275272369385, "learning_rate": 8.894447308639163e-06, "loss": 0.3557, "step": 14469 }, { "epoch": 1.4711264741764944, "grad_norm": 0.25949662923812866, "learning_rate": 8.894224728102241e-06, "loss": 0.3614, "step": 14470 }, { "epoch": 1.4712281415209434, "grad_norm": 0.28624412417411804, "learning_rate": 8.89400212794714e-06, "loss": 0.3584, "step": 14471 }, { "epoch": 1.4713298088653923, "grad_norm": 0.27560192346572876, "learning_rate": 8.89377950817498e-06, "loss": 0.3703, "step": 14472 }, { "epoch": 1.4714314762098413, "grad_norm": 0.3031620383262634, "learning_rate": 8.893556868786885e-06, "loss": 0.3835, "step": 14473 }, { "epoch": 1.4715331435542904, "grad_norm": 0.3016737699508667, "learning_rate": 8.893334209783977e-06, "loss": 0.3753, "step": 14474 }, { "epoch": 1.4716348108987394, "grad_norm": 0.269298791885376, "learning_rate": 8.893111531167376e-06, "loss": 0.3427, "step": 14475 }, { "epoch": 1.4717364782431883, "grad_norm": 0.2750404477119446, "learning_rate": 8.892888832938205e-06, "loss": 0.3566, "step": 14476 }, { "epoch": 1.4718381455876373, "grad_norm": 0.27081984281539917, "learning_rate": 8.892666115097584e-06, "loss": 0.3859, "step": 14477 }, { "epoch": 1.4719398129320862, "grad_norm": 0.297309011220932, "learning_rate": 8.892443377646635e-06, "loss": 0.4076, "step": 14478 }, { "epoch": 1.4720414802765351, "grad_norm": 0.2755544185638428, "learning_rate": 8.892220620586482e-06, "loss": 0.3792, "step": 14479 }, { "epoch": 1.472143147620984, "grad_norm": 0.2821081876754761, "learning_rate": 8.891997843918249e-06, "loss": 0.3667, "step": 14480 }, { "epoch": 1.472244814965433, "grad_norm": 0.30097365379333496, "learning_rate": 8.891775047643054e-06, "loss": 0.353, "step": 14481 }, { "epoch": 1.4723464823098822, "grad_norm": 0.2683669924736023, "learning_rate": 8.89155223176202e-06, "loss": 0.3584, "step": 14482 }, { "epoch": 1.4724481496543311, "grad_norm": 0.28407952189445496, "learning_rate": 8.891329396276272e-06, "loss": 0.369, "step": 14483 }, { "epoch": 1.47254981699878, "grad_norm": 0.2879171669483185, "learning_rate": 8.89110654118693e-06, "loss": 0.3584, "step": 14484 }, { "epoch": 1.472651484343229, "grad_norm": 0.3131944537162781, "learning_rate": 8.890883666495118e-06, "loss": 0.3622, "step": 14485 }, { "epoch": 1.472753151687678, "grad_norm": 0.2737542986869812, "learning_rate": 8.89066077220196e-06, "loss": 0.3282, "step": 14486 }, { "epoch": 1.472854819032127, "grad_norm": 0.26218491792678833, "learning_rate": 8.890437858308577e-06, "loss": 0.3475, "step": 14487 }, { "epoch": 1.4729564863765758, "grad_norm": 0.26761239767074585, "learning_rate": 8.890214924816091e-06, "loss": 0.3644, "step": 14488 }, { "epoch": 1.4730581537210248, "grad_norm": 0.2717932164669037, "learning_rate": 8.88999197172563e-06, "loss": 0.3471, "step": 14489 }, { "epoch": 1.4731598210654737, "grad_norm": 0.26609542965888977, "learning_rate": 8.889768999038312e-06, "loss": 0.3629, "step": 14490 }, { "epoch": 1.4732614884099227, "grad_norm": 0.3006152808666229, "learning_rate": 8.889546006755262e-06, "loss": 0.3294, "step": 14491 }, { "epoch": 1.4733631557543716, "grad_norm": 0.29292112588882446, "learning_rate": 8.889322994877603e-06, "loss": 0.3832, "step": 14492 }, { "epoch": 1.4734648230988205, "grad_norm": 0.2924340069293976, "learning_rate": 8.88909996340646e-06, "loss": 0.3959, "step": 14493 }, { "epoch": 1.4735664904432695, "grad_norm": 0.2806929349899292, "learning_rate": 8.888876912342957e-06, "loss": 0.3727, "step": 14494 }, { "epoch": 1.4736681577877186, "grad_norm": 0.28179189562797546, "learning_rate": 8.888653841688213e-06, "loss": 0.3667, "step": 14495 }, { "epoch": 1.4737698251321676, "grad_norm": 0.29544737935066223, "learning_rate": 8.888430751443358e-06, "loss": 0.407, "step": 14496 }, { "epoch": 1.4738714924766165, "grad_norm": 0.2978214919567108, "learning_rate": 8.88820764160951e-06, "loss": 0.3661, "step": 14497 }, { "epoch": 1.4739731598210655, "grad_norm": 0.3043769299983978, "learning_rate": 8.887984512187797e-06, "loss": 0.3732, "step": 14498 }, { "epoch": 1.4740748271655144, "grad_norm": 0.27619463205337524, "learning_rate": 8.887761363179344e-06, "loss": 0.3671, "step": 14499 }, { "epoch": 1.4741764945099634, "grad_norm": 0.26224803924560547, "learning_rate": 8.88753819458527e-06, "loss": 0.3411, "step": 14500 }, { "epoch": 1.4742781618544123, "grad_norm": 0.2840985953807831, "learning_rate": 8.887315006406705e-06, "loss": 0.3569, "step": 14501 }, { "epoch": 1.4743798291988615, "grad_norm": 0.27857276797294617, "learning_rate": 8.88709179864477e-06, "loss": 0.3677, "step": 14502 }, { "epoch": 1.4744814965433104, "grad_norm": 0.29719310998916626, "learning_rate": 8.886868571300589e-06, "loss": 0.3857, "step": 14503 }, { "epoch": 1.4745831638877593, "grad_norm": 0.2687655985355377, "learning_rate": 8.886645324375288e-06, "loss": 0.3676, "step": 14504 }, { "epoch": 1.4746848312322083, "grad_norm": 0.27908119559288025, "learning_rate": 8.886422057869993e-06, "loss": 0.3618, "step": 14505 }, { "epoch": 1.4747864985766572, "grad_norm": 0.29156166315078735, "learning_rate": 8.886198771785826e-06, "loss": 0.3706, "step": 14506 }, { "epoch": 1.4748881659211062, "grad_norm": 0.3112714886665344, "learning_rate": 8.885975466123912e-06, "loss": 0.3695, "step": 14507 }, { "epoch": 1.474989833265555, "grad_norm": 0.28080296516418457, "learning_rate": 8.88575214088538e-06, "loss": 0.3575, "step": 14508 }, { "epoch": 1.475091500610004, "grad_norm": 0.2701580226421356, "learning_rate": 8.88552879607135e-06, "loss": 0.3626, "step": 14509 }, { "epoch": 1.475193167954453, "grad_norm": 0.295526921749115, "learning_rate": 8.885305431682948e-06, "loss": 0.3387, "step": 14510 }, { "epoch": 1.475294835298902, "grad_norm": 0.29392388463020325, "learning_rate": 8.885082047721304e-06, "loss": 0.3616, "step": 14511 }, { "epoch": 1.4753965026433509, "grad_norm": 0.2807263433933258, "learning_rate": 8.884858644187539e-06, "loss": 0.3753, "step": 14512 }, { "epoch": 1.4754981699877998, "grad_norm": 0.3023449778556824, "learning_rate": 8.884635221082777e-06, "loss": 0.3904, "step": 14513 }, { "epoch": 1.4755998373322488, "grad_norm": 0.2999721169471741, "learning_rate": 8.884411778408148e-06, "loss": 0.3814, "step": 14514 }, { "epoch": 1.475701504676698, "grad_norm": 0.2827831208705902, "learning_rate": 8.884188316164774e-06, "loss": 0.3327, "step": 14515 }, { "epoch": 1.4758031720211469, "grad_norm": 0.2827482223510742, "learning_rate": 8.883964834353784e-06, "loss": 0.3524, "step": 14516 }, { "epoch": 1.4759048393655958, "grad_norm": 0.2834818661212921, "learning_rate": 8.883741332976302e-06, "loss": 0.3731, "step": 14517 }, { "epoch": 1.4760065067100447, "grad_norm": 0.28540217876434326, "learning_rate": 8.883517812033453e-06, "loss": 0.3707, "step": 14518 }, { "epoch": 1.4761081740544937, "grad_norm": 0.285470187664032, "learning_rate": 8.883294271526366e-06, "loss": 0.3486, "step": 14519 }, { "epoch": 1.4762098413989426, "grad_norm": 0.28879156708717346, "learning_rate": 8.883070711456164e-06, "loss": 0.3531, "step": 14520 }, { "epoch": 1.4763115087433916, "grad_norm": 0.2745317220687866, "learning_rate": 8.882847131823972e-06, "loss": 0.3777, "step": 14521 }, { "epoch": 1.4764131760878405, "grad_norm": 0.2870258390903473, "learning_rate": 8.882623532630922e-06, "loss": 0.3549, "step": 14522 }, { "epoch": 1.4765148434322897, "grad_norm": 0.2679085433483124, "learning_rate": 8.882399913878137e-06, "loss": 0.3274, "step": 14523 }, { "epoch": 1.4766165107767386, "grad_norm": 0.294060617685318, "learning_rate": 8.882176275566744e-06, "loss": 0.3547, "step": 14524 }, { "epoch": 1.4767181781211876, "grad_norm": 0.2832733988761902, "learning_rate": 8.881952617697868e-06, "loss": 0.3481, "step": 14525 }, { "epoch": 1.4768198454656365, "grad_norm": 0.302969753742218, "learning_rate": 8.881728940272638e-06, "loss": 0.3862, "step": 14526 }, { "epoch": 1.4769215128100854, "grad_norm": 0.29157984256744385, "learning_rate": 8.88150524329218e-06, "loss": 0.3571, "step": 14527 }, { "epoch": 1.4770231801545344, "grad_norm": 0.3060929477214813, "learning_rate": 8.881281526757622e-06, "loss": 0.3667, "step": 14528 }, { "epoch": 1.4771248474989833, "grad_norm": 0.29599758982658386, "learning_rate": 8.881057790670089e-06, "loss": 0.3548, "step": 14529 }, { "epoch": 1.4772265148434323, "grad_norm": 0.27254077792167664, "learning_rate": 8.88083403503071e-06, "loss": 0.343, "step": 14530 }, { "epoch": 1.4773281821878812, "grad_norm": 0.28975367546081543, "learning_rate": 8.88061025984061e-06, "loss": 0.3648, "step": 14531 }, { "epoch": 1.4774298495323301, "grad_norm": 0.3338891565799713, "learning_rate": 8.880386465100918e-06, "loss": 0.3993, "step": 14532 }, { "epoch": 1.477531516876779, "grad_norm": 0.2829703688621521, "learning_rate": 8.880162650812762e-06, "loss": 0.3831, "step": 14533 }, { "epoch": 1.477633184221228, "grad_norm": 0.2755025327205658, "learning_rate": 8.879938816977267e-06, "loss": 0.3569, "step": 14534 }, { "epoch": 1.4777348515656772, "grad_norm": 0.3414791524410248, "learning_rate": 8.879714963595564e-06, "loss": 0.3672, "step": 14535 }, { "epoch": 1.4778365189101261, "grad_norm": 0.260317862033844, "learning_rate": 8.879491090668777e-06, "loss": 0.3358, "step": 14536 }, { "epoch": 1.477938186254575, "grad_norm": 0.2882614731788635, "learning_rate": 8.879267198198038e-06, "loss": 0.3511, "step": 14537 }, { "epoch": 1.478039853599024, "grad_norm": 0.285621702671051, "learning_rate": 8.879043286184472e-06, "loss": 0.4048, "step": 14538 }, { "epoch": 1.478141520943473, "grad_norm": 0.28447091579437256, "learning_rate": 8.878819354629208e-06, "loss": 0.3801, "step": 14539 }, { "epoch": 1.478243188287922, "grad_norm": 0.29337596893310547, "learning_rate": 8.878595403533372e-06, "loss": 0.3659, "step": 14540 }, { "epoch": 1.4783448556323708, "grad_norm": 0.2555970251560211, "learning_rate": 8.878371432898096e-06, "loss": 0.3693, "step": 14541 }, { "epoch": 1.4784465229768198, "grad_norm": 0.27688777446746826, "learning_rate": 8.878147442724505e-06, "loss": 0.3558, "step": 14542 }, { "epoch": 1.478548190321269, "grad_norm": 0.29398202896118164, "learning_rate": 8.87792343301373e-06, "loss": 0.3522, "step": 14543 }, { "epoch": 1.4786498576657179, "grad_norm": 0.2956644594669342, "learning_rate": 8.877699403766898e-06, "loss": 0.3664, "step": 14544 }, { "epoch": 1.4787515250101668, "grad_norm": 0.28035712242126465, "learning_rate": 8.877475354985138e-06, "loss": 0.3551, "step": 14545 }, { "epoch": 1.4788531923546158, "grad_norm": 0.281688392162323, "learning_rate": 8.877251286669578e-06, "loss": 0.3638, "step": 14546 }, { "epoch": 1.4789548596990647, "grad_norm": 0.28461042046546936, "learning_rate": 8.877027198821349e-06, "loss": 0.3677, "step": 14547 }, { "epoch": 1.4790565270435136, "grad_norm": 0.2815054953098297, "learning_rate": 8.876803091441577e-06, "loss": 0.3625, "step": 14548 }, { "epoch": 1.4791581943879626, "grad_norm": 0.29431429505348206, "learning_rate": 8.87657896453139e-06, "loss": 0.3504, "step": 14549 }, { "epoch": 1.4792598617324115, "grad_norm": 0.28035831451416016, "learning_rate": 8.876354818091923e-06, "loss": 0.3924, "step": 14550 }, { "epoch": 1.4793615290768605, "grad_norm": 0.28507593274116516, "learning_rate": 8.8761306521243e-06, "loss": 0.3701, "step": 14551 }, { "epoch": 1.4794631964213094, "grad_norm": 0.2979535460472107, "learning_rate": 8.875906466629652e-06, "loss": 0.365, "step": 14552 }, { "epoch": 1.4795648637657584, "grad_norm": 0.27080875635147095, "learning_rate": 8.875682261609109e-06, "loss": 0.3633, "step": 14553 }, { "epoch": 1.4796665311102073, "grad_norm": 0.2767167091369629, "learning_rate": 8.875458037063799e-06, "loss": 0.3381, "step": 14554 }, { "epoch": 1.4797681984546562, "grad_norm": 0.28090012073516846, "learning_rate": 8.875233792994852e-06, "loss": 0.3526, "step": 14555 }, { "epoch": 1.4798698657991054, "grad_norm": 0.3013513386249542, "learning_rate": 8.875009529403398e-06, "loss": 0.3943, "step": 14556 }, { "epoch": 1.4799715331435543, "grad_norm": 0.29767629504203796, "learning_rate": 8.87478524629057e-06, "loss": 0.3491, "step": 14557 }, { "epoch": 1.4800732004880033, "grad_norm": 0.26891809701919556, "learning_rate": 8.87456094365749e-06, "loss": 0.3404, "step": 14558 }, { "epoch": 1.4801748678324522, "grad_norm": 0.2831932008266449, "learning_rate": 8.874336621505295e-06, "loss": 0.3474, "step": 14559 }, { "epoch": 1.4802765351769012, "grad_norm": 0.3141353726387024, "learning_rate": 8.874112279835113e-06, "loss": 0.3976, "step": 14560 }, { "epoch": 1.48037820252135, "grad_norm": 0.2885630428791046, "learning_rate": 8.873887918648075e-06, "loss": 0.3681, "step": 14561 }, { "epoch": 1.480479869865799, "grad_norm": 0.2900181710720062, "learning_rate": 8.873663537945307e-06, "loss": 0.3732, "step": 14562 }, { "epoch": 1.480581537210248, "grad_norm": 0.2906230092048645, "learning_rate": 8.873439137727944e-06, "loss": 0.3662, "step": 14563 }, { "epoch": 1.4806832045546972, "grad_norm": 0.293408066034317, "learning_rate": 8.873214717997117e-06, "loss": 0.3753, "step": 14564 }, { "epoch": 1.480784871899146, "grad_norm": 0.2788698673248291, "learning_rate": 8.872990278753953e-06, "loss": 0.3687, "step": 14565 }, { "epoch": 1.480886539243595, "grad_norm": 0.28673988580703735, "learning_rate": 8.872765819999584e-06, "loss": 0.3904, "step": 14566 }, { "epoch": 1.480988206588044, "grad_norm": 0.3170926868915558, "learning_rate": 8.872541341735142e-06, "loss": 0.3446, "step": 14567 }, { "epoch": 1.481089873932493, "grad_norm": 0.2772383987903595, "learning_rate": 8.872316843961757e-06, "loss": 0.3675, "step": 14568 }, { "epoch": 1.4811915412769419, "grad_norm": 0.30921995639801025, "learning_rate": 8.87209232668056e-06, "loss": 0.3689, "step": 14569 }, { "epoch": 1.4812932086213908, "grad_norm": 0.28840476274490356, "learning_rate": 8.871867789892681e-06, "loss": 0.3353, "step": 14570 }, { "epoch": 1.4813948759658397, "grad_norm": 0.2949240803718567, "learning_rate": 8.871643233599252e-06, "loss": 0.3655, "step": 14571 }, { "epoch": 1.4814965433102887, "grad_norm": 0.30967938899993896, "learning_rate": 8.871418657801405e-06, "loss": 0.3828, "step": 14572 }, { "epoch": 1.4815982106547376, "grad_norm": 0.2909989058971405, "learning_rate": 8.871194062500272e-06, "loss": 0.3818, "step": 14573 }, { "epoch": 1.4816998779991866, "grad_norm": 0.2799537777900696, "learning_rate": 8.870969447696983e-06, "loss": 0.3542, "step": 14574 }, { "epoch": 1.4818015453436355, "grad_norm": 0.297105073928833, "learning_rate": 8.870744813392668e-06, "loss": 0.3371, "step": 14575 }, { "epoch": 1.4819032126880847, "grad_norm": 0.276568740606308, "learning_rate": 8.87052015958846e-06, "loss": 0.3159, "step": 14576 }, { "epoch": 1.4820048800325336, "grad_norm": 0.29663437604904175, "learning_rate": 8.870295486285495e-06, "loss": 0.3704, "step": 14577 }, { "epoch": 1.4821065473769826, "grad_norm": 0.260008305311203, "learning_rate": 8.870070793484898e-06, "loss": 0.3353, "step": 14578 }, { "epoch": 1.4822082147214315, "grad_norm": 0.2537987232208252, "learning_rate": 8.869846081187805e-06, "loss": 0.3218, "step": 14579 }, { "epoch": 1.4823098820658804, "grad_norm": 0.3098384737968445, "learning_rate": 8.869621349395346e-06, "loss": 0.3344, "step": 14580 }, { "epoch": 1.4824115494103294, "grad_norm": 0.30401068925857544, "learning_rate": 8.869396598108656e-06, "loss": 0.3782, "step": 14581 }, { "epoch": 1.4825132167547783, "grad_norm": 0.2931353747844696, "learning_rate": 8.869171827328864e-06, "loss": 0.3995, "step": 14582 }, { "epoch": 1.4826148840992273, "grad_norm": 0.28675687313079834, "learning_rate": 8.868947037057105e-06, "loss": 0.3413, "step": 14583 }, { "epoch": 1.4827165514436764, "grad_norm": 0.2918716371059418, "learning_rate": 8.868722227294509e-06, "loss": 0.3584, "step": 14584 }, { "epoch": 1.4828182187881254, "grad_norm": 0.28501802682876587, "learning_rate": 8.86849739804221e-06, "loss": 0.3679, "step": 14585 }, { "epoch": 1.4829198861325743, "grad_norm": 0.25771909952163696, "learning_rate": 8.86827254930134e-06, "loss": 0.3512, "step": 14586 }, { "epoch": 1.4830215534770232, "grad_norm": 0.27763593196868896, "learning_rate": 8.868047681073034e-06, "loss": 0.3513, "step": 14587 }, { "epoch": 1.4831232208214722, "grad_norm": 0.3016856014728546, "learning_rate": 8.867822793358421e-06, "loss": 0.3704, "step": 14588 }, { "epoch": 1.4832248881659211, "grad_norm": 0.28699082136154175, "learning_rate": 8.867597886158637e-06, "loss": 0.3812, "step": 14589 }, { "epoch": 1.48332655551037, "grad_norm": 0.31808528304100037, "learning_rate": 8.867372959474813e-06, "loss": 0.4014, "step": 14590 }, { "epoch": 1.483428222854819, "grad_norm": 0.2616981863975525, "learning_rate": 8.867148013308085e-06, "loss": 0.35, "step": 14591 }, { "epoch": 1.483529890199268, "grad_norm": 0.29400521516799927, "learning_rate": 8.866923047659582e-06, "loss": 0.3328, "step": 14592 }, { "epoch": 1.483631557543717, "grad_norm": 0.2884598970413208, "learning_rate": 8.866698062530441e-06, "loss": 0.3547, "step": 14593 }, { "epoch": 1.4837332248881658, "grad_norm": 0.2858816385269165, "learning_rate": 8.866473057921793e-06, "loss": 0.3697, "step": 14594 }, { "epoch": 1.4838348922326148, "grad_norm": 0.2872108221054077, "learning_rate": 8.866248033834775e-06, "loss": 0.3859, "step": 14595 }, { "epoch": 1.4839365595770637, "grad_norm": 0.28386157751083374, "learning_rate": 8.866022990270514e-06, "loss": 0.3579, "step": 14596 }, { "epoch": 1.4840382269215129, "grad_norm": 0.3102026581764221, "learning_rate": 8.865797927230151e-06, "loss": 0.3695, "step": 14597 }, { "epoch": 1.4841398942659618, "grad_norm": 0.2631908655166626, "learning_rate": 8.865572844714817e-06, "loss": 0.3256, "step": 14598 }, { "epoch": 1.4842415616104108, "grad_norm": 0.29375970363616943, "learning_rate": 8.865347742725644e-06, "loss": 0.3508, "step": 14599 }, { "epoch": 1.4843432289548597, "grad_norm": 0.29102855920791626, "learning_rate": 8.865122621263768e-06, "loss": 0.3595, "step": 14600 }, { "epoch": 1.4844448962993086, "grad_norm": 0.2998178005218506, "learning_rate": 8.864897480330323e-06, "loss": 0.3574, "step": 14601 }, { "epoch": 1.4845465636437576, "grad_norm": 0.26370880007743835, "learning_rate": 8.864672319926443e-06, "loss": 0.338, "step": 14602 }, { "epoch": 1.4846482309882065, "grad_norm": 0.2750309705734253, "learning_rate": 8.864447140053262e-06, "loss": 0.3355, "step": 14603 }, { "epoch": 1.4847498983326555, "grad_norm": 0.28523796796798706, "learning_rate": 8.864221940711914e-06, "loss": 0.3703, "step": 14604 }, { "epoch": 1.4848515656771046, "grad_norm": 0.28213703632354736, "learning_rate": 8.863996721903536e-06, "loss": 0.3539, "step": 14605 }, { "epoch": 1.4849532330215536, "grad_norm": 0.2963048219680786, "learning_rate": 8.863771483629259e-06, "loss": 0.3513, "step": 14606 }, { "epoch": 1.4850549003660025, "grad_norm": 0.3184901773929596, "learning_rate": 8.86354622589022e-06, "loss": 0.3438, "step": 14607 }, { "epoch": 1.4851565677104515, "grad_norm": 0.27843528985977173, "learning_rate": 8.863320948687553e-06, "loss": 0.3717, "step": 14608 }, { "epoch": 1.4852582350549004, "grad_norm": 0.29204103350639343, "learning_rate": 8.863095652022393e-06, "loss": 0.363, "step": 14609 }, { "epoch": 1.4853599023993493, "grad_norm": 0.29810917377471924, "learning_rate": 8.862870335895876e-06, "loss": 0.3358, "step": 14610 }, { "epoch": 1.4854615697437983, "grad_norm": 0.28017881512641907, "learning_rate": 8.862645000309135e-06, "loss": 0.356, "step": 14611 }, { "epoch": 1.4855632370882472, "grad_norm": 0.2821309268474579, "learning_rate": 8.862419645263308e-06, "loss": 0.3604, "step": 14612 }, { "epoch": 1.4856649044326962, "grad_norm": 0.28767919540405273, "learning_rate": 8.862194270759528e-06, "loss": 0.4138, "step": 14613 }, { "epoch": 1.485766571777145, "grad_norm": 0.30386003851890564, "learning_rate": 8.86196887679893e-06, "loss": 0.3646, "step": 14614 }, { "epoch": 1.485868239121594, "grad_norm": 0.3074682950973511, "learning_rate": 8.861743463382653e-06, "loss": 0.3503, "step": 14615 }, { "epoch": 1.485969906466043, "grad_norm": 0.28399643301963806, "learning_rate": 8.861518030511827e-06, "loss": 0.3635, "step": 14616 }, { "epoch": 1.4860715738104922, "grad_norm": 0.2828119695186615, "learning_rate": 8.861292578187593e-06, "loss": 0.3565, "step": 14617 }, { "epoch": 1.486173241154941, "grad_norm": 0.28044217824935913, "learning_rate": 8.861067106411083e-06, "loss": 0.3974, "step": 14618 }, { "epoch": 1.48627490849939, "grad_norm": 0.2766914665699005, "learning_rate": 8.860841615183435e-06, "loss": 0.3743, "step": 14619 }, { "epoch": 1.486376575843839, "grad_norm": 0.31087571382522583, "learning_rate": 8.860616104505787e-06, "loss": 0.3629, "step": 14620 }, { "epoch": 1.486478243188288, "grad_norm": 0.2642762064933777, "learning_rate": 8.860390574379269e-06, "loss": 0.3852, "step": 14621 }, { "epoch": 1.4865799105327369, "grad_norm": 0.2846660614013672, "learning_rate": 8.860165024805022e-06, "loss": 0.3746, "step": 14622 }, { "epoch": 1.4866815778771858, "grad_norm": 0.2967231869697571, "learning_rate": 8.859939455784182e-06, "loss": 0.3916, "step": 14623 }, { "epoch": 1.4867832452216347, "grad_norm": 0.2995258867740631, "learning_rate": 8.859713867317882e-06, "loss": 0.3803, "step": 14624 }, { "epoch": 1.486884912566084, "grad_norm": 0.2926611006259918, "learning_rate": 8.859488259407261e-06, "loss": 0.3611, "step": 14625 }, { "epoch": 1.4869865799105328, "grad_norm": 0.28905200958251953, "learning_rate": 8.859262632053455e-06, "loss": 0.3438, "step": 14626 }, { "epoch": 1.4870882472549818, "grad_norm": 0.2698694169521332, "learning_rate": 8.859036985257604e-06, "loss": 0.3425, "step": 14627 }, { "epoch": 1.4871899145994307, "grad_norm": 0.28806763887405396, "learning_rate": 8.858811319020839e-06, "loss": 0.3435, "step": 14628 }, { "epoch": 1.4872915819438797, "grad_norm": 0.2724127471446991, "learning_rate": 8.858585633344301e-06, "loss": 0.3577, "step": 14629 }, { "epoch": 1.4873932492883286, "grad_norm": 0.27188166975975037, "learning_rate": 8.858359928229125e-06, "loss": 0.3339, "step": 14630 }, { "epoch": 1.4874949166327776, "grad_norm": 0.2874075174331665, "learning_rate": 8.858134203676449e-06, "loss": 0.3555, "step": 14631 }, { "epoch": 1.4875965839772265, "grad_norm": 0.27924981713294983, "learning_rate": 8.857908459687409e-06, "loss": 0.3503, "step": 14632 }, { "epoch": 1.4876982513216754, "grad_norm": 0.30201518535614014, "learning_rate": 8.857682696263143e-06, "loss": 0.3545, "step": 14633 }, { "epoch": 1.4877999186661244, "grad_norm": 0.28357887268066406, "learning_rate": 8.857456913404788e-06, "loss": 0.3393, "step": 14634 }, { "epoch": 1.4879015860105733, "grad_norm": 0.3203970789909363, "learning_rate": 8.857231111113482e-06, "loss": 0.4145, "step": 14635 }, { "epoch": 1.4880032533550223, "grad_norm": 0.28014329075813293, "learning_rate": 8.857005289390364e-06, "loss": 0.3338, "step": 14636 }, { "epoch": 1.4881049206994712, "grad_norm": 0.2852352559566498, "learning_rate": 8.856779448236568e-06, "loss": 0.3654, "step": 14637 }, { "epoch": 1.4882065880439204, "grad_norm": 0.2756892740726471, "learning_rate": 8.856553587653233e-06, "loss": 0.3296, "step": 14638 }, { "epoch": 1.4883082553883693, "grad_norm": 0.3081374764442444, "learning_rate": 8.8563277076415e-06, "loss": 0.3407, "step": 14639 }, { "epoch": 1.4884099227328182, "grad_norm": 0.28148913383483887, "learning_rate": 8.856101808202501e-06, "loss": 0.3593, "step": 14640 }, { "epoch": 1.4885115900772672, "grad_norm": 0.2877155840396881, "learning_rate": 8.85587588933738e-06, "loss": 0.3884, "step": 14641 }, { "epoch": 1.4886132574217161, "grad_norm": 0.3141050934791565, "learning_rate": 8.855649951047273e-06, "loss": 0.3857, "step": 14642 }, { "epoch": 1.488714924766165, "grad_norm": 0.2913910448551178, "learning_rate": 8.855423993333316e-06, "loss": 0.3815, "step": 14643 }, { "epoch": 1.488816592110614, "grad_norm": 0.2804150879383087, "learning_rate": 8.85519801619665e-06, "loss": 0.3782, "step": 14644 }, { "epoch": 1.488918259455063, "grad_norm": 0.27475401759147644, "learning_rate": 8.854972019638412e-06, "loss": 0.3649, "step": 14645 }, { "epoch": 1.4890199267995121, "grad_norm": 0.3099752366542816, "learning_rate": 8.854746003659742e-06, "loss": 0.3481, "step": 14646 }, { "epoch": 1.489121594143961, "grad_norm": 0.33009007573127747, "learning_rate": 8.854519968261776e-06, "loss": 0.4011, "step": 14647 }, { "epoch": 1.48922326148841, "grad_norm": 0.2906382381916046, "learning_rate": 8.854293913445657e-06, "loss": 0.3478, "step": 14648 }, { "epoch": 1.489324928832859, "grad_norm": 0.3125210702419281, "learning_rate": 8.85406783921252e-06, "loss": 0.3863, "step": 14649 }, { "epoch": 1.4894265961773079, "grad_norm": 0.32379230856895447, "learning_rate": 8.853841745563505e-06, "loss": 0.3529, "step": 14650 }, { "epoch": 1.4895282635217568, "grad_norm": 0.2824117839336395, "learning_rate": 8.85361563249975e-06, "loss": 0.3491, "step": 14651 }, { "epoch": 1.4896299308662058, "grad_norm": 0.30534330010414124, "learning_rate": 8.853389500022396e-06, "loss": 0.3385, "step": 14652 }, { "epoch": 1.4897315982106547, "grad_norm": 0.2711506485939026, "learning_rate": 8.85316334813258e-06, "loss": 0.3649, "step": 14653 }, { "epoch": 1.4898332655551036, "grad_norm": 0.27887648344039917, "learning_rate": 8.852937176831446e-06, "loss": 0.3721, "step": 14654 }, { "epoch": 1.4899349328995526, "grad_norm": 0.28999266028404236, "learning_rate": 8.852710986120128e-06, "loss": 0.3516, "step": 14655 }, { "epoch": 1.4900366002440015, "grad_norm": 0.26665711402893066, "learning_rate": 8.852484775999768e-06, "loss": 0.3552, "step": 14656 }, { "epoch": 1.4901382675884505, "grad_norm": 0.26409438252449036, "learning_rate": 8.852258546471504e-06, "loss": 0.3513, "step": 14657 }, { "epoch": 1.4902399349328996, "grad_norm": 0.29471442103385925, "learning_rate": 8.85203229753648e-06, "loss": 0.4016, "step": 14658 }, { "epoch": 1.4903416022773486, "grad_norm": 0.3173048794269562, "learning_rate": 8.851806029195828e-06, "loss": 0.3858, "step": 14659 }, { "epoch": 1.4904432696217975, "grad_norm": 0.3020636737346649, "learning_rate": 8.851579741450696e-06, "loss": 0.3338, "step": 14660 }, { "epoch": 1.4905449369662465, "grad_norm": 0.28330302238464355, "learning_rate": 8.85135343430222e-06, "loss": 0.3599, "step": 14661 }, { "epoch": 1.4906466043106954, "grad_norm": 0.26498135924339294, "learning_rate": 8.85112710775154e-06, "loss": 0.3623, "step": 14662 }, { "epoch": 1.4907482716551443, "grad_norm": 0.28277459740638733, "learning_rate": 8.850900761799796e-06, "loss": 0.3415, "step": 14663 }, { "epoch": 1.4908499389995933, "grad_norm": 0.28742897510528564, "learning_rate": 8.850674396448132e-06, "loss": 0.3584, "step": 14664 }, { "epoch": 1.4909516063440422, "grad_norm": 0.2659006118774414, "learning_rate": 8.850448011697683e-06, "loss": 0.3662, "step": 14665 }, { "epoch": 1.4910532736884914, "grad_norm": 0.29530054330825806, "learning_rate": 8.850221607549594e-06, "loss": 0.3602, "step": 14666 }, { "epoch": 1.4911549410329403, "grad_norm": 0.31793493032455444, "learning_rate": 8.849995184005e-06, "loss": 0.3799, "step": 14667 }, { "epoch": 1.4912566083773893, "grad_norm": 0.3084012269973755, "learning_rate": 8.849768741065048e-06, "loss": 0.4102, "step": 14668 }, { "epoch": 1.4913582757218382, "grad_norm": 0.25487425923347473, "learning_rate": 8.849542278730874e-06, "loss": 0.3783, "step": 14669 }, { "epoch": 1.4914599430662872, "grad_norm": 0.3158418536186218, "learning_rate": 8.849315797003623e-06, "loss": 0.3693, "step": 14670 }, { "epoch": 1.491561610410736, "grad_norm": 0.28688451647758484, "learning_rate": 8.849089295884432e-06, "loss": 0.3607, "step": 14671 }, { "epoch": 1.491663277755185, "grad_norm": 0.278521329164505, "learning_rate": 8.848862775374445e-06, "loss": 0.3427, "step": 14672 }, { "epoch": 1.491764945099634, "grad_norm": 0.2862282693386078, "learning_rate": 8.8486362354748e-06, "loss": 0.326, "step": 14673 }, { "epoch": 1.491866612444083, "grad_norm": 0.2935492694377899, "learning_rate": 8.848409676186644e-06, "loss": 0.3532, "step": 14674 }, { "epoch": 1.4919682797885319, "grad_norm": 0.29420167207717896, "learning_rate": 8.848183097511112e-06, "loss": 0.3654, "step": 14675 }, { "epoch": 1.4920699471329808, "grad_norm": 0.29181554913520813, "learning_rate": 8.847956499449349e-06, "loss": 0.3652, "step": 14676 }, { "epoch": 1.4921716144774297, "grad_norm": 0.27782362699508667, "learning_rate": 8.847729882002493e-06, "loss": 0.3183, "step": 14677 }, { "epoch": 1.4922732818218787, "grad_norm": 0.2721661627292633, "learning_rate": 8.847503245171692e-06, "loss": 0.3694, "step": 14678 }, { "epoch": 1.4923749491663278, "grad_norm": 0.2838732898235321, "learning_rate": 8.84727658895808e-06, "loss": 0.3755, "step": 14679 }, { "epoch": 1.4924766165107768, "grad_norm": 0.3146532475948334, "learning_rate": 8.847049913362806e-06, "loss": 0.3701, "step": 14680 }, { "epoch": 1.4925782838552257, "grad_norm": 0.2912827134132385, "learning_rate": 8.846823218387008e-06, "loss": 0.3567, "step": 14681 }, { "epoch": 1.4926799511996747, "grad_norm": 0.2708079218864441, "learning_rate": 8.846596504031827e-06, "loss": 0.3506, "step": 14682 }, { "epoch": 1.4927816185441236, "grad_norm": 0.2934679090976715, "learning_rate": 8.846369770298409e-06, "loss": 0.3787, "step": 14683 }, { "epoch": 1.4928832858885726, "grad_norm": 0.2664181590080261, "learning_rate": 8.846143017187893e-06, "loss": 0.3521, "step": 14684 }, { "epoch": 1.4929849532330215, "grad_norm": 0.33734017610549927, "learning_rate": 8.845916244701423e-06, "loss": 0.3568, "step": 14685 }, { "epoch": 1.4930866205774704, "grad_norm": 0.28520306944847107, "learning_rate": 8.84568945284014e-06, "loss": 0.3516, "step": 14686 }, { "epoch": 1.4931882879219196, "grad_norm": 0.2759702503681183, "learning_rate": 8.845462641605189e-06, "loss": 0.3711, "step": 14687 }, { "epoch": 1.4932899552663685, "grad_norm": 0.297713965177536, "learning_rate": 8.84523581099771e-06, "loss": 0.3584, "step": 14688 }, { "epoch": 1.4933916226108175, "grad_norm": 0.293150395154953, "learning_rate": 8.845008961018848e-06, "loss": 0.3621, "step": 14689 }, { "epoch": 1.4934932899552664, "grad_norm": 0.3107451796531677, "learning_rate": 8.844782091669744e-06, "loss": 0.3555, "step": 14690 }, { "epoch": 1.4935949572997154, "grad_norm": 0.32322028279304504, "learning_rate": 8.844555202951539e-06, "loss": 0.3927, "step": 14691 }, { "epoch": 1.4936966246441643, "grad_norm": 0.28604480624198914, "learning_rate": 8.84432829486538e-06, "loss": 0.3433, "step": 14692 }, { "epoch": 1.4937982919886132, "grad_norm": 0.29351168870925903, "learning_rate": 8.84410136741241e-06, "loss": 0.3662, "step": 14693 }, { "epoch": 1.4938999593330622, "grad_norm": 0.2941651940345764, "learning_rate": 8.84387442059377e-06, "loss": 0.3662, "step": 14694 }, { "epoch": 1.4940016266775111, "grad_norm": 0.2743375599384308, "learning_rate": 8.843647454410604e-06, "loss": 0.3363, "step": 14695 }, { "epoch": 1.49410329402196, "grad_norm": 0.280703067779541, "learning_rate": 8.843420468864055e-06, "loss": 0.3687, "step": 14696 }, { "epoch": 1.494204961366409, "grad_norm": 0.30802595615386963, "learning_rate": 8.84319346395527e-06, "loss": 0.3397, "step": 14697 }, { "epoch": 1.494306628710858, "grad_norm": 0.29881715774536133, "learning_rate": 8.842966439685386e-06, "loss": 0.3286, "step": 14698 }, { "epoch": 1.4944082960553071, "grad_norm": 0.26650887727737427, "learning_rate": 8.842739396055551e-06, "loss": 0.3803, "step": 14699 }, { "epoch": 1.494509963399756, "grad_norm": 0.27795517444610596, "learning_rate": 8.842512333066909e-06, "loss": 0.3732, "step": 14700 }, { "epoch": 1.494611630744205, "grad_norm": 0.2848551869392395, "learning_rate": 8.842285250720601e-06, "loss": 0.3299, "step": 14701 }, { "epoch": 1.494713298088654, "grad_norm": 0.29854539036750793, "learning_rate": 8.842058149017776e-06, "loss": 0.3609, "step": 14702 }, { "epoch": 1.4948149654331029, "grad_norm": 0.28811272978782654, "learning_rate": 8.841831027959574e-06, "loss": 0.366, "step": 14703 }, { "epoch": 1.4949166327775518, "grad_norm": 0.2721979022026062, "learning_rate": 8.84160388754714e-06, "loss": 0.3211, "step": 14704 }, { "epoch": 1.4950183001220008, "grad_norm": 0.29041367769241333, "learning_rate": 8.841376727781617e-06, "loss": 0.3619, "step": 14705 }, { "epoch": 1.4951199674664497, "grad_norm": 0.29861411452293396, "learning_rate": 8.841149548664151e-06, "loss": 0.383, "step": 14706 }, { "epoch": 1.4952216348108989, "grad_norm": 0.2769794464111328, "learning_rate": 8.840922350195888e-06, "loss": 0.3649, "step": 14707 }, { "epoch": 1.4953233021553478, "grad_norm": 0.30870071053504944, "learning_rate": 8.840695132377973e-06, "loss": 0.3758, "step": 14708 }, { "epoch": 1.4954249694997968, "grad_norm": 0.3049800395965576, "learning_rate": 8.840467895211544e-06, "loss": 0.372, "step": 14709 }, { "epoch": 1.4955266368442457, "grad_norm": 0.31611478328704834, "learning_rate": 8.840240638697753e-06, "loss": 0.3443, "step": 14710 }, { "epoch": 1.4956283041886946, "grad_norm": 0.32628780603408813, "learning_rate": 8.840013362837743e-06, "loss": 0.3953, "step": 14711 }, { "epoch": 1.4957299715331436, "grad_norm": 0.3022404909133911, "learning_rate": 8.839786067632657e-06, "loss": 0.3645, "step": 14712 }, { "epoch": 1.4958316388775925, "grad_norm": 0.2863709330558777, "learning_rate": 8.83955875308364e-06, "loss": 0.3965, "step": 14713 }, { "epoch": 1.4959333062220415, "grad_norm": 0.2754930853843689, "learning_rate": 8.83933141919184e-06, "loss": 0.3791, "step": 14714 }, { "epoch": 1.4960349735664904, "grad_norm": 0.2833208441734314, "learning_rate": 8.8391040659584e-06, "loss": 0.3494, "step": 14715 }, { "epoch": 1.4961366409109393, "grad_norm": 0.3096194565296173, "learning_rate": 8.838876693384466e-06, "loss": 0.3943, "step": 14716 }, { "epoch": 1.4962383082553883, "grad_norm": 0.29146528244018555, "learning_rate": 8.838649301471185e-06, "loss": 0.3605, "step": 14717 }, { "epoch": 1.4963399755998372, "grad_norm": 0.2803781032562256, "learning_rate": 8.838421890219699e-06, "loss": 0.3555, "step": 14718 }, { "epoch": 1.4964416429442862, "grad_norm": 0.25825294852256775, "learning_rate": 8.838194459631158e-06, "loss": 0.3468, "step": 14719 }, { "epoch": 1.4965433102887353, "grad_norm": 0.28778958320617676, "learning_rate": 8.837967009706703e-06, "loss": 0.3429, "step": 14720 }, { "epoch": 1.4966449776331843, "grad_norm": 0.2787109613418579, "learning_rate": 8.837739540447483e-06, "loss": 0.3704, "step": 14721 }, { "epoch": 1.4967466449776332, "grad_norm": 0.27498048543930054, "learning_rate": 8.837512051854645e-06, "loss": 0.3562, "step": 14722 }, { "epoch": 1.4968483123220822, "grad_norm": 0.2912578284740448, "learning_rate": 8.83728454392933e-06, "loss": 0.3537, "step": 14723 }, { "epoch": 1.496949979666531, "grad_norm": 0.3069750964641571, "learning_rate": 8.83705701667269e-06, "loss": 0.3783, "step": 14724 }, { "epoch": 1.49705164701098, "grad_norm": 0.2814941704273224, "learning_rate": 8.836829470085866e-06, "loss": 0.3238, "step": 14725 }, { "epoch": 1.497153314355429, "grad_norm": 0.2826448082923889, "learning_rate": 8.83660190417001e-06, "loss": 0.3424, "step": 14726 }, { "epoch": 1.497254981699878, "grad_norm": 0.2795516550540924, "learning_rate": 8.836374318926261e-06, "loss": 0.3617, "step": 14727 }, { "epoch": 1.497356649044327, "grad_norm": 0.2686958909034729, "learning_rate": 8.836146714355774e-06, "loss": 0.3411, "step": 14728 }, { "epoch": 1.497458316388776, "grad_norm": 0.2903429865837097, "learning_rate": 8.83591909045969e-06, "loss": 0.3522, "step": 14729 }, { "epoch": 1.497559983733225, "grad_norm": 0.2887934446334839, "learning_rate": 8.835691447239157e-06, "loss": 0.3621, "step": 14730 }, { "epoch": 1.497661651077674, "grad_norm": 0.2975149154663086, "learning_rate": 8.835463784695323e-06, "loss": 0.3805, "step": 14731 }, { "epoch": 1.4977633184221228, "grad_norm": 0.2857194244861603, "learning_rate": 8.835236102829332e-06, "loss": 0.4114, "step": 14732 }, { "epoch": 1.4978649857665718, "grad_norm": 0.2938173711299896, "learning_rate": 8.835008401642335e-06, "loss": 0.3693, "step": 14733 }, { "epoch": 1.4979666531110207, "grad_norm": 0.2736916244029999, "learning_rate": 8.834780681135476e-06, "loss": 0.3559, "step": 14734 }, { "epoch": 1.4980683204554697, "grad_norm": 0.2782650887966156, "learning_rate": 8.834552941309903e-06, "loss": 0.3965, "step": 14735 }, { "epoch": 1.4981699877999186, "grad_norm": 0.2906741499900818, "learning_rate": 8.834325182166763e-06, "loss": 0.3777, "step": 14736 }, { "epoch": 1.4982716551443676, "grad_norm": 0.3083731234073639, "learning_rate": 8.834097403707205e-06, "loss": 0.3676, "step": 14737 }, { "epoch": 1.4983733224888165, "grad_norm": 0.288563996553421, "learning_rate": 8.833869605932375e-06, "loss": 0.354, "step": 14738 }, { "epoch": 1.4984749898332654, "grad_norm": 0.27971968054771423, "learning_rate": 8.83364178884342e-06, "loss": 0.3611, "step": 14739 }, { "epoch": 1.4985766571777146, "grad_norm": 0.26696646213531494, "learning_rate": 8.833413952441491e-06, "loss": 0.3559, "step": 14740 }, { "epoch": 1.4986783245221635, "grad_norm": 0.27801278233528137, "learning_rate": 8.833186096727733e-06, "loss": 0.3657, "step": 14741 }, { "epoch": 1.4987799918666125, "grad_norm": 0.3081093430519104, "learning_rate": 8.832958221703293e-06, "loss": 0.4047, "step": 14742 }, { "epoch": 1.4988816592110614, "grad_norm": 0.2872873544692993, "learning_rate": 8.832730327369321e-06, "loss": 0.3285, "step": 14743 }, { "epoch": 1.4989833265555104, "grad_norm": 0.32126477360725403, "learning_rate": 8.832502413726964e-06, "loss": 0.381, "step": 14744 }, { "epoch": 1.4990849938999593, "grad_norm": 0.30156809091567993, "learning_rate": 8.83227448077737e-06, "loss": 0.3798, "step": 14745 }, { "epoch": 1.4991866612444082, "grad_norm": 0.29304367303848267, "learning_rate": 8.83204652852169e-06, "loss": 0.3531, "step": 14746 }, { "epoch": 1.4992883285888572, "grad_norm": 0.31694361567497253, "learning_rate": 8.83181855696107e-06, "loss": 0.3629, "step": 14747 }, { "epoch": 1.4993899959333064, "grad_norm": 0.2970290184020996, "learning_rate": 8.831590566096657e-06, "loss": 0.3588, "step": 14748 }, { "epoch": 1.4994916632777553, "grad_norm": 0.2791173756122589, "learning_rate": 8.831362555929601e-06, "loss": 0.3655, "step": 14749 }, { "epoch": 1.4995933306222042, "grad_norm": 0.3005797564983368, "learning_rate": 8.831134526461052e-06, "loss": 0.3526, "step": 14750 }, { "epoch": 1.4996949979666532, "grad_norm": 0.27932611107826233, "learning_rate": 8.830906477692159e-06, "loss": 0.3723, "step": 14751 }, { "epoch": 1.4997966653111021, "grad_norm": 0.27094218134880066, "learning_rate": 8.830678409624068e-06, "loss": 0.3711, "step": 14752 }, { "epoch": 1.499898332655551, "grad_norm": 0.30781644582748413, "learning_rate": 8.830450322257928e-06, "loss": 0.3686, "step": 14753 }, { "epoch": 1.5, "grad_norm": 0.29161617159843445, "learning_rate": 8.83022221559489e-06, "loss": 0.3758, "step": 14754 }, { "epoch": 1.500101667344449, "grad_norm": 0.2776498794555664, "learning_rate": 8.829994089636105e-06, "loss": 0.3645, "step": 14755 }, { "epoch": 1.5002033346888979, "grad_norm": 0.269159197807312, "learning_rate": 8.829765944382717e-06, "loss": 0.3478, "step": 14756 }, { "epoch": 1.5003050020333468, "grad_norm": 0.2857041358947754, "learning_rate": 8.829537779835882e-06, "loss": 0.3611, "step": 14757 }, { "epoch": 1.5004066693777958, "grad_norm": 0.26453226804733276, "learning_rate": 8.829309595996742e-06, "loss": 0.3804, "step": 14758 }, { "epoch": 1.5005083367222447, "grad_norm": 0.2900620698928833, "learning_rate": 8.829081392866452e-06, "loss": 0.337, "step": 14759 }, { "epoch": 1.5006100040666936, "grad_norm": 0.29760482907295227, "learning_rate": 8.82885317044616e-06, "loss": 0.3759, "step": 14760 }, { "epoch": 1.5007116714111426, "grad_norm": 0.3096172511577606, "learning_rate": 8.828624928737015e-06, "loss": 0.3666, "step": 14761 }, { "epoch": 1.5008133387555918, "grad_norm": 0.31818875670433044, "learning_rate": 8.82839666774017e-06, "loss": 0.3785, "step": 14762 }, { "epoch": 1.5009150061000407, "grad_norm": 0.3138029873371124, "learning_rate": 8.828168387456769e-06, "loss": 0.3317, "step": 14763 }, { "epoch": 1.5010166734444896, "grad_norm": 0.281085342168808, "learning_rate": 8.827940087887966e-06, "loss": 0.3572, "step": 14764 }, { "epoch": 1.5011183407889386, "grad_norm": 0.2914210557937622, "learning_rate": 8.82771176903491e-06, "loss": 0.3931, "step": 14765 }, { "epoch": 1.5012200081333875, "grad_norm": 0.3265915811061859, "learning_rate": 8.827483430898754e-06, "loss": 0.3587, "step": 14766 }, { "epoch": 1.5013216754778367, "grad_norm": 0.2832714021205902, "learning_rate": 8.827255073480645e-06, "loss": 0.3646, "step": 14767 }, { "epoch": 1.5014233428222856, "grad_norm": 0.27808496356010437, "learning_rate": 8.827026696781733e-06, "loss": 0.3438, "step": 14768 }, { "epoch": 1.5015250101667346, "grad_norm": 0.31117668747901917, "learning_rate": 8.826798300803173e-06, "loss": 0.3648, "step": 14769 }, { "epoch": 1.5016266775111835, "grad_norm": 0.2998092472553253, "learning_rate": 8.826569885546109e-06, "loss": 0.3631, "step": 14770 }, { "epoch": 1.5017283448556324, "grad_norm": 0.27985161542892456, "learning_rate": 8.826341451011698e-06, "loss": 0.3532, "step": 14771 }, { "epoch": 1.5018300122000814, "grad_norm": 0.2647992670536041, "learning_rate": 8.826112997201087e-06, "loss": 0.3421, "step": 14772 }, { "epoch": 1.5019316795445303, "grad_norm": 0.2811155915260315, "learning_rate": 8.825884524115427e-06, "loss": 0.3845, "step": 14773 }, { "epoch": 1.5020333468889793, "grad_norm": 0.26904621720314026, "learning_rate": 8.82565603175587e-06, "loss": 0.3645, "step": 14774 }, { "epoch": 1.5021350142334282, "grad_norm": 0.2773781418800354, "learning_rate": 8.825427520123569e-06, "loss": 0.3648, "step": 14775 }, { "epoch": 1.5022366815778772, "grad_norm": 0.26963603496551514, "learning_rate": 8.825198989219671e-06, "loss": 0.3587, "step": 14776 }, { "epoch": 1.502338348922326, "grad_norm": 0.2785855829715729, "learning_rate": 8.824970439045331e-06, "loss": 0.3371, "step": 14777 }, { "epoch": 1.502440016266775, "grad_norm": 0.30801287293434143, "learning_rate": 8.824741869601699e-06, "loss": 0.3593, "step": 14778 }, { "epoch": 1.502541683611224, "grad_norm": 0.28589871525764465, "learning_rate": 8.824513280889926e-06, "loss": 0.3617, "step": 14779 }, { "epoch": 1.502643350955673, "grad_norm": 0.30130520462989807, "learning_rate": 8.824284672911163e-06, "loss": 0.3825, "step": 14780 }, { "epoch": 1.5027450183001219, "grad_norm": 0.275872141122818, "learning_rate": 8.824056045666562e-06, "loss": 0.3567, "step": 14781 }, { "epoch": 1.5028466856445708, "grad_norm": 0.27935847640037537, "learning_rate": 8.823827399157277e-06, "loss": 0.3469, "step": 14782 }, { "epoch": 1.50294835298902, "grad_norm": 0.27228739857673645, "learning_rate": 8.823598733384459e-06, "loss": 0.3389, "step": 14783 }, { "epoch": 1.503050020333469, "grad_norm": 0.28780585527420044, "learning_rate": 8.823370048349258e-06, "loss": 0.3562, "step": 14784 }, { "epoch": 1.5031516876779178, "grad_norm": 0.29426708817481995, "learning_rate": 8.823141344052827e-06, "loss": 0.3481, "step": 14785 }, { "epoch": 1.5032533550223668, "grad_norm": 0.27368706464767456, "learning_rate": 8.82291262049632e-06, "loss": 0.3888, "step": 14786 }, { "epoch": 1.5033550223668157, "grad_norm": 0.2854406535625458, "learning_rate": 8.822683877680886e-06, "loss": 0.3807, "step": 14787 }, { "epoch": 1.503456689711265, "grad_norm": 0.29463207721710205, "learning_rate": 8.82245511560768e-06, "loss": 0.3976, "step": 14788 }, { "epoch": 1.5035583570557138, "grad_norm": 0.25978243350982666, "learning_rate": 8.822226334277852e-06, "loss": 0.348, "step": 14789 }, { "epoch": 1.5036600244001628, "grad_norm": 0.32268592715263367, "learning_rate": 8.821997533692556e-06, "loss": 0.4006, "step": 14790 }, { "epoch": 1.5037616917446117, "grad_norm": 0.2792663872241974, "learning_rate": 8.821768713852947e-06, "loss": 0.3711, "step": 14791 }, { "epoch": 1.5038633590890607, "grad_norm": 0.2860671877861023, "learning_rate": 8.821539874760175e-06, "loss": 0.3778, "step": 14792 }, { "epoch": 1.5039650264335096, "grad_norm": 0.28086912631988525, "learning_rate": 8.821311016415392e-06, "loss": 0.3773, "step": 14793 }, { "epoch": 1.5040666937779585, "grad_norm": 0.2992909550666809, "learning_rate": 8.821082138819752e-06, "loss": 0.3478, "step": 14794 }, { "epoch": 1.5041683611224075, "grad_norm": 0.29222777485847473, "learning_rate": 8.820853241974408e-06, "loss": 0.3716, "step": 14795 }, { "epoch": 1.5042700284668564, "grad_norm": 0.2954007387161255, "learning_rate": 8.820624325880516e-06, "loss": 0.3423, "step": 14796 }, { "epoch": 1.5043716958113054, "grad_norm": 0.28910842537879944, "learning_rate": 8.820395390539224e-06, "loss": 0.3632, "step": 14797 }, { "epoch": 1.5044733631557543, "grad_norm": 0.30947235226631165, "learning_rate": 8.820166435951689e-06, "loss": 0.3681, "step": 14798 }, { "epoch": 1.5045750305002032, "grad_norm": 0.2966817319393158, "learning_rate": 8.819937462119063e-06, "loss": 0.3575, "step": 14799 }, { "epoch": 1.5046766978446522, "grad_norm": 0.2865232825279236, "learning_rate": 8.819708469042499e-06, "loss": 0.3283, "step": 14800 }, { "epoch": 1.5047783651891011, "grad_norm": 0.3120460510253906, "learning_rate": 8.819479456723152e-06, "loss": 0.3404, "step": 14801 }, { "epoch": 1.50488003253355, "grad_norm": 0.2988150715827942, "learning_rate": 8.819250425162176e-06, "loss": 0.3734, "step": 14802 }, { "epoch": 1.5049816998779992, "grad_norm": 0.2706225514411926, "learning_rate": 8.819021374360723e-06, "loss": 0.3389, "step": 14803 }, { "epoch": 1.5050833672224482, "grad_norm": 0.34390175342559814, "learning_rate": 8.818792304319948e-06, "loss": 0.3999, "step": 14804 }, { "epoch": 1.5051850345668971, "grad_norm": 0.32219573855400085, "learning_rate": 8.818563215041006e-06, "loss": 0.374, "step": 14805 }, { "epoch": 1.505286701911346, "grad_norm": 0.27792271971702576, "learning_rate": 8.818334106525048e-06, "loss": 0.3445, "step": 14806 }, { "epoch": 1.505388369255795, "grad_norm": 0.3223956823348999, "learning_rate": 8.81810497877323e-06, "loss": 0.3232, "step": 14807 }, { "epoch": 1.5054900366002442, "grad_norm": 0.268775075674057, "learning_rate": 8.817875831786709e-06, "loss": 0.3442, "step": 14808 }, { "epoch": 1.505591703944693, "grad_norm": 0.2842201292514801, "learning_rate": 8.817646665566636e-06, "loss": 0.3411, "step": 14809 }, { "epoch": 1.505693371289142, "grad_norm": 0.2906845510005951, "learning_rate": 8.817417480114163e-06, "loss": 0.3591, "step": 14810 }, { "epoch": 1.505795038633591, "grad_norm": 0.26731008291244507, "learning_rate": 8.817188275430452e-06, "loss": 0.3461, "step": 14811 }, { "epoch": 1.50589670597804, "grad_norm": 0.2939501404762268, "learning_rate": 8.816959051516652e-06, "loss": 0.3712, "step": 14812 }, { "epoch": 1.5059983733224889, "grad_norm": 0.28849145770072937, "learning_rate": 8.816729808373919e-06, "loss": 0.3654, "step": 14813 }, { "epoch": 1.5061000406669378, "grad_norm": 0.27113816142082214, "learning_rate": 8.816500546003409e-06, "loss": 0.3489, "step": 14814 }, { "epoch": 1.5062017080113868, "grad_norm": 0.2745451331138611, "learning_rate": 8.816271264406276e-06, "loss": 0.3807, "step": 14815 }, { "epoch": 1.5063033753558357, "grad_norm": 0.26378726959228516, "learning_rate": 8.816041963583675e-06, "loss": 0.3451, "step": 14816 }, { "epoch": 1.5064050427002846, "grad_norm": 0.2866808772087097, "learning_rate": 8.815812643536762e-06, "loss": 0.366, "step": 14817 }, { "epoch": 1.5065067100447336, "grad_norm": 0.28491851687431335, "learning_rate": 8.81558330426669e-06, "loss": 0.3501, "step": 14818 }, { "epoch": 1.5066083773891825, "grad_norm": 0.29097265005111694, "learning_rate": 8.815353945774619e-06, "loss": 0.3561, "step": 14819 }, { "epoch": 1.5067100447336315, "grad_norm": 0.2794170677661896, "learning_rate": 8.815124568061698e-06, "loss": 0.3608, "step": 14820 }, { "epoch": 1.5068117120780804, "grad_norm": 0.29363158345222473, "learning_rate": 8.814895171129087e-06, "loss": 0.3668, "step": 14821 }, { "epoch": 1.5069133794225293, "grad_norm": 0.2763374149799347, "learning_rate": 8.814665754977942e-06, "loss": 0.3586, "step": 14822 }, { "epoch": 1.5070150467669783, "grad_norm": 0.3129245638847351, "learning_rate": 8.814436319609419e-06, "loss": 0.396, "step": 14823 }, { "epoch": 1.5071167141114274, "grad_norm": 0.3041141927242279, "learning_rate": 8.814206865024668e-06, "loss": 0.3455, "step": 14824 }, { "epoch": 1.5072183814558764, "grad_norm": 0.2837207019329071, "learning_rate": 8.813977391224853e-06, "loss": 0.3537, "step": 14825 }, { "epoch": 1.5073200488003253, "grad_norm": 0.2766458988189697, "learning_rate": 8.813747898211125e-06, "loss": 0.387, "step": 14826 }, { "epoch": 1.5074217161447743, "grad_norm": 0.2767014801502228, "learning_rate": 8.81351838598464e-06, "loss": 0.388, "step": 14827 }, { "epoch": 1.5075233834892232, "grad_norm": 0.2789154648780823, "learning_rate": 8.813288854546556e-06, "loss": 0.3592, "step": 14828 }, { "epoch": 1.5076250508336724, "grad_norm": 0.3121969997882843, "learning_rate": 8.81305930389803e-06, "loss": 0.3598, "step": 14829 }, { "epoch": 1.5077267181781213, "grad_norm": 0.29165107011795044, "learning_rate": 8.812829734040216e-06, "loss": 0.3661, "step": 14830 }, { "epoch": 1.5078283855225703, "grad_norm": 0.2734079360961914, "learning_rate": 8.812600144974274e-06, "loss": 0.357, "step": 14831 }, { "epoch": 1.5079300528670192, "grad_norm": 0.28678610920906067, "learning_rate": 8.812370536701356e-06, "loss": 0.3031, "step": 14832 }, { "epoch": 1.5080317202114681, "grad_norm": 0.2930107116699219, "learning_rate": 8.812140909222622e-06, "loss": 0.3438, "step": 14833 }, { "epoch": 1.508133387555917, "grad_norm": 0.28527504205703735, "learning_rate": 8.81191126253923e-06, "loss": 0.332, "step": 14834 }, { "epoch": 1.508235054900366, "grad_norm": 0.2953311800956726, "learning_rate": 8.811681596652333e-06, "loss": 0.3628, "step": 14835 }, { "epoch": 1.508336722244815, "grad_norm": 0.2847835421562195, "learning_rate": 8.81145191156309e-06, "loss": 0.3727, "step": 14836 }, { "epoch": 1.508438389589264, "grad_norm": 0.2828443646430969, "learning_rate": 8.811222207272658e-06, "loss": 0.3471, "step": 14837 }, { "epoch": 1.5085400569337128, "grad_norm": 0.31744804978370667, "learning_rate": 8.810992483782194e-06, "loss": 0.3537, "step": 14838 }, { "epoch": 1.5086417242781618, "grad_norm": 0.3057911694049835, "learning_rate": 8.810762741092858e-06, "loss": 0.3534, "step": 14839 }, { "epoch": 1.5087433916226107, "grad_norm": 0.266638845205307, "learning_rate": 8.810532979205802e-06, "loss": 0.3476, "step": 14840 }, { "epoch": 1.5088450589670597, "grad_norm": 0.3091445565223694, "learning_rate": 8.810303198122188e-06, "loss": 0.3426, "step": 14841 }, { "epoch": 1.5089467263115086, "grad_norm": 0.334081768989563, "learning_rate": 8.810073397843171e-06, "loss": 0.3686, "step": 14842 }, { "epoch": 1.5090483936559576, "grad_norm": 0.29494544863700867, "learning_rate": 8.809843578369908e-06, "loss": 0.376, "step": 14843 }, { "epoch": 1.5091500610004067, "grad_norm": 0.30721694231033325, "learning_rate": 8.809613739703561e-06, "loss": 0.3674, "step": 14844 }, { "epoch": 1.5092517283448557, "grad_norm": 0.2842133641242981, "learning_rate": 8.809383881845284e-06, "loss": 0.3608, "step": 14845 }, { "epoch": 1.5093533956893046, "grad_norm": 0.28514596819877625, "learning_rate": 8.809154004796237e-06, "loss": 0.3336, "step": 14846 }, { "epoch": 1.5094550630337535, "grad_norm": 0.27937576174736023, "learning_rate": 8.808924108557577e-06, "loss": 0.3558, "step": 14847 }, { "epoch": 1.5095567303782025, "grad_norm": 0.2928258180618286, "learning_rate": 8.808694193130462e-06, "loss": 0.3595, "step": 14848 }, { "epoch": 1.5096583977226516, "grad_norm": 0.3143121302127838, "learning_rate": 8.808464258516052e-06, "loss": 0.3672, "step": 14849 }, { "epoch": 1.5097600650671006, "grad_norm": 0.29226943850517273, "learning_rate": 8.808234304715505e-06, "loss": 0.3748, "step": 14850 }, { "epoch": 1.5098617324115495, "grad_norm": 0.292111873626709, "learning_rate": 8.808004331729976e-06, "loss": 0.3776, "step": 14851 }, { "epoch": 1.5099633997559985, "grad_norm": 0.2990090548992157, "learning_rate": 8.807774339560626e-06, "loss": 0.3871, "step": 14852 }, { "epoch": 1.5100650671004474, "grad_norm": 0.29180145263671875, "learning_rate": 8.807544328208615e-06, "loss": 0.3733, "step": 14853 }, { "epoch": 1.5101667344448964, "grad_norm": 0.29505079984664917, "learning_rate": 8.8073142976751e-06, "loss": 0.3505, "step": 14854 }, { "epoch": 1.5102684017893453, "grad_norm": 0.3075740337371826, "learning_rate": 8.807084247961242e-06, "loss": 0.3489, "step": 14855 }, { "epoch": 1.5103700691337942, "grad_norm": 0.27057215571403503, "learning_rate": 8.806854179068196e-06, "loss": 0.368, "step": 14856 }, { "epoch": 1.5104717364782432, "grad_norm": 0.27873167395591736, "learning_rate": 8.806624090997124e-06, "loss": 0.3796, "step": 14857 }, { "epoch": 1.5105734038226921, "grad_norm": 0.30403760075569153, "learning_rate": 8.806393983749185e-06, "loss": 0.3593, "step": 14858 }, { "epoch": 1.510675071167141, "grad_norm": 0.3029336631298065, "learning_rate": 8.806163857325535e-06, "loss": 0.3499, "step": 14859 }, { "epoch": 1.51077673851159, "grad_norm": 0.2806594669818878, "learning_rate": 8.80593371172734e-06, "loss": 0.3572, "step": 14860 }, { "epoch": 1.510878405856039, "grad_norm": 0.2822699546813965, "learning_rate": 8.805703546955753e-06, "loss": 0.368, "step": 14861 }, { "epoch": 1.5109800732004879, "grad_norm": 0.314956396818161, "learning_rate": 8.805473363011938e-06, "loss": 0.3831, "step": 14862 }, { "epoch": 1.5110817405449368, "grad_norm": 0.29328954219818115, "learning_rate": 8.80524315989705e-06, "loss": 0.3479, "step": 14863 }, { "epoch": 1.5111834078893858, "grad_norm": 0.27741190791130066, "learning_rate": 8.805012937612253e-06, "loss": 0.3643, "step": 14864 }, { "epoch": 1.511285075233835, "grad_norm": 0.28631097078323364, "learning_rate": 8.804782696158703e-06, "loss": 0.3431, "step": 14865 }, { "epoch": 1.5113867425782839, "grad_norm": 0.29755961894989014, "learning_rate": 8.804552435537563e-06, "loss": 0.3732, "step": 14866 }, { "epoch": 1.5114884099227328, "grad_norm": 0.2702370882034302, "learning_rate": 8.804322155749992e-06, "loss": 0.3357, "step": 14867 }, { "epoch": 1.5115900772671818, "grad_norm": 0.3032044470310211, "learning_rate": 8.80409185679715e-06, "loss": 0.3707, "step": 14868 }, { "epoch": 1.5116917446116307, "grad_norm": 0.2911238372325897, "learning_rate": 8.803861538680199e-06, "loss": 0.3785, "step": 14869 }, { "epoch": 1.5117934119560799, "grad_norm": 0.26506054401397705, "learning_rate": 8.803631201400296e-06, "loss": 0.3329, "step": 14870 }, { "epoch": 1.5118950793005288, "grad_norm": 0.2918553352355957, "learning_rate": 8.803400844958602e-06, "loss": 0.3611, "step": 14871 }, { "epoch": 1.5119967466449777, "grad_norm": 0.28860005736351013, "learning_rate": 8.80317046935628e-06, "loss": 0.3317, "step": 14872 }, { "epoch": 1.5120984139894267, "grad_norm": 0.2792876064777374, "learning_rate": 8.802940074594486e-06, "loss": 0.3353, "step": 14873 }, { "epoch": 1.5122000813338756, "grad_norm": 0.30843913555145264, "learning_rate": 8.802709660674387e-06, "loss": 0.3783, "step": 14874 }, { "epoch": 1.5123017486783246, "grad_norm": 0.3130421042442322, "learning_rate": 8.802479227597138e-06, "loss": 0.3767, "step": 14875 }, { "epoch": 1.5124034160227735, "grad_norm": 0.2908579409122467, "learning_rate": 8.802248775363904e-06, "loss": 0.3916, "step": 14876 }, { "epoch": 1.5125050833672224, "grad_norm": 0.2933470904827118, "learning_rate": 8.802018303975843e-06, "loss": 0.356, "step": 14877 }, { "epoch": 1.5126067507116714, "grad_norm": 0.3015621602535248, "learning_rate": 8.801787813434119e-06, "loss": 0.3773, "step": 14878 }, { "epoch": 1.5127084180561203, "grad_norm": 0.2990340292453766, "learning_rate": 8.801557303739889e-06, "loss": 0.3519, "step": 14879 }, { "epoch": 1.5128100854005693, "grad_norm": 0.2865777313709259, "learning_rate": 8.801326774894318e-06, "loss": 0.3589, "step": 14880 }, { "epoch": 1.5129117527450182, "grad_norm": 0.29764798283576965, "learning_rate": 8.801096226898566e-06, "loss": 0.3282, "step": 14881 }, { "epoch": 1.5130134200894672, "grad_norm": 0.3090355396270752, "learning_rate": 8.800865659753795e-06, "loss": 0.3647, "step": 14882 }, { "epoch": 1.513115087433916, "grad_norm": 0.25711092352867126, "learning_rate": 8.800635073461166e-06, "loss": 0.346, "step": 14883 }, { "epoch": 1.513216754778365, "grad_norm": 0.2998639643192291, "learning_rate": 8.80040446802184e-06, "loss": 0.3604, "step": 14884 }, { "epoch": 1.5133184221228142, "grad_norm": 0.3013303279876709, "learning_rate": 8.80017384343698e-06, "loss": 0.3622, "step": 14885 }, { "epoch": 1.5134200894672631, "grad_norm": 0.2864665389060974, "learning_rate": 8.799943199707746e-06, "loss": 0.3791, "step": 14886 }, { "epoch": 1.513521756811712, "grad_norm": 0.29308977723121643, "learning_rate": 8.799712536835302e-06, "loss": 0.3774, "step": 14887 }, { "epoch": 1.513623424156161, "grad_norm": 0.30838850140571594, "learning_rate": 8.799481854820808e-06, "loss": 0.3377, "step": 14888 }, { "epoch": 1.51372509150061, "grad_norm": 0.28221002221107483, "learning_rate": 8.799251153665429e-06, "loss": 0.3849, "step": 14889 }, { "epoch": 1.5138267588450591, "grad_norm": 0.28577345609664917, "learning_rate": 8.799020433370326e-06, "loss": 0.3537, "step": 14890 }, { "epoch": 1.513928426189508, "grad_norm": 0.28720295429229736, "learning_rate": 8.79878969393666e-06, "loss": 0.3679, "step": 14891 }, { "epoch": 1.514030093533957, "grad_norm": 0.2675565183162689, "learning_rate": 8.798558935365593e-06, "loss": 0.3692, "step": 14892 }, { "epoch": 1.514131760878406, "grad_norm": 0.2903425395488739, "learning_rate": 8.798328157658292e-06, "loss": 0.3826, "step": 14893 }, { "epoch": 1.514233428222855, "grad_norm": 0.2829938232898712, "learning_rate": 8.798097360815913e-06, "loss": 0.3527, "step": 14894 }, { "epoch": 1.5143350955673038, "grad_norm": 0.28585678339004517, "learning_rate": 8.797866544839625e-06, "loss": 0.3889, "step": 14895 }, { "epoch": 1.5144367629117528, "grad_norm": 0.300428181886673, "learning_rate": 8.797635709730585e-06, "loss": 0.3858, "step": 14896 }, { "epoch": 1.5145384302562017, "grad_norm": 0.2940443456172943, "learning_rate": 8.797404855489959e-06, "loss": 0.3523, "step": 14897 }, { "epoch": 1.5146400976006507, "grad_norm": 0.28802382946014404, "learning_rate": 8.797173982118913e-06, "loss": 0.338, "step": 14898 }, { "epoch": 1.5147417649450996, "grad_norm": 0.27642929553985596, "learning_rate": 8.796943089618601e-06, "loss": 0.3164, "step": 14899 }, { "epoch": 1.5148434322895485, "grad_norm": 0.29785314202308655, "learning_rate": 8.796712177990197e-06, "loss": 0.3584, "step": 14900 }, { "epoch": 1.5149450996339975, "grad_norm": 0.2771660387516022, "learning_rate": 8.796481247234857e-06, "loss": 0.3629, "step": 14901 }, { "epoch": 1.5150467669784464, "grad_norm": 0.2841245234012604, "learning_rate": 8.796250297353748e-06, "loss": 0.3367, "step": 14902 }, { "epoch": 1.5151484343228954, "grad_norm": 0.2913063168525696, "learning_rate": 8.796019328348031e-06, "loss": 0.3633, "step": 14903 }, { "epoch": 1.5152501016673443, "grad_norm": 0.272820383310318, "learning_rate": 8.795788340218871e-06, "loss": 0.364, "step": 14904 }, { "epoch": 1.5153517690117932, "grad_norm": 0.2599603235721588, "learning_rate": 8.795557332967432e-06, "loss": 0.3783, "step": 14905 }, { "epoch": 1.5154534363562424, "grad_norm": 0.28599122166633606, "learning_rate": 8.795326306594876e-06, "loss": 0.369, "step": 14906 }, { "epoch": 1.5155551037006914, "grad_norm": 0.2887365221977234, "learning_rate": 8.79509526110237e-06, "loss": 0.3425, "step": 14907 }, { "epoch": 1.5156567710451403, "grad_norm": 0.27307552099227905, "learning_rate": 8.794864196491075e-06, "loss": 0.3538, "step": 14908 }, { "epoch": 1.5157584383895892, "grad_norm": 0.3113950490951538, "learning_rate": 8.794633112762155e-06, "loss": 0.3917, "step": 14909 }, { "epoch": 1.5158601057340382, "grad_norm": 0.27760154008865356, "learning_rate": 8.794402009916776e-06, "loss": 0.3679, "step": 14910 }, { "epoch": 1.5159617730784873, "grad_norm": 0.2787841558456421, "learning_rate": 8.794170887956102e-06, "loss": 0.3609, "step": 14911 }, { "epoch": 1.5160634404229363, "grad_norm": 0.2979164123535156, "learning_rate": 8.793939746881295e-06, "loss": 0.3803, "step": 14912 }, { "epoch": 1.5161651077673852, "grad_norm": 0.3043307065963745, "learning_rate": 8.79370858669352e-06, "loss": 0.3466, "step": 14913 }, { "epoch": 1.5162667751118342, "grad_norm": 0.2822422683238983, "learning_rate": 8.793477407393947e-06, "loss": 0.3607, "step": 14914 }, { "epoch": 1.516368442456283, "grad_norm": 0.3080165982246399, "learning_rate": 8.793246208983733e-06, "loss": 0.3451, "step": 14915 }, { "epoch": 1.516470109800732, "grad_norm": 0.3009423613548279, "learning_rate": 8.793014991464047e-06, "loss": 0.3667, "step": 14916 }, { "epoch": 1.516571777145181, "grad_norm": 0.29769548773765564, "learning_rate": 8.792783754836052e-06, "loss": 0.3491, "step": 14917 }, { "epoch": 1.51667344448963, "grad_norm": 0.33665308356285095, "learning_rate": 8.792552499100915e-06, "loss": 0.4029, "step": 14918 }, { "epoch": 1.5167751118340789, "grad_norm": 0.2927989959716797, "learning_rate": 8.792321224259799e-06, "loss": 0.3835, "step": 14919 }, { "epoch": 1.5168767791785278, "grad_norm": 0.2785729169845581, "learning_rate": 8.79208993031387e-06, "loss": 0.3592, "step": 14920 }, { "epoch": 1.5169784465229768, "grad_norm": 0.2838744521141052, "learning_rate": 8.791858617264294e-06, "loss": 0.3589, "step": 14921 }, { "epoch": 1.5170801138674257, "grad_norm": 0.29696011543273926, "learning_rate": 8.791627285112235e-06, "loss": 0.351, "step": 14922 }, { "epoch": 1.5171817812118746, "grad_norm": 0.309060662984848, "learning_rate": 8.791395933858857e-06, "loss": 0.3951, "step": 14923 }, { "epoch": 1.5172834485563236, "grad_norm": 0.2717094421386719, "learning_rate": 8.791164563505329e-06, "loss": 0.3582, "step": 14924 }, { "epoch": 1.5173851159007725, "grad_norm": 0.2999853193759918, "learning_rate": 8.790933174052815e-06, "loss": 0.3374, "step": 14925 }, { "epoch": 1.5174867832452217, "grad_norm": 0.30571746826171875, "learning_rate": 8.79070176550248e-06, "loss": 0.3504, "step": 14926 }, { "epoch": 1.5175884505896706, "grad_norm": 0.29536911845207214, "learning_rate": 8.79047033785549e-06, "loss": 0.3663, "step": 14927 }, { "epoch": 1.5176901179341196, "grad_norm": 0.2874009311199188, "learning_rate": 8.790238891113013e-06, "loss": 0.3415, "step": 14928 }, { "epoch": 1.5177917852785685, "grad_norm": 0.3130776286125183, "learning_rate": 8.79000742527621e-06, "loss": 0.3898, "step": 14929 }, { "epoch": 1.5178934526230174, "grad_norm": 0.28721991181373596, "learning_rate": 8.789775940346252e-06, "loss": 0.3623, "step": 14930 }, { "epoch": 1.5179951199674666, "grad_norm": 0.3161366283893585, "learning_rate": 8.789544436324303e-06, "loss": 0.3494, "step": 14931 }, { "epoch": 1.5180967873119156, "grad_norm": 0.31337183713912964, "learning_rate": 8.78931291321153e-06, "loss": 0.396, "step": 14932 }, { "epoch": 1.5181984546563645, "grad_norm": 0.30736568570137024, "learning_rate": 8.789081371009098e-06, "loss": 0.3771, "step": 14933 }, { "epoch": 1.5183001220008134, "grad_norm": 0.2824355661869049, "learning_rate": 8.788849809718177e-06, "loss": 0.3778, "step": 14934 }, { "epoch": 1.5184017893452624, "grad_norm": 0.280491441488266, "learning_rate": 8.788618229339929e-06, "loss": 0.3769, "step": 14935 }, { "epoch": 1.5185034566897113, "grad_norm": 0.2734745144844055, "learning_rate": 8.788386629875523e-06, "loss": 0.3702, "step": 14936 }, { "epoch": 1.5186051240341603, "grad_norm": 0.2872796952724457, "learning_rate": 8.788155011326125e-06, "loss": 0.4014, "step": 14937 }, { "epoch": 1.5187067913786092, "grad_norm": 0.2977043390274048, "learning_rate": 8.787923373692904e-06, "loss": 0.3713, "step": 14938 }, { "epoch": 1.5188084587230581, "grad_norm": 0.27196240425109863, "learning_rate": 8.787691716977023e-06, "loss": 0.3454, "step": 14939 }, { "epoch": 1.518910126067507, "grad_norm": 0.27970704436302185, "learning_rate": 8.787460041179653e-06, "loss": 0.345, "step": 14940 }, { "epoch": 1.519011793411956, "grad_norm": 0.2893238365650177, "learning_rate": 8.787228346301958e-06, "loss": 0.3772, "step": 14941 }, { "epoch": 1.519113460756405, "grad_norm": 0.3047765791416168, "learning_rate": 8.786996632345108e-06, "loss": 0.3448, "step": 14942 }, { "epoch": 1.519215128100854, "grad_norm": 0.27629342675209045, "learning_rate": 8.786764899310267e-06, "loss": 0.3464, "step": 14943 }, { "epoch": 1.5193167954453028, "grad_norm": 0.2914726138114929, "learning_rate": 8.786533147198605e-06, "loss": 0.3302, "step": 14944 }, { "epoch": 1.5194184627897518, "grad_norm": 0.2846358120441437, "learning_rate": 8.78630137601129e-06, "loss": 0.4034, "step": 14945 }, { "epoch": 1.5195201301342007, "grad_norm": 0.2708739936351776, "learning_rate": 8.786069585749488e-06, "loss": 0.3293, "step": 14946 }, { "epoch": 1.51962179747865, "grad_norm": 0.2674441337585449, "learning_rate": 8.785837776414365e-06, "loss": 0.355, "step": 14947 }, { "epoch": 1.5197234648230988, "grad_norm": 0.2654687166213989, "learning_rate": 8.785605948007094e-06, "loss": 0.3691, "step": 14948 }, { "epoch": 1.5198251321675478, "grad_norm": 0.29193413257598877, "learning_rate": 8.785374100528838e-06, "loss": 0.3658, "step": 14949 }, { "epoch": 1.5199267995119967, "grad_norm": 0.28874048590660095, "learning_rate": 8.785142233980768e-06, "loss": 0.3933, "step": 14950 }, { "epoch": 1.5200284668564457, "grad_norm": 0.2693561613559723, "learning_rate": 8.784910348364052e-06, "loss": 0.3856, "step": 14951 }, { "epoch": 1.5201301342008948, "grad_norm": 0.2850222885608673, "learning_rate": 8.784678443679853e-06, "loss": 0.3618, "step": 14952 }, { "epoch": 1.5202318015453438, "grad_norm": 0.26963865756988525, "learning_rate": 8.784446519929346e-06, "loss": 0.3776, "step": 14953 }, { "epoch": 1.5203334688897927, "grad_norm": 0.3104250729084015, "learning_rate": 8.784214577113696e-06, "loss": 0.3863, "step": 14954 }, { "epoch": 1.5204351362342416, "grad_norm": 0.2910346984863281, "learning_rate": 8.783982615234072e-06, "loss": 0.3637, "step": 14955 }, { "epoch": 1.5205368035786906, "grad_norm": 0.2776282727718353, "learning_rate": 8.783750634291644e-06, "loss": 0.3712, "step": 14956 }, { "epoch": 1.5206384709231395, "grad_norm": 0.26761361956596375, "learning_rate": 8.78351863428758e-06, "loss": 0.351, "step": 14957 }, { "epoch": 1.5207401382675885, "grad_norm": 0.25840267539024353, "learning_rate": 8.783286615223046e-06, "loss": 0.3532, "step": 14958 }, { "epoch": 1.5208418056120374, "grad_norm": 0.29453372955322266, "learning_rate": 8.783054577099215e-06, "loss": 0.3743, "step": 14959 }, { "epoch": 1.5209434729564864, "grad_norm": 0.28416892886161804, "learning_rate": 8.782822519917253e-06, "loss": 0.398, "step": 14960 }, { "epoch": 1.5210451403009353, "grad_norm": 0.29561924934387207, "learning_rate": 8.782590443678332e-06, "loss": 0.374, "step": 14961 }, { "epoch": 1.5211468076453842, "grad_norm": 0.2648257613182068, "learning_rate": 8.782358348383615e-06, "loss": 0.3493, "step": 14962 }, { "epoch": 1.5212484749898332, "grad_norm": 0.3079971969127655, "learning_rate": 8.78212623403428e-06, "loss": 0.3812, "step": 14963 }, { "epoch": 1.5213501423342821, "grad_norm": 0.269199937582016, "learning_rate": 8.78189410063149e-06, "loss": 0.3571, "step": 14964 }, { "epoch": 1.521451809678731, "grad_norm": 0.2809048295021057, "learning_rate": 8.781661948176416e-06, "loss": 0.3408, "step": 14965 }, { "epoch": 1.52155347702318, "grad_norm": 0.27670374512672424, "learning_rate": 8.781429776670227e-06, "loss": 0.3247, "step": 14966 }, { "epoch": 1.5216551443676292, "grad_norm": 0.27261778712272644, "learning_rate": 8.781197586114094e-06, "loss": 0.3354, "step": 14967 }, { "epoch": 1.521756811712078, "grad_norm": 0.27093544602394104, "learning_rate": 8.780965376509188e-06, "loss": 0.3538, "step": 14968 }, { "epoch": 1.521858479056527, "grad_norm": 0.2910587787628174, "learning_rate": 8.780733147856674e-06, "loss": 0.3759, "step": 14969 }, { "epoch": 1.521960146400976, "grad_norm": 0.2655007541179657, "learning_rate": 8.780500900157727e-06, "loss": 0.3595, "step": 14970 }, { "epoch": 1.522061813745425, "grad_norm": 0.28379130363464355, "learning_rate": 8.780268633413514e-06, "loss": 0.3211, "step": 14971 }, { "epoch": 1.522163481089874, "grad_norm": 0.29358890652656555, "learning_rate": 8.780036347625206e-06, "loss": 0.3888, "step": 14972 }, { "epoch": 1.522265148434323, "grad_norm": 0.2974455952644348, "learning_rate": 8.779804042793973e-06, "loss": 0.3507, "step": 14973 }, { "epoch": 1.522366815778772, "grad_norm": 0.2930710017681122, "learning_rate": 8.779571718920985e-06, "loss": 0.3388, "step": 14974 }, { "epoch": 1.522468483123221, "grad_norm": 0.29633548855781555, "learning_rate": 8.779339376007415e-06, "loss": 0.3641, "step": 14975 }, { "epoch": 1.5225701504676699, "grad_norm": 0.2821680009365082, "learning_rate": 8.77910701405443e-06, "loss": 0.376, "step": 14976 }, { "epoch": 1.5226718178121188, "grad_norm": 0.28188470005989075, "learning_rate": 8.7788746330632e-06, "loss": 0.347, "step": 14977 }, { "epoch": 1.5227734851565677, "grad_norm": 0.278477281332016, "learning_rate": 8.778642233034899e-06, "loss": 0.3195, "step": 14978 }, { "epoch": 1.5228751525010167, "grad_norm": 0.29255735874176025, "learning_rate": 8.778409813970697e-06, "loss": 0.3588, "step": 14979 }, { "epoch": 1.5229768198454656, "grad_norm": 0.2866198718547821, "learning_rate": 8.778177375871764e-06, "loss": 0.3455, "step": 14980 }, { "epoch": 1.5230784871899146, "grad_norm": 0.28311267495155334, "learning_rate": 8.77794491873927e-06, "loss": 0.3437, "step": 14981 }, { "epoch": 1.5231801545343635, "grad_norm": 0.28804880380630493, "learning_rate": 8.777712442574388e-06, "loss": 0.3634, "step": 14982 }, { "epoch": 1.5232818218788124, "grad_norm": 0.28700047731399536, "learning_rate": 8.77747994737829e-06, "loss": 0.3722, "step": 14983 }, { "epoch": 1.5233834892232614, "grad_norm": 0.27284520864486694, "learning_rate": 8.777247433152146e-06, "loss": 0.3761, "step": 14984 }, { "epoch": 1.5234851565677103, "grad_norm": 0.336963415145874, "learning_rate": 8.777014899897123e-06, "loss": 0.4212, "step": 14985 }, { "epoch": 1.5235868239121593, "grad_norm": 0.27408841252326965, "learning_rate": 8.7767823476144e-06, "loss": 0.3145, "step": 14986 }, { "epoch": 1.5236884912566082, "grad_norm": 0.3116142749786377, "learning_rate": 8.776549776305143e-06, "loss": 0.3767, "step": 14987 }, { "epoch": 1.5237901586010574, "grad_norm": 0.2960856258869171, "learning_rate": 8.776317185970528e-06, "loss": 0.3655, "step": 14988 }, { "epoch": 1.5238918259455063, "grad_norm": 0.30978766083717346, "learning_rate": 8.776084576611723e-06, "loss": 0.3659, "step": 14989 }, { "epoch": 1.5239934932899553, "grad_norm": 0.3309750556945801, "learning_rate": 8.7758519482299e-06, "loss": 0.3814, "step": 14990 }, { "epoch": 1.5240951606344042, "grad_norm": 0.28616636991500854, "learning_rate": 8.775619300826234e-06, "loss": 0.3477, "step": 14991 }, { "epoch": 1.5241968279788531, "grad_norm": 0.2940927743911743, "learning_rate": 8.775386634401896e-06, "loss": 0.3766, "step": 14992 }, { "epoch": 1.5242984953233023, "grad_norm": 0.33862221240997314, "learning_rate": 8.775153948958055e-06, "loss": 0.3508, "step": 14993 }, { "epoch": 1.5244001626677512, "grad_norm": 0.2820408344268799, "learning_rate": 8.774921244495886e-06, "loss": 0.3478, "step": 14994 }, { "epoch": 1.5245018300122002, "grad_norm": 0.30005085468292236, "learning_rate": 8.774688521016562e-06, "loss": 0.3955, "step": 14995 }, { "epoch": 1.5246034973566491, "grad_norm": 0.2925220727920532, "learning_rate": 8.774455778521254e-06, "loss": 0.3512, "step": 14996 }, { "epoch": 1.524705164701098, "grad_norm": 0.294941246509552, "learning_rate": 8.774223017011135e-06, "loss": 0.3679, "step": 14997 }, { "epoch": 1.524806832045547, "grad_norm": 0.28995195031166077, "learning_rate": 8.773990236487376e-06, "loss": 0.3685, "step": 14998 }, { "epoch": 1.524908499389996, "grad_norm": 0.30578985810279846, "learning_rate": 8.773757436951151e-06, "loss": 0.3918, "step": 14999 }, { "epoch": 1.525010166734445, "grad_norm": 0.2957094609737396, "learning_rate": 8.773524618403634e-06, "loss": 0.3422, "step": 15000 }, { "epoch": 1.5251118340788938, "grad_norm": 0.2973470389842987, "learning_rate": 8.773291780845996e-06, "loss": 0.3835, "step": 15001 }, { "epoch": 1.5252135014233428, "grad_norm": 0.2978467345237732, "learning_rate": 8.773058924279412e-06, "loss": 0.368, "step": 15002 }, { "epoch": 1.5253151687677917, "grad_norm": 0.2685413360595703, "learning_rate": 8.772826048705051e-06, "loss": 0.3792, "step": 15003 }, { "epoch": 1.5254168361122407, "grad_norm": 0.28189894556999207, "learning_rate": 8.772593154124091e-06, "loss": 0.3619, "step": 15004 }, { "epoch": 1.5255185034566896, "grad_norm": 0.2900867164134979, "learning_rate": 8.772360240537702e-06, "loss": 0.3503, "step": 15005 }, { "epoch": 1.5256201708011385, "grad_norm": 0.2989204525947571, "learning_rate": 8.77212730794706e-06, "loss": 0.4103, "step": 15006 }, { "epoch": 1.5257218381455875, "grad_norm": 0.27872586250305176, "learning_rate": 8.771894356353337e-06, "loss": 0.3708, "step": 15007 }, { "epoch": 1.5258235054900366, "grad_norm": 0.3055365979671478, "learning_rate": 8.771661385757706e-06, "loss": 0.3979, "step": 15008 }, { "epoch": 1.5259251728344856, "grad_norm": 0.26752886176109314, "learning_rate": 8.771428396161342e-06, "loss": 0.3967, "step": 15009 }, { "epoch": 1.5260268401789345, "grad_norm": 0.3089219331741333, "learning_rate": 8.771195387565417e-06, "loss": 0.3628, "step": 15010 }, { "epoch": 1.5261285075233835, "grad_norm": 0.2929828464984894, "learning_rate": 8.770962359971105e-06, "loss": 0.369, "step": 15011 }, { "epoch": 1.5262301748678324, "grad_norm": 0.29417166113853455, "learning_rate": 8.770729313379583e-06, "loss": 0.3681, "step": 15012 }, { "epoch": 1.5263318422122816, "grad_norm": 0.2973385751247406, "learning_rate": 8.77049624779202e-06, "loss": 0.3553, "step": 15013 }, { "epoch": 1.5264335095567305, "grad_norm": 0.3093537390232086, "learning_rate": 8.770263163209597e-06, "loss": 0.3553, "step": 15014 }, { "epoch": 1.5265351769011795, "grad_norm": 0.3049248456954956, "learning_rate": 8.770030059633479e-06, "loss": 0.3739, "step": 15015 }, { "epoch": 1.5266368442456284, "grad_norm": 0.28617751598358154, "learning_rate": 8.769796937064848e-06, "loss": 0.3914, "step": 15016 }, { "epoch": 1.5267385115900773, "grad_norm": 0.2669980227947235, "learning_rate": 8.769563795504876e-06, "loss": 0.3577, "step": 15017 }, { "epoch": 1.5268401789345263, "grad_norm": 0.2727604806423187, "learning_rate": 8.769330634954738e-06, "loss": 0.3569, "step": 15018 }, { "epoch": 1.5269418462789752, "grad_norm": 0.3114587366580963, "learning_rate": 8.769097455415608e-06, "loss": 0.3746, "step": 15019 }, { "epoch": 1.5270435136234242, "grad_norm": 0.2974366247653961, "learning_rate": 8.768864256888659e-06, "loss": 0.3875, "step": 15020 }, { "epoch": 1.527145180967873, "grad_norm": 0.3125026226043701, "learning_rate": 8.768631039375068e-06, "loss": 0.3722, "step": 15021 }, { "epoch": 1.527246848312322, "grad_norm": 0.30228373408317566, "learning_rate": 8.768397802876009e-06, "loss": 0.314, "step": 15022 }, { "epoch": 1.527348515656771, "grad_norm": 0.2883380055427551, "learning_rate": 8.768164547392659e-06, "loss": 0.3608, "step": 15023 }, { "epoch": 1.52745018300122, "grad_norm": 0.2794869840145111, "learning_rate": 8.767931272926192e-06, "loss": 0.378, "step": 15024 }, { "epoch": 1.5275518503456689, "grad_norm": 0.2822117209434509, "learning_rate": 8.767697979477781e-06, "loss": 0.3375, "step": 15025 }, { "epoch": 1.5276535176901178, "grad_norm": 0.29122909903526306, "learning_rate": 8.767464667048602e-06, "loss": 0.3267, "step": 15026 }, { "epoch": 1.5277551850345668, "grad_norm": 0.3160117268562317, "learning_rate": 8.767231335639834e-06, "loss": 0.3941, "step": 15027 }, { "epoch": 1.5278568523790157, "grad_norm": 0.28472504019737244, "learning_rate": 8.766997985252647e-06, "loss": 0.3525, "step": 15028 }, { "epoch": 1.5279585197234649, "grad_norm": 0.2960970997810364, "learning_rate": 8.766764615888222e-06, "loss": 0.3715, "step": 15029 }, { "epoch": 1.5280601870679138, "grad_norm": 0.2971268892288208, "learning_rate": 8.76653122754773e-06, "loss": 0.3651, "step": 15030 }, { "epoch": 1.5281618544123627, "grad_norm": 0.3118210434913635, "learning_rate": 8.766297820232348e-06, "loss": 0.3529, "step": 15031 }, { "epoch": 1.5282635217568117, "grad_norm": 0.29328006505966187, "learning_rate": 8.766064393943254e-06, "loss": 0.3677, "step": 15032 }, { "epoch": 1.5283651891012606, "grad_norm": 0.32317090034484863, "learning_rate": 8.765830948681623e-06, "loss": 0.3415, "step": 15033 }, { "epoch": 1.5284668564457098, "grad_norm": 0.2922527492046356, "learning_rate": 8.765597484448629e-06, "loss": 0.3837, "step": 15034 }, { "epoch": 1.5285685237901587, "grad_norm": 0.273448646068573, "learning_rate": 8.76536400124545e-06, "loss": 0.3622, "step": 15035 }, { "epoch": 1.5286701911346077, "grad_norm": 0.2747379243373871, "learning_rate": 8.765130499073265e-06, "loss": 0.351, "step": 15036 }, { "epoch": 1.5287718584790566, "grad_norm": 0.2846592664718628, "learning_rate": 8.764896977933243e-06, "loss": 0.3573, "step": 15037 }, { "epoch": 1.5288735258235056, "grad_norm": 0.29044264554977417, "learning_rate": 8.764663437826568e-06, "loss": 0.38, "step": 15038 }, { "epoch": 1.5289751931679545, "grad_norm": 0.29889363050460815, "learning_rate": 8.764429878754412e-06, "loss": 0.3806, "step": 15039 }, { "epoch": 1.5290768605124034, "grad_norm": 0.2853957712650299, "learning_rate": 8.764196300717952e-06, "loss": 0.3742, "step": 15040 }, { "epoch": 1.5291785278568524, "grad_norm": 0.3006371557712555, "learning_rate": 8.763962703718368e-06, "loss": 0.4065, "step": 15041 }, { "epoch": 1.5292801952013013, "grad_norm": 0.27753210067749023, "learning_rate": 8.763729087756831e-06, "loss": 0.3487, "step": 15042 }, { "epoch": 1.5293818625457503, "grad_norm": 0.2938295304775238, "learning_rate": 8.763495452834523e-06, "loss": 0.3619, "step": 15043 }, { "epoch": 1.5294835298901992, "grad_norm": 0.27719923853874207, "learning_rate": 8.763261798952619e-06, "loss": 0.3766, "step": 15044 }, { "epoch": 1.5295851972346481, "grad_norm": 0.307652086019516, "learning_rate": 8.763028126112297e-06, "loss": 0.387, "step": 15045 }, { "epoch": 1.529686864579097, "grad_norm": 0.30329111218452454, "learning_rate": 8.762794434314733e-06, "loss": 0.3734, "step": 15046 }, { "epoch": 1.529788531923546, "grad_norm": 0.2873097062110901, "learning_rate": 8.762560723561104e-06, "loss": 0.3434, "step": 15047 }, { "epoch": 1.529890199267995, "grad_norm": 0.29267629981040955, "learning_rate": 8.76232699385259e-06, "loss": 0.3642, "step": 15048 }, { "epoch": 1.5299918666124441, "grad_norm": 0.30347728729248047, "learning_rate": 8.762093245190366e-06, "loss": 0.3502, "step": 15049 }, { "epoch": 1.530093533956893, "grad_norm": 0.26930975914001465, "learning_rate": 8.76185947757561e-06, "loss": 0.3204, "step": 15050 }, { "epoch": 1.530195201301342, "grad_norm": 0.28634437918663025, "learning_rate": 8.761625691009498e-06, "loss": 0.3845, "step": 15051 }, { "epoch": 1.530296868645791, "grad_norm": 0.28175002336502075, "learning_rate": 8.761391885493213e-06, "loss": 0.344, "step": 15052 }, { "epoch": 1.53039853599024, "grad_norm": 0.3102426528930664, "learning_rate": 8.761158061027928e-06, "loss": 0.3811, "step": 15053 }, { "epoch": 1.530500203334689, "grad_norm": 0.2930986285209656, "learning_rate": 8.760924217614824e-06, "loss": 0.3943, "step": 15054 }, { "epoch": 1.530601870679138, "grad_norm": 0.30474722385406494, "learning_rate": 8.760690355255074e-06, "loss": 0.3286, "step": 15055 }, { "epoch": 1.530703538023587, "grad_norm": 0.25679993629455566, "learning_rate": 8.760456473949862e-06, "loss": 0.3455, "step": 15056 }, { "epoch": 1.5308052053680359, "grad_norm": 0.2712517976760864, "learning_rate": 8.760222573700363e-06, "loss": 0.3658, "step": 15057 }, { "epoch": 1.5309068727124848, "grad_norm": 0.30237990617752075, "learning_rate": 8.759988654507759e-06, "loss": 0.3713, "step": 15058 }, { "epoch": 1.5310085400569338, "grad_norm": 0.2889308035373688, "learning_rate": 8.759754716373224e-06, "loss": 0.3894, "step": 15059 }, { "epoch": 1.5311102074013827, "grad_norm": 0.2571866512298584, "learning_rate": 8.759520759297936e-06, "loss": 0.3539, "step": 15060 }, { "epoch": 1.5312118747458316, "grad_norm": 0.32726767659187317, "learning_rate": 8.759286783283077e-06, "loss": 0.3394, "step": 15061 }, { "epoch": 1.5313135420902806, "grad_norm": 0.2792609930038452, "learning_rate": 8.759052788329824e-06, "loss": 0.3701, "step": 15062 }, { "epoch": 1.5314152094347295, "grad_norm": 0.2789784073829651, "learning_rate": 8.75881877443936e-06, "loss": 0.3509, "step": 15063 }, { "epoch": 1.5315168767791785, "grad_norm": 0.2709193229675293, "learning_rate": 8.758584741612857e-06, "loss": 0.3528, "step": 15064 }, { "epoch": 1.5316185441236274, "grad_norm": 0.27331098914146423, "learning_rate": 8.758350689851499e-06, "loss": 0.3467, "step": 15065 }, { "epoch": 1.5317202114680764, "grad_norm": 0.2677009701728821, "learning_rate": 8.75811661915646e-06, "loss": 0.3306, "step": 15066 }, { "epoch": 1.5318218788125253, "grad_norm": 0.2969713509082794, "learning_rate": 8.757882529528926e-06, "loss": 0.3574, "step": 15067 }, { "epoch": 1.5319235461569742, "grad_norm": 0.2617255449295044, "learning_rate": 8.757648420970071e-06, "loss": 0.3705, "step": 15068 }, { "epoch": 1.5320252135014232, "grad_norm": 0.2678447365760803, "learning_rate": 8.757414293481077e-06, "loss": 0.3543, "step": 15069 }, { "epoch": 1.5321268808458723, "grad_norm": 0.28373610973358154, "learning_rate": 8.757180147063122e-06, "loss": 0.3569, "step": 15070 }, { "epoch": 1.5322285481903213, "grad_norm": 0.27534064650535583, "learning_rate": 8.756945981717387e-06, "loss": 0.3743, "step": 15071 }, { "epoch": 1.5323302155347702, "grad_norm": 0.274443656206131, "learning_rate": 8.75671179744505e-06, "loss": 0.3447, "step": 15072 }, { "epoch": 1.5324318828792192, "grad_norm": 0.2883564233779907, "learning_rate": 8.756477594247293e-06, "loss": 0.3886, "step": 15073 }, { "epoch": 1.532533550223668, "grad_norm": 0.28958243131637573, "learning_rate": 8.756243372125294e-06, "loss": 0.4166, "step": 15074 }, { "epoch": 1.5326352175681173, "grad_norm": 0.2661992609500885, "learning_rate": 8.756009131080234e-06, "loss": 0.3709, "step": 15075 }, { "epoch": 1.5327368849125662, "grad_norm": 0.28066739439964294, "learning_rate": 8.755774871113292e-06, "loss": 0.3585, "step": 15076 }, { "epoch": 1.5328385522570152, "grad_norm": 0.2898123860359192, "learning_rate": 8.755540592225649e-06, "loss": 0.3703, "step": 15077 }, { "epoch": 1.532940219601464, "grad_norm": 0.2972085773944855, "learning_rate": 8.755306294418483e-06, "loss": 0.3531, "step": 15078 }, { "epoch": 1.533041886945913, "grad_norm": 0.2944868206977844, "learning_rate": 8.75507197769298e-06, "loss": 0.3735, "step": 15079 }, { "epoch": 1.533143554290362, "grad_norm": 0.32405921816825867, "learning_rate": 8.754837642050314e-06, "loss": 0.3652, "step": 15080 }, { "epoch": 1.533245221634811, "grad_norm": 0.26779142022132874, "learning_rate": 8.754603287491668e-06, "loss": 0.3731, "step": 15081 }, { "epoch": 1.5333468889792599, "grad_norm": 0.2855853736400604, "learning_rate": 8.754368914018225e-06, "loss": 0.3823, "step": 15082 }, { "epoch": 1.5334485563237088, "grad_norm": 0.2880594730377197, "learning_rate": 8.754134521631162e-06, "loss": 0.3638, "step": 15083 }, { "epoch": 1.5335502236681577, "grad_norm": 0.2714146673679352, "learning_rate": 8.753900110331662e-06, "loss": 0.3594, "step": 15084 }, { "epoch": 1.5336518910126067, "grad_norm": 0.29097554087638855, "learning_rate": 8.753665680120905e-06, "loss": 0.3649, "step": 15085 }, { "epoch": 1.5337535583570556, "grad_norm": 0.28277015686035156, "learning_rate": 8.753431231000072e-06, "loss": 0.3625, "step": 15086 }, { "epoch": 1.5338552257015046, "grad_norm": 0.29126960039138794, "learning_rate": 8.753196762970345e-06, "loss": 0.3397, "step": 15087 }, { "epoch": 1.5339568930459535, "grad_norm": 0.2825663685798645, "learning_rate": 8.752962276032905e-06, "loss": 0.3558, "step": 15088 }, { "epoch": 1.5340585603904024, "grad_norm": 0.2810012400150299, "learning_rate": 8.752727770188933e-06, "loss": 0.3563, "step": 15089 }, { "epoch": 1.5341602277348516, "grad_norm": 0.28800880908966064, "learning_rate": 8.752493245439608e-06, "loss": 0.3746, "step": 15090 }, { "epoch": 1.5342618950793006, "grad_norm": 0.2705845832824707, "learning_rate": 8.752258701786115e-06, "loss": 0.3394, "step": 15091 }, { "epoch": 1.5343635624237495, "grad_norm": 0.3013242185115814, "learning_rate": 8.752024139229636e-06, "loss": 0.3689, "step": 15092 }, { "epoch": 1.5344652297681984, "grad_norm": 0.29980260133743286, "learning_rate": 8.75178955777135e-06, "loss": 0.4034, "step": 15093 }, { "epoch": 1.5345668971126474, "grad_norm": 0.28024834394454956, "learning_rate": 8.75155495741244e-06, "loss": 0.3369, "step": 15094 }, { "epoch": 1.5346685644570965, "grad_norm": 0.2937996983528137, "learning_rate": 8.751320338154086e-06, "loss": 0.3816, "step": 15095 }, { "epoch": 1.5347702318015455, "grad_norm": 0.2809475362300873, "learning_rate": 8.751085699997473e-06, "loss": 0.3795, "step": 15096 }, { "epoch": 1.5348718991459944, "grad_norm": 0.28508731722831726, "learning_rate": 8.750851042943781e-06, "loss": 0.3788, "step": 15097 }, { "epoch": 1.5349735664904434, "grad_norm": 0.3070884048938751, "learning_rate": 8.750616366994193e-06, "loss": 0.3646, "step": 15098 }, { "epoch": 1.5350752338348923, "grad_norm": 0.29631349444389343, "learning_rate": 8.750381672149891e-06, "loss": 0.3539, "step": 15099 }, { "epoch": 1.5351769011793412, "grad_norm": 0.2703409790992737, "learning_rate": 8.750146958412058e-06, "loss": 0.3355, "step": 15100 }, { "epoch": 1.5352785685237902, "grad_norm": 0.2662905752658844, "learning_rate": 8.749912225781875e-06, "loss": 0.3586, "step": 15101 }, { "epoch": 1.5353802358682391, "grad_norm": 0.2785159945487976, "learning_rate": 8.749677474260526e-06, "loss": 0.3337, "step": 15102 }, { "epoch": 1.535481903212688, "grad_norm": 0.29859909415245056, "learning_rate": 8.749442703849193e-06, "loss": 0.3623, "step": 15103 }, { "epoch": 1.535583570557137, "grad_norm": 0.2745892405509949, "learning_rate": 8.749207914549057e-06, "loss": 0.3508, "step": 15104 }, { "epoch": 1.535685237901586, "grad_norm": 0.26774856448173523, "learning_rate": 8.748973106361305e-06, "loss": 0.3918, "step": 15105 }, { "epoch": 1.535786905246035, "grad_norm": 0.28691262006759644, "learning_rate": 8.748738279287116e-06, "loss": 0.3435, "step": 15106 }, { "epoch": 1.5358885725904838, "grad_norm": 0.2794051468372345, "learning_rate": 8.748503433327674e-06, "loss": 0.3648, "step": 15107 }, { "epoch": 1.5359902399349328, "grad_norm": 0.27265551686286926, "learning_rate": 8.748268568484163e-06, "loss": 0.4234, "step": 15108 }, { "epoch": 1.5360919072793817, "grad_norm": 0.2814793288707733, "learning_rate": 8.748033684757766e-06, "loss": 0.3654, "step": 15109 }, { "epoch": 1.5361935746238307, "grad_norm": 0.2681819200515747, "learning_rate": 8.747798782149665e-06, "loss": 0.3705, "step": 15110 }, { "epoch": 1.5362952419682798, "grad_norm": 0.2707519233226776, "learning_rate": 8.747563860661046e-06, "loss": 0.367, "step": 15111 }, { "epoch": 1.5363969093127288, "grad_norm": 0.2715480923652649, "learning_rate": 8.74732892029309e-06, "loss": 0.3476, "step": 15112 }, { "epoch": 1.5364985766571777, "grad_norm": 0.25726571679115295, "learning_rate": 8.747093961046979e-06, "loss": 0.3465, "step": 15113 }, { "epoch": 1.5366002440016266, "grad_norm": 0.274983674287796, "learning_rate": 8.746858982923903e-06, "loss": 0.3457, "step": 15114 }, { "epoch": 1.5367019113460756, "grad_norm": 0.27313077449798584, "learning_rate": 8.74662398592504e-06, "loss": 0.3232, "step": 15115 }, { "epoch": 1.5368035786905248, "grad_norm": 0.2903252840042114, "learning_rate": 8.746388970051576e-06, "loss": 0.3977, "step": 15116 }, { "epoch": 1.5369052460349737, "grad_norm": 0.2785213887691498, "learning_rate": 8.746153935304693e-06, "loss": 0.3377, "step": 15117 }, { "epoch": 1.5370069133794226, "grad_norm": 0.28063178062438965, "learning_rate": 8.745918881685578e-06, "loss": 0.3655, "step": 15118 }, { "epoch": 1.5371085807238716, "grad_norm": 0.27950307726860046, "learning_rate": 8.745683809195415e-06, "loss": 0.3455, "step": 15119 }, { "epoch": 1.5372102480683205, "grad_norm": 0.2629068195819855, "learning_rate": 8.745448717835385e-06, "loss": 0.3489, "step": 15120 }, { "epoch": 1.5373119154127695, "grad_norm": 0.29657191038131714, "learning_rate": 8.745213607606677e-06, "loss": 0.4065, "step": 15121 }, { "epoch": 1.5374135827572184, "grad_norm": 0.2852165699005127, "learning_rate": 8.74497847851047e-06, "loss": 0.3548, "step": 15122 }, { "epoch": 1.5375152501016673, "grad_norm": 0.2564469277858734, "learning_rate": 8.744743330547954e-06, "loss": 0.3296, "step": 15123 }, { "epoch": 1.5376169174461163, "grad_norm": 0.27422595024108887, "learning_rate": 8.744508163720309e-06, "loss": 0.383, "step": 15124 }, { "epoch": 1.5377185847905652, "grad_norm": 0.3009960353374481, "learning_rate": 8.744272978028722e-06, "loss": 0.338, "step": 15125 }, { "epoch": 1.5378202521350142, "grad_norm": 0.28574511408805847, "learning_rate": 8.74403777347438e-06, "loss": 0.3444, "step": 15126 }, { "epoch": 1.537921919479463, "grad_norm": 0.29951733350753784, "learning_rate": 8.74380255005846e-06, "loss": 0.3696, "step": 15127 }, { "epoch": 1.538023586823912, "grad_norm": 0.29704540967941284, "learning_rate": 8.743567307782158e-06, "loss": 0.3763, "step": 15128 }, { "epoch": 1.538125254168361, "grad_norm": 0.2928360104560852, "learning_rate": 8.74333204664665e-06, "loss": 0.3448, "step": 15129 }, { "epoch": 1.53822692151281, "grad_norm": 0.2964533269405365, "learning_rate": 8.743096766653126e-06, "loss": 0.3447, "step": 15130 }, { "epoch": 1.538328588857259, "grad_norm": 0.29790154099464417, "learning_rate": 8.74286146780277e-06, "loss": 0.3778, "step": 15131 }, { "epoch": 1.538430256201708, "grad_norm": 0.2896919846534729, "learning_rate": 8.742626150096767e-06, "loss": 0.3721, "step": 15132 }, { "epoch": 1.538531923546157, "grad_norm": 0.28446921706199646, "learning_rate": 8.742390813536303e-06, "loss": 0.3526, "step": 15133 }, { "epoch": 1.538633590890606, "grad_norm": 0.2833463251590729, "learning_rate": 8.742155458122563e-06, "loss": 0.412, "step": 15134 }, { "epoch": 1.5387352582350549, "grad_norm": 0.2871229648590088, "learning_rate": 8.741920083856734e-06, "loss": 0.3359, "step": 15135 }, { "epoch": 1.538836925579504, "grad_norm": 0.29536136984825134, "learning_rate": 8.74168469074e-06, "loss": 0.346, "step": 15136 }, { "epoch": 1.538938592923953, "grad_norm": 0.27487677335739136, "learning_rate": 8.741449278773549e-06, "loss": 0.3445, "step": 15137 }, { "epoch": 1.539040260268402, "grad_norm": 0.2613946199417114, "learning_rate": 8.741213847958565e-06, "loss": 0.3543, "step": 15138 }, { "epoch": 1.5391419276128508, "grad_norm": 0.2901468873023987, "learning_rate": 8.740978398296233e-06, "loss": 0.3696, "step": 15139 }, { "epoch": 1.5392435949572998, "grad_norm": 0.28944626450538635, "learning_rate": 8.740742929787745e-06, "loss": 0.3513, "step": 15140 }, { "epoch": 1.5393452623017487, "grad_norm": 0.2706669270992279, "learning_rate": 8.740507442434278e-06, "loss": 0.3285, "step": 15141 }, { "epoch": 1.5394469296461977, "grad_norm": 0.29599887132644653, "learning_rate": 8.740271936237025e-06, "loss": 0.3776, "step": 15142 }, { "epoch": 1.5395485969906466, "grad_norm": 0.28518906235694885, "learning_rate": 8.740036411197172e-06, "loss": 0.3547, "step": 15143 }, { "epoch": 1.5396502643350956, "grad_norm": 0.2653728425502777, "learning_rate": 8.739800867315906e-06, "loss": 0.3736, "step": 15144 }, { "epoch": 1.5397519316795445, "grad_norm": 0.2849714457988739, "learning_rate": 8.73956530459441e-06, "loss": 0.3939, "step": 15145 }, { "epoch": 1.5398535990239934, "grad_norm": 0.27721694111824036, "learning_rate": 8.739329723033872e-06, "loss": 0.3318, "step": 15146 }, { "epoch": 1.5399552663684424, "grad_norm": 0.271984338760376, "learning_rate": 8.73909412263548e-06, "loss": 0.3357, "step": 15147 }, { "epoch": 1.5400569337128913, "grad_norm": 0.2876337766647339, "learning_rate": 8.73885850340042e-06, "loss": 0.3978, "step": 15148 }, { "epoch": 1.5401586010573403, "grad_norm": 0.2714316248893738, "learning_rate": 8.73862286532988e-06, "loss": 0.3682, "step": 15149 }, { "epoch": 1.5402602684017892, "grad_norm": 0.29442158341407776, "learning_rate": 8.738387208425046e-06, "loss": 0.4142, "step": 15150 }, { "epoch": 1.5403619357462381, "grad_norm": 0.275033175945282, "learning_rate": 8.738151532687104e-06, "loss": 0.3501, "step": 15151 }, { "epoch": 1.5404636030906873, "grad_norm": 0.268884539604187, "learning_rate": 8.737915838117245e-06, "loss": 0.356, "step": 15152 }, { "epoch": 1.5405652704351362, "grad_norm": 0.2930662930011749, "learning_rate": 8.737680124716655e-06, "loss": 0.3464, "step": 15153 }, { "epoch": 1.5406669377795852, "grad_norm": 0.2915477752685547, "learning_rate": 8.737444392486518e-06, "loss": 0.3491, "step": 15154 }, { "epoch": 1.5407686051240341, "grad_norm": 0.27332812547683716, "learning_rate": 8.737208641428027e-06, "loss": 0.36, "step": 15155 }, { "epoch": 1.540870272468483, "grad_norm": 0.28579726815223694, "learning_rate": 8.736972871542365e-06, "loss": 0.379, "step": 15156 }, { "epoch": 1.5409719398129322, "grad_norm": 0.285374253988266, "learning_rate": 8.736737082830724e-06, "loss": 0.3304, "step": 15157 }, { "epoch": 1.5410736071573812, "grad_norm": 0.2896430790424347, "learning_rate": 8.736501275294286e-06, "loss": 0.3656, "step": 15158 }, { "epoch": 1.5411752745018301, "grad_norm": 0.2786703109741211, "learning_rate": 8.736265448934246e-06, "loss": 0.3668, "step": 15159 }, { "epoch": 1.541276941846279, "grad_norm": 0.2851411998271942, "learning_rate": 8.736029603751788e-06, "loss": 0.3624, "step": 15160 }, { "epoch": 1.541378609190728, "grad_norm": 0.2952861487865448, "learning_rate": 8.7357937397481e-06, "loss": 0.3857, "step": 15161 }, { "epoch": 1.541480276535177, "grad_norm": 0.2674276828765869, "learning_rate": 8.73555785692437e-06, "loss": 0.3364, "step": 15162 }, { "epoch": 1.5415819438796259, "grad_norm": 0.30939650535583496, "learning_rate": 8.735321955281788e-06, "loss": 0.3915, "step": 15163 }, { "epoch": 1.5416836112240748, "grad_norm": 0.28905975818634033, "learning_rate": 8.73508603482154e-06, "loss": 0.3798, "step": 15164 }, { "epoch": 1.5417852785685238, "grad_norm": 0.279967725276947, "learning_rate": 8.734850095544818e-06, "loss": 0.3506, "step": 15165 }, { "epoch": 1.5418869459129727, "grad_norm": 0.2984767258167267, "learning_rate": 8.73461413745281e-06, "loss": 0.3475, "step": 15166 }, { "epoch": 1.5419886132574216, "grad_norm": 0.2908956706523895, "learning_rate": 8.734378160546701e-06, "loss": 0.3453, "step": 15167 }, { "epoch": 1.5420902806018706, "grad_norm": 0.2728022038936615, "learning_rate": 8.734142164827683e-06, "loss": 0.3604, "step": 15168 }, { "epoch": 1.5421919479463195, "grad_norm": 0.28916603326797485, "learning_rate": 8.733906150296944e-06, "loss": 0.3878, "step": 15169 }, { "epoch": 1.5422936152907685, "grad_norm": 0.2922925353050232, "learning_rate": 8.733670116955675e-06, "loss": 0.3807, "step": 15170 }, { "epoch": 1.5423952826352174, "grad_norm": 0.2699294984340668, "learning_rate": 8.733434064805062e-06, "loss": 0.3659, "step": 15171 }, { "epoch": 1.5424969499796666, "grad_norm": 0.30958715081214905, "learning_rate": 8.733197993846295e-06, "loss": 0.369, "step": 15172 }, { "epoch": 1.5425986173241155, "grad_norm": 0.2988267242908478, "learning_rate": 8.732961904080563e-06, "loss": 0.355, "step": 15173 }, { "epoch": 1.5427002846685645, "grad_norm": 0.3200112283229828, "learning_rate": 8.732725795509057e-06, "loss": 0.3427, "step": 15174 }, { "epoch": 1.5428019520130134, "grad_norm": 0.2936219871044159, "learning_rate": 8.732489668132965e-06, "loss": 0.3693, "step": 15175 }, { "epoch": 1.5429036193574623, "grad_norm": 0.293525755405426, "learning_rate": 8.732253521953478e-06, "loss": 0.3597, "step": 15176 }, { "epoch": 1.5430052867019115, "grad_norm": 0.289750337600708, "learning_rate": 8.732017356971783e-06, "loss": 0.3557, "step": 15177 }, { "epoch": 1.5431069540463604, "grad_norm": 0.2803717851638794, "learning_rate": 8.731781173189074e-06, "loss": 0.3646, "step": 15178 }, { "epoch": 1.5432086213908094, "grad_norm": 0.2737821936607361, "learning_rate": 8.731544970606534e-06, "loss": 0.375, "step": 15179 }, { "epoch": 1.5433102887352583, "grad_norm": 0.3089364767074585, "learning_rate": 8.73130874922536e-06, "loss": 0.346, "step": 15180 }, { "epoch": 1.5434119560797073, "grad_norm": 0.3360886573791504, "learning_rate": 8.73107250904674e-06, "loss": 0.3594, "step": 15181 }, { "epoch": 1.5435136234241562, "grad_norm": 0.30072495341300964, "learning_rate": 8.730836250071863e-06, "loss": 0.3701, "step": 15182 }, { "epoch": 1.5436152907686052, "grad_norm": 0.3066934049129486, "learning_rate": 8.730599972301918e-06, "loss": 0.3506, "step": 15183 }, { "epoch": 1.543716958113054, "grad_norm": 0.2865592837333679, "learning_rate": 8.730363675738099e-06, "loss": 0.3411, "step": 15184 }, { "epoch": 1.543818625457503, "grad_norm": 0.3006577789783478, "learning_rate": 8.730127360381592e-06, "loss": 0.3821, "step": 15185 }, { "epoch": 1.543920292801952, "grad_norm": 0.2971705496311188, "learning_rate": 8.72989102623359e-06, "loss": 0.3712, "step": 15186 }, { "epoch": 1.544021960146401, "grad_norm": 0.29645657539367676, "learning_rate": 8.729654673295285e-06, "loss": 0.3713, "step": 15187 }, { "epoch": 1.5441236274908499, "grad_norm": 0.28919342160224915, "learning_rate": 8.729418301567865e-06, "loss": 0.3434, "step": 15188 }, { "epoch": 1.5442252948352988, "grad_norm": 0.33934035897254944, "learning_rate": 8.729181911052521e-06, "loss": 0.3991, "step": 15189 }, { "epoch": 1.5443269621797477, "grad_norm": 0.29725125432014465, "learning_rate": 8.728945501750446e-06, "loss": 0.3761, "step": 15190 }, { "epoch": 1.5444286295241967, "grad_norm": 0.2886565029621124, "learning_rate": 8.728709073662831e-06, "loss": 0.3285, "step": 15191 }, { "epoch": 1.5445302968686456, "grad_norm": 0.343161016702652, "learning_rate": 8.728472626790864e-06, "loss": 0.3599, "step": 15192 }, { "epoch": 1.5446319642130948, "grad_norm": 0.2983802855014801, "learning_rate": 8.728236161135739e-06, "loss": 0.3743, "step": 15193 }, { "epoch": 1.5447336315575437, "grad_norm": 0.280004620552063, "learning_rate": 8.727999676698644e-06, "loss": 0.3401, "step": 15194 }, { "epoch": 1.5448352989019927, "grad_norm": 0.3198195695877075, "learning_rate": 8.727763173480774e-06, "loss": 0.3846, "step": 15195 }, { "epoch": 1.5449369662464416, "grad_norm": 0.31455197930336, "learning_rate": 8.72752665148332e-06, "loss": 0.3654, "step": 15196 }, { "epoch": 1.5450386335908906, "grad_norm": 0.2632719576358795, "learning_rate": 8.72729011070747e-06, "loss": 0.3626, "step": 15197 }, { "epoch": 1.5451403009353397, "grad_norm": 0.33627715706825256, "learning_rate": 8.727053551154421e-06, "loss": 0.3874, "step": 15198 }, { "epoch": 1.5452419682797887, "grad_norm": 0.31315866112709045, "learning_rate": 8.72681697282536e-06, "loss": 0.3903, "step": 15199 }, { "epoch": 1.5453436356242376, "grad_norm": 0.27677783370018005, "learning_rate": 8.72658037572148e-06, "loss": 0.3638, "step": 15200 }, { "epoch": 1.5454453029686865, "grad_norm": 0.29876604676246643, "learning_rate": 8.726343759843976e-06, "loss": 0.3564, "step": 15201 }, { "epoch": 1.5455469703131355, "grad_norm": 0.31727442145347595, "learning_rate": 8.726107125194036e-06, "loss": 0.372, "step": 15202 }, { "epoch": 1.5456486376575844, "grad_norm": 0.29617831110954285, "learning_rate": 8.725870471772856e-06, "loss": 0.3606, "step": 15203 }, { "epoch": 1.5457503050020334, "grad_norm": 0.2941001355648041, "learning_rate": 8.725633799581621e-06, "loss": 0.3673, "step": 15204 }, { "epoch": 1.5458519723464823, "grad_norm": 0.27929213643074036, "learning_rate": 8.725397108621533e-06, "loss": 0.348, "step": 15205 }, { "epoch": 1.5459536396909312, "grad_norm": 0.3263901174068451, "learning_rate": 8.725160398893778e-06, "loss": 0.3751, "step": 15206 }, { "epoch": 1.5460553070353802, "grad_norm": 0.29593876004219055, "learning_rate": 8.72492367039955e-06, "loss": 0.3638, "step": 15207 }, { "epoch": 1.5461569743798291, "grad_norm": 0.2825709879398346, "learning_rate": 8.724686923140041e-06, "loss": 0.3389, "step": 15208 }, { "epoch": 1.546258641724278, "grad_norm": 0.2862740457057953, "learning_rate": 8.724450157116447e-06, "loss": 0.3504, "step": 15209 }, { "epoch": 1.546360309068727, "grad_norm": 0.2863934338092804, "learning_rate": 8.724213372329957e-06, "loss": 0.3873, "step": 15210 }, { "epoch": 1.546461976413176, "grad_norm": 0.30150356888771057, "learning_rate": 8.723976568781764e-06, "loss": 0.3346, "step": 15211 }, { "epoch": 1.546563643757625, "grad_norm": 0.29759982228279114, "learning_rate": 8.723739746473063e-06, "loss": 0.381, "step": 15212 }, { "epoch": 1.546665311102074, "grad_norm": 0.2640208899974823, "learning_rate": 8.723502905405046e-06, "loss": 0.3701, "step": 15213 }, { "epoch": 1.546766978446523, "grad_norm": 0.2871916890144348, "learning_rate": 8.723266045578904e-06, "loss": 0.3315, "step": 15214 }, { "epoch": 1.546868645790972, "grad_norm": 0.28876590728759766, "learning_rate": 8.723029166995836e-06, "loss": 0.3924, "step": 15215 }, { "epoch": 1.5469703131354209, "grad_norm": 0.2829875349998474, "learning_rate": 8.72279226965703e-06, "loss": 0.3761, "step": 15216 }, { "epoch": 1.5470719804798698, "grad_norm": 0.26071324944496155, "learning_rate": 8.72255535356368e-06, "loss": 0.3856, "step": 15217 }, { "epoch": 1.547173647824319, "grad_norm": 0.28443291783332825, "learning_rate": 8.722318418716984e-06, "loss": 0.4121, "step": 15218 }, { "epoch": 1.547275315168768, "grad_norm": 0.29259219765663147, "learning_rate": 8.722081465118128e-06, "loss": 0.354, "step": 15219 }, { "epoch": 1.5473769825132169, "grad_norm": 0.2869221270084381, "learning_rate": 8.721844492768314e-06, "loss": 0.3695, "step": 15220 }, { "epoch": 1.5474786498576658, "grad_norm": 0.2995619475841522, "learning_rate": 8.72160750166873e-06, "loss": 0.3321, "step": 15221 }, { "epoch": 1.5475803172021148, "grad_norm": 0.28425660729408264, "learning_rate": 8.721370491820572e-06, "loss": 0.3613, "step": 15222 }, { "epoch": 1.5476819845465637, "grad_norm": 0.2835533916950226, "learning_rate": 8.721133463225035e-06, "loss": 0.3317, "step": 15223 }, { "epoch": 1.5477836518910126, "grad_norm": 0.28214704990386963, "learning_rate": 8.720896415883311e-06, "loss": 0.359, "step": 15224 }, { "epoch": 1.5478853192354616, "grad_norm": 0.3014999032020569, "learning_rate": 8.720659349796595e-06, "loss": 0.3757, "step": 15225 }, { "epoch": 1.5479869865799105, "grad_norm": 0.2619799077510834, "learning_rate": 8.72042226496608e-06, "loss": 0.3227, "step": 15226 }, { "epoch": 1.5480886539243595, "grad_norm": 0.26191630959510803, "learning_rate": 8.720185161392964e-06, "loss": 0.3463, "step": 15227 }, { "epoch": 1.5481903212688084, "grad_norm": 0.29410991072654724, "learning_rate": 8.719948039078438e-06, "loss": 0.3388, "step": 15228 }, { "epoch": 1.5482919886132573, "grad_norm": 0.29645052552223206, "learning_rate": 8.719710898023698e-06, "loss": 0.3767, "step": 15229 }, { "epoch": 1.5483936559577063, "grad_norm": 0.287393718957901, "learning_rate": 8.719473738229938e-06, "loss": 0.3276, "step": 15230 }, { "epoch": 1.5484953233021552, "grad_norm": 0.28595152497291565, "learning_rate": 8.719236559698355e-06, "loss": 0.3954, "step": 15231 }, { "epoch": 1.5485969906466042, "grad_norm": 0.2869609594345093, "learning_rate": 8.71899936243014e-06, "loss": 0.3607, "step": 15232 }, { "epoch": 1.5486986579910533, "grad_norm": 0.25336775183677673, "learning_rate": 8.71876214642649e-06, "loss": 0.3492, "step": 15233 }, { "epoch": 1.5488003253355023, "grad_norm": 0.2884986400604248, "learning_rate": 8.718524911688602e-06, "loss": 0.3417, "step": 15234 }, { "epoch": 1.5489019926799512, "grad_norm": 0.2869276702404022, "learning_rate": 8.718287658217667e-06, "loss": 0.3495, "step": 15235 }, { "epoch": 1.5490036600244002, "grad_norm": 0.2877179682254791, "learning_rate": 8.718050386014883e-06, "loss": 0.394, "step": 15236 }, { "epoch": 1.549105327368849, "grad_norm": 0.3096331059932709, "learning_rate": 8.717813095081444e-06, "loss": 0.372, "step": 15237 }, { "epoch": 1.549206994713298, "grad_norm": 0.30536454916000366, "learning_rate": 8.717575785418549e-06, "loss": 0.3552, "step": 15238 }, { "epoch": 1.5493086620577472, "grad_norm": 0.281625360250473, "learning_rate": 8.717338457027388e-06, "loss": 0.3765, "step": 15239 }, { "epoch": 1.5494103294021961, "grad_norm": 0.3339979946613312, "learning_rate": 8.71710110990916e-06, "loss": 0.3845, "step": 15240 }, { "epoch": 1.549511996746645, "grad_norm": 0.28406473994255066, "learning_rate": 8.716863744065059e-06, "loss": 0.3579, "step": 15241 }, { "epoch": 1.549613664091094, "grad_norm": 0.26962366700172424, "learning_rate": 8.716626359496284e-06, "loss": 0.34, "step": 15242 }, { "epoch": 1.549715331435543, "grad_norm": 0.2977949380874634, "learning_rate": 8.716388956204026e-06, "loss": 0.3659, "step": 15243 }, { "epoch": 1.549816998779992, "grad_norm": 0.28971654176712036, "learning_rate": 8.716151534189486e-06, "loss": 0.3259, "step": 15244 }, { "epoch": 1.5499186661244408, "grad_norm": 0.29397210478782654, "learning_rate": 8.715914093453856e-06, "loss": 0.3634, "step": 15245 }, { "epoch": 1.5500203334688898, "grad_norm": 0.28753042221069336, "learning_rate": 8.715676633998333e-06, "loss": 0.3209, "step": 15246 }, { "epoch": 1.5501220008133387, "grad_norm": 0.27895238995552063, "learning_rate": 8.715439155824116e-06, "loss": 0.3705, "step": 15247 }, { "epoch": 1.5502236681577877, "grad_norm": 0.307963103055954, "learning_rate": 8.715201658932399e-06, "loss": 0.3831, "step": 15248 }, { "epoch": 1.5503253355022366, "grad_norm": 0.27590957283973694, "learning_rate": 8.714964143324379e-06, "loss": 0.3432, "step": 15249 }, { "epoch": 1.5504270028466856, "grad_norm": 0.30155590176582336, "learning_rate": 8.714726609001252e-06, "loss": 0.3948, "step": 15250 }, { "epoch": 1.5505286701911345, "grad_norm": 0.297118604183197, "learning_rate": 8.714489055964215e-06, "loss": 0.3949, "step": 15251 }, { "epoch": 1.5506303375355834, "grad_norm": 0.28668248653411865, "learning_rate": 8.714251484214465e-06, "loss": 0.3703, "step": 15252 }, { "epoch": 1.5507320048800324, "grad_norm": 0.29411327838897705, "learning_rate": 8.7140138937532e-06, "loss": 0.3526, "step": 15253 }, { "epoch": 1.5508336722244815, "grad_norm": 0.2795754671096802, "learning_rate": 8.713776284581614e-06, "loss": 0.3237, "step": 15254 }, { "epoch": 1.5509353395689305, "grad_norm": 0.31487706303596497, "learning_rate": 8.713538656700905e-06, "loss": 0.383, "step": 15255 }, { "epoch": 1.5510370069133794, "grad_norm": 0.30553963780403137, "learning_rate": 8.713301010112272e-06, "loss": 0.3611, "step": 15256 }, { "epoch": 1.5511386742578284, "grad_norm": 0.28247177600860596, "learning_rate": 8.71306334481691e-06, "loss": 0.3621, "step": 15257 }, { "epoch": 1.5512403416022773, "grad_norm": 0.2882896363735199, "learning_rate": 8.712825660816017e-06, "loss": 0.3673, "step": 15258 }, { "epoch": 1.5513420089467265, "grad_norm": 0.2803821861743927, "learning_rate": 8.712587958110793e-06, "loss": 0.3463, "step": 15259 }, { "epoch": 1.5514436762911754, "grad_norm": 0.28339046239852905, "learning_rate": 8.712350236702432e-06, "loss": 0.3505, "step": 15260 }, { "epoch": 1.5515453436356244, "grad_norm": 0.29554858803749084, "learning_rate": 8.71211249659213e-06, "loss": 0.3468, "step": 15261 }, { "epoch": 1.5516470109800733, "grad_norm": 0.2802450358867645, "learning_rate": 8.711874737781092e-06, "loss": 0.3497, "step": 15262 }, { "epoch": 1.5517486783245222, "grad_norm": 0.26950281858444214, "learning_rate": 8.711636960270508e-06, "loss": 0.3528, "step": 15263 }, { "epoch": 1.5518503456689712, "grad_norm": 0.3145018219947815, "learning_rate": 8.711399164061581e-06, "loss": 0.3576, "step": 15264 }, { "epoch": 1.5519520130134201, "grad_norm": 0.2968461215496063, "learning_rate": 8.711161349155504e-06, "loss": 0.3742, "step": 15265 }, { "epoch": 1.552053680357869, "grad_norm": 0.29584214091300964, "learning_rate": 8.71092351555348e-06, "loss": 0.346, "step": 15266 }, { "epoch": 1.552155347702318, "grad_norm": 0.2809993624687195, "learning_rate": 8.710685663256707e-06, "loss": 0.353, "step": 15267 }, { "epoch": 1.552257015046767, "grad_norm": 0.2625660300254822, "learning_rate": 8.710447792266378e-06, "loss": 0.3543, "step": 15268 }, { "epoch": 1.5523586823912159, "grad_norm": 0.28701382875442505, "learning_rate": 8.710209902583697e-06, "loss": 0.3699, "step": 15269 }, { "epoch": 1.5524603497356648, "grad_norm": 0.27893275022506714, "learning_rate": 8.709971994209859e-06, "loss": 0.3326, "step": 15270 }, { "epoch": 1.5525620170801138, "grad_norm": 0.30313628911972046, "learning_rate": 8.709734067146062e-06, "loss": 0.3937, "step": 15271 }, { "epoch": 1.5526636844245627, "grad_norm": 0.29097169637680054, "learning_rate": 8.70949612139351e-06, "loss": 0.3507, "step": 15272 }, { "epoch": 1.5527653517690116, "grad_norm": 0.2798313498497009, "learning_rate": 8.709258156953396e-06, "loss": 0.3433, "step": 15273 }, { "epoch": 1.5528670191134608, "grad_norm": 0.29647064208984375, "learning_rate": 8.709020173826921e-06, "loss": 0.3686, "step": 15274 }, { "epoch": 1.5529686864579098, "grad_norm": 0.28497615456581116, "learning_rate": 8.708782172015284e-06, "loss": 0.3429, "step": 15275 }, { "epoch": 1.5530703538023587, "grad_norm": 0.3012760281562805, "learning_rate": 8.708544151519685e-06, "loss": 0.3677, "step": 15276 }, { "epoch": 1.5531720211468076, "grad_norm": 0.27264469861984253, "learning_rate": 8.70830611234132e-06, "loss": 0.3394, "step": 15277 }, { "epoch": 1.5532736884912566, "grad_norm": 0.2775481939315796, "learning_rate": 8.708068054481391e-06, "loss": 0.3407, "step": 15278 }, { "epoch": 1.5533753558357055, "grad_norm": 0.2938298285007477, "learning_rate": 8.707829977941096e-06, "loss": 0.3532, "step": 15279 }, { "epoch": 1.5534770231801547, "grad_norm": 0.2725779414176941, "learning_rate": 8.707591882721635e-06, "loss": 0.3335, "step": 15280 }, { "epoch": 1.5535786905246036, "grad_norm": 0.2732509970664978, "learning_rate": 8.707353768824207e-06, "loss": 0.3299, "step": 15281 }, { "epoch": 1.5536803578690526, "grad_norm": 0.28095391392707825, "learning_rate": 8.707115636250012e-06, "loss": 0.355, "step": 15282 }, { "epoch": 1.5537820252135015, "grad_norm": 0.27317672967910767, "learning_rate": 8.706877485000249e-06, "loss": 0.3373, "step": 15283 }, { "epoch": 1.5538836925579504, "grad_norm": 0.2835816740989685, "learning_rate": 8.706639315076118e-06, "loss": 0.3588, "step": 15284 }, { "epoch": 1.5539853599023994, "grad_norm": 0.3004370629787445, "learning_rate": 8.706401126478819e-06, "loss": 0.3472, "step": 15285 }, { "epoch": 1.5540870272468483, "grad_norm": 0.2957928776741028, "learning_rate": 8.70616291920955e-06, "loss": 0.3538, "step": 15286 }, { "epoch": 1.5541886945912973, "grad_norm": 0.2791310250759125, "learning_rate": 8.705924693269516e-06, "loss": 0.4008, "step": 15287 }, { "epoch": 1.5542903619357462, "grad_norm": 0.2893129885196686, "learning_rate": 8.705686448659914e-06, "loss": 0.3538, "step": 15288 }, { "epoch": 1.5543920292801952, "grad_norm": 0.2823847830295563, "learning_rate": 8.705448185381944e-06, "loss": 0.3604, "step": 15289 }, { "epoch": 1.554493696624644, "grad_norm": 0.2798917293548584, "learning_rate": 8.705209903436804e-06, "loss": 0.3682, "step": 15290 }, { "epoch": 1.554595363969093, "grad_norm": 0.29835790395736694, "learning_rate": 8.704971602825701e-06, "loss": 0.3635, "step": 15291 }, { "epoch": 1.554697031313542, "grad_norm": 0.29521867632865906, "learning_rate": 8.704733283549827e-06, "loss": 0.3576, "step": 15292 }, { "epoch": 1.554798698657991, "grad_norm": 0.2842954397201538, "learning_rate": 8.70449494561039e-06, "loss": 0.3825, "step": 15293 }, { "epoch": 1.5549003660024399, "grad_norm": 0.3007007837295532, "learning_rate": 8.704256589008587e-06, "loss": 0.386, "step": 15294 }, { "epoch": 1.555002033346889, "grad_norm": 0.27238091826438904, "learning_rate": 8.70401821374562e-06, "loss": 0.3437, "step": 15295 }, { "epoch": 1.555103700691338, "grad_norm": 0.28072604537010193, "learning_rate": 8.70377981982269e-06, "loss": 0.3442, "step": 15296 }, { "epoch": 1.555205368035787, "grad_norm": 0.3013227880001068, "learning_rate": 8.703541407240996e-06, "loss": 0.3491, "step": 15297 }, { "epoch": 1.5553070353802358, "grad_norm": 0.30054062604904175, "learning_rate": 8.703302976001742e-06, "loss": 0.3653, "step": 15298 }, { "epoch": 1.5554087027246848, "grad_norm": 0.2865429222583771, "learning_rate": 8.703064526106126e-06, "loss": 0.3619, "step": 15299 }, { "epoch": 1.555510370069134, "grad_norm": 0.29219213128089905, "learning_rate": 8.702826057555352e-06, "loss": 0.3959, "step": 15300 }, { "epoch": 1.555612037413583, "grad_norm": 0.3116801679134369, "learning_rate": 8.70258757035062e-06, "loss": 0.3532, "step": 15301 }, { "epoch": 1.5557137047580318, "grad_norm": 0.2938770055770874, "learning_rate": 8.702349064493131e-06, "loss": 0.3475, "step": 15302 }, { "epoch": 1.5558153721024808, "grad_norm": 0.31991222500801086, "learning_rate": 8.702110539984088e-06, "loss": 0.4023, "step": 15303 }, { "epoch": 1.5559170394469297, "grad_norm": 0.3363201320171356, "learning_rate": 8.701871996824691e-06, "loss": 0.3743, "step": 15304 }, { "epoch": 1.5560187067913787, "grad_norm": 0.306733101606369, "learning_rate": 8.701633435016144e-06, "loss": 0.3786, "step": 15305 }, { "epoch": 1.5561203741358276, "grad_norm": 0.28389972448349, "learning_rate": 8.701394854559646e-06, "loss": 0.3726, "step": 15306 }, { "epoch": 1.5562220414802765, "grad_norm": 0.2751694321632385, "learning_rate": 8.701156255456403e-06, "loss": 0.3679, "step": 15307 }, { "epoch": 1.5563237088247255, "grad_norm": 0.3164496421813965, "learning_rate": 8.70091763770761e-06, "loss": 0.3841, "step": 15308 }, { "epoch": 1.5564253761691744, "grad_norm": 0.30058425664901733, "learning_rate": 8.700679001314475e-06, "loss": 0.3785, "step": 15309 }, { "epoch": 1.5565270435136234, "grad_norm": 0.28035229444503784, "learning_rate": 8.7004403462782e-06, "loss": 0.3741, "step": 15310 }, { "epoch": 1.5566287108580723, "grad_norm": 0.28449925780296326, "learning_rate": 8.700201672599986e-06, "loss": 0.327, "step": 15311 }, { "epoch": 1.5567303782025212, "grad_norm": 0.2943291664123535, "learning_rate": 8.699962980281034e-06, "loss": 0.3718, "step": 15312 }, { "epoch": 1.5568320455469702, "grad_norm": 0.2696123421192169, "learning_rate": 8.69972426932255e-06, "loss": 0.3394, "step": 15313 }, { "epoch": 1.5569337128914191, "grad_norm": 0.26374807953834534, "learning_rate": 8.699485539725732e-06, "loss": 0.3651, "step": 15314 }, { "epoch": 1.5570353802358683, "grad_norm": 0.27318423986434937, "learning_rate": 8.699246791491784e-06, "loss": 0.3366, "step": 15315 }, { "epoch": 1.5571370475803172, "grad_norm": 0.2777385115623474, "learning_rate": 8.699008024621914e-06, "loss": 0.3947, "step": 15316 }, { "epoch": 1.5572387149247662, "grad_norm": 0.28359588980674744, "learning_rate": 8.698769239117317e-06, "loss": 0.3357, "step": 15317 }, { "epoch": 1.5573403822692151, "grad_norm": 0.270283043384552, "learning_rate": 8.6985304349792e-06, "loss": 0.4055, "step": 15318 }, { "epoch": 1.557442049613664, "grad_norm": 0.3085568845272064, "learning_rate": 8.698291612208766e-06, "loss": 0.3627, "step": 15319 }, { "epoch": 1.557543716958113, "grad_norm": 0.2853529155254364, "learning_rate": 8.698052770807217e-06, "loss": 0.3539, "step": 15320 }, { "epoch": 1.5576453843025622, "grad_norm": 0.2572682499885559, "learning_rate": 8.697813910775758e-06, "loss": 0.3332, "step": 15321 }, { "epoch": 1.557747051647011, "grad_norm": 0.26826608180999756, "learning_rate": 8.69757503211559e-06, "loss": 0.3495, "step": 15322 }, { "epoch": 1.55784871899146, "grad_norm": 0.28277942538261414, "learning_rate": 8.69733613482792e-06, "loss": 0.3632, "step": 15323 }, { "epoch": 1.557950386335909, "grad_norm": 0.3116951882839203, "learning_rate": 8.697097218913948e-06, "loss": 0.3937, "step": 15324 }, { "epoch": 1.558052053680358, "grad_norm": 0.27619922161102295, "learning_rate": 8.696858284374878e-06, "loss": 0.3412, "step": 15325 }, { "epoch": 1.5581537210248069, "grad_norm": 0.27496418356895447, "learning_rate": 8.696619331211915e-06, "loss": 0.3298, "step": 15326 }, { "epoch": 1.5582553883692558, "grad_norm": 0.2725966274738312, "learning_rate": 8.696380359426262e-06, "loss": 0.3683, "step": 15327 }, { "epoch": 1.5583570557137048, "grad_norm": 0.28046026825904846, "learning_rate": 8.696141369019123e-06, "loss": 0.3342, "step": 15328 }, { "epoch": 1.5584587230581537, "grad_norm": 0.2709694504737854, "learning_rate": 8.695902359991704e-06, "loss": 0.3819, "step": 15329 }, { "epoch": 1.5585603904026026, "grad_norm": 0.29208824038505554, "learning_rate": 8.695663332345206e-06, "loss": 0.3564, "step": 15330 }, { "epoch": 1.5586620577470516, "grad_norm": 0.28264278173446655, "learning_rate": 8.695424286080833e-06, "loss": 0.3374, "step": 15331 }, { "epoch": 1.5587637250915005, "grad_norm": 0.3070691227912903, "learning_rate": 8.695185221199791e-06, "loss": 0.3647, "step": 15332 }, { "epoch": 1.5588653924359495, "grad_norm": 0.3101685047149658, "learning_rate": 8.694946137703286e-06, "loss": 0.3381, "step": 15333 }, { "epoch": 1.5589670597803984, "grad_norm": 0.2952410876750946, "learning_rate": 8.694707035592518e-06, "loss": 0.3989, "step": 15334 }, { "epoch": 1.5590687271248473, "grad_norm": 0.2856689989566803, "learning_rate": 8.694467914868696e-06, "loss": 0.3349, "step": 15335 }, { "epoch": 1.5591703944692965, "grad_norm": 0.2884664237499237, "learning_rate": 8.69422877553302e-06, "loss": 0.3647, "step": 15336 }, { "epoch": 1.5592720618137454, "grad_norm": 0.2843679189682007, "learning_rate": 8.6939896175867e-06, "loss": 0.3483, "step": 15337 }, { "epoch": 1.5593737291581944, "grad_norm": 0.2852536141872406, "learning_rate": 8.693750441030938e-06, "loss": 0.3605, "step": 15338 }, { "epoch": 1.5594753965026433, "grad_norm": 0.2900118827819824, "learning_rate": 8.693511245866936e-06, "loss": 0.375, "step": 15339 }, { "epoch": 1.5595770638470923, "grad_norm": 0.29040324687957764, "learning_rate": 8.693272032095905e-06, "loss": 0.3768, "step": 15340 }, { "epoch": 1.5596787311915414, "grad_norm": 0.3019695580005646, "learning_rate": 8.693032799719046e-06, "loss": 0.3964, "step": 15341 }, { "epoch": 1.5597803985359904, "grad_norm": 0.27333274483680725, "learning_rate": 8.692793548737566e-06, "loss": 0.3616, "step": 15342 }, { "epoch": 1.5598820658804393, "grad_norm": 0.27407440543174744, "learning_rate": 8.69255427915267e-06, "loss": 0.3798, "step": 15343 }, { "epoch": 1.5599837332248883, "grad_norm": 0.2752275764942169, "learning_rate": 8.692314990965563e-06, "loss": 0.3547, "step": 15344 }, { "epoch": 1.5600854005693372, "grad_norm": 0.2815346419811249, "learning_rate": 8.692075684177449e-06, "loss": 0.3657, "step": 15345 }, { "epoch": 1.5601870679137861, "grad_norm": 0.29868465662002563, "learning_rate": 8.691836358789537e-06, "loss": 0.3783, "step": 15346 }, { "epoch": 1.560288735258235, "grad_norm": 0.270846962928772, "learning_rate": 8.691597014803027e-06, "loss": 0.3365, "step": 15347 }, { "epoch": 1.560390402602684, "grad_norm": 0.2660784423351288, "learning_rate": 8.691357652219132e-06, "loss": 0.3789, "step": 15348 }, { "epoch": 1.560492069947133, "grad_norm": 0.2903192341327667, "learning_rate": 8.691118271039054e-06, "loss": 0.4057, "step": 15349 }, { "epoch": 1.560593737291582, "grad_norm": 0.26242145895957947, "learning_rate": 8.690878871263998e-06, "loss": 0.3806, "step": 15350 }, { "epoch": 1.5606954046360308, "grad_norm": 0.31200066208839417, "learning_rate": 8.690639452895174e-06, "loss": 0.4211, "step": 15351 }, { "epoch": 1.5607970719804798, "grad_norm": 0.29258638620376587, "learning_rate": 8.690400015933781e-06, "loss": 0.3704, "step": 15352 }, { "epoch": 1.5608987393249287, "grad_norm": 0.27872762084007263, "learning_rate": 8.690160560381034e-06, "loss": 0.3776, "step": 15353 }, { "epoch": 1.5610004066693777, "grad_norm": 0.282307505607605, "learning_rate": 8.689921086238134e-06, "loss": 0.3519, "step": 15354 }, { "epoch": 1.5611020740138266, "grad_norm": 0.2782759666442871, "learning_rate": 8.689681593506286e-06, "loss": 0.356, "step": 15355 }, { "epoch": 1.5612037413582758, "grad_norm": 0.28744661808013916, "learning_rate": 8.6894420821867e-06, "loss": 0.3482, "step": 15356 }, { "epoch": 1.5613054087027247, "grad_norm": 0.3203432857990265, "learning_rate": 8.689202552280583e-06, "loss": 0.3533, "step": 15357 }, { "epoch": 1.5614070760471737, "grad_norm": 0.2912544310092926, "learning_rate": 8.68896300378914e-06, "loss": 0.3438, "step": 15358 }, { "epoch": 1.5615087433916226, "grad_norm": 0.27779728174209595, "learning_rate": 8.688723436713577e-06, "loss": 0.3568, "step": 15359 }, { "epoch": 1.5616104107360715, "grad_norm": 0.28581932187080383, "learning_rate": 8.688483851055104e-06, "loss": 0.3282, "step": 15360 }, { "epoch": 1.5617120780805205, "grad_norm": 0.2968074083328247, "learning_rate": 8.688244246814923e-06, "loss": 0.3537, "step": 15361 }, { "epoch": 1.5618137454249696, "grad_norm": 0.30511581897735596, "learning_rate": 8.688004623994246e-06, "loss": 0.3391, "step": 15362 }, { "epoch": 1.5619154127694186, "grad_norm": 0.28844356536865234, "learning_rate": 8.687764982594277e-06, "loss": 0.3785, "step": 15363 }, { "epoch": 1.5620170801138675, "grad_norm": 0.28585660457611084, "learning_rate": 8.687525322616225e-06, "loss": 0.3312, "step": 15364 }, { "epoch": 1.5621187474583165, "grad_norm": 0.31034213304519653, "learning_rate": 8.687285644061295e-06, "loss": 0.3429, "step": 15365 }, { "epoch": 1.5622204148027654, "grad_norm": 0.30057492852211, "learning_rate": 8.687045946930698e-06, "loss": 0.376, "step": 15366 }, { "epoch": 1.5623220821472144, "grad_norm": 0.276048481464386, "learning_rate": 8.68680623122564e-06, "loss": 0.3346, "step": 15367 }, { "epoch": 1.5624237494916633, "grad_norm": 0.3075734078884125, "learning_rate": 8.686566496947326e-06, "loss": 0.3437, "step": 15368 }, { "epoch": 1.5625254168361122, "grad_norm": 0.32774755358695984, "learning_rate": 8.686326744096968e-06, "loss": 0.3619, "step": 15369 }, { "epoch": 1.5626270841805612, "grad_norm": 0.2717091739177704, "learning_rate": 8.686086972675772e-06, "loss": 0.3444, "step": 15370 }, { "epoch": 1.5627287515250101, "grad_norm": 0.2621549963951111, "learning_rate": 8.685847182684945e-06, "loss": 0.3695, "step": 15371 }, { "epoch": 1.562830418869459, "grad_norm": 0.32222387194633484, "learning_rate": 8.685607374125696e-06, "loss": 0.3623, "step": 15372 }, { "epoch": 1.562932086213908, "grad_norm": 0.2896496057510376, "learning_rate": 8.685367546999233e-06, "loss": 0.3624, "step": 15373 }, { "epoch": 1.563033753558357, "grad_norm": 0.2978857457637787, "learning_rate": 8.685127701306763e-06, "loss": 0.3683, "step": 15374 }, { "epoch": 1.5631354209028059, "grad_norm": 0.2978333830833435, "learning_rate": 8.684887837049497e-06, "loss": 0.391, "step": 15375 }, { "epoch": 1.5632370882472548, "grad_norm": 0.2723165452480316, "learning_rate": 8.684647954228641e-06, "loss": 0.3785, "step": 15376 }, { "epoch": 1.563338755591704, "grad_norm": 0.3027943968772888, "learning_rate": 8.684408052845404e-06, "loss": 0.4254, "step": 15377 }, { "epoch": 1.563440422936153, "grad_norm": 0.28420671820640564, "learning_rate": 8.684168132900996e-06, "loss": 0.3676, "step": 15378 }, { "epoch": 1.5635420902806019, "grad_norm": 0.2879331409931183, "learning_rate": 8.683928194396623e-06, "loss": 0.3334, "step": 15379 }, { "epoch": 1.5636437576250508, "grad_norm": 0.2889702618122101, "learning_rate": 8.683688237333495e-06, "loss": 0.3864, "step": 15380 }, { "epoch": 1.5637454249694998, "grad_norm": 0.31928831338882446, "learning_rate": 8.683448261712822e-06, "loss": 0.3735, "step": 15381 }, { "epoch": 1.563847092313949, "grad_norm": 0.28908804059028625, "learning_rate": 8.683208267535812e-06, "loss": 0.3717, "step": 15382 }, { "epoch": 1.5639487596583979, "grad_norm": 0.26892974972724915, "learning_rate": 8.682968254803674e-06, "loss": 0.3664, "step": 15383 }, { "epoch": 1.5640504270028468, "grad_norm": 0.30187028646469116, "learning_rate": 8.682728223517616e-06, "loss": 0.3454, "step": 15384 }, { "epoch": 1.5641520943472957, "grad_norm": 0.308137446641922, "learning_rate": 8.68248817367885e-06, "loss": 0.3604, "step": 15385 }, { "epoch": 1.5642537616917447, "grad_norm": 0.27311989665031433, "learning_rate": 8.682248105288583e-06, "loss": 0.3807, "step": 15386 }, { "epoch": 1.5643554290361936, "grad_norm": 0.3062026798725128, "learning_rate": 8.682008018348026e-06, "loss": 0.3761, "step": 15387 }, { "epoch": 1.5644570963806426, "grad_norm": 0.304454505443573, "learning_rate": 8.681767912858385e-06, "loss": 0.3596, "step": 15388 }, { "epoch": 1.5645587637250915, "grad_norm": 0.2737884819507599, "learning_rate": 8.681527788820874e-06, "loss": 0.3522, "step": 15389 }, { "epoch": 1.5646604310695404, "grad_norm": 0.30239978432655334, "learning_rate": 8.681287646236701e-06, "loss": 0.3363, "step": 15390 }, { "epoch": 1.5647620984139894, "grad_norm": 0.2781428098678589, "learning_rate": 8.681047485107074e-06, "loss": 0.3549, "step": 15391 }, { "epoch": 1.5648637657584383, "grad_norm": 0.2796262800693512, "learning_rate": 8.680807305433207e-06, "loss": 0.3377, "step": 15392 }, { "epoch": 1.5649654331028873, "grad_norm": 0.33022406697273254, "learning_rate": 8.680567107216307e-06, "loss": 0.3824, "step": 15393 }, { "epoch": 1.5650671004473362, "grad_norm": 0.3296058475971222, "learning_rate": 8.680326890457584e-06, "loss": 0.3651, "step": 15394 }, { "epoch": 1.5651687677917852, "grad_norm": 0.3044567108154297, "learning_rate": 8.680086655158248e-06, "loss": 0.372, "step": 15395 }, { "epoch": 1.565270435136234, "grad_norm": 0.30185943841934204, "learning_rate": 8.67984640131951e-06, "loss": 0.344, "step": 15396 }, { "epoch": 1.5653721024806833, "grad_norm": 0.30286747217178345, "learning_rate": 8.679606128942581e-06, "loss": 0.3341, "step": 15397 }, { "epoch": 1.5654737698251322, "grad_norm": 0.32309776544570923, "learning_rate": 8.67936583802867e-06, "loss": 0.3693, "step": 15398 }, { "epoch": 1.5655754371695811, "grad_norm": 0.30666112899780273, "learning_rate": 8.67912552857899e-06, "loss": 0.3779, "step": 15399 }, { "epoch": 1.56567710451403, "grad_norm": 0.31533315777778625, "learning_rate": 8.678885200594747e-06, "loss": 0.4006, "step": 15400 }, { "epoch": 1.565778771858479, "grad_norm": 0.28363341093063354, "learning_rate": 8.678644854077158e-06, "loss": 0.3816, "step": 15401 }, { "epoch": 1.565880439202928, "grad_norm": 0.3171374797821045, "learning_rate": 8.678404489027427e-06, "loss": 0.3792, "step": 15402 }, { "epoch": 1.5659821065473771, "grad_norm": 0.3175196945667267, "learning_rate": 8.678164105446772e-06, "loss": 0.3521, "step": 15403 }, { "epoch": 1.566083773891826, "grad_norm": 0.2915959358215332, "learning_rate": 8.677923703336398e-06, "loss": 0.3504, "step": 15404 }, { "epoch": 1.566185441236275, "grad_norm": 0.2996239960193634, "learning_rate": 8.677683282697517e-06, "loss": 0.3623, "step": 15405 }, { "epoch": 1.566287108580724, "grad_norm": 0.30830082297325134, "learning_rate": 8.677442843531344e-06, "loss": 0.3235, "step": 15406 }, { "epoch": 1.566388775925173, "grad_norm": 0.3227546215057373, "learning_rate": 8.677202385839087e-06, "loss": 0.3676, "step": 15407 }, { "epoch": 1.5664904432696218, "grad_norm": 0.3159123361110687, "learning_rate": 8.67696190962196e-06, "loss": 0.3597, "step": 15408 }, { "epoch": 1.5665921106140708, "grad_norm": 0.2976890206336975, "learning_rate": 8.67672141488117e-06, "loss": 0.3626, "step": 15409 }, { "epoch": 1.5666937779585197, "grad_norm": 0.30695679783821106, "learning_rate": 8.676480901617932e-06, "loss": 0.3661, "step": 15410 }, { "epoch": 1.5667954453029687, "grad_norm": 0.29579880833625793, "learning_rate": 8.676240369833458e-06, "loss": 0.3578, "step": 15411 }, { "epoch": 1.5668971126474176, "grad_norm": 0.31577739119529724, "learning_rate": 8.675999819528958e-06, "loss": 0.3824, "step": 15412 }, { "epoch": 1.5669987799918665, "grad_norm": 0.306264728307724, "learning_rate": 8.675759250705642e-06, "loss": 0.3415, "step": 15413 }, { "epoch": 1.5671004473363155, "grad_norm": 0.28231728076934814, "learning_rate": 8.675518663364728e-06, "loss": 0.3981, "step": 15414 }, { "epoch": 1.5672021146807644, "grad_norm": 0.29296669363975525, "learning_rate": 8.675278057507423e-06, "loss": 0.3505, "step": 15415 }, { "epoch": 1.5673037820252134, "grad_norm": 0.2852819859981537, "learning_rate": 8.675037433134941e-06, "loss": 0.3496, "step": 15416 }, { "epoch": 1.5674054493696623, "grad_norm": 0.2951783537864685, "learning_rate": 8.674796790248491e-06, "loss": 0.3516, "step": 15417 }, { "epoch": 1.5675071167141115, "grad_norm": 0.2989577353000641, "learning_rate": 8.674556128849291e-06, "loss": 0.3228, "step": 15418 }, { "epoch": 1.5676087840585604, "grad_norm": 0.2877735495567322, "learning_rate": 8.67431544893855e-06, "loss": 0.4138, "step": 15419 }, { "epoch": 1.5677104514030094, "grad_norm": 0.29859715700149536, "learning_rate": 8.674074750517481e-06, "loss": 0.3919, "step": 15420 }, { "epoch": 1.5678121187474583, "grad_norm": 0.2839244604110718, "learning_rate": 8.673834033587295e-06, "loss": 0.3427, "step": 15421 }, { "epoch": 1.5679137860919072, "grad_norm": 0.3011442720890045, "learning_rate": 8.673593298149208e-06, "loss": 0.353, "step": 15422 }, { "epoch": 1.5680154534363564, "grad_norm": 0.2735578715801239, "learning_rate": 8.673352544204431e-06, "loss": 0.3619, "step": 15423 }, { "epoch": 1.5681171207808053, "grad_norm": 0.2812616229057312, "learning_rate": 8.673111771754175e-06, "loss": 0.3437, "step": 15424 }, { "epoch": 1.5682187881252543, "grad_norm": 0.3009883463382721, "learning_rate": 8.672870980799657e-06, "loss": 0.371, "step": 15425 }, { "epoch": 1.5683204554697032, "grad_norm": 0.28444790840148926, "learning_rate": 8.672630171342086e-06, "loss": 0.3584, "step": 15426 }, { "epoch": 1.5684221228141522, "grad_norm": 0.27738845348358154, "learning_rate": 8.672389343382678e-06, "loss": 0.3802, "step": 15427 }, { "epoch": 1.568523790158601, "grad_norm": 0.29801326990127563, "learning_rate": 8.672148496922645e-06, "loss": 0.3749, "step": 15428 }, { "epoch": 1.56862545750305, "grad_norm": 0.32802635431289673, "learning_rate": 8.6719076319632e-06, "loss": 0.3865, "step": 15429 }, { "epoch": 1.568727124847499, "grad_norm": 0.2947179675102234, "learning_rate": 8.671666748505557e-06, "loss": 0.3364, "step": 15430 }, { "epoch": 1.568828792191948, "grad_norm": 0.30600541830062866, "learning_rate": 8.67142584655093e-06, "loss": 0.3465, "step": 15431 }, { "epoch": 1.5689304595363969, "grad_norm": 0.29675906896591187, "learning_rate": 8.67118492610053e-06, "loss": 0.3297, "step": 15432 }, { "epoch": 1.5690321268808458, "grad_norm": 0.3100779354572296, "learning_rate": 8.670943987155576e-06, "loss": 0.3628, "step": 15433 }, { "epoch": 1.5691337942252948, "grad_norm": 0.28726890683174133, "learning_rate": 8.670703029717277e-06, "loss": 0.3637, "step": 15434 }, { "epoch": 1.5692354615697437, "grad_norm": 0.3101149797439575, "learning_rate": 8.670462053786848e-06, "loss": 0.3608, "step": 15435 }, { "epoch": 1.5693371289141926, "grad_norm": 0.3391895890235901, "learning_rate": 8.670221059365505e-06, "loss": 0.3729, "step": 15436 }, { "epoch": 1.5694387962586416, "grad_norm": 0.28974854946136475, "learning_rate": 8.669980046454458e-06, "loss": 0.3565, "step": 15437 }, { "epoch": 1.5695404636030907, "grad_norm": 0.2928495705127716, "learning_rate": 8.669739015054926e-06, "loss": 0.3276, "step": 15438 }, { "epoch": 1.5696421309475397, "grad_norm": 0.2837693989276886, "learning_rate": 8.669497965168118e-06, "loss": 0.3552, "step": 15439 }, { "epoch": 1.5697437982919886, "grad_norm": 0.29942986369132996, "learning_rate": 8.669256896795255e-06, "loss": 0.3699, "step": 15440 }, { "epoch": 1.5698454656364376, "grad_norm": 0.2818712294101715, "learning_rate": 8.669015809937545e-06, "loss": 0.3763, "step": 15441 }, { "epoch": 1.5699471329808865, "grad_norm": 0.2983514964580536, "learning_rate": 8.668774704596205e-06, "loss": 0.3438, "step": 15442 }, { "epoch": 1.5700488003253354, "grad_norm": 0.24961277842521667, "learning_rate": 8.668533580772452e-06, "loss": 0.3292, "step": 15443 }, { "epoch": 1.5701504676697846, "grad_norm": 0.29372528195381165, "learning_rate": 8.668292438467499e-06, "loss": 0.364, "step": 15444 }, { "epoch": 1.5702521350142336, "grad_norm": 0.2932666540145874, "learning_rate": 8.668051277682557e-06, "loss": 0.3802, "step": 15445 }, { "epoch": 1.5703538023586825, "grad_norm": 0.256906121969223, "learning_rate": 8.667810098418848e-06, "loss": 0.3297, "step": 15446 }, { "epoch": 1.5704554697031314, "grad_norm": 0.28158795833587646, "learning_rate": 8.66756890067758e-06, "loss": 0.3845, "step": 15447 }, { "epoch": 1.5705571370475804, "grad_norm": 0.2666096091270447, "learning_rate": 8.667327684459974e-06, "loss": 0.3675, "step": 15448 }, { "epoch": 1.5706588043920293, "grad_norm": 0.30250245332717896, "learning_rate": 8.667086449767241e-06, "loss": 0.3453, "step": 15449 }, { "epoch": 1.5707604717364783, "grad_norm": 0.297603964805603, "learning_rate": 8.666845196600599e-06, "loss": 0.3441, "step": 15450 }, { "epoch": 1.5708621390809272, "grad_norm": 0.2780817151069641, "learning_rate": 8.666603924961262e-06, "loss": 0.3776, "step": 15451 }, { "epoch": 1.5709638064253761, "grad_norm": 0.2905033528804779, "learning_rate": 8.666362634850445e-06, "loss": 0.3431, "step": 15452 }, { "epoch": 1.571065473769825, "grad_norm": 0.29283618927001953, "learning_rate": 8.666121326269365e-06, "loss": 0.3566, "step": 15453 }, { "epoch": 1.571167141114274, "grad_norm": 0.28862863779067993, "learning_rate": 8.665879999219237e-06, "loss": 0.3333, "step": 15454 }, { "epoch": 1.571268808458723, "grad_norm": 0.2779025435447693, "learning_rate": 8.665638653701276e-06, "loss": 0.3615, "step": 15455 }, { "epoch": 1.571370475803172, "grad_norm": 0.27534475922584534, "learning_rate": 8.6653972897167e-06, "loss": 0.3617, "step": 15456 }, { "epoch": 1.5714721431476208, "grad_norm": 0.3233850598335266, "learning_rate": 8.665155907266722e-06, "loss": 0.3464, "step": 15457 }, { "epoch": 1.5715738104920698, "grad_norm": 0.29302701354026794, "learning_rate": 8.66491450635256e-06, "loss": 0.3577, "step": 15458 }, { "epoch": 1.571675477836519, "grad_norm": 0.2932240068912506, "learning_rate": 8.66467308697543e-06, "loss": 0.3442, "step": 15459 }, { "epoch": 1.571777145180968, "grad_norm": 0.27068594098091125, "learning_rate": 8.664431649136548e-06, "loss": 0.3806, "step": 15460 }, { "epoch": 1.5718788125254168, "grad_norm": 0.2735501229763031, "learning_rate": 8.66419019283713e-06, "loss": 0.3438, "step": 15461 }, { "epoch": 1.5719804798698658, "grad_norm": 0.30619099736213684, "learning_rate": 8.663948718078391e-06, "loss": 0.3692, "step": 15462 }, { "epoch": 1.5720821472143147, "grad_norm": 0.28775742650032043, "learning_rate": 8.66370722486155e-06, "loss": 0.3714, "step": 15463 }, { "epoch": 1.5721838145587639, "grad_norm": 0.26999804377555847, "learning_rate": 8.663465713187823e-06, "loss": 0.365, "step": 15464 }, { "epoch": 1.5722854819032128, "grad_norm": 0.2993106245994568, "learning_rate": 8.663224183058427e-06, "loss": 0.365, "step": 15465 }, { "epoch": 1.5723871492476618, "grad_norm": 0.2971900403499603, "learning_rate": 8.662982634474577e-06, "loss": 0.3428, "step": 15466 }, { "epoch": 1.5724888165921107, "grad_norm": 0.2870604395866394, "learning_rate": 8.662741067437492e-06, "loss": 0.3492, "step": 15467 }, { "epoch": 1.5725904839365596, "grad_norm": 0.30017298460006714, "learning_rate": 8.662499481948387e-06, "loss": 0.4069, "step": 15468 }, { "epoch": 1.5726921512810086, "grad_norm": 0.27547159790992737, "learning_rate": 8.66225787800848e-06, "loss": 0.3415, "step": 15469 }, { "epoch": 1.5727938186254575, "grad_norm": 0.30070996284484863, "learning_rate": 8.662016255618988e-06, "loss": 0.3613, "step": 15470 }, { "epoch": 1.5728954859699065, "grad_norm": 0.31382110714912415, "learning_rate": 8.661774614781129e-06, "loss": 0.359, "step": 15471 }, { "epoch": 1.5729971533143554, "grad_norm": 0.31172189116477966, "learning_rate": 8.661532955496118e-06, "loss": 0.3846, "step": 15472 }, { "epoch": 1.5730988206588044, "grad_norm": 0.3077879250049591, "learning_rate": 8.661291277765177e-06, "loss": 0.3444, "step": 15473 }, { "epoch": 1.5732004880032533, "grad_norm": 0.2867811322212219, "learning_rate": 8.661049581589518e-06, "loss": 0.359, "step": 15474 }, { "epoch": 1.5733021553477022, "grad_norm": 0.2739994525909424, "learning_rate": 8.660807866970362e-06, "loss": 0.3101, "step": 15475 }, { "epoch": 1.5734038226921512, "grad_norm": 0.29466748237609863, "learning_rate": 8.660566133908928e-06, "loss": 0.3949, "step": 15476 }, { "epoch": 1.5735054900366001, "grad_norm": 0.3047284781932831, "learning_rate": 8.660324382406429e-06, "loss": 0.3298, "step": 15477 }, { "epoch": 1.573607157381049, "grad_norm": 0.2928370535373688, "learning_rate": 8.660082612464085e-06, "loss": 0.3277, "step": 15478 }, { "epoch": 1.5737088247254982, "grad_norm": 0.25820961594581604, "learning_rate": 8.659840824083117e-06, "loss": 0.355, "step": 15479 }, { "epoch": 1.5738104920699472, "grad_norm": 0.26529067754745483, "learning_rate": 8.659599017264739e-06, "loss": 0.3134, "step": 15480 }, { "epoch": 1.573912159414396, "grad_norm": 0.2759912610054016, "learning_rate": 8.659357192010171e-06, "loss": 0.3574, "step": 15481 }, { "epoch": 1.574013826758845, "grad_norm": 0.29992178082466125, "learning_rate": 8.659115348320633e-06, "loss": 0.3667, "step": 15482 }, { "epoch": 1.574115494103294, "grad_norm": 0.2813427150249481, "learning_rate": 8.65887348619734e-06, "loss": 0.3547, "step": 15483 }, { "epoch": 1.574217161447743, "grad_norm": 0.2829917371273041, "learning_rate": 8.65863160564151e-06, "loss": 0.3628, "step": 15484 }, { "epoch": 1.574318828792192, "grad_norm": 0.2708527445793152, "learning_rate": 8.658389706654365e-06, "loss": 0.3547, "step": 15485 }, { "epoch": 1.574420496136641, "grad_norm": 0.2783927321434021, "learning_rate": 8.658147789237124e-06, "loss": 0.3656, "step": 15486 }, { "epoch": 1.57452216348109, "grad_norm": 0.2507898807525635, "learning_rate": 8.657905853391e-06, "loss": 0.3425, "step": 15487 }, { "epoch": 1.574623830825539, "grad_norm": 0.270829439163208, "learning_rate": 8.657663899117217e-06, "loss": 0.3628, "step": 15488 }, { "epoch": 1.5747254981699879, "grad_norm": 0.29711103439331055, "learning_rate": 8.657421926416994e-06, "loss": 0.4164, "step": 15489 }, { "epoch": 1.5748271655144368, "grad_norm": 0.27934885025024414, "learning_rate": 8.657179935291548e-06, "loss": 0.3846, "step": 15490 }, { "epoch": 1.5749288328588857, "grad_norm": 0.2526525557041168, "learning_rate": 8.656937925742098e-06, "loss": 0.3565, "step": 15491 }, { "epoch": 1.5750305002033347, "grad_norm": 0.29844194650650024, "learning_rate": 8.656695897769863e-06, "loss": 0.3539, "step": 15492 }, { "epoch": 1.5751321675477836, "grad_norm": 0.2715221047401428, "learning_rate": 8.656453851376064e-06, "loss": 0.3447, "step": 15493 }, { "epoch": 1.5752338348922326, "grad_norm": 0.26878610253334045, "learning_rate": 8.656211786561919e-06, "loss": 0.3984, "step": 15494 }, { "epoch": 1.5753355022366815, "grad_norm": 0.28136759996414185, "learning_rate": 8.655969703328648e-06, "loss": 0.3764, "step": 15495 }, { "epoch": 1.5754371695811304, "grad_norm": 0.27980533242225647, "learning_rate": 8.65572760167747e-06, "loss": 0.3775, "step": 15496 }, { "epoch": 1.5755388369255794, "grad_norm": 0.267983615398407, "learning_rate": 8.655485481609606e-06, "loss": 0.3769, "step": 15497 }, { "epoch": 1.5756405042700283, "grad_norm": 0.28662100434303284, "learning_rate": 8.655243343126273e-06, "loss": 0.3767, "step": 15498 }, { "epoch": 1.5757421716144773, "grad_norm": 0.2659594714641571, "learning_rate": 8.655001186228695e-06, "loss": 0.3545, "step": 15499 }, { "epoch": 1.5758438389589264, "grad_norm": 0.272775799036026, "learning_rate": 8.654759010918089e-06, "loss": 0.3604, "step": 15500 }, { "epoch": 1.5759455063033754, "grad_norm": 0.262196809053421, "learning_rate": 8.654516817195675e-06, "loss": 0.3624, "step": 15501 }, { "epoch": 1.5760471736478243, "grad_norm": 0.28009435534477234, "learning_rate": 8.654274605062673e-06, "loss": 0.3617, "step": 15502 }, { "epoch": 1.5761488409922733, "grad_norm": 0.2586643099784851, "learning_rate": 8.654032374520305e-06, "loss": 0.3309, "step": 15503 }, { "epoch": 1.5762505083367222, "grad_norm": 0.27857542037963867, "learning_rate": 8.65379012556979e-06, "loss": 0.3275, "step": 15504 }, { "epoch": 1.5763521756811714, "grad_norm": 0.2693348228931427, "learning_rate": 8.65354785821235e-06, "loss": 0.3539, "step": 15505 }, { "epoch": 1.5764538430256203, "grad_norm": 0.2675234377384186, "learning_rate": 8.653305572449202e-06, "loss": 0.3581, "step": 15506 }, { "epoch": 1.5765555103700692, "grad_norm": 0.27001991868019104, "learning_rate": 8.65306326828157e-06, "loss": 0.3488, "step": 15507 }, { "epoch": 1.5766571777145182, "grad_norm": 0.26844191551208496, "learning_rate": 8.652820945710672e-06, "loss": 0.362, "step": 15508 }, { "epoch": 1.5767588450589671, "grad_norm": 0.26835331320762634, "learning_rate": 8.652578604737732e-06, "loss": 0.361, "step": 15509 }, { "epoch": 1.576860512403416, "grad_norm": 0.27279865741729736, "learning_rate": 8.652336245363968e-06, "loss": 0.3303, "step": 15510 }, { "epoch": 1.576962179747865, "grad_norm": 0.2903987467288971, "learning_rate": 8.6520938675906e-06, "loss": 0.3238, "step": 15511 }, { "epoch": 1.577063847092314, "grad_norm": 0.27437469363212585, "learning_rate": 8.651851471418854e-06, "loss": 0.3467, "step": 15512 }, { "epoch": 1.577165514436763, "grad_norm": 0.2919847369194031, "learning_rate": 8.651609056849946e-06, "loss": 0.3823, "step": 15513 }, { "epoch": 1.5772671817812118, "grad_norm": 0.27804121375083923, "learning_rate": 8.6513666238851e-06, "loss": 0.355, "step": 15514 }, { "epoch": 1.5773688491256608, "grad_norm": 0.3019604980945587, "learning_rate": 8.651124172525537e-06, "loss": 0.365, "step": 15515 }, { "epoch": 1.5774705164701097, "grad_norm": 0.29682955145835876, "learning_rate": 8.65088170277248e-06, "loss": 0.3728, "step": 15516 }, { "epoch": 1.5775721838145587, "grad_norm": 0.29282233119010925, "learning_rate": 8.650639214627146e-06, "loss": 0.364, "step": 15517 }, { "epoch": 1.5776738511590076, "grad_norm": 0.27843689918518066, "learning_rate": 8.650396708090758e-06, "loss": 0.3472, "step": 15518 }, { "epoch": 1.5777755185034565, "grad_norm": 0.2871688902378082, "learning_rate": 8.65015418316454e-06, "loss": 0.3516, "step": 15519 }, { "epoch": 1.5778771858479057, "grad_norm": 0.267634779214859, "learning_rate": 8.649911639849715e-06, "loss": 0.3661, "step": 15520 }, { "epoch": 1.5779788531923546, "grad_norm": 0.2836977541446686, "learning_rate": 8.6496690781475e-06, "loss": 0.3502, "step": 15521 }, { "epoch": 1.5780805205368036, "grad_norm": 0.27672702074050903, "learning_rate": 8.64942649805912e-06, "loss": 0.3352, "step": 15522 }, { "epoch": 1.5781821878812525, "grad_norm": 0.27285751700401306, "learning_rate": 8.649183899585797e-06, "loss": 0.341, "step": 15523 }, { "epoch": 1.5782838552257015, "grad_norm": 0.282353937625885, "learning_rate": 8.648941282728754e-06, "loss": 0.3486, "step": 15524 }, { "epoch": 1.5783855225701506, "grad_norm": 0.27119654417037964, "learning_rate": 8.648698647489209e-06, "loss": 0.3317, "step": 15525 }, { "epoch": 1.5784871899145996, "grad_norm": 0.2908012270927429, "learning_rate": 8.648455993868388e-06, "loss": 0.3737, "step": 15526 }, { "epoch": 1.5785888572590485, "grad_norm": 0.26428937911987305, "learning_rate": 8.648213321867514e-06, "loss": 0.3766, "step": 15527 }, { "epoch": 1.5786905246034975, "grad_norm": 0.27842918038368225, "learning_rate": 8.647970631487808e-06, "loss": 0.3696, "step": 15528 }, { "epoch": 1.5787921919479464, "grad_norm": 0.2933512032032013, "learning_rate": 8.647727922730492e-06, "loss": 0.3861, "step": 15529 }, { "epoch": 1.5788938592923953, "grad_norm": 0.26634693145751953, "learning_rate": 8.64748519559679e-06, "loss": 0.3874, "step": 15530 }, { "epoch": 1.5789955266368443, "grad_norm": 0.27038902044296265, "learning_rate": 8.647242450087923e-06, "loss": 0.3805, "step": 15531 }, { "epoch": 1.5790971939812932, "grad_norm": 0.2683446705341339, "learning_rate": 8.646999686205117e-06, "loss": 0.3362, "step": 15532 }, { "epoch": 1.5791988613257422, "grad_norm": 0.3092174530029297, "learning_rate": 8.646756903949593e-06, "loss": 0.3846, "step": 15533 }, { "epoch": 1.579300528670191, "grad_norm": 0.2732692062854767, "learning_rate": 8.646514103322574e-06, "loss": 0.3828, "step": 15534 }, { "epoch": 1.57940219601464, "grad_norm": 0.26616373658180237, "learning_rate": 8.646271284325282e-06, "loss": 0.3745, "step": 15535 }, { "epoch": 1.579503863359089, "grad_norm": 0.30393868684768677, "learning_rate": 8.646028446958945e-06, "loss": 0.4155, "step": 15536 }, { "epoch": 1.579605530703538, "grad_norm": 0.27587124705314636, "learning_rate": 8.64578559122478e-06, "loss": 0.376, "step": 15537 }, { "epoch": 1.5797071980479869, "grad_norm": 0.30181068181991577, "learning_rate": 8.645542717124016e-06, "loss": 0.3742, "step": 15538 }, { "epoch": 1.5798088653924358, "grad_norm": 0.27059274911880493, "learning_rate": 8.645299824657873e-06, "loss": 0.3597, "step": 15539 }, { "epoch": 1.5799105327368848, "grad_norm": 0.2837894856929779, "learning_rate": 8.645056913827576e-06, "loss": 0.3272, "step": 15540 }, { "epoch": 1.580012200081334, "grad_norm": 0.2798798084259033, "learning_rate": 8.644813984634349e-06, "loss": 0.3366, "step": 15541 }, { "epoch": 1.5801138674257829, "grad_norm": 0.28013160824775696, "learning_rate": 8.644571037079414e-06, "loss": 0.3899, "step": 15542 }, { "epoch": 1.5802155347702318, "grad_norm": 0.30475857853889465, "learning_rate": 8.644328071163996e-06, "loss": 0.3752, "step": 15543 }, { "epoch": 1.5803172021146807, "grad_norm": 0.2669315040111542, "learning_rate": 8.644085086889322e-06, "loss": 0.3467, "step": 15544 }, { "epoch": 1.5804188694591297, "grad_norm": 0.28646034002304077, "learning_rate": 8.64384208425661e-06, "loss": 0.3855, "step": 15545 }, { "epoch": 1.5805205368035788, "grad_norm": 0.2898864150047302, "learning_rate": 8.64359906326709e-06, "loss": 0.3605, "step": 15546 }, { "epoch": 1.5806222041480278, "grad_norm": 0.2669377624988556, "learning_rate": 8.643356023921983e-06, "loss": 0.357, "step": 15547 }, { "epoch": 1.5807238714924767, "grad_norm": 0.29235923290252686, "learning_rate": 8.643112966222514e-06, "loss": 0.3739, "step": 15548 }, { "epoch": 1.5808255388369257, "grad_norm": 0.28701362013816833, "learning_rate": 8.642869890169908e-06, "loss": 0.3826, "step": 15549 }, { "epoch": 1.5809272061813746, "grad_norm": 0.29386112093925476, "learning_rate": 8.642626795765387e-06, "loss": 0.3384, "step": 15550 }, { "epoch": 1.5810288735258236, "grad_norm": 0.2689259946346283, "learning_rate": 8.642383683010178e-06, "loss": 0.3619, "step": 15551 }, { "epoch": 1.5811305408702725, "grad_norm": 0.2785383462905884, "learning_rate": 8.642140551905509e-06, "loss": 0.3706, "step": 15552 }, { "epoch": 1.5812322082147214, "grad_norm": 0.2865228056907654, "learning_rate": 8.641897402452598e-06, "loss": 0.3756, "step": 15553 }, { "epoch": 1.5813338755591704, "grad_norm": 0.2900625765323639, "learning_rate": 8.641654234652675e-06, "loss": 0.371, "step": 15554 }, { "epoch": 1.5814355429036193, "grad_norm": 0.2715117931365967, "learning_rate": 8.641411048506963e-06, "loss": 0.3253, "step": 15555 }, { "epoch": 1.5815372102480683, "grad_norm": 0.31116417050361633, "learning_rate": 8.641167844016688e-06, "loss": 0.3388, "step": 15556 }, { "epoch": 1.5816388775925172, "grad_norm": 0.336773544549942, "learning_rate": 8.640924621183073e-06, "loss": 0.3665, "step": 15557 }, { "epoch": 1.5817405449369661, "grad_norm": 0.281742662191391, "learning_rate": 8.640681380007347e-06, "loss": 0.3552, "step": 15558 }, { "epoch": 1.581842212281415, "grad_norm": 0.30901163816452026, "learning_rate": 8.640438120490733e-06, "loss": 0.3222, "step": 15559 }, { "epoch": 1.581943879625864, "grad_norm": 0.31141605973243713, "learning_rate": 8.640194842634455e-06, "loss": 0.3391, "step": 15560 }, { "epoch": 1.5820455469703132, "grad_norm": 0.305891215801239, "learning_rate": 8.639951546439743e-06, "loss": 0.3667, "step": 15561 }, { "epoch": 1.5821472143147621, "grad_norm": 0.30836135149002075, "learning_rate": 8.639708231907819e-06, "loss": 0.3572, "step": 15562 }, { "epoch": 1.582248881659211, "grad_norm": 0.31712213158607483, "learning_rate": 8.639464899039909e-06, "loss": 0.3866, "step": 15563 }, { "epoch": 1.58235054900366, "grad_norm": 0.3234573304653168, "learning_rate": 8.63922154783724e-06, "loss": 0.388, "step": 15564 }, { "epoch": 1.582452216348109, "grad_norm": 0.2910611033439636, "learning_rate": 8.638978178301038e-06, "loss": 0.3488, "step": 15565 }, { "epoch": 1.5825538836925581, "grad_norm": 0.3102436363697052, "learning_rate": 8.638734790432529e-06, "loss": 0.3686, "step": 15566 }, { "epoch": 1.582655551037007, "grad_norm": 0.29497095942497253, "learning_rate": 8.638491384232938e-06, "loss": 0.338, "step": 15567 }, { "epoch": 1.582757218381456, "grad_norm": 0.2927168607711792, "learning_rate": 8.638247959703494e-06, "loss": 0.3701, "step": 15568 }, { "epoch": 1.582858885725905, "grad_norm": 0.31061071157455444, "learning_rate": 8.63800451684542e-06, "loss": 0.3532, "step": 15569 }, { "epoch": 1.5829605530703539, "grad_norm": 0.2796119451522827, "learning_rate": 8.637761055659943e-06, "loss": 0.3664, "step": 15570 }, { "epoch": 1.5830622204148028, "grad_norm": 0.283799946308136, "learning_rate": 8.63751757614829e-06, "loss": 0.397, "step": 15571 }, { "epoch": 1.5831638877592518, "grad_norm": 0.28282999992370605, "learning_rate": 8.63727407831169e-06, "loss": 0.3308, "step": 15572 }, { "epoch": 1.5832655551037007, "grad_norm": 0.279806911945343, "learning_rate": 8.637030562151367e-06, "loss": 0.3662, "step": 15573 }, { "epoch": 1.5833672224481496, "grad_norm": 0.2700951099395752, "learning_rate": 8.636787027668548e-06, "loss": 0.3342, "step": 15574 }, { "epoch": 1.5834688897925986, "grad_norm": 0.30641788244247437, "learning_rate": 8.63654347486446e-06, "loss": 0.3906, "step": 15575 }, { "epoch": 1.5835705571370475, "grad_norm": 0.29302963614463806, "learning_rate": 8.636299903740329e-06, "loss": 0.3662, "step": 15576 }, { "epoch": 1.5836722244814965, "grad_norm": 0.2606082558631897, "learning_rate": 8.636056314297386e-06, "loss": 0.3517, "step": 15577 }, { "epoch": 1.5837738918259454, "grad_norm": 0.31034860014915466, "learning_rate": 8.635812706536852e-06, "loss": 0.3477, "step": 15578 }, { "epoch": 1.5838755591703944, "grad_norm": 0.29631224274635315, "learning_rate": 8.63556908045996e-06, "loss": 0.3524, "step": 15579 }, { "epoch": 1.5839772265148433, "grad_norm": 0.262552946805954, "learning_rate": 8.635325436067933e-06, "loss": 0.3817, "step": 15580 }, { "epoch": 1.5840788938592922, "grad_norm": 0.3047557771205902, "learning_rate": 8.635081773362002e-06, "loss": 0.367, "step": 15581 }, { "epoch": 1.5841805612037414, "grad_norm": 0.2838800847530365, "learning_rate": 8.634838092343393e-06, "loss": 0.3489, "step": 15582 }, { "epoch": 1.5842822285481903, "grad_norm": 0.2617538869380951, "learning_rate": 8.634594393013332e-06, "loss": 0.3629, "step": 15583 }, { "epoch": 1.5843838958926393, "grad_norm": 0.2879447937011719, "learning_rate": 8.634350675373048e-06, "loss": 0.3561, "step": 15584 }, { "epoch": 1.5844855632370882, "grad_norm": 0.2781772315502167, "learning_rate": 8.634106939423771e-06, "loss": 0.3495, "step": 15585 }, { "epoch": 1.5845872305815372, "grad_norm": 0.2605058252811432, "learning_rate": 8.633863185166726e-06, "loss": 0.3609, "step": 15586 }, { "epoch": 1.5846888979259863, "grad_norm": 0.2786436676979065, "learning_rate": 8.633619412603142e-06, "loss": 0.3527, "step": 15587 }, { "epoch": 1.5847905652704353, "grad_norm": 0.28380507230758667, "learning_rate": 8.633375621734246e-06, "loss": 0.3715, "step": 15588 }, { "epoch": 1.5848922326148842, "grad_norm": 0.30075767636299133, "learning_rate": 8.633131812561268e-06, "loss": 0.3481, "step": 15589 }, { "epoch": 1.5849938999593332, "grad_norm": 0.29702433943748474, "learning_rate": 8.632887985085434e-06, "loss": 0.3369, "step": 15590 }, { "epoch": 1.585095567303782, "grad_norm": 0.2706017792224884, "learning_rate": 8.632644139307975e-06, "loss": 0.3888, "step": 15591 }, { "epoch": 1.585197234648231, "grad_norm": 0.3020849823951721, "learning_rate": 8.632400275230118e-06, "loss": 0.3673, "step": 15592 }, { "epoch": 1.58529890199268, "grad_norm": 0.28771793842315674, "learning_rate": 8.632156392853091e-06, "loss": 0.3804, "step": 15593 }, { "epoch": 1.585400569337129, "grad_norm": 0.276299387216568, "learning_rate": 8.631912492178122e-06, "loss": 0.354, "step": 15594 }, { "epoch": 1.5855022366815779, "grad_norm": 0.2955346405506134, "learning_rate": 8.631668573206443e-06, "loss": 0.3751, "step": 15595 }, { "epoch": 1.5856039040260268, "grad_norm": 0.2884269058704376, "learning_rate": 8.631424635939281e-06, "loss": 0.3403, "step": 15596 }, { "epoch": 1.5857055713704757, "grad_norm": 0.27992355823516846, "learning_rate": 8.631180680377861e-06, "loss": 0.348, "step": 15597 }, { "epoch": 1.5858072387149247, "grad_norm": 0.3199179470539093, "learning_rate": 8.630936706523421e-06, "loss": 0.3789, "step": 15598 }, { "epoch": 1.5859089060593736, "grad_norm": 0.2926171123981476, "learning_rate": 8.63069271437718e-06, "loss": 0.3782, "step": 15599 }, { "epoch": 1.5860105734038226, "grad_norm": 0.28310272097587585, "learning_rate": 8.630448703940376e-06, "loss": 0.3614, "step": 15600 }, { "epoch": 1.5861122407482715, "grad_norm": 0.2715466320514679, "learning_rate": 8.630204675214232e-06, "loss": 0.3632, "step": 15601 }, { "epoch": 1.5862139080927207, "grad_norm": 0.2779805064201355, "learning_rate": 8.629960628199979e-06, "loss": 0.3388, "step": 15602 }, { "epoch": 1.5863155754371696, "grad_norm": 0.26774823665618896, "learning_rate": 8.629716562898847e-06, "loss": 0.3429, "step": 15603 }, { "epoch": 1.5864172427816186, "grad_norm": 0.27691999077796936, "learning_rate": 8.629472479312066e-06, "loss": 0.3829, "step": 15604 }, { "epoch": 1.5865189101260675, "grad_norm": 0.2708742320537567, "learning_rate": 8.629228377440866e-06, "loss": 0.3777, "step": 15605 }, { "epoch": 1.5866205774705164, "grad_norm": 0.27673575282096863, "learning_rate": 8.628984257286474e-06, "loss": 0.3548, "step": 15606 }, { "epoch": 1.5867222448149656, "grad_norm": 0.26580584049224854, "learning_rate": 8.628740118850122e-06, "loss": 0.3429, "step": 15607 }, { "epoch": 1.5868239121594145, "grad_norm": 0.28254008293151855, "learning_rate": 8.628495962133041e-06, "loss": 0.3342, "step": 15608 }, { "epoch": 1.5869255795038635, "grad_norm": 0.278560996055603, "learning_rate": 8.628251787136458e-06, "loss": 0.354, "step": 15609 }, { "epoch": 1.5870272468483124, "grad_norm": 0.2690717279911041, "learning_rate": 8.628007593861606e-06, "loss": 0.3671, "step": 15610 }, { "epoch": 1.5871289141927614, "grad_norm": 0.2797527611255646, "learning_rate": 8.627763382309714e-06, "loss": 0.3686, "step": 15611 }, { "epoch": 1.5872305815372103, "grad_norm": 0.2882400453090668, "learning_rate": 8.627519152482011e-06, "loss": 0.3575, "step": 15612 }, { "epoch": 1.5873322488816592, "grad_norm": 0.2994360625743866, "learning_rate": 8.627274904379729e-06, "loss": 0.373, "step": 15613 }, { "epoch": 1.5874339162261082, "grad_norm": 0.29542556405067444, "learning_rate": 8.627030638004099e-06, "loss": 0.3418, "step": 15614 }, { "epoch": 1.5875355835705571, "grad_norm": 0.27965980768203735, "learning_rate": 8.626786353356352e-06, "loss": 0.349, "step": 15615 }, { "epoch": 1.587637250915006, "grad_norm": 0.2699507474899292, "learning_rate": 8.626542050437713e-06, "loss": 0.3561, "step": 15616 }, { "epoch": 1.587738918259455, "grad_norm": 0.27275577187538147, "learning_rate": 8.62629772924942e-06, "loss": 0.3537, "step": 15617 }, { "epoch": 1.587840585603904, "grad_norm": 0.29529955983161926, "learning_rate": 8.626053389792699e-06, "loss": 0.3587, "step": 15618 }, { "epoch": 1.587942252948353, "grad_norm": 0.2643098831176758, "learning_rate": 8.625809032068784e-06, "loss": 0.3559, "step": 15619 }, { "epoch": 1.5880439202928018, "grad_norm": 0.28661537170410156, "learning_rate": 8.625564656078904e-06, "loss": 0.4136, "step": 15620 }, { "epoch": 1.5881455876372508, "grad_norm": 0.27678772807121277, "learning_rate": 8.625320261824291e-06, "loss": 0.3786, "step": 15621 }, { "epoch": 1.5882472549816997, "grad_norm": 0.27872005105018616, "learning_rate": 8.625075849306175e-06, "loss": 0.3392, "step": 15622 }, { "epoch": 1.5883489223261489, "grad_norm": 0.28281739354133606, "learning_rate": 8.62483141852579e-06, "loss": 0.3523, "step": 15623 }, { "epoch": 1.5884505896705978, "grad_norm": 0.25925201177597046, "learning_rate": 8.624586969484366e-06, "loss": 0.3551, "step": 15624 }, { "epoch": 1.5885522570150468, "grad_norm": 0.2695588767528534, "learning_rate": 8.624342502183132e-06, "loss": 0.3587, "step": 15625 }, { "epoch": 1.5886539243594957, "grad_norm": 0.2822277247905731, "learning_rate": 8.624098016623324e-06, "loss": 0.3498, "step": 15626 }, { "epoch": 1.5887555917039446, "grad_norm": 0.2996997833251953, "learning_rate": 8.62385351280617e-06, "loss": 0.3887, "step": 15627 }, { "epoch": 1.5888572590483938, "grad_norm": 0.304845929145813, "learning_rate": 8.623608990732903e-06, "loss": 0.364, "step": 15628 }, { "epoch": 1.5889589263928428, "grad_norm": 0.26968806982040405, "learning_rate": 8.623364450404756e-06, "loss": 0.3503, "step": 15629 }, { "epoch": 1.5890605937372917, "grad_norm": 0.29147619009017944, "learning_rate": 8.62311989182296e-06, "loss": 0.3979, "step": 15630 }, { "epoch": 1.5891622610817406, "grad_norm": 0.2972167730331421, "learning_rate": 8.622875314988746e-06, "loss": 0.3649, "step": 15631 }, { "epoch": 1.5892639284261896, "grad_norm": 0.2759722173213959, "learning_rate": 8.622630719903346e-06, "loss": 0.3504, "step": 15632 }, { "epoch": 1.5893655957706385, "grad_norm": 0.3005886673927307, "learning_rate": 8.622386106567997e-06, "loss": 0.3868, "step": 15633 }, { "epoch": 1.5894672631150875, "grad_norm": 0.29596439003944397, "learning_rate": 8.622141474983926e-06, "loss": 0.3585, "step": 15634 }, { "epoch": 1.5895689304595364, "grad_norm": 0.28282368183135986, "learning_rate": 8.621896825152365e-06, "loss": 0.3368, "step": 15635 }, { "epoch": 1.5896705978039853, "grad_norm": 0.2758341133594513, "learning_rate": 8.62165215707455e-06, "loss": 0.345, "step": 15636 }, { "epoch": 1.5897722651484343, "grad_norm": 0.2866838872432709, "learning_rate": 8.621407470751712e-06, "loss": 0.3301, "step": 15637 }, { "epoch": 1.5898739324928832, "grad_norm": 0.28503385186195374, "learning_rate": 8.621162766185085e-06, "loss": 0.3924, "step": 15638 }, { "epoch": 1.5899755998373322, "grad_norm": 0.2831287086009979, "learning_rate": 8.620918043375898e-06, "loss": 0.3336, "step": 15639 }, { "epoch": 1.590077267181781, "grad_norm": 0.2892185151576996, "learning_rate": 8.620673302325388e-06, "loss": 0.3459, "step": 15640 }, { "epoch": 1.59017893452623, "grad_norm": 0.2973848283290863, "learning_rate": 8.620428543034787e-06, "loss": 0.3498, "step": 15641 }, { "epoch": 1.590280601870679, "grad_norm": 0.28791818022727966, "learning_rate": 8.620183765505326e-06, "loss": 0.3271, "step": 15642 }, { "epoch": 1.5903822692151282, "grad_norm": 0.30241870880126953, "learning_rate": 8.619938969738239e-06, "loss": 0.3969, "step": 15643 }, { "epoch": 1.590483936559577, "grad_norm": 0.26709863543510437, "learning_rate": 8.619694155734761e-06, "loss": 0.3682, "step": 15644 }, { "epoch": 1.590585603904026, "grad_norm": 0.3130398392677307, "learning_rate": 8.619449323496123e-06, "loss": 0.3646, "step": 15645 }, { "epoch": 1.590687271248475, "grad_norm": 0.3280853033065796, "learning_rate": 8.61920447302356e-06, "loss": 0.394, "step": 15646 }, { "epoch": 1.590788938592924, "grad_norm": 0.2563934922218323, "learning_rate": 8.618959604318304e-06, "loss": 0.374, "step": 15647 }, { "epoch": 1.590890605937373, "grad_norm": 0.26501524448394775, "learning_rate": 8.618714717381591e-06, "loss": 0.3573, "step": 15648 }, { "epoch": 1.590992273281822, "grad_norm": 0.31126874685287476, "learning_rate": 8.618469812214652e-06, "loss": 0.3737, "step": 15649 }, { "epoch": 1.591093940626271, "grad_norm": 0.29586443305015564, "learning_rate": 8.618224888818723e-06, "loss": 0.3447, "step": 15650 }, { "epoch": 1.59119560797072, "grad_norm": 0.2713780105113983, "learning_rate": 8.617979947195036e-06, "loss": 0.3563, "step": 15651 }, { "epoch": 1.5912972753151688, "grad_norm": 0.29708781838417053, "learning_rate": 8.617734987344828e-06, "loss": 0.3757, "step": 15652 }, { "epoch": 1.5913989426596178, "grad_norm": 0.30085811018943787, "learning_rate": 8.617490009269328e-06, "loss": 0.3759, "step": 15653 }, { "epoch": 1.5915006100040667, "grad_norm": 0.29981255531311035, "learning_rate": 8.617245012969774e-06, "loss": 0.3522, "step": 15654 }, { "epoch": 1.5916022773485157, "grad_norm": 0.2945826053619385, "learning_rate": 8.6169999984474e-06, "loss": 0.3502, "step": 15655 }, { "epoch": 1.5917039446929646, "grad_norm": 0.2765575051307678, "learning_rate": 8.616754965703438e-06, "loss": 0.3637, "step": 15656 }, { "epoch": 1.5918056120374136, "grad_norm": 0.2813355028629303, "learning_rate": 8.616509914739124e-06, "loss": 0.3946, "step": 15657 }, { "epoch": 1.5919072793818625, "grad_norm": 0.31666383147239685, "learning_rate": 8.616264845555694e-06, "loss": 0.3632, "step": 15658 }, { "epoch": 1.5920089467263114, "grad_norm": 0.26037922501564026, "learning_rate": 8.616019758154379e-06, "loss": 0.3806, "step": 15659 }, { "epoch": 1.5921106140707604, "grad_norm": 0.26327982544898987, "learning_rate": 8.615774652536417e-06, "loss": 0.3506, "step": 15660 }, { "epoch": 1.5922122814152093, "grad_norm": 0.2794165313243866, "learning_rate": 8.615529528703041e-06, "loss": 0.3557, "step": 15661 }, { "epoch": 1.5923139487596583, "grad_norm": 0.2818106710910797, "learning_rate": 8.615284386655489e-06, "loss": 0.3531, "step": 15662 }, { "epoch": 1.5924156161041072, "grad_norm": 0.2909727394580841, "learning_rate": 8.61503922639499e-06, "loss": 0.3553, "step": 15663 }, { "epoch": 1.5925172834485564, "grad_norm": 0.2783709466457367, "learning_rate": 8.614794047922782e-06, "loss": 0.3702, "step": 15664 }, { "epoch": 1.5926189507930053, "grad_norm": 0.28688526153564453, "learning_rate": 8.614548851240104e-06, "loss": 0.358, "step": 15665 }, { "epoch": 1.5927206181374542, "grad_norm": 0.27145159244537354, "learning_rate": 8.614303636348187e-06, "loss": 0.3651, "step": 15666 }, { "epoch": 1.5928222854819032, "grad_norm": 0.30695611238479614, "learning_rate": 8.614058403248266e-06, "loss": 0.3493, "step": 15667 }, { "epoch": 1.5929239528263521, "grad_norm": 0.27415308356285095, "learning_rate": 8.613813151941577e-06, "loss": 0.3676, "step": 15668 }, { "epoch": 1.5930256201708013, "grad_norm": 0.2783728241920471, "learning_rate": 8.613567882429356e-06, "loss": 0.3468, "step": 15669 }, { "epoch": 1.5931272875152502, "grad_norm": 0.27669963240623474, "learning_rate": 8.613322594712842e-06, "loss": 0.3769, "step": 15670 }, { "epoch": 1.5932289548596992, "grad_norm": 0.279816210269928, "learning_rate": 8.613077288793265e-06, "loss": 0.3163, "step": 15671 }, { "epoch": 1.5933306222041481, "grad_norm": 0.2718898057937622, "learning_rate": 8.612831964671863e-06, "loss": 0.3743, "step": 15672 }, { "epoch": 1.593432289548597, "grad_norm": 0.3176763653755188, "learning_rate": 8.612586622349872e-06, "loss": 0.3843, "step": 15673 }, { "epoch": 1.593533956893046, "grad_norm": 0.29056549072265625, "learning_rate": 8.612341261828529e-06, "loss": 0.3864, "step": 15674 }, { "epoch": 1.593635624237495, "grad_norm": 0.2835938334465027, "learning_rate": 8.612095883109068e-06, "loss": 0.3602, "step": 15675 }, { "epoch": 1.5937372915819439, "grad_norm": 0.31204912066459656, "learning_rate": 8.611850486192727e-06, "loss": 0.3586, "step": 15676 }, { "epoch": 1.5938389589263928, "grad_norm": 0.28828346729278564, "learning_rate": 8.611605071080743e-06, "loss": 0.3569, "step": 15677 }, { "epoch": 1.5939406262708418, "grad_norm": 0.2662424147129059, "learning_rate": 8.611359637774348e-06, "loss": 0.3783, "step": 15678 }, { "epoch": 1.5940422936152907, "grad_norm": 0.30131906270980835, "learning_rate": 8.611114186274784e-06, "loss": 0.3447, "step": 15679 }, { "epoch": 1.5941439609597396, "grad_norm": 0.27717825770378113, "learning_rate": 8.610868716583283e-06, "loss": 0.3573, "step": 15680 }, { "epoch": 1.5942456283041886, "grad_norm": 0.2707115411758423, "learning_rate": 8.610623228701084e-06, "loss": 0.3386, "step": 15681 }, { "epoch": 1.5943472956486375, "grad_norm": 0.2757936716079712, "learning_rate": 8.610377722629423e-06, "loss": 0.3537, "step": 15682 }, { "epoch": 1.5944489629930865, "grad_norm": 0.2656247615814209, "learning_rate": 8.610132198369538e-06, "loss": 0.3979, "step": 15683 }, { "epoch": 1.5945506303375356, "grad_norm": 0.2889605164527893, "learning_rate": 8.609886655922665e-06, "loss": 0.3519, "step": 15684 }, { "epoch": 1.5946522976819846, "grad_norm": 0.27599337697029114, "learning_rate": 8.60964109529004e-06, "loss": 0.3778, "step": 15685 }, { "epoch": 1.5947539650264335, "grad_norm": 0.288395494222641, "learning_rate": 8.6093955164729e-06, "loss": 0.352, "step": 15686 }, { "epoch": 1.5948556323708825, "grad_norm": 0.28730538487434387, "learning_rate": 8.609149919472485e-06, "loss": 0.3449, "step": 15687 }, { "epoch": 1.5949572997153314, "grad_norm": 0.29702916741371155, "learning_rate": 8.60890430429003e-06, "loss": 0.366, "step": 15688 }, { "epoch": 1.5950589670597806, "grad_norm": 0.29648563265800476, "learning_rate": 8.608658670926772e-06, "loss": 0.3877, "step": 15689 }, { "epoch": 1.5951606344042295, "grad_norm": 0.2882375419139862, "learning_rate": 8.60841301938395e-06, "loss": 0.3335, "step": 15690 }, { "epoch": 1.5952623017486784, "grad_norm": 0.2990598678588867, "learning_rate": 8.6081673496628e-06, "loss": 0.3469, "step": 15691 }, { "epoch": 1.5953639690931274, "grad_norm": 0.2878771126270294, "learning_rate": 8.60792166176456e-06, "loss": 0.3745, "step": 15692 }, { "epoch": 1.5954656364375763, "grad_norm": 0.2636818587779999, "learning_rate": 8.607675955690468e-06, "loss": 0.337, "step": 15693 }, { "epoch": 1.5955673037820253, "grad_norm": 0.2754352390766144, "learning_rate": 8.607430231441762e-06, "loss": 0.3407, "step": 15694 }, { "epoch": 1.5956689711264742, "grad_norm": 0.2824157476425171, "learning_rate": 8.60718448901968e-06, "loss": 0.3726, "step": 15695 }, { "epoch": 1.5957706384709232, "grad_norm": 0.2720159590244293, "learning_rate": 8.60693872842546e-06, "loss": 0.3693, "step": 15696 }, { "epoch": 1.595872305815372, "grad_norm": 0.28662070631980896, "learning_rate": 8.60669294966034e-06, "loss": 0.3512, "step": 15697 }, { "epoch": 1.595973973159821, "grad_norm": 0.27567553520202637, "learning_rate": 8.606447152725558e-06, "loss": 0.368, "step": 15698 }, { "epoch": 1.59607564050427, "grad_norm": 0.27551108598709106, "learning_rate": 8.606201337622352e-06, "loss": 0.3363, "step": 15699 }, { "epoch": 1.596177307848719, "grad_norm": 0.3003849685192108, "learning_rate": 8.60595550435196e-06, "loss": 0.3648, "step": 15700 }, { "epoch": 1.5962789751931679, "grad_norm": 0.28679606318473816, "learning_rate": 8.605709652915621e-06, "loss": 0.3921, "step": 15701 }, { "epoch": 1.5963806425376168, "grad_norm": 0.3056381344795227, "learning_rate": 8.605463783314577e-06, "loss": 0.4001, "step": 15702 }, { "epoch": 1.5964823098820657, "grad_norm": 0.288537859916687, "learning_rate": 8.60521789555006e-06, "loss": 0.3561, "step": 15703 }, { "epoch": 1.5965839772265147, "grad_norm": 0.2709338366985321, "learning_rate": 8.604971989623313e-06, "loss": 0.3744, "step": 15704 }, { "epoch": 1.5966856445709638, "grad_norm": 0.2670401334762573, "learning_rate": 8.604726065535571e-06, "loss": 0.3403, "step": 15705 }, { "epoch": 1.5967873119154128, "grad_norm": 0.2536883056163788, "learning_rate": 8.604480123288079e-06, "loss": 0.3761, "step": 15706 }, { "epoch": 1.5968889792598617, "grad_norm": 0.2722119688987732, "learning_rate": 8.60423416288207e-06, "loss": 0.3461, "step": 15707 }, { "epoch": 1.5969906466043107, "grad_norm": 0.2874647378921509, "learning_rate": 8.603988184318787e-06, "loss": 0.3853, "step": 15708 }, { "epoch": 1.5970923139487596, "grad_norm": 0.2999570369720459, "learning_rate": 8.603742187599472e-06, "loss": 0.3625, "step": 15709 }, { "epoch": 1.5971939812932088, "grad_norm": 0.2794033885002136, "learning_rate": 8.603496172725355e-06, "loss": 0.3791, "step": 15710 }, { "epoch": 1.5972956486376577, "grad_norm": 0.2966287136077881, "learning_rate": 8.603250139697682e-06, "loss": 0.346, "step": 15711 }, { "epoch": 1.5973973159821067, "grad_norm": 0.2688215970993042, "learning_rate": 8.60300408851769e-06, "loss": 0.3369, "step": 15712 }, { "epoch": 1.5974989833265556, "grad_norm": 0.3028956353664398, "learning_rate": 8.602758019186623e-06, "loss": 0.3799, "step": 15713 }, { "epoch": 1.5976006506710045, "grad_norm": 0.2843932509422302, "learning_rate": 8.602511931705714e-06, "loss": 0.3688, "step": 15714 }, { "epoch": 1.5977023180154535, "grad_norm": 0.2749704420566559, "learning_rate": 8.602265826076209e-06, "loss": 0.3511, "step": 15715 }, { "epoch": 1.5978039853599024, "grad_norm": 0.28029173612594604, "learning_rate": 8.602019702299342e-06, "loss": 0.3366, "step": 15716 }, { "epoch": 1.5979056527043514, "grad_norm": 0.29398825764656067, "learning_rate": 8.601773560376356e-06, "loss": 0.3773, "step": 15717 }, { "epoch": 1.5980073200488003, "grad_norm": 0.2807632088661194, "learning_rate": 8.601527400308492e-06, "loss": 0.3346, "step": 15718 }, { "epoch": 1.5981089873932492, "grad_norm": 0.27551770210266113, "learning_rate": 8.601281222096987e-06, "loss": 0.384, "step": 15719 }, { "epoch": 1.5982106547376982, "grad_norm": 0.2712375521659851, "learning_rate": 8.601035025743085e-06, "loss": 0.3649, "step": 15720 }, { "epoch": 1.5983123220821471, "grad_norm": 0.2788459062576294, "learning_rate": 8.600788811248022e-06, "loss": 0.4091, "step": 15721 }, { "epoch": 1.598413989426596, "grad_norm": 0.2742537558078766, "learning_rate": 8.600542578613043e-06, "loss": 0.3719, "step": 15722 }, { "epoch": 1.598515656771045, "grad_norm": 0.26976361870765686, "learning_rate": 8.600296327839386e-06, "loss": 0.3482, "step": 15723 }, { "epoch": 1.598617324115494, "grad_norm": 0.2811656892299652, "learning_rate": 8.600050058928291e-06, "loss": 0.3875, "step": 15724 }, { "epoch": 1.5987189914599431, "grad_norm": 0.3073756992816925, "learning_rate": 8.599803771881e-06, "loss": 0.3585, "step": 15725 }, { "epoch": 1.598820658804392, "grad_norm": 0.28818586468696594, "learning_rate": 8.59955746669875e-06, "loss": 0.3877, "step": 15726 }, { "epoch": 1.598922326148841, "grad_norm": 0.280968576669693, "learning_rate": 8.599311143382789e-06, "loss": 0.3841, "step": 15727 }, { "epoch": 1.59902399349329, "grad_norm": 0.2684844136238098, "learning_rate": 8.599064801934352e-06, "loss": 0.3644, "step": 15728 }, { "epoch": 1.5991256608377389, "grad_norm": 0.29913243651390076, "learning_rate": 8.59881844235468e-06, "loss": 0.3879, "step": 15729 }, { "epoch": 1.599227328182188, "grad_norm": 0.293722540140152, "learning_rate": 8.598572064645019e-06, "loss": 0.375, "step": 15730 }, { "epoch": 1.599328995526637, "grad_norm": 0.2831735908985138, "learning_rate": 8.598325668806604e-06, "loss": 0.352, "step": 15731 }, { "epoch": 1.599430662871086, "grad_norm": 0.28792017698287964, "learning_rate": 8.598079254840681e-06, "loss": 0.3669, "step": 15732 }, { "epoch": 1.5995323302155349, "grad_norm": 0.29631543159484863, "learning_rate": 8.59783282274849e-06, "loss": 0.3564, "step": 15733 }, { "epoch": 1.5996339975599838, "grad_norm": 0.28355711698532104, "learning_rate": 8.597586372531272e-06, "loss": 0.3823, "step": 15734 }, { "epoch": 1.5997356649044328, "grad_norm": 0.2692853510379791, "learning_rate": 8.59733990419027e-06, "loss": 0.3637, "step": 15735 }, { "epoch": 1.5998373322488817, "grad_norm": 0.3034537732601166, "learning_rate": 8.597093417726722e-06, "loss": 0.3342, "step": 15736 }, { "epoch": 1.5999389995933306, "grad_norm": 0.2569211721420288, "learning_rate": 8.596846913141874e-06, "loss": 0.3507, "step": 15737 }, { "epoch": 1.6000406669377796, "grad_norm": 0.2663518190383911, "learning_rate": 8.596600390436966e-06, "loss": 0.3411, "step": 15738 }, { "epoch": 1.6001423342822285, "grad_norm": 0.28416767716407776, "learning_rate": 8.59635384961324e-06, "loss": 0.3712, "step": 15739 }, { "epoch": 1.6002440016266775, "grad_norm": 0.2986801564693451, "learning_rate": 8.596107290671936e-06, "loss": 0.3933, "step": 15740 }, { "epoch": 1.6003456689711264, "grad_norm": 0.2909761965274811, "learning_rate": 8.5958607136143e-06, "loss": 0.359, "step": 15741 }, { "epoch": 1.6004473363155753, "grad_norm": 0.27665555477142334, "learning_rate": 8.59561411844157e-06, "loss": 0.3513, "step": 15742 }, { "epoch": 1.6005490036600243, "grad_norm": 0.2853943109512329, "learning_rate": 8.595367505154992e-06, "loss": 0.3788, "step": 15743 }, { "epoch": 1.6006506710044732, "grad_norm": 0.29180654883384705, "learning_rate": 8.595120873755807e-06, "loss": 0.3826, "step": 15744 }, { "epoch": 1.6007523383489222, "grad_norm": 0.2904074788093567, "learning_rate": 8.594874224245258e-06, "loss": 0.3647, "step": 15745 }, { "epoch": 1.6008540056933713, "grad_norm": 0.28897130489349365, "learning_rate": 8.594627556624585e-06, "loss": 0.3704, "step": 15746 }, { "epoch": 1.6009556730378203, "grad_norm": 0.30506858229637146, "learning_rate": 8.594380870895035e-06, "loss": 0.3875, "step": 15747 }, { "epoch": 1.6010573403822692, "grad_norm": 0.2758193612098694, "learning_rate": 8.594134167057846e-06, "loss": 0.3528, "step": 15748 }, { "epoch": 1.6011590077267182, "grad_norm": 0.28817084431648254, "learning_rate": 8.593887445114266e-06, "loss": 0.3766, "step": 15749 }, { "epoch": 1.601260675071167, "grad_norm": 0.2802286744117737, "learning_rate": 8.59364070506553e-06, "loss": 0.3303, "step": 15750 }, { "epoch": 1.6013623424156163, "grad_norm": 0.2834875285625458, "learning_rate": 8.593393946912891e-06, "loss": 0.4074, "step": 15751 }, { "epoch": 1.6014640097600652, "grad_norm": 0.28151723742485046, "learning_rate": 8.593147170657587e-06, "loss": 0.3727, "step": 15752 }, { "epoch": 1.6015656771045141, "grad_norm": 0.27634865045547485, "learning_rate": 8.592900376300858e-06, "loss": 0.3188, "step": 15753 }, { "epoch": 1.601667344448963, "grad_norm": 0.2938311994075775, "learning_rate": 8.592653563843954e-06, "loss": 0.3477, "step": 15754 }, { "epoch": 1.601769011793412, "grad_norm": 0.283268541097641, "learning_rate": 8.592406733288114e-06, "loss": 0.3598, "step": 15755 }, { "epoch": 1.601870679137861, "grad_norm": 0.29933780431747437, "learning_rate": 8.592159884634583e-06, "loss": 0.3964, "step": 15756 }, { "epoch": 1.60197234648231, "grad_norm": 0.2885017991065979, "learning_rate": 8.591913017884603e-06, "loss": 0.3738, "step": 15757 }, { "epoch": 1.6020740138267588, "grad_norm": 0.2991034686565399, "learning_rate": 8.591666133039421e-06, "loss": 0.3635, "step": 15758 }, { "epoch": 1.6021756811712078, "grad_norm": 0.29536598920822144, "learning_rate": 8.591419230100275e-06, "loss": 0.3528, "step": 15759 }, { "epoch": 1.6022773485156567, "grad_norm": 0.2909075915813446, "learning_rate": 8.591172309068414e-06, "loss": 0.3506, "step": 15760 }, { "epoch": 1.6023790158601057, "grad_norm": 0.2706224322319031, "learning_rate": 8.590925369945081e-06, "loss": 0.3691, "step": 15761 }, { "epoch": 1.6024806832045546, "grad_norm": 0.2544962167739868, "learning_rate": 8.590678412731519e-06, "loss": 0.3416, "step": 15762 }, { "epoch": 1.6025823505490036, "grad_norm": 0.30397310853004456, "learning_rate": 8.590431437428973e-06, "loss": 0.3642, "step": 15763 }, { "epoch": 1.6026840178934525, "grad_norm": 0.2724534869194031, "learning_rate": 8.590184444038685e-06, "loss": 0.3443, "step": 15764 }, { "epoch": 1.6027856852379014, "grad_norm": 0.29114657640457153, "learning_rate": 8.589937432561903e-06, "loss": 0.3409, "step": 15765 }, { "epoch": 1.6028873525823506, "grad_norm": 0.31059762835502625, "learning_rate": 8.589690402999868e-06, "loss": 0.3759, "step": 15766 }, { "epoch": 1.6029890199267995, "grad_norm": 0.27594971656799316, "learning_rate": 8.589443355353825e-06, "loss": 0.3384, "step": 15767 }, { "epoch": 1.6030906872712485, "grad_norm": 0.27902624011039734, "learning_rate": 8.589196289625022e-06, "loss": 0.3845, "step": 15768 }, { "epoch": 1.6031923546156974, "grad_norm": 0.29228833317756653, "learning_rate": 8.588949205814699e-06, "loss": 0.3518, "step": 15769 }, { "epoch": 1.6032940219601464, "grad_norm": 0.29511991143226624, "learning_rate": 8.588702103924102e-06, "loss": 0.3348, "step": 15770 }, { "epoch": 1.6033956893045955, "grad_norm": 0.31156694889068604, "learning_rate": 8.588454983954478e-06, "loss": 0.3351, "step": 15771 }, { "epoch": 1.6034973566490445, "grad_norm": 0.3018488585948944, "learning_rate": 8.58820784590707e-06, "loss": 0.3538, "step": 15772 }, { "epoch": 1.6035990239934934, "grad_norm": 0.29773831367492676, "learning_rate": 8.587960689783124e-06, "loss": 0.3528, "step": 15773 }, { "epoch": 1.6037006913379424, "grad_norm": 0.2914672791957855, "learning_rate": 8.587713515583886e-06, "loss": 0.3887, "step": 15774 }, { "epoch": 1.6038023586823913, "grad_norm": 0.32367923855781555, "learning_rate": 8.587466323310598e-06, "loss": 0.3784, "step": 15775 }, { "epoch": 1.6039040260268402, "grad_norm": 0.2743445932865143, "learning_rate": 8.587219112964507e-06, "loss": 0.3729, "step": 15776 }, { "epoch": 1.6040056933712892, "grad_norm": 0.2886735796928406, "learning_rate": 8.586971884546859e-06, "loss": 0.3846, "step": 15777 }, { "epoch": 1.6041073607157381, "grad_norm": 0.2888927161693573, "learning_rate": 8.586724638058899e-06, "loss": 0.3466, "step": 15778 }, { "epoch": 1.604209028060187, "grad_norm": 0.2982579171657562, "learning_rate": 8.586477373501874e-06, "loss": 0.3601, "step": 15779 }, { "epoch": 1.604310695404636, "grad_norm": 0.2900322675704956, "learning_rate": 8.586230090877026e-06, "loss": 0.3411, "step": 15780 }, { "epoch": 1.604412362749085, "grad_norm": 0.28635233640670776, "learning_rate": 8.585982790185604e-06, "loss": 0.375, "step": 15781 }, { "epoch": 1.6045140300935339, "grad_norm": 0.28494614362716675, "learning_rate": 8.585735471428854e-06, "loss": 0.351, "step": 15782 }, { "epoch": 1.6046156974379828, "grad_norm": 0.302104115486145, "learning_rate": 8.58548813460802e-06, "loss": 0.3567, "step": 15783 }, { "epoch": 1.6047173647824318, "grad_norm": 0.28328201174736023, "learning_rate": 8.585240779724348e-06, "loss": 0.3517, "step": 15784 }, { "epoch": 1.6048190321268807, "grad_norm": 0.27734363079071045, "learning_rate": 8.584993406779085e-06, "loss": 0.3323, "step": 15785 }, { "epoch": 1.6049206994713296, "grad_norm": 0.27614453434944153, "learning_rate": 8.584746015773478e-06, "loss": 0.3596, "step": 15786 }, { "epoch": 1.6050223668157788, "grad_norm": 0.2814143896102905, "learning_rate": 8.584498606708772e-06, "loss": 0.3611, "step": 15787 }, { "epoch": 1.6051240341602278, "grad_norm": 0.27870941162109375, "learning_rate": 8.584251179586214e-06, "loss": 0.3727, "step": 15788 }, { "epoch": 1.6052257015046767, "grad_norm": 0.3074091076850891, "learning_rate": 8.58400373440705e-06, "loss": 0.3567, "step": 15789 }, { "epoch": 1.6053273688491256, "grad_norm": 0.28321653604507446, "learning_rate": 8.583756271172528e-06, "loss": 0.3518, "step": 15790 }, { "epoch": 1.6054290361935746, "grad_norm": 0.2918950915336609, "learning_rate": 8.583508789883892e-06, "loss": 0.3636, "step": 15791 }, { "epoch": 1.6055307035380237, "grad_norm": 0.33723464608192444, "learning_rate": 8.58326129054239e-06, "loss": 0.3796, "step": 15792 }, { "epoch": 1.6056323708824727, "grad_norm": 0.29838067293167114, "learning_rate": 8.583013773149271e-06, "loss": 0.3476, "step": 15793 }, { "epoch": 1.6057340382269216, "grad_norm": 0.26080605387687683, "learning_rate": 8.58276623770578e-06, "loss": 0.3613, "step": 15794 }, { "epoch": 1.6058357055713706, "grad_norm": 0.28574061393737793, "learning_rate": 8.582518684213163e-06, "loss": 0.359, "step": 15795 }, { "epoch": 1.6059373729158195, "grad_norm": 0.29453492164611816, "learning_rate": 8.582271112672668e-06, "loss": 0.3819, "step": 15796 }, { "epoch": 1.6060390402602684, "grad_norm": 0.29177504777908325, "learning_rate": 8.582023523085544e-06, "loss": 0.3553, "step": 15797 }, { "epoch": 1.6061407076047174, "grad_norm": 0.27089396119117737, "learning_rate": 8.581775915453034e-06, "loss": 0.3579, "step": 15798 }, { "epoch": 1.6062423749491663, "grad_norm": 0.2957039177417755, "learning_rate": 8.58152828977639e-06, "loss": 0.3698, "step": 15799 }, { "epoch": 1.6063440422936153, "grad_norm": 0.30852675437927246, "learning_rate": 8.581280646056858e-06, "loss": 0.3489, "step": 15800 }, { "epoch": 1.6064457096380642, "grad_norm": 0.33557194471359253, "learning_rate": 8.581032984295682e-06, "loss": 0.4114, "step": 15801 }, { "epoch": 1.6065473769825132, "grad_norm": 0.29746055603027344, "learning_rate": 8.580785304494118e-06, "loss": 0.3473, "step": 15802 }, { "epoch": 1.606649044326962, "grad_norm": 0.2628600299358368, "learning_rate": 8.580537606653406e-06, "loss": 0.3534, "step": 15803 }, { "epoch": 1.606750711671411, "grad_norm": 0.3074256181716919, "learning_rate": 8.580289890774794e-06, "loss": 0.3983, "step": 15804 }, { "epoch": 1.60685237901586, "grad_norm": 0.3145337700843811, "learning_rate": 8.580042156859535e-06, "loss": 0.3671, "step": 15805 }, { "epoch": 1.606954046360309, "grad_norm": 0.2873009443283081, "learning_rate": 8.579794404908875e-06, "loss": 0.3634, "step": 15806 }, { "epoch": 1.607055713704758, "grad_norm": 0.2658672630786896, "learning_rate": 8.579546634924058e-06, "loss": 0.3514, "step": 15807 }, { "epoch": 1.607157381049207, "grad_norm": 0.3046723008155823, "learning_rate": 8.579298846906338e-06, "loss": 0.3624, "step": 15808 }, { "epoch": 1.607259048393656, "grad_norm": 0.30494508147239685, "learning_rate": 8.579051040856963e-06, "loss": 0.3649, "step": 15809 }, { "epoch": 1.607360715738105, "grad_norm": 0.2581736445426941, "learning_rate": 8.578803216777176e-06, "loss": 0.351, "step": 15810 }, { "epoch": 1.6074623830825538, "grad_norm": 0.27369752526283264, "learning_rate": 8.57855537466823e-06, "loss": 0.384, "step": 15811 }, { "epoch": 1.607564050427003, "grad_norm": 0.30384403467178345, "learning_rate": 8.578307514531373e-06, "loss": 0.3917, "step": 15812 }, { "epoch": 1.607665717771452, "grad_norm": 0.2742450535297394, "learning_rate": 8.578059636367852e-06, "loss": 0.3609, "step": 15813 }, { "epoch": 1.607767385115901, "grad_norm": 0.2752346098423004, "learning_rate": 8.577811740178919e-06, "loss": 0.3349, "step": 15814 }, { "epoch": 1.6078690524603498, "grad_norm": 0.30505871772766113, "learning_rate": 8.57756382596582e-06, "loss": 0.3692, "step": 15815 }, { "epoch": 1.6079707198047988, "grad_norm": 0.2933194637298584, "learning_rate": 8.577315893729804e-06, "loss": 0.3576, "step": 15816 }, { "epoch": 1.6080723871492477, "grad_norm": 0.2966380715370178, "learning_rate": 8.57706794347212e-06, "loss": 0.3858, "step": 15817 }, { "epoch": 1.6081740544936967, "grad_norm": 0.29344862699508667, "learning_rate": 8.576819975194017e-06, "loss": 0.3535, "step": 15818 }, { "epoch": 1.6082757218381456, "grad_norm": 0.3138641119003296, "learning_rate": 8.576571988896747e-06, "loss": 0.3728, "step": 15819 }, { "epoch": 1.6083773891825945, "grad_norm": 0.27867087721824646, "learning_rate": 8.576323984581556e-06, "loss": 0.343, "step": 15820 }, { "epoch": 1.6084790565270435, "grad_norm": 0.30170074105262756, "learning_rate": 8.576075962249695e-06, "loss": 0.3666, "step": 15821 }, { "epoch": 1.6085807238714924, "grad_norm": 0.3016473948955536, "learning_rate": 8.575827921902414e-06, "loss": 0.3573, "step": 15822 }, { "epoch": 1.6086823912159414, "grad_norm": 0.319002628326416, "learning_rate": 8.57557986354096e-06, "loss": 0.3513, "step": 15823 }, { "epoch": 1.6087840585603903, "grad_norm": 0.2939247488975525, "learning_rate": 8.575331787166585e-06, "loss": 0.3358, "step": 15824 }, { "epoch": 1.6088857259048392, "grad_norm": 0.2992945611476898, "learning_rate": 8.575083692780538e-06, "loss": 0.3413, "step": 15825 }, { "epoch": 1.6089873932492882, "grad_norm": 0.32076290249824524, "learning_rate": 8.574835580384069e-06, "loss": 0.3635, "step": 15826 }, { "epoch": 1.6090890605937371, "grad_norm": 0.2938394844532013, "learning_rate": 8.574587449978429e-06, "loss": 0.3533, "step": 15827 }, { "epoch": 1.6091907279381863, "grad_norm": 0.3269122242927551, "learning_rate": 8.574339301564865e-06, "loss": 0.3719, "step": 15828 }, { "epoch": 1.6092923952826352, "grad_norm": 0.3316378891468048, "learning_rate": 8.57409113514463e-06, "loss": 0.3798, "step": 15829 }, { "epoch": 1.6093940626270842, "grad_norm": 0.30548006296157837, "learning_rate": 8.573842950718973e-06, "loss": 0.3525, "step": 15830 }, { "epoch": 1.6094957299715331, "grad_norm": 0.332328736782074, "learning_rate": 8.573594748289145e-06, "loss": 0.3516, "step": 15831 }, { "epoch": 1.609597397315982, "grad_norm": 0.27723929286003113, "learning_rate": 8.573346527856395e-06, "loss": 0.378, "step": 15832 }, { "epoch": 1.6096990646604312, "grad_norm": 0.26339057087898254, "learning_rate": 8.573098289421975e-06, "loss": 0.3296, "step": 15833 }, { "epoch": 1.6098007320048802, "grad_norm": 0.29254505038261414, "learning_rate": 8.572850032987134e-06, "loss": 0.3511, "step": 15834 }, { "epoch": 1.609902399349329, "grad_norm": 0.2864939272403717, "learning_rate": 8.572601758553125e-06, "loss": 0.3636, "step": 15835 }, { "epoch": 1.610004066693778, "grad_norm": 0.2608216404914856, "learning_rate": 8.572353466121197e-06, "loss": 0.357, "step": 15836 }, { "epoch": 1.610105734038227, "grad_norm": 0.3015364408493042, "learning_rate": 8.5721051556926e-06, "loss": 0.36, "step": 15837 }, { "epoch": 1.610207401382676, "grad_norm": 0.3029719889163971, "learning_rate": 8.571856827268587e-06, "loss": 0.359, "step": 15838 }, { "epoch": 1.6103090687271249, "grad_norm": 0.28136900067329407, "learning_rate": 8.571608480850407e-06, "loss": 0.391, "step": 15839 }, { "epoch": 1.6104107360715738, "grad_norm": 0.2869836986064911, "learning_rate": 8.571360116439313e-06, "loss": 0.3592, "step": 15840 }, { "epoch": 1.6105124034160228, "grad_norm": 0.26692116260528564, "learning_rate": 8.571111734036557e-06, "loss": 0.3346, "step": 15841 }, { "epoch": 1.6106140707604717, "grad_norm": 0.29682615399360657, "learning_rate": 8.570863333643386e-06, "loss": 0.3397, "step": 15842 }, { "epoch": 1.6107157381049206, "grad_norm": 0.30107808113098145, "learning_rate": 8.570614915261057e-06, "loss": 0.3734, "step": 15843 }, { "epoch": 1.6108174054493696, "grad_norm": 0.28251785039901733, "learning_rate": 8.570366478890816e-06, "loss": 0.3311, "step": 15844 }, { "epoch": 1.6109190727938185, "grad_norm": 0.29815158247947693, "learning_rate": 8.570118024533918e-06, "loss": 0.3619, "step": 15845 }, { "epoch": 1.6110207401382675, "grad_norm": 0.3089359700679779, "learning_rate": 8.569869552191615e-06, "loss": 0.3565, "step": 15846 }, { "epoch": 1.6111224074827164, "grad_norm": 0.2981525659561157, "learning_rate": 8.569621061865156e-06, "loss": 0.3776, "step": 15847 }, { "epoch": 1.6112240748271656, "grad_norm": 0.27512145042419434, "learning_rate": 8.569372553555795e-06, "loss": 0.3697, "step": 15848 }, { "epoch": 1.6113257421716145, "grad_norm": 0.27645817399024963, "learning_rate": 8.56912402726478e-06, "loss": 0.3322, "step": 15849 }, { "epoch": 1.6114274095160634, "grad_norm": 0.3283596932888031, "learning_rate": 8.56887548299337e-06, "loss": 0.4125, "step": 15850 }, { "epoch": 1.6115290768605124, "grad_norm": 0.2944575250148773, "learning_rate": 8.568626920742814e-06, "loss": 0.3538, "step": 15851 }, { "epoch": 1.6116307442049613, "grad_norm": 0.2890952229499817, "learning_rate": 8.568378340514362e-06, "loss": 0.3815, "step": 15852 }, { "epoch": 1.6117324115494105, "grad_norm": 0.27709221839904785, "learning_rate": 8.56812974230927e-06, "loss": 0.3347, "step": 15853 }, { "epoch": 1.6118340788938594, "grad_norm": 0.2813904285430908, "learning_rate": 8.567881126128786e-06, "loss": 0.3611, "step": 15854 }, { "epoch": 1.6119357462383084, "grad_norm": 0.2871745526790619, "learning_rate": 8.567632491974164e-06, "loss": 0.3291, "step": 15855 }, { "epoch": 1.6120374135827573, "grad_norm": 0.26992663741111755, "learning_rate": 8.56738383984666e-06, "loss": 0.3807, "step": 15856 }, { "epoch": 1.6121390809272063, "grad_norm": 0.27950137853622437, "learning_rate": 8.567135169747522e-06, "loss": 0.3578, "step": 15857 }, { "epoch": 1.6122407482716552, "grad_norm": 0.28874367475509644, "learning_rate": 8.566886481678005e-06, "loss": 0.3629, "step": 15858 }, { "epoch": 1.6123424156161041, "grad_norm": 0.28798505663871765, "learning_rate": 8.566637775639362e-06, "loss": 0.3554, "step": 15859 }, { "epoch": 1.612444082960553, "grad_norm": 0.26893964409828186, "learning_rate": 8.566389051632845e-06, "loss": 0.3578, "step": 15860 }, { "epoch": 1.612545750305002, "grad_norm": 0.2890678942203522, "learning_rate": 8.566140309659708e-06, "loss": 0.3887, "step": 15861 }, { "epoch": 1.612647417649451, "grad_norm": 0.2823454439640045, "learning_rate": 8.565891549721204e-06, "loss": 0.369, "step": 15862 }, { "epoch": 1.6127490849939, "grad_norm": 0.281698077917099, "learning_rate": 8.565642771818585e-06, "loss": 0.3444, "step": 15863 }, { "epoch": 1.6128507523383488, "grad_norm": 0.28803905844688416, "learning_rate": 8.565393975953104e-06, "loss": 0.3532, "step": 15864 }, { "epoch": 1.6129524196827978, "grad_norm": 0.28054216504096985, "learning_rate": 8.565145162126016e-06, "loss": 0.3836, "step": 15865 }, { "epoch": 1.6130540870272467, "grad_norm": 0.2830198109149933, "learning_rate": 8.564896330338574e-06, "loss": 0.3436, "step": 15866 }, { "epoch": 1.6131557543716957, "grad_norm": 0.26155686378479004, "learning_rate": 8.564647480592032e-06, "loss": 0.3459, "step": 15867 }, { "epoch": 1.6132574217161446, "grad_norm": 0.28119516372680664, "learning_rate": 8.564398612887643e-06, "loss": 0.4076, "step": 15868 }, { "epoch": 1.6133590890605938, "grad_norm": 0.28245341777801514, "learning_rate": 8.564149727226661e-06, "loss": 0.3521, "step": 15869 }, { "epoch": 1.6134607564050427, "grad_norm": 0.3048584759235382, "learning_rate": 8.563900823610341e-06, "loss": 0.3546, "step": 15870 }, { "epoch": 1.6135624237494917, "grad_norm": 0.2745063304901123, "learning_rate": 8.563651902039933e-06, "loss": 0.3395, "step": 15871 }, { "epoch": 1.6136640910939406, "grad_norm": 0.26159971952438354, "learning_rate": 8.563402962516693e-06, "loss": 0.3842, "step": 15872 }, { "epoch": 1.6137657584383895, "grad_norm": 0.26934143900871277, "learning_rate": 8.563154005041878e-06, "loss": 0.3215, "step": 15873 }, { "epoch": 1.6138674257828387, "grad_norm": 0.2770584523677826, "learning_rate": 8.56290502961674e-06, "loss": 0.3278, "step": 15874 }, { "epoch": 1.6139690931272876, "grad_norm": 0.30114415287971497, "learning_rate": 8.562656036242532e-06, "loss": 0.3665, "step": 15875 }, { "epoch": 1.6140707604717366, "grad_norm": 0.2575727701187134, "learning_rate": 8.56240702492051e-06, "loss": 0.3611, "step": 15876 }, { "epoch": 1.6141724278161855, "grad_norm": 0.27618107199668884, "learning_rate": 8.562157995651927e-06, "loss": 0.3316, "step": 15877 }, { "epoch": 1.6142740951606345, "grad_norm": 0.2856297791004181, "learning_rate": 8.561908948438038e-06, "loss": 0.3747, "step": 15878 }, { "epoch": 1.6143757625050834, "grad_norm": 0.28021538257598877, "learning_rate": 8.5616598832801e-06, "loss": 0.3732, "step": 15879 }, { "epoch": 1.6144774298495324, "grad_norm": 0.286703884601593, "learning_rate": 8.561410800179365e-06, "loss": 0.36, "step": 15880 }, { "epoch": 1.6145790971939813, "grad_norm": 0.2810511291027069, "learning_rate": 8.561161699137088e-06, "loss": 0.3267, "step": 15881 }, { "epoch": 1.6146807645384302, "grad_norm": 0.31096914410591125, "learning_rate": 8.560912580154525e-06, "loss": 0.3755, "step": 15882 }, { "epoch": 1.6147824318828792, "grad_norm": 0.2848150432109833, "learning_rate": 8.560663443232932e-06, "loss": 0.3817, "step": 15883 }, { "epoch": 1.6148840992273281, "grad_norm": 0.2695382237434387, "learning_rate": 8.560414288373561e-06, "loss": 0.3239, "step": 15884 }, { "epoch": 1.614985766571777, "grad_norm": 0.27471452951431274, "learning_rate": 8.560165115577672e-06, "loss": 0.3233, "step": 15885 }, { "epoch": 1.615087433916226, "grad_norm": 0.3027341067790985, "learning_rate": 8.559915924846513e-06, "loss": 0.3704, "step": 15886 }, { "epoch": 1.615189101260675, "grad_norm": 0.2606520652770996, "learning_rate": 8.559666716181347e-06, "loss": 0.3946, "step": 15887 }, { "epoch": 1.6152907686051239, "grad_norm": 0.28129902482032776, "learning_rate": 8.559417489583424e-06, "loss": 0.358, "step": 15888 }, { "epoch": 1.615392435949573, "grad_norm": 0.2694172263145447, "learning_rate": 8.559168245054001e-06, "loss": 0.3663, "step": 15889 }, { "epoch": 1.615494103294022, "grad_norm": 0.30642032623291016, "learning_rate": 8.558918982594336e-06, "loss": 0.3706, "step": 15890 }, { "epoch": 1.615595770638471, "grad_norm": 0.2763260304927826, "learning_rate": 8.558669702205682e-06, "loss": 0.3768, "step": 15891 }, { "epoch": 1.6156974379829199, "grad_norm": 0.28882482647895813, "learning_rate": 8.558420403889296e-06, "loss": 0.3823, "step": 15892 }, { "epoch": 1.6157991053273688, "grad_norm": 0.2952781915664673, "learning_rate": 8.558171087646434e-06, "loss": 0.3395, "step": 15893 }, { "epoch": 1.615900772671818, "grad_norm": 0.27123886346817017, "learning_rate": 8.55792175347835e-06, "loss": 0.3705, "step": 15894 }, { "epoch": 1.616002440016267, "grad_norm": 0.2577010989189148, "learning_rate": 8.557672401386302e-06, "loss": 0.3424, "step": 15895 }, { "epoch": 1.6161041073607159, "grad_norm": 0.26566776633262634, "learning_rate": 8.557423031371547e-06, "loss": 0.4001, "step": 15896 }, { "epoch": 1.6162057747051648, "grad_norm": 0.2746531367301941, "learning_rate": 8.557173643435339e-06, "loss": 0.4066, "step": 15897 }, { "epoch": 1.6163074420496137, "grad_norm": 0.2624882459640503, "learning_rate": 8.556924237578937e-06, "loss": 0.3519, "step": 15898 }, { "epoch": 1.6164091093940627, "grad_norm": 0.28768667578697205, "learning_rate": 8.556674813803595e-06, "loss": 0.3687, "step": 15899 }, { "epoch": 1.6165107767385116, "grad_norm": 0.288308709859848, "learning_rate": 8.556425372110571e-06, "loss": 0.354, "step": 15900 }, { "epoch": 1.6166124440829606, "grad_norm": 0.2957081198692322, "learning_rate": 8.556175912501121e-06, "loss": 0.3405, "step": 15901 }, { "epoch": 1.6167141114274095, "grad_norm": 0.2888247072696686, "learning_rate": 8.5559264349765e-06, "loss": 0.3419, "step": 15902 }, { "epoch": 1.6168157787718584, "grad_norm": 0.2742486596107483, "learning_rate": 8.555676939537969e-06, "loss": 0.4008, "step": 15903 }, { "epoch": 1.6169174461163074, "grad_norm": 0.28536680340766907, "learning_rate": 8.555427426186782e-06, "loss": 0.3655, "step": 15904 }, { "epoch": 1.6170191134607563, "grad_norm": 0.26835545897483826, "learning_rate": 8.555177894924196e-06, "loss": 0.3574, "step": 15905 }, { "epoch": 1.6171207808052053, "grad_norm": 0.2846101224422455, "learning_rate": 8.55492834575147e-06, "loss": 0.3891, "step": 15906 }, { "epoch": 1.6172224481496542, "grad_norm": 0.2701222598552704, "learning_rate": 8.554678778669858e-06, "loss": 0.3282, "step": 15907 }, { "epoch": 1.6173241154941032, "grad_norm": 0.27186131477355957, "learning_rate": 8.55442919368062e-06, "loss": 0.3534, "step": 15908 }, { "epoch": 1.617425782838552, "grad_norm": 0.2968328297138214, "learning_rate": 8.55417959078501e-06, "loss": 0.3187, "step": 15909 }, { "epoch": 1.6175274501830013, "grad_norm": 0.29066288471221924, "learning_rate": 8.553929969984291e-06, "loss": 0.3455, "step": 15910 }, { "epoch": 1.6176291175274502, "grad_norm": 0.2822386622428894, "learning_rate": 8.553680331279716e-06, "loss": 0.337, "step": 15911 }, { "epoch": 1.6177307848718991, "grad_norm": 0.2797449231147766, "learning_rate": 8.553430674672544e-06, "loss": 0.3639, "step": 15912 }, { "epoch": 1.617832452216348, "grad_norm": 0.30695870518684387, "learning_rate": 8.553181000164034e-06, "loss": 0.3643, "step": 15913 }, { "epoch": 1.617934119560797, "grad_norm": 0.301167756319046, "learning_rate": 8.55293130775544e-06, "loss": 0.3368, "step": 15914 }, { "epoch": 1.6180357869052462, "grad_norm": 0.26984021067619324, "learning_rate": 8.552681597448025e-06, "loss": 0.326, "step": 15915 }, { "epoch": 1.6181374542496951, "grad_norm": 0.2735801637172699, "learning_rate": 8.552431869243043e-06, "loss": 0.3278, "step": 15916 }, { "epoch": 1.618239121594144, "grad_norm": 0.3180435001850128, "learning_rate": 8.552182123141753e-06, "loss": 0.3974, "step": 15917 }, { "epoch": 1.618340788938593, "grad_norm": 0.28512200713157654, "learning_rate": 8.551932359145415e-06, "loss": 0.3672, "step": 15918 }, { "epoch": 1.618442456283042, "grad_norm": 0.29207751154899597, "learning_rate": 8.551682577255285e-06, "loss": 0.3977, "step": 15919 }, { "epoch": 1.618544123627491, "grad_norm": 0.275747686624527, "learning_rate": 8.551432777472621e-06, "loss": 0.4052, "step": 15920 }, { "epoch": 1.6186457909719398, "grad_norm": 0.28316283226013184, "learning_rate": 8.551182959798685e-06, "loss": 0.3297, "step": 15921 }, { "epoch": 1.6187474583163888, "grad_norm": 0.2672053873538971, "learning_rate": 8.550933124234732e-06, "loss": 0.3381, "step": 15922 }, { "epoch": 1.6188491256608377, "grad_norm": 0.26935094594955444, "learning_rate": 8.550683270782021e-06, "loss": 0.3464, "step": 15923 }, { "epoch": 1.6189507930052867, "grad_norm": 0.2781962454319, "learning_rate": 8.550433399441812e-06, "loss": 0.3799, "step": 15924 }, { "epoch": 1.6190524603497356, "grad_norm": 0.31943419575691223, "learning_rate": 8.550183510215364e-06, "loss": 0.3594, "step": 15925 }, { "epoch": 1.6191541276941845, "grad_norm": 0.26927781105041504, "learning_rate": 8.549933603103934e-06, "loss": 0.3267, "step": 15926 }, { "epoch": 1.6192557950386335, "grad_norm": 0.2705366015434265, "learning_rate": 8.549683678108783e-06, "loss": 0.3479, "step": 15927 }, { "epoch": 1.6193574623830824, "grad_norm": 0.30995938181877136, "learning_rate": 8.549433735231168e-06, "loss": 0.3763, "step": 15928 }, { "epoch": 1.6194591297275314, "grad_norm": 0.31074249744415283, "learning_rate": 8.549183774472352e-06, "loss": 0.372, "step": 15929 }, { "epoch": 1.6195607970719805, "grad_norm": 0.282478392124176, "learning_rate": 8.548933795833587e-06, "loss": 0.3712, "step": 15930 }, { "epoch": 1.6196624644164295, "grad_norm": 0.2870032489299774, "learning_rate": 8.54868379931614e-06, "loss": 0.3937, "step": 15931 }, { "epoch": 1.6197641317608784, "grad_norm": 0.3188985288143158, "learning_rate": 8.548433784921268e-06, "loss": 0.4051, "step": 15932 }, { "epoch": 1.6198657991053274, "grad_norm": 0.2849908769130707, "learning_rate": 8.548183752650228e-06, "loss": 0.3691, "step": 15933 }, { "epoch": 1.6199674664497763, "grad_norm": 0.2760973572731018, "learning_rate": 8.547933702504283e-06, "loss": 0.3713, "step": 15934 }, { "epoch": 1.6200691337942255, "grad_norm": 0.33282560110092163, "learning_rate": 8.54768363448469e-06, "loss": 0.4052, "step": 15935 }, { "epoch": 1.6201708011386744, "grad_norm": 0.28042712807655334, "learning_rate": 8.54743354859271e-06, "loss": 0.3588, "step": 15936 }, { "epoch": 1.6202724684831233, "grad_norm": 0.29110145568847656, "learning_rate": 8.547183444829603e-06, "loss": 0.3401, "step": 15937 }, { "epoch": 1.6203741358275723, "grad_norm": 0.2835708558559418, "learning_rate": 8.546933323196629e-06, "loss": 0.3755, "step": 15938 }, { "epoch": 1.6204758031720212, "grad_norm": 0.27397483587265015, "learning_rate": 8.546683183695047e-06, "loss": 0.3599, "step": 15939 }, { "epoch": 1.6205774705164702, "grad_norm": 0.2848745584487915, "learning_rate": 8.546433026326118e-06, "loss": 0.3532, "step": 15940 }, { "epoch": 1.620679137860919, "grad_norm": 0.26935335993766785, "learning_rate": 8.546182851091103e-06, "loss": 0.371, "step": 15941 }, { "epoch": 1.620780805205368, "grad_norm": 0.2966611385345459, "learning_rate": 8.54593265799126e-06, "loss": 0.3562, "step": 15942 }, { "epoch": 1.620882472549817, "grad_norm": 0.29342377185821533, "learning_rate": 8.545682447027853e-06, "loss": 0.3562, "step": 15943 }, { "epoch": 1.620984139894266, "grad_norm": 0.2765437662601471, "learning_rate": 8.54543221820214e-06, "loss": 0.3498, "step": 15944 }, { "epoch": 1.6210858072387149, "grad_norm": 0.2777712047100067, "learning_rate": 8.545181971515381e-06, "loss": 0.3563, "step": 15945 }, { "epoch": 1.6211874745831638, "grad_norm": 0.2644233703613281, "learning_rate": 8.54493170696884e-06, "loss": 0.3435, "step": 15946 }, { "epoch": 1.6212891419276128, "grad_norm": 0.27966925501823425, "learning_rate": 8.544681424563772e-06, "loss": 0.352, "step": 15947 }, { "epoch": 1.6213908092720617, "grad_norm": 0.2805918753147125, "learning_rate": 8.544431124301442e-06, "loss": 0.3478, "step": 15948 }, { "epoch": 1.6214924766165106, "grad_norm": 0.31727614998817444, "learning_rate": 8.544180806183112e-06, "loss": 0.352, "step": 15949 }, { "epoch": 1.6215941439609596, "grad_norm": 0.30501052737236023, "learning_rate": 8.54393047021004e-06, "loss": 0.3659, "step": 15950 }, { "epoch": 1.6216958113054087, "grad_norm": 0.2828640341758728, "learning_rate": 8.54368011638349e-06, "loss": 0.3635, "step": 15951 }, { "epoch": 1.6217974786498577, "grad_norm": 0.28224799036979675, "learning_rate": 8.54342974470472e-06, "loss": 0.3428, "step": 15952 }, { "epoch": 1.6218991459943066, "grad_norm": 0.2644314467906952, "learning_rate": 8.543179355174993e-06, "loss": 0.3246, "step": 15953 }, { "epoch": 1.6220008133387556, "grad_norm": 0.27601444721221924, "learning_rate": 8.542928947795572e-06, "loss": 0.3552, "step": 15954 }, { "epoch": 1.6221024806832045, "grad_norm": 0.28889545798301697, "learning_rate": 8.542678522567716e-06, "loss": 0.355, "step": 15955 }, { "epoch": 1.6222041480276537, "grad_norm": 0.2623271644115448, "learning_rate": 8.542428079492687e-06, "loss": 0.3581, "step": 15956 }, { "epoch": 1.6223058153721026, "grad_norm": 0.2735211253166199, "learning_rate": 8.542177618571747e-06, "loss": 0.332, "step": 15957 }, { "epoch": 1.6224074827165516, "grad_norm": 0.2895454168319702, "learning_rate": 8.541927139806158e-06, "loss": 0.344, "step": 15958 }, { "epoch": 1.6225091500610005, "grad_norm": 0.2626703083515167, "learning_rate": 8.541676643197183e-06, "loss": 0.3415, "step": 15959 }, { "epoch": 1.6226108174054494, "grad_norm": 0.2659010887145996, "learning_rate": 8.541426128746083e-06, "loss": 0.3745, "step": 15960 }, { "epoch": 1.6227124847498984, "grad_norm": 0.27963075041770935, "learning_rate": 8.541175596454119e-06, "loss": 0.366, "step": 15961 }, { "epoch": 1.6228141520943473, "grad_norm": 0.2974802255630493, "learning_rate": 8.540925046322553e-06, "loss": 0.3579, "step": 15962 }, { "epoch": 1.6229158194387963, "grad_norm": 0.2657853364944458, "learning_rate": 8.540674478352648e-06, "loss": 0.3634, "step": 15963 }, { "epoch": 1.6230174867832452, "grad_norm": 0.2653054893016815, "learning_rate": 8.540423892545667e-06, "loss": 0.3793, "step": 15964 }, { "epoch": 1.6231191541276941, "grad_norm": 0.28537431359291077, "learning_rate": 8.540173288902872e-06, "loss": 0.3678, "step": 15965 }, { "epoch": 1.623220821472143, "grad_norm": 0.2736341059207916, "learning_rate": 8.539922667425524e-06, "loss": 0.3484, "step": 15966 }, { "epoch": 1.623322488816592, "grad_norm": 0.27279672026634216, "learning_rate": 8.53967202811489e-06, "loss": 0.3484, "step": 15967 }, { "epoch": 1.623424156161041, "grad_norm": 0.29688310623168945, "learning_rate": 8.539421370972225e-06, "loss": 0.3701, "step": 15968 }, { "epoch": 1.62352582350549, "grad_norm": 0.26694220304489136, "learning_rate": 8.5391706959988e-06, "loss": 0.3535, "step": 15969 }, { "epoch": 1.6236274908499388, "grad_norm": 0.2714281678199768, "learning_rate": 8.538920003195872e-06, "loss": 0.3613, "step": 15970 }, { "epoch": 1.623729158194388, "grad_norm": 0.26828086376190186, "learning_rate": 8.538669292564708e-06, "loss": 0.3755, "step": 15971 }, { "epoch": 1.623830825538837, "grad_norm": 0.2752322852611542, "learning_rate": 8.538418564106567e-06, "loss": 0.3763, "step": 15972 }, { "epoch": 1.623932492883286, "grad_norm": 0.2902398407459259, "learning_rate": 8.538167817822713e-06, "loss": 0.3608, "step": 15973 }, { "epoch": 1.6240341602277348, "grad_norm": 0.26337292790412903, "learning_rate": 8.537917053714413e-06, "loss": 0.3395, "step": 15974 }, { "epoch": 1.6241358275721838, "grad_norm": 0.28006651997566223, "learning_rate": 8.537666271782927e-06, "loss": 0.357, "step": 15975 }, { "epoch": 1.624237494916633, "grad_norm": 0.2790890038013458, "learning_rate": 8.537415472029518e-06, "loss": 0.3742, "step": 15976 }, { "epoch": 1.6243391622610819, "grad_norm": 0.26581764221191406, "learning_rate": 8.537164654455451e-06, "loss": 0.3555, "step": 15977 }, { "epoch": 1.6244408296055308, "grad_norm": 0.28272831439971924, "learning_rate": 8.53691381906199e-06, "loss": 0.3623, "step": 15978 }, { "epoch": 1.6245424969499798, "grad_norm": 0.2611547112464905, "learning_rate": 8.536662965850398e-06, "loss": 0.3419, "step": 15979 }, { "epoch": 1.6246441642944287, "grad_norm": 0.29764869809150696, "learning_rate": 8.536412094821935e-06, "loss": 0.3835, "step": 15980 }, { "epoch": 1.6247458316388776, "grad_norm": 0.29649507999420166, "learning_rate": 8.53616120597787e-06, "loss": 0.396, "step": 15981 }, { "epoch": 1.6248474989833266, "grad_norm": 0.28538721799850464, "learning_rate": 8.535910299319468e-06, "loss": 0.3845, "step": 15982 }, { "epoch": 1.6249491663277755, "grad_norm": 0.284986287355423, "learning_rate": 8.535659374847986e-06, "loss": 0.3458, "step": 15983 }, { "epoch": 1.6250508336722245, "grad_norm": 0.2913966476917267, "learning_rate": 8.535408432564695e-06, "loss": 0.3739, "step": 15984 }, { "epoch": 1.6251525010166734, "grad_norm": 0.2630612254142761, "learning_rate": 8.535157472470856e-06, "loss": 0.3484, "step": 15985 }, { "epoch": 1.6252541683611224, "grad_norm": 0.2815515697002411, "learning_rate": 8.534906494567733e-06, "loss": 0.3881, "step": 15986 }, { "epoch": 1.6253558357055713, "grad_norm": 0.28894415497779846, "learning_rate": 8.534655498856592e-06, "loss": 0.3683, "step": 15987 }, { "epoch": 1.6254575030500202, "grad_norm": 0.28847432136535645, "learning_rate": 8.534404485338695e-06, "loss": 0.3584, "step": 15988 }, { "epoch": 1.6255591703944692, "grad_norm": 0.3042430579662323, "learning_rate": 8.53415345401531e-06, "loss": 0.3924, "step": 15989 }, { "epoch": 1.6256608377389181, "grad_norm": 0.2847576439380646, "learning_rate": 8.533902404887699e-06, "loss": 0.3846, "step": 15990 }, { "epoch": 1.625762505083367, "grad_norm": 0.29012390971183777, "learning_rate": 8.533651337957127e-06, "loss": 0.3758, "step": 15991 }, { "epoch": 1.6258641724278162, "grad_norm": 0.3005511164665222, "learning_rate": 8.533400253224859e-06, "loss": 0.357, "step": 15992 }, { "epoch": 1.6259658397722652, "grad_norm": 0.27073749899864197, "learning_rate": 8.533149150692161e-06, "loss": 0.3145, "step": 15993 }, { "epoch": 1.626067507116714, "grad_norm": 0.31419438123703003, "learning_rate": 8.532898030360297e-06, "loss": 0.3512, "step": 15994 }, { "epoch": 1.626169174461163, "grad_norm": 0.2786407470703125, "learning_rate": 8.532646892230532e-06, "loss": 0.3454, "step": 15995 }, { "epoch": 1.626270841805612, "grad_norm": 0.2995870113372803, "learning_rate": 8.532395736304133e-06, "loss": 0.3688, "step": 15996 }, { "epoch": 1.6263725091500612, "grad_norm": 0.2868025302886963, "learning_rate": 8.532144562582361e-06, "loss": 0.3915, "step": 15997 }, { "epoch": 1.62647417649451, "grad_norm": 0.2851201891899109, "learning_rate": 8.531893371066487e-06, "loss": 0.4004, "step": 15998 }, { "epoch": 1.626575843838959, "grad_norm": 0.27112141251564026, "learning_rate": 8.53164216175777e-06, "loss": 0.3508, "step": 15999 }, { "epoch": 1.626677511183408, "grad_norm": 0.28576332330703735, "learning_rate": 8.531390934657483e-06, "loss": 0.3583, "step": 16000 }, { "epoch": 1.626779178527857, "grad_norm": 0.31408753991127014, "learning_rate": 8.531139689766885e-06, "loss": 0.3809, "step": 16001 }, { "epoch": 1.6268808458723059, "grad_norm": 0.25804927945137024, "learning_rate": 8.530888427087245e-06, "loss": 0.3632, "step": 16002 }, { "epoch": 1.6269825132167548, "grad_norm": 0.2728060781955719, "learning_rate": 8.530637146619826e-06, "loss": 0.3322, "step": 16003 }, { "epoch": 1.6270841805612037, "grad_norm": 0.270108163356781, "learning_rate": 8.5303858483659e-06, "loss": 0.3441, "step": 16004 }, { "epoch": 1.6271858479056527, "grad_norm": 0.27104026079177856, "learning_rate": 8.530134532326727e-06, "loss": 0.3701, "step": 16005 }, { "epoch": 1.6272875152501016, "grad_norm": 0.28176334500312805, "learning_rate": 8.529883198503575e-06, "loss": 0.3618, "step": 16006 }, { "epoch": 1.6273891825945506, "grad_norm": 0.27915826439857483, "learning_rate": 8.529631846897712e-06, "loss": 0.3584, "step": 16007 }, { "epoch": 1.6274908499389995, "grad_norm": 0.2801044285297394, "learning_rate": 8.529380477510401e-06, "loss": 0.3473, "step": 16008 }, { "epoch": 1.6275925172834484, "grad_norm": 0.300553560256958, "learning_rate": 8.52912909034291e-06, "loss": 0.3572, "step": 16009 }, { "epoch": 1.6276941846278974, "grad_norm": 0.2728651165962219, "learning_rate": 8.528877685396504e-06, "loss": 0.3618, "step": 16010 }, { "epoch": 1.6277958519723463, "grad_norm": 0.2570415735244751, "learning_rate": 8.528626262672453e-06, "loss": 0.335, "step": 16011 }, { "epoch": 1.6278975193167955, "grad_norm": 0.26281794905662537, "learning_rate": 8.52837482217202e-06, "loss": 0.3702, "step": 16012 }, { "epoch": 1.6279991866612444, "grad_norm": 0.2807271182537079, "learning_rate": 8.528123363896474e-06, "loss": 0.3257, "step": 16013 }, { "epoch": 1.6281008540056934, "grad_norm": 0.2826745808124542, "learning_rate": 8.527871887847082e-06, "loss": 0.3513, "step": 16014 }, { "epoch": 1.6282025213501423, "grad_norm": 0.2911592423915863, "learning_rate": 8.527620394025106e-06, "loss": 0.4105, "step": 16015 }, { "epoch": 1.6283041886945913, "grad_norm": 0.2782666087150574, "learning_rate": 8.52736888243182e-06, "loss": 0.3812, "step": 16016 }, { "epoch": 1.6284058560390404, "grad_norm": 0.2669074237346649, "learning_rate": 8.527117353068487e-06, "loss": 0.3716, "step": 16017 }, { "epoch": 1.6285075233834894, "grad_norm": 0.2747644782066345, "learning_rate": 8.526865805936375e-06, "loss": 0.3636, "step": 16018 }, { "epoch": 1.6286091907279383, "grad_norm": 0.2786441743373871, "learning_rate": 8.52661424103675e-06, "loss": 0.3455, "step": 16019 }, { "epoch": 1.6287108580723872, "grad_norm": 0.2657904624938965, "learning_rate": 8.526362658370883e-06, "loss": 0.3566, "step": 16020 }, { "epoch": 1.6288125254168362, "grad_norm": 0.27342721819877625, "learning_rate": 8.526111057940038e-06, "loss": 0.3666, "step": 16021 }, { "epoch": 1.6289141927612851, "grad_norm": 0.29112955927848816, "learning_rate": 8.525859439745483e-06, "loss": 0.3558, "step": 16022 }, { "epoch": 1.629015860105734, "grad_norm": 0.2697511911392212, "learning_rate": 8.525607803788486e-06, "loss": 0.3795, "step": 16023 }, { "epoch": 1.629117527450183, "grad_norm": 0.2586418092250824, "learning_rate": 8.525356150070314e-06, "loss": 0.3515, "step": 16024 }, { "epoch": 1.629219194794632, "grad_norm": 0.2908184826374054, "learning_rate": 8.525104478592236e-06, "loss": 0.3686, "step": 16025 }, { "epoch": 1.629320862139081, "grad_norm": 0.3051476776599884, "learning_rate": 8.52485278935552e-06, "loss": 0.3585, "step": 16026 }, { "epoch": 1.6294225294835298, "grad_norm": 0.2871970236301422, "learning_rate": 8.524601082361432e-06, "loss": 0.3553, "step": 16027 }, { "epoch": 1.6295241968279788, "grad_norm": 0.2918492555618286, "learning_rate": 8.524349357611242e-06, "loss": 0.3408, "step": 16028 }, { "epoch": 1.6296258641724277, "grad_norm": 0.3154577910900116, "learning_rate": 8.524097615106218e-06, "loss": 0.3633, "step": 16029 }, { "epoch": 1.6297275315168767, "grad_norm": 0.24535052478313446, "learning_rate": 8.523845854847627e-06, "loss": 0.3751, "step": 16030 }, { "epoch": 1.6298291988613256, "grad_norm": 0.3173307776451111, "learning_rate": 8.523594076836738e-06, "loss": 0.398, "step": 16031 }, { "epoch": 1.6299308662057745, "grad_norm": 0.2914191484451294, "learning_rate": 8.52334228107482e-06, "loss": 0.3162, "step": 16032 }, { "epoch": 1.6300325335502237, "grad_norm": 0.2574801445007324, "learning_rate": 8.52309046756314e-06, "loss": 0.3634, "step": 16033 }, { "epoch": 1.6301342008946726, "grad_norm": 0.304109662771225, "learning_rate": 8.522838636302968e-06, "loss": 0.3509, "step": 16034 }, { "epoch": 1.6302358682391216, "grad_norm": 0.28728538751602173, "learning_rate": 8.522586787295571e-06, "loss": 0.3709, "step": 16035 }, { "epoch": 1.6303375355835705, "grad_norm": 0.2782488465309143, "learning_rate": 8.522334920542222e-06, "loss": 0.391, "step": 16036 }, { "epoch": 1.6304392029280195, "grad_norm": 0.3001369535923004, "learning_rate": 8.522083036044184e-06, "loss": 0.3619, "step": 16037 }, { "epoch": 1.6305408702724686, "grad_norm": 0.28693559765815735, "learning_rate": 8.521831133802729e-06, "loss": 0.3558, "step": 16038 }, { "epoch": 1.6306425376169176, "grad_norm": 0.2691379189491272, "learning_rate": 8.521579213819125e-06, "loss": 0.3732, "step": 16039 }, { "epoch": 1.6307442049613665, "grad_norm": 0.3068828284740448, "learning_rate": 8.521327276094643e-06, "loss": 0.3678, "step": 16040 }, { "epoch": 1.6308458723058155, "grad_norm": 0.3112834393978119, "learning_rate": 8.52107532063055e-06, "loss": 0.3885, "step": 16041 }, { "epoch": 1.6309475396502644, "grad_norm": 0.27087682485580444, "learning_rate": 8.520823347428118e-06, "loss": 0.3575, "step": 16042 }, { "epoch": 1.6310492069947133, "grad_norm": 0.29587307572364807, "learning_rate": 8.520571356488612e-06, "loss": 0.3402, "step": 16043 }, { "epoch": 1.6311508743391623, "grad_norm": 0.29368457198143005, "learning_rate": 8.520319347813306e-06, "loss": 0.3741, "step": 16044 }, { "epoch": 1.6312525416836112, "grad_norm": 0.30083057284355164, "learning_rate": 8.520067321403467e-06, "loss": 0.3277, "step": 16045 }, { "epoch": 1.6313542090280602, "grad_norm": 0.2931582033634186, "learning_rate": 8.519815277260366e-06, "loss": 0.3715, "step": 16046 }, { "epoch": 1.631455876372509, "grad_norm": 0.28973594307899475, "learning_rate": 8.519563215385272e-06, "loss": 0.3851, "step": 16047 }, { "epoch": 1.631557543716958, "grad_norm": 0.3007747232913971, "learning_rate": 8.519311135779453e-06, "loss": 0.3551, "step": 16048 }, { "epoch": 1.631659211061407, "grad_norm": 0.28944531083106995, "learning_rate": 8.519059038444183e-06, "loss": 0.3833, "step": 16049 }, { "epoch": 1.631760878405856, "grad_norm": 0.32498660683631897, "learning_rate": 8.51880692338073e-06, "loss": 0.3321, "step": 16050 }, { "epoch": 1.6318625457503049, "grad_norm": 0.2842096984386444, "learning_rate": 8.518554790590364e-06, "loss": 0.374, "step": 16051 }, { "epoch": 1.6319642130947538, "grad_norm": 0.28840044140815735, "learning_rate": 8.518302640074353e-06, "loss": 0.3851, "step": 16052 }, { "epoch": 1.632065880439203, "grad_norm": 0.293876051902771, "learning_rate": 8.518050471833971e-06, "loss": 0.3738, "step": 16053 }, { "epoch": 1.632167547783652, "grad_norm": 0.30732062458992004, "learning_rate": 8.517798285870487e-06, "loss": 0.3828, "step": 16054 }, { "epoch": 1.6322692151281009, "grad_norm": 0.2955694794654846, "learning_rate": 8.51754608218517e-06, "loss": 0.3765, "step": 16055 }, { "epoch": 1.6323708824725498, "grad_norm": 0.2729301154613495, "learning_rate": 8.517293860779292e-06, "loss": 0.3826, "step": 16056 }, { "epoch": 1.6324725498169987, "grad_norm": 0.30933359265327454, "learning_rate": 8.517041621654123e-06, "loss": 0.3575, "step": 16057 }, { "epoch": 1.632574217161448, "grad_norm": 0.3267904818058014, "learning_rate": 8.516789364810935e-06, "loss": 0.3647, "step": 16058 }, { "epoch": 1.6326758845058968, "grad_norm": 0.2974923253059387, "learning_rate": 8.516537090250998e-06, "loss": 0.3924, "step": 16059 }, { "epoch": 1.6327775518503458, "grad_norm": 0.30986735224723816, "learning_rate": 8.516284797975582e-06, "loss": 0.3322, "step": 16060 }, { "epoch": 1.6328792191947947, "grad_norm": 0.2883044481277466, "learning_rate": 8.516032487985959e-06, "loss": 0.3481, "step": 16061 }, { "epoch": 1.6329808865392437, "grad_norm": 0.29035109281539917, "learning_rate": 8.5157801602834e-06, "loss": 0.3576, "step": 16062 }, { "epoch": 1.6330825538836926, "grad_norm": 0.30424773693084717, "learning_rate": 8.515527814869177e-06, "loss": 0.371, "step": 16063 }, { "epoch": 1.6331842212281416, "grad_norm": 0.2928328216075897, "learning_rate": 8.51527545174456e-06, "loss": 0.3663, "step": 16064 }, { "epoch": 1.6332858885725905, "grad_norm": 0.26358240842819214, "learning_rate": 8.51502307091082e-06, "loss": 0.3524, "step": 16065 }, { "epoch": 1.6333875559170394, "grad_norm": 0.2831067442893982, "learning_rate": 8.514770672369227e-06, "loss": 0.332, "step": 16066 }, { "epoch": 1.6334892232614884, "grad_norm": 0.28124603629112244, "learning_rate": 8.514518256121058e-06, "loss": 0.3576, "step": 16067 }, { "epoch": 1.6335908906059373, "grad_norm": 0.2642160654067993, "learning_rate": 8.51426582216758e-06, "loss": 0.3503, "step": 16068 }, { "epoch": 1.6336925579503863, "grad_norm": 0.30875322222709656, "learning_rate": 8.514013370510066e-06, "loss": 0.3747, "step": 16069 }, { "epoch": 1.6337942252948352, "grad_norm": 0.27531296014785767, "learning_rate": 8.513760901149786e-06, "loss": 0.3303, "step": 16070 }, { "epoch": 1.6338958926392841, "grad_norm": 0.27395281195640564, "learning_rate": 8.513508414088017e-06, "loss": 0.3543, "step": 16071 }, { "epoch": 1.633997559983733, "grad_norm": 0.2863098084926605, "learning_rate": 8.513255909326024e-06, "loss": 0.3353, "step": 16072 }, { "epoch": 1.634099227328182, "grad_norm": 0.2865888476371765, "learning_rate": 8.513003386865085e-06, "loss": 0.3649, "step": 16073 }, { "epoch": 1.6342008946726312, "grad_norm": 0.2937527596950531, "learning_rate": 8.512750846706468e-06, "loss": 0.3591, "step": 16074 }, { "epoch": 1.6343025620170801, "grad_norm": 0.2605798840522766, "learning_rate": 8.512498288851448e-06, "loss": 0.3441, "step": 16075 }, { "epoch": 1.634404229361529, "grad_norm": 0.3024041950702667, "learning_rate": 8.512245713301298e-06, "loss": 0.3749, "step": 16076 }, { "epoch": 1.634505896705978, "grad_norm": 0.2792051434516907, "learning_rate": 8.511993120057286e-06, "loss": 0.3393, "step": 16077 }, { "epoch": 1.634607564050427, "grad_norm": 0.3112941086292267, "learning_rate": 8.511740509120687e-06, "loss": 0.3879, "step": 16078 }, { "epoch": 1.6347092313948761, "grad_norm": 0.27953264117240906, "learning_rate": 8.511487880492777e-06, "loss": 0.3226, "step": 16079 }, { "epoch": 1.634810898739325, "grad_norm": 0.25987765192985535, "learning_rate": 8.511235234174821e-06, "loss": 0.3601, "step": 16080 }, { "epoch": 1.634912566083774, "grad_norm": 0.26303863525390625, "learning_rate": 8.5109825701681e-06, "loss": 0.3909, "step": 16081 }, { "epoch": 1.635014233428223, "grad_norm": 0.28855380415916443, "learning_rate": 8.510729888473882e-06, "loss": 0.3603, "step": 16082 }, { "epoch": 1.6351159007726719, "grad_norm": 0.3049769103527069, "learning_rate": 8.510477189093442e-06, "loss": 0.3356, "step": 16083 }, { "epoch": 1.6352175681171208, "grad_norm": 0.2829738259315491, "learning_rate": 8.510224472028052e-06, "loss": 0.322, "step": 16084 }, { "epoch": 1.6353192354615698, "grad_norm": 0.2990485727787018, "learning_rate": 8.509971737278984e-06, "loss": 0.373, "step": 16085 }, { "epoch": 1.6354209028060187, "grad_norm": 0.2710760831832886, "learning_rate": 8.509718984847512e-06, "loss": 0.3404, "step": 16086 }, { "epoch": 1.6355225701504676, "grad_norm": 0.2828955054283142, "learning_rate": 8.509466214734911e-06, "loss": 0.3272, "step": 16087 }, { "epoch": 1.6356242374949166, "grad_norm": 0.2977502644062042, "learning_rate": 8.509213426942453e-06, "loss": 0.3613, "step": 16088 }, { "epoch": 1.6357259048393655, "grad_norm": 0.2687395215034485, "learning_rate": 8.508960621471412e-06, "loss": 0.3067, "step": 16089 }, { "epoch": 1.6358275721838145, "grad_norm": 0.2843773066997528, "learning_rate": 8.508707798323062e-06, "loss": 0.376, "step": 16090 }, { "epoch": 1.6359292395282634, "grad_norm": 0.2948243021965027, "learning_rate": 8.508454957498674e-06, "loss": 0.3839, "step": 16091 }, { "epoch": 1.6360309068727124, "grad_norm": 0.2896985411643982, "learning_rate": 8.508202098999523e-06, "loss": 0.3547, "step": 16092 }, { "epoch": 1.6361325742171613, "grad_norm": 0.287704735994339, "learning_rate": 8.507949222826887e-06, "loss": 0.3827, "step": 16093 }, { "epoch": 1.6362342415616105, "grad_norm": 0.2817264199256897, "learning_rate": 8.507696328982034e-06, "loss": 0.3895, "step": 16094 }, { "epoch": 1.6363359089060594, "grad_norm": 0.2905123829841614, "learning_rate": 8.50744341746624e-06, "loss": 0.3765, "step": 16095 }, { "epoch": 1.6364375762505083, "grad_norm": 0.3041483461856842, "learning_rate": 8.50719048828078e-06, "loss": 0.3179, "step": 16096 }, { "epoch": 1.6365392435949573, "grad_norm": 0.41706743836402893, "learning_rate": 8.506937541426929e-06, "loss": 0.3837, "step": 16097 }, { "epoch": 1.6366409109394062, "grad_norm": 0.2634100615978241, "learning_rate": 8.506684576905959e-06, "loss": 0.3289, "step": 16098 }, { "epoch": 1.6367425782838554, "grad_norm": 0.2755894064903259, "learning_rate": 8.506431594719148e-06, "loss": 0.3454, "step": 16099 }, { "epoch": 1.6368442456283043, "grad_norm": 0.2856599986553192, "learning_rate": 8.506178594867764e-06, "loss": 0.346, "step": 16100 }, { "epoch": 1.6369459129727533, "grad_norm": 0.27254992723464966, "learning_rate": 8.505925577353087e-06, "loss": 0.3589, "step": 16101 }, { "epoch": 1.6370475803172022, "grad_norm": 0.28056132793426514, "learning_rate": 8.50567254217639e-06, "loss": 0.3244, "step": 16102 }, { "epoch": 1.6371492476616512, "grad_norm": 0.2737063467502594, "learning_rate": 8.505419489338948e-06, "loss": 0.3856, "step": 16103 }, { "epoch": 1.6372509150061, "grad_norm": 0.2709961533546448, "learning_rate": 8.505166418842034e-06, "loss": 0.3444, "step": 16104 }, { "epoch": 1.637352582350549, "grad_norm": 0.30092379450798035, "learning_rate": 8.504913330686927e-06, "loss": 0.3599, "step": 16105 }, { "epoch": 1.637454249694998, "grad_norm": 0.2688547670841217, "learning_rate": 8.504660224874898e-06, "loss": 0.378, "step": 16106 }, { "epoch": 1.637555917039447, "grad_norm": 0.274853378534317, "learning_rate": 8.504407101407225e-06, "loss": 0.3558, "step": 16107 }, { "epoch": 1.6376575843838959, "grad_norm": 0.30612388253211975, "learning_rate": 8.504153960285181e-06, "loss": 0.3948, "step": 16108 }, { "epoch": 1.6377592517283448, "grad_norm": 0.2652224004268646, "learning_rate": 8.503900801510042e-06, "loss": 0.3468, "step": 16109 }, { "epoch": 1.6378609190727937, "grad_norm": 0.2506941854953766, "learning_rate": 8.503647625083083e-06, "loss": 0.3547, "step": 16110 }, { "epoch": 1.6379625864172427, "grad_norm": 0.30296576023101807, "learning_rate": 8.50339443100558e-06, "loss": 0.3653, "step": 16111 }, { "epoch": 1.6380642537616916, "grad_norm": 0.2693122923374176, "learning_rate": 8.503141219278809e-06, "loss": 0.3604, "step": 16112 }, { "epoch": 1.6381659211061406, "grad_norm": 0.29833662509918213, "learning_rate": 8.502887989904045e-06, "loss": 0.3524, "step": 16113 }, { "epoch": 1.6382675884505895, "grad_norm": 0.2715514898300171, "learning_rate": 8.502634742882564e-06, "loss": 0.3464, "step": 16114 }, { "epoch": 1.6383692557950387, "grad_norm": 0.2889033555984497, "learning_rate": 8.50238147821564e-06, "loss": 0.3468, "step": 16115 }, { "epoch": 1.6384709231394876, "grad_norm": 0.26639002561569214, "learning_rate": 8.502128195904553e-06, "loss": 0.3238, "step": 16116 }, { "epoch": 1.6385725904839366, "grad_norm": 0.26692134141921997, "learning_rate": 8.501874895950573e-06, "loss": 0.3691, "step": 16117 }, { "epoch": 1.6386742578283855, "grad_norm": 0.27369967103004456, "learning_rate": 8.501621578354982e-06, "loss": 0.3552, "step": 16118 }, { "epoch": 1.6387759251728344, "grad_norm": 0.26911014318466187, "learning_rate": 8.501368243119053e-06, "loss": 0.3687, "step": 16119 }, { "epoch": 1.6388775925172836, "grad_norm": 0.28118160367012024, "learning_rate": 8.501114890244063e-06, "loss": 0.3613, "step": 16120 }, { "epoch": 1.6389792598617325, "grad_norm": 0.2716801166534424, "learning_rate": 8.500861519731287e-06, "loss": 0.3337, "step": 16121 }, { "epoch": 1.6390809272061815, "grad_norm": 0.26715323328971863, "learning_rate": 8.500608131582003e-06, "loss": 0.3751, "step": 16122 }, { "epoch": 1.6391825945506304, "grad_norm": 0.2599294185638428, "learning_rate": 8.500354725797489e-06, "loss": 0.3541, "step": 16123 }, { "epoch": 1.6392842618950794, "grad_norm": 0.25589239597320557, "learning_rate": 8.500101302379018e-06, "loss": 0.3522, "step": 16124 }, { "epoch": 1.6393859292395283, "grad_norm": 0.2481653243303299, "learning_rate": 8.499847861327868e-06, "loss": 0.3434, "step": 16125 }, { "epoch": 1.6394875965839772, "grad_norm": 0.30661314725875854, "learning_rate": 8.499594402645317e-06, "loss": 0.371, "step": 16126 }, { "epoch": 1.6395892639284262, "grad_norm": 0.2903357446193695, "learning_rate": 8.49934092633264e-06, "loss": 0.3915, "step": 16127 }, { "epoch": 1.6396909312728751, "grad_norm": 0.2972245514392853, "learning_rate": 8.499087432391116e-06, "loss": 0.3583, "step": 16128 }, { "epoch": 1.639792598617324, "grad_norm": 0.29100537300109863, "learning_rate": 8.49883392082202e-06, "loss": 0.3436, "step": 16129 }, { "epoch": 1.639894265961773, "grad_norm": 0.2937854528427124, "learning_rate": 8.498580391626632e-06, "loss": 0.3672, "step": 16130 }, { "epoch": 1.639995933306222, "grad_norm": 0.3074907660484314, "learning_rate": 8.498326844806225e-06, "loss": 0.3766, "step": 16131 }, { "epoch": 1.640097600650671, "grad_norm": 0.26543745398521423, "learning_rate": 8.498073280362082e-06, "loss": 0.3379, "step": 16132 }, { "epoch": 1.6401992679951198, "grad_norm": 0.2931617498397827, "learning_rate": 8.497819698295475e-06, "loss": 0.3964, "step": 16133 }, { "epoch": 1.6403009353395688, "grad_norm": 0.290569931268692, "learning_rate": 8.497566098607683e-06, "loss": 0.3614, "step": 16134 }, { "epoch": 1.640402602684018, "grad_norm": 0.2799844741821289, "learning_rate": 8.497312481299986e-06, "loss": 0.3693, "step": 16135 }, { "epoch": 1.6405042700284669, "grad_norm": 0.2870196998119354, "learning_rate": 8.497058846373658e-06, "loss": 0.3764, "step": 16136 }, { "epoch": 1.6406059373729158, "grad_norm": 0.3035157322883606, "learning_rate": 8.496805193829978e-06, "loss": 0.3959, "step": 16137 }, { "epoch": 1.6407076047173648, "grad_norm": 0.28402215242385864, "learning_rate": 8.496551523670227e-06, "loss": 0.369, "step": 16138 }, { "epoch": 1.6408092720618137, "grad_norm": 0.2674991190433502, "learning_rate": 8.496297835895679e-06, "loss": 0.3605, "step": 16139 }, { "epoch": 1.6409109394062629, "grad_norm": 0.288068026304245, "learning_rate": 8.496044130507613e-06, "loss": 0.3741, "step": 16140 }, { "epoch": 1.6410126067507118, "grad_norm": 0.2892187237739563, "learning_rate": 8.495790407507307e-06, "loss": 0.3491, "step": 16141 }, { "epoch": 1.6411142740951608, "grad_norm": 0.28227922320365906, "learning_rate": 8.49553666689604e-06, "loss": 0.3827, "step": 16142 }, { "epoch": 1.6412159414396097, "grad_norm": 0.2855263352394104, "learning_rate": 8.495282908675091e-06, "loss": 0.3976, "step": 16143 }, { "epoch": 1.6413176087840586, "grad_norm": 0.2876409590244293, "learning_rate": 8.495029132845737e-06, "loss": 0.3331, "step": 16144 }, { "epoch": 1.6414192761285076, "grad_norm": 0.26534581184387207, "learning_rate": 8.494775339409257e-06, "loss": 0.3255, "step": 16145 }, { "epoch": 1.6415209434729565, "grad_norm": 0.28661325573921204, "learning_rate": 8.494521528366929e-06, "loss": 0.3706, "step": 16146 }, { "epoch": 1.6416226108174055, "grad_norm": 0.300042986869812, "learning_rate": 8.49426769972003e-06, "loss": 0.3534, "step": 16147 }, { "epoch": 1.6417242781618544, "grad_norm": 0.2493450790643692, "learning_rate": 8.494013853469843e-06, "loss": 0.3449, "step": 16148 }, { "epoch": 1.6418259455063033, "grad_norm": 0.2551184296607971, "learning_rate": 8.493759989617645e-06, "loss": 0.3263, "step": 16149 }, { "epoch": 1.6419276128507523, "grad_norm": 0.2754236161708832, "learning_rate": 8.493506108164714e-06, "loss": 0.3456, "step": 16150 }, { "epoch": 1.6420292801952012, "grad_norm": 0.26591232419013977, "learning_rate": 8.493252209112327e-06, "loss": 0.3575, "step": 16151 }, { "epoch": 1.6421309475396502, "grad_norm": 0.2770429849624634, "learning_rate": 8.492998292461767e-06, "loss": 0.3511, "step": 16152 }, { "epoch": 1.642232614884099, "grad_norm": 0.2752322554588318, "learning_rate": 8.492744358214314e-06, "loss": 0.3179, "step": 16153 }, { "epoch": 1.642334282228548, "grad_norm": 0.29725727438926697, "learning_rate": 8.49249040637124e-06, "loss": 0.3662, "step": 16154 }, { "epoch": 1.642435949572997, "grad_norm": 0.27170529961586, "learning_rate": 8.492236436933835e-06, "loss": 0.3914, "step": 16155 }, { "epoch": 1.6425376169174462, "grad_norm": 0.26215067505836487, "learning_rate": 8.49198244990337e-06, "loss": 0.3525, "step": 16156 }, { "epoch": 1.642639284261895, "grad_norm": 0.25852006673812866, "learning_rate": 8.491728445281126e-06, "loss": 0.3595, "step": 16157 }, { "epoch": 1.642740951606344, "grad_norm": 0.290293425321579, "learning_rate": 8.491474423068384e-06, "loss": 0.356, "step": 16158 }, { "epoch": 1.642842618950793, "grad_norm": 0.28121939301490784, "learning_rate": 8.491220383266426e-06, "loss": 0.3361, "step": 16159 }, { "epoch": 1.642944286295242, "grad_norm": 0.2688332200050354, "learning_rate": 8.490966325876527e-06, "loss": 0.3663, "step": 16160 }, { "epoch": 1.643045953639691, "grad_norm": 0.2882257401943207, "learning_rate": 8.49071225089997e-06, "loss": 0.3528, "step": 16161 }, { "epoch": 1.64314762098414, "grad_norm": 0.28600555658340454, "learning_rate": 8.490458158338033e-06, "loss": 0.3363, "step": 16162 }, { "epoch": 1.643249288328589, "grad_norm": 0.2686663568019867, "learning_rate": 8.490204048191999e-06, "loss": 0.3583, "step": 16163 }, { "epoch": 1.643350955673038, "grad_norm": 0.2976347506046295, "learning_rate": 8.489949920463147e-06, "loss": 0.358, "step": 16164 }, { "epoch": 1.6434526230174868, "grad_norm": 0.2925088703632355, "learning_rate": 8.489695775152755e-06, "loss": 0.3982, "step": 16165 }, { "epoch": 1.6435542903619358, "grad_norm": 0.2734586298465729, "learning_rate": 8.489441612262105e-06, "loss": 0.3486, "step": 16166 }, { "epoch": 1.6436559577063847, "grad_norm": 0.25799229741096497, "learning_rate": 8.489187431792478e-06, "loss": 0.3144, "step": 16167 }, { "epoch": 1.6437576250508337, "grad_norm": 0.2737194001674652, "learning_rate": 8.488933233745153e-06, "loss": 0.3399, "step": 16168 }, { "epoch": 1.6438592923952826, "grad_norm": 0.29594799876213074, "learning_rate": 8.488679018121414e-06, "loss": 0.3675, "step": 16169 }, { "epoch": 1.6439609597397316, "grad_norm": 0.30202147364616394, "learning_rate": 8.488424784922537e-06, "loss": 0.3592, "step": 16170 }, { "epoch": 1.6440626270841805, "grad_norm": 0.26965922117233276, "learning_rate": 8.488170534149804e-06, "loss": 0.3704, "step": 16171 }, { "epoch": 1.6441642944286294, "grad_norm": 0.29056358337402344, "learning_rate": 8.4879162658045e-06, "loss": 0.3643, "step": 16172 }, { "epoch": 1.6442659617730784, "grad_norm": 0.28231197595596313, "learning_rate": 8.4876619798879e-06, "loss": 0.3383, "step": 16173 }, { "epoch": 1.6443676291175273, "grad_norm": 0.29649749398231506, "learning_rate": 8.48740767640129e-06, "loss": 0.4076, "step": 16174 }, { "epoch": 1.6444692964619763, "grad_norm": 0.2710723280906677, "learning_rate": 8.487153355345946e-06, "loss": 0.3651, "step": 16175 }, { "epoch": 1.6445709638064254, "grad_norm": 0.2607458233833313, "learning_rate": 8.486899016723153e-06, "loss": 0.3317, "step": 16176 }, { "epoch": 1.6446726311508744, "grad_norm": 0.30698034167289734, "learning_rate": 8.486644660534193e-06, "loss": 0.331, "step": 16177 }, { "epoch": 1.6447742984953233, "grad_norm": 0.29591453075408936, "learning_rate": 8.486390286780345e-06, "loss": 0.3587, "step": 16178 }, { "epoch": 1.6448759658397722, "grad_norm": 0.2861616015434265, "learning_rate": 8.48613589546289e-06, "loss": 0.3752, "step": 16179 }, { "epoch": 1.6449776331842212, "grad_norm": 0.27785226702690125, "learning_rate": 8.485881486583112e-06, "loss": 0.3517, "step": 16180 }, { "epoch": 1.6450793005286704, "grad_norm": 0.3089583218097687, "learning_rate": 8.48562706014229e-06, "loss": 0.379, "step": 16181 }, { "epoch": 1.6451809678731193, "grad_norm": 0.26742076873779297, "learning_rate": 8.485372616141708e-06, "loss": 0.3508, "step": 16182 }, { "epoch": 1.6452826352175682, "grad_norm": 0.2735835313796997, "learning_rate": 8.485118154582648e-06, "loss": 0.3483, "step": 16183 }, { "epoch": 1.6453843025620172, "grad_norm": 0.32775601744651794, "learning_rate": 8.484863675466391e-06, "loss": 0.3807, "step": 16184 }, { "epoch": 1.6454859699064661, "grad_norm": 0.27594679594039917, "learning_rate": 8.484609178794218e-06, "loss": 0.3773, "step": 16185 }, { "epoch": 1.645587637250915, "grad_norm": 0.300490140914917, "learning_rate": 8.48435466456741e-06, "loss": 0.3767, "step": 16186 }, { "epoch": 1.645689304595364, "grad_norm": 0.2863118052482605, "learning_rate": 8.484100132787253e-06, "loss": 0.3659, "step": 16187 }, { "epoch": 1.645790971939813, "grad_norm": 0.27822014689445496, "learning_rate": 8.48384558345503e-06, "loss": 0.3409, "step": 16188 }, { "epoch": 1.6458926392842619, "grad_norm": 0.29089272022247314, "learning_rate": 8.483591016572018e-06, "loss": 0.349, "step": 16189 }, { "epoch": 1.6459943066287108, "grad_norm": 0.2740683853626251, "learning_rate": 8.483336432139504e-06, "loss": 0.3303, "step": 16190 }, { "epoch": 1.6460959739731598, "grad_norm": 0.3057209551334381, "learning_rate": 8.483081830158767e-06, "loss": 0.3368, "step": 16191 }, { "epoch": 1.6461976413176087, "grad_norm": 0.3080507516860962, "learning_rate": 8.482827210631093e-06, "loss": 0.3667, "step": 16192 }, { "epoch": 1.6462993086620576, "grad_norm": 0.27215099334716797, "learning_rate": 8.482572573557761e-06, "loss": 0.3745, "step": 16193 }, { "epoch": 1.6464009760065066, "grad_norm": 0.27878037095069885, "learning_rate": 8.482317918940058e-06, "loss": 0.3719, "step": 16194 }, { "epoch": 1.6465026433509555, "grad_norm": 0.26423829793930054, "learning_rate": 8.482063246779265e-06, "loss": 0.3303, "step": 16195 }, { "epoch": 1.6466043106954045, "grad_norm": 0.26907631754875183, "learning_rate": 8.481808557076664e-06, "loss": 0.3649, "step": 16196 }, { "epoch": 1.6467059780398536, "grad_norm": 0.2883794605731964, "learning_rate": 8.481553849833538e-06, "loss": 0.3738, "step": 16197 }, { "epoch": 1.6468076453843026, "grad_norm": 0.2877052128314972, "learning_rate": 8.481299125051172e-06, "loss": 0.3649, "step": 16198 }, { "epoch": 1.6469093127287515, "grad_norm": 0.2689324915409088, "learning_rate": 8.481044382730847e-06, "loss": 0.3953, "step": 16199 }, { "epoch": 1.6470109800732005, "grad_norm": 0.2629041075706482, "learning_rate": 8.48078962287385e-06, "loss": 0.4014, "step": 16200 }, { "epoch": 1.6471126474176494, "grad_norm": 0.28790992498397827, "learning_rate": 8.48053484548146e-06, "loss": 0.3877, "step": 16201 }, { "epoch": 1.6472143147620986, "grad_norm": 0.28294870257377625, "learning_rate": 8.480280050554964e-06, "loss": 0.3871, "step": 16202 }, { "epoch": 1.6473159821065475, "grad_norm": 0.264059454202652, "learning_rate": 8.480025238095644e-06, "loss": 0.3799, "step": 16203 }, { "epoch": 1.6474176494509964, "grad_norm": 0.27330633997917175, "learning_rate": 8.479770408104783e-06, "loss": 0.3894, "step": 16204 }, { "epoch": 1.6475193167954454, "grad_norm": 0.2867503762245178, "learning_rate": 8.479515560583667e-06, "loss": 0.3502, "step": 16205 }, { "epoch": 1.6476209841398943, "grad_norm": 0.2773110270500183, "learning_rate": 8.479260695533577e-06, "loss": 0.3562, "step": 16206 }, { "epoch": 1.6477226514843433, "grad_norm": 0.2887609302997589, "learning_rate": 8.479005812955799e-06, "loss": 0.3403, "step": 16207 }, { "epoch": 1.6478243188287922, "grad_norm": 0.2855342626571655, "learning_rate": 8.478750912851616e-06, "loss": 0.3498, "step": 16208 }, { "epoch": 1.6479259861732412, "grad_norm": 0.27868378162384033, "learning_rate": 8.478495995222313e-06, "loss": 0.353, "step": 16209 }, { "epoch": 1.64802765351769, "grad_norm": 0.25702035427093506, "learning_rate": 8.478241060069174e-06, "loss": 0.3249, "step": 16210 }, { "epoch": 1.648129320862139, "grad_norm": 0.2774682343006134, "learning_rate": 8.477986107393484e-06, "loss": 0.3869, "step": 16211 }, { "epoch": 1.648230988206588, "grad_norm": 0.28445667028427124, "learning_rate": 8.477731137196525e-06, "loss": 0.3542, "step": 16212 }, { "epoch": 1.648332655551037, "grad_norm": 0.2790384590625763, "learning_rate": 8.477476149479583e-06, "loss": 0.3318, "step": 16213 }, { "epoch": 1.6484343228954859, "grad_norm": 0.24696952104568481, "learning_rate": 8.477221144243944e-06, "loss": 0.3416, "step": 16214 }, { "epoch": 1.6485359902399348, "grad_norm": 0.28338363766670227, "learning_rate": 8.476966121490888e-06, "loss": 0.3853, "step": 16215 }, { "epoch": 1.6486376575843837, "grad_norm": 0.31772828102111816, "learning_rate": 8.476711081221704e-06, "loss": 0.3467, "step": 16216 }, { "epoch": 1.648739324928833, "grad_norm": 0.29041168093681335, "learning_rate": 8.476456023437678e-06, "loss": 0.3444, "step": 16217 }, { "epoch": 1.6488409922732818, "grad_norm": 0.25724321603775024, "learning_rate": 8.476200948140092e-06, "loss": 0.3551, "step": 16218 }, { "epoch": 1.6489426596177308, "grad_norm": 0.26323437690734863, "learning_rate": 8.47594585533023e-06, "loss": 0.3239, "step": 16219 }, { "epoch": 1.6490443269621797, "grad_norm": 0.2762649357318878, "learning_rate": 8.47569074500938e-06, "loss": 0.3819, "step": 16220 }, { "epoch": 1.6491459943066287, "grad_norm": 0.26831328868865967, "learning_rate": 8.475435617178827e-06, "loss": 0.347, "step": 16221 }, { "epoch": 1.6492476616510778, "grad_norm": 0.28490373492240906, "learning_rate": 8.475180471839854e-06, "loss": 0.3219, "step": 16222 }, { "epoch": 1.6493493289955268, "grad_norm": 0.3002525269985199, "learning_rate": 8.474925308993747e-06, "loss": 0.3714, "step": 16223 }, { "epoch": 1.6494509963399757, "grad_norm": 0.26837465167045593, "learning_rate": 8.474670128641794e-06, "loss": 0.3595, "step": 16224 }, { "epoch": 1.6495526636844247, "grad_norm": 0.2848522961139679, "learning_rate": 8.474414930785278e-06, "loss": 0.351, "step": 16225 }, { "epoch": 1.6496543310288736, "grad_norm": 0.2770179212093353, "learning_rate": 8.474159715425483e-06, "loss": 0.3508, "step": 16226 }, { "epoch": 1.6497559983733225, "grad_norm": 0.298637330532074, "learning_rate": 8.473904482563699e-06, "loss": 0.3948, "step": 16227 }, { "epoch": 1.6498576657177715, "grad_norm": 0.27873653173446655, "learning_rate": 8.473649232201208e-06, "loss": 0.4067, "step": 16228 }, { "epoch": 1.6499593330622204, "grad_norm": 0.2907419502735138, "learning_rate": 8.4733939643393e-06, "loss": 0.3807, "step": 16229 }, { "epoch": 1.6500610004066694, "grad_norm": 0.2789565920829773, "learning_rate": 8.473138678979258e-06, "loss": 0.3608, "step": 16230 }, { "epoch": 1.6501626677511183, "grad_norm": 0.2641703188419342, "learning_rate": 8.472883376122368e-06, "loss": 0.3396, "step": 16231 }, { "epoch": 1.6502643350955672, "grad_norm": 0.29196271300315857, "learning_rate": 8.472628055769917e-06, "loss": 0.3983, "step": 16232 }, { "epoch": 1.6503660024400162, "grad_norm": 0.29516687989234924, "learning_rate": 8.472372717923192e-06, "loss": 0.3863, "step": 16233 }, { "epoch": 1.6504676697844651, "grad_norm": 0.28668710589408875, "learning_rate": 8.472117362583476e-06, "loss": 0.3477, "step": 16234 }, { "epoch": 1.650569337128914, "grad_norm": 0.3155664801597595, "learning_rate": 8.47186198975206e-06, "loss": 0.3339, "step": 16235 }, { "epoch": 1.650671004473363, "grad_norm": 0.31092506647109985, "learning_rate": 8.47160659943023e-06, "loss": 0.3382, "step": 16236 }, { "epoch": 1.650772671817812, "grad_norm": 0.2818610668182373, "learning_rate": 8.471351191619265e-06, "loss": 0.34, "step": 16237 }, { "epoch": 1.6508743391622611, "grad_norm": 0.2952294647693634, "learning_rate": 8.471095766320462e-06, "loss": 0.3703, "step": 16238 }, { "epoch": 1.65097600650671, "grad_norm": 0.2898872196674347, "learning_rate": 8.470840323535103e-06, "loss": 0.3545, "step": 16239 }, { "epoch": 1.651077673851159, "grad_norm": 0.27792420983314514, "learning_rate": 8.470584863264475e-06, "loss": 0.348, "step": 16240 }, { "epoch": 1.651179341195608, "grad_norm": 0.2544776201248169, "learning_rate": 8.470329385509865e-06, "loss": 0.3307, "step": 16241 }, { "epoch": 1.6512810085400569, "grad_norm": 0.29611721634864807, "learning_rate": 8.470073890272561e-06, "loss": 0.3715, "step": 16242 }, { "epoch": 1.651382675884506, "grad_norm": 0.26205506920814514, "learning_rate": 8.46981837755385e-06, "loss": 0.3532, "step": 16243 }, { "epoch": 1.651484343228955, "grad_norm": 0.2623928189277649, "learning_rate": 8.469562847355015e-06, "loss": 0.4114, "step": 16244 }, { "epoch": 1.651586010573404, "grad_norm": 0.25662267208099365, "learning_rate": 8.469307299677351e-06, "loss": 0.3399, "step": 16245 }, { "epoch": 1.6516876779178529, "grad_norm": 0.26643094420433044, "learning_rate": 8.46905173452214e-06, "loss": 0.3394, "step": 16246 }, { "epoch": 1.6517893452623018, "grad_norm": 0.25936198234558105, "learning_rate": 8.468796151890668e-06, "loss": 0.3313, "step": 16247 }, { "epoch": 1.6518910126067508, "grad_norm": 0.2991962432861328, "learning_rate": 8.468540551784228e-06, "loss": 0.3758, "step": 16248 }, { "epoch": 1.6519926799511997, "grad_norm": 0.2542663812637329, "learning_rate": 8.468284934204106e-06, "loss": 0.3534, "step": 16249 }, { "epoch": 1.6520943472956486, "grad_norm": 0.2927488386631012, "learning_rate": 8.468029299151585e-06, "loss": 0.3732, "step": 16250 }, { "epoch": 1.6521960146400976, "grad_norm": 0.30624091625213623, "learning_rate": 8.467773646627959e-06, "loss": 0.3702, "step": 16251 }, { "epoch": 1.6522976819845465, "grad_norm": 0.2884048819541931, "learning_rate": 8.467517976634512e-06, "loss": 0.4355, "step": 16252 }, { "epoch": 1.6523993493289955, "grad_norm": 0.2905122935771942, "learning_rate": 8.467262289172536e-06, "loss": 0.3751, "step": 16253 }, { "epoch": 1.6525010166734444, "grad_norm": 0.2676898241043091, "learning_rate": 8.467006584243315e-06, "loss": 0.3332, "step": 16254 }, { "epoch": 1.6526026840178933, "grad_norm": 0.29806122183799744, "learning_rate": 8.466750861848138e-06, "loss": 0.3589, "step": 16255 }, { "epoch": 1.6527043513623423, "grad_norm": 0.2847406268119812, "learning_rate": 8.466495121988297e-06, "loss": 0.3518, "step": 16256 }, { "epoch": 1.6528060187067912, "grad_norm": 0.272542268037796, "learning_rate": 8.466239364665074e-06, "loss": 0.3258, "step": 16257 }, { "epoch": 1.6529076860512404, "grad_norm": 0.2851261794567108, "learning_rate": 8.46598358987976e-06, "loss": 0.3613, "step": 16258 }, { "epoch": 1.6530093533956893, "grad_norm": 0.26212078332901, "learning_rate": 8.465727797633647e-06, "loss": 0.3727, "step": 16259 }, { "epoch": 1.6531110207401383, "grad_norm": 0.2563384771347046, "learning_rate": 8.46547198792802e-06, "loss": 0.3467, "step": 16260 }, { "epoch": 1.6532126880845872, "grad_norm": 0.2645306885242462, "learning_rate": 8.465216160764169e-06, "loss": 0.3779, "step": 16261 }, { "epoch": 1.6533143554290362, "grad_norm": 0.2615833878517151, "learning_rate": 8.464960316143382e-06, "loss": 0.3626, "step": 16262 }, { "epoch": 1.6534160227734853, "grad_norm": 0.2877205014228821, "learning_rate": 8.46470445406695e-06, "loss": 0.3615, "step": 16263 }, { "epoch": 1.6535176901179343, "grad_norm": 0.2723481059074402, "learning_rate": 8.464448574536157e-06, "loss": 0.367, "step": 16264 }, { "epoch": 1.6536193574623832, "grad_norm": 0.27994799613952637, "learning_rate": 8.4641926775523e-06, "loss": 0.3614, "step": 16265 }, { "epoch": 1.6537210248068321, "grad_norm": 0.2508404850959778, "learning_rate": 8.463936763116658e-06, "loss": 0.3549, "step": 16266 }, { "epoch": 1.653822692151281, "grad_norm": 0.31140804290771484, "learning_rate": 8.463680831230528e-06, "loss": 0.3478, "step": 16267 }, { "epoch": 1.65392435949573, "grad_norm": 0.27462494373321533, "learning_rate": 8.463424881895199e-06, "loss": 0.3407, "step": 16268 }, { "epoch": 1.654026026840179, "grad_norm": 0.2646385133266449, "learning_rate": 8.463168915111956e-06, "loss": 0.3655, "step": 16269 }, { "epoch": 1.654127694184628, "grad_norm": 0.30599287152290344, "learning_rate": 8.462912930882092e-06, "loss": 0.3869, "step": 16270 }, { "epoch": 1.6542293615290768, "grad_norm": 0.2924541234970093, "learning_rate": 8.462656929206896e-06, "loss": 0.3335, "step": 16271 }, { "epoch": 1.6543310288735258, "grad_norm": 0.29510021209716797, "learning_rate": 8.462400910087656e-06, "loss": 0.4025, "step": 16272 }, { "epoch": 1.6544326962179747, "grad_norm": 0.2704665958881378, "learning_rate": 8.462144873525665e-06, "loss": 0.3546, "step": 16273 }, { "epoch": 1.6545343635624237, "grad_norm": 0.27473026514053345, "learning_rate": 8.46188881952221e-06, "loss": 0.3291, "step": 16274 }, { "epoch": 1.6546360309068726, "grad_norm": 0.29465675354003906, "learning_rate": 8.46163274807858e-06, "loss": 0.3696, "step": 16275 }, { "epoch": 1.6547376982513216, "grad_norm": 0.2750000059604645, "learning_rate": 8.46137665919607e-06, "loss": 0.3444, "step": 16276 }, { "epoch": 1.6548393655957705, "grad_norm": 0.27757641673088074, "learning_rate": 8.461120552875964e-06, "loss": 0.334, "step": 16277 }, { "epoch": 1.6549410329402194, "grad_norm": 0.27546241879463196, "learning_rate": 8.460864429119558e-06, "loss": 0.3676, "step": 16278 }, { "epoch": 1.6550427002846686, "grad_norm": 0.27876630425453186, "learning_rate": 8.460608287928136e-06, "loss": 0.3405, "step": 16279 }, { "epoch": 1.6551443676291175, "grad_norm": 0.2797505259513855, "learning_rate": 8.460352129302995e-06, "loss": 0.3991, "step": 16280 }, { "epoch": 1.6552460349735665, "grad_norm": 0.26480790972709656, "learning_rate": 8.460095953245419e-06, "loss": 0.3516, "step": 16281 }, { "epoch": 1.6553477023180154, "grad_norm": 0.29777100682258606, "learning_rate": 8.459839759756705e-06, "loss": 0.3574, "step": 16282 }, { "epoch": 1.6554493696624644, "grad_norm": 0.2763989567756653, "learning_rate": 8.45958354883814e-06, "loss": 0.381, "step": 16283 }, { "epoch": 1.6555510370069135, "grad_norm": 0.2950122356414795, "learning_rate": 8.459327320491014e-06, "loss": 0.3506, "step": 16284 }, { "epoch": 1.6556527043513625, "grad_norm": 0.2802623212337494, "learning_rate": 8.459071074716618e-06, "loss": 0.3477, "step": 16285 }, { "epoch": 1.6557543716958114, "grad_norm": 0.2890247404575348, "learning_rate": 8.458814811516247e-06, "loss": 0.352, "step": 16286 }, { "epoch": 1.6558560390402604, "grad_norm": 0.2959488332271576, "learning_rate": 8.458558530891185e-06, "loss": 0.3448, "step": 16287 }, { "epoch": 1.6559577063847093, "grad_norm": 0.28610238432884216, "learning_rate": 8.45830223284273e-06, "loss": 0.3728, "step": 16288 }, { "epoch": 1.6560593737291582, "grad_norm": 0.2863575518131256, "learning_rate": 8.45804591737217e-06, "loss": 0.3643, "step": 16289 }, { "epoch": 1.6561610410736072, "grad_norm": 0.27225998044013977, "learning_rate": 8.457789584480796e-06, "loss": 0.3402, "step": 16290 }, { "epoch": 1.6562627084180561, "grad_norm": 0.2834334671497345, "learning_rate": 8.457533234169899e-06, "loss": 0.3748, "step": 16291 }, { "epoch": 1.656364375762505, "grad_norm": 0.289267361164093, "learning_rate": 8.457276866440773e-06, "loss": 0.3293, "step": 16292 }, { "epoch": 1.656466043106954, "grad_norm": 0.2690466344356537, "learning_rate": 8.457020481294707e-06, "loss": 0.3641, "step": 16293 }, { "epoch": 1.656567710451403, "grad_norm": 0.28134411573410034, "learning_rate": 8.456764078732991e-06, "loss": 0.3537, "step": 16294 }, { "epoch": 1.6566693777958519, "grad_norm": 0.2969609200954437, "learning_rate": 8.456507658756922e-06, "loss": 0.3569, "step": 16295 }, { "epoch": 1.6567710451403008, "grad_norm": 0.2826037108898163, "learning_rate": 8.456251221367788e-06, "loss": 0.361, "step": 16296 }, { "epoch": 1.6568727124847498, "grad_norm": 0.29362353682518005, "learning_rate": 8.455994766566881e-06, "loss": 0.369, "step": 16297 }, { "epoch": 1.6569743798291987, "grad_norm": 0.25426337122917175, "learning_rate": 8.455738294355493e-06, "loss": 0.3625, "step": 16298 }, { "epoch": 1.6570760471736479, "grad_norm": 0.29600954055786133, "learning_rate": 8.455481804734919e-06, "loss": 0.3287, "step": 16299 }, { "epoch": 1.6571777145180968, "grad_norm": 0.27639883756637573, "learning_rate": 8.455225297706448e-06, "loss": 0.3541, "step": 16300 }, { "epoch": 1.6572793818625458, "grad_norm": 0.28707364201545715, "learning_rate": 8.454968773271372e-06, "loss": 0.3286, "step": 16301 }, { "epoch": 1.6573810492069947, "grad_norm": 0.2850485146045685, "learning_rate": 8.454712231430984e-06, "loss": 0.3736, "step": 16302 }, { "epoch": 1.6574827165514436, "grad_norm": 0.2620977759361267, "learning_rate": 8.454455672186579e-06, "loss": 0.3484, "step": 16303 }, { "epoch": 1.6575843838958928, "grad_norm": 0.27249711751937866, "learning_rate": 8.454199095539447e-06, "loss": 0.3719, "step": 16304 }, { "epoch": 1.6576860512403417, "grad_norm": 0.26823657751083374, "learning_rate": 8.453942501490878e-06, "loss": 0.3592, "step": 16305 }, { "epoch": 1.6577877185847907, "grad_norm": 0.2882373034954071, "learning_rate": 8.453685890042169e-06, "loss": 0.3203, "step": 16306 }, { "epoch": 1.6578893859292396, "grad_norm": 0.27107006311416626, "learning_rate": 8.45342926119461e-06, "loss": 0.3305, "step": 16307 }, { "epoch": 1.6579910532736886, "grad_norm": 0.31066805124282837, "learning_rate": 8.453172614949498e-06, "loss": 0.353, "step": 16308 }, { "epoch": 1.6580927206181375, "grad_norm": 0.2765456736087799, "learning_rate": 8.45291595130812e-06, "loss": 0.3633, "step": 16309 }, { "epoch": 1.6581943879625864, "grad_norm": 0.28067320585250854, "learning_rate": 8.452659270271775e-06, "loss": 0.3479, "step": 16310 }, { "epoch": 1.6582960553070354, "grad_norm": 0.2723519504070282, "learning_rate": 8.45240257184175e-06, "loss": 0.3555, "step": 16311 }, { "epoch": 1.6583977226514843, "grad_norm": 0.2557125687599182, "learning_rate": 8.452145856019345e-06, "loss": 0.3284, "step": 16312 }, { "epoch": 1.6584993899959333, "grad_norm": 0.29084983468055725, "learning_rate": 8.451889122805845e-06, "loss": 0.3968, "step": 16313 }, { "epoch": 1.6586010573403822, "grad_norm": 0.30776017904281616, "learning_rate": 8.451632372202552e-06, "loss": 0.3849, "step": 16314 }, { "epoch": 1.6587027246848312, "grad_norm": 0.3115427792072296, "learning_rate": 8.451375604210751e-06, "loss": 0.3644, "step": 16315 }, { "epoch": 1.65880439202928, "grad_norm": 0.31177669763565063, "learning_rate": 8.451118818831745e-06, "loss": 0.3595, "step": 16316 }, { "epoch": 1.658906059373729, "grad_norm": 0.28182610869407654, "learning_rate": 8.45086201606682e-06, "loss": 0.372, "step": 16317 }, { "epoch": 1.659007726718178, "grad_norm": 0.28075477480888367, "learning_rate": 8.450605195917271e-06, "loss": 0.3435, "step": 16318 }, { "epoch": 1.659109394062627, "grad_norm": 0.2650876045227051, "learning_rate": 8.450348358384394e-06, "loss": 0.3431, "step": 16319 }, { "epoch": 1.659211061407076, "grad_norm": 0.3166678249835968, "learning_rate": 8.450091503469482e-06, "loss": 0.3994, "step": 16320 }, { "epoch": 1.659312728751525, "grad_norm": 0.27262941002845764, "learning_rate": 8.449834631173829e-06, "loss": 0.3387, "step": 16321 }, { "epoch": 1.659414396095974, "grad_norm": 0.26800045371055603, "learning_rate": 8.449577741498729e-06, "loss": 0.3855, "step": 16322 }, { "epoch": 1.659516063440423, "grad_norm": 0.289948433637619, "learning_rate": 8.449320834445476e-06, "loss": 0.3589, "step": 16323 }, { "epoch": 1.6596177307848718, "grad_norm": 0.29558080434799194, "learning_rate": 8.449063910015363e-06, "loss": 0.3522, "step": 16324 }, { "epoch": 1.659719398129321, "grad_norm": 0.266455739736557, "learning_rate": 8.448806968209687e-06, "loss": 0.3914, "step": 16325 }, { "epoch": 1.65982106547377, "grad_norm": 0.2941482365131378, "learning_rate": 8.448550009029741e-06, "loss": 0.3643, "step": 16326 }, { "epoch": 1.659922732818219, "grad_norm": 0.2639489471912384, "learning_rate": 8.44829303247682e-06, "loss": 0.3787, "step": 16327 }, { "epoch": 1.6600244001626678, "grad_norm": 0.28447455167770386, "learning_rate": 8.448036038552216e-06, "loss": 0.3459, "step": 16328 }, { "epoch": 1.6601260675071168, "grad_norm": 0.2721998989582062, "learning_rate": 8.447779027257229e-06, "loss": 0.3629, "step": 16329 }, { "epoch": 1.6602277348515657, "grad_norm": 0.29162245988845825, "learning_rate": 8.44752199859315e-06, "loss": 0.3596, "step": 16330 }, { "epoch": 1.6603294021960147, "grad_norm": 0.26721468567848206, "learning_rate": 8.447264952561271e-06, "loss": 0.3371, "step": 16331 }, { "epoch": 1.6604310695404636, "grad_norm": 0.2651223838329315, "learning_rate": 8.447007889162895e-06, "loss": 0.3481, "step": 16332 }, { "epoch": 1.6605327368849125, "grad_norm": 0.2608492374420166, "learning_rate": 8.446750808399309e-06, "loss": 0.3551, "step": 16333 }, { "epoch": 1.6606344042293615, "grad_norm": 0.2676336169242859, "learning_rate": 8.446493710271813e-06, "loss": 0.3813, "step": 16334 }, { "epoch": 1.6607360715738104, "grad_norm": 0.2872164845466614, "learning_rate": 8.4462365947817e-06, "loss": 0.3611, "step": 16335 }, { "epoch": 1.6608377389182594, "grad_norm": 0.277671217918396, "learning_rate": 8.445979461930268e-06, "loss": 0.361, "step": 16336 }, { "epoch": 1.6609394062627083, "grad_norm": 0.26675546169281006, "learning_rate": 8.445722311718807e-06, "loss": 0.3657, "step": 16337 }, { "epoch": 1.6610410736071572, "grad_norm": 0.25693807005882263, "learning_rate": 8.445465144148617e-06, "loss": 0.355, "step": 16338 }, { "epoch": 1.6611427409516062, "grad_norm": 0.28247955441474915, "learning_rate": 8.445207959220993e-06, "loss": 0.3402, "step": 16339 }, { "epoch": 1.6612444082960554, "grad_norm": 0.3065318167209625, "learning_rate": 8.44495075693723e-06, "loss": 0.3598, "step": 16340 }, { "epoch": 1.6613460756405043, "grad_norm": 0.2649977207183838, "learning_rate": 8.444693537298624e-06, "loss": 0.3753, "step": 16341 }, { "epoch": 1.6614477429849532, "grad_norm": 0.2768275737762451, "learning_rate": 8.444436300306472e-06, "loss": 0.3775, "step": 16342 }, { "epoch": 1.6615494103294022, "grad_norm": 0.2895314395427704, "learning_rate": 8.444179045962065e-06, "loss": 0.3789, "step": 16343 }, { "epoch": 1.6616510776738511, "grad_norm": 0.2722289562225342, "learning_rate": 8.443921774266703e-06, "loss": 0.3754, "step": 16344 }, { "epoch": 1.6617527450183003, "grad_norm": 0.2543821334838867, "learning_rate": 8.443664485221684e-06, "loss": 0.3383, "step": 16345 }, { "epoch": 1.6618544123627492, "grad_norm": 0.29227718710899353, "learning_rate": 8.4434071788283e-06, "loss": 0.3863, "step": 16346 }, { "epoch": 1.6619560797071982, "grad_norm": 0.28845059871673584, "learning_rate": 8.443149855087849e-06, "loss": 0.3698, "step": 16347 }, { "epoch": 1.662057747051647, "grad_norm": 0.2735283076763153, "learning_rate": 8.442892514001625e-06, "loss": 0.3774, "step": 16348 }, { "epoch": 1.662159414396096, "grad_norm": 0.2674656808376312, "learning_rate": 8.442635155570932e-06, "loss": 0.3391, "step": 16349 }, { "epoch": 1.662261081740545, "grad_norm": 0.2829659581184387, "learning_rate": 8.442377779797057e-06, "loss": 0.3668, "step": 16350 }, { "epoch": 1.662362749084994, "grad_norm": 0.2756985127925873, "learning_rate": 8.442120386681303e-06, "loss": 0.3185, "step": 16351 }, { "epoch": 1.6624644164294429, "grad_norm": 0.32656219601631165, "learning_rate": 8.441862976224964e-06, "loss": 0.38, "step": 16352 }, { "epoch": 1.6625660837738918, "grad_norm": 0.2629134953022003, "learning_rate": 8.441605548429335e-06, "loss": 0.3759, "step": 16353 }, { "epoch": 1.6626677511183408, "grad_norm": 0.2808711528778076, "learning_rate": 8.441348103295717e-06, "loss": 0.3684, "step": 16354 }, { "epoch": 1.6627694184627897, "grad_norm": 0.2925160527229309, "learning_rate": 8.441090640825404e-06, "loss": 0.3777, "step": 16355 }, { "epoch": 1.6628710858072386, "grad_norm": 0.2739967107772827, "learning_rate": 8.440833161019697e-06, "loss": 0.3547, "step": 16356 }, { "epoch": 1.6629727531516876, "grad_norm": 0.25535330176353455, "learning_rate": 8.440575663879887e-06, "loss": 0.3352, "step": 16357 }, { "epoch": 1.6630744204961365, "grad_norm": 0.2802087068557739, "learning_rate": 8.440318149407276e-06, "loss": 0.3461, "step": 16358 }, { "epoch": 1.6631760878405855, "grad_norm": 0.28411421179771423, "learning_rate": 8.440060617603158e-06, "loss": 0.3653, "step": 16359 }, { "epoch": 1.6632777551850344, "grad_norm": 0.27334949374198914, "learning_rate": 8.439803068468834e-06, "loss": 0.3907, "step": 16360 }, { "epoch": 1.6633794225294836, "grad_norm": 0.3064664304256439, "learning_rate": 8.4395455020056e-06, "loss": 0.3479, "step": 16361 }, { "epoch": 1.6634810898739325, "grad_norm": 0.2802054286003113, "learning_rate": 8.439287918214752e-06, "loss": 0.3897, "step": 16362 }, { "epoch": 1.6635827572183814, "grad_norm": 0.28321385383605957, "learning_rate": 8.439030317097587e-06, "loss": 0.3594, "step": 16363 }, { "epoch": 1.6636844245628304, "grad_norm": 0.2602243721485138, "learning_rate": 8.438772698655407e-06, "loss": 0.3405, "step": 16364 }, { "epoch": 1.6637860919072793, "grad_norm": 0.26234757900238037, "learning_rate": 8.438515062889508e-06, "loss": 0.3864, "step": 16365 }, { "epoch": 1.6638877592517285, "grad_norm": 0.28423115611076355, "learning_rate": 8.438257409801185e-06, "loss": 0.3685, "step": 16366 }, { "epoch": 1.6639894265961774, "grad_norm": 0.28237780928611755, "learning_rate": 8.43799973939174e-06, "loss": 0.3798, "step": 16367 }, { "epoch": 1.6640910939406264, "grad_norm": 0.2800920009613037, "learning_rate": 8.437742051662468e-06, "loss": 0.3724, "step": 16368 }, { "epoch": 1.6641927612850753, "grad_norm": 0.2557051479816437, "learning_rate": 8.437484346614669e-06, "loss": 0.3404, "step": 16369 }, { "epoch": 1.6642944286295243, "grad_norm": 0.28505581617355347, "learning_rate": 8.437226624249639e-06, "loss": 0.3444, "step": 16370 }, { "epoch": 1.6643960959739732, "grad_norm": 0.2736033797264099, "learning_rate": 8.43696888456868e-06, "loss": 0.3591, "step": 16371 }, { "epoch": 1.6644977633184221, "grad_norm": 0.2637992799282074, "learning_rate": 8.43671112757309e-06, "loss": 0.3625, "step": 16372 }, { "epoch": 1.664599430662871, "grad_norm": 0.2710845470428467, "learning_rate": 8.436453353264162e-06, "loss": 0.3458, "step": 16373 }, { "epoch": 1.66470109800732, "grad_norm": 0.26206356287002563, "learning_rate": 8.4361955616432e-06, "loss": 0.3482, "step": 16374 }, { "epoch": 1.664802765351769, "grad_norm": 0.2725861072540283, "learning_rate": 8.435937752711503e-06, "loss": 0.33, "step": 16375 }, { "epoch": 1.664904432696218, "grad_norm": 0.2901502847671509, "learning_rate": 8.435679926470367e-06, "loss": 0.3713, "step": 16376 }, { "epoch": 1.6650061000406668, "grad_norm": 0.26734471321105957, "learning_rate": 8.435422082921094e-06, "loss": 0.3438, "step": 16377 }, { "epoch": 1.6651077673851158, "grad_norm": 0.26228055357933044, "learning_rate": 8.435164222064978e-06, "loss": 0.3629, "step": 16378 }, { "epoch": 1.6652094347295647, "grad_norm": 0.2699790298938751, "learning_rate": 8.434906343903323e-06, "loss": 0.3206, "step": 16379 }, { "epoch": 1.6653111020740137, "grad_norm": 0.27721908688545227, "learning_rate": 8.434648448437427e-06, "loss": 0.389, "step": 16380 }, { "epoch": 1.6654127694184628, "grad_norm": 0.2937234938144684, "learning_rate": 8.434390535668584e-06, "loss": 0.3635, "step": 16381 }, { "epoch": 1.6655144367629118, "grad_norm": 0.29705336689949036, "learning_rate": 8.434132605598102e-06, "loss": 0.3624, "step": 16382 }, { "epoch": 1.6656161041073607, "grad_norm": 0.2620939016342163, "learning_rate": 8.433874658227272e-06, "loss": 0.3139, "step": 16383 }, { "epoch": 1.6657177714518097, "grad_norm": 0.27882644534111023, "learning_rate": 8.4336166935574e-06, "loss": 0.3889, "step": 16384 }, { "epoch": 1.6658194387962586, "grad_norm": 0.2818942964076996, "learning_rate": 8.433358711589783e-06, "loss": 0.3929, "step": 16385 }, { "epoch": 1.6659211061407078, "grad_norm": 0.26628467440605164, "learning_rate": 8.433100712325722e-06, "loss": 0.3497, "step": 16386 }, { "epoch": 1.6660227734851567, "grad_norm": 0.2842501997947693, "learning_rate": 8.432842695766512e-06, "loss": 0.3194, "step": 16387 }, { "epoch": 1.6661244408296056, "grad_norm": 0.2689712345600128, "learning_rate": 8.432584661913459e-06, "loss": 0.3709, "step": 16388 }, { "epoch": 1.6662261081740546, "grad_norm": 0.30172649025917053, "learning_rate": 8.432326610767859e-06, "loss": 0.3512, "step": 16389 }, { "epoch": 1.6663277755185035, "grad_norm": 0.28764525055885315, "learning_rate": 8.432068542331013e-06, "loss": 0.3729, "step": 16390 }, { "epoch": 1.6664294428629525, "grad_norm": 0.29578372836112976, "learning_rate": 8.431810456604222e-06, "loss": 0.3672, "step": 16391 }, { "epoch": 1.6665311102074014, "grad_norm": 0.28068190813064575, "learning_rate": 8.431552353588785e-06, "loss": 0.3847, "step": 16392 }, { "epoch": 1.6666327775518504, "grad_norm": 0.2916205823421478, "learning_rate": 8.431294233286003e-06, "loss": 0.376, "step": 16393 }, { "epoch": 1.6667344448962993, "grad_norm": 0.2895596921443939, "learning_rate": 8.431036095697175e-06, "loss": 0.3573, "step": 16394 }, { "epoch": 1.6668361122407482, "grad_norm": 0.2553310990333557, "learning_rate": 8.430777940823604e-06, "loss": 0.3787, "step": 16395 }, { "epoch": 1.6669377795851972, "grad_norm": 0.29840853810310364, "learning_rate": 8.43051976866659e-06, "loss": 0.3632, "step": 16396 }, { "epoch": 1.6670394469296461, "grad_norm": 0.2735442817211151, "learning_rate": 8.430261579227431e-06, "loss": 0.3467, "step": 16397 }, { "epoch": 1.667141114274095, "grad_norm": 0.28108468651771545, "learning_rate": 8.43000337250743e-06, "loss": 0.3559, "step": 16398 }, { "epoch": 1.667242781618544, "grad_norm": 0.2893837094306946, "learning_rate": 8.429745148507886e-06, "loss": 0.381, "step": 16399 }, { "epoch": 1.667344448962993, "grad_norm": 0.2781904935836792, "learning_rate": 8.429486907230101e-06, "loss": 0.3431, "step": 16400 }, { "epoch": 1.6674461163074419, "grad_norm": 0.3049153685569763, "learning_rate": 8.429228648675375e-06, "loss": 0.3452, "step": 16401 }, { "epoch": 1.667547783651891, "grad_norm": 0.2606939673423767, "learning_rate": 8.428970372845014e-06, "loss": 0.3492, "step": 16402 }, { "epoch": 1.66764945099634, "grad_norm": 0.292417973279953, "learning_rate": 8.42871207974031e-06, "loss": 0.3755, "step": 16403 }, { "epoch": 1.667751118340789, "grad_norm": 0.2843780219554901, "learning_rate": 8.428453769362573e-06, "loss": 0.3524, "step": 16404 }, { "epoch": 1.6678527856852379, "grad_norm": 0.2857057750225067, "learning_rate": 8.4281954417131e-06, "loss": 0.3429, "step": 16405 }, { "epoch": 1.6679544530296868, "grad_norm": 0.2664361894130707, "learning_rate": 8.427937096793193e-06, "loss": 0.3604, "step": 16406 }, { "epoch": 1.668056120374136, "grad_norm": 0.28546005487442017, "learning_rate": 8.427678734604152e-06, "loss": 0.3885, "step": 16407 }, { "epoch": 1.668157787718585, "grad_norm": 0.29237210750579834, "learning_rate": 8.427420355147282e-06, "loss": 0.4032, "step": 16408 }, { "epoch": 1.6682594550630339, "grad_norm": 0.2794090807437897, "learning_rate": 8.427161958423881e-06, "loss": 0.3474, "step": 16409 }, { "epoch": 1.6683611224074828, "grad_norm": 0.28933095932006836, "learning_rate": 8.426903544435254e-06, "loss": 0.3477, "step": 16410 }, { "epoch": 1.6684627897519317, "grad_norm": 0.2958664298057556, "learning_rate": 8.4266451131827e-06, "loss": 0.3585, "step": 16411 }, { "epoch": 1.6685644570963807, "grad_norm": 0.28271904587745667, "learning_rate": 8.426386664667522e-06, "loss": 0.3681, "step": 16412 }, { "epoch": 1.6686661244408296, "grad_norm": 0.27242305874824524, "learning_rate": 8.426128198891024e-06, "loss": 0.3318, "step": 16413 }, { "epoch": 1.6687677917852786, "grad_norm": 0.2529746890068054, "learning_rate": 8.425869715854505e-06, "loss": 0.3542, "step": 16414 }, { "epoch": 1.6688694591297275, "grad_norm": 0.30073535442352295, "learning_rate": 8.425611215559268e-06, "loss": 0.3633, "step": 16415 }, { "epoch": 1.6689711264741764, "grad_norm": 0.27784445881843567, "learning_rate": 8.425352698006619e-06, "loss": 0.3723, "step": 16416 }, { "epoch": 1.6690727938186254, "grad_norm": 0.26020318269729614, "learning_rate": 8.425094163197853e-06, "loss": 0.362, "step": 16417 }, { "epoch": 1.6691744611630743, "grad_norm": 0.27961134910583496, "learning_rate": 8.424835611134278e-06, "loss": 0.3574, "step": 16418 }, { "epoch": 1.6692761285075233, "grad_norm": 0.2594340145587921, "learning_rate": 8.424577041817194e-06, "loss": 0.3651, "step": 16419 }, { "epoch": 1.6693777958519722, "grad_norm": 0.2945448160171509, "learning_rate": 8.42431845524791e-06, "loss": 0.3679, "step": 16420 }, { "epoch": 1.6694794631964212, "grad_norm": 0.2814449667930603, "learning_rate": 8.424059851427717e-06, "loss": 0.3519, "step": 16421 }, { "epoch": 1.6695811305408703, "grad_norm": 0.2948983311653137, "learning_rate": 8.423801230357927e-06, "loss": 0.3665, "step": 16422 }, { "epoch": 1.6696827978853193, "grad_norm": 0.2827748954296112, "learning_rate": 8.42354259203984e-06, "loss": 0.3819, "step": 16423 }, { "epoch": 1.6697844652297682, "grad_norm": 0.2691977620124817, "learning_rate": 8.423283936474757e-06, "loss": 0.3634, "step": 16424 }, { "epoch": 1.6698861325742171, "grad_norm": 0.27104872465133667, "learning_rate": 8.423025263663984e-06, "loss": 0.3503, "step": 16425 }, { "epoch": 1.669987799918666, "grad_norm": 0.2918097674846649, "learning_rate": 8.422766573608825e-06, "loss": 0.3617, "step": 16426 }, { "epoch": 1.6700894672631152, "grad_norm": 0.2796231210231781, "learning_rate": 8.42250786631058e-06, "loss": 0.3499, "step": 16427 }, { "epoch": 1.6701911346075642, "grad_norm": 0.29379191994667053, "learning_rate": 8.422249141770553e-06, "loss": 0.3725, "step": 16428 }, { "epoch": 1.6702928019520131, "grad_norm": 0.2745251953601837, "learning_rate": 8.421990399990048e-06, "loss": 0.3495, "step": 16429 }, { "epoch": 1.670394469296462, "grad_norm": 0.27078405022621155, "learning_rate": 8.42173164097037e-06, "loss": 0.324, "step": 16430 }, { "epoch": 1.670496136640911, "grad_norm": 0.28536155819892883, "learning_rate": 8.421472864712821e-06, "loss": 0.4041, "step": 16431 }, { "epoch": 1.67059780398536, "grad_norm": 0.2822035253047943, "learning_rate": 8.421214071218705e-06, "loss": 0.3667, "step": 16432 }, { "epoch": 1.670699471329809, "grad_norm": 0.29276207089424133, "learning_rate": 8.420955260489323e-06, "loss": 0.3701, "step": 16433 }, { "epoch": 1.6708011386742578, "grad_norm": 0.2779538631439209, "learning_rate": 8.420696432525983e-06, "loss": 0.3483, "step": 16434 }, { "epoch": 1.6709028060187068, "grad_norm": 0.27544960379600525, "learning_rate": 8.420437587329989e-06, "loss": 0.3645, "step": 16435 }, { "epoch": 1.6710044733631557, "grad_norm": 0.2951573133468628, "learning_rate": 8.42017872490264e-06, "loss": 0.4038, "step": 16436 }, { "epoch": 1.6711061407076047, "grad_norm": 0.2877956032752991, "learning_rate": 8.419919845245246e-06, "loss": 0.3706, "step": 16437 }, { "epoch": 1.6712078080520536, "grad_norm": 0.31196996569633484, "learning_rate": 8.419660948359107e-06, "loss": 0.3786, "step": 16438 }, { "epoch": 1.6713094753965025, "grad_norm": 0.3001285493373871, "learning_rate": 8.41940203424553e-06, "loss": 0.3623, "step": 16439 }, { "epoch": 1.6714111427409515, "grad_norm": 0.2827003300189972, "learning_rate": 8.419143102905816e-06, "loss": 0.3618, "step": 16440 }, { "epoch": 1.6715128100854004, "grad_norm": 0.2749389410018921, "learning_rate": 8.418884154341273e-06, "loss": 0.3708, "step": 16441 }, { "epoch": 1.6716144774298494, "grad_norm": 0.2770349085330963, "learning_rate": 8.418625188553204e-06, "loss": 0.3651, "step": 16442 }, { "epoch": 1.6717161447742985, "grad_norm": 0.29853829741477966, "learning_rate": 8.418366205542913e-06, "loss": 0.3532, "step": 16443 }, { "epoch": 1.6718178121187475, "grad_norm": 0.2634296119213104, "learning_rate": 8.418107205311707e-06, "loss": 0.332, "step": 16444 }, { "epoch": 1.6719194794631964, "grad_norm": 0.288584440946579, "learning_rate": 8.417848187860888e-06, "loss": 0.3608, "step": 16445 }, { "epoch": 1.6720211468076454, "grad_norm": 0.2922063171863556, "learning_rate": 8.417589153191764e-06, "loss": 0.3925, "step": 16446 }, { "epoch": 1.6721228141520943, "grad_norm": 0.26288294792175293, "learning_rate": 8.417330101305634e-06, "loss": 0.3325, "step": 16447 }, { "epoch": 1.6722244814965435, "grad_norm": 0.29331138730049133, "learning_rate": 8.41707103220381e-06, "loss": 0.3585, "step": 16448 }, { "epoch": 1.6723261488409924, "grad_norm": 0.2619805335998535, "learning_rate": 8.416811945887593e-06, "loss": 0.3701, "step": 16449 }, { "epoch": 1.6724278161854413, "grad_norm": 0.2932499349117279, "learning_rate": 8.41655284235829e-06, "loss": 0.3199, "step": 16450 }, { "epoch": 1.6725294835298903, "grad_norm": 0.25888851284980774, "learning_rate": 8.416293721617206e-06, "loss": 0.3593, "step": 16451 }, { "epoch": 1.6726311508743392, "grad_norm": 0.2501729428768158, "learning_rate": 8.416034583665645e-06, "loss": 0.3473, "step": 16452 }, { "epoch": 1.6727328182187882, "grad_norm": 0.2857285737991333, "learning_rate": 8.415775428504915e-06, "loss": 0.3486, "step": 16453 }, { "epoch": 1.672834485563237, "grad_norm": 0.2821025252342224, "learning_rate": 8.415516256136319e-06, "loss": 0.3461, "step": 16454 }, { "epoch": 1.672936152907686, "grad_norm": 0.2833625078201294, "learning_rate": 8.415257066561163e-06, "loss": 0.3562, "step": 16455 }, { "epoch": 1.673037820252135, "grad_norm": 0.3050745725631714, "learning_rate": 8.414997859780756e-06, "loss": 0.3731, "step": 16456 }, { "epoch": 1.673139487596584, "grad_norm": 0.2984531819820404, "learning_rate": 8.4147386357964e-06, "loss": 0.4004, "step": 16457 }, { "epoch": 1.6732411549410329, "grad_norm": 0.28371909260749817, "learning_rate": 8.4144793946094e-06, "loss": 0.3526, "step": 16458 }, { "epoch": 1.6733428222854818, "grad_norm": 0.2907283902168274, "learning_rate": 8.414220136221068e-06, "loss": 0.3422, "step": 16459 }, { "epoch": 1.6734444896299308, "grad_norm": 0.2993612289428711, "learning_rate": 8.413960860632703e-06, "loss": 0.3355, "step": 16460 }, { "epoch": 1.6735461569743797, "grad_norm": 0.30835676193237305, "learning_rate": 8.413701567845616e-06, "loss": 0.3693, "step": 16461 }, { "epoch": 1.6736478243188286, "grad_norm": 0.29809415340423584, "learning_rate": 8.413442257861112e-06, "loss": 0.3277, "step": 16462 }, { "epoch": 1.6737494916632778, "grad_norm": 0.3078314960002899, "learning_rate": 8.413182930680497e-06, "loss": 0.3639, "step": 16463 }, { "epoch": 1.6738511590077267, "grad_norm": 0.3048894703388214, "learning_rate": 8.412923586305078e-06, "loss": 0.3998, "step": 16464 }, { "epoch": 1.6739528263521757, "grad_norm": 0.28452810645103455, "learning_rate": 8.412664224736159e-06, "loss": 0.3565, "step": 16465 }, { "epoch": 1.6740544936966246, "grad_norm": 0.30570653080940247, "learning_rate": 8.41240484597505e-06, "loss": 0.3489, "step": 16466 }, { "epoch": 1.6741561610410736, "grad_norm": 0.30382272601127625, "learning_rate": 8.412145450023057e-06, "loss": 0.3316, "step": 16467 }, { "epoch": 1.6742578283855227, "grad_norm": 0.2577160596847534, "learning_rate": 8.411886036881485e-06, "loss": 0.3486, "step": 16468 }, { "epoch": 1.6743594957299717, "grad_norm": 0.2773894965648651, "learning_rate": 8.411626606551642e-06, "loss": 0.3265, "step": 16469 }, { "epoch": 1.6744611630744206, "grad_norm": 0.296917200088501, "learning_rate": 8.411367159034834e-06, "loss": 0.3696, "step": 16470 }, { "epoch": 1.6745628304188696, "grad_norm": 0.29391998052597046, "learning_rate": 8.41110769433237e-06, "loss": 0.3932, "step": 16471 }, { "epoch": 1.6746644977633185, "grad_norm": 0.2941354811191559, "learning_rate": 8.410848212445557e-06, "loss": 0.3156, "step": 16472 }, { "epoch": 1.6747661651077674, "grad_norm": 0.27316316962242126, "learning_rate": 8.410588713375701e-06, "loss": 0.37, "step": 16473 }, { "epoch": 1.6748678324522164, "grad_norm": 0.27352097630500793, "learning_rate": 8.410329197124107e-06, "loss": 0.3309, "step": 16474 }, { "epoch": 1.6749694997966653, "grad_norm": 0.26270607113838196, "learning_rate": 8.410069663692086e-06, "loss": 0.3777, "step": 16475 }, { "epoch": 1.6750711671411143, "grad_norm": 0.27885404229164124, "learning_rate": 8.409810113080946e-06, "loss": 0.3568, "step": 16476 }, { "epoch": 1.6751728344855632, "grad_norm": 0.3232382833957672, "learning_rate": 8.409550545291993e-06, "loss": 0.3371, "step": 16477 }, { "epoch": 1.6752745018300121, "grad_norm": 0.2779048979282379, "learning_rate": 8.409290960326533e-06, "loss": 0.3764, "step": 16478 }, { "epoch": 1.675376169174461, "grad_norm": 0.31999504566192627, "learning_rate": 8.409031358185875e-06, "loss": 0.3561, "step": 16479 }, { "epoch": 1.67547783651891, "grad_norm": 0.3100243806838989, "learning_rate": 8.408771738871328e-06, "loss": 0.3234, "step": 16480 }, { "epoch": 1.675579503863359, "grad_norm": 0.29613208770751953, "learning_rate": 8.408512102384202e-06, "loss": 0.3717, "step": 16481 }, { "epoch": 1.675681171207808, "grad_norm": 0.26449769735336304, "learning_rate": 8.408252448725798e-06, "loss": 0.3267, "step": 16482 }, { "epoch": 1.675782838552257, "grad_norm": 0.290181428194046, "learning_rate": 8.40799277789743e-06, "loss": 0.3447, "step": 16483 }, { "epoch": 1.675884505896706, "grad_norm": 0.305771142244339, "learning_rate": 8.407733089900402e-06, "loss": 0.3648, "step": 16484 }, { "epoch": 1.675986173241155, "grad_norm": 0.29100972414016724, "learning_rate": 8.407473384736025e-06, "loss": 0.359, "step": 16485 }, { "epoch": 1.676087840585604, "grad_norm": 0.29236456751823425, "learning_rate": 8.407213662405608e-06, "loss": 0.344, "step": 16486 }, { "epoch": 1.6761895079300528, "grad_norm": 0.28691017627716064, "learning_rate": 8.40695392291046e-06, "loss": 0.363, "step": 16487 }, { "epoch": 1.6762911752745018, "grad_norm": 0.2859240472316742, "learning_rate": 8.406694166251885e-06, "loss": 0.3584, "step": 16488 }, { "epoch": 1.676392842618951, "grad_norm": 0.2999882102012634, "learning_rate": 8.406434392431194e-06, "loss": 0.3512, "step": 16489 }, { "epoch": 1.6764945099633999, "grad_norm": 0.28936484456062317, "learning_rate": 8.406174601449696e-06, "loss": 0.3522, "step": 16490 }, { "epoch": 1.6765961773078488, "grad_norm": 0.27684035897254944, "learning_rate": 8.405914793308702e-06, "loss": 0.3631, "step": 16491 }, { "epoch": 1.6766978446522978, "grad_norm": 0.3200056850910187, "learning_rate": 8.405654968009517e-06, "loss": 0.3594, "step": 16492 }, { "epoch": 1.6767995119967467, "grad_norm": 0.3069521486759186, "learning_rate": 8.405395125553453e-06, "loss": 0.3574, "step": 16493 }, { "epoch": 1.6769011793411956, "grad_norm": 0.2709505259990692, "learning_rate": 8.405135265941817e-06, "loss": 0.3665, "step": 16494 }, { "epoch": 1.6770028466856446, "grad_norm": 0.279597669839859, "learning_rate": 8.404875389175918e-06, "loss": 0.3807, "step": 16495 }, { "epoch": 1.6771045140300935, "grad_norm": 0.2875344455242157, "learning_rate": 8.404615495257068e-06, "loss": 0.3444, "step": 16496 }, { "epoch": 1.6772061813745425, "grad_norm": 0.2737649083137512, "learning_rate": 8.404355584186573e-06, "loss": 0.3559, "step": 16497 }, { "epoch": 1.6773078487189914, "grad_norm": 0.26375454664230347, "learning_rate": 8.404095655965743e-06, "loss": 0.349, "step": 16498 }, { "epoch": 1.6774095160634404, "grad_norm": 0.2513035833835602, "learning_rate": 8.403835710595887e-06, "loss": 0.3222, "step": 16499 }, { "epoch": 1.6775111834078893, "grad_norm": 0.2751750349998474, "learning_rate": 8.403575748078317e-06, "loss": 0.3939, "step": 16500 }, { "epoch": 1.6776128507523382, "grad_norm": 0.2615200877189636, "learning_rate": 8.403315768414342e-06, "loss": 0.3379, "step": 16501 }, { "epoch": 1.6777145180967872, "grad_norm": 0.26433882117271423, "learning_rate": 8.40305577160527e-06, "loss": 0.3453, "step": 16502 }, { "epoch": 1.6778161854412361, "grad_norm": 0.270526260137558, "learning_rate": 8.402795757652411e-06, "loss": 0.3466, "step": 16503 }, { "epoch": 1.6779178527856853, "grad_norm": 0.26098060607910156, "learning_rate": 8.402535726557075e-06, "loss": 0.3529, "step": 16504 }, { "epoch": 1.6780195201301342, "grad_norm": 0.2848908007144928, "learning_rate": 8.402275678320573e-06, "loss": 0.3683, "step": 16505 }, { "epoch": 1.6781211874745832, "grad_norm": 0.2670643925666809, "learning_rate": 8.402015612944216e-06, "loss": 0.3654, "step": 16506 }, { "epoch": 1.678222854819032, "grad_norm": 0.28993308544158936, "learning_rate": 8.401755530429313e-06, "loss": 0.3556, "step": 16507 }, { "epoch": 1.678324522163481, "grad_norm": 0.28473538160324097, "learning_rate": 8.401495430777173e-06, "loss": 0.3523, "step": 16508 }, { "epoch": 1.6784261895079302, "grad_norm": 0.27041804790496826, "learning_rate": 8.401235313989107e-06, "loss": 0.3491, "step": 16509 }, { "epoch": 1.6785278568523792, "grad_norm": 0.2850881814956665, "learning_rate": 8.400975180066426e-06, "loss": 0.3826, "step": 16510 }, { "epoch": 1.678629524196828, "grad_norm": 0.2874637842178345, "learning_rate": 8.400715029010441e-06, "loss": 0.3714, "step": 16511 }, { "epoch": 1.678731191541277, "grad_norm": 0.2905898988246918, "learning_rate": 8.400454860822462e-06, "loss": 0.3931, "step": 16512 }, { "epoch": 1.678832858885726, "grad_norm": 0.27392882108688354, "learning_rate": 8.400194675503798e-06, "loss": 0.364, "step": 16513 }, { "epoch": 1.678934526230175, "grad_norm": 0.2639305889606476, "learning_rate": 8.399934473055762e-06, "loss": 0.3317, "step": 16514 }, { "epoch": 1.6790361935746239, "grad_norm": 0.24978813529014587, "learning_rate": 8.399674253479664e-06, "loss": 0.3634, "step": 16515 }, { "epoch": 1.6791378609190728, "grad_norm": 0.2854065001010895, "learning_rate": 8.399414016776815e-06, "loss": 0.3706, "step": 16516 }, { "epoch": 1.6792395282635217, "grad_norm": 0.2721196413040161, "learning_rate": 8.399153762948526e-06, "loss": 0.3733, "step": 16517 }, { "epoch": 1.6793411956079707, "grad_norm": 0.2610434293746948, "learning_rate": 8.39889349199611e-06, "loss": 0.3624, "step": 16518 }, { "epoch": 1.6794428629524196, "grad_norm": 0.28907567262649536, "learning_rate": 8.398633203920875e-06, "loss": 0.3392, "step": 16519 }, { "epoch": 1.6795445302968686, "grad_norm": 0.28151243925094604, "learning_rate": 8.398372898724132e-06, "loss": 0.3407, "step": 16520 }, { "epoch": 1.6796461976413175, "grad_norm": 0.2795805037021637, "learning_rate": 8.398112576407194e-06, "loss": 0.376, "step": 16521 }, { "epoch": 1.6797478649857664, "grad_norm": 0.2842085659503937, "learning_rate": 8.397852236971374e-06, "loss": 0.353, "step": 16522 }, { "epoch": 1.6798495323302154, "grad_norm": 0.28025975823402405, "learning_rate": 8.397591880417981e-06, "loss": 0.3273, "step": 16523 }, { "epoch": 1.6799511996746646, "grad_norm": 0.26940786838531494, "learning_rate": 8.397331506748328e-06, "loss": 0.3563, "step": 16524 }, { "epoch": 1.6800528670191135, "grad_norm": 0.27837520837783813, "learning_rate": 8.397071115963725e-06, "loss": 0.3498, "step": 16525 }, { "epoch": 1.6801545343635624, "grad_norm": 0.26555055379867554, "learning_rate": 8.396810708065486e-06, "loss": 0.342, "step": 16526 }, { "epoch": 1.6802562017080114, "grad_norm": 0.2877112925052643, "learning_rate": 8.396550283054923e-06, "loss": 0.3886, "step": 16527 }, { "epoch": 1.6803578690524603, "grad_norm": 0.2645588219165802, "learning_rate": 8.396289840933346e-06, "loss": 0.3586, "step": 16528 }, { "epoch": 1.6804595363969093, "grad_norm": 0.28226229548454285, "learning_rate": 8.396029381702067e-06, "loss": 0.3609, "step": 16529 }, { "epoch": 1.6805612037413584, "grad_norm": 0.28878211975097656, "learning_rate": 8.395768905362398e-06, "loss": 0.3677, "step": 16530 }, { "epoch": 1.6806628710858074, "grad_norm": 0.2918381094932556, "learning_rate": 8.395508411915654e-06, "loss": 0.3757, "step": 16531 }, { "epoch": 1.6807645384302563, "grad_norm": 0.2577844262123108, "learning_rate": 8.395247901363146e-06, "loss": 0.3667, "step": 16532 }, { "epoch": 1.6808662057747052, "grad_norm": 0.2745163142681122, "learning_rate": 8.394987373706186e-06, "loss": 0.3376, "step": 16533 }, { "epoch": 1.6809678731191542, "grad_norm": 0.29916679859161377, "learning_rate": 8.394726828946086e-06, "loss": 0.3426, "step": 16534 }, { "epoch": 1.6810695404636031, "grad_norm": 0.2921213209629059, "learning_rate": 8.394466267084157e-06, "loss": 0.3559, "step": 16535 }, { "epoch": 1.681171207808052, "grad_norm": 0.3104252219200134, "learning_rate": 8.394205688121715e-06, "loss": 0.378, "step": 16536 }, { "epoch": 1.681272875152501, "grad_norm": 0.28801313042640686, "learning_rate": 8.393945092060071e-06, "loss": 0.3521, "step": 16537 }, { "epoch": 1.68137454249695, "grad_norm": 0.28322964906692505, "learning_rate": 8.39368447890054e-06, "loss": 0.3741, "step": 16538 }, { "epoch": 1.681476209841399, "grad_norm": 0.27366748452186584, "learning_rate": 8.39342384864443e-06, "loss": 0.3607, "step": 16539 }, { "epoch": 1.6815778771858478, "grad_norm": 0.2690199017524719, "learning_rate": 8.393163201293058e-06, "loss": 0.3651, "step": 16540 }, { "epoch": 1.6816795445302968, "grad_norm": 0.2759310305118561, "learning_rate": 8.392902536847736e-06, "loss": 0.4265, "step": 16541 }, { "epoch": 1.6817812118747457, "grad_norm": 0.25203171372413635, "learning_rate": 8.39264185530978e-06, "loss": 0.3623, "step": 16542 }, { "epoch": 1.6818828792191947, "grad_norm": 0.323128879070282, "learning_rate": 8.392381156680498e-06, "loss": 0.3671, "step": 16543 }, { "epoch": 1.6819845465636436, "grad_norm": 0.2706592082977295, "learning_rate": 8.392120440961207e-06, "loss": 0.3493, "step": 16544 }, { "epoch": 1.6820862139080928, "grad_norm": 0.2919204533100128, "learning_rate": 8.391859708153218e-06, "loss": 0.3794, "step": 16545 }, { "epoch": 1.6821878812525417, "grad_norm": 0.27227044105529785, "learning_rate": 8.391598958257845e-06, "loss": 0.3489, "step": 16546 }, { "epoch": 1.6822895485969906, "grad_norm": 0.26720741391181946, "learning_rate": 8.391338191276405e-06, "loss": 0.3669, "step": 16547 }, { "epoch": 1.6823912159414396, "grad_norm": 0.2813244163990021, "learning_rate": 8.391077407210209e-06, "loss": 0.3705, "step": 16548 }, { "epoch": 1.6824928832858885, "grad_norm": 0.28453248739242554, "learning_rate": 8.39081660606057e-06, "loss": 0.3563, "step": 16549 }, { "epoch": 1.6825945506303377, "grad_norm": 0.27152886986732483, "learning_rate": 8.390555787828803e-06, "loss": 0.3563, "step": 16550 }, { "epoch": 1.6826962179747866, "grad_norm": 0.3121895492076874, "learning_rate": 8.39029495251622e-06, "loss": 0.3915, "step": 16551 }, { "epoch": 1.6827978853192356, "grad_norm": 0.2739031910896301, "learning_rate": 8.390034100124138e-06, "loss": 0.3464, "step": 16552 }, { "epoch": 1.6828995526636845, "grad_norm": 0.28681570291519165, "learning_rate": 8.38977323065387e-06, "loss": 0.3595, "step": 16553 }, { "epoch": 1.6830012200081335, "grad_norm": 0.273962140083313, "learning_rate": 8.389512344106731e-06, "loss": 0.3377, "step": 16554 }, { "epoch": 1.6831028873525824, "grad_norm": 0.2956099808216095, "learning_rate": 8.389251440484031e-06, "loss": 0.3549, "step": 16555 }, { "epoch": 1.6832045546970313, "grad_norm": 0.2621668577194214, "learning_rate": 8.38899051978709e-06, "loss": 0.3235, "step": 16556 }, { "epoch": 1.6833062220414803, "grad_norm": 0.29436472058296204, "learning_rate": 8.388729582017222e-06, "loss": 0.341, "step": 16557 }, { "epoch": 1.6834078893859292, "grad_norm": 0.25963354110717773, "learning_rate": 8.388468627175737e-06, "loss": 0.3591, "step": 16558 }, { "epoch": 1.6835095567303782, "grad_norm": 0.2867891490459442, "learning_rate": 8.388207655263954e-06, "loss": 0.3667, "step": 16559 }, { "epoch": 1.683611224074827, "grad_norm": 0.27650904655456543, "learning_rate": 8.387946666283183e-06, "loss": 0.3486, "step": 16560 }, { "epoch": 1.683712891419276, "grad_norm": 0.29647135734558105, "learning_rate": 8.387685660234744e-06, "loss": 0.343, "step": 16561 }, { "epoch": 1.683814558763725, "grad_norm": 0.2832164168357849, "learning_rate": 8.387424637119949e-06, "loss": 0.3822, "step": 16562 }, { "epoch": 1.683916226108174, "grad_norm": 0.26326122879981995, "learning_rate": 8.387163596940114e-06, "loss": 0.3495, "step": 16563 }, { "epoch": 1.6840178934526229, "grad_norm": 0.26375436782836914, "learning_rate": 8.386902539696554e-06, "loss": 0.3578, "step": 16564 }, { "epoch": 1.684119560797072, "grad_norm": 0.276956170797348, "learning_rate": 8.386641465390583e-06, "loss": 0.3658, "step": 16565 }, { "epoch": 1.684221228141521, "grad_norm": 0.2841615080833435, "learning_rate": 8.386380374023517e-06, "loss": 0.3844, "step": 16566 }, { "epoch": 1.68432289548597, "grad_norm": 0.2697271704673767, "learning_rate": 8.386119265596672e-06, "loss": 0.3459, "step": 16567 }, { "epoch": 1.6844245628304189, "grad_norm": 0.27217787504196167, "learning_rate": 8.385858140111364e-06, "loss": 0.3223, "step": 16568 }, { "epoch": 1.6845262301748678, "grad_norm": 0.265828937292099, "learning_rate": 8.385596997568905e-06, "loss": 0.3403, "step": 16569 }, { "epoch": 1.6846278975193167, "grad_norm": 0.29704374074935913, "learning_rate": 8.385335837970615e-06, "loss": 0.3541, "step": 16570 }, { "epoch": 1.684729564863766, "grad_norm": 0.263042688369751, "learning_rate": 8.385074661317807e-06, "loss": 0.3291, "step": 16571 }, { "epoch": 1.6848312322082148, "grad_norm": 0.28352752327919006, "learning_rate": 8.384813467611796e-06, "loss": 0.3807, "step": 16572 }, { "epoch": 1.6849328995526638, "grad_norm": 0.30038127303123474, "learning_rate": 8.3845522568539e-06, "loss": 0.3544, "step": 16573 }, { "epoch": 1.6850345668971127, "grad_norm": 0.295166015625, "learning_rate": 8.384291029045433e-06, "loss": 0.3697, "step": 16574 }, { "epoch": 1.6851362342415617, "grad_norm": 0.2722613215446472, "learning_rate": 8.384029784187713e-06, "loss": 0.3685, "step": 16575 }, { "epoch": 1.6852379015860106, "grad_norm": 0.282158762216568, "learning_rate": 8.383768522282057e-06, "loss": 0.3593, "step": 16576 }, { "epoch": 1.6853395689304596, "grad_norm": 0.3149712383747101, "learning_rate": 8.383507243329777e-06, "loss": 0.3591, "step": 16577 }, { "epoch": 1.6854412362749085, "grad_norm": 0.2564859390258789, "learning_rate": 8.38324594733219e-06, "loss": 0.3706, "step": 16578 }, { "epoch": 1.6855429036193574, "grad_norm": 0.27006858587265015, "learning_rate": 8.382984634290616e-06, "loss": 0.3348, "step": 16579 }, { "epoch": 1.6856445709638064, "grad_norm": 0.2770495116710663, "learning_rate": 8.382723304206369e-06, "loss": 0.3535, "step": 16580 }, { "epoch": 1.6857462383082553, "grad_norm": 0.27825456857681274, "learning_rate": 8.382461957080767e-06, "loss": 0.3421, "step": 16581 }, { "epoch": 1.6858479056527043, "grad_norm": 0.2968696653842926, "learning_rate": 8.382200592915124e-06, "loss": 0.3607, "step": 16582 }, { "epoch": 1.6859495729971532, "grad_norm": 0.2856823801994324, "learning_rate": 8.381939211710759e-06, "loss": 0.3725, "step": 16583 }, { "epoch": 1.6860512403416021, "grad_norm": 0.29338687658309937, "learning_rate": 8.381677813468987e-06, "loss": 0.3636, "step": 16584 }, { "epoch": 1.686152907686051, "grad_norm": 0.273613840341568, "learning_rate": 8.381416398191127e-06, "loss": 0.3989, "step": 16585 }, { "epoch": 1.6862545750305002, "grad_norm": 0.25327423214912415, "learning_rate": 8.381154965878493e-06, "loss": 0.3636, "step": 16586 }, { "epoch": 1.6863562423749492, "grad_norm": 0.2661806046962738, "learning_rate": 8.380893516532405e-06, "loss": 0.3637, "step": 16587 }, { "epoch": 1.6864579097193981, "grad_norm": 0.29985105991363525, "learning_rate": 8.38063205015418e-06, "loss": 0.4005, "step": 16588 }, { "epoch": 1.686559577063847, "grad_norm": 0.28118205070495605, "learning_rate": 8.380370566745132e-06, "loss": 0.36, "step": 16589 }, { "epoch": 1.686661244408296, "grad_norm": 0.29131510853767395, "learning_rate": 8.38010906630658e-06, "loss": 0.3676, "step": 16590 }, { "epoch": 1.6867629117527452, "grad_norm": 0.27601566910743713, "learning_rate": 8.379847548839844e-06, "loss": 0.359, "step": 16591 }, { "epoch": 1.6868645790971941, "grad_norm": 0.28980106115341187, "learning_rate": 8.379586014346238e-06, "loss": 0.3682, "step": 16592 }, { "epoch": 1.686966246441643, "grad_norm": 0.31112003326416016, "learning_rate": 8.379324462827081e-06, "loss": 0.3286, "step": 16593 }, { "epoch": 1.687067913786092, "grad_norm": 0.2953706979751587, "learning_rate": 8.379062894283691e-06, "loss": 0.3408, "step": 16594 }, { "epoch": 1.687169581130541, "grad_norm": 0.2655097544193268, "learning_rate": 8.378801308717383e-06, "loss": 0.3881, "step": 16595 }, { "epoch": 1.6872712484749899, "grad_norm": 0.35543692111968994, "learning_rate": 8.378539706129478e-06, "loss": 0.3801, "step": 16596 }, { "epoch": 1.6873729158194388, "grad_norm": 0.3389607071876526, "learning_rate": 8.378278086521294e-06, "loss": 0.3487, "step": 16597 }, { "epoch": 1.6874745831638878, "grad_norm": 0.30866095423698425, "learning_rate": 8.378016449894147e-06, "loss": 0.3675, "step": 16598 }, { "epoch": 1.6875762505083367, "grad_norm": 0.28572601079940796, "learning_rate": 8.377754796249355e-06, "loss": 0.3254, "step": 16599 }, { "epoch": 1.6876779178527856, "grad_norm": 0.3128563463687897, "learning_rate": 8.377493125588238e-06, "loss": 0.3481, "step": 16600 }, { "epoch": 1.6877795851972346, "grad_norm": 0.3147204518318176, "learning_rate": 8.377231437912115e-06, "loss": 0.3387, "step": 16601 }, { "epoch": 1.6878812525416835, "grad_norm": 0.28802454471588135, "learning_rate": 8.3769697332223e-06, "loss": 0.3562, "step": 16602 }, { "epoch": 1.6879829198861325, "grad_norm": 0.2684684693813324, "learning_rate": 8.376708011520113e-06, "loss": 0.3486, "step": 16603 }, { "epoch": 1.6880845872305814, "grad_norm": 0.2740595042705536, "learning_rate": 8.376446272806877e-06, "loss": 0.3251, "step": 16604 }, { "epoch": 1.6881862545750304, "grad_norm": 0.27065566182136536, "learning_rate": 8.376184517083905e-06, "loss": 0.3859, "step": 16605 }, { "epoch": 1.6882879219194795, "grad_norm": 0.29219189286231995, "learning_rate": 8.375922744352517e-06, "loss": 0.3867, "step": 16606 }, { "epoch": 1.6883895892639285, "grad_norm": 0.28001701831817627, "learning_rate": 8.375660954614033e-06, "loss": 0.3582, "step": 16607 }, { "epoch": 1.6884912566083774, "grad_norm": 0.3020772337913513, "learning_rate": 8.375399147869772e-06, "loss": 0.3559, "step": 16608 }, { "epoch": 1.6885929239528263, "grad_norm": 0.29048365354537964, "learning_rate": 8.375137324121052e-06, "loss": 0.3688, "step": 16609 }, { "epoch": 1.6886945912972753, "grad_norm": 0.2893178462982178, "learning_rate": 8.374875483369192e-06, "loss": 0.3474, "step": 16610 }, { "epoch": 1.6887962586417242, "grad_norm": 0.2733190357685089, "learning_rate": 8.374613625615513e-06, "loss": 0.3467, "step": 16611 }, { "epoch": 1.6888979259861734, "grad_norm": 0.27904319763183594, "learning_rate": 8.37435175086133e-06, "loss": 0.3559, "step": 16612 }, { "epoch": 1.6889995933306223, "grad_norm": 0.3088499903678894, "learning_rate": 8.374089859107966e-06, "loss": 0.3973, "step": 16613 }, { "epoch": 1.6891012606750713, "grad_norm": 0.27343711256980896, "learning_rate": 8.373827950356738e-06, "loss": 0.3415, "step": 16614 }, { "epoch": 1.6892029280195202, "grad_norm": 0.27356481552124023, "learning_rate": 8.373566024608968e-06, "loss": 0.3726, "step": 16615 }, { "epoch": 1.6893045953639692, "grad_norm": 0.27782246470451355, "learning_rate": 8.373304081865972e-06, "loss": 0.3471, "step": 16616 }, { "epoch": 1.689406262708418, "grad_norm": 0.27341172099113464, "learning_rate": 8.373042122129074e-06, "loss": 0.383, "step": 16617 }, { "epoch": 1.689507930052867, "grad_norm": 0.273276150226593, "learning_rate": 8.372780145399591e-06, "loss": 0.3808, "step": 16618 }, { "epoch": 1.689609597397316, "grad_norm": 0.2616633176803589, "learning_rate": 8.372518151678841e-06, "loss": 0.3714, "step": 16619 }, { "epoch": 1.689711264741765, "grad_norm": 0.3135257065296173, "learning_rate": 8.372256140968147e-06, "loss": 0.4064, "step": 16620 }, { "epoch": 1.6898129320862139, "grad_norm": 0.27664071321487427, "learning_rate": 8.37199411326883e-06, "loss": 0.3932, "step": 16621 }, { "epoch": 1.6899145994306628, "grad_norm": 0.2652462422847748, "learning_rate": 8.371732068582205e-06, "loss": 0.3583, "step": 16622 }, { "epoch": 1.6900162667751117, "grad_norm": 0.27032530307769775, "learning_rate": 8.371470006909597e-06, "loss": 0.3106, "step": 16623 }, { "epoch": 1.6901179341195607, "grad_norm": 0.30401942133903503, "learning_rate": 8.371207928252322e-06, "loss": 0.3959, "step": 16624 }, { "epoch": 1.6902196014640096, "grad_norm": 0.28805485367774963, "learning_rate": 8.370945832611706e-06, "loss": 0.3461, "step": 16625 }, { "epoch": 1.6903212688084586, "grad_norm": 0.26443183422088623, "learning_rate": 8.370683719989064e-06, "loss": 0.3577, "step": 16626 }, { "epoch": 1.6904229361529077, "grad_norm": 0.27081847190856934, "learning_rate": 8.370421590385718e-06, "loss": 0.379, "step": 16627 }, { "epoch": 1.6905246034973567, "grad_norm": 0.2929019331932068, "learning_rate": 8.37015944380299e-06, "loss": 0.3714, "step": 16628 }, { "epoch": 1.6906262708418056, "grad_norm": 0.26665443181991577, "learning_rate": 8.369897280242198e-06, "loss": 0.3791, "step": 16629 }, { "epoch": 1.6907279381862546, "grad_norm": 0.3062307834625244, "learning_rate": 8.369635099704668e-06, "loss": 0.3658, "step": 16630 }, { "epoch": 1.6908296055307035, "grad_norm": 0.2753700613975525, "learning_rate": 8.369372902191712e-06, "loss": 0.3765, "step": 16631 }, { "epoch": 1.6909312728751527, "grad_norm": 0.2718527019023895, "learning_rate": 8.36911068770466e-06, "loss": 0.357, "step": 16632 }, { "epoch": 1.6910329402196016, "grad_norm": 0.30611518025398254, "learning_rate": 8.368848456244827e-06, "loss": 0.368, "step": 16633 }, { "epoch": 1.6911346075640505, "grad_norm": 0.28878024220466614, "learning_rate": 8.368586207813536e-06, "loss": 0.3328, "step": 16634 }, { "epoch": 1.6912362749084995, "grad_norm": 0.27592968940734863, "learning_rate": 8.368323942412112e-06, "loss": 0.3758, "step": 16635 }, { "epoch": 1.6913379422529484, "grad_norm": 0.2710326611995697, "learning_rate": 8.368061660041866e-06, "loss": 0.3657, "step": 16636 }, { "epoch": 1.6914396095973974, "grad_norm": 0.2510726749897003, "learning_rate": 8.367799360704129e-06, "loss": 0.3454, "step": 16637 }, { "epoch": 1.6915412769418463, "grad_norm": 0.30032089352607727, "learning_rate": 8.36753704440022e-06, "loss": 0.3756, "step": 16638 }, { "epoch": 1.6916429442862952, "grad_norm": 0.2830049395561218, "learning_rate": 8.367274711131458e-06, "loss": 0.3475, "step": 16639 }, { "epoch": 1.6917446116307442, "grad_norm": 0.27719932794570923, "learning_rate": 8.367012360899167e-06, "loss": 0.4046, "step": 16640 }, { "epoch": 1.6918462789751931, "grad_norm": 0.2889775037765503, "learning_rate": 8.366749993704667e-06, "loss": 0.3648, "step": 16641 }, { "epoch": 1.691947946319642, "grad_norm": 0.31560298800468445, "learning_rate": 8.366487609549283e-06, "loss": 0.3976, "step": 16642 }, { "epoch": 1.692049613664091, "grad_norm": 0.28153344988822937, "learning_rate": 8.366225208434334e-06, "loss": 0.3504, "step": 16643 }, { "epoch": 1.69215128100854, "grad_norm": 0.27636581659317017, "learning_rate": 8.36596279036114e-06, "loss": 0.3499, "step": 16644 }, { "epoch": 1.692252948352989, "grad_norm": 0.27984756231307983, "learning_rate": 8.365700355331026e-06, "loss": 0.3463, "step": 16645 }, { "epoch": 1.6923546156974378, "grad_norm": 0.2690059244632721, "learning_rate": 8.365437903345315e-06, "loss": 0.3498, "step": 16646 }, { "epoch": 1.692456283041887, "grad_norm": 0.2736988067626953, "learning_rate": 8.365175434405325e-06, "loss": 0.3589, "step": 16647 }, { "epoch": 1.692557950386336, "grad_norm": 0.27211707830429077, "learning_rate": 8.364912948512383e-06, "loss": 0.3427, "step": 16648 }, { "epoch": 1.6926596177307849, "grad_norm": 0.26688072085380554, "learning_rate": 8.364650445667808e-06, "loss": 0.3635, "step": 16649 }, { "epoch": 1.6927612850752338, "grad_norm": 0.2922184467315674, "learning_rate": 8.364387925872926e-06, "loss": 0.3796, "step": 16650 }, { "epoch": 1.6928629524196828, "grad_norm": 0.2752254605293274, "learning_rate": 8.364125389129054e-06, "loss": 0.3392, "step": 16651 }, { "epoch": 1.6929646197641317, "grad_norm": 0.28120166063308716, "learning_rate": 8.36386283543752e-06, "loss": 0.4006, "step": 16652 }, { "epoch": 1.6930662871085809, "grad_norm": 0.3064078390598297, "learning_rate": 8.363600264799642e-06, "loss": 0.3622, "step": 16653 }, { "epoch": 1.6931679544530298, "grad_norm": 0.2925477623939514, "learning_rate": 8.363337677216747e-06, "loss": 0.3567, "step": 16654 }, { "epoch": 1.6932696217974788, "grad_norm": 0.2819584012031555, "learning_rate": 8.363075072690156e-06, "loss": 0.3547, "step": 16655 }, { "epoch": 1.6933712891419277, "grad_norm": 0.2641620635986328, "learning_rate": 8.36281245122119e-06, "loss": 0.3682, "step": 16656 }, { "epoch": 1.6934729564863766, "grad_norm": 0.2765873670578003, "learning_rate": 8.362549812811175e-06, "loss": 0.3675, "step": 16657 }, { "epoch": 1.6935746238308256, "grad_norm": 0.2870866060256958, "learning_rate": 8.362287157461435e-06, "loss": 0.3712, "step": 16658 }, { "epoch": 1.6936762911752745, "grad_norm": 0.25753751397132874, "learning_rate": 8.36202448517329e-06, "loss": 0.3317, "step": 16659 }, { "epoch": 1.6937779585197235, "grad_norm": 0.2684714198112488, "learning_rate": 8.361761795948065e-06, "loss": 0.3393, "step": 16660 }, { "epoch": 1.6938796258641724, "grad_norm": 0.262768030166626, "learning_rate": 8.361499089787082e-06, "loss": 0.3651, "step": 16661 }, { "epoch": 1.6939812932086213, "grad_norm": 0.288425475358963, "learning_rate": 8.361236366691666e-06, "loss": 0.3271, "step": 16662 }, { "epoch": 1.6940829605530703, "grad_norm": 0.28723636269569397, "learning_rate": 8.36097362666314e-06, "loss": 0.3623, "step": 16663 }, { "epoch": 1.6941846278975192, "grad_norm": 0.27331554889678955, "learning_rate": 8.360710869702829e-06, "loss": 0.3673, "step": 16664 }, { "epoch": 1.6942862952419682, "grad_norm": 0.2785183787345886, "learning_rate": 8.360448095812052e-06, "loss": 0.3432, "step": 16665 }, { "epoch": 1.694387962586417, "grad_norm": 0.2966846823692322, "learning_rate": 8.360185304992138e-06, "loss": 0.3824, "step": 16666 }, { "epoch": 1.694489629930866, "grad_norm": 0.27990031242370605, "learning_rate": 8.359922497244408e-06, "loss": 0.3855, "step": 16667 }, { "epoch": 1.6945912972753152, "grad_norm": 0.3022998869419098, "learning_rate": 8.359659672570188e-06, "loss": 0.3843, "step": 16668 }, { "epoch": 1.6946929646197642, "grad_norm": 0.2759411334991455, "learning_rate": 8.359396830970801e-06, "loss": 0.352, "step": 16669 }, { "epoch": 1.694794631964213, "grad_norm": 0.2775408625602722, "learning_rate": 8.35913397244757e-06, "loss": 0.3509, "step": 16670 }, { "epoch": 1.694896299308662, "grad_norm": 0.29342684149742126, "learning_rate": 8.358871097001821e-06, "loss": 0.3744, "step": 16671 }, { "epoch": 1.694997966653111, "grad_norm": 0.2699930965900421, "learning_rate": 8.358608204634878e-06, "loss": 0.3172, "step": 16672 }, { "epoch": 1.6950996339975601, "grad_norm": 0.27424657344818115, "learning_rate": 8.358345295348065e-06, "loss": 0.3404, "step": 16673 }, { "epoch": 1.695201301342009, "grad_norm": 0.2739010155200958, "learning_rate": 8.358082369142706e-06, "loss": 0.3562, "step": 16674 }, { "epoch": 1.695302968686458, "grad_norm": 0.27714425325393677, "learning_rate": 8.357819426020127e-06, "loss": 0.383, "step": 16675 }, { "epoch": 1.695404636030907, "grad_norm": 0.25408634543418884, "learning_rate": 8.35755646598165e-06, "loss": 0.3278, "step": 16676 }, { "epoch": 1.695506303375356, "grad_norm": 0.2676194906234741, "learning_rate": 8.357293489028602e-06, "loss": 0.355, "step": 16677 }, { "epoch": 1.6956079707198048, "grad_norm": 0.27659404277801514, "learning_rate": 8.357030495162307e-06, "loss": 0.351, "step": 16678 }, { "epoch": 1.6957096380642538, "grad_norm": 0.2674456536769867, "learning_rate": 8.35676748438409e-06, "loss": 0.3533, "step": 16679 }, { "epoch": 1.6958113054087027, "grad_norm": 0.2837626338005066, "learning_rate": 8.356504456695276e-06, "loss": 0.3553, "step": 16680 }, { "epoch": 1.6959129727531517, "grad_norm": 0.30044886469841003, "learning_rate": 8.35624141209719e-06, "loss": 0.333, "step": 16681 }, { "epoch": 1.6960146400976006, "grad_norm": 0.27180978655815125, "learning_rate": 8.35597835059116e-06, "loss": 0.377, "step": 16682 }, { "epoch": 1.6961163074420496, "grad_norm": 0.2736060321331024, "learning_rate": 8.355715272178506e-06, "loss": 0.3756, "step": 16683 }, { "epoch": 1.6962179747864985, "grad_norm": 0.2774602472782135, "learning_rate": 8.355452176860558e-06, "loss": 0.3769, "step": 16684 }, { "epoch": 1.6963196421309474, "grad_norm": 0.26869097352027893, "learning_rate": 8.355189064638635e-06, "loss": 0.3764, "step": 16685 }, { "epoch": 1.6964213094753964, "grad_norm": 0.28875288367271423, "learning_rate": 8.35492593551407e-06, "loss": 0.3425, "step": 16686 }, { "epoch": 1.6965229768198453, "grad_norm": 0.2799758315086365, "learning_rate": 8.354662789488186e-06, "loss": 0.3393, "step": 16687 }, { "epoch": 1.6966246441642945, "grad_norm": 0.28398793935775757, "learning_rate": 8.354399626562305e-06, "loss": 0.4058, "step": 16688 }, { "epoch": 1.6967263115087434, "grad_norm": 0.2865881323814392, "learning_rate": 8.35413644673776e-06, "loss": 0.3542, "step": 16689 }, { "epoch": 1.6968279788531924, "grad_norm": 0.2763674557209015, "learning_rate": 8.35387325001587e-06, "loss": 0.3337, "step": 16690 }, { "epoch": 1.6969296461976413, "grad_norm": 0.31660935282707214, "learning_rate": 8.353610036397964e-06, "loss": 0.3726, "step": 16691 }, { "epoch": 1.6970313135420902, "grad_norm": 0.28455689549446106, "learning_rate": 8.353346805885367e-06, "loss": 0.3559, "step": 16692 }, { "epoch": 1.6971329808865392, "grad_norm": 0.25674164295196533, "learning_rate": 8.353083558479407e-06, "loss": 0.3413, "step": 16693 }, { "epoch": 1.6972346482309884, "grad_norm": 0.30448606610298157, "learning_rate": 8.352820294181407e-06, "loss": 0.3859, "step": 16694 }, { "epoch": 1.6973363155754373, "grad_norm": 0.2787178158760071, "learning_rate": 8.352557012992697e-06, "loss": 0.3263, "step": 16695 }, { "epoch": 1.6974379829198862, "grad_norm": 0.28949081897735596, "learning_rate": 8.3522937149146e-06, "loss": 0.3481, "step": 16696 }, { "epoch": 1.6975396502643352, "grad_norm": 0.2709391117095947, "learning_rate": 8.352030399948445e-06, "loss": 0.3786, "step": 16697 }, { "epoch": 1.6976413176087841, "grad_norm": 0.27852189540863037, "learning_rate": 8.351767068095557e-06, "loss": 0.361, "step": 16698 }, { "epoch": 1.697742984953233, "grad_norm": 0.29493948817253113, "learning_rate": 8.351503719357263e-06, "loss": 0.3933, "step": 16699 }, { "epoch": 1.697844652297682, "grad_norm": 0.28702378273010254, "learning_rate": 8.35124035373489e-06, "loss": 0.381, "step": 16700 }, { "epoch": 1.697946319642131, "grad_norm": 0.29235711693763733, "learning_rate": 8.350976971229764e-06, "loss": 0.3582, "step": 16701 }, { "epoch": 1.6980479869865799, "grad_norm": 0.2914169430732727, "learning_rate": 8.350713571843213e-06, "loss": 0.3766, "step": 16702 }, { "epoch": 1.6981496543310288, "grad_norm": 0.2833690643310547, "learning_rate": 8.350450155576563e-06, "loss": 0.3585, "step": 16703 }, { "epoch": 1.6982513216754778, "grad_norm": 0.265669047832489, "learning_rate": 8.350186722431141e-06, "loss": 0.3415, "step": 16704 }, { "epoch": 1.6983529890199267, "grad_norm": 0.30124056339263916, "learning_rate": 8.349923272408275e-06, "loss": 0.377, "step": 16705 }, { "epoch": 1.6984546563643756, "grad_norm": 0.283264696598053, "learning_rate": 8.349659805509292e-06, "loss": 0.3466, "step": 16706 }, { "epoch": 1.6985563237088246, "grad_norm": 0.2613103985786438, "learning_rate": 8.349396321735517e-06, "loss": 0.3501, "step": 16707 }, { "epoch": 1.6986579910532735, "grad_norm": 0.2653787136077881, "learning_rate": 8.349132821088281e-06, "loss": 0.4099, "step": 16708 }, { "epoch": 1.6987596583977227, "grad_norm": 0.27015289664268494, "learning_rate": 8.34886930356891e-06, "loss": 0.3431, "step": 16709 }, { "epoch": 1.6988613257421716, "grad_norm": 0.2713441550731659, "learning_rate": 8.34860576917873e-06, "loss": 0.351, "step": 16710 }, { "epoch": 1.6989629930866206, "grad_norm": 0.2744929790496826, "learning_rate": 8.34834221791907e-06, "loss": 0.3478, "step": 16711 }, { "epoch": 1.6990646604310695, "grad_norm": 0.26800352334976196, "learning_rate": 8.348078649791256e-06, "loss": 0.3808, "step": 16712 }, { "epoch": 1.6991663277755185, "grad_norm": 0.277192085981369, "learning_rate": 8.34781506479662e-06, "loss": 0.3558, "step": 16713 }, { "epoch": 1.6992679951199676, "grad_norm": 0.2770431637763977, "learning_rate": 8.347551462936488e-06, "loss": 0.326, "step": 16714 }, { "epoch": 1.6993696624644166, "grad_norm": 0.28434908390045166, "learning_rate": 8.347287844212185e-06, "loss": 0.3781, "step": 16715 }, { "epoch": 1.6994713298088655, "grad_norm": 0.3072255849838257, "learning_rate": 8.347024208625042e-06, "loss": 0.3608, "step": 16716 }, { "epoch": 1.6995729971533144, "grad_norm": 0.30859649181365967, "learning_rate": 8.346760556176385e-06, "loss": 0.3678, "step": 16717 }, { "epoch": 1.6996746644977634, "grad_norm": 0.27131640911102295, "learning_rate": 8.346496886867546e-06, "loss": 0.3419, "step": 16718 }, { "epoch": 1.6997763318422123, "grad_norm": 0.2733886241912842, "learning_rate": 8.34623320069985e-06, "loss": 0.3362, "step": 16719 }, { "epoch": 1.6998779991866613, "grad_norm": 0.29467883706092834, "learning_rate": 8.345969497674626e-06, "loss": 0.3735, "step": 16720 }, { "epoch": 1.6999796665311102, "grad_norm": 0.2876018285751343, "learning_rate": 8.345705777793204e-06, "loss": 0.3918, "step": 16721 }, { "epoch": 1.7000813338755592, "grad_norm": 0.26031336188316345, "learning_rate": 8.34544204105691e-06, "loss": 0.3246, "step": 16722 }, { "epoch": 1.700183001220008, "grad_norm": 0.29772692918777466, "learning_rate": 8.345178287467074e-06, "loss": 0.3474, "step": 16723 }, { "epoch": 1.700284668564457, "grad_norm": 0.2696884870529175, "learning_rate": 8.344914517025025e-06, "loss": 0.3559, "step": 16724 }, { "epoch": 1.700386335908906, "grad_norm": 0.29437288641929626, "learning_rate": 8.344650729732092e-06, "loss": 0.3515, "step": 16725 }, { "epoch": 1.700488003253355, "grad_norm": 0.2975544035434723, "learning_rate": 8.344386925589604e-06, "loss": 0.3617, "step": 16726 }, { "epoch": 1.7005896705978039, "grad_norm": 0.30720996856689453, "learning_rate": 8.344123104598887e-06, "loss": 0.397, "step": 16727 }, { "epoch": 1.7006913379422528, "grad_norm": 0.28143027424812317, "learning_rate": 8.343859266761274e-06, "loss": 0.3437, "step": 16728 }, { "epoch": 1.700793005286702, "grad_norm": 0.2939145267009735, "learning_rate": 8.343595412078094e-06, "loss": 0.3904, "step": 16729 }, { "epoch": 1.700894672631151, "grad_norm": 0.28673309087753296, "learning_rate": 8.343331540550674e-06, "loss": 0.3609, "step": 16730 }, { "epoch": 1.7009963399755998, "grad_norm": 0.29940396547317505, "learning_rate": 8.343067652180344e-06, "loss": 0.3574, "step": 16731 }, { "epoch": 1.7010980073200488, "grad_norm": 0.31121787428855896, "learning_rate": 8.342803746968431e-06, "loss": 0.3429, "step": 16732 }, { "epoch": 1.7011996746644977, "grad_norm": 0.2933480441570282, "learning_rate": 8.342539824916272e-06, "loss": 0.3553, "step": 16733 }, { "epoch": 1.7013013420089467, "grad_norm": 0.29904454946517944, "learning_rate": 8.342275886025189e-06, "loss": 0.3742, "step": 16734 }, { "epoch": 1.7014030093533958, "grad_norm": 0.2709807753562927, "learning_rate": 8.342011930296514e-06, "loss": 0.3349, "step": 16735 }, { "epoch": 1.7015046766978448, "grad_norm": 0.29728788137435913, "learning_rate": 8.341747957731577e-06, "loss": 0.3368, "step": 16736 }, { "epoch": 1.7016063440422937, "grad_norm": 0.2641860842704773, "learning_rate": 8.341483968331707e-06, "loss": 0.3584, "step": 16737 }, { "epoch": 1.7017080113867427, "grad_norm": 0.2888142764568329, "learning_rate": 8.341219962098237e-06, "loss": 0.3797, "step": 16738 }, { "epoch": 1.7018096787311916, "grad_norm": 0.26858407258987427, "learning_rate": 8.340955939032495e-06, "loss": 0.3521, "step": 16739 }, { "epoch": 1.7019113460756405, "grad_norm": 0.27943235635757446, "learning_rate": 8.34069189913581e-06, "loss": 0.3675, "step": 16740 }, { "epoch": 1.7020130134200895, "grad_norm": 0.29898688197135925, "learning_rate": 8.340427842409512e-06, "loss": 0.3879, "step": 16741 }, { "epoch": 1.7021146807645384, "grad_norm": 0.26782336831092834, "learning_rate": 8.340163768854934e-06, "loss": 0.3568, "step": 16742 }, { "epoch": 1.7022163481089874, "grad_norm": 0.28062689304351807, "learning_rate": 8.339899678473404e-06, "loss": 0.3564, "step": 16743 }, { "epoch": 1.7023180154534363, "grad_norm": 0.2757355570793152, "learning_rate": 8.339635571266253e-06, "loss": 0.3383, "step": 16744 }, { "epoch": 1.7024196827978852, "grad_norm": 0.29878658056259155, "learning_rate": 8.339371447234812e-06, "loss": 0.3587, "step": 16745 }, { "epoch": 1.7025213501423342, "grad_norm": 0.2996719479560852, "learning_rate": 8.339107306380411e-06, "loss": 0.3551, "step": 16746 }, { "epoch": 1.7026230174867831, "grad_norm": 0.2802753746509552, "learning_rate": 8.33884314870438e-06, "loss": 0.332, "step": 16747 }, { "epoch": 1.702724684831232, "grad_norm": 0.26713815331459045, "learning_rate": 8.338578974208053e-06, "loss": 0.3572, "step": 16748 }, { "epoch": 1.702826352175681, "grad_norm": 0.27860376238822937, "learning_rate": 8.338314782892757e-06, "loss": 0.3474, "step": 16749 }, { "epoch": 1.7029280195201302, "grad_norm": 0.320276141166687, "learning_rate": 8.338050574759824e-06, "loss": 0.3891, "step": 16750 }, { "epoch": 1.7030296868645791, "grad_norm": 0.27988287806510925, "learning_rate": 8.337786349810585e-06, "loss": 0.3222, "step": 16751 }, { "epoch": 1.703131354209028, "grad_norm": 0.3133689761161804, "learning_rate": 8.337522108046374e-06, "loss": 0.3974, "step": 16752 }, { "epoch": 1.703233021553477, "grad_norm": 0.30776524543762207, "learning_rate": 8.337257849468517e-06, "loss": 0.3537, "step": 16753 }, { "epoch": 1.703334688897926, "grad_norm": 0.29316267371177673, "learning_rate": 8.33699357407835e-06, "loss": 0.3692, "step": 16754 }, { "epoch": 1.703436356242375, "grad_norm": 0.3037787973880768, "learning_rate": 8.336729281877202e-06, "loss": 0.3656, "step": 16755 }, { "epoch": 1.703538023586824, "grad_norm": 0.2749171257019043, "learning_rate": 8.336464972866404e-06, "loss": 0.3447, "step": 16756 }, { "epoch": 1.703639690931273, "grad_norm": 0.26945292949676514, "learning_rate": 8.336200647047288e-06, "loss": 0.3426, "step": 16757 }, { "epoch": 1.703741358275722, "grad_norm": 0.29382073879241943, "learning_rate": 8.335936304421188e-06, "loss": 0.3431, "step": 16758 }, { "epoch": 1.7038430256201709, "grad_norm": 0.3064476251602173, "learning_rate": 8.335671944989432e-06, "loss": 0.3384, "step": 16759 }, { "epoch": 1.7039446929646198, "grad_norm": 0.3021097183227539, "learning_rate": 8.335407568753353e-06, "loss": 0.3705, "step": 16760 }, { "epoch": 1.7040463603090688, "grad_norm": 0.3063882887363434, "learning_rate": 8.335143175714285e-06, "loss": 0.3372, "step": 16761 }, { "epoch": 1.7041480276535177, "grad_norm": 0.3000994026660919, "learning_rate": 8.334878765873556e-06, "loss": 0.3418, "step": 16762 }, { "epoch": 1.7042496949979666, "grad_norm": 0.2791503667831421, "learning_rate": 8.334614339232502e-06, "loss": 0.3339, "step": 16763 }, { "epoch": 1.7043513623424156, "grad_norm": 0.3007037043571472, "learning_rate": 8.334349895792453e-06, "loss": 0.385, "step": 16764 }, { "epoch": 1.7044530296868645, "grad_norm": 0.30424991250038147, "learning_rate": 8.334085435554742e-06, "loss": 0.3279, "step": 16765 }, { "epoch": 1.7045546970313135, "grad_norm": 0.28609707951545715, "learning_rate": 8.3338209585207e-06, "loss": 0.3479, "step": 16766 }, { "epoch": 1.7046563643757624, "grad_norm": 0.2986813485622406, "learning_rate": 8.33355646469166e-06, "loss": 0.3272, "step": 16767 }, { "epoch": 1.7047580317202113, "grad_norm": 0.297229528427124, "learning_rate": 8.333291954068956e-06, "loss": 0.3388, "step": 16768 }, { "epoch": 1.7048596990646603, "grad_norm": 0.27542951703071594, "learning_rate": 8.333027426653918e-06, "loss": 0.3486, "step": 16769 }, { "epoch": 1.7049613664091094, "grad_norm": 0.2784675061702728, "learning_rate": 8.33276288244788e-06, "loss": 0.3703, "step": 16770 }, { "epoch": 1.7050630337535584, "grad_norm": 0.3026205599308014, "learning_rate": 8.332498321452176e-06, "loss": 0.3648, "step": 16771 }, { "epoch": 1.7051647010980073, "grad_norm": 0.28083905577659607, "learning_rate": 8.332233743668136e-06, "loss": 0.3644, "step": 16772 }, { "epoch": 1.7052663684424563, "grad_norm": 0.2640278935432434, "learning_rate": 8.331969149097095e-06, "loss": 0.3467, "step": 16773 }, { "epoch": 1.7053680357869052, "grad_norm": 0.27752843499183655, "learning_rate": 8.331704537740384e-06, "loss": 0.3544, "step": 16774 }, { "epoch": 1.7054697031313544, "grad_norm": 0.3088788688182831, "learning_rate": 8.331439909599339e-06, "loss": 0.3413, "step": 16775 }, { "epoch": 1.7055713704758033, "grad_norm": 0.29838770627975464, "learning_rate": 8.331175264675291e-06, "loss": 0.347, "step": 16776 }, { "epoch": 1.7056730378202523, "grad_norm": 0.2798544764518738, "learning_rate": 8.330910602969573e-06, "loss": 0.3826, "step": 16777 }, { "epoch": 1.7057747051647012, "grad_norm": 0.3178085386753082, "learning_rate": 8.330645924483518e-06, "loss": 0.414, "step": 16778 }, { "epoch": 1.7058763725091501, "grad_norm": 0.3093695342540741, "learning_rate": 8.330381229218461e-06, "loss": 0.3505, "step": 16779 }, { "epoch": 1.705978039853599, "grad_norm": 0.30372604727745056, "learning_rate": 8.330116517175736e-06, "loss": 0.3747, "step": 16780 }, { "epoch": 1.706079707198048, "grad_norm": 0.2842494249343872, "learning_rate": 8.329851788356675e-06, "loss": 0.3763, "step": 16781 }, { "epoch": 1.706181374542497, "grad_norm": 0.2692715525627136, "learning_rate": 8.329587042762613e-06, "loss": 0.3159, "step": 16782 }, { "epoch": 1.706283041886946, "grad_norm": 0.28509947657585144, "learning_rate": 8.329322280394879e-06, "loss": 0.358, "step": 16783 }, { "epoch": 1.7063847092313948, "grad_norm": 0.2826198637485504, "learning_rate": 8.329057501254813e-06, "loss": 0.3619, "step": 16784 }, { "epoch": 1.7064863765758438, "grad_norm": 0.2750685214996338, "learning_rate": 8.328792705343746e-06, "loss": 0.369, "step": 16785 }, { "epoch": 1.7065880439202927, "grad_norm": 0.27931737899780273, "learning_rate": 8.328527892663012e-06, "loss": 0.3433, "step": 16786 }, { "epoch": 1.7066897112647417, "grad_norm": 0.29694193601608276, "learning_rate": 8.328263063213947e-06, "loss": 0.3833, "step": 16787 }, { "epoch": 1.7067913786091906, "grad_norm": 0.2842184007167816, "learning_rate": 8.327998216997883e-06, "loss": 0.349, "step": 16788 }, { "epoch": 1.7068930459536396, "grad_norm": 0.2635915279388428, "learning_rate": 8.327733354016155e-06, "loss": 0.3419, "step": 16789 }, { "epoch": 1.7069947132980885, "grad_norm": 0.28342053294181824, "learning_rate": 8.327468474270095e-06, "loss": 0.3396, "step": 16790 }, { "epoch": 1.7070963806425377, "grad_norm": 0.28368741273880005, "learning_rate": 8.327203577761043e-06, "loss": 0.3646, "step": 16791 }, { "epoch": 1.7071980479869866, "grad_norm": 0.2889004647731781, "learning_rate": 8.326938664490327e-06, "loss": 0.335, "step": 16792 }, { "epoch": 1.7072997153314355, "grad_norm": 0.2688051462173462, "learning_rate": 8.326673734459286e-06, "loss": 0.3641, "step": 16793 }, { "epoch": 1.7074013826758845, "grad_norm": 0.28474321961402893, "learning_rate": 8.326408787669254e-06, "loss": 0.3662, "step": 16794 }, { "epoch": 1.7075030500203334, "grad_norm": 0.2714368402957916, "learning_rate": 8.326143824121564e-06, "loss": 0.3291, "step": 16795 }, { "epoch": 1.7076047173647826, "grad_norm": 0.28809094429016113, "learning_rate": 8.325878843817552e-06, "loss": 0.3803, "step": 16796 }, { "epoch": 1.7077063847092315, "grad_norm": 0.2969205975532532, "learning_rate": 8.325613846758552e-06, "loss": 0.3648, "step": 16797 }, { "epoch": 1.7078080520536805, "grad_norm": 0.2733517289161682, "learning_rate": 8.3253488329459e-06, "loss": 0.373, "step": 16798 }, { "epoch": 1.7079097193981294, "grad_norm": 0.2646864950656891, "learning_rate": 8.32508380238093e-06, "loss": 0.3805, "step": 16799 }, { "epoch": 1.7080113867425784, "grad_norm": 0.27886345982551575, "learning_rate": 8.32481875506498e-06, "loss": 0.3743, "step": 16800 }, { "epoch": 1.7081130540870273, "grad_norm": 0.27346476912498474, "learning_rate": 8.324553690999383e-06, "loss": 0.385, "step": 16801 }, { "epoch": 1.7082147214314762, "grad_norm": 0.2727774977684021, "learning_rate": 8.324288610185474e-06, "loss": 0.3395, "step": 16802 }, { "epoch": 1.7083163887759252, "grad_norm": 0.278532475233078, "learning_rate": 8.324023512624589e-06, "loss": 0.3987, "step": 16803 }, { "epoch": 1.7084180561203741, "grad_norm": 0.3071083128452301, "learning_rate": 8.323758398318063e-06, "loss": 0.3671, "step": 16804 }, { "epoch": 1.708519723464823, "grad_norm": 0.2799898684024811, "learning_rate": 8.323493267267233e-06, "loss": 0.3576, "step": 16805 }, { "epoch": 1.708621390809272, "grad_norm": 0.2900581657886505, "learning_rate": 8.323228119473432e-06, "loss": 0.3565, "step": 16806 }, { "epoch": 1.708723058153721, "grad_norm": 0.28279411792755127, "learning_rate": 8.322962954937998e-06, "loss": 0.3699, "step": 16807 }, { "epoch": 1.7088247254981699, "grad_norm": 0.2956421971321106, "learning_rate": 8.322697773662267e-06, "loss": 0.3753, "step": 16808 }, { "epoch": 1.7089263928426188, "grad_norm": 0.2824190557003021, "learning_rate": 8.322432575647575e-06, "loss": 0.3446, "step": 16809 }, { "epoch": 1.7090280601870678, "grad_norm": 0.3031657636165619, "learning_rate": 8.322167360895256e-06, "loss": 0.35, "step": 16810 }, { "epoch": 1.709129727531517, "grad_norm": 0.2647504210472107, "learning_rate": 8.321902129406647e-06, "loss": 0.3465, "step": 16811 }, { "epoch": 1.7092313948759659, "grad_norm": 0.2947419583797455, "learning_rate": 8.321636881183087e-06, "loss": 0.3605, "step": 16812 }, { "epoch": 1.7093330622204148, "grad_norm": 0.25823405385017395, "learning_rate": 8.321371616225905e-06, "loss": 0.3825, "step": 16813 }, { "epoch": 1.7094347295648638, "grad_norm": 0.30009880661964417, "learning_rate": 8.321106334536446e-06, "loss": 0.3907, "step": 16814 }, { "epoch": 1.7095363969093127, "grad_norm": 0.2940240204334259, "learning_rate": 8.32084103611604e-06, "loss": 0.3469, "step": 16815 }, { "epoch": 1.7096380642537619, "grad_norm": 0.2923177182674408, "learning_rate": 8.320575720966028e-06, "loss": 0.3814, "step": 16816 }, { "epoch": 1.7097397315982108, "grad_norm": 0.30954504013061523, "learning_rate": 8.320310389087745e-06, "loss": 0.4095, "step": 16817 }, { "epoch": 1.7098413989426597, "grad_norm": 0.27928096055984497, "learning_rate": 8.320045040482525e-06, "loss": 0.3339, "step": 16818 }, { "epoch": 1.7099430662871087, "grad_norm": 0.2657064199447632, "learning_rate": 8.319779675151709e-06, "loss": 0.3414, "step": 16819 }, { "epoch": 1.7100447336315576, "grad_norm": 0.268314003944397, "learning_rate": 8.319514293096631e-06, "loss": 0.3695, "step": 16820 }, { "epoch": 1.7101464009760066, "grad_norm": 0.2965695858001709, "learning_rate": 8.31924889431863e-06, "loss": 0.3574, "step": 16821 }, { "epoch": 1.7102480683204555, "grad_norm": 0.28358209133148193, "learning_rate": 8.318983478819041e-06, "loss": 0.348, "step": 16822 }, { "epoch": 1.7103497356649044, "grad_norm": 0.2680949866771698, "learning_rate": 8.3187180465992e-06, "loss": 0.3141, "step": 16823 }, { "epoch": 1.7104514030093534, "grad_norm": 0.28368696570396423, "learning_rate": 8.31845259766045e-06, "loss": 0.3549, "step": 16824 }, { "epoch": 1.7105530703538023, "grad_norm": 0.27381059527397156, "learning_rate": 8.31818713200412e-06, "loss": 0.3751, "step": 16825 }, { "epoch": 1.7106547376982513, "grad_norm": 0.2742384970188141, "learning_rate": 8.317921649631556e-06, "loss": 0.361, "step": 16826 }, { "epoch": 1.7107564050427002, "grad_norm": 0.268679141998291, "learning_rate": 8.317656150544089e-06, "loss": 0.3405, "step": 16827 }, { "epoch": 1.7108580723871492, "grad_norm": 0.27617916464805603, "learning_rate": 8.317390634743059e-06, "loss": 0.3429, "step": 16828 }, { "epoch": 1.710959739731598, "grad_norm": 0.258383184671402, "learning_rate": 8.317125102229803e-06, "loss": 0.3726, "step": 16829 }, { "epoch": 1.711061407076047, "grad_norm": 0.265846312046051, "learning_rate": 8.316859553005659e-06, "loss": 0.351, "step": 16830 }, { "epoch": 1.711163074420496, "grad_norm": 0.28051623702049255, "learning_rate": 8.316593987071966e-06, "loss": 0.3485, "step": 16831 }, { "epoch": 1.7112647417649451, "grad_norm": 0.2627933919429779, "learning_rate": 8.316328404430059e-06, "loss": 0.3393, "step": 16832 }, { "epoch": 1.711366409109394, "grad_norm": 0.2786090075969696, "learning_rate": 8.316062805081278e-06, "loss": 0.3562, "step": 16833 }, { "epoch": 1.711468076453843, "grad_norm": 0.2738001346588135, "learning_rate": 8.31579718902696e-06, "loss": 0.3325, "step": 16834 }, { "epoch": 1.711569743798292, "grad_norm": 0.2830659747123718, "learning_rate": 8.315531556268446e-06, "loss": 0.3456, "step": 16835 }, { "epoch": 1.711671411142741, "grad_norm": 0.26522916555404663, "learning_rate": 8.31526590680707e-06, "loss": 0.3413, "step": 16836 }, { "epoch": 1.71177307848719, "grad_norm": 0.2859169542789459, "learning_rate": 8.315000240644174e-06, "loss": 0.3891, "step": 16837 }, { "epoch": 1.711874745831639, "grad_norm": 0.28500229120254517, "learning_rate": 8.314734557781092e-06, "loss": 0.3575, "step": 16838 }, { "epoch": 1.711976413176088, "grad_norm": 0.294119656085968, "learning_rate": 8.314468858219166e-06, "loss": 0.3525, "step": 16839 }, { "epoch": 1.712078080520537, "grad_norm": 0.2941887378692627, "learning_rate": 8.314203141959734e-06, "loss": 0.3766, "step": 16840 }, { "epoch": 1.7121797478649858, "grad_norm": 0.27946317195892334, "learning_rate": 8.313937409004134e-06, "loss": 0.3685, "step": 16841 }, { "epoch": 1.7122814152094348, "grad_norm": 0.25848111510276794, "learning_rate": 8.313671659353704e-06, "loss": 0.3474, "step": 16842 }, { "epoch": 1.7123830825538837, "grad_norm": 0.2827009856700897, "learning_rate": 8.313405893009785e-06, "loss": 0.3474, "step": 16843 }, { "epoch": 1.7124847498983327, "grad_norm": 0.3004346489906311, "learning_rate": 8.313140109973714e-06, "loss": 0.3781, "step": 16844 }, { "epoch": 1.7125864172427816, "grad_norm": 0.2709197700023651, "learning_rate": 8.31287431024683e-06, "loss": 0.375, "step": 16845 }, { "epoch": 1.7126880845872305, "grad_norm": 0.26310446858406067, "learning_rate": 8.312608493830473e-06, "loss": 0.3563, "step": 16846 }, { "epoch": 1.7127897519316795, "grad_norm": 0.2759237587451935, "learning_rate": 8.312342660725982e-06, "loss": 0.3511, "step": 16847 }, { "epoch": 1.7128914192761284, "grad_norm": 0.2972370982170105, "learning_rate": 8.312076810934696e-06, "loss": 0.3388, "step": 16848 }, { "epoch": 1.7129930866205774, "grad_norm": 0.2842060327529907, "learning_rate": 8.311810944457951e-06, "loss": 0.3497, "step": 16849 }, { "epoch": 1.7130947539650263, "grad_norm": 0.28313103318214417, "learning_rate": 8.311545061297092e-06, "loss": 0.3376, "step": 16850 }, { "epoch": 1.7131964213094752, "grad_norm": 0.2752721607685089, "learning_rate": 8.311279161453457e-06, "loss": 0.3668, "step": 16851 }, { "epoch": 1.7132980886539244, "grad_norm": 0.2910676896572113, "learning_rate": 8.311013244928384e-06, "loss": 0.3606, "step": 16852 }, { "epoch": 1.7133997559983734, "grad_norm": 0.2953077554702759, "learning_rate": 8.31074731172321e-06, "loss": 0.3606, "step": 16853 }, { "epoch": 1.7135014233428223, "grad_norm": 0.2984154522418976, "learning_rate": 8.310481361839283e-06, "loss": 0.375, "step": 16854 }, { "epoch": 1.7136030906872712, "grad_norm": 0.2928291857242584, "learning_rate": 8.310215395277933e-06, "loss": 0.364, "step": 16855 }, { "epoch": 1.7137047580317202, "grad_norm": 0.3029008209705353, "learning_rate": 8.309949412040506e-06, "loss": 0.3872, "step": 16856 }, { "epoch": 1.7138064253761693, "grad_norm": 0.27668002247810364, "learning_rate": 8.30968341212834e-06, "loss": 0.3667, "step": 16857 }, { "epoch": 1.7139080927206183, "grad_norm": 0.2644983232021332, "learning_rate": 8.309417395542777e-06, "loss": 0.3563, "step": 16858 }, { "epoch": 1.7140097600650672, "grad_norm": 0.29178741574287415, "learning_rate": 8.309151362285156e-06, "loss": 0.3637, "step": 16859 }, { "epoch": 1.7141114274095162, "grad_norm": 0.2876967489719391, "learning_rate": 8.308885312356813e-06, "loss": 0.3725, "step": 16860 }, { "epoch": 1.714213094753965, "grad_norm": 0.28029918670654297, "learning_rate": 8.308619245759095e-06, "loss": 0.388, "step": 16861 }, { "epoch": 1.714314762098414, "grad_norm": 0.30063125491142273, "learning_rate": 8.308353162493342e-06, "loss": 0.3576, "step": 16862 }, { "epoch": 1.714416429442863, "grad_norm": 0.2661796808242798, "learning_rate": 8.308087062560887e-06, "loss": 0.359, "step": 16863 }, { "epoch": 1.714518096787312, "grad_norm": 0.2653605043888092, "learning_rate": 8.307820945963077e-06, "loss": 0.3412, "step": 16864 }, { "epoch": 1.7146197641317609, "grad_norm": 0.3051205277442932, "learning_rate": 8.307554812701253e-06, "loss": 0.4139, "step": 16865 }, { "epoch": 1.7147214314762098, "grad_norm": 0.2598394751548767, "learning_rate": 8.307288662776752e-06, "loss": 0.3567, "step": 16866 }, { "epoch": 1.7148230988206588, "grad_norm": 0.27348196506500244, "learning_rate": 8.307022496190917e-06, "loss": 0.3784, "step": 16867 }, { "epoch": 1.7149247661651077, "grad_norm": 0.2716384828090668, "learning_rate": 8.306756312945089e-06, "loss": 0.3815, "step": 16868 }, { "epoch": 1.7150264335095566, "grad_norm": 0.26147547364234924, "learning_rate": 8.306490113040608e-06, "loss": 0.3844, "step": 16869 }, { "epoch": 1.7151281008540056, "grad_norm": 0.28062471747398376, "learning_rate": 8.306223896478816e-06, "loss": 0.3522, "step": 16870 }, { "epoch": 1.7152297681984545, "grad_norm": 0.2766568958759308, "learning_rate": 8.305957663261052e-06, "loss": 0.3605, "step": 16871 }, { "epoch": 1.7153314355429035, "grad_norm": 0.28798890113830566, "learning_rate": 8.305691413388661e-06, "loss": 0.3676, "step": 16872 }, { "epoch": 1.7154331028873526, "grad_norm": 0.27728039026260376, "learning_rate": 8.305425146862982e-06, "loss": 0.3764, "step": 16873 }, { "epoch": 1.7155347702318016, "grad_norm": 0.3020400404930115, "learning_rate": 8.305158863685356e-06, "loss": 0.3897, "step": 16874 }, { "epoch": 1.7156364375762505, "grad_norm": 0.2761053442955017, "learning_rate": 8.304892563857125e-06, "loss": 0.352, "step": 16875 }, { "epoch": 1.7157381049206994, "grad_norm": 0.2820659875869751, "learning_rate": 8.304626247379631e-06, "loss": 0.3409, "step": 16876 }, { "epoch": 1.7158397722651484, "grad_norm": 0.31004953384399414, "learning_rate": 8.304359914254215e-06, "loss": 0.3808, "step": 16877 }, { "epoch": 1.7159414396095976, "grad_norm": 0.2750070095062256, "learning_rate": 8.304093564482219e-06, "loss": 0.3558, "step": 16878 }, { "epoch": 1.7160431069540465, "grad_norm": 0.2916753590106964, "learning_rate": 8.303827198064984e-06, "loss": 0.3638, "step": 16879 }, { "epoch": 1.7161447742984954, "grad_norm": 0.2587517201900482, "learning_rate": 8.303560815003855e-06, "loss": 0.3648, "step": 16880 }, { "epoch": 1.7162464416429444, "grad_norm": 0.29723063111305237, "learning_rate": 8.303294415300169e-06, "loss": 0.3526, "step": 16881 }, { "epoch": 1.7163481089873933, "grad_norm": 0.3092286288738251, "learning_rate": 8.303027998955273e-06, "loss": 0.3762, "step": 16882 }, { "epoch": 1.7164497763318423, "grad_norm": 0.27315616607666016, "learning_rate": 8.302761565970505e-06, "loss": 0.3315, "step": 16883 }, { "epoch": 1.7165514436762912, "grad_norm": 0.2750701904296875, "learning_rate": 8.302495116347211e-06, "loss": 0.3471, "step": 16884 }, { "epoch": 1.7166531110207401, "grad_norm": 0.2931397557258606, "learning_rate": 8.30222865008673e-06, "loss": 0.3629, "step": 16885 }, { "epoch": 1.716754778365189, "grad_norm": 0.2863004505634308, "learning_rate": 8.301962167190406e-06, "loss": 0.3618, "step": 16886 }, { "epoch": 1.716856445709638, "grad_norm": 0.29890426993370056, "learning_rate": 8.301695667659582e-06, "loss": 0.3787, "step": 16887 }, { "epoch": 1.716958113054087, "grad_norm": 0.2853459119796753, "learning_rate": 8.3014291514956e-06, "loss": 0.3542, "step": 16888 }, { "epoch": 1.717059780398536, "grad_norm": 0.27826935052871704, "learning_rate": 8.3011626186998e-06, "loss": 0.364, "step": 16889 }, { "epoch": 1.7171614477429848, "grad_norm": 0.28014394640922546, "learning_rate": 8.30089606927353e-06, "loss": 0.3457, "step": 16890 }, { "epoch": 1.7172631150874338, "grad_norm": 0.2768082618713379, "learning_rate": 8.300629503218128e-06, "loss": 0.3351, "step": 16891 }, { "epoch": 1.7173647824318827, "grad_norm": 0.28423357009887695, "learning_rate": 8.30036292053494e-06, "loss": 0.3603, "step": 16892 }, { "epoch": 1.717466449776332, "grad_norm": 0.27934032678604126, "learning_rate": 8.300096321225308e-06, "loss": 0.3674, "step": 16893 }, { "epoch": 1.7175681171207808, "grad_norm": 0.28923577070236206, "learning_rate": 8.299829705290575e-06, "loss": 0.3345, "step": 16894 }, { "epoch": 1.7176697844652298, "grad_norm": 0.2957249879837036, "learning_rate": 8.299563072732083e-06, "loss": 0.3462, "step": 16895 }, { "epoch": 1.7177714518096787, "grad_norm": 0.2881259024143219, "learning_rate": 8.299296423551178e-06, "loss": 0.3636, "step": 16896 }, { "epoch": 1.7178731191541277, "grad_norm": 0.2832778990268707, "learning_rate": 8.299029757749199e-06, "loss": 0.3655, "step": 16897 }, { "epoch": 1.7179747864985768, "grad_norm": 0.28494539856910706, "learning_rate": 8.298763075327495e-06, "loss": 0.375, "step": 16898 }, { "epoch": 1.7180764538430258, "grad_norm": 0.2869209051132202, "learning_rate": 8.298496376287405e-06, "loss": 0.3605, "step": 16899 }, { "epoch": 1.7181781211874747, "grad_norm": 0.26179513335227966, "learning_rate": 8.298229660630275e-06, "loss": 0.3795, "step": 16900 }, { "epoch": 1.7182797885319236, "grad_norm": 0.27846333384513855, "learning_rate": 8.297962928357446e-06, "loss": 0.3722, "step": 16901 }, { "epoch": 1.7183814558763726, "grad_norm": 0.27325156331062317, "learning_rate": 8.297696179470265e-06, "loss": 0.3798, "step": 16902 }, { "epoch": 1.7184831232208215, "grad_norm": 0.2769671380519867, "learning_rate": 8.297429413970076e-06, "loss": 0.3524, "step": 16903 }, { "epoch": 1.7185847905652705, "grad_norm": 0.2881491184234619, "learning_rate": 8.297162631858218e-06, "loss": 0.3917, "step": 16904 }, { "epoch": 1.7186864579097194, "grad_norm": 0.26363518834114075, "learning_rate": 8.296895833136038e-06, "loss": 0.3457, "step": 16905 }, { "epoch": 1.7187881252541684, "grad_norm": 0.2806682586669922, "learning_rate": 8.296629017804882e-06, "loss": 0.3713, "step": 16906 }, { "epoch": 1.7188897925986173, "grad_norm": 0.2882317900657654, "learning_rate": 8.296362185866093e-06, "loss": 0.3842, "step": 16907 }, { "epoch": 1.7189914599430662, "grad_norm": 0.2989159822463989, "learning_rate": 8.296095337321013e-06, "loss": 0.3871, "step": 16908 }, { "epoch": 1.7190931272875152, "grad_norm": 0.294812947511673, "learning_rate": 8.295828472170987e-06, "loss": 0.3344, "step": 16909 }, { "epoch": 1.7191947946319641, "grad_norm": 0.2863336503505707, "learning_rate": 8.295561590417364e-06, "loss": 0.3741, "step": 16910 }, { "epoch": 1.719296461976413, "grad_norm": 0.29069769382476807, "learning_rate": 8.295294692061482e-06, "loss": 0.3828, "step": 16911 }, { "epoch": 1.719398129320862, "grad_norm": 0.27031949162483215, "learning_rate": 8.295027777104689e-06, "loss": 0.3838, "step": 16912 }, { "epoch": 1.719499796665311, "grad_norm": 0.2949613630771637, "learning_rate": 8.29476084554833e-06, "loss": 0.3876, "step": 16913 }, { "epoch": 1.71960146400976, "grad_norm": 0.2613096535205841, "learning_rate": 8.294493897393748e-06, "loss": 0.3576, "step": 16914 }, { "epoch": 1.719703131354209, "grad_norm": 0.2675130367279053, "learning_rate": 8.294226932642288e-06, "loss": 0.3783, "step": 16915 }, { "epoch": 1.719804798698658, "grad_norm": 0.2885277569293976, "learning_rate": 8.293959951295297e-06, "loss": 0.3877, "step": 16916 }, { "epoch": 1.719906466043107, "grad_norm": 0.2927490174770355, "learning_rate": 8.293692953354118e-06, "loss": 0.3538, "step": 16917 }, { "epoch": 1.7200081333875559, "grad_norm": 0.27177146077156067, "learning_rate": 8.293425938820096e-06, "loss": 0.3509, "step": 16918 }, { "epoch": 1.720109800732005, "grad_norm": 0.2748476564884186, "learning_rate": 8.293158907694578e-06, "loss": 0.3784, "step": 16919 }, { "epoch": 1.720211468076454, "grad_norm": 0.2785893380641937, "learning_rate": 8.292891859978906e-06, "loss": 0.3581, "step": 16920 }, { "epoch": 1.720313135420903, "grad_norm": 0.3042404353618622, "learning_rate": 8.292624795674429e-06, "loss": 0.3387, "step": 16921 }, { "epoch": 1.7204148027653519, "grad_norm": 0.2926573157310486, "learning_rate": 8.292357714782491e-06, "loss": 0.3765, "step": 16922 }, { "epoch": 1.7205164701098008, "grad_norm": 0.2537381649017334, "learning_rate": 8.292090617304436e-06, "loss": 0.343, "step": 16923 }, { "epoch": 1.7206181374542497, "grad_norm": 0.27071067690849304, "learning_rate": 8.291823503241612e-06, "loss": 0.3826, "step": 16924 }, { "epoch": 1.7207198047986987, "grad_norm": 0.27411872148513794, "learning_rate": 8.291556372595362e-06, "loss": 0.3526, "step": 16925 }, { "epoch": 1.7208214721431476, "grad_norm": 0.3034592866897583, "learning_rate": 8.291289225367034e-06, "loss": 0.3774, "step": 16926 }, { "epoch": 1.7209231394875966, "grad_norm": 0.2772270739078522, "learning_rate": 8.291022061557975e-06, "loss": 0.3431, "step": 16927 }, { "epoch": 1.7210248068320455, "grad_norm": 0.29418638348579407, "learning_rate": 8.290754881169525e-06, "loss": 0.3531, "step": 16928 }, { "epoch": 1.7211264741764944, "grad_norm": 0.3479117155075073, "learning_rate": 8.290487684203036e-06, "loss": 0.3831, "step": 16929 }, { "epoch": 1.7212281415209434, "grad_norm": 0.2899363338947296, "learning_rate": 8.290220470659851e-06, "loss": 0.3201, "step": 16930 }, { "epoch": 1.7213298088653923, "grad_norm": 0.29503169655799866, "learning_rate": 8.28995324054132e-06, "loss": 0.348, "step": 16931 }, { "epoch": 1.7214314762098413, "grad_norm": 0.30012452602386475, "learning_rate": 8.289685993848782e-06, "loss": 0.3608, "step": 16932 }, { "epoch": 1.7215331435542902, "grad_norm": 0.3037393093109131, "learning_rate": 8.289418730583591e-06, "loss": 0.3488, "step": 16933 }, { "epoch": 1.7216348108987394, "grad_norm": 0.27284714579582214, "learning_rate": 8.289151450747088e-06, "loss": 0.3517, "step": 16934 }, { "epoch": 1.7217364782431883, "grad_norm": 0.28849533200263977, "learning_rate": 8.288884154340624e-06, "loss": 0.382, "step": 16935 }, { "epoch": 1.7218381455876373, "grad_norm": 0.2852766811847687, "learning_rate": 8.288616841365542e-06, "loss": 0.3571, "step": 16936 }, { "epoch": 1.7219398129320862, "grad_norm": 0.3054325580596924, "learning_rate": 8.28834951182319e-06, "loss": 0.3608, "step": 16937 }, { "epoch": 1.7220414802765351, "grad_norm": 0.29768088459968567, "learning_rate": 8.288082165714915e-06, "loss": 0.3558, "step": 16938 }, { "epoch": 1.7221431476209843, "grad_norm": 0.288679301738739, "learning_rate": 8.287814803042061e-06, "loss": 0.3448, "step": 16939 }, { "epoch": 1.7222448149654332, "grad_norm": 0.2629259526729584, "learning_rate": 8.28754742380598e-06, "loss": 0.3807, "step": 16940 }, { "epoch": 1.7223464823098822, "grad_norm": 0.30999624729156494, "learning_rate": 8.287280028008015e-06, "loss": 0.3551, "step": 16941 }, { "epoch": 1.7224481496543311, "grad_norm": 0.2851434350013733, "learning_rate": 8.287012615649514e-06, "loss": 0.3478, "step": 16942 }, { "epoch": 1.72254981699878, "grad_norm": 0.27756398916244507, "learning_rate": 8.286745186731826e-06, "loss": 0.3591, "step": 16943 }, { "epoch": 1.722651484343229, "grad_norm": 0.2764637768268585, "learning_rate": 8.286477741256296e-06, "loss": 0.3745, "step": 16944 }, { "epoch": 1.722753151687678, "grad_norm": 0.28294074535369873, "learning_rate": 8.286210279224273e-06, "loss": 0.3541, "step": 16945 }, { "epoch": 1.722854819032127, "grad_norm": 0.27360475063323975, "learning_rate": 8.285942800637102e-06, "loss": 0.3594, "step": 16946 }, { "epoch": 1.7229564863765758, "grad_norm": 0.27201011776924133, "learning_rate": 8.285675305496133e-06, "loss": 0.3367, "step": 16947 }, { "epoch": 1.7230581537210248, "grad_norm": 0.30351522564888, "learning_rate": 8.285407793802713e-06, "loss": 0.3785, "step": 16948 }, { "epoch": 1.7231598210654737, "grad_norm": 0.2711757719516754, "learning_rate": 8.285140265558187e-06, "loss": 0.3445, "step": 16949 }, { "epoch": 1.7232614884099227, "grad_norm": 0.2919308841228485, "learning_rate": 8.284872720763909e-06, "loss": 0.3713, "step": 16950 }, { "epoch": 1.7233631557543716, "grad_norm": 0.28121963143348694, "learning_rate": 8.28460515942122e-06, "loss": 0.3306, "step": 16951 }, { "epoch": 1.7234648230988205, "grad_norm": 0.3041607141494751, "learning_rate": 8.284337581531472e-06, "loss": 0.3631, "step": 16952 }, { "epoch": 1.7235664904432695, "grad_norm": 0.26556912064552307, "learning_rate": 8.284069987096011e-06, "loss": 0.3569, "step": 16953 }, { "epoch": 1.7236681577877184, "grad_norm": 0.27936214208602905, "learning_rate": 8.283802376116185e-06, "loss": 0.3341, "step": 16954 }, { "epoch": 1.7237698251321676, "grad_norm": 0.2732757031917572, "learning_rate": 8.283534748593344e-06, "loss": 0.3525, "step": 16955 }, { "epoch": 1.7238714924766165, "grad_norm": 0.26022160053253174, "learning_rate": 8.283267104528837e-06, "loss": 0.3676, "step": 16956 }, { "epoch": 1.7239731598210655, "grad_norm": 0.261890172958374, "learning_rate": 8.282999443924007e-06, "loss": 0.3533, "step": 16957 }, { "epoch": 1.7240748271655144, "grad_norm": 0.2901664972305298, "learning_rate": 8.282731766780207e-06, "loss": 0.364, "step": 16958 }, { "epoch": 1.7241764945099634, "grad_norm": 0.2580564618110657, "learning_rate": 8.282464073098786e-06, "loss": 0.3785, "step": 16959 }, { "epoch": 1.7242781618544125, "grad_norm": 0.3180593252182007, "learning_rate": 8.282196362881089e-06, "loss": 0.3904, "step": 16960 }, { "epoch": 1.7243798291988615, "grad_norm": 0.2585071921348572, "learning_rate": 8.281928636128468e-06, "loss": 0.3458, "step": 16961 }, { "epoch": 1.7244814965433104, "grad_norm": 0.2737637758255005, "learning_rate": 8.28166089284227e-06, "loss": 0.3518, "step": 16962 }, { "epoch": 1.7245831638877593, "grad_norm": 0.2586846351623535, "learning_rate": 8.281393133023845e-06, "loss": 0.3412, "step": 16963 }, { "epoch": 1.7246848312322083, "grad_norm": 0.299437552690506, "learning_rate": 8.28112535667454e-06, "loss": 0.3431, "step": 16964 }, { "epoch": 1.7247864985766572, "grad_norm": 0.28589075803756714, "learning_rate": 8.280857563795707e-06, "loss": 0.3843, "step": 16965 }, { "epoch": 1.7248881659211062, "grad_norm": 0.2734379470348358, "learning_rate": 8.280589754388693e-06, "loss": 0.3495, "step": 16966 }, { "epoch": 1.724989833265555, "grad_norm": 0.261275053024292, "learning_rate": 8.280321928454847e-06, "loss": 0.3497, "step": 16967 }, { "epoch": 1.725091500610004, "grad_norm": 0.2670762538909912, "learning_rate": 8.280054085995518e-06, "loss": 0.3453, "step": 16968 }, { "epoch": 1.725193167954453, "grad_norm": 0.27684712409973145, "learning_rate": 8.279786227012055e-06, "loss": 0.4015, "step": 16969 }, { "epoch": 1.725294835298902, "grad_norm": 0.2726144790649414, "learning_rate": 8.279518351505809e-06, "loss": 0.3351, "step": 16970 }, { "epoch": 1.7253965026433509, "grad_norm": 0.2785806953907013, "learning_rate": 8.27925045947813e-06, "loss": 0.3469, "step": 16971 }, { "epoch": 1.7254981699877998, "grad_norm": 0.24473340809345245, "learning_rate": 8.278982550930365e-06, "loss": 0.3478, "step": 16972 }, { "epoch": 1.7255998373322488, "grad_norm": 0.2693031132221222, "learning_rate": 8.278714625863867e-06, "loss": 0.3487, "step": 16973 }, { "epoch": 1.7257015046766977, "grad_norm": 0.27715587615966797, "learning_rate": 8.278446684279982e-06, "loss": 0.366, "step": 16974 }, { "epoch": 1.7258031720211469, "grad_norm": 0.2866440713405609, "learning_rate": 8.278178726180063e-06, "loss": 0.3522, "step": 16975 }, { "epoch": 1.7259048393655958, "grad_norm": 0.2672497630119324, "learning_rate": 8.27791075156546e-06, "loss": 0.39, "step": 16976 }, { "epoch": 1.7260065067100447, "grad_norm": 0.2681318521499634, "learning_rate": 8.277642760437518e-06, "loss": 0.3466, "step": 16977 }, { "epoch": 1.7261081740544937, "grad_norm": 0.26705682277679443, "learning_rate": 8.277374752797593e-06, "loss": 0.3756, "step": 16978 }, { "epoch": 1.7262098413989426, "grad_norm": 0.26572567224502563, "learning_rate": 8.27710672864703e-06, "loss": 0.3572, "step": 16979 }, { "epoch": 1.7263115087433918, "grad_norm": 0.26925861835479736, "learning_rate": 8.276838687987186e-06, "loss": 0.3474, "step": 16980 }, { "epoch": 1.7264131760878407, "grad_norm": 0.25912871956825256, "learning_rate": 8.276570630819404e-06, "loss": 0.3884, "step": 16981 }, { "epoch": 1.7265148434322897, "grad_norm": 0.3195558190345764, "learning_rate": 8.276302557145041e-06, "loss": 0.3892, "step": 16982 }, { "epoch": 1.7266165107767386, "grad_norm": 0.28901270031929016, "learning_rate": 8.276034466965442e-06, "loss": 0.3427, "step": 16983 }, { "epoch": 1.7267181781211876, "grad_norm": 0.2909621298313141, "learning_rate": 8.27576636028196e-06, "loss": 0.3358, "step": 16984 }, { "epoch": 1.7268198454656365, "grad_norm": 0.24724964797496796, "learning_rate": 8.275498237095947e-06, "loss": 0.3479, "step": 16985 }, { "epoch": 1.7269215128100854, "grad_norm": 0.28176620602607727, "learning_rate": 8.27523009740875e-06, "loss": 0.3325, "step": 16986 }, { "epoch": 1.7270231801545344, "grad_norm": 0.30270010232925415, "learning_rate": 8.274961941221725e-06, "loss": 0.3464, "step": 16987 }, { "epoch": 1.7271248474989833, "grad_norm": 0.3128783106803894, "learning_rate": 8.274693768536218e-06, "loss": 0.3637, "step": 16988 }, { "epoch": 1.7272265148434323, "grad_norm": 0.28726232051849365, "learning_rate": 8.274425579353582e-06, "loss": 0.3694, "step": 16989 }, { "epoch": 1.7273281821878812, "grad_norm": 0.29021960496902466, "learning_rate": 8.27415737367517e-06, "loss": 0.375, "step": 16990 }, { "epoch": 1.7274298495323301, "grad_norm": 0.2871814966201782, "learning_rate": 8.273889151502328e-06, "loss": 0.3714, "step": 16991 }, { "epoch": 1.727531516876779, "grad_norm": 0.2982734739780426, "learning_rate": 8.273620912836412e-06, "loss": 0.3781, "step": 16992 }, { "epoch": 1.727633184221228, "grad_norm": 0.2940289378166199, "learning_rate": 8.273352657678772e-06, "loss": 0.3798, "step": 16993 }, { "epoch": 1.727734851565677, "grad_norm": 0.2945384979248047, "learning_rate": 8.27308438603076e-06, "loss": 0.3941, "step": 16994 }, { "epoch": 1.727836518910126, "grad_norm": 0.3042204678058624, "learning_rate": 8.272816097893726e-06, "loss": 0.3794, "step": 16995 }, { "epoch": 1.727938186254575, "grad_norm": 0.2637641727924347, "learning_rate": 8.272547793269021e-06, "loss": 0.3587, "step": 16996 }, { "epoch": 1.728039853599024, "grad_norm": 0.2778112292289734, "learning_rate": 8.272279472158e-06, "loss": 0.3716, "step": 16997 }, { "epoch": 1.728141520943473, "grad_norm": 0.26319581270217896, "learning_rate": 8.27201113456201e-06, "loss": 0.3607, "step": 16998 }, { "epoch": 1.728243188287922, "grad_norm": 0.26419246196746826, "learning_rate": 8.271742780482408e-06, "loss": 0.3658, "step": 16999 }, { "epoch": 1.7283448556323708, "grad_norm": 0.28741687536239624, "learning_rate": 8.271474409920542e-06, "loss": 0.3746, "step": 17000 }, { "epoch": 1.72844652297682, "grad_norm": 0.3059135377407074, "learning_rate": 8.271206022877765e-06, "loss": 0.3808, "step": 17001 }, { "epoch": 1.728548190321269, "grad_norm": 0.29505807161331177, "learning_rate": 8.27093761935543e-06, "loss": 0.3472, "step": 17002 }, { "epoch": 1.7286498576657179, "grad_norm": 0.2864242196083069, "learning_rate": 8.270669199354888e-06, "loss": 0.3658, "step": 17003 }, { "epoch": 1.7287515250101668, "grad_norm": 0.2594144642353058, "learning_rate": 8.270400762877493e-06, "loss": 0.3518, "step": 17004 }, { "epoch": 1.7288531923546158, "grad_norm": 0.3137771487236023, "learning_rate": 8.270132309924595e-06, "loss": 0.3878, "step": 17005 }, { "epoch": 1.7289548596990647, "grad_norm": 0.26236945390701294, "learning_rate": 8.26986384049755e-06, "loss": 0.381, "step": 17006 }, { "epoch": 1.7290565270435136, "grad_norm": 0.26970916986465454, "learning_rate": 8.269595354597705e-06, "loss": 0.3591, "step": 17007 }, { "epoch": 1.7291581943879626, "grad_norm": 0.280049204826355, "learning_rate": 8.269326852226416e-06, "loss": 0.3748, "step": 17008 }, { "epoch": 1.7292598617324115, "grad_norm": 0.27075451612472534, "learning_rate": 8.269058333385036e-06, "loss": 0.3412, "step": 17009 }, { "epoch": 1.7293615290768605, "grad_norm": 0.27323877811431885, "learning_rate": 8.268789798074917e-06, "loss": 0.3347, "step": 17010 }, { "epoch": 1.7294631964213094, "grad_norm": 0.267787903547287, "learning_rate": 8.268521246297413e-06, "loss": 0.3285, "step": 17011 }, { "epoch": 1.7295648637657584, "grad_norm": 0.2836796045303345, "learning_rate": 8.268252678053874e-06, "loss": 0.3604, "step": 17012 }, { "epoch": 1.7296665311102073, "grad_norm": 0.2970920205116272, "learning_rate": 8.267984093345654e-06, "loss": 0.3677, "step": 17013 }, { "epoch": 1.7297681984546562, "grad_norm": 0.27447670698165894, "learning_rate": 8.267715492174108e-06, "loss": 0.3536, "step": 17014 }, { "epoch": 1.7298698657991052, "grad_norm": 0.3045895993709564, "learning_rate": 8.267446874540589e-06, "loss": 0.3721, "step": 17015 }, { "epoch": 1.7299715331435543, "grad_norm": 0.28225505352020264, "learning_rate": 8.267178240446446e-06, "loss": 0.376, "step": 17016 }, { "epoch": 1.7300732004880033, "grad_norm": 0.27098339796066284, "learning_rate": 8.266909589893038e-06, "loss": 0.3669, "step": 17017 }, { "epoch": 1.7301748678324522, "grad_norm": 0.2658153772354126, "learning_rate": 8.266640922881714e-06, "loss": 0.3635, "step": 17018 }, { "epoch": 1.7302765351769012, "grad_norm": 0.30194932222366333, "learning_rate": 8.266372239413831e-06, "loss": 0.379, "step": 17019 }, { "epoch": 1.73037820252135, "grad_norm": 0.28462496399879456, "learning_rate": 8.26610353949074e-06, "loss": 0.3734, "step": 17020 }, { "epoch": 1.7304798698657993, "grad_norm": 0.28016775846481323, "learning_rate": 8.265834823113795e-06, "loss": 0.3759, "step": 17021 }, { "epoch": 1.7305815372102482, "grad_norm": 0.2877216339111328, "learning_rate": 8.265566090284352e-06, "loss": 0.3433, "step": 17022 }, { "epoch": 1.7306832045546972, "grad_norm": 0.2844841778278351, "learning_rate": 8.265297341003762e-06, "loss": 0.3586, "step": 17023 }, { "epoch": 1.730784871899146, "grad_norm": 0.2735595703125, "learning_rate": 8.26502857527338e-06, "loss": 0.3742, "step": 17024 }, { "epoch": 1.730886539243595, "grad_norm": 0.28821200132369995, "learning_rate": 8.264759793094559e-06, "loss": 0.3736, "step": 17025 }, { "epoch": 1.730988206588044, "grad_norm": 0.2719022035598755, "learning_rate": 8.264490994468655e-06, "loss": 0.3384, "step": 17026 }, { "epoch": 1.731089873932493, "grad_norm": 0.29244789481163025, "learning_rate": 8.264222179397021e-06, "loss": 0.353, "step": 17027 }, { "epoch": 1.7311915412769419, "grad_norm": 0.3040948808193207, "learning_rate": 8.263953347881011e-06, "loss": 0.3765, "step": 17028 }, { "epoch": 1.7312932086213908, "grad_norm": 0.32979312539100647, "learning_rate": 8.26368449992198e-06, "loss": 0.3791, "step": 17029 }, { "epoch": 1.7313948759658397, "grad_norm": 0.2945859432220459, "learning_rate": 8.26341563552128e-06, "loss": 0.3453, "step": 17030 }, { "epoch": 1.7314965433102887, "grad_norm": 0.2856007218360901, "learning_rate": 8.26314675468027e-06, "loss": 0.3408, "step": 17031 }, { "epoch": 1.7315982106547376, "grad_norm": 0.2593441903591156, "learning_rate": 8.262877857400302e-06, "loss": 0.3514, "step": 17032 }, { "epoch": 1.7316998779991866, "grad_norm": 0.270088255405426, "learning_rate": 8.262608943682732e-06, "loss": 0.3651, "step": 17033 }, { "epoch": 1.7318015453436355, "grad_norm": 0.2678025960922241, "learning_rate": 8.262340013528912e-06, "loss": 0.3583, "step": 17034 }, { "epoch": 1.7319032126880844, "grad_norm": 0.2610210180282593, "learning_rate": 8.262071066940196e-06, "loss": 0.3328, "step": 17035 }, { "epoch": 1.7320048800325334, "grad_norm": 0.28151071071624756, "learning_rate": 8.261802103917944e-06, "loss": 0.3704, "step": 17036 }, { "epoch": 1.7321065473769826, "grad_norm": 0.2693970203399658, "learning_rate": 8.261533124463508e-06, "loss": 0.3549, "step": 17037 }, { "epoch": 1.7322082147214315, "grad_norm": 0.2817913293838501, "learning_rate": 8.261264128578244e-06, "loss": 0.3702, "step": 17038 }, { "epoch": 1.7323098820658804, "grad_norm": 0.2695510685443878, "learning_rate": 8.260995116263504e-06, "loss": 0.3654, "step": 17039 }, { "epoch": 1.7324115494103294, "grad_norm": 0.30566269159317017, "learning_rate": 8.260726087520645e-06, "loss": 0.3375, "step": 17040 }, { "epoch": 1.7325132167547783, "grad_norm": 0.27947551012039185, "learning_rate": 8.260457042351026e-06, "loss": 0.3485, "step": 17041 }, { "epoch": 1.7326148840992275, "grad_norm": 0.3052970767021179, "learning_rate": 8.260187980755997e-06, "loss": 0.3776, "step": 17042 }, { "epoch": 1.7327165514436764, "grad_norm": 0.2837497591972351, "learning_rate": 8.259918902736917e-06, "loss": 0.3643, "step": 17043 }, { "epoch": 1.7328182187881254, "grad_norm": 0.29214733839035034, "learning_rate": 8.259649808295138e-06, "loss": 0.3499, "step": 17044 }, { "epoch": 1.7329198861325743, "grad_norm": 0.2639951705932617, "learning_rate": 8.259380697432019e-06, "loss": 0.3493, "step": 17045 }, { "epoch": 1.7330215534770232, "grad_norm": 0.28318098187446594, "learning_rate": 8.259111570148916e-06, "loss": 0.3614, "step": 17046 }, { "epoch": 1.7331232208214722, "grad_norm": 0.26967424154281616, "learning_rate": 8.258842426447182e-06, "loss": 0.3586, "step": 17047 }, { "epoch": 1.7332248881659211, "grad_norm": 0.2857245206832886, "learning_rate": 8.258573266328174e-06, "loss": 0.3772, "step": 17048 }, { "epoch": 1.73332655551037, "grad_norm": 0.28825652599334717, "learning_rate": 8.258304089793249e-06, "loss": 0.3539, "step": 17049 }, { "epoch": 1.733428222854819, "grad_norm": 0.29305773973464966, "learning_rate": 8.258034896843762e-06, "loss": 0.3467, "step": 17050 }, { "epoch": 1.733529890199268, "grad_norm": 0.29466280341148376, "learning_rate": 8.25776568748107e-06, "loss": 0.3802, "step": 17051 }, { "epoch": 1.733631557543717, "grad_norm": 0.2703191637992859, "learning_rate": 8.257496461706527e-06, "loss": 0.3274, "step": 17052 }, { "epoch": 1.7337332248881658, "grad_norm": 0.27534884214401245, "learning_rate": 8.257227219521494e-06, "loss": 0.3435, "step": 17053 }, { "epoch": 1.7338348922326148, "grad_norm": 0.26676145195961, "learning_rate": 8.256957960927321e-06, "loss": 0.3533, "step": 17054 }, { "epoch": 1.7339365595770637, "grad_norm": 0.30626043677330017, "learning_rate": 8.256688685925369e-06, "loss": 0.3505, "step": 17055 }, { "epoch": 1.7340382269215127, "grad_norm": 0.2934536933898926, "learning_rate": 8.256419394516994e-06, "loss": 0.3609, "step": 17056 }, { "epoch": 1.7341398942659618, "grad_norm": 0.27501076459884644, "learning_rate": 8.256150086703552e-06, "loss": 0.3609, "step": 17057 }, { "epoch": 1.7342415616104108, "grad_norm": 0.2654562294483185, "learning_rate": 8.255880762486398e-06, "loss": 0.3533, "step": 17058 }, { "epoch": 1.7343432289548597, "grad_norm": 0.29021957516670227, "learning_rate": 8.255611421866892e-06, "loss": 0.3454, "step": 17059 }, { "epoch": 1.7344448962993086, "grad_norm": 0.25514644384384155, "learning_rate": 8.255342064846388e-06, "loss": 0.3416, "step": 17060 }, { "epoch": 1.7345465636437576, "grad_norm": 0.2736089527606964, "learning_rate": 8.255072691426244e-06, "loss": 0.3294, "step": 17061 }, { "epoch": 1.7346482309882068, "grad_norm": 0.2887352406978607, "learning_rate": 8.254803301607818e-06, "loss": 0.3872, "step": 17062 }, { "epoch": 1.7347498983326557, "grad_norm": 0.28164437413215637, "learning_rate": 8.25453389539247e-06, "loss": 0.3596, "step": 17063 }, { "epoch": 1.7348515656771046, "grad_norm": 0.2618791162967682, "learning_rate": 8.25426447278155e-06, "loss": 0.352, "step": 17064 }, { "epoch": 1.7349532330215536, "grad_norm": 0.29017093777656555, "learning_rate": 8.253995033776419e-06, "loss": 0.3735, "step": 17065 }, { "epoch": 1.7350549003660025, "grad_norm": 0.275936096906662, "learning_rate": 8.253725578378434e-06, "loss": 0.3553, "step": 17066 }, { "epoch": 1.7351565677104515, "grad_norm": 0.29757872223854065, "learning_rate": 8.253456106588954e-06, "loss": 0.3701, "step": 17067 }, { "epoch": 1.7352582350549004, "grad_norm": 0.2602056860923767, "learning_rate": 8.253186618409333e-06, "loss": 0.3768, "step": 17068 }, { "epoch": 1.7353599023993493, "grad_norm": 0.28943145275115967, "learning_rate": 8.252917113840932e-06, "loss": 0.3529, "step": 17069 }, { "epoch": 1.7354615697437983, "grad_norm": 0.2932639718055725, "learning_rate": 8.252647592885109e-06, "loss": 0.33, "step": 17070 }, { "epoch": 1.7355632370882472, "grad_norm": 0.2833607792854309, "learning_rate": 8.25237805554322e-06, "loss": 0.3457, "step": 17071 }, { "epoch": 1.7356649044326962, "grad_norm": 0.3039880096912384, "learning_rate": 8.252108501816623e-06, "loss": 0.3604, "step": 17072 }, { "epoch": 1.735766571777145, "grad_norm": 0.27534568309783936, "learning_rate": 8.251838931706676e-06, "loss": 0.3703, "step": 17073 }, { "epoch": 1.735868239121594, "grad_norm": 0.29207366704940796, "learning_rate": 8.251569345214736e-06, "loss": 0.349, "step": 17074 }, { "epoch": 1.735969906466043, "grad_norm": 0.28794968128204346, "learning_rate": 8.251299742342164e-06, "loss": 0.3746, "step": 17075 }, { "epoch": 1.736071573810492, "grad_norm": 0.269451379776001, "learning_rate": 8.251030123090316e-06, "loss": 0.3535, "step": 17076 }, { "epoch": 1.7361732411549409, "grad_norm": 0.28430813550949097, "learning_rate": 8.250760487460549e-06, "loss": 0.3855, "step": 17077 }, { "epoch": 1.73627490849939, "grad_norm": 0.2895045280456543, "learning_rate": 8.250490835454226e-06, "loss": 0.3426, "step": 17078 }, { "epoch": 1.736376575843839, "grad_norm": 0.3176672160625458, "learning_rate": 8.250221167072702e-06, "loss": 0.4017, "step": 17079 }, { "epoch": 1.736478243188288, "grad_norm": 0.26961857080459595, "learning_rate": 8.249951482317335e-06, "loss": 0.4152, "step": 17080 }, { "epoch": 1.7365799105327369, "grad_norm": 0.2915196418762207, "learning_rate": 8.249681781189486e-06, "loss": 0.3879, "step": 17081 }, { "epoch": 1.7366815778771858, "grad_norm": 0.2665242552757263, "learning_rate": 8.249412063690512e-06, "loss": 0.3408, "step": 17082 }, { "epoch": 1.736783245221635, "grad_norm": 0.2816702425479889, "learning_rate": 8.24914232982177e-06, "loss": 0.3374, "step": 17083 }, { "epoch": 1.736884912566084, "grad_norm": 0.26453545689582825, "learning_rate": 8.248872579584625e-06, "loss": 0.3553, "step": 17084 }, { "epoch": 1.7369865799105328, "grad_norm": 0.2702883780002594, "learning_rate": 8.24860281298043e-06, "loss": 0.361, "step": 17085 }, { "epoch": 1.7370882472549818, "grad_norm": 0.2831117808818817, "learning_rate": 8.248333030010546e-06, "loss": 0.3688, "step": 17086 }, { "epoch": 1.7371899145994307, "grad_norm": 0.2854655683040619, "learning_rate": 8.248063230676332e-06, "loss": 0.3441, "step": 17087 }, { "epoch": 1.7372915819438797, "grad_norm": 0.2904248535633087, "learning_rate": 8.247793414979149e-06, "loss": 0.3398, "step": 17088 }, { "epoch": 1.7373932492883286, "grad_norm": 0.2629278004169464, "learning_rate": 8.247523582920351e-06, "loss": 0.3594, "step": 17089 }, { "epoch": 1.7374949166327776, "grad_norm": 0.27586859464645386, "learning_rate": 8.247253734501304e-06, "loss": 0.3861, "step": 17090 }, { "epoch": 1.7375965839772265, "grad_norm": 0.3061559200286865, "learning_rate": 8.246983869723363e-06, "loss": 0.3537, "step": 17091 }, { "epoch": 1.7376982513216754, "grad_norm": 0.2813096344470978, "learning_rate": 8.24671398858789e-06, "loss": 0.3678, "step": 17092 }, { "epoch": 1.7377999186661244, "grad_norm": 0.25385168194770813, "learning_rate": 8.246444091096242e-06, "loss": 0.3368, "step": 17093 }, { "epoch": 1.7379015860105733, "grad_norm": 0.2782873213291168, "learning_rate": 8.24617417724978e-06, "loss": 0.3651, "step": 17094 }, { "epoch": 1.7380032533550223, "grad_norm": 0.28611987829208374, "learning_rate": 8.245904247049866e-06, "loss": 0.3479, "step": 17095 }, { "epoch": 1.7381049206994712, "grad_norm": 0.2967594563961029, "learning_rate": 8.245634300497856e-06, "loss": 0.374, "step": 17096 }, { "epoch": 1.7382065880439201, "grad_norm": 0.2919350564479828, "learning_rate": 8.245364337595112e-06, "loss": 0.3478, "step": 17097 }, { "epoch": 1.7383082553883693, "grad_norm": 0.26472148299217224, "learning_rate": 8.245094358342993e-06, "loss": 0.3747, "step": 17098 }, { "epoch": 1.7384099227328182, "grad_norm": 0.2918909788131714, "learning_rate": 8.244824362742863e-06, "loss": 0.3472, "step": 17099 }, { "epoch": 1.7385115900772672, "grad_norm": 0.3202909529209137, "learning_rate": 8.244554350796076e-06, "loss": 0.3858, "step": 17100 }, { "epoch": 1.7386132574217161, "grad_norm": 0.30243462324142456, "learning_rate": 8.244284322503994e-06, "loss": 0.3709, "step": 17101 }, { "epoch": 1.738714924766165, "grad_norm": 0.2600765824317932, "learning_rate": 8.24401427786798e-06, "loss": 0.321, "step": 17102 }, { "epoch": 1.7388165921106142, "grad_norm": 0.2887658178806305, "learning_rate": 8.243744216889394e-06, "loss": 0.3282, "step": 17103 }, { "epoch": 1.7389182594550632, "grad_norm": 0.32007744908332825, "learning_rate": 8.243474139569597e-06, "loss": 0.3661, "step": 17104 }, { "epoch": 1.7390199267995121, "grad_norm": 0.30043306946754456, "learning_rate": 8.243204045909945e-06, "loss": 0.356, "step": 17105 }, { "epoch": 1.739121594143961, "grad_norm": 0.27774354815483093, "learning_rate": 8.242933935911802e-06, "loss": 0.3359, "step": 17106 }, { "epoch": 1.73922326148841, "grad_norm": 0.2626799941062927, "learning_rate": 8.242663809576527e-06, "loss": 0.337, "step": 17107 }, { "epoch": 1.739324928832859, "grad_norm": 0.34488463401794434, "learning_rate": 8.242393666905486e-06, "loss": 0.3823, "step": 17108 }, { "epoch": 1.7394265961773079, "grad_norm": 0.2867605686187744, "learning_rate": 8.242123507900035e-06, "loss": 0.3604, "step": 17109 }, { "epoch": 1.7395282635217568, "grad_norm": 0.25992780923843384, "learning_rate": 8.241853332561535e-06, "loss": 0.3459, "step": 17110 }, { "epoch": 1.7396299308662058, "grad_norm": 0.35937950015068054, "learning_rate": 8.24158314089135e-06, "loss": 0.3628, "step": 17111 }, { "epoch": 1.7397315982106547, "grad_norm": 0.29794004559516907, "learning_rate": 8.241312932890837e-06, "loss": 0.3358, "step": 17112 }, { "epoch": 1.7398332655551036, "grad_norm": 0.2701333165168762, "learning_rate": 8.241042708561362e-06, "loss": 0.3673, "step": 17113 }, { "epoch": 1.7399349328995526, "grad_norm": 0.27931177616119385, "learning_rate": 8.240772467904284e-06, "loss": 0.3666, "step": 17114 }, { "epoch": 1.7400366002440015, "grad_norm": 0.3115966022014618, "learning_rate": 8.240502210920962e-06, "loss": 0.366, "step": 17115 }, { "epoch": 1.7401382675884505, "grad_norm": 0.2947292923927307, "learning_rate": 8.240231937612761e-06, "loss": 0.3167, "step": 17116 }, { "epoch": 1.7402399349328994, "grad_norm": 0.29598134756088257, "learning_rate": 8.239961647981042e-06, "loss": 0.3551, "step": 17117 }, { "epoch": 1.7403416022773484, "grad_norm": 0.30232855677604675, "learning_rate": 8.239691342027168e-06, "loss": 0.3395, "step": 17118 }, { "epoch": 1.7404432696217975, "grad_norm": 0.27634304761886597, "learning_rate": 8.239421019752495e-06, "loss": 0.3533, "step": 17119 }, { "epoch": 1.7405449369662465, "grad_norm": 0.2598057687282562, "learning_rate": 8.239150681158393e-06, "loss": 0.3437, "step": 17120 }, { "epoch": 1.7406466043106954, "grad_norm": 0.29144757986068726, "learning_rate": 8.238880326246217e-06, "loss": 0.3663, "step": 17121 }, { "epoch": 1.7407482716551443, "grad_norm": 0.2753475308418274, "learning_rate": 8.238609955017331e-06, "loss": 0.3348, "step": 17122 }, { "epoch": 1.7408499389995933, "grad_norm": 0.26521894335746765, "learning_rate": 8.2383395674731e-06, "loss": 0.315, "step": 17123 }, { "epoch": 1.7409516063440424, "grad_norm": 0.3059134781360626, "learning_rate": 8.238069163614882e-06, "loss": 0.3583, "step": 17124 }, { "epoch": 1.7410532736884914, "grad_norm": 0.26657915115356445, "learning_rate": 8.23779874344404e-06, "loss": 0.3507, "step": 17125 }, { "epoch": 1.7411549410329403, "grad_norm": 0.2632102370262146, "learning_rate": 8.23752830696194e-06, "loss": 0.351, "step": 17126 }, { "epoch": 1.7412566083773893, "grad_norm": 0.25880685448646545, "learning_rate": 8.23725785416994e-06, "loss": 0.336, "step": 17127 }, { "epoch": 1.7413582757218382, "grad_norm": 0.28530871868133545, "learning_rate": 8.236987385069406e-06, "loss": 0.3568, "step": 17128 }, { "epoch": 1.7414599430662872, "grad_norm": 0.28710535168647766, "learning_rate": 8.236716899661697e-06, "loss": 0.3385, "step": 17129 }, { "epoch": 1.741561610410736, "grad_norm": 0.2701575756072998, "learning_rate": 8.23644639794818e-06, "loss": 0.3643, "step": 17130 }, { "epoch": 1.741663277755185, "grad_norm": 0.27035653591156006, "learning_rate": 8.23617587993021e-06, "loss": 0.3423, "step": 17131 }, { "epoch": 1.741764945099634, "grad_norm": 0.2842746078968048, "learning_rate": 8.235905345609159e-06, "loss": 0.3771, "step": 17132 }, { "epoch": 1.741866612444083, "grad_norm": 0.2554756700992584, "learning_rate": 8.235634794986385e-06, "loss": 0.3547, "step": 17133 }, { "epoch": 1.7419682797885319, "grad_norm": 0.2910318970680237, "learning_rate": 8.235364228063252e-06, "loss": 0.3907, "step": 17134 }, { "epoch": 1.7420699471329808, "grad_norm": 0.27212756872177124, "learning_rate": 8.235093644841124e-06, "loss": 0.3391, "step": 17135 }, { "epoch": 1.7421716144774297, "grad_norm": 0.2811014950275421, "learning_rate": 8.234823045321361e-06, "loss": 0.3843, "step": 17136 }, { "epoch": 1.7422732818218787, "grad_norm": 0.30843865871429443, "learning_rate": 8.234552429505328e-06, "loss": 0.3966, "step": 17137 }, { "epoch": 1.7423749491663276, "grad_norm": 0.30878305435180664, "learning_rate": 8.23428179739439e-06, "loss": 0.3713, "step": 17138 }, { "epoch": 1.7424766165107768, "grad_norm": 0.2773054540157318, "learning_rate": 8.234011148989908e-06, "loss": 0.3688, "step": 17139 }, { "epoch": 1.7425782838552257, "grad_norm": 0.29075682163238525, "learning_rate": 8.233740484293247e-06, "loss": 0.3586, "step": 17140 }, { "epoch": 1.7426799511996747, "grad_norm": 0.2650967240333557, "learning_rate": 8.23346980330577e-06, "loss": 0.3643, "step": 17141 }, { "epoch": 1.7427816185441236, "grad_norm": 0.34616658091545105, "learning_rate": 8.233199106028838e-06, "loss": 0.3845, "step": 17142 }, { "epoch": 1.7428832858885726, "grad_norm": 0.30148887634277344, "learning_rate": 8.23292839246382e-06, "loss": 0.3717, "step": 17143 }, { "epoch": 1.7429849532330217, "grad_norm": 0.2655097246170044, "learning_rate": 8.232657662612076e-06, "loss": 0.3389, "step": 17144 }, { "epoch": 1.7430866205774707, "grad_norm": 0.26350441575050354, "learning_rate": 8.232386916474971e-06, "loss": 0.3941, "step": 17145 }, { "epoch": 1.7431882879219196, "grad_norm": 0.29337307810783386, "learning_rate": 8.232116154053869e-06, "loss": 0.3661, "step": 17146 }, { "epoch": 1.7432899552663685, "grad_norm": 0.2713399827480316, "learning_rate": 8.231845375350134e-06, "loss": 0.3434, "step": 17147 }, { "epoch": 1.7433916226108175, "grad_norm": 0.2756417393684387, "learning_rate": 8.231574580365129e-06, "loss": 0.36, "step": 17148 }, { "epoch": 1.7434932899552664, "grad_norm": 0.2698444128036499, "learning_rate": 8.23130376910022e-06, "loss": 0.3475, "step": 17149 }, { "epoch": 1.7435949572997154, "grad_norm": 0.269117534160614, "learning_rate": 8.231032941556771e-06, "loss": 0.353, "step": 17150 }, { "epoch": 1.7436966246441643, "grad_norm": 0.28912392258644104, "learning_rate": 8.230762097736145e-06, "loss": 0.3376, "step": 17151 }, { "epoch": 1.7437982919886132, "grad_norm": 0.2564544677734375, "learning_rate": 8.230491237639709e-06, "loss": 0.3354, "step": 17152 }, { "epoch": 1.7438999593330622, "grad_norm": 0.2800321578979492, "learning_rate": 8.230220361268823e-06, "loss": 0.4009, "step": 17153 }, { "epoch": 1.7440016266775111, "grad_norm": 0.2943761348724365, "learning_rate": 8.229949468624857e-06, "loss": 0.3425, "step": 17154 }, { "epoch": 1.74410329402196, "grad_norm": 0.2613385021686554, "learning_rate": 8.229678559709172e-06, "loss": 0.3805, "step": 17155 }, { "epoch": 1.744204961366409, "grad_norm": 0.27176475524902344, "learning_rate": 8.229407634523135e-06, "loss": 0.3531, "step": 17156 }, { "epoch": 1.744306628710858, "grad_norm": 0.2652058005332947, "learning_rate": 8.229136693068108e-06, "loss": 0.3271, "step": 17157 }, { "epoch": 1.744408296055307, "grad_norm": 0.29748857021331787, "learning_rate": 8.228865735345459e-06, "loss": 0.4253, "step": 17158 }, { "epoch": 1.7445099633997558, "grad_norm": 0.256904661655426, "learning_rate": 8.22859476135655e-06, "loss": 0.3317, "step": 17159 }, { "epoch": 1.744611630744205, "grad_norm": 0.2709876596927643, "learning_rate": 8.228323771102749e-06, "loss": 0.371, "step": 17160 }, { "epoch": 1.744713298088654, "grad_norm": 0.27842170000076294, "learning_rate": 8.228052764585422e-06, "loss": 0.3593, "step": 17161 }, { "epoch": 1.7448149654331029, "grad_norm": 0.2777070105075836, "learning_rate": 8.22778174180593e-06, "loss": 0.3768, "step": 17162 }, { "epoch": 1.7449166327775518, "grad_norm": 0.32250863313674927, "learning_rate": 8.22751070276564e-06, "loss": 0.3831, "step": 17163 }, { "epoch": 1.7450183001220008, "grad_norm": 0.2653355002403259, "learning_rate": 8.227239647465918e-06, "loss": 0.371, "step": 17164 }, { "epoch": 1.74511996746645, "grad_norm": 0.2949140667915344, "learning_rate": 8.226968575908131e-06, "loss": 0.4018, "step": 17165 }, { "epoch": 1.7452216348108989, "grad_norm": 0.31705230474472046, "learning_rate": 8.226697488093643e-06, "loss": 0.3434, "step": 17166 }, { "epoch": 1.7453233021553478, "grad_norm": 0.2563791275024414, "learning_rate": 8.22642638402382e-06, "loss": 0.3654, "step": 17167 }, { "epoch": 1.7454249694997968, "grad_norm": 0.2678699195384979, "learning_rate": 8.226155263700026e-06, "loss": 0.3616, "step": 17168 }, { "epoch": 1.7455266368442457, "grad_norm": 0.2661028802394867, "learning_rate": 8.22588412712363e-06, "loss": 0.3379, "step": 17169 }, { "epoch": 1.7456283041886946, "grad_norm": 0.2533591389656067, "learning_rate": 8.225612974295996e-06, "loss": 0.3525, "step": 17170 }, { "epoch": 1.7457299715331436, "grad_norm": 0.25510480999946594, "learning_rate": 8.225341805218491e-06, "loss": 0.329, "step": 17171 }, { "epoch": 1.7458316388775925, "grad_norm": 0.27465400099754333, "learning_rate": 8.225070619892478e-06, "loss": 0.3591, "step": 17172 }, { "epoch": 1.7459333062220415, "grad_norm": 0.2647949457168579, "learning_rate": 8.224799418319329e-06, "loss": 0.3573, "step": 17173 }, { "epoch": 1.7460349735664904, "grad_norm": 0.27719220519065857, "learning_rate": 8.224528200500404e-06, "loss": 0.3755, "step": 17174 }, { "epoch": 1.7461366409109393, "grad_norm": 0.26808202266693115, "learning_rate": 8.224256966437074e-06, "loss": 0.3492, "step": 17175 }, { "epoch": 1.7462383082553883, "grad_norm": 0.28136536478996277, "learning_rate": 8.223985716130704e-06, "loss": 0.3618, "step": 17176 }, { "epoch": 1.7463399755998372, "grad_norm": 0.28901562094688416, "learning_rate": 8.223714449582658e-06, "loss": 0.3701, "step": 17177 }, { "epoch": 1.7464416429442862, "grad_norm": 0.2709527611732483, "learning_rate": 8.223443166794305e-06, "loss": 0.3472, "step": 17178 }, { "epoch": 1.746543310288735, "grad_norm": 0.27926480770111084, "learning_rate": 8.223171867767013e-06, "loss": 0.3838, "step": 17179 }, { "epoch": 1.7466449776331843, "grad_norm": 0.26021429896354675, "learning_rate": 8.222900552502147e-06, "loss": 0.3295, "step": 17180 }, { "epoch": 1.7467466449776332, "grad_norm": 0.276978999376297, "learning_rate": 8.222629221001073e-06, "loss": 0.3638, "step": 17181 }, { "epoch": 1.7468483123220822, "grad_norm": 0.2753983438014984, "learning_rate": 8.222357873265158e-06, "loss": 0.338, "step": 17182 }, { "epoch": 1.746949979666531, "grad_norm": 0.27278366684913635, "learning_rate": 8.22208650929577e-06, "loss": 0.3792, "step": 17183 }, { "epoch": 1.74705164701098, "grad_norm": 0.2931290566921234, "learning_rate": 8.221815129094276e-06, "loss": 0.3568, "step": 17184 }, { "epoch": 1.7471533143554292, "grad_norm": 0.28712543845176697, "learning_rate": 8.221543732662046e-06, "loss": 0.3486, "step": 17185 }, { "epoch": 1.7472549816998781, "grad_norm": 0.2709400951862335, "learning_rate": 8.22127232000044e-06, "loss": 0.3376, "step": 17186 }, { "epoch": 1.747356649044327, "grad_norm": 0.28362834453582764, "learning_rate": 8.22100089111083e-06, "loss": 0.3382, "step": 17187 }, { "epoch": 1.747458316388776, "grad_norm": 0.2623334527015686, "learning_rate": 8.220729445994587e-06, "loss": 0.3717, "step": 17188 }, { "epoch": 1.747559983733225, "grad_norm": 0.27343085408210754, "learning_rate": 8.220457984653071e-06, "loss": 0.3684, "step": 17189 }, { "epoch": 1.747661651077674, "grad_norm": 0.2677614092826843, "learning_rate": 8.220186507087653e-06, "loss": 0.3334, "step": 17190 }, { "epoch": 1.7477633184221228, "grad_norm": 0.26544034481048584, "learning_rate": 8.219915013299701e-06, "loss": 0.3565, "step": 17191 }, { "epoch": 1.7478649857665718, "grad_norm": 0.2719080150127411, "learning_rate": 8.219643503290581e-06, "loss": 0.3775, "step": 17192 }, { "epoch": 1.7479666531110207, "grad_norm": 0.2725015878677368, "learning_rate": 8.219371977061665e-06, "loss": 0.3651, "step": 17193 }, { "epoch": 1.7480683204554697, "grad_norm": 0.296006441116333, "learning_rate": 8.219100434614317e-06, "loss": 0.3566, "step": 17194 }, { "epoch": 1.7481699877999186, "grad_norm": 0.2717026174068451, "learning_rate": 8.218828875949907e-06, "loss": 0.3553, "step": 17195 }, { "epoch": 1.7482716551443676, "grad_norm": 0.25383058190345764, "learning_rate": 8.218557301069801e-06, "loss": 0.3749, "step": 17196 }, { "epoch": 1.7483733224888165, "grad_norm": 0.26307031512260437, "learning_rate": 8.218285709975367e-06, "loss": 0.3599, "step": 17197 }, { "epoch": 1.7484749898332654, "grad_norm": 0.2843794822692871, "learning_rate": 8.218014102667976e-06, "loss": 0.3773, "step": 17198 }, { "epoch": 1.7485766571777144, "grad_norm": 0.28194305300712585, "learning_rate": 8.217742479148994e-06, "loss": 0.4073, "step": 17199 }, { "epoch": 1.7486783245221633, "grad_norm": 0.2705431878566742, "learning_rate": 8.217470839419788e-06, "loss": 0.3458, "step": 17200 }, { "epoch": 1.7487799918666125, "grad_norm": 0.297253280878067, "learning_rate": 8.217199183481733e-06, "loss": 0.3881, "step": 17201 }, { "epoch": 1.7488816592110614, "grad_norm": 0.27634289860725403, "learning_rate": 8.216927511336189e-06, "loss": 0.3422, "step": 17202 }, { "epoch": 1.7489833265555104, "grad_norm": 0.2883521616458893, "learning_rate": 8.216655822984529e-06, "loss": 0.3981, "step": 17203 }, { "epoch": 1.7490849938999593, "grad_norm": 0.2890165150165558, "learning_rate": 8.216384118428123e-06, "loss": 0.3702, "step": 17204 }, { "epoch": 1.7491866612444082, "grad_norm": 0.28772175312042236, "learning_rate": 8.216112397668339e-06, "loss": 0.3631, "step": 17205 }, { "epoch": 1.7492883285888574, "grad_norm": 0.2720433175563812, "learning_rate": 8.215840660706544e-06, "loss": 0.3641, "step": 17206 }, { "epoch": 1.7493899959333064, "grad_norm": 0.2633342146873474, "learning_rate": 8.215568907544107e-06, "loss": 0.3708, "step": 17207 }, { "epoch": 1.7494916632777553, "grad_norm": 0.27911558747291565, "learning_rate": 8.215297138182398e-06, "loss": 0.3454, "step": 17208 }, { "epoch": 1.7495933306222042, "grad_norm": 0.2973769009113312, "learning_rate": 8.215025352622788e-06, "loss": 0.3579, "step": 17209 }, { "epoch": 1.7496949979666532, "grad_norm": 0.29885396361351013, "learning_rate": 8.214753550866642e-06, "loss": 0.3659, "step": 17210 }, { "epoch": 1.7497966653111021, "grad_norm": 0.2797137200832367, "learning_rate": 8.214481732915333e-06, "loss": 0.3526, "step": 17211 }, { "epoch": 1.749898332655551, "grad_norm": 0.27524223923683167, "learning_rate": 8.214209898770228e-06, "loss": 0.3474, "step": 17212 }, { "epoch": 1.75, "grad_norm": 0.28789278864860535, "learning_rate": 8.213938048432697e-06, "loss": 0.3705, "step": 17213 }, { "epoch": 1.750101667344449, "grad_norm": 0.25997433066368103, "learning_rate": 8.213666181904113e-06, "loss": 0.3911, "step": 17214 }, { "epoch": 1.7502033346888979, "grad_norm": 0.2689090669155121, "learning_rate": 8.213394299185839e-06, "loss": 0.3423, "step": 17215 }, { "epoch": 1.7503050020333468, "grad_norm": 0.2686541974544525, "learning_rate": 8.213122400279249e-06, "loss": 0.3479, "step": 17216 }, { "epoch": 1.7504066693777958, "grad_norm": 0.25936460494995117, "learning_rate": 8.212850485185712e-06, "loss": 0.3757, "step": 17217 }, { "epoch": 1.7505083367222447, "grad_norm": 0.27876782417297363, "learning_rate": 8.212578553906598e-06, "loss": 0.3623, "step": 17218 }, { "epoch": 1.7506100040666936, "grad_norm": 0.2610397934913635, "learning_rate": 8.212306606443277e-06, "loss": 0.3421, "step": 17219 }, { "epoch": 1.7507116714111426, "grad_norm": 0.28704124689102173, "learning_rate": 8.212034642797117e-06, "loss": 0.3769, "step": 17220 }, { "epoch": 1.7508133387555918, "grad_norm": 0.29400914907455444, "learning_rate": 8.211762662969492e-06, "loss": 0.3535, "step": 17221 }, { "epoch": 1.7509150061000407, "grad_norm": 0.3034343719482422, "learning_rate": 8.211490666961768e-06, "loss": 0.3473, "step": 17222 }, { "epoch": 1.7510166734444896, "grad_norm": 0.26769739389419556, "learning_rate": 8.211218654775318e-06, "loss": 0.343, "step": 17223 }, { "epoch": 1.7511183407889386, "grad_norm": 0.2815788984298706, "learning_rate": 8.21094662641151e-06, "loss": 0.3156, "step": 17224 }, { "epoch": 1.7512200081333875, "grad_norm": 0.3142077624797821, "learning_rate": 8.21067458187172e-06, "loss": 0.3633, "step": 17225 }, { "epoch": 1.7513216754778367, "grad_norm": 0.28671589493751526, "learning_rate": 8.210402521157313e-06, "loss": 0.3581, "step": 17226 }, { "epoch": 1.7514233428222856, "grad_norm": 0.2886134386062622, "learning_rate": 8.210130444269658e-06, "loss": 0.3978, "step": 17227 }, { "epoch": 1.7515250101667346, "grad_norm": 0.2844267189502716, "learning_rate": 8.209858351210132e-06, "loss": 0.3447, "step": 17228 }, { "epoch": 1.7516266775111835, "grad_norm": 0.2855874300003052, "learning_rate": 8.209586241980101e-06, "loss": 0.4054, "step": 17229 }, { "epoch": 1.7517283448556324, "grad_norm": 0.3039226830005646, "learning_rate": 8.209314116580937e-06, "loss": 0.3555, "step": 17230 }, { "epoch": 1.7518300122000814, "grad_norm": 0.2874864339828491, "learning_rate": 8.20904197501401e-06, "loss": 0.3789, "step": 17231 }, { "epoch": 1.7519316795445303, "grad_norm": 0.2804999053478241, "learning_rate": 8.208769817280695e-06, "loss": 0.3366, "step": 17232 }, { "epoch": 1.7520333468889793, "grad_norm": 0.28674063086509705, "learning_rate": 8.208497643382359e-06, "loss": 0.3575, "step": 17233 }, { "epoch": 1.7521350142334282, "grad_norm": 0.3291318714618683, "learning_rate": 8.208225453320374e-06, "loss": 0.3565, "step": 17234 }, { "epoch": 1.7522366815778772, "grad_norm": 0.26403605937957764, "learning_rate": 8.20795324709611e-06, "loss": 0.3361, "step": 17235 }, { "epoch": 1.752338348922326, "grad_norm": 0.28547561168670654, "learning_rate": 8.207681024710944e-06, "loss": 0.3556, "step": 17236 }, { "epoch": 1.752440016266775, "grad_norm": 0.2725462019443512, "learning_rate": 8.20740878616624e-06, "loss": 0.3386, "step": 17237 }, { "epoch": 1.752541683611224, "grad_norm": 0.2786906063556671, "learning_rate": 8.207136531463374e-06, "loss": 0.3339, "step": 17238 }, { "epoch": 1.752643350955673, "grad_norm": 0.28547173738479614, "learning_rate": 8.206864260603716e-06, "loss": 0.364, "step": 17239 }, { "epoch": 1.7527450183001219, "grad_norm": 0.26558321714401245, "learning_rate": 8.206591973588635e-06, "loss": 0.3268, "step": 17240 }, { "epoch": 1.7528466856445708, "grad_norm": 0.2757493257522583, "learning_rate": 8.206319670419508e-06, "loss": 0.3565, "step": 17241 }, { "epoch": 1.75294835298902, "grad_norm": 0.28793632984161377, "learning_rate": 8.206047351097705e-06, "loss": 0.3663, "step": 17242 }, { "epoch": 1.753050020333469, "grad_norm": 0.29156044125556946, "learning_rate": 8.205775015624599e-06, "loss": 0.3551, "step": 17243 }, { "epoch": 1.7531516876779178, "grad_norm": 0.2709364891052246, "learning_rate": 8.205502664001559e-06, "loss": 0.346, "step": 17244 }, { "epoch": 1.7532533550223668, "grad_norm": 0.2851077914237976, "learning_rate": 8.205230296229957e-06, "loss": 0.3632, "step": 17245 }, { "epoch": 1.7533550223668157, "grad_norm": 0.2608727514743805, "learning_rate": 8.204957912311167e-06, "loss": 0.3546, "step": 17246 }, { "epoch": 1.753456689711265, "grad_norm": 0.31599748134613037, "learning_rate": 8.20468551224656e-06, "loss": 0.3501, "step": 17247 }, { "epoch": 1.7535583570557138, "grad_norm": 0.283674418926239, "learning_rate": 8.20441309603751e-06, "loss": 0.3951, "step": 17248 }, { "epoch": 1.7536600244001628, "grad_norm": 0.28266069293022156, "learning_rate": 8.204140663685387e-06, "loss": 0.366, "step": 17249 }, { "epoch": 1.7537616917446117, "grad_norm": 0.30694204568862915, "learning_rate": 8.203868215191567e-06, "loss": 0.3559, "step": 17250 }, { "epoch": 1.7538633590890607, "grad_norm": 0.3054816424846649, "learning_rate": 8.203595750557421e-06, "loss": 0.3824, "step": 17251 }, { "epoch": 1.7539650264335096, "grad_norm": 0.281882643699646, "learning_rate": 8.203323269784318e-06, "loss": 0.362, "step": 17252 }, { "epoch": 1.7540666937779585, "grad_norm": 0.2658376097679138, "learning_rate": 8.203050772873633e-06, "loss": 0.3791, "step": 17253 }, { "epoch": 1.7541683611224075, "grad_norm": 0.2708401679992676, "learning_rate": 8.202778259826742e-06, "loss": 0.3457, "step": 17254 }, { "epoch": 1.7542700284668564, "grad_norm": 0.2953862249851227, "learning_rate": 8.202505730645015e-06, "loss": 0.3548, "step": 17255 }, { "epoch": 1.7543716958113054, "grad_norm": 0.29882630705833435, "learning_rate": 8.202233185329824e-06, "loss": 0.39, "step": 17256 }, { "epoch": 1.7544733631557543, "grad_norm": 0.2689670920372009, "learning_rate": 8.201960623882544e-06, "loss": 0.3834, "step": 17257 }, { "epoch": 1.7545750305002032, "grad_norm": 0.301357239484787, "learning_rate": 8.201688046304545e-06, "loss": 0.384, "step": 17258 }, { "epoch": 1.7546766978446522, "grad_norm": 0.2890520989894867, "learning_rate": 8.201415452597205e-06, "loss": 0.3547, "step": 17259 }, { "epoch": 1.7547783651891011, "grad_norm": 0.28263747692108154, "learning_rate": 8.201142842761894e-06, "loss": 0.334, "step": 17260 }, { "epoch": 1.75488003253355, "grad_norm": 0.2910038232803345, "learning_rate": 8.200870216799986e-06, "loss": 0.3581, "step": 17261 }, { "epoch": 1.7549816998779992, "grad_norm": 0.2799658477306366, "learning_rate": 8.200597574712855e-06, "loss": 0.3657, "step": 17262 }, { "epoch": 1.7550833672224482, "grad_norm": 0.27291956543922424, "learning_rate": 8.200324916501872e-06, "loss": 0.3486, "step": 17263 }, { "epoch": 1.7551850345668971, "grad_norm": 0.25199094414711, "learning_rate": 8.200052242168415e-06, "loss": 0.3734, "step": 17264 }, { "epoch": 1.755286701911346, "grad_norm": 0.2735960781574249, "learning_rate": 8.199779551713856e-06, "loss": 0.3822, "step": 17265 }, { "epoch": 1.755388369255795, "grad_norm": 0.2706296741962433, "learning_rate": 8.199506845139566e-06, "loss": 0.3516, "step": 17266 }, { "epoch": 1.7554900366002442, "grad_norm": 0.2527170479297638, "learning_rate": 8.199234122446921e-06, "loss": 0.3416, "step": 17267 }, { "epoch": 1.755591703944693, "grad_norm": 0.2559928596019745, "learning_rate": 8.198961383637294e-06, "loss": 0.3414, "step": 17268 }, { "epoch": 1.755693371289142, "grad_norm": 0.2942732870578766, "learning_rate": 8.198688628712062e-06, "loss": 0.3579, "step": 17269 }, { "epoch": 1.755795038633591, "grad_norm": 0.3027549088001251, "learning_rate": 8.198415857672595e-06, "loss": 0.3628, "step": 17270 }, { "epoch": 1.75589670597804, "grad_norm": 0.2755458950996399, "learning_rate": 8.19814307052027e-06, "loss": 0.3692, "step": 17271 }, { "epoch": 1.7559983733224889, "grad_norm": 0.25599023699760437, "learning_rate": 8.197870267256458e-06, "loss": 0.383, "step": 17272 }, { "epoch": 1.7561000406669378, "grad_norm": 0.28253936767578125, "learning_rate": 8.197597447882538e-06, "loss": 0.365, "step": 17273 }, { "epoch": 1.7562017080113868, "grad_norm": 0.32311639189720154, "learning_rate": 8.197324612399882e-06, "loss": 0.3543, "step": 17274 }, { "epoch": 1.7563033753558357, "grad_norm": 0.28572261333465576, "learning_rate": 8.197051760809863e-06, "loss": 0.4051, "step": 17275 }, { "epoch": 1.7564050427002846, "grad_norm": 0.26500204205513, "learning_rate": 8.196778893113858e-06, "loss": 0.3579, "step": 17276 }, { "epoch": 1.7565067100447336, "grad_norm": 0.2735654413700104, "learning_rate": 8.196506009313239e-06, "loss": 0.3442, "step": 17277 }, { "epoch": 1.7566083773891825, "grad_norm": 0.2894761264324188, "learning_rate": 8.196233109409384e-06, "loss": 0.3221, "step": 17278 }, { "epoch": 1.7567100447336315, "grad_norm": 0.26564574241638184, "learning_rate": 8.195960193403664e-06, "loss": 0.3667, "step": 17279 }, { "epoch": 1.7568117120780804, "grad_norm": 0.2645260691642761, "learning_rate": 8.195687261297458e-06, "loss": 0.3508, "step": 17280 }, { "epoch": 1.7569133794225293, "grad_norm": 0.28715986013412476, "learning_rate": 8.195414313092139e-06, "loss": 0.3548, "step": 17281 }, { "epoch": 1.7570150467669783, "grad_norm": 0.2722907066345215, "learning_rate": 8.195141348789082e-06, "loss": 0.3567, "step": 17282 }, { "epoch": 1.7571167141114274, "grad_norm": 0.32500213384628296, "learning_rate": 8.194868368389661e-06, "loss": 0.3992, "step": 17283 }, { "epoch": 1.7572183814558764, "grad_norm": 0.28170230984687805, "learning_rate": 8.194595371895254e-06, "loss": 0.3513, "step": 17284 }, { "epoch": 1.7573200488003253, "grad_norm": 0.2544357180595398, "learning_rate": 8.194322359307235e-06, "loss": 0.3329, "step": 17285 }, { "epoch": 1.7574217161447743, "grad_norm": 0.2796325087547302, "learning_rate": 8.194049330626977e-06, "loss": 0.3562, "step": 17286 }, { "epoch": 1.7575233834892232, "grad_norm": 0.28544536232948303, "learning_rate": 8.19377628585586e-06, "loss": 0.3949, "step": 17287 }, { "epoch": 1.7576250508336724, "grad_norm": 0.2736043930053711, "learning_rate": 8.193503224995255e-06, "loss": 0.3151, "step": 17288 }, { "epoch": 1.7577267181781213, "grad_norm": 0.25331592559814453, "learning_rate": 8.193230148046538e-06, "loss": 0.3518, "step": 17289 }, { "epoch": 1.7578283855225703, "grad_norm": 0.27483654022216797, "learning_rate": 8.192957055011088e-06, "loss": 0.323, "step": 17290 }, { "epoch": 1.7579300528670192, "grad_norm": 0.2639522850513458, "learning_rate": 8.19268394589028e-06, "loss": 0.3512, "step": 17291 }, { "epoch": 1.7580317202114681, "grad_norm": 0.2711343765258789, "learning_rate": 8.19241082068549e-06, "loss": 0.3844, "step": 17292 }, { "epoch": 1.758133387555917, "grad_norm": 0.27205219864845276, "learning_rate": 8.19213767939809e-06, "loss": 0.3433, "step": 17293 }, { "epoch": 1.758235054900366, "grad_norm": 0.2700498402118683, "learning_rate": 8.19186452202946e-06, "loss": 0.333, "step": 17294 }, { "epoch": 1.758336722244815, "grad_norm": 0.25402697920799255, "learning_rate": 8.191591348580977e-06, "loss": 0.3418, "step": 17295 }, { "epoch": 1.758438389589264, "grad_norm": 0.2760668098926544, "learning_rate": 8.191318159054014e-06, "loss": 0.3789, "step": 17296 }, { "epoch": 1.7585400569337128, "grad_norm": 0.26790767908096313, "learning_rate": 8.191044953449947e-06, "loss": 0.3787, "step": 17297 }, { "epoch": 1.7586417242781618, "grad_norm": 0.2623659670352936, "learning_rate": 8.190771731770156e-06, "loss": 0.3382, "step": 17298 }, { "epoch": 1.7587433916226107, "grad_norm": 0.2759953737258911, "learning_rate": 8.190498494016012e-06, "loss": 0.3679, "step": 17299 }, { "epoch": 1.7588450589670597, "grad_norm": 0.29765209555625916, "learning_rate": 8.190225240188898e-06, "loss": 0.3457, "step": 17300 }, { "epoch": 1.7589467263115086, "grad_norm": 0.2842617630958557, "learning_rate": 8.189951970290187e-06, "loss": 0.3688, "step": 17301 }, { "epoch": 1.7590483936559576, "grad_norm": 0.2726265788078308, "learning_rate": 8.189678684321256e-06, "loss": 0.3466, "step": 17302 }, { "epoch": 1.7591500610004067, "grad_norm": 0.2782469391822815, "learning_rate": 8.18940538228348e-06, "loss": 0.3315, "step": 17303 }, { "epoch": 1.7592517283448557, "grad_norm": 0.27877679467201233, "learning_rate": 8.189132064178238e-06, "loss": 0.3553, "step": 17304 }, { "epoch": 1.7593533956893046, "grad_norm": 0.26825886964797974, "learning_rate": 8.188858730006908e-06, "loss": 0.3384, "step": 17305 }, { "epoch": 1.7594550630337535, "grad_norm": 0.2656278610229492, "learning_rate": 8.188585379770865e-06, "loss": 0.3448, "step": 17306 }, { "epoch": 1.7595567303782025, "grad_norm": 0.29227253794670105, "learning_rate": 8.188312013471487e-06, "loss": 0.3367, "step": 17307 }, { "epoch": 1.7596583977226516, "grad_norm": 0.2777699828147888, "learning_rate": 8.188038631110148e-06, "loss": 0.3273, "step": 17308 }, { "epoch": 1.7597600650671006, "grad_norm": 0.2621908485889435, "learning_rate": 8.18776523268823e-06, "loss": 0.3413, "step": 17309 }, { "epoch": 1.7598617324115495, "grad_norm": 0.27504032850265503, "learning_rate": 8.18749181820711e-06, "loss": 0.3579, "step": 17310 }, { "epoch": 1.7599633997559985, "grad_norm": 0.28313952684402466, "learning_rate": 8.187218387668162e-06, "loss": 0.3719, "step": 17311 }, { "epoch": 1.7600650671004474, "grad_norm": 0.2874654531478882, "learning_rate": 8.186944941072766e-06, "loss": 0.3632, "step": 17312 }, { "epoch": 1.7601667344448964, "grad_norm": 0.29560697078704834, "learning_rate": 8.186671478422296e-06, "loss": 0.3596, "step": 17313 }, { "epoch": 1.7602684017893453, "grad_norm": 0.2938641309738159, "learning_rate": 8.186397999718134e-06, "loss": 0.3565, "step": 17314 }, { "epoch": 1.7603700691337942, "grad_norm": 0.26037442684173584, "learning_rate": 8.186124504961656e-06, "loss": 0.3445, "step": 17315 }, { "epoch": 1.7604717364782432, "grad_norm": 0.2753298878669739, "learning_rate": 8.185850994154241e-06, "loss": 0.3471, "step": 17316 }, { "epoch": 1.7605734038226921, "grad_norm": 0.26207977533340454, "learning_rate": 8.185577467297266e-06, "loss": 0.3696, "step": 17317 }, { "epoch": 1.760675071167141, "grad_norm": 0.2727833390235901, "learning_rate": 8.185303924392108e-06, "loss": 0.3626, "step": 17318 }, { "epoch": 1.76077673851159, "grad_norm": 0.26096227765083313, "learning_rate": 8.185030365440145e-06, "loss": 0.3743, "step": 17319 }, { "epoch": 1.760878405856039, "grad_norm": 0.3086095452308655, "learning_rate": 8.184756790442755e-06, "loss": 0.3824, "step": 17320 }, { "epoch": 1.7609800732004879, "grad_norm": 0.28324460983276367, "learning_rate": 8.184483199401318e-06, "loss": 0.3632, "step": 17321 }, { "epoch": 1.7610817405449368, "grad_norm": 0.31001272797584534, "learning_rate": 8.184209592317214e-06, "loss": 0.3461, "step": 17322 }, { "epoch": 1.7611834078893858, "grad_norm": 0.27175286412239075, "learning_rate": 8.183935969191816e-06, "loss": 0.3707, "step": 17323 }, { "epoch": 1.761285075233835, "grad_norm": 0.3047729730606079, "learning_rate": 8.183662330026505e-06, "loss": 0.3774, "step": 17324 }, { "epoch": 1.7613867425782839, "grad_norm": 0.2942942678928375, "learning_rate": 8.183388674822661e-06, "loss": 0.3895, "step": 17325 }, { "epoch": 1.7614884099227328, "grad_norm": 0.2877197563648224, "learning_rate": 8.183115003581661e-06, "loss": 0.38, "step": 17326 }, { "epoch": 1.7615900772671818, "grad_norm": 0.27990251779556274, "learning_rate": 8.182841316304884e-06, "loss": 0.3463, "step": 17327 }, { "epoch": 1.7616917446116307, "grad_norm": 0.2802419364452362, "learning_rate": 8.182567612993709e-06, "loss": 0.3746, "step": 17328 }, { "epoch": 1.7617934119560799, "grad_norm": 0.26852813363075256, "learning_rate": 8.182293893649513e-06, "loss": 0.359, "step": 17329 }, { "epoch": 1.7618950793005288, "grad_norm": 0.2745094299316406, "learning_rate": 8.182020158273678e-06, "loss": 0.3826, "step": 17330 }, { "epoch": 1.7619967466449777, "grad_norm": 0.28087979555130005, "learning_rate": 8.181746406867583e-06, "loss": 0.3522, "step": 17331 }, { "epoch": 1.7620984139894267, "grad_norm": 0.2596764862537384, "learning_rate": 8.181472639432603e-06, "loss": 0.3377, "step": 17332 }, { "epoch": 1.7622000813338756, "grad_norm": 0.27052271366119385, "learning_rate": 8.181198855970122e-06, "loss": 0.3527, "step": 17333 }, { "epoch": 1.7623017486783246, "grad_norm": 0.2698070704936981, "learning_rate": 8.180925056481517e-06, "loss": 0.3886, "step": 17334 }, { "epoch": 1.7624034160227735, "grad_norm": 0.29323625564575195, "learning_rate": 8.180651240968167e-06, "loss": 0.3582, "step": 17335 }, { "epoch": 1.7625050833672224, "grad_norm": 0.2712267339229584, "learning_rate": 8.180377409431452e-06, "loss": 0.3574, "step": 17336 }, { "epoch": 1.7626067507116714, "grad_norm": 0.26678237318992615, "learning_rate": 8.18010356187275e-06, "loss": 0.364, "step": 17337 }, { "epoch": 1.7627084180561203, "grad_norm": 0.25554555654525757, "learning_rate": 8.179829698293442e-06, "loss": 0.3635, "step": 17338 }, { "epoch": 1.7628100854005693, "grad_norm": 0.2867887020111084, "learning_rate": 8.17955581869491e-06, "loss": 0.3704, "step": 17339 }, { "epoch": 1.7629117527450182, "grad_norm": 0.29939618706703186, "learning_rate": 8.17928192307853e-06, "loss": 0.4075, "step": 17340 }, { "epoch": 1.7630134200894672, "grad_norm": 0.28346365690231323, "learning_rate": 8.179008011445684e-06, "loss": 0.3859, "step": 17341 }, { "epoch": 1.763115087433916, "grad_norm": 0.2579140067100525, "learning_rate": 8.17873408379775e-06, "loss": 0.3381, "step": 17342 }, { "epoch": 1.763216754778365, "grad_norm": 0.3007569909095764, "learning_rate": 8.17846014013611e-06, "loss": 0.3638, "step": 17343 }, { "epoch": 1.7633184221228142, "grad_norm": 0.28793230652809143, "learning_rate": 8.17818618046214e-06, "loss": 0.3644, "step": 17344 }, { "epoch": 1.7634200894672631, "grad_norm": 0.28804871439933777, "learning_rate": 8.177912204777228e-06, "loss": 0.3474, "step": 17345 }, { "epoch": 1.763521756811712, "grad_norm": 0.2613820433616638, "learning_rate": 8.177638213082746e-06, "loss": 0.344, "step": 17346 }, { "epoch": 1.763623424156161, "grad_norm": 0.26726338267326355, "learning_rate": 8.177364205380081e-06, "loss": 0.3564, "step": 17347 }, { "epoch": 1.76372509150061, "grad_norm": 0.281612366437912, "learning_rate": 8.177090181670608e-06, "loss": 0.3408, "step": 17348 }, { "epoch": 1.7638267588450591, "grad_norm": 0.2912062108516693, "learning_rate": 8.176816141955708e-06, "loss": 0.3675, "step": 17349 }, { "epoch": 1.763928426189508, "grad_norm": 0.29625314474105835, "learning_rate": 8.176542086236767e-06, "loss": 0.3529, "step": 17350 }, { "epoch": 1.764030093533957, "grad_norm": 0.27975499629974365, "learning_rate": 8.176268014515159e-06, "loss": 0.3423, "step": 17351 }, { "epoch": 1.764131760878406, "grad_norm": 0.2754144072532654, "learning_rate": 8.175993926792268e-06, "loss": 0.3195, "step": 17352 }, { "epoch": 1.764233428222855, "grad_norm": 0.2930322587490082, "learning_rate": 8.175719823069476e-06, "loss": 0.3764, "step": 17353 }, { "epoch": 1.7643350955673038, "grad_norm": 0.2784636318683624, "learning_rate": 8.17544570334816e-06, "loss": 0.3454, "step": 17354 }, { "epoch": 1.7644367629117528, "grad_norm": 0.28202107548713684, "learning_rate": 8.175171567629705e-06, "loss": 0.3467, "step": 17355 }, { "epoch": 1.7645384302562017, "grad_norm": 0.2884051501750946, "learning_rate": 8.174897415915488e-06, "loss": 0.3709, "step": 17356 }, { "epoch": 1.7646400976006507, "grad_norm": 0.2656329572200775, "learning_rate": 8.174623248206894e-06, "loss": 0.3281, "step": 17357 }, { "epoch": 1.7647417649450996, "grad_norm": 0.27916592359542847, "learning_rate": 8.174349064505303e-06, "loss": 0.3589, "step": 17358 }, { "epoch": 1.7648434322895485, "grad_norm": 0.3010324239730835, "learning_rate": 8.174074864812093e-06, "loss": 0.3508, "step": 17359 }, { "epoch": 1.7649450996339975, "grad_norm": 0.2773228883743286, "learning_rate": 8.173800649128652e-06, "loss": 0.3828, "step": 17360 }, { "epoch": 1.7650467669784464, "grad_norm": 0.2891453504562378, "learning_rate": 8.173526417456354e-06, "loss": 0.3434, "step": 17361 }, { "epoch": 1.7651484343228954, "grad_norm": 0.27638497948646545, "learning_rate": 8.173252169796586e-06, "loss": 0.3593, "step": 17362 }, { "epoch": 1.7652501016673443, "grad_norm": 0.27165430784225464, "learning_rate": 8.172977906150727e-06, "loss": 0.3558, "step": 17363 }, { "epoch": 1.7653517690117932, "grad_norm": 0.26990196108818054, "learning_rate": 8.17270362652016e-06, "loss": 0.3724, "step": 17364 }, { "epoch": 1.7654534363562424, "grad_norm": 0.28418388962745667, "learning_rate": 8.172429330906266e-06, "loss": 0.3487, "step": 17365 }, { "epoch": 1.7655551037006914, "grad_norm": 0.2753138840198517, "learning_rate": 8.172155019310426e-06, "loss": 0.371, "step": 17366 }, { "epoch": 1.7656567710451403, "grad_norm": 0.3152087926864624, "learning_rate": 8.171880691734024e-06, "loss": 0.3859, "step": 17367 }, { "epoch": 1.7657584383895892, "grad_norm": 0.270974338054657, "learning_rate": 8.17160634817844e-06, "loss": 0.3694, "step": 17368 }, { "epoch": 1.7658601057340382, "grad_norm": 0.3064121901988983, "learning_rate": 8.171331988645058e-06, "loss": 0.3623, "step": 17369 }, { "epoch": 1.7659617730784873, "grad_norm": 0.2930337190628052, "learning_rate": 8.171057613135257e-06, "loss": 0.3477, "step": 17370 }, { "epoch": 1.7660634404229363, "grad_norm": 0.2873826324939728, "learning_rate": 8.170783221650423e-06, "loss": 0.3499, "step": 17371 }, { "epoch": 1.7661651077673852, "grad_norm": 0.31322434544563293, "learning_rate": 8.170508814191936e-06, "loss": 0.3902, "step": 17372 }, { "epoch": 1.7662667751118342, "grad_norm": 0.28236937522888184, "learning_rate": 8.170234390761179e-06, "loss": 0.3387, "step": 17373 }, { "epoch": 1.766368442456283, "grad_norm": 0.28954455256462097, "learning_rate": 8.169959951359535e-06, "loss": 0.3673, "step": 17374 }, { "epoch": 1.766470109800732, "grad_norm": 0.2771856486797333, "learning_rate": 8.169685495988385e-06, "loss": 0.3464, "step": 17375 }, { "epoch": 1.766571777145181, "grad_norm": 0.2623003125190735, "learning_rate": 8.169411024649115e-06, "loss": 0.3559, "step": 17376 }, { "epoch": 1.76667344448963, "grad_norm": 0.2680065631866455, "learning_rate": 8.169136537343104e-06, "loss": 0.3621, "step": 17377 }, { "epoch": 1.7667751118340789, "grad_norm": 0.27859196066856384, "learning_rate": 8.168862034071734e-06, "loss": 0.3288, "step": 17378 }, { "epoch": 1.7668767791785278, "grad_norm": 0.2781399190425873, "learning_rate": 8.168587514836393e-06, "loss": 0.3261, "step": 17379 }, { "epoch": 1.7669784465229768, "grad_norm": 0.2458687573671341, "learning_rate": 8.16831297963846e-06, "loss": 0.3143, "step": 17380 }, { "epoch": 1.7670801138674257, "grad_norm": 0.2677275836467743, "learning_rate": 8.168038428479319e-06, "loss": 0.3297, "step": 17381 }, { "epoch": 1.7671817812118746, "grad_norm": 0.2802504599094391, "learning_rate": 8.167763861360354e-06, "loss": 0.3563, "step": 17382 }, { "epoch": 1.7672834485563236, "grad_norm": 0.263166606426239, "learning_rate": 8.167489278282947e-06, "loss": 0.3596, "step": 17383 }, { "epoch": 1.7673851159007725, "grad_norm": 0.261231005191803, "learning_rate": 8.167214679248482e-06, "loss": 0.3556, "step": 17384 }, { "epoch": 1.7674867832452217, "grad_norm": 0.28648316860198975, "learning_rate": 8.166940064258343e-06, "loss": 0.3729, "step": 17385 }, { "epoch": 1.7675884505896706, "grad_norm": 0.2832070291042328, "learning_rate": 8.16666543331391e-06, "loss": 0.3333, "step": 17386 }, { "epoch": 1.7676901179341196, "grad_norm": 0.26798325777053833, "learning_rate": 8.166390786416572e-06, "loss": 0.329, "step": 17387 }, { "epoch": 1.7677917852785685, "grad_norm": 0.2595426142215729, "learning_rate": 8.166116123567708e-06, "loss": 0.3901, "step": 17388 }, { "epoch": 1.7678934526230174, "grad_norm": 0.259399950504303, "learning_rate": 8.165841444768703e-06, "loss": 0.3492, "step": 17389 }, { "epoch": 1.7679951199674666, "grad_norm": 0.28957727551460266, "learning_rate": 8.165566750020941e-06, "loss": 0.3552, "step": 17390 }, { "epoch": 1.7680967873119156, "grad_norm": 0.26470038294792175, "learning_rate": 8.165292039325808e-06, "loss": 0.3477, "step": 17391 }, { "epoch": 1.7681984546563645, "grad_norm": 0.28084349632263184, "learning_rate": 8.165017312684683e-06, "loss": 0.401, "step": 17392 }, { "epoch": 1.7683001220008134, "grad_norm": 0.2778260409832001, "learning_rate": 8.164742570098955e-06, "loss": 0.3807, "step": 17393 }, { "epoch": 1.7684017893452624, "grad_norm": 0.2635866403579712, "learning_rate": 8.164467811570005e-06, "loss": 0.345, "step": 17394 }, { "epoch": 1.7685034566897113, "grad_norm": 0.28337419033050537, "learning_rate": 8.164193037099218e-06, "loss": 0.3634, "step": 17395 }, { "epoch": 1.7686051240341603, "grad_norm": 0.26014527678489685, "learning_rate": 8.163918246687979e-06, "loss": 0.3465, "step": 17396 }, { "epoch": 1.7687067913786092, "grad_norm": 0.2712462246417999, "learning_rate": 8.163643440337672e-06, "loss": 0.3566, "step": 17397 }, { "epoch": 1.7688084587230581, "grad_norm": 0.25721192359924316, "learning_rate": 8.16336861804968e-06, "loss": 0.3959, "step": 17398 }, { "epoch": 1.768910126067507, "grad_norm": 0.24430152773857117, "learning_rate": 8.163093779825391e-06, "loss": 0.386, "step": 17399 }, { "epoch": 1.769011793411956, "grad_norm": 0.25781795382499695, "learning_rate": 8.162818925666185e-06, "loss": 0.3342, "step": 17400 }, { "epoch": 1.769113460756405, "grad_norm": 0.2973158061504364, "learning_rate": 8.16254405557345e-06, "loss": 0.3363, "step": 17401 }, { "epoch": 1.769215128100854, "grad_norm": 0.25155195593833923, "learning_rate": 8.162269169548569e-06, "loss": 0.3595, "step": 17402 }, { "epoch": 1.7693167954453028, "grad_norm": 0.27171096205711365, "learning_rate": 8.161994267592927e-06, "loss": 0.3522, "step": 17403 }, { "epoch": 1.7694184627897518, "grad_norm": 0.29447776079177856, "learning_rate": 8.16171934970791e-06, "loss": 0.3702, "step": 17404 }, { "epoch": 1.7695201301342007, "grad_norm": 0.2643183469772339, "learning_rate": 8.161444415894902e-06, "loss": 0.3319, "step": 17405 }, { "epoch": 1.76962179747865, "grad_norm": 0.26588600873947144, "learning_rate": 8.161169466155288e-06, "loss": 0.3548, "step": 17406 }, { "epoch": 1.7697234648230988, "grad_norm": 0.28516146540641785, "learning_rate": 8.160894500490454e-06, "loss": 0.3567, "step": 17407 }, { "epoch": 1.7698251321675478, "grad_norm": 0.27619460225105286, "learning_rate": 8.160619518901785e-06, "loss": 0.346, "step": 17408 }, { "epoch": 1.7699267995119967, "grad_norm": 0.25500765442848206, "learning_rate": 8.160344521390664e-06, "loss": 0.3408, "step": 17409 }, { "epoch": 1.7700284668564457, "grad_norm": 0.26192402839660645, "learning_rate": 8.160069507958481e-06, "loss": 0.3839, "step": 17410 }, { "epoch": 1.7701301342008948, "grad_norm": 0.27691128849983215, "learning_rate": 8.159794478606617e-06, "loss": 0.3663, "step": 17411 }, { "epoch": 1.7702318015453438, "grad_norm": 0.26377326250076294, "learning_rate": 8.159519433336461e-06, "loss": 0.3396, "step": 17412 }, { "epoch": 1.7703334688897927, "grad_norm": 0.25910019874572754, "learning_rate": 8.159244372149396e-06, "loss": 0.3258, "step": 17413 }, { "epoch": 1.7704351362342416, "grad_norm": 0.2542837858200073, "learning_rate": 8.158969295046806e-06, "loss": 0.3348, "step": 17414 }, { "epoch": 1.7705368035786906, "grad_norm": 0.268810898065567, "learning_rate": 8.158694202030083e-06, "loss": 0.373, "step": 17415 }, { "epoch": 1.7706384709231395, "grad_norm": 0.24473679065704346, "learning_rate": 8.158419093100607e-06, "loss": 0.3441, "step": 17416 }, { "epoch": 1.7707401382675885, "grad_norm": 0.2512023448944092, "learning_rate": 8.158143968259767e-06, "loss": 0.3559, "step": 17417 }, { "epoch": 1.7708418056120374, "grad_norm": 0.27173879742622375, "learning_rate": 8.157868827508947e-06, "loss": 0.3684, "step": 17418 }, { "epoch": 1.7709434729564864, "grad_norm": 0.28445276618003845, "learning_rate": 8.157593670849536e-06, "loss": 0.3762, "step": 17419 }, { "epoch": 1.7710451403009353, "grad_norm": 0.2971835136413574, "learning_rate": 8.157318498282918e-06, "loss": 0.3906, "step": 17420 }, { "epoch": 1.7711468076453842, "grad_norm": 0.2766507565975189, "learning_rate": 8.15704330981048e-06, "loss": 0.377, "step": 17421 }, { "epoch": 1.7712484749898332, "grad_norm": 0.2836569845676422, "learning_rate": 8.156768105433608e-06, "loss": 0.3588, "step": 17422 }, { "epoch": 1.7713501423342821, "grad_norm": 0.2713598906993866, "learning_rate": 8.156492885153689e-06, "loss": 0.3364, "step": 17423 }, { "epoch": 1.771451809678731, "grad_norm": 0.2631217837333679, "learning_rate": 8.156217648972107e-06, "loss": 0.3435, "step": 17424 }, { "epoch": 1.77155347702318, "grad_norm": 0.26905471086502075, "learning_rate": 8.155942396890251e-06, "loss": 0.369, "step": 17425 }, { "epoch": 1.7716551443676292, "grad_norm": 0.25134727358818054, "learning_rate": 8.155667128909507e-06, "loss": 0.3454, "step": 17426 }, { "epoch": 1.771756811712078, "grad_norm": 0.2644682824611664, "learning_rate": 8.155391845031265e-06, "loss": 0.3835, "step": 17427 }, { "epoch": 1.771858479056527, "grad_norm": 0.2518286108970642, "learning_rate": 8.155116545256905e-06, "loss": 0.3792, "step": 17428 }, { "epoch": 1.771960146400976, "grad_norm": 0.27677297592163086, "learning_rate": 8.154841229587819e-06, "loss": 0.3616, "step": 17429 }, { "epoch": 1.772061813745425, "grad_norm": 0.2786446809768677, "learning_rate": 8.154565898025394e-06, "loss": 0.3719, "step": 17430 }, { "epoch": 1.772163481089874, "grad_norm": 0.2782561480998993, "learning_rate": 8.154290550571013e-06, "loss": 0.3912, "step": 17431 }, { "epoch": 1.772265148434323, "grad_norm": 0.2739686369895935, "learning_rate": 8.154015187226068e-06, "loss": 0.3298, "step": 17432 }, { "epoch": 1.772366815778772, "grad_norm": 0.2926445007324219, "learning_rate": 8.153739807991943e-06, "loss": 0.3585, "step": 17433 }, { "epoch": 1.772468483123221, "grad_norm": 0.2673426866531372, "learning_rate": 8.153464412870024e-06, "loss": 0.3861, "step": 17434 }, { "epoch": 1.7725701504676699, "grad_norm": 0.2751852869987488, "learning_rate": 8.153189001861704e-06, "loss": 0.3483, "step": 17435 }, { "epoch": 1.7726718178121188, "grad_norm": 0.2729591727256775, "learning_rate": 8.152913574968367e-06, "loss": 0.3361, "step": 17436 }, { "epoch": 1.7727734851565677, "grad_norm": 0.29797711968421936, "learning_rate": 8.1526381321914e-06, "loss": 0.359, "step": 17437 }, { "epoch": 1.7728751525010167, "grad_norm": 0.2780555486679077, "learning_rate": 8.15236267353219e-06, "loss": 0.3781, "step": 17438 }, { "epoch": 1.7729768198454656, "grad_norm": 0.30694779753685, "learning_rate": 8.152087198992126e-06, "loss": 0.3641, "step": 17439 }, { "epoch": 1.7730784871899146, "grad_norm": 0.2541140019893646, "learning_rate": 8.151811708572599e-06, "loss": 0.3496, "step": 17440 }, { "epoch": 1.7731801545343635, "grad_norm": 0.28569498658180237, "learning_rate": 8.15153620227499e-06, "loss": 0.3552, "step": 17441 }, { "epoch": 1.7732818218788124, "grad_norm": 0.2860792577266693, "learning_rate": 8.151260680100692e-06, "loss": 0.3445, "step": 17442 }, { "epoch": 1.7733834892232614, "grad_norm": 0.27150148153305054, "learning_rate": 8.15098514205109e-06, "loss": 0.3513, "step": 17443 }, { "epoch": 1.7734851565677103, "grad_norm": 0.28002694249153137, "learning_rate": 8.150709588127577e-06, "loss": 0.3825, "step": 17444 }, { "epoch": 1.7735868239121593, "grad_norm": 0.29654577374458313, "learning_rate": 8.150434018331535e-06, "loss": 0.3832, "step": 17445 }, { "epoch": 1.7736884912566082, "grad_norm": 0.28279909491539, "learning_rate": 8.150158432664357e-06, "loss": 0.3865, "step": 17446 }, { "epoch": 1.7737901586010574, "grad_norm": 0.26418399810791016, "learning_rate": 8.149882831127427e-06, "loss": 0.3544, "step": 17447 }, { "epoch": 1.7738918259455063, "grad_norm": 0.26537540555000305, "learning_rate": 8.149607213722137e-06, "loss": 0.3155, "step": 17448 }, { "epoch": 1.7739934932899553, "grad_norm": 0.2714082896709442, "learning_rate": 8.149331580449876e-06, "loss": 0.3567, "step": 17449 }, { "epoch": 1.7740951606344042, "grad_norm": 0.2837705910205841, "learning_rate": 8.149055931312029e-06, "loss": 0.3462, "step": 17450 }, { "epoch": 1.7741968279788531, "grad_norm": 0.27281200885772705, "learning_rate": 8.148780266309985e-06, "loss": 0.367, "step": 17451 }, { "epoch": 1.7742984953233023, "grad_norm": 0.2830628752708435, "learning_rate": 8.148504585445139e-06, "loss": 0.3562, "step": 17452 }, { "epoch": 1.7744001626677512, "grad_norm": 0.25780728459358215, "learning_rate": 8.148228888718872e-06, "loss": 0.3921, "step": 17453 }, { "epoch": 1.7745018300122002, "grad_norm": 0.2812616527080536, "learning_rate": 8.147953176132575e-06, "loss": 0.351, "step": 17454 }, { "epoch": 1.7746034973566491, "grad_norm": 0.26530972123146057, "learning_rate": 8.147677447687638e-06, "loss": 0.3942, "step": 17455 }, { "epoch": 1.774705164701098, "grad_norm": 0.28000232577323914, "learning_rate": 8.147401703385452e-06, "loss": 0.3524, "step": 17456 }, { "epoch": 1.774806832045547, "grad_norm": 0.2896440625190735, "learning_rate": 8.147125943227405e-06, "loss": 0.37, "step": 17457 }, { "epoch": 1.774908499389996, "grad_norm": 0.25885647535324097, "learning_rate": 8.146850167214883e-06, "loss": 0.3577, "step": 17458 }, { "epoch": 1.775010166734445, "grad_norm": 0.27631065249443054, "learning_rate": 8.146574375349279e-06, "loss": 0.3156, "step": 17459 }, { "epoch": 1.7751118340788938, "grad_norm": 0.2780025899410248, "learning_rate": 8.14629856763198e-06, "loss": 0.3317, "step": 17460 }, { "epoch": 1.7752135014233428, "grad_norm": 0.25438880920410156, "learning_rate": 8.146022744064376e-06, "loss": 0.3469, "step": 17461 }, { "epoch": 1.7753151687677917, "grad_norm": 0.2784384489059448, "learning_rate": 8.145746904647859e-06, "loss": 0.3739, "step": 17462 }, { "epoch": 1.7754168361122407, "grad_norm": 0.25951507687568665, "learning_rate": 8.145471049383816e-06, "loss": 0.3803, "step": 17463 }, { "epoch": 1.7755185034566896, "grad_norm": 0.2599668800830841, "learning_rate": 8.145195178273638e-06, "loss": 0.3486, "step": 17464 }, { "epoch": 1.7756201708011385, "grad_norm": 0.25761768221855164, "learning_rate": 8.144919291318711e-06, "loss": 0.3485, "step": 17465 }, { "epoch": 1.7757218381455875, "grad_norm": 0.27500227093696594, "learning_rate": 8.14464338852043e-06, "loss": 0.3441, "step": 17466 }, { "epoch": 1.7758235054900366, "grad_norm": 0.2766018509864807, "learning_rate": 8.144367469880183e-06, "loss": 0.346, "step": 17467 }, { "epoch": 1.7759251728344856, "grad_norm": 0.27234581112861633, "learning_rate": 8.14409153539936e-06, "loss": 0.3765, "step": 17468 }, { "epoch": 1.7760268401789345, "grad_norm": 0.2797076404094696, "learning_rate": 8.143815585079351e-06, "loss": 0.3705, "step": 17469 }, { "epoch": 1.7761285075233835, "grad_norm": 0.2708577811717987, "learning_rate": 8.143539618921546e-06, "loss": 0.3426, "step": 17470 }, { "epoch": 1.7762301748678324, "grad_norm": 0.2757086753845215, "learning_rate": 8.143263636927337e-06, "loss": 0.3546, "step": 17471 }, { "epoch": 1.7763318422122816, "grad_norm": 0.26868343353271484, "learning_rate": 8.14298763909811e-06, "loss": 0.3564, "step": 17472 }, { "epoch": 1.7764335095567305, "grad_norm": 0.27002495527267456, "learning_rate": 8.14271162543526e-06, "loss": 0.3294, "step": 17473 }, { "epoch": 1.7765351769011795, "grad_norm": 0.27836501598358154, "learning_rate": 8.142435595940175e-06, "loss": 0.3693, "step": 17474 }, { "epoch": 1.7766368442456284, "grad_norm": 0.2726467251777649, "learning_rate": 8.142159550614246e-06, "loss": 0.3854, "step": 17475 }, { "epoch": 1.7767385115900773, "grad_norm": 0.31282731890678406, "learning_rate": 8.141883489458864e-06, "loss": 0.3649, "step": 17476 }, { "epoch": 1.7768401789345263, "grad_norm": 0.28745704889297485, "learning_rate": 8.14160741247542e-06, "loss": 0.354, "step": 17477 }, { "epoch": 1.7769418462789752, "grad_norm": 0.26501214504241943, "learning_rate": 8.141331319665304e-06, "loss": 0.35, "step": 17478 }, { "epoch": 1.7770435136234242, "grad_norm": 0.2732386887073517, "learning_rate": 8.141055211029906e-06, "loss": 0.3611, "step": 17479 }, { "epoch": 1.777145180967873, "grad_norm": 0.2916928231716156, "learning_rate": 8.14077908657062e-06, "loss": 0.3488, "step": 17480 }, { "epoch": 1.777246848312322, "grad_norm": 0.30412086844444275, "learning_rate": 8.140502946288835e-06, "loss": 0.3446, "step": 17481 }, { "epoch": 1.777348515656771, "grad_norm": 0.2738465666770935, "learning_rate": 8.140226790185944e-06, "loss": 0.3708, "step": 17482 }, { "epoch": 1.77745018300122, "grad_norm": 0.27537912130355835, "learning_rate": 8.139950618263334e-06, "loss": 0.334, "step": 17483 }, { "epoch": 1.7775518503456689, "grad_norm": 0.28636109828948975, "learning_rate": 8.139674430522402e-06, "loss": 0.3671, "step": 17484 }, { "epoch": 1.7776535176901178, "grad_norm": 0.2739374041557312, "learning_rate": 8.139398226964533e-06, "loss": 0.3712, "step": 17485 }, { "epoch": 1.7777551850345668, "grad_norm": 0.26721256971359253, "learning_rate": 8.139122007591123e-06, "loss": 0.3851, "step": 17486 }, { "epoch": 1.7778568523790157, "grad_norm": 0.26026204228401184, "learning_rate": 8.138845772403563e-06, "loss": 0.3344, "step": 17487 }, { "epoch": 1.7779585197234649, "grad_norm": 0.2597612738609314, "learning_rate": 8.138569521403244e-06, "loss": 0.3612, "step": 17488 }, { "epoch": 1.7780601870679138, "grad_norm": 0.2768628001213074, "learning_rate": 8.138293254591557e-06, "loss": 0.369, "step": 17489 }, { "epoch": 1.7781618544123627, "grad_norm": 0.289949506521225, "learning_rate": 8.138016971969893e-06, "loss": 0.3629, "step": 17490 }, { "epoch": 1.7782635217568117, "grad_norm": 0.27370092272758484, "learning_rate": 8.137740673539647e-06, "loss": 0.3442, "step": 17491 }, { "epoch": 1.7783651891012606, "grad_norm": 0.26165154576301575, "learning_rate": 8.137464359302209e-06, "loss": 0.3683, "step": 17492 }, { "epoch": 1.7784668564457098, "grad_norm": 0.31371912360191345, "learning_rate": 8.13718802925897e-06, "loss": 0.3629, "step": 17493 }, { "epoch": 1.7785685237901587, "grad_norm": 0.28751635551452637, "learning_rate": 8.136911683411325e-06, "loss": 0.3409, "step": 17494 }, { "epoch": 1.7786701911346077, "grad_norm": 0.2648873031139374, "learning_rate": 8.136635321760663e-06, "loss": 0.358, "step": 17495 }, { "epoch": 1.7787718584790566, "grad_norm": 0.29978910088539124, "learning_rate": 8.136358944308378e-06, "loss": 0.3915, "step": 17496 }, { "epoch": 1.7788735258235056, "grad_norm": 0.27614057064056396, "learning_rate": 8.136082551055862e-06, "loss": 0.3363, "step": 17497 }, { "epoch": 1.7789751931679545, "grad_norm": 0.28655532002449036, "learning_rate": 8.135806142004507e-06, "loss": 0.3772, "step": 17498 }, { "epoch": 1.7790768605124034, "grad_norm": 0.26968589425086975, "learning_rate": 8.135529717155706e-06, "loss": 0.3472, "step": 17499 }, { "epoch": 1.7791785278568524, "grad_norm": 0.2709541618824005, "learning_rate": 8.13525327651085e-06, "loss": 0.3731, "step": 17500 }, { "epoch": 1.7792801952013013, "grad_norm": 0.2936701774597168, "learning_rate": 8.134976820071337e-06, "loss": 0.3741, "step": 17501 }, { "epoch": 1.7793818625457503, "grad_norm": 0.2841014564037323, "learning_rate": 8.134700347838554e-06, "loss": 0.3538, "step": 17502 }, { "epoch": 1.7794835298901992, "grad_norm": 0.2783766984939575, "learning_rate": 8.134423859813894e-06, "loss": 0.3513, "step": 17503 }, { "epoch": 1.7795851972346481, "grad_norm": 0.2694052755832672, "learning_rate": 8.134147355998753e-06, "loss": 0.3337, "step": 17504 }, { "epoch": 1.779686864579097, "grad_norm": 0.28017616271972656, "learning_rate": 8.133870836394522e-06, "loss": 0.3491, "step": 17505 }, { "epoch": 1.779788531923546, "grad_norm": 0.2983439564704895, "learning_rate": 8.133594301002594e-06, "loss": 0.3567, "step": 17506 }, { "epoch": 1.779890199267995, "grad_norm": 0.2686065435409546, "learning_rate": 8.133317749824363e-06, "loss": 0.3614, "step": 17507 }, { "epoch": 1.7799918666124441, "grad_norm": 0.32314610481262207, "learning_rate": 8.13304118286122e-06, "loss": 0.3486, "step": 17508 }, { "epoch": 1.780093533956893, "grad_norm": 0.27439644932746887, "learning_rate": 8.132764600114562e-06, "loss": 0.343, "step": 17509 }, { "epoch": 1.780195201301342, "grad_norm": 0.274715393781662, "learning_rate": 8.13248800158578e-06, "loss": 0.346, "step": 17510 }, { "epoch": 1.780296868645791, "grad_norm": 0.27817896008491516, "learning_rate": 8.132211387276269e-06, "loss": 0.3469, "step": 17511 }, { "epoch": 1.78039853599024, "grad_norm": 0.26313483715057373, "learning_rate": 8.131934757187421e-06, "loss": 0.3495, "step": 17512 }, { "epoch": 1.780500203334689, "grad_norm": 0.2652257978916168, "learning_rate": 8.131658111320628e-06, "loss": 0.3712, "step": 17513 }, { "epoch": 1.780601870679138, "grad_norm": 0.3024686574935913, "learning_rate": 8.131381449677287e-06, "loss": 0.3662, "step": 17514 }, { "epoch": 1.780703538023587, "grad_norm": 0.2728891372680664, "learning_rate": 8.13110477225879e-06, "loss": 0.3401, "step": 17515 }, { "epoch": 1.7808052053680359, "grad_norm": 0.25445085763931274, "learning_rate": 8.130828079066531e-06, "loss": 0.335, "step": 17516 }, { "epoch": 1.7809068727124848, "grad_norm": 0.2976900339126587, "learning_rate": 8.130551370101904e-06, "loss": 0.3675, "step": 17517 }, { "epoch": 1.7810085400569338, "grad_norm": 0.2895665764808655, "learning_rate": 8.130274645366305e-06, "loss": 0.3885, "step": 17518 }, { "epoch": 1.7811102074013827, "grad_norm": 0.2639504671096802, "learning_rate": 8.129997904861125e-06, "loss": 0.3211, "step": 17519 }, { "epoch": 1.7812118747458316, "grad_norm": 0.2640317678451538, "learning_rate": 8.12972114858776e-06, "loss": 0.3635, "step": 17520 }, { "epoch": 1.7813135420902806, "grad_norm": 0.2563877999782562, "learning_rate": 8.129444376547601e-06, "loss": 0.3309, "step": 17521 }, { "epoch": 1.7814152094347295, "grad_norm": 0.259011834859848, "learning_rate": 8.129167588742049e-06, "loss": 0.3396, "step": 17522 }, { "epoch": 1.7815168767791785, "grad_norm": 0.26279526948928833, "learning_rate": 8.12889078517249e-06, "loss": 0.3346, "step": 17523 }, { "epoch": 1.7816185441236274, "grad_norm": 0.266021728515625, "learning_rate": 8.128613965840325e-06, "loss": 0.3573, "step": 17524 }, { "epoch": 1.7817202114680764, "grad_norm": 0.27425408363342285, "learning_rate": 8.128337130746946e-06, "loss": 0.3668, "step": 17525 }, { "epoch": 1.7818218788125253, "grad_norm": 0.28275853395462036, "learning_rate": 8.128060279893748e-06, "loss": 0.3589, "step": 17526 }, { "epoch": 1.7819235461569742, "grad_norm": 0.2777123749256134, "learning_rate": 8.127783413282126e-06, "loss": 0.3602, "step": 17527 }, { "epoch": 1.7820252135014232, "grad_norm": 0.26529642939567566, "learning_rate": 8.127506530913475e-06, "loss": 0.3892, "step": 17528 }, { "epoch": 1.7821268808458723, "grad_norm": 0.2698657512664795, "learning_rate": 8.127229632789189e-06, "loss": 0.3347, "step": 17529 }, { "epoch": 1.7822285481903213, "grad_norm": 0.2822650671005249, "learning_rate": 8.126952718910661e-06, "loss": 0.3558, "step": 17530 }, { "epoch": 1.7823302155347702, "grad_norm": 0.3088230788707733, "learning_rate": 8.12667578927929e-06, "loss": 0.3819, "step": 17531 }, { "epoch": 1.7824318828792192, "grad_norm": 0.25491729378700256, "learning_rate": 8.12639884389647e-06, "loss": 0.3616, "step": 17532 }, { "epoch": 1.782533550223668, "grad_norm": 0.2862944006919861, "learning_rate": 8.126121882763596e-06, "loss": 0.3572, "step": 17533 }, { "epoch": 1.7826352175681173, "grad_norm": 0.2900952100753784, "learning_rate": 8.12584490588206e-06, "loss": 0.3456, "step": 17534 }, { "epoch": 1.7827368849125662, "grad_norm": 0.2874585688114166, "learning_rate": 8.125567913253263e-06, "loss": 0.3629, "step": 17535 }, { "epoch": 1.7828385522570152, "grad_norm": 0.2763063609600067, "learning_rate": 8.125290904878597e-06, "loss": 0.3363, "step": 17536 }, { "epoch": 1.782940219601464, "grad_norm": 0.29786962270736694, "learning_rate": 8.125013880759456e-06, "loss": 0.3686, "step": 17537 }, { "epoch": 1.783041886945913, "grad_norm": 0.294409841299057, "learning_rate": 8.124736840897241e-06, "loss": 0.3819, "step": 17538 }, { "epoch": 1.783143554290362, "grad_norm": 0.26534542441368103, "learning_rate": 8.124459785293342e-06, "loss": 0.3779, "step": 17539 }, { "epoch": 1.783245221634811, "grad_norm": 0.26286134123802185, "learning_rate": 8.124182713949158e-06, "loss": 0.3443, "step": 17540 }, { "epoch": 1.7833468889792599, "grad_norm": 0.2829109728336334, "learning_rate": 8.123905626866085e-06, "loss": 0.3419, "step": 17541 }, { "epoch": 1.7834485563237088, "grad_norm": 0.2792001962661743, "learning_rate": 8.123628524045515e-06, "loss": 0.3825, "step": 17542 }, { "epoch": 1.7835502236681577, "grad_norm": 0.28434064984321594, "learning_rate": 8.123351405488848e-06, "loss": 0.3326, "step": 17543 }, { "epoch": 1.7836518910126067, "grad_norm": 0.276195228099823, "learning_rate": 8.12307427119748e-06, "loss": 0.3733, "step": 17544 }, { "epoch": 1.7837535583570556, "grad_norm": 0.32134610414505005, "learning_rate": 8.122797121172804e-06, "loss": 0.3672, "step": 17545 }, { "epoch": 1.7838552257015046, "grad_norm": 0.28032413125038147, "learning_rate": 8.12251995541622e-06, "loss": 0.3515, "step": 17546 }, { "epoch": 1.7839568930459535, "grad_norm": 0.29075536131858826, "learning_rate": 8.122242773929121e-06, "loss": 0.3709, "step": 17547 }, { "epoch": 1.7840585603904024, "grad_norm": 0.3003481924533844, "learning_rate": 8.121965576712906e-06, "loss": 0.3512, "step": 17548 }, { "epoch": 1.7841602277348516, "grad_norm": 0.3088100254535675, "learning_rate": 8.121688363768971e-06, "loss": 0.3563, "step": 17549 }, { "epoch": 1.7842618950793006, "grad_norm": 0.2950851321220398, "learning_rate": 8.12141113509871e-06, "loss": 0.3873, "step": 17550 }, { "epoch": 1.7843635624237495, "grad_norm": 0.27086031436920166, "learning_rate": 8.121133890703523e-06, "loss": 0.3552, "step": 17551 }, { "epoch": 1.7844652297681984, "grad_norm": 0.2936359941959381, "learning_rate": 8.120856630584804e-06, "loss": 0.3432, "step": 17552 }, { "epoch": 1.7845668971126474, "grad_norm": 0.27306416630744934, "learning_rate": 8.120579354743952e-06, "loss": 0.3488, "step": 17553 }, { "epoch": 1.7846685644570965, "grad_norm": 0.2822171747684479, "learning_rate": 8.12030206318236e-06, "loss": 0.3585, "step": 17554 }, { "epoch": 1.7847702318015455, "grad_norm": 0.27318406105041504, "learning_rate": 8.120024755901432e-06, "loss": 0.3683, "step": 17555 }, { "epoch": 1.7848718991459944, "grad_norm": 0.29986828565597534, "learning_rate": 8.119747432902559e-06, "loss": 0.343, "step": 17556 }, { "epoch": 1.7849735664904434, "grad_norm": 0.2608938217163086, "learning_rate": 8.119470094187138e-06, "loss": 0.3671, "step": 17557 }, { "epoch": 1.7850752338348923, "grad_norm": 0.2853940725326538, "learning_rate": 8.11919273975657e-06, "loss": 0.382, "step": 17558 }, { "epoch": 1.7851769011793412, "grad_norm": 0.30378949642181396, "learning_rate": 8.11891536961225e-06, "loss": 0.3615, "step": 17559 }, { "epoch": 1.7852785685237902, "grad_norm": 0.26941561698913574, "learning_rate": 8.118637983755574e-06, "loss": 0.3546, "step": 17560 }, { "epoch": 1.7853802358682391, "grad_norm": 0.28358030319213867, "learning_rate": 8.118360582187943e-06, "loss": 0.3541, "step": 17561 }, { "epoch": 1.785481903212688, "grad_norm": 0.27663201093673706, "learning_rate": 8.11808316491075e-06, "loss": 0.3343, "step": 17562 }, { "epoch": 1.785583570557137, "grad_norm": 0.2896236181259155, "learning_rate": 8.117805731925397e-06, "loss": 0.3339, "step": 17563 }, { "epoch": 1.785685237901586, "grad_norm": 0.25459638237953186, "learning_rate": 8.11752828323328e-06, "loss": 0.334, "step": 17564 }, { "epoch": 1.785786905246035, "grad_norm": 0.29108208417892456, "learning_rate": 8.117250818835796e-06, "loss": 0.3766, "step": 17565 }, { "epoch": 1.7858885725904838, "grad_norm": 0.27717873454093933, "learning_rate": 8.116973338734342e-06, "loss": 0.3805, "step": 17566 }, { "epoch": 1.7859902399349328, "grad_norm": 0.2706206440925598, "learning_rate": 8.116695842930317e-06, "loss": 0.3603, "step": 17567 }, { "epoch": 1.7860919072793817, "grad_norm": 0.2613356113433838, "learning_rate": 8.11641833142512e-06, "loss": 0.3758, "step": 17568 }, { "epoch": 1.7861935746238307, "grad_norm": 0.26839494705200195, "learning_rate": 8.116140804220148e-06, "loss": 0.3544, "step": 17569 }, { "epoch": 1.7862952419682798, "grad_norm": 0.28380274772644043, "learning_rate": 8.115863261316798e-06, "loss": 0.3184, "step": 17570 }, { "epoch": 1.7863969093127288, "grad_norm": 0.2532217800617218, "learning_rate": 8.11558570271647e-06, "loss": 0.3555, "step": 17571 }, { "epoch": 1.7864985766571777, "grad_norm": 0.26374900341033936, "learning_rate": 8.115308128420561e-06, "loss": 0.3414, "step": 17572 }, { "epoch": 1.7866002440016266, "grad_norm": 0.28389742970466614, "learning_rate": 8.115030538430472e-06, "loss": 0.3461, "step": 17573 }, { "epoch": 1.7867019113460756, "grad_norm": 0.2919837534427643, "learning_rate": 8.114752932747599e-06, "loss": 0.3753, "step": 17574 }, { "epoch": 1.7868035786905248, "grad_norm": 0.2708270251750946, "learning_rate": 8.114475311373338e-06, "loss": 0.3338, "step": 17575 }, { "epoch": 1.7869052460349737, "grad_norm": 0.29364994168281555, "learning_rate": 8.114197674309092e-06, "loss": 0.3677, "step": 17576 }, { "epoch": 1.7870069133794226, "grad_norm": 0.2775510251522064, "learning_rate": 8.113920021556258e-06, "loss": 0.349, "step": 17577 }, { "epoch": 1.7871085807238716, "grad_norm": 0.27456989884376526, "learning_rate": 8.113642353116236e-06, "loss": 0.3585, "step": 17578 }, { "epoch": 1.7872102480683205, "grad_norm": 0.2868919372558594, "learning_rate": 8.113364668990424e-06, "loss": 0.3434, "step": 17579 }, { "epoch": 1.7873119154127695, "grad_norm": 0.274197518825531, "learning_rate": 8.11308696918022e-06, "loss": 0.3719, "step": 17580 }, { "epoch": 1.7874135827572184, "grad_norm": 0.2766059637069702, "learning_rate": 8.112809253687023e-06, "loss": 0.3976, "step": 17581 }, { "epoch": 1.7875152501016673, "grad_norm": 0.2787856459617615, "learning_rate": 8.112531522512233e-06, "loss": 0.3399, "step": 17582 }, { "epoch": 1.7876169174461163, "grad_norm": 0.27775290608406067, "learning_rate": 8.11225377565725e-06, "loss": 0.3428, "step": 17583 }, { "epoch": 1.7877185847905652, "grad_norm": 0.27776584029197693, "learning_rate": 8.11197601312347e-06, "loss": 0.348, "step": 17584 }, { "epoch": 1.7878202521350142, "grad_norm": 0.2832111716270447, "learning_rate": 8.111698234912296e-06, "loss": 0.3642, "step": 17585 }, { "epoch": 1.787921919479463, "grad_norm": 0.28552624583244324, "learning_rate": 8.111420441025124e-06, "loss": 0.3415, "step": 17586 }, { "epoch": 1.788023586823912, "grad_norm": 0.2772558033466339, "learning_rate": 8.111142631463358e-06, "loss": 0.3504, "step": 17587 }, { "epoch": 1.788125254168361, "grad_norm": 0.27169668674468994, "learning_rate": 8.110864806228394e-06, "loss": 0.3607, "step": 17588 }, { "epoch": 1.78822692151281, "grad_norm": 0.29181668162345886, "learning_rate": 8.11058696532163e-06, "loss": 0.3654, "step": 17589 }, { "epoch": 1.788328588857259, "grad_norm": 0.2879168391227722, "learning_rate": 8.11030910874447e-06, "loss": 0.3434, "step": 17590 }, { "epoch": 1.788430256201708, "grad_norm": 0.27197569608688354, "learning_rate": 8.11003123649831e-06, "loss": 0.3825, "step": 17591 }, { "epoch": 1.788531923546157, "grad_norm": 0.2746628522872925, "learning_rate": 8.109753348584554e-06, "loss": 0.369, "step": 17592 }, { "epoch": 1.788633590890606, "grad_norm": 0.2610449492931366, "learning_rate": 8.1094754450046e-06, "loss": 0.3443, "step": 17593 }, { "epoch": 1.7887352582350549, "grad_norm": 0.2768079936504364, "learning_rate": 8.109197525759846e-06, "loss": 0.3537, "step": 17594 }, { "epoch": 1.788836925579504, "grad_norm": 0.2683434784412384, "learning_rate": 8.108919590851693e-06, "loss": 0.3397, "step": 17595 }, { "epoch": 1.788938592923953, "grad_norm": 0.2784518599510193, "learning_rate": 8.108641640281544e-06, "loss": 0.3918, "step": 17596 }, { "epoch": 1.789040260268402, "grad_norm": 0.2884862720966339, "learning_rate": 8.108363674050797e-06, "loss": 0.3475, "step": 17597 }, { "epoch": 1.7891419276128508, "grad_norm": 0.26488977670669556, "learning_rate": 8.10808569216085e-06, "loss": 0.3851, "step": 17598 }, { "epoch": 1.7892435949572998, "grad_norm": 0.29825037717819214, "learning_rate": 8.107807694613109e-06, "loss": 0.3338, "step": 17599 }, { "epoch": 1.7893452623017487, "grad_norm": 0.2734663486480713, "learning_rate": 8.107529681408969e-06, "loss": 0.3545, "step": 17600 }, { "epoch": 1.7894469296461977, "grad_norm": 0.26872870326042175, "learning_rate": 8.107251652549834e-06, "loss": 0.3381, "step": 17601 }, { "epoch": 1.7895485969906466, "grad_norm": 0.2680809497833252, "learning_rate": 8.106973608037104e-06, "loss": 0.3862, "step": 17602 }, { "epoch": 1.7896502643350956, "grad_norm": 0.2825931906700134, "learning_rate": 8.106695547872177e-06, "loss": 0.3641, "step": 17603 }, { "epoch": 1.7897519316795445, "grad_norm": 0.27679452300071716, "learning_rate": 8.106417472056458e-06, "loss": 0.3434, "step": 17604 }, { "epoch": 1.7898535990239934, "grad_norm": 0.27007776498794556, "learning_rate": 8.106139380591346e-06, "loss": 0.3309, "step": 17605 }, { "epoch": 1.7899552663684424, "grad_norm": 0.2736372649669647, "learning_rate": 8.105861273478241e-06, "loss": 0.3362, "step": 17606 }, { "epoch": 1.7900569337128913, "grad_norm": 0.29907843470573425, "learning_rate": 8.105583150718545e-06, "loss": 0.3706, "step": 17607 }, { "epoch": 1.7901586010573403, "grad_norm": 0.28024572134017944, "learning_rate": 8.10530501231366e-06, "loss": 0.3559, "step": 17608 }, { "epoch": 1.7902602684017892, "grad_norm": 0.2547658681869507, "learning_rate": 8.105026858264986e-06, "loss": 0.3821, "step": 17609 }, { "epoch": 1.7903619357462381, "grad_norm": 0.28850749135017395, "learning_rate": 8.104748688573925e-06, "loss": 0.3551, "step": 17610 }, { "epoch": 1.7904636030906873, "grad_norm": 0.2828538119792938, "learning_rate": 8.104470503241877e-06, "loss": 0.3282, "step": 17611 }, { "epoch": 1.7905652704351362, "grad_norm": 0.28765517473220825, "learning_rate": 8.104192302270246e-06, "loss": 0.4204, "step": 17612 }, { "epoch": 1.7906669377795852, "grad_norm": 0.2877914309501648, "learning_rate": 8.103914085660429e-06, "loss": 0.396, "step": 17613 }, { "epoch": 1.7907686051240341, "grad_norm": 0.26913678646087646, "learning_rate": 8.103635853413833e-06, "loss": 0.349, "step": 17614 }, { "epoch": 1.790870272468483, "grad_norm": 0.2915576100349426, "learning_rate": 8.103357605531857e-06, "loss": 0.3888, "step": 17615 }, { "epoch": 1.7909719398129322, "grad_norm": 0.26778340339660645, "learning_rate": 8.1030793420159e-06, "loss": 0.3183, "step": 17616 }, { "epoch": 1.7910736071573812, "grad_norm": 0.2525674104690552, "learning_rate": 8.102801062867369e-06, "loss": 0.3733, "step": 17617 }, { "epoch": 1.7911752745018301, "grad_norm": 0.2815380096435547, "learning_rate": 8.102522768087663e-06, "loss": 0.3418, "step": 17618 }, { "epoch": 1.791276941846279, "grad_norm": 0.2738288342952728, "learning_rate": 8.102244457678185e-06, "loss": 0.3491, "step": 17619 }, { "epoch": 1.791378609190728, "grad_norm": 0.27614015340805054, "learning_rate": 8.101966131640338e-06, "loss": 0.3214, "step": 17620 }, { "epoch": 1.791480276535177, "grad_norm": 0.2747124433517456, "learning_rate": 8.10168778997552e-06, "loss": 0.3463, "step": 17621 }, { "epoch": 1.7915819438796259, "grad_norm": 0.27971842885017395, "learning_rate": 8.101409432685137e-06, "loss": 0.3414, "step": 17622 }, { "epoch": 1.7916836112240748, "grad_norm": 0.2864797115325928, "learning_rate": 8.101131059770591e-06, "loss": 0.3651, "step": 17623 }, { "epoch": 1.7917852785685238, "grad_norm": 0.2757561206817627, "learning_rate": 8.100852671233283e-06, "loss": 0.3301, "step": 17624 }, { "epoch": 1.7918869459129727, "grad_norm": 0.27486053109169006, "learning_rate": 8.100574267074617e-06, "loss": 0.3714, "step": 17625 }, { "epoch": 1.7919886132574216, "grad_norm": 0.31343817710876465, "learning_rate": 8.100295847295994e-06, "loss": 0.3999, "step": 17626 }, { "epoch": 1.7920902806018706, "grad_norm": 0.27748551964759827, "learning_rate": 8.100017411898817e-06, "loss": 0.39, "step": 17627 }, { "epoch": 1.7921919479463195, "grad_norm": 0.2869316339492798, "learning_rate": 8.09973896088449e-06, "loss": 0.3872, "step": 17628 }, { "epoch": 1.7922936152907685, "grad_norm": 0.27013328671455383, "learning_rate": 8.099460494254412e-06, "loss": 0.3637, "step": 17629 }, { "epoch": 1.7923952826352174, "grad_norm": 0.2816872298717499, "learning_rate": 8.099182012009991e-06, "loss": 0.3437, "step": 17630 }, { "epoch": 1.7924969499796666, "grad_norm": 0.31235915422439575, "learning_rate": 8.098903514152629e-06, "loss": 0.4359, "step": 17631 }, { "epoch": 1.7925986173241155, "grad_norm": 0.2838628888130188, "learning_rate": 8.098625000683724e-06, "loss": 0.3589, "step": 17632 }, { "epoch": 1.7927002846685645, "grad_norm": 0.26471754908561707, "learning_rate": 8.098346471604683e-06, "loss": 0.3774, "step": 17633 }, { "epoch": 1.7928019520130134, "grad_norm": 0.28600868582725525, "learning_rate": 8.09806792691691e-06, "loss": 0.3328, "step": 17634 }, { "epoch": 1.7929036193574623, "grad_norm": 0.3068191111087799, "learning_rate": 8.097789366621808e-06, "loss": 0.4063, "step": 17635 }, { "epoch": 1.7930052867019115, "grad_norm": 0.27002066373825073, "learning_rate": 8.097510790720778e-06, "loss": 0.3754, "step": 17636 }, { "epoch": 1.7931069540463604, "grad_norm": 0.2834111452102661, "learning_rate": 8.097232199215224e-06, "loss": 0.3789, "step": 17637 }, { "epoch": 1.7932086213908094, "grad_norm": 0.30339673161506653, "learning_rate": 8.09695359210655e-06, "loss": 0.3847, "step": 17638 }, { "epoch": 1.7933102887352583, "grad_norm": 0.28328680992126465, "learning_rate": 8.096674969396162e-06, "loss": 0.332, "step": 17639 }, { "epoch": 1.7934119560797073, "grad_norm": 0.2851046323776245, "learning_rate": 8.09639633108546e-06, "loss": 0.3305, "step": 17640 }, { "epoch": 1.7935136234241562, "grad_norm": 0.27539992332458496, "learning_rate": 8.096117677175852e-06, "loss": 0.3938, "step": 17641 }, { "epoch": 1.7936152907686052, "grad_norm": 0.27943921089172363, "learning_rate": 8.095839007668734e-06, "loss": 0.3568, "step": 17642 }, { "epoch": 1.793716958113054, "grad_norm": 0.2753530442714691, "learning_rate": 8.095560322565516e-06, "loss": 0.3277, "step": 17643 }, { "epoch": 1.793818625457503, "grad_norm": 0.26302385330200195, "learning_rate": 8.095281621867604e-06, "loss": 0.3635, "step": 17644 }, { "epoch": 1.793920292801952, "grad_norm": 0.26724785566329956, "learning_rate": 8.095002905576397e-06, "loss": 0.3473, "step": 17645 }, { "epoch": 1.794021960146401, "grad_norm": 0.31296414136886597, "learning_rate": 8.094724173693299e-06, "loss": 0.3729, "step": 17646 }, { "epoch": 1.7941236274908499, "grad_norm": 0.27628374099731445, "learning_rate": 8.094445426219716e-06, "loss": 0.3589, "step": 17647 }, { "epoch": 1.7942252948352988, "grad_norm": 0.25972500443458557, "learning_rate": 8.094166663157055e-06, "loss": 0.3471, "step": 17648 }, { "epoch": 1.7943269621797477, "grad_norm": 0.27608340978622437, "learning_rate": 8.093887884506717e-06, "loss": 0.3568, "step": 17649 }, { "epoch": 1.7944286295241967, "grad_norm": 0.2993727922439575, "learning_rate": 8.093609090270107e-06, "loss": 0.4082, "step": 17650 }, { "epoch": 1.7945302968686456, "grad_norm": 0.2891958951950073, "learning_rate": 8.093330280448627e-06, "loss": 0.3701, "step": 17651 }, { "epoch": 1.7946319642130948, "grad_norm": 0.28255388140678406, "learning_rate": 8.093051455043685e-06, "loss": 0.3413, "step": 17652 }, { "epoch": 1.7947336315575437, "grad_norm": 0.27937012910842896, "learning_rate": 8.092772614056686e-06, "loss": 0.3782, "step": 17653 }, { "epoch": 1.7948352989019927, "grad_norm": 0.28037768602371216, "learning_rate": 8.092493757489035e-06, "loss": 0.3752, "step": 17654 }, { "epoch": 1.7949369662464416, "grad_norm": 0.2740184962749481, "learning_rate": 8.092214885342133e-06, "loss": 0.3784, "step": 17655 }, { "epoch": 1.7950386335908906, "grad_norm": 0.2877614498138428, "learning_rate": 8.09193599761739e-06, "loss": 0.3593, "step": 17656 }, { "epoch": 1.7951403009353397, "grad_norm": 0.26234766840934753, "learning_rate": 8.091657094316204e-06, "loss": 0.3302, "step": 17657 }, { "epoch": 1.7952419682797887, "grad_norm": 0.27359211444854736, "learning_rate": 8.091378175439985e-06, "loss": 0.3746, "step": 17658 }, { "epoch": 1.7953436356242376, "grad_norm": 0.2605939209461212, "learning_rate": 8.09109924099014e-06, "loss": 0.3568, "step": 17659 }, { "epoch": 1.7954453029686865, "grad_norm": 0.28719621896743774, "learning_rate": 8.09082029096807e-06, "loss": 0.3634, "step": 17660 }, { "epoch": 1.7955469703131355, "grad_norm": 0.2908342480659485, "learning_rate": 8.090541325375183e-06, "loss": 0.385, "step": 17661 }, { "epoch": 1.7956486376575844, "grad_norm": 0.2664256989955902, "learning_rate": 8.09026234421288e-06, "loss": 0.3736, "step": 17662 }, { "epoch": 1.7957503050020334, "grad_norm": 0.2600212097167969, "learning_rate": 8.089983347482574e-06, "loss": 0.3297, "step": 17663 }, { "epoch": 1.7958519723464823, "grad_norm": 0.2675461173057556, "learning_rate": 8.089704335185665e-06, "loss": 0.3364, "step": 17664 }, { "epoch": 1.7959536396909312, "grad_norm": 0.2734503149986267, "learning_rate": 8.089425307323558e-06, "loss": 0.3796, "step": 17665 }, { "epoch": 1.7960553070353802, "grad_norm": 0.29189613461494446, "learning_rate": 8.089146263897661e-06, "loss": 0.3544, "step": 17666 }, { "epoch": 1.7961569743798291, "grad_norm": 0.27192890644073486, "learning_rate": 8.08886720490938e-06, "loss": 0.3464, "step": 17667 }, { "epoch": 1.796258641724278, "grad_norm": 0.28646960854530334, "learning_rate": 8.08858813036012e-06, "loss": 0.3406, "step": 17668 }, { "epoch": 1.796360309068727, "grad_norm": 0.2889789044857025, "learning_rate": 8.088309040251287e-06, "loss": 0.405, "step": 17669 }, { "epoch": 1.796461976413176, "grad_norm": 0.2626655697822571, "learning_rate": 8.088029934584286e-06, "loss": 0.333, "step": 17670 }, { "epoch": 1.796563643757625, "grad_norm": 0.26525118947029114, "learning_rate": 8.087750813360525e-06, "loss": 0.3821, "step": 17671 }, { "epoch": 1.796665311102074, "grad_norm": 0.27404502034187317, "learning_rate": 8.08747167658141e-06, "loss": 0.3598, "step": 17672 }, { "epoch": 1.796766978446523, "grad_norm": 0.3093450665473938, "learning_rate": 8.087192524248345e-06, "loss": 0.37, "step": 17673 }, { "epoch": 1.796868645790972, "grad_norm": 0.28001144528388977, "learning_rate": 8.086913356362737e-06, "loss": 0.3601, "step": 17674 }, { "epoch": 1.7969703131354209, "grad_norm": 0.28769394755363464, "learning_rate": 8.086634172925994e-06, "loss": 0.3904, "step": 17675 }, { "epoch": 1.7970719804798698, "grad_norm": 0.29704993963241577, "learning_rate": 8.086354973939522e-06, "loss": 0.3812, "step": 17676 }, { "epoch": 1.797173647824319, "grad_norm": 0.27695268392562866, "learning_rate": 8.086075759404726e-06, "loss": 0.3724, "step": 17677 }, { "epoch": 1.797275315168768, "grad_norm": 0.27268609404563904, "learning_rate": 8.085796529323014e-06, "loss": 0.335, "step": 17678 }, { "epoch": 1.7973769825132169, "grad_norm": 0.2724544405937195, "learning_rate": 8.085517283695794e-06, "loss": 0.3537, "step": 17679 }, { "epoch": 1.7974786498576658, "grad_norm": 0.2884314954280853, "learning_rate": 8.085238022524469e-06, "loss": 0.3636, "step": 17680 }, { "epoch": 1.7975803172021148, "grad_norm": 0.2769625782966614, "learning_rate": 8.08495874581045e-06, "loss": 0.3389, "step": 17681 }, { "epoch": 1.7976819845465637, "grad_norm": 0.2887183129787445, "learning_rate": 8.08467945355514e-06, "loss": 0.3313, "step": 17682 }, { "epoch": 1.7977836518910126, "grad_norm": 0.28213462233543396, "learning_rate": 8.084400145759947e-06, "loss": 0.3324, "step": 17683 }, { "epoch": 1.7978853192354616, "grad_norm": 0.26852887868881226, "learning_rate": 8.084120822426282e-06, "loss": 0.3622, "step": 17684 }, { "epoch": 1.7979869865799105, "grad_norm": 0.2616860568523407, "learning_rate": 8.083841483555547e-06, "loss": 0.3529, "step": 17685 }, { "epoch": 1.7980886539243595, "grad_norm": 0.29666808247566223, "learning_rate": 8.083562129149154e-06, "loss": 0.3621, "step": 17686 }, { "epoch": 1.7981903212688084, "grad_norm": 0.30250978469848633, "learning_rate": 8.083282759208506e-06, "loss": 0.3445, "step": 17687 }, { "epoch": 1.7982919886132573, "grad_norm": 0.296762079000473, "learning_rate": 8.083003373735013e-06, "loss": 0.3427, "step": 17688 }, { "epoch": 1.7983936559577063, "grad_norm": 0.2849757969379425, "learning_rate": 8.08272397273008e-06, "loss": 0.3479, "step": 17689 }, { "epoch": 1.7984953233021552, "grad_norm": 0.2949605882167816, "learning_rate": 8.082444556195118e-06, "loss": 0.3378, "step": 17690 }, { "epoch": 1.7985969906466042, "grad_norm": 0.30654048919677734, "learning_rate": 8.082165124131531e-06, "loss": 0.3426, "step": 17691 }, { "epoch": 1.7986986579910533, "grad_norm": 0.2899562120437622, "learning_rate": 8.081885676540728e-06, "loss": 0.3533, "step": 17692 }, { "epoch": 1.7988003253355023, "grad_norm": 0.28459036350250244, "learning_rate": 8.08160621342412e-06, "loss": 0.3754, "step": 17693 }, { "epoch": 1.7989019926799512, "grad_norm": 0.27035900950431824, "learning_rate": 8.08132673478311e-06, "loss": 0.3441, "step": 17694 }, { "epoch": 1.7990036600244002, "grad_norm": 0.2629634737968445, "learning_rate": 8.08104724061911e-06, "loss": 0.3829, "step": 17695 }, { "epoch": 1.799105327368849, "grad_norm": 0.28739622235298157, "learning_rate": 8.080767730933525e-06, "loss": 0.4011, "step": 17696 }, { "epoch": 1.799206994713298, "grad_norm": 0.2823467552661896, "learning_rate": 8.080488205727766e-06, "loss": 0.3335, "step": 17697 }, { "epoch": 1.7993086620577472, "grad_norm": 0.28105682134628296, "learning_rate": 8.080208665003237e-06, "loss": 0.3644, "step": 17698 }, { "epoch": 1.7994103294021961, "grad_norm": 0.28458356857299805, "learning_rate": 8.07992910876135e-06, "loss": 0.3801, "step": 17699 }, { "epoch": 1.799511996746645, "grad_norm": 0.2856563627719879, "learning_rate": 8.07964953700351e-06, "loss": 0.3902, "step": 17700 }, { "epoch": 1.799613664091094, "grad_norm": 0.2622195780277252, "learning_rate": 8.079369949731131e-06, "loss": 0.3291, "step": 17701 }, { "epoch": 1.799715331435543, "grad_norm": 0.29460641741752625, "learning_rate": 8.079090346945616e-06, "loss": 0.3469, "step": 17702 }, { "epoch": 1.799816998779992, "grad_norm": 0.2800463140010834, "learning_rate": 8.078810728648377e-06, "loss": 0.3631, "step": 17703 }, { "epoch": 1.7999186661244408, "grad_norm": 0.27976489067077637, "learning_rate": 8.07853109484082e-06, "loss": 0.3566, "step": 17704 }, { "epoch": 1.8000203334688898, "grad_norm": 0.25380316376686096, "learning_rate": 8.078251445524354e-06, "loss": 0.3469, "step": 17705 }, { "epoch": 1.8001220008133387, "grad_norm": 0.296804279088974, "learning_rate": 8.07797178070039e-06, "loss": 0.3433, "step": 17706 }, { "epoch": 1.8002236681577877, "grad_norm": 0.3013385832309723, "learning_rate": 8.077692100370335e-06, "loss": 0.3447, "step": 17707 }, { "epoch": 1.8003253355022366, "grad_norm": 0.2831592559814453, "learning_rate": 8.077412404535598e-06, "loss": 0.3988, "step": 17708 }, { "epoch": 1.8004270028466856, "grad_norm": 0.2923840582370758, "learning_rate": 8.07713269319759e-06, "loss": 0.3699, "step": 17709 }, { "epoch": 1.8005286701911345, "grad_norm": 0.29804688692092896, "learning_rate": 8.076852966357719e-06, "loss": 0.3585, "step": 17710 }, { "epoch": 1.8006303375355834, "grad_norm": 0.29876697063446045, "learning_rate": 8.076573224017391e-06, "loss": 0.3797, "step": 17711 }, { "epoch": 1.8007320048800324, "grad_norm": 0.28264662623405457, "learning_rate": 8.076293466178022e-06, "loss": 0.3633, "step": 17712 }, { "epoch": 1.8008336722244815, "grad_norm": 0.30675891041755676, "learning_rate": 8.076013692841015e-06, "loss": 0.3174, "step": 17713 }, { "epoch": 1.8009353395689305, "grad_norm": 0.2749154567718506, "learning_rate": 8.075733904007783e-06, "loss": 0.3564, "step": 17714 }, { "epoch": 1.8010370069133794, "grad_norm": 0.27683916687965393, "learning_rate": 8.075454099679735e-06, "loss": 0.3711, "step": 17715 }, { "epoch": 1.8011386742578284, "grad_norm": 0.2790522277355194, "learning_rate": 8.07517427985828e-06, "loss": 0.345, "step": 17716 }, { "epoch": 1.8012403416022773, "grad_norm": 0.27924227714538574, "learning_rate": 8.074894444544827e-06, "loss": 0.3834, "step": 17717 }, { "epoch": 1.8013420089467265, "grad_norm": 0.2849026620388031, "learning_rate": 8.074614593740786e-06, "loss": 0.3634, "step": 17718 }, { "epoch": 1.8014436762911754, "grad_norm": 0.29555028676986694, "learning_rate": 8.074334727447568e-06, "loss": 0.3737, "step": 17719 }, { "epoch": 1.8015453436356244, "grad_norm": 0.2960297465324402, "learning_rate": 8.074054845666582e-06, "loss": 0.371, "step": 17720 }, { "epoch": 1.8016470109800733, "grad_norm": 0.2672255337238312, "learning_rate": 8.07377494839924e-06, "loss": 0.3303, "step": 17721 }, { "epoch": 1.8017486783245222, "grad_norm": 0.2816237807273865, "learning_rate": 8.073495035646947e-06, "loss": 0.348, "step": 17722 }, { "epoch": 1.8018503456689712, "grad_norm": 0.2996600270271301, "learning_rate": 8.073215107411118e-06, "loss": 0.361, "step": 17723 }, { "epoch": 1.8019520130134201, "grad_norm": 0.2655590772628784, "learning_rate": 8.072935163693162e-06, "loss": 0.3473, "step": 17724 }, { "epoch": 1.802053680357869, "grad_norm": 0.2757731080055237, "learning_rate": 8.072655204494489e-06, "loss": 0.3789, "step": 17725 }, { "epoch": 1.802155347702318, "grad_norm": 0.2902708649635315, "learning_rate": 8.072375229816509e-06, "loss": 0.3798, "step": 17726 }, { "epoch": 1.802257015046767, "grad_norm": 0.27901527285575867, "learning_rate": 8.072095239660632e-06, "loss": 0.3641, "step": 17727 }, { "epoch": 1.8023586823912159, "grad_norm": 0.28927621245384216, "learning_rate": 8.071815234028268e-06, "loss": 0.3725, "step": 17728 }, { "epoch": 1.8024603497356648, "grad_norm": 0.3153776228427887, "learning_rate": 8.071535212920832e-06, "loss": 0.3377, "step": 17729 }, { "epoch": 1.8025620170801138, "grad_norm": 0.2773902118206024, "learning_rate": 8.071255176339729e-06, "loss": 0.3512, "step": 17730 }, { "epoch": 1.8026636844245627, "grad_norm": 0.2930152416229248, "learning_rate": 8.070975124286375e-06, "loss": 0.3644, "step": 17731 }, { "epoch": 1.8027653517690116, "grad_norm": 0.30034908652305603, "learning_rate": 8.070695056762177e-06, "loss": 0.3897, "step": 17732 }, { "epoch": 1.8028670191134608, "grad_norm": 0.28878283500671387, "learning_rate": 8.070414973768546e-06, "loss": 0.3734, "step": 17733 }, { "epoch": 1.8029686864579098, "grad_norm": 0.2921368479728699, "learning_rate": 8.070134875306894e-06, "loss": 0.3511, "step": 17734 }, { "epoch": 1.8030703538023587, "grad_norm": 0.2799437642097473, "learning_rate": 8.069854761378632e-06, "loss": 0.3584, "step": 17735 }, { "epoch": 1.8031720211468076, "grad_norm": 0.2577817738056183, "learning_rate": 8.069574631985172e-06, "loss": 0.3313, "step": 17736 }, { "epoch": 1.8032736884912566, "grad_norm": 0.2920793890953064, "learning_rate": 8.069294487127924e-06, "loss": 0.3834, "step": 17737 }, { "epoch": 1.8033753558357055, "grad_norm": 0.2664307653903961, "learning_rate": 8.0690143268083e-06, "loss": 0.3464, "step": 17738 }, { "epoch": 1.8034770231801547, "grad_norm": 0.3024786412715912, "learning_rate": 8.06873415102771e-06, "loss": 0.3998, "step": 17739 }, { "epoch": 1.8035786905246036, "grad_norm": 0.2636546790599823, "learning_rate": 8.068453959787568e-06, "loss": 0.3525, "step": 17740 }, { "epoch": 1.8036803578690526, "grad_norm": 0.27966076135635376, "learning_rate": 8.068173753089285e-06, "loss": 0.3716, "step": 17741 }, { "epoch": 1.8037820252135015, "grad_norm": 0.2899412512779236, "learning_rate": 8.06789353093427e-06, "loss": 0.3846, "step": 17742 }, { "epoch": 1.8038836925579504, "grad_norm": 0.27407628297805786, "learning_rate": 8.067613293323938e-06, "loss": 0.3496, "step": 17743 }, { "epoch": 1.8039853599023994, "grad_norm": 0.2604186236858368, "learning_rate": 8.067333040259698e-06, "loss": 0.3543, "step": 17744 }, { "epoch": 1.8040870272468483, "grad_norm": 0.2897993326187134, "learning_rate": 8.067052771742963e-06, "loss": 0.3575, "step": 17745 }, { "epoch": 1.8041886945912973, "grad_norm": 0.27587059140205383, "learning_rate": 8.066772487775145e-06, "loss": 0.3517, "step": 17746 }, { "epoch": 1.8042903619357462, "grad_norm": 0.27551910281181335, "learning_rate": 8.066492188357655e-06, "loss": 0.3492, "step": 17747 }, { "epoch": 1.8043920292801952, "grad_norm": 0.28006309270858765, "learning_rate": 8.066211873491908e-06, "loss": 0.362, "step": 17748 }, { "epoch": 1.804493696624644, "grad_norm": 0.2868136465549469, "learning_rate": 8.065931543179313e-06, "loss": 0.3644, "step": 17749 }, { "epoch": 1.804595363969093, "grad_norm": 0.2564800977706909, "learning_rate": 8.065651197421285e-06, "loss": 0.3373, "step": 17750 }, { "epoch": 1.804697031313542, "grad_norm": 0.2765505611896515, "learning_rate": 8.065370836219234e-06, "loss": 0.3769, "step": 17751 }, { "epoch": 1.804798698657991, "grad_norm": 0.2792983949184418, "learning_rate": 8.065090459574572e-06, "loss": 0.3648, "step": 17752 }, { "epoch": 1.8049003660024399, "grad_norm": 0.2820664048194885, "learning_rate": 8.064810067488714e-06, "loss": 0.3867, "step": 17753 }, { "epoch": 1.805002033346889, "grad_norm": 0.2733495235443115, "learning_rate": 8.06452965996307e-06, "loss": 0.3496, "step": 17754 }, { "epoch": 1.805103700691338, "grad_norm": 0.2764248847961426, "learning_rate": 8.064249236999055e-06, "loss": 0.3772, "step": 17755 }, { "epoch": 1.805205368035787, "grad_norm": 0.2934161126613617, "learning_rate": 8.063968798598079e-06, "loss": 0.3658, "step": 17756 }, { "epoch": 1.8053070353802358, "grad_norm": 0.2695944309234619, "learning_rate": 8.063688344761558e-06, "loss": 0.3414, "step": 17757 }, { "epoch": 1.8054087027246848, "grad_norm": 0.2952617108821869, "learning_rate": 8.063407875490902e-06, "loss": 0.3886, "step": 17758 }, { "epoch": 1.805510370069134, "grad_norm": 0.3028087913990021, "learning_rate": 8.063127390787527e-06, "loss": 0.3592, "step": 17759 }, { "epoch": 1.805612037413583, "grad_norm": 0.2956688106060028, "learning_rate": 8.062846890652841e-06, "loss": 0.3499, "step": 17760 }, { "epoch": 1.8057137047580318, "grad_norm": 0.27513396739959717, "learning_rate": 8.062566375088263e-06, "loss": 0.3598, "step": 17761 }, { "epoch": 1.8058153721024808, "grad_norm": 0.26805052161216736, "learning_rate": 8.062285844095201e-06, "loss": 0.3487, "step": 17762 }, { "epoch": 1.8059170394469297, "grad_norm": 0.3385004699230194, "learning_rate": 8.062005297675073e-06, "loss": 0.4091, "step": 17763 }, { "epoch": 1.8060187067913787, "grad_norm": 0.29979199171066284, "learning_rate": 8.061724735829288e-06, "loss": 0.3511, "step": 17764 }, { "epoch": 1.8061203741358276, "grad_norm": 0.28354158997535706, "learning_rate": 8.061444158559261e-06, "loss": 0.3511, "step": 17765 }, { "epoch": 1.8062220414802765, "grad_norm": 0.25924012064933777, "learning_rate": 8.061163565866409e-06, "loss": 0.4044, "step": 17766 }, { "epoch": 1.8063237088247255, "grad_norm": 0.2705894708633423, "learning_rate": 8.06088295775214e-06, "loss": 0.3753, "step": 17767 }, { "epoch": 1.8064253761691744, "grad_norm": 0.27765849232673645, "learning_rate": 8.06060233421787e-06, "loss": 0.36, "step": 17768 }, { "epoch": 1.8065270435136234, "grad_norm": 0.2755289673805237, "learning_rate": 8.060321695265015e-06, "loss": 0.3498, "step": 17769 }, { "epoch": 1.8066287108580723, "grad_norm": 0.2831696569919586, "learning_rate": 8.060041040894984e-06, "loss": 0.3635, "step": 17770 }, { "epoch": 1.8067303782025212, "grad_norm": 0.2658166289329529, "learning_rate": 8.059760371109194e-06, "loss": 0.3415, "step": 17771 }, { "epoch": 1.8068320455469702, "grad_norm": 0.25743696093559265, "learning_rate": 8.059479685909058e-06, "loss": 0.3413, "step": 17772 }, { "epoch": 1.8069337128914191, "grad_norm": 0.26276710629463196, "learning_rate": 8.05919898529599e-06, "loss": 0.362, "step": 17773 }, { "epoch": 1.8070353802358683, "grad_norm": 0.2422522008419037, "learning_rate": 8.058918269271407e-06, "loss": 0.3518, "step": 17774 }, { "epoch": 1.8071370475803172, "grad_norm": 0.2945030927658081, "learning_rate": 8.05863753783672e-06, "loss": 0.3855, "step": 17775 }, { "epoch": 1.8072387149247662, "grad_norm": 0.26440542936325073, "learning_rate": 8.058356790993343e-06, "loss": 0.3579, "step": 17776 }, { "epoch": 1.8073403822692151, "grad_norm": 0.2548571825027466, "learning_rate": 8.05807602874269e-06, "loss": 0.3243, "step": 17777 }, { "epoch": 1.807442049613664, "grad_norm": 0.2821008265018463, "learning_rate": 8.057795251086177e-06, "loss": 0.3346, "step": 17778 }, { "epoch": 1.807543716958113, "grad_norm": 0.28432562947273254, "learning_rate": 8.057514458025221e-06, "loss": 0.3318, "step": 17779 }, { "epoch": 1.8076453843025622, "grad_norm": 0.2658080756664276, "learning_rate": 8.05723364956123e-06, "loss": 0.3516, "step": 17780 }, { "epoch": 1.807747051647011, "grad_norm": 0.2682093381881714, "learning_rate": 8.056952825695625e-06, "loss": 0.3801, "step": 17781 }, { "epoch": 1.80784871899146, "grad_norm": 0.28161540627479553, "learning_rate": 8.056671986429818e-06, "loss": 0.335, "step": 17782 }, { "epoch": 1.807950386335909, "grad_norm": 0.27408307790756226, "learning_rate": 8.056391131765223e-06, "loss": 0.3789, "step": 17783 }, { "epoch": 1.808052053680358, "grad_norm": 0.29497793316841125, "learning_rate": 8.056110261703256e-06, "loss": 0.3566, "step": 17784 }, { "epoch": 1.8081537210248069, "grad_norm": 0.2622810900211334, "learning_rate": 8.055829376245333e-06, "loss": 0.3296, "step": 17785 }, { "epoch": 1.8082553883692558, "grad_norm": 0.26826441287994385, "learning_rate": 8.055548475392867e-06, "loss": 0.3761, "step": 17786 }, { "epoch": 1.8083570557137048, "grad_norm": 0.2868983745574951, "learning_rate": 8.055267559147272e-06, "loss": 0.3814, "step": 17787 }, { "epoch": 1.8084587230581537, "grad_norm": 0.28331276774406433, "learning_rate": 8.054986627509966e-06, "loss": 0.3979, "step": 17788 }, { "epoch": 1.8085603904026026, "grad_norm": 0.25999578833580017, "learning_rate": 8.054705680482365e-06, "loss": 0.3735, "step": 17789 }, { "epoch": 1.8086620577470516, "grad_norm": 0.28896522521972656, "learning_rate": 8.054424718065882e-06, "loss": 0.3808, "step": 17790 }, { "epoch": 1.8087637250915005, "grad_norm": 0.2784673571586609, "learning_rate": 8.054143740261933e-06, "loss": 0.3728, "step": 17791 }, { "epoch": 1.8088653924359495, "grad_norm": 0.26981258392333984, "learning_rate": 8.053862747071933e-06, "loss": 0.3772, "step": 17792 }, { "epoch": 1.8089670597803984, "grad_norm": 0.2700099050998688, "learning_rate": 8.053581738497298e-06, "loss": 0.3504, "step": 17793 }, { "epoch": 1.8090687271248473, "grad_norm": 0.2552802562713623, "learning_rate": 8.053300714539445e-06, "loss": 0.3377, "step": 17794 }, { "epoch": 1.8091703944692965, "grad_norm": 0.2922629415988922, "learning_rate": 8.053019675199787e-06, "loss": 0.3371, "step": 17795 }, { "epoch": 1.8092720618137454, "grad_norm": 0.27006569504737854, "learning_rate": 8.052738620479742e-06, "loss": 0.333, "step": 17796 }, { "epoch": 1.8093737291581944, "grad_norm": 0.28774523735046387, "learning_rate": 8.052457550380729e-06, "loss": 0.3911, "step": 17797 }, { "epoch": 1.8094753965026433, "grad_norm": 0.2769710123538971, "learning_rate": 8.052176464904155e-06, "loss": 0.3454, "step": 17798 }, { "epoch": 1.8095770638470923, "grad_norm": 0.27136608958244324, "learning_rate": 8.051895364051443e-06, "loss": 0.3552, "step": 17799 }, { "epoch": 1.8096787311915414, "grad_norm": 0.2870734632015228, "learning_rate": 8.051614247824006e-06, "loss": 0.3316, "step": 17800 }, { "epoch": 1.8097803985359904, "grad_norm": 0.27114033699035645, "learning_rate": 8.051333116223265e-06, "loss": 0.3556, "step": 17801 }, { "epoch": 1.8098820658804393, "grad_norm": 0.27309319376945496, "learning_rate": 8.05105196925063e-06, "loss": 0.3637, "step": 17802 }, { "epoch": 1.8099837332248883, "grad_norm": 0.2920272946357727, "learning_rate": 8.050770806907522e-06, "loss": 0.3425, "step": 17803 }, { "epoch": 1.8100854005693372, "grad_norm": 0.27528107166290283, "learning_rate": 8.050489629195355e-06, "loss": 0.3189, "step": 17804 }, { "epoch": 1.8101870679137861, "grad_norm": 0.2670007646083832, "learning_rate": 8.050208436115545e-06, "loss": 0.3414, "step": 17805 }, { "epoch": 1.810288735258235, "grad_norm": 0.2693277597427368, "learning_rate": 8.049927227669511e-06, "loss": 0.3898, "step": 17806 }, { "epoch": 1.810390402602684, "grad_norm": 0.27963903546333313, "learning_rate": 8.04964600385867e-06, "loss": 0.3539, "step": 17807 }, { "epoch": 1.810492069947133, "grad_norm": 0.2688065469264984, "learning_rate": 8.049364764684432e-06, "loss": 0.3513, "step": 17808 }, { "epoch": 1.810593737291582, "grad_norm": 0.2782614529132843, "learning_rate": 8.049083510148224e-06, "loss": 0.389, "step": 17809 }, { "epoch": 1.8106954046360308, "grad_norm": 0.2552974820137024, "learning_rate": 8.048802240251457e-06, "loss": 0.3163, "step": 17810 }, { "epoch": 1.8107970719804798, "grad_norm": 0.29765406250953674, "learning_rate": 8.048520954995547e-06, "loss": 0.3596, "step": 17811 }, { "epoch": 1.8108987393249287, "grad_norm": 0.2648915946483612, "learning_rate": 8.048239654381914e-06, "loss": 0.3624, "step": 17812 }, { "epoch": 1.8110004066693777, "grad_norm": 0.2679738402366638, "learning_rate": 8.047958338411975e-06, "loss": 0.4069, "step": 17813 }, { "epoch": 1.8111020740138266, "grad_norm": 0.2777928411960602, "learning_rate": 8.047677007087143e-06, "loss": 0.3662, "step": 17814 }, { "epoch": 1.8112037413582758, "grad_norm": 0.2836422920227051, "learning_rate": 8.047395660408841e-06, "loss": 0.3321, "step": 17815 }, { "epoch": 1.8113054087027247, "grad_norm": 0.3424239158630371, "learning_rate": 8.047114298378485e-06, "loss": 0.3765, "step": 17816 }, { "epoch": 1.8114070760471737, "grad_norm": 0.28026217222213745, "learning_rate": 8.046832920997489e-06, "loss": 0.3843, "step": 17817 }, { "epoch": 1.8115087433916226, "grad_norm": 0.2621987760066986, "learning_rate": 8.046551528267272e-06, "loss": 0.3568, "step": 17818 }, { "epoch": 1.8116104107360715, "grad_norm": 0.2717829644680023, "learning_rate": 8.046270120189255e-06, "loss": 0.3141, "step": 17819 }, { "epoch": 1.8117120780805205, "grad_norm": 0.2730671763420105, "learning_rate": 8.04598869676485e-06, "loss": 0.3672, "step": 17820 }, { "epoch": 1.8118137454249696, "grad_norm": 0.2747778594493866, "learning_rate": 8.04570725799548e-06, "loss": 0.3732, "step": 17821 }, { "epoch": 1.8119154127694186, "grad_norm": 0.2737196981906891, "learning_rate": 8.04542580388256e-06, "loss": 0.3483, "step": 17822 }, { "epoch": 1.8120170801138675, "grad_norm": 0.2675020098686218, "learning_rate": 8.04514433442751e-06, "loss": 0.3875, "step": 17823 }, { "epoch": 1.8121187474583165, "grad_norm": 0.29108819365501404, "learning_rate": 8.044862849631745e-06, "loss": 0.3363, "step": 17824 }, { "epoch": 1.8122204148027654, "grad_norm": 0.28849881887435913, "learning_rate": 8.044581349496684e-06, "loss": 0.3867, "step": 17825 }, { "epoch": 1.8123220821472144, "grad_norm": 0.2643357515335083, "learning_rate": 8.044299834023747e-06, "loss": 0.3673, "step": 17826 }, { "epoch": 1.8124237494916633, "grad_norm": 0.2569074034690857, "learning_rate": 8.04401830321435e-06, "loss": 0.3428, "step": 17827 }, { "epoch": 1.8125254168361122, "grad_norm": 0.26787006855010986, "learning_rate": 8.043736757069912e-06, "loss": 0.3549, "step": 17828 }, { "epoch": 1.8126270841805612, "grad_norm": 0.2790603041648865, "learning_rate": 8.043455195591851e-06, "loss": 0.3736, "step": 17829 }, { "epoch": 1.8127287515250101, "grad_norm": 0.26460322737693787, "learning_rate": 8.043173618781587e-06, "loss": 0.3477, "step": 17830 }, { "epoch": 1.812830418869459, "grad_norm": 0.29818710684776306, "learning_rate": 8.042892026640538e-06, "loss": 0.4142, "step": 17831 }, { "epoch": 1.812932086213908, "grad_norm": 0.2501280605792999, "learning_rate": 8.042610419170121e-06, "loss": 0.3583, "step": 17832 }, { "epoch": 1.813033753558357, "grad_norm": 0.2756044864654541, "learning_rate": 8.042328796371757e-06, "loss": 0.3676, "step": 17833 }, { "epoch": 1.8131354209028059, "grad_norm": 0.2760467827320099, "learning_rate": 8.042047158246861e-06, "loss": 0.3758, "step": 17834 }, { "epoch": 1.8132370882472548, "grad_norm": 0.28101930022239685, "learning_rate": 8.041765504796856e-06, "loss": 0.3622, "step": 17835 }, { "epoch": 1.813338755591704, "grad_norm": 0.26178213953971863, "learning_rate": 8.04148383602316e-06, "loss": 0.3497, "step": 17836 }, { "epoch": 1.813440422936153, "grad_norm": 0.24337440729141235, "learning_rate": 8.041202151927191e-06, "loss": 0.344, "step": 17837 }, { "epoch": 1.8135420902806019, "grad_norm": 0.2673787772655487, "learning_rate": 8.040920452510368e-06, "loss": 0.3503, "step": 17838 }, { "epoch": 1.8136437576250508, "grad_norm": 0.27771228551864624, "learning_rate": 8.04063873777411e-06, "loss": 0.346, "step": 17839 }, { "epoch": 1.8137454249694998, "grad_norm": 0.2735012173652649, "learning_rate": 8.040357007719837e-06, "loss": 0.3889, "step": 17840 }, { "epoch": 1.813847092313949, "grad_norm": 0.27137890458106995, "learning_rate": 8.040075262348968e-06, "loss": 0.3762, "step": 17841 }, { "epoch": 1.8139487596583979, "grad_norm": 0.2679233253002167, "learning_rate": 8.03979350166292e-06, "loss": 0.3325, "step": 17842 }, { "epoch": 1.8140504270028468, "grad_norm": 0.27777841687202454, "learning_rate": 8.039511725663117e-06, "loss": 0.3075, "step": 17843 }, { "epoch": 1.8141520943472957, "grad_norm": 0.2794532775878906, "learning_rate": 8.039229934350975e-06, "loss": 0.3475, "step": 17844 }, { "epoch": 1.8142537616917447, "grad_norm": 0.26516222953796387, "learning_rate": 8.038948127727916e-06, "loss": 0.3382, "step": 17845 }, { "epoch": 1.8143554290361936, "grad_norm": 0.27545464038848877, "learning_rate": 8.038666305795359e-06, "loss": 0.3528, "step": 17846 }, { "epoch": 1.8144570963806426, "grad_norm": 0.26288723945617676, "learning_rate": 8.038384468554722e-06, "loss": 0.3682, "step": 17847 }, { "epoch": 1.8145587637250915, "grad_norm": 0.27689552307128906, "learning_rate": 8.038102616007425e-06, "loss": 0.3472, "step": 17848 }, { "epoch": 1.8146604310695404, "grad_norm": 0.3009498715400696, "learning_rate": 8.03782074815489e-06, "loss": 0.3198, "step": 17849 }, { "epoch": 1.8147620984139894, "grad_norm": 0.28181272745132446, "learning_rate": 8.037538864998538e-06, "loss": 0.3617, "step": 17850 }, { "epoch": 1.8148637657584383, "grad_norm": 0.2602069079875946, "learning_rate": 8.037256966539785e-06, "loss": 0.3382, "step": 17851 }, { "epoch": 1.8149654331028873, "grad_norm": 0.26550737023353577, "learning_rate": 8.036975052780054e-06, "loss": 0.3559, "step": 17852 }, { "epoch": 1.8150671004473362, "grad_norm": 0.2941248416900635, "learning_rate": 8.036693123720764e-06, "loss": 0.367, "step": 17853 }, { "epoch": 1.8151687677917852, "grad_norm": 0.27183887362480164, "learning_rate": 8.036411179363335e-06, "loss": 0.3484, "step": 17854 }, { "epoch": 1.815270435136234, "grad_norm": 0.28636473417282104, "learning_rate": 8.036129219709188e-06, "loss": 0.3548, "step": 17855 }, { "epoch": 1.8153721024806833, "grad_norm": 0.26841607689857483, "learning_rate": 8.035847244759745e-06, "loss": 0.3368, "step": 17856 }, { "epoch": 1.8154737698251322, "grad_norm": 0.2634454667568207, "learning_rate": 8.035565254516423e-06, "loss": 0.3669, "step": 17857 }, { "epoch": 1.8155754371695811, "grad_norm": 0.2723906636238098, "learning_rate": 8.035283248980647e-06, "loss": 0.3178, "step": 17858 }, { "epoch": 1.81567710451403, "grad_norm": 0.2768501937389374, "learning_rate": 8.035001228153833e-06, "loss": 0.3928, "step": 17859 }, { "epoch": 1.815778771858479, "grad_norm": 0.2569775879383087, "learning_rate": 8.034719192037405e-06, "loss": 0.3511, "step": 17860 }, { "epoch": 1.815880439202928, "grad_norm": 0.2789634168148041, "learning_rate": 8.034437140632784e-06, "loss": 0.3422, "step": 17861 }, { "epoch": 1.8159821065473771, "grad_norm": 0.2716583013534546, "learning_rate": 8.034155073941387e-06, "loss": 0.3754, "step": 17862 }, { "epoch": 1.816083773891826, "grad_norm": 0.2841648757457733, "learning_rate": 8.033872991964641e-06, "loss": 0.3473, "step": 17863 }, { "epoch": 1.816185441236275, "grad_norm": 0.276699036359787, "learning_rate": 8.033590894703961e-06, "loss": 0.377, "step": 17864 }, { "epoch": 1.816287108580724, "grad_norm": 0.2843618392944336, "learning_rate": 8.033308782160774e-06, "loss": 0.3555, "step": 17865 }, { "epoch": 1.816388775925173, "grad_norm": 0.26950517296791077, "learning_rate": 8.033026654336496e-06, "loss": 0.3424, "step": 17866 }, { "epoch": 1.8164904432696218, "grad_norm": 0.28112977743148804, "learning_rate": 8.032744511232552e-06, "loss": 0.3641, "step": 17867 }, { "epoch": 1.8165921106140708, "grad_norm": 0.27455398440361023, "learning_rate": 8.03246235285036e-06, "loss": 0.3461, "step": 17868 }, { "epoch": 1.8166937779585197, "grad_norm": 0.2491142898797989, "learning_rate": 8.032180179191342e-06, "loss": 0.3687, "step": 17869 }, { "epoch": 1.8167954453029687, "grad_norm": 0.2534092366695404, "learning_rate": 8.031897990256924e-06, "loss": 0.3809, "step": 17870 }, { "epoch": 1.8168971126474176, "grad_norm": 0.24972720444202423, "learning_rate": 8.031615786048524e-06, "loss": 0.3557, "step": 17871 }, { "epoch": 1.8169987799918665, "grad_norm": 0.28263863921165466, "learning_rate": 8.031333566567563e-06, "loss": 0.3677, "step": 17872 }, { "epoch": 1.8171004473363155, "grad_norm": 0.2539368271827698, "learning_rate": 8.031051331815465e-06, "loss": 0.3422, "step": 17873 }, { "epoch": 1.8172021146807644, "grad_norm": 0.2776089906692505, "learning_rate": 8.030769081793648e-06, "loss": 0.4081, "step": 17874 }, { "epoch": 1.8173037820252134, "grad_norm": 0.2630215883255005, "learning_rate": 8.030486816503539e-06, "loss": 0.3595, "step": 17875 }, { "epoch": 1.8174054493696623, "grad_norm": 0.2711470127105713, "learning_rate": 8.030204535946557e-06, "loss": 0.355, "step": 17876 }, { "epoch": 1.8175071167141115, "grad_norm": 0.2668159306049347, "learning_rate": 8.029922240124126e-06, "loss": 0.3469, "step": 17877 }, { "epoch": 1.8176087840585604, "grad_norm": 0.2805328071117401, "learning_rate": 8.029639929037665e-06, "loss": 0.3765, "step": 17878 }, { "epoch": 1.8177104514030094, "grad_norm": 0.25206926465034485, "learning_rate": 8.029357602688598e-06, "loss": 0.3563, "step": 17879 }, { "epoch": 1.8178121187474583, "grad_norm": 0.2675440013408661, "learning_rate": 8.029075261078347e-06, "loss": 0.3513, "step": 17880 }, { "epoch": 1.8179137860919072, "grad_norm": 0.28148090839385986, "learning_rate": 8.028792904208335e-06, "loss": 0.3597, "step": 17881 }, { "epoch": 1.8180154534363564, "grad_norm": 0.27759993076324463, "learning_rate": 8.028510532079985e-06, "loss": 0.336, "step": 17882 }, { "epoch": 1.8181171207808053, "grad_norm": 0.27529358863830566, "learning_rate": 8.028228144694719e-06, "loss": 0.361, "step": 17883 }, { "epoch": 1.8182187881252543, "grad_norm": 0.2673918306827545, "learning_rate": 8.027945742053957e-06, "loss": 0.346, "step": 17884 }, { "epoch": 1.8183204554697032, "grad_norm": 0.2720523476600647, "learning_rate": 8.027663324159126e-06, "loss": 0.3149, "step": 17885 }, { "epoch": 1.8184221228141522, "grad_norm": 0.2722933292388916, "learning_rate": 8.027380891011647e-06, "loss": 0.3397, "step": 17886 }, { "epoch": 1.818523790158601, "grad_norm": 0.27185899019241333, "learning_rate": 8.02709844261294e-06, "loss": 0.3871, "step": 17887 }, { "epoch": 1.81862545750305, "grad_norm": 0.25080668926239014, "learning_rate": 8.026815978964433e-06, "loss": 0.3373, "step": 17888 }, { "epoch": 1.818727124847499, "grad_norm": 0.28684377670288086, "learning_rate": 8.026533500067544e-06, "loss": 0.3833, "step": 17889 }, { "epoch": 1.818828792191948, "grad_norm": 0.29178130626678467, "learning_rate": 8.0262510059237e-06, "loss": 0.3902, "step": 17890 }, { "epoch": 1.8189304595363969, "grad_norm": 0.2684820592403412, "learning_rate": 8.025968496534322e-06, "loss": 0.3306, "step": 17891 }, { "epoch": 1.8190321268808458, "grad_norm": 0.2651631236076355, "learning_rate": 8.025685971900834e-06, "loss": 0.3587, "step": 17892 }, { "epoch": 1.8191337942252948, "grad_norm": 0.3079184889793396, "learning_rate": 8.025403432024658e-06, "loss": 0.3934, "step": 17893 }, { "epoch": 1.8192354615697437, "grad_norm": 0.3118070960044861, "learning_rate": 8.02512087690722e-06, "loss": 0.3874, "step": 17894 }, { "epoch": 1.8193371289141926, "grad_norm": 0.26803070306777954, "learning_rate": 8.024838306549942e-06, "loss": 0.3668, "step": 17895 }, { "epoch": 1.8194387962586416, "grad_norm": 0.2743052840232849, "learning_rate": 8.024555720954248e-06, "loss": 0.3687, "step": 17896 }, { "epoch": 1.8195404636030907, "grad_norm": 0.28140804171562195, "learning_rate": 8.024273120121559e-06, "loss": 0.3729, "step": 17897 }, { "epoch": 1.8196421309475397, "grad_norm": 0.27318209409713745, "learning_rate": 8.023990504053302e-06, "loss": 0.3761, "step": 17898 }, { "epoch": 1.8197437982919886, "grad_norm": 0.2699766457080841, "learning_rate": 8.023707872750897e-06, "loss": 0.3435, "step": 17899 }, { "epoch": 1.8198454656364376, "grad_norm": 0.27951350808143616, "learning_rate": 8.023425226215773e-06, "loss": 0.3612, "step": 17900 }, { "epoch": 1.8199471329808865, "grad_norm": 0.28794005513191223, "learning_rate": 8.02314256444935e-06, "loss": 0.3704, "step": 17901 }, { "epoch": 1.8200488003253354, "grad_norm": 0.2951611280441284, "learning_rate": 8.022859887453055e-06, "loss": 0.3564, "step": 17902 }, { "epoch": 1.8201504676697846, "grad_norm": 0.2785404920578003, "learning_rate": 8.02257719522831e-06, "loss": 0.345, "step": 17903 }, { "epoch": 1.8202521350142336, "grad_norm": 0.2861516773700714, "learning_rate": 8.022294487776536e-06, "loss": 0.3877, "step": 17904 }, { "epoch": 1.8203538023586825, "grad_norm": 0.24975703656673431, "learning_rate": 8.022011765099163e-06, "loss": 0.325, "step": 17905 }, { "epoch": 1.8204554697031314, "grad_norm": 0.28161245584487915, "learning_rate": 8.021729027197614e-06, "loss": 0.341, "step": 17906 }, { "epoch": 1.8205571370475804, "grad_norm": 0.28569597005844116, "learning_rate": 8.02144627407331e-06, "loss": 0.3677, "step": 17907 }, { "epoch": 1.8206588043920293, "grad_norm": 0.28850364685058594, "learning_rate": 8.021163505727678e-06, "loss": 0.3638, "step": 17908 }, { "epoch": 1.8207604717364783, "grad_norm": 0.27774256467819214, "learning_rate": 8.020880722162144e-06, "loss": 0.3638, "step": 17909 }, { "epoch": 1.8208621390809272, "grad_norm": 0.27732235193252563, "learning_rate": 8.020597923378128e-06, "loss": 0.3615, "step": 17910 }, { "epoch": 1.8209638064253761, "grad_norm": 0.2866485118865967, "learning_rate": 8.02031510937706e-06, "loss": 0.3613, "step": 17911 }, { "epoch": 1.821065473769825, "grad_norm": 0.2823662757873535, "learning_rate": 8.02003228016036e-06, "loss": 0.387, "step": 17912 }, { "epoch": 1.821167141114274, "grad_norm": 0.2869933545589447, "learning_rate": 8.019749435729458e-06, "loss": 0.3755, "step": 17913 }, { "epoch": 1.821268808458723, "grad_norm": 0.28272366523742676, "learning_rate": 8.019466576085775e-06, "loss": 0.3711, "step": 17914 }, { "epoch": 1.821370475803172, "grad_norm": 0.27638766169548035, "learning_rate": 8.019183701230735e-06, "loss": 0.3712, "step": 17915 }, { "epoch": 1.8214721431476208, "grad_norm": 0.27819639444351196, "learning_rate": 8.018900811165766e-06, "loss": 0.3302, "step": 17916 }, { "epoch": 1.8215738104920698, "grad_norm": 0.29432985186576843, "learning_rate": 8.018617905892292e-06, "loss": 0.3568, "step": 17917 }, { "epoch": 1.821675477836519, "grad_norm": 0.28553521633148193, "learning_rate": 8.01833498541174e-06, "loss": 0.3847, "step": 17918 }, { "epoch": 1.821777145180968, "grad_norm": 0.26597851514816284, "learning_rate": 8.01805204972553e-06, "loss": 0.3489, "step": 17919 }, { "epoch": 1.8218788125254168, "grad_norm": 0.2986414134502411, "learning_rate": 8.017769098835094e-06, "loss": 0.3238, "step": 17920 }, { "epoch": 1.8219804798698658, "grad_norm": 0.31404727697372437, "learning_rate": 8.017486132741854e-06, "loss": 0.3568, "step": 17921 }, { "epoch": 1.8220821472143147, "grad_norm": 0.2890923023223877, "learning_rate": 8.017203151447234e-06, "loss": 0.3353, "step": 17922 }, { "epoch": 1.8221838145587639, "grad_norm": 0.27655714750289917, "learning_rate": 8.016920154952666e-06, "loss": 0.3206, "step": 17923 }, { "epoch": 1.8222854819032128, "grad_norm": 0.28512895107269287, "learning_rate": 8.016637143259567e-06, "loss": 0.3817, "step": 17924 }, { "epoch": 1.8223871492476618, "grad_norm": 0.2858106195926666, "learning_rate": 8.016354116369368e-06, "loss": 0.372, "step": 17925 }, { "epoch": 1.8224888165921107, "grad_norm": 0.28460216522216797, "learning_rate": 8.016071074283494e-06, "loss": 0.3471, "step": 17926 }, { "epoch": 1.8225904839365596, "grad_norm": 0.289376437664032, "learning_rate": 8.015788017003369e-06, "loss": 0.3831, "step": 17927 }, { "epoch": 1.8226921512810086, "grad_norm": 0.29362544417381287, "learning_rate": 8.015504944530423e-06, "loss": 0.3964, "step": 17928 }, { "epoch": 1.8227938186254575, "grad_norm": 0.28275033831596375, "learning_rate": 8.01522185686608e-06, "loss": 0.3595, "step": 17929 }, { "epoch": 1.8228954859699065, "grad_norm": 0.2675510048866272, "learning_rate": 8.014938754011764e-06, "loss": 0.3657, "step": 17930 }, { "epoch": 1.8229971533143554, "grad_norm": 0.2974469065666199, "learning_rate": 8.014655635968905e-06, "loss": 0.3577, "step": 17931 }, { "epoch": 1.8230988206588044, "grad_norm": 0.2893132269382477, "learning_rate": 8.014372502738926e-06, "loss": 0.3562, "step": 17932 }, { "epoch": 1.8232004880032533, "grad_norm": 0.2658003270626068, "learning_rate": 8.014089354323253e-06, "loss": 0.3573, "step": 17933 }, { "epoch": 1.8233021553477022, "grad_norm": 0.2987016439437866, "learning_rate": 8.013806190723316e-06, "loss": 0.3728, "step": 17934 }, { "epoch": 1.8234038226921512, "grad_norm": 0.26036345958709717, "learning_rate": 8.013523011940539e-06, "loss": 0.3503, "step": 17935 }, { "epoch": 1.8235054900366001, "grad_norm": 0.2642783224582672, "learning_rate": 8.013239817976351e-06, "loss": 0.3909, "step": 17936 }, { "epoch": 1.823607157381049, "grad_norm": 0.2766302227973938, "learning_rate": 8.012956608832174e-06, "loss": 0.3481, "step": 17937 }, { "epoch": 1.8237088247254982, "grad_norm": 0.27552446722984314, "learning_rate": 8.012673384509439e-06, "loss": 0.362, "step": 17938 }, { "epoch": 1.8238104920699472, "grad_norm": 0.2621472179889679, "learning_rate": 8.012390145009571e-06, "loss": 0.3791, "step": 17939 }, { "epoch": 1.823912159414396, "grad_norm": 0.259034126996994, "learning_rate": 8.012106890333999e-06, "loss": 0.3446, "step": 17940 }, { "epoch": 1.824013826758845, "grad_norm": 0.2938830256462097, "learning_rate": 8.011823620484148e-06, "loss": 0.3649, "step": 17941 }, { "epoch": 1.824115494103294, "grad_norm": 0.31998318433761597, "learning_rate": 8.011540335461443e-06, "loss": 0.3864, "step": 17942 }, { "epoch": 1.824217161447743, "grad_norm": 0.27342015504837036, "learning_rate": 8.011257035267314e-06, "loss": 0.3689, "step": 17943 }, { "epoch": 1.824318828792192, "grad_norm": 0.26188039779663086, "learning_rate": 8.01097371990319e-06, "loss": 0.3498, "step": 17944 }, { "epoch": 1.824420496136641, "grad_norm": 0.25845327973365784, "learning_rate": 8.010690389370494e-06, "loss": 0.3723, "step": 17945 }, { "epoch": 1.82452216348109, "grad_norm": 0.28992050886154175, "learning_rate": 8.010407043670655e-06, "loss": 0.3578, "step": 17946 }, { "epoch": 1.824623830825539, "grad_norm": 0.2916232943534851, "learning_rate": 8.010123682805101e-06, "loss": 0.3355, "step": 17947 }, { "epoch": 1.8247254981699879, "grad_norm": 0.28556063771247864, "learning_rate": 8.00984030677526e-06, "loss": 0.3707, "step": 17948 }, { "epoch": 1.8248271655144368, "grad_norm": 0.2647605240345001, "learning_rate": 8.00955691558256e-06, "loss": 0.3448, "step": 17949 }, { "epoch": 1.8249288328588857, "grad_norm": 0.2750565707683563, "learning_rate": 8.009273509228423e-06, "loss": 0.3688, "step": 17950 }, { "epoch": 1.8250305002033347, "grad_norm": 0.2846059501171112, "learning_rate": 8.008990087714285e-06, "loss": 0.3505, "step": 17951 }, { "epoch": 1.8251321675477836, "grad_norm": 0.2889816164970398, "learning_rate": 8.00870665104157e-06, "loss": 0.3537, "step": 17952 }, { "epoch": 1.8252338348922326, "grad_norm": 0.24327202141284943, "learning_rate": 8.008423199211703e-06, "loss": 0.371, "step": 17953 }, { "epoch": 1.8253355022366815, "grad_norm": 0.29135870933532715, "learning_rate": 8.008139732226117e-06, "loss": 0.3351, "step": 17954 }, { "epoch": 1.8254371695811304, "grad_norm": 0.2584019601345062, "learning_rate": 8.007856250086237e-06, "loss": 0.3344, "step": 17955 }, { "epoch": 1.8255388369255794, "grad_norm": 0.27992114424705505, "learning_rate": 8.007572752793492e-06, "loss": 0.3321, "step": 17956 }, { "epoch": 1.8256405042700283, "grad_norm": 0.2898850440979004, "learning_rate": 8.007289240349312e-06, "loss": 0.4189, "step": 17957 }, { "epoch": 1.8257421716144773, "grad_norm": 0.24884340167045593, "learning_rate": 8.007005712755123e-06, "loss": 0.3483, "step": 17958 }, { "epoch": 1.8258438389589264, "grad_norm": 0.2792447507381439, "learning_rate": 8.006722170012351e-06, "loss": 0.3621, "step": 17959 }, { "epoch": 1.8259455063033754, "grad_norm": 0.27937668561935425, "learning_rate": 8.00643861212243e-06, "loss": 0.3359, "step": 17960 }, { "epoch": 1.8260471736478243, "grad_norm": 0.26682960987091064, "learning_rate": 8.006155039086784e-06, "loss": 0.3574, "step": 17961 }, { "epoch": 1.8261488409922733, "grad_norm": 0.26750364899635315, "learning_rate": 8.005871450906844e-06, "loss": 0.3652, "step": 17962 }, { "epoch": 1.8262505083367222, "grad_norm": 0.26413604617118835, "learning_rate": 8.005587847584038e-06, "loss": 0.3457, "step": 17963 }, { "epoch": 1.8263521756811714, "grad_norm": 0.2567245364189148, "learning_rate": 8.005304229119795e-06, "loss": 0.3605, "step": 17964 }, { "epoch": 1.8264538430256203, "grad_norm": 0.26935330033302307, "learning_rate": 8.005020595515543e-06, "loss": 0.3534, "step": 17965 }, { "epoch": 1.8265555103700692, "grad_norm": 0.2899347245693207, "learning_rate": 8.004736946772711e-06, "loss": 0.3832, "step": 17966 }, { "epoch": 1.8266571777145182, "grad_norm": 0.27521029114723206, "learning_rate": 8.00445328289273e-06, "loss": 0.3541, "step": 17967 }, { "epoch": 1.8267588450589671, "grad_norm": 0.27077221870422363, "learning_rate": 8.004169603877026e-06, "loss": 0.3609, "step": 17968 }, { "epoch": 1.826860512403416, "grad_norm": 0.2777841091156006, "learning_rate": 8.00388590972703e-06, "loss": 0.3251, "step": 17969 }, { "epoch": 1.826962179747865, "grad_norm": 0.268733412027359, "learning_rate": 8.003602200444168e-06, "loss": 0.3506, "step": 17970 }, { "epoch": 1.827063847092314, "grad_norm": 0.28655827045440674, "learning_rate": 8.003318476029875e-06, "loss": 0.3488, "step": 17971 }, { "epoch": 1.827165514436763, "grad_norm": 0.2828577756881714, "learning_rate": 8.003034736485577e-06, "loss": 0.3873, "step": 17972 }, { "epoch": 1.8272671817812118, "grad_norm": 0.2561606466770172, "learning_rate": 8.002750981812702e-06, "loss": 0.3264, "step": 17973 }, { "epoch": 1.8273688491256608, "grad_norm": 0.27247166633605957, "learning_rate": 8.002467212012682e-06, "loss": 0.3405, "step": 17974 }, { "epoch": 1.8274705164701097, "grad_norm": 0.2710098326206207, "learning_rate": 8.002183427086944e-06, "loss": 0.3666, "step": 17975 }, { "epoch": 1.8275721838145587, "grad_norm": 0.26175054907798767, "learning_rate": 8.001899627036921e-06, "loss": 0.3659, "step": 17976 }, { "epoch": 1.8276738511590076, "grad_norm": 0.26852333545684814, "learning_rate": 8.001615811864044e-06, "loss": 0.394, "step": 17977 }, { "epoch": 1.8277755185034565, "grad_norm": 0.2744646370410919, "learning_rate": 8.001331981569735e-06, "loss": 0.3779, "step": 17978 }, { "epoch": 1.8278771858479057, "grad_norm": 0.2753269374370575, "learning_rate": 8.001048136155432e-06, "loss": 0.3539, "step": 17979 }, { "epoch": 1.8279788531923546, "grad_norm": 0.2990582585334778, "learning_rate": 8.000764275622559e-06, "loss": 0.3436, "step": 17980 }, { "epoch": 1.8280805205368036, "grad_norm": 0.28940388560295105, "learning_rate": 8.00048039997255e-06, "loss": 0.3826, "step": 17981 }, { "epoch": 1.8281821878812525, "grad_norm": 0.27814191579818726, "learning_rate": 8.000196509206835e-06, "loss": 0.3403, "step": 17982 }, { "epoch": 1.8282838552257015, "grad_norm": 0.2513794004917145, "learning_rate": 7.99991260332684e-06, "loss": 0.3521, "step": 17983 }, { "epoch": 1.8283855225701506, "grad_norm": 0.29391738772392273, "learning_rate": 7.999628682334e-06, "loss": 0.3733, "step": 17984 }, { "epoch": 1.8284871899145996, "grad_norm": 0.2797911465167999, "learning_rate": 7.999344746229743e-06, "loss": 0.3476, "step": 17985 }, { "epoch": 1.8285888572590485, "grad_norm": 0.2550829350948334, "learning_rate": 7.9990607950155e-06, "loss": 0.3449, "step": 17986 }, { "epoch": 1.8286905246034975, "grad_norm": 0.2742222547531128, "learning_rate": 7.998776828692703e-06, "loss": 0.3664, "step": 17987 }, { "epoch": 1.8287921919479464, "grad_norm": 0.26010987162590027, "learning_rate": 7.99849284726278e-06, "loss": 0.3468, "step": 17988 }, { "epoch": 1.8288938592923953, "grad_norm": 0.27008751034736633, "learning_rate": 7.99820885072716e-06, "loss": 0.3961, "step": 17989 }, { "epoch": 1.8289955266368443, "grad_norm": 0.28898537158966064, "learning_rate": 7.99792483908728e-06, "loss": 0.3631, "step": 17990 }, { "epoch": 1.8290971939812932, "grad_norm": 0.2676798403263092, "learning_rate": 7.997640812344565e-06, "loss": 0.3582, "step": 17991 }, { "epoch": 1.8291988613257422, "grad_norm": 0.258886456489563, "learning_rate": 7.997356770500448e-06, "loss": 0.3456, "step": 17992 }, { "epoch": 1.829300528670191, "grad_norm": 0.278221994638443, "learning_rate": 7.99707271355636e-06, "loss": 0.3689, "step": 17993 }, { "epoch": 1.82940219601464, "grad_norm": 0.2826783359050751, "learning_rate": 7.996788641513733e-06, "loss": 0.3397, "step": 17994 }, { "epoch": 1.829503863359089, "grad_norm": 0.2868598699569702, "learning_rate": 7.996504554373995e-06, "loss": 0.3706, "step": 17995 }, { "epoch": 1.829605530703538, "grad_norm": 0.26483315229415894, "learning_rate": 7.99622045213858e-06, "loss": 0.3299, "step": 17996 }, { "epoch": 1.8297071980479869, "grad_norm": 0.27064642310142517, "learning_rate": 7.99593633480892e-06, "loss": 0.3318, "step": 17997 }, { "epoch": 1.8298088653924358, "grad_norm": 0.2841903567314148, "learning_rate": 7.995652202386443e-06, "loss": 0.3885, "step": 17998 }, { "epoch": 1.8299105327368848, "grad_norm": 0.2877318859100342, "learning_rate": 7.995368054872584e-06, "loss": 0.3563, "step": 17999 }, { "epoch": 1.830012200081334, "grad_norm": 0.25620171427726746, "learning_rate": 7.995083892268772e-06, "loss": 0.39, "step": 18000 }, { "epoch": 1.8301138674257829, "grad_norm": 0.2733658254146576, "learning_rate": 7.994799714576436e-06, "loss": 0.3546, "step": 18001 }, { "epoch": 1.8302155347702318, "grad_norm": 0.2791402339935303, "learning_rate": 7.994515521797012e-06, "loss": 0.3447, "step": 18002 }, { "epoch": 1.8303172021146807, "grad_norm": 0.2819608151912689, "learning_rate": 7.994231313931932e-06, "loss": 0.3967, "step": 18003 }, { "epoch": 1.8304188694591297, "grad_norm": 0.2393859177827835, "learning_rate": 7.993947090982626e-06, "loss": 0.3406, "step": 18004 }, { "epoch": 1.8305205368035788, "grad_norm": 0.2846309542655945, "learning_rate": 7.993662852950525e-06, "loss": 0.3785, "step": 18005 }, { "epoch": 1.8306222041480278, "grad_norm": 0.27676814794540405, "learning_rate": 7.993378599837063e-06, "loss": 0.3506, "step": 18006 }, { "epoch": 1.8307238714924767, "grad_norm": 0.2680177390575409, "learning_rate": 7.99309433164367e-06, "loss": 0.3428, "step": 18007 }, { "epoch": 1.8308255388369257, "grad_norm": 0.2644941210746765, "learning_rate": 7.99281004837178e-06, "loss": 0.3403, "step": 18008 }, { "epoch": 1.8309272061813746, "grad_norm": 0.2811431288719177, "learning_rate": 7.992525750022823e-06, "loss": 0.3723, "step": 18009 }, { "epoch": 1.8310288735258236, "grad_norm": 0.289745956659317, "learning_rate": 7.992241436598233e-06, "loss": 0.33, "step": 18010 }, { "epoch": 1.8311305408702725, "grad_norm": 0.25935089588165283, "learning_rate": 7.991957108099442e-06, "loss": 0.3894, "step": 18011 }, { "epoch": 1.8312322082147214, "grad_norm": 0.28038153052330017, "learning_rate": 7.991672764527881e-06, "loss": 0.3839, "step": 18012 }, { "epoch": 1.8313338755591704, "grad_norm": 0.29409104585647583, "learning_rate": 7.991388405884985e-06, "loss": 0.3402, "step": 18013 }, { "epoch": 1.8314355429036193, "grad_norm": 0.2689124345779419, "learning_rate": 7.991104032172183e-06, "loss": 0.3392, "step": 18014 }, { "epoch": 1.8315372102480683, "grad_norm": 0.2937428653240204, "learning_rate": 7.990819643390912e-06, "loss": 0.399, "step": 18015 }, { "epoch": 1.8316388775925172, "grad_norm": 0.2735884189605713, "learning_rate": 7.990535239542601e-06, "loss": 0.388, "step": 18016 }, { "epoch": 1.8317405449369661, "grad_norm": 0.2663764953613281, "learning_rate": 7.990250820628684e-06, "loss": 0.38, "step": 18017 }, { "epoch": 1.831842212281415, "grad_norm": 0.2926095426082611, "learning_rate": 7.989966386650596e-06, "loss": 0.3482, "step": 18018 }, { "epoch": 1.831943879625864, "grad_norm": 0.2560633718967438, "learning_rate": 7.989681937609765e-06, "loss": 0.3471, "step": 18019 }, { "epoch": 1.8320455469703132, "grad_norm": 0.2942519783973694, "learning_rate": 7.98939747350763e-06, "loss": 0.3532, "step": 18020 }, { "epoch": 1.8321472143147621, "grad_norm": 0.25907275080680847, "learning_rate": 7.989112994345618e-06, "loss": 0.365, "step": 18021 }, { "epoch": 1.832248881659211, "grad_norm": 0.29898229241371155, "learning_rate": 7.988828500125166e-06, "loss": 0.41, "step": 18022 }, { "epoch": 1.83235054900366, "grad_norm": 0.28219833970069885, "learning_rate": 7.988543990847708e-06, "loss": 0.3584, "step": 18023 }, { "epoch": 1.832452216348109, "grad_norm": 0.268880158662796, "learning_rate": 7.988259466514674e-06, "loss": 0.3713, "step": 18024 }, { "epoch": 1.8325538836925581, "grad_norm": 0.2680380344390869, "learning_rate": 7.987974927127498e-06, "loss": 0.3325, "step": 18025 }, { "epoch": 1.832655551037007, "grad_norm": 0.254884272813797, "learning_rate": 7.987690372687615e-06, "loss": 0.3501, "step": 18026 }, { "epoch": 1.832757218381456, "grad_norm": 0.3060966432094574, "learning_rate": 7.987405803196457e-06, "loss": 0.3507, "step": 18027 }, { "epoch": 1.832858885725905, "grad_norm": 0.2880839705467224, "learning_rate": 7.987121218655461e-06, "loss": 0.3406, "step": 18028 }, { "epoch": 1.8329605530703539, "grad_norm": 0.2713621258735657, "learning_rate": 7.986836619066057e-06, "loss": 0.3609, "step": 18029 }, { "epoch": 1.8330622204148028, "grad_norm": 0.2671704590320587, "learning_rate": 7.98655200442968e-06, "loss": 0.3451, "step": 18030 }, { "epoch": 1.8331638877592518, "grad_norm": 0.2830808460712433, "learning_rate": 7.986267374747762e-06, "loss": 0.366, "step": 18031 }, { "epoch": 1.8332655551037007, "grad_norm": 0.2896807789802551, "learning_rate": 7.985982730021741e-06, "loss": 0.3771, "step": 18032 }, { "epoch": 1.8333672224481496, "grad_norm": 0.2919365465641022, "learning_rate": 7.985698070253048e-06, "loss": 0.3591, "step": 18033 }, { "epoch": 1.8334688897925986, "grad_norm": 0.27924737334251404, "learning_rate": 7.985413395443117e-06, "loss": 0.3296, "step": 18034 }, { "epoch": 1.8335705571370475, "grad_norm": 0.2803104519844055, "learning_rate": 7.985128705593384e-06, "loss": 0.3491, "step": 18035 }, { "epoch": 1.8336722244814965, "grad_norm": 0.2836187481880188, "learning_rate": 7.98484400070528e-06, "loss": 0.3867, "step": 18036 }, { "epoch": 1.8337738918259454, "grad_norm": 0.30272963643074036, "learning_rate": 7.984559280780242e-06, "loss": 0.3676, "step": 18037 }, { "epoch": 1.8338755591703944, "grad_norm": 0.28221914172172546, "learning_rate": 7.984274545819703e-06, "loss": 0.3599, "step": 18038 }, { "epoch": 1.8339772265148433, "grad_norm": 0.2913881540298462, "learning_rate": 7.9839897958251e-06, "loss": 0.397, "step": 18039 }, { "epoch": 1.8340788938592922, "grad_norm": 0.2957814335823059, "learning_rate": 7.983705030797865e-06, "loss": 0.3472, "step": 18040 }, { "epoch": 1.8341805612037414, "grad_norm": 0.26674193143844604, "learning_rate": 7.98342025073943e-06, "loss": 0.3704, "step": 18041 }, { "epoch": 1.8342822285481903, "grad_norm": 0.2988957464694977, "learning_rate": 7.983135455651236e-06, "loss": 0.358, "step": 18042 }, { "epoch": 1.8343838958926393, "grad_norm": 0.2836105525493622, "learning_rate": 7.982850645534715e-06, "loss": 0.3419, "step": 18043 }, { "epoch": 1.8344855632370882, "grad_norm": 0.28979259729385376, "learning_rate": 7.9825658203913e-06, "loss": 0.3744, "step": 18044 }, { "epoch": 1.8345872305815372, "grad_norm": 0.27445003390312195, "learning_rate": 7.982280980222427e-06, "loss": 0.3288, "step": 18045 }, { "epoch": 1.8346888979259863, "grad_norm": 0.27136412262916565, "learning_rate": 7.98199612502953e-06, "loss": 0.3633, "step": 18046 }, { "epoch": 1.8347905652704353, "grad_norm": 0.26604488492012024, "learning_rate": 7.981711254814047e-06, "loss": 0.3916, "step": 18047 }, { "epoch": 1.8348922326148842, "grad_norm": 0.2768523693084717, "learning_rate": 7.981426369577411e-06, "loss": 0.3842, "step": 18048 }, { "epoch": 1.8349938999593332, "grad_norm": 0.27244555950164795, "learning_rate": 7.981141469321057e-06, "loss": 0.39, "step": 18049 }, { "epoch": 1.835095567303782, "grad_norm": 0.27879253029823303, "learning_rate": 7.98085655404642e-06, "loss": 0.3357, "step": 18050 }, { "epoch": 1.835197234648231, "grad_norm": 0.25281500816345215, "learning_rate": 7.980571623754937e-06, "loss": 0.3373, "step": 18051 }, { "epoch": 1.83529890199268, "grad_norm": 0.2828572690486908, "learning_rate": 7.980286678448043e-06, "loss": 0.3613, "step": 18052 }, { "epoch": 1.835400569337129, "grad_norm": 0.282160222530365, "learning_rate": 7.980001718127173e-06, "loss": 0.4026, "step": 18053 }, { "epoch": 1.8355022366815779, "grad_norm": 0.27646610140800476, "learning_rate": 7.97971674279376e-06, "loss": 0.3544, "step": 18054 }, { "epoch": 1.8356039040260268, "grad_norm": 0.2880265414714813, "learning_rate": 7.979431752449245e-06, "loss": 0.3589, "step": 18055 }, { "epoch": 1.8357055713704757, "grad_norm": 0.276178777217865, "learning_rate": 7.979146747095059e-06, "loss": 0.3683, "step": 18056 }, { "epoch": 1.8358072387149247, "grad_norm": 0.27255356311798096, "learning_rate": 7.978861726732641e-06, "loss": 0.3741, "step": 18057 }, { "epoch": 1.8359089060593736, "grad_norm": 0.27854007482528687, "learning_rate": 7.978576691363424e-06, "loss": 0.3646, "step": 18058 }, { "epoch": 1.8360105734038226, "grad_norm": 0.30321285128593445, "learning_rate": 7.978291640988847e-06, "loss": 0.3778, "step": 18059 }, { "epoch": 1.8361122407482715, "grad_norm": 0.2754313349723816, "learning_rate": 7.978006575610343e-06, "loss": 0.349, "step": 18060 }, { "epoch": 1.8362139080927207, "grad_norm": 0.273872047662735, "learning_rate": 7.977721495229352e-06, "loss": 0.3542, "step": 18061 }, { "epoch": 1.8363155754371696, "grad_norm": 0.272614449262619, "learning_rate": 7.977436399847304e-06, "loss": 0.3483, "step": 18062 }, { "epoch": 1.8364172427816186, "grad_norm": 0.2680436670780182, "learning_rate": 7.97715128946564e-06, "loss": 0.3593, "step": 18063 }, { "epoch": 1.8365189101260675, "grad_norm": 0.27592533826828003, "learning_rate": 7.976866164085798e-06, "loss": 0.3654, "step": 18064 }, { "epoch": 1.8366205774705164, "grad_norm": 0.2801904082298279, "learning_rate": 7.97658102370921e-06, "loss": 0.3469, "step": 18065 }, { "epoch": 1.8367222448149656, "grad_norm": 0.27400535345077515, "learning_rate": 7.976295868337313e-06, "loss": 0.3584, "step": 18066 }, { "epoch": 1.8368239121594145, "grad_norm": 0.2651316523551941, "learning_rate": 7.976010697971547e-06, "loss": 0.3483, "step": 18067 }, { "epoch": 1.8369255795038635, "grad_norm": 0.27015870809555054, "learning_rate": 7.975725512613344e-06, "loss": 0.3533, "step": 18068 }, { "epoch": 1.8370272468483124, "grad_norm": 0.27601155638694763, "learning_rate": 7.975440312264145e-06, "loss": 0.3434, "step": 18069 }, { "epoch": 1.8371289141927614, "grad_norm": 0.27448272705078125, "learning_rate": 7.975155096925384e-06, "loss": 0.3605, "step": 18070 }, { "epoch": 1.8372305815372103, "grad_norm": 0.2846493721008301, "learning_rate": 7.974869866598498e-06, "loss": 0.3393, "step": 18071 }, { "epoch": 1.8373322488816592, "grad_norm": 0.2743033468723297, "learning_rate": 7.974584621284926e-06, "loss": 0.3304, "step": 18072 }, { "epoch": 1.8374339162261082, "grad_norm": 0.2654605805873871, "learning_rate": 7.974299360986102e-06, "loss": 0.3406, "step": 18073 }, { "epoch": 1.8375355835705571, "grad_norm": 0.2776185870170593, "learning_rate": 7.974014085703465e-06, "loss": 0.3408, "step": 18074 }, { "epoch": 1.837637250915006, "grad_norm": 0.2744572162628174, "learning_rate": 7.973728795438454e-06, "loss": 0.36, "step": 18075 }, { "epoch": 1.837738918259455, "grad_norm": 0.26755452156066895, "learning_rate": 7.9734434901925e-06, "loss": 0.354, "step": 18076 }, { "epoch": 1.837840585603904, "grad_norm": 0.2606358826160431, "learning_rate": 7.973158169967046e-06, "loss": 0.3918, "step": 18077 }, { "epoch": 1.837942252948353, "grad_norm": 0.2859191596508026, "learning_rate": 7.972872834763529e-06, "loss": 0.325, "step": 18078 }, { "epoch": 1.8380439202928018, "grad_norm": 0.27755802869796753, "learning_rate": 7.972587484583383e-06, "loss": 0.3503, "step": 18079 }, { "epoch": 1.8381455876372508, "grad_norm": 0.28855040669441223, "learning_rate": 7.972302119428051e-06, "loss": 0.3736, "step": 18080 }, { "epoch": 1.8382472549816997, "grad_norm": 0.24808011949062347, "learning_rate": 7.972016739298964e-06, "loss": 0.3591, "step": 18081 }, { "epoch": 1.8383489223261489, "grad_norm": 0.26220861077308655, "learning_rate": 7.971731344197564e-06, "loss": 0.3372, "step": 18082 }, { "epoch": 1.8384505896705978, "grad_norm": 0.288442462682724, "learning_rate": 7.971445934125288e-06, "loss": 0.3568, "step": 18083 }, { "epoch": 1.8385522570150468, "grad_norm": 0.2704014778137207, "learning_rate": 7.971160509083572e-06, "loss": 0.3608, "step": 18084 }, { "epoch": 1.8386539243594957, "grad_norm": 0.25752437114715576, "learning_rate": 7.970875069073856e-06, "loss": 0.3492, "step": 18085 }, { "epoch": 1.8387555917039446, "grad_norm": 0.28673115372657776, "learning_rate": 7.970589614097578e-06, "loss": 0.3559, "step": 18086 }, { "epoch": 1.8388572590483938, "grad_norm": 0.2702634036540985, "learning_rate": 7.970304144156176e-06, "loss": 0.3372, "step": 18087 }, { "epoch": 1.8389589263928428, "grad_norm": 0.28566688299179077, "learning_rate": 7.970018659251087e-06, "loss": 0.3485, "step": 18088 }, { "epoch": 1.8390605937372917, "grad_norm": 0.3138720691204071, "learning_rate": 7.96973315938375e-06, "loss": 0.361, "step": 18089 }, { "epoch": 1.8391622610817406, "grad_norm": 0.2792051136493683, "learning_rate": 7.969447644555604e-06, "loss": 0.3691, "step": 18090 }, { "epoch": 1.8392639284261896, "grad_norm": 0.2637958228588104, "learning_rate": 7.969162114768083e-06, "loss": 0.3314, "step": 18091 }, { "epoch": 1.8393655957706385, "grad_norm": 0.27450400590896606, "learning_rate": 7.968876570022631e-06, "loss": 0.3591, "step": 18092 }, { "epoch": 1.8394672631150875, "grad_norm": 0.2648088037967682, "learning_rate": 7.968591010320685e-06, "loss": 0.3539, "step": 18093 }, { "epoch": 1.8395689304595364, "grad_norm": 0.274168998003006, "learning_rate": 7.968305435663682e-06, "loss": 0.3876, "step": 18094 }, { "epoch": 1.8396705978039853, "grad_norm": 0.2780781686306, "learning_rate": 7.968019846053063e-06, "loss": 0.3729, "step": 18095 }, { "epoch": 1.8397722651484343, "grad_norm": 0.26954159140586853, "learning_rate": 7.967734241490263e-06, "loss": 0.3711, "step": 18096 }, { "epoch": 1.8398739324928832, "grad_norm": 0.2735937237739563, "learning_rate": 7.967448621976726e-06, "loss": 0.3304, "step": 18097 }, { "epoch": 1.8399755998373322, "grad_norm": 0.255809485912323, "learning_rate": 7.967162987513885e-06, "loss": 0.3472, "step": 18098 }, { "epoch": 1.840077267181781, "grad_norm": 0.27320796251296997, "learning_rate": 7.966877338103184e-06, "loss": 0.3571, "step": 18099 }, { "epoch": 1.84017893452623, "grad_norm": 0.2754558026790619, "learning_rate": 7.96659167374606e-06, "loss": 0.3524, "step": 18100 }, { "epoch": 1.840280601870679, "grad_norm": 0.28809595108032227, "learning_rate": 7.966305994443952e-06, "loss": 0.3287, "step": 18101 }, { "epoch": 1.8403822692151282, "grad_norm": 0.259259968996048, "learning_rate": 7.966020300198298e-06, "loss": 0.3461, "step": 18102 }, { "epoch": 1.840483936559577, "grad_norm": 0.267223984003067, "learning_rate": 7.96573459101054e-06, "loss": 0.3613, "step": 18103 }, { "epoch": 1.840585603904026, "grad_norm": 0.270443320274353, "learning_rate": 7.965448866882114e-06, "loss": 0.3688, "step": 18104 }, { "epoch": 1.840687271248475, "grad_norm": 0.29320216178894043, "learning_rate": 7.965163127814463e-06, "loss": 0.3804, "step": 18105 }, { "epoch": 1.840788938592924, "grad_norm": 0.2736354470252991, "learning_rate": 7.964877373809024e-06, "loss": 0.3556, "step": 18106 }, { "epoch": 1.840890605937373, "grad_norm": 0.25425904989242554, "learning_rate": 7.964591604867238e-06, "loss": 0.3924, "step": 18107 }, { "epoch": 1.840992273281822, "grad_norm": 0.27431797981262207, "learning_rate": 7.964305820990543e-06, "loss": 0.3175, "step": 18108 }, { "epoch": 1.841093940626271, "grad_norm": 0.27399328351020813, "learning_rate": 7.964020022180381e-06, "loss": 0.3455, "step": 18109 }, { "epoch": 1.84119560797072, "grad_norm": 0.25736209750175476, "learning_rate": 7.963734208438189e-06, "loss": 0.3329, "step": 18110 }, { "epoch": 1.8412972753151688, "grad_norm": 0.28452056646347046, "learning_rate": 7.963448379765411e-06, "loss": 0.3872, "step": 18111 }, { "epoch": 1.8413989426596178, "grad_norm": 0.2522645592689514, "learning_rate": 7.963162536163481e-06, "loss": 0.3534, "step": 18112 }, { "epoch": 1.8415006100040667, "grad_norm": 0.2809069752693176, "learning_rate": 7.962876677633845e-06, "loss": 0.3586, "step": 18113 }, { "epoch": 1.8416022773485157, "grad_norm": 0.26804348826408386, "learning_rate": 7.962590804177939e-06, "loss": 0.3698, "step": 18114 }, { "epoch": 1.8417039446929646, "grad_norm": 0.26208117604255676, "learning_rate": 7.962304915797206e-06, "loss": 0.3442, "step": 18115 }, { "epoch": 1.8418056120374136, "grad_norm": 0.2866831421852112, "learning_rate": 7.962019012493083e-06, "loss": 0.3476, "step": 18116 }, { "epoch": 1.8419072793818625, "grad_norm": 0.25757744908332825, "learning_rate": 7.96173309426701e-06, "loss": 0.3605, "step": 18117 }, { "epoch": 1.8420089467263114, "grad_norm": 0.2937341630458832, "learning_rate": 7.961447161120434e-06, "loss": 0.3635, "step": 18118 }, { "epoch": 1.8421106140707604, "grad_norm": 0.27347397804260254, "learning_rate": 7.96116121305479e-06, "loss": 0.3435, "step": 18119 }, { "epoch": 1.8422122814152093, "grad_norm": 0.27723783254623413, "learning_rate": 7.960875250071518e-06, "loss": 0.3684, "step": 18120 }, { "epoch": 1.8423139487596583, "grad_norm": 0.2756158113479614, "learning_rate": 7.960589272172062e-06, "loss": 0.3496, "step": 18121 }, { "epoch": 1.8424156161041072, "grad_norm": 0.257621705532074, "learning_rate": 7.960303279357857e-06, "loss": 0.3801, "step": 18122 }, { "epoch": 1.8425172834485564, "grad_norm": 0.2699372172355652, "learning_rate": 7.96001727163035e-06, "loss": 0.3786, "step": 18123 }, { "epoch": 1.8426189507930053, "grad_norm": 0.2839343249797821, "learning_rate": 7.95973124899098e-06, "loss": 0.378, "step": 18124 }, { "epoch": 1.8427206181374542, "grad_norm": 0.26852309703826904, "learning_rate": 7.959445211441187e-06, "loss": 0.3785, "step": 18125 }, { "epoch": 1.8428222854819032, "grad_norm": 0.27633994817733765, "learning_rate": 7.959159158982412e-06, "loss": 0.3554, "step": 18126 }, { "epoch": 1.8429239528263521, "grad_norm": 0.24805903434753418, "learning_rate": 7.958873091616095e-06, "loss": 0.3399, "step": 18127 }, { "epoch": 1.8430256201708013, "grad_norm": 0.26176029443740845, "learning_rate": 7.958587009343681e-06, "loss": 0.3636, "step": 18128 }, { "epoch": 1.8431272875152502, "grad_norm": 0.2742636203765869, "learning_rate": 7.958300912166608e-06, "loss": 0.3695, "step": 18129 }, { "epoch": 1.8432289548596992, "grad_norm": 0.24476438760757446, "learning_rate": 7.958014800086318e-06, "loss": 0.3627, "step": 18130 }, { "epoch": 1.8433306222041481, "grad_norm": 0.2636452317237854, "learning_rate": 7.957728673104252e-06, "loss": 0.3614, "step": 18131 }, { "epoch": 1.843432289548597, "grad_norm": 0.2593740224838257, "learning_rate": 7.95744253122185e-06, "loss": 0.3505, "step": 18132 }, { "epoch": 1.843533956893046, "grad_norm": 0.2762523591518402, "learning_rate": 7.957156374440557e-06, "loss": 0.3384, "step": 18133 }, { "epoch": 1.843635624237495, "grad_norm": 0.28530997037887573, "learning_rate": 7.956870202761815e-06, "loss": 0.3604, "step": 18134 }, { "epoch": 1.8437372915819439, "grad_norm": 0.26520583033561707, "learning_rate": 7.956584016187061e-06, "loss": 0.3373, "step": 18135 }, { "epoch": 1.8438389589263928, "grad_norm": 0.27304553985595703, "learning_rate": 7.95629781471774e-06, "loss": 0.35, "step": 18136 }, { "epoch": 1.8439406262708418, "grad_norm": 0.2873920798301697, "learning_rate": 7.956011598355294e-06, "loss": 0.3606, "step": 18137 }, { "epoch": 1.8440422936152907, "grad_norm": 0.2681155502796173, "learning_rate": 7.955725367101161e-06, "loss": 0.3449, "step": 18138 }, { "epoch": 1.8441439609597396, "grad_norm": 0.3043711185455322, "learning_rate": 7.95543912095679e-06, "loss": 0.3832, "step": 18139 }, { "epoch": 1.8442456283041886, "grad_norm": 0.2899303734302521, "learning_rate": 7.955152859923616e-06, "loss": 0.3736, "step": 18140 }, { "epoch": 1.8443472956486375, "grad_norm": 0.2642125189304352, "learning_rate": 7.954866584003085e-06, "loss": 0.3326, "step": 18141 }, { "epoch": 1.8444489629930865, "grad_norm": 0.2516782879829407, "learning_rate": 7.954580293196638e-06, "loss": 0.3885, "step": 18142 }, { "epoch": 1.8445506303375356, "grad_norm": 0.2952083945274353, "learning_rate": 7.954293987505718e-06, "loss": 0.3384, "step": 18143 }, { "epoch": 1.8446522976819846, "grad_norm": 0.28299349546432495, "learning_rate": 7.954007666931768e-06, "loss": 0.349, "step": 18144 }, { "epoch": 1.8447539650264335, "grad_norm": 0.28621307015419006, "learning_rate": 7.95372133147623e-06, "loss": 0.337, "step": 18145 }, { "epoch": 1.8448556323708825, "grad_norm": 0.29824766516685486, "learning_rate": 7.953434981140544e-06, "loss": 0.3645, "step": 18146 }, { "epoch": 1.8449572997153314, "grad_norm": 0.28989842534065247, "learning_rate": 7.953148615926153e-06, "loss": 0.3457, "step": 18147 }, { "epoch": 1.8450589670597806, "grad_norm": 0.27353373169898987, "learning_rate": 7.952862235834504e-06, "loss": 0.3956, "step": 18148 }, { "epoch": 1.8451606344042295, "grad_norm": 0.2868924140930176, "learning_rate": 7.952575840867034e-06, "loss": 0.3483, "step": 18149 }, { "epoch": 1.8452623017486784, "grad_norm": 0.31744733452796936, "learning_rate": 7.95228943102519e-06, "loss": 0.3619, "step": 18150 }, { "epoch": 1.8453639690931274, "grad_norm": 0.2653900682926178, "learning_rate": 7.952003006310415e-06, "loss": 0.3511, "step": 18151 }, { "epoch": 1.8454656364375763, "grad_norm": 0.2784762978553772, "learning_rate": 7.951716566724149e-06, "loss": 0.3694, "step": 18152 }, { "epoch": 1.8455673037820253, "grad_norm": 0.2816712260246277, "learning_rate": 7.951430112267836e-06, "loss": 0.3359, "step": 18153 }, { "epoch": 1.8456689711264742, "grad_norm": 0.2635461688041687, "learning_rate": 7.951143642942918e-06, "loss": 0.355, "step": 18154 }, { "epoch": 1.8457706384709232, "grad_norm": 0.2782592475414276, "learning_rate": 7.950857158750842e-06, "loss": 0.3874, "step": 18155 }, { "epoch": 1.845872305815372, "grad_norm": 0.28989359736442566, "learning_rate": 7.950570659693048e-06, "loss": 0.3632, "step": 18156 }, { "epoch": 1.845973973159821, "grad_norm": 0.2668580710887909, "learning_rate": 7.95028414577098e-06, "loss": 0.384, "step": 18157 }, { "epoch": 1.84607564050427, "grad_norm": 0.26787999272346497, "learning_rate": 7.949997616986082e-06, "loss": 0.353, "step": 18158 }, { "epoch": 1.846177307848719, "grad_norm": 0.3036784827709198, "learning_rate": 7.949711073339798e-06, "loss": 0.3488, "step": 18159 }, { "epoch": 1.8462789751931679, "grad_norm": 0.28592726588249207, "learning_rate": 7.94942451483357e-06, "loss": 0.3489, "step": 18160 }, { "epoch": 1.8463806425376168, "grad_norm": 0.2544069290161133, "learning_rate": 7.949137941468841e-06, "loss": 0.354, "step": 18161 }, { "epoch": 1.8464823098820657, "grad_norm": 0.3220250606536865, "learning_rate": 7.948851353247056e-06, "loss": 0.3692, "step": 18162 }, { "epoch": 1.8465839772265147, "grad_norm": 0.26958537101745605, "learning_rate": 7.948564750169661e-06, "loss": 0.3594, "step": 18163 }, { "epoch": 1.8466856445709638, "grad_norm": 0.2786617875099182, "learning_rate": 7.948278132238094e-06, "loss": 0.3608, "step": 18164 }, { "epoch": 1.8467873119154128, "grad_norm": 0.28483128547668457, "learning_rate": 7.947991499453804e-06, "loss": 0.3413, "step": 18165 }, { "epoch": 1.8468889792598617, "grad_norm": 0.2644502818584442, "learning_rate": 7.947704851818234e-06, "loss": 0.3663, "step": 18166 }, { "epoch": 1.8469906466043107, "grad_norm": 0.263447105884552, "learning_rate": 7.947418189332827e-06, "loss": 0.3649, "step": 18167 }, { "epoch": 1.8470923139487596, "grad_norm": 0.2553865611553192, "learning_rate": 7.947131511999026e-06, "loss": 0.3699, "step": 18168 }, { "epoch": 1.8471939812932088, "grad_norm": 0.2939961850643158, "learning_rate": 7.94684481981828e-06, "loss": 0.3443, "step": 18169 }, { "epoch": 1.8472956486376577, "grad_norm": 0.27575305104255676, "learning_rate": 7.946558112792028e-06, "loss": 0.3403, "step": 18170 }, { "epoch": 1.8473973159821067, "grad_norm": 0.27306413650512695, "learning_rate": 7.946271390921717e-06, "loss": 0.3318, "step": 18171 }, { "epoch": 1.8474989833265556, "grad_norm": 0.284344345331192, "learning_rate": 7.94598465420879e-06, "loss": 0.3634, "step": 18172 }, { "epoch": 1.8476006506710045, "grad_norm": 0.2714105248451233, "learning_rate": 7.945697902654695e-06, "loss": 0.3489, "step": 18173 }, { "epoch": 1.8477023180154535, "grad_norm": 0.301725834608078, "learning_rate": 7.945411136260869e-06, "loss": 0.3774, "step": 18174 }, { "epoch": 1.8478039853599024, "grad_norm": 0.271411657333374, "learning_rate": 7.945124355028766e-06, "loss": 0.3466, "step": 18175 }, { "epoch": 1.8479056527043514, "grad_norm": 0.2708613872528076, "learning_rate": 7.944837558959825e-06, "loss": 0.3398, "step": 18176 }, { "epoch": 1.8480073200488003, "grad_norm": 0.27481546998023987, "learning_rate": 7.944550748055492e-06, "loss": 0.3476, "step": 18177 }, { "epoch": 1.8481089873932492, "grad_norm": 0.25507405400276184, "learning_rate": 7.944263922317212e-06, "loss": 0.3561, "step": 18178 }, { "epoch": 1.8482106547376982, "grad_norm": 0.2689586579799652, "learning_rate": 7.94397708174643e-06, "loss": 0.4111, "step": 18179 }, { "epoch": 1.8483123220821471, "grad_norm": 0.2736053466796875, "learning_rate": 7.943690226344592e-06, "loss": 0.3654, "step": 18180 }, { "epoch": 1.848413989426596, "grad_norm": 0.2759285271167755, "learning_rate": 7.943403356113142e-06, "loss": 0.3574, "step": 18181 }, { "epoch": 1.848515656771045, "grad_norm": 0.2914547920227051, "learning_rate": 7.943116471053525e-06, "loss": 0.3946, "step": 18182 }, { "epoch": 1.848617324115494, "grad_norm": 0.2511330842971802, "learning_rate": 7.942829571167185e-06, "loss": 0.3257, "step": 18183 }, { "epoch": 1.8487189914599431, "grad_norm": 0.25764477252960205, "learning_rate": 7.942542656455571e-06, "loss": 0.34, "step": 18184 }, { "epoch": 1.848820658804392, "grad_norm": 0.2720515727996826, "learning_rate": 7.942255726920127e-06, "loss": 0.3518, "step": 18185 }, { "epoch": 1.848922326148841, "grad_norm": 0.2769568860530853, "learning_rate": 7.941968782562297e-06, "loss": 0.36, "step": 18186 }, { "epoch": 1.84902399349329, "grad_norm": 0.28447261452674866, "learning_rate": 7.941681823383525e-06, "loss": 0.3977, "step": 18187 }, { "epoch": 1.8491256608377389, "grad_norm": 0.2848506569862366, "learning_rate": 7.941394849385263e-06, "loss": 0.341, "step": 18188 }, { "epoch": 1.849227328182188, "grad_norm": 0.28480836749076843, "learning_rate": 7.941107860568949e-06, "loss": 0.3708, "step": 18189 }, { "epoch": 1.849328995526637, "grad_norm": 0.2973577380180359, "learning_rate": 7.940820856936036e-06, "loss": 0.3401, "step": 18190 }, { "epoch": 1.849430662871086, "grad_norm": 0.2764259874820709, "learning_rate": 7.940533838487965e-06, "loss": 0.3419, "step": 18191 }, { "epoch": 1.8495323302155349, "grad_norm": 0.2649041414260864, "learning_rate": 7.940246805226182e-06, "loss": 0.3792, "step": 18192 }, { "epoch": 1.8496339975599838, "grad_norm": 0.2708909511566162, "learning_rate": 7.939959757152135e-06, "loss": 0.3409, "step": 18193 }, { "epoch": 1.8497356649044328, "grad_norm": 0.2824760973453522, "learning_rate": 7.93967269426727e-06, "loss": 0.3755, "step": 18194 }, { "epoch": 1.8498373322488817, "grad_norm": 0.26034310460090637, "learning_rate": 7.939385616573032e-06, "loss": 0.3367, "step": 18195 }, { "epoch": 1.8499389995933306, "grad_norm": 0.26892778277397156, "learning_rate": 7.939098524070868e-06, "loss": 0.3524, "step": 18196 }, { "epoch": 1.8500406669377796, "grad_norm": 0.28514114022254944, "learning_rate": 7.938811416762224e-06, "loss": 0.3293, "step": 18197 }, { "epoch": 1.8501423342822285, "grad_norm": 0.2891714572906494, "learning_rate": 7.938524294648547e-06, "loss": 0.3321, "step": 18198 }, { "epoch": 1.8502440016266775, "grad_norm": 0.27585846185684204, "learning_rate": 7.938237157731281e-06, "loss": 0.3737, "step": 18199 }, { "epoch": 1.8503456689711264, "grad_norm": 0.2593163847923279, "learning_rate": 7.937950006011876e-06, "loss": 0.3885, "step": 18200 }, { "epoch": 1.8504473363155753, "grad_norm": 0.3319339156150818, "learning_rate": 7.937662839491777e-06, "loss": 0.3621, "step": 18201 }, { "epoch": 1.8505490036600243, "grad_norm": 0.28610262274742126, "learning_rate": 7.93737565817243e-06, "loss": 0.3278, "step": 18202 }, { "epoch": 1.8506506710044732, "grad_norm": 0.2589469254016876, "learning_rate": 7.937088462055283e-06, "loss": 0.3737, "step": 18203 }, { "epoch": 1.8507523383489222, "grad_norm": 0.28137752413749695, "learning_rate": 7.936801251141784e-06, "loss": 0.3496, "step": 18204 }, { "epoch": 1.8508540056933713, "grad_norm": 0.3042852580547333, "learning_rate": 7.936514025433375e-06, "loss": 0.3514, "step": 18205 }, { "epoch": 1.8509556730378203, "grad_norm": 0.29970118403434753, "learning_rate": 7.936226784931508e-06, "loss": 0.3625, "step": 18206 }, { "epoch": 1.8510573403822692, "grad_norm": 0.2487478107213974, "learning_rate": 7.935939529637628e-06, "loss": 0.3194, "step": 18207 }, { "epoch": 1.8511590077267182, "grad_norm": 0.28056925535202026, "learning_rate": 7.935652259553182e-06, "loss": 0.3559, "step": 18208 }, { "epoch": 1.851260675071167, "grad_norm": 0.2971063554286957, "learning_rate": 7.935364974679618e-06, "loss": 0.3608, "step": 18209 }, { "epoch": 1.8513623424156163, "grad_norm": 0.2649998366832733, "learning_rate": 7.935077675018384e-06, "loss": 0.3544, "step": 18210 }, { "epoch": 1.8514640097600652, "grad_norm": 0.2927151322364807, "learning_rate": 7.934790360570923e-06, "loss": 0.3465, "step": 18211 }, { "epoch": 1.8515656771045141, "grad_norm": 0.3047434687614441, "learning_rate": 7.934503031338687e-06, "loss": 0.3779, "step": 18212 }, { "epoch": 1.851667344448963, "grad_norm": 0.2982664108276367, "learning_rate": 7.934215687323124e-06, "loss": 0.3731, "step": 18213 }, { "epoch": 1.851769011793412, "grad_norm": 0.28833407163619995, "learning_rate": 7.933928328525678e-06, "loss": 0.3549, "step": 18214 }, { "epoch": 1.851870679137861, "grad_norm": 0.282814085483551, "learning_rate": 7.933640954947797e-06, "loss": 0.3694, "step": 18215 }, { "epoch": 1.85197234648231, "grad_norm": 0.2795311212539673, "learning_rate": 7.93335356659093e-06, "loss": 0.3484, "step": 18216 }, { "epoch": 1.8520740138267588, "grad_norm": 0.2465471476316452, "learning_rate": 7.933066163456528e-06, "loss": 0.3292, "step": 18217 }, { "epoch": 1.8521756811712078, "grad_norm": 0.29883262515068054, "learning_rate": 7.932778745546033e-06, "loss": 0.3586, "step": 18218 }, { "epoch": 1.8522773485156567, "grad_norm": 0.28085678815841675, "learning_rate": 7.932491312860897e-06, "loss": 0.3459, "step": 18219 }, { "epoch": 1.8523790158601057, "grad_norm": 0.26470115780830383, "learning_rate": 7.932203865402566e-06, "loss": 0.3438, "step": 18220 }, { "epoch": 1.8524806832045546, "grad_norm": 0.28418728709220886, "learning_rate": 7.931916403172489e-06, "loss": 0.3265, "step": 18221 }, { "epoch": 1.8525823505490036, "grad_norm": 0.28052714467048645, "learning_rate": 7.931628926172112e-06, "loss": 0.3276, "step": 18222 }, { "epoch": 1.8526840178934525, "grad_norm": 0.2656075656414032, "learning_rate": 7.931341434402889e-06, "loss": 0.3528, "step": 18223 }, { "epoch": 1.8527856852379014, "grad_norm": 0.2526749074459076, "learning_rate": 7.931053927866261e-06, "loss": 0.3254, "step": 18224 }, { "epoch": 1.8528873525823506, "grad_norm": 0.2564344108104706, "learning_rate": 7.93076640656368e-06, "loss": 0.3753, "step": 18225 }, { "epoch": 1.8529890199267995, "grad_norm": 0.27743643522262573, "learning_rate": 7.930478870496596e-06, "loss": 0.3616, "step": 18226 }, { "epoch": 1.8530906872712485, "grad_norm": 0.26068922877311707, "learning_rate": 7.930191319666456e-06, "loss": 0.3516, "step": 18227 }, { "epoch": 1.8531923546156974, "grad_norm": 0.24976441264152527, "learning_rate": 7.929903754074707e-06, "loss": 0.3543, "step": 18228 }, { "epoch": 1.8532940219601464, "grad_norm": 0.2873729467391968, "learning_rate": 7.9296161737228e-06, "loss": 0.316, "step": 18229 }, { "epoch": 1.8533956893045955, "grad_norm": 0.28914034366607666, "learning_rate": 7.929328578612183e-06, "loss": 0.372, "step": 18230 }, { "epoch": 1.8534973566490445, "grad_norm": 0.25364482402801514, "learning_rate": 7.929040968744303e-06, "loss": 0.3706, "step": 18231 }, { "epoch": 1.8535990239934934, "grad_norm": 0.2663930356502533, "learning_rate": 7.928753344120614e-06, "loss": 0.3468, "step": 18232 }, { "epoch": 1.8537006913379424, "grad_norm": 0.3283073306083679, "learning_rate": 7.928465704742559e-06, "loss": 0.367, "step": 18233 }, { "epoch": 1.8538023586823913, "grad_norm": 0.2786787450313568, "learning_rate": 7.92817805061159e-06, "loss": 0.3688, "step": 18234 }, { "epoch": 1.8539040260268402, "grad_norm": 0.264313668012619, "learning_rate": 7.927890381729157e-06, "loss": 0.349, "step": 18235 }, { "epoch": 1.8540056933712892, "grad_norm": 0.2690378725528717, "learning_rate": 7.927602698096705e-06, "loss": 0.4077, "step": 18236 }, { "epoch": 1.8541073607157381, "grad_norm": 0.2818026542663574, "learning_rate": 7.927314999715689e-06, "loss": 0.3279, "step": 18237 }, { "epoch": 1.854209028060187, "grad_norm": 0.27230915427207947, "learning_rate": 7.927027286587556e-06, "loss": 0.3451, "step": 18238 }, { "epoch": 1.854310695404636, "grad_norm": 0.2522919774055481, "learning_rate": 7.926739558713754e-06, "loss": 0.3823, "step": 18239 }, { "epoch": 1.854412362749085, "grad_norm": 0.2698417901992798, "learning_rate": 7.926451816095735e-06, "loss": 0.323, "step": 18240 }, { "epoch": 1.8545140300935339, "grad_norm": 0.27248531579971313, "learning_rate": 7.926164058734945e-06, "loss": 0.3699, "step": 18241 }, { "epoch": 1.8546156974379828, "grad_norm": 0.26193535327911377, "learning_rate": 7.925876286632838e-06, "loss": 0.3588, "step": 18242 }, { "epoch": 1.8547173647824318, "grad_norm": 0.27758684754371643, "learning_rate": 7.92558849979086e-06, "loss": 0.3588, "step": 18243 }, { "epoch": 1.8548190321268807, "grad_norm": 0.25854259729385376, "learning_rate": 7.925300698210463e-06, "loss": 0.3321, "step": 18244 }, { "epoch": 1.8549206994713296, "grad_norm": 0.27335429191589355, "learning_rate": 7.925012881893095e-06, "loss": 0.3547, "step": 18245 }, { "epoch": 1.8550223668157788, "grad_norm": 0.26877421140670776, "learning_rate": 7.924725050840209e-06, "loss": 0.3597, "step": 18246 }, { "epoch": 1.8551240341602278, "grad_norm": 0.2787460386753082, "learning_rate": 7.924437205053252e-06, "loss": 0.384, "step": 18247 }, { "epoch": 1.8552257015046767, "grad_norm": 0.2691980004310608, "learning_rate": 7.924149344533675e-06, "loss": 0.3517, "step": 18248 }, { "epoch": 1.8553273688491256, "grad_norm": 0.25520920753479004, "learning_rate": 7.923861469282931e-06, "loss": 0.3439, "step": 18249 }, { "epoch": 1.8554290361935746, "grad_norm": 0.285788893699646, "learning_rate": 7.923573579302466e-06, "loss": 0.3863, "step": 18250 }, { "epoch": 1.8555307035380237, "grad_norm": 0.25677311420440674, "learning_rate": 7.923285674593732e-06, "loss": 0.3491, "step": 18251 }, { "epoch": 1.8556323708824727, "grad_norm": 0.25001615285873413, "learning_rate": 7.922997755158179e-06, "loss": 0.3441, "step": 18252 }, { "epoch": 1.8557340382269216, "grad_norm": 0.2785533666610718, "learning_rate": 7.92270982099726e-06, "loss": 0.359, "step": 18253 }, { "epoch": 1.8558357055713706, "grad_norm": 0.29145514965057373, "learning_rate": 7.92242187211242e-06, "loss": 0.3653, "step": 18254 }, { "epoch": 1.8559373729158195, "grad_norm": 0.2860088646411896, "learning_rate": 7.922133908505117e-06, "loss": 0.3433, "step": 18255 }, { "epoch": 1.8560390402602684, "grad_norm": 0.2788941562175751, "learning_rate": 7.921845930176794e-06, "loss": 0.3937, "step": 18256 }, { "epoch": 1.8561407076047174, "grad_norm": 0.26021912693977356, "learning_rate": 7.921557937128907e-06, "loss": 0.3432, "step": 18257 }, { "epoch": 1.8562423749491663, "grad_norm": 0.2631160318851471, "learning_rate": 7.921269929362907e-06, "loss": 0.3501, "step": 18258 }, { "epoch": 1.8563440422936153, "grad_norm": 0.2988872230052948, "learning_rate": 7.92098190688024e-06, "loss": 0.3705, "step": 18259 }, { "epoch": 1.8564457096380642, "grad_norm": 0.2817014753818512, "learning_rate": 7.920693869682364e-06, "loss": 0.3908, "step": 18260 }, { "epoch": 1.8565473769825132, "grad_norm": 0.2622126042842865, "learning_rate": 7.920405817770724e-06, "loss": 0.3573, "step": 18261 }, { "epoch": 1.856649044326962, "grad_norm": 0.2931038737297058, "learning_rate": 7.920117751146775e-06, "loss": 0.367, "step": 18262 }, { "epoch": 1.856750711671411, "grad_norm": 0.27990075945854187, "learning_rate": 7.919829669811965e-06, "loss": 0.371, "step": 18263 }, { "epoch": 1.85685237901586, "grad_norm": 0.2805379331111908, "learning_rate": 7.919541573767748e-06, "loss": 0.3435, "step": 18264 }, { "epoch": 1.856954046360309, "grad_norm": 0.29162657260894775, "learning_rate": 7.919253463015575e-06, "loss": 0.3809, "step": 18265 }, { "epoch": 1.857055713704758, "grad_norm": 0.2937110364437103, "learning_rate": 7.918965337556896e-06, "loss": 0.359, "step": 18266 }, { "epoch": 1.857157381049207, "grad_norm": 0.284248024225235, "learning_rate": 7.918677197393162e-06, "loss": 0.3352, "step": 18267 }, { "epoch": 1.857259048393656, "grad_norm": 0.2766365110874176, "learning_rate": 7.918389042525826e-06, "loss": 0.3631, "step": 18268 }, { "epoch": 1.857360715738105, "grad_norm": 0.2621828019618988, "learning_rate": 7.918100872956343e-06, "loss": 0.359, "step": 18269 }, { "epoch": 1.8574623830825538, "grad_norm": 0.2875351905822754, "learning_rate": 7.917812688686159e-06, "loss": 0.3393, "step": 18270 }, { "epoch": 1.857564050427003, "grad_norm": 0.26977357268333435, "learning_rate": 7.917524489716727e-06, "loss": 0.3501, "step": 18271 }, { "epoch": 1.857665717771452, "grad_norm": 0.2850581109523773, "learning_rate": 7.9172362760495e-06, "loss": 0.3678, "step": 18272 }, { "epoch": 1.857767385115901, "grad_norm": 0.2738091051578522, "learning_rate": 7.91694804768593e-06, "loss": 0.3657, "step": 18273 }, { "epoch": 1.8578690524603498, "grad_norm": 0.26535719633102417, "learning_rate": 7.91665980462747e-06, "loss": 0.3921, "step": 18274 }, { "epoch": 1.8579707198047988, "grad_norm": 0.2702404260635376, "learning_rate": 7.916371546875572e-06, "loss": 0.3629, "step": 18275 }, { "epoch": 1.8580723871492477, "grad_norm": 0.27447509765625, "learning_rate": 7.916083274431686e-06, "loss": 0.3822, "step": 18276 }, { "epoch": 1.8581740544936967, "grad_norm": 0.2733185887336731, "learning_rate": 7.915794987297265e-06, "loss": 0.3533, "step": 18277 }, { "epoch": 1.8582757218381456, "grad_norm": 0.2759585976600647, "learning_rate": 7.915506685473761e-06, "loss": 0.382, "step": 18278 }, { "epoch": 1.8583773891825945, "grad_norm": 0.29306110739707947, "learning_rate": 7.915218368962628e-06, "loss": 0.3483, "step": 18279 }, { "epoch": 1.8584790565270435, "grad_norm": 0.24611088633537292, "learning_rate": 7.914930037765318e-06, "loss": 0.3451, "step": 18280 }, { "epoch": 1.8585807238714924, "grad_norm": 0.2729566693305969, "learning_rate": 7.914641691883284e-06, "loss": 0.3636, "step": 18281 }, { "epoch": 1.8586823912159414, "grad_norm": 0.29433614015579224, "learning_rate": 7.914353331317977e-06, "loss": 0.3794, "step": 18282 }, { "epoch": 1.8587840585603903, "grad_norm": 0.26758795976638794, "learning_rate": 7.914064956070849e-06, "loss": 0.3793, "step": 18283 }, { "epoch": 1.8588857259048392, "grad_norm": 0.28426593542099, "learning_rate": 7.913776566143356e-06, "loss": 0.4018, "step": 18284 }, { "epoch": 1.8589873932492882, "grad_norm": 0.25313273072242737, "learning_rate": 7.913488161536949e-06, "loss": 0.3732, "step": 18285 }, { "epoch": 1.8590890605937371, "grad_norm": 0.29284903407096863, "learning_rate": 7.913199742253079e-06, "loss": 0.3416, "step": 18286 }, { "epoch": 1.8591907279381863, "grad_norm": 0.28014034032821655, "learning_rate": 7.912911308293203e-06, "loss": 0.3517, "step": 18287 }, { "epoch": 1.8592923952826352, "grad_norm": 0.2818607985973358, "learning_rate": 7.912622859658773e-06, "loss": 0.3478, "step": 18288 }, { "epoch": 1.8593940626270842, "grad_norm": 0.3074125349521637, "learning_rate": 7.912334396351238e-06, "loss": 0.3648, "step": 18289 }, { "epoch": 1.8594957299715331, "grad_norm": 0.25980085134506226, "learning_rate": 7.912045918372057e-06, "loss": 0.3607, "step": 18290 }, { "epoch": 1.859597397315982, "grad_norm": 0.2683587372303009, "learning_rate": 7.91175742572268e-06, "loss": 0.3366, "step": 18291 }, { "epoch": 1.8596990646604312, "grad_norm": 0.2674200236797333, "learning_rate": 7.91146891840456e-06, "loss": 0.3412, "step": 18292 }, { "epoch": 1.8598007320048802, "grad_norm": 0.2695826590061188, "learning_rate": 7.911180396419153e-06, "loss": 0.369, "step": 18293 }, { "epoch": 1.859902399349329, "grad_norm": 0.2610853314399719, "learning_rate": 7.91089185976791e-06, "loss": 0.3324, "step": 18294 }, { "epoch": 1.860004066693778, "grad_norm": 0.2772105038166046, "learning_rate": 7.910603308452284e-06, "loss": 0.3601, "step": 18295 }, { "epoch": 1.860105734038227, "grad_norm": 0.2670878767967224, "learning_rate": 7.910314742473733e-06, "loss": 0.3622, "step": 18296 }, { "epoch": 1.860207401382676, "grad_norm": 0.26632848381996155, "learning_rate": 7.910026161833707e-06, "loss": 0.3498, "step": 18297 }, { "epoch": 1.8603090687271249, "grad_norm": 0.26687315106391907, "learning_rate": 7.909737566533661e-06, "loss": 0.3606, "step": 18298 }, { "epoch": 1.8604107360715738, "grad_norm": 0.2691701054573059, "learning_rate": 7.909448956575048e-06, "loss": 0.3621, "step": 18299 }, { "epoch": 1.8605124034160228, "grad_norm": 0.26117485761642456, "learning_rate": 7.909160331959323e-06, "loss": 0.3805, "step": 18300 }, { "epoch": 1.8606140707604717, "grad_norm": 0.2773144543170929, "learning_rate": 7.908871692687939e-06, "loss": 0.3726, "step": 18301 }, { "epoch": 1.8607157381049206, "grad_norm": 0.2764991819858551, "learning_rate": 7.908583038762351e-06, "loss": 0.3557, "step": 18302 }, { "epoch": 1.8608174054493696, "grad_norm": 0.2903461456298828, "learning_rate": 7.908294370184014e-06, "loss": 0.369, "step": 18303 }, { "epoch": 1.8609190727938185, "grad_norm": 0.2593379616737366, "learning_rate": 7.908005686954379e-06, "loss": 0.3512, "step": 18304 }, { "epoch": 1.8610207401382675, "grad_norm": 0.26054513454437256, "learning_rate": 7.907716989074903e-06, "loss": 0.3108, "step": 18305 }, { "epoch": 1.8611224074827164, "grad_norm": 0.2783602774143219, "learning_rate": 7.90742827654704e-06, "loss": 0.3564, "step": 18306 }, { "epoch": 1.8612240748271656, "grad_norm": 0.26955676078796387, "learning_rate": 7.907139549372245e-06, "loss": 0.3256, "step": 18307 }, { "epoch": 1.8613257421716145, "grad_norm": 0.27087950706481934, "learning_rate": 7.90685080755197e-06, "loss": 0.3675, "step": 18308 }, { "epoch": 1.8614274095160634, "grad_norm": 0.26807454228401184, "learning_rate": 7.906562051087672e-06, "loss": 0.3509, "step": 18309 }, { "epoch": 1.8615290768605124, "grad_norm": 0.2868829667568207, "learning_rate": 7.906273279980807e-06, "loss": 0.3796, "step": 18310 }, { "epoch": 1.8616307442049613, "grad_norm": 0.26205873489379883, "learning_rate": 7.905984494232825e-06, "loss": 0.3428, "step": 18311 }, { "epoch": 1.8617324115494105, "grad_norm": 0.2573947012424469, "learning_rate": 7.905695693845186e-06, "loss": 0.3858, "step": 18312 }, { "epoch": 1.8618340788938594, "grad_norm": 0.27272382378578186, "learning_rate": 7.905406878819341e-06, "loss": 0.3629, "step": 18313 }, { "epoch": 1.8619357462383084, "grad_norm": 0.2505384385585785, "learning_rate": 7.905118049156748e-06, "loss": 0.3397, "step": 18314 }, { "epoch": 1.8620374135827573, "grad_norm": 0.26396644115448, "learning_rate": 7.904829204858859e-06, "loss": 0.3737, "step": 18315 }, { "epoch": 1.8621390809272063, "grad_norm": 0.27208900451660156, "learning_rate": 7.904540345927132e-06, "loss": 0.3511, "step": 18316 }, { "epoch": 1.8622407482716552, "grad_norm": 0.269835889339447, "learning_rate": 7.904251472363021e-06, "loss": 0.3552, "step": 18317 }, { "epoch": 1.8623424156161041, "grad_norm": 0.27079129219055176, "learning_rate": 7.903962584167982e-06, "loss": 0.3486, "step": 18318 }, { "epoch": 1.862444082960553, "grad_norm": 0.2690374553203583, "learning_rate": 7.903673681343467e-06, "loss": 0.3499, "step": 18319 }, { "epoch": 1.862545750305002, "grad_norm": 0.28382548689842224, "learning_rate": 7.903384763890935e-06, "loss": 0.3776, "step": 18320 }, { "epoch": 1.862647417649451, "grad_norm": 0.26849082112312317, "learning_rate": 7.903095831811842e-06, "loss": 0.3453, "step": 18321 }, { "epoch": 1.8627490849939, "grad_norm": 0.27640920877456665, "learning_rate": 7.90280688510764e-06, "loss": 0.3655, "step": 18322 }, { "epoch": 1.8628507523383488, "grad_norm": 0.2647862434387207, "learning_rate": 7.90251792377979e-06, "loss": 0.3795, "step": 18323 }, { "epoch": 1.8629524196827978, "grad_norm": 0.28937599062919617, "learning_rate": 7.902228947829743e-06, "loss": 0.3742, "step": 18324 }, { "epoch": 1.8630540870272467, "grad_norm": 0.2562609314918518, "learning_rate": 7.901939957258954e-06, "loss": 0.3535, "step": 18325 }, { "epoch": 1.8631557543716957, "grad_norm": 0.2422601282596588, "learning_rate": 7.901650952068882e-06, "loss": 0.3348, "step": 18326 }, { "epoch": 1.8632574217161446, "grad_norm": 0.28215938806533813, "learning_rate": 7.901361932260983e-06, "loss": 0.3554, "step": 18327 }, { "epoch": 1.8633590890605938, "grad_norm": 0.25503259897232056, "learning_rate": 7.901072897836713e-06, "loss": 0.3389, "step": 18328 }, { "epoch": 1.8634607564050427, "grad_norm": 0.2894284129142761, "learning_rate": 7.900783848797524e-06, "loss": 0.37, "step": 18329 }, { "epoch": 1.8635624237494917, "grad_norm": 0.2738896310329437, "learning_rate": 7.900494785144878e-06, "loss": 0.3595, "step": 18330 }, { "epoch": 1.8636640910939406, "grad_norm": 0.2583443820476532, "learning_rate": 7.900205706880228e-06, "loss": 0.3326, "step": 18331 }, { "epoch": 1.8637657584383895, "grad_norm": 0.28044620156288147, "learning_rate": 7.89991661400503e-06, "loss": 0.3663, "step": 18332 }, { "epoch": 1.8638674257828387, "grad_norm": 0.2972729802131653, "learning_rate": 7.899627506520742e-06, "loss": 0.3621, "step": 18333 }, { "epoch": 1.8639690931272876, "grad_norm": 0.3042079210281372, "learning_rate": 7.899338384428818e-06, "loss": 0.3872, "step": 18334 }, { "epoch": 1.8640707604717366, "grad_norm": 0.27105626463890076, "learning_rate": 7.899049247730719e-06, "loss": 0.3546, "step": 18335 }, { "epoch": 1.8641724278161855, "grad_norm": 0.25978755950927734, "learning_rate": 7.898760096427898e-06, "loss": 0.3549, "step": 18336 }, { "epoch": 1.8642740951606345, "grad_norm": 0.31180378794670105, "learning_rate": 7.898470930521812e-06, "loss": 0.3641, "step": 18337 }, { "epoch": 1.8643757625050834, "grad_norm": 0.31307631731033325, "learning_rate": 7.898181750013919e-06, "loss": 0.3609, "step": 18338 }, { "epoch": 1.8644774298495324, "grad_norm": 0.27349644899368286, "learning_rate": 7.897892554905672e-06, "loss": 0.346, "step": 18339 }, { "epoch": 1.8645790971939813, "grad_norm": 0.2835419178009033, "learning_rate": 7.897603345198535e-06, "loss": 0.3814, "step": 18340 }, { "epoch": 1.8646807645384302, "grad_norm": 0.2770947217941284, "learning_rate": 7.897314120893958e-06, "loss": 0.3583, "step": 18341 }, { "epoch": 1.8647824318828792, "grad_norm": 0.2994326055049896, "learning_rate": 7.897024881993402e-06, "loss": 0.3364, "step": 18342 }, { "epoch": 1.8648840992273281, "grad_norm": 0.28037360310554504, "learning_rate": 7.896735628498324e-06, "loss": 0.363, "step": 18343 }, { "epoch": 1.864985766571777, "grad_norm": 0.2734663486480713, "learning_rate": 7.896446360410178e-06, "loss": 0.3871, "step": 18344 }, { "epoch": 1.865087433916226, "grad_norm": 0.2935870885848999, "learning_rate": 7.896157077730426e-06, "loss": 0.3484, "step": 18345 }, { "epoch": 1.865189101260675, "grad_norm": 0.2450009137392044, "learning_rate": 7.895867780460521e-06, "loss": 0.3531, "step": 18346 }, { "epoch": 1.8652907686051239, "grad_norm": 0.2812175154685974, "learning_rate": 7.895578468601924e-06, "loss": 0.3646, "step": 18347 }, { "epoch": 1.865392435949573, "grad_norm": 0.2956937551498413, "learning_rate": 7.895289142156091e-06, "loss": 0.3812, "step": 18348 }, { "epoch": 1.865494103294022, "grad_norm": 0.2946004569530487, "learning_rate": 7.894999801124477e-06, "loss": 0.3833, "step": 18349 }, { "epoch": 1.865595770638471, "grad_norm": 0.26570940017700195, "learning_rate": 7.894710445508542e-06, "loss": 0.3359, "step": 18350 }, { "epoch": 1.8656974379829199, "grad_norm": 0.3214670419692993, "learning_rate": 7.894421075309746e-06, "loss": 0.3778, "step": 18351 }, { "epoch": 1.8657991053273688, "grad_norm": 0.31013545393943787, "learning_rate": 7.894131690529542e-06, "loss": 0.3453, "step": 18352 }, { "epoch": 1.865900772671818, "grad_norm": 0.28299063444137573, "learning_rate": 7.893842291169394e-06, "loss": 0.3797, "step": 18353 }, { "epoch": 1.866002440016267, "grad_norm": 0.29051437973976135, "learning_rate": 7.893552877230753e-06, "loss": 0.354, "step": 18354 }, { "epoch": 1.8661041073607159, "grad_norm": 0.29817941784858704, "learning_rate": 7.89326344871508e-06, "loss": 0.3301, "step": 18355 }, { "epoch": 1.8662057747051648, "grad_norm": 0.2831251323223114, "learning_rate": 7.892974005623834e-06, "loss": 0.3272, "step": 18356 }, { "epoch": 1.8663074420496137, "grad_norm": 0.2723675072193146, "learning_rate": 7.892684547958474e-06, "loss": 0.3468, "step": 18357 }, { "epoch": 1.8664091093940627, "grad_norm": 0.2783981263637543, "learning_rate": 7.892395075720457e-06, "loss": 0.3738, "step": 18358 }, { "epoch": 1.8665107767385116, "grad_norm": 0.29063835740089417, "learning_rate": 7.892105588911236e-06, "loss": 0.3736, "step": 18359 }, { "epoch": 1.8666124440829606, "grad_norm": 0.31376370787620544, "learning_rate": 7.89181608753228e-06, "loss": 0.324, "step": 18360 }, { "epoch": 1.8667141114274095, "grad_norm": 0.2793951630592346, "learning_rate": 7.89152657158504e-06, "loss": 0.3695, "step": 18361 }, { "epoch": 1.8668157787718584, "grad_norm": 0.2738974988460541, "learning_rate": 7.891237041070974e-06, "loss": 0.3415, "step": 18362 }, { "epoch": 1.8669174461163074, "grad_norm": 0.3011711835861206, "learning_rate": 7.890947495991546e-06, "loss": 0.3523, "step": 18363 }, { "epoch": 1.8670191134607563, "grad_norm": 0.2793670892715454, "learning_rate": 7.890657936348209e-06, "loss": 0.3387, "step": 18364 }, { "epoch": 1.8671207808052053, "grad_norm": 0.2769588232040405, "learning_rate": 7.890368362142425e-06, "loss": 0.3756, "step": 18365 }, { "epoch": 1.8672224481496542, "grad_norm": 0.26935020089149475, "learning_rate": 7.890078773375654e-06, "loss": 0.374, "step": 18366 }, { "epoch": 1.8673241154941032, "grad_norm": 0.32626447081565857, "learning_rate": 7.88978917004935e-06, "loss": 0.4064, "step": 18367 }, { "epoch": 1.867425782838552, "grad_norm": 0.2966321110725403, "learning_rate": 7.889499552164976e-06, "loss": 0.3993, "step": 18368 }, { "epoch": 1.8675274501830013, "grad_norm": 0.2853107750415802, "learning_rate": 7.88920991972399e-06, "loss": 0.3813, "step": 18369 }, { "epoch": 1.8676291175274502, "grad_norm": 0.2679377794265747, "learning_rate": 7.888920272727851e-06, "loss": 0.3572, "step": 18370 }, { "epoch": 1.8677307848718991, "grad_norm": 0.27113381028175354, "learning_rate": 7.888630611178019e-06, "loss": 0.3419, "step": 18371 }, { "epoch": 1.867832452216348, "grad_norm": 0.27788078784942627, "learning_rate": 7.88834093507595e-06, "loss": 0.3884, "step": 18372 }, { "epoch": 1.867934119560797, "grad_norm": 0.26260441541671753, "learning_rate": 7.88805124442311e-06, "loss": 0.357, "step": 18373 }, { "epoch": 1.8680357869052462, "grad_norm": 0.2736179828643799, "learning_rate": 7.88776153922095e-06, "loss": 0.3606, "step": 18374 }, { "epoch": 1.8681374542496951, "grad_norm": 0.2847083508968353, "learning_rate": 7.887471819470935e-06, "loss": 0.3631, "step": 18375 }, { "epoch": 1.868239121594144, "grad_norm": 0.30665838718414307, "learning_rate": 7.887182085174523e-06, "loss": 0.3719, "step": 18376 }, { "epoch": 1.868340788938593, "grad_norm": 0.27322688698768616, "learning_rate": 7.886892336333174e-06, "loss": 0.3348, "step": 18377 }, { "epoch": 1.868442456283042, "grad_norm": 0.27265429496765137, "learning_rate": 7.886602572948348e-06, "loss": 0.3137, "step": 18378 }, { "epoch": 1.868544123627491, "grad_norm": 0.2649329602718353, "learning_rate": 7.886312795021505e-06, "loss": 0.321, "step": 18379 }, { "epoch": 1.8686457909719398, "grad_norm": 0.311630517244339, "learning_rate": 7.886023002554102e-06, "loss": 0.3586, "step": 18380 }, { "epoch": 1.8687474583163888, "grad_norm": 0.2831904888153076, "learning_rate": 7.885733195547602e-06, "loss": 0.3546, "step": 18381 }, { "epoch": 1.8688491256608377, "grad_norm": 0.2682308256626129, "learning_rate": 7.885443374003463e-06, "loss": 0.3504, "step": 18382 }, { "epoch": 1.8689507930052867, "grad_norm": 0.29843273758888245, "learning_rate": 7.885153537923147e-06, "loss": 0.3838, "step": 18383 }, { "epoch": 1.8690524603497356, "grad_norm": 0.2823517918586731, "learning_rate": 7.884863687308112e-06, "loss": 0.392, "step": 18384 }, { "epoch": 1.8691541276941845, "grad_norm": 0.277347594499588, "learning_rate": 7.88457382215982e-06, "loss": 0.3759, "step": 18385 }, { "epoch": 1.8692557950386335, "grad_norm": 0.2598741352558136, "learning_rate": 7.88428394247973e-06, "loss": 0.3564, "step": 18386 }, { "epoch": 1.8693574623830824, "grad_norm": 0.2787284851074219, "learning_rate": 7.883994048269304e-06, "loss": 0.3479, "step": 18387 }, { "epoch": 1.8694591297275314, "grad_norm": 0.28736811876296997, "learning_rate": 7.88370413953e-06, "loss": 0.3367, "step": 18388 }, { "epoch": 1.8695607970719805, "grad_norm": 0.25326505303382874, "learning_rate": 7.883414216263281e-06, "loss": 0.3213, "step": 18389 }, { "epoch": 1.8696624644164295, "grad_norm": 0.27730798721313477, "learning_rate": 7.883124278470606e-06, "loss": 0.3456, "step": 18390 }, { "epoch": 1.8697641317608784, "grad_norm": 0.2842380702495575, "learning_rate": 7.882834326153436e-06, "loss": 0.3539, "step": 18391 }, { "epoch": 1.8698657991053274, "grad_norm": 0.2880619466304779, "learning_rate": 7.882544359313231e-06, "loss": 0.3972, "step": 18392 }, { "epoch": 1.8699674664497763, "grad_norm": 0.27922379970550537, "learning_rate": 7.882254377951454e-06, "loss": 0.3728, "step": 18393 }, { "epoch": 1.8700691337942255, "grad_norm": 0.2916083037853241, "learning_rate": 7.881964382069564e-06, "loss": 0.3494, "step": 18394 }, { "epoch": 1.8701708011386744, "grad_norm": 0.30680954456329346, "learning_rate": 7.881674371669021e-06, "loss": 0.3476, "step": 18395 }, { "epoch": 1.8702724684831233, "grad_norm": 0.305451363325119, "learning_rate": 7.881384346751289e-06, "loss": 0.3446, "step": 18396 }, { "epoch": 1.8703741358275723, "grad_norm": 0.305172860622406, "learning_rate": 7.881094307317826e-06, "loss": 0.3681, "step": 18397 }, { "epoch": 1.8704758031720212, "grad_norm": 0.26250123977661133, "learning_rate": 7.880804253370096e-06, "loss": 0.3562, "step": 18398 }, { "epoch": 1.8705774705164702, "grad_norm": 0.27784860134124756, "learning_rate": 7.880514184909558e-06, "loss": 0.3352, "step": 18399 }, { "epoch": 1.870679137860919, "grad_norm": 0.3082588315010071, "learning_rate": 7.880224101937673e-06, "loss": 0.3447, "step": 18400 }, { "epoch": 1.870780805205368, "grad_norm": 0.2821827232837677, "learning_rate": 7.879934004455903e-06, "loss": 0.3769, "step": 18401 }, { "epoch": 1.870882472549817, "grad_norm": 0.2664308547973633, "learning_rate": 7.879643892465712e-06, "loss": 0.3553, "step": 18402 }, { "epoch": 1.870984139894266, "grad_norm": 0.3050393760204315, "learning_rate": 7.879353765968558e-06, "loss": 0.3981, "step": 18403 }, { "epoch": 1.8710858072387149, "grad_norm": 0.2875288128852844, "learning_rate": 7.879063624965905e-06, "loss": 0.3378, "step": 18404 }, { "epoch": 1.8711874745831638, "grad_norm": 0.2799258828163147, "learning_rate": 7.878773469459213e-06, "loss": 0.3517, "step": 18405 }, { "epoch": 1.8712891419276128, "grad_norm": 0.30181849002838135, "learning_rate": 7.878483299449944e-06, "loss": 0.3626, "step": 18406 }, { "epoch": 1.8713908092720617, "grad_norm": 0.2928398847579956, "learning_rate": 7.878193114939562e-06, "loss": 0.3754, "step": 18407 }, { "epoch": 1.8714924766165106, "grad_norm": 0.2568616569042206, "learning_rate": 7.877902915929525e-06, "loss": 0.3503, "step": 18408 }, { "epoch": 1.8715941439609596, "grad_norm": 0.31721851229667664, "learning_rate": 7.8776127024213e-06, "loss": 0.3683, "step": 18409 }, { "epoch": 1.8716958113054087, "grad_norm": 0.307844340801239, "learning_rate": 7.877322474416342e-06, "loss": 0.367, "step": 18410 }, { "epoch": 1.8717974786498577, "grad_norm": 0.27612417936325073, "learning_rate": 7.877032231916118e-06, "loss": 0.3117, "step": 18411 }, { "epoch": 1.8718991459943066, "grad_norm": 0.29122886061668396, "learning_rate": 7.87674197492209e-06, "loss": 0.3496, "step": 18412 }, { "epoch": 1.8720008133387556, "grad_norm": 0.30451658368110657, "learning_rate": 7.876451703435722e-06, "loss": 0.3449, "step": 18413 }, { "epoch": 1.8721024806832045, "grad_norm": 0.2876449227333069, "learning_rate": 7.876161417458471e-06, "loss": 0.3615, "step": 18414 }, { "epoch": 1.8722041480276537, "grad_norm": 0.27873480319976807, "learning_rate": 7.875871116991802e-06, "loss": 0.3382, "step": 18415 }, { "epoch": 1.8723058153721026, "grad_norm": 0.27992889285087585, "learning_rate": 7.87558080203718e-06, "loss": 0.3632, "step": 18416 }, { "epoch": 1.8724074827165516, "grad_norm": 0.27150195837020874, "learning_rate": 7.875290472596063e-06, "loss": 0.3613, "step": 18417 }, { "epoch": 1.8725091500610005, "grad_norm": 0.26945826411247253, "learning_rate": 7.875000128669915e-06, "loss": 0.3252, "step": 18418 }, { "epoch": 1.8726108174054494, "grad_norm": 0.28187426924705505, "learning_rate": 7.874709770260202e-06, "loss": 0.3751, "step": 18419 }, { "epoch": 1.8727124847498984, "grad_norm": 0.26712849736213684, "learning_rate": 7.874419397368384e-06, "loss": 0.3733, "step": 18420 }, { "epoch": 1.8728141520943473, "grad_norm": 0.2526402175426483, "learning_rate": 7.874129009995923e-06, "loss": 0.3655, "step": 18421 }, { "epoch": 1.8729158194387963, "grad_norm": 0.2684488296508789, "learning_rate": 7.873838608144285e-06, "loss": 0.3366, "step": 18422 }, { "epoch": 1.8730174867832452, "grad_norm": 0.28544363379478455, "learning_rate": 7.87354819181493e-06, "loss": 0.3626, "step": 18423 }, { "epoch": 1.8731191541276941, "grad_norm": 0.29355892539024353, "learning_rate": 7.873257761009323e-06, "loss": 0.3565, "step": 18424 }, { "epoch": 1.873220821472143, "grad_norm": 0.29133176803588867, "learning_rate": 7.872967315728923e-06, "loss": 0.3615, "step": 18425 }, { "epoch": 1.873322488816592, "grad_norm": 0.2795342206954956, "learning_rate": 7.872676855975199e-06, "loss": 0.3945, "step": 18426 }, { "epoch": 1.873424156161041, "grad_norm": 0.28464174270629883, "learning_rate": 7.872386381749612e-06, "loss": 0.3769, "step": 18427 }, { "epoch": 1.87352582350549, "grad_norm": 0.2767748534679413, "learning_rate": 7.872095893053625e-06, "loss": 0.3657, "step": 18428 }, { "epoch": 1.8736274908499388, "grad_norm": 0.2569705545902252, "learning_rate": 7.8718053898887e-06, "loss": 0.3507, "step": 18429 }, { "epoch": 1.873729158194388, "grad_norm": 0.2579343020915985, "learning_rate": 7.8715148722563e-06, "loss": 0.3562, "step": 18430 }, { "epoch": 1.873830825538837, "grad_norm": 0.2728685438632965, "learning_rate": 7.871224340157895e-06, "loss": 0.3536, "step": 18431 }, { "epoch": 1.873932492883286, "grad_norm": 0.27525392174720764, "learning_rate": 7.870933793594941e-06, "loss": 0.3478, "step": 18432 }, { "epoch": 1.8740341602277348, "grad_norm": 0.28352510929107666, "learning_rate": 7.870643232568906e-06, "loss": 0.3833, "step": 18433 }, { "epoch": 1.8741358275721838, "grad_norm": 0.26468998193740845, "learning_rate": 7.870352657081252e-06, "loss": 0.3461, "step": 18434 }, { "epoch": 1.874237494916633, "grad_norm": 0.2622133195400238, "learning_rate": 7.870062067133443e-06, "loss": 0.335, "step": 18435 }, { "epoch": 1.8743391622610819, "grad_norm": 0.2889951467514038, "learning_rate": 7.869771462726944e-06, "loss": 0.3711, "step": 18436 }, { "epoch": 1.8744408296055308, "grad_norm": 0.29776692390441895, "learning_rate": 7.86948084386322e-06, "loss": 0.3516, "step": 18437 }, { "epoch": 1.8745424969499798, "grad_norm": 0.28183847665786743, "learning_rate": 7.86919021054373e-06, "loss": 0.3976, "step": 18438 }, { "epoch": 1.8746441642944287, "grad_norm": 0.27920421957969666, "learning_rate": 7.868899562769945e-06, "loss": 0.3806, "step": 18439 }, { "epoch": 1.8747458316388776, "grad_norm": 0.2679016590118408, "learning_rate": 7.868608900543323e-06, "loss": 0.3479, "step": 18440 }, { "epoch": 1.8748474989833266, "grad_norm": 0.2749505639076233, "learning_rate": 7.868318223865333e-06, "loss": 0.332, "step": 18441 }, { "epoch": 1.8749491663277755, "grad_norm": 0.26519355177879333, "learning_rate": 7.868027532737437e-06, "loss": 0.3958, "step": 18442 }, { "epoch": 1.8750508336722245, "grad_norm": 0.2756473422050476, "learning_rate": 7.867736827161098e-06, "loss": 0.3416, "step": 18443 }, { "epoch": 1.8751525010166734, "grad_norm": 0.2964096963405609, "learning_rate": 7.867446107137785e-06, "loss": 0.3608, "step": 18444 }, { "epoch": 1.8752541683611224, "grad_norm": 0.2813710868358612, "learning_rate": 7.867155372668957e-06, "loss": 0.3535, "step": 18445 }, { "epoch": 1.8753558357055713, "grad_norm": 0.2597962021827698, "learning_rate": 7.866864623756085e-06, "loss": 0.3385, "step": 18446 }, { "epoch": 1.8754575030500202, "grad_norm": 0.29452037811279297, "learning_rate": 7.866573860400628e-06, "loss": 0.3724, "step": 18447 }, { "epoch": 1.8755591703944692, "grad_norm": 0.3025066554546356, "learning_rate": 7.866283082604054e-06, "loss": 0.3568, "step": 18448 }, { "epoch": 1.8756608377389181, "grad_norm": 0.2696537971496582, "learning_rate": 7.865992290367826e-06, "loss": 0.3874, "step": 18449 }, { "epoch": 1.875762505083367, "grad_norm": 0.2721349000930786, "learning_rate": 7.86570148369341e-06, "loss": 0.3304, "step": 18450 }, { "epoch": 1.8758641724278162, "grad_norm": 0.29482656717300415, "learning_rate": 7.865410662582272e-06, "loss": 0.3678, "step": 18451 }, { "epoch": 1.8759658397722652, "grad_norm": 0.2841028869152069, "learning_rate": 7.865119827035876e-06, "loss": 0.4117, "step": 18452 }, { "epoch": 1.876067507116714, "grad_norm": 0.27726927399635315, "learning_rate": 7.864828977055686e-06, "loss": 0.3905, "step": 18453 }, { "epoch": 1.876169174461163, "grad_norm": 0.2705690264701843, "learning_rate": 7.864538112643169e-06, "loss": 0.3649, "step": 18454 }, { "epoch": 1.876270841805612, "grad_norm": 0.28642377257347107, "learning_rate": 7.86424723379979e-06, "loss": 0.3403, "step": 18455 }, { "epoch": 1.8763725091500612, "grad_norm": 0.27391210198402405, "learning_rate": 7.863956340527012e-06, "loss": 0.3302, "step": 18456 }, { "epoch": 1.87647417649451, "grad_norm": 0.26358312368392944, "learning_rate": 7.863665432826306e-06, "loss": 0.398, "step": 18457 }, { "epoch": 1.876575843838959, "grad_norm": 0.27148476243019104, "learning_rate": 7.863374510699133e-06, "loss": 0.3426, "step": 18458 }, { "epoch": 1.876677511183408, "grad_norm": 0.2969543933868408, "learning_rate": 7.863083574146958e-06, "loss": 0.3828, "step": 18459 }, { "epoch": 1.876779178527857, "grad_norm": 0.2674320340156555, "learning_rate": 7.862792623171248e-06, "loss": 0.3614, "step": 18460 }, { "epoch": 1.8768808458723059, "grad_norm": 0.25452861189842224, "learning_rate": 7.862501657773472e-06, "loss": 0.3754, "step": 18461 }, { "epoch": 1.8769825132167548, "grad_norm": 0.30209803581237793, "learning_rate": 7.862210677955091e-06, "loss": 0.3951, "step": 18462 }, { "epoch": 1.8770841805612037, "grad_norm": 0.25735315680503845, "learning_rate": 7.861919683717574e-06, "loss": 0.3304, "step": 18463 }, { "epoch": 1.8771858479056527, "grad_norm": 0.2793252170085907, "learning_rate": 7.861628675062385e-06, "loss": 0.3851, "step": 18464 }, { "epoch": 1.8772875152501016, "grad_norm": 0.262513667345047, "learning_rate": 7.86133765199099e-06, "loss": 0.3677, "step": 18465 }, { "epoch": 1.8773891825945506, "grad_norm": 0.28246521949768066, "learning_rate": 7.861046614504855e-06, "loss": 0.3893, "step": 18466 }, { "epoch": 1.8774908499389995, "grad_norm": 0.2777552306652069, "learning_rate": 7.86075556260545e-06, "loss": 0.3866, "step": 18467 }, { "epoch": 1.8775925172834484, "grad_norm": 0.28325703740119934, "learning_rate": 7.860464496294236e-06, "loss": 0.3756, "step": 18468 }, { "epoch": 1.8776941846278974, "grad_norm": 0.29862290620803833, "learning_rate": 7.860173415572683e-06, "loss": 0.3717, "step": 18469 }, { "epoch": 1.8777958519723463, "grad_norm": 0.27145615220069885, "learning_rate": 7.859882320442256e-06, "loss": 0.3416, "step": 18470 }, { "epoch": 1.8778975193167955, "grad_norm": 0.2806597054004669, "learning_rate": 7.85959121090442e-06, "loss": 0.3654, "step": 18471 }, { "epoch": 1.8779991866612444, "grad_norm": 0.26536825299263, "learning_rate": 7.859300086960644e-06, "loss": 0.312, "step": 18472 }, { "epoch": 1.8781008540056934, "grad_norm": 0.281103253364563, "learning_rate": 7.859008948612393e-06, "loss": 0.3615, "step": 18473 }, { "epoch": 1.8782025213501423, "grad_norm": 0.27746444940567017, "learning_rate": 7.858717795861135e-06, "loss": 0.3806, "step": 18474 }, { "epoch": 1.8783041886945913, "grad_norm": 0.2716307044029236, "learning_rate": 7.858426628708337e-06, "loss": 0.3582, "step": 18475 }, { "epoch": 1.8784058560390404, "grad_norm": 0.3157985508441925, "learning_rate": 7.858135447155463e-06, "loss": 0.3503, "step": 18476 }, { "epoch": 1.8785075233834894, "grad_norm": 0.281455397605896, "learning_rate": 7.857844251203983e-06, "loss": 0.3547, "step": 18477 }, { "epoch": 1.8786091907279383, "grad_norm": 0.2963292896747589, "learning_rate": 7.857553040855363e-06, "loss": 0.3482, "step": 18478 }, { "epoch": 1.8787108580723872, "grad_norm": 0.26894205808639526, "learning_rate": 7.857261816111068e-06, "loss": 0.3441, "step": 18479 }, { "epoch": 1.8788125254168362, "grad_norm": 0.27398914098739624, "learning_rate": 7.85697057697257e-06, "loss": 0.3254, "step": 18480 }, { "epoch": 1.8789141927612851, "grad_norm": 0.2995492219924927, "learning_rate": 7.85667932344133e-06, "loss": 0.3996, "step": 18481 }, { "epoch": 1.879015860105734, "grad_norm": 0.27650022506713867, "learning_rate": 7.85638805551882e-06, "loss": 0.3413, "step": 18482 }, { "epoch": 1.879117527450183, "grad_norm": 0.2529209554195404, "learning_rate": 7.856096773206506e-06, "loss": 0.3569, "step": 18483 }, { "epoch": 1.879219194794632, "grad_norm": 0.2679433226585388, "learning_rate": 7.855805476505854e-06, "loss": 0.3324, "step": 18484 }, { "epoch": 1.879320862139081, "grad_norm": 0.25505709648132324, "learning_rate": 7.855514165418334e-06, "loss": 0.3633, "step": 18485 }, { "epoch": 1.8794225294835298, "grad_norm": 0.29553258419036865, "learning_rate": 7.855222839945412e-06, "loss": 0.3673, "step": 18486 }, { "epoch": 1.8795241968279788, "grad_norm": 0.29844221472740173, "learning_rate": 7.854931500088553e-06, "loss": 0.3643, "step": 18487 }, { "epoch": 1.8796258641724277, "grad_norm": 0.28120157122612, "learning_rate": 7.854640145849231e-06, "loss": 0.3605, "step": 18488 }, { "epoch": 1.8797275315168767, "grad_norm": 0.30335158109664917, "learning_rate": 7.854348777228911e-06, "loss": 0.3709, "step": 18489 }, { "epoch": 1.8798291988613256, "grad_norm": 0.2816961407661438, "learning_rate": 7.854057394229058e-06, "loss": 0.3628, "step": 18490 }, { "epoch": 1.8799308662057745, "grad_norm": 0.29364386200904846, "learning_rate": 7.853765996851141e-06, "loss": 0.3449, "step": 18491 }, { "epoch": 1.8800325335502237, "grad_norm": 0.3332781195640564, "learning_rate": 7.853474585096632e-06, "loss": 0.3696, "step": 18492 }, { "epoch": 1.8801342008946726, "grad_norm": 0.27876266837120056, "learning_rate": 7.853183158966995e-06, "loss": 0.3516, "step": 18493 }, { "epoch": 1.8802358682391216, "grad_norm": 0.2491769790649414, "learning_rate": 7.8528917184637e-06, "loss": 0.3219, "step": 18494 }, { "epoch": 1.8803375355835705, "grad_norm": 0.26963144540786743, "learning_rate": 7.852600263588213e-06, "loss": 0.3552, "step": 18495 }, { "epoch": 1.8804392029280195, "grad_norm": 0.26928478479385376, "learning_rate": 7.852308794342004e-06, "loss": 0.3218, "step": 18496 }, { "epoch": 1.8805408702724686, "grad_norm": 0.291838675737381, "learning_rate": 7.85201731072654e-06, "loss": 0.3317, "step": 18497 }, { "epoch": 1.8806425376169176, "grad_norm": 0.2737736403942108, "learning_rate": 7.851725812743293e-06, "loss": 0.3346, "step": 18498 }, { "epoch": 1.8807442049613665, "grad_norm": 0.31021803617477417, "learning_rate": 7.851434300393728e-06, "loss": 0.3678, "step": 18499 }, { "epoch": 1.8808458723058155, "grad_norm": 0.267719566822052, "learning_rate": 7.851142773679315e-06, "loss": 0.3618, "step": 18500 }, { "epoch": 1.8809475396502644, "grad_norm": 0.2636566162109375, "learning_rate": 7.85085123260152e-06, "loss": 0.3395, "step": 18501 }, { "epoch": 1.8810492069947133, "grad_norm": 0.3001590967178345, "learning_rate": 7.850559677161815e-06, "loss": 0.3607, "step": 18502 }, { "epoch": 1.8811508743391623, "grad_norm": 0.28351759910583496, "learning_rate": 7.85026810736167e-06, "loss": 0.3519, "step": 18503 }, { "epoch": 1.8812525416836112, "grad_norm": 0.25257807970046997, "learning_rate": 7.84997652320255e-06, "loss": 0.375, "step": 18504 }, { "epoch": 1.8813542090280602, "grad_norm": 0.2667992413043976, "learning_rate": 7.849684924685926e-06, "loss": 0.3643, "step": 18505 }, { "epoch": 1.881455876372509, "grad_norm": 0.2767763137817383, "learning_rate": 7.849393311813264e-06, "loss": 0.3687, "step": 18506 }, { "epoch": 1.881557543716958, "grad_norm": 0.2690214514732361, "learning_rate": 7.84910168458604e-06, "loss": 0.3921, "step": 18507 }, { "epoch": 1.881659211061407, "grad_norm": 0.2825146019458771, "learning_rate": 7.848810043005717e-06, "loss": 0.3585, "step": 18508 }, { "epoch": 1.881760878405856, "grad_norm": 0.27481451630592346, "learning_rate": 7.848518387073764e-06, "loss": 0.3175, "step": 18509 }, { "epoch": 1.8818625457503049, "grad_norm": 0.2960725724697113, "learning_rate": 7.848226716791654e-06, "loss": 0.3665, "step": 18510 }, { "epoch": 1.8819642130947538, "grad_norm": 0.2855319678783417, "learning_rate": 7.847935032160854e-06, "loss": 0.3633, "step": 18511 }, { "epoch": 1.882065880439203, "grad_norm": 0.29714998602867126, "learning_rate": 7.847643333182834e-06, "loss": 0.3742, "step": 18512 }, { "epoch": 1.882167547783652, "grad_norm": 0.29358336329460144, "learning_rate": 7.847351619859065e-06, "loss": 0.3585, "step": 18513 }, { "epoch": 1.8822692151281009, "grad_norm": 0.24239762127399445, "learning_rate": 7.847059892191014e-06, "loss": 0.3201, "step": 18514 }, { "epoch": 1.8823708824725498, "grad_norm": 0.2965962588787079, "learning_rate": 7.846768150180153e-06, "loss": 0.34, "step": 18515 }, { "epoch": 1.8824725498169987, "grad_norm": 0.2989634573459625, "learning_rate": 7.846476393827948e-06, "loss": 0.3476, "step": 18516 }, { "epoch": 1.882574217161448, "grad_norm": 0.2885744273662567, "learning_rate": 7.846184623135874e-06, "loss": 0.3218, "step": 18517 }, { "epoch": 1.8826758845058968, "grad_norm": 0.2842082381248474, "learning_rate": 7.845892838105398e-06, "loss": 0.34, "step": 18518 }, { "epoch": 1.8827775518503458, "grad_norm": 0.2990163266658783, "learning_rate": 7.845601038737989e-06, "loss": 0.3706, "step": 18519 }, { "epoch": 1.8828792191947947, "grad_norm": 0.29031842947006226, "learning_rate": 7.845309225035118e-06, "loss": 0.3669, "step": 18520 }, { "epoch": 1.8829808865392437, "grad_norm": 0.27013102173805237, "learning_rate": 7.845017396998256e-06, "loss": 0.3348, "step": 18521 }, { "epoch": 1.8830825538836926, "grad_norm": 0.2721259891986847, "learning_rate": 7.844725554628873e-06, "loss": 0.3231, "step": 18522 }, { "epoch": 1.8831842212281416, "grad_norm": 0.2731782793998718, "learning_rate": 7.844433697928437e-06, "loss": 0.3418, "step": 18523 }, { "epoch": 1.8832858885725905, "grad_norm": 0.303684800863266, "learning_rate": 7.844141826898422e-06, "loss": 0.3579, "step": 18524 }, { "epoch": 1.8833875559170394, "grad_norm": 0.27162301540374756, "learning_rate": 7.843849941540294e-06, "loss": 0.3752, "step": 18525 }, { "epoch": 1.8834892232614884, "grad_norm": 0.27677562832832336, "learning_rate": 7.843558041855527e-06, "loss": 0.3692, "step": 18526 }, { "epoch": 1.8835908906059373, "grad_norm": 0.2746870517730713, "learning_rate": 7.84326612784559e-06, "loss": 0.3657, "step": 18527 }, { "epoch": 1.8836925579503863, "grad_norm": 0.29538100957870483, "learning_rate": 7.842974199511955e-06, "loss": 0.3119, "step": 18528 }, { "epoch": 1.8837942252948352, "grad_norm": 0.26186543703079224, "learning_rate": 7.842682256856091e-06, "loss": 0.372, "step": 18529 }, { "epoch": 1.8838958926392841, "grad_norm": 0.2711031436920166, "learning_rate": 7.84239029987947e-06, "loss": 0.333, "step": 18530 }, { "epoch": 1.883997559983733, "grad_norm": 0.28405457735061646, "learning_rate": 7.84209832858356e-06, "loss": 0.3756, "step": 18531 }, { "epoch": 1.884099227328182, "grad_norm": 0.2774793803691864, "learning_rate": 7.841806342969835e-06, "loss": 0.3471, "step": 18532 }, { "epoch": 1.8842008946726312, "grad_norm": 0.26225101947784424, "learning_rate": 7.841514343039766e-06, "loss": 0.4195, "step": 18533 }, { "epoch": 1.8843025620170801, "grad_norm": 0.2778155505657196, "learning_rate": 7.841222328794821e-06, "loss": 0.3991, "step": 18534 }, { "epoch": 1.884404229361529, "grad_norm": 0.24752749502658844, "learning_rate": 7.840930300236476e-06, "loss": 0.3816, "step": 18535 }, { "epoch": 1.884505896705978, "grad_norm": 0.29440152645111084, "learning_rate": 7.840638257366197e-06, "loss": 0.3766, "step": 18536 }, { "epoch": 1.884607564050427, "grad_norm": 0.24860094487667084, "learning_rate": 7.840346200185459e-06, "loss": 0.3698, "step": 18537 }, { "epoch": 1.8847092313948761, "grad_norm": 0.2770242393016815, "learning_rate": 7.840054128695732e-06, "loss": 0.3424, "step": 18538 }, { "epoch": 1.884810898739325, "grad_norm": 0.2911463975906372, "learning_rate": 7.839762042898485e-06, "loss": 0.3646, "step": 18539 }, { "epoch": 1.884912566083774, "grad_norm": 0.24948422610759735, "learning_rate": 7.839469942795193e-06, "loss": 0.3396, "step": 18540 }, { "epoch": 1.885014233428223, "grad_norm": 0.2743324935436249, "learning_rate": 7.839177828387326e-06, "loss": 0.3642, "step": 18541 }, { "epoch": 1.8851159007726719, "grad_norm": 0.2503974735736847, "learning_rate": 7.838885699676354e-06, "loss": 0.3433, "step": 18542 }, { "epoch": 1.8852175681171208, "grad_norm": 0.25517037510871887, "learning_rate": 7.838593556663753e-06, "loss": 0.3733, "step": 18543 }, { "epoch": 1.8853192354615698, "grad_norm": 0.2794751226902008, "learning_rate": 7.838301399350991e-06, "loss": 0.3973, "step": 18544 }, { "epoch": 1.8854209028060187, "grad_norm": 0.288549542427063, "learning_rate": 7.838009227739543e-06, "loss": 0.3657, "step": 18545 }, { "epoch": 1.8855225701504676, "grad_norm": 0.2588982880115509, "learning_rate": 7.837717041830877e-06, "loss": 0.3667, "step": 18546 }, { "epoch": 1.8856242374949166, "grad_norm": 0.30957868695259094, "learning_rate": 7.837424841626466e-06, "loss": 0.3718, "step": 18547 }, { "epoch": 1.8857259048393655, "grad_norm": 0.28615498542785645, "learning_rate": 7.837132627127784e-06, "loss": 0.344, "step": 18548 }, { "epoch": 1.8858275721838145, "grad_norm": 0.2511961758136749, "learning_rate": 7.836840398336302e-06, "loss": 0.3434, "step": 18549 }, { "epoch": 1.8859292395282634, "grad_norm": 0.27418211102485657, "learning_rate": 7.836548155253494e-06, "loss": 0.3676, "step": 18550 }, { "epoch": 1.8860309068727124, "grad_norm": 0.3058408796787262, "learning_rate": 7.836255897880828e-06, "loss": 0.3198, "step": 18551 }, { "epoch": 1.8861325742171613, "grad_norm": 0.2646738588809967, "learning_rate": 7.83596362621978e-06, "loss": 0.3467, "step": 18552 }, { "epoch": 1.8862342415616105, "grad_norm": 0.2757192850112915, "learning_rate": 7.835671340271821e-06, "loss": 0.3789, "step": 18553 }, { "epoch": 1.8863359089060594, "grad_norm": 0.23380893468856812, "learning_rate": 7.835379040038425e-06, "loss": 0.3537, "step": 18554 }, { "epoch": 1.8864375762505083, "grad_norm": 0.2655125856399536, "learning_rate": 7.83508672552106e-06, "loss": 0.3603, "step": 18555 }, { "epoch": 1.8865392435949573, "grad_norm": 0.26722124218940735, "learning_rate": 7.834794396721203e-06, "loss": 0.3961, "step": 18556 }, { "epoch": 1.8866409109394062, "grad_norm": 0.2693021595478058, "learning_rate": 7.834502053640328e-06, "loss": 0.3667, "step": 18557 }, { "epoch": 1.8867425782838554, "grad_norm": 0.27995726466178894, "learning_rate": 7.834209696279902e-06, "loss": 0.3751, "step": 18558 }, { "epoch": 1.8868442456283043, "grad_norm": 0.2771218419075012, "learning_rate": 7.833917324641402e-06, "loss": 0.3585, "step": 18559 }, { "epoch": 1.8869459129727533, "grad_norm": 0.27078336477279663, "learning_rate": 7.833624938726301e-06, "loss": 0.3556, "step": 18560 }, { "epoch": 1.8870475803172022, "grad_norm": 0.25344058871269226, "learning_rate": 7.83333253853607e-06, "loss": 0.3718, "step": 18561 }, { "epoch": 1.8871492476616512, "grad_norm": 0.25275319814682007, "learning_rate": 7.833040124072182e-06, "loss": 0.3631, "step": 18562 }, { "epoch": 1.8872509150061, "grad_norm": 0.2596389949321747, "learning_rate": 7.832747695336113e-06, "loss": 0.3465, "step": 18563 }, { "epoch": 1.887352582350549, "grad_norm": 0.2662656009197235, "learning_rate": 7.832455252329335e-06, "loss": 0.3717, "step": 18564 }, { "epoch": 1.887454249694998, "grad_norm": 0.27961623668670654, "learning_rate": 7.832162795053319e-06, "loss": 0.3776, "step": 18565 }, { "epoch": 1.887555917039447, "grad_norm": 0.2902851402759552, "learning_rate": 7.83187032350954e-06, "loss": 0.3503, "step": 18566 }, { "epoch": 1.8876575843838959, "grad_norm": 0.26580893993377686, "learning_rate": 7.83157783769947e-06, "loss": 0.3498, "step": 18567 }, { "epoch": 1.8877592517283448, "grad_norm": 0.27166181802749634, "learning_rate": 7.831285337624585e-06, "loss": 0.3563, "step": 18568 }, { "epoch": 1.8878609190727937, "grad_norm": 0.2864287793636322, "learning_rate": 7.830992823286356e-06, "loss": 0.3644, "step": 18569 }, { "epoch": 1.8879625864172427, "grad_norm": 0.2703264653682709, "learning_rate": 7.830700294686262e-06, "loss": 0.3847, "step": 18570 }, { "epoch": 1.8880642537616916, "grad_norm": 0.2670212984085083, "learning_rate": 7.830407751825768e-06, "loss": 0.3731, "step": 18571 }, { "epoch": 1.8881659211061406, "grad_norm": 0.28034508228302, "learning_rate": 7.830115194706353e-06, "loss": 0.3735, "step": 18572 }, { "epoch": 1.8882675884505895, "grad_norm": 0.29095152020454407, "learning_rate": 7.829822623329492e-06, "loss": 0.3437, "step": 18573 }, { "epoch": 1.8883692557950387, "grad_norm": 0.2595568597316742, "learning_rate": 7.829530037696658e-06, "loss": 0.358, "step": 18574 }, { "epoch": 1.8884709231394876, "grad_norm": 0.3007574677467346, "learning_rate": 7.829237437809322e-06, "loss": 0.3606, "step": 18575 }, { "epoch": 1.8885725904839366, "grad_norm": 0.27462711930274963, "learning_rate": 7.82894482366896e-06, "loss": 0.3545, "step": 18576 }, { "epoch": 1.8886742578283855, "grad_norm": 0.28567269444465637, "learning_rate": 7.828652195277046e-06, "loss": 0.3688, "step": 18577 }, { "epoch": 1.8887759251728344, "grad_norm": 0.2867511510848999, "learning_rate": 7.828359552635055e-06, "loss": 0.3682, "step": 18578 }, { "epoch": 1.8888775925172836, "grad_norm": 0.32035282254219055, "learning_rate": 7.828066895744461e-06, "loss": 0.3771, "step": 18579 }, { "epoch": 1.8889792598617325, "grad_norm": 0.2983818054199219, "learning_rate": 7.82777422460674e-06, "loss": 0.3555, "step": 18580 }, { "epoch": 1.8890809272061815, "grad_norm": 0.2814804017543793, "learning_rate": 7.82748153922336e-06, "loss": 0.3669, "step": 18581 }, { "epoch": 1.8891825945506304, "grad_norm": 0.32222890853881836, "learning_rate": 7.827188839595802e-06, "loss": 0.3575, "step": 18582 }, { "epoch": 1.8892842618950794, "grad_norm": 0.2893126606941223, "learning_rate": 7.826896125725541e-06, "loss": 0.403, "step": 18583 }, { "epoch": 1.8893859292395283, "grad_norm": 0.2812158167362213, "learning_rate": 7.826603397614046e-06, "loss": 0.3478, "step": 18584 }, { "epoch": 1.8894875965839772, "grad_norm": 0.2840556800365448, "learning_rate": 7.826310655262795e-06, "loss": 0.3541, "step": 18585 }, { "epoch": 1.8895892639284262, "grad_norm": 0.2885880172252655, "learning_rate": 7.826017898673264e-06, "loss": 0.3667, "step": 18586 }, { "epoch": 1.8896909312728751, "grad_norm": 0.2596282958984375, "learning_rate": 7.825725127846923e-06, "loss": 0.3503, "step": 18587 }, { "epoch": 1.889792598617324, "grad_norm": 0.26661649346351624, "learning_rate": 7.825432342785256e-06, "loss": 0.3528, "step": 18588 }, { "epoch": 1.889894265961773, "grad_norm": 0.2612856924533844, "learning_rate": 7.825139543489728e-06, "loss": 0.3466, "step": 18589 }, { "epoch": 1.889995933306222, "grad_norm": 0.28544268012046814, "learning_rate": 7.824846729961819e-06, "loss": 0.3717, "step": 18590 }, { "epoch": 1.890097600650671, "grad_norm": 0.2524190843105316, "learning_rate": 7.824553902203004e-06, "loss": 0.3478, "step": 18591 }, { "epoch": 1.8901992679951198, "grad_norm": 0.3029620945453644, "learning_rate": 7.824261060214758e-06, "loss": 0.3741, "step": 18592 }, { "epoch": 1.8903009353395688, "grad_norm": 0.2850031852722168, "learning_rate": 7.823968203998555e-06, "loss": 0.3491, "step": 18593 }, { "epoch": 1.890402602684018, "grad_norm": 0.2915181815624237, "learning_rate": 7.823675333555872e-06, "loss": 0.3748, "step": 18594 }, { "epoch": 1.8905042700284669, "grad_norm": 0.27571535110473633, "learning_rate": 7.823382448888185e-06, "loss": 0.3837, "step": 18595 }, { "epoch": 1.8906059373729158, "grad_norm": 0.2536724805831909, "learning_rate": 7.823089549996966e-06, "loss": 0.3653, "step": 18596 }, { "epoch": 1.8907076047173648, "grad_norm": 0.2759750783443451, "learning_rate": 7.822796636883693e-06, "loss": 0.368, "step": 18597 }, { "epoch": 1.8908092720618137, "grad_norm": 0.2870499789714813, "learning_rate": 7.822503709549842e-06, "loss": 0.3598, "step": 18598 }, { "epoch": 1.8909109394062629, "grad_norm": 0.289101779460907, "learning_rate": 7.822210767996888e-06, "loss": 0.3386, "step": 18599 }, { "epoch": 1.8910126067507118, "grad_norm": 0.28588688373565674, "learning_rate": 7.821917812226305e-06, "loss": 0.365, "step": 18600 }, { "epoch": 1.8911142740951608, "grad_norm": 0.290161669254303, "learning_rate": 7.821624842239573e-06, "loss": 0.3628, "step": 18601 }, { "epoch": 1.8912159414396097, "grad_norm": 0.2827723026275635, "learning_rate": 7.821331858038164e-06, "loss": 0.3542, "step": 18602 }, { "epoch": 1.8913176087840586, "grad_norm": 0.2865931987762451, "learning_rate": 7.821038859623557e-06, "loss": 0.3713, "step": 18603 }, { "epoch": 1.8914192761285076, "grad_norm": 0.2652360200881958, "learning_rate": 7.820745846997227e-06, "loss": 0.3569, "step": 18604 }, { "epoch": 1.8915209434729565, "grad_norm": 0.29300084710121155, "learning_rate": 7.820452820160649e-06, "loss": 0.3898, "step": 18605 }, { "epoch": 1.8916226108174055, "grad_norm": 0.26052355766296387, "learning_rate": 7.820159779115299e-06, "loss": 0.3321, "step": 18606 }, { "epoch": 1.8917242781618544, "grad_norm": 0.2726248800754547, "learning_rate": 7.819866723862654e-06, "loss": 0.3334, "step": 18607 }, { "epoch": 1.8918259455063033, "grad_norm": 0.25625306367874146, "learning_rate": 7.81957365440419e-06, "loss": 0.3474, "step": 18608 }, { "epoch": 1.8919276128507523, "grad_norm": 0.32223570346832275, "learning_rate": 7.819280570741388e-06, "loss": 0.3979, "step": 18609 }, { "epoch": 1.8920292801952012, "grad_norm": 0.2560012936592102, "learning_rate": 7.818987472875716e-06, "loss": 0.3628, "step": 18610 }, { "epoch": 1.8921309475396502, "grad_norm": 0.27664443850517273, "learning_rate": 7.818694360808658e-06, "loss": 0.3437, "step": 18611 }, { "epoch": 1.892232614884099, "grad_norm": 0.28800106048583984, "learning_rate": 7.818401234541686e-06, "loss": 0.3692, "step": 18612 }, { "epoch": 1.892334282228548, "grad_norm": 0.2556350529193878, "learning_rate": 7.81810809407628e-06, "loss": 0.3634, "step": 18613 }, { "epoch": 1.892435949572997, "grad_norm": 0.2742467522621155, "learning_rate": 7.817814939413913e-06, "loss": 0.3647, "step": 18614 }, { "epoch": 1.8925376169174462, "grad_norm": 0.2998797297477722, "learning_rate": 7.817521770556065e-06, "loss": 0.3534, "step": 18615 }, { "epoch": 1.892639284261895, "grad_norm": 0.275303453207016, "learning_rate": 7.81722858750421e-06, "loss": 0.3554, "step": 18616 }, { "epoch": 1.892740951606344, "grad_norm": 0.26684480905532837, "learning_rate": 7.81693539025983e-06, "loss": 0.345, "step": 18617 }, { "epoch": 1.892842618950793, "grad_norm": 0.2652316391468048, "learning_rate": 7.816642178824397e-06, "loss": 0.3531, "step": 18618 }, { "epoch": 1.892944286295242, "grad_norm": 0.2994735538959503, "learning_rate": 7.81634895319939e-06, "loss": 0.3492, "step": 18619 }, { "epoch": 1.893045953639691, "grad_norm": 0.3003387749195099, "learning_rate": 7.816055713386286e-06, "loss": 0.3825, "step": 18620 }, { "epoch": 1.89314762098414, "grad_norm": 0.27313315868377686, "learning_rate": 7.815762459386564e-06, "loss": 0.3758, "step": 18621 }, { "epoch": 1.893249288328589, "grad_norm": 0.27399829030036926, "learning_rate": 7.8154691912017e-06, "loss": 0.3516, "step": 18622 }, { "epoch": 1.893350955673038, "grad_norm": 0.2933048903942108, "learning_rate": 7.81517590883317e-06, "loss": 0.3503, "step": 18623 }, { "epoch": 1.8934526230174868, "grad_norm": 0.2786032557487488, "learning_rate": 7.814882612282453e-06, "loss": 0.4175, "step": 18624 }, { "epoch": 1.8935542903619358, "grad_norm": 0.28151023387908936, "learning_rate": 7.814589301551027e-06, "loss": 0.3502, "step": 18625 }, { "epoch": 1.8936559577063847, "grad_norm": 0.2960216701030731, "learning_rate": 7.814295976640368e-06, "loss": 0.3905, "step": 18626 }, { "epoch": 1.8937576250508337, "grad_norm": 0.2825864255428314, "learning_rate": 7.814002637551954e-06, "loss": 0.3523, "step": 18627 }, { "epoch": 1.8938592923952826, "grad_norm": 0.2673647999763489, "learning_rate": 7.813709284287265e-06, "loss": 0.3594, "step": 18628 }, { "epoch": 1.8939609597397316, "grad_norm": 0.2912595570087433, "learning_rate": 7.813415916847778e-06, "loss": 0.3599, "step": 18629 }, { "epoch": 1.8940626270841805, "grad_norm": 0.2961191236972809, "learning_rate": 7.813122535234968e-06, "loss": 0.3666, "step": 18630 }, { "epoch": 1.8941642944286294, "grad_norm": 0.2812081277370453, "learning_rate": 7.812829139450316e-06, "loss": 0.3721, "step": 18631 }, { "epoch": 1.8942659617730784, "grad_norm": 0.2681153118610382, "learning_rate": 7.812535729495299e-06, "loss": 0.3657, "step": 18632 }, { "epoch": 1.8943676291175273, "grad_norm": 0.29502028226852417, "learning_rate": 7.812242305371396e-06, "loss": 0.3565, "step": 18633 }, { "epoch": 1.8944692964619763, "grad_norm": 0.2626835107803345, "learning_rate": 7.811948867080083e-06, "loss": 0.3766, "step": 18634 }, { "epoch": 1.8945709638064254, "grad_norm": 0.2775039076805115, "learning_rate": 7.811655414622841e-06, "loss": 0.3891, "step": 18635 }, { "epoch": 1.8946726311508744, "grad_norm": 0.2972318232059479, "learning_rate": 7.811361948001148e-06, "loss": 0.3629, "step": 18636 }, { "epoch": 1.8947742984953233, "grad_norm": 0.2754078805446625, "learning_rate": 7.81106846721648e-06, "loss": 0.3427, "step": 18637 }, { "epoch": 1.8948759658397722, "grad_norm": 0.276483952999115, "learning_rate": 7.810774972270318e-06, "loss": 0.3926, "step": 18638 }, { "epoch": 1.8949776331842212, "grad_norm": 0.26764219999313354, "learning_rate": 7.81048146316414e-06, "loss": 0.3982, "step": 18639 }, { "epoch": 1.8950793005286704, "grad_norm": 0.28039705753326416, "learning_rate": 7.810187939899423e-06, "loss": 0.3454, "step": 18640 }, { "epoch": 1.8951809678731193, "grad_norm": 0.2643284499645233, "learning_rate": 7.809894402477649e-06, "loss": 0.3647, "step": 18641 }, { "epoch": 1.8952826352175682, "grad_norm": 0.2929536700248718, "learning_rate": 7.809600850900294e-06, "loss": 0.3471, "step": 18642 }, { "epoch": 1.8953843025620172, "grad_norm": 0.2819604277610779, "learning_rate": 7.809307285168837e-06, "loss": 0.3896, "step": 18643 }, { "epoch": 1.8954859699064661, "grad_norm": 0.2853623032569885, "learning_rate": 7.809013705284757e-06, "loss": 0.3459, "step": 18644 }, { "epoch": 1.895587637250915, "grad_norm": 0.26667237281799316, "learning_rate": 7.808720111249534e-06, "loss": 0.3616, "step": 18645 }, { "epoch": 1.895689304595364, "grad_norm": 0.28814437985420227, "learning_rate": 7.808426503064647e-06, "loss": 0.3534, "step": 18646 }, { "epoch": 1.895790971939813, "grad_norm": 0.27950319647789, "learning_rate": 7.808132880731574e-06, "loss": 0.3541, "step": 18647 }, { "epoch": 1.8958926392842619, "grad_norm": 0.30212581157684326, "learning_rate": 7.807839244251793e-06, "loss": 0.3685, "step": 18648 }, { "epoch": 1.8959943066287108, "grad_norm": 0.29836905002593994, "learning_rate": 7.807545593626789e-06, "loss": 0.3601, "step": 18649 }, { "epoch": 1.8960959739731598, "grad_norm": 0.27246204018592834, "learning_rate": 7.807251928858036e-06, "loss": 0.3506, "step": 18650 }, { "epoch": 1.8961976413176087, "grad_norm": 0.28294649720191956, "learning_rate": 7.806958249947014e-06, "loss": 0.3701, "step": 18651 }, { "epoch": 1.8962993086620576, "grad_norm": 0.2962004840373993, "learning_rate": 7.806664556895204e-06, "loss": 0.3325, "step": 18652 }, { "epoch": 1.8964009760065066, "grad_norm": 0.27265268564224243, "learning_rate": 7.806370849704084e-06, "loss": 0.347, "step": 18653 }, { "epoch": 1.8965026433509555, "grad_norm": 0.26473742723464966, "learning_rate": 7.806077128375136e-06, "loss": 0.3411, "step": 18654 }, { "epoch": 1.8966043106954045, "grad_norm": 0.2921057641506195, "learning_rate": 7.805783392909839e-06, "loss": 0.373, "step": 18655 }, { "epoch": 1.8967059780398536, "grad_norm": 0.26421093940734863, "learning_rate": 7.80548964330967e-06, "loss": 0.355, "step": 18656 }, { "epoch": 1.8968076453843026, "grad_norm": 0.2703958749771118, "learning_rate": 7.805195879576111e-06, "loss": 0.3596, "step": 18657 }, { "epoch": 1.8969093127287515, "grad_norm": 0.29058533906936646, "learning_rate": 7.804902101710643e-06, "loss": 0.3806, "step": 18658 }, { "epoch": 1.8970109800732005, "grad_norm": 0.2664410471916199, "learning_rate": 7.804608309714744e-06, "loss": 0.3932, "step": 18659 }, { "epoch": 1.8971126474176494, "grad_norm": 0.28094246983528137, "learning_rate": 7.804314503589896e-06, "loss": 0.3529, "step": 18660 }, { "epoch": 1.8972143147620986, "grad_norm": 0.283633291721344, "learning_rate": 7.804020683337577e-06, "loss": 0.3785, "step": 18661 }, { "epoch": 1.8973159821065475, "grad_norm": 0.28568631410598755, "learning_rate": 7.803726848959269e-06, "loss": 0.3624, "step": 18662 }, { "epoch": 1.8974176494509964, "grad_norm": 0.26616019010543823, "learning_rate": 7.803433000456447e-06, "loss": 0.3841, "step": 18663 }, { "epoch": 1.8975193167954454, "grad_norm": 0.2898610532283783, "learning_rate": 7.8031391378306e-06, "loss": 0.3931, "step": 18664 }, { "epoch": 1.8976209841398943, "grad_norm": 0.2735244929790497, "learning_rate": 7.802845261083202e-06, "loss": 0.3481, "step": 18665 }, { "epoch": 1.8977226514843433, "grad_norm": 0.2631167769432068, "learning_rate": 7.80255137021574e-06, "loss": 0.3341, "step": 18666 }, { "epoch": 1.8978243188287922, "grad_norm": 0.26847612857818604, "learning_rate": 7.802257465229686e-06, "loss": 0.3571, "step": 18667 }, { "epoch": 1.8979259861732412, "grad_norm": 0.26149725914001465, "learning_rate": 7.801963546126525e-06, "loss": 0.3309, "step": 18668 }, { "epoch": 1.89802765351769, "grad_norm": 0.2607784867286682, "learning_rate": 7.801669612907739e-06, "loss": 0.3362, "step": 18669 }, { "epoch": 1.898129320862139, "grad_norm": 0.28741368651390076, "learning_rate": 7.801375665574806e-06, "loss": 0.3595, "step": 18670 }, { "epoch": 1.898230988206588, "grad_norm": 0.29674074053764343, "learning_rate": 7.80108170412921e-06, "loss": 0.3651, "step": 18671 }, { "epoch": 1.898332655551037, "grad_norm": 0.26395824551582336, "learning_rate": 7.800787728572428e-06, "loss": 0.3853, "step": 18672 }, { "epoch": 1.8984343228954859, "grad_norm": 0.26498210430145264, "learning_rate": 7.800493738905942e-06, "loss": 0.3735, "step": 18673 }, { "epoch": 1.8985359902399348, "grad_norm": 0.3061096668243408, "learning_rate": 7.800199735131237e-06, "loss": 0.387, "step": 18674 }, { "epoch": 1.8986376575843837, "grad_norm": 0.30743327736854553, "learning_rate": 7.79990571724979e-06, "loss": 0.3658, "step": 18675 }, { "epoch": 1.898739324928833, "grad_norm": 0.27417758107185364, "learning_rate": 7.799611685263084e-06, "loss": 0.3694, "step": 18676 }, { "epoch": 1.8988409922732818, "grad_norm": 0.3007684350013733, "learning_rate": 7.799317639172598e-06, "loss": 0.3826, "step": 18677 }, { "epoch": 1.8989426596177308, "grad_norm": 0.29285869002342224, "learning_rate": 7.799023578979816e-06, "loss": 0.3642, "step": 18678 }, { "epoch": 1.8990443269621797, "grad_norm": 0.26191356778144836, "learning_rate": 7.79872950468622e-06, "loss": 0.3408, "step": 18679 }, { "epoch": 1.8991459943066287, "grad_norm": 0.25701767206192017, "learning_rate": 7.798435416293287e-06, "loss": 0.3456, "step": 18680 }, { "epoch": 1.8992476616510778, "grad_norm": 0.28569459915161133, "learning_rate": 7.798141313802503e-06, "loss": 0.3581, "step": 18681 }, { "epoch": 1.8993493289955268, "grad_norm": 0.2605530321598053, "learning_rate": 7.797847197215347e-06, "loss": 0.3309, "step": 18682 }, { "epoch": 1.8994509963399757, "grad_norm": 0.2799971103668213, "learning_rate": 7.797553066533302e-06, "loss": 0.3568, "step": 18683 }, { "epoch": 1.8995526636844247, "grad_norm": 0.2783256471157074, "learning_rate": 7.797258921757849e-06, "loss": 0.3395, "step": 18684 }, { "epoch": 1.8996543310288736, "grad_norm": 0.26054298877716064, "learning_rate": 7.796964762890471e-06, "loss": 0.3742, "step": 18685 }, { "epoch": 1.8997559983733225, "grad_norm": 0.27301132678985596, "learning_rate": 7.79667058993265e-06, "loss": 0.3368, "step": 18686 }, { "epoch": 1.8998576657177715, "grad_norm": 0.2708194851875305, "learning_rate": 7.796376402885866e-06, "loss": 0.3644, "step": 18687 }, { "epoch": 1.8999593330622204, "grad_norm": 0.3007480502128601, "learning_rate": 7.796082201751602e-06, "loss": 0.388, "step": 18688 }, { "epoch": 1.9000610004066694, "grad_norm": 0.2905254662036896, "learning_rate": 7.79578798653134e-06, "loss": 0.3464, "step": 18689 }, { "epoch": 1.9001626677511183, "grad_norm": 0.2485785335302353, "learning_rate": 7.795493757226565e-06, "loss": 0.3625, "step": 18690 }, { "epoch": 1.9002643350955672, "grad_norm": 0.2788926661014557, "learning_rate": 7.795199513838755e-06, "loss": 0.3422, "step": 18691 }, { "epoch": 1.9003660024400162, "grad_norm": 0.3034170866012573, "learning_rate": 7.794905256369395e-06, "loss": 0.3366, "step": 18692 }, { "epoch": 1.9004676697844651, "grad_norm": 0.2777867019176483, "learning_rate": 7.794610984819965e-06, "loss": 0.3595, "step": 18693 }, { "epoch": 1.900569337128914, "grad_norm": 0.26064038276672363, "learning_rate": 7.79431669919195e-06, "loss": 0.3701, "step": 18694 }, { "epoch": 1.900671004473363, "grad_norm": 0.2650676965713501, "learning_rate": 7.79402239948683e-06, "loss": 0.3353, "step": 18695 }, { "epoch": 1.900772671817812, "grad_norm": 0.2768256664276123, "learning_rate": 7.79372808570609e-06, "loss": 0.3663, "step": 18696 }, { "epoch": 1.9008743391622611, "grad_norm": 0.25760406255722046, "learning_rate": 7.793433757851212e-06, "loss": 0.33, "step": 18697 }, { "epoch": 1.90097600650671, "grad_norm": 0.27749359607696533, "learning_rate": 7.793139415923679e-06, "loss": 0.3295, "step": 18698 }, { "epoch": 1.901077673851159, "grad_norm": 0.2847328186035156, "learning_rate": 7.792845059924973e-06, "loss": 0.3499, "step": 18699 }, { "epoch": 1.901179341195608, "grad_norm": 0.32954660058021545, "learning_rate": 7.792550689856577e-06, "loss": 0.3971, "step": 18700 }, { "epoch": 1.9012810085400569, "grad_norm": 0.2602763772010803, "learning_rate": 7.792256305719974e-06, "loss": 0.3379, "step": 18701 }, { "epoch": 1.901382675884506, "grad_norm": 0.259298175573349, "learning_rate": 7.791961907516649e-06, "loss": 0.329, "step": 18702 }, { "epoch": 1.901484343228955, "grad_norm": 0.27315521240234375, "learning_rate": 7.79166749524808e-06, "loss": 0.36, "step": 18703 }, { "epoch": 1.901586010573404, "grad_norm": 0.26690250635147095, "learning_rate": 7.791373068915757e-06, "loss": 0.3771, "step": 18704 }, { "epoch": 1.9016876779178529, "grad_norm": 0.28339990973472595, "learning_rate": 7.791078628521158e-06, "loss": 0.3499, "step": 18705 }, { "epoch": 1.9017893452623018, "grad_norm": 0.27276691794395447, "learning_rate": 7.790784174065768e-06, "loss": 0.3413, "step": 18706 }, { "epoch": 1.9018910126067508, "grad_norm": 0.2790939211845398, "learning_rate": 7.790489705551072e-06, "loss": 0.3634, "step": 18707 }, { "epoch": 1.9019926799511997, "grad_norm": 0.27581438422203064, "learning_rate": 7.790195222978552e-06, "loss": 0.386, "step": 18708 }, { "epoch": 1.9020943472956486, "grad_norm": 0.25137031078338623, "learning_rate": 7.78990072634969e-06, "loss": 0.3576, "step": 18709 }, { "epoch": 1.9021960146400976, "grad_norm": 0.3187187612056732, "learning_rate": 7.789606215665973e-06, "loss": 0.3672, "step": 18710 }, { "epoch": 1.9022976819845465, "grad_norm": 0.2815122902393341, "learning_rate": 7.78931169092888e-06, "loss": 0.3485, "step": 18711 }, { "epoch": 1.9023993493289955, "grad_norm": 0.280838280916214, "learning_rate": 7.7890171521399e-06, "loss": 0.3649, "step": 18712 }, { "epoch": 1.9025010166734444, "grad_norm": 0.28423795104026794, "learning_rate": 7.788722599300514e-06, "loss": 0.3667, "step": 18713 }, { "epoch": 1.9026026840178933, "grad_norm": 0.28530704975128174, "learning_rate": 7.788428032412205e-06, "loss": 0.3469, "step": 18714 }, { "epoch": 1.9027043513623423, "grad_norm": 0.2564767003059387, "learning_rate": 7.788133451476458e-06, "loss": 0.3392, "step": 18715 }, { "epoch": 1.9028060187067912, "grad_norm": 0.2786701023578644, "learning_rate": 7.787838856494758e-06, "loss": 0.37, "step": 18716 }, { "epoch": 1.9029076860512404, "grad_norm": 0.27103516459465027, "learning_rate": 7.78754424746859e-06, "loss": 0.349, "step": 18717 }, { "epoch": 1.9030093533956893, "grad_norm": 0.26247313618659973, "learning_rate": 7.787249624399434e-06, "loss": 0.403, "step": 18718 }, { "epoch": 1.9031110207401383, "grad_norm": 0.2648465931415558, "learning_rate": 7.786954987288777e-06, "loss": 0.3737, "step": 18719 }, { "epoch": 1.9032126880845872, "grad_norm": 0.3043089509010315, "learning_rate": 7.786660336138105e-06, "loss": 0.3615, "step": 18720 }, { "epoch": 1.9033143554290362, "grad_norm": 0.27559104561805725, "learning_rate": 7.786365670948899e-06, "loss": 0.3613, "step": 18721 }, { "epoch": 1.9034160227734853, "grad_norm": 0.2870687246322632, "learning_rate": 7.786070991722646e-06, "loss": 0.3583, "step": 18722 }, { "epoch": 1.9035176901179343, "grad_norm": 0.27099230885505676, "learning_rate": 7.785776298460828e-06, "loss": 0.3863, "step": 18723 }, { "epoch": 1.9036193574623832, "grad_norm": 0.2821052074432373, "learning_rate": 7.785481591164931e-06, "loss": 0.348, "step": 18724 }, { "epoch": 1.9037210248068321, "grad_norm": 0.2681090533733368, "learning_rate": 7.785186869836441e-06, "loss": 0.3635, "step": 18725 }, { "epoch": 1.903822692151281, "grad_norm": 0.2963370680809021, "learning_rate": 7.78489213447684e-06, "loss": 0.3523, "step": 18726 }, { "epoch": 1.90392435949573, "grad_norm": 0.2598707675933838, "learning_rate": 7.784597385087616e-06, "loss": 0.3264, "step": 18727 }, { "epoch": 1.904026026840179, "grad_norm": 0.2742220461368561, "learning_rate": 7.78430262167025e-06, "loss": 0.3652, "step": 18728 }, { "epoch": 1.904127694184628, "grad_norm": 0.28551116585731506, "learning_rate": 7.784007844226229e-06, "loss": 0.372, "step": 18729 }, { "epoch": 1.9042293615290768, "grad_norm": 0.27556782960891724, "learning_rate": 7.783713052757039e-06, "loss": 0.3751, "step": 18730 }, { "epoch": 1.9043310288735258, "grad_norm": 0.2945651113986969, "learning_rate": 7.783418247264166e-06, "loss": 0.3857, "step": 18731 }, { "epoch": 1.9044326962179747, "grad_norm": 0.2760544419288635, "learning_rate": 7.78312342774909e-06, "loss": 0.3536, "step": 18732 }, { "epoch": 1.9045343635624237, "grad_norm": 0.2787666320800781, "learning_rate": 7.7828285942133e-06, "loss": 0.3432, "step": 18733 }, { "epoch": 1.9046360309068726, "grad_norm": 0.26596909761428833, "learning_rate": 7.782533746658283e-06, "loss": 0.3533, "step": 18734 }, { "epoch": 1.9047376982513216, "grad_norm": 0.2696046531200409, "learning_rate": 7.782238885085522e-06, "loss": 0.3438, "step": 18735 }, { "epoch": 1.9048393655957705, "grad_norm": 0.2565912902355194, "learning_rate": 7.781944009496501e-06, "loss": 0.3448, "step": 18736 }, { "epoch": 1.9049410329402194, "grad_norm": 0.2884356677532196, "learning_rate": 7.781649119892709e-06, "loss": 0.3652, "step": 18737 }, { "epoch": 1.9050427002846686, "grad_norm": 0.2905336916446686, "learning_rate": 7.781354216275627e-06, "loss": 0.3778, "step": 18738 }, { "epoch": 1.9051443676291175, "grad_norm": 0.27403363585472107, "learning_rate": 7.781059298646746e-06, "loss": 0.3901, "step": 18739 }, { "epoch": 1.9052460349735665, "grad_norm": 0.2596767246723175, "learning_rate": 7.780764367007548e-06, "loss": 0.352, "step": 18740 }, { "epoch": 1.9053477023180154, "grad_norm": 0.2781451642513275, "learning_rate": 7.78046942135952e-06, "loss": 0.3591, "step": 18741 }, { "epoch": 1.9054493696624644, "grad_norm": 0.2926746904850006, "learning_rate": 7.780174461704149e-06, "loss": 0.3337, "step": 18742 }, { "epoch": 1.9055510370069135, "grad_norm": 0.28375887870788574, "learning_rate": 7.779879488042917e-06, "loss": 0.3861, "step": 18743 }, { "epoch": 1.9056527043513625, "grad_norm": 0.2744199335575104, "learning_rate": 7.779584500377314e-06, "loss": 0.364, "step": 18744 }, { "epoch": 1.9057543716958114, "grad_norm": 0.2725566029548645, "learning_rate": 7.779289498708827e-06, "loss": 0.3746, "step": 18745 }, { "epoch": 1.9058560390402604, "grad_norm": 0.29690322279930115, "learning_rate": 7.778994483038937e-06, "loss": 0.3421, "step": 18746 }, { "epoch": 1.9059577063847093, "grad_norm": 0.2831660807132721, "learning_rate": 7.778699453369136e-06, "loss": 0.3382, "step": 18747 }, { "epoch": 1.9060593737291582, "grad_norm": 0.269452840089798, "learning_rate": 7.778404409700905e-06, "loss": 0.3199, "step": 18748 }, { "epoch": 1.9061610410736072, "grad_norm": 0.31083452701568604, "learning_rate": 7.778109352035735e-06, "loss": 0.3929, "step": 18749 }, { "epoch": 1.9062627084180561, "grad_norm": 0.2775413990020752, "learning_rate": 7.777814280375109e-06, "loss": 0.3671, "step": 18750 }, { "epoch": 1.906364375762505, "grad_norm": 0.2633584141731262, "learning_rate": 7.777519194720515e-06, "loss": 0.3605, "step": 18751 }, { "epoch": 1.906466043106954, "grad_norm": 0.2725122570991516, "learning_rate": 7.777224095073437e-06, "loss": 0.3443, "step": 18752 }, { "epoch": 1.906567710451403, "grad_norm": 0.26245829463005066, "learning_rate": 7.776928981435368e-06, "loss": 0.3622, "step": 18753 }, { "epoch": 1.9066693777958519, "grad_norm": 0.2554575204849243, "learning_rate": 7.776633853807787e-06, "loss": 0.3396, "step": 18754 }, { "epoch": 1.9067710451403008, "grad_norm": 0.26046547293663025, "learning_rate": 7.776338712192186e-06, "loss": 0.3787, "step": 18755 }, { "epoch": 1.9068727124847498, "grad_norm": 0.2994793653488159, "learning_rate": 7.776043556590051e-06, "loss": 0.3555, "step": 18756 }, { "epoch": 1.9069743798291987, "grad_norm": 0.2931906580924988, "learning_rate": 7.775748387002867e-06, "loss": 0.3401, "step": 18757 }, { "epoch": 1.9070760471736479, "grad_norm": 0.2849563658237457, "learning_rate": 7.775453203432123e-06, "loss": 0.3666, "step": 18758 }, { "epoch": 1.9071777145180968, "grad_norm": 0.27972668409347534, "learning_rate": 7.775158005879306e-06, "loss": 0.3757, "step": 18759 }, { "epoch": 1.9072793818625458, "grad_norm": 0.26933854818344116, "learning_rate": 7.774862794345904e-06, "loss": 0.3573, "step": 18760 }, { "epoch": 1.9073810492069947, "grad_norm": 0.2759581506252289, "learning_rate": 7.774567568833398e-06, "loss": 0.3527, "step": 18761 }, { "epoch": 1.9074827165514436, "grad_norm": 0.2718932330608368, "learning_rate": 7.774272329343284e-06, "loss": 0.3478, "step": 18762 }, { "epoch": 1.9075843838958928, "grad_norm": 0.27910712361335754, "learning_rate": 7.773977075877045e-06, "loss": 0.3657, "step": 18763 }, { "epoch": 1.9076860512403417, "grad_norm": 0.2918876111507416, "learning_rate": 7.773681808436166e-06, "loss": 0.3782, "step": 18764 }, { "epoch": 1.9077877185847907, "grad_norm": 0.27437421679496765, "learning_rate": 7.773386527022137e-06, "loss": 0.3514, "step": 18765 }, { "epoch": 1.9078893859292396, "grad_norm": 0.30236753821372986, "learning_rate": 7.773091231636449e-06, "loss": 0.4014, "step": 18766 }, { "epoch": 1.9079910532736886, "grad_norm": 0.32029372453689575, "learning_rate": 7.772795922280584e-06, "loss": 0.3738, "step": 18767 }, { "epoch": 1.9080927206181375, "grad_norm": 0.2791934311389923, "learning_rate": 7.772500598956032e-06, "loss": 0.3593, "step": 18768 }, { "epoch": 1.9081943879625864, "grad_norm": 0.2984010577201843, "learning_rate": 7.772205261664282e-06, "loss": 0.3453, "step": 18769 }, { "epoch": 1.9082960553070354, "grad_norm": 0.2771478593349457, "learning_rate": 7.771909910406821e-06, "loss": 0.3629, "step": 18770 }, { "epoch": 1.9083977226514843, "grad_norm": 0.27880340814590454, "learning_rate": 7.771614545185135e-06, "loss": 0.3572, "step": 18771 }, { "epoch": 1.9084993899959333, "grad_norm": 0.26980599761009216, "learning_rate": 7.771319166000716e-06, "loss": 0.3388, "step": 18772 }, { "epoch": 1.9086010573403822, "grad_norm": 0.28048595786094666, "learning_rate": 7.771023772855047e-06, "loss": 0.3593, "step": 18773 }, { "epoch": 1.9087027246848312, "grad_norm": 0.261010080575943, "learning_rate": 7.770728365749618e-06, "loss": 0.3436, "step": 18774 }, { "epoch": 1.90880439202928, "grad_norm": 0.30233633518218994, "learning_rate": 7.770432944685919e-06, "loss": 0.3534, "step": 18775 }, { "epoch": 1.908906059373729, "grad_norm": 0.27162840962409973, "learning_rate": 7.770137509665438e-06, "loss": 0.355, "step": 18776 }, { "epoch": 1.909007726718178, "grad_norm": 0.2818980813026428, "learning_rate": 7.769842060689663e-06, "loss": 0.3493, "step": 18777 }, { "epoch": 1.909109394062627, "grad_norm": 0.3158940374851227, "learning_rate": 7.769546597760082e-06, "loss": 0.3554, "step": 18778 }, { "epoch": 1.909211061407076, "grad_norm": 0.283390074968338, "learning_rate": 7.769251120878183e-06, "loss": 0.3462, "step": 18779 }, { "epoch": 1.909312728751525, "grad_norm": 0.2894665002822876, "learning_rate": 7.768955630045454e-06, "loss": 0.3761, "step": 18780 }, { "epoch": 1.909414396095974, "grad_norm": 0.2767643630504608, "learning_rate": 7.768660125263386e-06, "loss": 0.3459, "step": 18781 }, { "epoch": 1.909516063440423, "grad_norm": 0.28015798330307007, "learning_rate": 7.768364606533464e-06, "loss": 0.3773, "step": 18782 }, { "epoch": 1.9096177307848718, "grad_norm": 0.2602670192718506, "learning_rate": 7.768069073857181e-06, "loss": 0.331, "step": 18783 }, { "epoch": 1.909719398129321, "grad_norm": 0.3153010308742523, "learning_rate": 7.767773527236022e-06, "loss": 0.4139, "step": 18784 }, { "epoch": 1.90982106547377, "grad_norm": 0.2615756094455719, "learning_rate": 7.767477966671479e-06, "loss": 0.3731, "step": 18785 }, { "epoch": 1.909922732818219, "grad_norm": 0.2810882031917572, "learning_rate": 7.76718239216504e-06, "loss": 0.3643, "step": 18786 }, { "epoch": 1.9100244001626678, "grad_norm": 0.29760634899139404, "learning_rate": 7.766886803718191e-06, "loss": 0.3691, "step": 18787 }, { "epoch": 1.9101260675071168, "grad_norm": 0.27897894382476807, "learning_rate": 7.766591201332427e-06, "loss": 0.3589, "step": 18788 }, { "epoch": 1.9102277348515657, "grad_norm": 0.2713359594345093, "learning_rate": 7.766295585009234e-06, "loss": 0.3485, "step": 18789 }, { "epoch": 1.9103294021960147, "grad_norm": 0.25945085287094116, "learning_rate": 7.765999954750099e-06, "loss": 0.383, "step": 18790 }, { "epoch": 1.9104310695404636, "grad_norm": 0.300719290971756, "learning_rate": 7.765704310556514e-06, "loss": 0.3691, "step": 18791 }, { "epoch": 1.9105327368849125, "grad_norm": 0.270556777715683, "learning_rate": 7.765408652429969e-06, "loss": 0.3676, "step": 18792 }, { "epoch": 1.9106344042293615, "grad_norm": 0.2639871835708618, "learning_rate": 7.765112980371953e-06, "loss": 0.3796, "step": 18793 }, { "epoch": 1.9107360715738104, "grad_norm": 0.24768474698066711, "learning_rate": 7.764817294383953e-06, "loss": 0.3377, "step": 18794 }, { "epoch": 1.9108377389182594, "grad_norm": 0.2743246853351593, "learning_rate": 7.76452159446746e-06, "loss": 0.3662, "step": 18795 }, { "epoch": 1.9109394062627083, "grad_norm": 0.27990972995758057, "learning_rate": 7.764225880623966e-06, "loss": 0.3437, "step": 18796 }, { "epoch": 1.9110410736071572, "grad_norm": 0.2694929540157318, "learning_rate": 7.76393015285496e-06, "loss": 0.3556, "step": 18797 }, { "epoch": 1.9111427409516062, "grad_norm": 0.2607190012931824, "learning_rate": 7.763634411161928e-06, "loss": 0.3616, "step": 18798 }, { "epoch": 1.9112444082960554, "grad_norm": 0.2986314594745636, "learning_rate": 7.763338655546363e-06, "loss": 0.3579, "step": 18799 }, { "epoch": 1.9113460756405043, "grad_norm": 0.25309398770332336, "learning_rate": 7.763042886009754e-06, "loss": 0.3869, "step": 18800 }, { "epoch": 1.9114477429849532, "grad_norm": 0.26334625482559204, "learning_rate": 7.762747102553594e-06, "loss": 0.3581, "step": 18801 }, { "epoch": 1.9115494103294022, "grad_norm": 0.2807494103908539, "learning_rate": 7.76245130517937e-06, "loss": 0.3589, "step": 18802 }, { "epoch": 1.9116510776738511, "grad_norm": 0.31590771675109863, "learning_rate": 7.762155493888573e-06, "loss": 0.3659, "step": 18803 }, { "epoch": 1.9117527450183003, "grad_norm": 0.28231555223464966, "learning_rate": 7.76185966868269e-06, "loss": 0.3671, "step": 18804 }, { "epoch": 1.9118544123627492, "grad_norm": 0.2826099097728729, "learning_rate": 7.761563829563217e-06, "loss": 0.3868, "step": 18805 }, { "epoch": 1.9119560797071982, "grad_norm": 0.2993791401386261, "learning_rate": 7.761267976531642e-06, "loss": 0.3581, "step": 18806 }, { "epoch": 1.912057747051647, "grad_norm": 0.29847970604896545, "learning_rate": 7.760972109589454e-06, "loss": 0.3808, "step": 18807 }, { "epoch": 1.912159414396096, "grad_norm": 0.27312856912612915, "learning_rate": 7.760676228738145e-06, "loss": 0.3892, "step": 18808 }, { "epoch": 1.912261081740545, "grad_norm": 0.2807067036628723, "learning_rate": 7.760380333979204e-06, "loss": 0.3685, "step": 18809 }, { "epoch": 1.912362749084994, "grad_norm": 0.29519587755203247, "learning_rate": 7.760084425314123e-06, "loss": 0.3518, "step": 18810 }, { "epoch": 1.9124644164294429, "grad_norm": 0.3018767535686493, "learning_rate": 7.759788502744394e-06, "loss": 0.3517, "step": 18811 }, { "epoch": 1.9125660837738918, "grad_norm": 0.2608031630516052, "learning_rate": 7.759492566271507e-06, "loss": 0.3658, "step": 18812 }, { "epoch": 1.9126677511183408, "grad_norm": 0.2971365451812744, "learning_rate": 7.75919661589695e-06, "loss": 0.3752, "step": 18813 }, { "epoch": 1.9127694184627897, "grad_norm": 0.2957441210746765, "learning_rate": 7.758900651622217e-06, "loss": 0.3678, "step": 18814 }, { "epoch": 1.9128710858072386, "grad_norm": 0.2834078371524811, "learning_rate": 7.758604673448798e-06, "loss": 0.3399, "step": 18815 }, { "epoch": 1.9129727531516876, "grad_norm": 0.28063005208969116, "learning_rate": 7.758308681378185e-06, "loss": 0.3495, "step": 18816 }, { "epoch": 1.9130744204961365, "grad_norm": 0.26733502745628357, "learning_rate": 7.758012675411869e-06, "loss": 0.3375, "step": 18817 }, { "epoch": 1.9131760878405855, "grad_norm": 0.298689603805542, "learning_rate": 7.757716655551338e-06, "loss": 0.3426, "step": 18818 }, { "epoch": 1.9132777551850344, "grad_norm": 0.26999685168266296, "learning_rate": 7.757420621798087e-06, "loss": 0.342, "step": 18819 }, { "epoch": 1.9133794225294836, "grad_norm": 0.2649867534637451, "learning_rate": 7.757124574153607e-06, "loss": 0.3896, "step": 18820 }, { "epoch": 1.9134810898739325, "grad_norm": 0.2622387409210205, "learning_rate": 7.756828512619388e-06, "loss": 0.3237, "step": 18821 }, { "epoch": 1.9135827572183814, "grad_norm": 0.26741206645965576, "learning_rate": 7.756532437196922e-06, "loss": 0.3475, "step": 18822 }, { "epoch": 1.9136844245628304, "grad_norm": 0.26437923312187195, "learning_rate": 7.756236347887701e-06, "loss": 0.3664, "step": 18823 }, { "epoch": 1.9137860919072793, "grad_norm": 0.2639336884021759, "learning_rate": 7.755940244693215e-06, "loss": 0.3646, "step": 18824 }, { "epoch": 1.9138877592517285, "grad_norm": 0.26953789591789246, "learning_rate": 7.755644127614959e-06, "loss": 0.3506, "step": 18825 }, { "epoch": 1.9139894265961774, "grad_norm": 0.2733462154865265, "learning_rate": 7.755347996654421e-06, "loss": 0.3339, "step": 18826 }, { "epoch": 1.9140910939406264, "grad_norm": 0.2680930197238922, "learning_rate": 7.755051851813095e-06, "loss": 0.3344, "step": 18827 }, { "epoch": 1.9141927612850753, "grad_norm": 0.24947716295719147, "learning_rate": 7.754755693092472e-06, "loss": 0.3252, "step": 18828 }, { "epoch": 1.9142944286295243, "grad_norm": 0.2731761634349823, "learning_rate": 7.754459520494044e-06, "loss": 0.3358, "step": 18829 }, { "epoch": 1.9143960959739732, "grad_norm": 0.2941074073314667, "learning_rate": 7.754163334019306e-06, "loss": 0.3688, "step": 18830 }, { "epoch": 1.9144977633184221, "grad_norm": 0.30603304505348206, "learning_rate": 7.753867133669746e-06, "loss": 0.3612, "step": 18831 }, { "epoch": 1.914599430662871, "grad_norm": 0.2639386057853699, "learning_rate": 7.753570919446857e-06, "loss": 0.346, "step": 18832 }, { "epoch": 1.91470109800732, "grad_norm": 0.2931247353553772, "learning_rate": 7.753274691352133e-06, "loss": 0.3446, "step": 18833 }, { "epoch": 1.914802765351769, "grad_norm": 0.25410208106040955, "learning_rate": 7.752978449387065e-06, "loss": 0.3485, "step": 18834 }, { "epoch": 1.914904432696218, "grad_norm": 0.2827886939048767, "learning_rate": 7.752682193553148e-06, "loss": 0.3381, "step": 18835 }, { "epoch": 1.9150061000406668, "grad_norm": 0.2782060205936432, "learning_rate": 7.752385923851869e-06, "loss": 0.3407, "step": 18836 }, { "epoch": 1.9151077673851158, "grad_norm": 0.25993168354034424, "learning_rate": 7.752089640284725e-06, "loss": 0.355, "step": 18837 }, { "epoch": 1.9152094347295647, "grad_norm": 0.26482295989990234, "learning_rate": 7.75179334285321e-06, "loss": 0.3978, "step": 18838 }, { "epoch": 1.9153111020740137, "grad_norm": 0.27599066495895386, "learning_rate": 7.75149703155881e-06, "loss": 0.3634, "step": 18839 }, { "epoch": 1.9154127694184628, "grad_norm": 0.2925635278224945, "learning_rate": 7.751200706403024e-06, "loss": 0.3226, "step": 18840 }, { "epoch": 1.9155144367629118, "grad_norm": 0.273270845413208, "learning_rate": 7.750904367387342e-06, "loss": 0.3756, "step": 18841 }, { "epoch": 1.9156161041073607, "grad_norm": 0.27075934410095215, "learning_rate": 7.750608014513258e-06, "loss": 0.3603, "step": 18842 }, { "epoch": 1.9157177714518097, "grad_norm": 0.28669238090515137, "learning_rate": 7.750311647782263e-06, "loss": 0.384, "step": 18843 }, { "epoch": 1.9158194387962586, "grad_norm": 0.27392616868019104, "learning_rate": 7.750015267195853e-06, "loss": 0.3614, "step": 18844 }, { "epoch": 1.9159211061407078, "grad_norm": 0.24996042251586914, "learning_rate": 7.749718872755519e-06, "loss": 0.3435, "step": 18845 }, { "epoch": 1.9160227734851567, "grad_norm": 0.25684744119644165, "learning_rate": 7.749422464462755e-06, "loss": 0.3532, "step": 18846 }, { "epoch": 1.9161244408296056, "grad_norm": 0.25156277418136597, "learning_rate": 7.749126042319055e-06, "loss": 0.3604, "step": 18847 }, { "epoch": 1.9162261081740546, "grad_norm": 0.2971284091472626, "learning_rate": 7.74882960632591e-06, "loss": 0.3662, "step": 18848 }, { "epoch": 1.9163277755185035, "grad_norm": 0.2569815516471863, "learning_rate": 7.748533156484815e-06, "loss": 0.3503, "step": 18849 }, { "epoch": 1.9164294428629525, "grad_norm": 0.2715558111667633, "learning_rate": 7.748236692797262e-06, "loss": 0.3468, "step": 18850 }, { "epoch": 1.9165311102074014, "grad_norm": 0.2732459008693695, "learning_rate": 7.747940215264747e-06, "loss": 0.3509, "step": 18851 }, { "epoch": 1.9166327775518504, "grad_norm": 0.2809031307697296, "learning_rate": 7.74764372388876e-06, "loss": 0.337, "step": 18852 }, { "epoch": 1.9167344448962993, "grad_norm": 0.25678515434265137, "learning_rate": 7.7473472186708e-06, "loss": 0.349, "step": 18853 }, { "epoch": 1.9168361122407482, "grad_norm": 0.26903006434440613, "learning_rate": 7.747050699612356e-06, "loss": 0.3457, "step": 18854 }, { "epoch": 1.9169377795851972, "grad_norm": 0.2672838568687439, "learning_rate": 7.746754166714923e-06, "loss": 0.3523, "step": 18855 }, { "epoch": 1.9170394469296461, "grad_norm": 0.2930431663990021, "learning_rate": 7.746457619979998e-06, "loss": 0.3894, "step": 18856 }, { "epoch": 1.917141114274095, "grad_norm": 0.30300813913345337, "learning_rate": 7.746161059409069e-06, "loss": 0.3509, "step": 18857 }, { "epoch": 1.917242781618544, "grad_norm": 0.3056850731372833, "learning_rate": 7.745864485003633e-06, "loss": 0.3792, "step": 18858 }, { "epoch": 1.917344448962993, "grad_norm": 0.2778773605823517, "learning_rate": 7.745567896765186e-06, "loss": 0.3535, "step": 18859 }, { "epoch": 1.9174461163074419, "grad_norm": 0.30307814478874207, "learning_rate": 7.74527129469522e-06, "loss": 0.3784, "step": 18860 }, { "epoch": 1.917547783651891, "grad_norm": 0.2953524589538574, "learning_rate": 7.74497467879523e-06, "loss": 0.3318, "step": 18861 }, { "epoch": 1.91764945099634, "grad_norm": 0.27704593539237976, "learning_rate": 7.744678049066708e-06, "loss": 0.3509, "step": 18862 }, { "epoch": 1.917751118340789, "grad_norm": 0.26266586780548096, "learning_rate": 7.744381405511151e-06, "loss": 0.3355, "step": 18863 }, { "epoch": 1.9178527856852379, "grad_norm": 0.24730657041072845, "learning_rate": 7.744084748130054e-06, "loss": 0.3647, "step": 18864 }, { "epoch": 1.9179544530296868, "grad_norm": 0.26633885502815247, "learning_rate": 7.743788076924909e-06, "loss": 0.3296, "step": 18865 }, { "epoch": 1.918056120374136, "grad_norm": 0.26522448658943176, "learning_rate": 7.743491391897211e-06, "loss": 0.3648, "step": 18866 }, { "epoch": 1.918157787718585, "grad_norm": 0.27164003252983093, "learning_rate": 7.743194693048457e-06, "loss": 0.3411, "step": 18867 }, { "epoch": 1.9182594550630339, "grad_norm": 0.2514868974685669, "learning_rate": 7.74289798038014e-06, "loss": 0.3621, "step": 18868 }, { "epoch": 1.9183611224074828, "grad_norm": 0.27685943245887756, "learning_rate": 7.742601253893754e-06, "loss": 0.3711, "step": 18869 }, { "epoch": 1.9184627897519317, "grad_norm": 0.294875830411911, "learning_rate": 7.742304513590795e-06, "loss": 0.3541, "step": 18870 }, { "epoch": 1.9185644570963807, "grad_norm": 0.25429001450538635, "learning_rate": 7.742007759472757e-06, "loss": 0.3445, "step": 18871 }, { "epoch": 1.9186661244408296, "grad_norm": 0.2615124583244324, "learning_rate": 7.741710991541135e-06, "loss": 0.3363, "step": 18872 }, { "epoch": 1.9187677917852786, "grad_norm": 0.26523321866989136, "learning_rate": 7.741414209797427e-06, "loss": 0.3869, "step": 18873 }, { "epoch": 1.9188694591297275, "grad_norm": 0.28702205419540405, "learning_rate": 7.741117414243124e-06, "loss": 0.3602, "step": 18874 }, { "epoch": 1.9189711264741764, "grad_norm": 0.2763153910636902, "learning_rate": 7.740820604879721e-06, "loss": 0.3541, "step": 18875 }, { "epoch": 1.9190727938186254, "grad_norm": 0.2880450189113617, "learning_rate": 7.740523781708717e-06, "loss": 0.3404, "step": 18876 }, { "epoch": 1.9191744611630743, "grad_norm": 0.26984795928001404, "learning_rate": 7.740226944731606e-06, "loss": 0.343, "step": 18877 }, { "epoch": 1.9192761285075233, "grad_norm": 0.26480406522750854, "learning_rate": 7.739930093949883e-06, "loss": 0.339, "step": 18878 }, { "epoch": 1.9193777958519722, "grad_norm": 0.27003681659698486, "learning_rate": 7.739633229365043e-06, "loss": 0.3321, "step": 18879 }, { "epoch": 1.9194794631964212, "grad_norm": 0.27685147523880005, "learning_rate": 7.73933635097858e-06, "loss": 0.3358, "step": 18880 }, { "epoch": 1.9195811305408703, "grad_norm": 0.2934374511241913, "learning_rate": 7.739039458791992e-06, "loss": 0.3781, "step": 18881 }, { "epoch": 1.9196827978853193, "grad_norm": 0.2795506715774536, "learning_rate": 7.738742552806775e-06, "loss": 0.385, "step": 18882 }, { "epoch": 1.9197844652297682, "grad_norm": 0.28346768021583557, "learning_rate": 7.738445633024425e-06, "loss": 0.3631, "step": 18883 }, { "epoch": 1.9198861325742171, "grad_norm": 0.26417505741119385, "learning_rate": 7.738148699446436e-06, "loss": 0.3573, "step": 18884 }, { "epoch": 1.919987799918666, "grad_norm": 0.2947542369365692, "learning_rate": 7.737851752074303e-06, "loss": 0.3674, "step": 18885 }, { "epoch": 1.9200894672631152, "grad_norm": 0.26831814646720886, "learning_rate": 7.737554790909524e-06, "loss": 0.3371, "step": 18886 }, { "epoch": 1.9201911346075642, "grad_norm": 0.2808251678943634, "learning_rate": 7.737257815953596e-06, "loss": 0.3426, "step": 18887 }, { "epoch": 1.9202928019520131, "grad_norm": 0.2762787342071533, "learning_rate": 7.736960827208012e-06, "loss": 0.3483, "step": 18888 }, { "epoch": 1.920394469296462, "grad_norm": 0.2608581781387329, "learning_rate": 7.736663824674271e-06, "loss": 0.3323, "step": 18889 }, { "epoch": 1.920496136640911, "grad_norm": 0.2604788541793823, "learning_rate": 7.736366808353865e-06, "loss": 0.3448, "step": 18890 }, { "epoch": 1.92059780398536, "grad_norm": 0.2730226516723633, "learning_rate": 7.736069778248296e-06, "loss": 0.3306, "step": 18891 }, { "epoch": 1.920699471329809, "grad_norm": 0.287384569644928, "learning_rate": 7.735772734359059e-06, "loss": 0.3492, "step": 18892 }, { "epoch": 1.9208011386742578, "grad_norm": 0.2613537013530731, "learning_rate": 7.735475676687648e-06, "loss": 0.376, "step": 18893 }, { "epoch": 1.9209028060187068, "grad_norm": 0.2801903784275055, "learning_rate": 7.735178605235558e-06, "loss": 0.379, "step": 18894 }, { "epoch": 1.9210044733631557, "grad_norm": 0.2708979547023773, "learning_rate": 7.73488152000429e-06, "loss": 0.375, "step": 18895 }, { "epoch": 1.9211061407076047, "grad_norm": 0.2971177101135254, "learning_rate": 7.73458442099534e-06, "loss": 0.36, "step": 18896 }, { "epoch": 1.9212078080520536, "grad_norm": 0.2624354660511017, "learning_rate": 7.734287308210201e-06, "loss": 0.3505, "step": 18897 }, { "epoch": 1.9213094753965025, "grad_norm": 0.27648329734802246, "learning_rate": 7.733990181650375e-06, "loss": 0.3716, "step": 18898 }, { "epoch": 1.9214111427409515, "grad_norm": 0.26967552304267883, "learning_rate": 7.733693041317353e-06, "loss": 0.3498, "step": 18899 }, { "epoch": 1.9215128100854004, "grad_norm": 0.2685016095638275, "learning_rate": 7.733395887212639e-06, "loss": 0.3561, "step": 18900 }, { "epoch": 1.9216144774298494, "grad_norm": 0.278702974319458, "learning_rate": 7.733098719337723e-06, "loss": 0.3386, "step": 18901 }, { "epoch": 1.9217161447742985, "grad_norm": 0.2707488536834717, "learning_rate": 7.732801537694105e-06, "loss": 0.3619, "step": 18902 }, { "epoch": 1.9218178121187475, "grad_norm": 0.26605841517448425, "learning_rate": 7.732504342283285e-06, "loss": 0.3758, "step": 18903 }, { "epoch": 1.9219194794631964, "grad_norm": 0.26146697998046875, "learning_rate": 7.732207133106755e-06, "loss": 0.3522, "step": 18904 }, { "epoch": 1.9220211468076454, "grad_norm": 0.25960928201675415, "learning_rate": 7.731909910166015e-06, "loss": 0.3617, "step": 18905 }, { "epoch": 1.9221228141520943, "grad_norm": 0.2623993158340454, "learning_rate": 7.731612673462563e-06, "loss": 0.3519, "step": 18906 }, { "epoch": 1.9222244814965435, "grad_norm": 0.26216018199920654, "learning_rate": 7.731315422997896e-06, "loss": 0.3982, "step": 18907 }, { "epoch": 1.9223261488409924, "grad_norm": 0.27888011932373047, "learning_rate": 7.731018158773509e-06, "loss": 0.3551, "step": 18908 }, { "epoch": 1.9224278161854413, "grad_norm": 0.2757355570793152, "learning_rate": 7.730720880790903e-06, "loss": 0.3592, "step": 18909 }, { "epoch": 1.9225294835298903, "grad_norm": 0.25056329369544983, "learning_rate": 7.730423589051575e-06, "loss": 0.3628, "step": 18910 }, { "epoch": 1.9226311508743392, "grad_norm": 0.2848670780658722, "learning_rate": 7.730126283557019e-06, "loss": 0.3736, "step": 18911 }, { "epoch": 1.9227328182187882, "grad_norm": 0.27916547656059265, "learning_rate": 7.729828964308738e-06, "loss": 0.3642, "step": 18912 }, { "epoch": 1.922834485563237, "grad_norm": 0.2851249575614929, "learning_rate": 7.729531631308225e-06, "loss": 0.3424, "step": 18913 }, { "epoch": 1.922936152907686, "grad_norm": 0.2711632251739502, "learning_rate": 7.729234284556983e-06, "loss": 0.3612, "step": 18914 }, { "epoch": 1.923037820252135, "grad_norm": 0.28051313757896423, "learning_rate": 7.728936924056504e-06, "loss": 0.3425, "step": 18915 }, { "epoch": 1.923139487596584, "grad_norm": 0.27873551845550537, "learning_rate": 7.728639549808292e-06, "loss": 0.3343, "step": 18916 }, { "epoch": 1.9232411549410329, "grad_norm": 0.2841748595237732, "learning_rate": 7.728342161813842e-06, "loss": 0.3733, "step": 18917 }, { "epoch": 1.9233428222854818, "grad_norm": 0.27994656562805176, "learning_rate": 7.728044760074653e-06, "loss": 0.364, "step": 18918 }, { "epoch": 1.9234444896299308, "grad_norm": 0.26378315687179565, "learning_rate": 7.727747344592223e-06, "loss": 0.3516, "step": 18919 }, { "epoch": 1.9235461569743797, "grad_norm": 0.27265849709510803, "learning_rate": 7.727449915368047e-06, "loss": 0.3613, "step": 18920 }, { "epoch": 1.9236478243188286, "grad_norm": 0.30595067143440247, "learning_rate": 7.727152472403629e-06, "loss": 0.3724, "step": 18921 }, { "epoch": 1.9237494916632778, "grad_norm": 0.2671040892601013, "learning_rate": 7.726855015700465e-06, "loss": 0.3693, "step": 18922 }, { "epoch": 1.9238511590077267, "grad_norm": 0.2606428861618042, "learning_rate": 7.726557545260052e-06, "loss": 0.3376, "step": 18923 }, { "epoch": 1.9239528263521757, "grad_norm": 0.2900896966457367, "learning_rate": 7.726260061083893e-06, "loss": 0.4011, "step": 18924 }, { "epoch": 1.9240544936966246, "grad_norm": 0.27601194381713867, "learning_rate": 7.725962563173478e-06, "loss": 0.3438, "step": 18925 }, { "epoch": 1.9241561610410736, "grad_norm": 0.26811060309410095, "learning_rate": 7.725665051530317e-06, "loss": 0.3677, "step": 18926 }, { "epoch": 1.9242578283855227, "grad_norm": 0.2690540552139282, "learning_rate": 7.7253675261559e-06, "loss": 0.3225, "step": 18927 }, { "epoch": 1.9243594957299717, "grad_norm": 0.2713735103607178, "learning_rate": 7.72506998705173e-06, "loss": 0.3537, "step": 18928 }, { "epoch": 1.9244611630744206, "grad_norm": 0.2569715678691864, "learning_rate": 7.724772434219307e-06, "loss": 0.3821, "step": 18929 }, { "epoch": 1.9245628304188696, "grad_norm": 0.2518937587738037, "learning_rate": 7.724474867660125e-06, "loss": 0.3931, "step": 18930 }, { "epoch": 1.9246644977633185, "grad_norm": 0.24628381431102753, "learning_rate": 7.724177287375687e-06, "loss": 0.3229, "step": 18931 }, { "epoch": 1.9247661651077674, "grad_norm": 0.26793521642684937, "learning_rate": 7.723879693367493e-06, "loss": 0.3652, "step": 18932 }, { "epoch": 1.9248678324522164, "grad_norm": 0.26706254482269287, "learning_rate": 7.723582085637038e-06, "loss": 0.3638, "step": 18933 }, { "epoch": 1.9249694997966653, "grad_norm": 0.3133876919746399, "learning_rate": 7.723284464185824e-06, "loss": 0.3591, "step": 18934 }, { "epoch": 1.9250711671411143, "grad_norm": 0.2701975703239441, "learning_rate": 7.72298682901535e-06, "loss": 0.3316, "step": 18935 }, { "epoch": 1.9251728344855632, "grad_norm": 0.28900644183158875, "learning_rate": 7.722689180127116e-06, "loss": 0.3446, "step": 18936 }, { "epoch": 1.9252745018300121, "grad_norm": 0.2664659023284912, "learning_rate": 7.72239151752262e-06, "loss": 0.3482, "step": 18937 }, { "epoch": 1.925376169174461, "grad_norm": 0.2742452919483185, "learning_rate": 7.722093841203364e-06, "loss": 0.3588, "step": 18938 }, { "epoch": 1.92547783651891, "grad_norm": 0.313527911901474, "learning_rate": 7.721796151170846e-06, "loss": 0.3446, "step": 18939 }, { "epoch": 1.925579503863359, "grad_norm": 0.25871971249580383, "learning_rate": 7.721498447426565e-06, "loss": 0.3523, "step": 18940 }, { "epoch": 1.925681171207808, "grad_norm": 0.279217928647995, "learning_rate": 7.721200729972023e-06, "loss": 0.3618, "step": 18941 }, { "epoch": 1.925782838552257, "grad_norm": 0.2649628818035126, "learning_rate": 7.720902998808717e-06, "loss": 0.3361, "step": 18942 }, { "epoch": 1.925884505896706, "grad_norm": 0.26919180154800415, "learning_rate": 7.720605253938147e-06, "loss": 0.3583, "step": 18943 }, { "epoch": 1.925986173241155, "grad_norm": 0.2604895234107971, "learning_rate": 7.720307495361815e-06, "loss": 0.3691, "step": 18944 }, { "epoch": 1.926087840585604, "grad_norm": 0.276355117559433, "learning_rate": 7.720009723081222e-06, "loss": 0.3708, "step": 18945 }, { "epoch": 1.9261895079300528, "grad_norm": 0.27859318256378174, "learning_rate": 7.719711937097864e-06, "loss": 0.3584, "step": 18946 }, { "epoch": 1.9262911752745018, "grad_norm": 0.2652789056301117, "learning_rate": 7.719414137413246e-06, "loss": 0.3581, "step": 18947 }, { "epoch": 1.926392842618951, "grad_norm": 0.27119043469429016, "learning_rate": 7.719116324028865e-06, "loss": 0.342, "step": 18948 }, { "epoch": 1.9264945099633999, "grad_norm": 0.26571083068847656, "learning_rate": 7.718818496946223e-06, "loss": 0.3643, "step": 18949 }, { "epoch": 1.9265961773078488, "grad_norm": 0.2957260310649872, "learning_rate": 7.718520656166817e-06, "loss": 0.3738, "step": 18950 }, { "epoch": 1.9266978446522978, "grad_norm": 0.2832185924053192, "learning_rate": 7.718222801692151e-06, "loss": 0.3616, "step": 18951 }, { "epoch": 1.9267995119967467, "grad_norm": 0.2901241183280945, "learning_rate": 7.717924933523726e-06, "loss": 0.3806, "step": 18952 }, { "epoch": 1.9269011793411956, "grad_norm": 0.27546101808547974, "learning_rate": 7.71762705166304e-06, "loss": 0.3378, "step": 18953 }, { "epoch": 1.9270028466856446, "grad_norm": 0.27351197600364685, "learning_rate": 7.717329156111593e-06, "loss": 0.3318, "step": 18954 }, { "epoch": 1.9271045140300935, "grad_norm": 0.28223371505737305, "learning_rate": 7.71703124687089e-06, "loss": 0.3655, "step": 18955 }, { "epoch": 1.9272061813745425, "grad_norm": 0.30272701382637024, "learning_rate": 7.716733323942427e-06, "loss": 0.3852, "step": 18956 }, { "epoch": 1.9273078487189914, "grad_norm": 0.29870617389678955, "learning_rate": 7.71643538732771e-06, "loss": 0.3751, "step": 18957 }, { "epoch": 1.9274095160634404, "grad_norm": 0.27235427498817444, "learning_rate": 7.716137437028235e-06, "loss": 0.3534, "step": 18958 }, { "epoch": 1.9275111834078893, "grad_norm": 0.24262626469135284, "learning_rate": 7.715839473045505e-06, "loss": 0.3518, "step": 18959 }, { "epoch": 1.9276128507523382, "grad_norm": 0.27620887756347656, "learning_rate": 7.715541495381022e-06, "loss": 0.3513, "step": 18960 }, { "epoch": 1.9277145180967872, "grad_norm": 0.28799575567245483, "learning_rate": 7.715243504036286e-06, "loss": 0.3649, "step": 18961 }, { "epoch": 1.9278161854412361, "grad_norm": 0.2877293825149536, "learning_rate": 7.714945499012797e-06, "loss": 0.3777, "step": 18962 }, { "epoch": 1.9279178527856853, "grad_norm": 0.2636508047580719, "learning_rate": 7.71464748031206e-06, "loss": 0.3377, "step": 18963 }, { "epoch": 1.9280195201301342, "grad_norm": 0.24981284141540527, "learning_rate": 7.714349447935574e-06, "loss": 0.3494, "step": 18964 }, { "epoch": 1.9281211874745832, "grad_norm": 0.28643670678138733, "learning_rate": 7.714051401884838e-06, "loss": 0.3556, "step": 18965 }, { "epoch": 1.928222854819032, "grad_norm": 0.27274367213249207, "learning_rate": 7.713753342161358e-06, "loss": 0.3563, "step": 18966 }, { "epoch": 1.928324522163481, "grad_norm": 0.25446540117263794, "learning_rate": 7.713455268766632e-06, "loss": 0.3487, "step": 18967 }, { "epoch": 1.9284261895079302, "grad_norm": 0.2820490002632141, "learning_rate": 7.713157181702165e-06, "loss": 0.3623, "step": 18968 }, { "epoch": 1.9285278568523792, "grad_norm": 0.2871074676513672, "learning_rate": 7.712859080969456e-06, "loss": 0.3656, "step": 18969 }, { "epoch": 1.928629524196828, "grad_norm": 0.2754804790019989, "learning_rate": 7.712560966570008e-06, "loss": 0.3824, "step": 18970 }, { "epoch": 1.928731191541277, "grad_norm": 0.26886269450187683, "learning_rate": 7.712262838505323e-06, "loss": 0.333, "step": 18971 }, { "epoch": 1.928832858885726, "grad_norm": 0.2875978946685791, "learning_rate": 7.711964696776902e-06, "loss": 0.3686, "step": 18972 }, { "epoch": 1.928934526230175, "grad_norm": 0.2847452759742737, "learning_rate": 7.711666541386246e-06, "loss": 0.3526, "step": 18973 }, { "epoch": 1.9290361935746239, "grad_norm": 0.2821862995624542, "learning_rate": 7.71136837233486e-06, "loss": 0.3369, "step": 18974 }, { "epoch": 1.9291378609190728, "grad_norm": 0.2750234305858612, "learning_rate": 7.711070189624244e-06, "loss": 0.3589, "step": 18975 }, { "epoch": 1.9292395282635217, "grad_norm": 0.27204540371894836, "learning_rate": 7.710771993255899e-06, "loss": 0.3965, "step": 18976 }, { "epoch": 1.9293411956079707, "grad_norm": 0.27540722489356995, "learning_rate": 7.710473783231332e-06, "loss": 0.337, "step": 18977 }, { "epoch": 1.9294428629524196, "grad_norm": 0.27445685863494873, "learning_rate": 7.71017555955204e-06, "loss": 0.3477, "step": 18978 }, { "epoch": 1.9295445302968686, "grad_norm": 0.2802554666996002, "learning_rate": 7.709877322219528e-06, "loss": 0.3646, "step": 18979 }, { "epoch": 1.9296461976413175, "grad_norm": 0.27602535486221313, "learning_rate": 7.709579071235299e-06, "loss": 0.36, "step": 18980 }, { "epoch": 1.9297478649857664, "grad_norm": 0.3163074851036072, "learning_rate": 7.709280806600852e-06, "loss": 0.3739, "step": 18981 }, { "epoch": 1.9298495323302154, "grad_norm": 0.2883704900741577, "learning_rate": 7.708982528317693e-06, "loss": 0.37, "step": 18982 }, { "epoch": 1.9299511996746646, "grad_norm": 0.26553043723106384, "learning_rate": 7.708684236387325e-06, "loss": 0.3449, "step": 18983 }, { "epoch": 1.9300528670191135, "grad_norm": 0.2868873178958893, "learning_rate": 7.70838593081125e-06, "loss": 0.3471, "step": 18984 }, { "epoch": 1.9301545343635624, "grad_norm": 0.268358439207077, "learning_rate": 7.708087611590969e-06, "loss": 0.3485, "step": 18985 }, { "epoch": 1.9302562017080114, "grad_norm": 0.292252779006958, "learning_rate": 7.707789278727986e-06, "loss": 0.3462, "step": 18986 }, { "epoch": 1.9303578690524603, "grad_norm": 0.2611906826496124, "learning_rate": 7.707490932223804e-06, "loss": 0.3761, "step": 18987 }, { "epoch": 1.9304595363969093, "grad_norm": 0.2659342586994171, "learning_rate": 7.707192572079926e-06, "loss": 0.3237, "step": 18988 }, { "epoch": 1.9305612037413584, "grad_norm": 0.2718345522880554, "learning_rate": 7.706894198297856e-06, "loss": 0.3424, "step": 18989 }, { "epoch": 1.9306628710858074, "grad_norm": 0.2791522145271301, "learning_rate": 7.706595810879096e-06, "loss": 0.3876, "step": 18990 }, { "epoch": 1.9307645384302563, "grad_norm": 0.2697334587574005, "learning_rate": 7.70629740982515e-06, "loss": 0.3479, "step": 18991 }, { "epoch": 1.9308662057747052, "grad_norm": 0.26835814118385315, "learning_rate": 7.70599899513752e-06, "loss": 0.3482, "step": 18992 }, { "epoch": 1.9309678731191542, "grad_norm": 0.25994873046875, "learning_rate": 7.70570056681771e-06, "loss": 0.3469, "step": 18993 }, { "epoch": 1.9310695404636031, "grad_norm": 0.287128210067749, "learning_rate": 7.705402124867225e-06, "loss": 0.3679, "step": 18994 }, { "epoch": 1.931171207808052, "grad_norm": 0.2761858105659485, "learning_rate": 7.705103669287566e-06, "loss": 0.383, "step": 18995 }, { "epoch": 1.931272875152501, "grad_norm": 0.2916618585586548, "learning_rate": 7.704805200080236e-06, "loss": 0.3543, "step": 18996 }, { "epoch": 1.93137454249695, "grad_norm": 0.2761490046977997, "learning_rate": 7.704506717246743e-06, "loss": 0.3664, "step": 18997 }, { "epoch": 1.931476209841399, "grad_norm": 0.2689771056175232, "learning_rate": 7.704208220788586e-06, "loss": 0.3182, "step": 18998 }, { "epoch": 1.9315778771858478, "grad_norm": 0.2554517984390259, "learning_rate": 7.70390971070727e-06, "loss": 0.3469, "step": 18999 }, { "epoch": 1.9316795445302968, "grad_norm": 0.27135005593299866, "learning_rate": 7.7036111870043e-06, "loss": 0.3705, "step": 19000 }, { "epoch": 1.9317812118747457, "grad_norm": 0.3019295334815979, "learning_rate": 7.70331264968118e-06, "loss": 0.3459, "step": 19001 }, { "epoch": 1.9318828792191947, "grad_norm": 0.267270565032959, "learning_rate": 7.703014098739414e-06, "loss": 0.3309, "step": 19002 }, { "epoch": 1.9319845465636436, "grad_norm": 0.26838114857673645, "learning_rate": 7.702715534180504e-06, "loss": 0.4034, "step": 19003 }, { "epoch": 1.9320862139080928, "grad_norm": 0.25361841917037964, "learning_rate": 7.702416956005957e-06, "loss": 0.3513, "step": 19004 }, { "epoch": 1.9321878812525417, "grad_norm": 0.27876147627830505, "learning_rate": 7.702118364217274e-06, "loss": 0.3502, "step": 19005 }, { "epoch": 1.9322895485969906, "grad_norm": 0.2644338011741638, "learning_rate": 7.70181975881596e-06, "loss": 0.3727, "step": 19006 }, { "epoch": 1.9323912159414396, "grad_norm": 0.2681674659252167, "learning_rate": 7.701521139803521e-06, "loss": 0.3583, "step": 19007 }, { "epoch": 1.9324928832858885, "grad_norm": 0.2586827874183655, "learning_rate": 7.701222507181462e-06, "loss": 0.365, "step": 19008 }, { "epoch": 1.9325945506303377, "grad_norm": 0.25360897183418274, "learning_rate": 7.700923860951285e-06, "loss": 0.3774, "step": 19009 }, { "epoch": 1.9326962179747866, "grad_norm": 0.2578155994415283, "learning_rate": 7.700625201114494e-06, "loss": 0.3591, "step": 19010 }, { "epoch": 1.9327978853192356, "grad_norm": 0.2896404266357422, "learning_rate": 7.700326527672597e-06, "loss": 0.3507, "step": 19011 }, { "epoch": 1.9328995526636845, "grad_norm": 0.28970587253570557, "learning_rate": 7.700027840627094e-06, "loss": 0.3527, "step": 19012 }, { "epoch": 1.9330012200081335, "grad_norm": 0.257121205329895, "learning_rate": 7.699729139979495e-06, "loss": 0.3576, "step": 19013 }, { "epoch": 1.9331028873525824, "grad_norm": 0.2741464078426361, "learning_rate": 7.699430425731302e-06, "loss": 0.3655, "step": 19014 }, { "epoch": 1.9332045546970313, "grad_norm": 0.3095250427722931, "learning_rate": 7.699131697884018e-06, "loss": 0.3857, "step": 19015 }, { "epoch": 1.9333062220414803, "grad_norm": 0.2752871513366699, "learning_rate": 7.698832956439152e-06, "loss": 0.3637, "step": 19016 }, { "epoch": 1.9334078893859292, "grad_norm": 0.26542940735816956, "learning_rate": 7.698534201398206e-06, "loss": 0.3435, "step": 19017 }, { "epoch": 1.9335095567303782, "grad_norm": 0.26657646894454956, "learning_rate": 7.698235432762686e-06, "loss": 0.3303, "step": 19018 }, { "epoch": 1.933611224074827, "grad_norm": 0.2651326060295105, "learning_rate": 7.697936650534097e-06, "loss": 0.3284, "step": 19019 }, { "epoch": 1.933712891419276, "grad_norm": 0.27009081840515137, "learning_rate": 7.697637854713944e-06, "loss": 0.3685, "step": 19020 }, { "epoch": 1.933814558763725, "grad_norm": 0.3101685047149658, "learning_rate": 7.697339045303733e-06, "loss": 0.3816, "step": 19021 }, { "epoch": 1.933916226108174, "grad_norm": 0.2562858462333679, "learning_rate": 7.69704022230497e-06, "loss": 0.3362, "step": 19022 }, { "epoch": 1.9340178934526229, "grad_norm": 0.2686994671821594, "learning_rate": 7.696741385719157e-06, "loss": 0.3642, "step": 19023 }, { "epoch": 1.934119560797072, "grad_norm": 0.2796644866466522, "learning_rate": 7.696442535547804e-06, "loss": 0.3497, "step": 19024 }, { "epoch": 1.934221228141521, "grad_norm": 0.26923149824142456, "learning_rate": 7.696143671792412e-06, "loss": 0.3508, "step": 19025 }, { "epoch": 1.93432289548597, "grad_norm": 0.2879955470561981, "learning_rate": 7.69584479445449e-06, "loss": 0.3818, "step": 19026 }, { "epoch": 1.9344245628304189, "grad_norm": 0.26962417364120483, "learning_rate": 7.695545903535543e-06, "loss": 0.3435, "step": 19027 }, { "epoch": 1.9345262301748678, "grad_norm": 0.2527662217617035, "learning_rate": 7.695246999037077e-06, "loss": 0.3633, "step": 19028 }, { "epoch": 1.9346278975193167, "grad_norm": 0.2703421115875244, "learning_rate": 7.694948080960594e-06, "loss": 0.3834, "step": 19029 }, { "epoch": 1.934729564863766, "grad_norm": 0.28324270248413086, "learning_rate": 7.694649149307605e-06, "loss": 0.3697, "step": 19030 }, { "epoch": 1.9348312322082148, "grad_norm": 0.301981121301651, "learning_rate": 7.694350204079614e-06, "loss": 0.379, "step": 19031 }, { "epoch": 1.9349328995526638, "grad_norm": 0.29157769680023193, "learning_rate": 7.694051245278126e-06, "loss": 0.3346, "step": 19032 }, { "epoch": 1.9350345668971127, "grad_norm": 0.2873024344444275, "learning_rate": 7.69375227290465e-06, "loss": 0.357, "step": 19033 }, { "epoch": 1.9351362342415617, "grad_norm": 0.31659942865371704, "learning_rate": 7.69345328696069e-06, "loss": 0.3607, "step": 19034 }, { "epoch": 1.9352379015860106, "grad_norm": 0.2671392560005188, "learning_rate": 7.693154287447751e-06, "loss": 0.3593, "step": 19035 }, { "epoch": 1.9353395689304596, "grad_norm": 0.27725380659103394, "learning_rate": 7.692855274367342e-06, "loss": 0.3288, "step": 19036 }, { "epoch": 1.9354412362749085, "grad_norm": 0.266916960477829, "learning_rate": 7.692556247720967e-06, "loss": 0.3701, "step": 19037 }, { "epoch": 1.9355429036193574, "grad_norm": 0.2777109742164612, "learning_rate": 7.692257207510133e-06, "loss": 0.3445, "step": 19038 }, { "epoch": 1.9356445709638064, "grad_norm": 0.27870693802833557, "learning_rate": 7.691958153736348e-06, "loss": 0.3482, "step": 19039 }, { "epoch": 1.9357462383082553, "grad_norm": 0.26102569699287415, "learning_rate": 7.691659086401115e-06, "loss": 0.3397, "step": 19040 }, { "epoch": 1.9358479056527043, "grad_norm": 0.2981798052787781, "learning_rate": 7.691360005505947e-06, "loss": 0.3785, "step": 19041 }, { "epoch": 1.9359495729971532, "grad_norm": 0.28698116540908813, "learning_rate": 7.691060911052345e-06, "loss": 0.4119, "step": 19042 }, { "epoch": 1.9360512403416021, "grad_norm": 0.26609939336776733, "learning_rate": 7.690761803041818e-06, "loss": 0.3333, "step": 19043 }, { "epoch": 1.936152907686051, "grad_norm": 0.2753080725669861, "learning_rate": 7.690462681475872e-06, "loss": 0.336, "step": 19044 }, { "epoch": 1.9362545750305002, "grad_norm": 0.2829822599887848, "learning_rate": 7.690163546356014e-06, "loss": 0.3817, "step": 19045 }, { "epoch": 1.9363562423749492, "grad_norm": 0.29579395055770874, "learning_rate": 7.689864397683753e-06, "loss": 0.3683, "step": 19046 }, { "epoch": 1.9364579097193981, "grad_norm": 0.2767915427684784, "learning_rate": 7.689565235460591e-06, "loss": 0.3422, "step": 19047 }, { "epoch": 1.936559577063847, "grad_norm": 0.28398653864860535, "learning_rate": 7.689266059688042e-06, "loss": 0.3771, "step": 19048 }, { "epoch": 1.936661244408296, "grad_norm": 0.2544459104537964, "learning_rate": 7.68896687036761e-06, "loss": 0.3744, "step": 19049 }, { "epoch": 1.9367629117527452, "grad_norm": 0.2728283107280731, "learning_rate": 7.6886676675008e-06, "loss": 0.3894, "step": 19050 }, { "epoch": 1.9368645790971941, "grad_norm": 0.27224093675613403, "learning_rate": 7.68836845108912e-06, "loss": 0.3493, "step": 19051 }, { "epoch": 1.936966246441643, "grad_norm": 0.3155258893966675, "learning_rate": 7.688069221134082e-06, "loss": 0.3571, "step": 19052 }, { "epoch": 1.937067913786092, "grad_norm": 0.27594664692878723, "learning_rate": 7.687769977637187e-06, "loss": 0.3464, "step": 19053 }, { "epoch": 1.937169581130541, "grad_norm": 0.2683277130126953, "learning_rate": 7.687470720599948e-06, "loss": 0.3545, "step": 19054 }, { "epoch": 1.9372712484749899, "grad_norm": 0.25687405467033386, "learning_rate": 7.687171450023869e-06, "loss": 0.3679, "step": 19055 }, { "epoch": 1.9373729158194388, "grad_norm": 0.2689336836338043, "learning_rate": 7.686872165910459e-06, "loss": 0.324, "step": 19056 }, { "epoch": 1.9374745831638878, "grad_norm": 0.2723154127597809, "learning_rate": 7.686572868261225e-06, "loss": 0.3604, "step": 19057 }, { "epoch": 1.9375762505083367, "grad_norm": 0.2714349031448364, "learning_rate": 7.686273557077675e-06, "loss": 0.3591, "step": 19058 }, { "epoch": 1.9376779178527856, "grad_norm": 0.26909711956977844, "learning_rate": 7.685974232361317e-06, "loss": 0.3667, "step": 19059 }, { "epoch": 1.9377795851972346, "grad_norm": 0.25475969910621643, "learning_rate": 7.68567489411366e-06, "loss": 0.3591, "step": 19060 }, { "epoch": 1.9378812525416835, "grad_norm": 0.2544063627719879, "learning_rate": 7.685375542336212e-06, "loss": 0.3446, "step": 19061 }, { "epoch": 1.9379829198861325, "grad_norm": 0.27632108330726624, "learning_rate": 7.685076177030479e-06, "loss": 0.3509, "step": 19062 }, { "epoch": 1.9380845872305814, "grad_norm": 0.2808269262313843, "learning_rate": 7.68477679819797e-06, "loss": 0.3688, "step": 19063 }, { "epoch": 1.9381862545750304, "grad_norm": 0.27803587913513184, "learning_rate": 7.684477405840193e-06, "loss": 0.34, "step": 19064 }, { "epoch": 1.9382879219194795, "grad_norm": 0.3027963936328888, "learning_rate": 7.684177999958658e-06, "loss": 0.3529, "step": 19065 }, { "epoch": 1.9383895892639285, "grad_norm": 0.2774600684642792, "learning_rate": 7.683878580554874e-06, "loss": 0.3621, "step": 19066 }, { "epoch": 1.9384912566083774, "grad_norm": 0.29067641496658325, "learning_rate": 7.683579147630343e-06, "loss": 0.3884, "step": 19067 }, { "epoch": 1.9385929239528263, "grad_norm": 0.29709720611572266, "learning_rate": 7.68327970118658e-06, "loss": 0.3492, "step": 19068 }, { "epoch": 1.9386945912972753, "grad_norm": 0.289800226688385, "learning_rate": 7.682980241225093e-06, "loss": 0.3552, "step": 19069 }, { "epoch": 1.9387962586417242, "grad_norm": 0.2838762104511261, "learning_rate": 7.682680767747388e-06, "loss": 0.3792, "step": 19070 }, { "epoch": 1.9388979259861734, "grad_norm": 0.2778604328632355, "learning_rate": 7.682381280754973e-06, "loss": 0.329, "step": 19071 }, { "epoch": 1.9389995933306223, "grad_norm": 0.285098135471344, "learning_rate": 7.682081780249361e-06, "loss": 0.3436, "step": 19072 }, { "epoch": 1.9391012606750713, "grad_norm": 0.2534632682800293, "learning_rate": 7.681782266232058e-06, "loss": 0.3583, "step": 19073 }, { "epoch": 1.9392029280195202, "grad_norm": 0.27892759442329407, "learning_rate": 7.681482738704574e-06, "loss": 0.3343, "step": 19074 }, { "epoch": 1.9393045953639692, "grad_norm": 0.29004356265068054, "learning_rate": 7.681183197668417e-06, "loss": 0.3805, "step": 19075 }, { "epoch": 1.939406262708418, "grad_norm": 0.2826150059700012, "learning_rate": 7.680883643125093e-06, "loss": 0.37, "step": 19076 }, { "epoch": 1.939507930052867, "grad_norm": 0.27421391010284424, "learning_rate": 7.680584075076117e-06, "loss": 0.3764, "step": 19077 }, { "epoch": 1.939609597397316, "grad_norm": 0.2858397364616394, "learning_rate": 7.680284493522994e-06, "loss": 0.3612, "step": 19078 }, { "epoch": 1.939711264741765, "grad_norm": 0.2898505926132202, "learning_rate": 7.679984898467235e-06, "loss": 0.37, "step": 19079 }, { "epoch": 1.9398129320862139, "grad_norm": 0.2885952591896057, "learning_rate": 7.679685289910351e-06, "loss": 0.343, "step": 19080 }, { "epoch": 1.9399145994306628, "grad_norm": 0.2976129949092865, "learning_rate": 7.679385667853848e-06, "loss": 0.3583, "step": 19081 }, { "epoch": 1.9400162667751117, "grad_norm": 0.2815192639827728, "learning_rate": 7.679086032299234e-06, "loss": 0.377, "step": 19082 }, { "epoch": 1.9401179341195607, "grad_norm": 0.2660072147846222, "learning_rate": 7.678786383248024e-06, "loss": 0.3796, "step": 19083 }, { "epoch": 1.9402196014640096, "grad_norm": 0.2955929636955261, "learning_rate": 7.678486720701725e-06, "loss": 0.3725, "step": 19084 }, { "epoch": 1.9403212688084586, "grad_norm": 0.2986057698726654, "learning_rate": 7.678187044661847e-06, "loss": 0.36, "step": 19085 }, { "epoch": 1.9404229361529077, "grad_norm": 0.28127342462539673, "learning_rate": 7.677887355129899e-06, "loss": 0.3457, "step": 19086 }, { "epoch": 1.9405246034973567, "grad_norm": 0.2737928330898285, "learning_rate": 7.677587652107388e-06, "loss": 0.3671, "step": 19087 }, { "epoch": 1.9406262708418056, "grad_norm": 0.26769712567329407, "learning_rate": 7.677287935595829e-06, "loss": 0.3312, "step": 19088 }, { "epoch": 1.9407279381862546, "grad_norm": 0.2683885395526886, "learning_rate": 7.67698820559673e-06, "loss": 0.3815, "step": 19089 }, { "epoch": 1.9408296055307035, "grad_norm": 0.2797435522079468, "learning_rate": 7.676688462111599e-06, "loss": 0.3607, "step": 19090 }, { "epoch": 1.9409312728751527, "grad_norm": 0.27126938104629517, "learning_rate": 7.676388705141949e-06, "loss": 0.3505, "step": 19091 }, { "epoch": 1.9410329402196016, "grad_norm": 0.27043721079826355, "learning_rate": 7.676088934689288e-06, "loss": 0.3741, "step": 19092 }, { "epoch": 1.9411346075640505, "grad_norm": 0.2583831250667572, "learning_rate": 7.675789150755126e-06, "loss": 0.3666, "step": 19093 }, { "epoch": 1.9412362749084995, "grad_norm": 0.2737385928630829, "learning_rate": 7.675489353340974e-06, "loss": 0.365, "step": 19094 }, { "epoch": 1.9413379422529484, "grad_norm": 0.2466626763343811, "learning_rate": 7.675189542448345e-06, "loss": 0.3519, "step": 19095 }, { "epoch": 1.9414396095973974, "grad_norm": 0.279543936252594, "learning_rate": 7.674889718078745e-06, "loss": 0.3761, "step": 19096 }, { "epoch": 1.9415412769418463, "grad_norm": 0.2845466136932373, "learning_rate": 7.674589880233687e-06, "loss": 0.332, "step": 19097 }, { "epoch": 1.9416429442862952, "grad_norm": 0.26509037613868713, "learning_rate": 7.67429002891468e-06, "loss": 0.3549, "step": 19098 }, { "epoch": 1.9417446116307442, "grad_norm": 0.27021852135658264, "learning_rate": 7.673990164123236e-06, "loss": 0.3595, "step": 19099 }, { "epoch": 1.9418462789751931, "grad_norm": 0.26435163617134094, "learning_rate": 7.673690285860863e-06, "loss": 0.3238, "step": 19100 }, { "epoch": 1.941947946319642, "grad_norm": 0.26782286167144775, "learning_rate": 7.673390394129077e-06, "loss": 0.397, "step": 19101 }, { "epoch": 1.942049613664091, "grad_norm": 0.2801359295845032, "learning_rate": 7.673090488929383e-06, "loss": 0.3783, "step": 19102 }, { "epoch": 1.94215128100854, "grad_norm": 0.304377019405365, "learning_rate": 7.672790570263295e-06, "loss": 0.3937, "step": 19103 }, { "epoch": 1.942252948352989, "grad_norm": 0.2564025819301605, "learning_rate": 7.672490638132324e-06, "loss": 0.3259, "step": 19104 }, { "epoch": 1.9423546156974378, "grad_norm": 0.2789921760559082, "learning_rate": 7.67219069253798e-06, "loss": 0.3423, "step": 19105 }, { "epoch": 1.942456283041887, "grad_norm": 0.31457602977752686, "learning_rate": 7.671890733481775e-06, "loss": 0.3525, "step": 19106 }, { "epoch": 1.942557950386336, "grad_norm": 0.25542858242988586, "learning_rate": 7.671590760965217e-06, "loss": 0.3678, "step": 19107 }, { "epoch": 1.9426596177307849, "grad_norm": 0.25893089175224304, "learning_rate": 7.671290774989822e-06, "loss": 0.3438, "step": 19108 }, { "epoch": 1.9427612850752338, "grad_norm": 0.2696230709552765, "learning_rate": 7.670990775557096e-06, "loss": 0.3585, "step": 19109 }, { "epoch": 1.9428629524196828, "grad_norm": 0.26349031925201416, "learning_rate": 7.670690762668557e-06, "loss": 0.3665, "step": 19110 }, { "epoch": 1.9429646197641317, "grad_norm": 0.306395024061203, "learning_rate": 7.67039073632571e-06, "loss": 0.3806, "step": 19111 }, { "epoch": 1.9430662871085809, "grad_norm": 0.2626596987247467, "learning_rate": 7.67009069653007e-06, "loss": 0.3866, "step": 19112 }, { "epoch": 1.9431679544530298, "grad_norm": 0.2698659598827362, "learning_rate": 7.669790643283148e-06, "loss": 0.343, "step": 19113 }, { "epoch": 1.9432696217974788, "grad_norm": 0.2702869772911072, "learning_rate": 7.669490576586453e-06, "loss": 0.3627, "step": 19114 }, { "epoch": 1.9433712891419277, "grad_norm": 0.24234578013420105, "learning_rate": 7.669190496441501e-06, "loss": 0.3304, "step": 19115 }, { "epoch": 1.9434729564863766, "grad_norm": 0.25136587023735046, "learning_rate": 7.668890402849801e-06, "loss": 0.3382, "step": 19116 }, { "epoch": 1.9435746238308256, "grad_norm": 0.2994263768196106, "learning_rate": 7.668590295812866e-06, "loss": 0.3879, "step": 19117 }, { "epoch": 1.9436762911752745, "grad_norm": 0.27119672298431396, "learning_rate": 7.668290175332207e-06, "loss": 0.3612, "step": 19118 }, { "epoch": 1.9437779585197235, "grad_norm": 0.26370948553085327, "learning_rate": 7.667990041409334e-06, "loss": 0.3459, "step": 19119 }, { "epoch": 1.9438796258641724, "grad_norm": 0.2712564766407013, "learning_rate": 7.667689894045764e-06, "loss": 0.3487, "step": 19120 }, { "epoch": 1.9439812932086213, "grad_norm": 0.26584720611572266, "learning_rate": 7.667389733243004e-06, "loss": 0.3458, "step": 19121 }, { "epoch": 1.9440829605530703, "grad_norm": 0.2971572279930115, "learning_rate": 7.66708955900257e-06, "loss": 0.356, "step": 19122 }, { "epoch": 1.9441846278975192, "grad_norm": 0.25369560718536377, "learning_rate": 7.666789371325974e-06, "loss": 0.3637, "step": 19123 }, { "epoch": 1.9442862952419682, "grad_norm": 0.2766207754611969, "learning_rate": 7.666489170214723e-06, "loss": 0.3451, "step": 19124 }, { "epoch": 1.944387962586417, "grad_norm": 0.27475541830062866, "learning_rate": 7.666188955670335e-06, "loss": 0.3695, "step": 19125 }, { "epoch": 1.944489629930866, "grad_norm": 0.26309579610824585, "learning_rate": 7.665888727694322e-06, "loss": 0.3665, "step": 19126 }, { "epoch": 1.9445912972753152, "grad_norm": 0.28191038966178894, "learning_rate": 7.665588486288195e-06, "loss": 0.3591, "step": 19127 }, { "epoch": 1.9446929646197642, "grad_norm": 0.26191446185112, "learning_rate": 7.665288231453465e-06, "loss": 0.3873, "step": 19128 }, { "epoch": 1.944794631964213, "grad_norm": 0.2584264576435089, "learning_rate": 7.664987963191645e-06, "loss": 0.326, "step": 19129 }, { "epoch": 1.944896299308662, "grad_norm": 0.2892782986164093, "learning_rate": 7.664687681504253e-06, "loss": 0.3878, "step": 19130 }, { "epoch": 1.944997966653111, "grad_norm": 0.28385332226753235, "learning_rate": 7.664387386392795e-06, "loss": 0.3671, "step": 19131 }, { "epoch": 1.9450996339975601, "grad_norm": 0.26145055890083313, "learning_rate": 7.664087077858787e-06, "loss": 0.3471, "step": 19132 }, { "epoch": 1.945201301342009, "grad_norm": 0.2578631639480591, "learning_rate": 7.663786755903742e-06, "loss": 0.3491, "step": 19133 }, { "epoch": 1.945302968686458, "grad_norm": 0.2649777829647064, "learning_rate": 7.663486420529172e-06, "loss": 0.3536, "step": 19134 }, { "epoch": 1.945404636030907, "grad_norm": 0.27076807618141174, "learning_rate": 7.66318607173659e-06, "loss": 0.3566, "step": 19135 }, { "epoch": 1.945506303375356, "grad_norm": 0.25464123487472534, "learning_rate": 7.662885709527509e-06, "loss": 0.3316, "step": 19136 }, { "epoch": 1.9456079707198048, "grad_norm": 0.27851763367652893, "learning_rate": 7.662585333903443e-06, "loss": 0.3712, "step": 19137 }, { "epoch": 1.9457096380642538, "grad_norm": 0.26283136010169983, "learning_rate": 7.662284944865906e-06, "loss": 0.3317, "step": 19138 }, { "epoch": 1.9458113054087027, "grad_norm": 0.2849311828613281, "learning_rate": 7.661984542416408e-06, "loss": 0.3646, "step": 19139 }, { "epoch": 1.9459129727531517, "grad_norm": 0.2586698830127716, "learning_rate": 7.661684126556466e-06, "loss": 0.3294, "step": 19140 }, { "epoch": 1.9460146400976006, "grad_norm": 0.255942702293396, "learning_rate": 7.66138369728759e-06, "loss": 0.3416, "step": 19141 }, { "epoch": 1.9461163074420496, "grad_norm": 0.27555975317955017, "learning_rate": 7.661083254611298e-06, "loss": 0.3496, "step": 19142 }, { "epoch": 1.9462179747864985, "grad_norm": 0.2796792685985565, "learning_rate": 7.6607827985291e-06, "loss": 0.3461, "step": 19143 }, { "epoch": 1.9463196421309474, "grad_norm": 0.27231842279434204, "learning_rate": 7.66048232904251e-06, "loss": 0.3705, "step": 19144 }, { "epoch": 1.9464213094753964, "grad_norm": 0.26273125410079956, "learning_rate": 7.660181846153042e-06, "loss": 0.343, "step": 19145 }, { "epoch": 1.9465229768198453, "grad_norm": 0.2382507175207138, "learning_rate": 7.659881349862211e-06, "loss": 0.3296, "step": 19146 }, { "epoch": 1.9466246441642945, "grad_norm": 0.28723976016044617, "learning_rate": 7.65958084017153e-06, "loss": 0.3416, "step": 19147 }, { "epoch": 1.9467263115087434, "grad_norm": 0.26113682985305786, "learning_rate": 7.659280317082511e-06, "loss": 0.3493, "step": 19148 }, { "epoch": 1.9468279788531924, "grad_norm": 0.29130977392196655, "learning_rate": 7.65897978059667e-06, "loss": 0.377, "step": 19149 }, { "epoch": 1.9469296461976413, "grad_norm": 0.27224454283714294, "learning_rate": 7.658679230715523e-06, "loss": 0.3763, "step": 19150 }, { "epoch": 1.9470313135420902, "grad_norm": 0.2813911437988281, "learning_rate": 7.65837866744058e-06, "loss": 0.3844, "step": 19151 }, { "epoch": 1.9471329808865392, "grad_norm": 0.27604565024375916, "learning_rate": 7.658078090773358e-06, "loss": 0.3187, "step": 19152 }, { "epoch": 1.9472346482309884, "grad_norm": 0.2873641550540924, "learning_rate": 7.657777500715368e-06, "loss": 0.3478, "step": 19153 }, { "epoch": 1.9473363155754373, "grad_norm": 0.2644921839237213, "learning_rate": 7.657476897268129e-06, "loss": 0.3611, "step": 19154 }, { "epoch": 1.9474379829198862, "grad_norm": 0.2563723921775818, "learning_rate": 7.657176280433152e-06, "loss": 0.3727, "step": 19155 }, { "epoch": 1.9475396502643352, "grad_norm": 0.27350008487701416, "learning_rate": 7.656875650211953e-06, "loss": 0.3357, "step": 19156 }, { "epoch": 1.9476413176087841, "grad_norm": 0.2838839888572693, "learning_rate": 7.656575006606044e-06, "loss": 0.3462, "step": 19157 }, { "epoch": 1.947742984953233, "grad_norm": 0.24780277907848358, "learning_rate": 7.656274349616944e-06, "loss": 0.3754, "step": 19158 }, { "epoch": 1.947844652297682, "grad_norm": 0.26425474882125854, "learning_rate": 7.655973679246163e-06, "loss": 0.3481, "step": 19159 }, { "epoch": 1.947946319642131, "grad_norm": 0.25557050108909607, "learning_rate": 7.65567299549522e-06, "loss": 0.3445, "step": 19160 }, { "epoch": 1.9480479869865799, "grad_norm": 0.2770862579345703, "learning_rate": 7.655372298365626e-06, "loss": 0.357, "step": 19161 }, { "epoch": 1.9481496543310288, "grad_norm": 0.2725922167301178, "learning_rate": 7.655071587858897e-06, "loss": 0.3654, "step": 19162 }, { "epoch": 1.9482513216754778, "grad_norm": 0.2761527895927429, "learning_rate": 7.654770863976548e-06, "loss": 0.3401, "step": 19163 }, { "epoch": 1.9483529890199267, "grad_norm": 0.2586384117603302, "learning_rate": 7.654470126720095e-06, "loss": 0.3994, "step": 19164 }, { "epoch": 1.9484546563643756, "grad_norm": 0.2671412229537964, "learning_rate": 7.654169376091052e-06, "loss": 0.3626, "step": 19165 }, { "epoch": 1.9485563237088246, "grad_norm": 0.27562955021858215, "learning_rate": 7.653868612090936e-06, "loss": 0.361, "step": 19166 }, { "epoch": 1.9486579910532735, "grad_norm": 0.2610336244106293, "learning_rate": 7.653567834721258e-06, "loss": 0.3687, "step": 19167 }, { "epoch": 1.9487596583977227, "grad_norm": 0.2665996253490448, "learning_rate": 7.653267043983537e-06, "loss": 0.3616, "step": 19168 }, { "epoch": 1.9488613257421716, "grad_norm": 0.28296175599098206, "learning_rate": 7.652966239879288e-06, "loss": 0.3576, "step": 19169 }, { "epoch": 1.9489629930866206, "grad_norm": 0.2554892301559448, "learning_rate": 7.652665422410025e-06, "loss": 0.3395, "step": 19170 }, { "epoch": 1.9490646604310695, "grad_norm": 0.25974541902542114, "learning_rate": 7.652364591577265e-06, "loss": 0.3655, "step": 19171 }, { "epoch": 1.9491663277755185, "grad_norm": 0.2860063314437866, "learning_rate": 7.652063747382519e-06, "loss": 0.3724, "step": 19172 }, { "epoch": 1.9492679951199676, "grad_norm": 0.2857538163661957, "learning_rate": 7.651762889827308e-06, "loss": 0.3573, "step": 19173 }, { "epoch": 1.9493696624644166, "grad_norm": 0.26795610785484314, "learning_rate": 7.651462018913147e-06, "loss": 0.3779, "step": 19174 }, { "epoch": 1.9494713298088655, "grad_norm": 0.2827599048614502, "learning_rate": 7.651161134641549e-06, "loss": 0.3461, "step": 19175 }, { "epoch": 1.9495729971533144, "grad_norm": 0.2989339828491211, "learning_rate": 7.650860237014032e-06, "loss": 0.3905, "step": 19176 }, { "epoch": 1.9496746644977634, "grad_norm": 0.31431326270103455, "learning_rate": 7.65055932603211e-06, "loss": 0.3964, "step": 19177 }, { "epoch": 1.9497763318422123, "grad_norm": 0.27414682507514954, "learning_rate": 7.650258401697302e-06, "loss": 0.3746, "step": 19178 }, { "epoch": 1.9498779991866613, "grad_norm": 0.27790138125419617, "learning_rate": 7.64995746401112e-06, "loss": 0.3677, "step": 19179 }, { "epoch": 1.9499796665311102, "grad_norm": 0.27532362937927246, "learning_rate": 7.649656512975083e-06, "loss": 0.382, "step": 19180 }, { "epoch": 1.9500813338755592, "grad_norm": 0.29794812202453613, "learning_rate": 7.649355548590706e-06, "loss": 0.3707, "step": 19181 }, { "epoch": 1.950183001220008, "grad_norm": 0.26959922909736633, "learning_rate": 7.649054570859505e-06, "loss": 0.3441, "step": 19182 }, { "epoch": 1.950284668564457, "grad_norm": 0.26668718457221985, "learning_rate": 7.648753579782995e-06, "loss": 0.3537, "step": 19183 }, { "epoch": 1.950386335908906, "grad_norm": 0.2754223048686981, "learning_rate": 7.648452575362696e-06, "loss": 0.3458, "step": 19184 }, { "epoch": 1.950488003253355, "grad_norm": 0.3011699616909027, "learning_rate": 7.648151557600122e-06, "loss": 0.3675, "step": 19185 }, { "epoch": 1.9505896705978039, "grad_norm": 0.28733211755752563, "learning_rate": 7.647850526496791e-06, "loss": 0.3679, "step": 19186 }, { "epoch": 1.9506913379422528, "grad_norm": 0.29023119807243347, "learning_rate": 7.647549482054216e-06, "loss": 0.3621, "step": 19187 }, { "epoch": 1.950793005286702, "grad_norm": 0.27270224690437317, "learning_rate": 7.647248424273916e-06, "loss": 0.403, "step": 19188 }, { "epoch": 1.950894672631151, "grad_norm": 0.2689612805843353, "learning_rate": 7.64694735315741e-06, "loss": 0.3528, "step": 19189 }, { "epoch": 1.9509963399755998, "grad_norm": 0.26721617579460144, "learning_rate": 7.64664626870621e-06, "loss": 0.3875, "step": 19190 }, { "epoch": 1.9510980073200488, "grad_norm": 0.2630697786808014, "learning_rate": 7.646345170921836e-06, "loss": 0.3255, "step": 19191 }, { "epoch": 1.9511996746644977, "grad_norm": 0.2754421830177307, "learning_rate": 7.646044059805804e-06, "loss": 0.348, "step": 19192 }, { "epoch": 1.9513013420089467, "grad_norm": 0.2752842903137207, "learning_rate": 7.645742935359629e-06, "loss": 0.3433, "step": 19193 }, { "epoch": 1.9514030093533958, "grad_norm": 0.2678271532058716, "learning_rate": 7.645441797584831e-06, "loss": 0.3358, "step": 19194 }, { "epoch": 1.9515046766978448, "grad_norm": 0.27436503767967224, "learning_rate": 7.645140646482927e-06, "loss": 0.3472, "step": 19195 }, { "epoch": 1.9516063440422937, "grad_norm": 0.2517959475517273, "learning_rate": 7.644839482055432e-06, "loss": 0.3819, "step": 19196 }, { "epoch": 1.9517080113867427, "grad_norm": 0.2942622900009155, "learning_rate": 7.644538304303864e-06, "loss": 0.3846, "step": 19197 }, { "epoch": 1.9518096787311916, "grad_norm": 0.2882440686225891, "learning_rate": 7.64423711322974e-06, "loss": 0.3397, "step": 19198 }, { "epoch": 1.9519113460756405, "grad_norm": 0.26842179894447327, "learning_rate": 7.643935908834579e-06, "loss": 0.3395, "step": 19199 }, { "epoch": 1.9520130134200895, "grad_norm": 0.25973349809646606, "learning_rate": 7.643634691119896e-06, "loss": 0.3921, "step": 19200 }, { "epoch": 1.9521146807645384, "grad_norm": 0.27081236243247986, "learning_rate": 7.643333460087213e-06, "loss": 0.3541, "step": 19201 }, { "epoch": 1.9522163481089874, "grad_norm": 0.2919617295265198, "learning_rate": 7.64303221573804e-06, "loss": 0.3693, "step": 19202 }, { "epoch": 1.9523180154534363, "grad_norm": 0.27419614791870117, "learning_rate": 7.642730958073899e-06, "loss": 0.3438, "step": 19203 }, { "epoch": 1.9524196827978852, "grad_norm": 0.25441959500312805, "learning_rate": 7.64242968709631e-06, "loss": 0.3949, "step": 19204 }, { "epoch": 1.9525213501423342, "grad_norm": 0.260328471660614, "learning_rate": 7.642128402806787e-06, "loss": 0.3617, "step": 19205 }, { "epoch": 1.9526230174867831, "grad_norm": 0.2602963447570801, "learning_rate": 7.64182710520685e-06, "loss": 0.3346, "step": 19206 }, { "epoch": 1.952724684831232, "grad_norm": 0.2686021327972412, "learning_rate": 7.641525794298014e-06, "loss": 0.3527, "step": 19207 }, { "epoch": 1.952826352175681, "grad_norm": 0.26269271969795227, "learning_rate": 7.641224470081798e-06, "loss": 0.3396, "step": 19208 }, { "epoch": 1.9529280195201302, "grad_norm": 0.30477195978164673, "learning_rate": 7.640923132559724e-06, "loss": 0.365, "step": 19209 }, { "epoch": 1.9530296868645791, "grad_norm": 0.2696059048175812, "learning_rate": 7.640621781733304e-06, "loss": 0.3547, "step": 19210 }, { "epoch": 1.953131354209028, "grad_norm": 0.2650267779827118, "learning_rate": 7.640320417604062e-06, "loss": 0.3904, "step": 19211 }, { "epoch": 1.953233021553477, "grad_norm": 0.2789126932621002, "learning_rate": 7.640019040173509e-06, "loss": 0.3635, "step": 19212 }, { "epoch": 1.953334688897926, "grad_norm": 0.2679588198661804, "learning_rate": 7.63971764944317e-06, "loss": 0.3744, "step": 19213 }, { "epoch": 1.953436356242375, "grad_norm": 0.2970401346683502, "learning_rate": 7.639416245414561e-06, "loss": 0.3669, "step": 19214 }, { "epoch": 1.953538023586824, "grad_norm": 0.2871188223361969, "learning_rate": 7.639114828089199e-06, "loss": 0.3905, "step": 19215 }, { "epoch": 1.953639690931273, "grad_norm": 0.2564987540245056, "learning_rate": 7.638813397468606e-06, "loss": 0.3439, "step": 19216 }, { "epoch": 1.953741358275722, "grad_norm": 0.2651991844177246, "learning_rate": 7.638511953554294e-06, "loss": 0.3176, "step": 19217 }, { "epoch": 1.9538430256201709, "grad_norm": 0.31902971863746643, "learning_rate": 7.638210496347788e-06, "loss": 0.342, "step": 19218 }, { "epoch": 1.9539446929646198, "grad_norm": 0.2734951078891754, "learning_rate": 7.637909025850607e-06, "loss": 0.3717, "step": 19219 }, { "epoch": 1.9540463603090688, "grad_norm": 0.27142995595932007, "learning_rate": 7.637607542064264e-06, "loss": 0.3765, "step": 19220 }, { "epoch": 1.9541480276535177, "grad_norm": 0.28446313738822937, "learning_rate": 7.637306044990282e-06, "loss": 0.346, "step": 19221 }, { "epoch": 1.9542496949979666, "grad_norm": 0.2817540466785431, "learning_rate": 7.637004534630179e-06, "loss": 0.3656, "step": 19222 }, { "epoch": 1.9543513623424156, "grad_norm": 0.2461645007133484, "learning_rate": 7.636703010985473e-06, "loss": 0.3366, "step": 19223 }, { "epoch": 1.9544530296868645, "grad_norm": 0.2561526894569397, "learning_rate": 7.636401474057684e-06, "loss": 0.3571, "step": 19224 }, { "epoch": 1.9545546970313135, "grad_norm": 0.2505494952201843, "learning_rate": 7.636099923848333e-06, "loss": 0.3578, "step": 19225 }, { "epoch": 1.9546563643757624, "grad_norm": 0.2816905081272125, "learning_rate": 7.635798360358934e-06, "loss": 0.3706, "step": 19226 }, { "epoch": 1.9547580317202113, "grad_norm": 0.2654787003993988, "learning_rate": 7.635496783591012e-06, "loss": 0.3405, "step": 19227 }, { "epoch": 1.9548596990646603, "grad_norm": 0.27600720524787903, "learning_rate": 7.63519519354608e-06, "loss": 0.324, "step": 19228 }, { "epoch": 1.9549613664091094, "grad_norm": 0.2582492232322693, "learning_rate": 7.634893590225664e-06, "loss": 0.3508, "step": 19229 }, { "epoch": 1.9550630337535584, "grad_norm": 0.2963923513889313, "learning_rate": 7.634591973631278e-06, "loss": 0.3457, "step": 19230 }, { "epoch": 1.9551647010980073, "grad_norm": 0.2605874538421631, "learning_rate": 7.634290343764446e-06, "loss": 0.3385, "step": 19231 }, { "epoch": 1.9552663684424563, "grad_norm": 0.2659302055835724, "learning_rate": 7.633988700626684e-06, "loss": 0.3693, "step": 19232 }, { "epoch": 1.9553680357869052, "grad_norm": 0.2935798168182373, "learning_rate": 7.633687044219513e-06, "loss": 0.3548, "step": 19233 }, { "epoch": 1.9554697031313544, "grad_norm": 0.2851366698741913, "learning_rate": 7.633385374544452e-06, "loss": 0.3549, "step": 19234 }, { "epoch": 1.9555713704758033, "grad_norm": 0.28001105785369873, "learning_rate": 7.63308369160302e-06, "loss": 0.3819, "step": 19235 }, { "epoch": 1.9556730378202523, "grad_norm": 0.25245317816734314, "learning_rate": 7.63278199539674e-06, "loss": 0.3674, "step": 19236 }, { "epoch": 1.9557747051647012, "grad_norm": 0.2589276134967804, "learning_rate": 7.632480285927132e-06, "loss": 0.3781, "step": 19237 }, { "epoch": 1.9558763725091501, "grad_norm": 0.27500608563423157, "learning_rate": 7.632178563195711e-06, "loss": 0.3641, "step": 19238 }, { "epoch": 1.955978039853599, "grad_norm": 0.2755409777164459, "learning_rate": 7.631876827204e-06, "loss": 0.3536, "step": 19239 }, { "epoch": 1.956079707198048, "grad_norm": 0.26669836044311523, "learning_rate": 7.63157507795352e-06, "loss": 0.3573, "step": 19240 }, { "epoch": 1.956181374542497, "grad_norm": 0.2861838936805725, "learning_rate": 7.631273315445788e-06, "loss": 0.4096, "step": 19241 }, { "epoch": 1.956283041886946, "grad_norm": 0.2727002799510956, "learning_rate": 7.63097153968233e-06, "loss": 0.3647, "step": 19242 }, { "epoch": 1.9563847092313948, "grad_norm": 0.27932462096214294, "learning_rate": 7.63066975066466e-06, "loss": 0.3591, "step": 19243 }, { "epoch": 1.9564863765758438, "grad_norm": 0.2651660740375519, "learning_rate": 7.630367948394302e-06, "loss": 0.3082, "step": 19244 }, { "epoch": 1.9565880439202927, "grad_norm": 0.28435757756233215, "learning_rate": 7.630066132872775e-06, "loss": 0.3595, "step": 19245 }, { "epoch": 1.9566897112647417, "grad_norm": 0.25881126523017883, "learning_rate": 7.629764304101599e-06, "loss": 0.3609, "step": 19246 }, { "epoch": 1.9567913786091906, "grad_norm": 0.2782239615917206, "learning_rate": 7.629462462082295e-06, "loss": 0.3791, "step": 19247 }, { "epoch": 1.9568930459536396, "grad_norm": 0.2606875002384186, "learning_rate": 7.629160606816386e-06, "loss": 0.3771, "step": 19248 }, { "epoch": 1.9569947132980885, "grad_norm": 0.2850891649723053, "learning_rate": 7.628858738305389e-06, "loss": 0.3064, "step": 19249 }, { "epoch": 1.9570963806425377, "grad_norm": 0.283192902803421, "learning_rate": 7.628556856550828e-06, "loss": 0.3731, "step": 19250 }, { "epoch": 1.9571980479869866, "grad_norm": 0.3019637167453766, "learning_rate": 7.62825496155422e-06, "loss": 0.3581, "step": 19251 }, { "epoch": 1.9572997153314355, "grad_norm": 0.2887398600578308, "learning_rate": 7.62795305331709e-06, "loss": 0.3622, "step": 19252 }, { "epoch": 1.9574013826758845, "grad_norm": 0.25560271739959717, "learning_rate": 7.627651131840956e-06, "loss": 0.353, "step": 19253 }, { "epoch": 1.9575030500203334, "grad_norm": 0.27309563755989075, "learning_rate": 7.627349197127341e-06, "loss": 0.3289, "step": 19254 }, { "epoch": 1.9576047173647826, "grad_norm": 0.30186277627944946, "learning_rate": 7.627047249177764e-06, "loss": 0.3574, "step": 19255 }, { "epoch": 1.9577063847092315, "grad_norm": 0.26026567816734314, "learning_rate": 7.626745287993747e-06, "loss": 0.3691, "step": 19256 }, { "epoch": 1.9578080520536805, "grad_norm": 0.2771111726760864, "learning_rate": 7.626443313576813e-06, "loss": 0.3608, "step": 19257 }, { "epoch": 1.9579097193981294, "grad_norm": 0.2556893825531006, "learning_rate": 7.62614132592848e-06, "loss": 0.3846, "step": 19258 }, { "epoch": 1.9580113867425784, "grad_norm": 0.30019402503967285, "learning_rate": 7.625839325050272e-06, "loss": 0.3613, "step": 19259 }, { "epoch": 1.9581130540870273, "grad_norm": 0.26415836811065674, "learning_rate": 7.625537310943709e-06, "loss": 0.3468, "step": 19260 }, { "epoch": 1.9582147214314762, "grad_norm": 0.30317234992980957, "learning_rate": 7.625235283610314e-06, "loss": 0.3471, "step": 19261 }, { "epoch": 1.9583163887759252, "grad_norm": 0.25817424058914185, "learning_rate": 7.624933243051606e-06, "loss": 0.3428, "step": 19262 }, { "epoch": 1.9584180561203741, "grad_norm": 0.2593163251876831, "learning_rate": 7.624631189269109e-06, "loss": 0.3668, "step": 19263 }, { "epoch": 1.958519723464823, "grad_norm": 0.272418349981308, "learning_rate": 7.624329122264344e-06, "loss": 0.3381, "step": 19264 }, { "epoch": 1.958621390809272, "grad_norm": 0.29909029603004456, "learning_rate": 7.624027042038833e-06, "loss": 0.3651, "step": 19265 }, { "epoch": 1.958723058153721, "grad_norm": 0.2640475928783417, "learning_rate": 7.623724948594096e-06, "loss": 0.3473, "step": 19266 }, { "epoch": 1.9588247254981699, "grad_norm": 0.2832050025463104, "learning_rate": 7.623422841931658e-06, "loss": 0.3639, "step": 19267 }, { "epoch": 1.9589263928426188, "grad_norm": 0.2676084041595459, "learning_rate": 7.6231207220530364e-06, "loss": 0.3499, "step": 19268 }, { "epoch": 1.9590280601870678, "grad_norm": 0.28639498353004456, "learning_rate": 7.622818588959757e-06, "loss": 0.3697, "step": 19269 }, { "epoch": 1.959129727531517, "grad_norm": 0.2876112759113312, "learning_rate": 7.622516442653342e-06, "loss": 0.3075, "step": 19270 }, { "epoch": 1.9592313948759659, "grad_norm": 0.2736780345439911, "learning_rate": 7.622214283135312e-06, "loss": 0.326, "step": 19271 }, { "epoch": 1.9593330622204148, "grad_norm": 0.26082843542099, "learning_rate": 7.621912110407189e-06, "loss": 0.3416, "step": 19272 }, { "epoch": 1.9594347295648638, "grad_norm": 0.27705711126327515, "learning_rate": 7.621609924470496e-06, "loss": 0.3405, "step": 19273 }, { "epoch": 1.9595363969093127, "grad_norm": 0.2708452343940735, "learning_rate": 7.621307725326754e-06, "loss": 0.3477, "step": 19274 }, { "epoch": 1.9596380642537619, "grad_norm": 0.2595996558666229, "learning_rate": 7.621005512977489e-06, "loss": 0.3344, "step": 19275 }, { "epoch": 1.9597397315982108, "grad_norm": 0.27835437655448914, "learning_rate": 7.620703287424221e-06, "loss": 0.3272, "step": 19276 }, { "epoch": 1.9598413989426597, "grad_norm": 0.2540099322795868, "learning_rate": 7.620401048668471e-06, "loss": 0.3831, "step": 19277 }, { "epoch": 1.9599430662871087, "grad_norm": 0.25549671053886414, "learning_rate": 7.620098796711763e-06, "loss": 0.356, "step": 19278 }, { "epoch": 1.9600447336315576, "grad_norm": 0.2996080219745636, "learning_rate": 7.619796531555621e-06, "loss": 0.359, "step": 19279 }, { "epoch": 1.9601464009760066, "grad_norm": 0.31800228357315063, "learning_rate": 7.619494253201567e-06, "loss": 0.3662, "step": 19280 }, { "epoch": 1.9602480683204555, "grad_norm": 0.2811755836009979, "learning_rate": 7.6191919616511225e-06, "loss": 0.3512, "step": 19281 }, { "epoch": 1.9603497356649044, "grad_norm": 0.27132850885391235, "learning_rate": 7.618889656905811e-06, "loss": 0.3578, "step": 19282 }, { "epoch": 1.9604514030093534, "grad_norm": 0.286775141954422, "learning_rate": 7.618587338967156e-06, "loss": 0.3205, "step": 19283 }, { "epoch": 1.9605530703538023, "grad_norm": 0.28078797459602356, "learning_rate": 7.618285007836681e-06, "loss": 0.3482, "step": 19284 }, { "epoch": 1.9606547376982513, "grad_norm": 0.2703850567340851, "learning_rate": 7.617982663515908e-06, "loss": 0.3765, "step": 19285 }, { "epoch": 1.9607564050427002, "grad_norm": 0.2759791612625122, "learning_rate": 7.61768030600636e-06, "loss": 0.3695, "step": 19286 }, { "epoch": 1.9608580723871492, "grad_norm": 0.29013940691947937, "learning_rate": 7.617377935309561e-06, "loss": 0.3575, "step": 19287 }, { "epoch": 1.960959739731598, "grad_norm": 0.28741320967674255, "learning_rate": 7.617075551427034e-06, "loss": 0.3401, "step": 19288 }, { "epoch": 1.961061407076047, "grad_norm": 0.2696237564086914, "learning_rate": 7.616773154360302e-06, "loss": 0.3429, "step": 19289 }, { "epoch": 1.961163074420496, "grad_norm": 0.26975616812705994, "learning_rate": 7.616470744110889e-06, "loss": 0.3754, "step": 19290 }, { "epoch": 1.9612647417649451, "grad_norm": 0.3221967816352844, "learning_rate": 7.616168320680317e-06, "loss": 0.364, "step": 19291 }, { "epoch": 1.961366409109394, "grad_norm": 0.2773047685623169, "learning_rate": 7.615865884070113e-06, "loss": 0.3709, "step": 19292 }, { "epoch": 1.961468076453843, "grad_norm": 0.27064114809036255, "learning_rate": 7.6155634342817965e-06, "loss": 0.3217, "step": 19293 }, { "epoch": 1.961569743798292, "grad_norm": 0.30334898829460144, "learning_rate": 7.615260971316893e-06, "loss": 0.3715, "step": 19294 }, { "epoch": 1.961671411142741, "grad_norm": 0.2861151099205017, "learning_rate": 7.614958495176927e-06, "loss": 0.3622, "step": 19295 }, { "epoch": 1.96177307848719, "grad_norm": 0.27924731373786926, "learning_rate": 7.614656005863421e-06, "loss": 0.3629, "step": 19296 }, { "epoch": 1.961874745831639, "grad_norm": 0.27311769127845764, "learning_rate": 7.6143535033779e-06, "loss": 0.3391, "step": 19297 }, { "epoch": 1.961976413176088, "grad_norm": 0.26294082403182983, "learning_rate": 7.614050987721888e-06, "loss": 0.3596, "step": 19298 }, { "epoch": 1.962078080520537, "grad_norm": 0.301985502243042, "learning_rate": 7.613748458896907e-06, "loss": 0.3575, "step": 19299 }, { "epoch": 1.9621797478649858, "grad_norm": 0.27298933267593384, "learning_rate": 7.613445916904483e-06, "loss": 0.3594, "step": 19300 }, { "epoch": 1.9622814152094348, "grad_norm": 0.30082470178604126, "learning_rate": 7.613143361746138e-06, "loss": 0.3768, "step": 19301 }, { "epoch": 1.9623830825538837, "grad_norm": 0.29819512367248535, "learning_rate": 7.6128407934234e-06, "loss": 0.3377, "step": 19302 }, { "epoch": 1.9624847498983327, "grad_norm": 0.2599317133426666, "learning_rate": 7.61253821193779e-06, "loss": 0.3608, "step": 19303 }, { "epoch": 1.9625864172427816, "grad_norm": 0.2750687301158905, "learning_rate": 7.612235617290833e-06, "loss": 0.3629, "step": 19304 }, { "epoch": 1.9626880845872305, "grad_norm": 0.29242995381355286, "learning_rate": 7.611933009484054e-06, "loss": 0.371, "step": 19305 }, { "epoch": 1.9627897519316795, "grad_norm": 0.25356608629226685, "learning_rate": 7.611630388518977e-06, "loss": 0.3493, "step": 19306 }, { "epoch": 1.9628914192761284, "grad_norm": 0.2613098621368408, "learning_rate": 7.611327754397128e-06, "loss": 0.3737, "step": 19307 }, { "epoch": 1.9629930866205774, "grad_norm": 0.2710558772087097, "learning_rate": 7.611025107120029e-06, "loss": 0.3352, "step": 19308 }, { "epoch": 1.9630947539650263, "grad_norm": 0.2525400221347809, "learning_rate": 7.610722446689205e-06, "loss": 0.33, "step": 19309 }, { "epoch": 1.9631964213094752, "grad_norm": 0.24276527762413025, "learning_rate": 7.610419773106184e-06, "loss": 0.3462, "step": 19310 }, { "epoch": 1.9632980886539244, "grad_norm": 0.2769176959991455, "learning_rate": 7.610117086372486e-06, "loss": 0.3697, "step": 19311 }, { "epoch": 1.9633997559983734, "grad_norm": 0.2874390184879303, "learning_rate": 7.60981438648964e-06, "loss": 0.368, "step": 19312 }, { "epoch": 1.9635014233428223, "grad_norm": 0.3003738522529602, "learning_rate": 7.6095116734591715e-06, "loss": 0.3667, "step": 19313 }, { "epoch": 1.9636030906872712, "grad_norm": 0.23984749615192413, "learning_rate": 7.609208947282599e-06, "loss": 0.3467, "step": 19314 }, { "epoch": 1.9637047580317202, "grad_norm": 0.2888873815536499, "learning_rate": 7.608906207961454e-06, "loss": 0.3903, "step": 19315 }, { "epoch": 1.9638064253761693, "grad_norm": 0.27338194847106934, "learning_rate": 7.60860345549726e-06, "loss": 0.3396, "step": 19316 }, { "epoch": 1.9639080927206183, "grad_norm": 0.26125627756118774, "learning_rate": 7.60830068989154e-06, "loss": 0.3621, "step": 19317 }, { "epoch": 1.9640097600650672, "grad_norm": 0.2580707371234894, "learning_rate": 7.607997911145822e-06, "loss": 0.3204, "step": 19318 }, { "epoch": 1.9641114274095162, "grad_norm": 0.28439223766326904, "learning_rate": 7.607695119261631e-06, "loss": 0.3626, "step": 19319 }, { "epoch": 1.964213094753965, "grad_norm": 0.272012323141098, "learning_rate": 7.60739231424049e-06, "loss": 0.3868, "step": 19320 }, { "epoch": 1.964314762098414, "grad_norm": 0.26879245042800903, "learning_rate": 7.607089496083928e-06, "loss": 0.3544, "step": 19321 }, { "epoch": 1.964416429442863, "grad_norm": 0.31519222259521484, "learning_rate": 7.606786664793466e-06, "loss": 0.3759, "step": 19322 }, { "epoch": 1.964518096787312, "grad_norm": 0.26262351870536804, "learning_rate": 7.606483820370636e-06, "loss": 0.3525, "step": 19323 }, { "epoch": 1.9646197641317609, "grad_norm": 0.25768107175827026, "learning_rate": 7.606180962816956e-06, "loss": 0.3442, "step": 19324 }, { "epoch": 1.9647214314762098, "grad_norm": 0.2563973367214203, "learning_rate": 7.605878092133958e-06, "loss": 0.3931, "step": 19325 }, { "epoch": 1.9648230988206588, "grad_norm": 0.2736473083496094, "learning_rate": 7.605575208323165e-06, "loss": 0.3669, "step": 19326 }, { "epoch": 1.9649247661651077, "grad_norm": 0.299977570772171, "learning_rate": 7.605272311386103e-06, "loss": 0.3451, "step": 19327 }, { "epoch": 1.9650264335095566, "grad_norm": 0.29365861415863037, "learning_rate": 7.604969401324298e-06, "loss": 0.3485, "step": 19328 }, { "epoch": 1.9651281008540056, "grad_norm": 0.3077484965324402, "learning_rate": 7.604666478139278e-06, "loss": 0.3851, "step": 19329 }, { "epoch": 1.9652297681984545, "grad_norm": 0.27258267998695374, "learning_rate": 7.604363541832564e-06, "loss": 0.3486, "step": 19330 }, { "epoch": 1.9653314355429035, "grad_norm": 0.26388809084892273, "learning_rate": 7.604060592405689e-06, "loss": 0.3604, "step": 19331 }, { "epoch": 1.9654331028873526, "grad_norm": 0.2651573419570923, "learning_rate": 7.603757629860172e-06, "loss": 0.3547, "step": 19332 }, { "epoch": 1.9655347702318016, "grad_norm": 0.26787251234054565, "learning_rate": 7.603454654197545e-06, "loss": 0.3903, "step": 19333 }, { "epoch": 1.9656364375762505, "grad_norm": 0.28815779089927673, "learning_rate": 7.603151665419332e-06, "loss": 0.3919, "step": 19334 }, { "epoch": 1.9657381049206994, "grad_norm": 0.28942564129829407, "learning_rate": 7.602848663527058e-06, "loss": 0.3874, "step": 19335 }, { "epoch": 1.9658397722651484, "grad_norm": 0.2757797837257385, "learning_rate": 7.602545648522253e-06, "loss": 0.3519, "step": 19336 }, { "epoch": 1.9659414396095976, "grad_norm": 0.28117111325263977, "learning_rate": 7.60224262040644e-06, "loss": 0.3175, "step": 19337 }, { "epoch": 1.9660431069540465, "grad_norm": 0.28044652938842773, "learning_rate": 7.601939579181148e-06, "loss": 0.3946, "step": 19338 }, { "epoch": 1.9661447742984954, "grad_norm": 0.27085503935813904, "learning_rate": 7.601636524847902e-06, "loss": 0.3454, "step": 19339 }, { "epoch": 1.9662464416429444, "grad_norm": 0.3036467432975769, "learning_rate": 7.60133345740823e-06, "loss": 0.3515, "step": 19340 }, { "epoch": 1.9663481089873933, "grad_norm": 0.28074008226394653, "learning_rate": 7.601030376863658e-06, "loss": 0.3376, "step": 19341 }, { "epoch": 1.9664497763318423, "grad_norm": 0.27307793498039246, "learning_rate": 7.600727283215713e-06, "loss": 0.3469, "step": 19342 }, { "epoch": 1.9665514436762912, "grad_norm": 0.2825579345226288, "learning_rate": 7.600424176465924e-06, "loss": 0.34, "step": 19343 }, { "epoch": 1.9666531110207401, "grad_norm": 0.27051833271980286, "learning_rate": 7.600121056615812e-06, "loss": 0.3357, "step": 19344 }, { "epoch": 1.966754778365189, "grad_norm": 0.2837890684604645, "learning_rate": 7.59981792366691e-06, "loss": 0.3374, "step": 19345 }, { "epoch": 1.966856445709638, "grad_norm": 0.27843210101127625, "learning_rate": 7.599514777620744e-06, "loss": 0.3522, "step": 19346 }, { "epoch": 1.966958113054087, "grad_norm": 0.26735979318618774, "learning_rate": 7.599211618478839e-06, "loss": 0.3691, "step": 19347 }, { "epoch": 1.967059780398536, "grad_norm": 0.29865553975105286, "learning_rate": 7.598908446242725e-06, "loss": 0.3383, "step": 19348 }, { "epoch": 1.9671614477429848, "grad_norm": 0.27410417795181274, "learning_rate": 7.598605260913926e-06, "loss": 0.3705, "step": 19349 }, { "epoch": 1.9672631150874338, "grad_norm": 0.28155797719955444, "learning_rate": 7.598302062493972e-06, "loss": 0.387, "step": 19350 }, { "epoch": 1.9673647824318827, "grad_norm": 0.25937116146087646, "learning_rate": 7.597998850984389e-06, "loss": 0.3737, "step": 19351 }, { "epoch": 1.967466449776332, "grad_norm": 0.28177255392074585, "learning_rate": 7.597695626386706e-06, "loss": 0.3751, "step": 19352 }, { "epoch": 1.9675681171207808, "grad_norm": 0.2863805294036865, "learning_rate": 7.5973923887024506e-06, "loss": 0.3619, "step": 19353 }, { "epoch": 1.9676697844652298, "grad_norm": 0.2957145869731903, "learning_rate": 7.597089137933148e-06, "loss": 0.3615, "step": 19354 }, { "epoch": 1.9677714518096787, "grad_norm": 0.26365211606025696, "learning_rate": 7.596785874080328e-06, "loss": 0.3229, "step": 19355 }, { "epoch": 1.9678731191541277, "grad_norm": 0.26748591661453247, "learning_rate": 7.596482597145518e-06, "loss": 0.3847, "step": 19356 }, { "epoch": 1.9679747864985768, "grad_norm": 0.2984381914138794, "learning_rate": 7.596179307130245e-06, "loss": 0.339, "step": 19357 }, { "epoch": 1.9680764538430258, "grad_norm": 0.2857428193092346, "learning_rate": 7.595876004036039e-06, "loss": 0.3772, "step": 19358 }, { "epoch": 1.9681781211874747, "grad_norm": 0.2685186564922333, "learning_rate": 7.595572687864425e-06, "loss": 0.3531, "step": 19359 }, { "epoch": 1.9682797885319236, "grad_norm": 0.268301784992218, "learning_rate": 7.595269358616934e-06, "loss": 0.3448, "step": 19360 }, { "epoch": 1.9683814558763726, "grad_norm": 0.27255597710609436, "learning_rate": 7.594966016295092e-06, "loss": 0.3232, "step": 19361 }, { "epoch": 1.9684831232208215, "grad_norm": 0.2737410068511963, "learning_rate": 7.594662660900427e-06, "loss": 0.3862, "step": 19362 }, { "epoch": 1.9685847905652705, "grad_norm": 0.2605449855327606, "learning_rate": 7.59435929243447e-06, "loss": 0.3599, "step": 19363 }, { "epoch": 1.9686864579097194, "grad_norm": 0.2666490972042084, "learning_rate": 7.594055910898746e-06, "loss": 0.359, "step": 19364 }, { "epoch": 1.9687881252541684, "grad_norm": 0.2669457495212555, "learning_rate": 7.593752516294785e-06, "loss": 0.3445, "step": 19365 }, { "epoch": 1.9688897925986173, "grad_norm": 0.27430954575538635, "learning_rate": 7.593449108624116e-06, "loss": 0.355, "step": 19366 }, { "epoch": 1.9689914599430662, "grad_norm": 0.2843824028968811, "learning_rate": 7.593145687888266e-06, "loss": 0.3749, "step": 19367 }, { "epoch": 1.9690931272875152, "grad_norm": 0.2750973105430603, "learning_rate": 7.5928422540887645e-06, "loss": 0.3579, "step": 19368 }, { "epoch": 1.9691947946319641, "grad_norm": 0.28599223494529724, "learning_rate": 7.592538807227139e-06, "loss": 0.3675, "step": 19369 }, { "epoch": 1.969296461976413, "grad_norm": 0.2685045897960663, "learning_rate": 7.5922353473049195e-06, "loss": 0.3588, "step": 19370 }, { "epoch": 1.969398129320862, "grad_norm": 0.2617088854312897, "learning_rate": 7.5919318743236355e-06, "loss": 0.3298, "step": 19371 }, { "epoch": 1.969499796665311, "grad_norm": 0.2992767095565796, "learning_rate": 7.591628388284813e-06, "loss": 0.3855, "step": 19372 }, { "epoch": 1.96960146400976, "grad_norm": 0.2764100432395935, "learning_rate": 7.591324889189984e-06, "loss": 0.3558, "step": 19373 }, { "epoch": 1.969703131354209, "grad_norm": 0.2799224257469177, "learning_rate": 7.591021377040677e-06, "loss": 0.3531, "step": 19374 }, { "epoch": 1.969804798698658, "grad_norm": 0.29050320386886597, "learning_rate": 7.590717851838416e-06, "loss": 0.3251, "step": 19375 }, { "epoch": 1.969906466043107, "grad_norm": 0.2997325360774994, "learning_rate": 7.590414313584737e-06, "loss": 0.3539, "step": 19376 }, { "epoch": 1.9700081333875559, "grad_norm": 0.285515159368515, "learning_rate": 7.590110762281167e-06, "loss": 0.3518, "step": 19377 }, { "epoch": 1.970109800732005, "grad_norm": 0.27498379349708557, "learning_rate": 7.589807197929233e-06, "loss": 0.3531, "step": 19378 }, { "epoch": 1.970211468076454, "grad_norm": 0.30209487676620483, "learning_rate": 7.589503620530466e-06, "loss": 0.3599, "step": 19379 }, { "epoch": 1.970313135420903, "grad_norm": 0.29904764890670776, "learning_rate": 7.589200030086394e-06, "loss": 0.3474, "step": 19380 }, { "epoch": 1.9704148027653519, "grad_norm": 0.25233709812164307, "learning_rate": 7.588896426598549e-06, "loss": 0.3145, "step": 19381 }, { "epoch": 1.9705164701098008, "grad_norm": 0.2672663629055023, "learning_rate": 7.588592810068458e-06, "loss": 0.3277, "step": 19382 }, { "epoch": 1.9706181374542497, "grad_norm": 0.2703883647918701, "learning_rate": 7.588289180497653e-06, "loss": 0.3433, "step": 19383 }, { "epoch": 1.9707198047986987, "grad_norm": 0.2530137598514557, "learning_rate": 7.587985537887662e-06, "loss": 0.3498, "step": 19384 }, { "epoch": 1.9708214721431476, "grad_norm": 0.2851537764072418, "learning_rate": 7.587681882240012e-06, "loss": 0.3614, "step": 19385 }, { "epoch": 1.9709231394875966, "grad_norm": 0.28084152936935425, "learning_rate": 7.58737821355624e-06, "loss": 0.3735, "step": 19386 }, { "epoch": 1.9710248068320455, "grad_norm": 0.26343265175819397, "learning_rate": 7.587074531837868e-06, "loss": 0.3526, "step": 19387 }, { "epoch": 1.9711264741764944, "grad_norm": 0.30861154198646545, "learning_rate": 7.586770837086431e-06, "loss": 0.3484, "step": 19388 }, { "epoch": 1.9712281415209434, "grad_norm": 0.29486197233200073, "learning_rate": 7.586467129303458e-06, "loss": 0.3399, "step": 19389 }, { "epoch": 1.9713298088653923, "grad_norm": 0.2802548408508301, "learning_rate": 7.586163408490476e-06, "loss": 0.3792, "step": 19390 }, { "epoch": 1.9714314762098413, "grad_norm": 0.3023608922958374, "learning_rate": 7.585859674649018e-06, "loss": 0.3595, "step": 19391 }, { "epoch": 1.9715331435542902, "grad_norm": 0.2965119779109955, "learning_rate": 7.585555927780614e-06, "loss": 0.3666, "step": 19392 }, { "epoch": 1.9716348108987394, "grad_norm": 0.2902584969997406, "learning_rate": 7.585252167886794e-06, "loss": 0.3779, "step": 19393 }, { "epoch": 1.9717364782431883, "grad_norm": 0.28680020570755005, "learning_rate": 7.584948394969087e-06, "loss": 0.355, "step": 19394 }, { "epoch": 1.9718381455876373, "grad_norm": 0.3001338243484497, "learning_rate": 7.584644609029024e-06, "loss": 0.3731, "step": 19395 }, { "epoch": 1.9719398129320862, "grad_norm": 0.2873584032058716, "learning_rate": 7.584340810068137e-06, "loss": 0.3552, "step": 19396 }, { "epoch": 1.9720414802765351, "grad_norm": 0.2791505455970764, "learning_rate": 7.584036998087956e-06, "loss": 0.3758, "step": 19397 }, { "epoch": 1.9721431476209843, "grad_norm": 0.3083912134170532, "learning_rate": 7.583733173090007e-06, "loss": 0.3708, "step": 19398 }, { "epoch": 1.9722448149654332, "grad_norm": 0.2916371822357178, "learning_rate": 7.583429335075827e-06, "loss": 0.3949, "step": 19399 }, { "epoch": 1.9723464823098822, "grad_norm": 0.28037184476852417, "learning_rate": 7.5831254840469426e-06, "loss": 0.3412, "step": 19400 }, { "epoch": 1.9724481496543311, "grad_norm": 0.27539560198783875, "learning_rate": 7.582821620004888e-06, "loss": 0.3899, "step": 19401 }, { "epoch": 1.97254981699878, "grad_norm": 0.2892289459705353, "learning_rate": 7.58251774295119e-06, "loss": 0.3118, "step": 19402 }, { "epoch": 1.972651484343229, "grad_norm": 0.26426801085472107, "learning_rate": 7.582213852887381e-06, "loss": 0.3446, "step": 19403 }, { "epoch": 1.972753151687678, "grad_norm": 0.264234721660614, "learning_rate": 7.581909949814994e-06, "loss": 0.3449, "step": 19404 }, { "epoch": 1.972854819032127, "grad_norm": 0.2758341133594513, "learning_rate": 7.5816060337355555e-06, "loss": 0.3246, "step": 19405 }, { "epoch": 1.9729564863765758, "grad_norm": 0.26944127678871155, "learning_rate": 7.5813021046506015e-06, "loss": 0.3367, "step": 19406 }, { "epoch": 1.9730581537210248, "grad_norm": 0.2774844467639923, "learning_rate": 7.5809981625616614e-06, "loss": 0.3568, "step": 19407 }, { "epoch": 1.9731598210654737, "grad_norm": 0.28644630312919617, "learning_rate": 7.580694207470265e-06, "loss": 0.3462, "step": 19408 }, { "epoch": 1.9732614884099227, "grad_norm": 0.2822418212890625, "learning_rate": 7.580390239377944e-06, "loss": 0.3706, "step": 19409 }, { "epoch": 1.9733631557543716, "grad_norm": 0.2862996459007263, "learning_rate": 7.58008625828623e-06, "loss": 0.3703, "step": 19410 }, { "epoch": 1.9734648230988205, "grad_norm": 0.27097585797309875, "learning_rate": 7.5797822641966555e-06, "loss": 0.385, "step": 19411 }, { "epoch": 1.9735664904432695, "grad_norm": 0.257943719625473, "learning_rate": 7.579478257110751e-06, "loss": 0.3683, "step": 19412 }, { "epoch": 1.9736681577877184, "grad_norm": 0.278887540102005, "learning_rate": 7.579174237030047e-06, "loss": 0.3705, "step": 19413 }, { "epoch": 1.9737698251321676, "grad_norm": 0.2654563784599304, "learning_rate": 7.5788702039560766e-06, "loss": 0.3672, "step": 19414 }, { "epoch": 1.9738714924766165, "grad_norm": 0.26055216789245605, "learning_rate": 7.578566157890373e-06, "loss": 0.3263, "step": 19415 }, { "epoch": 1.9739731598210655, "grad_norm": 0.26398810744285583, "learning_rate": 7.5782620988344624e-06, "loss": 0.3467, "step": 19416 }, { "epoch": 1.9740748271655144, "grad_norm": 0.27175846695899963, "learning_rate": 7.577958026789882e-06, "loss": 0.3539, "step": 19417 }, { "epoch": 1.9741764945099634, "grad_norm": 0.2763414680957794, "learning_rate": 7.5776539417581605e-06, "loss": 0.3494, "step": 19418 }, { "epoch": 1.9742781618544125, "grad_norm": 0.2742363512516022, "learning_rate": 7.577349843740832e-06, "loss": 0.3533, "step": 19419 }, { "epoch": 1.9743798291988615, "grad_norm": 0.26522698998451233, "learning_rate": 7.5770457327394275e-06, "loss": 0.3345, "step": 19420 }, { "epoch": 1.9744814965433104, "grad_norm": 0.2747972905635834, "learning_rate": 7.576741608755478e-06, "loss": 0.3473, "step": 19421 }, { "epoch": 1.9745831638877593, "grad_norm": 0.2752835154533386, "learning_rate": 7.576437471790517e-06, "loss": 0.3148, "step": 19422 }, { "epoch": 1.9746848312322083, "grad_norm": 0.27241095900535583, "learning_rate": 7.576133321846078e-06, "loss": 0.3546, "step": 19423 }, { "epoch": 1.9747864985766572, "grad_norm": 0.28218215703964233, "learning_rate": 7.575829158923689e-06, "loss": 0.3125, "step": 19424 }, { "epoch": 1.9748881659211062, "grad_norm": 0.2603169083595276, "learning_rate": 7.575524983024887e-06, "loss": 0.3895, "step": 19425 }, { "epoch": 1.974989833265555, "grad_norm": 0.2857864499092102, "learning_rate": 7.575220794151199e-06, "loss": 0.3719, "step": 19426 }, { "epoch": 1.975091500610004, "grad_norm": 0.265106201171875, "learning_rate": 7.574916592304165e-06, "loss": 0.3339, "step": 19427 }, { "epoch": 1.975193167954453, "grad_norm": 0.25628557801246643, "learning_rate": 7.574612377485309e-06, "loss": 0.3455, "step": 19428 }, { "epoch": 1.975294835298902, "grad_norm": 0.2652638554573059, "learning_rate": 7.57430814969617e-06, "loss": 0.3826, "step": 19429 }, { "epoch": 1.9753965026433509, "grad_norm": 0.30830737948417664, "learning_rate": 7.574003908938277e-06, "loss": 0.3717, "step": 19430 }, { "epoch": 1.9754981699877998, "grad_norm": 0.2711601257324219, "learning_rate": 7.5736996552131644e-06, "loss": 0.365, "step": 19431 }, { "epoch": 1.9755998373322488, "grad_norm": 0.27281615138053894, "learning_rate": 7.573395388522365e-06, "loss": 0.3467, "step": 19432 }, { "epoch": 1.9757015046766977, "grad_norm": 0.25871238112449646, "learning_rate": 7.573091108867412e-06, "loss": 0.3437, "step": 19433 }, { "epoch": 1.9758031720211469, "grad_norm": 0.2665036916732788, "learning_rate": 7.572786816249836e-06, "loss": 0.3792, "step": 19434 }, { "epoch": 1.9759048393655958, "grad_norm": 0.2691057324409485, "learning_rate": 7.572482510671172e-06, "loss": 0.3552, "step": 19435 }, { "epoch": 1.9760065067100447, "grad_norm": 0.268883615732193, "learning_rate": 7.572178192132952e-06, "loss": 0.344, "step": 19436 }, { "epoch": 1.9761081740544937, "grad_norm": 0.27023231983184814, "learning_rate": 7.5718738606367095e-06, "loss": 0.4046, "step": 19437 }, { "epoch": 1.9762098413989426, "grad_norm": 0.2610720992088318, "learning_rate": 7.5715695161839785e-06, "loss": 0.335, "step": 19438 }, { "epoch": 1.9763115087433918, "grad_norm": 0.270494669675827, "learning_rate": 7.571265158776291e-06, "loss": 0.327, "step": 19439 }, { "epoch": 1.9764131760878407, "grad_norm": 0.2671207785606384, "learning_rate": 7.5709607884151805e-06, "loss": 0.3515, "step": 19440 }, { "epoch": 1.9765148434322897, "grad_norm": 0.2697709798812866, "learning_rate": 7.570656405102179e-06, "loss": 0.3549, "step": 19441 }, { "epoch": 1.9766165107767386, "grad_norm": 0.28093692660331726, "learning_rate": 7.5703520088388235e-06, "loss": 0.3345, "step": 19442 }, { "epoch": 1.9767181781211876, "grad_norm": 0.28227511048316956, "learning_rate": 7.570047599626644e-06, "loss": 0.3292, "step": 19443 }, { "epoch": 1.9768198454656365, "grad_norm": 0.2557263672351837, "learning_rate": 7.569743177467177e-06, "loss": 0.3659, "step": 19444 }, { "epoch": 1.9769215128100854, "grad_norm": 0.2773098349571228, "learning_rate": 7.569438742361954e-06, "loss": 0.3557, "step": 19445 }, { "epoch": 1.9770231801545344, "grad_norm": 0.2857299745082855, "learning_rate": 7.569134294312508e-06, "loss": 0.3358, "step": 19446 }, { "epoch": 1.9771248474989833, "grad_norm": 0.27732276916503906, "learning_rate": 7.568829833320376e-06, "loss": 0.3557, "step": 19447 }, { "epoch": 1.9772265148434323, "grad_norm": 0.2697017788887024, "learning_rate": 7.568525359387089e-06, "loss": 0.3586, "step": 19448 }, { "epoch": 1.9773281821878812, "grad_norm": 0.30870676040649414, "learning_rate": 7.5682208725141805e-06, "loss": 0.3731, "step": 19449 }, { "epoch": 1.9774298495323301, "grad_norm": 0.2906353771686554, "learning_rate": 7.567916372703188e-06, "loss": 0.3753, "step": 19450 }, { "epoch": 1.977531516876779, "grad_norm": 0.26808416843414307, "learning_rate": 7.567611859955639e-06, "loss": 0.3395, "step": 19451 }, { "epoch": 1.977633184221228, "grad_norm": 0.27705612778663635, "learning_rate": 7.567307334273075e-06, "loss": 0.3465, "step": 19452 }, { "epoch": 1.977734851565677, "grad_norm": 0.2645781636238098, "learning_rate": 7.5670027956570255e-06, "loss": 0.3745, "step": 19453 }, { "epoch": 1.977836518910126, "grad_norm": 0.2822703719139099, "learning_rate": 7.566698244109027e-06, "loss": 0.3603, "step": 19454 }, { "epoch": 1.977938186254575, "grad_norm": 0.2729737460613251, "learning_rate": 7.566393679630612e-06, "loss": 0.3646, "step": 19455 }, { "epoch": 1.978039853599024, "grad_norm": 0.2802335023880005, "learning_rate": 7.566089102223314e-06, "loss": 0.3249, "step": 19456 }, { "epoch": 1.978141520943473, "grad_norm": 0.25257351994514465, "learning_rate": 7.56578451188867e-06, "loss": 0.363, "step": 19457 }, { "epoch": 1.978243188287922, "grad_norm": 0.27548283338546753, "learning_rate": 7.565479908628214e-06, "loss": 0.3469, "step": 19458 }, { "epoch": 1.9783448556323708, "grad_norm": 0.2718614935874939, "learning_rate": 7.56517529244348e-06, "loss": 0.3417, "step": 19459 }, { "epoch": 1.97844652297682, "grad_norm": 0.27522164583206177, "learning_rate": 7.564870663336001e-06, "loss": 0.4034, "step": 19460 }, { "epoch": 1.978548190321269, "grad_norm": 0.25882241129875183, "learning_rate": 7.5645660213073136e-06, "loss": 0.3934, "step": 19461 }, { "epoch": 1.9786498576657179, "grad_norm": 0.27793213725090027, "learning_rate": 7.5642613663589535e-06, "loss": 0.3509, "step": 19462 }, { "epoch": 1.9787515250101668, "grad_norm": 0.25993576645851135, "learning_rate": 7.563956698492452e-06, "loss": 0.3571, "step": 19463 }, { "epoch": 1.9788531923546158, "grad_norm": 0.26807117462158203, "learning_rate": 7.563652017709348e-06, "loss": 0.37, "step": 19464 }, { "epoch": 1.9789548596990647, "grad_norm": 0.2620564103126526, "learning_rate": 7.563347324011174e-06, "loss": 0.3486, "step": 19465 }, { "epoch": 1.9790565270435136, "grad_norm": 0.29529818892478943, "learning_rate": 7.5630426173994634e-06, "loss": 0.3662, "step": 19466 }, { "epoch": 1.9791581943879626, "grad_norm": 0.2658693492412567, "learning_rate": 7.5627378978757535e-06, "loss": 0.359, "step": 19467 }, { "epoch": 1.9792598617324115, "grad_norm": 0.27492502331733704, "learning_rate": 7.562433165441581e-06, "loss": 0.3928, "step": 19468 }, { "epoch": 1.9793615290768605, "grad_norm": 0.27603861689567566, "learning_rate": 7.562128420098478e-06, "loss": 0.3615, "step": 19469 }, { "epoch": 1.9794631964213094, "grad_norm": 0.2741944193840027, "learning_rate": 7.561823661847981e-06, "loss": 0.3788, "step": 19470 }, { "epoch": 1.9795648637657584, "grad_norm": 0.2515734136104584, "learning_rate": 7.561518890691624e-06, "loss": 0.3335, "step": 19471 }, { "epoch": 1.9796665311102073, "grad_norm": 0.27244168519973755, "learning_rate": 7.561214106630945e-06, "loss": 0.345, "step": 19472 }, { "epoch": 1.9797681984546562, "grad_norm": 0.2758391201496124, "learning_rate": 7.5609093096674794e-06, "loss": 0.3288, "step": 19473 }, { "epoch": 1.9798698657991052, "grad_norm": 0.28612422943115234, "learning_rate": 7.560604499802757e-06, "loss": 0.357, "step": 19474 }, { "epoch": 1.9799715331435543, "grad_norm": 0.2941358685493469, "learning_rate": 7.5602996770383205e-06, "loss": 0.3828, "step": 19475 }, { "epoch": 1.9800732004880033, "grad_norm": 0.268374502658844, "learning_rate": 7.559994841375702e-06, "loss": 0.3208, "step": 19476 }, { "epoch": 1.9801748678324522, "grad_norm": 0.2758242189884186, "learning_rate": 7.559689992816439e-06, "loss": 0.3281, "step": 19477 }, { "epoch": 1.9802765351769012, "grad_norm": 0.27828386425971985, "learning_rate": 7.559385131362065e-06, "loss": 0.3267, "step": 19478 }, { "epoch": 1.98037820252135, "grad_norm": 0.28509241342544556, "learning_rate": 7.5590802570141175e-06, "loss": 0.3931, "step": 19479 }, { "epoch": 1.9804798698657993, "grad_norm": 0.28067123889923096, "learning_rate": 7.558775369774132e-06, "loss": 0.3489, "step": 19480 }, { "epoch": 1.9805815372102482, "grad_norm": 0.2721274495124817, "learning_rate": 7.558470469643642e-06, "loss": 0.3629, "step": 19481 }, { "epoch": 1.9806832045546972, "grad_norm": 0.2567715346813202, "learning_rate": 7.558165556624188e-06, "loss": 0.349, "step": 19482 }, { "epoch": 1.980784871899146, "grad_norm": 0.28814494609832764, "learning_rate": 7.557860630717303e-06, "loss": 0.3766, "step": 19483 }, { "epoch": 1.980886539243595, "grad_norm": 0.2823057174682617, "learning_rate": 7.557555691924524e-06, "loss": 0.3439, "step": 19484 }, { "epoch": 1.980988206588044, "grad_norm": 0.2832958400249481, "learning_rate": 7.557250740247388e-06, "loss": 0.3655, "step": 19485 }, { "epoch": 1.981089873932493, "grad_norm": 0.2915927469730377, "learning_rate": 7.556945775687428e-06, "loss": 0.343, "step": 19486 }, { "epoch": 1.9811915412769419, "grad_norm": 0.29145970940589905, "learning_rate": 7.5566407982461865e-06, "loss": 0.3516, "step": 19487 }, { "epoch": 1.9812932086213908, "grad_norm": 0.28741195797920227, "learning_rate": 7.5563358079251946e-06, "loss": 0.3568, "step": 19488 }, { "epoch": 1.9813948759658397, "grad_norm": 0.2982957661151886, "learning_rate": 7.556030804725989e-06, "loss": 0.3913, "step": 19489 }, { "epoch": 1.9814965433102887, "grad_norm": 0.258915513753891, "learning_rate": 7.55572578865011e-06, "loss": 0.3056, "step": 19490 }, { "epoch": 1.9815982106547376, "grad_norm": 0.2918756306171417, "learning_rate": 7.55542075969909e-06, "loss": 0.3559, "step": 19491 }, { "epoch": 1.9816998779991866, "grad_norm": 0.2789159417152405, "learning_rate": 7.5551157178744675e-06, "loss": 0.3566, "step": 19492 }, { "epoch": 1.9818015453436355, "grad_norm": 0.2751575708389282, "learning_rate": 7.554810663177781e-06, "loss": 0.3561, "step": 19493 }, { "epoch": 1.9819032126880844, "grad_norm": 0.261328786611557, "learning_rate": 7.554505595610563e-06, "loss": 0.3464, "step": 19494 }, { "epoch": 1.9820048800325334, "grad_norm": 0.29565170407295227, "learning_rate": 7.554200515174355e-06, "loss": 0.3829, "step": 19495 }, { "epoch": 1.9821065473769826, "grad_norm": 0.2842448055744171, "learning_rate": 7.55389542187069e-06, "loss": 0.3037, "step": 19496 }, { "epoch": 1.9822082147214315, "grad_norm": 0.26194074749946594, "learning_rate": 7.553590315701107e-06, "loss": 0.3365, "step": 19497 }, { "epoch": 1.9823098820658804, "grad_norm": 0.2581077814102173, "learning_rate": 7.553285196667144e-06, "loss": 0.3711, "step": 19498 }, { "epoch": 1.9824115494103294, "grad_norm": 0.30472248792648315, "learning_rate": 7.552980064770337e-06, "loss": 0.4043, "step": 19499 }, { "epoch": 1.9825132167547783, "grad_norm": 0.2589379549026489, "learning_rate": 7.552674920012222e-06, "loss": 0.3448, "step": 19500 }, { "epoch": 1.9826148840992275, "grad_norm": 0.2543349862098694, "learning_rate": 7.552369762394338e-06, "loss": 0.347, "step": 19501 }, { "epoch": 1.9827165514436764, "grad_norm": 0.26181212067604065, "learning_rate": 7.552064591918221e-06, "loss": 0.3417, "step": 19502 }, { "epoch": 1.9828182187881254, "grad_norm": 0.2685113251209259, "learning_rate": 7.55175940858541e-06, "loss": 0.3922, "step": 19503 }, { "epoch": 1.9829198861325743, "grad_norm": 0.2664266526699066, "learning_rate": 7.55145421239744e-06, "loss": 0.3009, "step": 19504 }, { "epoch": 1.9830215534770232, "grad_norm": 0.2869313955307007, "learning_rate": 7.5511490033558525e-06, "loss": 0.3419, "step": 19505 }, { "epoch": 1.9831232208214722, "grad_norm": 0.2667534053325653, "learning_rate": 7.55084378146218e-06, "loss": 0.3512, "step": 19506 }, { "epoch": 1.9832248881659211, "grad_norm": 0.252780556678772, "learning_rate": 7.550538546717964e-06, "loss": 0.3503, "step": 19507 }, { "epoch": 1.98332655551037, "grad_norm": 0.27111080288887024, "learning_rate": 7.550233299124741e-06, "loss": 0.3344, "step": 19508 }, { "epoch": 1.983428222854819, "grad_norm": 0.2709418833255768, "learning_rate": 7.549928038684049e-06, "loss": 0.3998, "step": 19509 }, { "epoch": 1.983529890199268, "grad_norm": 0.269461452960968, "learning_rate": 7.5496227653974245e-06, "loss": 0.3486, "step": 19510 }, { "epoch": 1.983631557543717, "grad_norm": 0.29954543709754944, "learning_rate": 7.549317479266407e-06, "loss": 0.3785, "step": 19511 }, { "epoch": 1.9837332248881658, "grad_norm": 0.2681880295276642, "learning_rate": 7.549012180292533e-06, "loss": 0.3694, "step": 19512 }, { "epoch": 1.9838348922326148, "grad_norm": 0.28457358479499817, "learning_rate": 7.548706868477342e-06, "loss": 0.3517, "step": 19513 }, { "epoch": 1.9839365595770637, "grad_norm": 0.27698642015457153, "learning_rate": 7.548401543822372e-06, "loss": 0.3952, "step": 19514 }, { "epoch": 1.9840382269215127, "grad_norm": 0.2622256278991699, "learning_rate": 7.548096206329161e-06, "loss": 0.3787, "step": 19515 }, { "epoch": 1.9841398942659618, "grad_norm": 0.25514650344848633, "learning_rate": 7.547790855999244e-06, "loss": 0.3644, "step": 19516 }, { "epoch": 1.9842415616104108, "grad_norm": 0.2731438875198364, "learning_rate": 7.5474854928341644e-06, "loss": 0.3719, "step": 19517 }, { "epoch": 1.9843432289548597, "grad_norm": 0.2651727795600891, "learning_rate": 7.547180116835459e-06, "loss": 0.3469, "step": 19518 }, { "epoch": 1.9844448962993086, "grad_norm": 0.27369698882102966, "learning_rate": 7.546874728004662e-06, "loss": 0.35, "step": 19519 }, { "epoch": 1.9845465636437576, "grad_norm": 0.2709207236766815, "learning_rate": 7.54656932634332e-06, "loss": 0.3677, "step": 19520 }, { "epoch": 1.9846482309882068, "grad_norm": 0.260431706905365, "learning_rate": 7.546263911852965e-06, "loss": 0.3558, "step": 19521 }, { "epoch": 1.9847498983326557, "grad_norm": 0.2775580585002899, "learning_rate": 7.545958484535136e-06, "loss": 0.3538, "step": 19522 }, { "epoch": 1.9848515656771046, "grad_norm": 0.2637113332748413, "learning_rate": 7.545653044391374e-06, "loss": 0.341, "step": 19523 }, { "epoch": 1.9849532330215536, "grad_norm": 0.27936476469039917, "learning_rate": 7.545347591423217e-06, "loss": 0.3406, "step": 19524 }, { "epoch": 1.9850549003660025, "grad_norm": 0.2794383764266968, "learning_rate": 7.545042125632205e-06, "loss": 0.3406, "step": 19525 }, { "epoch": 1.9851565677104515, "grad_norm": 0.27321699261665344, "learning_rate": 7.544736647019874e-06, "loss": 0.3656, "step": 19526 }, { "epoch": 1.9852582350549004, "grad_norm": 0.26994776725769043, "learning_rate": 7.5444311555877635e-06, "loss": 0.3646, "step": 19527 }, { "epoch": 1.9853599023993493, "grad_norm": 0.26637813448905945, "learning_rate": 7.544125651337416e-06, "loss": 0.3664, "step": 19528 }, { "epoch": 1.9854615697437983, "grad_norm": 0.2713051438331604, "learning_rate": 7.543820134270368e-06, "loss": 0.3841, "step": 19529 }, { "epoch": 1.9855632370882472, "grad_norm": 0.27037596702575684, "learning_rate": 7.543514604388157e-06, "loss": 0.352, "step": 19530 }, { "epoch": 1.9856649044326962, "grad_norm": 0.2654602527618408, "learning_rate": 7.543209061692325e-06, "loss": 0.3631, "step": 19531 }, { "epoch": 1.985766571777145, "grad_norm": 0.3000263571739197, "learning_rate": 7.542903506184409e-06, "loss": 0.3418, "step": 19532 }, { "epoch": 1.985868239121594, "grad_norm": 0.24749113619327545, "learning_rate": 7.54259793786595e-06, "loss": 0.3326, "step": 19533 }, { "epoch": 1.985969906466043, "grad_norm": 0.27362290024757385, "learning_rate": 7.542292356738486e-06, "loss": 0.3752, "step": 19534 }, { "epoch": 1.986071573810492, "grad_norm": 0.2923112213611603, "learning_rate": 7.541986762803559e-06, "loss": 0.3655, "step": 19535 }, { "epoch": 1.9861732411549409, "grad_norm": 0.274085134267807, "learning_rate": 7.541681156062706e-06, "loss": 0.3578, "step": 19536 }, { "epoch": 1.98627490849939, "grad_norm": 0.266832172870636, "learning_rate": 7.541375536517466e-06, "loss": 0.3581, "step": 19537 }, { "epoch": 1.986376575843839, "grad_norm": 0.26627808809280396, "learning_rate": 7.541069904169383e-06, "loss": 0.3508, "step": 19538 }, { "epoch": 1.986478243188288, "grad_norm": 0.26763007044792175, "learning_rate": 7.54076425901999e-06, "loss": 0.4086, "step": 19539 }, { "epoch": 1.9865799105327369, "grad_norm": 0.2714710533618927, "learning_rate": 7.540458601070832e-06, "loss": 0.3619, "step": 19540 }, { "epoch": 1.9866815778771858, "grad_norm": 0.298997163772583, "learning_rate": 7.54015293032345e-06, "loss": 0.3561, "step": 19541 }, { "epoch": 1.986783245221635, "grad_norm": 0.3046496510505676, "learning_rate": 7.539847246779377e-06, "loss": 0.364, "step": 19542 }, { "epoch": 1.986884912566084, "grad_norm": 0.2767415940761566, "learning_rate": 7.5395415504401595e-06, "loss": 0.3588, "step": 19543 }, { "epoch": 1.9869865799105328, "grad_norm": 0.2831740379333496, "learning_rate": 7.539235841307335e-06, "loss": 0.3895, "step": 19544 }, { "epoch": 1.9870882472549818, "grad_norm": 0.27236470580101013, "learning_rate": 7.538930119382443e-06, "loss": 0.3511, "step": 19545 }, { "epoch": 1.9871899145994307, "grad_norm": 0.31076380610466003, "learning_rate": 7.5386243846670245e-06, "loss": 0.4173, "step": 19546 }, { "epoch": 1.9872915819438797, "grad_norm": 0.29862797260284424, "learning_rate": 7.5383186371626185e-06, "loss": 0.3564, "step": 19547 }, { "epoch": 1.9873932492883286, "grad_norm": 0.30130332708358765, "learning_rate": 7.538012876870769e-06, "loss": 0.3815, "step": 19548 }, { "epoch": 1.9874949166327776, "grad_norm": 0.3002007305622101, "learning_rate": 7.5377071037930125e-06, "loss": 0.3434, "step": 19549 }, { "epoch": 1.9875965839772265, "grad_norm": 0.3132460117340088, "learning_rate": 7.5374013179308905e-06, "loss": 0.3999, "step": 19550 }, { "epoch": 1.9876982513216754, "grad_norm": 0.3012748956680298, "learning_rate": 7.537095519285944e-06, "loss": 0.3326, "step": 19551 }, { "epoch": 1.9877999186661244, "grad_norm": 0.2797011137008667, "learning_rate": 7.536789707859711e-06, "loss": 0.3316, "step": 19552 }, { "epoch": 1.9879015860105733, "grad_norm": 0.26770925521850586, "learning_rate": 7.5364838836537365e-06, "loss": 0.3418, "step": 19553 }, { "epoch": 1.9880032533550223, "grad_norm": 0.2803926467895508, "learning_rate": 7.53617804666956e-06, "loss": 0.3837, "step": 19554 }, { "epoch": 1.9881049206994712, "grad_norm": 0.2633599638938904, "learning_rate": 7.535872196908719e-06, "loss": 0.3713, "step": 19555 }, { "epoch": 1.9882065880439201, "grad_norm": 0.31173470616340637, "learning_rate": 7.535566334372758e-06, "loss": 0.354, "step": 19556 }, { "epoch": 1.9883082553883693, "grad_norm": 0.28133273124694824, "learning_rate": 7.5352604590632136e-06, "loss": 0.366, "step": 19557 }, { "epoch": 1.9884099227328182, "grad_norm": 0.27543261647224426, "learning_rate": 7.534954570981633e-06, "loss": 0.3564, "step": 19558 }, { "epoch": 1.9885115900772672, "grad_norm": 0.3055974543094635, "learning_rate": 7.5346486701295516e-06, "loss": 0.3636, "step": 19559 }, { "epoch": 1.9886132574217161, "grad_norm": 0.2902206480503082, "learning_rate": 7.534342756508513e-06, "loss": 0.3602, "step": 19560 }, { "epoch": 1.988714924766165, "grad_norm": 0.2739346921443939, "learning_rate": 7.5340368301200595e-06, "loss": 0.3963, "step": 19561 }, { "epoch": 1.9888165921106142, "grad_norm": 0.2693065404891968, "learning_rate": 7.533730890965727e-06, "loss": 0.3642, "step": 19562 }, { "epoch": 1.9889182594550632, "grad_norm": 0.27473461627960205, "learning_rate": 7.533424939047063e-06, "loss": 0.3452, "step": 19563 }, { "epoch": 1.9890199267995121, "grad_norm": 0.26176050305366516, "learning_rate": 7.533118974365608e-06, "loss": 0.3668, "step": 19564 }, { "epoch": 1.989121594143961, "grad_norm": 0.2673783004283905, "learning_rate": 7.532812996922898e-06, "loss": 0.3671, "step": 19565 }, { "epoch": 1.98922326148841, "grad_norm": 0.2705097496509552, "learning_rate": 7.53250700672048e-06, "loss": 0.3519, "step": 19566 }, { "epoch": 1.989324928832859, "grad_norm": 0.25413376092910767, "learning_rate": 7.532201003759891e-06, "loss": 0.3126, "step": 19567 }, { "epoch": 1.9894265961773079, "grad_norm": 0.2875589430332184, "learning_rate": 7.531894988042678e-06, "loss": 0.334, "step": 19568 }, { "epoch": 1.9895282635217568, "grad_norm": 0.2568243741989136, "learning_rate": 7.531588959570379e-06, "loss": 0.3495, "step": 19569 }, { "epoch": 1.9896299308662058, "grad_norm": 0.3000001907348633, "learning_rate": 7.531282918344536e-06, "loss": 0.3403, "step": 19570 }, { "epoch": 1.9897315982106547, "grad_norm": 0.2687127888202667, "learning_rate": 7.53097686436669e-06, "loss": 0.3383, "step": 19571 }, { "epoch": 1.9898332655551036, "grad_norm": 0.26093247532844543, "learning_rate": 7.530670797638386e-06, "loss": 0.3563, "step": 19572 }, { "epoch": 1.9899349328995526, "grad_norm": 0.25719866156578064, "learning_rate": 7.5303647181611625e-06, "loss": 0.3526, "step": 19573 }, { "epoch": 1.9900366002440015, "grad_norm": 0.24654661118984222, "learning_rate": 7.530058625936564e-06, "loss": 0.3497, "step": 19574 }, { "epoch": 1.9901382675884505, "grad_norm": 0.26820680499076843, "learning_rate": 7.52975252096613e-06, "loss": 0.3432, "step": 19575 }, { "epoch": 1.9902399349328994, "grad_norm": 0.2746365964412689, "learning_rate": 7.5294464032514055e-06, "loss": 0.3577, "step": 19576 }, { "epoch": 1.9903416022773484, "grad_norm": 0.2743469476699829, "learning_rate": 7.52914027279393e-06, "loss": 0.3449, "step": 19577 }, { "epoch": 1.9904432696217975, "grad_norm": 0.273258775472641, "learning_rate": 7.528834129595248e-06, "loss": 0.3615, "step": 19578 }, { "epoch": 1.9905449369662465, "grad_norm": 0.26101022958755493, "learning_rate": 7.528527973656899e-06, "loss": 0.3488, "step": 19579 }, { "epoch": 1.9906466043106954, "grad_norm": 0.27324384450912476, "learning_rate": 7.528221804980428e-06, "loss": 0.4052, "step": 19580 }, { "epoch": 1.9907482716551443, "grad_norm": 0.2681247889995575, "learning_rate": 7.5279156235673765e-06, "loss": 0.3548, "step": 19581 }, { "epoch": 1.9908499389995933, "grad_norm": 0.27524805068969727, "learning_rate": 7.5276094294192855e-06, "loss": 0.385, "step": 19582 }, { "epoch": 1.9909516063440424, "grad_norm": 0.27690601348876953, "learning_rate": 7.5273032225377e-06, "loss": 0.3543, "step": 19583 }, { "epoch": 1.9910532736884914, "grad_norm": 0.28384265303611755, "learning_rate": 7.526997002924161e-06, "loss": 0.399, "step": 19584 }, { "epoch": 1.9911549410329403, "grad_norm": 0.284220814704895, "learning_rate": 7.526690770580211e-06, "loss": 0.3334, "step": 19585 }, { "epoch": 1.9912566083773893, "grad_norm": 0.28883621096611023, "learning_rate": 7.526384525507395e-06, "loss": 0.3641, "step": 19586 }, { "epoch": 1.9913582757218382, "grad_norm": 0.2596173584461212, "learning_rate": 7.526078267707253e-06, "loss": 0.3939, "step": 19587 }, { "epoch": 1.9914599430662872, "grad_norm": 0.2541208565235138, "learning_rate": 7.525771997181329e-06, "loss": 0.3525, "step": 19588 }, { "epoch": 1.991561610410736, "grad_norm": 0.26276615262031555, "learning_rate": 7.525465713931167e-06, "loss": 0.3428, "step": 19589 }, { "epoch": 1.991663277755185, "grad_norm": 0.2741941511631012, "learning_rate": 7.525159417958308e-06, "loss": 0.3999, "step": 19590 }, { "epoch": 1.991764945099634, "grad_norm": 0.27000904083251953, "learning_rate": 7.524853109264296e-06, "loss": 0.3477, "step": 19591 }, { "epoch": 1.991866612444083, "grad_norm": 0.2871954143047333, "learning_rate": 7.524546787850674e-06, "loss": 0.3415, "step": 19592 }, { "epoch": 1.9919682797885319, "grad_norm": 0.2842455506324768, "learning_rate": 7.524240453718985e-06, "loss": 0.348, "step": 19593 }, { "epoch": 1.9920699471329808, "grad_norm": 0.27505284547805786, "learning_rate": 7.523934106870773e-06, "loss": 0.3578, "step": 19594 }, { "epoch": 1.9921716144774297, "grad_norm": 0.27755507826805115, "learning_rate": 7.52362774730758e-06, "loss": 0.3784, "step": 19595 }, { "epoch": 1.9922732818218787, "grad_norm": 0.2935277223587036, "learning_rate": 7.523321375030951e-06, "loss": 0.388, "step": 19596 }, { "epoch": 1.9923749491663276, "grad_norm": 0.2724854648113251, "learning_rate": 7.523014990042427e-06, "loss": 0.321, "step": 19597 }, { "epoch": 1.9924766165107768, "grad_norm": 0.28944915533065796, "learning_rate": 7.522708592343554e-06, "loss": 0.3662, "step": 19598 }, { "epoch": 1.9925782838552257, "grad_norm": 0.2802143394947052, "learning_rate": 7.522402181935875e-06, "loss": 0.3641, "step": 19599 }, { "epoch": 1.9926799511996747, "grad_norm": 0.26431816816329956, "learning_rate": 7.522095758820933e-06, "loss": 0.337, "step": 19600 }, { "epoch": 1.9927816185441236, "grad_norm": 0.27182120084762573, "learning_rate": 7.521789323000273e-06, "loss": 0.3301, "step": 19601 }, { "epoch": 1.9928832858885726, "grad_norm": 0.2679717242717743, "learning_rate": 7.5214828744754364e-06, "loss": 0.3479, "step": 19602 }, { "epoch": 1.9929849532330217, "grad_norm": 0.2674984633922577, "learning_rate": 7.521176413247967e-06, "loss": 0.363, "step": 19603 }, { "epoch": 1.9930866205774707, "grad_norm": 0.24954943358898163, "learning_rate": 7.520869939319412e-06, "loss": 0.3386, "step": 19604 }, { "epoch": 1.9931882879219196, "grad_norm": 0.2918853461742401, "learning_rate": 7.5205634526913115e-06, "loss": 0.3601, "step": 19605 }, { "epoch": 1.9932899552663685, "grad_norm": 0.2861446440219879, "learning_rate": 7.5202569533652125e-06, "loss": 0.3411, "step": 19606 }, { "epoch": 1.9933916226108175, "grad_norm": 0.2693787217140198, "learning_rate": 7.519950441342658e-06, "loss": 0.3793, "step": 19607 }, { "epoch": 1.9934932899552664, "grad_norm": 0.289743572473526, "learning_rate": 7.519643916625192e-06, "loss": 0.3695, "step": 19608 }, { "epoch": 1.9935949572997154, "grad_norm": 0.26091688871383667, "learning_rate": 7.519337379214359e-06, "loss": 0.3071, "step": 19609 }, { "epoch": 1.9936966246441643, "grad_norm": 0.2710237205028534, "learning_rate": 7.519030829111702e-06, "loss": 0.3326, "step": 19610 }, { "epoch": 1.9937982919886132, "grad_norm": 0.27739858627319336, "learning_rate": 7.518724266318767e-06, "loss": 0.3821, "step": 19611 }, { "epoch": 1.9938999593330622, "grad_norm": 0.301286518573761, "learning_rate": 7.518417690837098e-06, "loss": 0.3594, "step": 19612 }, { "epoch": 1.9940016266775111, "grad_norm": 0.27294978499412537, "learning_rate": 7.518111102668238e-06, "loss": 0.3074, "step": 19613 }, { "epoch": 1.99410329402196, "grad_norm": 0.2693938910961151, "learning_rate": 7.517804501813732e-06, "loss": 0.3577, "step": 19614 }, { "epoch": 1.994204961366409, "grad_norm": 0.27079612016677856, "learning_rate": 7.517497888275127e-06, "loss": 0.3519, "step": 19615 }, { "epoch": 1.994306628710858, "grad_norm": 0.25530868768692017, "learning_rate": 7.517191262053967e-06, "loss": 0.3655, "step": 19616 }, { "epoch": 1.994408296055307, "grad_norm": 0.27121126651763916, "learning_rate": 7.516884623151793e-06, "loss": 0.3528, "step": 19617 }, { "epoch": 1.9945099633997558, "grad_norm": 0.28022316098213196, "learning_rate": 7.516577971570152e-06, "loss": 0.3603, "step": 19618 }, { "epoch": 1.994611630744205, "grad_norm": 0.2750369608402252, "learning_rate": 7.516271307310592e-06, "loss": 0.3773, "step": 19619 }, { "epoch": 1.994713298088654, "grad_norm": 0.30036914348602295, "learning_rate": 7.515964630374654e-06, "loss": 0.3979, "step": 19620 }, { "epoch": 1.9948149654331029, "grad_norm": 0.2523811459541321, "learning_rate": 7.515657940763883e-06, "loss": 0.3472, "step": 19621 }, { "epoch": 1.9949166327775518, "grad_norm": 0.2530333995819092, "learning_rate": 7.515351238479827e-06, "loss": 0.3265, "step": 19622 }, { "epoch": 1.9950183001220008, "grad_norm": 0.2902931869029999, "learning_rate": 7.515044523524027e-06, "loss": 0.3539, "step": 19623 }, { "epoch": 1.99511996746645, "grad_norm": 0.29221946001052856, "learning_rate": 7.5147377958980305e-06, "loss": 0.345, "step": 19624 }, { "epoch": 1.9952216348108989, "grad_norm": 0.2874942421913147, "learning_rate": 7.514431055603384e-06, "loss": 0.3342, "step": 19625 }, { "epoch": 1.9953233021553478, "grad_norm": 0.28015345335006714, "learning_rate": 7.514124302641631e-06, "loss": 0.3816, "step": 19626 }, { "epoch": 1.9954249694997968, "grad_norm": 0.2858276665210724, "learning_rate": 7.513817537014319e-06, "loss": 0.342, "step": 19627 }, { "epoch": 1.9955266368442457, "grad_norm": 0.2807672917842865, "learning_rate": 7.513510758722987e-06, "loss": 0.3635, "step": 19628 }, { "epoch": 1.9956283041886946, "grad_norm": 0.28036215901374817, "learning_rate": 7.513203967769188e-06, "loss": 0.36, "step": 19629 }, { "epoch": 1.9957299715331436, "grad_norm": 0.2682492136955261, "learning_rate": 7.512897164154465e-06, "loss": 0.3499, "step": 19630 }, { "epoch": 1.9958316388775925, "grad_norm": 0.31192174553871155, "learning_rate": 7.512590347880362e-06, "loss": 0.3766, "step": 19631 }, { "epoch": 1.9959333062220415, "grad_norm": 0.2801617980003357, "learning_rate": 7.512283518948428e-06, "loss": 0.3306, "step": 19632 }, { "epoch": 1.9960349735664904, "grad_norm": 0.2726544737815857, "learning_rate": 7.511976677360204e-06, "loss": 0.3747, "step": 19633 }, { "epoch": 1.9961366409109393, "grad_norm": 0.28049731254577637, "learning_rate": 7.51166982311724e-06, "loss": 0.3364, "step": 19634 }, { "epoch": 1.9962383082553883, "grad_norm": 0.2883530855178833, "learning_rate": 7.51136295622108e-06, "loss": 0.3525, "step": 19635 }, { "epoch": 1.9963399755998372, "grad_norm": 0.2775711417198181, "learning_rate": 7.5110560766732695e-06, "loss": 0.3803, "step": 19636 }, { "epoch": 1.9964416429442862, "grad_norm": 0.2661595344543457, "learning_rate": 7.510749184475356e-06, "loss": 0.3555, "step": 19637 }, { "epoch": 1.996543310288735, "grad_norm": 0.25348371267318726, "learning_rate": 7.5104422796288846e-06, "loss": 0.3536, "step": 19638 }, { "epoch": 1.9966449776331843, "grad_norm": 0.25891298055648804, "learning_rate": 7.510135362135402e-06, "loss": 0.3601, "step": 19639 }, { "epoch": 1.9967466449776332, "grad_norm": 0.2811260223388672, "learning_rate": 7.509828431996453e-06, "loss": 0.3559, "step": 19640 }, { "epoch": 1.9968483123220822, "grad_norm": 0.2597295641899109, "learning_rate": 7.509521489213586e-06, "loss": 0.3582, "step": 19641 }, { "epoch": 1.996949979666531, "grad_norm": 0.27431759238243103, "learning_rate": 7.5092145337883456e-06, "loss": 0.3498, "step": 19642 }, { "epoch": 1.99705164701098, "grad_norm": 0.27352553606033325, "learning_rate": 7.508907565722279e-06, "loss": 0.3761, "step": 19643 }, { "epoch": 1.9971533143554292, "grad_norm": 0.29827359318733215, "learning_rate": 7.508600585016931e-06, "loss": 0.38, "step": 19644 }, { "epoch": 1.9972549816998781, "grad_norm": 0.28914931416511536, "learning_rate": 7.508293591673849e-06, "loss": 0.3683, "step": 19645 }, { "epoch": 1.997356649044327, "grad_norm": 0.26952064037323, "learning_rate": 7.507986585694582e-06, "loss": 0.3379, "step": 19646 }, { "epoch": 1.997458316388776, "grad_norm": 0.28276002407073975, "learning_rate": 7.507679567080674e-06, "loss": 0.3798, "step": 19647 }, { "epoch": 1.997559983733225, "grad_norm": 0.29805782437324524, "learning_rate": 7.507372535833672e-06, "loss": 0.3642, "step": 19648 }, { "epoch": 1.997661651077674, "grad_norm": 0.26925498247146606, "learning_rate": 7.507065491955122e-06, "loss": 0.34, "step": 19649 }, { "epoch": 1.9977633184221228, "grad_norm": 0.2730850577354431, "learning_rate": 7.506758435446573e-06, "loss": 0.3982, "step": 19650 }, { "epoch": 1.9978649857665718, "grad_norm": 0.2706328332424164, "learning_rate": 7.50645136630957e-06, "loss": 0.3748, "step": 19651 }, { "epoch": 1.9979666531110207, "grad_norm": 0.27611303329467773, "learning_rate": 7.506144284545662e-06, "loss": 0.384, "step": 19652 }, { "epoch": 1.9980683204554697, "grad_norm": 0.2572900056838989, "learning_rate": 7.505837190156393e-06, "loss": 0.3437, "step": 19653 }, { "epoch": 1.9981699877999186, "grad_norm": 0.2485494762659073, "learning_rate": 7.505530083143313e-06, "loss": 0.35, "step": 19654 }, { "epoch": 1.9982716551443676, "grad_norm": 0.278634637594223, "learning_rate": 7.5052229635079675e-06, "loss": 0.3443, "step": 19655 }, { "epoch": 1.9983733224888165, "grad_norm": 0.2655584514141083, "learning_rate": 7.504915831251904e-06, "loss": 0.3829, "step": 19656 }, { "epoch": 1.9984749898332654, "grad_norm": 0.2660953998565674, "learning_rate": 7.50460868637667e-06, "loss": 0.3755, "step": 19657 }, { "epoch": 1.9985766571777144, "grad_norm": 0.2664497494697571, "learning_rate": 7.504301528883812e-06, "loss": 0.3328, "step": 19658 }, { "epoch": 1.9986783245221633, "grad_norm": 0.2765304744243622, "learning_rate": 7.503994358774877e-06, "loss": 0.3477, "step": 19659 }, { "epoch": 1.9987799918666125, "grad_norm": 0.28387290239334106, "learning_rate": 7.503687176051415e-06, "loss": 0.3799, "step": 19660 }, { "epoch": 1.9988816592110614, "grad_norm": 0.26903945207595825, "learning_rate": 7.503379980714972e-06, "loss": 0.3686, "step": 19661 }, { "epoch": 1.9989833265555104, "grad_norm": 0.24582423269748688, "learning_rate": 7.503072772767096e-06, "loss": 0.316, "step": 19662 }, { "epoch": 1.9990849938999593, "grad_norm": 0.26507192850112915, "learning_rate": 7.502765552209332e-06, "loss": 0.3723, "step": 19663 }, { "epoch": 1.9991866612444082, "grad_norm": 0.26475462317466736, "learning_rate": 7.502458319043232e-06, "loss": 0.3139, "step": 19664 }, { "epoch": 1.9992883285888574, "grad_norm": 0.2949521243572235, "learning_rate": 7.502151073270341e-06, "loss": 0.413, "step": 19665 }, { "epoch": 1.9993899959333064, "grad_norm": 0.2688848674297333, "learning_rate": 7.501843814892207e-06, "loss": 0.3868, "step": 19666 }, { "epoch": 1.9994916632777553, "grad_norm": 0.2713521122932434, "learning_rate": 7.501536543910379e-06, "loss": 0.3584, "step": 19667 }, { "epoch": 1.9995933306222042, "grad_norm": 0.2698347866535187, "learning_rate": 7.501229260326405e-06, "loss": 0.3713, "step": 19668 }, { "epoch": 1.9996949979666532, "grad_norm": 0.26568537950515747, "learning_rate": 7.5009219641418315e-06, "loss": 0.3677, "step": 19669 }, { "epoch": 1.9997966653111021, "grad_norm": 0.2781897187232971, "learning_rate": 7.500614655358207e-06, "loss": 0.3439, "step": 19670 }, { "epoch": 1.999898332655551, "grad_norm": 0.26793697476387024, "learning_rate": 7.5003073339770815e-06, "loss": 0.3308, "step": 19671 }, { "epoch": 2.0, "grad_norm": 0.2665131688117981, "learning_rate": 7.500000000000001e-06, "loss": 0.3709, "step": 19672 }, { "epoch": 2.000101667344449, "grad_norm": 0.2559494376182556, "learning_rate": 7.499692653428514e-06, "loss": 0.3113, "step": 19673 }, { "epoch": 2.000203334688898, "grad_norm": 0.2610194683074951, "learning_rate": 7.49938529426417e-06, "loss": 0.3302, "step": 19674 }, { "epoch": 2.000305002033347, "grad_norm": 0.267328679561615, "learning_rate": 7.499077922508517e-06, "loss": 0.3675, "step": 19675 }, { "epoch": 2.0004066693777958, "grad_norm": 0.2634742558002472, "learning_rate": 7.498770538163103e-06, "loss": 0.3065, "step": 19676 }, { "epoch": 2.0005083367222447, "grad_norm": 0.2978297770023346, "learning_rate": 7.498463141229478e-06, "loss": 0.3477, "step": 19677 }, { "epoch": 2.0006100040666936, "grad_norm": 0.27406227588653564, "learning_rate": 7.498155731709188e-06, "loss": 0.3177, "step": 19678 }, { "epoch": 2.0007116714111426, "grad_norm": 0.29066574573516846, "learning_rate": 7.497848309603782e-06, "loss": 0.3276, "step": 19679 }, { "epoch": 2.0008133387555915, "grad_norm": 0.2861016094684601, "learning_rate": 7.497540874914813e-06, "loss": 0.3203, "step": 19680 }, { "epoch": 2.0009150061000405, "grad_norm": 0.27882590889930725, "learning_rate": 7.497233427643824e-06, "loss": 0.3409, "step": 19681 }, { "epoch": 2.0010166734444894, "grad_norm": 0.2820242941379547, "learning_rate": 7.496925967792368e-06, "loss": 0.3086, "step": 19682 }, { "epoch": 2.001118340788939, "grad_norm": 0.27334392070770264, "learning_rate": 7.496618495361992e-06, "loss": 0.3396, "step": 19683 }, { "epoch": 2.0012200081333877, "grad_norm": 0.2606047987937927, "learning_rate": 7.496311010354243e-06, "loss": 0.3448, "step": 19684 }, { "epoch": 2.0013216754778367, "grad_norm": 0.2697124481201172, "learning_rate": 7.496003512770675e-06, "loss": 0.3321, "step": 19685 }, { "epoch": 2.0014233428222856, "grad_norm": 0.27266794443130493, "learning_rate": 7.495696002612833e-06, "loss": 0.3192, "step": 19686 }, { "epoch": 2.0015250101667346, "grad_norm": 0.2636544704437256, "learning_rate": 7.495388479882269e-06, "loss": 0.3463, "step": 19687 }, { "epoch": 2.0016266775111835, "grad_norm": 0.27144235372543335, "learning_rate": 7.495080944580531e-06, "loss": 0.3496, "step": 19688 }, { "epoch": 2.0017283448556324, "grad_norm": 0.28311651945114136, "learning_rate": 7.494773396709166e-06, "loss": 0.3504, "step": 19689 }, { "epoch": 2.0018300122000814, "grad_norm": 0.26670005917549133, "learning_rate": 7.494465836269728e-06, "loss": 0.3213, "step": 19690 }, { "epoch": 2.0019316795445303, "grad_norm": 0.2688846290111542, "learning_rate": 7.494158263263762e-06, "loss": 0.3177, "step": 19691 }, { "epoch": 2.0020333468889793, "grad_norm": 0.2833654284477234, "learning_rate": 7.49385067769282e-06, "loss": 0.3338, "step": 19692 }, { "epoch": 2.002135014233428, "grad_norm": 0.2790255844593048, "learning_rate": 7.493543079558452e-06, "loss": 0.328, "step": 19693 }, { "epoch": 2.002236681577877, "grad_norm": 0.27067798376083374, "learning_rate": 7.493235468862203e-06, "loss": 0.3135, "step": 19694 }, { "epoch": 2.002338348922326, "grad_norm": 0.2710864245891571, "learning_rate": 7.49292784560563e-06, "loss": 0.3391, "step": 19695 }, { "epoch": 2.002440016266775, "grad_norm": 0.29110434651374817, "learning_rate": 7.4926202097902775e-06, "loss": 0.322, "step": 19696 }, { "epoch": 2.002541683611224, "grad_norm": 0.27403879165649414, "learning_rate": 7.492312561417697e-06, "loss": 0.3603, "step": 19697 }, { "epoch": 2.002643350955673, "grad_norm": 0.26941925287246704, "learning_rate": 7.492004900489438e-06, "loss": 0.3495, "step": 19698 }, { "epoch": 2.002745018300122, "grad_norm": 0.2711637020111084, "learning_rate": 7.49169722700705e-06, "loss": 0.3499, "step": 19699 }, { "epoch": 2.002846685644571, "grad_norm": 0.29865556955337524, "learning_rate": 7.4913895409720835e-06, "loss": 0.3148, "step": 19700 }, { "epoch": 2.0029483529890197, "grad_norm": 0.2896521985530853, "learning_rate": 7.49108184238609e-06, "loss": 0.3559, "step": 19701 }, { "epoch": 2.0030500203334687, "grad_norm": 0.2619451582431793, "learning_rate": 7.4907741312506165e-06, "loss": 0.3492, "step": 19702 }, { "epoch": 2.003151687677918, "grad_norm": 0.2664721608161926, "learning_rate": 7.490466407567216e-06, "loss": 0.3372, "step": 19703 }, { "epoch": 2.003253355022367, "grad_norm": 0.2992296814918518, "learning_rate": 7.4901586713374355e-06, "loss": 0.3417, "step": 19704 }, { "epoch": 2.003355022366816, "grad_norm": 0.2922840416431427, "learning_rate": 7.489850922562829e-06, "loss": 0.3497, "step": 19705 }, { "epoch": 2.003456689711265, "grad_norm": 0.26403969526290894, "learning_rate": 7.489543161244945e-06, "loss": 0.325, "step": 19706 }, { "epoch": 2.003558357055714, "grad_norm": 0.30683591961860657, "learning_rate": 7.4892353873853356e-06, "loss": 0.341, "step": 19707 }, { "epoch": 2.0036600244001628, "grad_norm": 0.2866138219833374, "learning_rate": 7.488927600985548e-06, "loss": 0.3353, "step": 19708 }, { "epoch": 2.0037616917446117, "grad_norm": 0.2889030873775482, "learning_rate": 7.488619802047134e-06, "loss": 0.34, "step": 19709 }, { "epoch": 2.0038633590890607, "grad_norm": 0.2773015797138214, "learning_rate": 7.488311990571646e-06, "loss": 0.3388, "step": 19710 }, { "epoch": 2.0039650264335096, "grad_norm": 0.26376697421073914, "learning_rate": 7.488004166560634e-06, "loss": 0.332, "step": 19711 }, { "epoch": 2.0040666937779585, "grad_norm": 0.2921769320964813, "learning_rate": 7.487696330015648e-06, "loss": 0.3503, "step": 19712 }, { "epoch": 2.0041683611224075, "grad_norm": 0.2824597954750061, "learning_rate": 7.487388480938239e-06, "loss": 0.3344, "step": 19713 }, { "epoch": 2.0042700284668564, "grad_norm": 0.27233949303627014, "learning_rate": 7.487080619329957e-06, "loss": 0.3197, "step": 19714 }, { "epoch": 2.0043716958113054, "grad_norm": 0.2830701172351837, "learning_rate": 7.486772745192354e-06, "loss": 0.3179, "step": 19715 }, { "epoch": 2.0044733631557543, "grad_norm": 0.2712413966655731, "learning_rate": 7.486464858526982e-06, "loss": 0.3255, "step": 19716 }, { "epoch": 2.0045750305002032, "grad_norm": 0.2615457773208618, "learning_rate": 7.48615695933539e-06, "loss": 0.352, "step": 19717 }, { "epoch": 2.004676697844652, "grad_norm": 0.3140283524990082, "learning_rate": 7.48584904761913e-06, "loss": 0.3529, "step": 19718 }, { "epoch": 2.004778365189101, "grad_norm": 0.30923885107040405, "learning_rate": 7.485541123379754e-06, "loss": 0.3608, "step": 19719 }, { "epoch": 2.00488003253355, "grad_norm": 0.28526821732521057, "learning_rate": 7.485233186618811e-06, "loss": 0.3077, "step": 19720 }, { "epoch": 2.004981699877999, "grad_norm": 0.2692435681819916, "learning_rate": 7.484925237337855e-06, "loss": 0.3338, "step": 19721 }, { "epoch": 2.005083367222448, "grad_norm": 0.27004286646842957, "learning_rate": 7.484617275538436e-06, "loss": 0.3158, "step": 19722 }, { "epoch": 2.005185034566897, "grad_norm": 0.282153457403183, "learning_rate": 7.484309301222106e-06, "loss": 0.3535, "step": 19723 }, { "epoch": 2.0052867019113463, "grad_norm": 0.28403717279434204, "learning_rate": 7.4840013143904154e-06, "loss": 0.3025, "step": 19724 }, { "epoch": 2.0053883692557952, "grad_norm": 0.2847992479801178, "learning_rate": 7.483693315044916e-06, "loss": 0.359, "step": 19725 }, { "epoch": 2.005490036600244, "grad_norm": 0.26240551471710205, "learning_rate": 7.483385303187162e-06, "loss": 0.3308, "step": 19726 }, { "epoch": 2.005591703944693, "grad_norm": 0.2613714635372162, "learning_rate": 7.483077278818701e-06, "loss": 0.295, "step": 19727 }, { "epoch": 2.005693371289142, "grad_norm": 0.29133960604667664, "learning_rate": 7.482769241941088e-06, "loss": 0.3245, "step": 19728 }, { "epoch": 2.005795038633591, "grad_norm": 0.30533280968666077, "learning_rate": 7.482461192555872e-06, "loss": 0.3527, "step": 19729 }, { "epoch": 2.00589670597804, "grad_norm": 0.2701137065887451, "learning_rate": 7.482153130664607e-06, "loss": 0.3263, "step": 19730 }, { "epoch": 2.005998373322489, "grad_norm": 0.28368040919303894, "learning_rate": 7.481845056268846e-06, "loss": 0.3381, "step": 19731 }, { "epoch": 2.006100040666938, "grad_norm": 0.2723349332809448, "learning_rate": 7.481536969370138e-06, "loss": 0.3217, "step": 19732 }, { "epoch": 2.0062017080113868, "grad_norm": 0.2879738509654999, "learning_rate": 7.481228869970037e-06, "loss": 0.3371, "step": 19733 }, { "epoch": 2.0063033753558357, "grad_norm": 0.27231401205062866, "learning_rate": 7.480920758070093e-06, "loss": 0.3386, "step": 19734 }, { "epoch": 2.0064050427002846, "grad_norm": 0.27385836839675903, "learning_rate": 7.48061263367186e-06, "loss": 0.3148, "step": 19735 }, { "epoch": 2.0065067100447336, "grad_norm": 0.2656877636909485, "learning_rate": 7.480304496776892e-06, "loss": 0.3533, "step": 19736 }, { "epoch": 2.0066083773891825, "grad_norm": 0.27673614025115967, "learning_rate": 7.479996347386737e-06, "loss": 0.3431, "step": 19737 }, { "epoch": 2.0067100447336315, "grad_norm": 0.2591179609298706, "learning_rate": 7.4796881855029525e-06, "loss": 0.3219, "step": 19738 }, { "epoch": 2.0068117120780804, "grad_norm": 0.2604687511920929, "learning_rate": 7.479380011127087e-06, "loss": 0.3329, "step": 19739 }, { "epoch": 2.0069133794225293, "grad_norm": 0.26077115535736084, "learning_rate": 7.479071824260693e-06, "loss": 0.3313, "step": 19740 }, { "epoch": 2.0070150467669783, "grad_norm": 0.2637123167514801, "learning_rate": 7.478763624905325e-06, "loss": 0.3339, "step": 19741 }, { "epoch": 2.0071167141114272, "grad_norm": 0.2639327943325043, "learning_rate": 7.478455413062535e-06, "loss": 0.3597, "step": 19742 }, { "epoch": 2.007218381455876, "grad_norm": 0.29107773303985596, "learning_rate": 7.478147188733876e-06, "loss": 0.3028, "step": 19743 }, { "epoch": 2.0073200488003256, "grad_norm": 0.2826443910598755, "learning_rate": 7.4778389519209e-06, "loss": 0.3584, "step": 19744 }, { "epoch": 2.0074217161447745, "grad_norm": 0.27088555693626404, "learning_rate": 7.47753070262516e-06, "loss": 0.3182, "step": 19745 }, { "epoch": 2.0075233834892234, "grad_norm": 0.2668166160583496, "learning_rate": 7.47722244084821e-06, "loss": 0.3417, "step": 19746 }, { "epoch": 2.0076250508336724, "grad_norm": 0.27202534675598145, "learning_rate": 7.476914166591601e-06, "loss": 0.3444, "step": 19747 }, { "epoch": 2.0077267181781213, "grad_norm": 0.27119114995002747, "learning_rate": 7.476605879856888e-06, "loss": 0.3278, "step": 19748 }, { "epoch": 2.0078283855225703, "grad_norm": 0.2679954469203949, "learning_rate": 7.476297580645622e-06, "loss": 0.3202, "step": 19749 }, { "epoch": 2.007930052867019, "grad_norm": 0.28021690249443054, "learning_rate": 7.475989268959357e-06, "loss": 0.3797, "step": 19750 }, { "epoch": 2.008031720211468, "grad_norm": 0.29119184613227844, "learning_rate": 7.475680944799648e-06, "loss": 0.3654, "step": 19751 }, { "epoch": 2.008133387555917, "grad_norm": 0.2760576009750366, "learning_rate": 7.475372608168046e-06, "loss": 0.3396, "step": 19752 }, { "epoch": 2.008235054900366, "grad_norm": 0.3034658133983612, "learning_rate": 7.475064259066105e-06, "loss": 0.362, "step": 19753 }, { "epoch": 2.008336722244815, "grad_norm": 0.28808319568634033, "learning_rate": 7.474755897495378e-06, "loss": 0.3308, "step": 19754 }, { "epoch": 2.008438389589264, "grad_norm": 0.31020358204841614, "learning_rate": 7.474447523457419e-06, "loss": 0.3646, "step": 19755 }, { "epoch": 2.008540056933713, "grad_norm": 0.28902506828308105, "learning_rate": 7.474139136953782e-06, "loss": 0.3518, "step": 19756 }, { "epoch": 2.008641724278162, "grad_norm": 0.3097357749938965, "learning_rate": 7.473830737986019e-06, "loss": 0.3293, "step": 19757 }, { "epoch": 2.0087433916226107, "grad_norm": 0.311779648065567, "learning_rate": 7.473522326555685e-06, "loss": 0.332, "step": 19758 }, { "epoch": 2.0088450589670597, "grad_norm": 0.3060608208179474, "learning_rate": 7.4732139026643336e-06, "loss": 0.37, "step": 19759 }, { "epoch": 2.0089467263115086, "grad_norm": 0.2865106463432312, "learning_rate": 7.4729054663135184e-06, "loss": 0.361, "step": 19760 }, { "epoch": 2.0090483936559576, "grad_norm": 0.3177827000617981, "learning_rate": 7.4725970175047934e-06, "loss": 0.3741, "step": 19761 }, { "epoch": 2.0091500610004065, "grad_norm": 0.30072957277297974, "learning_rate": 7.47228855623971e-06, "loss": 0.3335, "step": 19762 }, { "epoch": 2.0092517283448554, "grad_norm": 0.288580060005188, "learning_rate": 7.471980082519828e-06, "loss": 0.3217, "step": 19763 }, { "epoch": 2.0093533956893044, "grad_norm": 0.28781068325042725, "learning_rate": 7.471671596346695e-06, "loss": 0.3689, "step": 19764 }, { "epoch": 2.0094550630337538, "grad_norm": 0.2798904478549957, "learning_rate": 7.471363097721869e-06, "loss": 0.3222, "step": 19765 }, { "epoch": 2.0095567303782027, "grad_norm": 0.3182898461818695, "learning_rate": 7.471054586646903e-06, "loss": 0.3347, "step": 19766 }, { "epoch": 2.0096583977226516, "grad_norm": 0.2956169843673706, "learning_rate": 7.470746063123351e-06, "loss": 0.322, "step": 19767 }, { "epoch": 2.0097600650671006, "grad_norm": 0.2728760838508606, "learning_rate": 7.470437527152767e-06, "loss": 0.3002, "step": 19768 }, { "epoch": 2.0098617324115495, "grad_norm": 0.2672533094882965, "learning_rate": 7.470128978736706e-06, "loss": 0.3025, "step": 19769 }, { "epoch": 2.0099633997559985, "grad_norm": 0.2762434184551239, "learning_rate": 7.469820417876722e-06, "loss": 0.3201, "step": 19770 }, { "epoch": 2.0100650671004474, "grad_norm": 0.32984042167663574, "learning_rate": 7.469511844574371e-06, "loss": 0.3288, "step": 19771 }, { "epoch": 2.0101667344448964, "grad_norm": 0.29380130767822266, "learning_rate": 7.4692032588312055e-06, "loss": 0.3427, "step": 19772 }, { "epoch": 2.0102684017893453, "grad_norm": 0.2815975248813629, "learning_rate": 7.46889466064878e-06, "loss": 0.3527, "step": 19773 }, { "epoch": 2.0103700691337942, "grad_norm": 0.26627588272094727, "learning_rate": 7.46858605002865e-06, "loss": 0.3347, "step": 19774 }, { "epoch": 2.010471736478243, "grad_norm": 0.2950794994831085, "learning_rate": 7.46827742697237e-06, "loss": 0.3325, "step": 19775 }, { "epoch": 2.010573403822692, "grad_norm": 0.28858816623687744, "learning_rate": 7.467968791481496e-06, "loss": 0.3219, "step": 19776 }, { "epoch": 2.010675071167141, "grad_norm": 0.26791831851005554, "learning_rate": 7.467660143557583e-06, "loss": 0.3419, "step": 19777 }, { "epoch": 2.01077673851159, "grad_norm": 0.30544567108154297, "learning_rate": 7.467351483202181e-06, "loss": 0.308, "step": 19778 }, { "epoch": 2.010878405856039, "grad_norm": 0.29534533619880676, "learning_rate": 7.46704281041685e-06, "loss": 0.3181, "step": 19779 }, { "epoch": 2.010980073200488, "grad_norm": 0.2841753363609314, "learning_rate": 7.466734125203143e-06, "loss": 0.3083, "step": 19780 }, { "epoch": 2.011081740544937, "grad_norm": 0.32532402873039246, "learning_rate": 7.466425427562618e-06, "loss": 0.3403, "step": 19781 }, { "epoch": 2.0111834078893858, "grad_norm": 0.28988561034202576, "learning_rate": 7.466116717496826e-06, "loss": 0.367, "step": 19782 }, { "epoch": 2.0112850752338347, "grad_norm": 0.2963544428348541, "learning_rate": 7.4658079950073235e-06, "loss": 0.3091, "step": 19783 }, { "epoch": 2.0113867425782836, "grad_norm": 0.28373587131500244, "learning_rate": 7.4654992600956675e-06, "loss": 0.3033, "step": 19784 }, { "epoch": 2.011488409922733, "grad_norm": 0.3020053207874298, "learning_rate": 7.465190512763412e-06, "loss": 0.3728, "step": 19785 }, { "epoch": 2.011590077267182, "grad_norm": 0.2761256694793701, "learning_rate": 7.464881753012112e-06, "loss": 0.3601, "step": 19786 }, { "epoch": 2.011691744611631, "grad_norm": 0.2779301702976227, "learning_rate": 7.464572980843324e-06, "loss": 0.3326, "step": 19787 }, { "epoch": 2.01179341195608, "grad_norm": 0.28650712966918945, "learning_rate": 7.4642641962586025e-06, "loss": 0.3689, "step": 19788 }, { "epoch": 2.011895079300529, "grad_norm": 0.3004181683063507, "learning_rate": 7.4639553992595035e-06, "loss": 0.3035, "step": 19789 }, { "epoch": 2.0119967466449777, "grad_norm": 0.28662025928497314, "learning_rate": 7.463646589847584e-06, "loss": 0.3161, "step": 19790 }, { "epoch": 2.0120984139894267, "grad_norm": 0.2604750692844391, "learning_rate": 7.463337768024397e-06, "loss": 0.3253, "step": 19791 }, { "epoch": 2.0122000813338756, "grad_norm": 0.25328364968299866, "learning_rate": 7.463028933791501e-06, "loss": 0.3131, "step": 19792 }, { "epoch": 2.0123017486783246, "grad_norm": 0.27074524760246277, "learning_rate": 7.462720087150448e-06, "loss": 0.338, "step": 19793 }, { "epoch": 2.0124034160227735, "grad_norm": 0.30870312452316284, "learning_rate": 7.4624112281028e-06, "loss": 0.322, "step": 19794 }, { "epoch": 2.0125050833672224, "grad_norm": 0.2903730571269989, "learning_rate": 7.462102356650106e-06, "loss": 0.3224, "step": 19795 }, { "epoch": 2.0126067507116714, "grad_norm": 0.29374003410339355, "learning_rate": 7.461793472793927e-06, "loss": 0.2967, "step": 19796 }, { "epoch": 2.0127084180561203, "grad_norm": 0.25756171345710754, "learning_rate": 7.461484576535818e-06, "loss": 0.3436, "step": 19797 }, { "epoch": 2.0128100854005693, "grad_norm": 0.26965203881263733, "learning_rate": 7.461175667877334e-06, "loss": 0.3206, "step": 19798 }, { "epoch": 2.012911752745018, "grad_norm": 0.2873956263065338, "learning_rate": 7.460866746820032e-06, "loss": 0.3404, "step": 19799 }, { "epoch": 2.013013420089467, "grad_norm": 0.295170396566391, "learning_rate": 7.460557813365468e-06, "loss": 0.3323, "step": 19800 }, { "epoch": 2.013115087433916, "grad_norm": 0.28461191058158875, "learning_rate": 7.460248867515197e-06, "loss": 0.3053, "step": 19801 }, { "epoch": 2.013216754778365, "grad_norm": 0.2787421941757202, "learning_rate": 7.459939909270779e-06, "loss": 0.3174, "step": 19802 }, { "epoch": 2.013318422122814, "grad_norm": 0.2698934078216553, "learning_rate": 7.459630938633767e-06, "loss": 0.334, "step": 19803 }, { "epoch": 2.013420089467263, "grad_norm": 0.2722932696342468, "learning_rate": 7.459321955605721e-06, "loss": 0.3732, "step": 19804 }, { "epoch": 2.013521756811712, "grad_norm": 0.27290746569633484, "learning_rate": 7.4590129601881934e-06, "loss": 0.31, "step": 19805 }, { "epoch": 2.0136234241561612, "grad_norm": 0.2843509912490845, "learning_rate": 7.458703952382742e-06, "loss": 0.327, "step": 19806 }, { "epoch": 2.01372509150061, "grad_norm": 0.28828001022338867, "learning_rate": 7.458394932190927e-06, "loss": 0.3176, "step": 19807 }, { "epoch": 2.013826758845059, "grad_norm": 0.2636360228061676, "learning_rate": 7.4580858996143e-06, "loss": 0.3222, "step": 19808 }, { "epoch": 2.013928426189508, "grad_norm": 0.28634050488471985, "learning_rate": 7.457776854654423e-06, "loss": 0.3322, "step": 19809 }, { "epoch": 2.014030093533957, "grad_norm": 0.26274701952934265, "learning_rate": 7.457467797312848e-06, "loss": 0.3357, "step": 19810 }, { "epoch": 2.014131760878406, "grad_norm": 0.29470065236091614, "learning_rate": 7.457158727591135e-06, "loss": 0.3175, "step": 19811 }, { "epoch": 2.014233428222855, "grad_norm": 0.2762458324432373, "learning_rate": 7.456849645490841e-06, "loss": 0.3366, "step": 19812 }, { "epoch": 2.014335095567304, "grad_norm": 0.2815287709236145, "learning_rate": 7.456540551013521e-06, "loss": 0.3198, "step": 19813 }, { "epoch": 2.0144367629117528, "grad_norm": 0.27215781807899475, "learning_rate": 7.4562314441607355e-06, "loss": 0.2976, "step": 19814 }, { "epoch": 2.0145384302562017, "grad_norm": 0.26843130588531494, "learning_rate": 7.455922324934037e-06, "loss": 0.3236, "step": 19815 }, { "epoch": 2.0146400976006507, "grad_norm": 0.30088284611701965, "learning_rate": 7.455613193334987e-06, "loss": 0.3261, "step": 19816 }, { "epoch": 2.0147417649450996, "grad_norm": 0.260233074426651, "learning_rate": 7.455304049365142e-06, "loss": 0.3408, "step": 19817 }, { "epoch": 2.0148434322895485, "grad_norm": 0.2776186466217041, "learning_rate": 7.454994893026056e-06, "loss": 0.3464, "step": 19818 }, { "epoch": 2.0149450996339975, "grad_norm": 0.2705894112586975, "learning_rate": 7.454685724319292e-06, "loss": 0.3091, "step": 19819 }, { "epoch": 2.0150467669784464, "grad_norm": 0.27930405735969543, "learning_rate": 7.454376543246403e-06, "loss": 0.3201, "step": 19820 }, { "epoch": 2.0151484343228954, "grad_norm": 0.27519387006759644, "learning_rate": 7.454067349808949e-06, "loss": 0.3349, "step": 19821 }, { "epoch": 2.0152501016673443, "grad_norm": 0.27374547719955444, "learning_rate": 7.453758144008486e-06, "loss": 0.3388, "step": 19822 }, { "epoch": 2.0153517690117932, "grad_norm": 0.2639886140823364, "learning_rate": 7.453448925846574e-06, "loss": 0.3594, "step": 19823 }, { "epoch": 2.015453436356242, "grad_norm": 0.28567156195640564, "learning_rate": 7.453139695324768e-06, "loss": 0.3489, "step": 19824 }, { "epoch": 2.015555103700691, "grad_norm": 0.24538521468639374, "learning_rate": 7.452830452444627e-06, "loss": 0.312, "step": 19825 }, { "epoch": 2.0156567710451405, "grad_norm": 0.2803558111190796, "learning_rate": 7.452521197207709e-06, "loss": 0.2959, "step": 19826 }, { "epoch": 2.0157584383895895, "grad_norm": 0.2537277936935425, "learning_rate": 7.4522119296155735e-06, "loss": 0.3267, "step": 19827 }, { "epoch": 2.0158601057340384, "grad_norm": 0.27601099014282227, "learning_rate": 7.451902649669776e-06, "loss": 0.3454, "step": 19828 }, { "epoch": 2.0159617730784873, "grad_norm": 0.293903648853302, "learning_rate": 7.4515933573718766e-06, "loss": 0.3261, "step": 19829 }, { "epoch": 2.0160634404229363, "grad_norm": 0.28439390659332275, "learning_rate": 7.451284052723431e-06, "loss": 0.3288, "step": 19830 }, { "epoch": 2.0161651077673852, "grad_norm": 0.2788034975528717, "learning_rate": 7.450974735726e-06, "loss": 0.3664, "step": 19831 }, { "epoch": 2.016266775111834, "grad_norm": 0.2769289016723633, "learning_rate": 7.45066540638114e-06, "loss": 0.3414, "step": 19832 }, { "epoch": 2.016368442456283, "grad_norm": 0.305596262216568, "learning_rate": 7.45035606469041e-06, "loss": 0.3288, "step": 19833 }, { "epoch": 2.016470109800732, "grad_norm": 0.2742573022842407, "learning_rate": 7.45004671065537e-06, "loss": 0.3513, "step": 19834 }, { "epoch": 2.016571777145181, "grad_norm": 0.2741946280002594, "learning_rate": 7.449737344277574e-06, "loss": 0.3197, "step": 19835 }, { "epoch": 2.01667344448963, "grad_norm": 0.2908695638179779, "learning_rate": 7.449427965558585e-06, "loss": 0.354, "step": 19836 }, { "epoch": 2.016775111834079, "grad_norm": 0.2844820022583008, "learning_rate": 7.449118574499961e-06, "loss": 0.3441, "step": 19837 }, { "epoch": 2.016876779178528, "grad_norm": 0.28383174538612366, "learning_rate": 7.4488091711032585e-06, "loss": 0.3731, "step": 19838 }, { "epoch": 2.0169784465229768, "grad_norm": 0.28419116139411926, "learning_rate": 7.448499755370038e-06, "loss": 0.338, "step": 19839 }, { "epoch": 2.0170801138674257, "grad_norm": 0.28327497839927673, "learning_rate": 7.4481903273018566e-06, "loss": 0.3427, "step": 19840 }, { "epoch": 2.0171817812118746, "grad_norm": 0.2851791977882385, "learning_rate": 7.447880886900274e-06, "loss": 0.357, "step": 19841 }, { "epoch": 2.0172834485563236, "grad_norm": 0.31374403834342957, "learning_rate": 7.447571434166851e-06, "loss": 0.3708, "step": 19842 }, { "epoch": 2.0173851159007725, "grad_norm": 0.27983358502388, "learning_rate": 7.4472619691031435e-06, "loss": 0.3376, "step": 19843 }, { "epoch": 2.0174867832452215, "grad_norm": 0.28023040294647217, "learning_rate": 7.446952491710712e-06, "loss": 0.3302, "step": 19844 }, { "epoch": 2.0175884505896704, "grad_norm": 0.3135841488838196, "learning_rate": 7.446643001991114e-06, "loss": 0.3301, "step": 19845 }, { "epoch": 2.0176901179341193, "grad_norm": 0.29690736532211304, "learning_rate": 7.446333499945911e-06, "loss": 0.3703, "step": 19846 }, { "epoch": 2.0177917852785687, "grad_norm": 0.29554474353790283, "learning_rate": 7.446023985576662e-06, "loss": 0.3815, "step": 19847 }, { "epoch": 2.0178934526230177, "grad_norm": 0.298819363117218, "learning_rate": 7.445714458884925e-06, "loss": 0.3399, "step": 19848 }, { "epoch": 2.0179951199674666, "grad_norm": 0.3090037405490875, "learning_rate": 7.445404919872259e-06, "loss": 0.346, "step": 19849 }, { "epoch": 2.0180967873119156, "grad_norm": 0.2553383409976959, "learning_rate": 7.445095368540225e-06, "loss": 0.3145, "step": 19850 }, { "epoch": 2.0181984546563645, "grad_norm": 0.26325523853302, "learning_rate": 7.44478580489038e-06, "loss": 0.3664, "step": 19851 }, { "epoch": 2.0183001220008134, "grad_norm": 0.2772153615951538, "learning_rate": 7.444476228924287e-06, "loss": 0.3297, "step": 19852 }, { "epoch": 2.0184017893452624, "grad_norm": 0.2820109724998474, "learning_rate": 7.444166640643504e-06, "loss": 0.3326, "step": 19853 }, { "epoch": 2.0185034566897113, "grad_norm": 0.2834419310092926, "learning_rate": 7.443857040049588e-06, "loss": 0.3207, "step": 19854 }, { "epoch": 2.0186051240341603, "grad_norm": 0.27813631296157837, "learning_rate": 7.4435474271441025e-06, "loss": 0.3218, "step": 19855 }, { "epoch": 2.018706791378609, "grad_norm": 0.26687371730804443, "learning_rate": 7.443237801928604e-06, "loss": 0.3186, "step": 19856 }, { "epoch": 2.018808458723058, "grad_norm": 0.26986223459243774, "learning_rate": 7.442928164404657e-06, "loss": 0.3224, "step": 19857 }, { "epoch": 2.018910126067507, "grad_norm": 0.2695758640766144, "learning_rate": 7.442618514573817e-06, "loss": 0.3068, "step": 19858 }, { "epoch": 2.019011793411956, "grad_norm": 0.2719757854938507, "learning_rate": 7.442308852437645e-06, "loss": 0.3046, "step": 19859 }, { "epoch": 2.019113460756405, "grad_norm": 0.28415390849113464, "learning_rate": 7.4419991779977006e-06, "loss": 0.3408, "step": 19860 }, { "epoch": 2.019215128100854, "grad_norm": 0.29675230383872986, "learning_rate": 7.441689491255546e-06, "loss": 0.3292, "step": 19861 }, { "epoch": 2.019316795445303, "grad_norm": 0.2731279134750366, "learning_rate": 7.44137979221274e-06, "loss": 0.3253, "step": 19862 }, { "epoch": 2.019418462789752, "grad_norm": 0.27209919691085815, "learning_rate": 7.4410700808708436e-06, "loss": 0.3576, "step": 19863 }, { "epoch": 2.0195201301342007, "grad_norm": 0.2918727397918701, "learning_rate": 7.4407603572314145e-06, "loss": 0.3283, "step": 19864 }, { "epoch": 2.0196217974786497, "grad_norm": 0.29694369435310364, "learning_rate": 7.440450621296015e-06, "loss": 0.3352, "step": 19865 }, { "epoch": 2.0197234648230986, "grad_norm": 0.2688201367855072, "learning_rate": 7.440140873066206e-06, "loss": 0.3362, "step": 19866 }, { "epoch": 2.019825132167548, "grad_norm": 0.2824976146221161, "learning_rate": 7.439831112543547e-06, "loss": 0.3337, "step": 19867 }, { "epoch": 2.019926799511997, "grad_norm": 0.28609955310821533, "learning_rate": 7.439521339729599e-06, "loss": 0.3415, "step": 19868 }, { "epoch": 2.020028466856446, "grad_norm": 0.2777491509914398, "learning_rate": 7.43921155462592e-06, "loss": 0.3183, "step": 19869 }, { "epoch": 2.020130134200895, "grad_norm": 0.2741713225841522, "learning_rate": 7.438901757234074e-06, "loss": 0.3268, "step": 19870 }, { "epoch": 2.0202318015453438, "grad_norm": 0.27627211809158325, "learning_rate": 7.438591947555621e-06, "loss": 0.3916, "step": 19871 }, { "epoch": 2.0203334688897927, "grad_norm": 0.2603740990161896, "learning_rate": 7.438282125592121e-06, "loss": 0.3361, "step": 19872 }, { "epoch": 2.0204351362342416, "grad_norm": 0.2823377847671509, "learning_rate": 7.437972291345136e-06, "loss": 0.3288, "step": 19873 }, { "epoch": 2.0205368035786906, "grad_norm": 0.2802189290523529, "learning_rate": 7.437662444816225e-06, "loss": 0.3578, "step": 19874 }, { "epoch": 2.0206384709231395, "grad_norm": 0.2733452320098877, "learning_rate": 7.43735258600695e-06, "loss": 0.3321, "step": 19875 }, { "epoch": 2.0207401382675885, "grad_norm": 0.25843140482902527, "learning_rate": 7.437042714918871e-06, "loss": 0.3146, "step": 19876 }, { "epoch": 2.0208418056120374, "grad_norm": 0.29599639773368835, "learning_rate": 7.436732831553552e-06, "loss": 0.341, "step": 19877 }, { "epoch": 2.0209434729564864, "grad_norm": 0.28059980273246765, "learning_rate": 7.4364229359125505e-06, "loss": 0.3278, "step": 19878 }, { "epoch": 2.0210451403009353, "grad_norm": 0.2682357132434845, "learning_rate": 7.436113027997429e-06, "loss": 0.3233, "step": 19879 }, { "epoch": 2.0211468076453842, "grad_norm": 0.2638145983219147, "learning_rate": 7.43580310780975e-06, "loss": 0.33, "step": 19880 }, { "epoch": 2.021248474989833, "grad_norm": 0.26840564608573914, "learning_rate": 7.435493175351074e-06, "loss": 0.3446, "step": 19881 }, { "epoch": 2.021350142334282, "grad_norm": 0.2935432195663452, "learning_rate": 7.435183230622961e-06, "loss": 0.3619, "step": 19882 }, { "epoch": 2.021451809678731, "grad_norm": 0.26791608333587646, "learning_rate": 7.434873273626974e-06, "loss": 0.3566, "step": 19883 }, { "epoch": 2.02155347702318, "grad_norm": 0.2848019301891327, "learning_rate": 7.434563304364675e-06, "loss": 0.3521, "step": 19884 }, { "epoch": 2.021655144367629, "grad_norm": 0.26481959223747253, "learning_rate": 7.434253322837623e-06, "loss": 0.3203, "step": 19885 }, { "epoch": 2.021756811712078, "grad_norm": 0.3150217831134796, "learning_rate": 7.433943329047381e-06, "loss": 0.3451, "step": 19886 }, { "epoch": 2.021858479056527, "grad_norm": 0.28834155201911926, "learning_rate": 7.4336333229955126e-06, "loss": 0.3324, "step": 19887 }, { "epoch": 2.021960146400976, "grad_norm": 0.2572198510169983, "learning_rate": 7.433323304683577e-06, "loss": 0.3322, "step": 19888 }, { "epoch": 2.022061813745425, "grad_norm": 0.3024367690086365, "learning_rate": 7.433013274113137e-06, "loss": 0.3391, "step": 19889 }, { "epoch": 2.022163481089874, "grad_norm": 0.32255420088768005, "learning_rate": 7.432703231285755e-06, "loss": 0.3108, "step": 19890 }, { "epoch": 2.022265148434323, "grad_norm": 0.27867335081100464, "learning_rate": 7.4323931762029914e-06, "loss": 0.3472, "step": 19891 }, { "epoch": 2.022366815778772, "grad_norm": 0.279178649187088, "learning_rate": 7.432083108866408e-06, "loss": 0.3508, "step": 19892 }, { "epoch": 2.022468483123221, "grad_norm": 0.27094268798828125, "learning_rate": 7.43177302927757e-06, "loss": 0.3477, "step": 19893 }, { "epoch": 2.02257015046767, "grad_norm": 0.2679741382598877, "learning_rate": 7.431462937438037e-06, "loss": 0.3241, "step": 19894 }, { "epoch": 2.022671817812119, "grad_norm": 0.30414310097694397, "learning_rate": 7.431152833349371e-06, "loss": 0.3459, "step": 19895 }, { "epoch": 2.0227734851565677, "grad_norm": 0.2912982702255249, "learning_rate": 7.430842717013134e-06, "loss": 0.3348, "step": 19896 }, { "epoch": 2.0228751525010167, "grad_norm": 0.29610520601272583, "learning_rate": 7.4305325884308885e-06, "loss": 0.3102, "step": 19897 }, { "epoch": 2.0229768198454656, "grad_norm": 0.2849031984806061, "learning_rate": 7.4302224476042e-06, "loss": 0.3466, "step": 19898 }, { "epoch": 2.0230784871899146, "grad_norm": 0.2680743634700775, "learning_rate": 7.429912294534626e-06, "loss": 0.317, "step": 19899 }, { "epoch": 2.0231801545343635, "grad_norm": 0.2725289762020111, "learning_rate": 7.429602129223733e-06, "loss": 0.3804, "step": 19900 }, { "epoch": 2.0232818218788124, "grad_norm": 0.2817048132419586, "learning_rate": 7.42929195167308e-06, "loss": 0.3421, "step": 19901 }, { "epoch": 2.0233834892232614, "grad_norm": 0.2635151445865631, "learning_rate": 7.428981761884232e-06, "loss": 0.3261, "step": 19902 }, { "epoch": 2.0234851565677103, "grad_norm": 0.2708628475666046, "learning_rate": 7.428671559858752e-06, "loss": 0.3436, "step": 19903 }, { "epoch": 2.0235868239121593, "grad_norm": 0.260925829410553, "learning_rate": 7.4283613455982015e-06, "loss": 0.3157, "step": 19904 }, { "epoch": 2.023688491256608, "grad_norm": 0.27964940667152405, "learning_rate": 7.428051119104144e-06, "loss": 0.3437, "step": 19905 }, { "epoch": 2.023790158601057, "grad_norm": 0.24886204302310944, "learning_rate": 7.42774088037814e-06, "loss": 0.321, "step": 19906 }, { "epoch": 2.023891825945506, "grad_norm": 0.28814497590065, "learning_rate": 7.427430629421757e-06, "loss": 0.3563, "step": 19907 }, { "epoch": 2.0239934932899555, "grad_norm": 0.2923535406589508, "learning_rate": 7.427120366236554e-06, "loss": 0.3274, "step": 19908 }, { "epoch": 2.0240951606344044, "grad_norm": 0.28720977902412415, "learning_rate": 7.426810090824095e-06, "loss": 0.3083, "step": 19909 }, { "epoch": 2.0241968279788534, "grad_norm": 0.2706294655799866, "learning_rate": 7.426499803185945e-06, "loss": 0.3219, "step": 19910 }, { "epoch": 2.0242984953233023, "grad_norm": 0.2562430500984192, "learning_rate": 7.426189503323664e-06, "loss": 0.3174, "step": 19911 }, { "epoch": 2.0244001626677512, "grad_norm": 0.2784916162490845, "learning_rate": 7.425879191238816e-06, "loss": 0.3227, "step": 19912 }, { "epoch": 2.0245018300122, "grad_norm": 0.29245975613594055, "learning_rate": 7.425568866932969e-06, "loss": 0.3379, "step": 19913 }, { "epoch": 2.024603497356649, "grad_norm": 0.28935515880584717, "learning_rate": 7.425258530407679e-06, "loss": 0.3447, "step": 19914 }, { "epoch": 2.024705164701098, "grad_norm": 0.29706624150276184, "learning_rate": 7.424948181664513e-06, "loss": 0.3028, "step": 19915 }, { "epoch": 2.024806832045547, "grad_norm": 0.25542524456977844, "learning_rate": 7.424637820705035e-06, "loss": 0.3256, "step": 19916 }, { "epoch": 2.024908499389996, "grad_norm": 0.3053114712238312, "learning_rate": 7.424327447530808e-06, "loss": 0.31, "step": 19917 }, { "epoch": 2.025010166734445, "grad_norm": 0.2640821039676666, "learning_rate": 7.424017062143396e-06, "loss": 0.3016, "step": 19918 }, { "epoch": 2.025111834078894, "grad_norm": 0.27784672379493713, "learning_rate": 7.423706664544361e-06, "loss": 0.3058, "step": 19919 }, { "epoch": 2.0252135014233428, "grad_norm": 0.25053635239601135, "learning_rate": 7.423396254735269e-06, "loss": 0.345, "step": 19920 }, { "epoch": 2.0253151687677917, "grad_norm": 0.29351645708084106, "learning_rate": 7.423085832717681e-06, "loss": 0.3456, "step": 19921 }, { "epoch": 2.0254168361122407, "grad_norm": 0.28330832719802856, "learning_rate": 7.422775398493163e-06, "loss": 0.3193, "step": 19922 }, { "epoch": 2.0255185034566896, "grad_norm": 0.2756003141403198, "learning_rate": 7.42246495206328e-06, "loss": 0.3286, "step": 19923 }, { "epoch": 2.0256201708011385, "grad_norm": 0.2612514793872833, "learning_rate": 7.422154493429592e-06, "loss": 0.3312, "step": 19924 }, { "epoch": 2.0257218381455875, "grad_norm": 0.27267420291900635, "learning_rate": 7.421844022593666e-06, "loss": 0.3221, "step": 19925 }, { "epoch": 2.0258235054900364, "grad_norm": 0.271736741065979, "learning_rate": 7.4215335395570646e-06, "loss": 0.2967, "step": 19926 }, { "epoch": 2.0259251728344854, "grad_norm": 0.302700936794281, "learning_rate": 7.421223044321353e-06, "loss": 0.3267, "step": 19927 }, { "epoch": 2.0260268401789343, "grad_norm": 0.2798515260219574, "learning_rate": 7.420912536888096e-06, "loss": 0.3164, "step": 19928 }, { "epoch": 2.0261285075233837, "grad_norm": 0.2607036232948303, "learning_rate": 7.420602017258858e-06, "loss": 0.3355, "step": 19929 }, { "epoch": 2.0262301748678326, "grad_norm": 0.2934202253818512, "learning_rate": 7.420291485435199e-06, "loss": 0.3231, "step": 19930 }, { "epoch": 2.0263318422122816, "grad_norm": 0.2864862382411957, "learning_rate": 7.419980941418689e-06, "loss": 0.36, "step": 19931 }, { "epoch": 2.0264335095567305, "grad_norm": 0.278128445148468, "learning_rate": 7.419670385210888e-06, "loss": 0.3624, "step": 19932 }, { "epoch": 2.0265351769011795, "grad_norm": 0.29132309556007385, "learning_rate": 7.4193598168133655e-06, "loss": 0.3641, "step": 19933 }, { "epoch": 2.0266368442456284, "grad_norm": 0.30786147713661194, "learning_rate": 7.419049236227682e-06, "loss": 0.3308, "step": 19934 }, { "epoch": 2.0267385115900773, "grad_norm": 0.27076882123947144, "learning_rate": 7.418738643455402e-06, "loss": 0.3389, "step": 19935 }, { "epoch": 2.0268401789345263, "grad_norm": 0.28383898735046387, "learning_rate": 7.418428038498092e-06, "loss": 0.3181, "step": 19936 }, { "epoch": 2.0269418462789752, "grad_norm": 0.2754775285720825, "learning_rate": 7.418117421357317e-06, "loss": 0.3452, "step": 19937 }, { "epoch": 2.027043513623424, "grad_norm": 0.2696172893047333, "learning_rate": 7.417806792034641e-06, "loss": 0.3214, "step": 19938 }, { "epoch": 2.027145180967873, "grad_norm": 0.2784556448459625, "learning_rate": 7.417496150531629e-06, "loss": 0.3365, "step": 19939 }, { "epoch": 2.027246848312322, "grad_norm": 0.2649492025375366, "learning_rate": 7.417185496849846e-06, "loss": 0.3287, "step": 19940 }, { "epoch": 2.027348515656771, "grad_norm": 0.2817789614200592, "learning_rate": 7.416874830990855e-06, "loss": 0.3443, "step": 19941 }, { "epoch": 2.02745018300122, "grad_norm": 0.2993023097515106, "learning_rate": 7.416564152956225e-06, "loss": 0.3127, "step": 19942 }, { "epoch": 2.027551850345669, "grad_norm": 0.26941514015197754, "learning_rate": 7.416253462747518e-06, "loss": 0.3381, "step": 19943 }, { "epoch": 2.027653517690118, "grad_norm": 0.2727583944797516, "learning_rate": 7.415942760366302e-06, "loss": 0.3293, "step": 19944 }, { "epoch": 2.0277551850345668, "grad_norm": 0.2954673767089844, "learning_rate": 7.415632045814138e-06, "loss": 0.3247, "step": 19945 }, { "epoch": 2.0278568523790157, "grad_norm": 0.28217563033103943, "learning_rate": 7.415321319092596e-06, "loss": 0.3086, "step": 19946 }, { "epoch": 2.0279585197234646, "grad_norm": 0.2507857382297516, "learning_rate": 7.415010580203238e-06, "loss": 0.323, "step": 19947 }, { "epoch": 2.0280601870679136, "grad_norm": 0.2621062099933624, "learning_rate": 7.41469982914763e-06, "loss": 0.3274, "step": 19948 }, { "epoch": 2.028161854412363, "grad_norm": 0.3053457736968994, "learning_rate": 7.41438906592734e-06, "loss": 0.3513, "step": 19949 }, { "epoch": 2.028263521756812, "grad_norm": 0.2959136962890625, "learning_rate": 7.41407829054393e-06, "loss": 0.3254, "step": 19950 }, { "epoch": 2.028365189101261, "grad_norm": 0.26753315329551697, "learning_rate": 7.413767502998968e-06, "loss": 0.3294, "step": 19951 }, { "epoch": 2.02846685644571, "grad_norm": 0.2982180714607239, "learning_rate": 7.413456703294018e-06, "loss": 0.3263, "step": 19952 }, { "epoch": 2.0285685237901587, "grad_norm": 0.2811172902584076, "learning_rate": 7.413145891430648e-06, "loss": 0.3196, "step": 19953 }, { "epoch": 2.0286701911346077, "grad_norm": 0.2944713234901428, "learning_rate": 7.412835067410422e-06, "loss": 0.3403, "step": 19954 }, { "epoch": 2.0287718584790566, "grad_norm": 0.27858975529670715, "learning_rate": 7.412524231234905e-06, "loss": 0.3158, "step": 19955 }, { "epoch": 2.0288735258235056, "grad_norm": 0.27854835987091064, "learning_rate": 7.412213382905665e-06, "loss": 0.3255, "step": 19956 }, { "epoch": 2.0289751931679545, "grad_norm": 0.2722752094268799, "learning_rate": 7.411902522424268e-06, "loss": 0.3283, "step": 19957 }, { "epoch": 2.0290768605124034, "grad_norm": 0.27900612354278564, "learning_rate": 7.411591649792278e-06, "loss": 0.3373, "step": 19958 }, { "epoch": 2.0291785278568524, "grad_norm": 0.28174102306365967, "learning_rate": 7.411280765011264e-06, "loss": 0.3128, "step": 19959 }, { "epoch": 2.0292801952013013, "grad_norm": 0.2871170938014984, "learning_rate": 7.410969868082789e-06, "loss": 0.3622, "step": 19960 }, { "epoch": 2.0293818625457503, "grad_norm": 0.3019619286060333, "learning_rate": 7.410658959008421e-06, "loss": 0.3246, "step": 19961 }, { "epoch": 2.029483529890199, "grad_norm": 0.284076064825058, "learning_rate": 7.4103480377897275e-06, "loss": 0.3267, "step": 19962 }, { "epoch": 2.029585197234648, "grad_norm": 0.28750818967819214, "learning_rate": 7.410037104428272e-06, "loss": 0.3344, "step": 19963 }, { "epoch": 2.029686864579097, "grad_norm": 0.2742035388946533, "learning_rate": 7.409726158925623e-06, "loss": 0.3106, "step": 19964 }, { "epoch": 2.029788531923546, "grad_norm": 0.2825578451156616, "learning_rate": 7.409415201283345e-06, "loss": 0.3375, "step": 19965 }, { "epoch": 2.029890199267995, "grad_norm": 0.2935093939304352, "learning_rate": 7.409104231503006e-06, "loss": 0.3364, "step": 19966 }, { "epoch": 2.029991866612444, "grad_norm": 0.31625720858573914, "learning_rate": 7.4087932495861736e-06, "loss": 0.2917, "step": 19967 }, { "epoch": 2.030093533956893, "grad_norm": 0.28505024313926697, "learning_rate": 7.4084822555344126e-06, "loss": 0.3218, "step": 19968 }, { "epoch": 2.030195201301342, "grad_norm": 0.26176080107688904, "learning_rate": 7.40817124934929e-06, "loss": 0.3422, "step": 19969 }, { "epoch": 2.030296868645791, "grad_norm": 0.27801212668418884, "learning_rate": 7.407860231032374e-06, "loss": 0.3666, "step": 19970 }, { "epoch": 2.03039853599024, "grad_norm": 0.2655383348464966, "learning_rate": 7.407549200585229e-06, "loss": 0.3327, "step": 19971 }, { "epoch": 2.030500203334689, "grad_norm": 0.282000333070755, "learning_rate": 7.407238158009423e-06, "loss": 0.3133, "step": 19972 }, { "epoch": 2.030601870679138, "grad_norm": 0.287891685962677, "learning_rate": 7.406927103306525e-06, "loss": 0.3493, "step": 19973 }, { "epoch": 2.030703538023587, "grad_norm": 0.26038989424705505, "learning_rate": 7.406616036478099e-06, "loss": 0.3046, "step": 19974 }, { "epoch": 2.030805205368036, "grad_norm": 0.27316024899482727, "learning_rate": 7.4063049575257125e-06, "loss": 0.3636, "step": 19975 }, { "epoch": 2.030906872712485, "grad_norm": 0.270311176776886, "learning_rate": 7.405993866450935e-06, "loss": 0.3321, "step": 19976 }, { "epoch": 2.0310085400569338, "grad_norm": 0.29690420627593994, "learning_rate": 7.405682763255332e-06, "loss": 0.3181, "step": 19977 }, { "epoch": 2.0311102074013827, "grad_norm": 0.2683732211589813, "learning_rate": 7.4053716479404705e-06, "loss": 0.3146, "step": 19978 }, { "epoch": 2.0312118747458316, "grad_norm": 0.28563693165779114, "learning_rate": 7.405060520507919e-06, "loss": 0.3388, "step": 19979 }, { "epoch": 2.0313135420902806, "grad_norm": 0.2851477265357971, "learning_rate": 7.4047493809592415e-06, "loss": 0.3396, "step": 19980 }, { "epoch": 2.0314152094347295, "grad_norm": 0.2712664306163788, "learning_rate": 7.404438229296011e-06, "loss": 0.334, "step": 19981 }, { "epoch": 2.0315168767791785, "grad_norm": 0.27099844813346863, "learning_rate": 7.404127065519791e-06, "loss": 0.3472, "step": 19982 }, { "epoch": 2.0316185441236274, "grad_norm": 0.30091458559036255, "learning_rate": 7.40381588963215e-06, "loss": 0.3243, "step": 19983 }, { "epoch": 2.0317202114680764, "grad_norm": 0.2645602524280548, "learning_rate": 7.403504701634656e-06, "loss": 0.3213, "step": 19984 }, { "epoch": 2.0318218788125253, "grad_norm": 0.2880666255950928, "learning_rate": 7.403193501528877e-06, "loss": 0.3253, "step": 19985 }, { "epoch": 2.0319235461569742, "grad_norm": 0.29195883870124817, "learning_rate": 7.40288228931638e-06, "loss": 0.3406, "step": 19986 }, { "epoch": 2.032025213501423, "grad_norm": 0.27432823181152344, "learning_rate": 7.4025710649987325e-06, "loss": 0.3073, "step": 19987 }, { "epoch": 2.032126880845872, "grad_norm": 0.2679094672203064, "learning_rate": 7.402259828577503e-06, "loss": 0.3432, "step": 19988 }, { "epoch": 2.032228548190321, "grad_norm": 0.2663065493106842, "learning_rate": 7.40194858005426e-06, "loss": 0.3404, "step": 19989 }, { "epoch": 2.0323302155347704, "grad_norm": 0.28089696168899536, "learning_rate": 7.4016373194305705e-06, "loss": 0.3701, "step": 19990 }, { "epoch": 2.0324318828792194, "grad_norm": 0.2565089762210846, "learning_rate": 7.401326046708003e-06, "loss": 0.3241, "step": 19991 }, { "epoch": 2.0325335502236683, "grad_norm": 0.2794768512248993, "learning_rate": 7.401014761888125e-06, "loss": 0.32, "step": 19992 }, { "epoch": 2.0326352175681173, "grad_norm": 0.30086037516593933, "learning_rate": 7.400703464972505e-06, "loss": 0.3184, "step": 19993 }, { "epoch": 2.032736884912566, "grad_norm": 0.27820292115211487, "learning_rate": 7.400392155962712e-06, "loss": 0.3443, "step": 19994 }, { "epoch": 2.032838552257015, "grad_norm": 0.265080064535141, "learning_rate": 7.400080834860314e-06, "loss": 0.3252, "step": 19995 }, { "epoch": 2.032940219601464, "grad_norm": 0.3026464283466339, "learning_rate": 7.399769501666879e-06, "loss": 0.3505, "step": 19996 }, { "epoch": 2.033041886945913, "grad_norm": 0.29797378182411194, "learning_rate": 7.399458156383974e-06, "loss": 0.3171, "step": 19997 }, { "epoch": 2.033143554290362, "grad_norm": 0.267836332321167, "learning_rate": 7.3991467990131706e-06, "loss": 0.3161, "step": 19998 }, { "epoch": 2.033245221634811, "grad_norm": 0.27218401432037354, "learning_rate": 7.398835429556036e-06, "loss": 0.3286, "step": 19999 }, { "epoch": 2.03334688897926, "grad_norm": 0.29995325207710266, "learning_rate": 7.398524048014138e-06, "loss": 0.3251, "step": 20000 }, { "epoch": 2.033448556323709, "grad_norm": 0.27071529626846313, "learning_rate": 7.398212654389046e-06, "loss": 0.3208, "step": 20001 }, { "epoch": 2.0335502236681577, "grad_norm": 0.34021130204200745, "learning_rate": 7.397901248682328e-06, "loss": 0.323, "step": 20002 }, { "epoch": 2.0336518910126067, "grad_norm": 0.2736949920654297, "learning_rate": 7.3975898308955526e-06, "loss": 0.3367, "step": 20003 }, { "epoch": 2.0337535583570556, "grad_norm": 0.28259068727493286, "learning_rate": 7.397278401030293e-06, "loss": 0.3388, "step": 20004 }, { "epoch": 2.0338552257015046, "grad_norm": 0.27093568444252014, "learning_rate": 7.396966959088111e-06, "loss": 0.3487, "step": 20005 }, { "epoch": 2.0339568930459535, "grad_norm": 0.2769097685813904, "learning_rate": 7.39665550507058e-06, "loss": 0.3222, "step": 20006 }, { "epoch": 2.0340585603904024, "grad_norm": 0.2973305284976959, "learning_rate": 7.396344038979267e-06, "loss": 0.3473, "step": 20007 }, { "epoch": 2.0341602277348514, "grad_norm": 0.28075742721557617, "learning_rate": 7.396032560815744e-06, "loss": 0.3163, "step": 20008 }, { "epoch": 2.0342618950793003, "grad_norm": 0.26879215240478516, "learning_rate": 7.3957210705815785e-06, "loss": 0.377, "step": 20009 }, { "epoch": 2.0343635624237493, "grad_norm": 0.2551593780517578, "learning_rate": 7.395409568278339e-06, "loss": 0.3351, "step": 20010 }, { "epoch": 2.0344652297681987, "grad_norm": 0.2884705662727356, "learning_rate": 7.395098053907594e-06, "loss": 0.3482, "step": 20011 }, { "epoch": 2.0345668971126476, "grad_norm": 0.27878278493881226, "learning_rate": 7.3947865274709154e-06, "loss": 0.3444, "step": 20012 }, { "epoch": 2.0346685644570965, "grad_norm": 0.2648642957210541, "learning_rate": 7.394474988969871e-06, "loss": 0.3459, "step": 20013 }, { "epoch": 2.0347702318015455, "grad_norm": 0.27552396059036255, "learning_rate": 7.394163438406031e-06, "loss": 0.3139, "step": 20014 }, { "epoch": 2.0348718991459944, "grad_norm": 0.2845723628997803, "learning_rate": 7.393851875780965e-06, "loss": 0.3198, "step": 20015 }, { "epoch": 2.0349735664904434, "grad_norm": 0.31605634093284607, "learning_rate": 7.3935403010962416e-06, "loss": 0.3154, "step": 20016 }, { "epoch": 2.0350752338348923, "grad_norm": 0.28022074699401855, "learning_rate": 7.39322871435343e-06, "loss": 0.333, "step": 20017 }, { "epoch": 2.0351769011793412, "grad_norm": 0.2732621133327484, "learning_rate": 7.392917115554102e-06, "loss": 0.3452, "step": 20018 }, { "epoch": 2.03527856852379, "grad_norm": 0.28762349486351013, "learning_rate": 7.392605504699826e-06, "loss": 0.3433, "step": 20019 }, { "epoch": 2.035380235868239, "grad_norm": 0.2969841957092285, "learning_rate": 7.392293881792172e-06, "loss": 0.3733, "step": 20020 }, { "epoch": 2.035481903212688, "grad_norm": 0.23856009542942047, "learning_rate": 7.3919822468327095e-06, "loss": 0.3336, "step": 20021 }, { "epoch": 2.035583570557137, "grad_norm": 0.2743503451347351, "learning_rate": 7.391670599823009e-06, "loss": 0.3576, "step": 20022 }, { "epoch": 2.035685237901586, "grad_norm": 0.2611530125141144, "learning_rate": 7.391358940764641e-06, "loss": 0.328, "step": 20023 }, { "epoch": 2.035786905246035, "grad_norm": 0.2711888551712036, "learning_rate": 7.391047269659174e-06, "loss": 0.324, "step": 20024 }, { "epoch": 2.035888572590484, "grad_norm": 0.2831880450248718, "learning_rate": 7.390735586508181e-06, "loss": 0.3135, "step": 20025 }, { "epoch": 2.0359902399349328, "grad_norm": 0.2842046320438385, "learning_rate": 7.390423891313228e-06, "loss": 0.3245, "step": 20026 }, { "epoch": 2.0360919072793817, "grad_norm": 0.2615244388580322, "learning_rate": 7.390112184075888e-06, "loss": 0.3268, "step": 20027 }, { "epoch": 2.0361935746238307, "grad_norm": 0.2735850512981415, "learning_rate": 7.389800464797731e-06, "loss": 0.3341, "step": 20028 }, { "epoch": 2.0362952419682796, "grad_norm": 0.29342734813690186, "learning_rate": 7.389488733480328e-06, "loss": 0.3171, "step": 20029 }, { "epoch": 2.0363969093127285, "grad_norm": 0.2961084246635437, "learning_rate": 7.3891769901252475e-06, "loss": 0.3346, "step": 20030 }, { "epoch": 2.036498576657178, "grad_norm": 0.2783409655094147, "learning_rate": 7.388865234734062e-06, "loss": 0.336, "step": 20031 }, { "epoch": 2.036600244001627, "grad_norm": 0.26159918308258057, "learning_rate": 7.388553467308339e-06, "loss": 0.3296, "step": 20032 }, { "epoch": 2.036701911346076, "grad_norm": 0.28466835618019104, "learning_rate": 7.3882416878496545e-06, "loss": 0.3242, "step": 20033 }, { "epoch": 2.0368035786905248, "grad_norm": 0.2816089689731598, "learning_rate": 7.387929896359573e-06, "loss": 0.3396, "step": 20034 }, { "epoch": 2.0369052460349737, "grad_norm": 0.26630812883377075, "learning_rate": 7.38761809283967e-06, "loss": 0.3058, "step": 20035 }, { "epoch": 2.0370069133794226, "grad_norm": 0.27512842416763306, "learning_rate": 7.387306277291514e-06, "loss": 0.2999, "step": 20036 }, { "epoch": 2.0371085807238716, "grad_norm": 0.30764472484588623, "learning_rate": 7.386994449716676e-06, "loss": 0.3383, "step": 20037 }, { "epoch": 2.0372102480683205, "grad_norm": 0.2719617784023285, "learning_rate": 7.386682610116726e-06, "loss": 0.3367, "step": 20038 }, { "epoch": 2.0373119154127695, "grad_norm": 0.2709171175956726, "learning_rate": 7.386370758493238e-06, "loss": 0.3366, "step": 20039 }, { "epoch": 2.0374135827572184, "grad_norm": 0.278728723526001, "learning_rate": 7.386058894847782e-06, "loss": 0.3467, "step": 20040 }, { "epoch": 2.0375152501016673, "grad_norm": 0.2901865541934967, "learning_rate": 7.385747019181926e-06, "loss": 0.3213, "step": 20041 }, { "epoch": 2.0376169174461163, "grad_norm": 0.2655770480632782, "learning_rate": 7.385435131497244e-06, "loss": 0.3562, "step": 20042 }, { "epoch": 2.0377185847905652, "grad_norm": 0.28136736154556274, "learning_rate": 7.385123231795308e-06, "loss": 0.3443, "step": 20043 }, { "epoch": 2.037820252135014, "grad_norm": 0.29280874133110046, "learning_rate": 7.384811320077686e-06, "loss": 0.3307, "step": 20044 }, { "epoch": 2.037921919479463, "grad_norm": 0.29181811213493347, "learning_rate": 7.384499396345953e-06, "loss": 0.3363, "step": 20045 }, { "epoch": 2.038023586823912, "grad_norm": 0.28537189960479736, "learning_rate": 7.384187460601677e-06, "loss": 0.3233, "step": 20046 }, { "epoch": 2.038125254168361, "grad_norm": 0.2691654860973358, "learning_rate": 7.383875512846432e-06, "loss": 0.3603, "step": 20047 }, { "epoch": 2.03822692151281, "grad_norm": 0.26522213220596313, "learning_rate": 7.38356355308179e-06, "loss": 0.298, "step": 20048 }, { "epoch": 2.038328588857259, "grad_norm": 0.27463796734809875, "learning_rate": 7.38325158130932e-06, "loss": 0.3478, "step": 20049 }, { "epoch": 2.038430256201708, "grad_norm": 0.27037522196769714, "learning_rate": 7.3829395975305954e-06, "loss": 0.3381, "step": 20050 }, { "epoch": 2.0385319235461568, "grad_norm": 0.2818300127983093, "learning_rate": 7.382627601747186e-06, "loss": 0.3648, "step": 20051 }, { "epoch": 2.038633590890606, "grad_norm": 0.2679083049297333, "learning_rate": 7.382315593960666e-06, "loss": 0.3152, "step": 20052 }, { "epoch": 2.038735258235055, "grad_norm": 0.26705121994018555, "learning_rate": 7.382003574172607e-06, "loss": 0.3896, "step": 20053 }, { "epoch": 2.038836925579504, "grad_norm": 0.2913591265678406, "learning_rate": 7.381691542384578e-06, "loss": 0.3682, "step": 20054 }, { "epoch": 2.038938592923953, "grad_norm": 0.27112656831741333, "learning_rate": 7.3813794985981545e-06, "loss": 0.3404, "step": 20055 }, { "epoch": 2.039040260268402, "grad_norm": 0.2607578933238983, "learning_rate": 7.3810674428149065e-06, "loss": 0.3121, "step": 20056 }, { "epoch": 2.039141927612851, "grad_norm": 0.28885191679000854, "learning_rate": 7.380755375036406e-06, "loss": 0.3324, "step": 20057 }, { "epoch": 2.0392435949573, "grad_norm": 0.2610258162021637, "learning_rate": 7.3804432952642266e-06, "loss": 0.3441, "step": 20058 }, { "epoch": 2.0393452623017487, "grad_norm": 0.2715853154659271, "learning_rate": 7.38013120349994e-06, "loss": 0.3239, "step": 20059 }, { "epoch": 2.0394469296461977, "grad_norm": 0.2802349030971527, "learning_rate": 7.379819099745117e-06, "loss": 0.3723, "step": 20060 }, { "epoch": 2.0395485969906466, "grad_norm": 0.2699083983898163, "learning_rate": 7.37950698400133e-06, "loss": 0.3346, "step": 20061 }, { "epoch": 2.0396502643350956, "grad_norm": 0.2929091155529022, "learning_rate": 7.379194856270153e-06, "loss": 0.2921, "step": 20062 }, { "epoch": 2.0397519316795445, "grad_norm": 0.2878040373325348, "learning_rate": 7.378882716553159e-06, "loss": 0.3506, "step": 20063 }, { "epoch": 2.0398535990239934, "grad_norm": 0.3128967881202698, "learning_rate": 7.378570564851918e-06, "loss": 0.3212, "step": 20064 }, { "epoch": 2.0399552663684424, "grad_norm": 0.29127559065818787, "learning_rate": 7.378258401168005e-06, "loss": 0.3385, "step": 20065 }, { "epoch": 2.0400569337128913, "grad_norm": 0.279714435338974, "learning_rate": 7.37794622550299e-06, "loss": 0.3263, "step": 20066 }, { "epoch": 2.0401586010573403, "grad_norm": 0.28653693199157715, "learning_rate": 7.3776340378584476e-06, "loss": 0.3899, "step": 20067 }, { "epoch": 2.040260268401789, "grad_norm": 0.3113422393798828, "learning_rate": 7.377321838235949e-06, "loss": 0.2948, "step": 20068 }, { "epoch": 2.040361935746238, "grad_norm": 0.2795831859111786, "learning_rate": 7.3770096266370685e-06, "loss": 0.3293, "step": 20069 }, { "epoch": 2.040463603090687, "grad_norm": 0.2929895520210266, "learning_rate": 7.37669740306338e-06, "loss": 0.3235, "step": 20070 }, { "epoch": 2.040565270435136, "grad_norm": 0.29473528265953064, "learning_rate": 7.376385167516452e-06, "loss": 0.3419, "step": 20071 }, { "epoch": 2.0406669377795854, "grad_norm": 0.31303396821022034, "learning_rate": 7.376072919997862e-06, "loss": 0.3579, "step": 20072 }, { "epoch": 2.0407686051240344, "grad_norm": 0.2844153344631195, "learning_rate": 7.37576066050918e-06, "loss": 0.3163, "step": 20073 }, { "epoch": 2.0408702724684833, "grad_norm": 0.27774062752723694, "learning_rate": 7.375448389051981e-06, "loss": 0.3212, "step": 20074 }, { "epoch": 2.0409719398129322, "grad_norm": 0.2714329957962036, "learning_rate": 7.3751361056278385e-06, "loss": 0.2841, "step": 20075 }, { "epoch": 2.041073607157381, "grad_norm": 0.30409640073776245, "learning_rate": 7.374823810238325e-06, "loss": 0.3764, "step": 20076 }, { "epoch": 2.04117527450183, "grad_norm": 0.27524009346961975, "learning_rate": 7.374511502885011e-06, "loss": 0.3491, "step": 20077 }, { "epoch": 2.041276941846279, "grad_norm": 0.2712286710739136, "learning_rate": 7.3741991835694735e-06, "loss": 0.3224, "step": 20078 }, { "epoch": 2.041378609190728, "grad_norm": 0.27316606044769287, "learning_rate": 7.373886852293285e-06, "loss": 0.333, "step": 20079 }, { "epoch": 2.041480276535177, "grad_norm": 0.27654117345809937, "learning_rate": 7.373574509058019e-06, "loss": 0.3297, "step": 20080 }, { "epoch": 2.041581943879626, "grad_norm": 0.25700414180755615, "learning_rate": 7.373262153865248e-06, "loss": 0.3337, "step": 20081 }, { "epoch": 2.041683611224075, "grad_norm": 0.3413560688495636, "learning_rate": 7.372949786716547e-06, "loss": 0.3494, "step": 20082 }, { "epoch": 2.0417852785685238, "grad_norm": 0.26342135667800903, "learning_rate": 7.3726374076134875e-06, "loss": 0.3777, "step": 20083 }, { "epoch": 2.0418869459129727, "grad_norm": 0.25945714116096497, "learning_rate": 7.372325016557644e-06, "loss": 0.3372, "step": 20084 }, { "epoch": 2.0419886132574216, "grad_norm": 0.2661755681037903, "learning_rate": 7.372012613550594e-06, "loss": 0.3584, "step": 20085 }, { "epoch": 2.0420902806018706, "grad_norm": 0.2741839587688446, "learning_rate": 7.371700198593907e-06, "loss": 0.3042, "step": 20086 }, { "epoch": 2.0421919479463195, "grad_norm": 0.27740001678466797, "learning_rate": 7.371387771689157e-06, "loss": 0.3554, "step": 20087 }, { "epoch": 2.0422936152907685, "grad_norm": 0.26102563738822937, "learning_rate": 7.371075332837918e-06, "loss": 0.3391, "step": 20088 }, { "epoch": 2.0423952826352174, "grad_norm": 0.2773694097995758, "learning_rate": 7.370762882041767e-06, "loss": 0.332, "step": 20089 }, { "epoch": 2.0424969499796664, "grad_norm": 0.28693151473999023, "learning_rate": 7.370450419302275e-06, "loss": 0.3339, "step": 20090 }, { "epoch": 2.0425986173241153, "grad_norm": 0.256390243768692, "learning_rate": 7.370137944621018e-06, "loss": 0.3232, "step": 20091 }, { "epoch": 2.0427002846685642, "grad_norm": 0.2796918451786041, "learning_rate": 7.369825457999568e-06, "loss": 0.3358, "step": 20092 }, { "epoch": 2.0428019520130136, "grad_norm": 0.2678847312927246, "learning_rate": 7.369512959439501e-06, "loss": 0.3281, "step": 20093 }, { "epoch": 2.0429036193574626, "grad_norm": 0.2876442074775696, "learning_rate": 7.369200448942391e-06, "loss": 0.3238, "step": 20094 }, { "epoch": 2.0430052867019115, "grad_norm": 0.2874675393104553, "learning_rate": 7.368887926509811e-06, "loss": 0.3429, "step": 20095 }, { "epoch": 2.0431069540463604, "grad_norm": 0.2857605516910553, "learning_rate": 7.368575392143339e-06, "loss": 0.315, "step": 20096 }, { "epoch": 2.0432086213908094, "grad_norm": 0.2760693430900574, "learning_rate": 7.368262845844545e-06, "loss": 0.3436, "step": 20097 }, { "epoch": 2.0433102887352583, "grad_norm": 0.26754704117774963, "learning_rate": 7.367950287615005e-06, "loss": 0.333, "step": 20098 }, { "epoch": 2.0434119560797073, "grad_norm": 0.2764509916305542, "learning_rate": 7.3676377174562965e-06, "loss": 0.3439, "step": 20099 }, { "epoch": 2.043513623424156, "grad_norm": 0.267945796251297, "learning_rate": 7.367325135369991e-06, "loss": 0.3512, "step": 20100 }, { "epoch": 2.043615290768605, "grad_norm": 0.273532509803772, "learning_rate": 7.3670125413576635e-06, "loss": 0.3496, "step": 20101 }, { "epoch": 2.043716958113054, "grad_norm": 0.2733628451824188, "learning_rate": 7.366699935420889e-06, "loss": 0.366, "step": 20102 }, { "epoch": 2.043818625457503, "grad_norm": 0.2763861119747162, "learning_rate": 7.366387317561243e-06, "loss": 0.3279, "step": 20103 }, { "epoch": 2.043920292801952, "grad_norm": 0.2679263949394226, "learning_rate": 7.3660746877803e-06, "loss": 0.3047, "step": 20104 }, { "epoch": 2.044021960146401, "grad_norm": 0.28357386589050293, "learning_rate": 7.365762046079635e-06, "loss": 0.3415, "step": 20105 }, { "epoch": 2.04412362749085, "grad_norm": 0.25256794691085815, "learning_rate": 7.365449392460824e-06, "loss": 0.2973, "step": 20106 }, { "epoch": 2.044225294835299, "grad_norm": 0.25696861743927, "learning_rate": 7.365136726925439e-06, "loss": 0.306, "step": 20107 }, { "epoch": 2.0443269621797477, "grad_norm": 0.4054446220397949, "learning_rate": 7.364824049475059e-06, "loss": 0.3101, "step": 20108 }, { "epoch": 2.0444286295241967, "grad_norm": 0.2707562744617462, "learning_rate": 7.364511360111257e-06, "loss": 0.3439, "step": 20109 }, { "epoch": 2.0445302968686456, "grad_norm": 0.29714280366897583, "learning_rate": 7.364198658835609e-06, "loss": 0.3226, "step": 20110 }, { "epoch": 2.0446319642130946, "grad_norm": 0.28064659237861633, "learning_rate": 7.36388594564969e-06, "loss": 0.3432, "step": 20111 }, { "epoch": 2.0447336315575435, "grad_norm": 0.28539401292800903, "learning_rate": 7.363573220555073e-06, "loss": 0.3039, "step": 20112 }, { "epoch": 2.044835298901993, "grad_norm": 0.27263322472572327, "learning_rate": 7.363260483553338e-06, "loss": 0.3368, "step": 20113 }, { "epoch": 2.044936966246442, "grad_norm": 0.27596884965896606, "learning_rate": 7.3629477346460574e-06, "loss": 0.3569, "step": 20114 }, { "epoch": 2.0450386335908908, "grad_norm": 0.2873016893863678, "learning_rate": 7.362634973834808e-06, "loss": 0.3208, "step": 20115 }, { "epoch": 2.0451403009353397, "grad_norm": 0.28402799367904663, "learning_rate": 7.362322201121166e-06, "loss": 0.3227, "step": 20116 }, { "epoch": 2.0452419682797887, "grad_norm": 0.2977900207042694, "learning_rate": 7.3620094165067055e-06, "loss": 0.3266, "step": 20117 }, { "epoch": 2.0453436356242376, "grad_norm": 0.28148433566093445, "learning_rate": 7.361696619993002e-06, "loss": 0.3408, "step": 20118 }, { "epoch": 2.0454453029686865, "grad_norm": 0.27939021587371826, "learning_rate": 7.361383811581633e-06, "loss": 0.3245, "step": 20119 }, { "epoch": 2.0455469703131355, "grad_norm": 0.2829696238040924, "learning_rate": 7.361070991274173e-06, "loss": 0.357, "step": 20120 }, { "epoch": 2.0456486376575844, "grad_norm": 0.2824074327945709, "learning_rate": 7.360758159072199e-06, "loss": 0.3361, "step": 20121 }, { "epoch": 2.0457503050020334, "grad_norm": 0.260516494512558, "learning_rate": 7.360445314977286e-06, "loss": 0.3425, "step": 20122 }, { "epoch": 2.0458519723464823, "grad_norm": 0.27030307054519653, "learning_rate": 7.360132458991011e-06, "loss": 0.326, "step": 20123 }, { "epoch": 2.0459536396909312, "grad_norm": 0.2852603495121002, "learning_rate": 7.35981959111495e-06, "loss": 0.3794, "step": 20124 }, { "epoch": 2.04605530703538, "grad_norm": 0.2701484262943268, "learning_rate": 7.359506711350677e-06, "loss": 0.3222, "step": 20125 }, { "epoch": 2.046156974379829, "grad_norm": 0.28162482380867004, "learning_rate": 7.359193819699771e-06, "loss": 0.3391, "step": 20126 }, { "epoch": 2.046258641724278, "grad_norm": 0.24614618718624115, "learning_rate": 7.358880916163807e-06, "loss": 0.346, "step": 20127 }, { "epoch": 2.046360309068727, "grad_norm": 0.28876131772994995, "learning_rate": 7.358568000744359e-06, "loss": 0.3315, "step": 20128 }, { "epoch": 2.046461976413176, "grad_norm": 0.29354557394981384, "learning_rate": 7.35825507344301e-06, "loss": 0.3524, "step": 20129 }, { "epoch": 2.046563643757625, "grad_norm": 0.2705117166042328, "learning_rate": 7.35794213426133e-06, "loss": 0.3145, "step": 20130 }, { "epoch": 2.046665311102074, "grad_norm": 0.2688809931278229, "learning_rate": 7.357629183200899e-06, "loss": 0.3246, "step": 20131 }, { "epoch": 2.0467669784465228, "grad_norm": 0.2937747836112976, "learning_rate": 7.357316220263291e-06, "loss": 0.3229, "step": 20132 }, { "epoch": 2.0468686457909717, "grad_norm": 0.292814701795578, "learning_rate": 7.357003245450085e-06, "loss": 0.3309, "step": 20133 }, { "epoch": 2.046970313135421, "grad_norm": 0.2888932228088379, "learning_rate": 7.356690258762858e-06, "loss": 0.301, "step": 20134 }, { "epoch": 2.04707198047987, "grad_norm": 0.2608680725097656, "learning_rate": 7.3563772602031825e-06, "loss": 0.3215, "step": 20135 }, { "epoch": 2.047173647824319, "grad_norm": 0.27810347080230713, "learning_rate": 7.3560642497726406e-06, "loss": 0.3162, "step": 20136 }, { "epoch": 2.047275315168768, "grad_norm": 0.2968187928199768, "learning_rate": 7.355751227472806e-06, "loss": 0.3123, "step": 20137 }, { "epoch": 2.047376982513217, "grad_norm": 0.2618319094181061, "learning_rate": 7.355438193305256e-06, "loss": 0.3358, "step": 20138 }, { "epoch": 2.047478649857666, "grad_norm": 0.28922057151794434, "learning_rate": 7.35512514727157e-06, "loss": 0.3497, "step": 20139 }, { "epoch": 2.0475803172021148, "grad_norm": 0.27885499596595764, "learning_rate": 7.354812089373322e-06, "loss": 0.3416, "step": 20140 }, { "epoch": 2.0476819845465637, "grad_norm": 0.28222760558128357, "learning_rate": 7.354499019612091e-06, "loss": 0.3319, "step": 20141 }, { "epoch": 2.0477836518910126, "grad_norm": 0.2845768332481384, "learning_rate": 7.354185937989453e-06, "loss": 0.3356, "step": 20142 }, { "epoch": 2.0478853192354616, "grad_norm": 0.28401979804039, "learning_rate": 7.353872844506984e-06, "loss": 0.3256, "step": 20143 }, { "epoch": 2.0479869865799105, "grad_norm": 0.2726580798625946, "learning_rate": 7.353559739166266e-06, "loss": 0.3483, "step": 20144 }, { "epoch": 2.0480886539243595, "grad_norm": 0.2690335512161255, "learning_rate": 7.353246621968872e-06, "loss": 0.3514, "step": 20145 }, { "epoch": 2.0481903212688084, "grad_norm": 0.29836228489875793, "learning_rate": 7.352933492916381e-06, "loss": 0.3383, "step": 20146 }, { "epoch": 2.0482919886132573, "grad_norm": 0.29046645760536194, "learning_rate": 7.352620352010369e-06, "loss": 0.3265, "step": 20147 }, { "epoch": 2.0483936559577063, "grad_norm": 0.2602779269218445, "learning_rate": 7.352307199252415e-06, "loss": 0.3172, "step": 20148 }, { "epoch": 2.0484953233021552, "grad_norm": 0.2695003151893616, "learning_rate": 7.351994034644098e-06, "loss": 0.3113, "step": 20149 }, { "epoch": 2.048596990646604, "grad_norm": 0.27437445521354675, "learning_rate": 7.351680858186991e-06, "loss": 0.3235, "step": 20150 }, { "epoch": 2.048698657991053, "grad_norm": 0.2882959544658661, "learning_rate": 7.351367669882679e-06, "loss": 0.3256, "step": 20151 }, { "epoch": 2.048800325335502, "grad_norm": 0.2932128608226776, "learning_rate": 7.351054469732732e-06, "loss": 0.3472, "step": 20152 }, { "epoch": 2.048901992679951, "grad_norm": 0.29557621479034424, "learning_rate": 7.350741257738732e-06, "loss": 0.3392, "step": 20153 }, { "epoch": 2.0490036600244004, "grad_norm": 0.32286420464515686, "learning_rate": 7.3504280339022564e-06, "loss": 0.3616, "step": 20154 }, { "epoch": 2.0491053273688493, "grad_norm": 0.28787073493003845, "learning_rate": 7.350114798224882e-06, "loss": 0.3091, "step": 20155 }, { "epoch": 2.0492069947132983, "grad_norm": 0.3424341082572937, "learning_rate": 7.34980155070819e-06, "loss": 0.3699, "step": 20156 }, { "epoch": 2.049308662057747, "grad_norm": 0.3124755024909973, "learning_rate": 7.3494882913537545e-06, "loss": 0.3472, "step": 20157 }, { "epoch": 2.049410329402196, "grad_norm": 0.2965821921825409, "learning_rate": 7.349175020163156e-06, "loss": 0.3134, "step": 20158 }, { "epoch": 2.049511996746645, "grad_norm": 0.2987152934074402, "learning_rate": 7.348861737137971e-06, "loss": 0.3606, "step": 20159 }, { "epoch": 2.049613664091094, "grad_norm": 0.3040368854999542, "learning_rate": 7.3485484422797794e-06, "loss": 0.3318, "step": 20160 }, { "epoch": 2.049715331435543, "grad_norm": 0.2843135595321655, "learning_rate": 7.3482351355901595e-06, "loss": 0.3155, "step": 20161 }, { "epoch": 2.049816998779992, "grad_norm": 0.2807578146457672, "learning_rate": 7.347921817070689e-06, "loss": 0.339, "step": 20162 }, { "epoch": 2.049918666124441, "grad_norm": 0.26166072487831116, "learning_rate": 7.3476084867229446e-06, "loss": 0.3191, "step": 20163 }, { "epoch": 2.05002033346889, "grad_norm": 0.2900986671447754, "learning_rate": 7.347295144548507e-06, "loss": 0.3378, "step": 20164 }, { "epoch": 2.0501220008133387, "grad_norm": 0.2934637665748596, "learning_rate": 7.346981790548955e-06, "loss": 0.3339, "step": 20165 }, { "epoch": 2.0502236681577877, "grad_norm": 0.2849024832248688, "learning_rate": 7.346668424725867e-06, "loss": 0.3253, "step": 20166 }, { "epoch": 2.0503253355022366, "grad_norm": 0.2958340346813202, "learning_rate": 7.346355047080821e-06, "loss": 0.3094, "step": 20167 }, { "epoch": 2.0504270028466856, "grad_norm": 0.265651136636734, "learning_rate": 7.346041657615395e-06, "loss": 0.3282, "step": 20168 }, { "epoch": 2.0505286701911345, "grad_norm": 0.2688709795475006, "learning_rate": 7.3457282563311685e-06, "loss": 0.3367, "step": 20169 }, { "epoch": 2.0506303375355834, "grad_norm": 0.29251837730407715, "learning_rate": 7.345414843229721e-06, "loss": 0.3239, "step": 20170 }, { "epoch": 2.0507320048800324, "grad_norm": 0.2863207459449768, "learning_rate": 7.34510141831263e-06, "loss": 0.3095, "step": 20171 }, { "epoch": 2.0508336722244813, "grad_norm": 0.2644411027431488, "learning_rate": 7.344787981581477e-06, "loss": 0.3158, "step": 20172 }, { "epoch": 2.0509353395689303, "grad_norm": 0.273260235786438, "learning_rate": 7.344474533037838e-06, "loss": 0.3548, "step": 20173 }, { "epoch": 2.051037006913379, "grad_norm": 0.3012017607688904, "learning_rate": 7.344161072683293e-06, "loss": 0.3496, "step": 20174 }, { "epoch": 2.0511386742578286, "grad_norm": 0.3165316879749298, "learning_rate": 7.343847600519424e-06, "loss": 0.3383, "step": 20175 }, { "epoch": 2.0512403416022775, "grad_norm": 0.27637729048728943, "learning_rate": 7.343534116547806e-06, "loss": 0.3336, "step": 20176 }, { "epoch": 2.0513420089467265, "grad_norm": 0.2961975038051605, "learning_rate": 7.343220620770021e-06, "loss": 0.3721, "step": 20177 }, { "epoch": 2.0514436762911754, "grad_norm": 0.2986227869987488, "learning_rate": 7.342907113187646e-06, "loss": 0.3558, "step": 20178 }, { "epoch": 2.0515453436356244, "grad_norm": 0.2810993492603302, "learning_rate": 7.342593593802262e-06, "loss": 0.355, "step": 20179 }, { "epoch": 2.0516470109800733, "grad_norm": 0.28687214851379395, "learning_rate": 7.342280062615449e-06, "loss": 0.3416, "step": 20180 }, { "epoch": 2.0517486783245222, "grad_norm": 0.28670310974121094, "learning_rate": 7.341966519628785e-06, "loss": 0.3455, "step": 20181 }, { "epoch": 2.051850345668971, "grad_norm": 0.27030473947525024, "learning_rate": 7.34165296484385e-06, "loss": 0.3429, "step": 20182 }, { "epoch": 2.05195201301342, "grad_norm": 0.27537447214126587, "learning_rate": 7.341339398262225e-06, "loss": 0.3319, "step": 20183 }, { "epoch": 2.052053680357869, "grad_norm": 0.25855425000190735, "learning_rate": 7.341025819885488e-06, "loss": 0.323, "step": 20184 }, { "epoch": 2.052155347702318, "grad_norm": 0.28233280777931213, "learning_rate": 7.340712229715219e-06, "loss": 0.3435, "step": 20185 }, { "epoch": 2.052257015046767, "grad_norm": 0.29727107286453247, "learning_rate": 7.340398627752997e-06, "loss": 0.3534, "step": 20186 }, { "epoch": 2.052358682391216, "grad_norm": 0.2570190727710724, "learning_rate": 7.340085014000404e-06, "loss": 0.3235, "step": 20187 }, { "epoch": 2.052460349735665, "grad_norm": 0.27535754442214966, "learning_rate": 7.339771388459019e-06, "loss": 0.3277, "step": 20188 }, { "epoch": 2.0525620170801138, "grad_norm": 0.2650308310985565, "learning_rate": 7.339457751130421e-06, "loss": 0.3267, "step": 20189 }, { "epoch": 2.0526636844245627, "grad_norm": 0.29144784808158875, "learning_rate": 7.339144102016192e-06, "loss": 0.3725, "step": 20190 }, { "epoch": 2.0527653517690116, "grad_norm": 0.29672718048095703, "learning_rate": 7.33883044111791e-06, "loss": 0.3149, "step": 20191 }, { "epoch": 2.0528670191134606, "grad_norm": 0.2701565623283386, "learning_rate": 7.338516768437156e-06, "loss": 0.3146, "step": 20192 }, { "epoch": 2.0529686864579095, "grad_norm": 0.2921900153160095, "learning_rate": 7.338203083975511e-06, "loss": 0.3449, "step": 20193 }, { "epoch": 2.0530703538023585, "grad_norm": 0.2941875457763672, "learning_rate": 7.337889387734552e-06, "loss": 0.3276, "step": 20194 }, { "epoch": 2.053172021146808, "grad_norm": 0.28178146481513977, "learning_rate": 7.337575679715865e-06, "loss": 0.3314, "step": 20195 }, { "epoch": 2.053273688491257, "grad_norm": 0.2806199789047241, "learning_rate": 7.3372619599210245e-06, "loss": 0.324, "step": 20196 }, { "epoch": 2.0533753558357057, "grad_norm": 0.2942938506603241, "learning_rate": 7.336948228351617e-06, "loss": 0.3559, "step": 20197 }, { "epoch": 2.0534770231801547, "grad_norm": 0.29041603207588196, "learning_rate": 7.336634485009217e-06, "loss": 0.3264, "step": 20198 }, { "epoch": 2.0535786905246036, "grad_norm": 0.2962261140346527, "learning_rate": 7.3363207298954075e-06, "loss": 0.3253, "step": 20199 }, { "epoch": 2.0536803578690526, "grad_norm": 0.2888607680797577, "learning_rate": 7.336006963011772e-06, "loss": 0.352, "step": 20200 }, { "epoch": 2.0537820252135015, "grad_norm": 0.29478543996810913, "learning_rate": 7.335693184359885e-06, "loss": 0.358, "step": 20201 }, { "epoch": 2.0538836925579504, "grad_norm": 0.2688162922859192, "learning_rate": 7.335379393941334e-06, "loss": 0.3054, "step": 20202 }, { "epoch": 2.0539853599023994, "grad_norm": 0.26481255888938904, "learning_rate": 7.335065591757695e-06, "loss": 0.331, "step": 20203 }, { "epoch": 2.0540870272468483, "grad_norm": 0.2577117681503296, "learning_rate": 7.334751777810551e-06, "loss": 0.3081, "step": 20204 }, { "epoch": 2.0541886945912973, "grad_norm": 0.2893187999725342, "learning_rate": 7.334437952101482e-06, "loss": 0.3337, "step": 20205 }, { "epoch": 2.054290361935746, "grad_norm": 0.26979419589042664, "learning_rate": 7.3341241146320685e-06, "loss": 0.3262, "step": 20206 }, { "epoch": 2.054392029280195, "grad_norm": 0.2745967507362366, "learning_rate": 7.333810265403894e-06, "loss": 0.3741, "step": 20207 }, { "epoch": 2.054493696624644, "grad_norm": 0.2691337764263153, "learning_rate": 7.333496404418536e-06, "loss": 0.3475, "step": 20208 }, { "epoch": 2.054595363969093, "grad_norm": 0.2905018627643585, "learning_rate": 7.3331825316775796e-06, "loss": 0.3623, "step": 20209 }, { "epoch": 2.054697031313542, "grad_norm": 0.28504887223243713, "learning_rate": 7.332868647182604e-06, "loss": 0.3223, "step": 20210 }, { "epoch": 2.054798698657991, "grad_norm": 0.28867462277412415, "learning_rate": 7.332554750935189e-06, "loss": 0.3085, "step": 20211 }, { "epoch": 2.05490036600244, "grad_norm": 0.30002424120903015, "learning_rate": 7.33224084293692e-06, "loss": 0.3112, "step": 20212 }, { "epoch": 2.055002033346889, "grad_norm": 0.2761663794517517, "learning_rate": 7.331926923189373e-06, "loss": 0.3643, "step": 20213 }, { "epoch": 2.0551037006913377, "grad_norm": 0.2680511772632599, "learning_rate": 7.331612991694133e-06, "loss": 0.3275, "step": 20214 }, { "epoch": 2.0552053680357867, "grad_norm": 0.28477832674980164, "learning_rate": 7.331299048452783e-06, "loss": 0.3619, "step": 20215 }, { "epoch": 2.055307035380236, "grad_norm": 0.26810184121131897, "learning_rate": 7.330985093466899e-06, "loss": 0.3274, "step": 20216 }, { "epoch": 2.055408702724685, "grad_norm": 0.2802281677722931, "learning_rate": 7.330671126738069e-06, "loss": 0.3526, "step": 20217 }, { "epoch": 2.055510370069134, "grad_norm": 0.2874794602394104, "learning_rate": 7.33035714826787e-06, "loss": 0.356, "step": 20218 }, { "epoch": 2.055612037413583, "grad_norm": 0.2948509156703949, "learning_rate": 7.3300431580578856e-06, "loss": 0.3438, "step": 20219 }, { "epoch": 2.055713704758032, "grad_norm": 0.27106571197509766, "learning_rate": 7.329729156109698e-06, "loss": 0.3373, "step": 20220 }, { "epoch": 2.0558153721024808, "grad_norm": 0.2772195339202881, "learning_rate": 7.329415142424888e-06, "loss": 0.3388, "step": 20221 }, { "epoch": 2.0559170394469297, "grad_norm": 0.2869126498699188, "learning_rate": 7.329101117005039e-06, "loss": 0.3058, "step": 20222 }, { "epoch": 2.0560187067913787, "grad_norm": 0.2729997932910919, "learning_rate": 7.328787079851732e-06, "loss": 0.3159, "step": 20223 }, { "epoch": 2.0561203741358276, "grad_norm": 0.2749825417995453, "learning_rate": 7.328473030966548e-06, "loss": 0.3264, "step": 20224 }, { "epoch": 2.0562220414802765, "grad_norm": 0.30740225315093994, "learning_rate": 7.328158970351071e-06, "loss": 0.3501, "step": 20225 }, { "epoch": 2.0563237088247255, "grad_norm": 0.265165239572525, "learning_rate": 7.327844898006882e-06, "loss": 0.3254, "step": 20226 }, { "epoch": 2.0564253761691744, "grad_norm": 0.2622714042663574, "learning_rate": 7.3275308139355636e-06, "loss": 0.3444, "step": 20227 }, { "epoch": 2.0565270435136234, "grad_norm": 0.3116099536418915, "learning_rate": 7.327216718138699e-06, "loss": 0.3516, "step": 20228 }, { "epoch": 2.0566287108580723, "grad_norm": 0.30443882942199707, "learning_rate": 7.326902610617867e-06, "loss": 0.3323, "step": 20229 }, { "epoch": 2.0567303782025212, "grad_norm": 0.2765986919403076, "learning_rate": 7.3265884913746555e-06, "loss": 0.3531, "step": 20230 }, { "epoch": 2.05683204554697, "grad_norm": 0.28294387459754944, "learning_rate": 7.326274360410642e-06, "loss": 0.3035, "step": 20231 }, { "epoch": 2.056933712891419, "grad_norm": 0.27237507700920105, "learning_rate": 7.325960217727412e-06, "loss": 0.2922, "step": 20232 }, { "epoch": 2.057035380235868, "grad_norm": 0.2803478240966797, "learning_rate": 7.3256460633265495e-06, "loss": 0.3849, "step": 20233 }, { "epoch": 2.057137047580317, "grad_norm": 0.25107043981552124, "learning_rate": 7.325331897209631e-06, "loss": 0.3144, "step": 20234 }, { "epoch": 2.057238714924766, "grad_norm": 0.28763899207115173, "learning_rate": 7.325017719378245e-06, "loss": 0.3182, "step": 20235 }, { "epoch": 2.0573403822692153, "grad_norm": 0.3036603629589081, "learning_rate": 7.324703529833971e-06, "loss": 0.3358, "step": 20236 }, { "epoch": 2.0574420496136643, "grad_norm": 0.26898765563964844, "learning_rate": 7.324389328578394e-06, "loss": 0.3358, "step": 20237 }, { "epoch": 2.0575437169581132, "grad_norm": 0.2708130180835724, "learning_rate": 7.324075115613096e-06, "loss": 0.3289, "step": 20238 }, { "epoch": 2.057645384302562, "grad_norm": 0.28338122367858887, "learning_rate": 7.323760890939659e-06, "loss": 0.3163, "step": 20239 }, { "epoch": 2.057747051647011, "grad_norm": 0.26293936371803284, "learning_rate": 7.323446654559667e-06, "loss": 0.3168, "step": 20240 }, { "epoch": 2.05784871899146, "grad_norm": 0.2785051167011261, "learning_rate": 7.323132406474703e-06, "loss": 0.3393, "step": 20241 }, { "epoch": 2.057950386335909, "grad_norm": 0.28583818674087524, "learning_rate": 7.322818146686352e-06, "loss": 0.3059, "step": 20242 }, { "epoch": 2.058052053680358, "grad_norm": 0.2617000341415405, "learning_rate": 7.322503875196193e-06, "loss": 0.3339, "step": 20243 }, { "epoch": 2.058153721024807, "grad_norm": 0.2834189832210541, "learning_rate": 7.322189592005811e-06, "loss": 0.3539, "step": 20244 }, { "epoch": 2.058255388369256, "grad_norm": 0.282118022441864, "learning_rate": 7.32187529711679e-06, "loss": 0.3175, "step": 20245 }, { "epoch": 2.0583570557137048, "grad_norm": 0.2809307873249054, "learning_rate": 7.321560990530715e-06, "loss": 0.3518, "step": 20246 }, { "epoch": 2.0584587230581537, "grad_norm": 0.26841580867767334, "learning_rate": 7.321246672249165e-06, "loss": 0.3398, "step": 20247 }, { "epoch": 2.0585603904026026, "grad_norm": 0.2697744369506836, "learning_rate": 7.3209323422737275e-06, "loss": 0.3432, "step": 20248 }, { "epoch": 2.0586620577470516, "grad_norm": 0.2935928702354431, "learning_rate": 7.3206180006059825e-06, "loss": 0.3163, "step": 20249 }, { "epoch": 2.0587637250915005, "grad_norm": 0.28314316272735596, "learning_rate": 7.320303647247518e-06, "loss": 0.3403, "step": 20250 }, { "epoch": 2.0588653924359495, "grad_norm": 0.2556370794773102, "learning_rate": 7.319989282199914e-06, "loss": 0.3374, "step": 20251 }, { "epoch": 2.0589670597803984, "grad_norm": 0.25548219680786133, "learning_rate": 7.319674905464755e-06, "loss": 0.3095, "step": 20252 }, { "epoch": 2.0590687271248473, "grad_norm": 0.2810107469558716, "learning_rate": 7.319360517043625e-06, "loss": 0.3327, "step": 20253 }, { "epoch": 2.0591703944692963, "grad_norm": 0.2763577103614807, "learning_rate": 7.319046116938107e-06, "loss": 0.3033, "step": 20254 }, { "epoch": 2.0592720618137452, "grad_norm": 0.27226513624191284, "learning_rate": 7.318731705149788e-06, "loss": 0.3237, "step": 20255 }, { "epoch": 2.059373729158194, "grad_norm": 0.2738007605075836, "learning_rate": 7.3184172816802475e-06, "loss": 0.3255, "step": 20256 }, { "epoch": 2.0594753965026436, "grad_norm": 0.2876399755477905, "learning_rate": 7.318102846531073e-06, "loss": 0.3395, "step": 20257 }, { "epoch": 2.0595770638470925, "grad_norm": 0.2844279408454895, "learning_rate": 7.3177883997038475e-06, "loss": 0.305, "step": 20258 }, { "epoch": 2.0596787311915414, "grad_norm": 0.26689228415489197, "learning_rate": 7.317473941200154e-06, "loss": 0.3315, "step": 20259 }, { "epoch": 2.0597803985359904, "grad_norm": 0.2759358882904053, "learning_rate": 7.317159471021577e-06, "loss": 0.3463, "step": 20260 }, { "epoch": 2.0598820658804393, "grad_norm": 0.2767487168312073, "learning_rate": 7.316844989169703e-06, "loss": 0.3519, "step": 20261 }, { "epoch": 2.0599837332248883, "grad_norm": 0.3000267744064331, "learning_rate": 7.316530495646113e-06, "loss": 0.342, "step": 20262 }, { "epoch": 2.060085400569337, "grad_norm": 0.28017762303352356, "learning_rate": 7.316215990452394e-06, "loss": 0.3328, "step": 20263 }, { "epoch": 2.060187067913786, "grad_norm": 0.29894715547561646, "learning_rate": 7.315901473590128e-06, "loss": 0.3527, "step": 20264 }, { "epoch": 2.060288735258235, "grad_norm": 0.2675052881240845, "learning_rate": 7.315586945060901e-06, "loss": 0.3376, "step": 20265 }, { "epoch": 2.060390402602684, "grad_norm": 0.2746945917606354, "learning_rate": 7.315272404866298e-06, "loss": 0.3117, "step": 20266 }, { "epoch": 2.060492069947133, "grad_norm": 0.27875766158103943, "learning_rate": 7.3149578530079026e-06, "loss": 0.3351, "step": 20267 }, { "epoch": 2.060593737291582, "grad_norm": 0.2545853555202484, "learning_rate": 7.3146432894873e-06, "loss": 0.3274, "step": 20268 }, { "epoch": 2.060695404636031, "grad_norm": 0.287803590297699, "learning_rate": 7.314328714306073e-06, "loss": 0.3405, "step": 20269 }, { "epoch": 2.06079707198048, "grad_norm": 0.30102574825286865, "learning_rate": 7.314014127465809e-06, "loss": 0.3304, "step": 20270 }, { "epoch": 2.0608987393249287, "grad_norm": 0.2823804020881653, "learning_rate": 7.313699528968091e-06, "loss": 0.3662, "step": 20271 }, { "epoch": 2.0610004066693777, "grad_norm": 0.2754148542881012, "learning_rate": 7.313384918814506e-06, "loss": 0.342, "step": 20272 }, { "epoch": 2.0611020740138266, "grad_norm": 0.3032470643520355, "learning_rate": 7.313070297006637e-06, "loss": 0.3461, "step": 20273 }, { "epoch": 2.0612037413582756, "grad_norm": 0.29110434651374817, "learning_rate": 7.312755663546068e-06, "loss": 0.3227, "step": 20274 }, { "epoch": 2.0613054087027245, "grad_norm": 0.2952621579170227, "learning_rate": 7.3124410184343864e-06, "loss": 0.352, "step": 20275 }, { "epoch": 2.0614070760471734, "grad_norm": 0.2994215786457062, "learning_rate": 7.312126361673178e-06, "loss": 0.3645, "step": 20276 }, { "epoch": 2.061508743391623, "grad_norm": 0.27389973402023315, "learning_rate": 7.311811693264025e-06, "loss": 0.325, "step": 20277 }, { "epoch": 2.0616104107360718, "grad_norm": 0.2841348350048065, "learning_rate": 7.311497013208515e-06, "loss": 0.3164, "step": 20278 }, { "epoch": 2.0617120780805207, "grad_norm": 0.31177112460136414, "learning_rate": 7.311182321508231e-06, "loss": 0.3313, "step": 20279 }, { "epoch": 2.0618137454249696, "grad_norm": 0.2911568880081177, "learning_rate": 7.3108676181647596e-06, "loss": 0.3263, "step": 20280 }, { "epoch": 2.0619154127694186, "grad_norm": 0.3182622790336609, "learning_rate": 7.310552903179689e-06, "loss": 0.3518, "step": 20281 }, { "epoch": 2.0620170801138675, "grad_norm": 0.2766858637332916, "learning_rate": 7.3102381765545985e-06, "loss": 0.2952, "step": 20282 }, { "epoch": 2.0621187474583165, "grad_norm": 0.2954375743865967, "learning_rate": 7.309923438291079e-06, "loss": 0.3054, "step": 20283 }, { "epoch": 2.0622204148027654, "grad_norm": 0.26438620686531067, "learning_rate": 7.309608688390714e-06, "loss": 0.3317, "step": 20284 }, { "epoch": 2.0623220821472144, "grad_norm": 0.27371054887771606, "learning_rate": 7.309293926855088e-06, "loss": 0.3076, "step": 20285 }, { "epoch": 2.0624237494916633, "grad_norm": 0.2759849429130554, "learning_rate": 7.308979153685789e-06, "loss": 0.3654, "step": 20286 }, { "epoch": 2.0625254168361122, "grad_norm": 0.3173162341117859, "learning_rate": 7.308664368884402e-06, "loss": 0.3795, "step": 20287 }, { "epoch": 2.062627084180561, "grad_norm": 0.2820286750793457, "learning_rate": 7.308349572452512e-06, "loss": 0.3207, "step": 20288 }, { "epoch": 2.06272875152501, "grad_norm": 0.28472185134887695, "learning_rate": 7.308034764391706e-06, "loss": 0.3321, "step": 20289 }, { "epoch": 2.062830418869459, "grad_norm": 0.2691797912120819, "learning_rate": 7.307719944703568e-06, "loss": 0.3355, "step": 20290 }, { "epoch": 2.062932086213908, "grad_norm": 0.28251224756240845, "learning_rate": 7.307405113389688e-06, "loss": 0.3391, "step": 20291 }, { "epoch": 2.063033753558357, "grad_norm": 0.24954231083393097, "learning_rate": 7.307090270451647e-06, "loss": 0.3333, "step": 20292 }, { "epoch": 2.063135420902806, "grad_norm": 0.2606455683708191, "learning_rate": 7.306775415891034e-06, "loss": 0.3537, "step": 20293 }, { "epoch": 2.063237088247255, "grad_norm": 0.2783535122871399, "learning_rate": 7.306460549709434e-06, "loss": 0.3325, "step": 20294 }, { "epoch": 2.0633387555917038, "grad_norm": 0.2570860683917999, "learning_rate": 7.306145671908435e-06, "loss": 0.2991, "step": 20295 }, { "epoch": 2.0634404229361527, "grad_norm": 0.2905239462852478, "learning_rate": 7.305830782489623e-06, "loss": 0.3743, "step": 20296 }, { "epoch": 2.0635420902806016, "grad_norm": 0.27793219685554504, "learning_rate": 7.30551588145458e-06, "loss": 0.3364, "step": 20297 }, { "epoch": 2.063643757625051, "grad_norm": 0.2861456274986267, "learning_rate": 7.3052009688049e-06, "loss": 0.3309, "step": 20298 }, { "epoch": 2.0637454249695, "grad_norm": 0.28802385926246643, "learning_rate": 7.304886044542162e-06, "loss": 0.3554, "step": 20299 }, { "epoch": 2.063847092313949, "grad_norm": 0.2536796033382416, "learning_rate": 7.304571108667957e-06, "loss": 0.3444, "step": 20300 }, { "epoch": 2.063948759658398, "grad_norm": 0.26223036646842957, "learning_rate": 7.304256161183872e-06, "loss": 0.3507, "step": 20301 }, { "epoch": 2.064050427002847, "grad_norm": 0.28164446353912354, "learning_rate": 7.303941202091489e-06, "loss": 0.3025, "step": 20302 }, { "epoch": 2.0641520943472957, "grad_norm": 0.27846065163612366, "learning_rate": 7.3036262313924e-06, "loss": 0.319, "step": 20303 }, { "epoch": 2.0642537616917447, "grad_norm": 0.2854478061199188, "learning_rate": 7.303311249088189e-06, "loss": 0.3046, "step": 20304 }, { "epoch": 2.0643554290361936, "grad_norm": 0.29264035820961, "learning_rate": 7.3029962551804424e-06, "loss": 0.3575, "step": 20305 }, { "epoch": 2.0644570963806426, "grad_norm": 0.28258174657821655, "learning_rate": 7.302681249670749e-06, "loss": 0.3433, "step": 20306 }, { "epoch": 2.0645587637250915, "grad_norm": 0.27404549717903137, "learning_rate": 7.3023662325606935e-06, "loss": 0.3301, "step": 20307 }, { "epoch": 2.0646604310695404, "grad_norm": 0.26864081621170044, "learning_rate": 7.302051203851865e-06, "loss": 0.3175, "step": 20308 }, { "epoch": 2.0647620984139894, "grad_norm": 0.24990592896938324, "learning_rate": 7.301736163545851e-06, "loss": 0.3271, "step": 20309 }, { "epoch": 2.0648637657584383, "grad_norm": 0.3034599721431732, "learning_rate": 7.301421111644234e-06, "loss": 0.3633, "step": 20310 }, { "epoch": 2.0649654331028873, "grad_norm": 0.25740429759025574, "learning_rate": 7.301106048148606e-06, "loss": 0.3279, "step": 20311 }, { "epoch": 2.065067100447336, "grad_norm": 0.26085352897644043, "learning_rate": 7.300790973060552e-06, "loss": 0.3605, "step": 20312 }, { "epoch": 2.065168767791785, "grad_norm": 0.26724138855934143, "learning_rate": 7.300475886381662e-06, "loss": 0.3514, "step": 20313 }, { "epoch": 2.065270435136234, "grad_norm": 0.28618380427360535, "learning_rate": 7.30016078811352e-06, "loss": 0.3673, "step": 20314 }, { "epoch": 2.065372102480683, "grad_norm": 0.26680421829223633, "learning_rate": 7.299845678257712e-06, "loss": 0.3062, "step": 20315 }, { "epoch": 2.065473769825132, "grad_norm": 0.27934935688972473, "learning_rate": 7.29953055681583e-06, "loss": 0.3266, "step": 20316 }, { "epoch": 2.065575437169581, "grad_norm": 0.27528315782546997, "learning_rate": 7.299215423789459e-06, "loss": 0.351, "step": 20317 }, { "epoch": 2.0656771045140303, "grad_norm": 0.27531716227531433, "learning_rate": 7.29890027918019e-06, "loss": 0.3496, "step": 20318 }, { "epoch": 2.0657787718584792, "grad_norm": 0.28481587767601013, "learning_rate": 7.2985851229896054e-06, "loss": 0.3279, "step": 20319 }, { "epoch": 2.065880439202928, "grad_norm": 0.2599296569824219, "learning_rate": 7.298269955219294e-06, "loss": 0.3448, "step": 20320 }, { "epoch": 2.065982106547377, "grad_norm": 0.2731545567512512, "learning_rate": 7.297954775870846e-06, "loss": 0.3369, "step": 20321 }, { "epoch": 2.066083773891826, "grad_norm": 0.2875756025314331, "learning_rate": 7.2976395849458485e-06, "loss": 0.3291, "step": 20322 }, { "epoch": 2.066185441236275, "grad_norm": 0.26468440890312195, "learning_rate": 7.297324382445888e-06, "loss": 0.3229, "step": 20323 }, { "epoch": 2.066287108580724, "grad_norm": 0.2992512881755829, "learning_rate": 7.297009168372553e-06, "loss": 0.3461, "step": 20324 }, { "epoch": 2.066388775925173, "grad_norm": 0.2885821461677551, "learning_rate": 7.29669394272743e-06, "loss": 0.3138, "step": 20325 }, { "epoch": 2.066490443269622, "grad_norm": 0.2649739384651184, "learning_rate": 7.296378705512111e-06, "loss": 0.3725, "step": 20326 }, { "epoch": 2.0665921106140708, "grad_norm": 0.2958199977874756, "learning_rate": 7.296063456728183e-06, "loss": 0.3288, "step": 20327 }, { "epoch": 2.0666937779585197, "grad_norm": 0.28072163462638855, "learning_rate": 7.2957481963772306e-06, "loss": 0.3434, "step": 20328 }, { "epoch": 2.0667954453029687, "grad_norm": 0.27793633937835693, "learning_rate": 7.295432924460846e-06, "loss": 0.3357, "step": 20329 }, { "epoch": 2.0668971126474176, "grad_norm": 0.2521066963672638, "learning_rate": 7.295117640980614e-06, "loss": 0.3465, "step": 20330 }, { "epoch": 2.0669987799918665, "grad_norm": 0.2602376341819763, "learning_rate": 7.294802345938126e-06, "loss": 0.3006, "step": 20331 }, { "epoch": 2.0671004473363155, "grad_norm": 0.28514760732650757, "learning_rate": 7.294487039334969e-06, "loss": 0.2994, "step": 20332 }, { "epoch": 2.0672021146807644, "grad_norm": 0.2811569571495056, "learning_rate": 7.29417172117273e-06, "loss": 0.3132, "step": 20333 }, { "epoch": 2.0673037820252134, "grad_norm": 0.280815452337265, "learning_rate": 7.293856391453001e-06, "loss": 0.3348, "step": 20334 }, { "epoch": 2.0674054493696623, "grad_norm": 0.2945413589477539, "learning_rate": 7.293541050177366e-06, "loss": 0.3284, "step": 20335 }, { "epoch": 2.0675071167141112, "grad_norm": 0.2842634916305542, "learning_rate": 7.293225697347419e-06, "loss": 0.3632, "step": 20336 }, { "epoch": 2.06760878405856, "grad_norm": 0.29153138399124146, "learning_rate": 7.292910332964746e-06, "loss": 0.3378, "step": 20337 }, { "epoch": 2.067710451403009, "grad_norm": 0.26980888843536377, "learning_rate": 7.292594957030934e-06, "loss": 0.3039, "step": 20338 }, { "epoch": 2.0678121187474585, "grad_norm": 0.27351871132850647, "learning_rate": 7.292279569547574e-06, "loss": 0.3403, "step": 20339 }, { "epoch": 2.0679137860919075, "grad_norm": 0.27292850613594055, "learning_rate": 7.291964170516255e-06, "loss": 0.3316, "step": 20340 }, { "epoch": 2.0680154534363564, "grad_norm": 0.29372748732566833, "learning_rate": 7.2916487599385635e-06, "loss": 0.3138, "step": 20341 }, { "epoch": 2.0681171207808053, "grad_norm": 0.28914910554885864, "learning_rate": 7.291333337816091e-06, "loss": 0.3324, "step": 20342 }, { "epoch": 2.0682187881252543, "grad_norm": 0.2791360914707184, "learning_rate": 7.291017904150426e-06, "loss": 0.3399, "step": 20343 }, { "epoch": 2.0683204554697032, "grad_norm": 0.29145577549934387, "learning_rate": 7.290702458943157e-06, "loss": 0.2869, "step": 20344 }, { "epoch": 2.068422122814152, "grad_norm": 0.2736637592315674, "learning_rate": 7.290387002195874e-06, "loss": 0.3363, "step": 20345 }, { "epoch": 2.068523790158601, "grad_norm": 0.2722422182559967, "learning_rate": 7.290071533910164e-06, "loss": 0.3362, "step": 20346 }, { "epoch": 2.06862545750305, "grad_norm": 0.25852957367897034, "learning_rate": 7.289756054087619e-06, "loss": 0.3103, "step": 20347 }, { "epoch": 2.068727124847499, "grad_norm": 0.2777789235115051, "learning_rate": 7.289440562729826e-06, "loss": 0.3308, "step": 20348 }, { "epoch": 2.068828792191948, "grad_norm": 0.3032502830028534, "learning_rate": 7.289125059838378e-06, "loss": 0.3406, "step": 20349 }, { "epoch": 2.068930459536397, "grad_norm": 0.3189156651496887, "learning_rate": 7.288809545414859e-06, "loss": 0.394, "step": 20350 }, { "epoch": 2.069032126880846, "grad_norm": 0.2927563488483429, "learning_rate": 7.288494019460862e-06, "loss": 0.3183, "step": 20351 }, { "epoch": 2.0691337942252948, "grad_norm": 0.31457364559173584, "learning_rate": 7.288178481977978e-06, "loss": 0.3211, "step": 20352 }, { "epoch": 2.0692354615697437, "grad_norm": 0.29949814081192017, "learning_rate": 7.287862932967793e-06, "loss": 0.3588, "step": 20353 }, { "epoch": 2.0693371289141926, "grad_norm": 0.3074381351470947, "learning_rate": 7.287547372431898e-06, "loss": 0.338, "step": 20354 }, { "epoch": 2.0694387962586416, "grad_norm": 0.31088733673095703, "learning_rate": 7.2872318003718835e-06, "loss": 0.3491, "step": 20355 }, { "epoch": 2.0695404636030905, "grad_norm": 0.27810990810394287, "learning_rate": 7.286916216789338e-06, "loss": 0.3417, "step": 20356 }, { "epoch": 2.0696421309475395, "grad_norm": 0.27050867676734924, "learning_rate": 7.286600621685853e-06, "loss": 0.3275, "step": 20357 }, { "epoch": 2.0697437982919884, "grad_norm": 0.3112567365169525, "learning_rate": 7.286285015063017e-06, "loss": 0.3119, "step": 20358 }, { "epoch": 2.069845465636438, "grad_norm": 0.2845227122306824, "learning_rate": 7.285969396922421e-06, "loss": 0.3404, "step": 20359 }, { "epoch": 2.0699471329808867, "grad_norm": 0.3030257821083069, "learning_rate": 7.285653767265653e-06, "loss": 0.3422, "step": 20360 }, { "epoch": 2.0700488003253357, "grad_norm": 0.29235535860061646, "learning_rate": 7.285338126094306e-06, "loss": 0.3424, "step": 20361 }, { "epoch": 2.0701504676697846, "grad_norm": 0.27098503708839417, "learning_rate": 7.285022473409968e-06, "loss": 0.3399, "step": 20362 }, { "epoch": 2.0702521350142336, "grad_norm": 0.28511926531791687, "learning_rate": 7.284706809214229e-06, "loss": 0.3111, "step": 20363 }, { "epoch": 2.0703538023586825, "grad_norm": 0.2830626368522644, "learning_rate": 7.284391133508681e-06, "loss": 0.3538, "step": 20364 }, { "epoch": 2.0704554697031314, "grad_norm": 0.2896071672439575, "learning_rate": 7.2840754462949135e-06, "loss": 0.3225, "step": 20365 }, { "epoch": 2.0705571370475804, "grad_norm": 0.2875481843948364, "learning_rate": 7.283759747574515e-06, "loss": 0.3377, "step": 20366 }, { "epoch": 2.0706588043920293, "grad_norm": 0.25813814997673035, "learning_rate": 7.28344403734908e-06, "loss": 0.309, "step": 20367 }, { "epoch": 2.0707604717364783, "grad_norm": 0.3041713833808899, "learning_rate": 7.283128315620195e-06, "loss": 0.2837, "step": 20368 }, { "epoch": 2.070862139080927, "grad_norm": 0.29448243975639343, "learning_rate": 7.282812582389453e-06, "loss": 0.3156, "step": 20369 }, { "epoch": 2.070963806425376, "grad_norm": 0.26889705657958984, "learning_rate": 7.282496837658442e-06, "loss": 0.3013, "step": 20370 }, { "epoch": 2.071065473769825, "grad_norm": 0.2744346559047699, "learning_rate": 7.282181081428756e-06, "loss": 0.3345, "step": 20371 }, { "epoch": 2.071167141114274, "grad_norm": 0.27069056034088135, "learning_rate": 7.281865313701983e-06, "loss": 0.3322, "step": 20372 }, { "epoch": 2.071268808458723, "grad_norm": 0.283578485250473, "learning_rate": 7.281549534479716e-06, "loss": 0.3322, "step": 20373 }, { "epoch": 2.071370475803172, "grad_norm": 0.29065218567848206, "learning_rate": 7.281233743763545e-06, "loss": 0.3361, "step": 20374 }, { "epoch": 2.071472143147621, "grad_norm": 0.24697908759117126, "learning_rate": 7.280917941555059e-06, "loss": 0.3099, "step": 20375 }, { "epoch": 2.07157381049207, "grad_norm": 0.2660863995552063, "learning_rate": 7.2806021278558506e-06, "loss": 0.3109, "step": 20376 }, { "epoch": 2.0716754778365187, "grad_norm": 0.2716188430786133, "learning_rate": 7.280286302667512e-06, "loss": 0.3305, "step": 20377 }, { "epoch": 2.0717771451809677, "grad_norm": 0.26873794198036194, "learning_rate": 7.2799704659916304e-06, "loss": 0.3586, "step": 20378 }, { "epoch": 2.0718788125254166, "grad_norm": 0.28500956296920776, "learning_rate": 7.2796546178298026e-06, "loss": 0.3389, "step": 20379 }, { "epoch": 2.071980479869866, "grad_norm": 0.283003568649292, "learning_rate": 7.279338758183615e-06, "loss": 0.3115, "step": 20380 }, { "epoch": 2.072082147214315, "grad_norm": 0.26997920870780945, "learning_rate": 7.279022887054659e-06, "loss": 0.3379, "step": 20381 }, { "epoch": 2.072183814558764, "grad_norm": 0.2776060402393341, "learning_rate": 7.278707004444529e-06, "loss": 0.3256, "step": 20382 }, { "epoch": 2.072285481903213, "grad_norm": 0.29250141978263855, "learning_rate": 7.278391110354813e-06, "loss": 0.362, "step": 20383 }, { "epoch": 2.0723871492476618, "grad_norm": 0.278645396232605, "learning_rate": 7.278075204787106e-06, "loss": 0.3204, "step": 20384 }, { "epoch": 2.0724888165921107, "grad_norm": 0.27018028497695923, "learning_rate": 7.277759287742997e-06, "loss": 0.3384, "step": 20385 }, { "epoch": 2.0725904839365596, "grad_norm": 0.2759251594543457, "learning_rate": 7.277443359224077e-06, "loss": 0.3161, "step": 20386 }, { "epoch": 2.0726921512810086, "grad_norm": 0.26352211833000183, "learning_rate": 7.2771274192319394e-06, "loss": 0.3302, "step": 20387 }, { "epoch": 2.0727938186254575, "grad_norm": 0.26177167892456055, "learning_rate": 7.276811467768175e-06, "loss": 0.3208, "step": 20388 }, { "epoch": 2.0728954859699065, "grad_norm": 0.26179131865501404, "learning_rate": 7.276495504834375e-06, "loss": 0.373, "step": 20389 }, { "epoch": 2.0729971533143554, "grad_norm": 0.27896809577941895, "learning_rate": 7.2761795304321325e-06, "loss": 0.3299, "step": 20390 }, { "epoch": 2.0730988206588044, "grad_norm": 0.27173322439193726, "learning_rate": 7.275863544563036e-06, "loss": 0.3476, "step": 20391 }, { "epoch": 2.0732004880032533, "grad_norm": 0.27029433846473694, "learning_rate": 7.275547547228683e-06, "loss": 0.3315, "step": 20392 }, { "epoch": 2.0733021553477022, "grad_norm": 0.2686712443828583, "learning_rate": 7.2752315384306604e-06, "loss": 0.3298, "step": 20393 }, { "epoch": 2.073403822692151, "grad_norm": 0.2879789471626282, "learning_rate": 7.274915518170562e-06, "loss": 0.3156, "step": 20394 }, { "epoch": 2.0735054900366, "grad_norm": 0.2717708349227905, "learning_rate": 7.274599486449981e-06, "loss": 0.3467, "step": 20395 }, { "epoch": 2.073607157381049, "grad_norm": 0.2593446373939514, "learning_rate": 7.274283443270507e-06, "loss": 0.2855, "step": 20396 }, { "epoch": 2.073708824725498, "grad_norm": 0.26086679100990295, "learning_rate": 7.273967388633734e-06, "loss": 0.3434, "step": 20397 }, { "epoch": 2.073810492069947, "grad_norm": 0.2716897428035736, "learning_rate": 7.273651322541254e-06, "loss": 0.3331, "step": 20398 }, { "epoch": 2.073912159414396, "grad_norm": 0.25940242409706116, "learning_rate": 7.273335244994658e-06, "loss": 0.3194, "step": 20399 }, { "epoch": 2.0740138267588453, "grad_norm": 0.26067832112312317, "learning_rate": 7.273019155995541e-06, "loss": 0.323, "step": 20400 }, { "epoch": 2.074115494103294, "grad_norm": 0.2902156114578247, "learning_rate": 7.272703055545491e-06, "loss": 0.3611, "step": 20401 }, { "epoch": 2.074217161447743, "grad_norm": 0.2803869843482971, "learning_rate": 7.272386943646105e-06, "loss": 0.3531, "step": 20402 }, { "epoch": 2.074318828792192, "grad_norm": 0.27360668778419495, "learning_rate": 7.272070820298973e-06, "loss": 0.3178, "step": 20403 }, { "epoch": 2.074420496136641, "grad_norm": 0.2983728051185608, "learning_rate": 7.271754685505688e-06, "loss": 0.3774, "step": 20404 }, { "epoch": 2.07452216348109, "grad_norm": 0.25206810235977173, "learning_rate": 7.271438539267844e-06, "loss": 0.3507, "step": 20405 }, { "epoch": 2.074623830825539, "grad_norm": 0.29389652609825134, "learning_rate": 7.271122381587031e-06, "loss": 0.3399, "step": 20406 }, { "epoch": 2.074725498169988, "grad_norm": 0.26781222224235535, "learning_rate": 7.270806212464844e-06, "loss": 0.347, "step": 20407 }, { "epoch": 2.074827165514437, "grad_norm": 0.2636619210243225, "learning_rate": 7.2704900319028745e-06, "loss": 0.3181, "step": 20408 }, { "epoch": 2.0749288328588857, "grad_norm": 0.260104775428772, "learning_rate": 7.270173839902715e-06, "loss": 0.3283, "step": 20409 }, { "epoch": 2.0750305002033347, "grad_norm": 0.2509307861328125, "learning_rate": 7.269857636465961e-06, "loss": 0.3144, "step": 20410 }, { "epoch": 2.0751321675477836, "grad_norm": 0.2504262626171112, "learning_rate": 7.269541421594202e-06, "loss": 0.3105, "step": 20411 }, { "epoch": 2.0752338348922326, "grad_norm": 0.26773151755332947, "learning_rate": 7.269225195289034e-06, "loss": 0.3379, "step": 20412 }, { "epoch": 2.0753355022366815, "grad_norm": 0.27353012561798096, "learning_rate": 7.268908957552048e-06, "loss": 0.3649, "step": 20413 }, { "epoch": 2.0754371695811304, "grad_norm": 0.2701449990272522, "learning_rate": 7.268592708384837e-06, "loss": 0.3258, "step": 20414 }, { "epoch": 2.0755388369255794, "grad_norm": 0.26919466257095337, "learning_rate": 7.268276447788997e-06, "loss": 0.319, "step": 20415 }, { "epoch": 2.0756405042700283, "grad_norm": 0.2708616554737091, "learning_rate": 7.2679601757661165e-06, "loss": 0.3372, "step": 20416 }, { "epoch": 2.0757421716144773, "grad_norm": 0.2875710427761078, "learning_rate": 7.2676438923177924e-06, "loss": 0.3445, "step": 20417 }, { "epoch": 2.075843838958926, "grad_norm": 0.25682416558265686, "learning_rate": 7.267327597445619e-06, "loss": 0.3028, "step": 20418 }, { "epoch": 2.075945506303375, "grad_norm": 0.2705654799938202, "learning_rate": 7.267011291151187e-06, "loss": 0.3247, "step": 20419 }, { "epoch": 2.076047173647824, "grad_norm": 0.28490710258483887, "learning_rate": 7.266694973436091e-06, "loss": 0.3403, "step": 20420 }, { "epoch": 2.0761488409922735, "grad_norm": 0.28475040197372437, "learning_rate": 7.2663786443019245e-06, "loss": 0.3465, "step": 20421 }, { "epoch": 2.0762505083367224, "grad_norm": 0.2691338062286377, "learning_rate": 7.2660623037502805e-06, "loss": 0.3462, "step": 20422 }, { "epoch": 2.0763521756811714, "grad_norm": 0.2820397913455963, "learning_rate": 7.265745951782753e-06, "loss": 0.3309, "step": 20423 }, { "epoch": 2.0764538430256203, "grad_norm": 0.28001219034194946, "learning_rate": 7.265429588400936e-06, "loss": 0.3461, "step": 20424 }, { "epoch": 2.0765555103700692, "grad_norm": 0.2933887243270874, "learning_rate": 7.265113213606424e-06, "loss": 0.314, "step": 20425 }, { "epoch": 2.076657177714518, "grad_norm": 0.29068344831466675, "learning_rate": 7.264796827400808e-06, "loss": 0.3407, "step": 20426 }, { "epoch": 2.076758845058967, "grad_norm": 0.25569066405296326, "learning_rate": 7.2644804297856854e-06, "loss": 0.3392, "step": 20427 }, { "epoch": 2.076860512403416, "grad_norm": 0.27059483528137207, "learning_rate": 7.264164020762648e-06, "loss": 0.3318, "step": 20428 }, { "epoch": 2.076962179747865, "grad_norm": 0.2692410945892334, "learning_rate": 7.26384760033329e-06, "loss": 0.3001, "step": 20429 }, { "epoch": 2.077063847092314, "grad_norm": 0.2678123414516449, "learning_rate": 7.263531168499207e-06, "loss": 0.3386, "step": 20430 }, { "epoch": 2.077165514436763, "grad_norm": 0.28182947635650635, "learning_rate": 7.263214725261991e-06, "loss": 0.3478, "step": 20431 }, { "epoch": 2.077267181781212, "grad_norm": 0.26629048585891724, "learning_rate": 7.262898270623236e-06, "loss": 0.3137, "step": 20432 }, { "epoch": 2.0773688491256608, "grad_norm": 0.2848761975765228, "learning_rate": 7.2625818045845385e-06, "loss": 0.3396, "step": 20433 }, { "epoch": 2.0774705164701097, "grad_norm": 0.2690909206867218, "learning_rate": 7.2622653271474905e-06, "loss": 0.3143, "step": 20434 }, { "epoch": 2.0775721838145587, "grad_norm": 0.29637646675109863, "learning_rate": 7.2619488383136896e-06, "loss": 0.3238, "step": 20435 }, { "epoch": 2.0776738511590076, "grad_norm": 0.2672661244869232, "learning_rate": 7.2616323380847264e-06, "loss": 0.3177, "step": 20436 }, { "epoch": 2.0777755185034565, "grad_norm": 0.2736636996269226, "learning_rate": 7.261315826462196e-06, "loss": 0.327, "step": 20437 }, { "epoch": 2.0778771858479055, "grad_norm": 0.27909529209136963, "learning_rate": 7.260999303447695e-06, "loss": 0.3312, "step": 20438 }, { "epoch": 2.0779788531923544, "grad_norm": 0.3117227554321289, "learning_rate": 7.260682769042817e-06, "loss": 0.2874, "step": 20439 }, { "epoch": 2.0780805205368034, "grad_norm": 0.2700527608394623, "learning_rate": 7.260366223249156e-06, "loss": 0.3256, "step": 20440 }, { "epoch": 2.0781821878812528, "grad_norm": 0.2715175747871399, "learning_rate": 7.260049666068308e-06, "loss": 0.3151, "step": 20441 }, { "epoch": 2.0782838552257017, "grad_norm": 0.2726030647754669, "learning_rate": 7.2597330975018656e-06, "loss": 0.3437, "step": 20442 }, { "epoch": 2.0783855225701506, "grad_norm": 0.29557323455810547, "learning_rate": 7.259416517551425e-06, "loss": 0.3407, "step": 20443 }, { "epoch": 2.0784871899145996, "grad_norm": 0.27579694986343384, "learning_rate": 7.259099926218582e-06, "loss": 0.3172, "step": 20444 }, { "epoch": 2.0785888572590485, "grad_norm": 0.2756586968898773, "learning_rate": 7.2587833235049295e-06, "loss": 0.3159, "step": 20445 }, { "epoch": 2.0786905246034975, "grad_norm": 0.27271610498428345, "learning_rate": 7.258466709412064e-06, "loss": 0.3528, "step": 20446 }, { "epoch": 2.0787921919479464, "grad_norm": 0.26899826526641846, "learning_rate": 7.2581500839415785e-06, "loss": 0.3458, "step": 20447 }, { "epoch": 2.0788938592923953, "grad_norm": 0.2799144685268402, "learning_rate": 7.257833447095071e-06, "loss": 0.3488, "step": 20448 }, { "epoch": 2.0789955266368443, "grad_norm": 0.2932625412940979, "learning_rate": 7.257516798874134e-06, "loss": 0.388, "step": 20449 }, { "epoch": 2.0790971939812932, "grad_norm": 0.286518394947052, "learning_rate": 7.257200139280366e-06, "loss": 0.3626, "step": 20450 }, { "epoch": 2.079198861325742, "grad_norm": 0.2888329327106476, "learning_rate": 7.2568834683153585e-06, "loss": 0.3157, "step": 20451 }, { "epoch": 2.079300528670191, "grad_norm": 0.28699538111686707, "learning_rate": 7.256566785980708e-06, "loss": 0.3499, "step": 20452 }, { "epoch": 2.07940219601464, "grad_norm": 0.2630796432495117, "learning_rate": 7.25625009227801e-06, "loss": 0.3256, "step": 20453 }, { "epoch": 2.079503863359089, "grad_norm": 0.27213186025619507, "learning_rate": 7.255933387208861e-06, "loss": 0.3185, "step": 20454 }, { "epoch": 2.079605530703538, "grad_norm": 0.2795315384864807, "learning_rate": 7.255616670774857e-06, "loss": 0.3219, "step": 20455 }, { "epoch": 2.079707198047987, "grad_norm": 0.2593541741371155, "learning_rate": 7.25529994297759e-06, "loss": 0.3341, "step": 20456 }, { "epoch": 2.079808865392436, "grad_norm": 0.28454411029815674, "learning_rate": 7.2549832038186575e-06, "loss": 0.3059, "step": 20457 }, { "epoch": 2.0799105327368848, "grad_norm": 0.2654339671134949, "learning_rate": 7.254666453299658e-06, "loss": 0.3457, "step": 20458 }, { "epoch": 2.0800122000813337, "grad_norm": 0.27813103795051575, "learning_rate": 7.2543496914221825e-06, "loss": 0.3442, "step": 20459 }, { "epoch": 2.0801138674257826, "grad_norm": 0.28103089332580566, "learning_rate": 7.25403291818783e-06, "loss": 0.3557, "step": 20460 }, { "epoch": 2.0802155347702316, "grad_norm": 0.2403814196586609, "learning_rate": 7.253716133598196e-06, "loss": 0.3572, "step": 20461 }, { "epoch": 2.080317202114681, "grad_norm": 0.2896103262901306, "learning_rate": 7.253399337654874e-06, "loss": 0.3155, "step": 20462 }, { "epoch": 2.08041886945913, "grad_norm": 0.2882102429866791, "learning_rate": 7.253082530359463e-06, "loss": 0.3389, "step": 20463 }, { "epoch": 2.080520536803579, "grad_norm": 0.2739850878715515, "learning_rate": 7.252765711713557e-06, "loss": 0.3335, "step": 20464 }, { "epoch": 2.080622204148028, "grad_norm": 0.28196588158607483, "learning_rate": 7.252448881718752e-06, "loss": 0.3188, "step": 20465 }, { "epoch": 2.0807238714924767, "grad_norm": 0.25186070799827576, "learning_rate": 7.252132040376647e-06, "loss": 0.3226, "step": 20466 }, { "epoch": 2.0808255388369257, "grad_norm": 0.24865412712097168, "learning_rate": 7.251815187688833e-06, "loss": 0.3406, "step": 20467 }, { "epoch": 2.0809272061813746, "grad_norm": 0.2672784626483917, "learning_rate": 7.251498323656911e-06, "loss": 0.3505, "step": 20468 }, { "epoch": 2.0810288735258236, "grad_norm": 0.3005438446998596, "learning_rate": 7.251181448282475e-06, "loss": 0.3468, "step": 20469 }, { "epoch": 2.0811305408702725, "grad_norm": 0.27601099014282227, "learning_rate": 7.2508645615671224e-06, "loss": 0.347, "step": 20470 }, { "epoch": 2.0812322082147214, "grad_norm": 0.278376966714859, "learning_rate": 7.250547663512449e-06, "loss": 0.3452, "step": 20471 }, { "epoch": 2.0813338755591704, "grad_norm": 0.2773299515247345, "learning_rate": 7.25023075412005e-06, "loss": 0.3313, "step": 20472 }, { "epoch": 2.0814355429036193, "grad_norm": 0.26143038272857666, "learning_rate": 7.249913833391524e-06, "loss": 0.3107, "step": 20473 }, { "epoch": 2.0815372102480683, "grad_norm": 0.2645542323589325, "learning_rate": 7.249596901328468e-06, "loss": 0.3502, "step": 20474 }, { "epoch": 2.081638877592517, "grad_norm": 0.2735193967819214, "learning_rate": 7.249279957932476e-06, "loss": 0.3236, "step": 20475 }, { "epoch": 2.081740544936966, "grad_norm": 0.27561619877815247, "learning_rate": 7.248963003205147e-06, "loss": 0.3347, "step": 20476 }, { "epoch": 2.081842212281415, "grad_norm": 0.28532645106315613, "learning_rate": 7.248646037148075e-06, "loss": 0.3556, "step": 20477 }, { "epoch": 2.081943879625864, "grad_norm": 0.2812562882900238, "learning_rate": 7.24832905976286e-06, "loss": 0.343, "step": 20478 }, { "epoch": 2.082045546970313, "grad_norm": 0.29239991307258606, "learning_rate": 7.248012071051098e-06, "loss": 0.3466, "step": 20479 }, { "epoch": 2.082147214314762, "grad_norm": 0.2670400142669678, "learning_rate": 7.247695071014385e-06, "loss": 0.3192, "step": 20480 }, { "epoch": 2.082248881659211, "grad_norm": 0.2637893259525299, "learning_rate": 7.247378059654317e-06, "loss": 0.3337, "step": 20481 }, { "epoch": 2.0823505490036602, "grad_norm": 0.2577176094055176, "learning_rate": 7.247061036972494e-06, "loss": 0.351, "step": 20482 }, { "epoch": 2.082452216348109, "grad_norm": 0.2861163020133972, "learning_rate": 7.246744002970511e-06, "loss": 0.3311, "step": 20483 }, { "epoch": 2.082553883692558, "grad_norm": 0.26262813806533813, "learning_rate": 7.246426957649966e-06, "loss": 0.3, "step": 20484 }, { "epoch": 2.082655551037007, "grad_norm": 0.3104252219200134, "learning_rate": 7.246109901012455e-06, "loss": 0.3161, "step": 20485 }, { "epoch": 2.082757218381456, "grad_norm": 0.26124390959739685, "learning_rate": 7.245792833059578e-06, "loss": 0.3223, "step": 20486 }, { "epoch": 2.082858885725905, "grad_norm": 0.2739330530166626, "learning_rate": 7.245475753792927e-06, "loss": 0.3152, "step": 20487 }, { "epoch": 2.082960553070354, "grad_norm": 0.29937756061553955, "learning_rate": 7.245158663214105e-06, "loss": 0.3321, "step": 20488 }, { "epoch": 2.083062220414803, "grad_norm": 0.26157063245773315, "learning_rate": 7.244841561324707e-06, "loss": 0.3321, "step": 20489 }, { "epoch": 2.0831638877592518, "grad_norm": 0.26424771547317505, "learning_rate": 7.24452444812633e-06, "loss": 0.3152, "step": 20490 }, { "epoch": 2.0832655551037007, "grad_norm": 0.2939281761646271, "learning_rate": 7.244207323620573e-06, "loss": 0.3237, "step": 20491 }, { "epoch": 2.0833672224481496, "grad_norm": 0.28555747866630554, "learning_rate": 7.243890187809032e-06, "loss": 0.3583, "step": 20492 }, { "epoch": 2.0834688897925986, "grad_norm": 0.2746714651584625, "learning_rate": 7.243573040693306e-06, "loss": 0.3554, "step": 20493 }, { "epoch": 2.0835705571370475, "grad_norm": 0.2933480441570282, "learning_rate": 7.243255882274992e-06, "loss": 0.345, "step": 20494 }, { "epoch": 2.0836722244814965, "grad_norm": 0.2759563624858856, "learning_rate": 7.242938712555688e-06, "loss": 0.334, "step": 20495 }, { "epoch": 2.0837738918259454, "grad_norm": 0.26201510429382324, "learning_rate": 7.242621531536993e-06, "loss": 0.3523, "step": 20496 }, { "epoch": 2.0838755591703944, "grad_norm": 0.2753036320209503, "learning_rate": 7.242304339220502e-06, "loss": 0.3337, "step": 20497 }, { "epoch": 2.0839772265148433, "grad_norm": 0.2996233403682709, "learning_rate": 7.241987135607814e-06, "loss": 0.3539, "step": 20498 }, { "epoch": 2.0840788938592922, "grad_norm": 0.27923810482025146, "learning_rate": 7.241669920700529e-06, "loss": 0.3348, "step": 20499 }, { "epoch": 2.084180561203741, "grad_norm": 0.27940452098846436, "learning_rate": 7.241352694500244e-06, "loss": 0.3171, "step": 20500 }, { "epoch": 2.08428222854819, "grad_norm": 0.2935850918292999, "learning_rate": 7.241035457008556e-06, "loss": 0.3527, "step": 20501 }, { "epoch": 2.084383895892639, "grad_norm": 0.25137749314308167, "learning_rate": 7.240718208227063e-06, "loss": 0.3254, "step": 20502 }, { "epoch": 2.0844855632370884, "grad_norm": 0.27532482147216797, "learning_rate": 7.240400948157366e-06, "loss": 0.3336, "step": 20503 }, { "epoch": 2.0845872305815374, "grad_norm": 0.2794501483440399, "learning_rate": 7.240083676801061e-06, "loss": 0.3231, "step": 20504 }, { "epoch": 2.0846888979259863, "grad_norm": 0.28191033005714417, "learning_rate": 7.239766394159745e-06, "loss": 0.3333, "step": 20505 }, { "epoch": 2.0847905652704353, "grad_norm": 0.2626698911190033, "learning_rate": 7.239449100235021e-06, "loss": 0.3265, "step": 20506 }, { "epoch": 2.084892232614884, "grad_norm": 0.2675493061542511, "learning_rate": 7.2391317950284824e-06, "loss": 0.3235, "step": 20507 }, { "epoch": 2.084993899959333, "grad_norm": 0.28294137120246887, "learning_rate": 7.23881447854173e-06, "loss": 0.3201, "step": 20508 }, { "epoch": 2.085095567303782, "grad_norm": 0.27872228622436523, "learning_rate": 7.2384971507763636e-06, "loss": 0.3061, "step": 20509 }, { "epoch": 2.085197234648231, "grad_norm": 0.2918175458908081, "learning_rate": 7.238179811733979e-06, "loss": 0.349, "step": 20510 }, { "epoch": 2.08529890199268, "grad_norm": 0.27672725915908813, "learning_rate": 7.2378624614161785e-06, "loss": 0.3683, "step": 20511 }, { "epoch": 2.085400569337129, "grad_norm": 0.27342668175697327, "learning_rate": 7.237545099824557e-06, "loss": 0.3573, "step": 20512 }, { "epoch": 2.085502236681578, "grad_norm": 0.27890852093696594, "learning_rate": 7.237227726960714e-06, "loss": 0.3224, "step": 20513 }, { "epoch": 2.085603904026027, "grad_norm": 0.29769280552864075, "learning_rate": 7.236910342826251e-06, "loss": 0.3455, "step": 20514 }, { "epoch": 2.0857055713704757, "grad_norm": 0.2676554024219513, "learning_rate": 7.2365929474227645e-06, "loss": 0.3057, "step": 20515 }, { "epoch": 2.0858072387149247, "grad_norm": 0.2908197045326233, "learning_rate": 7.2362755407518555e-06, "loss": 0.327, "step": 20516 }, { "epoch": 2.0859089060593736, "grad_norm": 0.2952236533164978, "learning_rate": 7.235958122815121e-06, "loss": 0.3427, "step": 20517 }, { "epoch": 2.0860105734038226, "grad_norm": 0.2816256880760193, "learning_rate": 7.235640693614161e-06, "loss": 0.3172, "step": 20518 }, { "epoch": 2.0861122407482715, "grad_norm": 0.2858942151069641, "learning_rate": 7.235323253150575e-06, "loss": 0.3534, "step": 20519 }, { "epoch": 2.0862139080927204, "grad_norm": 0.2730931341648102, "learning_rate": 7.2350058014259605e-06, "loss": 0.3306, "step": 20520 }, { "epoch": 2.0863155754371694, "grad_norm": 0.2663307189941406, "learning_rate": 7.2346883384419185e-06, "loss": 0.3371, "step": 20521 }, { "epoch": 2.0864172427816183, "grad_norm": 0.27625879645347595, "learning_rate": 7.234370864200046e-06, "loss": 0.329, "step": 20522 }, { "epoch": 2.0865189101260677, "grad_norm": 0.2716890275478363, "learning_rate": 7.234053378701946e-06, "loss": 0.3138, "step": 20523 }, { "epoch": 2.0866205774705167, "grad_norm": 0.2660183608531952, "learning_rate": 7.233735881949216e-06, "loss": 0.3379, "step": 20524 }, { "epoch": 2.0867222448149656, "grad_norm": 0.2719065248966217, "learning_rate": 7.233418373943455e-06, "loss": 0.3355, "step": 20525 }, { "epoch": 2.0868239121594145, "grad_norm": 0.2752034366130829, "learning_rate": 7.233100854686264e-06, "loss": 0.3591, "step": 20526 }, { "epoch": 2.0869255795038635, "grad_norm": 0.26717355847358704, "learning_rate": 7.23278332417924e-06, "loss": 0.3062, "step": 20527 }, { "epoch": 2.0870272468483124, "grad_norm": 0.280505895614624, "learning_rate": 7.232465782423983e-06, "loss": 0.3632, "step": 20528 }, { "epoch": 2.0871289141927614, "grad_norm": 0.2579858601093292, "learning_rate": 7.232148229422097e-06, "loss": 0.3344, "step": 20529 }, { "epoch": 2.0872305815372103, "grad_norm": 0.26269233226776123, "learning_rate": 7.231830665175177e-06, "loss": 0.3139, "step": 20530 }, { "epoch": 2.0873322488816592, "grad_norm": 0.30122271180152893, "learning_rate": 7.231513089684825e-06, "loss": 0.3242, "step": 20531 }, { "epoch": 2.087433916226108, "grad_norm": 0.29275813698768616, "learning_rate": 7.2311955029526405e-06, "loss": 0.3774, "step": 20532 }, { "epoch": 2.087535583570557, "grad_norm": 0.28983521461486816, "learning_rate": 7.230877904980221e-06, "loss": 0.355, "step": 20533 }, { "epoch": 2.087637250915006, "grad_norm": 0.294352263212204, "learning_rate": 7.2305602957691715e-06, "loss": 0.3388, "step": 20534 }, { "epoch": 2.087738918259455, "grad_norm": 0.27441859245300293, "learning_rate": 7.230242675321089e-06, "loss": 0.3229, "step": 20535 }, { "epoch": 2.087840585603904, "grad_norm": 0.28403663635253906, "learning_rate": 7.229925043637573e-06, "loss": 0.3394, "step": 20536 }, { "epoch": 2.087942252948353, "grad_norm": 0.3046487867832184, "learning_rate": 7.229607400720225e-06, "loss": 0.3256, "step": 20537 }, { "epoch": 2.088043920292802, "grad_norm": 0.27293261885643005, "learning_rate": 7.229289746570644e-06, "loss": 0.3379, "step": 20538 }, { "epoch": 2.0881455876372508, "grad_norm": 0.27070149779319763, "learning_rate": 7.228972081190432e-06, "loss": 0.3444, "step": 20539 }, { "epoch": 2.0882472549816997, "grad_norm": 0.26900482177734375, "learning_rate": 7.2286544045811875e-06, "loss": 0.3344, "step": 20540 }, { "epoch": 2.0883489223261487, "grad_norm": 0.28111380338668823, "learning_rate": 7.228336716744513e-06, "loss": 0.3524, "step": 20541 }, { "epoch": 2.0884505896705976, "grad_norm": 0.27201011776924133, "learning_rate": 7.228019017682007e-06, "loss": 0.3515, "step": 20542 }, { "epoch": 2.0885522570150465, "grad_norm": 0.2852090299129486, "learning_rate": 7.22770130739527e-06, "loss": 0.3292, "step": 20543 }, { "epoch": 2.088653924359496, "grad_norm": 0.29609739780426025, "learning_rate": 7.227383585885903e-06, "loss": 0.3097, "step": 20544 }, { "epoch": 2.088755591703945, "grad_norm": 0.26462286710739136, "learning_rate": 7.227065853155507e-06, "loss": 0.3623, "step": 20545 }, { "epoch": 2.088857259048394, "grad_norm": 0.29205265641212463, "learning_rate": 7.226748109205684e-06, "loss": 0.3543, "step": 20546 }, { "epoch": 2.0889589263928428, "grad_norm": 0.26385897397994995, "learning_rate": 7.226430354038032e-06, "loss": 0.3454, "step": 20547 }, { "epoch": 2.0890605937372917, "grad_norm": 0.2971165180206299, "learning_rate": 7.226112587654153e-06, "loss": 0.3414, "step": 20548 }, { "epoch": 2.0891622610817406, "grad_norm": 0.26871803402900696, "learning_rate": 7.2257948100556475e-06, "loss": 0.3199, "step": 20549 }, { "epoch": 2.0892639284261896, "grad_norm": 0.3126077651977539, "learning_rate": 7.225477021244118e-06, "loss": 0.3287, "step": 20550 }, { "epoch": 2.0893655957706385, "grad_norm": 0.26797401905059814, "learning_rate": 7.225159221221163e-06, "loss": 0.3576, "step": 20551 }, { "epoch": 2.0894672631150875, "grad_norm": 0.27172842621803284, "learning_rate": 7.224841409988385e-06, "loss": 0.3577, "step": 20552 }, { "epoch": 2.0895689304595364, "grad_norm": 0.26779207587242126, "learning_rate": 7.224523587547383e-06, "loss": 0.3323, "step": 20553 }, { "epoch": 2.0896705978039853, "grad_norm": 0.27046114206314087, "learning_rate": 7.224205753899763e-06, "loss": 0.331, "step": 20554 }, { "epoch": 2.0897722651484343, "grad_norm": 0.29493486881256104, "learning_rate": 7.22388790904712e-06, "loss": 0.3935, "step": 20555 }, { "epoch": 2.0898739324928832, "grad_norm": 0.28411865234375, "learning_rate": 7.22357005299106e-06, "loss": 0.3315, "step": 20556 }, { "epoch": 2.089975599837332, "grad_norm": 0.27935728430747986, "learning_rate": 7.223252185733183e-06, "loss": 0.3285, "step": 20557 }, { "epoch": 2.090077267181781, "grad_norm": 0.2812938988208771, "learning_rate": 7.222934307275087e-06, "loss": 0.3709, "step": 20558 }, { "epoch": 2.09017893452623, "grad_norm": 0.2787918746471405, "learning_rate": 7.222616417618378e-06, "loss": 0.3296, "step": 20559 }, { "epoch": 2.090280601870679, "grad_norm": 0.28076162934303284, "learning_rate": 7.222298516764656e-06, "loss": 0.3649, "step": 20560 }, { "epoch": 2.090382269215128, "grad_norm": 0.2820513844490051, "learning_rate": 7.221980604715522e-06, "loss": 0.3384, "step": 20561 }, { "epoch": 2.090483936559577, "grad_norm": 0.302650511264801, "learning_rate": 7.221662681472578e-06, "loss": 0.343, "step": 20562 }, { "epoch": 2.090585603904026, "grad_norm": 0.26915639638900757, "learning_rate": 7.221344747037423e-06, "loss": 0.3423, "step": 20563 }, { "epoch": 2.090687271248475, "grad_norm": 0.2671961188316345, "learning_rate": 7.221026801411663e-06, "loss": 0.3267, "step": 20564 }, { "epoch": 2.090788938592924, "grad_norm": 0.26378706097602844, "learning_rate": 7.220708844596899e-06, "loss": 0.3681, "step": 20565 }, { "epoch": 2.090890605937373, "grad_norm": 0.29531407356262207, "learning_rate": 7.22039087659473e-06, "loss": 0.3086, "step": 20566 }, { "epoch": 2.090992273281822, "grad_norm": 0.2804640531539917, "learning_rate": 7.2200728974067604e-06, "loss": 0.3243, "step": 20567 }, { "epoch": 2.091093940626271, "grad_norm": 0.27149075269699097, "learning_rate": 7.2197549070345885e-06, "loss": 0.3335, "step": 20568 }, { "epoch": 2.09119560797072, "grad_norm": 0.28840455412864685, "learning_rate": 7.219436905479821e-06, "loss": 0.3508, "step": 20569 }, { "epoch": 2.091297275315169, "grad_norm": 0.27492809295654297, "learning_rate": 7.219118892744058e-06, "loss": 0.318, "step": 20570 }, { "epoch": 2.091398942659618, "grad_norm": 0.26469630002975464, "learning_rate": 7.2188008688289e-06, "loss": 0.3414, "step": 20571 }, { "epoch": 2.0915006100040667, "grad_norm": 0.2768840789794922, "learning_rate": 7.2184828337359525e-06, "loss": 0.3473, "step": 20572 }, { "epoch": 2.0916022773485157, "grad_norm": 0.2743470370769501, "learning_rate": 7.2181647874668135e-06, "loss": 0.3348, "step": 20573 }, { "epoch": 2.0917039446929646, "grad_norm": 0.2657284736633301, "learning_rate": 7.217846730023088e-06, "loss": 0.3447, "step": 20574 }, { "epoch": 2.0918056120374136, "grad_norm": 0.29552990198135376, "learning_rate": 7.217528661406379e-06, "loss": 0.3221, "step": 20575 }, { "epoch": 2.0919072793818625, "grad_norm": 0.2742787301540375, "learning_rate": 7.217210581618286e-06, "loss": 0.3874, "step": 20576 }, { "epoch": 2.0920089467263114, "grad_norm": 0.31162500381469727, "learning_rate": 7.216892490660415e-06, "loss": 0.3489, "step": 20577 }, { "epoch": 2.0921106140707604, "grad_norm": 0.2581879496574402, "learning_rate": 7.216574388534365e-06, "loss": 0.3518, "step": 20578 }, { "epoch": 2.0922122814152093, "grad_norm": 0.28322815895080566, "learning_rate": 7.2162562752417395e-06, "loss": 0.3626, "step": 20579 }, { "epoch": 2.0923139487596583, "grad_norm": 0.275157630443573, "learning_rate": 7.215938150784143e-06, "loss": 0.3191, "step": 20580 }, { "epoch": 2.092415616104107, "grad_norm": 0.28174883127212524, "learning_rate": 7.215620015163175e-06, "loss": 0.3355, "step": 20581 }, { "epoch": 2.092517283448556, "grad_norm": 0.2661072611808777, "learning_rate": 7.215301868380441e-06, "loss": 0.3389, "step": 20582 }, { "epoch": 2.092618950793005, "grad_norm": 0.2979668080806732, "learning_rate": 7.214983710437542e-06, "loss": 0.3645, "step": 20583 }, { "epoch": 2.092720618137454, "grad_norm": 0.27163827419281006, "learning_rate": 7.2146655413360815e-06, "loss": 0.3597, "step": 20584 }, { "epoch": 2.0928222854819034, "grad_norm": 0.28574761748313904, "learning_rate": 7.214347361077663e-06, "loss": 0.3171, "step": 20585 }, { "epoch": 2.0929239528263524, "grad_norm": 0.2877173125743866, "learning_rate": 7.214029169663888e-06, "loss": 0.3376, "step": 20586 }, { "epoch": 2.0930256201708013, "grad_norm": 0.2916785776615143, "learning_rate": 7.213710967096362e-06, "loss": 0.3151, "step": 20587 }, { "epoch": 2.0931272875152502, "grad_norm": 0.26801598072052, "learning_rate": 7.213392753376684e-06, "loss": 0.3381, "step": 20588 }, { "epoch": 2.093228954859699, "grad_norm": 0.26879119873046875, "learning_rate": 7.213074528506458e-06, "loss": 0.3245, "step": 20589 }, { "epoch": 2.093330622204148, "grad_norm": 0.2770676910877228, "learning_rate": 7.212756292487292e-06, "loss": 0.3394, "step": 20590 }, { "epoch": 2.093432289548597, "grad_norm": 0.2802768051624298, "learning_rate": 7.212438045320783e-06, "loss": 0.3461, "step": 20591 }, { "epoch": 2.093533956893046, "grad_norm": 0.285997211933136, "learning_rate": 7.212119787008539e-06, "loss": 0.3523, "step": 20592 }, { "epoch": 2.093635624237495, "grad_norm": 0.2824383080005646, "learning_rate": 7.211801517552159e-06, "loss": 0.3252, "step": 20593 }, { "epoch": 2.093737291581944, "grad_norm": 0.29146403074264526, "learning_rate": 7.211483236953249e-06, "loss": 0.3317, "step": 20594 }, { "epoch": 2.093838958926393, "grad_norm": 0.28265196084976196, "learning_rate": 7.211164945213413e-06, "loss": 0.3332, "step": 20595 }, { "epoch": 2.0939406262708418, "grad_norm": 0.2739547789096832, "learning_rate": 7.210846642334254e-06, "loss": 0.3473, "step": 20596 }, { "epoch": 2.0940422936152907, "grad_norm": 0.289600670337677, "learning_rate": 7.210528328317374e-06, "loss": 0.3455, "step": 20597 }, { "epoch": 2.0941439609597396, "grad_norm": 0.26394492387771606, "learning_rate": 7.210210003164378e-06, "loss": 0.3212, "step": 20598 }, { "epoch": 2.0942456283041886, "grad_norm": 0.2732204496860504, "learning_rate": 7.209891666876868e-06, "loss": 0.3329, "step": 20599 }, { "epoch": 2.0943472956486375, "grad_norm": 0.27963173389434814, "learning_rate": 7.209573319456451e-06, "loss": 0.3548, "step": 20600 }, { "epoch": 2.0944489629930865, "grad_norm": 0.2895791530609131, "learning_rate": 7.209254960904727e-06, "loss": 0.3493, "step": 20601 }, { "epoch": 2.0945506303375354, "grad_norm": 0.26578277349472046, "learning_rate": 7.2089365912233024e-06, "loss": 0.3361, "step": 20602 }, { "epoch": 2.0946522976819844, "grad_norm": 0.2974221408367157, "learning_rate": 7.2086182104137795e-06, "loss": 0.3296, "step": 20603 }, { "epoch": 2.0947539650264333, "grad_norm": 0.28725865483283997, "learning_rate": 7.208299818477762e-06, "loss": 0.3535, "step": 20604 }, { "epoch": 2.0948556323708827, "grad_norm": 0.27862635254859924, "learning_rate": 7.207981415416857e-06, "loss": 0.3347, "step": 20605 }, { "epoch": 2.0949572997153316, "grad_norm": 0.280846506357193, "learning_rate": 7.207663001232665e-06, "loss": 0.342, "step": 20606 }, { "epoch": 2.0950589670597806, "grad_norm": 0.2745991051197052, "learning_rate": 7.207344575926792e-06, "loss": 0.3204, "step": 20607 }, { "epoch": 2.0951606344042295, "grad_norm": 0.27180030941963196, "learning_rate": 7.207026139500841e-06, "loss": 0.3196, "step": 20608 }, { "epoch": 2.0952623017486784, "grad_norm": 0.29368412494659424, "learning_rate": 7.206707691956417e-06, "loss": 0.351, "step": 20609 }, { "epoch": 2.0953639690931274, "grad_norm": 0.3081296384334564, "learning_rate": 7.2063892332951244e-06, "loss": 0.335, "step": 20610 }, { "epoch": 2.0954656364375763, "grad_norm": 0.2702188491821289, "learning_rate": 7.2060707635185665e-06, "loss": 0.3215, "step": 20611 }, { "epoch": 2.0955673037820253, "grad_norm": 0.26287680864334106, "learning_rate": 7.2057522826283485e-06, "loss": 0.3309, "step": 20612 }, { "epoch": 2.095668971126474, "grad_norm": 0.25964367389678955, "learning_rate": 7.2054337906260756e-06, "loss": 0.3426, "step": 20613 }, { "epoch": 2.095770638470923, "grad_norm": 0.2733154892921448, "learning_rate": 7.20511528751335e-06, "loss": 0.3433, "step": 20614 }, { "epoch": 2.095872305815372, "grad_norm": 0.2816108763217926, "learning_rate": 7.204796773291779e-06, "loss": 0.3498, "step": 20615 }, { "epoch": 2.095973973159821, "grad_norm": 0.2572970688343048, "learning_rate": 7.204478247962965e-06, "loss": 0.3351, "step": 20616 }, { "epoch": 2.09607564050427, "grad_norm": 0.25968730449676514, "learning_rate": 7.2041597115285135e-06, "loss": 0.3079, "step": 20617 }, { "epoch": 2.096177307848719, "grad_norm": 0.2581523656845093, "learning_rate": 7.203841163990029e-06, "loss": 0.327, "step": 20618 }, { "epoch": 2.096278975193168, "grad_norm": 0.2646176218986511, "learning_rate": 7.203522605349116e-06, "loss": 0.3292, "step": 20619 }, { "epoch": 2.096380642537617, "grad_norm": 0.2819769084453583, "learning_rate": 7.2032040356073805e-06, "loss": 0.3315, "step": 20620 }, { "epoch": 2.0964823098820657, "grad_norm": 0.27156081795692444, "learning_rate": 7.202885454766426e-06, "loss": 0.3285, "step": 20621 }, { "epoch": 2.0965839772265147, "grad_norm": 0.28187358379364014, "learning_rate": 7.20256686282786e-06, "loss": 0.3335, "step": 20622 }, { "epoch": 2.0966856445709636, "grad_norm": 0.2813031077384949, "learning_rate": 7.202248259793283e-06, "loss": 0.3258, "step": 20623 }, { "epoch": 2.0967873119154126, "grad_norm": 0.27042296528816223, "learning_rate": 7.201929645664304e-06, "loss": 0.3278, "step": 20624 }, { "epoch": 2.0968889792598615, "grad_norm": 0.29186004400253296, "learning_rate": 7.201611020442527e-06, "loss": 0.3294, "step": 20625 }, { "epoch": 2.096990646604311, "grad_norm": 0.28278422355651855, "learning_rate": 7.2012923841295565e-06, "loss": 0.3587, "step": 20626 }, { "epoch": 2.09709231394876, "grad_norm": 0.2946215271949768, "learning_rate": 7.200973736726997e-06, "loss": 0.3733, "step": 20627 }, { "epoch": 2.0971939812932088, "grad_norm": 0.26541846990585327, "learning_rate": 7.200655078236457e-06, "loss": 0.3345, "step": 20628 }, { "epoch": 2.0972956486376577, "grad_norm": 0.2818119525909424, "learning_rate": 7.2003364086595365e-06, "loss": 0.3354, "step": 20629 }, { "epoch": 2.0973973159821067, "grad_norm": 0.2721512019634247, "learning_rate": 7.2000177279978465e-06, "loss": 0.3641, "step": 20630 }, { "epoch": 2.0974989833265556, "grad_norm": 0.2765105366706848, "learning_rate": 7.1996990362529894e-06, "loss": 0.3202, "step": 20631 }, { "epoch": 2.0976006506710045, "grad_norm": 0.2787254750728607, "learning_rate": 7.1993803334265715e-06, "loss": 0.3421, "step": 20632 }, { "epoch": 2.0977023180154535, "grad_norm": 0.2706398665904999, "learning_rate": 7.199061619520199e-06, "loss": 0.349, "step": 20633 }, { "epoch": 2.0978039853599024, "grad_norm": 0.27760258316993713, "learning_rate": 7.198742894535473e-06, "loss": 0.3187, "step": 20634 }, { "epoch": 2.0979056527043514, "grad_norm": 0.28923317790031433, "learning_rate": 7.1984241584740066e-06, "loss": 0.3469, "step": 20635 }, { "epoch": 2.0980073200488003, "grad_norm": 0.2875649034976959, "learning_rate": 7.198105411337401e-06, "loss": 0.3538, "step": 20636 }, { "epoch": 2.0981089873932492, "grad_norm": 0.27249088883399963, "learning_rate": 7.197786653127262e-06, "loss": 0.3444, "step": 20637 }, { "epoch": 2.098210654737698, "grad_norm": 0.29162871837615967, "learning_rate": 7.197467883845197e-06, "loss": 0.3427, "step": 20638 }, { "epoch": 2.098312322082147, "grad_norm": 0.29148587584495544, "learning_rate": 7.197149103492809e-06, "loss": 0.3451, "step": 20639 }, { "epoch": 2.098413989426596, "grad_norm": 0.2655581533908844, "learning_rate": 7.196830312071707e-06, "loss": 0.3275, "step": 20640 }, { "epoch": 2.098515656771045, "grad_norm": 0.2987523376941681, "learning_rate": 7.196511509583497e-06, "loss": 0.3255, "step": 20641 }, { "epoch": 2.098617324115494, "grad_norm": 0.271267831325531, "learning_rate": 7.196192696029782e-06, "loss": 0.2944, "step": 20642 }, { "epoch": 2.098718991459943, "grad_norm": 0.28221750259399414, "learning_rate": 7.195873871412172e-06, "loss": 0.3329, "step": 20643 }, { "epoch": 2.098820658804392, "grad_norm": 0.2716742157936096, "learning_rate": 7.195555035732271e-06, "loss": 0.3265, "step": 20644 }, { "epoch": 2.0989223261488408, "grad_norm": 0.3051983714103699, "learning_rate": 7.195236188991685e-06, "loss": 0.3377, "step": 20645 }, { "epoch": 2.09902399349329, "grad_norm": 0.30972909927368164, "learning_rate": 7.1949173311920205e-06, "loss": 0.3236, "step": 20646 }, { "epoch": 2.099125660837739, "grad_norm": 0.29959431290626526, "learning_rate": 7.194598462334884e-06, "loss": 0.3414, "step": 20647 }, { "epoch": 2.099227328182188, "grad_norm": 0.26192981004714966, "learning_rate": 7.194279582421882e-06, "loss": 0.3257, "step": 20648 }, { "epoch": 2.099328995526637, "grad_norm": 0.29213467240333557, "learning_rate": 7.193960691454621e-06, "loss": 0.3286, "step": 20649 }, { "epoch": 2.099430662871086, "grad_norm": 0.3136891722679138, "learning_rate": 7.193641789434707e-06, "loss": 0.3577, "step": 20650 }, { "epoch": 2.099532330215535, "grad_norm": 0.26656314730644226, "learning_rate": 7.193322876363749e-06, "loss": 0.3408, "step": 20651 }, { "epoch": 2.099633997559984, "grad_norm": 0.25939565896987915, "learning_rate": 7.1930039522433495e-06, "loss": 0.3444, "step": 20652 }, { "epoch": 2.0997356649044328, "grad_norm": 0.28175294399261475, "learning_rate": 7.192685017075118e-06, "loss": 0.3489, "step": 20653 }, { "epoch": 2.0998373322488817, "grad_norm": 0.2588568329811096, "learning_rate": 7.19236607086066e-06, "loss": 0.3231, "step": 20654 }, { "epoch": 2.0999389995933306, "grad_norm": 0.2758300006389618, "learning_rate": 7.192047113601582e-06, "loss": 0.3246, "step": 20655 }, { "epoch": 2.1000406669377796, "grad_norm": 0.27457523345947266, "learning_rate": 7.191728145299494e-06, "loss": 0.3057, "step": 20656 }, { "epoch": 2.1001423342822285, "grad_norm": 0.2753199338912964, "learning_rate": 7.191409165955998e-06, "loss": 0.323, "step": 20657 }, { "epoch": 2.1002440016266775, "grad_norm": 0.30722561478614807, "learning_rate": 7.191090175572706e-06, "loss": 0.3427, "step": 20658 }, { "epoch": 2.1003456689711264, "grad_norm": 0.2704062759876251, "learning_rate": 7.19077117415122e-06, "loss": 0.3051, "step": 20659 }, { "epoch": 2.1004473363155753, "grad_norm": 0.30116385221481323, "learning_rate": 7.190452161693149e-06, "loss": 0.3509, "step": 20660 }, { "epoch": 2.1005490036600243, "grad_norm": 0.26637130975723267, "learning_rate": 7.190133138200103e-06, "loss": 0.3637, "step": 20661 }, { "epoch": 2.1006506710044732, "grad_norm": 0.2864281237125397, "learning_rate": 7.1898141036736845e-06, "loss": 0.3204, "step": 20662 }, { "epoch": 2.100752338348922, "grad_norm": 0.2787887454032898, "learning_rate": 7.189495058115504e-06, "loss": 0.3572, "step": 20663 }, { "epoch": 2.100854005693371, "grad_norm": 0.2774157226085663, "learning_rate": 7.189176001527167e-06, "loss": 0.3201, "step": 20664 }, { "epoch": 2.10095567303782, "grad_norm": 0.28223714232444763, "learning_rate": 7.188856933910282e-06, "loss": 0.3294, "step": 20665 }, { "epoch": 2.101057340382269, "grad_norm": 0.30153030157089233, "learning_rate": 7.188537855266457e-06, "loss": 0.3611, "step": 20666 }, { "epoch": 2.1011590077267184, "grad_norm": 0.2886602580547333, "learning_rate": 7.1882187655972966e-06, "loss": 0.351, "step": 20667 }, { "epoch": 2.1012606750711673, "grad_norm": 0.29731282591819763, "learning_rate": 7.187899664904412e-06, "loss": 0.3108, "step": 20668 }, { "epoch": 2.1013623424156163, "grad_norm": 0.27968332171440125, "learning_rate": 7.187580553189406e-06, "loss": 0.3278, "step": 20669 }, { "epoch": 2.101464009760065, "grad_norm": 0.2814306616783142, "learning_rate": 7.187261430453891e-06, "loss": 0.3491, "step": 20670 }, { "epoch": 2.101565677104514, "grad_norm": 0.29397258162498474, "learning_rate": 7.186942296699473e-06, "loss": 0.3013, "step": 20671 }, { "epoch": 2.101667344448963, "grad_norm": 0.27024778723716736, "learning_rate": 7.186623151927758e-06, "loss": 0.3105, "step": 20672 }, { "epoch": 2.101769011793412, "grad_norm": 0.3092341721057892, "learning_rate": 7.186303996140356e-06, "loss": 0.3393, "step": 20673 }, { "epoch": 2.101870679137861, "grad_norm": 0.27590614557266235, "learning_rate": 7.185984829338874e-06, "loss": 0.3365, "step": 20674 }, { "epoch": 2.10197234648231, "grad_norm": 0.28188052773475647, "learning_rate": 7.185665651524919e-06, "loss": 0.3453, "step": 20675 }, { "epoch": 2.102074013826759, "grad_norm": 0.28867870569229126, "learning_rate": 7.185346462700101e-06, "loss": 0.3166, "step": 20676 }, { "epoch": 2.102175681171208, "grad_norm": 0.2877527177333832, "learning_rate": 7.185027262866027e-06, "loss": 0.3161, "step": 20677 }, { "epoch": 2.1022773485156567, "grad_norm": 0.2589850127696991, "learning_rate": 7.184708052024305e-06, "loss": 0.3255, "step": 20678 }, { "epoch": 2.1023790158601057, "grad_norm": 0.2795013189315796, "learning_rate": 7.184388830176542e-06, "loss": 0.3832, "step": 20679 }, { "epoch": 2.1024806832045546, "grad_norm": 0.2912001311779022, "learning_rate": 7.184069597324347e-06, "loss": 0.3166, "step": 20680 }, { "epoch": 2.1025823505490036, "grad_norm": 0.27362293004989624, "learning_rate": 7.1837503534693295e-06, "loss": 0.3242, "step": 20681 }, { "epoch": 2.1026840178934525, "grad_norm": 0.26433315873146057, "learning_rate": 7.183431098613095e-06, "loss": 0.3404, "step": 20682 }, { "epoch": 2.1027856852379014, "grad_norm": 0.26469576358795166, "learning_rate": 7.1831118327572555e-06, "loss": 0.3334, "step": 20683 }, { "epoch": 2.1028873525823504, "grad_norm": 0.30102166533470154, "learning_rate": 7.182792555903416e-06, "loss": 0.3347, "step": 20684 }, { "epoch": 2.1029890199267993, "grad_norm": 0.26420870423316956, "learning_rate": 7.182473268053186e-06, "loss": 0.3241, "step": 20685 }, { "epoch": 2.1030906872712483, "grad_norm": 0.2782857120037079, "learning_rate": 7.182153969208177e-06, "loss": 0.3052, "step": 20686 }, { "epoch": 2.1031923546156976, "grad_norm": 0.2620380222797394, "learning_rate": 7.181834659369992e-06, "loss": 0.3568, "step": 20687 }, { "epoch": 2.1032940219601466, "grad_norm": 0.284072607755661, "learning_rate": 7.181515338540244e-06, "loss": 0.3529, "step": 20688 }, { "epoch": 2.1033956893045955, "grad_norm": 0.2590586543083191, "learning_rate": 7.181196006720539e-06, "loss": 0.3263, "step": 20689 }, { "epoch": 2.1034973566490445, "grad_norm": 0.2710069715976715, "learning_rate": 7.180876663912487e-06, "loss": 0.343, "step": 20690 }, { "epoch": 2.1035990239934934, "grad_norm": 0.2871292531490326, "learning_rate": 7.180557310117698e-06, "loss": 0.3273, "step": 20691 }, { "epoch": 2.1037006913379424, "grad_norm": 0.2731139063835144, "learning_rate": 7.180237945337778e-06, "loss": 0.3418, "step": 20692 }, { "epoch": 2.1038023586823913, "grad_norm": 0.26615825295448303, "learning_rate": 7.179918569574338e-06, "loss": 0.3444, "step": 20693 }, { "epoch": 2.1039040260268402, "grad_norm": 0.26196348667144775, "learning_rate": 7.179599182828986e-06, "loss": 0.3126, "step": 20694 }, { "epoch": 2.104005693371289, "grad_norm": 0.2848474085330963, "learning_rate": 7.17927978510333e-06, "loss": 0.3501, "step": 20695 }, { "epoch": 2.104107360715738, "grad_norm": 0.2893112301826477, "learning_rate": 7.178960376398983e-06, "loss": 0.3337, "step": 20696 }, { "epoch": 2.104209028060187, "grad_norm": 0.2788389027118683, "learning_rate": 7.17864095671755e-06, "loss": 0.3514, "step": 20697 }, { "epoch": 2.104310695404636, "grad_norm": 0.29083940386772156, "learning_rate": 7.178321526060641e-06, "loss": 0.3416, "step": 20698 }, { "epoch": 2.104412362749085, "grad_norm": 0.27439749240875244, "learning_rate": 7.178002084429867e-06, "loss": 0.3377, "step": 20699 }, { "epoch": 2.104514030093534, "grad_norm": 0.30331408977508545, "learning_rate": 7.177682631826835e-06, "loss": 0.3507, "step": 20700 }, { "epoch": 2.104615697437983, "grad_norm": 0.2834623157978058, "learning_rate": 7.1773631682531564e-06, "loss": 0.3302, "step": 20701 }, { "epoch": 2.1047173647824318, "grad_norm": 0.25778260827064514, "learning_rate": 7.17704369371044e-06, "loss": 0.3278, "step": 20702 }, { "epoch": 2.1048190321268807, "grad_norm": 0.2678620517253876, "learning_rate": 7.176724208200293e-06, "loss": 0.3269, "step": 20703 }, { "epoch": 2.1049206994713296, "grad_norm": 0.268162339925766, "learning_rate": 7.176404711724328e-06, "loss": 0.3105, "step": 20704 }, { "epoch": 2.1050223668157786, "grad_norm": 0.2784755527973175, "learning_rate": 7.176085204284152e-06, "loss": 0.3453, "step": 20705 }, { "epoch": 2.1051240341602275, "grad_norm": 0.2654471695423126, "learning_rate": 7.1757656858813775e-06, "loss": 0.3163, "step": 20706 }, { "epoch": 2.1052257015046765, "grad_norm": 0.28869977593421936, "learning_rate": 7.175446156517612e-06, "loss": 0.3253, "step": 20707 }, { "epoch": 2.105327368849126, "grad_norm": 0.29200640320777893, "learning_rate": 7.175126616194465e-06, "loss": 0.3445, "step": 20708 }, { "epoch": 2.105429036193575, "grad_norm": 0.2719782590866089, "learning_rate": 7.174807064913547e-06, "loss": 0.3652, "step": 20709 }, { "epoch": 2.1055307035380237, "grad_norm": 0.2703210115432739, "learning_rate": 7.174487502676468e-06, "loss": 0.329, "step": 20710 }, { "epoch": 2.1056323708824727, "grad_norm": 0.28617554903030396, "learning_rate": 7.174167929484839e-06, "loss": 0.3168, "step": 20711 }, { "epoch": 2.1057340382269216, "grad_norm": 0.28363949060440063, "learning_rate": 7.1738483453402686e-06, "loss": 0.3257, "step": 20712 }, { "epoch": 2.1058357055713706, "grad_norm": 0.2625075876712799, "learning_rate": 7.173528750244365e-06, "loss": 0.3566, "step": 20713 }, { "epoch": 2.1059373729158195, "grad_norm": 0.2590979039669037, "learning_rate": 7.1732091441987405e-06, "loss": 0.318, "step": 20714 }, { "epoch": 2.1060390402602684, "grad_norm": 0.30707016587257385, "learning_rate": 7.172889527205005e-06, "loss": 0.3558, "step": 20715 }, { "epoch": 2.1061407076047174, "grad_norm": 0.27905869483947754, "learning_rate": 7.172569899264769e-06, "loss": 0.3483, "step": 20716 }, { "epoch": 2.1062423749491663, "grad_norm": 0.2640712261199951, "learning_rate": 7.172250260379643e-06, "loss": 0.3493, "step": 20717 }, { "epoch": 2.1063440422936153, "grad_norm": 0.28189587593078613, "learning_rate": 7.171930610551234e-06, "loss": 0.3384, "step": 20718 }, { "epoch": 2.106445709638064, "grad_norm": 0.27583080530166626, "learning_rate": 7.171610949781156e-06, "loss": 0.3286, "step": 20719 }, { "epoch": 2.106547376982513, "grad_norm": 0.26214590668678284, "learning_rate": 7.171291278071018e-06, "loss": 0.3327, "step": 20720 }, { "epoch": 2.106649044326962, "grad_norm": 0.26339191198349, "learning_rate": 7.17097159542243e-06, "loss": 0.3732, "step": 20721 }, { "epoch": 2.106750711671411, "grad_norm": 0.27252060174942017, "learning_rate": 7.170651901837005e-06, "loss": 0.3374, "step": 20722 }, { "epoch": 2.10685237901586, "grad_norm": 0.27518516778945923, "learning_rate": 7.170332197316349e-06, "loss": 0.3376, "step": 20723 }, { "epoch": 2.106954046360309, "grad_norm": 0.2683309018611908, "learning_rate": 7.170012481862076e-06, "loss": 0.3222, "step": 20724 }, { "epoch": 2.107055713704758, "grad_norm": 0.2693374454975128, "learning_rate": 7.1696927554757964e-06, "loss": 0.3382, "step": 20725 }, { "epoch": 2.107157381049207, "grad_norm": 0.2898313105106354, "learning_rate": 7.169373018159119e-06, "loss": 0.3464, "step": 20726 }, { "epoch": 2.1072590483936557, "grad_norm": 0.2929689288139343, "learning_rate": 7.1690532699136575e-06, "loss": 0.3385, "step": 20727 }, { "epoch": 2.107360715738105, "grad_norm": 0.28274857997894287, "learning_rate": 7.1687335107410194e-06, "loss": 0.333, "step": 20728 }, { "epoch": 2.107462383082554, "grad_norm": 0.2933332324028015, "learning_rate": 7.1684137406428185e-06, "loss": 0.3146, "step": 20729 }, { "epoch": 2.107564050427003, "grad_norm": 0.271829217672348, "learning_rate": 7.168093959620664e-06, "loss": 0.3338, "step": 20730 }, { "epoch": 2.107665717771452, "grad_norm": 0.2541506886482239, "learning_rate": 7.167774167676167e-06, "loss": 0.3571, "step": 20731 }, { "epoch": 2.107767385115901, "grad_norm": 0.28008633852005005, "learning_rate": 7.16745436481094e-06, "loss": 0.3626, "step": 20732 }, { "epoch": 2.10786905246035, "grad_norm": 0.2813011407852173, "learning_rate": 7.167134551026592e-06, "loss": 0.3384, "step": 20733 }, { "epoch": 2.1079707198047988, "grad_norm": 0.3355904519557953, "learning_rate": 7.1668147263247355e-06, "loss": 0.3383, "step": 20734 }, { "epoch": 2.1080723871492477, "grad_norm": 0.2847670018672943, "learning_rate": 7.166494890706981e-06, "loss": 0.3069, "step": 20735 }, { "epoch": 2.1081740544936967, "grad_norm": 0.254275381565094, "learning_rate": 7.16617504417494e-06, "loss": 0.3562, "step": 20736 }, { "epoch": 2.1082757218381456, "grad_norm": 0.2805192172527313, "learning_rate": 7.165855186730225e-06, "loss": 0.3459, "step": 20737 }, { "epoch": 2.1083773891825945, "grad_norm": 0.2921812832355499, "learning_rate": 7.165535318374446e-06, "loss": 0.307, "step": 20738 }, { "epoch": 2.1084790565270435, "grad_norm": 0.26025649905204773, "learning_rate": 7.165215439109214e-06, "loss": 0.3516, "step": 20739 }, { "epoch": 2.1085807238714924, "grad_norm": 0.2772621214389801, "learning_rate": 7.16489554893614e-06, "loss": 0.3354, "step": 20740 }, { "epoch": 2.1086823912159414, "grad_norm": 0.2812751531600952, "learning_rate": 7.1645756478568385e-06, "loss": 0.336, "step": 20741 }, { "epoch": 2.1087840585603903, "grad_norm": 0.26662179827690125, "learning_rate": 7.164255735872919e-06, "loss": 0.3284, "step": 20742 }, { "epoch": 2.1088857259048392, "grad_norm": 0.2810251712799072, "learning_rate": 7.163935812985993e-06, "loss": 0.3187, "step": 20743 }, { "epoch": 2.108987393249288, "grad_norm": 0.31163981556892395, "learning_rate": 7.163615879197673e-06, "loss": 0.335, "step": 20744 }, { "epoch": 2.109089060593737, "grad_norm": 0.29267317056655884, "learning_rate": 7.1632959345095695e-06, "loss": 0.3103, "step": 20745 }, { "epoch": 2.109190727938186, "grad_norm": 0.28508010506629944, "learning_rate": 7.162975978923295e-06, "loss": 0.3161, "step": 20746 }, { "epoch": 2.109292395282635, "grad_norm": 0.27915799617767334, "learning_rate": 7.162656012440463e-06, "loss": 0.3615, "step": 20747 }, { "epoch": 2.109394062627084, "grad_norm": 0.2799975872039795, "learning_rate": 7.162336035062683e-06, "loss": 0.3821, "step": 20748 }, { "epoch": 2.1094957299715333, "grad_norm": 0.27417945861816406, "learning_rate": 7.1620160467915686e-06, "loss": 0.2994, "step": 20749 }, { "epoch": 2.1095973973159823, "grad_norm": 0.2637801766395569, "learning_rate": 7.1616960476287315e-06, "loss": 0.3261, "step": 20750 }, { "epoch": 2.1096990646604312, "grad_norm": 0.2725602388381958, "learning_rate": 7.161376037575782e-06, "loss": 0.3132, "step": 20751 }, { "epoch": 2.10980073200488, "grad_norm": 0.24713794887065887, "learning_rate": 7.161056016634335e-06, "loss": 0.3395, "step": 20752 }, { "epoch": 2.109902399349329, "grad_norm": 0.26913154125213623, "learning_rate": 7.160735984806e-06, "loss": 0.3312, "step": 20753 }, { "epoch": 2.110004066693778, "grad_norm": 0.26042985916137695, "learning_rate": 7.1604159420923915e-06, "loss": 0.3761, "step": 20754 }, { "epoch": 2.110105734038227, "grad_norm": 0.2944604754447937, "learning_rate": 7.16009588849512e-06, "loss": 0.3325, "step": 20755 }, { "epoch": 2.110207401382676, "grad_norm": 0.2710210084915161, "learning_rate": 7.159775824015799e-06, "loss": 0.3236, "step": 20756 }, { "epoch": 2.110309068727125, "grad_norm": 0.29135674238204956, "learning_rate": 7.159455748656041e-06, "loss": 0.3371, "step": 20757 }, { "epoch": 2.110410736071574, "grad_norm": 0.28358030319213867, "learning_rate": 7.159135662417458e-06, "loss": 0.3499, "step": 20758 }, { "epoch": 2.1105124034160228, "grad_norm": 0.26873502135276794, "learning_rate": 7.158815565301664e-06, "loss": 0.3261, "step": 20759 }, { "epoch": 2.1106140707604717, "grad_norm": 0.28171679377555847, "learning_rate": 7.158495457310267e-06, "loss": 0.3421, "step": 20760 }, { "epoch": 2.1107157381049206, "grad_norm": 0.27553629875183105, "learning_rate": 7.158175338444885e-06, "loss": 0.3267, "step": 20761 }, { "epoch": 2.1108174054493696, "grad_norm": 0.26741158962249756, "learning_rate": 7.157855208707128e-06, "loss": 0.301, "step": 20762 }, { "epoch": 2.1109190727938185, "grad_norm": 0.2661876976490021, "learning_rate": 7.157535068098609e-06, "loss": 0.3012, "step": 20763 }, { "epoch": 2.1110207401382675, "grad_norm": 0.25398197770118713, "learning_rate": 7.157214916620942e-06, "loss": 0.3235, "step": 20764 }, { "epoch": 2.1111224074827164, "grad_norm": 0.28649142384529114, "learning_rate": 7.156894754275738e-06, "loss": 0.388, "step": 20765 }, { "epoch": 2.1112240748271653, "grad_norm": 0.2857181429862976, "learning_rate": 7.15657458106461e-06, "loss": 0.3237, "step": 20766 }, { "epoch": 2.1113257421716143, "grad_norm": 0.26651033759117126, "learning_rate": 7.156254396989174e-06, "loss": 0.3376, "step": 20767 }, { "epoch": 2.1114274095160637, "grad_norm": 0.2903178632259369, "learning_rate": 7.155934202051039e-06, "loss": 0.3327, "step": 20768 }, { "epoch": 2.1115290768605126, "grad_norm": 0.2866322696208954, "learning_rate": 7.155613996251821e-06, "loss": 0.358, "step": 20769 }, { "epoch": 2.1116307442049616, "grad_norm": 0.2745504677295685, "learning_rate": 7.155293779593131e-06, "loss": 0.2989, "step": 20770 }, { "epoch": 2.1117324115494105, "grad_norm": 0.28405463695526123, "learning_rate": 7.154973552076583e-06, "loss": 0.3496, "step": 20771 }, { "epoch": 2.1118340788938594, "grad_norm": 0.27742889523506165, "learning_rate": 7.154653313703792e-06, "loss": 0.3267, "step": 20772 }, { "epoch": 2.1119357462383084, "grad_norm": 0.2754325568675995, "learning_rate": 7.154333064476369e-06, "loss": 0.3118, "step": 20773 }, { "epoch": 2.1120374135827573, "grad_norm": 0.262080579996109, "learning_rate": 7.154012804395927e-06, "loss": 0.3131, "step": 20774 }, { "epoch": 2.1121390809272063, "grad_norm": 0.2703939378261566, "learning_rate": 7.153692533464081e-06, "loss": 0.3359, "step": 20775 }, { "epoch": 2.112240748271655, "grad_norm": 0.28212329745292664, "learning_rate": 7.153372251682442e-06, "loss": 0.3133, "step": 20776 }, { "epoch": 2.112342415616104, "grad_norm": 0.25516045093536377, "learning_rate": 7.153051959052628e-06, "loss": 0.2995, "step": 20777 }, { "epoch": 2.112444082960553, "grad_norm": 0.26938048005104065, "learning_rate": 7.1527316555762495e-06, "loss": 0.3519, "step": 20778 }, { "epoch": 2.112545750305002, "grad_norm": 0.29677292704582214, "learning_rate": 7.152411341254919e-06, "loss": 0.3331, "step": 20779 }, { "epoch": 2.112647417649451, "grad_norm": 0.27848607301712036, "learning_rate": 7.152091016090252e-06, "loss": 0.3032, "step": 20780 }, { "epoch": 2.1127490849939, "grad_norm": 0.27154994010925293, "learning_rate": 7.151770680083862e-06, "loss": 0.3378, "step": 20781 }, { "epoch": 2.112850752338349, "grad_norm": 0.26276877522468567, "learning_rate": 7.151450333237363e-06, "loss": 0.306, "step": 20782 }, { "epoch": 2.112952419682798, "grad_norm": 0.26879072189331055, "learning_rate": 7.151129975552369e-06, "loss": 0.3437, "step": 20783 }, { "epoch": 2.1130540870272467, "grad_norm": 0.2875010073184967, "learning_rate": 7.150809607030492e-06, "loss": 0.3278, "step": 20784 }, { "epoch": 2.1131557543716957, "grad_norm": 0.2908398509025574, "learning_rate": 7.150489227673348e-06, "loss": 0.3606, "step": 20785 }, { "epoch": 2.1132574217161446, "grad_norm": 0.27011922001838684, "learning_rate": 7.1501688374825505e-06, "loss": 0.3162, "step": 20786 }, { "epoch": 2.1133590890605936, "grad_norm": 0.2751132845878601, "learning_rate": 7.149848436459714e-06, "loss": 0.3256, "step": 20787 }, { "epoch": 2.1134607564050425, "grad_norm": 0.279296338558197, "learning_rate": 7.149528024606451e-06, "loss": 0.3265, "step": 20788 }, { "epoch": 2.1135624237494914, "grad_norm": 0.2633272111415863, "learning_rate": 7.149207601924377e-06, "loss": 0.3334, "step": 20789 }, { "epoch": 2.113664091093941, "grad_norm": 0.2750817835330963, "learning_rate": 7.148887168415105e-06, "loss": 0.3077, "step": 20790 }, { "epoch": 2.1137657584383898, "grad_norm": 0.26757174730300903, "learning_rate": 7.14856672408025e-06, "loss": 0.3141, "step": 20791 }, { "epoch": 2.1138674257828387, "grad_norm": 0.27565711736679077, "learning_rate": 7.148246268921428e-06, "loss": 0.3382, "step": 20792 }, { "epoch": 2.1139690931272876, "grad_norm": 0.28958481550216675, "learning_rate": 7.147925802940252e-06, "loss": 0.3421, "step": 20793 }, { "epoch": 2.1140707604717366, "grad_norm": 0.25660762190818787, "learning_rate": 7.147605326138335e-06, "loss": 0.3577, "step": 20794 }, { "epoch": 2.1141724278161855, "grad_norm": 0.27934378385543823, "learning_rate": 7.147284838517292e-06, "loss": 0.3369, "step": 20795 }, { "epoch": 2.1142740951606345, "grad_norm": 0.2806307077407837, "learning_rate": 7.14696434007874e-06, "loss": 0.3415, "step": 20796 }, { "epoch": 2.1143757625050834, "grad_norm": 0.27453330159187317, "learning_rate": 7.14664383082429e-06, "loss": 0.3475, "step": 20797 }, { "epoch": 2.1144774298495324, "grad_norm": 0.2777673900127411, "learning_rate": 7.146323310755561e-06, "loss": 0.3183, "step": 20798 }, { "epoch": 2.1145790971939813, "grad_norm": 0.3008771538734436, "learning_rate": 7.1460027798741624e-06, "loss": 0.3295, "step": 20799 }, { "epoch": 2.1146807645384302, "grad_norm": 0.2503514587879181, "learning_rate": 7.145682238181713e-06, "loss": 0.3368, "step": 20800 }, { "epoch": 2.114782431882879, "grad_norm": 0.2833888530731201, "learning_rate": 7.145361685679828e-06, "loss": 0.3142, "step": 20801 }, { "epoch": 2.114884099227328, "grad_norm": 0.2783454358577728, "learning_rate": 7.145041122370118e-06, "loss": 0.3721, "step": 20802 }, { "epoch": 2.114985766571777, "grad_norm": 0.2449064701795578, "learning_rate": 7.144720548254202e-06, "loss": 0.3463, "step": 20803 }, { "epoch": 2.115087433916226, "grad_norm": 0.2702307403087616, "learning_rate": 7.144399963333693e-06, "loss": 0.3087, "step": 20804 }, { "epoch": 2.115189101260675, "grad_norm": 0.26225078105926514, "learning_rate": 7.144079367610206e-06, "loss": 0.3079, "step": 20805 }, { "epoch": 2.115290768605124, "grad_norm": 0.2808162569999695, "learning_rate": 7.143758761085359e-06, "loss": 0.3417, "step": 20806 }, { "epoch": 2.115392435949573, "grad_norm": 0.26732194423675537, "learning_rate": 7.143438143760763e-06, "loss": 0.3405, "step": 20807 }, { "epoch": 2.1154941032940218, "grad_norm": 0.2639467120170593, "learning_rate": 7.143117515638035e-06, "loss": 0.3357, "step": 20808 }, { "epoch": 2.115595770638471, "grad_norm": 0.3072817623615265, "learning_rate": 7.14279687671879e-06, "loss": 0.3155, "step": 20809 }, { "epoch": 2.11569743798292, "grad_norm": 0.30098065733909607, "learning_rate": 7.142476227004644e-06, "loss": 0.3517, "step": 20810 }, { "epoch": 2.115799105327369, "grad_norm": 0.2819655239582062, "learning_rate": 7.142155566497213e-06, "loss": 0.3791, "step": 20811 }, { "epoch": 2.115900772671818, "grad_norm": 0.2515590190887451, "learning_rate": 7.14183489519811e-06, "loss": 0.3031, "step": 20812 }, { "epoch": 2.116002440016267, "grad_norm": 0.267289400100708, "learning_rate": 7.141514213108952e-06, "loss": 0.3291, "step": 20813 }, { "epoch": 2.116104107360716, "grad_norm": 0.28428417444229126, "learning_rate": 7.141193520231354e-06, "loss": 0.3393, "step": 20814 }, { "epoch": 2.116205774705165, "grad_norm": 0.2806374728679657, "learning_rate": 7.140872816566933e-06, "loss": 0.3045, "step": 20815 }, { "epoch": 2.1163074420496137, "grad_norm": 0.2813374698162079, "learning_rate": 7.140552102117302e-06, "loss": 0.3704, "step": 20816 }, { "epoch": 2.1164091093940627, "grad_norm": 0.27231284976005554, "learning_rate": 7.140231376884078e-06, "loss": 0.3394, "step": 20817 }, { "epoch": 2.1165107767385116, "grad_norm": 0.26359596848487854, "learning_rate": 7.139910640868879e-06, "loss": 0.354, "step": 20818 }, { "epoch": 2.1166124440829606, "grad_norm": 0.29134058952331543, "learning_rate": 7.139589894073317e-06, "loss": 0.3349, "step": 20819 }, { "epoch": 2.1167141114274095, "grad_norm": 0.26159918308258057, "learning_rate": 7.139269136499011e-06, "loss": 0.3161, "step": 20820 }, { "epoch": 2.1168157787718584, "grad_norm": 0.2609860301017761, "learning_rate": 7.138948368147573e-06, "loss": 0.3526, "step": 20821 }, { "epoch": 2.1169174461163074, "grad_norm": 0.27161484956741333, "learning_rate": 7.138627589020623e-06, "loss": 0.3487, "step": 20822 }, { "epoch": 2.1170191134607563, "grad_norm": 0.26440298557281494, "learning_rate": 7.138306799119776e-06, "loss": 0.341, "step": 20823 }, { "epoch": 2.1171207808052053, "grad_norm": 0.2589260935783386, "learning_rate": 7.137985998446646e-06, "loss": 0.3362, "step": 20824 }, { "epoch": 2.117222448149654, "grad_norm": 0.27039796113967896, "learning_rate": 7.137665187002852e-06, "loss": 0.3517, "step": 20825 }, { "epoch": 2.117324115494103, "grad_norm": 0.2880948483943939, "learning_rate": 7.137344364790009e-06, "loss": 0.3692, "step": 20826 }, { "epoch": 2.117425782838552, "grad_norm": 0.27496716380119324, "learning_rate": 7.137023531809732e-06, "loss": 0.2966, "step": 20827 }, { "epoch": 2.117527450183001, "grad_norm": 0.279199481010437, "learning_rate": 7.136702688063638e-06, "loss": 0.3231, "step": 20828 }, { "epoch": 2.11762911752745, "grad_norm": 0.27695271372795105, "learning_rate": 7.136381833553344e-06, "loss": 0.3468, "step": 20829 }, { "epoch": 2.117730784871899, "grad_norm": 0.30431145429611206, "learning_rate": 7.136060968280467e-06, "loss": 0.3263, "step": 20830 }, { "epoch": 2.1178324522163483, "grad_norm": 0.268420934677124, "learning_rate": 7.1357400922466216e-06, "loss": 0.3152, "step": 20831 }, { "epoch": 2.1179341195607972, "grad_norm": 0.27976277470588684, "learning_rate": 7.135419205453425e-06, "loss": 0.3108, "step": 20832 }, { "epoch": 2.118035786905246, "grad_norm": 0.3057503402233124, "learning_rate": 7.135098307902494e-06, "loss": 0.3296, "step": 20833 }, { "epoch": 2.118137454249695, "grad_norm": 0.2930154502391815, "learning_rate": 7.134777399595445e-06, "loss": 0.3066, "step": 20834 }, { "epoch": 2.118239121594144, "grad_norm": 0.29193368554115295, "learning_rate": 7.134456480533895e-06, "loss": 0.3229, "step": 20835 }, { "epoch": 2.118340788938593, "grad_norm": 0.27522408962249756, "learning_rate": 7.13413555071946e-06, "loss": 0.3394, "step": 20836 }, { "epoch": 2.118442456283042, "grad_norm": 0.2897854745388031, "learning_rate": 7.133814610153757e-06, "loss": 0.3525, "step": 20837 }, { "epoch": 2.118544123627491, "grad_norm": 0.27819308638572693, "learning_rate": 7.133493658838404e-06, "loss": 0.3564, "step": 20838 }, { "epoch": 2.11864579097194, "grad_norm": 0.27271705865859985, "learning_rate": 7.133172696775017e-06, "loss": 0.3364, "step": 20839 }, { "epoch": 2.1187474583163888, "grad_norm": 0.27076050639152527, "learning_rate": 7.132851723965213e-06, "loss": 0.352, "step": 20840 }, { "epoch": 2.1188491256608377, "grad_norm": 0.28331753611564636, "learning_rate": 7.132530740410607e-06, "loss": 0.3497, "step": 20841 }, { "epoch": 2.1189507930052867, "grad_norm": 0.29032212495803833, "learning_rate": 7.132209746112819e-06, "loss": 0.359, "step": 20842 }, { "epoch": 2.1190524603497356, "grad_norm": 0.28013738989830017, "learning_rate": 7.131888741073466e-06, "loss": 0.3175, "step": 20843 }, { "epoch": 2.1191541276941845, "grad_norm": 0.28166595101356506, "learning_rate": 7.131567725294163e-06, "loss": 0.3233, "step": 20844 }, { "epoch": 2.1192557950386335, "grad_norm": 0.28843146562576294, "learning_rate": 7.1312466987765295e-06, "loss": 0.3623, "step": 20845 }, { "epoch": 2.1193574623830824, "grad_norm": 0.29400426149368286, "learning_rate": 7.13092566152218e-06, "loss": 0.3226, "step": 20846 }, { "epoch": 2.1194591297275314, "grad_norm": 0.25221288204193115, "learning_rate": 7.130604613532734e-06, "loss": 0.3142, "step": 20847 }, { "epoch": 2.1195607970719803, "grad_norm": 0.2673492431640625, "learning_rate": 7.13028355480981e-06, "loss": 0.297, "step": 20848 }, { "epoch": 2.1196624644164292, "grad_norm": 0.2891409993171692, "learning_rate": 7.129962485355022e-06, "loss": 0.3226, "step": 20849 }, { "epoch": 2.1197641317608786, "grad_norm": 0.2881213426589966, "learning_rate": 7.129641405169989e-06, "loss": 0.3088, "step": 20850 }, { "epoch": 2.1198657991053276, "grad_norm": 0.2935934364795685, "learning_rate": 7.1293203142563286e-06, "loss": 0.3472, "step": 20851 }, { "epoch": 2.1199674664497765, "grad_norm": 0.2725343108177185, "learning_rate": 7.128999212615658e-06, "loss": 0.3096, "step": 20852 }, { "epoch": 2.1200691337942255, "grad_norm": 0.25876283645629883, "learning_rate": 7.128678100249597e-06, "loss": 0.3086, "step": 20853 }, { "epoch": 2.1201708011386744, "grad_norm": 0.2858217656612396, "learning_rate": 7.128356977159761e-06, "loss": 0.3313, "step": 20854 }, { "epoch": 2.1202724684831233, "grad_norm": 0.2908092439174652, "learning_rate": 7.128035843347768e-06, "loss": 0.3079, "step": 20855 }, { "epoch": 2.1203741358275723, "grad_norm": 0.27726832032203674, "learning_rate": 7.1277146988152355e-06, "loss": 0.3404, "step": 20856 }, { "epoch": 2.1204758031720212, "grad_norm": 0.28980985283851624, "learning_rate": 7.127393543563782e-06, "loss": 0.3364, "step": 20857 }, { "epoch": 2.12057747051647, "grad_norm": 0.2728704810142517, "learning_rate": 7.127072377595027e-06, "loss": 0.3351, "step": 20858 }, { "epoch": 2.120679137860919, "grad_norm": 0.2774953544139862, "learning_rate": 7.126751200910587e-06, "loss": 0.3221, "step": 20859 }, { "epoch": 2.120780805205368, "grad_norm": 0.25652170181274414, "learning_rate": 7.1264300135120776e-06, "loss": 0.3181, "step": 20860 }, { "epoch": 2.120882472549817, "grad_norm": 0.2933245301246643, "learning_rate": 7.126108815401119e-06, "loss": 0.3411, "step": 20861 }, { "epoch": 2.120984139894266, "grad_norm": 0.2633059024810791, "learning_rate": 7.1257876065793305e-06, "loss": 0.3717, "step": 20862 }, { "epoch": 2.121085807238715, "grad_norm": 0.268446147441864, "learning_rate": 7.12546638704833e-06, "loss": 0.3282, "step": 20863 }, { "epoch": 2.121187474583164, "grad_norm": 0.2880438268184662, "learning_rate": 7.125145156809735e-06, "loss": 0.3143, "step": 20864 }, { "epoch": 2.1212891419276128, "grad_norm": 0.27231258153915405, "learning_rate": 7.124823915865162e-06, "loss": 0.2924, "step": 20865 }, { "epoch": 2.1213908092720617, "grad_norm": 0.28413212299346924, "learning_rate": 7.124502664216233e-06, "loss": 0.3204, "step": 20866 }, { "epoch": 2.1214924766165106, "grad_norm": 0.27420473098754883, "learning_rate": 7.124181401864562e-06, "loss": 0.3474, "step": 20867 }, { "epoch": 2.1215941439609596, "grad_norm": 0.2935090661048889, "learning_rate": 7.1238601288117725e-06, "loss": 0.3294, "step": 20868 }, { "epoch": 2.1216958113054085, "grad_norm": 0.27780818939208984, "learning_rate": 7.12353884505948e-06, "loss": 0.3125, "step": 20869 }, { "epoch": 2.1217974786498575, "grad_norm": 0.298397421836853, "learning_rate": 7.1232175506093015e-06, "loss": 0.3523, "step": 20870 }, { "epoch": 2.1218991459943064, "grad_norm": 0.27516812086105347, "learning_rate": 7.1228962454628585e-06, "loss": 0.3292, "step": 20871 }, { "epoch": 2.122000813338756, "grad_norm": 0.2733851671218872, "learning_rate": 7.12257492962177e-06, "loss": 0.3447, "step": 20872 }, { "epoch": 2.1221024806832047, "grad_norm": 0.25115638971328735, "learning_rate": 7.122253603087652e-06, "loss": 0.308, "step": 20873 }, { "epoch": 2.1222041480276537, "grad_norm": 0.2733539640903473, "learning_rate": 7.1219322658621245e-06, "loss": 0.3687, "step": 20874 }, { "epoch": 2.1223058153721026, "grad_norm": 0.27676963806152344, "learning_rate": 7.121610917946808e-06, "loss": 0.2967, "step": 20875 }, { "epoch": 2.1224074827165516, "grad_norm": 0.2737877666950226, "learning_rate": 7.1212895593433175e-06, "loss": 0.324, "step": 20876 }, { "epoch": 2.1225091500610005, "grad_norm": 0.2641547620296478, "learning_rate": 7.120968190053277e-06, "loss": 0.3519, "step": 20877 }, { "epoch": 2.1226108174054494, "grad_norm": 0.2880370020866394, "learning_rate": 7.1206468100783e-06, "loss": 0.3511, "step": 20878 }, { "epoch": 2.1227124847498984, "grad_norm": 0.2795506417751312, "learning_rate": 7.12032541942001e-06, "loss": 0.3298, "step": 20879 }, { "epoch": 2.1228141520943473, "grad_norm": 0.2705937325954437, "learning_rate": 7.120004018080025e-06, "loss": 0.3446, "step": 20880 }, { "epoch": 2.1229158194387963, "grad_norm": 0.27916035056114197, "learning_rate": 7.119682606059961e-06, "loss": 0.3186, "step": 20881 }, { "epoch": 2.123017486783245, "grad_norm": 0.2836228013038635, "learning_rate": 7.119361183361442e-06, "loss": 0.3148, "step": 20882 }, { "epoch": 2.123119154127694, "grad_norm": 0.2717452645301819, "learning_rate": 7.1190397499860844e-06, "loss": 0.3425, "step": 20883 }, { "epoch": 2.123220821472143, "grad_norm": 0.26509571075439453, "learning_rate": 7.118718305935508e-06, "loss": 0.301, "step": 20884 }, { "epoch": 2.123322488816592, "grad_norm": 0.2653990387916565, "learning_rate": 7.118396851211332e-06, "loss": 0.3191, "step": 20885 }, { "epoch": 2.123424156161041, "grad_norm": 0.2595932185649872, "learning_rate": 7.1180753858151765e-06, "loss": 0.3334, "step": 20886 }, { "epoch": 2.12352582350549, "grad_norm": 0.27386555075645447, "learning_rate": 7.117753909748661e-06, "loss": 0.324, "step": 20887 }, { "epoch": 2.123627490849939, "grad_norm": 0.2649613320827484, "learning_rate": 7.117432423013404e-06, "loss": 0.3158, "step": 20888 }, { "epoch": 2.123729158194388, "grad_norm": 0.26161035895347595, "learning_rate": 7.117110925611026e-06, "loss": 0.3344, "step": 20889 }, { "epoch": 2.1238308255388367, "grad_norm": 0.30482354760169983, "learning_rate": 7.116789417543145e-06, "loss": 0.3204, "step": 20890 }, { "epoch": 2.123932492883286, "grad_norm": 0.27108216285705566, "learning_rate": 7.116467898811384e-06, "loss": 0.3442, "step": 20891 }, { "epoch": 2.124034160227735, "grad_norm": 0.2816659212112427, "learning_rate": 7.116146369417358e-06, "loss": 0.3492, "step": 20892 }, { "epoch": 2.124135827572184, "grad_norm": 0.2725156247615814, "learning_rate": 7.115824829362692e-06, "loss": 0.3273, "step": 20893 }, { "epoch": 2.124237494916633, "grad_norm": 0.27658161520957947, "learning_rate": 7.115503278649003e-06, "loss": 0.3368, "step": 20894 }, { "epoch": 2.124339162261082, "grad_norm": 0.2992459237575531, "learning_rate": 7.1151817172779095e-06, "loss": 0.3421, "step": 20895 }, { "epoch": 2.124440829605531, "grad_norm": 0.27012088894844055, "learning_rate": 7.1148601452510335e-06, "loss": 0.3263, "step": 20896 }, { "epoch": 2.1245424969499798, "grad_norm": 0.26281243562698364, "learning_rate": 7.114538562569996e-06, "loss": 0.3205, "step": 20897 }, { "epoch": 2.1246441642944287, "grad_norm": 0.30193179845809937, "learning_rate": 7.114216969236415e-06, "loss": 0.3417, "step": 20898 }, { "epoch": 2.1247458316388776, "grad_norm": 0.2523045241832733, "learning_rate": 7.113895365251912e-06, "loss": 0.3221, "step": 20899 }, { "epoch": 2.1248474989833266, "grad_norm": 0.2657288610935211, "learning_rate": 7.113573750618106e-06, "loss": 0.309, "step": 20900 }, { "epoch": 2.1249491663277755, "grad_norm": 0.29737910628318787, "learning_rate": 7.113252125336616e-06, "loss": 0.3397, "step": 20901 }, { "epoch": 2.1250508336722245, "grad_norm": 0.29902383685112, "learning_rate": 7.112930489409067e-06, "loss": 0.3463, "step": 20902 }, { "epoch": 2.1251525010166734, "grad_norm": 0.2565455138683319, "learning_rate": 7.112608842837074e-06, "loss": 0.3445, "step": 20903 }, { "epoch": 2.1252541683611224, "grad_norm": 0.2861103415489197, "learning_rate": 7.112287185622261e-06, "loss": 0.3411, "step": 20904 }, { "epoch": 2.1253558357055713, "grad_norm": 0.2850186824798584, "learning_rate": 7.111965517766246e-06, "loss": 0.3035, "step": 20905 }, { "epoch": 2.1254575030500202, "grad_norm": 0.24763421714305878, "learning_rate": 7.111643839270651e-06, "loss": 0.3263, "step": 20906 }, { "epoch": 2.125559170394469, "grad_norm": 0.2917192876338959, "learning_rate": 7.111322150137097e-06, "loss": 0.3385, "step": 20907 }, { "epoch": 2.125660837738918, "grad_norm": 0.25923779606819153, "learning_rate": 7.111000450367202e-06, "loss": 0.3538, "step": 20908 }, { "epoch": 2.125762505083367, "grad_norm": 0.27198269963264465, "learning_rate": 7.110678739962591e-06, "loss": 0.3242, "step": 20909 }, { "epoch": 2.125864172427816, "grad_norm": 0.2663207948207855, "learning_rate": 7.11035701892488e-06, "loss": 0.3491, "step": 20910 }, { "epoch": 2.125965839772265, "grad_norm": 0.25081291794776917, "learning_rate": 7.1100352872556925e-06, "loss": 0.3339, "step": 20911 }, { "epoch": 2.126067507116714, "grad_norm": 0.2647836208343506, "learning_rate": 7.109713544956649e-06, "loss": 0.3181, "step": 20912 }, { "epoch": 2.1261691744611633, "grad_norm": 0.2645181715488434, "learning_rate": 7.109391792029368e-06, "loss": 0.3281, "step": 20913 }, { "epoch": 2.126270841805612, "grad_norm": 0.2610083520412445, "learning_rate": 7.1090700284754755e-06, "loss": 0.3607, "step": 20914 }, { "epoch": 2.126372509150061, "grad_norm": 0.2849123775959015, "learning_rate": 7.108748254296587e-06, "loss": 0.3105, "step": 20915 }, { "epoch": 2.12647417649451, "grad_norm": 0.26978349685668945, "learning_rate": 7.108426469494327e-06, "loss": 0.3277, "step": 20916 }, { "epoch": 2.126575843838959, "grad_norm": 0.25391754508018494, "learning_rate": 7.108104674070315e-06, "loss": 0.3275, "step": 20917 }, { "epoch": 2.126677511183408, "grad_norm": 0.3079959750175476, "learning_rate": 7.1077828680261725e-06, "loss": 0.3603, "step": 20918 }, { "epoch": 2.126779178527857, "grad_norm": 0.26859450340270996, "learning_rate": 7.107461051363522e-06, "loss": 0.345, "step": 20919 }, { "epoch": 2.126880845872306, "grad_norm": 0.27076804637908936, "learning_rate": 7.107139224083982e-06, "loss": 0.3066, "step": 20920 }, { "epoch": 2.126982513216755, "grad_norm": 0.2914147973060608, "learning_rate": 7.106817386189177e-06, "loss": 0.3263, "step": 20921 }, { "epoch": 2.1270841805612037, "grad_norm": 0.2682040333747864, "learning_rate": 7.1064955376807255e-06, "loss": 0.3434, "step": 20922 }, { "epoch": 2.1271858479056527, "grad_norm": 0.2776394784450531, "learning_rate": 7.10617367856025e-06, "loss": 0.3432, "step": 20923 }, { "epoch": 2.1272875152501016, "grad_norm": 0.28942832350730896, "learning_rate": 7.105851808829373e-06, "loss": 0.3437, "step": 20924 }, { "epoch": 2.1273891825945506, "grad_norm": 0.25939205288887024, "learning_rate": 7.105529928489714e-06, "loss": 0.348, "step": 20925 }, { "epoch": 2.1274908499389995, "grad_norm": 0.2905529737472534, "learning_rate": 7.105208037542895e-06, "loss": 0.3197, "step": 20926 }, { "epoch": 2.1275925172834484, "grad_norm": 0.26691749691963196, "learning_rate": 7.104886135990539e-06, "loss": 0.3625, "step": 20927 }, { "epoch": 2.1276941846278974, "grad_norm": 0.2903388440608978, "learning_rate": 7.104564223834267e-06, "loss": 0.3449, "step": 20928 }, { "epoch": 2.1277958519723463, "grad_norm": 0.26545482873916626, "learning_rate": 7.104242301075701e-06, "loss": 0.3299, "step": 20929 }, { "epoch": 2.1278975193167953, "grad_norm": 0.28384798765182495, "learning_rate": 7.103920367716462e-06, "loss": 0.328, "step": 20930 }, { "epoch": 2.127999186661244, "grad_norm": 0.2510332763195038, "learning_rate": 7.1035984237581715e-06, "loss": 0.3423, "step": 20931 }, { "epoch": 2.1281008540056936, "grad_norm": 0.27590104937553406, "learning_rate": 7.103276469202451e-06, "loss": 0.3449, "step": 20932 }, { "epoch": 2.1282025213501425, "grad_norm": 0.271537721157074, "learning_rate": 7.102954504050925e-06, "loss": 0.3344, "step": 20933 }, { "epoch": 2.1283041886945915, "grad_norm": 0.28003987669944763, "learning_rate": 7.1026325283052154e-06, "loss": 0.3211, "step": 20934 }, { "epoch": 2.1284058560390404, "grad_norm": 0.26526939868927, "learning_rate": 7.102310541966941e-06, "loss": 0.3102, "step": 20935 }, { "epoch": 2.1285075233834894, "grad_norm": 0.28126874566078186, "learning_rate": 7.101988545037725e-06, "loss": 0.3525, "step": 20936 }, { "epoch": 2.1286091907279383, "grad_norm": 0.271513432264328, "learning_rate": 7.1016665375191916e-06, "loss": 0.3368, "step": 20937 }, { "epoch": 2.1287108580723872, "grad_norm": 0.2698420584201813, "learning_rate": 7.10134451941296e-06, "loss": 0.3289, "step": 20938 }, { "epoch": 2.128812525416836, "grad_norm": 0.3005722165107727, "learning_rate": 7.101022490720657e-06, "loss": 0.3337, "step": 20939 }, { "epoch": 2.128914192761285, "grad_norm": 0.27345845103263855, "learning_rate": 7.1007004514439e-06, "loss": 0.3032, "step": 20940 }, { "epoch": 2.129015860105734, "grad_norm": 0.2701506018638611, "learning_rate": 7.1003784015843135e-06, "loss": 0.3435, "step": 20941 }, { "epoch": 2.129117527450183, "grad_norm": 0.27449679374694824, "learning_rate": 7.1000563411435195e-06, "loss": 0.352, "step": 20942 }, { "epoch": 2.129219194794632, "grad_norm": 0.27962514758110046, "learning_rate": 7.09973427012314e-06, "loss": 0.3343, "step": 20943 }, { "epoch": 2.129320862139081, "grad_norm": 0.28999340534210205, "learning_rate": 7.0994121885248e-06, "loss": 0.3356, "step": 20944 }, { "epoch": 2.12942252948353, "grad_norm": 0.28050488233566284, "learning_rate": 7.099090096350121e-06, "loss": 0.2983, "step": 20945 }, { "epoch": 2.1295241968279788, "grad_norm": 0.28438982367515564, "learning_rate": 7.0987679936007225e-06, "loss": 0.3457, "step": 20946 }, { "epoch": 2.1296258641724277, "grad_norm": 0.26183298230171204, "learning_rate": 7.09844588027823e-06, "loss": 0.3119, "step": 20947 }, { "epoch": 2.1297275315168767, "grad_norm": 0.2711968421936035, "learning_rate": 7.098123756384267e-06, "loss": 0.3439, "step": 20948 }, { "epoch": 2.1298291988613256, "grad_norm": 0.271278440952301, "learning_rate": 7.097801621920454e-06, "loss": 0.3102, "step": 20949 }, { "epoch": 2.1299308662057745, "grad_norm": 0.2798466384410858, "learning_rate": 7.0974794768884155e-06, "loss": 0.3267, "step": 20950 }, { "epoch": 2.1300325335502235, "grad_norm": 0.28679928183555603, "learning_rate": 7.0971573212897734e-06, "loss": 0.3223, "step": 20951 }, { "epoch": 2.1301342008946724, "grad_norm": 0.27284881472587585, "learning_rate": 7.096835155126151e-06, "loss": 0.3362, "step": 20952 }, { "epoch": 2.1302358682391214, "grad_norm": 0.3114152252674103, "learning_rate": 7.096512978399173e-06, "loss": 0.3123, "step": 20953 }, { "epoch": 2.1303375355835708, "grad_norm": 0.28202560544013977, "learning_rate": 7.096190791110459e-06, "loss": 0.3409, "step": 20954 }, { "epoch": 2.1304392029280197, "grad_norm": 0.30724984407424927, "learning_rate": 7.095868593261634e-06, "loss": 0.3017, "step": 20955 }, { "epoch": 2.1305408702724686, "grad_norm": 0.274095356464386, "learning_rate": 7.0955463848543214e-06, "loss": 0.3252, "step": 20956 }, { "epoch": 2.1306425376169176, "grad_norm": 0.27322930097579956, "learning_rate": 7.095224165890144e-06, "loss": 0.3316, "step": 20957 }, { "epoch": 2.1307442049613665, "grad_norm": 0.2784208357334137, "learning_rate": 7.094901936370726e-06, "loss": 0.3317, "step": 20958 }, { "epoch": 2.1308458723058155, "grad_norm": 0.2864595949649811, "learning_rate": 7.094579696297689e-06, "loss": 0.3274, "step": 20959 }, { "epoch": 2.1309475396502644, "grad_norm": 0.2828074097633362, "learning_rate": 7.094257445672659e-06, "loss": 0.3311, "step": 20960 }, { "epoch": 2.1310492069947133, "grad_norm": 0.2604913115501404, "learning_rate": 7.093935184497255e-06, "loss": 0.2948, "step": 20961 }, { "epoch": 2.1311508743391623, "grad_norm": 0.30993983149528503, "learning_rate": 7.093612912773104e-06, "loss": 0.3094, "step": 20962 }, { "epoch": 2.1312525416836112, "grad_norm": 0.28361111879348755, "learning_rate": 7.09329063050183e-06, "loss": 0.3617, "step": 20963 }, { "epoch": 2.13135420902806, "grad_norm": 0.29038602113723755, "learning_rate": 7.0929683376850535e-06, "loss": 0.348, "step": 20964 }, { "epoch": 2.131455876372509, "grad_norm": 0.2782578468322754, "learning_rate": 7.0926460343244005e-06, "loss": 0.3328, "step": 20965 }, { "epoch": 2.131557543716958, "grad_norm": 0.2900102138519287, "learning_rate": 7.092323720421495e-06, "loss": 0.3595, "step": 20966 }, { "epoch": 2.131659211061407, "grad_norm": 0.2921256721019745, "learning_rate": 7.092001395977957e-06, "loss": 0.3287, "step": 20967 }, { "epoch": 2.131760878405856, "grad_norm": 0.3098827600479126, "learning_rate": 7.091679060995417e-06, "loss": 0.3194, "step": 20968 }, { "epoch": 2.131862545750305, "grad_norm": 0.28413230180740356, "learning_rate": 7.091356715475492e-06, "loss": 0.3191, "step": 20969 }, { "epoch": 2.131964213094754, "grad_norm": 0.2858784794807434, "learning_rate": 7.091034359419811e-06, "loss": 0.3361, "step": 20970 }, { "epoch": 2.1320658804392028, "grad_norm": 0.2754700481891632, "learning_rate": 7.090711992829994e-06, "loss": 0.3171, "step": 20971 }, { "epoch": 2.1321675477836517, "grad_norm": 0.299029141664505, "learning_rate": 7.090389615707666e-06, "loss": 0.3299, "step": 20972 }, { "epoch": 2.132269215128101, "grad_norm": 0.3011121153831482, "learning_rate": 7.090067228054454e-06, "loss": 0.3803, "step": 20973 }, { "epoch": 2.13237088247255, "grad_norm": 0.29775771498680115, "learning_rate": 7.0897448298719806e-06, "loss": 0.3228, "step": 20974 }, { "epoch": 2.132472549816999, "grad_norm": 0.2793697416782379, "learning_rate": 7.089422421161868e-06, "loss": 0.3267, "step": 20975 }, { "epoch": 2.132574217161448, "grad_norm": 0.2871992588043213, "learning_rate": 7.089100001925742e-06, "loss": 0.3368, "step": 20976 }, { "epoch": 2.132675884505897, "grad_norm": 0.29348278045654297, "learning_rate": 7.088777572165225e-06, "loss": 0.3199, "step": 20977 }, { "epoch": 2.132777551850346, "grad_norm": 0.2809719145298004, "learning_rate": 7.088455131881946e-06, "loss": 0.3317, "step": 20978 }, { "epoch": 2.1328792191947947, "grad_norm": 0.28669822216033936, "learning_rate": 7.088132681077524e-06, "loss": 0.3204, "step": 20979 }, { "epoch": 2.1329808865392437, "grad_norm": 0.31027814745903015, "learning_rate": 7.087810219753587e-06, "loss": 0.3579, "step": 20980 }, { "epoch": 2.1330825538836926, "grad_norm": 0.28739920258522034, "learning_rate": 7.087487747911757e-06, "loss": 0.3482, "step": 20981 }, { "epoch": 2.1331842212281416, "grad_norm": 0.29778462648391724, "learning_rate": 7.087165265553661e-06, "loss": 0.3185, "step": 20982 }, { "epoch": 2.1332858885725905, "grad_norm": 0.29090964794158936, "learning_rate": 7.0868427726809215e-06, "loss": 0.3291, "step": 20983 }, { "epoch": 2.1333875559170394, "grad_norm": 0.2630087435245514, "learning_rate": 7.086520269295165e-06, "loss": 0.3316, "step": 20984 }, { "epoch": 2.1334892232614884, "grad_norm": 0.27039241790771484, "learning_rate": 7.086197755398015e-06, "loss": 0.331, "step": 20985 }, { "epoch": 2.1335908906059373, "grad_norm": 0.27169448137283325, "learning_rate": 7.085875230991096e-06, "loss": 0.3219, "step": 20986 }, { "epoch": 2.1336925579503863, "grad_norm": 0.26346394419670105, "learning_rate": 7.085552696076034e-06, "loss": 0.3063, "step": 20987 }, { "epoch": 2.133794225294835, "grad_norm": 0.26671990752220154, "learning_rate": 7.085230150654452e-06, "loss": 0.3197, "step": 20988 }, { "epoch": 2.133895892639284, "grad_norm": 0.275359570980072, "learning_rate": 7.0849075947279765e-06, "loss": 0.353, "step": 20989 }, { "epoch": 2.133997559983733, "grad_norm": 0.2862681448459625, "learning_rate": 7.084585028298233e-06, "loss": 0.314, "step": 20990 }, { "epoch": 2.134099227328182, "grad_norm": 0.27039462327957153, "learning_rate": 7.0842624513668435e-06, "loss": 0.301, "step": 20991 }, { "epoch": 2.134200894672631, "grad_norm": 0.2769995927810669, "learning_rate": 7.0839398639354364e-06, "loss": 0.3447, "step": 20992 }, { "epoch": 2.13430256201708, "grad_norm": 0.29340794682502747, "learning_rate": 7.083617266005635e-06, "loss": 0.3299, "step": 20993 }, { "epoch": 2.134404229361529, "grad_norm": 0.30005407333374023, "learning_rate": 7.083294657579065e-06, "loss": 0.3499, "step": 20994 }, { "epoch": 2.1345058967059782, "grad_norm": 0.2641870677471161, "learning_rate": 7.082972038657353e-06, "loss": 0.3399, "step": 20995 }, { "epoch": 2.134607564050427, "grad_norm": 0.28596001863479614, "learning_rate": 7.0826494092421216e-06, "loss": 0.3329, "step": 20996 }, { "epoch": 2.134709231394876, "grad_norm": 0.2801852822303772, "learning_rate": 7.082326769334996e-06, "loss": 0.3456, "step": 20997 }, { "epoch": 2.134810898739325, "grad_norm": 0.27793022990226746, "learning_rate": 7.082004118937605e-06, "loss": 0.3111, "step": 20998 }, { "epoch": 2.134912566083774, "grad_norm": 0.28490257263183594, "learning_rate": 7.081681458051571e-06, "loss": 0.3224, "step": 20999 }, { "epoch": 2.135014233428223, "grad_norm": 0.29613757133483887, "learning_rate": 7.081358786678522e-06, "loss": 0.3371, "step": 21000 }, { "epoch": 2.135115900772672, "grad_norm": 0.27695080637931824, "learning_rate": 7.081036104820081e-06, "loss": 0.3491, "step": 21001 }, { "epoch": 2.135217568117121, "grad_norm": 0.29248616099357605, "learning_rate": 7.0807134124778754e-06, "loss": 0.3828, "step": 21002 }, { "epoch": 2.1353192354615698, "grad_norm": 0.28344619274139404, "learning_rate": 7.080390709653528e-06, "loss": 0.3428, "step": 21003 }, { "epoch": 2.1354209028060187, "grad_norm": 0.2569051682949066, "learning_rate": 7.080067996348667e-06, "loss": 0.3287, "step": 21004 }, { "epoch": 2.1355225701504676, "grad_norm": 0.2590412199497223, "learning_rate": 7.079745272564919e-06, "loss": 0.3306, "step": 21005 }, { "epoch": 2.1356242374949166, "grad_norm": 0.26916930079460144, "learning_rate": 7.079422538303909e-06, "loss": 0.3536, "step": 21006 }, { "epoch": 2.1357259048393655, "grad_norm": 0.2817184329032898, "learning_rate": 7.0790997935672615e-06, "loss": 0.3249, "step": 21007 }, { "epoch": 2.1358275721838145, "grad_norm": 0.2498435229063034, "learning_rate": 7.078777038356603e-06, "loss": 0.3247, "step": 21008 }, { "epoch": 2.1359292395282634, "grad_norm": 0.26706308126449585, "learning_rate": 7.078454272673561e-06, "loss": 0.3554, "step": 21009 }, { "epoch": 2.1360309068727124, "grad_norm": 0.28865334391593933, "learning_rate": 7.078131496519759e-06, "loss": 0.3279, "step": 21010 }, { "epoch": 2.1361325742171613, "grad_norm": 0.27349036931991577, "learning_rate": 7.077808709896825e-06, "loss": 0.3118, "step": 21011 }, { "epoch": 2.1362342415616102, "grad_norm": 0.2675308585166931, "learning_rate": 7.077485912806385e-06, "loss": 0.309, "step": 21012 }, { "epoch": 2.136335908906059, "grad_norm": 0.28716185688972473, "learning_rate": 7.077163105250062e-06, "loss": 0.3339, "step": 21013 }, { "epoch": 2.1364375762505086, "grad_norm": 0.265038400888443, "learning_rate": 7.076840287229487e-06, "loss": 0.3428, "step": 21014 }, { "epoch": 2.1365392435949575, "grad_norm": 0.27619150280952454, "learning_rate": 7.076517458746285e-06, "loss": 0.3456, "step": 21015 }, { "epoch": 2.1366409109394064, "grad_norm": 0.2783122658729553, "learning_rate": 7.07619461980208e-06, "loss": 0.3537, "step": 21016 }, { "epoch": 2.1367425782838554, "grad_norm": 0.2963307201862335, "learning_rate": 7.0758717703985e-06, "loss": 0.3358, "step": 21017 }, { "epoch": 2.1368442456283043, "grad_norm": 0.29014766216278076, "learning_rate": 7.075548910537171e-06, "loss": 0.3177, "step": 21018 }, { "epoch": 2.1369459129727533, "grad_norm": 0.285916805267334, "learning_rate": 7.07522604021972e-06, "loss": 0.347, "step": 21019 }, { "epoch": 2.137047580317202, "grad_norm": 0.2799316644668579, "learning_rate": 7.074903159447775e-06, "loss": 0.3457, "step": 21020 }, { "epoch": 2.137149247661651, "grad_norm": 0.2911038100719452, "learning_rate": 7.07458026822296e-06, "loss": 0.3635, "step": 21021 }, { "epoch": 2.1372509150061, "grad_norm": 0.268702894449234, "learning_rate": 7.074257366546903e-06, "loss": 0.3436, "step": 21022 }, { "epoch": 2.137352582350549, "grad_norm": 0.2750730514526367, "learning_rate": 7.0739344544212295e-06, "loss": 0.3609, "step": 21023 }, { "epoch": 2.137454249694998, "grad_norm": 0.2661520838737488, "learning_rate": 7.073611531847567e-06, "loss": 0.3126, "step": 21024 }, { "epoch": 2.137555917039447, "grad_norm": 0.26591750979423523, "learning_rate": 7.073288598827543e-06, "loss": 0.3599, "step": 21025 }, { "epoch": 2.137657584383896, "grad_norm": 0.2793358862400055, "learning_rate": 7.072965655362784e-06, "loss": 0.3229, "step": 21026 }, { "epoch": 2.137759251728345, "grad_norm": 0.2729383707046509, "learning_rate": 7.072642701454915e-06, "loss": 0.3182, "step": 21027 }, { "epoch": 2.1378609190727937, "grad_norm": 0.2757608890533447, "learning_rate": 7.072319737105565e-06, "loss": 0.3145, "step": 21028 }, { "epoch": 2.1379625864172427, "grad_norm": 0.257700651884079, "learning_rate": 7.071996762316362e-06, "loss": 0.3571, "step": 21029 }, { "epoch": 2.1380642537616916, "grad_norm": 0.29491928219795227, "learning_rate": 7.071673777088931e-06, "loss": 0.3549, "step": 21030 }, { "epoch": 2.1381659211061406, "grad_norm": 0.28956979513168335, "learning_rate": 7.0713507814248995e-06, "loss": 0.3531, "step": 21031 }, { "epoch": 2.1382675884505895, "grad_norm": 0.2523775100708008, "learning_rate": 7.071027775325895e-06, "loss": 0.3288, "step": 21032 }, { "epoch": 2.1383692557950384, "grad_norm": 0.27222540974617004, "learning_rate": 7.070704758793545e-06, "loss": 0.3214, "step": 21033 }, { "epoch": 2.1384709231394874, "grad_norm": 0.27891018986701965, "learning_rate": 7.0703817318294765e-06, "loss": 0.3314, "step": 21034 }, { "epoch": 2.1385725904839363, "grad_norm": 0.303934782743454, "learning_rate": 7.070058694435317e-06, "loss": 0.3477, "step": 21035 }, { "epoch": 2.1386742578283857, "grad_norm": 0.2748814523220062, "learning_rate": 7.069735646612695e-06, "loss": 0.3332, "step": 21036 }, { "epoch": 2.1387759251728347, "grad_norm": 0.27070072293281555, "learning_rate": 7.069412588363234e-06, "loss": 0.3209, "step": 21037 }, { "epoch": 2.1388775925172836, "grad_norm": 0.26591846346855164, "learning_rate": 7.0690895196885645e-06, "loss": 0.3434, "step": 21038 }, { "epoch": 2.1389792598617325, "grad_norm": 0.2631179392337799, "learning_rate": 7.068766440590315e-06, "loss": 0.3278, "step": 21039 }, { "epoch": 2.1390809272061815, "grad_norm": 0.28131529688835144, "learning_rate": 7.068443351070111e-06, "loss": 0.3281, "step": 21040 }, { "epoch": 2.1391825945506304, "grad_norm": 0.2777557969093323, "learning_rate": 7.0681202511295825e-06, "loss": 0.343, "step": 21041 }, { "epoch": 2.1392842618950794, "grad_norm": 0.2597740888595581, "learning_rate": 7.067797140770354e-06, "loss": 0.3544, "step": 21042 }, { "epoch": 2.1393859292395283, "grad_norm": 0.25363603234291077, "learning_rate": 7.067474019994054e-06, "loss": 0.3323, "step": 21043 }, { "epoch": 2.1394875965839772, "grad_norm": 0.25550955533981323, "learning_rate": 7.0671508888023134e-06, "loss": 0.3702, "step": 21044 }, { "epoch": 2.139589263928426, "grad_norm": 0.2742043435573578, "learning_rate": 7.066827747196756e-06, "loss": 0.3393, "step": 21045 }, { "epoch": 2.139690931272875, "grad_norm": 0.2743709981441498, "learning_rate": 7.0665045951790135e-06, "loss": 0.344, "step": 21046 }, { "epoch": 2.139792598617324, "grad_norm": 0.27728140354156494, "learning_rate": 7.066181432750711e-06, "loss": 0.3194, "step": 21047 }, { "epoch": 2.139894265961773, "grad_norm": 0.2758398950099945, "learning_rate": 7.065858259913476e-06, "loss": 0.3256, "step": 21048 }, { "epoch": 2.139995933306222, "grad_norm": 0.2801721692085266, "learning_rate": 7.06553507666894e-06, "loss": 0.3426, "step": 21049 }, { "epoch": 2.140097600650671, "grad_norm": 0.2857917547225952, "learning_rate": 7.065211883018728e-06, "loss": 0.3508, "step": 21050 }, { "epoch": 2.14019926799512, "grad_norm": 0.2809286117553711, "learning_rate": 7.0648886789644695e-06, "loss": 0.3263, "step": 21051 }, { "epoch": 2.1403009353395688, "grad_norm": 0.2757023870944977, "learning_rate": 7.064565464507793e-06, "loss": 0.3039, "step": 21052 }, { "epoch": 2.1404026026840177, "grad_norm": 0.28593555092811584, "learning_rate": 7.064242239650326e-06, "loss": 0.3213, "step": 21053 }, { "epoch": 2.1405042700284667, "grad_norm": 0.25802740454673767, "learning_rate": 7.063919004393698e-06, "loss": 0.3156, "step": 21054 }, { "epoch": 2.140605937372916, "grad_norm": 0.2569981813430786, "learning_rate": 7.063595758739536e-06, "loss": 0.3599, "step": 21055 }, { "epoch": 2.140707604717365, "grad_norm": 0.26586833596229553, "learning_rate": 7.063272502689469e-06, "loss": 0.3036, "step": 21056 }, { "epoch": 2.140809272061814, "grad_norm": 0.2767965495586395, "learning_rate": 7.062949236245125e-06, "loss": 0.3245, "step": 21057 }, { "epoch": 2.140910939406263, "grad_norm": 0.26118218898773193, "learning_rate": 7.062625959408132e-06, "loss": 0.3356, "step": 21058 }, { "epoch": 2.141012606750712, "grad_norm": 0.25172704458236694, "learning_rate": 7.062302672180121e-06, "loss": 0.3356, "step": 21059 }, { "epoch": 2.1411142740951608, "grad_norm": 0.28102219104766846, "learning_rate": 7.061979374562718e-06, "loss": 0.2872, "step": 21060 }, { "epoch": 2.1412159414396097, "grad_norm": 0.29316070675849915, "learning_rate": 7.061656066557555e-06, "loss": 0.3416, "step": 21061 }, { "epoch": 2.1413176087840586, "grad_norm": 0.2692219018936157, "learning_rate": 7.061332748166258e-06, "loss": 0.3392, "step": 21062 }, { "epoch": 2.1414192761285076, "grad_norm": 0.27272456884384155, "learning_rate": 7.0610094193904544e-06, "loss": 0.3178, "step": 21063 }, { "epoch": 2.1415209434729565, "grad_norm": 0.27103155851364136, "learning_rate": 7.060686080231778e-06, "loss": 0.3225, "step": 21064 }, { "epoch": 2.1416226108174055, "grad_norm": 0.26127010583877563, "learning_rate": 7.0603627306918534e-06, "loss": 0.3028, "step": 21065 }, { "epoch": 2.1417242781618544, "grad_norm": 0.29139444231987, "learning_rate": 7.0600393707723116e-06, "loss": 0.3259, "step": 21066 }, { "epoch": 2.1418259455063033, "grad_norm": 0.28874900937080383, "learning_rate": 7.05971600047478e-06, "loss": 0.317, "step": 21067 }, { "epoch": 2.1419276128507523, "grad_norm": 0.29395270347595215, "learning_rate": 7.059392619800889e-06, "loss": 0.3501, "step": 21068 }, { "epoch": 2.1420292801952012, "grad_norm": 0.2929341793060303, "learning_rate": 7.059069228752267e-06, "loss": 0.3441, "step": 21069 }, { "epoch": 2.14213094753965, "grad_norm": 0.2728249132633209, "learning_rate": 7.058745827330545e-06, "loss": 0.3497, "step": 21070 }, { "epoch": 2.142232614884099, "grad_norm": 0.30452361702919006, "learning_rate": 7.058422415537349e-06, "loss": 0.3677, "step": 21071 }, { "epoch": 2.142334282228548, "grad_norm": 0.279215544462204, "learning_rate": 7.05809899337431e-06, "loss": 0.3327, "step": 21072 }, { "epoch": 2.142435949572997, "grad_norm": 0.26856687664985657, "learning_rate": 7.0577755608430585e-06, "loss": 0.3276, "step": 21073 }, { "epoch": 2.142537616917446, "grad_norm": 0.2595149278640747, "learning_rate": 7.057452117945222e-06, "loss": 0.335, "step": 21074 }, { "epoch": 2.142639284261895, "grad_norm": 0.26222655177116394, "learning_rate": 7.057128664682431e-06, "loss": 0.3415, "step": 21075 }, { "epoch": 2.142740951606344, "grad_norm": 0.29130545258522034, "learning_rate": 7.056805201056316e-06, "loss": 0.3222, "step": 21076 }, { "epoch": 2.142842618950793, "grad_norm": 0.26864930987358093, "learning_rate": 7.056481727068505e-06, "loss": 0.3294, "step": 21077 }, { "epoch": 2.142944286295242, "grad_norm": 0.2917800545692444, "learning_rate": 7.0561582427206245e-06, "loss": 0.3287, "step": 21078 }, { "epoch": 2.143045953639691, "grad_norm": 0.26398777961730957, "learning_rate": 7.0558347480143095e-06, "loss": 0.3391, "step": 21079 }, { "epoch": 2.14314762098414, "grad_norm": 0.2547249495983124, "learning_rate": 7.055511242951187e-06, "loss": 0.3332, "step": 21080 }, { "epoch": 2.143249288328589, "grad_norm": 0.2947978079319, "learning_rate": 7.0551877275328895e-06, "loss": 0.3269, "step": 21081 }, { "epoch": 2.143350955673038, "grad_norm": 0.2517751455307007, "learning_rate": 7.054864201761044e-06, "loss": 0.3311, "step": 21082 }, { "epoch": 2.143452623017487, "grad_norm": 0.27443164587020874, "learning_rate": 7.054540665637278e-06, "loss": 0.3355, "step": 21083 }, { "epoch": 2.143554290361936, "grad_norm": 0.2614203989505768, "learning_rate": 7.054217119163227e-06, "loss": 0.3504, "step": 21084 }, { "epoch": 2.1436559577063847, "grad_norm": 0.27418050169944763, "learning_rate": 7.053893562340517e-06, "loss": 0.3511, "step": 21085 }, { "epoch": 2.1437576250508337, "grad_norm": 0.2537585496902466, "learning_rate": 7.053569995170781e-06, "loss": 0.312, "step": 21086 }, { "epoch": 2.1438592923952826, "grad_norm": 0.2777838706970215, "learning_rate": 7.053246417655647e-06, "loss": 0.3786, "step": 21087 }, { "epoch": 2.1439609597397316, "grad_norm": 0.2960103154182434, "learning_rate": 7.052922829796743e-06, "loss": 0.3506, "step": 21088 }, { "epoch": 2.1440626270841805, "grad_norm": 0.281306654214859, "learning_rate": 7.052599231595703e-06, "loss": 0.3731, "step": 21089 }, { "epoch": 2.1441642944286294, "grad_norm": 0.2910807430744171, "learning_rate": 7.052275623054156e-06, "loss": 0.3511, "step": 21090 }, { "epoch": 2.1442659617730784, "grad_norm": 0.2780914902687073, "learning_rate": 7.0519520041737345e-06, "loss": 0.3584, "step": 21091 }, { "epoch": 2.1443676291175273, "grad_norm": 0.2668149471282959, "learning_rate": 7.051628374956064e-06, "loss": 0.3564, "step": 21092 }, { "epoch": 2.1444692964619763, "grad_norm": 0.28657448291778564, "learning_rate": 7.0513047354027765e-06, "loss": 0.3221, "step": 21093 }, { "epoch": 2.144570963806425, "grad_norm": 0.27323096990585327, "learning_rate": 7.050981085515505e-06, "loss": 0.3372, "step": 21094 }, { "epoch": 2.144672631150874, "grad_norm": 0.27863311767578125, "learning_rate": 7.050657425295877e-06, "loss": 0.3227, "step": 21095 }, { "epoch": 2.1447742984953235, "grad_norm": 0.3190145492553711, "learning_rate": 7.050333754745526e-06, "loss": 0.3701, "step": 21096 }, { "epoch": 2.1448759658397725, "grad_norm": 0.28734996914863586, "learning_rate": 7.050010073866079e-06, "loss": 0.3365, "step": 21097 }, { "epoch": 2.1449776331842214, "grad_norm": 0.2913115620613098, "learning_rate": 7.049686382659169e-06, "loss": 0.3228, "step": 21098 }, { "epoch": 2.1450793005286704, "grad_norm": 0.2897360920906067, "learning_rate": 7.049362681126425e-06, "loss": 0.3348, "step": 21099 }, { "epoch": 2.1451809678731193, "grad_norm": 0.284332811832428, "learning_rate": 7.04903896926948e-06, "loss": 0.3147, "step": 21100 }, { "epoch": 2.1452826352175682, "grad_norm": 0.27365222573280334, "learning_rate": 7.048715247089963e-06, "loss": 0.3053, "step": 21101 }, { "epoch": 2.145384302562017, "grad_norm": 0.26717373728752136, "learning_rate": 7.048391514589507e-06, "loss": 0.3266, "step": 21102 }, { "epoch": 2.145485969906466, "grad_norm": 0.29337966442108154, "learning_rate": 7.04806777176974e-06, "loss": 0.3725, "step": 21103 }, { "epoch": 2.145587637250915, "grad_norm": 0.26653778553009033, "learning_rate": 7.047744018632293e-06, "loss": 0.3582, "step": 21104 }, { "epoch": 2.145689304595364, "grad_norm": 0.25586259365081787, "learning_rate": 7.047420255178801e-06, "loss": 0.331, "step": 21105 }, { "epoch": 2.145790971939813, "grad_norm": 0.30060723423957825, "learning_rate": 7.04709648141089e-06, "loss": 0.3605, "step": 21106 }, { "epoch": 2.145892639284262, "grad_norm": 0.2714589834213257, "learning_rate": 7.046772697330195e-06, "loss": 0.3449, "step": 21107 }, { "epoch": 2.145994306628711, "grad_norm": 0.26776883006095886, "learning_rate": 7.046448902938345e-06, "loss": 0.3338, "step": 21108 }, { "epoch": 2.1460959739731598, "grad_norm": 0.2720653712749481, "learning_rate": 7.046125098236971e-06, "loss": 0.3379, "step": 21109 }, { "epoch": 2.1461976413176087, "grad_norm": 0.2635091245174408, "learning_rate": 7.045801283227707e-06, "loss": 0.3399, "step": 21110 }, { "epoch": 2.1462993086620576, "grad_norm": 0.28786763548851013, "learning_rate": 7.0454774579121795e-06, "loss": 0.3302, "step": 21111 }, { "epoch": 2.1464009760065066, "grad_norm": 0.2764352858066559, "learning_rate": 7.045153622292024e-06, "loss": 0.3272, "step": 21112 }, { "epoch": 2.1465026433509555, "grad_norm": 0.27248409390449524, "learning_rate": 7.044829776368871e-06, "loss": 0.3451, "step": 21113 }, { "epoch": 2.1466043106954045, "grad_norm": 0.2909950315952301, "learning_rate": 7.044505920144351e-06, "loss": 0.3463, "step": 21114 }, { "epoch": 2.1467059780398534, "grad_norm": 0.27540767192840576, "learning_rate": 7.044182053620097e-06, "loss": 0.315, "step": 21115 }, { "epoch": 2.1468076453843024, "grad_norm": 0.25221461057662964, "learning_rate": 7.043858176797738e-06, "loss": 0.3367, "step": 21116 }, { "epoch": 2.1469093127287513, "grad_norm": 0.2867267429828644, "learning_rate": 7.0435342896789085e-06, "loss": 0.3464, "step": 21117 }, { "epoch": 2.1470109800732007, "grad_norm": 0.2930445969104767, "learning_rate": 7.0432103922652375e-06, "loss": 0.3209, "step": 21118 }, { "epoch": 2.1471126474176496, "grad_norm": 0.285832017660141, "learning_rate": 7.042886484558358e-06, "loss": 0.3615, "step": 21119 }, { "epoch": 2.1472143147620986, "grad_norm": 0.2833558917045593, "learning_rate": 7.042562566559904e-06, "loss": 0.3299, "step": 21120 }, { "epoch": 2.1473159821065475, "grad_norm": 0.27387940883636475, "learning_rate": 7.0422386382715036e-06, "loss": 0.3672, "step": 21121 }, { "epoch": 2.1474176494509964, "grad_norm": 0.25636613368988037, "learning_rate": 7.041914699694791e-06, "loss": 0.2935, "step": 21122 }, { "epoch": 2.1475193167954454, "grad_norm": 0.25930359959602356, "learning_rate": 7.041590750831396e-06, "loss": 0.372, "step": 21123 }, { "epoch": 2.1476209841398943, "grad_norm": 0.2860265374183655, "learning_rate": 7.041266791682953e-06, "loss": 0.3332, "step": 21124 }, { "epoch": 2.1477226514843433, "grad_norm": 0.263704776763916, "learning_rate": 7.040942822251093e-06, "loss": 0.3237, "step": 21125 }, { "epoch": 2.147824318828792, "grad_norm": 0.27779799699783325, "learning_rate": 7.040618842537448e-06, "loss": 0.3215, "step": 21126 }, { "epoch": 2.147925986173241, "grad_norm": 0.28152453899383545, "learning_rate": 7.04029485254365e-06, "loss": 0.3151, "step": 21127 }, { "epoch": 2.14802765351769, "grad_norm": 0.28075772523880005, "learning_rate": 7.039970852271332e-06, "loss": 0.3798, "step": 21128 }, { "epoch": 2.148129320862139, "grad_norm": 0.2871536910533905, "learning_rate": 7.039646841722124e-06, "loss": 0.3219, "step": 21129 }, { "epoch": 2.148230988206588, "grad_norm": 0.3169865012168884, "learning_rate": 7.039322820897662e-06, "loss": 0.3179, "step": 21130 }, { "epoch": 2.148332655551037, "grad_norm": 0.28334173560142517, "learning_rate": 7.038998789799575e-06, "loss": 0.3391, "step": 21131 }, { "epoch": 2.148434322895486, "grad_norm": 0.2628425657749176, "learning_rate": 7.038674748429497e-06, "loss": 0.3179, "step": 21132 }, { "epoch": 2.148535990239935, "grad_norm": 0.2720596194267273, "learning_rate": 7.0383506967890604e-06, "loss": 0.3421, "step": 21133 }, { "epoch": 2.1486376575843837, "grad_norm": 0.3051072955131531, "learning_rate": 7.038026634879895e-06, "loss": 0.3289, "step": 21134 }, { "epoch": 2.1487393249288327, "grad_norm": 0.2679887115955353, "learning_rate": 7.037702562703639e-06, "loss": 0.3049, "step": 21135 }, { "epoch": 2.1488409922732816, "grad_norm": 0.2991240620613098, "learning_rate": 7.037378480261921e-06, "loss": 0.3546, "step": 21136 }, { "epoch": 2.148942659617731, "grad_norm": 0.28115108609199524, "learning_rate": 7.037054387556373e-06, "loss": 0.3641, "step": 21137 }, { "epoch": 2.14904432696218, "grad_norm": 0.2734858989715576, "learning_rate": 7.0367302845886295e-06, "loss": 0.3208, "step": 21138 }, { "epoch": 2.149145994306629, "grad_norm": 0.283523291349411, "learning_rate": 7.036406171360324e-06, "loss": 0.3358, "step": 21139 }, { "epoch": 2.149247661651078, "grad_norm": 0.2944009304046631, "learning_rate": 7.036082047873088e-06, "loss": 0.3301, "step": 21140 }, { "epoch": 2.1493493289955268, "grad_norm": 0.2826813757419586, "learning_rate": 7.0357579141285526e-06, "loss": 0.3475, "step": 21141 }, { "epoch": 2.1494509963399757, "grad_norm": 0.2806003987789154, "learning_rate": 7.035433770128355e-06, "loss": 0.3342, "step": 21142 }, { "epoch": 2.1495526636844247, "grad_norm": 0.2686726748943329, "learning_rate": 7.035109615874122e-06, "loss": 0.378, "step": 21143 }, { "epoch": 2.1496543310288736, "grad_norm": 0.26924291253089905, "learning_rate": 7.034785451367494e-06, "loss": 0.3286, "step": 21144 }, { "epoch": 2.1497559983733225, "grad_norm": 0.2689150869846344, "learning_rate": 7.034461276610099e-06, "loss": 0.3356, "step": 21145 }, { "epoch": 2.1498576657177715, "grad_norm": 0.27036046981811523, "learning_rate": 7.034137091603572e-06, "loss": 0.3169, "step": 21146 }, { "epoch": 2.1499593330622204, "grad_norm": 0.269582599401474, "learning_rate": 7.0338128963495464e-06, "loss": 0.3468, "step": 21147 }, { "epoch": 2.1500610004066694, "grad_norm": 0.26086872816085815, "learning_rate": 7.033488690849654e-06, "loss": 0.3222, "step": 21148 }, { "epoch": 2.1501626677511183, "grad_norm": 0.27537474036216736, "learning_rate": 7.033164475105528e-06, "loss": 0.3039, "step": 21149 }, { "epoch": 2.1502643350955672, "grad_norm": 0.25534072518348694, "learning_rate": 7.0328402491188044e-06, "loss": 0.3206, "step": 21150 }, { "epoch": 2.150366002440016, "grad_norm": 0.28485432267189026, "learning_rate": 7.032516012891113e-06, "loss": 0.3421, "step": 21151 }, { "epoch": 2.150467669784465, "grad_norm": 0.2579112946987152, "learning_rate": 7.03219176642409e-06, "loss": 0.3267, "step": 21152 }, { "epoch": 2.150569337128914, "grad_norm": 0.27168822288513184, "learning_rate": 7.0318675097193675e-06, "loss": 0.3354, "step": 21153 }, { "epoch": 2.150671004473363, "grad_norm": 0.2817923426628113, "learning_rate": 7.031543242778578e-06, "loss": 0.3323, "step": 21154 }, { "epoch": 2.150772671817812, "grad_norm": 0.2579992711544037, "learning_rate": 7.031218965603358e-06, "loss": 0.3491, "step": 21155 }, { "epoch": 2.150874339162261, "grad_norm": 0.27167898416519165, "learning_rate": 7.030894678195339e-06, "loss": 0.3833, "step": 21156 }, { "epoch": 2.15097600650671, "grad_norm": 0.2776240408420563, "learning_rate": 7.0305703805561565e-06, "loss": 0.3463, "step": 21157 }, { "epoch": 2.151077673851159, "grad_norm": 0.2850412428379059, "learning_rate": 7.030246072687442e-06, "loss": 0.33, "step": 21158 }, { "epoch": 2.151179341195608, "grad_norm": 0.269609272480011, "learning_rate": 7.0299217545908285e-06, "loss": 0.3133, "step": 21159 }, { "epoch": 2.151281008540057, "grad_norm": 0.2859114408493042, "learning_rate": 7.029597426267953e-06, "loss": 0.3139, "step": 21160 }, { "epoch": 2.151382675884506, "grad_norm": 0.2730325162410736, "learning_rate": 7.029273087720448e-06, "loss": 0.3391, "step": 21161 }, { "epoch": 2.151484343228955, "grad_norm": 0.26516443490982056, "learning_rate": 7.028948738949949e-06, "loss": 0.3458, "step": 21162 }, { "epoch": 2.151586010573404, "grad_norm": 0.2561061680316925, "learning_rate": 7.028624379958087e-06, "loss": 0.3097, "step": 21163 }, { "epoch": 2.151687677917853, "grad_norm": 0.2630572021007538, "learning_rate": 7.028300010746496e-06, "loss": 0.3458, "step": 21164 }, { "epoch": 2.151789345262302, "grad_norm": 0.3039877414703369, "learning_rate": 7.027975631316814e-06, "loss": 0.2974, "step": 21165 }, { "epoch": 2.1518910126067508, "grad_norm": 0.29027658700942993, "learning_rate": 7.027651241670671e-06, "loss": 0.3332, "step": 21166 }, { "epoch": 2.1519926799511997, "grad_norm": 0.26476195454597473, "learning_rate": 7.027326841809704e-06, "loss": 0.3076, "step": 21167 }, { "epoch": 2.1520943472956486, "grad_norm": 0.27373450994491577, "learning_rate": 7.0270024317355455e-06, "loss": 0.32, "step": 21168 }, { "epoch": 2.1521960146400976, "grad_norm": 0.26880601048469543, "learning_rate": 7.02667801144983e-06, "loss": 0.3275, "step": 21169 }, { "epoch": 2.1522976819845465, "grad_norm": 0.28665608167648315, "learning_rate": 7.026353580954193e-06, "loss": 0.3253, "step": 21170 }, { "epoch": 2.1523993493289955, "grad_norm": 0.2768803536891937, "learning_rate": 7.026029140250269e-06, "loss": 0.3321, "step": 21171 }, { "epoch": 2.1525010166734444, "grad_norm": 0.2778884470462799, "learning_rate": 7.02570468933969e-06, "loss": 0.3278, "step": 21172 }, { "epoch": 2.1526026840178933, "grad_norm": 0.259123295545578, "learning_rate": 7.025380228224093e-06, "loss": 0.3079, "step": 21173 }, { "epoch": 2.1527043513623423, "grad_norm": 0.27699363231658936, "learning_rate": 7.0250557569051105e-06, "loss": 0.3539, "step": 21174 }, { "epoch": 2.1528060187067912, "grad_norm": 0.26375797390937805, "learning_rate": 7.02473127538438e-06, "loss": 0.3285, "step": 21175 }, { "epoch": 2.15290768605124, "grad_norm": 0.2724856436252594, "learning_rate": 7.024406783663536e-06, "loss": 0.3342, "step": 21176 }, { "epoch": 2.153009353395689, "grad_norm": 0.2748843729496002, "learning_rate": 7.024082281744208e-06, "loss": 0.3496, "step": 21177 }, { "epoch": 2.1531110207401385, "grad_norm": 0.2786892056465149, "learning_rate": 7.023757769628037e-06, "loss": 0.3695, "step": 21178 }, { "epoch": 2.1532126880845874, "grad_norm": 0.2501700818538666, "learning_rate": 7.023433247316652e-06, "loss": 0.3282, "step": 21179 }, { "epoch": 2.1533143554290364, "grad_norm": 0.24963310360908508, "learning_rate": 7.023108714811695e-06, "loss": 0.3658, "step": 21180 }, { "epoch": 2.1534160227734853, "grad_norm": 0.2630152404308319, "learning_rate": 7.022784172114796e-06, "loss": 0.3278, "step": 21181 }, { "epoch": 2.1535176901179343, "grad_norm": 0.2772807776927948, "learning_rate": 7.0224596192275895e-06, "loss": 0.3437, "step": 21182 }, { "epoch": 2.153619357462383, "grad_norm": 0.31590723991394043, "learning_rate": 7.022135056151714e-06, "loss": 0.3298, "step": 21183 }, { "epoch": 2.153721024806832, "grad_norm": 0.2671492397785187, "learning_rate": 7.0218104828888e-06, "loss": 0.3661, "step": 21184 }, { "epoch": 2.153822692151281, "grad_norm": 0.26654571294784546, "learning_rate": 7.021485899440488e-06, "loss": 0.3095, "step": 21185 }, { "epoch": 2.15392435949573, "grad_norm": 0.2723398804664612, "learning_rate": 7.02116130580841e-06, "loss": 0.3087, "step": 21186 }, { "epoch": 2.154026026840179, "grad_norm": 0.28601640462875366, "learning_rate": 7.0208367019942005e-06, "loss": 0.3804, "step": 21187 }, { "epoch": 2.154127694184628, "grad_norm": 0.2641673982143402, "learning_rate": 7.020512087999497e-06, "loss": 0.3467, "step": 21188 }, { "epoch": 2.154229361529077, "grad_norm": 0.26968005299568176, "learning_rate": 7.020187463825933e-06, "loss": 0.3313, "step": 21189 }, { "epoch": 2.154331028873526, "grad_norm": 0.2849798798561096, "learning_rate": 7.019862829475145e-06, "loss": 0.351, "step": 21190 }, { "epoch": 2.1544326962179747, "grad_norm": 0.27616390585899353, "learning_rate": 7.0195381849487674e-06, "loss": 0.3395, "step": 21191 }, { "epoch": 2.1545343635624237, "grad_norm": 0.2689572274684906, "learning_rate": 7.019213530248437e-06, "loss": 0.3794, "step": 21192 }, { "epoch": 2.1546360309068726, "grad_norm": 0.2750479578971863, "learning_rate": 7.018888865375789e-06, "loss": 0.337, "step": 21193 }, { "epoch": 2.1547376982513216, "grad_norm": 0.2724347710609436, "learning_rate": 7.018564190332458e-06, "loss": 0.3113, "step": 21194 }, { "epoch": 2.1548393655957705, "grad_norm": 0.29444801807403564, "learning_rate": 7.018239505120079e-06, "loss": 0.3487, "step": 21195 }, { "epoch": 2.1549410329402194, "grad_norm": 0.2691079080104828, "learning_rate": 7.0179148097402916e-06, "loss": 0.3231, "step": 21196 }, { "epoch": 2.1550427002846684, "grad_norm": 0.26031726598739624, "learning_rate": 7.017590104194727e-06, "loss": 0.3312, "step": 21197 }, { "epoch": 2.1551443676291173, "grad_norm": 0.2779568135738373, "learning_rate": 7.017265388485025e-06, "loss": 0.3566, "step": 21198 }, { "epoch": 2.1552460349735663, "grad_norm": 0.2922775447368622, "learning_rate": 7.0169406626128165e-06, "loss": 0.3641, "step": 21199 }, { "epoch": 2.1553477023180156, "grad_norm": 0.2790347635746002, "learning_rate": 7.016615926579742e-06, "loss": 0.3195, "step": 21200 }, { "epoch": 2.1554493696624646, "grad_norm": 0.266626238822937, "learning_rate": 7.016291180387435e-06, "loss": 0.326, "step": 21201 }, { "epoch": 2.1555510370069135, "grad_norm": 0.26296573877334595, "learning_rate": 7.015966424037533e-06, "loss": 0.3041, "step": 21202 }, { "epoch": 2.1556527043513625, "grad_norm": 0.2777504622936249, "learning_rate": 7.0156416575316724e-06, "loss": 0.3321, "step": 21203 }, { "epoch": 2.1557543716958114, "grad_norm": 0.2710401713848114, "learning_rate": 7.015316880871486e-06, "loss": 0.3749, "step": 21204 }, { "epoch": 2.1558560390402604, "grad_norm": 0.26602083444595337, "learning_rate": 7.014992094058612e-06, "loss": 0.3217, "step": 21205 }, { "epoch": 2.1559577063847093, "grad_norm": 0.2676413953304291, "learning_rate": 7.014667297094688e-06, "loss": 0.3193, "step": 21206 }, { "epoch": 2.1560593737291582, "grad_norm": 0.2708097994327545, "learning_rate": 7.014342489981347e-06, "loss": 0.3405, "step": 21207 }, { "epoch": 2.156161041073607, "grad_norm": 0.25038769841194153, "learning_rate": 7.014017672720229e-06, "loss": 0.3159, "step": 21208 }, { "epoch": 2.156262708418056, "grad_norm": 0.2940559685230255, "learning_rate": 7.013692845312968e-06, "loss": 0.3737, "step": 21209 }, { "epoch": 2.156364375762505, "grad_norm": 0.28476449847221375, "learning_rate": 7.013368007761201e-06, "loss": 0.3674, "step": 21210 }, { "epoch": 2.156466043106954, "grad_norm": 0.28272494673728943, "learning_rate": 7.013043160066565e-06, "loss": 0.3452, "step": 21211 }, { "epoch": 2.156567710451403, "grad_norm": 0.29178324341773987, "learning_rate": 7.012718302230694e-06, "loss": 0.3342, "step": 21212 }, { "epoch": 2.156669377795852, "grad_norm": 0.2726077437400818, "learning_rate": 7.012393434255229e-06, "loss": 0.344, "step": 21213 }, { "epoch": 2.156771045140301, "grad_norm": 0.24819335341453552, "learning_rate": 7.012068556141803e-06, "loss": 0.3307, "step": 21214 }, { "epoch": 2.1568727124847498, "grad_norm": 0.2859109342098236, "learning_rate": 7.011743667892052e-06, "loss": 0.3332, "step": 21215 }, { "epoch": 2.1569743798291987, "grad_norm": 0.2803718149662018, "learning_rate": 7.011418769507618e-06, "loss": 0.3449, "step": 21216 }, { "epoch": 2.1570760471736476, "grad_norm": 0.2749180197715759, "learning_rate": 7.011093860990131e-06, "loss": 0.3364, "step": 21217 }, { "epoch": 2.1571777145180966, "grad_norm": 0.2647593915462494, "learning_rate": 7.010768942341233e-06, "loss": 0.3405, "step": 21218 }, { "epoch": 2.157279381862546, "grad_norm": 0.2842855453491211, "learning_rate": 7.010444013562559e-06, "loss": 0.3567, "step": 21219 }, { "epoch": 2.157381049206995, "grad_norm": 0.30549749732017517, "learning_rate": 7.010119074655744e-06, "loss": 0.3256, "step": 21220 }, { "epoch": 2.157482716551444, "grad_norm": 0.2477152943611145, "learning_rate": 7.0097941256224286e-06, "loss": 0.3514, "step": 21221 }, { "epoch": 2.157584383895893, "grad_norm": 0.2832397520542145, "learning_rate": 7.009469166464247e-06, "loss": 0.2985, "step": 21222 }, { "epoch": 2.1576860512403417, "grad_norm": 0.3151649832725525, "learning_rate": 7.009144197182838e-06, "loss": 0.3328, "step": 21223 }, { "epoch": 2.1577877185847907, "grad_norm": 0.2634396255016327, "learning_rate": 7.008819217779836e-06, "loss": 0.3133, "step": 21224 }, { "epoch": 2.1578893859292396, "grad_norm": 0.2698274850845337, "learning_rate": 7.008494228256883e-06, "loss": 0.3391, "step": 21225 }, { "epoch": 2.1579910532736886, "grad_norm": 0.2635050415992737, "learning_rate": 7.008169228615611e-06, "loss": 0.318, "step": 21226 }, { "epoch": 2.1580927206181375, "grad_norm": 0.27709561586380005, "learning_rate": 7.007844218857661e-06, "loss": 0.3492, "step": 21227 }, { "epoch": 2.1581943879625864, "grad_norm": 0.2922958433628082, "learning_rate": 7.007519198984669e-06, "loss": 0.3311, "step": 21228 }, { "epoch": 2.1582960553070354, "grad_norm": 0.29485324025154114, "learning_rate": 7.007194168998273e-06, "loss": 0.3297, "step": 21229 }, { "epoch": 2.1583977226514843, "grad_norm": 0.29588839411735535, "learning_rate": 7.006869128900107e-06, "loss": 0.3313, "step": 21230 }, { "epoch": 2.1584993899959333, "grad_norm": 0.2575368583202362, "learning_rate": 7.006544078691814e-06, "loss": 0.331, "step": 21231 }, { "epoch": 2.158601057340382, "grad_norm": 0.2755688428878784, "learning_rate": 7.0062190183750265e-06, "loss": 0.3178, "step": 21232 }, { "epoch": 2.158702724684831, "grad_norm": 0.28653690218925476, "learning_rate": 7.005893947951387e-06, "loss": 0.3575, "step": 21233 }, { "epoch": 2.15880439202928, "grad_norm": 0.275413453578949, "learning_rate": 7.00556886742253e-06, "loss": 0.3224, "step": 21234 }, { "epoch": 2.158906059373729, "grad_norm": 0.25983747839927673, "learning_rate": 7.00524377679009e-06, "loss": 0.3371, "step": 21235 }, { "epoch": 2.159007726718178, "grad_norm": 0.2872692346572876, "learning_rate": 7.004918676055712e-06, "loss": 0.3429, "step": 21236 }, { "epoch": 2.159109394062627, "grad_norm": 0.2560505270957947, "learning_rate": 7.004593565221029e-06, "loss": 0.3533, "step": 21237 }, { "epoch": 2.159211061407076, "grad_norm": 0.2731684744358063, "learning_rate": 7.004268444287681e-06, "loss": 0.3165, "step": 21238 }, { "epoch": 2.159312728751525, "grad_norm": 0.26918894052505493, "learning_rate": 7.003943313257304e-06, "loss": 0.3449, "step": 21239 }, { "epoch": 2.1594143960959737, "grad_norm": 0.29904505610466003, "learning_rate": 7.003618172131535e-06, "loss": 0.3198, "step": 21240 }, { "epoch": 2.159516063440423, "grad_norm": 0.27474451065063477, "learning_rate": 7.0032930209120165e-06, "loss": 0.3449, "step": 21241 }, { "epoch": 2.159617730784872, "grad_norm": 0.31159964203834534, "learning_rate": 7.002967859600382e-06, "loss": 0.3449, "step": 21242 }, { "epoch": 2.159719398129321, "grad_norm": 0.29660582542419434, "learning_rate": 7.002642688198274e-06, "loss": 0.3333, "step": 21243 }, { "epoch": 2.15982106547377, "grad_norm": 0.29398688673973083, "learning_rate": 7.002317506707327e-06, "loss": 0.3508, "step": 21244 }, { "epoch": 2.159922732818219, "grad_norm": 0.2632133960723877, "learning_rate": 7.001992315129178e-06, "loss": 0.3094, "step": 21245 }, { "epoch": 2.160024400162668, "grad_norm": 0.2744736969470978, "learning_rate": 7.00166711346547e-06, "loss": 0.3168, "step": 21246 }, { "epoch": 2.1601260675071168, "grad_norm": 0.3001425862312317, "learning_rate": 7.001341901717839e-06, "loss": 0.3276, "step": 21247 }, { "epoch": 2.1602277348515657, "grad_norm": 0.2715986669063568, "learning_rate": 7.0010166798879205e-06, "loss": 0.321, "step": 21248 }, { "epoch": 2.1603294021960147, "grad_norm": 0.273958295583725, "learning_rate": 7.000691447977358e-06, "loss": 0.3453, "step": 21249 }, { "epoch": 2.1604310695404636, "grad_norm": 0.28553155064582825, "learning_rate": 7.0003662059877845e-06, "loss": 0.3383, "step": 21250 }, { "epoch": 2.1605327368849125, "grad_norm": 0.28952738642692566, "learning_rate": 7.000040953920844e-06, "loss": 0.3345, "step": 21251 }, { "epoch": 2.1606344042293615, "grad_norm": 0.2841118276119232, "learning_rate": 6.999715691778173e-06, "loss": 0.3003, "step": 21252 }, { "epoch": 2.1607360715738104, "grad_norm": 0.2663961946964264, "learning_rate": 6.999390419561407e-06, "loss": 0.3375, "step": 21253 }, { "epoch": 2.1608377389182594, "grad_norm": 0.2950792908668518, "learning_rate": 6.9990651372721895e-06, "loss": 0.3536, "step": 21254 }, { "epoch": 2.1609394062627083, "grad_norm": 0.25740277767181396, "learning_rate": 6.998739844912153e-06, "loss": 0.3303, "step": 21255 }, { "epoch": 2.1610410736071572, "grad_norm": 0.26457419991493225, "learning_rate": 6.9984145424829445e-06, "loss": 0.3243, "step": 21256 }, { "epoch": 2.161142740951606, "grad_norm": 0.29855507612228394, "learning_rate": 6.998089229986198e-06, "loss": 0.3218, "step": 21257 }, { "epoch": 2.161244408296055, "grad_norm": 0.29652491211891174, "learning_rate": 6.99776390742355e-06, "loss": 0.324, "step": 21258 }, { "epoch": 2.161346075640504, "grad_norm": 0.2724890410900116, "learning_rate": 6.997438574796644e-06, "loss": 0.3139, "step": 21259 }, { "epoch": 2.1614477429849535, "grad_norm": 0.2829737961292267, "learning_rate": 6.997113232107117e-06, "loss": 0.3523, "step": 21260 }, { "epoch": 2.1615494103294024, "grad_norm": 0.27913838624954224, "learning_rate": 6.996787879356608e-06, "loss": 0.3095, "step": 21261 }, { "epoch": 2.1616510776738513, "grad_norm": 0.2806297838687897, "learning_rate": 6.996462516546757e-06, "loss": 0.3207, "step": 21262 }, { "epoch": 2.1617527450183003, "grad_norm": 0.2653152048587799, "learning_rate": 6.996137143679201e-06, "loss": 0.317, "step": 21263 }, { "epoch": 2.1618544123627492, "grad_norm": 0.2951880395412445, "learning_rate": 6.995811760755582e-06, "loss": 0.3666, "step": 21264 }, { "epoch": 2.161956079707198, "grad_norm": 0.2642444372177124, "learning_rate": 6.995486367777535e-06, "loss": 0.3259, "step": 21265 }, { "epoch": 2.162057747051647, "grad_norm": 0.2832813560962677, "learning_rate": 6.995160964746703e-06, "loss": 0.3232, "step": 21266 }, { "epoch": 2.162159414396096, "grad_norm": 0.2795881927013397, "learning_rate": 6.994835551664726e-06, "loss": 0.3201, "step": 21267 }, { "epoch": 2.162261081740545, "grad_norm": 0.26456892490386963, "learning_rate": 6.99451012853324e-06, "loss": 0.3192, "step": 21268 }, { "epoch": 2.162362749084994, "grad_norm": 0.29815512895584106, "learning_rate": 6.994184695353887e-06, "loss": 0.391, "step": 21269 }, { "epoch": 2.162464416429443, "grad_norm": 0.26008033752441406, "learning_rate": 6.9938592521283035e-06, "loss": 0.3325, "step": 21270 }, { "epoch": 2.162566083773892, "grad_norm": 0.2945593595504761, "learning_rate": 6.993533798858132e-06, "loss": 0.3791, "step": 21271 }, { "epoch": 2.1626677511183408, "grad_norm": 0.26552239060401917, "learning_rate": 6.993208335545012e-06, "loss": 0.3351, "step": 21272 }, { "epoch": 2.1627694184627897, "grad_norm": 0.2797061800956726, "learning_rate": 6.99288286219058e-06, "loss": 0.3347, "step": 21273 }, { "epoch": 2.1628710858072386, "grad_norm": 0.28163784742355347, "learning_rate": 6.9925573787964794e-06, "loss": 0.354, "step": 21274 }, { "epoch": 2.1629727531516876, "grad_norm": 0.2784065008163452, "learning_rate": 6.992231885364347e-06, "loss": 0.3333, "step": 21275 }, { "epoch": 2.1630744204961365, "grad_norm": 0.2744826376438141, "learning_rate": 6.991906381895823e-06, "loss": 0.3102, "step": 21276 }, { "epoch": 2.1631760878405855, "grad_norm": 0.26572561264038086, "learning_rate": 6.991580868392551e-06, "loss": 0.3337, "step": 21277 }, { "epoch": 2.1632777551850344, "grad_norm": 0.2524123787879944, "learning_rate": 6.991255344856165e-06, "loss": 0.3131, "step": 21278 }, { "epoch": 2.1633794225294833, "grad_norm": 0.26997387409210205, "learning_rate": 6.99092981128831e-06, "loss": 0.345, "step": 21279 }, { "epoch": 2.1634810898739323, "grad_norm": 0.28393736481666565, "learning_rate": 6.9906042676906215e-06, "loss": 0.3228, "step": 21280 }, { "epoch": 2.1635827572183812, "grad_norm": 0.2632281184196472, "learning_rate": 6.990278714064743e-06, "loss": 0.3435, "step": 21281 }, { "epoch": 2.1636844245628306, "grad_norm": 0.2807316780090332, "learning_rate": 6.989953150412314e-06, "loss": 0.3392, "step": 21282 }, { "epoch": 2.1637860919072796, "grad_norm": 0.27553242444992065, "learning_rate": 6.989627576734972e-06, "loss": 0.309, "step": 21283 }, { "epoch": 2.1638877592517285, "grad_norm": 0.2829797863960266, "learning_rate": 6.989301993034361e-06, "loss": 0.3361, "step": 21284 }, { "epoch": 2.1639894265961774, "grad_norm": 0.2763488292694092, "learning_rate": 6.988976399312118e-06, "loss": 0.3561, "step": 21285 }, { "epoch": 2.1640910939406264, "grad_norm": 0.26731055974960327, "learning_rate": 6.988650795569885e-06, "loss": 0.3657, "step": 21286 }, { "epoch": 2.1641927612850753, "grad_norm": 0.2797169089317322, "learning_rate": 6.988325181809302e-06, "loss": 0.331, "step": 21287 }, { "epoch": 2.1642944286295243, "grad_norm": 0.27269434928894043, "learning_rate": 6.9879995580320095e-06, "loss": 0.3359, "step": 21288 }, { "epoch": 2.164396095973973, "grad_norm": 0.30307701230049133, "learning_rate": 6.9876739242396475e-06, "loss": 0.321, "step": 21289 }, { "epoch": 2.164497763318422, "grad_norm": 0.26397958397865295, "learning_rate": 6.987348280433855e-06, "loss": 0.2891, "step": 21290 }, { "epoch": 2.164599430662871, "grad_norm": 0.2839908301830292, "learning_rate": 6.987022626616276e-06, "loss": 0.3374, "step": 21291 }, { "epoch": 2.16470109800732, "grad_norm": 0.2710629999637604, "learning_rate": 6.98669696278855e-06, "loss": 0.3725, "step": 21292 }, { "epoch": 2.164802765351769, "grad_norm": 0.26952990889549255, "learning_rate": 6.986371288952314e-06, "loss": 0.3436, "step": 21293 }, { "epoch": 2.164904432696218, "grad_norm": 0.2796347141265869, "learning_rate": 6.986045605109214e-06, "loss": 0.3428, "step": 21294 }, { "epoch": 2.165006100040667, "grad_norm": 0.31096959114074707, "learning_rate": 6.985719911260887e-06, "loss": 0.3538, "step": 21295 }, { "epoch": 2.165107767385116, "grad_norm": 0.26052024960517883, "learning_rate": 6.9853942074089745e-06, "loss": 0.3051, "step": 21296 }, { "epoch": 2.1652094347295647, "grad_norm": 0.2768703103065491, "learning_rate": 6.985068493555119e-06, "loss": 0.3299, "step": 21297 }, { "epoch": 2.1653111020740137, "grad_norm": 0.28064417839050293, "learning_rate": 6.984742769700959e-06, "loss": 0.3281, "step": 21298 }, { "epoch": 2.1654127694184626, "grad_norm": 0.26261308789253235, "learning_rate": 6.984417035848138e-06, "loss": 0.3485, "step": 21299 }, { "epoch": 2.1655144367629116, "grad_norm": 0.2653888165950775, "learning_rate": 6.984091291998294e-06, "loss": 0.3174, "step": 21300 }, { "epoch": 2.165616104107361, "grad_norm": 0.2799551784992218, "learning_rate": 6.98376553815307e-06, "loss": 0.3128, "step": 21301 }, { "epoch": 2.16571777145181, "grad_norm": 0.3050009608268738, "learning_rate": 6.983439774314108e-06, "loss": 0.3581, "step": 21302 }, { "epoch": 2.165819438796259, "grad_norm": 0.27600592374801636, "learning_rate": 6.983114000483045e-06, "loss": 0.3042, "step": 21303 }, { "epoch": 2.1659211061407078, "grad_norm": 0.28400540351867676, "learning_rate": 6.9827882166615276e-06, "loss": 0.3311, "step": 21304 }, { "epoch": 2.1660227734851567, "grad_norm": 0.27351707220077515, "learning_rate": 6.982462422851195e-06, "loss": 0.3285, "step": 21305 }, { "epoch": 2.1661244408296056, "grad_norm": 0.2986346483230591, "learning_rate": 6.9821366190536845e-06, "loss": 0.2988, "step": 21306 }, { "epoch": 2.1662261081740546, "grad_norm": 0.275269478559494, "learning_rate": 6.981810805270642e-06, "loss": 0.3676, "step": 21307 }, { "epoch": 2.1663277755185035, "grad_norm": 0.28561145067214966, "learning_rate": 6.981484981503708e-06, "loss": 0.3516, "step": 21308 }, { "epoch": 2.1664294428629525, "grad_norm": 0.2799578011035919, "learning_rate": 6.981159147754524e-06, "loss": 0.3176, "step": 21309 }, { "epoch": 2.1665311102074014, "grad_norm": 0.2918930649757385, "learning_rate": 6.9808333040247315e-06, "loss": 0.3244, "step": 21310 }, { "epoch": 2.1666327775518504, "grad_norm": 0.26858678460121155, "learning_rate": 6.98050745031597e-06, "loss": 0.3177, "step": 21311 }, { "epoch": 2.1667344448962993, "grad_norm": 0.26811274886131287, "learning_rate": 6.980181586629883e-06, "loss": 0.3511, "step": 21312 }, { "epoch": 2.1668361122407482, "grad_norm": 0.2766209542751312, "learning_rate": 6.979855712968111e-06, "loss": 0.3297, "step": 21313 }, { "epoch": 2.166937779585197, "grad_norm": 0.281564325094223, "learning_rate": 6.979529829332299e-06, "loss": 0.3642, "step": 21314 }, { "epoch": 2.167039446929646, "grad_norm": 0.2661148011684418, "learning_rate": 6.979203935724085e-06, "loss": 0.3355, "step": 21315 }, { "epoch": 2.167141114274095, "grad_norm": 0.26567697525024414, "learning_rate": 6.978878032145109e-06, "loss": 0.3187, "step": 21316 }, { "epoch": 2.167242781618544, "grad_norm": 0.2828123867511749, "learning_rate": 6.97855211859702e-06, "loss": 0.3678, "step": 21317 }, { "epoch": 2.167344448962993, "grad_norm": 0.2819407880306244, "learning_rate": 6.978226195081452e-06, "loss": 0.3177, "step": 21318 }, { "epoch": 2.167446116307442, "grad_norm": 0.2936283349990845, "learning_rate": 6.977900261600053e-06, "loss": 0.3505, "step": 21319 }, { "epoch": 2.167547783651891, "grad_norm": 0.2683270275592804, "learning_rate": 6.977574318154463e-06, "loss": 0.3578, "step": 21320 }, { "epoch": 2.1676494509963398, "grad_norm": 0.29247918725013733, "learning_rate": 6.977248364746321e-06, "loss": 0.3299, "step": 21321 }, { "epoch": 2.1677511183407887, "grad_norm": 0.26488491892814636, "learning_rate": 6.976922401377273e-06, "loss": 0.3153, "step": 21322 }, { "epoch": 2.167852785685238, "grad_norm": 0.2639237642288208, "learning_rate": 6.976596428048961e-06, "loss": 0.3195, "step": 21323 }, { "epoch": 2.167954453029687, "grad_norm": 0.2844879925251007, "learning_rate": 6.976270444763023e-06, "loss": 0.3319, "step": 21324 }, { "epoch": 2.168056120374136, "grad_norm": 0.2928234338760376, "learning_rate": 6.9759444515211065e-06, "loss": 0.3587, "step": 21325 }, { "epoch": 2.168157787718585, "grad_norm": 0.2905900478363037, "learning_rate": 6.975618448324849e-06, "loss": 0.33, "step": 21326 }, { "epoch": 2.168259455063034, "grad_norm": 0.2979491949081421, "learning_rate": 6.975292435175898e-06, "loss": 0.3284, "step": 21327 }, { "epoch": 2.168361122407483, "grad_norm": 0.27209433913230896, "learning_rate": 6.974966412075892e-06, "loss": 0.3431, "step": 21328 }, { "epoch": 2.1684627897519317, "grad_norm": 0.2957693934440613, "learning_rate": 6.974640379026474e-06, "loss": 0.3209, "step": 21329 }, { "epoch": 2.1685644570963807, "grad_norm": 0.2708059251308441, "learning_rate": 6.974314336029288e-06, "loss": 0.333, "step": 21330 }, { "epoch": 2.1686661244408296, "grad_norm": 0.28008532524108887, "learning_rate": 6.973988283085973e-06, "loss": 0.2881, "step": 21331 }, { "epoch": 2.1687677917852786, "grad_norm": 0.26570868492126465, "learning_rate": 6.973662220198177e-06, "loss": 0.3563, "step": 21332 }, { "epoch": 2.1688694591297275, "grad_norm": 0.2540196478366852, "learning_rate": 6.97333614736754e-06, "loss": 0.3442, "step": 21333 }, { "epoch": 2.1689711264741764, "grad_norm": 0.2698955833911896, "learning_rate": 6.973010064595702e-06, "loss": 0.3395, "step": 21334 }, { "epoch": 2.1690727938186254, "grad_norm": 0.2953532338142395, "learning_rate": 6.972683971884311e-06, "loss": 0.3243, "step": 21335 }, { "epoch": 2.1691744611630743, "grad_norm": 0.2876345217227936, "learning_rate": 6.972357869235003e-06, "loss": 0.3468, "step": 21336 }, { "epoch": 2.1692761285075233, "grad_norm": 0.27056366205215454, "learning_rate": 6.972031756649428e-06, "loss": 0.3367, "step": 21337 }, { "epoch": 2.169377795851972, "grad_norm": 0.26985618472099304, "learning_rate": 6.9717056341292255e-06, "loss": 0.3456, "step": 21338 }, { "epoch": 2.169479463196421, "grad_norm": 0.2718859016895294, "learning_rate": 6.971379501676038e-06, "loss": 0.3298, "step": 21339 }, { "epoch": 2.16958113054087, "grad_norm": 0.2850606143474579, "learning_rate": 6.971053359291509e-06, "loss": 0.3679, "step": 21340 }, { "epoch": 2.169682797885319, "grad_norm": 0.26644814014434814, "learning_rate": 6.97072720697728e-06, "loss": 0.3366, "step": 21341 }, { "epoch": 2.1697844652297684, "grad_norm": 0.2593720257282257, "learning_rate": 6.970401044734998e-06, "loss": 0.3448, "step": 21342 }, { "epoch": 2.1698861325742174, "grad_norm": 0.29554033279418945, "learning_rate": 6.970074872566303e-06, "loss": 0.3162, "step": 21343 }, { "epoch": 2.1699877999186663, "grad_norm": 0.2905047833919525, "learning_rate": 6.969748690472838e-06, "loss": 0.3588, "step": 21344 }, { "epoch": 2.1700894672631152, "grad_norm": 0.26991134881973267, "learning_rate": 6.969422498456249e-06, "loss": 0.2952, "step": 21345 }, { "epoch": 2.170191134607564, "grad_norm": 0.27665767073631287, "learning_rate": 6.969096296518177e-06, "loss": 0.338, "step": 21346 }, { "epoch": 2.170292801952013, "grad_norm": 0.2997000813484192, "learning_rate": 6.9687700846602634e-06, "loss": 0.3336, "step": 21347 }, { "epoch": 2.170394469296462, "grad_norm": 0.284949392080307, "learning_rate": 6.968443862884157e-06, "loss": 0.3233, "step": 21348 }, { "epoch": 2.170496136640911, "grad_norm": 0.26197850704193115, "learning_rate": 6.968117631191497e-06, "loss": 0.3353, "step": 21349 }, { "epoch": 2.17059780398536, "grad_norm": 0.24962486326694489, "learning_rate": 6.967791389583929e-06, "loss": 0.3244, "step": 21350 }, { "epoch": 2.170699471329809, "grad_norm": 0.2691372334957123, "learning_rate": 6.967465138063094e-06, "loss": 0.3605, "step": 21351 }, { "epoch": 2.170801138674258, "grad_norm": 0.2673286497592926, "learning_rate": 6.967138876630637e-06, "loss": 0.3281, "step": 21352 }, { "epoch": 2.1709028060187068, "grad_norm": 0.2820051610469818, "learning_rate": 6.966812605288204e-06, "loss": 0.3236, "step": 21353 }, { "epoch": 2.1710044733631557, "grad_norm": 0.2611030340194702, "learning_rate": 6.966486324037434e-06, "loss": 0.316, "step": 21354 }, { "epoch": 2.1711061407076047, "grad_norm": 0.26635852456092834, "learning_rate": 6.966160032879975e-06, "loss": 0.3527, "step": 21355 }, { "epoch": 2.1712078080520536, "grad_norm": 0.2972790598869324, "learning_rate": 6.965833731817469e-06, "loss": 0.3359, "step": 21356 }, { "epoch": 2.1713094753965025, "grad_norm": 0.26732268929481506, "learning_rate": 6.965507420851557e-06, "loss": 0.3186, "step": 21357 }, { "epoch": 2.1714111427409515, "grad_norm": 0.2714838683605194, "learning_rate": 6.965181099983889e-06, "loss": 0.3396, "step": 21358 }, { "epoch": 2.1715128100854004, "grad_norm": 0.2682448625564575, "learning_rate": 6.964854769216104e-06, "loss": 0.3226, "step": 21359 }, { "epoch": 2.1716144774298494, "grad_norm": 0.2569964826107025, "learning_rate": 6.964528428549848e-06, "loss": 0.3518, "step": 21360 }, { "epoch": 2.1717161447742983, "grad_norm": 0.266008198261261, "learning_rate": 6.9642020779867646e-06, "loss": 0.3147, "step": 21361 }, { "epoch": 2.1718178121187472, "grad_norm": 0.2814331352710724, "learning_rate": 6.963875717528498e-06, "loss": 0.329, "step": 21362 }, { "epoch": 2.171919479463196, "grad_norm": 0.2759425640106201, "learning_rate": 6.963549347176693e-06, "loss": 0.3393, "step": 21363 }, { "epoch": 2.1720211468076456, "grad_norm": 0.29022708535194397, "learning_rate": 6.963222966932992e-06, "loss": 0.3404, "step": 21364 }, { "epoch": 2.1721228141520945, "grad_norm": 0.28540849685668945, "learning_rate": 6.962896576799041e-06, "loss": 0.366, "step": 21365 }, { "epoch": 2.1722244814965435, "grad_norm": 0.2820158004760742, "learning_rate": 6.962570176776482e-06, "loss": 0.3295, "step": 21366 }, { "epoch": 2.1723261488409924, "grad_norm": 0.27051734924316406, "learning_rate": 6.962243766866961e-06, "loss": 0.3277, "step": 21367 }, { "epoch": 2.1724278161854413, "grad_norm": 0.27456381916999817, "learning_rate": 6.961917347072124e-06, "loss": 0.3569, "step": 21368 }, { "epoch": 2.1725294835298903, "grad_norm": 0.2798274755477905, "learning_rate": 6.961590917393612e-06, "loss": 0.323, "step": 21369 }, { "epoch": 2.1726311508743392, "grad_norm": 0.2505945861339569, "learning_rate": 6.961264477833072e-06, "loss": 0.3142, "step": 21370 }, { "epoch": 2.172732818218788, "grad_norm": 0.2698718309402466, "learning_rate": 6.960938028392147e-06, "loss": 0.3281, "step": 21371 }, { "epoch": 2.172834485563237, "grad_norm": 0.2663649618625641, "learning_rate": 6.960611569072482e-06, "loss": 0.325, "step": 21372 }, { "epoch": 2.172936152907686, "grad_norm": 0.28510698676109314, "learning_rate": 6.960285099875723e-06, "loss": 0.3295, "step": 21373 }, { "epoch": 2.173037820252135, "grad_norm": 0.2713720202445984, "learning_rate": 6.959958620803513e-06, "loss": 0.3218, "step": 21374 }, { "epoch": 2.173139487596584, "grad_norm": 0.28236231207847595, "learning_rate": 6.959632131857496e-06, "loss": 0.3296, "step": 21375 }, { "epoch": 2.173241154941033, "grad_norm": 0.2845328152179718, "learning_rate": 6.959305633039319e-06, "loss": 0.3135, "step": 21376 }, { "epoch": 2.173342822285482, "grad_norm": 0.28607189655303955, "learning_rate": 6.958979124350627e-06, "loss": 0.3304, "step": 21377 }, { "epoch": 2.1734444896299308, "grad_norm": 0.2906486392021179, "learning_rate": 6.9586526057930624e-06, "loss": 0.326, "step": 21378 }, { "epoch": 2.1735461569743797, "grad_norm": 0.26844632625579834, "learning_rate": 6.958326077368272e-06, "loss": 0.3145, "step": 21379 }, { "epoch": 2.1736478243188286, "grad_norm": 0.26217836141586304, "learning_rate": 6.9579995390779e-06, "loss": 0.3095, "step": 21380 }, { "epoch": 2.1737494916632776, "grad_norm": 0.25792092084884644, "learning_rate": 6.957672990923592e-06, "loss": 0.3305, "step": 21381 }, { "epoch": 2.1738511590077265, "grad_norm": 0.28292810916900635, "learning_rate": 6.95734643290699e-06, "loss": 0.337, "step": 21382 }, { "epoch": 2.173952826352176, "grad_norm": 0.283932089805603, "learning_rate": 6.957019865029745e-06, "loss": 0.3261, "step": 21383 }, { "epoch": 2.174054493696625, "grad_norm": 0.2751709520816803, "learning_rate": 6.9566932872934975e-06, "loss": 0.3188, "step": 21384 }, { "epoch": 2.174156161041074, "grad_norm": 0.27653127908706665, "learning_rate": 6.9563666996998944e-06, "loss": 0.3408, "step": 21385 }, { "epoch": 2.1742578283855227, "grad_norm": 0.2723468542098999, "learning_rate": 6.956040102250582e-06, "loss": 0.3095, "step": 21386 }, { "epoch": 2.1743594957299717, "grad_norm": 0.26272663474082947, "learning_rate": 6.955713494947202e-06, "loss": 0.351, "step": 21387 }, { "epoch": 2.1744611630744206, "grad_norm": 0.2856731712818146, "learning_rate": 6.955386877791405e-06, "loss": 0.3318, "step": 21388 }, { "epoch": 2.1745628304188696, "grad_norm": 0.3043464422225952, "learning_rate": 6.955060250784831e-06, "loss": 0.3649, "step": 21389 }, { "epoch": 2.1746644977633185, "grad_norm": 0.29705268144607544, "learning_rate": 6.95473361392913e-06, "loss": 0.3414, "step": 21390 }, { "epoch": 2.1747661651077674, "grad_norm": 0.29061582684516907, "learning_rate": 6.9544069672259454e-06, "loss": 0.3306, "step": 21391 }, { "epoch": 2.1748678324522164, "grad_norm": 0.27358901500701904, "learning_rate": 6.954080310676921e-06, "loss": 0.308, "step": 21392 }, { "epoch": 2.1749694997966653, "grad_norm": 0.29022783041000366, "learning_rate": 6.9537536442837054e-06, "loss": 0.3428, "step": 21393 }, { "epoch": 2.1750711671411143, "grad_norm": 0.3031119704246521, "learning_rate": 6.953426968047943e-06, "loss": 0.3164, "step": 21394 }, { "epoch": 2.175172834485563, "grad_norm": 0.2772611081600189, "learning_rate": 6.953100281971281e-06, "loss": 0.3195, "step": 21395 }, { "epoch": 2.175274501830012, "grad_norm": 0.25865718722343445, "learning_rate": 6.952773586055363e-06, "loss": 0.3054, "step": 21396 }, { "epoch": 2.175376169174461, "grad_norm": 0.2785493731498718, "learning_rate": 6.9524468803018355e-06, "loss": 0.3415, "step": 21397 }, { "epoch": 2.17547783651891, "grad_norm": 0.2938990294933319, "learning_rate": 6.952120164712345e-06, "loss": 0.3284, "step": 21398 }, { "epoch": 2.175579503863359, "grad_norm": 0.26899704337120056, "learning_rate": 6.951793439288538e-06, "loss": 0.3329, "step": 21399 }, { "epoch": 2.175681171207808, "grad_norm": 0.2700260281562805, "learning_rate": 6.9514667040320574e-06, "loss": 0.3653, "step": 21400 }, { "epoch": 2.175782838552257, "grad_norm": 0.28376147150993347, "learning_rate": 6.951139958944552e-06, "loss": 0.3224, "step": 21401 }, { "epoch": 2.175884505896706, "grad_norm": 0.28996986150741577, "learning_rate": 6.950813204027666e-06, "loss": 0.3454, "step": 21402 }, { "epoch": 2.1759861732411547, "grad_norm": 0.2949816584587097, "learning_rate": 6.95048643928305e-06, "loss": 0.3385, "step": 21403 }, { "epoch": 2.1760878405856037, "grad_norm": 0.2614213824272156, "learning_rate": 6.950159664712345e-06, "loss": 0.3286, "step": 21404 }, { "epoch": 2.176189507930053, "grad_norm": 0.2653251588344574, "learning_rate": 6.949832880317199e-06, "loss": 0.3344, "step": 21405 }, { "epoch": 2.176291175274502, "grad_norm": 0.26784461736679077, "learning_rate": 6.949506086099258e-06, "loss": 0.3211, "step": 21406 }, { "epoch": 2.176392842618951, "grad_norm": 0.252415269613266, "learning_rate": 6.949179282060168e-06, "loss": 0.3391, "step": 21407 }, { "epoch": 2.1764945099634, "grad_norm": 0.2711685597896576, "learning_rate": 6.9488524682015765e-06, "loss": 0.3547, "step": 21408 }, { "epoch": 2.176596177307849, "grad_norm": 0.2688675820827484, "learning_rate": 6.948525644525131e-06, "loss": 0.309, "step": 21409 }, { "epoch": 2.1766978446522978, "grad_norm": 0.2596132159233093, "learning_rate": 6.948198811032475e-06, "loss": 0.3318, "step": 21410 }, { "epoch": 2.1767995119967467, "grad_norm": 0.2777850031852722, "learning_rate": 6.947871967725257e-06, "loss": 0.3462, "step": 21411 }, { "epoch": 2.1769011793411956, "grad_norm": 0.29397085309028625, "learning_rate": 6.947545114605121e-06, "loss": 0.3167, "step": 21412 }, { "epoch": 2.1770028466856446, "grad_norm": 0.2711961269378662, "learning_rate": 6.947218251673718e-06, "loss": 0.3059, "step": 21413 }, { "epoch": 2.1771045140300935, "grad_norm": 0.2782593369483948, "learning_rate": 6.946891378932691e-06, "loss": 0.3384, "step": 21414 }, { "epoch": 2.1772061813745425, "grad_norm": 0.2828333079814911, "learning_rate": 6.946564496383688e-06, "loss": 0.3373, "step": 21415 }, { "epoch": 2.1773078487189914, "grad_norm": 0.28661274909973145, "learning_rate": 6.946237604028355e-06, "loss": 0.3251, "step": 21416 }, { "epoch": 2.1774095160634404, "grad_norm": 0.27485841512680054, "learning_rate": 6.945910701868339e-06, "loss": 0.3075, "step": 21417 }, { "epoch": 2.1775111834078893, "grad_norm": 0.29551273584365845, "learning_rate": 6.945583789905288e-06, "loss": 0.3385, "step": 21418 }, { "epoch": 2.1776128507523382, "grad_norm": 0.27811819314956665, "learning_rate": 6.94525686814085e-06, "loss": 0.3378, "step": 21419 }, { "epoch": 2.177714518096787, "grad_norm": 0.2688332498073578, "learning_rate": 6.944929936576667e-06, "loss": 0.327, "step": 21420 }, { "epoch": 2.177816185441236, "grad_norm": 0.25853854417800903, "learning_rate": 6.944602995214392e-06, "loss": 0.3273, "step": 21421 }, { "epoch": 2.177917852785685, "grad_norm": 0.2819889485836029, "learning_rate": 6.9442760440556675e-06, "loss": 0.3672, "step": 21422 }, { "epoch": 2.178019520130134, "grad_norm": 0.26680418848991394, "learning_rate": 6.943949083102143e-06, "loss": 0.3301, "step": 21423 }, { "epoch": 2.1781211874745834, "grad_norm": 0.2807546854019165, "learning_rate": 6.943622112355465e-06, "loss": 0.3445, "step": 21424 }, { "epoch": 2.1782228548190323, "grad_norm": 0.2787456214427948, "learning_rate": 6.94329513181728e-06, "loss": 0.3221, "step": 21425 }, { "epoch": 2.1783245221634813, "grad_norm": 0.25411683320999146, "learning_rate": 6.942968141489237e-06, "loss": 0.3315, "step": 21426 }, { "epoch": 2.17842618950793, "grad_norm": 0.31460264325141907, "learning_rate": 6.942641141372981e-06, "loss": 0.3534, "step": 21427 }, { "epoch": 2.178527856852379, "grad_norm": 0.2710549235343933, "learning_rate": 6.942314131470161e-06, "loss": 0.3337, "step": 21428 }, { "epoch": 2.178629524196828, "grad_norm": 0.27945834398269653, "learning_rate": 6.941987111782425e-06, "loss": 0.3566, "step": 21429 }, { "epoch": 2.178731191541277, "grad_norm": 0.2572092115879059, "learning_rate": 6.941660082311419e-06, "loss": 0.3463, "step": 21430 }, { "epoch": 2.178832858885726, "grad_norm": 0.2699522376060486, "learning_rate": 6.941333043058791e-06, "loss": 0.3214, "step": 21431 }, { "epoch": 2.178934526230175, "grad_norm": 0.28793343901634216, "learning_rate": 6.9410059940261865e-06, "loss": 0.3469, "step": 21432 }, { "epoch": 2.179036193574624, "grad_norm": 0.2591956853866577, "learning_rate": 6.940678935215257e-06, "loss": 0.3278, "step": 21433 }, { "epoch": 2.179137860919073, "grad_norm": 0.2930755019187927, "learning_rate": 6.940351866627648e-06, "loss": 0.3134, "step": 21434 }, { "epoch": 2.1792395282635217, "grad_norm": 0.2823413014411926, "learning_rate": 6.940024788265007e-06, "loss": 0.304, "step": 21435 }, { "epoch": 2.1793411956079707, "grad_norm": 0.26639896631240845, "learning_rate": 6.939697700128982e-06, "loss": 0.3585, "step": 21436 }, { "epoch": 2.1794428629524196, "grad_norm": 0.26004454493522644, "learning_rate": 6.939370602221221e-06, "loss": 0.3243, "step": 21437 }, { "epoch": 2.1795445302968686, "grad_norm": 0.2731856107711792, "learning_rate": 6.939043494543371e-06, "loss": 0.3404, "step": 21438 }, { "epoch": 2.1796461976413175, "grad_norm": 0.2694133520126343, "learning_rate": 6.938716377097083e-06, "loss": 0.3349, "step": 21439 }, { "epoch": 2.1797478649857664, "grad_norm": 0.2556169033050537, "learning_rate": 6.9383892498840015e-06, "loss": 0.356, "step": 21440 }, { "epoch": 2.1798495323302154, "grad_norm": 0.25743594765663147, "learning_rate": 6.938062112905776e-06, "loss": 0.3411, "step": 21441 }, { "epoch": 2.1799511996746643, "grad_norm": 0.2744871973991394, "learning_rate": 6.937734966164052e-06, "loss": 0.3486, "step": 21442 }, { "epoch": 2.1800528670191133, "grad_norm": 0.2643497884273529, "learning_rate": 6.937407809660481e-06, "loss": 0.3628, "step": 21443 }, { "epoch": 2.180154534363562, "grad_norm": 0.2666994631290436, "learning_rate": 6.937080643396711e-06, "loss": 0.3191, "step": 21444 }, { "epoch": 2.180256201708011, "grad_norm": 0.279486745595932, "learning_rate": 6.936753467374388e-06, "loss": 0.3355, "step": 21445 }, { "epoch": 2.1803578690524605, "grad_norm": 0.2601659595966339, "learning_rate": 6.936426281595161e-06, "loss": 0.341, "step": 21446 }, { "epoch": 2.1804595363969095, "grad_norm": 0.2715744078159332, "learning_rate": 6.9360990860606796e-06, "loss": 0.3433, "step": 21447 }, { "epoch": 2.1805612037413584, "grad_norm": 0.25639358162879944, "learning_rate": 6.935771880772589e-06, "loss": 0.3186, "step": 21448 }, { "epoch": 2.1806628710858074, "grad_norm": 0.2719007134437561, "learning_rate": 6.935444665732542e-06, "loss": 0.3026, "step": 21449 }, { "epoch": 2.1807645384302563, "grad_norm": 0.2466181516647339, "learning_rate": 6.935117440942183e-06, "loss": 0.3382, "step": 21450 }, { "epoch": 2.1808662057747052, "grad_norm": 0.2593021094799042, "learning_rate": 6.9347902064031635e-06, "loss": 0.3485, "step": 21451 }, { "epoch": 2.180967873119154, "grad_norm": 0.2556273639202118, "learning_rate": 6.93446296211713e-06, "loss": 0.3148, "step": 21452 }, { "epoch": 2.181069540463603, "grad_norm": 0.2987731695175171, "learning_rate": 6.934135708085732e-06, "loss": 0.344, "step": 21453 }, { "epoch": 2.181171207808052, "grad_norm": 0.2960059940814972, "learning_rate": 6.933808444310618e-06, "loss": 0.3315, "step": 21454 }, { "epoch": 2.181272875152501, "grad_norm": 0.28882908821105957, "learning_rate": 6.933481170793437e-06, "loss": 0.3216, "step": 21455 }, { "epoch": 2.18137454249695, "grad_norm": 0.3012087941169739, "learning_rate": 6.933153887535837e-06, "loss": 0.333, "step": 21456 }, { "epoch": 2.181476209841399, "grad_norm": 0.26842251420021057, "learning_rate": 6.932826594539466e-06, "loss": 0.3379, "step": 21457 }, { "epoch": 2.181577877185848, "grad_norm": 0.2750393748283386, "learning_rate": 6.932499291805974e-06, "loss": 0.3462, "step": 21458 }, { "epoch": 2.1816795445302968, "grad_norm": 0.2730884253978729, "learning_rate": 6.932171979337012e-06, "loss": 0.3788, "step": 21459 }, { "epoch": 2.1817812118747457, "grad_norm": 0.2820352017879486, "learning_rate": 6.931844657134224e-06, "loss": 0.3362, "step": 21460 }, { "epoch": 2.1818828792191947, "grad_norm": 0.2787497043609619, "learning_rate": 6.931517325199264e-06, "loss": 0.3366, "step": 21461 }, { "epoch": 2.1819845465636436, "grad_norm": 0.28163978457450867, "learning_rate": 6.9311899835337786e-06, "loss": 0.3074, "step": 21462 }, { "epoch": 2.1820862139080925, "grad_norm": 0.2747914493083954, "learning_rate": 6.930862632139415e-06, "loss": 0.3418, "step": 21463 }, { "epoch": 2.1821878812525415, "grad_norm": 0.27837714552879333, "learning_rate": 6.930535271017826e-06, "loss": 0.3106, "step": 21464 }, { "epoch": 2.182289548596991, "grad_norm": 0.28632500767707825, "learning_rate": 6.930207900170658e-06, "loss": 0.353, "step": 21465 }, { "epoch": 2.18239121594144, "grad_norm": 0.26565974950790405, "learning_rate": 6.9298805195995635e-06, "loss": 0.3108, "step": 21466 }, { "epoch": 2.1824928832858888, "grad_norm": 0.2852020859718323, "learning_rate": 6.9295531293061866e-06, "loss": 0.3346, "step": 21467 }, { "epoch": 2.1825945506303377, "grad_norm": 0.26541757583618164, "learning_rate": 6.929225729292181e-06, "loss": 0.3399, "step": 21468 }, { "epoch": 2.1826962179747866, "grad_norm": 0.26285237073898315, "learning_rate": 6.928898319559194e-06, "loss": 0.3305, "step": 21469 }, { "epoch": 2.1827978853192356, "grad_norm": 0.25455597043037415, "learning_rate": 6.928570900108877e-06, "loss": 0.3113, "step": 21470 }, { "epoch": 2.1828995526636845, "grad_norm": 0.250457763671875, "learning_rate": 6.928243470942877e-06, "loss": 0.3269, "step": 21471 }, { "epoch": 2.1830012200081335, "grad_norm": 0.27892619371414185, "learning_rate": 6.9279160320628455e-06, "loss": 0.3397, "step": 21472 }, { "epoch": 2.1831028873525824, "grad_norm": 0.26945269107818604, "learning_rate": 6.9275885834704296e-06, "loss": 0.3138, "step": 21473 }, { "epoch": 2.1832045546970313, "grad_norm": 0.26037082076072693, "learning_rate": 6.9272611251672814e-06, "loss": 0.3338, "step": 21474 }, { "epoch": 2.1833062220414803, "grad_norm": 0.26043567061424255, "learning_rate": 6.92693365715505e-06, "loss": 0.3444, "step": 21475 }, { "epoch": 2.1834078893859292, "grad_norm": 0.2841736674308777, "learning_rate": 6.926606179435385e-06, "loss": 0.3252, "step": 21476 }, { "epoch": 2.183509556730378, "grad_norm": 0.27365636825561523, "learning_rate": 6.926278692009936e-06, "loss": 0.3272, "step": 21477 }, { "epoch": 2.183611224074827, "grad_norm": 0.29742124676704407, "learning_rate": 6.925951194880349e-06, "loss": 0.367, "step": 21478 }, { "epoch": 2.183712891419276, "grad_norm": 0.26912039518356323, "learning_rate": 6.9256236880482815e-06, "loss": 0.3216, "step": 21479 }, { "epoch": 2.183814558763725, "grad_norm": 0.27094897627830505, "learning_rate": 6.92529617151538e-06, "loss": 0.3416, "step": 21480 }, { "epoch": 2.183916226108174, "grad_norm": 0.2766154110431671, "learning_rate": 6.924968645283292e-06, "loss": 0.327, "step": 21481 }, { "epoch": 2.184017893452623, "grad_norm": 0.264055073261261, "learning_rate": 6.924641109353671e-06, "loss": 0.3374, "step": 21482 }, { "epoch": 2.184119560797072, "grad_norm": 0.26835891604423523, "learning_rate": 6.924313563728162e-06, "loss": 0.3333, "step": 21483 }, { "epoch": 2.1842212281415208, "grad_norm": 0.2727035880088806, "learning_rate": 6.923986008408421e-06, "loss": 0.3497, "step": 21484 }, { "epoch": 2.1843228954859697, "grad_norm": 0.2466464638710022, "learning_rate": 6.923658443396096e-06, "loss": 0.3273, "step": 21485 }, { "epoch": 2.1844245628304186, "grad_norm": 0.271314799785614, "learning_rate": 6.9233308686928365e-06, "loss": 0.3415, "step": 21486 }, { "epoch": 2.184526230174868, "grad_norm": 0.26510900259017944, "learning_rate": 6.923003284300294e-06, "loss": 0.3368, "step": 21487 }, { "epoch": 2.184627897519317, "grad_norm": 0.2936997413635254, "learning_rate": 6.922675690220115e-06, "loss": 0.3135, "step": 21488 }, { "epoch": 2.184729564863766, "grad_norm": 0.2946932911872864, "learning_rate": 6.922348086453956e-06, "loss": 0.3483, "step": 21489 }, { "epoch": 2.184831232208215, "grad_norm": 0.2717422544956207, "learning_rate": 6.9220204730034635e-06, "loss": 0.3236, "step": 21490 }, { "epoch": 2.184932899552664, "grad_norm": 0.2658259868621826, "learning_rate": 6.921692849870287e-06, "loss": 0.3269, "step": 21491 }, { "epoch": 2.1850345668971127, "grad_norm": 0.274627149105072, "learning_rate": 6.921365217056079e-06, "loss": 0.3175, "step": 21492 }, { "epoch": 2.1851362342415617, "grad_norm": 0.2816329300403595, "learning_rate": 6.9210375745624905e-06, "loss": 0.3466, "step": 21493 }, { "epoch": 2.1852379015860106, "grad_norm": 0.25063416361808777, "learning_rate": 6.9207099223911715e-06, "loss": 0.3529, "step": 21494 }, { "epoch": 2.1853395689304596, "grad_norm": 0.27355626225471497, "learning_rate": 6.920382260543772e-06, "loss": 0.3251, "step": 21495 }, { "epoch": 2.1854412362749085, "grad_norm": 0.27163419127464294, "learning_rate": 6.920054589021943e-06, "loss": 0.3238, "step": 21496 }, { "epoch": 2.1855429036193574, "grad_norm": 0.2532189190387726, "learning_rate": 6.9197269078273355e-06, "loss": 0.3248, "step": 21497 }, { "epoch": 2.1856445709638064, "grad_norm": 0.26527801156044006, "learning_rate": 6.919399216961598e-06, "loss": 0.3453, "step": 21498 }, { "epoch": 2.1857462383082553, "grad_norm": 0.27245181798934937, "learning_rate": 6.919071516426385e-06, "loss": 0.3433, "step": 21499 }, { "epoch": 2.1858479056527043, "grad_norm": 0.28723597526550293, "learning_rate": 6.918743806223347e-06, "loss": 0.3627, "step": 21500 }, { "epoch": 2.185949572997153, "grad_norm": 0.2709220051765442, "learning_rate": 6.918416086354132e-06, "loss": 0.3372, "step": 21501 }, { "epoch": 2.186051240341602, "grad_norm": 0.26045462489128113, "learning_rate": 6.9180883568203925e-06, "loss": 0.337, "step": 21502 }, { "epoch": 2.186152907686051, "grad_norm": 0.29864317178726196, "learning_rate": 6.917760617623781e-06, "loss": 0.3396, "step": 21503 }, { "epoch": 2.1862545750305, "grad_norm": 0.2612844705581665, "learning_rate": 6.917432868765945e-06, "loss": 0.3034, "step": 21504 }, { "epoch": 2.186356242374949, "grad_norm": 0.2732488512992859, "learning_rate": 6.91710511024854e-06, "loss": 0.355, "step": 21505 }, { "epoch": 2.1864579097193984, "grad_norm": 0.2750064432621002, "learning_rate": 6.916777342073214e-06, "loss": 0.3288, "step": 21506 }, { "epoch": 2.1865595770638473, "grad_norm": 0.2943671941757202, "learning_rate": 6.9164495642416206e-06, "loss": 0.3529, "step": 21507 }, { "epoch": 2.1866612444082962, "grad_norm": 0.2664526402950287, "learning_rate": 6.916121776755408e-06, "loss": 0.3276, "step": 21508 }, { "epoch": 2.186762911752745, "grad_norm": 0.27997416257858276, "learning_rate": 6.91579397961623e-06, "loss": 0.335, "step": 21509 }, { "epoch": 2.186864579097194, "grad_norm": 0.2694846987724304, "learning_rate": 6.9154661728257375e-06, "loss": 0.3169, "step": 21510 }, { "epoch": 2.186966246441643, "grad_norm": 0.2787773907184601, "learning_rate": 6.91513835638558e-06, "loss": 0.3325, "step": 21511 }, { "epoch": 2.187067913786092, "grad_norm": 0.28759080171585083, "learning_rate": 6.914810530297414e-06, "loss": 0.3326, "step": 21512 }, { "epoch": 2.187169581130541, "grad_norm": 0.2942452132701874, "learning_rate": 6.914482694562883e-06, "loss": 0.3401, "step": 21513 }, { "epoch": 2.18727124847499, "grad_norm": 0.30586448311805725, "learning_rate": 6.914154849183645e-06, "loss": 0.3433, "step": 21514 }, { "epoch": 2.187372915819439, "grad_norm": 0.278696209192276, "learning_rate": 6.913826994161351e-06, "loss": 0.3402, "step": 21515 }, { "epoch": 2.1874745831638878, "grad_norm": 0.29283374547958374, "learning_rate": 6.91349912949765e-06, "loss": 0.3441, "step": 21516 }, { "epoch": 2.1875762505083367, "grad_norm": 0.26348257064819336, "learning_rate": 6.913171255194197e-06, "loss": 0.3311, "step": 21517 }, { "epoch": 2.1876779178527856, "grad_norm": 0.2958620488643646, "learning_rate": 6.91284337125264e-06, "loss": 0.3603, "step": 21518 }, { "epoch": 2.1877795851972346, "grad_norm": 0.29053494334220886, "learning_rate": 6.9125154776746325e-06, "loss": 0.3428, "step": 21519 }, { "epoch": 2.1878812525416835, "grad_norm": 0.2701217532157898, "learning_rate": 6.912187574461828e-06, "loss": 0.3362, "step": 21520 }, { "epoch": 2.1879829198861325, "grad_norm": 0.28354188799858093, "learning_rate": 6.911859661615876e-06, "loss": 0.3314, "step": 21521 }, { "epoch": 2.1880845872305814, "grad_norm": 0.2907433807849884, "learning_rate": 6.9115317391384304e-06, "loss": 0.3741, "step": 21522 }, { "epoch": 2.1881862545750304, "grad_norm": 0.27977269887924194, "learning_rate": 6.91120380703114e-06, "loss": 0.3413, "step": 21523 }, { "epoch": 2.1882879219194793, "grad_norm": 0.2971978783607483, "learning_rate": 6.910875865295661e-06, "loss": 0.3452, "step": 21524 }, { "epoch": 2.1883895892639282, "grad_norm": 0.2949925661087036, "learning_rate": 6.910547913933643e-06, "loss": 0.3396, "step": 21525 }, { "epoch": 2.188491256608377, "grad_norm": 0.29129254817962646, "learning_rate": 6.910219952946737e-06, "loss": 0.3386, "step": 21526 }, { "epoch": 2.188592923952826, "grad_norm": 0.2797906696796417, "learning_rate": 6.9098919823365995e-06, "loss": 0.3188, "step": 21527 }, { "epoch": 2.1886945912972755, "grad_norm": 0.2983657717704773, "learning_rate": 6.909564002104878e-06, "loss": 0.361, "step": 21528 }, { "epoch": 2.1887962586417244, "grad_norm": 0.28811371326446533, "learning_rate": 6.909236012253228e-06, "loss": 0.3022, "step": 21529 }, { "epoch": 2.1888979259861734, "grad_norm": 0.2766832709312439, "learning_rate": 6.9089080127832995e-06, "loss": 0.3149, "step": 21530 }, { "epoch": 2.1889995933306223, "grad_norm": 0.2761242091655731, "learning_rate": 6.908580003696746e-06, "loss": 0.3481, "step": 21531 }, { "epoch": 2.1891012606750713, "grad_norm": 0.2814248502254486, "learning_rate": 6.908251984995222e-06, "loss": 0.3766, "step": 21532 }, { "epoch": 2.18920292801952, "grad_norm": 0.2592431306838989, "learning_rate": 6.907923956680377e-06, "loss": 0.322, "step": 21533 }, { "epoch": 2.189304595363969, "grad_norm": 0.28972676396369934, "learning_rate": 6.907595918753863e-06, "loss": 0.3079, "step": 21534 }, { "epoch": 2.189406262708418, "grad_norm": 0.29917246103286743, "learning_rate": 6.907267871217336e-06, "loss": 0.3528, "step": 21535 }, { "epoch": 2.189507930052867, "grad_norm": 0.2787320911884308, "learning_rate": 6.906939814072446e-06, "loss": 0.3485, "step": 21536 }, { "epoch": 2.189609597397316, "grad_norm": 0.2754453420639038, "learning_rate": 6.906611747320847e-06, "loss": 0.3495, "step": 21537 }, { "epoch": 2.189711264741765, "grad_norm": 0.26538974046707153, "learning_rate": 6.90628367096419e-06, "loss": 0.3181, "step": 21538 }, { "epoch": 2.189812932086214, "grad_norm": 0.27311694622039795, "learning_rate": 6.905955585004129e-06, "loss": 0.3262, "step": 21539 }, { "epoch": 2.189914599430663, "grad_norm": 0.2828425168991089, "learning_rate": 6.905627489442317e-06, "loss": 0.3393, "step": 21540 }, { "epoch": 2.1900162667751117, "grad_norm": 0.26839566230773926, "learning_rate": 6.905299384280406e-06, "loss": 0.3303, "step": 21541 }, { "epoch": 2.1901179341195607, "grad_norm": 0.2722248136997223, "learning_rate": 6.904971269520051e-06, "loss": 0.3406, "step": 21542 }, { "epoch": 2.1902196014640096, "grad_norm": 0.2878062129020691, "learning_rate": 6.904643145162902e-06, "loss": 0.3151, "step": 21543 }, { "epoch": 2.1903212688084586, "grad_norm": 0.25905945897102356, "learning_rate": 6.9043150112106135e-06, "loss": 0.3303, "step": 21544 }, { "epoch": 2.1904229361529075, "grad_norm": 0.27905070781707764, "learning_rate": 6.903986867664841e-06, "loss": 0.3308, "step": 21545 }, { "epoch": 2.1905246034973564, "grad_norm": 0.2777092754840851, "learning_rate": 6.903658714527232e-06, "loss": 0.3334, "step": 21546 }, { "epoch": 2.190626270841806, "grad_norm": 0.2608609199523926, "learning_rate": 6.903330551799444e-06, "loss": 0.3624, "step": 21547 }, { "epoch": 2.1907279381862548, "grad_norm": 0.2666291892528534, "learning_rate": 6.903002379483128e-06, "loss": 0.3182, "step": 21548 }, { "epoch": 2.1908296055307037, "grad_norm": 0.2556214928627014, "learning_rate": 6.902674197579939e-06, "loss": 0.3197, "step": 21549 }, { "epoch": 2.1909312728751527, "grad_norm": 0.27300694584846497, "learning_rate": 6.902346006091531e-06, "loss": 0.3251, "step": 21550 }, { "epoch": 2.1910329402196016, "grad_norm": 0.25343137979507446, "learning_rate": 6.902017805019554e-06, "loss": 0.3654, "step": 21551 }, { "epoch": 2.1911346075640505, "grad_norm": 0.26463714241981506, "learning_rate": 6.901689594365664e-06, "loss": 0.3153, "step": 21552 }, { "epoch": 2.1912362749084995, "grad_norm": 0.30154600739479065, "learning_rate": 6.901361374131513e-06, "loss": 0.3703, "step": 21553 }, { "epoch": 2.1913379422529484, "grad_norm": 0.2799111306667328, "learning_rate": 6.901033144318755e-06, "loss": 0.2941, "step": 21554 }, { "epoch": 2.1914396095973974, "grad_norm": 0.2896433174610138, "learning_rate": 6.900704904929045e-06, "loss": 0.3302, "step": 21555 }, { "epoch": 2.1915412769418463, "grad_norm": 0.2684144079685211, "learning_rate": 6.900376655964036e-06, "loss": 0.3291, "step": 21556 }, { "epoch": 2.1916429442862952, "grad_norm": 0.259971559047699, "learning_rate": 6.900048397425379e-06, "loss": 0.3297, "step": 21557 }, { "epoch": 2.191744611630744, "grad_norm": 0.29694700241088867, "learning_rate": 6.899720129314731e-06, "loss": 0.3178, "step": 21558 }, { "epoch": 2.191846278975193, "grad_norm": 0.28758832812309265, "learning_rate": 6.899391851633743e-06, "loss": 0.3443, "step": 21559 }, { "epoch": 2.191947946319642, "grad_norm": 0.31258174777030945, "learning_rate": 6.899063564384072e-06, "loss": 0.3488, "step": 21560 }, { "epoch": 2.192049613664091, "grad_norm": 0.26654288172721863, "learning_rate": 6.8987352675673705e-06, "loss": 0.3122, "step": 21561 }, { "epoch": 2.19215128100854, "grad_norm": 0.2990287244319916, "learning_rate": 6.89840696118529e-06, "loss": 0.3593, "step": 21562 }, { "epoch": 2.192252948352989, "grad_norm": 0.29534170031547546, "learning_rate": 6.898078645239486e-06, "loss": 0.3334, "step": 21563 }, { "epoch": 2.192354615697438, "grad_norm": 0.28063714504241943, "learning_rate": 6.897750319731614e-06, "loss": 0.313, "step": 21564 }, { "epoch": 2.1924562830418868, "grad_norm": 0.2883988618850708, "learning_rate": 6.897421984663327e-06, "loss": 0.3648, "step": 21565 }, { "epoch": 2.1925579503863357, "grad_norm": 0.2726113498210907, "learning_rate": 6.8970936400362795e-06, "loss": 0.341, "step": 21566 }, { "epoch": 2.1926596177307847, "grad_norm": 0.2733097970485687, "learning_rate": 6.8967652858521236e-06, "loss": 0.3252, "step": 21567 }, { "epoch": 2.1927612850752336, "grad_norm": 0.28410521149635315, "learning_rate": 6.896436922112517e-06, "loss": 0.3026, "step": 21568 }, { "epoch": 2.192862952419683, "grad_norm": 0.2806589603424072, "learning_rate": 6.89610854881911e-06, "loss": 0.3727, "step": 21569 }, { "epoch": 2.192964619764132, "grad_norm": 0.26699674129486084, "learning_rate": 6.895780165973558e-06, "loss": 0.3172, "step": 21570 }, { "epoch": 2.193066287108581, "grad_norm": 0.2789258360862732, "learning_rate": 6.895451773577519e-06, "loss": 0.3509, "step": 21571 }, { "epoch": 2.19316795445303, "grad_norm": 0.3046031892299652, "learning_rate": 6.895123371632642e-06, "loss": 0.3262, "step": 21572 }, { "epoch": 2.1932696217974788, "grad_norm": 0.2767910659313202, "learning_rate": 6.894794960140585e-06, "loss": 0.317, "step": 21573 }, { "epoch": 2.1933712891419277, "grad_norm": 0.27820301055908203, "learning_rate": 6.894466539103e-06, "loss": 0.3306, "step": 21574 }, { "epoch": 2.1934729564863766, "grad_norm": 0.2775062024593353, "learning_rate": 6.894138108521542e-06, "loss": 0.3211, "step": 21575 }, { "epoch": 2.1935746238308256, "grad_norm": 0.2763200104236603, "learning_rate": 6.893809668397868e-06, "loss": 0.3186, "step": 21576 }, { "epoch": 2.1936762911752745, "grad_norm": 0.30259469151496887, "learning_rate": 6.893481218733631e-06, "loss": 0.3411, "step": 21577 }, { "epoch": 2.1937779585197235, "grad_norm": 0.2896539270877838, "learning_rate": 6.8931527595304855e-06, "loss": 0.3612, "step": 21578 }, { "epoch": 2.1938796258641724, "grad_norm": 0.3233684003353119, "learning_rate": 6.892824290790087e-06, "loss": 0.3678, "step": 21579 }, { "epoch": 2.1939812932086213, "grad_norm": 0.2745780646800995, "learning_rate": 6.892495812514087e-06, "loss": 0.3299, "step": 21580 }, { "epoch": 2.1940829605530703, "grad_norm": 0.27397528290748596, "learning_rate": 6.892167324704146e-06, "loss": 0.3218, "step": 21581 }, { "epoch": 2.1941846278975192, "grad_norm": 0.28314682841300964, "learning_rate": 6.891838827361913e-06, "loss": 0.3706, "step": 21582 }, { "epoch": 2.194286295241968, "grad_norm": 0.26351651549339294, "learning_rate": 6.891510320489047e-06, "loss": 0.3268, "step": 21583 }, { "epoch": 2.194387962586417, "grad_norm": 0.27136749029159546, "learning_rate": 6.891181804087201e-06, "loss": 0.3323, "step": 21584 }, { "epoch": 2.194489629930866, "grad_norm": 0.2751729488372803, "learning_rate": 6.89085327815803e-06, "loss": 0.341, "step": 21585 }, { "epoch": 2.194591297275315, "grad_norm": 0.2977524995803833, "learning_rate": 6.89052474270319e-06, "loss": 0.3414, "step": 21586 }, { "epoch": 2.194692964619764, "grad_norm": 0.2752622067928314, "learning_rate": 6.890196197724336e-06, "loss": 0.3797, "step": 21587 }, { "epoch": 2.1947946319642133, "grad_norm": 0.24969154596328735, "learning_rate": 6.889867643223122e-06, "loss": 0.3163, "step": 21588 }, { "epoch": 2.1948962993086623, "grad_norm": 0.2815023958683014, "learning_rate": 6.889539079201205e-06, "loss": 0.3617, "step": 21589 }, { "epoch": 2.194997966653111, "grad_norm": 0.29380694031715393, "learning_rate": 6.889210505660238e-06, "loss": 0.3808, "step": 21590 }, { "epoch": 2.19509963399756, "grad_norm": 0.2608819603919983, "learning_rate": 6.888881922601878e-06, "loss": 0.3257, "step": 21591 }, { "epoch": 2.195201301342009, "grad_norm": 0.2865045666694641, "learning_rate": 6.8885533300277805e-06, "loss": 0.3175, "step": 21592 }, { "epoch": 2.195302968686458, "grad_norm": 0.2644740641117096, "learning_rate": 6.8882247279396e-06, "loss": 0.3081, "step": 21593 }, { "epoch": 2.195404636030907, "grad_norm": 0.3098788261413574, "learning_rate": 6.88789611633899e-06, "loss": 0.338, "step": 21594 }, { "epoch": 2.195506303375356, "grad_norm": 0.27950233221054077, "learning_rate": 6.88756749522761e-06, "loss": 0.3381, "step": 21595 }, { "epoch": 2.195607970719805, "grad_norm": 0.32062962651252747, "learning_rate": 6.887238864607113e-06, "loss": 0.3989, "step": 21596 }, { "epoch": 2.195709638064254, "grad_norm": 0.28265079855918884, "learning_rate": 6.886910224479154e-06, "loss": 0.3545, "step": 21597 }, { "epoch": 2.1958113054087027, "grad_norm": 0.2602149546146393, "learning_rate": 6.886581574845391e-06, "loss": 0.3201, "step": 21598 }, { "epoch": 2.1959129727531517, "grad_norm": 0.2729963958263397, "learning_rate": 6.886252915707478e-06, "loss": 0.342, "step": 21599 }, { "epoch": 2.1960146400976006, "grad_norm": 0.2646925449371338, "learning_rate": 6.885924247067071e-06, "loss": 0.3355, "step": 21600 }, { "epoch": 2.1961163074420496, "grad_norm": 0.27569761872291565, "learning_rate": 6.8855955689258254e-06, "loss": 0.3746, "step": 21601 }, { "epoch": 2.1962179747864985, "grad_norm": 0.2749643325805664, "learning_rate": 6.885266881285397e-06, "loss": 0.3049, "step": 21602 }, { "epoch": 2.1963196421309474, "grad_norm": 0.3099563419818878, "learning_rate": 6.884938184147445e-06, "loss": 0.3045, "step": 21603 }, { "epoch": 2.1964213094753964, "grad_norm": 0.2976642847061157, "learning_rate": 6.884609477513619e-06, "loss": 0.3295, "step": 21604 }, { "epoch": 2.1965229768198453, "grad_norm": 0.27916744351387024, "learning_rate": 6.88428076138558e-06, "loss": 0.3523, "step": 21605 }, { "epoch": 2.1966246441642943, "grad_norm": 0.2843204140663147, "learning_rate": 6.883952035764983e-06, "loss": 0.3377, "step": 21606 }, { "epoch": 2.196726311508743, "grad_norm": 0.27534255385398865, "learning_rate": 6.883623300653481e-06, "loss": 0.307, "step": 21607 }, { "epoch": 2.196827978853192, "grad_norm": 0.2851291596889496, "learning_rate": 6.883294556052733e-06, "loss": 0.3146, "step": 21608 }, { "epoch": 2.196929646197641, "grad_norm": 0.2696497440338135, "learning_rate": 6.882965801964395e-06, "loss": 0.3114, "step": 21609 }, { "epoch": 2.1970313135420905, "grad_norm": 0.294609010219574, "learning_rate": 6.882637038390123e-06, "loss": 0.3668, "step": 21610 }, { "epoch": 2.1971329808865394, "grad_norm": 0.26324817538261414, "learning_rate": 6.882308265331574e-06, "loss": 0.3594, "step": 21611 }, { "epoch": 2.1972346482309884, "grad_norm": 0.28322601318359375, "learning_rate": 6.881979482790402e-06, "loss": 0.3491, "step": 21612 }, { "epoch": 2.1973363155754373, "grad_norm": 0.26271018385887146, "learning_rate": 6.881650690768264e-06, "loss": 0.348, "step": 21613 }, { "epoch": 2.1974379829198862, "grad_norm": 0.3064799904823303, "learning_rate": 6.881321889266818e-06, "loss": 0.3463, "step": 21614 }, { "epoch": 2.197539650264335, "grad_norm": 0.28649744391441345, "learning_rate": 6.880993078287719e-06, "loss": 0.3171, "step": 21615 }, { "epoch": 2.197641317608784, "grad_norm": 0.2886686623096466, "learning_rate": 6.880664257832624e-06, "loss": 0.3367, "step": 21616 }, { "epoch": 2.197742984953233, "grad_norm": 0.27890220284461975, "learning_rate": 6.8803354279031886e-06, "loss": 0.3272, "step": 21617 }, { "epoch": 2.197844652297682, "grad_norm": 0.2994539737701416, "learning_rate": 6.8800065885010725e-06, "loss": 0.3621, "step": 21618 }, { "epoch": 2.197946319642131, "grad_norm": 0.285521000623703, "learning_rate": 6.879677739627928e-06, "loss": 0.3228, "step": 21619 }, { "epoch": 2.19804798698658, "grad_norm": 0.2885727882385254, "learning_rate": 6.879348881285414e-06, "loss": 0.3383, "step": 21620 }, { "epoch": 2.198149654331029, "grad_norm": 0.2741585969924927, "learning_rate": 6.879020013475187e-06, "loss": 0.3321, "step": 21621 }, { "epoch": 2.1982513216754778, "grad_norm": 0.28515467047691345, "learning_rate": 6.878691136198903e-06, "loss": 0.3414, "step": 21622 }, { "epoch": 2.1983529890199267, "grad_norm": 0.27338552474975586, "learning_rate": 6.878362249458222e-06, "loss": 0.2992, "step": 21623 }, { "epoch": 2.1984546563643756, "grad_norm": 0.3088567554950714, "learning_rate": 6.878033353254796e-06, "loss": 0.3228, "step": 21624 }, { "epoch": 2.1985563237088246, "grad_norm": 0.2792946696281433, "learning_rate": 6.877704447590283e-06, "loss": 0.3426, "step": 21625 }, { "epoch": 2.1986579910532735, "grad_norm": 0.2660101354122162, "learning_rate": 6.877375532466345e-06, "loss": 0.364, "step": 21626 }, { "epoch": 2.1987596583977225, "grad_norm": 0.29463037848472595, "learning_rate": 6.877046607884633e-06, "loss": 0.3589, "step": 21627 }, { "epoch": 2.1988613257421714, "grad_norm": 0.26543742418289185, "learning_rate": 6.876717673846807e-06, "loss": 0.3388, "step": 21628 }, { "epoch": 2.198962993086621, "grad_norm": 0.2721611261367798, "learning_rate": 6.876388730354522e-06, "loss": 0.3219, "step": 21629 }, { "epoch": 2.1990646604310697, "grad_norm": 0.2745870053768158, "learning_rate": 6.8760597774094365e-06, "loss": 0.3254, "step": 21630 }, { "epoch": 2.1991663277755187, "grad_norm": 0.2931874394416809, "learning_rate": 6.875730815013209e-06, "loss": 0.3304, "step": 21631 }, { "epoch": 2.1992679951199676, "grad_norm": 0.28856807947158813, "learning_rate": 6.875401843167496e-06, "loss": 0.3048, "step": 21632 }, { "epoch": 2.1993696624644166, "grad_norm": 0.2745218575000763, "learning_rate": 6.875072861873953e-06, "loss": 0.3214, "step": 21633 }, { "epoch": 2.1994713298088655, "grad_norm": 0.313640832901001, "learning_rate": 6.874743871134237e-06, "loss": 0.3545, "step": 21634 }, { "epoch": 2.1995729971533144, "grad_norm": 0.29652541875839233, "learning_rate": 6.874414870950008e-06, "loss": 0.319, "step": 21635 }, { "epoch": 2.1996746644977634, "grad_norm": 0.2972320020198822, "learning_rate": 6.874085861322925e-06, "loss": 0.3096, "step": 21636 }, { "epoch": 2.1997763318422123, "grad_norm": 0.29947835206985474, "learning_rate": 6.873756842254639e-06, "loss": 0.3482, "step": 21637 }, { "epoch": 2.1998779991866613, "grad_norm": 0.28235873579978943, "learning_rate": 6.873427813746814e-06, "loss": 0.3835, "step": 21638 }, { "epoch": 2.19997966653111, "grad_norm": 0.28414487838745117, "learning_rate": 6.873098775801103e-06, "loss": 0.3431, "step": 21639 }, { "epoch": 2.200081333875559, "grad_norm": 0.28130096197128296, "learning_rate": 6.872769728419166e-06, "loss": 0.3382, "step": 21640 }, { "epoch": 2.200183001220008, "grad_norm": 0.2877168357372284, "learning_rate": 6.87244067160266e-06, "loss": 0.3556, "step": 21641 }, { "epoch": 2.200284668564457, "grad_norm": 0.2700496017932892, "learning_rate": 6.872111605353244e-06, "loss": 0.3419, "step": 21642 }, { "epoch": 2.200386335908906, "grad_norm": 0.31183111667633057, "learning_rate": 6.8717825296725734e-06, "loss": 0.3472, "step": 21643 }, { "epoch": 2.200488003253355, "grad_norm": 0.26858121156692505, "learning_rate": 6.871453444562308e-06, "loss": 0.3204, "step": 21644 }, { "epoch": 2.200589670597804, "grad_norm": 0.27980518341064453, "learning_rate": 6.8711243500241055e-06, "loss": 0.3322, "step": 21645 }, { "epoch": 2.200691337942253, "grad_norm": 0.28566116094589233, "learning_rate": 6.87079524605962e-06, "loss": 0.3362, "step": 21646 }, { "epoch": 2.2007930052867017, "grad_norm": 0.26997068524360657, "learning_rate": 6.870466132670517e-06, "loss": 0.333, "step": 21647 }, { "epoch": 2.2008946726311507, "grad_norm": 0.24512289464473724, "learning_rate": 6.870137009858446e-06, "loss": 0.3441, "step": 21648 }, { "epoch": 2.2009963399755996, "grad_norm": 0.26667141914367676, "learning_rate": 6.869807877625072e-06, "loss": 0.3019, "step": 21649 }, { "epoch": 2.2010980073200486, "grad_norm": 0.3034946620464325, "learning_rate": 6.86947873597205e-06, "loss": 0.3312, "step": 21650 }, { "epoch": 2.201199674664498, "grad_norm": 0.30266451835632324, "learning_rate": 6.869149584901037e-06, "loss": 0.318, "step": 21651 }, { "epoch": 2.201301342008947, "grad_norm": 0.2632178068161011, "learning_rate": 6.868820424413693e-06, "loss": 0.3124, "step": 21652 }, { "epoch": 2.201403009353396, "grad_norm": 0.25740841031074524, "learning_rate": 6.868491254511677e-06, "loss": 0.3245, "step": 21653 }, { "epoch": 2.2015046766978448, "grad_norm": 0.31930533051490784, "learning_rate": 6.868162075196644e-06, "loss": 0.3374, "step": 21654 }, { "epoch": 2.2016063440422937, "grad_norm": 0.2894875109195709, "learning_rate": 6.8678328864702555e-06, "loss": 0.3392, "step": 21655 }, { "epoch": 2.2017080113867427, "grad_norm": 0.2787402272224426, "learning_rate": 6.8675036883341695e-06, "loss": 0.3416, "step": 21656 }, { "epoch": 2.2018096787311916, "grad_norm": 0.2958711087703705, "learning_rate": 6.867174480790043e-06, "loss": 0.3104, "step": 21657 }, { "epoch": 2.2019113460756405, "grad_norm": 0.27378925681114197, "learning_rate": 6.8668452638395355e-06, "loss": 0.3243, "step": 21658 }, { "epoch": 2.2020130134200895, "grad_norm": 0.2777777910232544, "learning_rate": 6.866516037484305e-06, "loss": 0.3315, "step": 21659 }, { "epoch": 2.2021146807645384, "grad_norm": 0.29810860753059387, "learning_rate": 6.86618680172601e-06, "loss": 0.3236, "step": 21660 }, { "epoch": 2.2022163481089874, "grad_norm": 0.24743840098381042, "learning_rate": 6.865857556566309e-06, "loss": 0.3243, "step": 21661 }, { "epoch": 2.2023180154534363, "grad_norm": 0.2981131970882416, "learning_rate": 6.865528302006862e-06, "loss": 0.3179, "step": 21662 }, { "epoch": 2.2024196827978852, "grad_norm": 0.2763947546482086, "learning_rate": 6.865199038049326e-06, "loss": 0.3632, "step": 21663 }, { "epoch": 2.202521350142334, "grad_norm": 0.2732832729816437, "learning_rate": 6.864869764695361e-06, "loss": 0.3223, "step": 21664 }, { "epoch": 2.202623017486783, "grad_norm": 0.2538916766643524, "learning_rate": 6.864540481946625e-06, "loss": 0.3142, "step": 21665 }, { "epoch": 2.202724684831232, "grad_norm": 0.2741878926753998, "learning_rate": 6.864211189804777e-06, "loss": 0.3118, "step": 21666 }, { "epoch": 2.202826352175681, "grad_norm": 0.24965710937976837, "learning_rate": 6.863881888271477e-06, "loss": 0.3078, "step": 21667 }, { "epoch": 2.20292801952013, "grad_norm": 0.25055116415023804, "learning_rate": 6.863552577348381e-06, "loss": 0.3374, "step": 21668 }, { "epoch": 2.203029686864579, "grad_norm": 0.2971501052379608, "learning_rate": 6.863223257037152e-06, "loss": 0.3282, "step": 21669 }, { "epoch": 2.2031313542090283, "grad_norm": 0.2818528711795807, "learning_rate": 6.8628939273394456e-06, "loss": 0.3448, "step": 21670 }, { "epoch": 2.2032330215534772, "grad_norm": 0.27611324191093445, "learning_rate": 6.862564588256923e-06, "loss": 0.3558, "step": 21671 }, { "epoch": 2.203334688897926, "grad_norm": 0.28289520740509033, "learning_rate": 6.862235239791243e-06, "loss": 0.3295, "step": 21672 }, { "epoch": 2.203436356242375, "grad_norm": 0.2751511037349701, "learning_rate": 6.861905881944063e-06, "loss": 0.3481, "step": 21673 }, { "epoch": 2.203538023586824, "grad_norm": 0.2679921090602875, "learning_rate": 6.861576514717045e-06, "loss": 0.3656, "step": 21674 }, { "epoch": 2.203639690931273, "grad_norm": 0.28121379017829895, "learning_rate": 6.861247138111846e-06, "loss": 0.3206, "step": 21675 }, { "epoch": 2.203741358275722, "grad_norm": 0.2610183656215668, "learning_rate": 6.860917752130127e-06, "loss": 0.3254, "step": 21676 }, { "epoch": 2.203843025620171, "grad_norm": 0.2914159297943115, "learning_rate": 6.860588356773547e-06, "loss": 0.3507, "step": 21677 }, { "epoch": 2.20394469296462, "grad_norm": 0.3205175995826721, "learning_rate": 6.860258952043763e-06, "loss": 0.352, "step": 21678 }, { "epoch": 2.2040463603090688, "grad_norm": 0.26558274030685425, "learning_rate": 6.859929537942438e-06, "loss": 0.3144, "step": 21679 }, { "epoch": 2.2041480276535177, "grad_norm": 0.2815336287021637, "learning_rate": 6.8596001144712296e-06, "loss": 0.3616, "step": 21680 }, { "epoch": 2.2042496949979666, "grad_norm": 0.2671051323413849, "learning_rate": 6.859270681631798e-06, "loss": 0.3331, "step": 21681 }, { "epoch": 2.2043513623424156, "grad_norm": 0.2807013988494873, "learning_rate": 6.8589412394258025e-06, "loss": 0.3085, "step": 21682 }, { "epoch": 2.2044530296868645, "grad_norm": 0.2627609670162201, "learning_rate": 6.858611787854902e-06, "loss": 0.3582, "step": 21683 }, { "epoch": 2.2045546970313135, "grad_norm": 0.26503339409828186, "learning_rate": 6.8582823269207575e-06, "loss": 0.3371, "step": 21684 }, { "epoch": 2.2046563643757624, "grad_norm": 0.2799713909626007, "learning_rate": 6.8579528566250285e-06, "loss": 0.3385, "step": 21685 }, { "epoch": 2.2047580317202113, "grad_norm": 0.2733871340751648, "learning_rate": 6.8576233769693745e-06, "loss": 0.3383, "step": 21686 }, { "epoch": 2.2048596990646603, "grad_norm": 0.2800304889678955, "learning_rate": 6.857293887955455e-06, "loss": 0.3318, "step": 21687 }, { "epoch": 2.2049613664091092, "grad_norm": 0.2883530855178833, "learning_rate": 6.8569643895849305e-06, "loss": 0.3203, "step": 21688 }, { "epoch": 2.205063033753558, "grad_norm": 0.27778270840644836, "learning_rate": 6.856634881859461e-06, "loss": 0.3404, "step": 21689 }, { "epoch": 2.205164701098007, "grad_norm": 0.29660147428512573, "learning_rate": 6.8563053647807055e-06, "loss": 0.3546, "step": 21690 }, { "epoch": 2.205266368442456, "grad_norm": 0.2926374673843384, "learning_rate": 6.855975838350325e-06, "loss": 0.3298, "step": 21691 }, { "epoch": 2.2053680357869054, "grad_norm": 0.25578543543815613, "learning_rate": 6.855646302569979e-06, "loss": 0.3484, "step": 21692 }, { "epoch": 2.2054697031313544, "grad_norm": 0.2655887305736542, "learning_rate": 6.855316757441328e-06, "loss": 0.3392, "step": 21693 }, { "epoch": 2.2055713704758033, "grad_norm": 0.2825815677642822, "learning_rate": 6.854987202966034e-06, "loss": 0.2923, "step": 21694 }, { "epoch": 2.2056730378202523, "grad_norm": 0.2939073145389557, "learning_rate": 6.854657639145752e-06, "loss": 0.3179, "step": 21695 }, { "epoch": 2.205774705164701, "grad_norm": 0.2892681062221527, "learning_rate": 6.854328065982148e-06, "loss": 0.3246, "step": 21696 }, { "epoch": 2.20587637250915, "grad_norm": 0.2777020335197449, "learning_rate": 6.853998483476878e-06, "loss": 0.3353, "step": 21697 }, { "epoch": 2.205978039853599, "grad_norm": 0.25987768173217773, "learning_rate": 6.853668891631605e-06, "loss": 0.3159, "step": 21698 }, { "epoch": 2.206079707198048, "grad_norm": 0.2929627597332001, "learning_rate": 6.85333929044799e-06, "loss": 0.3496, "step": 21699 }, { "epoch": 2.206181374542497, "grad_norm": 0.30188649892807007, "learning_rate": 6.85300967992769e-06, "loss": 0.339, "step": 21700 }, { "epoch": 2.206283041886946, "grad_norm": 0.28582167625427246, "learning_rate": 6.8526800600723685e-06, "loss": 0.3137, "step": 21701 }, { "epoch": 2.206384709231395, "grad_norm": 0.25928544998168945, "learning_rate": 6.8523504308836855e-06, "loss": 0.3444, "step": 21702 }, { "epoch": 2.206486376575844, "grad_norm": 0.28653252124786377, "learning_rate": 6.852020792363301e-06, "loss": 0.3402, "step": 21703 }, { "epoch": 2.2065880439202927, "grad_norm": 0.2940393090248108, "learning_rate": 6.851691144512875e-06, "loss": 0.3175, "step": 21704 }, { "epoch": 2.2066897112647417, "grad_norm": 0.2809796631336212, "learning_rate": 6.85136148733407e-06, "loss": 0.3619, "step": 21705 }, { "epoch": 2.2067913786091906, "grad_norm": 0.283161461353302, "learning_rate": 6.851031820828544e-06, "loss": 0.2983, "step": 21706 }, { "epoch": 2.2068930459536396, "grad_norm": 0.3003256916999817, "learning_rate": 6.850702144997963e-06, "loss": 0.326, "step": 21707 }, { "epoch": 2.2069947132980885, "grad_norm": 0.28799858689308167, "learning_rate": 6.850372459843983e-06, "loss": 0.3742, "step": 21708 }, { "epoch": 2.2070963806425374, "grad_norm": 0.29422828555107117, "learning_rate": 6.850042765368264e-06, "loss": 0.3768, "step": 21709 }, { "epoch": 2.2071980479869864, "grad_norm": 0.2601890563964844, "learning_rate": 6.849713061572472e-06, "loss": 0.3139, "step": 21710 }, { "epoch": 2.2072997153314358, "grad_norm": 0.27329352498054504, "learning_rate": 6.849383348458263e-06, "loss": 0.3495, "step": 21711 }, { "epoch": 2.2074013826758847, "grad_norm": 0.26906558871269226, "learning_rate": 6.849053626027302e-06, "loss": 0.333, "step": 21712 }, { "epoch": 2.2075030500203336, "grad_norm": 0.2466999590396881, "learning_rate": 6.848723894281247e-06, "loss": 0.3021, "step": 21713 }, { "epoch": 2.2076047173647826, "grad_norm": 0.28805285692214966, "learning_rate": 6.8483941532217615e-06, "loss": 0.3161, "step": 21714 }, { "epoch": 2.2077063847092315, "grad_norm": 0.27897658944129944, "learning_rate": 6.848064402850504e-06, "loss": 0.3215, "step": 21715 }, { "epoch": 2.2078080520536805, "grad_norm": 0.26969096064567566, "learning_rate": 6.847734643169137e-06, "loss": 0.349, "step": 21716 }, { "epoch": 2.2079097193981294, "grad_norm": 0.27275726199150085, "learning_rate": 6.847404874179324e-06, "loss": 0.3418, "step": 21717 }, { "epoch": 2.2080113867425784, "grad_norm": 0.31634190678596497, "learning_rate": 6.8470750958827236e-06, "loss": 0.331, "step": 21718 }, { "epoch": 2.2081130540870273, "grad_norm": 0.2635892927646637, "learning_rate": 6.846745308280998e-06, "loss": 0.3583, "step": 21719 }, { "epoch": 2.2082147214314762, "grad_norm": 0.27645444869995117, "learning_rate": 6.846415511375806e-06, "loss": 0.335, "step": 21720 }, { "epoch": 2.208316388775925, "grad_norm": 0.28710708022117615, "learning_rate": 6.846085705168814e-06, "loss": 0.343, "step": 21721 }, { "epoch": 2.208418056120374, "grad_norm": 0.26071861386299133, "learning_rate": 6.845755889661679e-06, "loss": 0.3331, "step": 21722 }, { "epoch": 2.208519723464823, "grad_norm": 0.2638677954673767, "learning_rate": 6.845426064856067e-06, "loss": 0.3219, "step": 21723 }, { "epoch": 2.208621390809272, "grad_norm": 0.2636483609676361, "learning_rate": 6.845096230753635e-06, "loss": 0.3149, "step": 21724 }, { "epoch": 2.208723058153721, "grad_norm": 0.29458633065223694, "learning_rate": 6.844766387356046e-06, "loss": 0.3262, "step": 21725 }, { "epoch": 2.20882472549817, "grad_norm": 0.2643063962459564, "learning_rate": 6.844436534664965e-06, "loss": 0.3209, "step": 21726 }, { "epoch": 2.208926392842619, "grad_norm": 0.276318222284317, "learning_rate": 6.8441066726820495e-06, "loss": 0.294, "step": 21727 }, { "epoch": 2.2090280601870678, "grad_norm": 0.28511038422584534, "learning_rate": 6.843776801408963e-06, "loss": 0.3519, "step": 21728 }, { "epoch": 2.2091297275315167, "grad_norm": 0.28757739067077637, "learning_rate": 6.843446920847368e-06, "loss": 0.2956, "step": 21729 }, { "epoch": 2.2092313948759656, "grad_norm": 0.28304585814476013, "learning_rate": 6.843117030998923e-06, "loss": 0.3479, "step": 21730 }, { "epoch": 2.2093330622204146, "grad_norm": 0.2531565725803375, "learning_rate": 6.842787131865294e-06, "loss": 0.3104, "step": 21731 }, { "epoch": 2.2094347295648635, "grad_norm": 0.31052595376968384, "learning_rate": 6.842457223448141e-06, "loss": 0.3398, "step": 21732 }, { "epoch": 2.209536396909313, "grad_norm": 0.2676166594028473, "learning_rate": 6.842127305749128e-06, "loss": 0.3535, "step": 21733 }, { "epoch": 2.209638064253762, "grad_norm": 0.26556581258773804, "learning_rate": 6.841797378769913e-06, "loss": 0.3611, "step": 21734 }, { "epoch": 2.209739731598211, "grad_norm": 0.2509594261646271, "learning_rate": 6.841467442512161e-06, "loss": 0.3416, "step": 21735 }, { "epoch": 2.2098413989426597, "grad_norm": 0.2526337504386902, "learning_rate": 6.8411374969775346e-06, "loss": 0.3369, "step": 21736 }, { "epoch": 2.2099430662871087, "grad_norm": 0.257406085729599, "learning_rate": 6.840807542167694e-06, "loss": 0.3403, "step": 21737 }, { "epoch": 2.2100447336315576, "grad_norm": 0.2759898900985718, "learning_rate": 6.840477578084303e-06, "loss": 0.3349, "step": 21738 }, { "epoch": 2.2101464009760066, "grad_norm": 0.2603580057621002, "learning_rate": 6.840147604729024e-06, "loss": 0.2997, "step": 21739 }, { "epoch": 2.2102480683204555, "grad_norm": 0.27374696731567383, "learning_rate": 6.8398176221035175e-06, "loss": 0.3296, "step": 21740 }, { "epoch": 2.2103497356649044, "grad_norm": 0.2977437973022461, "learning_rate": 6.839487630209448e-06, "loss": 0.3548, "step": 21741 }, { "epoch": 2.2104514030093534, "grad_norm": 0.2711121141910553, "learning_rate": 6.8391576290484766e-06, "loss": 0.3144, "step": 21742 }, { "epoch": 2.2105530703538023, "grad_norm": 0.28857967257499695, "learning_rate": 6.838827618622266e-06, "loss": 0.3347, "step": 21743 }, { "epoch": 2.2106547376982513, "grad_norm": 0.2675009071826935, "learning_rate": 6.838497598932479e-06, "loss": 0.3459, "step": 21744 }, { "epoch": 2.2107564050427, "grad_norm": 0.2801293134689331, "learning_rate": 6.838167569980776e-06, "loss": 0.3408, "step": 21745 }, { "epoch": 2.210858072387149, "grad_norm": 0.2606539726257324, "learning_rate": 6.837837531768825e-06, "loss": 0.3315, "step": 21746 }, { "epoch": 2.210959739731598, "grad_norm": 0.2603684365749359, "learning_rate": 6.8375074842982825e-06, "loss": 0.3236, "step": 21747 }, { "epoch": 2.211061407076047, "grad_norm": 0.269564688205719, "learning_rate": 6.837177427570815e-06, "loss": 0.3014, "step": 21748 }, { "epoch": 2.211163074420496, "grad_norm": 0.28828683495521545, "learning_rate": 6.8368473615880845e-06, "loss": 0.3405, "step": 21749 }, { "epoch": 2.211264741764945, "grad_norm": 0.2741665840148926, "learning_rate": 6.836517286351752e-06, "loss": 0.3503, "step": 21750 }, { "epoch": 2.211366409109394, "grad_norm": 0.26845747232437134, "learning_rate": 6.836187201863484e-06, "loss": 0.3637, "step": 21751 }, { "epoch": 2.2114680764538432, "grad_norm": 0.27955934405326843, "learning_rate": 6.835857108124941e-06, "loss": 0.3286, "step": 21752 }, { "epoch": 2.211569743798292, "grad_norm": 0.2725464701652527, "learning_rate": 6.835527005137786e-06, "loss": 0.3112, "step": 21753 }, { "epoch": 2.211671411142741, "grad_norm": 0.2749062776565552, "learning_rate": 6.83519689290368e-06, "loss": 0.3547, "step": 21754 }, { "epoch": 2.21177307848719, "grad_norm": 0.2686283588409424, "learning_rate": 6.834866771424291e-06, "loss": 0.2965, "step": 21755 }, { "epoch": 2.211874745831639, "grad_norm": 0.2694503664970398, "learning_rate": 6.834536640701277e-06, "loss": 0.3449, "step": 21756 }, { "epoch": 2.211976413176088, "grad_norm": 0.28715160489082336, "learning_rate": 6.8342065007363046e-06, "loss": 0.3267, "step": 21757 }, { "epoch": 2.212078080520537, "grad_norm": 0.27451595664024353, "learning_rate": 6.833876351531036e-06, "loss": 0.3368, "step": 21758 }, { "epoch": 2.212179747864986, "grad_norm": 0.26089853048324585, "learning_rate": 6.833546193087133e-06, "loss": 0.3139, "step": 21759 }, { "epoch": 2.2122814152094348, "grad_norm": 0.26040005683898926, "learning_rate": 6.833216025406262e-06, "loss": 0.3643, "step": 21760 }, { "epoch": 2.2123830825538837, "grad_norm": 0.2942177951335907, "learning_rate": 6.832885848490082e-06, "loss": 0.3305, "step": 21761 }, { "epoch": 2.2124847498983327, "grad_norm": 0.27652159333229065, "learning_rate": 6.832555662340259e-06, "loss": 0.3204, "step": 21762 }, { "epoch": 2.2125864172427816, "grad_norm": 0.26481011509895325, "learning_rate": 6.832225466958458e-06, "loss": 0.3167, "step": 21763 }, { "epoch": 2.2126880845872305, "grad_norm": 0.27448439598083496, "learning_rate": 6.831895262346337e-06, "loss": 0.3229, "step": 21764 }, { "epoch": 2.2127897519316795, "grad_norm": 0.25855913758277893, "learning_rate": 6.831565048505566e-06, "loss": 0.3072, "step": 21765 }, { "epoch": 2.2128914192761284, "grad_norm": 0.3001999258995056, "learning_rate": 6.831234825437805e-06, "loss": 0.3336, "step": 21766 }, { "epoch": 2.2129930866205774, "grad_norm": 0.28815191984176636, "learning_rate": 6.830904593144717e-06, "loss": 0.3215, "step": 21767 }, { "epoch": 2.2130947539650263, "grad_norm": 0.26019781827926636, "learning_rate": 6.830574351627969e-06, "loss": 0.3255, "step": 21768 }, { "epoch": 2.2131964213094752, "grad_norm": 0.27577632665634155, "learning_rate": 6.830244100889219e-06, "loss": 0.3041, "step": 21769 }, { "epoch": 2.213298088653924, "grad_norm": 0.27611324191093445, "learning_rate": 6.829913840930138e-06, "loss": 0.3278, "step": 21770 }, { "epoch": 2.213399755998373, "grad_norm": 0.28614726662635803, "learning_rate": 6.8295835717523825e-06, "loss": 0.3302, "step": 21771 }, { "epoch": 2.213501423342822, "grad_norm": 0.2655380070209503, "learning_rate": 6.829253293357621e-06, "loss": 0.3407, "step": 21772 }, { "epoch": 2.213603090687271, "grad_norm": 0.25577375292778015, "learning_rate": 6.828923005747518e-06, "loss": 0.3441, "step": 21773 }, { "epoch": 2.2137047580317204, "grad_norm": 0.2995249032974243, "learning_rate": 6.828592708923734e-06, "loss": 0.317, "step": 21774 }, { "epoch": 2.2138064253761693, "grad_norm": 0.31291887164115906, "learning_rate": 6.828262402887934e-06, "loss": 0.3313, "step": 21775 }, { "epoch": 2.2139080927206183, "grad_norm": 0.2808765470981598, "learning_rate": 6.827932087641782e-06, "loss": 0.3392, "step": 21776 }, { "epoch": 2.2140097600650672, "grad_norm": 0.2668147385120392, "learning_rate": 6.827601763186943e-06, "loss": 0.3268, "step": 21777 }, { "epoch": 2.214111427409516, "grad_norm": 0.2644306421279907, "learning_rate": 6.827271429525082e-06, "loss": 0.3387, "step": 21778 }, { "epoch": 2.214213094753965, "grad_norm": 0.283640593290329, "learning_rate": 6.826941086657861e-06, "loss": 0.3125, "step": 21779 }, { "epoch": 2.214314762098414, "grad_norm": 0.26888445019721985, "learning_rate": 6.826610734586945e-06, "loss": 0.3642, "step": 21780 }, { "epoch": 2.214416429442863, "grad_norm": 0.2720487415790558, "learning_rate": 6.826280373313997e-06, "loss": 0.3029, "step": 21781 }, { "epoch": 2.214518096787312, "grad_norm": 0.2588886022567749, "learning_rate": 6.825950002840684e-06, "loss": 0.3602, "step": 21782 }, { "epoch": 2.214619764131761, "grad_norm": 0.25431692600250244, "learning_rate": 6.825619623168668e-06, "loss": 0.3245, "step": 21783 }, { "epoch": 2.21472143147621, "grad_norm": 0.2888110280036926, "learning_rate": 6.825289234299614e-06, "loss": 0.3228, "step": 21784 }, { "epoch": 2.2148230988206588, "grad_norm": 0.26039624214172363, "learning_rate": 6.824958836235187e-06, "loss": 0.3436, "step": 21785 }, { "epoch": 2.2149247661651077, "grad_norm": 0.2530180811882019, "learning_rate": 6.82462842897705e-06, "loss": 0.3258, "step": 21786 }, { "epoch": 2.2150264335095566, "grad_norm": 0.26267898082733154, "learning_rate": 6.82429801252687e-06, "loss": 0.3152, "step": 21787 }, { "epoch": 2.2151281008540056, "grad_norm": 0.25604456663131714, "learning_rate": 6.82396758688631e-06, "loss": 0.3219, "step": 21788 }, { "epoch": 2.2152297681984545, "grad_norm": 0.27606508135795593, "learning_rate": 6.823637152057035e-06, "loss": 0.3237, "step": 21789 }, { "epoch": 2.2153314355429035, "grad_norm": 0.3065410554409027, "learning_rate": 6.823306708040709e-06, "loss": 0.3137, "step": 21790 }, { "epoch": 2.2154331028873524, "grad_norm": 0.27162906527519226, "learning_rate": 6.822976254838996e-06, "loss": 0.3211, "step": 21791 }, { "epoch": 2.2155347702318013, "grad_norm": 0.30699020624160767, "learning_rate": 6.822645792453561e-06, "loss": 0.3541, "step": 21792 }, { "epoch": 2.2156364375762507, "grad_norm": 0.28748804330825806, "learning_rate": 6.822315320886072e-06, "loss": 0.3299, "step": 21793 }, { "epoch": 2.2157381049206997, "grad_norm": 0.2663186490535736, "learning_rate": 6.821984840138193e-06, "loss": 0.3529, "step": 21794 }, { "epoch": 2.2158397722651486, "grad_norm": 0.2739141583442688, "learning_rate": 6.821654350211584e-06, "loss": 0.3291, "step": 21795 }, { "epoch": 2.2159414396095976, "grad_norm": 0.26772499084472656, "learning_rate": 6.821323851107915e-06, "loss": 0.3347, "step": 21796 }, { "epoch": 2.2160431069540465, "grad_norm": 0.29460129141807556, "learning_rate": 6.820993342828849e-06, "loss": 0.347, "step": 21797 }, { "epoch": 2.2161447742984954, "grad_norm": 0.263080894947052, "learning_rate": 6.8206628253760495e-06, "loss": 0.3351, "step": 21798 }, { "epoch": 2.2162464416429444, "grad_norm": 0.296756386756897, "learning_rate": 6.820332298751187e-06, "loss": 0.3348, "step": 21799 }, { "epoch": 2.2163481089873933, "grad_norm": 0.27028119564056396, "learning_rate": 6.820001762955919e-06, "loss": 0.3489, "step": 21800 }, { "epoch": 2.2164497763318423, "grad_norm": 0.2731560468673706, "learning_rate": 6.8196712179919164e-06, "loss": 0.3461, "step": 21801 }, { "epoch": 2.216551443676291, "grad_norm": 0.28774091601371765, "learning_rate": 6.819340663860843e-06, "loss": 0.3479, "step": 21802 }, { "epoch": 2.21665311102074, "grad_norm": 0.2844837009906769, "learning_rate": 6.819010100564363e-06, "loss": 0.3456, "step": 21803 }, { "epoch": 2.216754778365189, "grad_norm": 0.250631183385849, "learning_rate": 6.818679528104144e-06, "loss": 0.3172, "step": 21804 }, { "epoch": 2.216856445709638, "grad_norm": 0.26665058732032776, "learning_rate": 6.818348946481847e-06, "loss": 0.3636, "step": 21805 }, { "epoch": 2.216958113054087, "grad_norm": 0.30074450373649597, "learning_rate": 6.818018355699142e-06, "loss": 0.3512, "step": 21806 }, { "epoch": 2.217059780398536, "grad_norm": 0.2886923551559448, "learning_rate": 6.817687755757693e-06, "loss": 0.3369, "step": 21807 }, { "epoch": 2.217161447742985, "grad_norm": 0.29424282908439636, "learning_rate": 6.817357146659162e-06, "loss": 0.3054, "step": 21808 }, { "epoch": 2.217263115087434, "grad_norm": 0.28323066234588623, "learning_rate": 6.817026528405222e-06, "loss": 0.2976, "step": 21809 }, { "epoch": 2.2173647824318827, "grad_norm": 0.2839692533016205, "learning_rate": 6.816695900997531e-06, "loss": 0.3418, "step": 21810 }, { "epoch": 2.2174664497763317, "grad_norm": 0.29400309920310974, "learning_rate": 6.8163652644377575e-06, "loss": 0.3499, "step": 21811 }, { "epoch": 2.2175681171207806, "grad_norm": 0.28073906898498535, "learning_rate": 6.81603461872757e-06, "loss": 0.3484, "step": 21812 }, { "epoch": 2.2176697844652296, "grad_norm": 0.2535581588745117, "learning_rate": 6.81570396386863e-06, "loss": 0.3186, "step": 21813 }, { "epoch": 2.2177714518096785, "grad_norm": 0.27861639857292175, "learning_rate": 6.815373299862605e-06, "loss": 0.3312, "step": 21814 }, { "epoch": 2.217873119154128, "grad_norm": 0.2924131453037262, "learning_rate": 6.815042626711161e-06, "loss": 0.3171, "step": 21815 }, { "epoch": 2.217974786498577, "grad_norm": 0.30139538645744324, "learning_rate": 6.814711944415961e-06, "loss": 0.326, "step": 21816 }, { "epoch": 2.2180764538430258, "grad_norm": 0.2615494430065155, "learning_rate": 6.814381252978677e-06, "loss": 0.3224, "step": 21817 }, { "epoch": 2.2181781211874747, "grad_norm": 0.2763966917991638, "learning_rate": 6.81405055240097e-06, "loss": 0.3378, "step": 21818 }, { "epoch": 2.2182797885319236, "grad_norm": 0.2752231955528259, "learning_rate": 6.813719842684509e-06, "loss": 0.3143, "step": 21819 }, { "epoch": 2.2183814558763726, "grad_norm": 0.2693233788013458, "learning_rate": 6.813389123830956e-06, "loss": 0.3464, "step": 21820 }, { "epoch": 2.2184831232208215, "grad_norm": 0.26198527216911316, "learning_rate": 6.81305839584198e-06, "loss": 0.3458, "step": 21821 }, { "epoch": 2.2185847905652705, "grad_norm": 0.2849290370941162, "learning_rate": 6.812727658719248e-06, "loss": 0.3166, "step": 21822 }, { "epoch": 2.2186864579097194, "grad_norm": 0.2596985697746277, "learning_rate": 6.812396912464423e-06, "loss": 0.3319, "step": 21823 }, { "epoch": 2.2187881252541684, "grad_norm": 0.29874521493911743, "learning_rate": 6.812066157079176e-06, "loss": 0.3427, "step": 21824 }, { "epoch": 2.2188897925986173, "grad_norm": 0.2832246422767639, "learning_rate": 6.8117353925651675e-06, "loss": 0.3464, "step": 21825 }, { "epoch": 2.2189914599430662, "grad_norm": 0.2757023572921753, "learning_rate": 6.8114046189240665e-06, "loss": 0.3105, "step": 21826 }, { "epoch": 2.219093127287515, "grad_norm": 0.29333868622779846, "learning_rate": 6.811073836157542e-06, "loss": 0.3425, "step": 21827 }, { "epoch": 2.219194794631964, "grad_norm": 0.26489290595054626, "learning_rate": 6.810743044267255e-06, "loss": 0.3192, "step": 21828 }, { "epoch": 2.219296461976413, "grad_norm": 0.26159143447875977, "learning_rate": 6.810412243254877e-06, "loss": 0.356, "step": 21829 }, { "epoch": 2.219398129320862, "grad_norm": 0.28415733575820923, "learning_rate": 6.810081433122071e-06, "loss": 0.348, "step": 21830 }, { "epoch": 2.219499796665311, "grad_norm": 0.2815696895122528, "learning_rate": 6.809750613870505e-06, "loss": 0.3402, "step": 21831 }, { "epoch": 2.21960146400976, "grad_norm": 0.2626115381717682, "learning_rate": 6.809419785501847e-06, "loss": 0.3075, "step": 21832 }, { "epoch": 2.219703131354209, "grad_norm": 0.2738892436027527, "learning_rate": 6.8090889480177605e-06, "loss": 0.3108, "step": 21833 }, { "epoch": 2.219804798698658, "grad_norm": 0.29184579849243164, "learning_rate": 6.808758101419914e-06, "loss": 0.333, "step": 21834 }, { "epoch": 2.219906466043107, "grad_norm": 0.2836332619190216, "learning_rate": 6.808427245709974e-06, "loss": 0.3309, "step": 21835 }, { "epoch": 2.220008133387556, "grad_norm": 0.26755496859550476, "learning_rate": 6.808096380889608e-06, "loss": 0.3099, "step": 21836 }, { "epoch": 2.220109800732005, "grad_norm": 0.2742379605770111, "learning_rate": 6.807765506960482e-06, "loss": 0.3195, "step": 21837 }, { "epoch": 2.220211468076454, "grad_norm": 0.2574027478694916, "learning_rate": 6.807434623924263e-06, "loss": 0.3321, "step": 21838 }, { "epoch": 2.220313135420903, "grad_norm": 0.2664194405078888, "learning_rate": 6.807103731782618e-06, "loss": 0.3354, "step": 21839 }, { "epoch": 2.220414802765352, "grad_norm": 0.26240500807762146, "learning_rate": 6.8067728305372125e-06, "loss": 0.3181, "step": 21840 }, { "epoch": 2.220516470109801, "grad_norm": 0.27804991602897644, "learning_rate": 6.806441920189716e-06, "loss": 0.3092, "step": 21841 }, { "epoch": 2.2206181374542497, "grad_norm": 0.2770240306854248, "learning_rate": 6.806111000741794e-06, "loss": 0.3772, "step": 21842 }, { "epoch": 2.2207198047986987, "grad_norm": 0.2674861252307892, "learning_rate": 6.805780072195114e-06, "loss": 0.3019, "step": 21843 }, { "epoch": 2.2208214721431476, "grad_norm": 0.30538707971572876, "learning_rate": 6.805449134551344e-06, "loss": 0.3572, "step": 21844 }, { "epoch": 2.2209231394875966, "grad_norm": 0.26726821064949036, "learning_rate": 6.805118187812148e-06, "loss": 0.3616, "step": 21845 }, { "epoch": 2.2210248068320455, "grad_norm": 0.2691991627216339, "learning_rate": 6.8047872319791985e-06, "loss": 0.3114, "step": 21846 }, { "epoch": 2.2211264741764944, "grad_norm": 0.26122236251831055, "learning_rate": 6.804456267054158e-06, "loss": 0.3555, "step": 21847 }, { "epoch": 2.2212281415209434, "grad_norm": 0.2823542654514313, "learning_rate": 6.804125293038696e-06, "loss": 0.3182, "step": 21848 }, { "epoch": 2.2213298088653923, "grad_norm": 0.2786547541618347, "learning_rate": 6.803794309934479e-06, "loss": 0.3214, "step": 21849 }, { "epoch": 2.2214314762098413, "grad_norm": 0.27983465790748596, "learning_rate": 6.803463317743176e-06, "loss": 0.3511, "step": 21850 }, { "epoch": 2.22153314355429, "grad_norm": 0.2874126136302948, "learning_rate": 6.8031323164664535e-06, "loss": 0.3259, "step": 21851 }, { "epoch": 2.221634810898739, "grad_norm": 0.2919376492500305, "learning_rate": 6.802801306105977e-06, "loss": 0.3329, "step": 21852 }, { "epoch": 2.221736478243188, "grad_norm": 0.2715771198272705, "learning_rate": 6.802470286663416e-06, "loss": 0.3176, "step": 21853 }, { "epoch": 2.221838145587637, "grad_norm": 0.2976374924182892, "learning_rate": 6.802139258140441e-06, "loss": 0.3184, "step": 21854 }, { "epoch": 2.221939812932086, "grad_norm": 0.2773513197898865, "learning_rate": 6.801808220538715e-06, "loss": 0.3465, "step": 21855 }, { "epoch": 2.2220414802765354, "grad_norm": 0.2569747567176819, "learning_rate": 6.801477173859907e-06, "loss": 0.3496, "step": 21856 }, { "epoch": 2.2221431476209843, "grad_norm": 0.2882777154445648, "learning_rate": 6.801146118105685e-06, "loss": 0.3437, "step": 21857 }, { "epoch": 2.2222448149654332, "grad_norm": 0.2798362374305725, "learning_rate": 6.800815053277716e-06, "loss": 0.3577, "step": 21858 }, { "epoch": 2.222346482309882, "grad_norm": 0.2696034014225006, "learning_rate": 6.80048397937767e-06, "loss": 0.3126, "step": 21859 }, { "epoch": 2.222448149654331, "grad_norm": 0.267309308052063, "learning_rate": 6.800152896407215e-06, "loss": 0.3295, "step": 21860 }, { "epoch": 2.22254981699878, "grad_norm": 0.27090463042259216, "learning_rate": 6.799821804368015e-06, "loss": 0.3161, "step": 21861 }, { "epoch": 2.222651484343229, "grad_norm": 0.2593904137611389, "learning_rate": 6.7994907032617405e-06, "loss": 0.2994, "step": 21862 }, { "epoch": 2.222753151687678, "grad_norm": 0.2722407281398773, "learning_rate": 6.79915959309006e-06, "loss": 0.3538, "step": 21863 }, { "epoch": 2.222854819032127, "grad_norm": 0.26055288314819336, "learning_rate": 6.798828473854642e-06, "loss": 0.3122, "step": 21864 }, { "epoch": 2.222956486376576, "grad_norm": 0.27009904384613037, "learning_rate": 6.798497345557153e-06, "loss": 0.3331, "step": 21865 }, { "epoch": 2.2230581537210248, "grad_norm": 0.27461713552474976, "learning_rate": 6.798166208199262e-06, "loss": 0.3269, "step": 21866 }, { "epoch": 2.2231598210654737, "grad_norm": 0.26121750473976135, "learning_rate": 6.797835061782636e-06, "loss": 0.3662, "step": 21867 }, { "epoch": 2.2232614884099227, "grad_norm": 0.28657636046409607, "learning_rate": 6.797503906308944e-06, "loss": 0.3448, "step": 21868 }, { "epoch": 2.2233631557543716, "grad_norm": 0.29661622643470764, "learning_rate": 6.7971727417798565e-06, "loss": 0.3068, "step": 21869 }, { "epoch": 2.2234648230988205, "grad_norm": 0.28084367513656616, "learning_rate": 6.796841568197039e-06, "loss": 0.3437, "step": 21870 }, { "epoch": 2.2235664904432695, "grad_norm": 0.2846105396747589, "learning_rate": 6.7965103855621605e-06, "loss": 0.3308, "step": 21871 }, { "epoch": 2.2236681577877184, "grad_norm": 0.3529607057571411, "learning_rate": 6.796179193876889e-06, "loss": 0.345, "step": 21872 }, { "epoch": 2.2237698251321674, "grad_norm": 0.3340386748313904, "learning_rate": 6.795847993142895e-06, "loss": 0.3396, "step": 21873 }, { "epoch": 2.2238714924766163, "grad_norm": 0.278201162815094, "learning_rate": 6.795516783361843e-06, "loss": 0.3537, "step": 21874 }, { "epoch": 2.2239731598210657, "grad_norm": 0.3001594841480255, "learning_rate": 6.795185564535407e-06, "loss": 0.3216, "step": 21875 }, { "epoch": 2.2240748271655146, "grad_norm": 0.34947317838668823, "learning_rate": 6.794854336665251e-06, "loss": 0.3269, "step": 21876 }, { "epoch": 2.2241764945099636, "grad_norm": 0.3158012926578522, "learning_rate": 6.7945230997530444e-06, "loss": 0.34, "step": 21877 }, { "epoch": 2.2242781618544125, "grad_norm": 0.2845986783504486, "learning_rate": 6.794191853800459e-06, "loss": 0.344, "step": 21878 }, { "epoch": 2.2243798291988615, "grad_norm": 0.29529157280921936, "learning_rate": 6.79386059880916e-06, "loss": 0.3446, "step": 21879 }, { "epoch": 2.2244814965433104, "grad_norm": 0.30262935161590576, "learning_rate": 6.79352933478082e-06, "loss": 0.2991, "step": 21880 }, { "epoch": 2.2245831638877593, "grad_norm": 0.2954724133014679, "learning_rate": 6.793198061717102e-06, "loss": 0.3639, "step": 21881 }, { "epoch": 2.2246848312322083, "grad_norm": 0.26986703276634216, "learning_rate": 6.7928667796196805e-06, "loss": 0.3383, "step": 21882 }, { "epoch": 2.2247864985766572, "grad_norm": 0.2975492477416992, "learning_rate": 6.792535488490222e-06, "loss": 0.3248, "step": 21883 }, { "epoch": 2.224888165921106, "grad_norm": 0.27488797903060913, "learning_rate": 6.792204188330395e-06, "loss": 0.3316, "step": 21884 }, { "epoch": 2.224989833265555, "grad_norm": 0.26586464047431946, "learning_rate": 6.79187287914187e-06, "loss": 0.3366, "step": 21885 }, { "epoch": 2.225091500610004, "grad_norm": 0.2774055302143097, "learning_rate": 6.791541560926314e-06, "loss": 0.3101, "step": 21886 }, { "epoch": 2.225193167954453, "grad_norm": 0.26576098799705505, "learning_rate": 6.791210233685397e-06, "loss": 0.3097, "step": 21887 }, { "epoch": 2.225294835298902, "grad_norm": 0.2812778949737549, "learning_rate": 6.79087889742079e-06, "loss": 0.352, "step": 21888 }, { "epoch": 2.225396502643351, "grad_norm": 0.29338759183883667, "learning_rate": 6.79054755213416e-06, "loss": 0.3412, "step": 21889 }, { "epoch": 2.2254981699878, "grad_norm": 0.2669140100479126, "learning_rate": 6.7902161978271785e-06, "loss": 0.3174, "step": 21890 }, { "epoch": 2.2255998373322488, "grad_norm": 0.28009477257728577, "learning_rate": 6.789884834501511e-06, "loss": 0.3411, "step": 21891 }, { "epoch": 2.2257015046766977, "grad_norm": 0.26796871423721313, "learning_rate": 6.78955346215883e-06, "loss": 0.3214, "step": 21892 }, { "epoch": 2.2258031720211466, "grad_norm": 0.2854224145412445, "learning_rate": 6.789222080800805e-06, "loss": 0.3322, "step": 21893 }, { "epoch": 2.2259048393655956, "grad_norm": 0.27837422490119934, "learning_rate": 6.788890690429102e-06, "loss": 0.331, "step": 21894 }, { "epoch": 2.2260065067100445, "grad_norm": 0.29311007261276245, "learning_rate": 6.7885592910453945e-06, "loss": 0.3239, "step": 21895 }, { "epoch": 2.226108174054494, "grad_norm": 0.2916260063648224, "learning_rate": 6.78822788265135e-06, "loss": 0.3412, "step": 21896 }, { "epoch": 2.226209841398943, "grad_norm": 0.29034924507141113, "learning_rate": 6.787896465248638e-06, "loss": 0.3499, "step": 21897 }, { "epoch": 2.226311508743392, "grad_norm": 0.2871962785720825, "learning_rate": 6.787565038838929e-06, "loss": 0.3096, "step": 21898 }, { "epoch": 2.2264131760878407, "grad_norm": 0.3029758036136627, "learning_rate": 6.787233603423891e-06, "loss": 0.3265, "step": 21899 }, { "epoch": 2.2265148434322897, "grad_norm": 0.27101364731788635, "learning_rate": 6.786902159005197e-06, "loss": 0.3162, "step": 21900 }, { "epoch": 2.2266165107767386, "grad_norm": 0.2801794409751892, "learning_rate": 6.786570705584512e-06, "loss": 0.3378, "step": 21901 }, { "epoch": 2.2267181781211876, "grad_norm": 0.28667575120925903, "learning_rate": 6.786239243163509e-06, "loss": 0.3578, "step": 21902 }, { "epoch": 2.2268198454656365, "grad_norm": 0.32001644372940063, "learning_rate": 6.7859077717438584e-06, "loss": 0.3462, "step": 21903 }, { "epoch": 2.2269215128100854, "grad_norm": 0.2669663429260254, "learning_rate": 6.785576291327229e-06, "loss": 0.3285, "step": 21904 }, { "epoch": 2.2270231801545344, "grad_norm": 0.28965139389038086, "learning_rate": 6.7852448019152895e-06, "loss": 0.3516, "step": 21905 }, { "epoch": 2.2271248474989833, "grad_norm": 0.25047338008880615, "learning_rate": 6.78491330350971e-06, "loss": 0.3292, "step": 21906 }, { "epoch": 2.2272265148434323, "grad_norm": 0.25484293699264526, "learning_rate": 6.784581796112163e-06, "loss": 0.3195, "step": 21907 }, { "epoch": 2.227328182187881, "grad_norm": 0.27911922335624695, "learning_rate": 6.784250279724317e-06, "loss": 0.3369, "step": 21908 }, { "epoch": 2.22742984953233, "grad_norm": 0.2748660445213318, "learning_rate": 6.783918754347841e-06, "loss": 0.3276, "step": 21909 }, { "epoch": 2.227531516876779, "grad_norm": 0.2714185118675232, "learning_rate": 6.783587219984407e-06, "loss": 0.3484, "step": 21910 }, { "epoch": 2.227633184221228, "grad_norm": 0.26324647665023804, "learning_rate": 6.783255676635685e-06, "loss": 0.3046, "step": 21911 }, { "epoch": 2.227734851565677, "grad_norm": 0.2869609594345093, "learning_rate": 6.7829241243033426e-06, "loss": 0.3765, "step": 21912 }, { "epoch": 2.227836518910126, "grad_norm": 0.2825287878513336, "learning_rate": 6.782592562989054e-06, "loss": 0.3373, "step": 21913 }, { "epoch": 2.227938186254575, "grad_norm": 0.2921611964702606, "learning_rate": 6.782260992694487e-06, "loss": 0.3471, "step": 21914 }, { "epoch": 2.228039853599024, "grad_norm": 0.2767429053783417, "learning_rate": 6.781929413421314e-06, "loss": 0.3155, "step": 21915 }, { "epoch": 2.228141520943473, "grad_norm": 0.2575426399707794, "learning_rate": 6.781597825171202e-06, "loss": 0.3166, "step": 21916 }, { "epoch": 2.228243188287922, "grad_norm": 0.2849361002445221, "learning_rate": 6.781266227945825e-06, "loss": 0.316, "step": 21917 }, { "epoch": 2.228344855632371, "grad_norm": 0.31040725111961365, "learning_rate": 6.780934621746852e-06, "loss": 0.3315, "step": 21918 }, { "epoch": 2.22844652297682, "grad_norm": 0.2733151316642761, "learning_rate": 6.780603006575953e-06, "loss": 0.3571, "step": 21919 }, { "epoch": 2.228548190321269, "grad_norm": 0.2576521039009094, "learning_rate": 6.7802713824348e-06, "loss": 0.3369, "step": 21920 }, { "epoch": 2.228649857665718, "grad_norm": 0.2879863977432251, "learning_rate": 6.779939749325062e-06, "loss": 0.3086, "step": 21921 }, { "epoch": 2.228751525010167, "grad_norm": 0.2670392394065857, "learning_rate": 6.7796081072484095e-06, "loss": 0.329, "step": 21922 }, { "epoch": 2.2288531923546158, "grad_norm": 0.27359601855278015, "learning_rate": 6.779276456206516e-06, "loss": 0.3207, "step": 21923 }, { "epoch": 2.2289548596990647, "grad_norm": 0.26071569323539734, "learning_rate": 6.77894479620105e-06, "loss": 0.3202, "step": 21924 }, { "epoch": 2.2290565270435136, "grad_norm": 0.27383437752723694, "learning_rate": 6.778613127233683e-06, "loss": 0.3403, "step": 21925 }, { "epoch": 2.2291581943879626, "grad_norm": 0.26447826623916626, "learning_rate": 6.7782814493060854e-06, "loss": 0.2999, "step": 21926 }, { "epoch": 2.2292598617324115, "grad_norm": 0.29837194085121155, "learning_rate": 6.777949762419928e-06, "loss": 0.3365, "step": 21927 }, { "epoch": 2.2293615290768605, "grad_norm": 0.28930410742759705, "learning_rate": 6.777618066576883e-06, "loss": 0.3509, "step": 21928 }, { "epoch": 2.2294631964213094, "grad_norm": 0.2645096778869629, "learning_rate": 6.77728636177862e-06, "loss": 0.3356, "step": 21929 }, { "epoch": 2.2295648637657584, "grad_norm": 0.28542906045913696, "learning_rate": 6.776954648026812e-06, "loss": 0.3605, "step": 21930 }, { "epoch": 2.2296665311102073, "grad_norm": 0.2926933169364929, "learning_rate": 6.776622925323128e-06, "loss": 0.3143, "step": 21931 }, { "epoch": 2.2297681984546562, "grad_norm": 0.2788369059562683, "learning_rate": 6.7762911936692386e-06, "loss": 0.3222, "step": 21932 }, { "epoch": 2.229869865799105, "grad_norm": 0.24853716790676117, "learning_rate": 6.775959453066818e-06, "loss": 0.3415, "step": 21933 }, { "epoch": 2.229971533143554, "grad_norm": 0.2826031744480133, "learning_rate": 6.775627703517535e-06, "loss": 0.3356, "step": 21934 }, { "epoch": 2.230073200488003, "grad_norm": 0.28384271264076233, "learning_rate": 6.7752959450230615e-06, "loss": 0.3091, "step": 21935 }, { "epoch": 2.230174867832452, "grad_norm": 0.2743629813194275, "learning_rate": 6.7749641775850705e-06, "loss": 0.309, "step": 21936 }, { "epoch": 2.2302765351769014, "grad_norm": 0.2880145311355591, "learning_rate": 6.774632401205229e-06, "loss": 0.3298, "step": 21937 }, { "epoch": 2.2303782025213503, "grad_norm": 0.2980034053325653, "learning_rate": 6.774300615885212e-06, "loss": 0.3029, "step": 21938 }, { "epoch": 2.2304798698657993, "grad_norm": 0.27933067083358765, "learning_rate": 6.773968821626691e-06, "loss": 0.3259, "step": 21939 }, { "epoch": 2.230581537210248, "grad_norm": 0.3311210572719574, "learning_rate": 6.773637018431337e-06, "loss": 0.3342, "step": 21940 }, { "epoch": 2.230683204554697, "grad_norm": 0.30003103613853455, "learning_rate": 6.773305206300821e-06, "loss": 0.3448, "step": 21941 }, { "epoch": 2.230784871899146, "grad_norm": 0.2837143540382385, "learning_rate": 6.772973385236814e-06, "loss": 0.3257, "step": 21942 }, { "epoch": 2.230886539243595, "grad_norm": 0.2651779055595398, "learning_rate": 6.772641555240989e-06, "loss": 0.3268, "step": 21943 }, { "epoch": 2.230988206588044, "grad_norm": 0.28590211272239685, "learning_rate": 6.772309716315015e-06, "loss": 0.3772, "step": 21944 }, { "epoch": 2.231089873932493, "grad_norm": 0.2721998989582062, "learning_rate": 6.7719778684605686e-06, "loss": 0.3763, "step": 21945 }, { "epoch": 2.231191541276942, "grad_norm": 0.2781262695789337, "learning_rate": 6.771646011679318e-06, "loss": 0.315, "step": 21946 }, { "epoch": 2.231293208621391, "grad_norm": 0.2927723526954651, "learning_rate": 6.771314145972936e-06, "loss": 0.3205, "step": 21947 }, { "epoch": 2.2313948759658397, "grad_norm": 0.29647666215896606, "learning_rate": 6.770982271343093e-06, "loss": 0.3213, "step": 21948 }, { "epoch": 2.2314965433102887, "grad_norm": 0.27570950984954834, "learning_rate": 6.770650387791463e-06, "loss": 0.324, "step": 21949 }, { "epoch": 2.2315982106547376, "grad_norm": 0.29638004302978516, "learning_rate": 6.770318495319717e-06, "loss": 0.339, "step": 21950 }, { "epoch": 2.2316998779991866, "grad_norm": 0.27658239006996155, "learning_rate": 6.7699865939295275e-06, "loss": 0.3348, "step": 21951 }, { "epoch": 2.2318015453436355, "grad_norm": 0.2779344618320465, "learning_rate": 6.7696546836225665e-06, "loss": 0.3262, "step": 21952 }, { "epoch": 2.2319032126880844, "grad_norm": 0.271255224943161, "learning_rate": 6.769322764400503e-06, "loss": 0.3113, "step": 21953 }, { "epoch": 2.2320048800325334, "grad_norm": 0.25759565830230713, "learning_rate": 6.7689908362650156e-06, "loss": 0.318, "step": 21954 }, { "epoch": 2.2321065473769823, "grad_norm": 0.2590291500091553, "learning_rate": 6.768658899217771e-06, "loss": 0.3213, "step": 21955 }, { "epoch": 2.2322082147214313, "grad_norm": 0.29003801941871643, "learning_rate": 6.768326953260443e-06, "loss": 0.3232, "step": 21956 }, { "epoch": 2.2323098820658807, "grad_norm": 0.25503024458885193, "learning_rate": 6.767994998394705e-06, "loss": 0.318, "step": 21957 }, { "epoch": 2.2324115494103296, "grad_norm": 0.2706356942653656, "learning_rate": 6.767663034622227e-06, "loss": 0.2967, "step": 21958 }, { "epoch": 2.2325132167547785, "grad_norm": 0.2593201696872711, "learning_rate": 6.767331061944684e-06, "loss": 0.327, "step": 21959 }, { "epoch": 2.2326148840992275, "grad_norm": 0.2927521765232086, "learning_rate": 6.766999080363745e-06, "loss": 0.3715, "step": 21960 }, { "epoch": 2.2327165514436764, "grad_norm": 0.265428751707077, "learning_rate": 6.766667089881087e-06, "loss": 0.3377, "step": 21961 }, { "epoch": 2.2328182187881254, "grad_norm": 0.30536505579948425, "learning_rate": 6.766335090498378e-06, "loss": 0.3412, "step": 21962 }, { "epoch": 2.2329198861325743, "grad_norm": 0.3005602955818176, "learning_rate": 6.766003082217294e-06, "loss": 0.3461, "step": 21963 }, { "epoch": 2.2330215534770232, "grad_norm": 0.2706425189971924, "learning_rate": 6.765671065039506e-06, "loss": 0.3179, "step": 21964 }, { "epoch": 2.233123220821472, "grad_norm": 0.25437068939208984, "learning_rate": 6.765339038966686e-06, "loss": 0.3419, "step": 21965 }, { "epoch": 2.233224888165921, "grad_norm": 0.25872525572776794, "learning_rate": 6.765007004000509e-06, "loss": 0.3351, "step": 21966 }, { "epoch": 2.23332655551037, "grad_norm": 0.28969907760620117, "learning_rate": 6.764674960142644e-06, "loss": 0.312, "step": 21967 }, { "epoch": 2.233428222854819, "grad_norm": 0.27897071838378906, "learning_rate": 6.764342907394766e-06, "loss": 0.3079, "step": 21968 }, { "epoch": 2.233529890199268, "grad_norm": 0.2623767852783203, "learning_rate": 6.7640108457585496e-06, "loss": 0.3405, "step": 21969 }, { "epoch": 2.233631557543717, "grad_norm": 0.27999529242515564, "learning_rate": 6.763678775235665e-06, "loss": 0.3378, "step": 21970 }, { "epoch": 2.233733224888166, "grad_norm": 0.26173797249794006, "learning_rate": 6.7633466958277864e-06, "loss": 0.283, "step": 21971 }, { "epoch": 2.2338348922326148, "grad_norm": 0.2663467526435852, "learning_rate": 6.763014607536584e-06, "loss": 0.3153, "step": 21972 }, { "epoch": 2.2339365595770637, "grad_norm": 0.25413277745246887, "learning_rate": 6.762682510363734e-06, "loss": 0.3011, "step": 21973 }, { "epoch": 2.2340382269215127, "grad_norm": 0.2661305367946625, "learning_rate": 6.76235040431091e-06, "loss": 0.3475, "step": 21974 }, { "epoch": 2.2341398942659616, "grad_norm": 0.2962404787540436, "learning_rate": 6.762018289379781e-06, "loss": 0.3525, "step": 21975 }, { "epoch": 2.2342415616104105, "grad_norm": 0.2738509774208069, "learning_rate": 6.761686165572026e-06, "loss": 0.3385, "step": 21976 }, { "epoch": 2.2343432289548595, "grad_norm": 0.28015145659446716, "learning_rate": 6.761354032889311e-06, "loss": 0.3381, "step": 21977 }, { "epoch": 2.234444896299309, "grad_norm": 0.2668219208717346, "learning_rate": 6.761021891333315e-06, "loss": 0.3617, "step": 21978 }, { "epoch": 2.234546563643758, "grad_norm": 0.27471795678138733, "learning_rate": 6.760689740905709e-06, "loss": 0.307, "step": 21979 }, { "epoch": 2.2346482309882068, "grad_norm": 0.2591572105884552, "learning_rate": 6.760357581608165e-06, "loss": 0.3325, "step": 21980 }, { "epoch": 2.2347498983326557, "grad_norm": 0.273518830537796, "learning_rate": 6.760025413442359e-06, "loss": 0.3457, "step": 21981 }, { "epoch": 2.2348515656771046, "grad_norm": 0.2682711184024811, "learning_rate": 6.759693236409963e-06, "loss": 0.3111, "step": 21982 }, { "epoch": 2.2349532330215536, "grad_norm": 0.29669734835624695, "learning_rate": 6.7593610505126514e-06, "loss": 0.343, "step": 21983 }, { "epoch": 2.2350549003660025, "grad_norm": 0.2471940666437149, "learning_rate": 6.759028855752097e-06, "loss": 0.3104, "step": 21984 }, { "epoch": 2.2351565677104515, "grad_norm": 0.2863740622997284, "learning_rate": 6.7586966521299725e-06, "loss": 0.3432, "step": 21985 }, { "epoch": 2.2352582350549004, "grad_norm": 0.26591426134109497, "learning_rate": 6.758364439647952e-06, "loss": 0.3106, "step": 21986 }, { "epoch": 2.2353599023993493, "grad_norm": 0.2585679888725281, "learning_rate": 6.758032218307709e-06, "loss": 0.3698, "step": 21987 }, { "epoch": 2.2354615697437983, "grad_norm": 0.27707934379577637, "learning_rate": 6.757699988110918e-06, "loss": 0.3175, "step": 21988 }, { "epoch": 2.2355632370882472, "grad_norm": 0.27794209122657776, "learning_rate": 6.757367749059253e-06, "loss": 0.3453, "step": 21989 }, { "epoch": 2.235664904432696, "grad_norm": 0.2765681743621826, "learning_rate": 6.757035501154385e-06, "loss": 0.3359, "step": 21990 }, { "epoch": 2.235766571777145, "grad_norm": 0.25883328914642334, "learning_rate": 6.7567032443979915e-06, "loss": 0.3425, "step": 21991 }, { "epoch": 2.235868239121594, "grad_norm": 0.30892854928970337, "learning_rate": 6.756370978791744e-06, "loss": 0.3379, "step": 21992 }, { "epoch": 2.235969906466043, "grad_norm": 0.3048926889896393, "learning_rate": 6.756038704337316e-06, "loss": 0.3468, "step": 21993 }, { "epoch": 2.236071573810492, "grad_norm": 0.2666262686252594, "learning_rate": 6.755706421036383e-06, "loss": 0.3358, "step": 21994 }, { "epoch": 2.236173241154941, "grad_norm": 0.26182106137275696, "learning_rate": 6.755374128890619e-06, "loss": 0.3312, "step": 21995 }, { "epoch": 2.23627490849939, "grad_norm": 0.3034074306488037, "learning_rate": 6.755041827901696e-06, "loss": 0.3288, "step": 21996 }, { "epoch": 2.2363765758438388, "grad_norm": 0.2982531487941742, "learning_rate": 6.75470951807129e-06, "loss": 0.3175, "step": 21997 }, { "epoch": 2.236478243188288, "grad_norm": 0.2865212857723236, "learning_rate": 6.754377199401074e-06, "loss": 0.3395, "step": 21998 }, { "epoch": 2.236579910532737, "grad_norm": 0.27583548426628113, "learning_rate": 6.7540448718927235e-06, "loss": 0.3307, "step": 21999 }, { "epoch": 2.236681577877186, "grad_norm": 0.3016628921031952, "learning_rate": 6.7537125355479105e-06, "loss": 0.3282, "step": 22000 }, { "epoch": 2.236783245221635, "grad_norm": 0.27587732672691345, "learning_rate": 6.753380190368313e-06, "loss": 0.3497, "step": 22001 }, { "epoch": 2.236884912566084, "grad_norm": 0.253416508436203, "learning_rate": 6.7530478363555995e-06, "loss": 0.3169, "step": 22002 }, { "epoch": 2.236986579910533, "grad_norm": 0.2795693874359131, "learning_rate": 6.752715473511449e-06, "loss": 0.3228, "step": 22003 }, { "epoch": 2.237088247254982, "grad_norm": 0.26526427268981934, "learning_rate": 6.752383101837534e-06, "loss": 0.357, "step": 22004 }, { "epoch": 2.2371899145994307, "grad_norm": 0.2508448362350464, "learning_rate": 6.7520507213355305e-06, "loss": 0.3293, "step": 22005 }, { "epoch": 2.2372915819438797, "grad_norm": 0.2563033699989319, "learning_rate": 6.751718332007111e-06, "loss": 0.3312, "step": 22006 }, { "epoch": 2.2373932492883286, "grad_norm": 0.2654286324977875, "learning_rate": 6.7513859338539515e-06, "loss": 0.3334, "step": 22007 }, { "epoch": 2.2374949166327776, "grad_norm": 0.27693885564804077, "learning_rate": 6.751053526877724e-06, "loss": 0.3573, "step": 22008 }, { "epoch": 2.2375965839772265, "grad_norm": 0.27647531032562256, "learning_rate": 6.750721111080108e-06, "loss": 0.3257, "step": 22009 }, { "epoch": 2.2376982513216754, "grad_norm": 0.25814324617385864, "learning_rate": 6.750388686462772e-06, "loss": 0.3352, "step": 22010 }, { "epoch": 2.2377999186661244, "grad_norm": 0.259427547454834, "learning_rate": 6.750056253027395e-06, "loss": 0.3308, "step": 22011 }, { "epoch": 2.2379015860105733, "grad_norm": 0.2559128999710083, "learning_rate": 6.749723810775652e-06, "loss": 0.2998, "step": 22012 }, { "epoch": 2.2380032533550223, "grad_norm": 0.25589802861213684, "learning_rate": 6.749391359709213e-06, "loss": 0.2985, "step": 22013 }, { "epoch": 2.238104920699471, "grad_norm": 0.29798194766044617, "learning_rate": 6.749058899829759e-06, "loss": 0.2958, "step": 22014 }, { "epoch": 2.23820658804392, "grad_norm": 0.24480362236499786, "learning_rate": 6.74872643113896e-06, "loss": 0.3473, "step": 22015 }, { "epoch": 2.238308255388369, "grad_norm": 0.26411858201026917, "learning_rate": 6.748393953638494e-06, "loss": 0.3204, "step": 22016 }, { "epoch": 2.238409922732818, "grad_norm": 0.2805710732936859, "learning_rate": 6.748061467330035e-06, "loss": 0.3338, "step": 22017 }, { "epoch": 2.238511590077267, "grad_norm": 0.28189027309417725, "learning_rate": 6.747728972215255e-06, "loss": 0.3326, "step": 22018 }, { "epoch": 2.2386132574217164, "grad_norm": 0.27348336577415466, "learning_rate": 6.747396468295835e-06, "loss": 0.3583, "step": 22019 }, { "epoch": 2.2387149247661653, "grad_norm": 0.26325199007987976, "learning_rate": 6.7470639555734455e-06, "loss": 0.3217, "step": 22020 }, { "epoch": 2.2388165921106142, "grad_norm": 0.26301902532577515, "learning_rate": 6.746731434049764e-06, "loss": 0.308, "step": 22021 }, { "epoch": 2.238918259455063, "grad_norm": 0.27532148361206055, "learning_rate": 6.746398903726463e-06, "loss": 0.3508, "step": 22022 }, { "epoch": 2.239019926799512, "grad_norm": 0.29234814643859863, "learning_rate": 6.7460663646052195e-06, "loss": 0.3102, "step": 22023 }, { "epoch": 2.239121594143961, "grad_norm": 0.2719469368457794, "learning_rate": 6.74573381668771e-06, "loss": 0.3553, "step": 22024 }, { "epoch": 2.23922326148841, "grad_norm": 0.2502812445163727, "learning_rate": 6.745401259975608e-06, "loss": 0.3375, "step": 22025 }, { "epoch": 2.239324928832859, "grad_norm": 0.28805166482925415, "learning_rate": 6.745068694470588e-06, "loss": 0.3386, "step": 22026 }, { "epoch": 2.239426596177308, "grad_norm": 0.28143683075904846, "learning_rate": 6.7447361201743276e-06, "loss": 0.3457, "step": 22027 }, { "epoch": 2.239528263521757, "grad_norm": 0.27335309982299805, "learning_rate": 6.7444035370884995e-06, "loss": 0.3381, "step": 22028 }, { "epoch": 2.2396299308662058, "grad_norm": 0.2862352728843689, "learning_rate": 6.744070945214783e-06, "loss": 0.3029, "step": 22029 }, { "epoch": 2.2397315982106547, "grad_norm": 0.2686055600643158, "learning_rate": 6.743738344554851e-06, "loss": 0.3556, "step": 22030 }, { "epoch": 2.2398332655551036, "grad_norm": 0.2678678631782532, "learning_rate": 6.74340573511038e-06, "loss": 0.3501, "step": 22031 }, { "epoch": 2.2399349328995526, "grad_norm": 0.24577829241752625, "learning_rate": 6.7430731168830456e-06, "loss": 0.3604, "step": 22032 }, { "epoch": 2.2400366002440015, "grad_norm": 0.2967945337295532, "learning_rate": 6.742740489874522e-06, "loss": 0.3292, "step": 22033 }, { "epoch": 2.2401382675884505, "grad_norm": 0.2818870544433594, "learning_rate": 6.7424078540864845e-06, "loss": 0.3139, "step": 22034 }, { "epoch": 2.2402399349328994, "grad_norm": 0.25803685188293457, "learning_rate": 6.742075209520613e-06, "loss": 0.3216, "step": 22035 }, { "epoch": 2.2403416022773484, "grad_norm": 0.2724578380584717, "learning_rate": 6.741742556178579e-06, "loss": 0.3081, "step": 22036 }, { "epoch": 2.2404432696217973, "grad_norm": 0.24879387021064758, "learning_rate": 6.741409894062061e-06, "loss": 0.3057, "step": 22037 }, { "epoch": 2.2405449369662462, "grad_norm": 0.2832963764667511, "learning_rate": 6.741077223172733e-06, "loss": 0.3199, "step": 22038 }, { "epoch": 2.2406466043106956, "grad_norm": 0.29584869742393494, "learning_rate": 6.740744543512271e-06, "loss": 0.3623, "step": 22039 }, { "epoch": 2.2407482716551446, "grad_norm": 0.2790309488773346, "learning_rate": 6.740411855082354e-06, "loss": 0.333, "step": 22040 }, { "epoch": 2.2408499389995935, "grad_norm": 0.28178346157073975, "learning_rate": 6.740079157884654e-06, "loss": 0.3227, "step": 22041 }, { "epoch": 2.2409516063440424, "grad_norm": 0.27002114057540894, "learning_rate": 6.7397464519208486e-06, "loss": 0.3356, "step": 22042 }, { "epoch": 2.2410532736884914, "grad_norm": 0.2556006610393524, "learning_rate": 6.739413737192614e-06, "loss": 0.3516, "step": 22043 }, { "epoch": 2.2411549410329403, "grad_norm": 0.2972288131713867, "learning_rate": 6.739081013701626e-06, "loss": 0.331, "step": 22044 }, { "epoch": 2.2412566083773893, "grad_norm": 0.2903417646884918, "learning_rate": 6.738748281449563e-06, "loss": 0.3492, "step": 22045 }, { "epoch": 2.241358275721838, "grad_norm": 0.2918565571308136, "learning_rate": 6.738415540438097e-06, "loss": 0.348, "step": 22046 }, { "epoch": 2.241459943066287, "grad_norm": 0.26883912086486816, "learning_rate": 6.738082790668908e-06, "loss": 0.3037, "step": 22047 }, { "epoch": 2.241561610410736, "grad_norm": 0.27572503685951233, "learning_rate": 6.7377500321436704e-06, "loss": 0.3536, "step": 22048 }, { "epoch": 2.241663277755185, "grad_norm": 0.2605174779891968, "learning_rate": 6.737417264864061e-06, "loss": 0.3033, "step": 22049 }, { "epoch": 2.241764945099634, "grad_norm": 0.2961398959159851, "learning_rate": 6.7370844888317575e-06, "loss": 0.3445, "step": 22050 }, { "epoch": 2.241866612444083, "grad_norm": 0.28022849559783936, "learning_rate": 6.736751704048434e-06, "loss": 0.3294, "step": 22051 }, { "epoch": 2.241968279788532, "grad_norm": 0.2781258821487427, "learning_rate": 6.736418910515768e-06, "loss": 0.3537, "step": 22052 }, { "epoch": 2.242069947132981, "grad_norm": 0.26874348521232605, "learning_rate": 6.736086108235437e-06, "loss": 0.3353, "step": 22053 }, { "epoch": 2.2421716144774297, "grad_norm": 0.27862343192100525, "learning_rate": 6.735753297209116e-06, "loss": 0.3457, "step": 22054 }, { "epoch": 2.2422732818218787, "grad_norm": 0.28354135155677795, "learning_rate": 6.735420477438483e-06, "loss": 0.3509, "step": 22055 }, { "epoch": 2.2423749491663276, "grad_norm": 0.2652761936187744, "learning_rate": 6.7350876489252134e-06, "loss": 0.3593, "step": 22056 }, { "epoch": 2.2424766165107766, "grad_norm": 0.2649880051612854, "learning_rate": 6.734754811670985e-06, "loss": 0.2997, "step": 22057 }, { "epoch": 2.2425782838552255, "grad_norm": 0.2759997546672821, "learning_rate": 6.734421965677473e-06, "loss": 0.317, "step": 22058 }, { "epoch": 2.2426799511996744, "grad_norm": 0.26646777987480164, "learning_rate": 6.734089110946357e-06, "loss": 0.3126, "step": 22059 }, { "epoch": 2.242781618544124, "grad_norm": 0.26383230090141296, "learning_rate": 6.733756247479311e-06, "loss": 0.3284, "step": 22060 }, { "epoch": 2.2428832858885728, "grad_norm": 0.26159149408340454, "learning_rate": 6.733423375278014e-06, "loss": 0.3253, "step": 22061 }, { "epoch": 2.2429849532330217, "grad_norm": 0.2821465730667114, "learning_rate": 6.733090494344142e-06, "loss": 0.3536, "step": 22062 }, { "epoch": 2.2430866205774707, "grad_norm": 0.2599971890449524, "learning_rate": 6.7327576046793705e-06, "loss": 0.3347, "step": 22063 }, { "epoch": 2.2431882879219196, "grad_norm": 0.2691856920719147, "learning_rate": 6.732424706285379e-06, "loss": 0.3496, "step": 22064 }, { "epoch": 2.2432899552663685, "grad_norm": 0.2720828354358673, "learning_rate": 6.732091799163843e-06, "loss": 0.3768, "step": 22065 }, { "epoch": 2.2433916226108175, "grad_norm": 0.2615593671798706, "learning_rate": 6.73175888331644e-06, "loss": 0.3361, "step": 22066 }, { "epoch": 2.2434932899552664, "grad_norm": 0.26606109738349915, "learning_rate": 6.7314259587448486e-06, "loss": 0.3592, "step": 22067 }, { "epoch": 2.2435949572997154, "grad_norm": 0.24558041989803314, "learning_rate": 6.731093025450744e-06, "loss": 0.3109, "step": 22068 }, { "epoch": 2.2436966246441643, "grad_norm": 0.2783866226673126, "learning_rate": 6.730760083435804e-06, "loss": 0.3377, "step": 22069 }, { "epoch": 2.2437982919886132, "grad_norm": 0.27346983551979065, "learning_rate": 6.7304271327017055e-06, "loss": 0.3278, "step": 22070 }, { "epoch": 2.243899959333062, "grad_norm": 0.2652663588523865, "learning_rate": 6.7300941732501255e-06, "loss": 0.2946, "step": 22071 }, { "epoch": 2.244001626677511, "grad_norm": 0.2758316099643707, "learning_rate": 6.729761205082744e-06, "loss": 0.3356, "step": 22072 }, { "epoch": 2.24410329402196, "grad_norm": 0.29930368065834045, "learning_rate": 6.729428228201235e-06, "loss": 0.3095, "step": 22073 }, { "epoch": 2.244204961366409, "grad_norm": 0.2784011662006378, "learning_rate": 6.729095242607279e-06, "loss": 0.3501, "step": 22074 }, { "epoch": 2.244306628710858, "grad_norm": 0.2738361656665802, "learning_rate": 6.728762248302553e-06, "loss": 0.346, "step": 22075 }, { "epoch": 2.244408296055307, "grad_norm": 0.2701451778411865, "learning_rate": 6.7284292452887314e-06, "loss": 0.3516, "step": 22076 }, { "epoch": 2.244509963399756, "grad_norm": 0.26884299516677856, "learning_rate": 6.728096233567496e-06, "loss": 0.3304, "step": 22077 }, { "epoch": 2.2446116307442048, "grad_norm": 0.2556246519088745, "learning_rate": 6.72776321314052e-06, "loss": 0.3299, "step": 22078 }, { "epoch": 2.2447132980886537, "grad_norm": 0.26960626244544983, "learning_rate": 6.727430184009485e-06, "loss": 0.3199, "step": 22079 }, { "epoch": 2.244814965433103, "grad_norm": 0.2702639698982239, "learning_rate": 6.727097146176067e-06, "loss": 0.3492, "step": 22080 }, { "epoch": 2.244916632777552, "grad_norm": 0.26903992891311646, "learning_rate": 6.726764099641943e-06, "loss": 0.3426, "step": 22081 }, { "epoch": 2.245018300122001, "grad_norm": 0.29065006971359253, "learning_rate": 6.726431044408794e-06, "loss": 0.3426, "step": 22082 }, { "epoch": 2.24511996746645, "grad_norm": 0.2715165615081787, "learning_rate": 6.726097980478295e-06, "loss": 0.3137, "step": 22083 }, { "epoch": 2.245221634810899, "grad_norm": 0.26518574357032776, "learning_rate": 6.725764907852123e-06, "loss": 0.3079, "step": 22084 }, { "epoch": 2.245323302155348, "grad_norm": 0.29218924045562744, "learning_rate": 6.725431826531959e-06, "loss": 0.3394, "step": 22085 }, { "epoch": 2.2454249694997968, "grad_norm": 0.2744732201099396, "learning_rate": 6.725098736519478e-06, "loss": 0.3249, "step": 22086 }, { "epoch": 2.2455266368442457, "grad_norm": 0.2649216055870056, "learning_rate": 6.724765637816362e-06, "loss": 0.3448, "step": 22087 }, { "epoch": 2.2456283041886946, "grad_norm": 0.27885401248931885, "learning_rate": 6.724432530424285e-06, "loss": 0.3423, "step": 22088 }, { "epoch": 2.2457299715331436, "grad_norm": 0.26555442810058594, "learning_rate": 6.724099414344926e-06, "loss": 0.3529, "step": 22089 }, { "epoch": 2.2458316388775925, "grad_norm": 0.30077511072158813, "learning_rate": 6.723766289579965e-06, "loss": 0.3606, "step": 22090 }, { "epoch": 2.2459333062220415, "grad_norm": 0.30681511759757996, "learning_rate": 6.723433156131079e-06, "loss": 0.3687, "step": 22091 }, { "epoch": 2.2460349735664904, "grad_norm": 0.255729079246521, "learning_rate": 6.723100013999946e-06, "loss": 0.3365, "step": 22092 }, { "epoch": 2.2461366409109393, "grad_norm": 0.33017608523368835, "learning_rate": 6.722766863188245e-06, "loss": 0.3562, "step": 22093 }, { "epoch": 2.2462383082553883, "grad_norm": 0.28547343611717224, "learning_rate": 6.722433703697652e-06, "loss": 0.324, "step": 22094 }, { "epoch": 2.2463399755998372, "grad_norm": 0.2758254408836365, "learning_rate": 6.72210053552985e-06, "loss": 0.3476, "step": 22095 }, { "epoch": 2.246441642944286, "grad_norm": 0.2587487995624542, "learning_rate": 6.721767358686513e-06, "loss": 0.298, "step": 22096 }, { "epoch": 2.246543310288735, "grad_norm": 0.26152682304382324, "learning_rate": 6.721434173169323e-06, "loss": 0.3113, "step": 22097 }, { "epoch": 2.246644977633184, "grad_norm": 0.2832893133163452, "learning_rate": 6.721100978979955e-06, "loss": 0.3362, "step": 22098 }, { "epoch": 2.246746644977633, "grad_norm": 0.28702595829963684, "learning_rate": 6.720767776120088e-06, "loss": 0.3427, "step": 22099 }, { "epoch": 2.246848312322082, "grad_norm": 0.2980930209159851, "learning_rate": 6.720434564591405e-06, "loss": 0.3122, "step": 22100 }, { "epoch": 2.2469499796665313, "grad_norm": 0.24916544556617737, "learning_rate": 6.7201013443955805e-06, "loss": 0.3096, "step": 22101 }, { "epoch": 2.2470516470109803, "grad_norm": 0.28245308995246887, "learning_rate": 6.719768115534293e-06, "loss": 0.3365, "step": 22102 }, { "epoch": 2.247153314355429, "grad_norm": 0.29217976331710815, "learning_rate": 6.719434878009224e-06, "loss": 0.314, "step": 22103 }, { "epoch": 2.247254981699878, "grad_norm": 0.26870954036712646, "learning_rate": 6.719101631822049e-06, "loss": 0.3504, "step": 22104 }, { "epoch": 2.247356649044327, "grad_norm": 0.2586037516593933, "learning_rate": 6.718768376974449e-06, "loss": 0.3308, "step": 22105 }, { "epoch": 2.247458316388776, "grad_norm": 0.2907988727092743, "learning_rate": 6.718435113468105e-06, "loss": 0.3272, "step": 22106 }, { "epoch": 2.247559983733225, "grad_norm": 0.2556191086769104, "learning_rate": 6.718101841304689e-06, "loss": 0.3469, "step": 22107 }, { "epoch": 2.247661651077674, "grad_norm": 0.2607320249080658, "learning_rate": 6.7177685604858875e-06, "loss": 0.3294, "step": 22108 }, { "epoch": 2.247763318422123, "grad_norm": 0.2685670852661133, "learning_rate": 6.717435271013374e-06, "loss": 0.3109, "step": 22109 }, { "epoch": 2.247864985766572, "grad_norm": 0.29882514476776123, "learning_rate": 6.717101972888831e-06, "loss": 0.3125, "step": 22110 }, { "epoch": 2.2479666531110207, "grad_norm": 0.2764382064342499, "learning_rate": 6.716768666113936e-06, "loss": 0.3295, "step": 22111 }, { "epoch": 2.2480683204554697, "grad_norm": 0.27740564942359924, "learning_rate": 6.7164353506903694e-06, "loss": 0.3799, "step": 22112 }, { "epoch": 2.2481699877999186, "grad_norm": 0.2691420912742615, "learning_rate": 6.716102026619808e-06, "loss": 0.3376, "step": 22113 }, { "epoch": 2.2482716551443676, "grad_norm": 0.28527581691741943, "learning_rate": 6.715768693903932e-06, "loss": 0.3193, "step": 22114 }, { "epoch": 2.2483733224888165, "grad_norm": 0.27313777804374695, "learning_rate": 6.7154353525444216e-06, "loss": 0.3248, "step": 22115 }, { "epoch": 2.2484749898332654, "grad_norm": 0.257135808467865, "learning_rate": 6.715102002542957e-06, "loss": 0.3025, "step": 22116 }, { "epoch": 2.2485766571777144, "grad_norm": 0.2577250003814697, "learning_rate": 6.714768643901215e-06, "loss": 0.3225, "step": 22117 }, { "epoch": 2.2486783245221633, "grad_norm": 0.27622169256210327, "learning_rate": 6.714435276620876e-06, "loss": 0.3556, "step": 22118 }, { "epoch": 2.2487799918666123, "grad_norm": 0.2756257653236389, "learning_rate": 6.714101900703619e-06, "loss": 0.3408, "step": 22119 }, { "epoch": 2.248881659211061, "grad_norm": 0.25850915908813477, "learning_rate": 6.713768516151125e-06, "loss": 0.3164, "step": 22120 }, { "epoch": 2.2489833265555106, "grad_norm": 0.27342671155929565, "learning_rate": 6.713435122965072e-06, "loss": 0.3162, "step": 22121 }, { "epoch": 2.2490849938999595, "grad_norm": 0.2729913890361786, "learning_rate": 6.71310172114714e-06, "loss": 0.3377, "step": 22122 }, { "epoch": 2.2491866612444085, "grad_norm": 0.2713654339313507, "learning_rate": 6.712768310699008e-06, "loss": 0.3199, "step": 22123 }, { "epoch": 2.2492883285888574, "grad_norm": 0.2796414792537689, "learning_rate": 6.7124348916223565e-06, "loss": 0.3182, "step": 22124 }, { "epoch": 2.2493899959333064, "grad_norm": 0.2752959728240967, "learning_rate": 6.712101463918865e-06, "loss": 0.3608, "step": 22125 }, { "epoch": 2.2494916632777553, "grad_norm": 0.2568003535270691, "learning_rate": 6.711768027590214e-06, "loss": 0.3244, "step": 22126 }, { "epoch": 2.2495933306222042, "grad_norm": 0.2720777988433838, "learning_rate": 6.711434582638081e-06, "loss": 0.3481, "step": 22127 }, { "epoch": 2.249694997966653, "grad_norm": 0.26458805799484253, "learning_rate": 6.711101129064149e-06, "loss": 0.3223, "step": 22128 }, { "epoch": 2.249796665311102, "grad_norm": 0.27685168385505676, "learning_rate": 6.710767666870095e-06, "loss": 0.326, "step": 22129 }, { "epoch": 2.249898332655551, "grad_norm": 0.27420976758003235, "learning_rate": 6.7104341960576e-06, "loss": 0.3324, "step": 22130 }, { "epoch": 2.25, "grad_norm": 0.27638599276542664, "learning_rate": 6.710100716628345e-06, "loss": 0.316, "step": 22131 }, { "epoch": 2.250101667344449, "grad_norm": 0.25274330377578735, "learning_rate": 6.709767228584008e-06, "loss": 0.3422, "step": 22132 }, { "epoch": 2.250203334688898, "grad_norm": 0.25701838731765747, "learning_rate": 6.7094337319262705e-06, "loss": 0.318, "step": 22133 }, { "epoch": 2.250305002033347, "grad_norm": 0.2646348178386688, "learning_rate": 6.709100226656812e-06, "loss": 0.313, "step": 22134 }, { "epoch": 2.2504066693777958, "grad_norm": 0.27382734417915344, "learning_rate": 6.7087667127773115e-06, "loss": 0.3067, "step": 22135 }, { "epoch": 2.2505083367222447, "grad_norm": 0.280897855758667, "learning_rate": 6.708433190289452e-06, "loss": 0.3549, "step": 22136 }, { "epoch": 2.2506100040666936, "grad_norm": 0.26070883870124817, "learning_rate": 6.708099659194911e-06, "loss": 0.336, "step": 22137 }, { "epoch": 2.2507116714111426, "grad_norm": 0.26197031140327454, "learning_rate": 6.707766119495371e-06, "loss": 0.338, "step": 22138 }, { "epoch": 2.2508133387555915, "grad_norm": 0.2685897648334503, "learning_rate": 6.70743257119251e-06, "loss": 0.3101, "step": 22139 }, { "epoch": 2.2509150061000405, "grad_norm": 0.28717923164367676, "learning_rate": 6.7070990142880096e-06, "loss": 0.3432, "step": 22140 }, { "epoch": 2.2510166734444894, "grad_norm": 0.27486640214920044, "learning_rate": 6.706765448783551e-06, "loss": 0.3328, "step": 22141 }, { "epoch": 2.2511183407889384, "grad_norm": 0.2717234790325165, "learning_rate": 6.706431874680812e-06, "loss": 0.3708, "step": 22142 }, { "epoch": 2.2512200081333877, "grad_norm": 0.31178003549575806, "learning_rate": 6.7060982919814764e-06, "loss": 0.3157, "step": 22143 }, { "epoch": 2.2513216754778367, "grad_norm": 0.2688367962837219, "learning_rate": 6.705764700687223e-06, "loss": 0.3239, "step": 22144 }, { "epoch": 2.2514233428222856, "grad_norm": 0.3045336604118347, "learning_rate": 6.705431100799731e-06, "loss": 0.2975, "step": 22145 }, { "epoch": 2.2515250101667346, "grad_norm": 0.26470714807510376, "learning_rate": 6.7050974923206835e-06, "loss": 0.3596, "step": 22146 }, { "epoch": 2.2516266775111835, "grad_norm": 0.2726210057735443, "learning_rate": 6.704763875251759e-06, "loss": 0.3601, "step": 22147 }, { "epoch": 2.2517283448556324, "grad_norm": 0.30095064640045166, "learning_rate": 6.70443024959464e-06, "loss": 0.3717, "step": 22148 }, { "epoch": 2.2518300122000814, "grad_norm": 0.2602255046367645, "learning_rate": 6.704096615351007e-06, "loss": 0.3186, "step": 22149 }, { "epoch": 2.2519316795445303, "grad_norm": 0.2738190293312073, "learning_rate": 6.703762972522538e-06, "loss": 0.3696, "step": 22150 }, { "epoch": 2.2520333468889793, "grad_norm": 0.26897022128105164, "learning_rate": 6.703429321110919e-06, "loss": 0.3453, "step": 22151 }, { "epoch": 2.252135014233428, "grad_norm": 0.30406269431114197, "learning_rate": 6.703095661117825e-06, "loss": 0.3451, "step": 22152 }, { "epoch": 2.252236681577877, "grad_norm": 0.2750910520553589, "learning_rate": 6.702761992544943e-06, "loss": 0.3353, "step": 22153 }, { "epoch": 2.252338348922326, "grad_norm": 0.2693542242050171, "learning_rate": 6.702428315393947e-06, "loss": 0.3442, "step": 22154 }, { "epoch": 2.252440016266775, "grad_norm": 0.2660076320171356, "learning_rate": 6.702094629666525e-06, "loss": 0.3263, "step": 22155 }, { "epoch": 2.252541683611224, "grad_norm": 0.26589855551719666, "learning_rate": 6.701760935364353e-06, "loss": 0.3354, "step": 22156 }, { "epoch": 2.252643350955673, "grad_norm": 0.26480597257614136, "learning_rate": 6.701427232489114e-06, "loss": 0.359, "step": 22157 }, { "epoch": 2.252745018300122, "grad_norm": 0.2533845603466034, "learning_rate": 6.7010935210424895e-06, "loss": 0.3592, "step": 22158 }, { "epoch": 2.252846685644571, "grad_norm": 0.25837963819503784, "learning_rate": 6.70075980102616e-06, "loss": 0.3538, "step": 22159 }, { "epoch": 2.2529483529890197, "grad_norm": 0.7289409041404724, "learning_rate": 6.7004260724418056e-06, "loss": 0.3375, "step": 22160 }, { "epoch": 2.253050020333469, "grad_norm": 0.29066231846809387, "learning_rate": 6.700092335291109e-06, "loss": 0.3432, "step": 22161 }, { "epoch": 2.253151687677918, "grad_norm": 0.2871956527233124, "learning_rate": 6.699758589575752e-06, "loss": 0.3272, "step": 22162 }, { "epoch": 2.253253355022367, "grad_norm": 0.2627505958080292, "learning_rate": 6.6994248352974155e-06, "loss": 0.3202, "step": 22163 }, { "epoch": 2.253355022366816, "grad_norm": 0.28090277314186096, "learning_rate": 6.69909107245778e-06, "loss": 0.3717, "step": 22164 }, { "epoch": 2.253456689711265, "grad_norm": 0.27803903818130493, "learning_rate": 6.698757301058528e-06, "loss": 0.3239, "step": 22165 }, { "epoch": 2.253558357055714, "grad_norm": 0.26151540875434875, "learning_rate": 6.698423521101339e-06, "loss": 0.336, "step": 22166 }, { "epoch": 2.2536600244001628, "grad_norm": 0.2870011031627655, "learning_rate": 6.698089732587897e-06, "loss": 0.3463, "step": 22167 }, { "epoch": 2.2537616917446117, "grad_norm": 0.27280309796333313, "learning_rate": 6.697755935519883e-06, "loss": 0.359, "step": 22168 }, { "epoch": 2.2538633590890607, "grad_norm": 0.305555135011673, "learning_rate": 6.697422129898977e-06, "loss": 0.3444, "step": 22169 }, { "epoch": 2.2539650264335096, "grad_norm": 0.28563106060028076, "learning_rate": 6.697088315726861e-06, "loss": 0.3428, "step": 22170 }, { "epoch": 2.2540666937779585, "grad_norm": 0.27545592188835144, "learning_rate": 6.6967544930052185e-06, "loss": 0.3568, "step": 22171 }, { "epoch": 2.2541683611224075, "grad_norm": 0.28343337774276733, "learning_rate": 6.69642066173573e-06, "loss": 0.3447, "step": 22172 }, { "epoch": 2.2542700284668564, "grad_norm": 0.26913052797317505, "learning_rate": 6.696086821920077e-06, "loss": 0.343, "step": 22173 }, { "epoch": 2.2543716958113054, "grad_norm": 0.27345576882362366, "learning_rate": 6.695752973559943e-06, "loss": 0.3077, "step": 22174 }, { "epoch": 2.2544733631557543, "grad_norm": 0.3005659878253937, "learning_rate": 6.695419116657005e-06, "loss": 0.3624, "step": 22175 }, { "epoch": 2.2545750305002032, "grad_norm": 0.2782443165779114, "learning_rate": 6.695085251212951e-06, "loss": 0.3143, "step": 22176 }, { "epoch": 2.254676697844652, "grad_norm": 0.2729343771934509, "learning_rate": 6.694751377229461e-06, "loss": 0.3173, "step": 22177 }, { "epoch": 2.254778365189101, "grad_norm": 0.28863948583602905, "learning_rate": 6.694417494708215e-06, "loss": 0.3283, "step": 22178 }, { "epoch": 2.25488003253355, "grad_norm": 0.3040478527545929, "learning_rate": 6.694083603650897e-06, "loss": 0.3245, "step": 22179 }, { "epoch": 2.254981699877999, "grad_norm": 0.2882050573825836, "learning_rate": 6.693749704059187e-06, "loss": 0.3576, "step": 22180 }, { "epoch": 2.255083367222448, "grad_norm": 0.2793672978878021, "learning_rate": 6.69341579593477e-06, "loss": 0.3666, "step": 22181 }, { "epoch": 2.255185034566897, "grad_norm": 0.26499420404434204, "learning_rate": 6.693081879279326e-06, "loss": 0.3334, "step": 22182 }, { "epoch": 2.255286701911346, "grad_norm": 0.27698633074760437, "learning_rate": 6.692747954094539e-06, "loss": 0.3176, "step": 22183 }, { "epoch": 2.2553883692557952, "grad_norm": 0.2712647616863251, "learning_rate": 6.692414020382089e-06, "loss": 0.3332, "step": 22184 }, { "epoch": 2.255490036600244, "grad_norm": 0.27687469124794006, "learning_rate": 6.692080078143658e-06, "loss": 0.335, "step": 22185 }, { "epoch": 2.255591703944693, "grad_norm": 0.28201979398727417, "learning_rate": 6.691746127380932e-06, "loss": 0.3449, "step": 22186 }, { "epoch": 2.255693371289142, "grad_norm": 0.27400436997413635, "learning_rate": 6.69141216809559e-06, "loss": 0.334, "step": 22187 }, { "epoch": 2.255795038633591, "grad_norm": 0.2823873460292816, "learning_rate": 6.691078200289316e-06, "loss": 0.3529, "step": 22188 }, { "epoch": 2.25589670597804, "grad_norm": 0.27560386061668396, "learning_rate": 6.690744223963791e-06, "loss": 0.3383, "step": 22189 }, { "epoch": 2.255998373322489, "grad_norm": 0.2943474352359772, "learning_rate": 6.690410239120697e-06, "loss": 0.3464, "step": 22190 }, { "epoch": 2.256100040666938, "grad_norm": 0.2797778844833374, "learning_rate": 6.6900762457617205e-06, "loss": 0.3609, "step": 22191 }, { "epoch": 2.2562017080113868, "grad_norm": 0.262008398771286, "learning_rate": 6.6897422438885415e-06, "loss": 0.3183, "step": 22192 }, { "epoch": 2.2563033753558357, "grad_norm": 0.2702389359474182, "learning_rate": 6.689408233502842e-06, "loss": 0.336, "step": 22193 }, { "epoch": 2.2564050427002846, "grad_norm": 0.2894071638584137, "learning_rate": 6.689074214606306e-06, "loss": 0.3536, "step": 22194 }, { "epoch": 2.2565067100447336, "grad_norm": 0.27356234192848206, "learning_rate": 6.688740187200613e-06, "loss": 0.3102, "step": 22195 }, { "epoch": 2.2566083773891825, "grad_norm": 0.2675958275794983, "learning_rate": 6.68840615128745e-06, "loss": 0.2958, "step": 22196 }, { "epoch": 2.2567100447336315, "grad_norm": 0.2679191529750824, "learning_rate": 6.688072106868498e-06, "loss": 0.318, "step": 22197 }, { "epoch": 2.2568117120780804, "grad_norm": 0.2738729417324066, "learning_rate": 6.68773805394544e-06, "loss": 0.3333, "step": 22198 }, { "epoch": 2.2569133794225293, "grad_norm": 0.2722804546356201, "learning_rate": 6.687403992519959e-06, "loss": 0.3279, "step": 22199 }, { "epoch": 2.2570150467669783, "grad_norm": 0.29632753133773804, "learning_rate": 6.687069922593736e-06, "loss": 0.3573, "step": 22200 }, { "epoch": 2.2571167141114272, "grad_norm": 0.2729772925376892, "learning_rate": 6.686735844168457e-06, "loss": 0.3076, "step": 22201 }, { "epoch": 2.2572183814558766, "grad_norm": 0.2837916910648346, "learning_rate": 6.686401757245804e-06, "loss": 0.3454, "step": 22202 }, { "epoch": 2.2573200488003256, "grad_norm": 0.2827942669391632, "learning_rate": 6.686067661827459e-06, "loss": 0.3063, "step": 22203 }, { "epoch": 2.2574217161447745, "grad_norm": 0.2563241422176361, "learning_rate": 6.685733557915106e-06, "loss": 0.3107, "step": 22204 }, { "epoch": 2.2575233834892234, "grad_norm": 0.2674501836299896, "learning_rate": 6.685399445510428e-06, "loss": 0.309, "step": 22205 }, { "epoch": 2.2576250508336724, "grad_norm": 0.2604645788669586, "learning_rate": 6.6850653246151074e-06, "loss": 0.294, "step": 22206 }, { "epoch": 2.2577267181781213, "grad_norm": 0.25259944796562195, "learning_rate": 6.6847311952308295e-06, "loss": 0.3204, "step": 22207 }, { "epoch": 2.2578283855225703, "grad_norm": 0.2581633925437927, "learning_rate": 6.6843970573592755e-06, "loss": 0.3016, "step": 22208 }, { "epoch": 2.257930052867019, "grad_norm": 0.26397109031677246, "learning_rate": 6.684062911002129e-06, "loss": 0.3315, "step": 22209 }, { "epoch": 2.258031720211468, "grad_norm": 0.25946909189224243, "learning_rate": 6.683728756161074e-06, "loss": 0.3423, "step": 22210 }, { "epoch": 2.258133387555917, "grad_norm": 0.28791114687919617, "learning_rate": 6.683394592837793e-06, "loss": 0.3412, "step": 22211 }, { "epoch": 2.258235054900366, "grad_norm": 0.2834514081478119, "learning_rate": 6.683060421033971e-06, "loss": 0.327, "step": 22212 }, { "epoch": 2.258336722244815, "grad_norm": 0.28375232219696045, "learning_rate": 6.68272624075129e-06, "loss": 0.3526, "step": 22213 }, { "epoch": 2.258438389589264, "grad_norm": 0.27378904819488525, "learning_rate": 6.682392051991436e-06, "loss": 0.3524, "step": 22214 }, { "epoch": 2.258540056933713, "grad_norm": 0.29353243112564087, "learning_rate": 6.682057854756088e-06, "loss": 0.3153, "step": 22215 }, { "epoch": 2.258641724278162, "grad_norm": 0.260127991437912, "learning_rate": 6.681723649046933e-06, "loss": 0.3103, "step": 22216 }, { "epoch": 2.2587433916226107, "grad_norm": 0.2651733160018921, "learning_rate": 6.681389434865655e-06, "loss": 0.3036, "step": 22217 }, { "epoch": 2.2588450589670597, "grad_norm": 0.2663267254829407, "learning_rate": 6.681055212213935e-06, "loss": 0.368, "step": 22218 }, { "epoch": 2.2589467263115086, "grad_norm": 0.26539376378059387, "learning_rate": 6.68072098109346e-06, "loss": 0.3512, "step": 22219 }, { "epoch": 2.2590483936559576, "grad_norm": 0.2728043496608734, "learning_rate": 6.680386741505911e-06, "loss": 0.3495, "step": 22220 }, { "epoch": 2.2591500610004065, "grad_norm": 0.2986866235733032, "learning_rate": 6.680052493452972e-06, "loss": 0.3727, "step": 22221 }, { "epoch": 2.2592517283448554, "grad_norm": 0.2696594297885895, "learning_rate": 6.67971823693633e-06, "loss": 0.3004, "step": 22222 }, { "epoch": 2.2593533956893044, "grad_norm": 0.2660442590713501, "learning_rate": 6.679383971957665e-06, "loss": 0.3378, "step": 22223 }, { "epoch": 2.2594550630337533, "grad_norm": 0.273914635181427, "learning_rate": 6.679049698518664e-06, "loss": 0.3198, "step": 22224 }, { "epoch": 2.2595567303782027, "grad_norm": 0.268004447221756, "learning_rate": 6.678715416621007e-06, "loss": 0.3345, "step": 22225 }, { "epoch": 2.2596583977226516, "grad_norm": 0.2628301680088043, "learning_rate": 6.678381126266383e-06, "loss": 0.3339, "step": 22226 }, { "epoch": 2.2597600650671006, "grad_norm": 0.2558860778808594, "learning_rate": 6.678046827456474e-06, "loss": 0.3421, "step": 22227 }, { "epoch": 2.2598617324115495, "grad_norm": 0.28041619062423706, "learning_rate": 6.677712520192963e-06, "loss": 0.3364, "step": 22228 }, { "epoch": 2.2599633997559985, "grad_norm": 0.289174884557724, "learning_rate": 6.677378204477535e-06, "loss": 0.3712, "step": 22229 }, { "epoch": 2.2600650671004474, "grad_norm": 0.2721618711948395, "learning_rate": 6.677043880311874e-06, "loss": 0.3612, "step": 22230 }, { "epoch": 2.2601667344448964, "grad_norm": 0.28790783882141113, "learning_rate": 6.676709547697664e-06, "loss": 0.3191, "step": 22231 }, { "epoch": 2.2602684017893453, "grad_norm": 0.28427329659461975, "learning_rate": 6.676375206636592e-06, "loss": 0.3287, "step": 22232 }, { "epoch": 2.2603700691337942, "grad_norm": 0.27676236629486084, "learning_rate": 6.676040857130337e-06, "loss": 0.3152, "step": 22233 }, { "epoch": 2.260471736478243, "grad_norm": 0.2855077087879181, "learning_rate": 6.675706499180589e-06, "loss": 0.332, "step": 22234 }, { "epoch": 2.260573403822692, "grad_norm": 0.2789171040058136, "learning_rate": 6.67537213278903e-06, "loss": 0.3272, "step": 22235 }, { "epoch": 2.260675071167141, "grad_norm": 0.2961440980434418, "learning_rate": 6.675037757957341e-06, "loss": 0.2912, "step": 22236 }, { "epoch": 2.26077673851159, "grad_norm": 0.2987298369407654, "learning_rate": 6.674703374687213e-06, "loss": 0.3382, "step": 22237 }, { "epoch": 2.260878405856039, "grad_norm": 0.28327038884162903, "learning_rate": 6.674368982980326e-06, "loss": 0.3195, "step": 22238 }, { "epoch": 2.260980073200488, "grad_norm": 0.2715336084365845, "learning_rate": 6.674034582838367e-06, "loss": 0.3289, "step": 22239 }, { "epoch": 2.261081740544937, "grad_norm": 0.27860409021377563, "learning_rate": 6.673700174263019e-06, "loss": 0.3219, "step": 22240 }, { "epoch": 2.2611834078893858, "grad_norm": 0.2645847201347351, "learning_rate": 6.673365757255966e-06, "loss": 0.3501, "step": 22241 }, { "epoch": 2.2612850752338347, "grad_norm": 0.3161609172821045, "learning_rate": 6.673031331818896e-06, "loss": 0.3756, "step": 22242 }, { "epoch": 2.261386742578284, "grad_norm": 0.2795141637325287, "learning_rate": 6.67269689795349e-06, "loss": 0.3111, "step": 22243 }, { "epoch": 2.261488409922733, "grad_norm": 0.2830479145050049, "learning_rate": 6.672362455661437e-06, "loss": 0.3504, "step": 22244 }, { "epoch": 2.261590077267182, "grad_norm": 0.2602842152118683, "learning_rate": 6.672028004944417e-06, "loss": 0.3615, "step": 22245 }, { "epoch": 2.261691744611631, "grad_norm": 0.28956133127212524, "learning_rate": 6.671693545804118e-06, "loss": 0.3591, "step": 22246 }, { "epoch": 2.26179341195608, "grad_norm": 0.29418110847473145, "learning_rate": 6.6713590782422245e-06, "loss": 0.338, "step": 22247 }, { "epoch": 2.261895079300529, "grad_norm": 0.2784655690193176, "learning_rate": 6.67102460226042e-06, "loss": 0.3379, "step": 22248 }, { "epoch": 2.2619967466449777, "grad_norm": 0.2640987038612366, "learning_rate": 6.670690117860392e-06, "loss": 0.34, "step": 22249 }, { "epoch": 2.2620984139894267, "grad_norm": 0.2569500803947449, "learning_rate": 6.6703556250438255e-06, "loss": 0.3306, "step": 22250 }, { "epoch": 2.2622000813338756, "grad_norm": 0.28507882356643677, "learning_rate": 6.6700211238124005e-06, "loss": 0.3511, "step": 22251 }, { "epoch": 2.2623017486783246, "grad_norm": 0.2739182114601135, "learning_rate": 6.669686614167807e-06, "loss": 0.3419, "step": 22252 }, { "epoch": 2.2624034160227735, "grad_norm": 0.27632513642311096, "learning_rate": 6.669352096111732e-06, "loss": 0.3145, "step": 22253 }, { "epoch": 2.2625050833672224, "grad_norm": 0.25085312128067017, "learning_rate": 6.669017569645855e-06, "loss": 0.3343, "step": 22254 }, { "epoch": 2.2626067507116714, "grad_norm": 0.2604244649410248, "learning_rate": 6.668683034771866e-06, "loss": 0.3225, "step": 22255 }, { "epoch": 2.2627084180561203, "grad_norm": 0.2737564742565155, "learning_rate": 6.668348491491445e-06, "loss": 0.318, "step": 22256 }, { "epoch": 2.2628100854005693, "grad_norm": 0.2770301401615143, "learning_rate": 6.668013939806284e-06, "loss": 0.3176, "step": 22257 }, { "epoch": 2.262911752745018, "grad_norm": 0.25508633255958557, "learning_rate": 6.667679379718065e-06, "loss": 0.3449, "step": 22258 }, { "epoch": 2.263013420089467, "grad_norm": 0.26630914211273193, "learning_rate": 6.6673448112284724e-06, "loss": 0.3407, "step": 22259 }, { "epoch": 2.263115087433916, "grad_norm": 0.2674797475337982, "learning_rate": 6.667010234339194e-06, "loss": 0.3174, "step": 22260 }, { "epoch": 2.263216754778365, "grad_norm": 0.26353901624679565, "learning_rate": 6.666675649051912e-06, "loss": 0.3441, "step": 22261 }, { "epoch": 2.263318422122814, "grad_norm": 0.26478755474090576, "learning_rate": 6.666341055368316e-06, "loss": 0.3395, "step": 22262 }, { "epoch": 2.263420089467263, "grad_norm": 0.2817961573600769, "learning_rate": 6.66600645329009e-06, "loss": 0.3565, "step": 22263 }, { "epoch": 2.263521756811712, "grad_norm": 0.2749713659286499, "learning_rate": 6.66567184281892e-06, "loss": 0.3353, "step": 22264 }, { "epoch": 2.263623424156161, "grad_norm": 0.27278465032577515, "learning_rate": 6.6653372239564895e-06, "loss": 0.3281, "step": 22265 }, { "epoch": 2.26372509150061, "grad_norm": 0.25865045189857483, "learning_rate": 6.665002596704484e-06, "loss": 0.3416, "step": 22266 }, { "epoch": 2.263826758845059, "grad_norm": 0.2752017080783844, "learning_rate": 6.664667961064595e-06, "loss": 0.3504, "step": 22267 }, { "epoch": 2.263928426189508, "grad_norm": 0.26826396584510803, "learning_rate": 6.664333317038503e-06, "loss": 0.3353, "step": 22268 }, { "epoch": 2.264030093533957, "grad_norm": 0.2780221104621887, "learning_rate": 6.663998664627894e-06, "loss": 0.3166, "step": 22269 }, { "epoch": 2.264131760878406, "grad_norm": 0.28704753518104553, "learning_rate": 6.663664003834457e-06, "loss": 0.3418, "step": 22270 }, { "epoch": 2.264233428222855, "grad_norm": 0.2957645654678345, "learning_rate": 6.663329334659875e-06, "loss": 0.3408, "step": 22271 }, { "epoch": 2.264335095567304, "grad_norm": 0.27894991636276245, "learning_rate": 6.662994657105834e-06, "loss": 0.318, "step": 22272 }, { "epoch": 2.2644367629117528, "grad_norm": 0.2841476500034332, "learning_rate": 6.662659971174023e-06, "loss": 0.3139, "step": 22273 }, { "epoch": 2.2645384302562017, "grad_norm": 0.2568395733833313, "learning_rate": 6.662325276866126e-06, "loss": 0.3539, "step": 22274 }, { "epoch": 2.2646400976006507, "grad_norm": 0.2739400267601013, "learning_rate": 6.66199057418383e-06, "loss": 0.3328, "step": 22275 }, { "epoch": 2.2647417649450996, "grad_norm": 0.27831709384918213, "learning_rate": 6.661655863128817e-06, "loss": 0.319, "step": 22276 }, { "epoch": 2.2648434322895485, "grad_norm": 0.24458734691143036, "learning_rate": 6.661321143702779e-06, "loss": 0.3389, "step": 22277 }, { "epoch": 2.2649450996339975, "grad_norm": 0.27316197752952576, "learning_rate": 6.660986415907401e-06, "loss": 0.3233, "step": 22278 }, { "epoch": 2.2650467669784464, "grad_norm": 0.2743206322193146, "learning_rate": 6.660651679744367e-06, "loss": 0.3305, "step": 22279 }, { "epoch": 2.2651484343228954, "grad_norm": 0.2847524583339691, "learning_rate": 6.660316935215364e-06, "loss": 0.3381, "step": 22280 }, { "epoch": 2.2652501016673443, "grad_norm": 0.2567531168460846, "learning_rate": 6.659982182322079e-06, "loss": 0.3376, "step": 22281 }, { "epoch": 2.2653517690117932, "grad_norm": 0.28616178035736084, "learning_rate": 6.659647421066198e-06, "loss": 0.3448, "step": 22282 }, { "epoch": 2.265453436356242, "grad_norm": 0.2708198130130768, "learning_rate": 6.659312651449409e-06, "loss": 0.34, "step": 22283 }, { "epoch": 2.2655551037006916, "grad_norm": 0.2624492347240448, "learning_rate": 6.6589778734733955e-06, "loss": 0.3169, "step": 22284 }, { "epoch": 2.2656567710451405, "grad_norm": 0.23854945600032806, "learning_rate": 6.658643087139847e-06, "loss": 0.3411, "step": 22285 }, { "epoch": 2.2657584383895895, "grad_norm": 0.26421093940734863, "learning_rate": 6.658308292450448e-06, "loss": 0.3267, "step": 22286 }, { "epoch": 2.2658601057340384, "grad_norm": 0.26813945174217224, "learning_rate": 6.657973489406886e-06, "loss": 0.3441, "step": 22287 }, { "epoch": 2.2659617730784873, "grad_norm": 0.28600749373435974, "learning_rate": 6.657638678010848e-06, "loss": 0.3634, "step": 22288 }, { "epoch": 2.2660634404229363, "grad_norm": 0.25450634956359863, "learning_rate": 6.657303858264019e-06, "loss": 0.3202, "step": 22289 }, { "epoch": 2.2661651077673852, "grad_norm": 0.2745002210140228, "learning_rate": 6.65696903016809e-06, "loss": 0.3378, "step": 22290 }, { "epoch": 2.266266775111834, "grad_norm": 0.2723734378814697, "learning_rate": 6.656634193724742e-06, "loss": 0.3226, "step": 22291 }, { "epoch": 2.266368442456283, "grad_norm": 0.28046613931655884, "learning_rate": 6.656299348935664e-06, "loss": 0.3357, "step": 22292 }, { "epoch": 2.266470109800732, "grad_norm": 0.26479411125183105, "learning_rate": 6.655964495802546e-06, "loss": 0.3367, "step": 22293 }, { "epoch": 2.266571777145181, "grad_norm": 0.27555200457572937, "learning_rate": 6.655629634327071e-06, "loss": 0.351, "step": 22294 }, { "epoch": 2.26667344448963, "grad_norm": 0.2731706500053406, "learning_rate": 6.655294764510929e-06, "loss": 0.3296, "step": 22295 }, { "epoch": 2.266775111834079, "grad_norm": 0.23375044763088226, "learning_rate": 6.654959886355803e-06, "loss": 0.3405, "step": 22296 }, { "epoch": 2.266876779178528, "grad_norm": 0.2915167212486267, "learning_rate": 6.654624999863383e-06, "loss": 0.3553, "step": 22297 }, { "epoch": 2.2669784465229768, "grad_norm": 0.2827353775501251, "learning_rate": 6.654290105035356e-06, "loss": 0.3621, "step": 22298 }, { "epoch": 2.2670801138674257, "grad_norm": 0.2799430191516876, "learning_rate": 6.653955201873408e-06, "loss": 0.3446, "step": 22299 }, { "epoch": 2.2671817812118746, "grad_norm": 0.2624984085559845, "learning_rate": 6.653620290379228e-06, "loss": 0.3339, "step": 22300 }, { "epoch": 2.2672834485563236, "grad_norm": 0.2561343014240265, "learning_rate": 6.653285370554501e-06, "loss": 0.33, "step": 22301 }, { "epoch": 2.2673851159007725, "grad_norm": 0.26630184054374695, "learning_rate": 6.652950442400914e-06, "loss": 0.3705, "step": 22302 }, { "epoch": 2.2674867832452215, "grad_norm": 0.26179239153862, "learning_rate": 6.652615505920157e-06, "loss": 0.3221, "step": 22303 }, { "epoch": 2.2675884505896704, "grad_norm": 0.2742201089859009, "learning_rate": 6.652280561113915e-06, "loss": 0.3599, "step": 22304 }, { "epoch": 2.2676901179341193, "grad_norm": 0.2781257629394531, "learning_rate": 6.651945607983878e-06, "loss": 0.3272, "step": 22305 }, { "epoch": 2.2677917852785683, "grad_norm": 0.28274664282798767, "learning_rate": 6.65161064653173e-06, "loss": 0.3029, "step": 22306 }, { "epoch": 2.2678934526230177, "grad_norm": 0.298623263835907, "learning_rate": 6.651275676759159e-06, "loss": 0.3537, "step": 22307 }, { "epoch": 2.2679951199674666, "grad_norm": 0.2975842356681824, "learning_rate": 6.650940698667856e-06, "loss": 0.354, "step": 22308 }, { "epoch": 2.2680967873119156, "grad_norm": 0.2782003879547119, "learning_rate": 6.650605712259504e-06, "loss": 0.295, "step": 22309 }, { "epoch": 2.2681984546563645, "grad_norm": 0.2839276194572449, "learning_rate": 6.650270717535794e-06, "loss": 0.3301, "step": 22310 }, { "epoch": 2.2683001220008134, "grad_norm": 0.26817288994789124, "learning_rate": 6.649935714498412e-06, "loss": 0.3176, "step": 22311 }, { "epoch": 2.2684017893452624, "grad_norm": 0.27634745836257935, "learning_rate": 6.649600703149045e-06, "loss": 0.3367, "step": 22312 }, { "epoch": 2.2685034566897113, "grad_norm": 0.2508288621902466, "learning_rate": 6.649265683489383e-06, "loss": 0.3425, "step": 22313 }, { "epoch": 2.2686051240341603, "grad_norm": 0.2715594470500946, "learning_rate": 6.648930655521112e-06, "loss": 0.3754, "step": 22314 }, { "epoch": 2.268706791378609, "grad_norm": 0.27537116408348083, "learning_rate": 6.64859561924592e-06, "loss": 0.3423, "step": 22315 }, { "epoch": 2.268808458723058, "grad_norm": 0.2624955475330353, "learning_rate": 6.648260574665496e-06, "loss": 0.3306, "step": 22316 }, { "epoch": 2.268910126067507, "grad_norm": 0.26783275604248047, "learning_rate": 6.647925521781524e-06, "loss": 0.3087, "step": 22317 }, { "epoch": 2.269011793411956, "grad_norm": 0.2817271649837494, "learning_rate": 6.647590460595699e-06, "loss": 0.3349, "step": 22318 }, { "epoch": 2.269113460756405, "grad_norm": 0.28392642736434937, "learning_rate": 6.647255391109701e-06, "loss": 0.38, "step": 22319 }, { "epoch": 2.269215128100854, "grad_norm": 0.26139819622039795, "learning_rate": 6.646920313325224e-06, "loss": 0.3277, "step": 22320 }, { "epoch": 2.269316795445303, "grad_norm": 0.2875094413757324, "learning_rate": 6.646585227243955e-06, "loss": 0.3464, "step": 22321 }, { "epoch": 2.269418462789752, "grad_norm": 0.27584952116012573, "learning_rate": 6.646250132867576e-06, "loss": 0.3414, "step": 22322 }, { "epoch": 2.2695201301342007, "grad_norm": 0.25757917761802673, "learning_rate": 6.645915030197785e-06, "loss": 0.3178, "step": 22323 }, { "epoch": 2.2696217974786497, "grad_norm": 0.2783837616443634, "learning_rate": 6.645579919236262e-06, "loss": 0.3274, "step": 22324 }, { "epoch": 2.269723464823099, "grad_norm": 0.2589520812034607, "learning_rate": 6.645244799984701e-06, "loss": 0.3634, "step": 22325 }, { "epoch": 2.269825132167548, "grad_norm": 0.2577745318412781, "learning_rate": 6.644909672444787e-06, "loss": 0.3297, "step": 22326 }, { "epoch": 2.269926799511997, "grad_norm": 0.2744342088699341, "learning_rate": 6.644574536618207e-06, "loss": 0.315, "step": 22327 }, { "epoch": 2.270028466856446, "grad_norm": 0.29894694685935974, "learning_rate": 6.644239392506654e-06, "loss": 0.3028, "step": 22328 }, { "epoch": 2.270130134200895, "grad_norm": 0.27748140692710876, "learning_rate": 6.643904240111813e-06, "loss": 0.3158, "step": 22329 }, { "epoch": 2.2702318015453438, "grad_norm": 0.29647380113601685, "learning_rate": 6.643569079435373e-06, "loss": 0.333, "step": 22330 }, { "epoch": 2.2703334688897927, "grad_norm": 0.298128604888916, "learning_rate": 6.6432339104790214e-06, "loss": 0.3745, "step": 22331 }, { "epoch": 2.2704351362342416, "grad_norm": 0.26936057209968567, "learning_rate": 6.642898733244447e-06, "loss": 0.3273, "step": 22332 }, { "epoch": 2.2705368035786906, "grad_norm": 0.2544366419315338, "learning_rate": 6.642563547733341e-06, "loss": 0.3766, "step": 22333 }, { "epoch": 2.2706384709231395, "grad_norm": 0.2710229158401489, "learning_rate": 6.642228353947391e-06, "loss": 0.3364, "step": 22334 }, { "epoch": 2.2707401382675885, "grad_norm": 0.2773207426071167, "learning_rate": 6.641893151888284e-06, "loss": 0.3147, "step": 22335 }, { "epoch": 2.2708418056120374, "grad_norm": 0.24972324073314667, "learning_rate": 6.641557941557709e-06, "loss": 0.3298, "step": 22336 }, { "epoch": 2.2709434729564864, "grad_norm": 0.2615332007408142, "learning_rate": 6.641222722957355e-06, "loss": 0.3348, "step": 22337 }, { "epoch": 2.2710451403009353, "grad_norm": 0.27270159125328064, "learning_rate": 6.640887496088911e-06, "loss": 0.3561, "step": 22338 }, { "epoch": 2.2711468076453842, "grad_norm": 0.2948532998561859, "learning_rate": 6.640552260954067e-06, "loss": 0.3391, "step": 22339 }, { "epoch": 2.271248474989833, "grad_norm": 0.26019105315208435, "learning_rate": 6.640217017554509e-06, "loss": 0.2938, "step": 22340 }, { "epoch": 2.271350142334282, "grad_norm": 0.2612003684043884, "learning_rate": 6.639881765891929e-06, "loss": 0.3067, "step": 22341 }, { "epoch": 2.271451809678731, "grad_norm": 0.2853916883468628, "learning_rate": 6.639546505968011e-06, "loss": 0.3458, "step": 22342 }, { "epoch": 2.27155347702318, "grad_norm": 0.28796643018722534, "learning_rate": 6.639211237784451e-06, "loss": 0.354, "step": 22343 }, { "epoch": 2.271655144367629, "grad_norm": 0.27860376238822937, "learning_rate": 6.638875961342934e-06, "loss": 0.3084, "step": 22344 }, { "epoch": 2.271756811712078, "grad_norm": 0.2792622745037079, "learning_rate": 6.6385406766451475e-06, "loss": 0.3389, "step": 22345 }, { "epoch": 2.271858479056527, "grad_norm": 0.25216805934906006, "learning_rate": 6.638205383692784e-06, "loss": 0.3041, "step": 22346 }, { "epoch": 2.2719601464009758, "grad_norm": 0.28077012300491333, "learning_rate": 6.637870082487529e-06, "loss": 0.3081, "step": 22347 }, { "epoch": 2.272061813745425, "grad_norm": 0.27995172142982483, "learning_rate": 6.637534773031075e-06, "loss": 0.3285, "step": 22348 }, { "epoch": 2.272163481089874, "grad_norm": 0.285706490278244, "learning_rate": 6.6371994553251116e-06, "loss": 0.3468, "step": 22349 }, { "epoch": 2.272265148434323, "grad_norm": 0.25641152262687683, "learning_rate": 6.6368641293713246e-06, "loss": 0.3272, "step": 22350 }, { "epoch": 2.272366815778772, "grad_norm": 0.2694236934185028, "learning_rate": 6.636528795171407e-06, "loss": 0.3301, "step": 22351 }, { "epoch": 2.272468483123221, "grad_norm": 0.280543714761734, "learning_rate": 6.636193452727044e-06, "loss": 0.338, "step": 22352 }, { "epoch": 2.27257015046767, "grad_norm": 0.26025310158729553, "learning_rate": 6.635858102039926e-06, "loss": 0.3102, "step": 22353 }, { "epoch": 2.272671817812119, "grad_norm": 0.2524573504924774, "learning_rate": 6.635522743111747e-06, "loss": 0.3143, "step": 22354 }, { "epoch": 2.2727734851565677, "grad_norm": 0.3088201582431793, "learning_rate": 6.635187375944191e-06, "loss": 0.3748, "step": 22355 }, { "epoch": 2.2728751525010167, "grad_norm": 0.256947785615921, "learning_rate": 6.63485200053895e-06, "loss": 0.3358, "step": 22356 }, { "epoch": 2.2729768198454656, "grad_norm": 0.27571457624435425, "learning_rate": 6.6345166168977125e-06, "loss": 0.3261, "step": 22357 }, { "epoch": 2.2730784871899146, "grad_norm": 0.2750506103038788, "learning_rate": 6.634181225022169e-06, "loss": 0.3149, "step": 22358 }, { "epoch": 2.2731801545343635, "grad_norm": 0.2811477780342102, "learning_rate": 6.6338458249140105e-06, "loss": 0.3723, "step": 22359 }, { "epoch": 2.2732818218788124, "grad_norm": 0.2693103551864624, "learning_rate": 6.633510416574923e-06, "loss": 0.3325, "step": 22360 }, { "epoch": 2.2733834892232614, "grad_norm": 0.2686522901058197, "learning_rate": 6.6331750000065995e-06, "loss": 0.3449, "step": 22361 }, { "epoch": 2.2734851565677103, "grad_norm": 0.25720328092575073, "learning_rate": 6.632839575210728e-06, "loss": 0.3238, "step": 22362 }, { "epoch": 2.2735868239121593, "grad_norm": 0.27634069323539734, "learning_rate": 6.632504142188998e-06, "loss": 0.3222, "step": 22363 }, { "epoch": 2.273688491256608, "grad_norm": 0.2739117741584778, "learning_rate": 6.632168700943101e-06, "loss": 0.3599, "step": 22364 }, { "epoch": 2.273790158601057, "grad_norm": 0.26977086067199707, "learning_rate": 6.631833251474727e-06, "loss": 0.3092, "step": 22365 }, { "epoch": 2.2738918259455065, "grad_norm": 0.27554619312286377, "learning_rate": 6.631497793785563e-06, "loss": 0.3349, "step": 22366 }, { "epoch": 2.2739934932899555, "grad_norm": 0.29066866636276245, "learning_rate": 6.6311623278773e-06, "loss": 0.3142, "step": 22367 }, { "epoch": 2.2740951606344044, "grad_norm": 0.2832354009151459, "learning_rate": 6.630826853751631e-06, "loss": 0.3328, "step": 22368 }, { "epoch": 2.2741968279788534, "grad_norm": 0.2969520688056946, "learning_rate": 6.630491371410244e-06, "loss": 0.3293, "step": 22369 }, { "epoch": 2.2742984953233023, "grad_norm": 0.29968371987342834, "learning_rate": 6.630155880854827e-06, "loss": 0.3405, "step": 22370 }, { "epoch": 2.2744001626677512, "grad_norm": 0.28937286138534546, "learning_rate": 6.629820382087073e-06, "loss": 0.3431, "step": 22371 }, { "epoch": 2.2745018300122, "grad_norm": 0.2868953347206116, "learning_rate": 6.629484875108672e-06, "loss": 0.3308, "step": 22372 }, { "epoch": 2.274603497356649, "grad_norm": 0.2955876588821411, "learning_rate": 6.629149359921312e-06, "loss": 0.3517, "step": 22373 }, { "epoch": 2.274705164701098, "grad_norm": 0.27398481965065, "learning_rate": 6.6288138365266865e-06, "loss": 0.3378, "step": 22374 }, { "epoch": 2.274806832045547, "grad_norm": 0.26274555921554565, "learning_rate": 6.628478304926482e-06, "loss": 0.3178, "step": 22375 }, { "epoch": 2.274908499389996, "grad_norm": 0.26438742876052856, "learning_rate": 6.628142765122392e-06, "loss": 0.3458, "step": 22376 }, { "epoch": 2.275010166734445, "grad_norm": 0.2895812392234802, "learning_rate": 6.627807217116104e-06, "loss": 0.3255, "step": 22377 }, { "epoch": 2.275111834078894, "grad_norm": 0.2615852653980255, "learning_rate": 6.627471660909312e-06, "loss": 0.3227, "step": 22378 }, { "epoch": 2.2752135014233428, "grad_norm": 0.252156525850296, "learning_rate": 6.6271360965037035e-06, "loss": 0.3337, "step": 22379 }, { "epoch": 2.2753151687677917, "grad_norm": 0.2744598686695099, "learning_rate": 6.6268005239009704e-06, "loss": 0.3392, "step": 22380 }, { "epoch": 2.2754168361122407, "grad_norm": 0.2869609594345093, "learning_rate": 6.626464943102804e-06, "loss": 0.3543, "step": 22381 }, { "epoch": 2.2755185034566896, "grad_norm": 0.2742932140827179, "learning_rate": 6.626129354110891e-06, "loss": 0.3223, "step": 22382 }, { "epoch": 2.2756201708011385, "grad_norm": 0.26560643315315247, "learning_rate": 6.625793756926926e-06, "loss": 0.3462, "step": 22383 }, { "epoch": 2.2757218381455875, "grad_norm": 0.2677633762359619, "learning_rate": 6.6254581515526e-06, "loss": 0.3337, "step": 22384 }, { "epoch": 2.2758235054900364, "grad_norm": 0.2610286772251129, "learning_rate": 6.6251225379896e-06, "loss": 0.3341, "step": 22385 }, { "epoch": 2.2759251728344854, "grad_norm": 0.2775194048881531, "learning_rate": 6.624786916239621e-06, "loss": 0.3383, "step": 22386 }, { "epoch": 2.2760268401789343, "grad_norm": 0.2700900137424469, "learning_rate": 6.6244512863043495e-06, "loss": 0.3181, "step": 22387 }, { "epoch": 2.2761285075233832, "grad_norm": 0.26177138090133667, "learning_rate": 6.624115648185479e-06, "loss": 0.3242, "step": 22388 }, { "epoch": 2.2762301748678326, "grad_norm": 0.28066715598106384, "learning_rate": 6.6237800018846994e-06, "loss": 0.3336, "step": 22389 }, { "epoch": 2.2763318422122816, "grad_norm": 0.2766965329647064, "learning_rate": 6.623444347403703e-06, "loss": 0.3546, "step": 22390 }, { "epoch": 2.2764335095567305, "grad_norm": 0.29279735684394836, "learning_rate": 6.623108684744179e-06, "loss": 0.3232, "step": 22391 }, { "epoch": 2.2765351769011795, "grad_norm": 0.2715385854244232, "learning_rate": 6.622773013907819e-06, "loss": 0.3307, "step": 22392 }, { "epoch": 2.2766368442456284, "grad_norm": 0.2657240927219391, "learning_rate": 6.6224373348963155e-06, "loss": 0.3299, "step": 22393 }, { "epoch": 2.2767385115900773, "grad_norm": 0.2963373064994812, "learning_rate": 6.622101647711357e-06, "loss": 0.3275, "step": 22394 }, { "epoch": 2.2768401789345263, "grad_norm": 0.2736072838306427, "learning_rate": 6.6217659523546375e-06, "loss": 0.348, "step": 22395 }, { "epoch": 2.2769418462789752, "grad_norm": 0.2792106568813324, "learning_rate": 6.6214302488278445e-06, "loss": 0.3258, "step": 22396 }, { "epoch": 2.277043513623424, "grad_norm": 0.28270846605300903, "learning_rate": 6.621094537132673e-06, "loss": 0.3414, "step": 22397 }, { "epoch": 2.277145180967873, "grad_norm": 0.2856012284755707, "learning_rate": 6.62075881727081e-06, "loss": 0.3338, "step": 22398 }, { "epoch": 2.277246848312322, "grad_norm": 0.2788102626800537, "learning_rate": 6.620423089243953e-06, "loss": 0.3601, "step": 22399 }, { "epoch": 2.277348515656771, "grad_norm": 0.28355661034584045, "learning_rate": 6.6200873530537854e-06, "loss": 0.3364, "step": 22400 }, { "epoch": 2.27745018300122, "grad_norm": 0.2728498876094818, "learning_rate": 6.619751608702005e-06, "loss": 0.3105, "step": 22401 }, { "epoch": 2.277551850345669, "grad_norm": 0.2827141284942627, "learning_rate": 6.619415856190301e-06, "loss": 0.339, "step": 22402 }, { "epoch": 2.277653517690118, "grad_norm": 0.27481645345687866, "learning_rate": 6.619080095520364e-06, "loss": 0.3252, "step": 22403 }, { "epoch": 2.2777551850345668, "grad_norm": 0.2669721841812134, "learning_rate": 6.618744326693887e-06, "loss": 0.3405, "step": 22404 }, { "epoch": 2.2778568523790157, "grad_norm": 0.265983521938324, "learning_rate": 6.61840854971256e-06, "loss": 0.3162, "step": 22405 }, { "epoch": 2.2779585197234646, "grad_norm": 0.27341774106025696, "learning_rate": 6.618072764578076e-06, "loss": 0.3499, "step": 22406 }, { "epoch": 2.278060187067914, "grad_norm": 0.27569901943206787, "learning_rate": 6.6177369712921244e-06, "loss": 0.3126, "step": 22407 }, { "epoch": 2.278161854412363, "grad_norm": 0.28864774107933044, "learning_rate": 6.6174011698564e-06, "loss": 0.3336, "step": 22408 }, { "epoch": 2.278263521756812, "grad_norm": 0.2899038791656494, "learning_rate": 6.617065360272592e-06, "loss": 0.3179, "step": 22409 }, { "epoch": 2.278365189101261, "grad_norm": 0.2615843713283539, "learning_rate": 6.616729542542395e-06, "loss": 0.3221, "step": 22410 }, { "epoch": 2.27846685644571, "grad_norm": 0.30374428629875183, "learning_rate": 6.616393716667496e-06, "loss": 0.3391, "step": 22411 }, { "epoch": 2.2785685237901587, "grad_norm": 0.26177021861076355, "learning_rate": 6.616057882649592e-06, "loss": 0.3133, "step": 22412 }, { "epoch": 2.2786701911346077, "grad_norm": 0.2647416889667511, "learning_rate": 6.615722040490369e-06, "loss": 0.3383, "step": 22413 }, { "epoch": 2.2787718584790566, "grad_norm": 0.2614296078681946, "learning_rate": 6.6153861901915255e-06, "loss": 0.3358, "step": 22414 }, { "epoch": 2.2788735258235056, "grad_norm": 0.29207172989845276, "learning_rate": 6.6150503317547495e-06, "loss": 0.3183, "step": 22415 }, { "epoch": 2.2789751931679545, "grad_norm": 0.2735096216201782, "learning_rate": 6.614714465181733e-06, "loss": 0.3244, "step": 22416 }, { "epoch": 2.2790768605124034, "grad_norm": 0.265523225069046, "learning_rate": 6.61437859047417e-06, "loss": 0.3497, "step": 22417 }, { "epoch": 2.2791785278568524, "grad_norm": 0.2696317136287689, "learning_rate": 6.614042707633749e-06, "loss": 0.3366, "step": 22418 }, { "epoch": 2.2792801952013013, "grad_norm": 0.2930479645729065, "learning_rate": 6.613706816662166e-06, "loss": 0.3447, "step": 22419 }, { "epoch": 2.2793818625457503, "grad_norm": 0.27350783348083496, "learning_rate": 6.613370917561111e-06, "loss": 0.3185, "step": 22420 }, { "epoch": 2.279483529890199, "grad_norm": 0.2818070948123932, "learning_rate": 6.613035010332278e-06, "loss": 0.3292, "step": 22421 }, { "epoch": 2.279585197234648, "grad_norm": 0.2592756450176239, "learning_rate": 6.612699094977357e-06, "loss": 0.3659, "step": 22422 }, { "epoch": 2.279686864579097, "grad_norm": 0.27442124485969543, "learning_rate": 6.612363171498041e-06, "loss": 0.3128, "step": 22423 }, { "epoch": 2.279788531923546, "grad_norm": 0.2706603705883026, "learning_rate": 6.612027239896022e-06, "loss": 0.3225, "step": 22424 }, { "epoch": 2.279890199267995, "grad_norm": 0.2589648962020874, "learning_rate": 6.6116913001729946e-06, "loss": 0.3372, "step": 22425 }, { "epoch": 2.279991866612444, "grad_norm": 0.2832757532596588, "learning_rate": 6.6113553523306475e-06, "loss": 0.3468, "step": 22426 }, { "epoch": 2.280093533956893, "grad_norm": 0.2853237986564636, "learning_rate": 6.611019396370677e-06, "loss": 0.3321, "step": 22427 }, { "epoch": 2.280195201301342, "grad_norm": 0.2646644413471222, "learning_rate": 6.610683432294772e-06, "loss": 0.3309, "step": 22428 }, { "epoch": 2.2802968686457907, "grad_norm": 0.27001190185546875, "learning_rate": 6.610347460104627e-06, "loss": 0.318, "step": 22429 }, { "epoch": 2.28039853599024, "grad_norm": 0.27987203001976013, "learning_rate": 6.6100114798019344e-06, "loss": 0.3251, "step": 22430 }, { "epoch": 2.280500203334689, "grad_norm": 0.3051806688308716, "learning_rate": 6.609675491388386e-06, "loss": 0.3215, "step": 22431 }, { "epoch": 2.280601870679138, "grad_norm": 0.26876771450042725, "learning_rate": 6.609339494865676e-06, "loss": 0.353, "step": 22432 }, { "epoch": 2.280703538023587, "grad_norm": 0.30018746852874756, "learning_rate": 6.609003490235495e-06, "loss": 0.351, "step": 22433 }, { "epoch": 2.280805205368036, "grad_norm": 0.26966071128845215, "learning_rate": 6.608667477499537e-06, "loss": 0.3269, "step": 22434 }, { "epoch": 2.280906872712485, "grad_norm": 0.27129021286964417, "learning_rate": 6.608331456659496e-06, "loss": 0.3315, "step": 22435 }, { "epoch": 2.2810085400569338, "grad_norm": 0.29274749755859375, "learning_rate": 6.607995427717061e-06, "loss": 0.3191, "step": 22436 }, { "epoch": 2.2811102074013827, "grad_norm": 0.2851417362689972, "learning_rate": 6.60765939067393e-06, "loss": 0.3566, "step": 22437 }, { "epoch": 2.2812118747458316, "grad_norm": 0.28758862614631653, "learning_rate": 6.607323345531791e-06, "loss": 0.3329, "step": 22438 }, { "epoch": 2.2813135420902806, "grad_norm": 0.2537180483341217, "learning_rate": 6.606987292292339e-06, "loss": 0.3543, "step": 22439 }, { "epoch": 2.2814152094347295, "grad_norm": 0.27773749828338623, "learning_rate": 6.606651230957268e-06, "loss": 0.3316, "step": 22440 }, { "epoch": 2.2815168767791785, "grad_norm": 0.30182915925979614, "learning_rate": 6.606315161528267e-06, "loss": 0.3351, "step": 22441 }, { "epoch": 2.2816185441236274, "grad_norm": 0.2617759704589844, "learning_rate": 6.6059790840070345e-06, "loss": 0.3149, "step": 22442 }, { "epoch": 2.2817202114680764, "grad_norm": 0.27491340041160583, "learning_rate": 6.6056429983952605e-06, "loss": 0.3268, "step": 22443 }, { "epoch": 2.2818218788125253, "grad_norm": 0.27316081523895264, "learning_rate": 6.605306904694638e-06, "loss": 0.3249, "step": 22444 }, { "epoch": 2.2819235461569742, "grad_norm": 0.29127177596092224, "learning_rate": 6.604970802906862e-06, "loss": 0.3378, "step": 22445 }, { "epoch": 2.282025213501423, "grad_norm": 0.27472594380378723, "learning_rate": 6.6046346930336245e-06, "loss": 0.3224, "step": 22446 }, { "epoch": 2.282126880845872, "grad_norm": 0.2788863778114319, "learning_rate": 6.604298575076618e-06, "loss": 0.3227, "step": 22447 }, { "epoch": 2.2822285481903215, "grad_norm": 0.26662346720695496, "learning_rate": 6.6039624490375365e-06, "loss": 0.3282, "step": 22448 }, { "epoch": 2.2823302155347704, "grad_norm": 0.2665465772151947, "learning_rate": 6.603626314918074e-06, "loss": 0.3268, "step": 22449 }, { "epoch": 2.2824318828792194, "grad_norm": 0.2781141698360443, "learning_rate": 6.603290172719922e-06, "loss": 0.338, "step": 22450 }, { "epoch": 2.2825335502236683, "grad_norm": 0.2694913148880005, "learning_rate": 6.602954022444777e-06, "loss": 0.328, "step": 22451 }, { "epoch": 2.2826352175681173, "grad_norm": 0.2696443200111389, "learning_rate": 6.6026178640943295e-06, "loss": 0.3604, "step": 22452 }, { "epoch": 2.282736884912566, "grad_norm": 0.28636258840560913, "learning_rate": 6.602281697670273e-06, "loss": 0.3276, "step": 22453 }, { "epoch": 2.282838552257015, "grad_norm": 0.272401362657547, "learning_rate": 6.601945523174303e-06, "loss": 0.3221, "step": 22454 }, { "epoch": 2.282940219601464, "grad_norm": 0.29197031259536743, "learning_rate": 6.601609340608113e-06, "loss": 0.3525, "step": 22455 }, { "epoch": 2.283041886945913, "grad_norm": 0.26321330666542053, "learning_rate": 6.601273149973394e-06, "loss": 0.3236, "step": 22456 }, { "epoch": 2.283143554290362, "grad_norm": 0.27000150084495544, "learning_rate": 6.600936951271844e-06, "loss": 0.3161, "step": 22457 }, { "epoch": 2.283245221634811, "grad_norm": 0.25808650255203247, "learning_rate": 6.600600744505151e-06, "loss": 0.342, "step": 22458 }, { "epoch": 2.28334688897926, "grad_norm": 0.28801843523979187, "learning_rate": 6.600264529675012e-06, "loss": 0.3232, "step": 22459 }, { "epoch": 2.283448556323709, "grad_norm": 0.2965128719806671, "learning_rate": 6.5999283067831236e-06, "loss": 0.3239, "step": 22460 }, { "epoch": 2.2835502236681577, "grad_norm": 0.2692859172821045, "learning_rate": 6.5995920758311736e-06, "loss": 0.3477, "step": 22461 }, { "epoch": 2.2836518910126067, "grad_norm": 0.267177015542984, "learning_rate": 6.599255836820861e-06, "loss": 0.3688, "step": 22462 }, { "epoch": 2.2837535583570556, "grad_norm": 0.2897726893424988, "learning_rate": 6.598919589753876e-06, "loss": 0.3266, "step": 22463 }, { "epoch": 2.2838552257015046, "grad_norm": 0.2738911509513855, "learning_rate": 6.598583334631914e-06, "loss": 0.3472, "step": 22464 }, { "epoch": 2.2839568930459535, "grad_norm": 0.2707030177116394, "learning_rate": 6.59824707145667e-06, "loss": 0.3057, "step": 22465 }, { "epoch": 2.2840585603904024, "grad_norm": 0.2758219242095947, "learning_rate": 6.597910800229837e-06, "loss": 0.3609, "step": 22466 }, { "epoch": 2.2841602277348514, "grad_norm": 0.2804577052593231, "learning_rate": 6.5975745209531095e-06, "loss": 0.3249, "step": 22467 }, { "epoch": 2.2842618950793003, "grad_norm": 0.26893672347068787, "learning_rate": 6.59723823362818e-06, "loss": 0.3214, "step": 22468 }, { "epoch": 2.2843635624237493, "grad_norm": 0.2732626497745514, "learning_rate": 6.596901938256744e-06, "loss": 0.3486, "step": 22469 }, { "epoch": 2.284465229768198, "grad_norm": 0.26351064443588257, "learning_rate": 6.596565634840496e-06, "loss": 0.3414, "step": 22470 }, { "epoch": 2.2845668971126476, "grad_norm": 0.2805549204349518, "learning_rate": 6.5962293233811295e-06, "loss": 0.3186, "step": 22471 }, { "epoch": 2.2846685644570965, "grad_norm": 0.28448450565338135, "learning_rate": 6.595893003880339e-06, "loss": 0.3454, "step": 22472 }, { "epoch": 2.2847702318015455, "grad_norm": 0.27221494913101196, "learning_rate": 6.5955566763398194e-06, "loss": 0.3315, "step": 22473 }, { "epoch": 2.2848718991459944, "grad_norm": 0.2627335488796234, "learning_rate": 6.595220340761264e-06, "loss": 0.3497, "step": 22474 }, { "epoch": 2.2849735664904434, "grad_norm": 0.3035906255245209, "learning_rate": 6.594883997146368e-06, "loss": 0.3525, "step": 22475 }, { "epoch": 2.2850752338348923, "grad_norm": 0.2580781877040863, "learning_rate": 6.594547645496824e-06, "loss": 0.3348, "step": 22476 }, { "epoch": 2.2851769011793412, "grad_norm": 0.2811824679374695, "learning_rate": 6.5942112858143295e-06, "loss": 0.3099, "step": 22477 }, { "epoch": 2.28527856852379, "grad_norm": 0.266019344329834, "learning_rate": 6.593874918100575e-06, "loss": 0.3352, "step": 22478 }, { "epoch": 2.285380235868239, "grad_norm": 0.2727169096469879, "learning_rate": 6.59353854235726e-06, "loss": 0.3272, "step": 22479 }, { "epoch": 2.285481903212688, "grad_norm": 0.24577262997627258, "learning_rate": 6.593202158586076e-06, "loss": 0.3337, "step": 22480 }, { "epoch": 2.285583570557137, "grad_norm": 0.26424533128738403, "learning_rate": 6.592865766788719e-06, "loss": 0.3451, "step": 22481 }, { "epoch": 2.285685237901586, "grad_norm": 0.2732398808002472, "learning_rate": 6.59252936696688e-06, "loss": 0.3165, "step": 22482 }, { "epoch": 2.285786905246035, "grad_norm": 0.25470706820487976, "learning_rate": 6.592192959122259e-06, "loss": 0.313, "step": 22483 }, { "epoch": 2.285888572590484, "grad_norm": 0.2685019075870514, "learning_rate": 6.591856543256546e-06, "loss": 0.3355, "step": 22484 }, { "epoch": 2.2859902399349328, "grad_norm": 0.266409695148468, "learning_rate": 6.591520119371439e-06, "loss": 0.3361, "step": 22485 }, { "epoch": 2.2860919072793817, "grad_norm": 0.25863760709762573, "learning_rate": 6.591183687468634e-06, "loss": 0.3599, "step": 22486 }, { "epoch": 2.2861935746238307, "grad_norm": 0.29313933849334717, "learning_rate": 6.5908472475498206e-06, "loss": 0.355, "step": 22487 }, { "epoch": 2.2862952419682796, "grad_norm": 0.25436869263648987, "learning_rate": 6.590510799616698e-06, "loss": 0.3249, "step": 22488 }, { "epoch": 2.286396909312729, "grad_norm": 0.2794700562953949, "learning_rate": 6.59017434367096e-06, "loss": 0.3315, "step": 22489 }, { "epoch": 2.286498576657178, "grad_norm": 0.276493638753891, "learning_rate": 6.589837879714303e-06, "loss": 0.3411, "step": 22490 }, { "epoch": 2.286600244001627, "grad_norm": 0.27711811661720276, "learning_rate": 6.589501407748419e-06, "loss": 0.3418, "step": 22491 }, { "epoch": 2.286701911346076, "grad_norm": 0.2809414565563202, "learning_rate": 6.5891649277750045e-06, "loss": 0.3376, "step": 22492 }, { "epoch": 2.2868035786905248, "grad_norm": 0.2872079014778137, "learning_rate": 6.588828439795753e-06, "loss": 0.3362, "step": 22493 }, { "epoch": 2.2869052460349737, "grad_norm": 0.27549034357070923, "learning_rate": 6.588491943812364e-06, "loss": 0.3482, "step": 22494 }, { "epoch": 2.2870069133794226, "grad_norm": 0.25680485367774963, "learning_rate": 6.58815543982653e-06, "loss": 0.3214, "step": 22495 }, { "epoch": 2.2871085807238716, "grad_norm": 0.2758972644805908, "learning_rate": 6.587818927839946e-06, "loss": 0.3389, "step": 22496 }, { "epoch": 2.2872102480683205, "grad_norm": 0.2606014907360077, "learning_rate": 6.5874824078543075e-06, "loss": 0.3233, "step": 22497 }, { "epoch": 2.2873119154127695, "grad_norm": 0.29218795895576477, "learning_rate": 6.58714587987131e-06, "loss": 0.3176, "step": 22498 }, { "epoch": 2.2874135827572184, "grad_norm": 0.27430570125579834, "learning_rate": 6.586809343892648e-06, "loss": 0.3328, "step": 22499 }, { "epoch": 2.2875152501016673, "grad_norm": 0.2542375326156616, "learning_rate": 6.586472799920018e-06, "loss": 0.3318, "step": 22500 }, { "epoch": 2.2876169174461163, "grad_norm": 0.27848169207572937, "learning_rate": 6.586136247955115e-06, "loss": 0.3349, "step": 22501 }, { "epoch": 2.2877185847905652, "grad_norm": 0.2834635078907013, "learning_rate": 6.585799687999634e-06, "loss": 0.3472, "step": 22502 }, { "epoch": 2.287820252135014, "grad_norm": 0.2916382849216461, "learning_rate": 6.585463120055273e-06, "loss": 0.3482, "step": 22503 }, { "epoch": 2.287921919479463, "grad_norm": 0.2653869092464447, "learning_rate": 6.585126544123723e-06, "loss": 0.3486, "step": 22504 }, { "epoch": 2.288023586823912, "grad_norm": 0.27595555782318115, "learning_rate": 6.584789960206684e-06, "loss": 0.3246, "step": 22505 }, { "epoch": 2.288125254168361, "grad_norm": 0.26600492000579834, "learning_rate": 6.58445336830585e-06, "loss": 0.3746, "step": 22506 }, { "epoch": 2.28822692151281, "grad_norm": 0.2798207402229309, "learning_rate": 6.584116768422914e-06, "loss": 0.3333, "step": 22507 }, { "epoch": 2.288328588857259, "grad_norm": 0.2919713258743286, "learning_rate": 6.583780160559577e-06, "loss": 0.3719, "step": 22508 }, { "epoch": 2.288430256201708, "grad_norm": 0.2904365062713623, "learning_rate": 6.583443544717529e-06, "loss": 0.3377, "step": 22509 }, { "epoch": 2.2885319235461568, "grad_norm": 0.2840706408023834, "learning_rate": 6.583106920898471e-06, "loss": 0.3392, "step": 22510 }, { "epoch": 2.2886335908906057, "grad_norm": 0.27845436334609985, "learning_rate": 6.582770289104097e-06, "loss": 0.3509, "step": 22511 }, { "epoch": 2.288735258235055, "grad_norm": 0.27977243065834045, "learning_rate": 6.5824336493360996e-06, "loss": 0.3285, "step": 22512 }, { "epoch": 2.288836925579504, "grad_norm": 0.2550337016582489, "learning_rate": 6.58209700159618e-06, "loss": 0.3187, "step": 22513 }, { "epoch": 2.288938592923953, "grad_norm": 0.27007344365119934, "learning_rate": 6.58176034588603e-06, "loss": 0.3294, "step": 22514 }, { "epoch": 2.289040260268402, "grad_norm": 0.2694646120071411, "learning_rate": 6.581423682207348e-06, "loss": 0.3175, "step": 22515 }, { "epoch": 2.289141927612851, "grad_norm": 0.26870474219322205, "learning_rate": 6.581087010561831e-06, "loss": 0.303, "step": 22516 }, { "epoch": 2.2892435949573, "grad_norm": 0.257033109664917, "learning_rate": 6.580750330951171e-06, "loss": 0.3201, "step": 22517 }, { "epoch": 2.2893452623017487, "grad_norm": 0.266635000705719, "learning_rate": 6.580413643377068e-06, "loss": 0.3403, "step": 22518 }, { "epoch": 2.2894469296461977, "grad_norm": 0.29032373428344727, "learning_rate": 6.580076947841215e-06, "loss": 0.3276, "step": 22519 }, { "epoch": 2.2895485969906466, "grad_norm": 0.2485780268907547, "learning_rate": 6.5797402443453105e-06, "loss": 0.3191, "step": 22520 }, { "epoch": 2.2896502643350956, "grad_norm": 0.2929592728614807, "learning_rate": 6.579403532891051e-06, "loss": 0.3253, "step": 22521 }, { "epoch": 2.2897519316795445, "grad_norm": 0.27254384756088257, "learning_rate": 6.5790668134801305e-06, "loss": 0.3461, "step": 22522 }, { "epoch": 2.2898535990239934, "grad_norm": 0.28998419642448425, "learning_rate": 6.578730086114248e-06, "loss": 0.3163, "step": 22523 }, { "epoch": 2.2899552663684424, "grad_norm": 0.2599670886993408, "learning_rate": 6.578393350795097e-06, "loss": 0.3057, "step": 22524 }, { "epoch": 2.2900569337128913, "grad_norm": 0.2986548840999603, "learning_rate": 6.578056607524376e-06, "loss": 0.3489, "step": 22525 }, { "epoch": 2.2901586010573403, "grad_norm": 0.2630329132080078, "learning_rate": 6.577719856303781e-06, "loss": 0.3246, "step": 22526 }, { "epoch": 2.290260268401789, "grad_norm": 0.2889310419559479, "learning_rate": 6.5773830971350085e-06, "loss": 0.3347, "step": 22527 }, { "epoch": 2.290361935746238, "grad_norm": 0.2641580402851105, "learning_rate": 6.577046330019755e-06, "loss": 0.3424, "step": 22528 }, { "epoch": 2.290463603090687, "grad_norm": 0.26316019892692566, "learning_rate": 6.576709554959716e-06, "loss": 0.3438, "step": 22529 }, { "epoch": 2.2905652704351365, "grad_norm": 0.24765782058238983, "learning_rate": 6.576372771956588e-06, "loss": 0.3489, "step": 22530 }, { "epoch": 2.2906669377795854, "grad_norm": 0.2857043445110321, "learning_rate": 6.576035981012071e-06, "loss": 0.3045, "step": 22531 }, { "epoch": 2.2907686051240344, "grad_norm": 0.2696968615055084, "learning_rate": 6.575699182127857e-06, "loss": 0.3386, "step": 22532 }, { "epoch": 2.2908702724684833, "grad_norm": 0.26629331707954407, "learning_rate": 6.575362375305647e-06, "loss": 0.3319, "step": 22533 }, { "epoch": 2.2909719398129322, "grad_norm": 0.2595309317111969, "learning_rate": 6.575025560547134e-06, "loss": 0.3263, "step": 22534 }, { "epoch": 2.291073607157381, "grad_norm": 0.2540833652019501, "learning_rate": 6.5746887378540185e-06, "loss": 0.348, "step": 22535 }, { "epoch": 2.29117527450183, "grad_norm": 0.2747079133987427, "learning_rate": 6.5743519072279936e-06, "loss": 0.2945, "step": 22536 }, { "epoch": 2.291276941846279, "grad_norm": 0.2711058259010315, "learning_rate": 6.574015068670758e-06, "loss": 0.3552, "step": 22537 }, { "epoch": 2.291378609190728, "grad_norm": 0.2516978681087494, "learning_rate": 6.57367822218401e-06, "loss": 0.3249, "step": 22538 }, { "epoch": 2.291480276535177, "grad_norm": 0.2587060034275055, "learning_rate": 6.573341367769444e-06, "loss": 0.3127, "step": 22539 }, { "epoch": 2.291581943879626, "grad_norm": 0.28010275959968567, "learning_rate": 6.573004505428757e-06, "loss": 0.3168, "step": 22540 }, { "epoch": 2.291683611224075, "grad_norm": 0.2812904715538025, "learning_rate": 6.5726676351636495e-06, "loss": 0.3258, "step": 22541 }, { "epoch": 2.2917852785685238, "grad_norm": 0.2764574885368347, "learning_rate": 6.572330756975814e-06, "loss": 0.3326, "step": 22542 }, { "epoch": 2.2918869459129727, "grad_norm": 0.2665814161300659, "learning_rate": 6.571993870866952e-06, "loss": 0.3435, "step": 22543 }, { "epoch": 2.2919886132574216, "grad_norm": 0.2594881057739258, "learning_rate": 6.5716569768387565e-06, "loss": 0.3212, "step": 22544 }, { "epoch": 2.2920902806018706, "grad_norm": 0.2593337595462799, "learning_rate": 6.571320074892926e-06, "loss": 0.3162, "step": 22545 }, { "epoch": 2.2921919479463195, "grad_norm": 0.2731756865978241, "learning_rate": 6.570983165031162e-06, "loss": 0.3634, "step": 22546 }, { "epoch": 2.2922936152907685, "grad_norm": 0.2880948483943939, "learning_rate": 6.570646247255154e-06, "loss": 0.3467, "step": 22547 }, { "epoch": 2.2923952826352174, "grad_norm": 0.2765721082687378, "learning_rate": 6.570309321566606e-06, "loss": 0.3383, "step": 22548 }, { "epoch": 2.2924969499796664, "grad_norm": 0.27431026101112366, "learning_rate": 6.569972387967211e-06, "loss": 0.3596, "step": 22549 }, { "epoch": 2.2925986173241153, "grad_norm": 0.29152733087539673, "learning_rate": 6.56963544645867e-06, "loss": 0.3317, "step": 22550 }, { "epoch": 2.2927002846685642, "grad_norm": 0.27962160110473633, "learning_rate": 6.569298497042678e-06, "loss": 0.3386, "step": 22551 }, { "epoch": 2.292801952013013, "grad_norm": 0.2516520619392395, "learning_rate": 6.568961539720933e-06, "loss": 0.3419, "step": 22552 }, { "epoch": 2.2929036193574626, "grad_norm": 0.28070077300071716, "learning_rate": 6.568624574495133e-06, "loss": 0.3456, "step": 22553 }, { "epoch": 2.2930052867019115, "grad_norm": 0.2761688232421875, "learning_rate": 6.568287601366974e-06, "loss": 0.3131, "step": 22554 }, { "epoch": 2.2931069540463604, "grad_norm": 0.2787117660045624, "learning_rate": 6.567950620338155e-06, "loss": 0.3218, "step": 22555 }, { "epoch": 2.2932086213908094, "grad_norm": 0.28881436586380005, "learning_rate": 6.567613631410374e-06, "loss": 0.3382, "step": 22556 }, { "epoch": 2.2933102887352583, "grad_norm": 0.29937833547592163, "learning_rate": 6.567276634585329e-06, "loss": 0.324, "step": 22557 }, { "epoch": 2.2934119560797073, "grad_norm": 0.2782303988933563, "learning_rate": 6.566939629864716e-06, "loss": 0.3437, "step": 22558 }, { "epoch": 2.293513623424156, "grad_norm": 0.2882009446620941, "learning_rate": 6.566602617250233e-06, "loss": 0.3535, "step": 22559 }, { "epoch": 2.293615290768605, "grad_norm": 0.26507091522216797, "learning_rate": 6.5662655967435775e-06, "loss": 0.3215, "step": 22560 }, { "epoch": 2.293716958113054, "grad_norm": 0.2618575692176819, "learning_rate": 6.565928568346449e-06, "loss": 0.3561, "step": 22561 }, { "epoch": 2.293818625457503, "grad_norm": 0.2637273073196411, "learning_rate": 6.5655915320605456e-06, "loss": 0.3149, "step": 22562 }, { "epoch": 2.293920292801952, "grad_norm": 0.27312958240509033, "learning_rate": 6.565254487887562e-06, "loss": 0.3385, "step": 22563 }, { "epoch": 2.294021960146401, "grad_norm": 0.29553088545799255, "learning_rate": 6.5649174358292e-06, "loss": 0.3087, "step": 22564 }, { "epoch": 2.29412362749085, "grad_norm": 0.27169695496559143, "learning_rate": 6.5645803758871555e-06, "loss": 0.3294, "step": 22565 }, { "epoch": 2.294225294835299, "grad_norm": 0.2539636194705963, "learning_rate": 6.5642433080631266e-06, "loss": 0.3216, "step": 22566 }, { "epoch": 2.2943269621797477, "grad_norm": 0.2536921501159668, "learning_rate": 6.563906232358812e-06, "loss": 0.3421, "step": 22567 }, { "epoch": 2.2944286295241967, "grad_norm": 0.26709234714508057, "learning_rate": 6.563569148775908e-06, "loss": 0.3398, "step": 22568 }, { "epoch": 2.2945302968686456, "grad_norm": 0.28044942021369934, "learning_rate": 6.563232057316116e-06, "loss": 0.3351, "step": 22569 }, { "epoch": 2.2946319642130946, "grad_norm": 0.2729351818561554, "learning_rate": 6.56289495798113e-06, "loss": 0.3287, "step": 22570 }, { "epoch": 2.294733631557544, "grad_norm": 0.2597106099128723, "learning_rate": 6.562557850772653e-06, "loss": 0.3298, "step": 22571 }, { "epoch": 2.294835298901993, "grad_norm": 0.26279816031455994, "learning_rate": 6.562220735692381e-06, "loss": 0.3337, "step": 22572 }, { "epoch": 2.294936966246442, "grad_norm": 0.2613779902458191, "learning_rate": 6.56188361274201e-06, "loss": 0.3263, "step": 22573 }, { "epoch": 2.2950386335908908, "grad_norm": 0.28575509786605835, "learning_rate": 6.561546481923242e-06, "loss": 0.3152, "step": 22574 }, { "epoch": 2.2951403009353397, "grad_norm": 0.28916752338409424, "learning_rate": 6.5612093432377745e-06, "loss": 0.3464, "step": 22575 }, { "epoch": 2.2952419682797887, "grad_norm": 0.27712932229042053, "learning_rate": 6.560872196687303e-06, "loss": 0.3331, "step": 22576 }, { "epoch": 2.2953436356242376, "grad_norm": 0.2623221278190613, "learning_rate": 6.5605350422735315e-06, "loss": 0.3476, "step": 22577 }, { "epoch": 2.2954453029686865, "grad_norm": 0.300841361284256, "learning_rate": 6.560197879998153e-06, "loss": 0.3366, "step": 22578 }, { "epoch": 2.2955469703131355, "grad_norm": 0.28911253809928894, "learning_rate": 6.559860709862868e-06, "loss": 0.3602, "step": 22579 }, { "epoch": 2.2956486376575844, "grad_norm": 0.27974554896354675, "learning_rate": 6.559523531869377e-06, "loss": 0.3427, "step": 22580 }, { "epoch": 2.2957503050020334, "grad_norm": 0.2569500803947449, "learning_rate": 6.559186346019376e-06, "loss": 0.307, "step": 22581 }, { "epoch": 2.2958519723464823, "grad_norm": 0.28855806589126587, "learning_rate": 6.558849152314566e-06, "loss": 0.3617, "step": 22582 }, { "epoch": 2.2959536396909312, "grad_norm": 0.3067907691001892, "learning_rate": 6.558511950756643e-06, "loss": 0.3298, "step": 22583 }, { "epoch": 2.29605530703538, "grad_norm": 0.25830164551734924, "learning_rate": 6.558174741347307e-06, "loss": 0.3265, "step": 22584 }, { "epoch": 2.296156974379829, "grad_norm": 0.285584032535553, "learning_rate": 6.557837524088259e-06, "loss": 0.3816, "step": 22585 }, { "epoch": 2.296258641724278, "grad_norm": 0.31757497787475586, "learning_rate": 6.557500298981193e-06, "loss": 0.3613, "step": 22586 }, { "epoch": 2.296360309068727, "grad_norm": 0.25860756635665894, "learning_rate": 6.557163066027813e-06, "loss": 0.3258, "step": 22587 }, { "epoch": 2.296461976413176, "grad_norm": 0.27963921427726746, "learning_rate": 6.556825825229813e-06, "loss": 0.3476, "step": 22588 }, { "epoch": 2.296563643757625, "grad_norm": 0.32097771763801575, "learning_rate": 6.556488576588896e-06, "loss": 0.3796, "step": 22589 }, { "epoch": 2.296665311102074, "grad_norm": 0.25988519191741943, "learning_rate": 6.5561513201067605e-06, "loss": 0.3596, "step": 22590 }, { "epoch": 2.2967669784465228, "grad_norm": 0.30073434114456177, "learning_rate": 6.555814055785103e-06, "loss": 0.3312, "step": 22591 }, { "epoch": 2.2968686457909717, "grad_norm": 0.286969929933548, "learning_rate": 6.555476783625625e-06, "loss": 0.3331, "step": 22592 }, { "epoch": 2.2969703131354207, "grad_norm": 0.28561675548553467, "learning_rate": 6.555139503630024e-06, "loss": 0.3171, "step": 22593 }, { "epoch": 2.29707198047987, "grad_norm": 0.26229262351989746, "learning_rate": 6.554802215799998e-06, "loss": 0.346, "step": 22594 }, { "epoch": 2.297173647824319, "grad_norm": 0.27300333976745605, "learning_rate": 6.55446492013725e-06, "loss": 0.351, "step": 22595 }, { "epoch": 2.297275315168768, "grad_norm": 0.27356773614883423, "learning_rate": 6.554127616643476e-06, "loss": 0.349, "step": 22596 }, { "epoch": 2.297376982513217, "grad_norm": 0.28663793206214905, "learning_rate": 6.553790305320378e-06, "loss": 0.3296, "step": 22597 }, { "epoch": 2.297478649857666, "grad_norm": 0.287776380777359, "learning_rate": 6.553452986169653e-06, "loss": 0.3173, "step": 22598 }, { "epoch": 2.2975803172021148, "grad_norm": 0.2974695563316345, "learning_rate": 6.553115659193001e-06, "loss": 0.3386, "step": 22599 }, { "epoch": 2.2976819845465637, "grad_norm": 0.2603071928024292, "learning_rate": 6.55277832439212e-06, "loss": 0.3499, "step": 22600 }, { "epoch": 2.2977836518910126, "grad_norm": 0.28210216760635376, "learning_rate": 6.5524409817687125e-06, "loss": 0.307, "step": 22601 }, { "epoch": 2.2978853192354616, "grad_norm": 0.2827204167842865, "learning_rate": 6.552103631324476e-06, "loss": 0.3255, "step": 22602 }, { "epoch": 2.2979869865799105, "grad_norm": 0.2868463397026062, "learning_rate": 6.55176627306111e-06, "loss": 0.3214, "step": 22603 }, { "epoch": 2.2980886539243595, "grad_norm": 0.279572457075119, "learning_rate": 6.551428906980315e-06, "loss": 0.3147, "step": 22604 }, { "epoch": 2.2981903212688084, "grad_norm": 0.2512613534927368, "learning_rate": 6.551091533083789e-06, "loss": 0.3258, "step": 22605 }, { "epoch": 2.2982919886132573, "grad_norm": 0.26204773783683777, "learning_rate": 6.550754151373231e-06, "loss": 0.3391, "step": 22606 }, { "epoch": 2.2983936559577063, "grad_norm": 0.2810368239879608, "learning_rate": 6.550416761850344e-06, "loss": 0.3578, "step": 22607 }, { "epoch": 2.2984953233021552, "grad_norm": 0.2787717878818512, "learning_rate": 6.550079364516826e-06, "loss": 0.3671, "step": 22608 }, { "epoch": 2.298596990646604, "grad_norm": 0.2650688886642456, "learning_rate": 6.549741959374376e-06, "loss": 0.3482, "step": 22609 }, { "epoch": 2.298698657991053, "grad_norm": 0.26672598719596863, "learning_rate": 6.5494045464246935e-06, "loss": 0.3154, "step": 22610 }, { "epoch": 2.298800325335502, "grad_norm": 0.31170910596847534, "learning_rate": 6.549067125669479e-06, "loss": 0.3495, "step": 22611 }, { "epoch": 2.2989019926799514, "grad_norm": 0.2774425446987152, "learning_rate": 6.548729697110434e-06, "loss": 0.328, "step": 22612 }, { "epoch": 2.2990036600244004, "grad_norm": 0.26967209577560425, "learning_rate": 6.548392260749256e-06, "loss": 0.3182, "step": 22613 }, { "epoch": 2.2991053273688493, "grad_norm": 0.2514957785606384, "learning_rate": 6.5480548165876465e-06, "loss": 0.3166, "step": 22614 }, { "epoch": 2.2992069947132983, "grad_norm": 0.27658525109291077, "learning_rate": 6.547717364627304e-06, "loss": 0.327, "step": 22615 }, { "epoch": 2.299308662057747, "grad_norm": 0.2878921329975128, "learning_rate": 6.547379904869928e-06, "loss": 0.3552, "step": 22616 }, { "epoch": 2.299410329402196, "grad_norm": 0.27117133140563965, "learning_rate": 6.547042437317221e-06, "loss": 0.3547, "step": 22617 }, { "epoch": 2.299511996746645, "grad_norm": 0.26254919171333313, "learning_rate": 6.546704961970882e-06, "loss": 0.3346, "step": 22618 }, { "epoch": 2.299613664091094, "grad_norm": 0.30008575320243835, "learning_rate": 6.546367478832612e-06, "loss": 0.3332, "step": 22619 }, { "epoch": 2.299715331435543, "grad_norm": 0.2895095944404602, "learning_rate": 6.546029987904108e-06, "loss": 0.3549, "step": 22620 }, { "epoch": 2.299816998779992, "grad_norm": 0.28536611795425415, "learning_rate": 6.545692489187072e-06, "loss": 0.35, "step": 22621 }, { "epoch": 2.299918666124441, "grad_norm": 0.2722111642360687, "learning_rate": 6.545354982683207e-06, "loss": 0.33, "step": 22622 }, { "epoch": 2.30002033346889, "grad_norm": 0.2778855264186859, "learning_rate": 6.545017468394209e-06, "loss": 0.3624, "step": 22623 }, { "epoch": 2.3001220008133387, "grad_norm": 0.2741665542125702, "learning_rate": 6.544679946321781e-06, "loss": 0.3446, "step": 22624 }, { "epoch": 2.3002236681577877, "grad_norm": 0.29328274726867676, "learning_rate": 6.544342416467621e-06, "loss": 0.3396, "step": 22625 }, { "epoch": 2.3003253355022366, "grad_norm": 0.26315560936927795, "learning_rate": 6.5440048788334335e-06, "loss": 0.2959, "step": 22626 }, { "epoch": 2.3004270028466856, "grad_norm": 0.2569243609905243, "learning_rate": 6.543667333420916e-06, "loss": 0.3101, "step": 22627 }, { "epoch": 2.3005286701911345, "grad_norm": 0.2707068622112274, "learning_rate": 6.5433297802317666e-06, "loss": 0.3192, "step": 22628 }, { "epoch": 2.3006303375355834, "grad_norm": 0.27820834517478943, "learning_rate": 6.542992219267691e-06, "loss": 0.3445, "step": 22629 }, { "epoch": 2.3007320048800324, "grad_norm": 0.2594210207462311, "learning_rate": 6.542654650530387e-06, "loss": 0.3558, "step": 22630 }, { "epoch": 2.3008336722244813, "grad_norm": 0.2604874074459076, "learning_rate": 6.5423170740215535e-06, "loss": 0.3413, "step": 22631 }, { "epoch": 2.3009353395689303, "grad_norm": 0.2479054033756256, "learning_rate": 6.541979489742896e-06, "loss": 0.3243, "step": 22632 }, { "epoch": 2.301037006913379, "grad_norm": 0.2831118404865265, "learning_rate": 6.54164189769611e-06, "loss": 0.3513, "step": 22633 }, { "epoch": 2.301138674257828, "grad_norm": 0.2850205600261688, "learning_rate": 6.5413042978829e-06, "loss": 0.336, "step": 22634 }, { "epoch": 2.3012403416022775, "grad_norm": 0.3242369294166565, "learning_rate": 6.540966690304964e-06, "loss": 0.3205, "step": 22635 }, { "epoch": 2.3013420089467265, "grad_norm": 0.2762875258922577, "learning_rate": 6.540629074964003e-06, "loss": 0.3211, "step": 22636 }, { "epoch": 2.3014436762911754, "grad_norm": 0.2695107161998749, "learning_rate": 6.540291451861721e-06, "loss": 0.3635, "step": 22637 }, { "epoch": 2.3015453436356244, "grad_norm": 0.30372604727745056, "learning_rate": 6.539953820999817e-06, "loss": 0.3469, "step": 22638 }, { "epoch": 2.3016470109800733, "grad_norm": 0.2614535689353943, "learning_rate": 6.53961618237999e-06, "loss": 0.3267, "step": 22639 }, { "epoch": 2.3017486783245222, "grad_norm": 0.27573612332344055, "learning_rate": 6.539278536003942e-06, "loss": 0.3622, "step": 22640 }, { "epoch": 2.301850345668971, "grad_norm": 0.2849828898906708, "learning_rate": 6.538940881873375e-06, "loss": 0.3434, "step": 22641 }, { "epoch": 2.30195201301342, "grad_norm": 0.2703479528427124, "learning_rate": 6.538603219989992e-06, "loss": 0.3336, "step": 22642 }, { "epoch": 2.302053680357869, "grad_norm": 0.27628597617149353, "learning_rate": 6.538265550355491e-06, "loss": 0.3285, "step": 22643 }, { "epoch": 2.302155347702318, "grad_norm": 0.28187456727027893, "learning_rate": 6.537927872971571e-06, "loss": 0.344, "step": 22644 }, { "epoch": 2.302257015046767, "grad_norm": 0.2807129919528961, "learning_rate": 6.537590187839937e-06, "loss": 0.3467, "step": 22645 }, { "epoch": 2.302358682391216, "grad_norm": 0.2646246552467346, "learning_rate": 6.5372524949622895e-06, "loss": 0.377, "step": 22646 }, { "epoch": 2.302460349735665, "grad_norm": 0.26829731464385986, "learning_rate": 6.536914794340329e-06, "loss": 0.3232, "step": 22647 }, { "epoch": 2.3025620170801138, "grad_norm": 0.3168815076351166, "learning_rate": 6.536577085975758e-06, "loss": 0.3629, "step": 22648 }, { "epoch": 2.3026636844245627, "grad_norm": 0.25962725281715393, "learning_rate": 6.536239369870276e-06, "loss": 0.322, "step": 22649 }, { "epoch": 2.3027653517690116, "grad_norm": 0.2591395676136017, "learning_rate": 6.5359016460255844e-06, "loss": 0.3247, "step": 22650 }, { "epoch": 2.3028670191134606, "grad_norm": 0.30581390857696533, "learning_rate": 6.535563914443386e-06, "loss": 0.3223, "step": 22651 }, { "epoch": 2.3029686864579095, "grad_norm": 0.2806524634361267, "learning_rate": 6.535226175125381e-06, "loss": 0.3394, "step": 22652 }, { "epoch": 2.303070353802359, "grad_norm": 0.26517996191978455, "learning_rate": 6.5348884280732724e-06, "loss": 0.3236, "step": 22653 }, { "epoch": 2.303172021146808, "grad_norm": 0.2648668885231018, "learning_rate": 6.53455067328876e-06, "loss": 0.3432, "step": 22654 }, { "epoch": 2.303273688491257, "grad_norm": 0.2734195291996002, "learning_rate": 6.534212910773545e-06, "loss": 0.3209, "step": 22655 }, { "epoch": 2.3033753558357057, "grad_norm": 0.2798391580581665, "learning_rate": 6.533875140529331e-06, "loss": 0.3318, "step": 22656 }, { "epoch": 2.3034770231801547, "grad_norm": 0.2763626277446747, "learning_rate": 6.533537362557818e-06, "loss": 0.3322, "step": 22657 }, { "epoch": 2.3035786905246036, "grad_norm": 0.2694135904312134, "learning_rate": 6.533199576860709e-06, "loss": 0.3152, "step": 22658 }, { "epoch": 2.3036803578690526, "grad_norm": 0.26112696528434753, "learning_rate": 6.532861783439705e-06, "loss": 0.3478, "step": 22659 }, { "epoch": 2.3037820252135015, "grad_norm": 0.26696738600730896, "learning_rate": 6.532523982296507e-06, "loss": 0.3266, "step": 22660 }, { "epoch": 2.3038836925579504, "grad_norm": 0.27158045768737793, "learning_rate": 6.532186173432817e-06, "loss": 0.3393, "step": 22661 }, { "epoch": 2.3039853599023994, "grad_norm": 0.2603050470352173, "learning_rate": 6.531848356850338e-06, "loss": 0.3094, "step": 22662 }, { "epoch": 2.3040870272468483, "grad_norm": 0.2579168379306793, "learning_rate": 6.531510532550771e-06, "loss": 0.3169, "step": 22663 }, { "epoch": 2.3041886945912973, "grad_norm": 0.2722800076007843, "learning_rate": 6.531172700535817e-06, "loss": 0.338, "step": 22664 }, { "epoch": 2.304290361935746, "grad_norm": 0.27134808897972107, "learning_rate": 6.530834860807179e-06, "loss": 0.3398, "step": 22665 }, { "epoch": 2.304392029280195, "grad_norm": 0.2880711853504181, "learning_rate": 6.53049701336656e-06, "loss": 0.3602, "step": 22666 }, { "epoch": 2.304493696624644, "grad_norm": 0.29142066836357117, "learning_rate": 6.530159158215659e-06, "loss": 0.3182, "step": 22667 }, { "epoch": 2.304595363969093, "grad_norm": 0.24943771958351135, "learning_rate": 6.529821295356181e-06, "loss": 0.3151, "step": 22668 }, { "epoch": 2.304697031313542, "grad_norm": 0.2903008460998535, "learning_rate": 6.529483424789827e-06, "loss": 0.3087, "step": 22669 }, { "epoch": 2.304798698657991, "grad_norm": 0.28682467341423035, "learning_rate": 6.5291455465182975e-06, "loss": 0.3258, "step": 22670 }, { "epoch": 2.30490036600244, "grad_norm": 0.2925608456134796, "learning_rate": 6.5288076605432974e-06, "loss": 0.3286, "step": 22671 }, { "epoch": 2.305002033346889, "grad_norm": 0.2667263448238373, "learning_rate": 6.5284697668665275e-06, "loss": 0.3376, "step": 22672 }, { "epoch": 2.3051037006913377, "grad_norm": 0.27628186345100403, "learning_rate": 6.528131865489691e-06, "loss": 0.3688, "step": 22673 }, { "epoch": 2.3052053680357867, "grad_norm": 0.2770897150039673, "learning_rate": 6.527793956414486e-06, "loss": 0.2987, "step": 22674 }, { "epoch": 2.3053070353802356, "grad_norm": 0.26907336711883545, "learning_rate": 6.527456039642621e-06, "loss": 0.3454, "step": 22675 }, { "epoch": 2.305408702724685, "grad_norm": 0.2732123136520386, "learning_rate": 6.527118115175794e-06, "loss": 0.3432, "step": 22676 }, { "epoch": 2.305510370069134, "grad_norm": 0.2776033580303192, "learning_rate": 6.5267801830157105e-06, "loss": 0.3261, "step": 22677 }, { "epoch": 2.305612037413583, "grad_norm": 0.27856704592704773, "learning_rate": 6.52644224316407e-06, "loss": 0.3545, "step": 22678 }, { "epoch": 2.305713704758032, "grad_norm": 0.26052045822143555, "learning_rate": 6.526104295622577e-06, "loss": 0.2932, "step": 22679 }, { "epoch": 2.3058153721024808, "grad_norm": 0.2717519700527191, "learning_rate": 6.5257663403929315e-06, "loss": 0.322, "step": 22680 }, { "epoch": 2.3059170394469297, "grad_norm": 0.27662423253059387, "learning_rate": 6.52542837747684e-06, "loss": 0.3579, "step": 22681 }, { "epoch": 2.3060187067913787, "grad_norm": 0.2813490033149719, "learning_rate": 6.525090406876002e-06, "loss": 0.3401, "step": 22682 }, { "epoch": 2.3061203741358276, "grad_norm": 0.28634002804756165, "learning_rate": 6.524752428592121e-06, "loss": 0.3565, "step": 22683 }, { "epoch": 2.3062220414802765, "grad_norm": 0.24905410408973694, "learning_rate": 6.5244144426268995e-06, "loss": 0.3255, "step": 22684 }, { "epoch": 2.3063237088247255, "grad_norm": 0.2733764350414276, "learning_rate": 6.52407644898204e-06, "loss": 0.2998, "step": 22685 }, { "epoch": 2.3064253761691744, "grad_norm": 0.2619376480579376, "learning_rate": 6.523738447659246e-06, "loss": 0.3169, "step": 22686 }, { "epoch": 2.3065270435136234, "grad_norm": 0.2645094692707062, "learning_rate": 6.523400438660219e-06, "loss": 0.3178, "step": 22687 }, { "epoch": 2.3066287108580723, "grad_norm": 0.2821962535381317, "learning_rate": 6.523062421986663e-06, "loss": 0.3166, "step": 22688 }, { "epoch": 2.3067303782025212, "grad_norm": 0.2592504024505615, "learning_rate": 6.522724397640281e-06, "loss": 0.3519, "step": 22689 }, { "epoch": 2.30683204554697, "grad_norm": 0.28460371494293213, "learning_rate": 6.522386365622775e-06, "loss": 0.3418, "step": 22690 }, { "epoch": 2.306933712891419, "grad_norm": 0.27313587069511414, "learning_rate": 6.522048325935848e-06, "loss": 0.3584, "step": 22691 }, { "epoch": 2.307035380235868, "grad_norm": 0.27133822441101074, "learning_rate": 6.5217102785812035e-06, "loss": 0.3366, "step": 22692 }, { "epoch": 2.307137047580317, "grad_norm": 0.2702701687812805, "learning_rate": 6.521372223560545e-06, "loss": 0.33, "step": 22693 }, { "epoch": 2.3072387149247664, "grad_norm": 0.28069600462913513, "learning_rate": 6.521034160875572e-06, "loss": 0.3779, "step": 22694 }, { "epoch": 2.3073403822692153, "grad_norm": 0.26881733536720276, "learning_rate": 6.520696090527994e-06, "loss": 0.3388, "step": 22695 }, { "epoch": 2.3074420496136643, "grad_norm": 0.2839272916316986, "learning_rate": 6.520358012519507e-06, "loss": 0.2947, "step": 22696 }, { "epoch": 2.3075437169581132, "grad_norm": 0.3030535578727722, "learning_rate": 6.5200199268518194e-06, "loss": 0.3447, "step": 22697 }, { "epoch": 2.307645384302562, "grad_norm": 0.25932925939559937, "learning_rate": 6.519681833526634e-06, "loss": 0.3177, "step": 22698 }, { "epoch": 2.307747051647011, "grad_norm": 0.2751227617263794, "learning_rate": 6.519343732545651e-06, "loss": 0.3276, "step": 22699 }, { "epoch": 2.30784871899146, "grad_norm": 0.2889048457145691, "learning_rate": 6.519005623910576e-06, "loss": 0.3453, "step": 22700 }, { "epoch": 2.307950386335909, "grad_norm": 0.2870546281337738, "learning_rate": 6.518667507623111e-06, "loss": 0.3462, "step": 22701 }, { "epoch": 2.308052053680358, "grad_norm": 0.2678578197956085, "learning_rate": 6.5183293836849594e-06, "loss": 0.3146, "step": 22702 }, { "epoch": 2.308153721024807, "grad_norm": 0.2711734175682068, "learning_rate": 6.5179912520978265e-06, "loss": 0.2988, "step": 22703 }, { "epoch": 2.308255388369256, "grad_norm": 0.26827603578567505, "learning_rate": 6.517653112863415e-06, "loss": 0.3229, "step": 22704 }, { "epoch": 2.3083570557137048, "grad_norm": 0.29666614532470703, "learning_rate": 6.517314965983426e-06, "loss": 0.3608, "step": 22705 }, { "epoch": 2.3084587230581537, "grad_norm": 0.270230233669281, "learning_rate": 6.516976811459565e-06, "loss": 0.3627, "step": 22706 }, { "epoch": 2.3085603904026026, "grad_norm": 0.29482635855674744, "learning_rate": 6.516638649293535e-06, "loss": 0.3371, "step": 22707 }, { "epoch": 2.3086620577470516, "grad_norm": 0.25468677282333374, "learning_rate": 6.516300479487041e-06, "loss": 0.3174, "step": 22708 }, { "epoch": 2.3087637250915005, "grad_norm": 0.28491756319999695, "learning_rate": 6.5159623020417865e-06, "loss": 0.309, "step": 22709 }, { "epoch": 2.3088653924359495, "grad_norm": 0.2760927379131317, "learning_rate": 6.515624116959471e-06, "loss": 0.2878, "step": 22710 }, { "epoch": 2.3089670597803984, "grad_norm": 0.26339468359947205, "learning_rate": 6.515285924241803e-06, "loss": 0.3362, "step": 22711 }, { "epoch": 2.3090687271248473, "grad_norm": 0.2701733410358429, "learning_rate": 6.5149477238904846e-06, "loss": 0.3548, "step": 22712 }, { "epoch": 2.3091703944692963, "grad_norm": 0.2721027433872223, "learning_rate": 6.514609515907221e-06, "loss": 0.329, "step": 22713 }, { "epoch": 2.3092720618137452, "grad_norm": 0.2593279182910919, "learning_rate": 6.514271300293713e-06, "loss": 0.3158, "step": 22714 }, { "epoch": 2.309373729158194, "grad_norm": 0.2859802544116974, "learning_rate": 6.5139330770516665e-06, "loss": 0.3488, "step": 22715 }, { "epoch": 2.309475396502643, "grad_norm": 0.2623889744281769, "learning_rate": 6.5135948461827846e-06, "loss": 0.3212, "step": 22716 }, { "epoch": 2.3095770638470925, "grad_norm": 0.27772608399391174, "learning_rate": 6.513256607688771e-06, "loss": 0.3565, "step": 22717 }, { "epoch": 2.3096787311915414, "grad_norm": 0.26226112246513367, "learning_rate": 6.512918361571332e-06, "loss": 0.3553, "step": 22718 }, { "epoch": 2.3097803985359904, "grad_norm": 0.25743764638900757, "learning_rate": 6.512580107832169e-06, "loss": 0.3157, "step": 22719 }, { "epoch": 2.3098820658804393, "grad_norm": 0.27212250232696533, "learning_rate": 6.512241846472986e-06, "loss": 0.3402, "step": 22720 }, { "epoch": 2.3099837332248883, "grad_norm": 0.263674795627594, "learning_rate": 6.511903577495489e-06, "loss": 0.3291, "step": 22721 }, { "epoch": 2.310085400569337, "grad_norm": 0.2715045213699341, "learning_rate": 6.511565300901379e-06, "loss": 0.3356, "step": 22722 }, { "epoch": 2.310187067913786, "grad_norm": 0.27093085646629333, "learning_rate": 6.511227016692365e-06, "loss": 0.3187, "step": 22723 }, { "epoch": 2.310288735258235, "grad_norm": 0.2653731107711792, "learning_rate": 6.510888724870148e-06, "loss": 0.3239, "step": 22724 }, { "epoch": 2.310390402602684, "grad_norm": 0.28700152039527893, "learning_rate": 6.510550425436431e-06, "loss": 0.3476, "step": 22725 }, { "epoch": 2.310492069947133, "grad_norm": 0.26085734367370605, "learning_rate": 6.510212118392921e-06, "loss": 0.3467, "step": 22726 }, { "epoch": 2.310593737291582, "grad_norm": 0.26085394620895386, "learning_rate": 6.509873803741321e-06, "loss": 0.3425, "step": 22727 }, { "epoch": 2.310695404636031, "grad_norm": 0.27521800994873047, "learning_rate": 6.509535481483335e-06, "loss": 0.3668, "step": 22728 }, { "epoch": 2.31079707198048, "grad_norm": 0.262634813785553, "learning_rate": 6.50919715162067e-06, "loss": 0.335, "step": 22729 }, { "epoch": 2.3108987393249287, "grad_norm": 0.2581309378147125, "learning_rate": 6.508858814155027e-06, "loss": 0.3376, "step": 22730 }, { "epoch": 2.3110004066693777, "grad_norm": 0.30144837498664856, "learning_rate": 6.508520469088111e-06, "loss": 0.3575, "step": 22731 }, { "epoch": 2.3111020740138266, "grad_norm": 0.2740585207939148, "learning_rate": 6.508182116421628e-06, "loss": 0.3304, "step": 22732 }, { "epoch": 2.3112037413582756, "grad_norm": 0.2902736961841583, "learning_rate": 6.507843756157283e-06, "loss": 0.338, "step": 22733 }, { "epoch": 2.3113054087027245, "grad_norm": 0.25717344880104065, "learning_rate": 6.507505388296779e-06, "loss": 0.3443, "step": 22734 }, { "epoch": 2.311407076047174, "grad_norm": 0.2897075116634369, "learning_rate": 6.50716701284182e-06, "loss": 0.3587, "step": 22735 }, { "epoch": 2.311508743391623, "grad_norm": 0.2794683277606964, "learning_rate": 6.506828629794112e-06, "loss": 0.3129, "step": 22736 }, { "epoch": 2.3116104107360718, "grad_norm": 0.26282814145088196, "learning_rate": 6.506490239155361e-06, "loss": 0.353, "step": 22737 }, { "epoch": 2.3117120780805207, "grad_norm": 0.2649424374103546, "learning_rate": 6.506151840927268e-06, "loss": 0.3491, "step": 22738 }, { "epoch": 2.3118137454249696, "grad_norm": 0.33590832352638245, "learning_rate": 6.505813435111541e-06, "loss": 0.3407, "step": 22739 }, { "epoch": 2.3119154127694186, "grad_norm": 0.26164311170578003, "learning_rate": 6.505475021709884e-06, "loss": 0.313, "step": 22740 }, { "epoch": 2.3120170801138675, "grad_norm": 0.26742658019065857, "learning_rate": 6.505136600724e-06, "loss": 0.3438, "step": 22741 }, { "epoch": 2.3121187474583165, "grad_norm": 0.2599860727787018, "learning_rate": 6.504798172155597e-06, "loss": 0.3567, "step": 22742 }, { "epoch": 2.3122204148027654, "grad_norm": 0.2586406171321869, "learning_rate": 6.504459736006379e-06, "loss": 0.319, "step": 22743 }, { "epoch": 2.3123220821472144, "grad_norm": 0.2836500406265259, "learning_rate": 6.5041212922780494e-06, "loss": 0.3483, "step": 22744 }, { "epoch": 2.3124237494916633, "grad_norm": 0.25340506434440613, "learning_rate": 6.503782840972315e-06, "loss": 0.3202, "step": 22745 }, { "epoch": 2.3125254168361122, "grad_norm": 0.26020166277885437, "learning_rate": 6.503444382090878e-06, "loss": 0.3373, "step": 22746 }, { "epoch": 2.312627084180561, "grad_norm": 0.25255581736564636, "learning_rate": 6.503105915635448e-06, "loss": 0.3184, "step": 22747 }, { "epoch": 2.31272875152501, "grad_norm": 0.26263943314552307, "learning_rate": 6.5027674416077256e-06, "loss": 0.3587, "step": 22748 }, { "epoch": 2.312830418869459, "grad_norm": 0.28103455901145935, "learning_rate": 6.50242896000942e-06, "loss": 0.3111, "step": 22749 }, { "epoch": 2.312932086213908, "grad_norm": 0.2581106722354889, "learning_rate": 6.502090470842233e-06, "loss": 0.3315, "step": 22750 }, { "epoch": 2.313033753558357, "grad_norm": 0.25793716311454773, "learning_rate": 6.50175197410787e-06, "loss": 0.3346, "step": 22751 }, { "epoch": 2.313135420902806, "grad_norm": 0.2662009000778198, "learning_rate": 6.501413469808039e-06, "loss": 0.3309, "step": 22752 }, { "epoch": 2.313237088247255, "grad_norm": 0.2684619724750519, "learning_rate": 6.5010749579444434e-06, "loss": 0.3332, "step": 22753 }, { "epoch": 2.3133387555917038, "grad_norm": 0.26197418570518494, "learning_rate": 6.500736438518789e-06, "loss": 0.3015, "step": 22754 }, { "epoch": 2.3134404229361527, "grad_norm": 0.29070937633514404, "learning_rate": 6.5003979115327805e-06, "loss": 0.3853, "step": 22755 }, { "epoch": 2.3135420902806016, "grad_norm": 0.2579927444458008, "learning_rate": 6.500059376988124e-06, "loss": 0.3675, "step": 22756 }, { "epoch": 2.3136437576250506, "grad_norm": 0.26036781072616577, "learning_rate": 6.499720834886526e-06, "loss": 0.3623, "step": 22757 }, { "epoch": 2.3137454249695, "grad_norm": 0.26061344146728516, "learning_rate": 6.49938228522969e-06, "loss": 0.3621, "step": 22758 }, { "epoch": 2.313847092313949, "grad_norm": 0.2678312361240387, "learning_rate": 6.499043728019322e-06, "loss": 0.3217, "step": 22759 }, { "epoch": 2.313948759658398, "grad_norm": 0.2626250386238098, "learning_rate": 6.498705163257128e-06, "loss": 0.3325, "step": 22760 }, { "epoch": 2.314050427002847, "grad_norm": 0.28858739137649536, "learning_rate": 6.498366590944813e-06, "loss": 0.3483, "step": 22761 }, { "epoch": 2.3141520943472957, "grad_norm": 0.26640158891677856, "learning_rate": 6.498028011084085e-06, "loss": 0.3496, "step": 22762 }, { "epoch": 2.3142537616917447, "grad_norm": 0.2561587989330292, "learning_rate": 6.497689423676646e-06, "loss": 0.3102, "step": 22763 }, { "epoch": 2.3143554290361936, "grad_norm": 0.2862207889556885, "learning_rate": 6.4973508287242045e-06, "loss": 0.3054, "step": 22764 }, { "epoch": 2.3144570963806426, "grad_norm": 0.2787487506866455, "learning_rate": 6.497012226228465e-06, "loss": 0.3645, "step": 22765 }, { "epoch": 2.3145587637250915, "grad_norm": 0.27608704566955566, "learning_rate": 6.496673616191134e-06, "loss": 0.3531, "step": 22766 }, { "epoch": 2.3146604310695404, "grad_norm": 0.27463293075561523, "learning_rate": 6.496334998613918e-06, "loss": 0.3377, "step": 22767 }, { "epoch": 2.3147620984139894, "grad_norm": 0.2716943621635437, "learning_rate": 6.49599637349852e-06, "loss": 0.3331, "step": 22768 }, { "epoch": 2.3148637657584383, "grad_norm": 0.2717975676059723, "learning_rate": 6.495657740846648e-06, "loss": 0.35, "step": 22769 }, { "epoch": 2.3149654331028873, "grad_norm": 0.2888423502445221, "learning_rate": 6.4953191006600095e-06, "loss": 0.3109, "step": 22770 }, { "epoch": 2.315067100447336, "grad_norm": 0.2648639380931854, "learning_rate": 6.494980452940306e-06, "loss": 0.3205, "step": 22771 }, { "epoch": 2.315168767791785, "grad_norm": 0.28422173857688904, "learning_rate": 6.494641797689248e-06, "loss": 0.3362, "step": 22772 }, { "epoch": 2.315270435136234, "grad_norm": 0.2809883654117584, "learning_rate": 6.49430313490854e-06, "loss": 0.3697, "step": 22773 }, { "epoch": 2.315372102480683, "grad_norm": 0.2822684943675995, "learning_rate": 6.493964464599886e-06, "loss": 0.3572, "step": 22774 }, { "epoch": 2.315473769825132, "grad_norm": 0.25965940952301025, "learning_rate": 6.493625786764996e-06, "loss": 0.3301, "step": 22775 }, { "epoch": 2.3155754371695814, "grad_norm": 0.2875082194805145, "learning_rate": 6.493287101405573e-06, "loss": 0.3278, "step": 22776 }, { "epoch": 2.3156771045140303, "grad_norm": 0.26742830872535706, "learning_rate": 6.492948408523326e-06, "loss": 0.3486, "step": 22777 }, { "epoch": 2.3157787718584792, "grad_norm": 0.28779515624046326, "learning_rate": 6.492609708119959e-06, "loss": 0.2976, "step": 22778 }, { "epoch": 2.315880439202928, "grad_norm": 0.2597361207008362, "learning_rate": 6.492271000197179e-06, "loss": 0.3168, "step": 22779 }, { "epoch": 2.315982106547377, "grad_norm": 0.27592504024505615, "learning_rate": 6.491932284756692e-06, "loss": 0.343, "step": 22780 }, { "epoch": 2.316083773891826, "grad_norm": 0.2862301468849182, "learning_rate": 6.491593561800204e-06, "loss": 0.3723, "step": 22781 }, { "epoch": 2.316185441236275, "grad_norm": 0.27227360010147095, "learning_rate": 6.491254831329423e-06, "loss": 0.3771, "step": 22782 }, { "epoch": 2.316287108580724, "grad_norm": 0.26852747797966003, "learning_rate": 6.490916093346054e-06, "loss": 0.3028, "step": 22783 }, { "epoch": 2.316388775925173, "grad_norm": 0.2845310568809509, "learning_rate": 6.490577347851805e-06, "loss": 0.3371, "step": 22784 }, { "epoch": 2.316490443269622, "grad_norm": 0.2898502051830292, "learning_rate": 6.4902385948483824e-06, "loss": 0.3332, "step": 22785 }, { "epoch": 2.3165921106140708, "grad_norm": 0.289569616317749, "learning_rate": 6.48989983433749e-06, "loss": 0.3113, "step": 22786 }, { "epoch": 2.3166937779585197, "grad_norm": 0.2765040993690491, "learning_rate": 6.489561066320836e-06, "loss": 0.3155, "step": 22787 }, { "epoch": 2.3167954453029687, "grad_norm": 0.26497408747673035, "learning_rate": 6.489222290800128e-06, "loss": 0.3463, "step": 22788 }, { "epoch": 2.3168971126474176, "grad_norm": 0.2604234218597412, "learning_rate": 6.4888835077770725e-06, "loss": 0.319, "step": 22789 }, { "epoch": 2.3169987799918665, "grad_norm": 0.30487367510795593, "learning_rate": 6.488544717253376e-06, "loss": 0.3665, "step": 22790 }, { "epoch": 2.3171004473363155, "grad_norm": 0.2853703796863556, "learning_rate": 6.488205919230744e-06, "loss": 0.3285, "step": 22791 }, { "epoch": 2.3172021146807644, "grad_norm": 0.30565354228019714, "learning_rate": 6.487867113710884e-06, "loss": 0.3225, "step": 22792 }, { "epoch": 2.3173037820252134, "grad_norm": 0.26982545852661133, "learning_rate": 6.487528300695503e-06, "loss": 0.3276, "step": 22793 }, { "epoch": 2.3174054493696623, "grad_norm": 0.25431251525878906, "learning_rate": 6.4871894801863086e-06, "loss": 0.345, "step": 22794 }, { "epoch": 2.3175071167141112, "grad_norm": 0.2812596261501312, "learning_rate": 6.486850652185008e-06, "loss": 0.3406, "step": 22795 }, { "epoch": 2.31760878405856, "grad_norm": 0.2837156355381012, "learning_rate": 6.486511816693305e-06, "loss": 0.3551, "step": 22796 }, { "epoch": 2.317710451403009, "grad_norm": 0.30372515320777893, "learning_rate": 6.4861729737129095e-06, "loss": 0.3603, "step": 22797 }, { "epoch": 2.317812118747458, "grad_norm": 0.26127463579177856, "learning_rate": 6.485834123245528e-06, "loss": 0.3326, "step": 22798 }, { "epoch": 2.3179137860919075, "grad_norm": 0.25560542941093445, "learning_rate": 6.485495265292867e-06, "loss": 0.3074, "step": 22799 }, { "epoch": 2.3180154534363564, "grad_norm": 0.28032466769218445, "learning_rate": 6.485156399856635e-06, "loss": 0.3276, "step": 22800 }, { "epoch": 2.3181171207808053, "grad_norm": 0.2772699296474457, "learning_rate": 6.484817526938536e-06, "loss": 0.3264, "step": 22801 }, { "epoch": 2.3182187881252543, "grad_norm": 0.27140864729881287, "learning_rate": 6.48447864654028e-06, "loss": 0.3313, "step": 22802 }, { "epoch": 2.3183204554697032, "grad_norm": 0.2753324508666992, "learning_rate": 6.484139758663575e-06, "loss": 0.3304, "step": 22803 }, { "epoch": 2.318422122814152, "grad_norm": 0.29887571930885315, "learning_rate": 6.483800863310125e-06, "loss": 0.3547, "step": 22804 }, { "epoch": 2.318523790158601, "grad_norm": 0.26018956303596497, "learning_rate": 6.483461960481638e-06, "loss": 0.324, "step": 22805 }, { "epoch": 2.31862545750305, "grad_norm": 0.2725675106048584, "learning_rate": 6.483123050179824e-06, "loss": 0.3293, "step": 22806 }, { "epoch": 2.318727124847499, "grad_norm": 0.29110434651374817, "learning_rate": 6.4827841324063865e-06, "loss": 0.3474, "step": 22807 }, { "epoch": 2.318828792191948, "grad_norm": 0.2631116807460785, "learning_rate": 6.482445207163037e-06, "loss": 0.3564, "step": 22808 }, { "epoch": 2.318930459536397, "grad_norm": 0.2834692597389221, "learning_rate": 6.482106274451479e-06, "loss": 0.3231, "step": 22809 }, { "epoch": 2.319032126880846, "grad_norm": 0.257988303899765, "learning_rate": 6.481767334273423e-06, "loss": 0.3196, "step": 22810 }, { "epoch": 2.3191337942252948, "grad_norm": 0.27293822169303894, "learning_rate": 6.481428386630575e-06, "loss": 0.3662, "step": 22811 }, { "epoch": 2.3192354615697437, "grad_norm": 0.2709377110004425, "learning_rate": 6.481089431524643e-06, "loss": 0.3014, "step": 22812 }, { "epoch": 2.3193371289141926, "grad_norm": 0.30606743693351746, "learning_rate": 6.480750468957335e-06, "loss": 0.3648, "step": 22813 }, { "epoch": 2.3194387962586416, "grad_norm": 0.2784724235534668, "learning_rate": 6.480411498930357e-06, "loss": 0.3177, "step": 22814 }, { "epoch": 2.3195404636030905, "grad_norm": 0.2791966497898102, "learning_rate": 6.480072521445417e-06, "loss": 0.3517, "step": 22815 }, { "epoch": 2.3196421309475395, "grad_norm": 0.2656989097595215, "learning_rate": 6.479733536504225e-06, "loss": 0.3245, "step": 22816 }, { "epoch": 2.319743798291989, "grad_norm": 0.2810294032096863, "learning_rate": 6.479394544108486e-06, "loss": 0.3194, "step": 22817 }, { "epoch": 2.319845465636438, "grad_norm": 0.28430435061454773, "learning_rate": 6.479055544259909e-06, "loss": 0.3248, "step": 22818 }, { "epoch": 2.3199471329808867, "grad_norm": 0.30336299538612366, "learning_rate": 6.478716536960201e-06, "loss": 0.3808, "step": 22819 }, { "epoch": 2.3200488003253357, "grad_norm": 0.28057506680488586, "learning_rate": 6.478377522211072e-06, "loss": 0.3285, "step": 22820 }, { "epoch": 2.3201504676697846, "grad_norm": 0.2832386791706085, "learning_rate": 6.4780385000142276e-06, "loss": 0.3508, "step": 22821 }, { "epoch": 2.3202521350142336, "grad_norm": 0.29043328762054443, "learning_rate": 6.477699470371377e-06, "loss": 0.3347, "step": 22822 }, { "epoch": 2.3203538023586825, "grad_norm": 0.2507306635379791, "learning_rate": 6.477360433284228e-06, "loss": 0.3558, "step": 22823 }, { "epoch": 2.3204554697031314, "grad_norm": 0.2827039062976837, "learning_rate": 6.477021388754486e-06, "loss": 0.3513, "step": 22824 }, { "epoch": 2.3205571370475804, "grad_norm": 0.26746469736099243, "learning_rate": 6.476682336783863e-06, "loss": 0.3034, "step": 22825 }, { "epoch": 2.3206588043920293, "grad_norm": 0.27659112215042114, "learning_rate": 6.4763432773740645e-06, "loss": 0.3402, "step": 22826 }, { "epoch": 2.3207604717364783, "grad_norm": 0.30401116609573364, "learning_rate": 6.4760042105268e-06, "loss": 0.382, "step": 22827 }, { "epoch": 2.320862139080927, "grad_norm": 0.26159945130348206, "learning_rate": 6.475665136243777e-06, "loss": 0.3047, "step": 22828 }, { "epoch": 2.320963806425376, "grad_norm": 0.23893114924430847, "learning_rate": 6.475326054526702e-06, "loss": 0.3247, "step": 22829 }, { "epoch": 2.321065473769825, "grad_norm": 0.28924375772476196, "learning_rate": 6.474986965377288e-06, "loss": 0.3494, "step": 22830 }, { "epoch": 2.321167141114274, "grad_norm": 0.28332895040512085, "learning_rate": 6.4746478687972395e-06, "loss": 0.3591, "step": 22831 }, { "epoch": 2.321268808458723, "grad_norm": 0.2793796956539154, "learning_rate": 6.474308764788263e-06, "loss": 0.3305, "step": 22832 }, { "epoch": 2.321370475803172, "grad_norm": 0.27903637290000916, "learning_rate": 6.473969653352073e-06, "loss": 0.3065, "step": 22833 }, { "epoch": 2.321472143147621, "grad_norm": 0.2474256306886673, "learning_rate": 6.47363053449037e-06, "loss": 0.3669, "step": 22834 }, { "epoch": 2.32157381049207, "grad_norm": 0.2608083486557007, "learning_rate": 6.473291408204869e-06, "loss": 0.3279, "step": 22835 }, { "epoch": 2.3216754778365187, "grad_norm": 0.2702700197696686, "learning_rate": 6.472952274497276e-06, "loss": 0.3266, "step": 22836 }, { "epoch": 2.3217771451809677, "grad_norm": 0.25140923261642456, "learning_rate": 6.4726131333692985e-06, "loss": 0.3304, "step": 22837 }, { "epoch": 2.3218788125254166, "grad_norm": 0.2538769841194153, "learning_rate": 6.472273984822648e-06, "loss": 0.3318, "step": 22838 }, { "epoch": 2.3219804798698656, "grad_norm": 0.2445191890001297, "learning_rate": 6.471934828859029e-06, "loss": 0.3173, "step": 22839 }, { "epoch": 2.322082147214315, "grad_norm": 0.2582146227359772, "learning_rate": 6.471595665480152e-06, "loss": 0.3146, "step": 22840 }, { "epoch": 2.322183814558764, "grad_norm": 0.2809712588787079, "learning_rate": 6.4712564946877265e-06, "loss": 0.3627, "step": 22841 }, { "epoch": 2.322285481903213, "grad_norm": 0.27405205368995667, "learning_rate": 6.47091731648346e-06, "loss": 0.3455, "step": 22842 }, { "epoch": 2.3223871492476618, "grad_norm": 0.2513542175292969, "learning_rate": 6.470578130869063e-06, "loss": 0.3362, "step": 22843 }, { "epoch": 2.3224888165921107, "grad_norm": 0.27318307757377625, "learning_rate": 6.470238937846241e-06, "loss": 0.3226, "step": 22844 }, { "epoch": 2.3225904839365596, "grad_norm": 0.27921679615974426, "learning_rate": 6.469899737416706e-06, "loss": 0.3276, "step": 22845 }, { "epoch": 2.3226921512810086, "grad_norm": 0.27245524525642395, "learning_rate": 6.469560529582165e-06, "loss": 0.3378, "step": 22846 }, { "epoch": 2.3227938186254575, "grad_norm": 0.2728866636753082, "learning_rate": 6.469221314344327e-06, "loss": 0.3492, "step": 22847 }, { "epoch": 2.3228954859699065, "grad_norm": 0.26871442794799805, "learning_rate": 6.468882091704901e-06, "loss": 0.3438, "step": 22848 }, { "epoch": 2.3229971533143554, "grad_norm": 0.2758655250072479, "learning_rate": 6.468542861665596e-06, "loss": 0.3316, "step": 22849 }, { "epoch": 2.3230988206588044, "grad_norm": 0.2645050883293152, "learning_rate": 6.468203624228122e-06, "loss": 0.3575, "step": 22850 }, { "epoch": 2.3232004880032533, "grad_norm": 0.2707246243953705, "learning_rate": 6.467864379394186e-06, "loss": 0.3434, "step": 22851 }, { "epoch": 2.3233021553477022, "grad_norm": 0.27117910981178284, "learning_rate": 6.467525127165499e-06, "loss": 0.3173, "step": 22852 }, { "epoch": 2.323403822692151, "grad_norm": 0.2868782877922058, "learning_rate": 6.467185867543769e-06, "loss": 0.339, "step": 22853 }, { "epoch": 2.3235054900366, "grad_norm": 0.27381932735443115, "learning_rate": 6.466846600530704e-06, "loss": 0.3376, "step": 22854 }, { "epoch": 2.323607157381049, "grad_norm": 0.26758673787117004, "learning_rate": 6.466507326128017e-06, "loss": 0.3501, "step": 22855 }, { "epoch": 2.323708824725498, "grad_norm": 0.2722313702106476, "learning_rate": 6.466168044337413e-06, "loss": 0.3362, "step": 22856 }, { "epoch": 2.323810492069947, "grad_norm": 0.28306934237480164, "learning_rate": 6.465828755160601e-06, "loss": 0.3506, "step": 22857 }, { "epoch": 2.3239121594143963, "grad_norm": 0.27346736192703247, "learning_rate": 6.465489458599294e-06, "loss": 0.3279, "step": 22858 }, { "epoch": 2.3240138267588453, "grad_norm": 0.28401097655296326, "learning_rate": 6.4651501546552e-06, "loss": 0.2974, "step": 22859 }, { "epoch": 2.324115494103294, "grad_norm": 0.31138017773628235, "learning_rate": 6.464810843330027e-06, "loss": 0.32, "step": 22860 }, { "epoch": 2.324217161447743, "grad_norm": 0.26453927159309387, "learning_rate": 6.464471524625485e-06, "loss": 0.321, "step": 22861 }, { "epoch": 2.324318828792192, "grad_norm": 0.2610822319984436, "learning_rate": 6.4641321985432825e-06, "loss": 0.3285, "step": 22862 }, { "epoch": 2.324420496136641, "grad_norm": 0.26436883211135864, "learning_rate": 6.463792865085131e-06, "loss": 0.3041, "step": 22863 }, { "epoch": 2.32452216348109, "grad_norm": 0.2878531813621521, "learning_rate": 6.463453524252738e-06, "loss": 0.356, "step": 22864 }, { "epoch": 2.324623830825539, "grad_norm": 0.25307729840278625, "learning_rate": 6.463114176047816e-06, "loss": 0.3302, "step": 22865 }, { "epoch": 2.324725498169988, "grad_norm": 0.26383888721466064, "learning_rate": 6.462774820472071e-06, "loss": 0.3131, "step": 22866 }, { "epoch": 2.324827165514437, "grad_norm": 0.24931617081165314, "learning_rate": 6.462435457527213e-06, "loss": 0.3204, "step": 22867 }, { "epoch": 2.3249288328588857, "grad_norm": 0.2657296061515808, "learning_rate": 6.462096087214955e-06, "loss": 0.3158, "step": 22868 }, { "epoch": 2.3250305002033347, "grad_norm": 0.26931503415107727, "learning_rate": 6.461756709537003e-06, "loss": 0.3077, "step": 22869 }, { "epoch": 2.3251321675477836, "grad_norm": 0.27812591195106506, "learning_rate": 6.461417324495068e-06, "loss": 0.3172, "step": 22870 }, { "epoch": 2.3252338348922326, "grad_norm": 0.3133476674556732, "learning_rate": 6.461077932090861e-06, "loss": 0.3897, "step": 22871 }, { "epoch": 2.3253355022366815, "grad_norm": 0.26783958077430725, "learning_rate": 6.460738532326088e-06, "loss": 0.3038, "step": 22872 }, { "epoch": 2.3254371695811304, "grad_norm": 0.2775973379611969, "learning_rate": 6.460399125202463e-06, "loss": 0.362, "step": 22873 }, { "epoch": 2.3255388369255794, "grad_norm": 0.262366384267807, "learning_rate": 6.460059710721694e-06, "loss": 0.3439, "step": 22874 }, { "epoch": 2.3256405042700283, "grad_norm": 0.2729152739048004, "learning_rate": 6.4597202888854926e-06, "loss": 0.3502, "step": 22875 }, { "epoch": 2.3257421716144773, "grad_norm": 0.24147363007068634, "learning_rate": 6.459380859695566e-06, "loss": 0.3295, "step": 22876 }, { "epoch": 2.325843838958926, "grad_norm": 0.2754116654396057, "learning_rate": 6.459041423153626e-06, "loss": 0.3677, "step": 22877 }, { "epoch": 2.325945506303375, "grad_norm": 0.2700236141681671, "learning_rate": 6.458701979261382e-06, "loss": 0.3539, "step": 22878 }, { "epoch": 2.326047173647824, "grad_norm": 0.2931433916091919, "learning_rate": 6.458362528020543e-06, "loss": 0.3277, "step": 22879 }, { "epoch": 2.326148840992273, "grad_norm": 0.283674031496048, "learning_rate": 6.458023069432821e-06, "loss": 0.3419, "step": 22880 }, { "epoch": 2.3262505083367224, "grad_norm": 0.25263711810112, "learning_rate": 6.4576836034999256e-06, "loss": 0.3322, "step": 22881 }, { "epoch": 2.3263521756811714, "grad_norm": 0.27716386318206787, "learning_rate": 6.457344130223565e-06, "loss": 0.3223, "step": 22882 }, { "epoch": 2.3264538430256203, "grad_norm": 0.2710110545158386, "learning_rate": 6.457004649605452e-06, "loss": 0.3521, "step": 22883 }, { "epoch": 2.3265555103700692, "grad_norm": 0.2633812129497528, "learning_rate": 6.456665161647296e-06, "loss": 0.3672, "step": 22884 }, { "epoch": 2.326657177714518, "grad_norm": 0.2799552381038666, "learning_rate": 6.456325666350806e-06, "loss": 0.3392, "step": 22885 }, { "epoch": 2.326758845058967, "grad_norm": 0.2749567925930023, "learning_rate": 6.455986163717696e-06, "loss": 0.3366, "step": 22886 }, { "epoch": 2.326860512403416, "grad_norm": 0.2882656455039978, "learning_rate": 6.455646653749672e-06, "loss": 0.3671, "step": 22887 }, { "epoch": 2.326962179747865, "grad_norm": 0.2626699209213257, "learning_rate": 6.455307136448446e-06, "loss": 0.3406, "step": 22888 }, { "epoch": 2.327063847092314, "grad_norm": 0.2927097678184509, "learning_rate": 6.454967611815728e-06, "loss": 0.3421, "step": 22889 }, { "epoch": 2.327165514436763, "grad_norm": 0.23425158858299255, "learning_rate": 6.454628079853229e-06, "loss": 0.3461, "step": 22890 }, { "epoch": 2.327267181781212, "grad_norm": 0.3098805546760559, "learning_rate": 6.454288540562661e-06, "loss": 0.3477, "step": 22891 }, { "epoch": 2.3273688491256608, "grad_norm": 0.2649766802787781, "learning_rate": 6.453948993945731e-06, "loss": 0.3003, "step": 22892 }, { "epoch": 2.3274705164701097, "grad_norm": 0.28648123145103455, "learning_rate": 6.453609440004151e-06, "loss": 0.3242, "step": 22893 }, { "epoch": 2.3275721838145587, "grad_norm": 0.2840399742126465, "learning_rate": 6.453269878739635e-06, "loss": 0.3464, "step": 22894 }, { "epoch": 2.3276738511590076, "grad_norm": 0.2901569604873657, "learning_rate": 6.452930310153889e-06, "loss": 0.3062, "step": 22895 }, { "epoch": 2.3277755185034565, "grad_norm": 0.263108491897583, "learning_rate": 6.452590734248626e-06, "loss": 0.3022, "step": 22896 }, { "epoch": 2.3278771858479055, "grad_norm": 0.26656395196914673, "learning_rate": 6.4522511510255545e-06, "loss": 0.285, "step": 22897 }, { "epoch": 2.3279788531923544, "grad_norm": 0.2767728269100189, "learning_rate": 6.451911560486388e-06, "loss": 0.3638, "step": 22898 }, { "epoch": 2.328080520536804, "grad_norm": 0.27653056383132935, "learning_rate": 6.451571962632837e-06, "loss": 0.3155, "step": 22899 }, { "epoch": 2.3281821878812528, "grad_norm": 0.27695325016975403, "learning_rate": 6.451232357466609e-06, "loss": 0.3376, "step": 22900 }, { "epoch": 2.3282838552257017, "grad_norm": 0.2681453824043274, "learning_rate": 6.450892744989419e-06, "loss": 0.3126, "step": 22901 }, { "epoch": 2.3283855225701506, "grad_norm": 0.2747381329536438, "learning_rate": 6.450553125202975e-06, "loss": 0.3538, "step": 22902 }, { "epoch": 2.3284871899145996, "grad_norm": 0.2630762457847595, "learning_rate": 6.45021349810899e-06, "loss": 0.3332, "step": 22903 }, { "epoch": 2.3285888572590485, "grad_norm": 0.2718717157840729, "learning_rate": 6.449873863709174e-06, "loss": 0.3348, "step": 22904 }, { "epoch": 2.3286905246034975, "grad_norm": 0.27410346269607544, "learning_rate": 6.449534222005237e-06, "loss": 0.3524, "step": 22905 }, { "epoch": 2.3287921919479464, "grad_norm": 0.2590467035770416, "learning_rate": 6.449194572998893e-06, "loss": 0.3176, "step": 22906 }, { "epoch": 2.3288938592923953, "grad_norm": 0.2540815472602844, "learning_rate": 6.4488549166918495e-06, "loss": 0.3022, "step": 22907 }, { "epoch": 2.3289955266368443, "grad_norm": 0.2891099154949188, "learning_rate": 6.448515253085818e-06, "loss": 0.3479, "step": 22908 }, { "epoch": 2.3290971939812932, "grad_norm": 0.23478063941001892, "learning_rate": 6.4481755821825145e-06, "loss": 0.3325, "step": 22909 }, { "epoch": 2.329198861325742, "grad_norm": 0.2941766083240509, "learning_rate": 6.447835903983644e-06, "loss": 0.3673, "step": 22910 }, { "epoch": 2.329300528670191, "grad_norm": 0.2666166126728058, "learning_rate": 6.447496218490922e-06, "loss": 0.334, "step": 22911 }, { "epoch": 2.32940219601464, "grad_norm": 0.2720826268196106, "learning_rate": 6.447156525706055e-06, "loss": 0.3573, "step": 22912 }, { "epoch": 2.329503863359089, "grad_norm": 0.26274800300598145, "learning_rate": 6.446816825630759e-06, "loss": 0.3255, "step": 22913 }, { "epoch": 2.329605530703538, "grad_norm": 0.2821543216705322, "learning_rate": 6.446477118266745e-06, "loss": 0.3469, "step": 22914 }, { "epoch": 2.329707198047987, "grad_norm": 0.30082079768180847, "learning_rate": 6.446137403615722e-06, "loss": 0.3327, "step": 22915 }, { "epoch": 2.329808865392436, "grad_norm": 0.26422786712646484, "learning_rate": 6.445797681679402e-06, "loss": 0.3235, "step": 22916 }, { "epoch": 2.3299105327368848, "grad_norm": 0.2638728618621826, "learning_rate": 6.445457952459497e-06, "loss": 0.3196, "step": 22917 }, { "epoch": 2.3300122000813337, "grad_norm": 0.2906864881515503, "learning_rate": 6.445118215957718e-06, "loss": 0.3294, "step": 22918 }, { "epoch": 2.3301138674257826, "grad_norm": 0.2734799087047577, "learning_rate": 6.444778472175779e-06, "loss": 0.3284, "step": 22919 }, { "epoch": 2.3302155347702316, "grad_norm": 0.27612268924713135, "learning_rate": 6.444438721115387e-06, "loss": 0.3441, "step": 22920 }, { "epoch": 2.3303172021146805, "grad_norm": 0.2611384987831116, "learning_rate": 6.4440989627782565e-06, "loss": 0.3265, "step": 22921 }, { "epoch": 2.33041886945913, "grad_norm": 0.28769776225090027, "learning_rate": 6.4437591971661e-06, "loss": 0.3109, "step": 22922 }, { "epoch": 2.330520536803579, "grad_norm": 0.2651901841163635, "learning_rate": 6.443419424280625e-06, "loss": 0.3351, "step": 22923 }, { "epoch": 2.330622204148028, "grad_norm": 0.2947273850440979, "learning_rate": 6.443079644123549e-06, "loss": 0.3216, "step": 22924 }, { "epoch": 2.3307238714924767, "grad_norm": 0.28273680806159973, "learning_rate": 6.442739856696579e-06, "loss": 0.3487, "step": 22925 }, { "epoch": 2.3308255388369257, "grad_norm": 0.27358078956604004, "learning_rate": 6.442400062001428e-06, "loss": 0.3684, "step": 22926 }, { "epoch": 2.3309272061813746, "grad_norm": 0.2639673054218292, "learning_rate": 6.442060260039809e-06, "loss": 0.3595, "step": 22927 }, { "epoch": 2.3310288735258236, "grad_norm": 0.2577052414417267, "learning_rate": 6.441720450813432e-06, "loss": 0.3429, "step": 22928 }, { "epoch": 2.3311305408702725, "grad_norm": 0.25902995467185974, "learning_rate": 6.441380634324012e-06, "loss": 0.345, "step": 22929 }, { "epoch": 2.3312322082147214, "grad_norm": 0.26763486862182617, "learning_rate": 6.441040810573257e-06, "loss": 0.3551, "step": 22930 }, { "epoch": 2.3313338755591704, "grad_norm": 0.27291902899742126, "learning_rate": 6.4407009795628825e-06, "loss": 0.3558, "step": 22931 }, { "epoch": 2.3314355429036193, "grad_norm": 0.27064257860183716, "learning_rate": 6.440361141294599e-06, "loss": 0.3313, "step": 22932 }, { "epoch": 2.3315372102480683, "grad_norm": 0.2614859342575073, "learning_rate": 6.440021295770116e-06, "loss": 0.3446, "step": 22933 }, { "epoch": 2.331638877592517, "grad_norm": 0.2808363139629364, "learning_rate": 6.439681442991149e-06, "loss": 0.3086, "step": 22934 }, { "epoch": 2.331740544936966, "grad_norm": 0.2758959233760834, "learning_rate": 6.4393415829594086e-06, "loss": 0.3175, "step": 22935 }, { "epoch": 2.331842212281415, "grad_norm": 0.27614715695381165, "learning_rate": 6.439001715676609e-06, "loss": 0.3247, "step": 22936 }, { "epoch": 2.331943879625864, "grad_norm": 0.27862489223480225, "learning_rate": 6.4386618411444604e-06, "loss": 0.326, "step": 22937 }, { "epoch": 2.332045546970313, "grad_norm": 0.25742051005363464, "learning_rate": 6.438321959364673e-06, "loss": 0.3569, "step": 22938 }, { "epoch": 2.332147214314762, "grad_norm": 0.2649722695350647, "learning_rate": 6.4379820703389635e-06, "loss": 0.3027, "step": 22939 }, { "epoch": 2.3322488816592113, "grad_norm": 0.2805270254611969, "learning_rate": 6.437642174069042e-06, "loss": 0.337, "step": 22940 }, { "epoch": 2.3323505490036602, "grad_norm": 0.2862195670604706, "learning_rate": 6.43730227055662e-06, "loss": 0.3354, "step": 22941 }, { "epoch": 2.332452216348109, "grad_norm": 0.304145872592926, "learning_rate": 6.436962359803412e-06, "loss": 0.3087, "step": 22942 }, { "epoch": 2.332553883692558, "grad_norm": 0.28383946418762207, "learning_rate": 6.436622441811127e-06, "loss": 0.3406, "step": 22943 }, { "epoch": 2.332655551037007, "grad_norm": 0.2819647789001465, "learning_rate": 6.436282516581482e-06, "loss": 0.3072, "step": 22944 }, { "epoch": 2.332757218381456, "grad_norm": 0.2897087633609772, "learning_rate": 6.435942584116185e-06, "loss": 0.3133, "step": 22945 }, { "epoch": 2.332858885725905, "grad_norm": 0.27564600110054016, "learning_rate": 6.435602644416952e-06, "loss": 0.3263, "step": 22946 }, { "epoch": 2.332960553070354, "grad_norm": 0.26617538928985596, "learning_rate": 6.435262697485494e-06, "loss": 0.3162, "step": 22947 }, { "epoch": 2.333062220414803, "grad_norm": 0.2637735903263092, "learning_rate": 6.434922743323522e-06, "loss": 0.3395, "step": 22948 }, { "epoch": 2.3331638877592518, "grad_norm": 0.276638925075531, "learning_rate": 6.434582781932751e-06, "loss": 0.3132, "step": 22949 }, { "epoch": 2.3332655551037007, "grad_norm": 0.28246042132377625, "learning_rate": 6.4342428133148925e-06, "loss": 0.3678, "step": 22950 }, { "epoch": 2.3333672224481496, "grad_norm": 0.27476051449775696, "learning_rate": 6.4339028374716605e-06, "loss": 0.3716, "step": 22951 }, { "epoch": 2.3334688897925986, "grad_norm": 0.25876012444496155, "learning_rate": 6.433562854404766e-06, "loss": 0.3678, "step": 22952 }, { "epoch": 2.3335705571370475, "grad_norm": 0.26798373460769653, "learning_rate": 6.433222864115922e-06, "loss": 0.3161, "step": 22953 }, { "epoch": 2.3336722244814965, "grad_norm": 0.2937989830970764, "learning_rate": 6.432882866606844e-06, "loss": 0.3252, "step": 22954 }, { "epoch": 2.3337738918259454, "grad_norm": 0.2831285297870636, "learning_rate": 6.4325428618792416e-06, "loss": 0.3409, "step": 22955 }, { "epoch": 2.3338755591703944, "grad_norm": 0.2981514632701874, "learning_rate": 6.4322028499348276e-06, "loss": 0.3233, "step": 22956 }, { "epoch": 2.3339772265148433, "grad_norm": 0.2725600600242615, "learning_rate": 6.4318628307753175e-06, "loss": 0.3554, "step": 22957 }, { "epoch": 2.3340788938592922, "grad_norm": 0.273773193359375, "learning_rate": 6.431522804402421e-06, "loss": 0.3569, "step": 22958 }, { "epoch": 2.334180561203741, "grad_norm": 0.2831927537918091, "learning_rate": 6.431182770817854e-06, "loss": 0.3397, "step": 22959 }, { "epoch": 2.33428222854819, "grad_norm": 0.3077855706214905, "learning_rate": 6.430842730023327e-06, "loss": 0.3776, "step": 22960 }, { "epoch": 2.334383895892639, "grad_norm": 0.27289631962776184, "learning_rate": 6.4305026820205565e-06, "loss": 0.3227, "step": 22961 }, { "epoch": 2.334485563237088, "grad_norm": 0.2854730784893036, "learning_rate": 6.430162626811253e-06, "loss": 0.3325, "step": 22962 }, { "epoch": 2.3345872305815374, "grad_norm": 0.2711997628211975, "learning_rate": 6.429822564397128e-06, "loss": 0.3455, "step": 22963 }, { "epoch": 2.3346888979259863, "grad_norm": 0.3153201639652252, "learning_rate": 6.429482494779898e-06, "loss": 0.3127, "step": 22964 }, { "epoch": 2.3347905652704353, "grad_norm": 0.27089637517929077, "learning_rate": 6.429142417961276e-06, "loss": 0.3029, "step": 22965 }, { "epoch": 2.334892232614884, "grad_norm": 0.2781895697116852, "learning_rate": 6.428802333942971e-06, "loss": 0.3181, "step": 22966 }, { "epoch": 2.334993899959333, "grad_norm": 0.28765276074409485, "learning_rate": 6.428462242726703e-06, "loss": 0.3348, "step": 22967 }, { "epoch": 2.335095567303782, "grad_norm": 0.2956642806529999, "learning_rate": 6.428122144314179e-06, "loss": 0.3308, "step": 22968 }, { "epoch": 2.335197234648231, "grad_norm": 0.2793426811695099, "learning_rate": 6.427782038707117e-06, "loss": 0.3131, "step": 22969 }, { "epoch": 2.33529890199268, "grad_norm": 0.29353126883506775, "learning_rate": 6.427441925907227e-06, "loss": 0.3418, "step": 22970 }, { "epoch": 2.335400569337129, "grad_norm": 0.2802301049232483, "learning_rate": 6.427101805916225e-06, "loss": 0.3753, "step": 22971 }, { "epoch": 2.335502236681578, "grad_norm": 0.2876347303390503, "learning_rate": 6.426761678735822e-06, "loss": 0.3596, "step": 22972 }, { "epoch": 2.335603904026027, "grad_norm": 0.28311705589294434, "learning_rate": 6.426421544367734e-06, "loss": 0.3345, "step": 22973 }, { "epoch": 2.3357055713704757, "grad_norm": 0.29810670018196106, "learning_rate": 6.426081402813671e-06, "loss": 0.3329, "step": 22974 }, { "epoch": 2.3358072387149247, "grad_norm": 0.25532498955726624, "learning_rate": 6.4257412540753505e-06, "loss": 0.3252, "step": 22975 }, { "epoch": 2.3359089060593736, "grad_norm": 0.2764352560043335, "learning_rate": 6.425401098154484e-06, "loss": 0.3498, "step": 22976 }, { "epoch": 2.3360105734038226, "grad_norm": 0.2751726806163788, "learning_rate": 6.425060935052787e-06, "loss": 0.3181, "step": 22977 }, { "epoch": 2.3361122407482715, "grad_norm": 0.3030010163784027, "learning_rate": 6.424720764771969e-06, "loss": 0.2977, "step": 22978 }, { "epoch": 2.3362139080927204, "grad_norm": 0.2868066132068634, "learning_rate": 6.424380587313748e-06, "loss": 0.3332, "step": 22979 }, { "epoch": 2.3363155754371694, "grad_norm": 0.2944839298725128, "learning_rate": 6.424040402679835e-06, "loss": 0.3424, "step": 22980 }, { "epoch": 2.3364172427816188, "grad_norm": 0.315328985452652, "learning_rate": 6.423700210871946e-06, "loss": 0.3214, "step": 22981 }, { "epoch": 2.3365189101260677, "grad_norm": 0.2790400981903076, "learning_rate": 6.423360011891794e-06, "loss": 0.3336, "step": 22982 }, { "epoch": 2.3366205774705167, "grad_norm": 0.268970251083374, "learning_rate": 6.423019805741091e-06, "loss": 0.3568, "step": 22983 }, { "epoch": 2.3367222448149656, "grad_norm": 0.28377634286880493, "learning_rate": 6.422679592421552e-06, "loss": 0.3232, "step": 22984 }, { "epoch": 2.3368239121594145, "grad_norm": 0.26571527123451233, "learning_rate": 6.422339371934893e-06, "loss": 0.3107, "step": 22985 }, { "epoch": 2.3369255795038635, "grad_norm": 0.31425753235816956, "learning_rate": 6.421999144282826e-06, "loss": 0.3238, "step": 22986 }, { "epoch": 2.3370272468483124, "grad_norm": 0.2615748643875122, "learning_rate": 6.421658909467066e-06, "loss": 0.3413, "step": 22987 }, { "epoch": 2.3371289141927614, "grad_norm": 0.28564363718032837, "learning_rate": 6.421318667489323e-06, "loss": 0.3256, "step": 22988 }, { "epoch": 2.3372305815372103, "grad_norm": 0.2894878089427948, "learning_rate": 6.4209784183513165e-06, "loss": 0.3193, "step": 22989 }, { "epoch": 2.3373322488816592, "grad_norm": 0.28897809982299805, "learning_rate": 6.420638162054758e-06, "loss": 0.3224, "step": 22990 }, { "epoch": 2.337433916226108, "grad_norm": 0.2765094041824341, "learning_rate": 6.420297898601363e-06, "loss": 0.3587, "step": 22991 }, { "epoch": 2.337535583570557, "grad_norm": 0.2616135776042938, "learning_rate": 6.419957627992845e-06, "loss": 0.3722, "step": 22992 }, { "epoch": 2.337637250915006, "grad_norm": 0.2663944363594055, "learning_rate": 6.4196173502309155e-06, "loss": 0.319, "step": 22993 }, { "epoch": 2.337738918259455, "grad_norm": 0.2825259566307068, "learning_rate": 6.419277065317293e-06, "loss": 0.3167, "step": 22994 }, { "epoch": 2.337840585603904, "grad_norm": 0.2623174488544464, "learning_rate": 6.41893677325369e-06, "loss": 0.35, "step": 22995 }, { "epoch": 2.337942252948353, "grad_norm": 0.27325814962387085, "learning_rate": 6.418596474041819e-06, "loss": 0.3001, "step": 22996 }, { "epoch": 2.338043920292802, "grad_norm": 0.2674947679042816, "learning_rate": 6.418256167683399e-06, "loss": 0.3802, "step": 22997 }, { "epoch": 2.3381455876372508, "grad_norm": 0.2685569226741791, "learning_rate": 6.417915854180139e-06, "loss": 0.3614, "step": 22998 }, { "epoch": 2.3382472549816997, "grad_norm": 0.2786628305912018, "learning_rate": 6.417575533533756e-06, "loss": 0.3417, "step": 22999 }, { "epoch": 2.3383489223261487, "grad_norm": 0.2851296067237854, "learning_rate": 6.417235205745966e-06, "loss": 0.3327, "step": 23000 }, { "epoch": 2.3384505896705976, "grad_norm": 0.28705066442489624, "learning_rate": 6.41689487081848e-06, "loss": 0.3285, "step": 23001 }, { "epoch": 2.3385522570150465, "grad_norm": 0.27528858184814453, "learning_rate": 6.4165545287530164e-06, "loss": 0.3464, "step": 23002 }, { "epoch": 2.3386539243594955, "grad_norm": 0.2610114514827728, "learning_rate": 6.416214179551286e-06, "loss": 0.3499, "step": 23003 }, { "epoch": 2.338755591703945, "grad_norm": 0.26133909821510315, "learning_rate": 6.415873823215004e-06, "loss": 0.3419, "step": 23004 }, { "epoch": 2.338857259048394, "grad_norm": 0.28468677401542664, "learning_rate": 6.415533459745888e-06, "loss": 0.3463, "step": 23005 }, { "epoch": 2.3389589263928428, "grad_norm": 0.2638978362083435, "learning_rate": 6.4151930891456505e-06, "loss": 0.3605, "step": 23006 }, { "epoch": 2.3390605937372917, "grad_norm": 0.2731802761554718, "learning_rate": 6.414852711416008e-06, "loss": 0.3179, "step": 23007 }, { "epoch": 2.3391622610817406, "grad_norm": 0.26869115233421326, "learning_rate": 6.414512326558673e-06, "loss": 0.3354, "step": 23008 }, { "epoch": 2.3392639284261896, "grad_norm": 0.2685607075691223, "learning_rate": 6.4141719345753585e-06, "loss": 0.333, "step": 23009 }, { "epoch": 2.3393655957706385, "grad_norm": 0.2375258505344391, "learning_rate": 6.413831535467784e-06, "loss": 0.3251, "step": 23010 }, { "epoch": 2.3394672631150875, "grad_norm": 0.27865609526634216, "learning_rate": 6.413491129237661e-06, "loss": 0.3513, "step": 23011 }, { "epoch": 2.3395689304595364, "grad_norm": 0.270666241645813, "learning_rate": 6.413150715886707e-06, "loss": 0.3541, "step": 23012 }, { "epoch": 2.3396705978039853, "grad_norm": 0.2476852834224701, "learning_rate": 6.412810295416636e-06, "loss": 0.3213, "step": 23013 }, { "epoch": 2.3397722651484343, "grad_norm": 0.2653805613517761, "learning_rate": 6.41246986782916e-06, "loss": 0.3457, "step": 23014 }, { "epoch": 2.3398739324928832, "grad_norm": 0.2675724923610687, "learning_rate": 6.412129433125999e-06, "loss": 0.3006, "step": 23015 }, { "epoch": 2.339975599837332, "grad_norm": 0.25021791458129883, "learning_rate": 6.4117889913088635e-06, "loss": 0.2894, "step": 23016 }, { "epoch": 2.340077267181781, "grad_norm": 0.28009018301963806, "learning_rate": 6.411448542379473e-06, "loss": 0.3254, "step": 23017 }, { "epoch": 2.34017893452623, "grad_norm": 0.26990556716918945, "learning_rate": 6.4111080863395385e-06, "loss": 0.3307, "step": 23018 }, { "epoch": 2.340280601870679, "grad_norm": 0.2573089897632599, "learning_rate": 6.410767623190774e-06, "loss": 0.3076, "step": 23019 }, { "epoch": 2.340382269215128, "grad_norm": 0.26967164874076843, "learning_rate": 6.4104271529349015e-06, "loss": 0.3318, "step": 23020 }, { "epoch": 2.340483936559577, "grad_norm": 0.274644672870636, "learning_rate": 6.410086675573631e-06, "loss": 0.3357, "step": 23021 }, { "epoch": 2.3405856039040263, "grad_norm": 0.2825552523136139, "learning_rate": 6.4097461911086804e-06, "loss": 0.3368, "step": 23022 }, { "epoch": 2.340687271248475, "grad_norm": 0.2782246768474579, "learning_rate": 6.409405699541762e-06, "loss": 0.3202, "step": 23023 }, { "epoch": 2.340788938592924, "grad_norm": 0.2600189745426178, "learning_rate": 6.409065200874591e-06, "loss": 0.338, "step": 23024 }, { "epoch": 2.340890605937373, "grad_norm": 0.25005120038986206, "learning_rate": 6.4087246951088866e-06, "loss": 0.3196, "step": 23025 }, { "epoch": 2.340992273281822, "grad_norm": 0.2801392674446106, "learning_rate": 6.408384182246361e-06, "loss": 0.3345, "step": 23026 }, { "epoch": 2.341093940626271, "grad_norm": 0.2772064507007599, "learning_rate": 6.408043662288732e-06, "loss": 0.3122, "step": 23027 }, { "epoch": 2.34119560797072, "grad_norm": 0.28362566232681274, "learning_rate": 6.407703135237713e-06, "loss": 0.3165, "step": 23028 }, { "epoch": 2.341297275315169, "grad_norm": 0.26854297518730164, "learning_rate": 6.407362601095018e-06, "loss": 0.3107, "step": 23029 }, { "epoch": 2.341398942659618, "grad_norm": 0.273475706577301, "learning_rate": 6.407022059862367e-06, "loss": 0.3415, "step": 23030 }, { "epoch": 2.3415006100040667, "grad_norm": 0.29233071208000183, "learning_rate": 6.406681511541473e-06, "loss": 0.3446, "step": 23031 }, { "epoch": 2.3416022773485157, "grad_norm": 0.28110677003860474, "learning_rate": 6.406340956134051e-06, "loss": 0.343, "step": 23032 }, { "epoch": 2.3417039446929646, "grad_norm": 0.2711358666419983, "learning_rate": 6.406000393641818e-06, "loss": 0.3176, "step": 23033 }, { "epoch": 2.3418056120374136, "grad_norm": 0.26893338561058044, "learning_rate": 6.405659824066488e-06, "loss": 0.3631, "step": 23034 }, { "epoch": 2.3419072793818625, "grad_norm": 0.26474571228027344, "learning_rate": 6.4053192474097805e-06, "loss": 0.3371, "step": 23035 }, { "epoch": 2.3420089467263114, "grad_norm": 0.2643907368183136, "learning_rate": 6.404978663673407e-06, "loss": 0.3237, "step": 23036 }, { "epoch": 2.3421106140707604, "grad_norm": 0.266340434551239, "learning_rate": 6.404638072859084e-06, "loss": 0.3412, "step": 23037 }, { "epoch": 2.3422122814152093, "grad_norm": 0.2688429653644562, "learning_rate": 6.404297474968529e-06, "loss": 0.3185, "step": 23038 }, { "epoch": 2.3423139487596583, "grad_norm": 0.29110461473464966, "learning_rate": 6.403956870003456e-06, "loss": 0.3561, "step": 23039 }, { "epoch": 2.342415616104107, "grad_norm": 0.26976221799850464, "learning_rate": 6.403616257965584e-06, "loss": 0.3233, "step": 23040 }, { "epoch": 2.342517283448556, "grad_norm": 0.27036628127098083, "learning_rate": 6.4032756388566265e-06, "loss": 0.3507, "step": 23041 }, { "epoch": 2.342618950793005, "grad_norm": 0.2851907014846802, "learning_rate": 6.4029350126782986e-06, "loss": 0.3678, "step": 23042 }, { "epoch": 2.342720618137454, "grad_norm": 0.2908819615840912, "learning_rate": 6.402594379432318e-06, "loss": 0.3547, "step": 23043 }, { "epoch": 2.342822285481903, "grad_norm": 0.25730061531066895, "learning_rate": 6.402253739120399e-06, "loss": 0.3852, "step": 23044 }, { "epoch": 2.3429239528263524, "grad_norm": 0.2669442296028137, "learning_rate": 6.4019130917442605e-06, "loss": 0.3336, "step": 23045 }, { "epoch": 2.3430256201708013, "grad_norm": 0.28283560276031494, "learning_rate": 6.401572437305618e-06, "loss": 0.3351, "step": 23046 }, { "epoch": 2.3431272875152502, "grad_norm": 0.3078291118144989, "learning_rate": 6.4012317758061835e-06, "loss": 0.3659, "step": 23047 }, { "epoch": 2.343228954859699, "grad_norm": 0.26075422763824463, "learning_rate": 6.400891107247678e-06, "loss": 0.3298, "step": 23048 }, { "epoch": 2.343330622204148, "grad_norm": 0.27019035816192627, "learning_rate": 6.400550431631817e-06, "loss": 0.3658, "step": 23049 }, { "epoch": 2.343432289548597, "grad_norm": 0.2660435438156128, "learning_rate": 6.400209748960315e-06, "loss": 0.3346, "step": 23050 }, { "epoch": 2.343533956893046, "grad_norm": 0.30291837453842163, "learning_rate": 6.399869059234889e-06, "loss": 0.3379, "step": 23051 }, { "epoch": 2.343635624237495, "grad_norm": 0.2894449234008789, "learning_rate": 6.399528362457254e-06, "loss": 0.3192, "step": 23052 }, { "epoch": 2.343737291581944, "grad_norm": 0.26177793741226196, "learning_rate": 6.399187658629129e-06, "loss": 0.309, "step": 23053 }, { "epoch": 2.343838958926393, "grad_norm": 0.26900631189346313, "learning_rate": 6.3988469477522286e-06, "loss": 0.3089, "step": 23054 }, { "epoch": 2.3439406262708418, "grad_norm": 0.2671026885509491, "learning_rate": 6.39850622982827e-06, "loss": 0.3312, "step": 23055 }, { "epoch": 2.3440422936152907, "grad_norm": 0.26186445355415344, "learning_rate": 6.3981655048589696e-06, "loss": 0.3295, "step": 23056 }, { "epoch": 2.3441439609597396, "grad_norm": 0.2788688838481903, "learning_rate": 6.397824772846043e-06, "loss": 0.3124, "step": 23057 }, { "epoch": 2.3442456283041886, "grad_norm": 0.27747562527656555, "learning_rate": 6.397484033791208e-06, "loss": 0.3248, "step": 23058 }, { "epoch": 2.3443472956486375, "grad_norm": 0.27569347620010376, "learning_rate": 6.39714328769618e-06, "loss": 0.3346, "step": 23059 }, { "epoch": 2.3444489629930865, "grad_norm": 0.2594897747039795, "learning_rate": 6.396802534562676e-06, "loss": 0.3455, "step": 23060 }, { "epoch": 2.3445506303375354, "grad_norm": 0.2535429000854492, "learning_rate": 6.396461774392414e-06, "loss": 0.3596, "step": 23061 }, { "epoch": 2.3446522976819844, "grad_norm": 0.2598963975906372, "learning_rate": 6.396121007187108e-06, "loss": 0.3214, "step": 23062 }, { "epoch": 2.3447539650264337, "grad_norm": 0.2570129334926605, "learning_rate": 6.395780232948477e-06, "loss": 0.3274, "step": 23063 }, { "epoch": 2.3448556323708827, "grad_norm": 0.2692984342575073, "learning_rate": 6.395439451678236e-06, "loss": 0.351, "step": 23064 }, { "epoch": 2.3449572997153316, "grad_norm": 0.2556864619255066, "learning_rate": 6.3950986633781035e-06, "loss": 0.2976, "step": 23065 }, { "epoch": 2.3450589670597806, "grad_norm": 0.2851910889148712, "learning_rate": 6.394757868049796e-06, "loss": 0.3495, "step": 23066 }, { "epoch": 2.3451606344042295, "grad_norm": 0.2821791470050812, "learning_rate": 6.394417065695029e-06, "loss": 0.3466, "step": 23067 }, { "epoch": 2.3452623017486784, "grad_norm": 0.272131085395813, "learning_rate": 6.39407625631552e-06, "loss": 0.3204, "step": 23068 }, { "epoch": 2.3453639690931274, "grad_norm": 0.2708814740180969, "learning_rate": 6.393735439912986e-06, "loss": 0.3572, "step": 23069 }, { "epoch": 2.3454656364375763, "grad_norm": 0.26282599568367004, "learning_rate": 6.393394616489143e-06, "loss": 0.3368, "step": 23070 }, { "epoch": 2.3455673037820253, "grad_norm": 0.29431024193763733, "learning_rate": 6.393053786045712e-06, "loss": 0.3718, "step": 23071 }, { "epoch": 2.345668971126474, "grad_norm": 0.2735050320625305, "learning_rate": 6.392712948584405e-06, "loss": 0.3353, "step": 23072 }, { "epoch": 2.345770638470923, "grad_norm": 0.2699216604232788, "learning_rate": 6.392372104106942e-06, "loss": 0.3447, "step": 23073 }, { "epoch": 2.345872305815372, "grad_norm": 0.2939639687538147, "learning_rate": 6.3920312526150385e-06, "loss": 0.3615, "step": 23074 }, { "epoch": 2.345973973159821, "grad_norm": 0.291502982378006, "learning_rate": 6.391690394110412e-06, "loss": 0.3555, "step": 23075 }, { "epoch": 2.34607564050427, "grad_norm": 0.2664434313774109, "learning_rate": 6.391349528594782e-06, "loss": 0.3213, "step": 23076 }, { "epoch": 2.346177307848719, "grad_norm": 0.27584308385849, "learning_rate": 6.391008656069861e-06, "loss": 0.3531, "step": 23077 }, { "epoch": 2.346278975193168, "grad_norm": 0.2946646511554718, "learning_rate": 6.390667776537372e-06, "loss": 0.3265, "step": 23078 }, { "epoch": 2.346380642537617, "grad_norm": 0.2820572257041931, "learning_rate": 6.3903268899990255e-06, "loss": 0.3375, "step": 23079 }, { "epoch": 2.3464823098820657, "grad_norm": 0.2740767300128937, "learning_rate": 6.389985996456545e-06, "loss": 0.3112, "step": 23080 }, { "epoch": 2.3465839772265147, "grad_norm": 0.2747558057308197, "learning_rate": 6.389645095911645e-06, "loss": 0.3205, "step": 23081 }, { "epoch": 2.3466856445709636, "grad_norm": 0.2753906548023224, "learning_rate": 6.389304188366043e-06, "loss": 0.3476, "step": 23082 }, { "epoch": 2.3467873119154126, "grad_norm": 0.2828716039657593, "learning_rate": 6.388963273821458e-06, "loss": 0.3123, "step": 23083 }, { "epoch": 2.3468889792598615, "grad_norm": 0.26164400577545166, "learning_rate": 6.3886223522796055e-06, "loss": 0.3207, "step": 23084 }, { "epoch": 2.3469906466043104, "grad_norm": 0.2808072566986084, "learning_rate": 6.388281423742201e-06, "loss": 0.3391, "step": 23085 }, { "epoch": 2.34709231394876, "grad_norm": 0.256542831659317, "learning_rate": 6.387940488210968e-06, "loss": 0.3169, "step": 23086 }, { "epoch": 2.3471939812932088, "grad_norm": 0.27242153882980347, "learning_rate": 6.387599545687618e-06, "loss": 0.3289, "step": 23087 }, { "epoch": 2.3472956486376577, "grad_norm": 0.28063052892684937, "learning_rate": 6.387258596173873e-06, "loss": 0.3719, "step": 23088 }, { "epoch": 2.3473973159821067, "grad_norm": 0.2658783495426178, "learning_rate": 6.386917639671448e-06, "loss": 0.3065, "step": 23089 }, { "epoch": 2.3474989833265556, "grad_norm": 0.2820722460746765, "learning_rate": 6.386576676182062e-06, "loss": 0.3559, "step": 23090 }, { "epoch": 2.3476006506710045, "grad_norm": 0.3018556833267212, "learning_rate": 6.386235705707432e-06, "loss": 0.3418, "step": 23091 }, { "epoch": 2.3477023180154535, "grad_norm": 0.3147248923778534, "learning_rate": 6.3858947282492755e-06, "loss": 0.3271, "step": 23092 }, { "epoch": 2.3478039853599024, "grad_norm": 0.2825002670288086, "learning_rate": 6.385553743809312e-06, "loss": 0.3261, "step": 23093 }, { "epoch": 2.3479056527043514, "grad_norm": 0.2801212966442108, "learning_rate": 6.385212752389257e-06, "loss": 0.3345, "step": 23094 }, { "epoch": 2.3480073200488003, "grad_norm": 0.2882397770881653, "learning_rate": 6.384871753990829e-06, "loss": 0.3345, "step": 23095 }, { "epoch": 2.3481089873932492, "grad_norm": 0.25670063495635986, "learning_rate": 6.384530748615747e-06, "loss": 0.3428, "step": 23096 }, { "epoch": 2.348210654737698, "grad_norm": 0.269807904958725, "learning_rate": 6.384189736265727e-06, "loss": 0.3418, "step": 23097 }, { "epoch": 2.348312322082147, "grad_norm": 0.28473612666130066, "learning_rate": 6.383848716942489e-06, "loss": 0.3099, "step": 23098 }, { "epoch": 2.348413989426596, "grad_norm": 0.2705603241920471, "learning_rate": 6.383507690647751e-06, "loss": 0.333, "step": 23099 }, { "epoch": 2.348515656771045, "grad_norm": 0.2633841037750244, "learning_rate": 6.383166657383227e-06, "loss": 0.3388, "step": 23100 }, { "epoch": 2.348617324115494, "grad_norm": 0.2649267613887787, "learning_rate": 6.382825617150641e-06, "loss": 0.3493, "step": 23101 }, { "epoch": 2.348718991459943, "grad_norm": 0.2575794458389282, "learning_rate": 6.382484569951708e-06, "loss": 0.3686, "step": 23102 }, { "epoch": 2.348820658804392, "grad_norm": 0.27778202295303345, "learning_rate": 6.382143515788144e-06, "loss": 0.3069, "step": 23103 }, { "epoch": 2.348922326148841, "grad_norm": 0.2779996693134308, "learning_rate": 6.381802454661672e-06, "loss": 0.3372, "step": 23104 }, { "epoch": 2.34902399349329, "grad_norm": 0.2766376733779907, "learning_rate": 6.381461386574005e-06, "loss": 0.3222, "step": 23105 }, { "epoch": 2.349125660837739, "grad_norm": 0.25365957617759705, "learning_rate": 6.381120311526865e-06, "loss": 0.3227, "step": 23106 }, { "epoch": 2.349227328182188, "grad_norm": 0.25106874108314514, "learning_rate": 6.38077922952197e-06, "loss": 0.3476, "step": 23107 }, { "epoch": 2.349328995526637, "grad_norm": 0.26947686076164246, "learning_rate": 6.380438140561036e-06, "loss": 0.348, "step": 23108 }, { "epoch": 2.349430662871086, "grad_norm": 0.2679329216480255, "learning_rate": 6.380097044645785e-06, "loss": 0.3287, "step": 23109 }, { "epoch": 2.349532330215535, "grad_norm": 0.26806119084358215, "learning_rate": 6.37975594177793e-06, "loss": 0.2829, "step": 23110 }, { "epoch": 2.349633997559984, "grad_norm": 0.27615880966186523, "learning_rate": 6.379414831959194e-06, "loss": 0.346, "step": 23111 }, { "epoch": 2.3497356649044328, "grad_norm": 0.2612389028072357, "learning_rate": 6.379073715191294e-06, "loss": 0.3218, "step": 23112 }, { "epoch": 2.3498373322488817, "grad_norm": 0.26807841658592224, "learning_rate": 6.3787325914759476e-06, "loss": 0.3584, "step": 23113 }, { "epoch": 2.3499389995933306, "grad_norm": 0.24962827563285828, "learning_rate": 6.378391460814876e-06, "loss": 0.3214, "step": 23114 }, { "epoch": 2.3500406669377796, "grad_norm": 0.2738848924636841, "learning_rate": 6.3780503232097926e-06, "loss": 0.3331, "step": 23115 }, { "epoch": 2.3501423342822285, "grad_norm": 0.25804540514945984, "learning_rate": 6.377709178662421e-06, "loss": 0.3376, "step": 23116 }, { "epoch": 2.3502440016266775, "grad_norm": 0.2799255847930908, "learning_rate": 6.377368027174479e-06, "loss": 0.3074, "step": 23117 }, { "epoch": 2.3503456689711264, "grad_norm": 0.2544490694999695, "learning_rate": 6.3770268687476825e-06, "loss": 0.3277, "step": 23118 }, { "epoch": 2.3504473363155753, "grad_norm": 0.2639165222644806, "learning_rate": 6.376685703383752e-06, "loss": 0.3756, "step": 23119 }, { "epoch": 2.3505490036600243, "grad_norm": 0.259603351354599, "learning_rate": 6.376344531084405e-06, "loss": 0.3299, "step": 23120 }, { "epoch": 2.3506506710044732, "grad_norm": 0.255647212266922, "learning_rate": 6.376003351851363e-06, "loss": 0.3165, "step": 23121 }, { "epoch": 2.350752338348922, "grad_norm": 0.2602824866771698, "learning_rate": 6.375662165686344e-06, "loss": 0.2971, "step": 23122 }, { "epoch": 2.350854005693371, "grad_norm": 0.2625362277030945, "learning_rate": 6.375320972591064e-06, "loss": 0.3369, "step": 23123 }, { "epoch": 2.35095567303782, "grad_norm": 0.2667181193828583, "learning_rate": 6.374979772567244e-06, "loss": 0.3156, "step": 23124 }, { "epoch": 2.351057340382269, "grad_norm": 0.3129046559333801, "learning_rate": 6.374638565616604e-06, "loss": 0.3485, "step": 23125 }, { "epoch": 2.351159007726718, "grad_norm": 0.2742522358894348, "learning_rate": 6.37429735174086e-06, "loss": 0.3535, "step": 23126 }, { "epoch": 2.3512606750711673, "grad_norm": 0.28845128417015076, "learning_rate": 6.373956130941734e-06, "loss": 0.3297, "step": 23127 }, { "epoch": 2.3513623424156163, "grad_norm": 0.2915181517601013, "learning_rate": 6.373614903220942e-06, "loss": 0.3711, "step": 23128 }, { "epoch": 2.351464009760065, "grad_norm": 0.32375866174697876, "learning_rate": 6.373273668580207e-06, "loss": 0.4047, "step": 23129 }, { "epoch": 2.351565677104514, "grad_norm": 0.25378820300102234, "learning_rate": 6.372932427021243e-06, "loss": 0.3034, "step": 23130 }, { "epoch": 2.351667344448963, "grad_norm": 0.2779078185558319, "learning_rate": 6.372591178545772e-06, "loss": 0.3347, "step": 23131 }, { "epoch": 2.351769011793412, "grad_norm": 0.2999705374240875, "learning_rate": 6.372249923155513e-06, "loss": 0.3301, "step": 23132 }, { "epoch": 2.351870679137861, "grad_norm": 0.2602832019329071, "learning_rate": 6.371908660852186e-06, "loss": 0.3017, "step": 23133 }, { "epoch": 2.35197234648231, "grad_norm": 0.28567376732826233, "learning_rate": 6.37156739163751e-06, "loss": 0.3099, "step": 23134 }, { "epoch": 2.352074013826759, "grad_norm": 0.2785867750644684, "learning_rate": 6.3712261155132e-06, "loss": 0.3458, "step": 23135 }, { "epoch": 2.352175681171208, "grad_norm": 0.27128323912620544, "learning_rate": 6.370884832480981e-06, "loss": 0.3453, "step": 23136 }, { "epoch": 2.3522773485156567, "grad_norm": 0.27585071325302124, "learning_rate": 6.3705435425425695e-06, "loss": 0.3259, "step": 23137 }, { "epoch": 2.3523790158601057, "grad_norm": 0.28287726640701294, "learning_rate": 6.370202245699686e-06, "loss": 0.3288, "step": 23138 }, { "epoch": 2.3524806832045546, "grad_norm": 0.27749258279800415, "learning_rate": 6.369860941954049e-06, "loss": 0.3126, "step": 23139 }, { "epoch": 2.3525823505490036, "grad_norm": 0.29876506328582764, "learning_rate": 6.369519631307377e-06, "loss": 0.3564, "step": 23140 }, { "epoch": 2.3526840178934525, "grad_norm": 0.29639580845832825, "learning_rate": 6.3691783137613915e-06, "loss": 0.3152, "step": 23141 }, { "epoch": 2.3527856852379014, "grad_norm": 0.26161453127861023, "learning_rate": 6.368836989317811e-06, "loss": 0.3311, "step": 23142 }, { "epoch": 2.3528873525823504, "grad_norm": 0.29211822152137756, "learning_rate": 6.368495657978355e-06, "loss": 0.3479, "step": 23143 }, { "epoch": 2.3529890199267993, "grad_norm": 0.27478525042533875, "learning_rate": 6.368154319744744e-06, "loss": 0.3454, "step": 23144 }, { "epoch": 2.3530906872712487, "grad_norm": 0.27931976318359375, "learning_rate": 6.367812974618694e-06, "loss": 0.3137, "step": 23145 }, { "epoch": 2.3531923546156976, "grad_norm": 0.2793050706386566, "learning_rate": 6.367471622601928e-06, "loss": 0.3358, "step": 23146 }, { "epoch": 2.3532940219601466, "grad_norm": 0.27106809616088867, "learning_rate": 6.367130263696168e-06, "loss": 0.3771, "step": 23147 }, { "epoch": 2.3533956893045955, "grad_norm": 0.28287675976753235, "learning_rate": 6.3667888979031265e-06, "loss": 0.3658, "step": 23148 }, { "epoch": 2.3534973566490445, "grad_norm": 0.28885260224342346, "learning_rate": 6.366447525224531e-06, "loss": 0.3387, "step": 23149 }, { "epoch": 2.3535990239934934, "grad_norm": 0.27591457962989807, "learning_rate": 6.366106145662096e-06, "loss": 0.3214, "step": 23150 }, { "epoch": 2.3537006913379424, "grad_norm": 0.28303244709968567, "learning_rate": 6.365764759217541e-06, "loss": 0.3219, "step": 23151 }, { "epoch": 2.3538023586823913, "grad_norm": 0.271623820066452, "learning_rate": 6.36542336589259e-06, "loss": 0.3291, "step": 23152 }, { "epoch": 2.3539040260268402, "grad_norm": 0.2631971836090088, "learning_rate": 6.36508196568896e-06, "loss": 0.3116, "step": 23153 }, { "epoch": 2.354005693371289, "grad_norm": 0.2682403326034546, "learning_rate": 6.364740558608372e-06, "loss": 0.3213, "step": 23154 }, { "epoch": 2.354107360715738, "grad_norm": 0.2854865789413452, "learning_rate": 6.364399144652545e-06, "loss": 0.3265, "step": 23155 }, { "epoch": 2.354209028060187, "grad_norm": 0.27397096157073975, "learning_rate": 6.364057723823198e-06, "loss": 0.3559, "step": 23156 }, { "epoch": 2.354310695404636, "grad_norm": 0.2601408064365387, "learning_rate": 6.3637162961220545e-06, "loss": 0.3406, "step": 23157 }, { "epoch": 2.354412362749085, "grad_norm": 0.2691115140914917, "learning_rate": 6.363374861550831e-06, "loss": 0.3357, "step": 23158 }, { "epoch": 2.354514030093534, "grad_norm": 0.2508537471294403, "learning_rate": 6.36303342011125e-06, "loss": 0.3267, "step": 23159 }, { "epoch": 2.354615697437983, "grad_norm": 0.2687641978263855, "learning_rate": 6.362691971805031e-06, "loss": 0.3234, "step": 23160 }, { "epoch": 2.3547173647824318, "grad_norm": 0.27612292766571045, "learning_rate": 6.362350516633891e-06, "loss": 0.3171, "step": 23161 }, { "epoch": 2.3548190321268807, "grad_norm": 0.2787790596485138, "learning_rate": 6.362009054599554e-06, "loss": 0.328, "step": 23162 }, { "epoch": 2.3549206994713296, "grad_norm": 0.25369006395339966, "learning_rate": 6.361667585703739e-06, "loss": 0.3296, "step": 23163 }, { "epoch": 2.3550223668157786, "grad_norm": 0.28788822889328003, "learning_rate": 6.361326109948167e-06, "loss": 0.3274, "step": 23164 }, { "epoch": 2.3551240341602275, "grad_norm": 0.253999263048172, "learning_rate": 6.360984627334558e-06, "loss": 0.309, "step": 23165 }, { "epoch": 2.3552257015046765, "grad_norm": 0.27880731225013733, "learning_rate": 6.360643137864629e-06, "loss": 0.3489, "step": 23166 }, { "epoch": 2.3553273688491254, "grad_norm": 0.26618629693984985, "learning_rate": 6.3603016415401066e-06, "loss": 0.34, "step": 23167 }, { "epoch": 2.355429036193575, "grad_norm": 0.2832384407520294, "learning_rate": 6.359960138362706e-06, "loss": 0.3354, "step": 23168 }, { "epoch": 2.3555307035380237, "grad_norm": 0.28674179315567017, "learning_rate": 6.35961862833415e-06, "loss": 0.3362, "step": 23169 }, { "epoch": 2.3556323708824727, "grad_norm": 0.24272520840168, "learning_rate": 6.359277111456159e-06, "loss": 0.3211, "step": 23170 }, { "epoch": 2.3557340382269216, "grad_norm": 0.2667140066623688, "learning_rate": 6.358935587730451e-06, "loss": 0.3434, "step": 23171 }, { "epoch": 2.3558357055713706, "grad_norm": 0.29805874824523926, "learning_rate": 6.358594057158751e-06, "loss": 0.3431, "step": 23172 }, { "epoch": 2.3559373729158195, "grad_norm": 0.28861647844314575, "learning_rate": 6.358252519742775e-06, "loss": 0.3052, "step": 23173 }, { "epoch": 2.3560390402602684, "grad_norm": 0.2634260952472687, "learning_rate": 6.357910975484247e-06, "loss": 0.3196, "step": 23174 }, { "epoch": 2.3561407076047174, "grad_norm": 0.2544274628162384, "learning_rate": 6.357569424384886e-06, "loss": 0.3086, "step": 23175 }, { "epoch": 2.3562423749491663, "grad_norm": 0.2551913559436798, "learning_rate": 6.35722786644641e-06, "loss": 0.3445, "step": 23176 }, { "epoch": 2.3563440422936153, "grad_norm": 0.2635291814804077, "learning_rate": 6.356886301670546e-06, "loss": 0.3231, "step": 23177 }, { "epoch": 2.356445709638064, "grad_norm": 0.3123951554298401, "learning_rate": 6.356544730059011e-06, "loss": 0.3479, "step": 23178 }, { "epoch": 2.356547376982513, "grad_norm": 0.2545475661754608, "learning_rate": 6.356203151613525e-06, "loss": 0.324, "step": 23179 }, { "epoch": 2.356649044326962, "grad_norm": 0.2697564661502838, "learning_rate": 6.3558615663358106e-06, "loss": 0.3296, "step": 23180 }, { "epoch": 2.356750711671411, "grad_norm": 0.27058646082878113, "learning_rate": 6.355519974227585e-06, "loss": 0.3206, "step": 23181 }, { "epoch": 2.35685237901586, "grad_norm": 0.283349871635437, "learning_rate": 6.355178375290575e-06, "loss": 0.3442, "step": 23182 }, { "epoch": 2.356954046360309, "grad_norm": 0.26606056094169617, "learning_rate": 6.3548367695264975e-06, "loss": 0.3513, "step": 23183 }, { "epoch": 2.357055713704758, "grad_norm": 0.2709641456604004, "learning_rate": 6.354495156937074e-06, "loss": 0.3368, "step": 23184 }, { "epoch": 2.357157381049207, "grad_norm": 0.27530768513679504, "learning_rate": 6.354153537524027e-06, "loss": 0.3386, "step": 23185 }, { "epoch": 2.357259048393656, "grad_norm": 0.28035154938697815, "learning_rate": 6.353811911289074e-06, "loss": 0.3266, "step": 23186 }, { "epoch": 2.357360715738105, "grad_norm": 0.28277185559272766, "learning_rate": 6.35347027823394e-06, "loss": 0.369, "step": 23187 }, { "epoch": 2.357462383082554, "grad_norm": 0.26828181743621826, "learning_rate": 6.353128638360344e-06, "loss": 0.3053, "step": 23188 }, { "epoch": 2.357564050427003, "grad_norm": 0.2363726645708084, "learning_rate": 6.352786991670008e-06, "loss": 0.2963, "step": 23189 }, { "epoch": 2.357665717771452, "grad_norm": 0.28543001413345337, "learning_rate": 6.3524453381646525e-06, "loss": 0.3115, "step": 23190 }, { "epoch": 2.357767385115901, "grad_norm": 0.275644451379776, "learning_rate": 6.352103677845996e-06, "loss": 0.3548, "step": 23191 }, { "epoch": 2.35786905246035, "grad_norm": 0.2808692157268524, "learning_rate": 6.3517620107157665e-06, "loss": 0.3331, "step": 23192 }, { "epoch": 2.3579707198047988, "grad_norm": 0.26272332668304443, "learning_rate": 6.35142033677568e-06, "loss": 0.3529, "step": 23193 }, { "epoch": 2.3580723871492477, "grad_norm": 0.26941004395484924, "learning_rate": 6.351078656027457e-06, "loss": 0.3406, "step": 23194 }, { "epoch": 2.3581740544936967, "grad_norm": 0.26677680015563965, "learning_rate": 6.350736968472823e-06, "loss": 0.3102, "step": 23195 }, { "epoch": 2.3582757218381456, "grad_norm": 0.2418564110994339, "learning_rate": 6.3503952741134954e-06, "loss": 0.3239, "step": 23196 }, { "epoch": 2.3583773891825945, "grad_norm": 0.2918734550476074, "learning_rate": 6.3500535729511995e-06, "loss": 0.3674, "step": 23197 }, { "epoch": 2.3584790565270435, "grad_norm": 0.2860235571861267, "learning_rate": 6.349711864987653e-06, "loss": 0.3363, "step": 23198 }, { "epoch": 2.3585807238714924, "grad_norm": 0.2739127278327942, "learning_rate": 6.34937015022458e-06, "loss": 0.3241, "step": 23199 }, { "epoch": 2.3586823912159414, "grad_norm": 0.3091871440410614, "learning_rate": 6.3490284286637e-06, "loss": 0.3343, "step": 23200 }, { "epoch": 2.3587840585603903, "grad_norm": 0.29585006833076477, "learning_rate": 6.3486867003067355e-06, "loss": 0.3487, "step": 23201 }, { "epoch": 2.3588857259048392, "grad_norm": 0.27475613355636597, "learning_rate": 6.348344965155407e-06, "loss": 0.3583, "step": 23202 }, { "epoch": 2.358987393249288, "grad_norm": 0.28184840083122253, "learning_rate": 6.3480032232114385e-06, "loss": 0.3315, "step": 23203 }, { "epoch": 2.359089060593737, "grad_norm": 0.266767293214798, "learning_rate": 6.34766147447655e-06, "loss": 0.3111, "step": 23204 }, { "epoch": 2.359190727938186, "grad_norm": 0.28558897972106934, "learning_rate": 6.347319718952463e-06, "loss": 0.3547, "step": 23205 }, { "epoch": 2.359292395282635, "grad_norm": 0.2853907346725464, "learning_rate": 6.346977956640899e-06, "loss": 0.3006, "step": 23206 }, { "epoch": 2.359394062627084, "grad_norm": 0.2738696038722992, "learning_rate": 6.346636187543579e-06, "loss": 0.3415, "step": 23207 }, { "epoch": 2.359495729971533, "grad_norm": 0.272418349981308, "learning_rate": 6.346294411662228e-06, "loss": 0.3267, "step": 23208 }, { "epoch": 2.3595973973159823, "grad_norm": 0.28834739327430725, "learning_rate": 6.345952628998564e-06, "loss": 0.3409, "step": 23209 }, { "epoch": 2.3596990646604312, "grad_norm": 0.284824401140213, "learning_rate": 6.345610839554313e-06, "loss": 0.305, "step": 23210 }, { "epoch": 2.35980073200488, "grad_norm": 0.3194515109062195, "learning_rate": 6.345269043331191e-06, "loss": 0.3245, "step": 23211 }, { "epoch": 2.359902399349329, "grad_norm": 0.2922246754169464, "learning_rate": 6.3449272403309245e-06, "loss": 0.303, "step": 23212 }, { "epoch": 2.360004066693778, "grad_norm": 0.29166287183761597, "learning_rate": 6.344585430555235e-06, "loss": 0.3429, "step": 23213 }, { "epoch": 2.360105734038227, "grad_norm": 0.2701665163040161, "learning_rate": 6.344243614005843e-06, "loss": 0.3607, "step": 23214 }, { "epoch": 2.360207401382676, "grad_norm": 0.297536164522171, "learning_rate": 6.343901790684472e-06, "loss": 0.3717, "step": 23215 }, { "epoch": 2.360309068727125, "grad_norm": 0.2921035587787628, "learning_rate": 6.343559960592842e-06, "loss": 0.3436, "step": 23216 }, { "epoch": 2.360410736071574, "grad_norm": 0.29585617780685425, "learning_rate": 6.343218123732676e-06, "loss": 0.3458, "step": 23217 }, { "epoch": 2.3605124034160228, "grad_norm": 0.27420660853385925, "learning_rate": 6.342876280105699e-06, "loss": 0.3265, "step": 23218 }, { "epoch": 2.3606140707604717, "grad_norm": 0.26228004693984985, "learning_rate": 6.342534429713627e-06, "loss": 0.3335, "step": 23219 }, { "epoch": 2.3607157381049206, "grad_norm": 0.2895186245441437, "learning_rate": 6.342192572558188e-06, "loss": 0.3194, "step": 23220 }, { "epoch": 2.3608174054493696, "grad_norm": 0.2590439021587372, "learning_rate": 6.3418507086410995e-06, "loss": 0.3393, "step": 23221 }, { "epoch": 2.3609190727938185, "grad_norm": 0.27997663617134094, "learning_rate": 6.341508837964088e-06, "loss": 0.3295, "step": 23222 }, { "epoch": 2.3610207401382675, "grad_norm": 0.27972105145454407, "learning_rate": 6.341166960528873e-06, "loss": 0.3139, "step": 23223 }, { "epoch": 2.3611224074827164, "grad_norm": 0.28392890095710754, "learning_rate": 6.340825076337177e-06, "loss": 0.318, "step": 23224 }, { "epoch": 2.3612240748271653, "grad_norm": 0.2824843227863312, "learning_rate": 6.3404831853907244e-06, "loss": 0.3296, "step": 23225 }, { "epoch": 2.3613257421716143, "grad_norm": 0.28070205450057983, "learning_rate": 6.340141287691235e-06, "loss": 0.324, "step": 23226 }, { "epoch": 2.3614274095160637, "grad_norm": 0.28226470947265625, "learning_rate": 6.339799383240432e-06, "loss": 0.328, "step": 23227 }, { "epoch": 2.3615290768605126, "grad_norm": 0.2658194601535797, "learning_rate": 6.339457472040038e-06, "loss": 0.3256, "step": 23228 }, { "epoch": 2.3616307442049616, "grad_norm": 0.275722473859787, "learning_rate": 6.339115554091776e-06, "loss": 0.3316, "step": 23229 }, { "epoch": 2.3617324115494105, "grad_norm": 0.2688830494880676, "learning_rate": 6.338773629397369e-06, "loss": 0.3542, "step": 23230 }, { "epoch": 2.3618340788938594, "grad_norm": 0.2831572890281677, "learning_rate": 6.338431697958537e-06, "loss": 0.3561, "step": 23231 }, { "epoch": 2.3619357462383084, "grad_norm": 0.26848146319389343, "learning_rate": 6.338089759777004e-06, "loss": 0.3123, "step": 23232 }, { "epoch": 2.3620374135827573, "grad_norm": 0.2557700574398041, "learning_rate": 6.337747814854495e-06, "loss": 0.312, "step": 23233 }, { "epoch": 2.3621390809272063, "grad_norm": 0.2709304094314575, "learning_rate": 6.337405863192728e-06, "loss": 0.3775, "step": 23234 }, { "epoch": 2.362240748271655, "grad_norm": 0.26833730936050415, "learning_rate": 6.33706390479343e-06, "loss": 0.3374, "step": 23235 }, { "epoch": 2.362342415616104, "grad_norm": 0.2761908769607544, "learning_rate": 6.33672193965832e-06, "loss": 0.3444, "step": 23236 }, { "epoch": 2.362444082960553, "grad_norm": 0.26178306341171265, "learning_rate": 6.336379967789124e-06, "loss": 0.3445, "step": 23237 }, { "epoch": 2.362545750305002, "grad_norm": 0.2649660110473633, "learning_rate": 6.336037989187562e-06, "loss": 0.315, "step": 23238 }, { "epoch": 2.362647417649451, "grad_norm": 0.2597002685070038, "learning_rate": 6.3356960038553595e-06, "loss": 0.2977, "step": 23239 }, { "epoch": 2.3627490849939, "grad_norm": 0.2781220078468323, "learning_rate": 6.335354011794237e-06, "loss": 0.3453, "step": 23240 }, { "epoch": 2.362850752338349, "grad_norm": 0.30449917912483215, "learning_rate": 6.335012013005919e-06, "loss": 0.3073, "step": 23241 }, { "epoch": 2.362952419682798, "grad_norm": 0.29569175839424133, "learning_rate": 6.334670007492126e-06, "loss": 0.349, "step": 23242 }, { "epoch": 2.3630540870272467, "grad_norm": 0.25587937235832214, "learning_rate": 6.3343279952545856e-06, "loss": 0.3277, "step": 23243 }, { "epoch": 2.3631557543716957, "grad_norm": 0.24503986537456512, "learning_rate": 6.3339859762950154e-06, "loss": 0.3484, "step": 23244 }, { "epoch": 2.3632574217161446, "grad_norm": 0.26540279388427734, "learning_rate": 6.333643950615143e-06, "loss": 0.3455, "step": 23245 }, { "epoch": 2.3633590890605936, "grad_norm": 0.29582101106643677, "learning_rate": 6.333301918216687e-06, "loss": 0.3107, "step": 23246 }, { "epoch": 2.3634607564050425, "grad_norm": 0.2737251818180084, "learning_rate": 6.332959879101372e-06, "loss": 0.3148, "step": 23247 }, { "epoch": 2.3635624237494914, "grad_norm": 0.2760995328426361, "learning_rate": 6.332617833270925e-06, "loss": 0.3338, "step": 23248 }, { "epoch": 2.3636640910939404, "grad_norm": 0.26887354254722595, "learning_rate": 6.3322757807270644e-06, "loss": 0.3456, "step": 23249 }, { "epoch": 2.3637657584383898, "grad_norm": 0.26954320073127747, "learning_rate": 6.331933721471516e-06, "loss": 0.3208, "step": 23250 }, { "epoch": 2.3638674257828387, "grad_norm": 0.27363675832748413, "learning_rate": 6.331591655506001e-06, "loss": 0.3219, "step": 23251 }, { "epoch": 2.3639690931272876, "grad_norm": 0.27520981431007385, "learning_rate": 6.331249582832243e-06, "loss": 0.3105, "step": 23252 }, { "epoch": 2.3640707604717366, "grad_norm": 0.2799781858921051, "learning_rate": 6.330907503451969e-06, "loss": 0.3462, "step": 23253 }, { "epoch": 2.3641724278161855, "grad_norm": 0.2878982722759247, "learning_rate": 6.330565417366896e-06, "loss": 0.3206, "step": 23254 }, { "epoch": 2.3642740951606345, "grad_norm": 0.2864816188812256, "learning_rate": 6.3302233245787525e-06, "loss": 0.3618, "step": 23255 }, { "epoch": 2.3643757625050834, "grad_norm": 0.28958797454833984, "learning_rate": 6.32988122508926e-06, "loss": 0.3423, "step": 23256 }, { "epoch": 2.3644774298495324, "grad_norm": 0.2741999924182892, "learning_rate": 6.3295391189001385e-06, "loss": 0.3477, "step": 23257 }, { "epoch": 2.3645790971939813, "grad_norm": 0.26525992155075073, "learning_rate": 6.3291970060131195e-06, "loss": 0.3377, "step": 23258 }, { "epoch": 2.3646807645384302, "grad_norm": 0.2701093852519989, "learning_rate": 6.328854886429921e-06, "loss": 0.3442, "step": 23259 }, { "epoch": 2.364782431882879, "grad_norm": 0.2874407470226288, "learning_rate": 6.328512760152265e-06, "loss": 0.3584, "step": 23260 }, { "epoch": 2.364884099227328, "grad_norm": 0.2678370773792267, "learning_rate": 6.32817062718188e-06, "loss": 0.3484, "step": 23261 }, { "epoch": 2.364985766571777, "grad_norm": 0.27139216661453247, "learning_rate": 6.3278284875204834e-06, "loss": 0.356, "step": 23262 }, { "epoch": 2.365087433916226, "grad_norm": 0.26279911398887634, "learning_rate": 6.327486341169806e-06, "loss": 0.3413, "step": 23263 }, { "epoch": 2.365189101260675, "grad_norm": 0.2762725353240967, "learning_rate": 6.327144188131567e-06, "loss": 0.3286, "step": 23264 }, { "epoch": 2.365290768605124, "grad_norm": 0.2810452878475189, "learning_rate": 6.32680202840749e-06, "loss": 0.4138, "step": 23265 }, { "epoch": 2.365392435949573, "grad_norm": 0.26417723298072815, "learning_rate": 6.326459861999301e-06, "loss": 0.317, "step": 23266 }, { "epoch": 2.3654941032940218, "grad_norm": 0.2859349250793457, "learning_rate": 6.326117688908719e-06, "loss": 0.3417, "step": 23267 }, { "epoch": 2.365595770638471, "grad_norm": 0.27242735028266907, "learning_rate": 6.325775509137475e-06, "loss": 0.3333, "step": 23268 }, { "epoch": 2.36569743798292, "grad_norm": 0.30645355582237244, "learning_rate": 6.3254333226872885e-06, "loss": 0.3537, "step": 23269 }, { "epoch": 2.365799105327369, "grad_norm": 0.2596401274204254, "learning_rate": 6.325091129559884e-06, "loss": 0.3525, "step": 23270 }, { "epoch": 2.365900772671818, "grad_norm": 0.2775215804576874, "learning_rate": 6.324748929756985e-06, "loss": 0.3429, "step": 23271 }, { "epoch": 2.366002440016267, "grad_norm": 0.25446629524230957, "learning_rate": 6.3244067232803144e-06, "loss": 0.3427, "step": 23272 }, { "epoch": 2.366104107360716, "grad_norm": 0.2689691185951233, "learning_rate": 6.324064510131598e-06, "loss": 0.3588, "step": 23273 }, { "epoch": 2.366205774705165, "grad_norm": 0.27102696895599365, "learning_rate": 6.3237222903125605e-06, "loss": 0.3297, "step": 23274 }, { "epoch": 2.3663074420496137, "grad_norm": 0.286638081073761, "learning_rate": 6.323380063824923e-06, "loss": 0.3276, "step": 23275 }, { "epoch": 2.3664091093940627, "grad_norm": 0.25562259554862976, "learning_rate": 6.3230378306704135e-06, "loss": 0.3197, "step": 23276 }, { "epoch": 2.3665107767385116, "grad_norm": 0.29025718569755554, "learning_rate": 6.322695590850751e-06, "loss": 0.3416, "step": 23277 }, { "epoch": 2.3666124440829606, "grad_norm": 0.28505757451057434, "learning_rate": 6.322353344367664e-06, "loss": 0.3425, "step": 23278 }, { "epoch": 2.3667141114274095, "grad_norm": 0.27989739179611206, "learning_rate": 6.322011091222875e-06, "loss": 0.359, "step": 23279 }, { "epoch": 2.3668157787718584, "grad_norm": 0.26892685890197754, "learning_rate": 6.321668831418108e-06, "loss": 0.3063, "step": 23280 }, { "epoch": 2.3669174461163074, "grad_norm": 0.2630850374698639, "learning_rate": 6.321326564955088e-06, "loss": 0.3422, "step": 23281 }, { "epoch": 2.3670191134607563, "grad_norm": 0.2729860544204712, "learning_rate": 6.320984291835538e-06, "loss": 0.338, "step": 23282 }, { "epoch": 2.3671207808052053, "grad_norm": 0.253946453332901, "learning_rate": 6.320642012061183e-06, "loss": 0.331, "step": 23283 }, { "epoch": 2.367222448149654, "grad_norm": 0.2594420909881592, "learning_rate": 6.320299725633748e-06, "loss": 0.3534, "step": 23284 }, { "epoch": 2.367324115494103, "grad_norm": 0.25583887100219727, "learning_rate": 6.3199574325549576e-06, "loss": 0.3318, "step": 23285 }, { "epoch": 2.367425782838552, "grad_norm": 0.2733563482761383, "learning_rate": 6.319615132826535e-06, "loss": 0.3114, "step": 23286 }, { "epoch": 2.367527450183001, "grad_norm": 0.281732976436615, "learning_rate": 6.319272826450202e-06, "loss": 0.3187, "step": 23287 }, { "epoch": 2.36762911752745, "grad_norm": 0.28239870071411133, "learning_rate": 6.318930513427688e-06, "loss": 0.365, "step": 23288 }, { "epoch": 2.367730784871899, "grad_norm": 0.2756362855434418, "learning_rate": 6.3185881937607175e-06, "loss": 0.3453, "step": 23289 }, { "epoch": 2.367832452216348, "grad_norm": 0.26539015769958496, "learning_rate": 6.318245867451011e-06, "loss": 0.3096, "step": 23290 }, { "epoch": 2.3679341195607972, "grad_norm": 0.2646908462047577, "learning_rate": 6.317903534500296e-06, "loss": 0.3401, "step": 23291 }, { "epoch": 2.368035786905246, "grad_norm": 0.269646018743515, "learning_rate": 6.317561194910294e-06, "loss": 0.3417, "step": 23292 }, { "epoch": 2.368137454249695, "grad_norm": 0.25765088200569153, "learning_rate": 6.317218848682733e-06, "loss": 0.3362, "step": 23293 }, { "epoch": 2.368239121594144, "grad_norm": 0.2747706174850464, "learning_rate": 6.316876495819337e-06, "loss": 0.3019, "step": 23294 }, { "epoch": 2.368340788938593, "grad_norm": 0.2834382653236389, "learning_rate": 6.31653413632183e-06, "loss": 0.3074, "step": 23295 }, { "epoch": 2.368442456283042, "grad_norm": 0.26061803102493286, "learning_rate": 6.316191770191937e-06, "loss": 0.3329, "step": 23296 }, { "epoch": 2.368544123627491, "grad_norm": 0.2579176425933838, "learning_rate": 6.315849397431383e-06, "loss": 0.3275, "step": 23297 }, { "epoch": 2.36864579097194, "grad_norm": 0.2867670953273773, "learning_rate": 6.315507018041891e-06, "loss": 0.3072, "step": 23298 }, { "epoch": 2.3687474583163888, "grad_norm": 0.272744745016098, "learning_rate": 6.315164632025189e-06, "loss": 0.3395, "step": 23299 }, { "epoch": 2.3688491256608377, "grad_norm": 0.26830002665519714, "learning_rate": 6.314822239382999e-06, "loss": 0.3484, "step": 23300 }, { "epoch": 2.3689507930052867, "grad_norm": 0.2802967131137848, "learning_rate": 6.314479840117047e-06, "loss": 0.3401, "step": 23301 }, { "epoch": 2.3690524603497356, "grad_norm": 0.2841759920120239, "learning_rate": 6.314137434229058e-06, "loss": 0.3199, "step": 23302 }, { "epoch": 2.3691541276941845, "grad_norm": 0.29414984583854675, "learning_rate": 6.3137950217207565e-06, "loss": 0.3125, "step": 23303 }, { "epoch": 2.3692557950386335, "grad_norm": 0.27744707465171814, "learning_rate": 6.313452602593869e-06, "loss": 0.3175, "step": 23304 }, { "epoch": 2.3693574623830824, "grad_norm": 0.28542718291282654, "learning_rate": 6.313110176850119e-06, "loss": 0.3326, "step": 23305 }, { "epoch": 2.3694591297275314, "grad_norm": 0.27166154980659485, "learning_rate": 6.312767744491232e-06, "loss": 0.3585, "step": 23306 }, { "epoch": 2.3695607970719803, "grad_norm": 0.2754647731781006, "learning_rate": 6.312425305518932e-06, "loss": 0.3452, "step": 23307 }, { "epoch": 2.3696624644164297, "grad_norm": 0.2699778079986572, "learning_rate": 6.312082859934947e-06, "loss": 0.3196, "step": 23308 }, { "epoch": 2.3697641317608786, "grad_norm": 0.6439587473869324, "learning_rate": 6.3117404077409995e-06, "loss": 0.321, "step": 23309 }, { "epoch": 2.3698657991053276, "grad_norm": 0.2817620635032654, "learning_rate": 6.311397948938816e-06, "loss": 0.3255, "step": 23310 }, { "epoch": 2.3699674664497765, "grad_norm": 0.277874231338501, "learning_rate": 6.311055483530121e-06, "loss": 0.3468, "step": 23311 }, { "epoch": 2.3700691337942255, "grad_norm": 0.2618529498577118, "learning_rate": 6.3107130115166396e-06, "loss": 0.346, "step": 23312 }, { "epoch": 2.3701708011386744, "grad_norm": 0.2755166292190552, "learning_rate": 6.310370532900096e-06, "loss": 0.3102, "step": 23313 }, { "epoch": 2.3702724684831233, "grad_norm": 0.2718098759651184, "learning_rate": 6.310028047682219e-06, "loss": 0.3178, "step": 23314 }, { "epoch": 2.3703741358275723, "grad_norm": 0.28778600692749023, "learning_rate": 6.309685555864733e-06, "loss": 0.3437, "step": 23315 }, { "epoch": 2.3704758031720212, "grad_norm": 0.28460201621055603, "learning_rate": 6.309343057449361e-06, "loss": 0.3198, "step": 23316 }, { "epoch": 2.37057747051647, "grad_norm": 0.27542412281036377, "learning_rate": 6.309000552437829e-06, "loss": 0.3431, "step": 23317 }, { "epoch": 2.370679137860919, "grad_norm": 0.28703656792640686, "learning_rate": 6.308658040831864e-06, "loss": 0.329, "step": 23318 }, { "epoch": 2.370780805205368, "grad_norm": 0.2777119576931, "learning_rate": 6.308315522633192e-06, "loss": 0.3236, "step": 23319 }, { "epoch": 2.370882472549817, "grad_norm": 0.24992787837982178, "learning_rate": 6.307972997843537e-06, "loss": 0.3327, "step": 23320 }, { "epoch": 2.370984139894266, "grad_norm": 0.27220675349235535, "learning_rate": 6.3076304664646236e-06, "loss": 0.3341, "step": 23321 }, { "epoch": 2.371085807238715, "grad_norm": 0.27134180068969727, "learning_rate": 6.307287928498179e-06, "loss": 0.3581, "step": 23322 }, { "epoch": 2.371187474583164, "grad_norm": 0.263150691986084, "learning_rate": 6.306945383945928e-06, "loss": 0.3267, "step": 23323 }, { "epoch": 2.3712891419276128, "grad_norm": 0.272772878408432, "learning_rate": 6.306602832809598e-06, "loss": 0.3399, "step": 23324 }, { "epoch": 2.3713908092720617, "grad_norm": 0.28245246410369873, "learning_rate": 6.306260275090913e-06, "loss": 0.3384, "step": 23325 }, { "epoch": 2.3714924766165106, "grad_norm": 0.2686750590801239, "learning_rate": 6.305917710791601e-06, "loss": 0.3343, "step": 23326 }, { "epoch": 2.3715941439609596, "grad_norm": 0.2616766095161438, "learning_rate": 6.305575139913383e-06, "loss": 0.3715, "step": 23327 }, { "epoch": 2.3716958113054085, "grad_norm": 0.2676007151603699, "learning_rate": 6.305232562457988e-06, "loss": 0.3507, "step": 23328 }, { "epoch": 2.3717974786498575, "grad_norm": 0.2828739881515503, "learning_rate": 6.304889978427142e-06, "loss": 0.3268, "step": 23329 }, { "epoch": 2.3718991459943064, "grad_norm": 0.26321861147880554, "learning_rate": 6.304547387822572e-06, "loss": 0.319, "step": 23330 }, { "epoch": 2.3720008133387553, "grad_norm": 0.27830690145492554, "learning_rate": 6.304204790646e-06, "loss": 0.3056, "step": 23331 }, { "epoch": 2.3721024806832047, "grad_norm": 0.27779626846313477, "learning_rate": 6.303862186899155e-06, "loss": 0.3246, "step": 23332 }, { "epoch": 2.3722041480276537, "grad_norm": 0.2827259302139282, "learning_rate": 6.303519576583761e-06, "loss": 0.3348, "step": 23333 }, { "epoch": 2.3723058153721026, "grad_norm": 0.29226502776145935, "learning_rate": 6.303176959701547e-06, "loss": 0.3007, "step": 23334 }, { "epoch": 2.3724074827165516, "grad_norm": 0.2733488082885742, "learning_rate": 6.302834336254238e-06, "loss": 0.3368, "step": 23335 }, { "epoch": 2.3725091500610005, "grad_norm": 0.27840501070022583, "learning_rate": 6.302491706243556e-06, "loss": 0.3163, "step": 23336 }, { "epoch": 2.3726108174054494, "grad_norm": 0.2533309757709503, "learning_rate": 6.3021490696712315e-06, "loss": 0.3291, "step": 23337 }, { "epoch": 2.3727124847498984, "grad_norm": 0.26738718152046204, "learning_rate": 6.30180642653899e-06, "loss": 0.3789, "step": 23338 }, { "epoch": 2.3728141520943473, "grad_norm": 0.2871846854686737, "learning_rate": 6.301463776848557e-06, "loss": 0.3431, "step": 23339 }, { "epoch": 2.3729158194387963, "grad_norm": 0.28956207633018494, "learning_rate": 6.3011211206016594e-06, "loss": 0.3337, "step": 23340 }, { "epoch": 2.373017486783245, "grad_norm": 0.28265002369880676, "learning_rate": 6.300778457800022e-06, "loss": 0.3293, "step": 23341 }, { "epoch": 2.373119154127694, "grad_norm": 0.25897908210754395, "learning_rate": 6.300435788445371e-06, "loss": 0.3185, "step": 23342 }, { "epoch": 2.373220821472143, "grad_norm": 0.2766430675983429, "learning_rate": 6.300093112539435e-06, "loss": 0.3258, "step": 23343 }, { "epoch": 2.373322488816592, "grad_norm": 0.2811218798160553, "learning_rate": 6.299750430083939e-06, "loss": 0.3167, "step": 23344 }, { "epoch": 2.373424156161041, "grad_norm": 0.2718510627746582, "learning_rate": 6.299407741080609e-06, "loss": 0.3296, "step": 23345 }, { "epoch": 2.37352582350549, "grad_norm": 0.2807869613170624, "learning_rate": 6.299065045531171e-06, "loss": 0.352, "step": 23346 }, { "epoch": 2.373627490849939, "grad_norm": 0.28392672538757324, "learning_rate": 6.2987223434373525e-06, "loss": 0.3425, "step": 23347 }, { "epoch": 2.373729158194388, "grad_norm": 0.2680476903915405, "learning_rate": 6.2983796348008774e-06, "loss": 0.3202, "step": 23348 }, { "epoch": 2.373830825538837, "grad_norm": 0.2781970798969269, "learning_rate": 6.298036919623477e-06, "loss": 0.3312, "step": 23349 }, { "epoch": 2.373932492883286, "grad_norm": 0.2819100022315979, "learning_rate": 6.297694197906874e-06, "loss": 0.328, "step": 23350 }, { "epoch": 2.374034160227735, "grad_norm": 0.28852152824401855, "learning_rate": 6.297351469652796e-06, "loss": 0.3325, "step": 23351 }, { "epoch": 2.374135827572184, "grad_norm": 0.26288941502571106, "learning_rate": 6.2970087348629704e-06, "loss": 0.3633, "step": 23352 }, { "epoch": 2.374237494916633, "grad_norm": 0.28444772958755493, "learning_rate": 6.296665993539122e-06, "loss": 0.3106, "step": 23353 }, { "epoch": 2.374339162261082, "grad_norm": 0.30706334114074707, "learning_rate": 6.2963232456829784e-06, "loss": 0.3544, "step": 23354 }, { "epoch": 2.374440829605531, "grad_norm": 0.2668536603450775, "learning_rate": 6.2959804912962676e-06, "loss": 0.3304, "step": 23355 }, { "epoch": 2.3745424969499798, "grad_norm": 0.26473772525787354, "learning_rate": 6.295637730380712e-06, "loss": 0.3145, "step": 23356 }, { "epoch": 2.3746441642944287, "grad_norm": 0.28805646300315857, "learning_rate": 6.295294962938046e-06, "loss": 0.3664, "step": 23357 }, { "epoch": 2.3747458316388776, "grad_norm": 0.26059409976005554, "learning_rate": 6.294952188969987e-06, "loss": 0.3364, "step": 23358 }, { "epoch": 2.3748474989833266, "grad_norm": 0.2740509510040283, "learning_rate": 6.29460940847827e-06, "loss": 0.3347, "step": 23359 }, { "epoch": 2.3749491663277755, "grad_norm": 0.26601046323776245, "learning_rate": 6.294266621464618e-06, "loss": 0.3014, "step": 23360 }, { "epoch": 2.3750508336722245, "grad_norm": 0.26290982961654663, "learning_rate": 6.293923827930757e-06, "loss": 0.3372, "step": 23361 }, { "epoch": 2.3751525010166734, "grad_norm": 0.25815659761428833, "learning_rate": 6.293581027878416e-06, "loss": 0.3596, "step": 23362 }, { "epoch": 2.3752541683611224, "grad_norm": 0.2688640356063843, "learning_rate": 6.29323822130932e-06, "loss": 0.3139, "step": 23363 }, { "epoch": 2.3753558357055713, "grad_norm": 0.2644006609916687, "learning_rate": 6.292895408225198e-06, "loss": 0.3077, "step": 23364 }, { "epoch": 2.3754575030500202, "grad_norm": 0.2712070047855377, "learning_rate": 6.292552588627777e-06, "loss": 0.3441, "step": 23365 }, { "epoch": 2.375559170394469, "grad_norm": 0.27164483070373535, "learning_rate": 6.292209762518782e-06, "loss": 0.3369, "step": 23366 }, { "epoch": 2.375660837738918, "grad_norm": 0.27028465270996094, "learning_rate": 6.291866929899943e-06, "loss": 0.3333, "step": 23367 }, { "epoch": 2.375762505083367, "grad_norm": 0.2777007222175598, "learning_rate": 6.291524090772983e-06, "loss": 0.3009, "step": 23368 }, { "epoch": 2.375864172427816, "grad_norm": 0.2690766155719757, "learning_rate": 6.291181245139634e-06, "loss": 0.3419, "step": 23369 }, { "epoch": 2.375965839772265, "grad_norm": 0.2872282564640045, "learning_rate": 6.290838393001619e-06, "loss": 0.3718, "step": 23370 }, { "epoch": 2.376067507116714, "grad_norm": 0.27406686544418335, "learning_rate": 6.290495534360668e-06, "loss": 0.3365, "step": 23371 }, { "epoch": 2.376169174461163, "grad_norm": 0.27072855830192566, "learning_rate": 6.290152669218507e-06, "loss": 0.3491, "step": 23372 }, { "epoch": 2.376270841805612, "grad_norm": 0.2543485760688782, "learning_rate": 6.289809797576863e-06, "loss": 0.3103, "step": 23373 }, { "epoch": 2.376372509150061, "grad_norm": 0.2773275375366211, "learning_rate": 6.289466919437463e-06, "loss": 0.3588, "step": 23374 }, { "epoch": 2.37647417649451, "grad_norm": 0.26588183641433716, "learning_rate": 6.289124034802036e-06, "loss": 0.3296, "step": 23375 }, { "epoch": 2.376575843838959, "grad_norm": 0.2762090861797333, "learning_rate": 6.288781143672309e-06, "loss": 0.3171, "step": 23376 }, { "epoch": 2.376677511183408, "grad_norm": 0.287810355424881, "learning_rate": 6.288438246050008e-06, "loss": 0.3475, "step": 23377 }, { "epoch": 2.376779178527857, "grad_norm": 0.2669954299926758, "learning_rate": 6.2880953419368616e-06, "loss": 0.3684, "step": 23378 }, { "epoch": 2.376880845872306, "grad_norm": 0.2939283549785614, "learning_rate": 6.287752431334596e-06, "loss": 0.3387, "step": 23379 }, { "epoch": 2.376982513216755, "grad_norm": 0.29599058628082275, "learning_rate": 6.287409514244942e-06, "loss": 0.3053, "step": 23380 }, { "epoch": 2.3770841805612037, "grad_norm": 0.2654356360435486, "learning_rate": 6.287066590669622e-06, "loss": 0.3247, "step": 23381 }, { "epoch": 2.3771858479056527, "grad_norm": 0.2753296196460724, "learning_rate": 6.286723660610369e-06, "loss": 0.338, "step": 23382 }, { "epoch": 2.3772875152501016, "grad_norm": 0.302038311958313, "learning_rate": 6.286380724068906e-06, "loss": 0.3064, "step": 23383 }, { "epoch": 2.3773891825945506, "grad_norm": 0.2623042166233063, "learning_rate": 6.286037781046963e-06, "loss": 0.3247, "step": 23384 }, { "epoch": 2.3774908499389995, "grad_norm": 0.2713051736354828, "learning_rate": 6.285694831546269e-06, "loss": 0.3442, "step": 23385 }, { "epoch": 2.3775925172834484, "grad_norm": 0.2581727206707001, "learning_rate": 6.285351875568547e-06, "loss": 0.3089, "step": 23386 }, { "epoch": 2.3776941846278974, "grad_norm": 0.280949205160141, "learning_rate": 6.28500891311553e-06, "loss": 0.325, "step": 23387 }, { "epoch": 2.3777958519723463, "grad_norm": 0.2910153567790985, "learning_rate": 6.284665944188942e-06, "loss": 0.3612, "step": 23388 }, { "epoch": 2.3778975193167953, "grad_norm": 0.27864158153533936, "learning_rate": 6.2843229687905126e-06, "loss": 0.3, "step": 23389 }, { "epoch": 2.3779991866612447, "grad_norm": 0.2698681056499481, "learning_rate": 6.283979986921971e-06, "loss": 0.3417, "step": 23390 }, { "epoch": 2.3781008540056936, "grad_norm": 0.28277507424354553, "learning_rate": 6.283636998585041e-06, "loss": 0.314, "step": 23391 }, { "epoch": 2.3782025213501425, "grad_norm": 0.27721619606018066, "learning_rate": 6.283294003781455e-06, "loss": 0.3446, "step": 23392 }, { "epoch": 2.3783041886945915, "grad_norm": 0.26722973585128784, "learning_rate": 6.282951002512937e-06, "loss": 0.3176, "step": 23393 }, { "epoch": 2.3784058560390404, "grad_norm": 0.2846510410308838, "learning_rate": 6.2826079947812166e-06, "loss": 0.3365, "step": 23394 }, { "epoch": 2.3785075233834894, "grad_norm": 0.2754983901977539, "learning_rate": 6.282264980588024e-06, "loss": 0.3533, "step": 23395 }, { "epoch": 2.3786091907279383, "grad_norm": 0.26641571521759033, "learning_rate": 6.281921959935083e-06, "loss": 0.3346, "step": 23396 }, { "epoch": 2.3787108580723872, "grad_norm": 0.26765283942222595, "learning_rate": 6.281578932824125e-06, "loss": 0.3462, "step": 23397 }, { "epoch": 2.378812525416836, "grad_norm": 0.28166520595550537, "learning_rate": 6.281235899256877e-06, "loss": 0.3403, "step": 23398 }, { "epoch": 2.378914192761285, "grad_norm": 0.2537640631198883, "learning_rate": 6.280892859235066e-06, "loss": 0.3153, "step": 23399 }, { "epoch": 2.379015860105734, "grad_norm": 0.2680012583732605, "learning_rate": 6.280549812760421e-06, "loss": 0.3098, "step": 23400 }, { "epoch": 2.379117527450183, "grad_norm": 0.29447486996650696, "learning_rate": 6.2802067598346714e-06, "loss": 0.3481, "step": 23401 }, { "epoch": 2.379219194794632, "grad_norm": 0.2899477183818817, "learning_rate": 6.279863700459544e-06, "loss": 0.3326, "step": 23402 }, { "epoch": 2.379320862139081, "grad_norm": 0.2821606993675232, "learning_rate": 6.279520634636768e-06, "loss": 0.328, "step": 23403 }, { "epoch": 2.37942252948353, "grad_norm": 0.28099995851516724, "learning_rate": 6.279177562368069e-06, "loss": 0.3396, "step": 23404 }, { "epoch": 2.3795241968279788, "grad_norm": 0.28001025319099426, "learning_rate": 6.278834483655181e-06, "loss": 0.3211, "step": 23405 }, { "epoch": 2.3796258641724277, "grad_norm": 0.27001598477363586, "learning_rate": 6.278491398499826e-06, "loss": 0.3282, "step": 23406 }, { "epoch": 2.3797275315168767, "grad_norm": 0.28617537021636963, "learning_rate": 6.278148306903736e-06, "loss": 0.3672, "step": 23407 }, { "epoch": 2.3798291988613256, "grad_norm": 0.27669987082481384, "learning_rate": 6.277805208868638e-06, "loss": 0.325, "step": 23408 }, { "epoch": 2.3799308662057745, "grad_norm": 0.28114157915115356, "learning_rate": 6.277462104396262e-06, "loss": 0.339, "step": 23409 }, { "epoch": 2.3800325335502235, "grad_norm": 0.25452059507369995, "learning_rate": 6.277118993488333e-06, "loss": 0.3334, "step": 23410 }, { "epoch": 2.3801342008946724, "grad_norm": 0.28144532442092896, "learning_rate": 6.276775876146586e-06, "loss": 0.3277, "step": 23411 }, { "epoch": 2.3802358682391214, "grad_norm": 0.29250213503837585, "learning_rate": 6.276432752372743e-06, "loss": 0.3564, "step": 23412 }, { "epoch": 2.3803375355835703, "grad_norm": 0.24947109818458557, "learning_rate": 6.276089622168536e-06, "loss": 0.3232, "step": 23413 }, { "epoch": 2.3804392029280197, "grad_norm": 0.27947279810905457, "learning_rate": 6.275746485535691e-06, "loss": 0.332, "step": 23414 }, { "epoch": 2.3805408702724686, "grad_norm": 0.27077609300613403, "learning_rate": 6.27540334247594e-06, "loss": 0.3427, "step": 23415 }, { "epoch": 2.3806425376169176, "grad_norm": 0.27394410967826843, "learning_rate": 6.275060192991011e-06, "loss": 0.3461, "step": 23416 }, { "epoch": 2.3807442049613665, "grad_norm": 0.26590192317962646, "learning_rate": 6.274717037082629e-06, "loss": 0.3176, "step": 23417 }, { "epoch": 2.3808458723058155, "grad_norm": 0.27732568979263306, "learning_rate": 6.274373874752526e-06, "loss": 0.3162, "step": 23418 }, { "epoch": 2.3809475396502644, "grad_norm": 0.26928335428237915, "learning_rate": 6.274030706002432e-06, "loss": 0.3286, "step": 23419 }, { "epoch": 2.3810492069947133, "grad_norm": 0.28707560896873474, "learning_rate": 6.273687530834074e-06, "loss": 0.3062, "step": 23420 }, { "epoch": 2.3811508743391623, "grad_norm": 0.2794220745563507, "learning_rate": 6.27334434924918e-06, "loss": 0.3234, "step": 23421 }, { "epoch": 2.3812525416836112, "grad_norm": 0.2781037390232086, "learning_rate": 6.27300116124948e-06, "loss": 0.3548, "step": 23422 }, { "epoch": 2.38135420902806, "grad_norm": 0.2928716838359833, "learning_rate": 6.272657966836702e-06, "loss": 0.3066, "step": 23423 }, { "epoch": 2.381455876372509, "grad_norm": 0.2675188481807709, "learning_rate": 6.272314766012577e-06, "loss": 0.3191, "step": 23424 }, { "epoch": 2.381557543716958, "grad_norm": 0.2592678964138031, "learning_rate": 6.271971558778831e-06, "loss": 0.3113, "step": 23425 }, { "epoch": 2.381659211061407, "grad_norm": 0.27536454796791077, "learning_rate": 6.2716283451371965e-06, "loss": 0.3094, "step": 23426 }, { "epoch": 2.381760878405856, "grad_norm": 0.2807064354419708, "learning_rate": 6.271285125089398e-06, "loss": 0.3244, "step": 23427 }, { "epoch": 2.381862545750305, "grad_norm": 0.2659984230995178, "learning_rate": 6.270941898637168e-06, "loss": 0.317, "step": 23428 }, { "epoch": 2.381964213094754, "grad_norm": 0.27205801010131836, "learning_rate": 6.270598665782236e-06, "loss": 0.3375, "step": 23429 }, { "epoch": 2.3820658804392028, "grad_norm": 0.289476603269577, "learning_rate": 6.270255426526329e-06, "loss": 0.3399, "step": 23430 }, { "epoch": 2.382167547783652, "grad_norm": 0.26818880438804626, "learning_rate": 6.269912180871178e-06, "loss": 0.3046, "step": 23431 }, { "epoch": 2.382269215128101, "grad_norm": 0.27829504013061523, "learning_rate": 6.26956892881851e-06, "loss": 0.3, "step": 23432 }, { "epoch": 2.38237088247255, "grad_norm": 0.28434592485427856, "learning_rate": 6.2692256703700545e-06, "loss": 0.3202, "step": 23433 }, { "epoch": 2.382472549816999, "grad_norm": 0.26470303535461426, "learning_rate": 6.268882405527545e-06, "loss": 0.3452, "step": 23434 }, { "epoch": 2.382574217161448, "grad_norm": 0.2588628828525543, "learning_rate": 6.268539134292704e-06, "loss": 0.3318, "step": 23435 }, { "epoch": 2.382675884505897, "grad_norm": 0.2620643377304077, "learning_rate": 6.268195856667266e-06, "loss": 0.3418, "step": 23436 }, { "epoch": 2.382777551850346, "grad_norm": 0.2534632384777069, "learning_rate": 6.267852572652958e-06, "loss": 0.3023, "step": 23437 }, { "epoch": 2.3828792191947947, "grad_norm": 0.2780359387397766, "learning_rate": 6.2675092822515105e-06, "loss": 0.3103, "step": 23438 }, { "epoch": 2.3829808865392437, "grad_norm": 0.2719532251358032, "learning_rate": 6.267165985464653e-06, "loss": 0.321, "step": 23439 }, { "epoch": 2.3830825538836926, "grad_norm": 0.2668582499027252, "learning_rate": 6.266822682294113e-06, "loss": 0.3236, "step": 23440 }, { "epoch": 2.3831842212281416, "grad_norm": 0.2884818911552429, "learning_rate": 6.266479372741623e-06, "loss": 0.3499, "step": 23441 }, { "epoch": 2.3832858885725905, "grad_norm": 0.26312899589538574, "learning_rate": 6.266136056808909e-06, "loss": 0.3193, "step": 23442 }, { "epoch": 2.3833875559170394, "grad_norm": 0.2662321627140045, "learning_rate": 6.265792734497704e-06, "loss": 0.3404, "step": 23443 }, { "epoch": 2.3834892232614884, "grad_norm": 0.2708275318145752, "learning_rate": 6.265449405809736e-06, "loss": 0.3326, "step": 23444 }, { "epoch": 2.3835908906059373, "grad_norm": 0.28757575154304504, "learning_rate": 6.265106070746733e-06, "loss": 0.3338, "step": 23445 }, { "epoch": 2.3836925579503863, "grad_norm": 0.2919136583805084, "learning_rate": 6.264762729310428e-06, "loss": 0.3383, "step": 23446 }, { "epoch": 2.383794225294835, "grad_norm": 0.2667287588119507, "learning_rate": 6.264419381502548e-06, "loss": 0.3318, "step": 23447 }, { "epoch": 2.383895892639284, "grad_norm": 0.2671787440776825, "learning_rate": 6.264076027324824e-06, "loss": 0.3323, "step": 23448 }, { "epoch": 2.383997559983733, "grad_norm": 0.2669738233089447, "learning_rate": 6.263732666778984e-06, "loss": 0.3235, "step": 23449 }, { "epoch": 2.384099227328182, "grad_norm": 0.2947996258735657, "learning_rate": 6.26338929986676e-06, "loss": 0.3196, "step": 23450 }, { "epoch": 2.384200894672631, "grad_norm": 0.28524401783943176, "learning_rate": 6.263045926589882e-06, "loss": 0.3308, "step": 23451 }, { "epoch": 2.38430256201708, "grad_norm": 0.26308324933052063, "learning_rate": 6.262702546950078e-06, "loss": 0.3321, "step": 23452 }, { "epoch": 2.384404229361529, "grad_norm": 0.26749736070632935, "learning_rate": 6.26235916094908e-06, "loss": 0.3496, "step": 23453 }, { "epoch": 2.3845058967059782, "grad_norm": 0.2659337520599365, "learning_rate": 6.262015768588614e-06, "loss": 0.336, "step": 23454 }, { "epoch": 2.384607564050427, "grad_norm": 0.28507182002067566, "learning_rate": 6.261672369870413e-06, "loss": 0.332, "step": 23455 }, { "epoch": 2.384709231394876, "grad_norm": 0.27944496273994446, "learning_rate": 6.2613289647962086e-06, "loss": 0.3371, "step": 23456 }, { "epoch": 2.384810898739325, "grad_norm": 0.2774356007575989, "learning_rate": 6.260985553367727e-06, "loss": 0.3377, "step": 23457 }, { "epoch": 2.384912566083774, "grad_norm": 0.27082309126853943, "learning_rate": 6.2606421355867e-06, "loss": 0.3337, "step": 23458 }, { "epoch": 2.385014233428223, "grad_norm": 0.25571778416633606, "learning_rate": 6.260298711454856e-06, "loss": 0.3397, "step": 23459 }, { "epoch": 2.385115900772672, "grad_norm": 0.268421471118927, "learning_rate": 6.259955280973928e-06, "loss": 0.33, "step": 23460 }, { "epoch": 2.385217568117121, "grad_norm": 0.2775326669216156, "learning_rate": 6.259611844145645e-06, "loss": 0.3358, "step": 23461 }, { "epoch": 2.3853192354615698, "grad_norm": 0.2667982280254364, "learning_rate": 6.259268400971737e-06, "loss": 0.3345, "step": 23462 }, { "epoch": 2.3854209028060187, "grad_norm": 0.27525052428245544, "learning_rate": 6.258924951453934e-06, "loss": 0.3619, "step": 23463 }, { "epoch": 2.3855225701504676, "grad_norm": 0.27789008617401123, "learning_rate": 6.258581495593965e-06, "loss": 0.3157, "step": 23464 }, { "epoch": 2.3856242374949166, "grad_norm": 0.28420329093933105, "learning_rate": 6.258238033393562e-06, "loss": 0.302, "step": 23465 }, { "epoch": 2.3857259048393655, "grad_norm": 0.275971919298172, "learning_rate": 6.257894564854456e-06, "loss": 0.337, "step": 23466 }, { "epoch": 2.3858275721838145, "grad_norm": 0.3060569763183594, "learning_rate": 6.2575510899783755e-06, "loss": 0.3449, "step": 23467 }, { "epoch": 2.3859292395282634, "grad_norm": 0.2866387963294983, "learning_rate": 6.257207608767051e-06, "loss": 0.3468, "step": 23468 }, { "epoch": 2.3860309068727124, "grad_norm": 0.26904797554016113, "learning_rate": 6.256864121222212e-06, "loss": 0.3343, "step": 23469 }, { "epoch": 2.3861325742171613, "grad_norm": 0.26778244972229004, "learning_rate": 6.2565206273455925e-06, "loss": 0.3421, "step": 23470 }, { "epoch": 2.3862342415616102, "grad_norm": 0.276867151260376, "learning_rate": 6.25617712713892e-06, "loss": 0.3518, "step": 23471 }, { "epoch": 2.3863359089060596, "grad_norm": 0.27673786878585815, "learning_rate": 6.255833620603926e-06, "loss": 0.3455, "step": 23472 }, { "epoch": 2.3864375762505086, "grad_norm": 0.2660869061946869, "learning_rate": 6.25549010774234e-06, "loss": 0.3275, "step": 23473 }, { "epoch": 2.3865392435949575, "grad_norm": 0.27291348576545715, "learning_rate": 6.255146588555894e-06, "loss": 0.3489, "step": 23474 }, { "epoch": 2.3866409109394064, "grad_norm": 0.26456087827682495, "learning_rate": 6.254803063046316e-06, "loss": 0.3137, "step": 23475 }, { "epoch": 2.3867425782838554, "grad_norm": 0.262311190366745, "learning_rate": 6.25445953121534e-06, "loss": 0.3016, "step": 23476 }, { "epoch": 2.3868442456283043, "grad_norm": 0.27910810708999634, "learning_rate": 6.2541159930646945e-06, "loss": 0.3528, "step": 23477 }, { "epoch": 2.3869459129727533, "grad_norm": 0.29676613211631775, "learning_rate": 6.2537724485961115e-06, "loss": 0.3414, "step": 23478 }, { "epoch": 2.387047580317202, "grad_norm": 0.30219271779060364, "learning_rate": 6.25342889781132e-06, "loss": 0.3474, "step": 23479 }, { "epoch": 2.387149247661651, "grad_norm": 0.28036928176879883, "learning_rate": 6.253085340712051e-06, "loss": 0.3515, "step": 23480 }, { "epoch": 2.3872509150061, "grad_norm": 0.2617647647857666, "learning_rate": 6.2527417773000365e-06, "loss": 0.314, "step": 23481 }, { "epoch": 2.387352582350549, "grad_norm": 0.31058090925216675, "learning_rate": 6.252398207577008e-06, "loss": 0.3408, "step": 23482 }, { "epoch": 2.387454249694998, "grad_norm": 0.25803717970848083, "learning_rate": 6.252054631544693e-06, "loss": 0.3386, "step": 23483 }, { "epoch": 2.387555917039447, "grad_norm": 0.27584895491600037, "learning_rate": 6.251711049204824e-06, "loss": 0.3028, "step": 23484 }, { "epoch": 2.387657584383896, "grad_norm": 0.2955150306224823, "learning_rate": 6.2513674605591325e-06, "loss": 0.3486, "step": 23485 }, { "epoch": 2.387759251728345, "grad_norm": 0.29262644052505493, "learning_rate": 6.25102386560935e-06, "loss": 0.3432, "step": 23486 }, { "epoch": 2.3878609190727937, "grad_norm": 0.2702339291572571, "learning_rate": 6.250680264357207e-06, "loss": 0.3548, "step": 23487 }, { "epoch": 2.3879625864172427, "grad_norm": 0.2743350565433502, "learning_rate": 6.250336656804432e-06, "loss": 0.3573, "step": 23488 }, { "epoch": 2.3880642537616916, "grad_norm": 0.28052371740341187, "learning_rate": 6.24999304295276e-06, "loss": 0.3395, "step": 23489 }, { "epoch": 2.3881659211061406, "grad_norm": 0.24311137199401855, "learning_rate": 6.249649422803918e-06, "loss": 0.3201, "step": 23490 }, { "epoch": 2.3882675884505895, "grad_norm": 0.2726427912712097, "learning_rate": 6.249305796359642e-06, "loss": 0.3382, "step": 23491 }, { "epoch": 2.3883692557950384, "grad_norm": 0.2785797715187073, "learning_rate": 6.248962163621659e-06, "loss": 0.2948, "step": 23492 }, { "epoch": 2.3884709231394874, "grad_norm": 0.2598878741264343, "learning_rate": 6.2486185245917e-06, "loss": 0.3285, "step": 23493 }, { "epoch": 2.3885725904839363, "grad_norm": 0.29943060874938965, "learning_rate": 6.248274879271499e-06, "loss": 0.3385, "step": 23494 }, { "epoch": 2.3886742578283857, "grad_norm": 0.276749849319458, "learning_rate": 6.2479312276627845e-06, "loss": 0.3325, "step": 23495 }, { "epoch": 2.3887759251728347, "grad_norm": 0.2970159649848938, "learning_rate": 6.247587569767292e-06, "loss": 0.3265, "step": 23496 }, { "epoch": 2.3888775925172836, "grad_norm": 0.2955775260925293, "learning_rate": 6.247243905586749e-06, "loss": 0.3467, "step": 23497 }, { "epoch": 2.3889792598617325, "grad_norm": 0.2710397243499756, "learning_rate": 6.2469002351228845e-06, "loss": 0.3402, "step": 23498 }, { "epoch": 2.3890809272061815, "grad_norm": 0.25963273644447327, "learning_rate": 6.246556558377434e-06, "loss": 0.3462, "step": 23499 }, { "epoch": 2.3891825945506304, "grad_norm": 0.26652899384498596, "learning_rate": 6.24621287535213e-06, "loss": 0.319, "step": 23500 }, { "epoch": 2.3892842618950794, "grad_norm": 0.28420573472976685, "learning_rate": 6.245869186048701e-06, "loss": 0.3391, "step": 23501 }, { "epoch": 2.3893859292395283, "grad_norm": 0.28730520606040955, "learning_rate": 6.245525490468879e-06, "loss": 0.3013, "step": 23502 }, { "epoch": 2.3894875965839772, "grad_norm": 0.2503844201564789, "learning_rate": 6.245181788614394e-06, "loss": 0.3278, "step": 23503 }, { "epoch": 2.389589263928426, "grad_norm": 0.26296812295913696, "learning_rate": 6.24483808048698e-06, "loss": 0.3083, "step": 23504 }, { "epoch": 2.389690931272875, "grad_norm": 0.2967142164707184, "learning_rate": 6.244494366088369e-06, "loss": 0.3134, "step": 23505 }, { "epoch": 2.389792598617324, "grad_norm": 0.2628740668296814, "learning_rate": 6.244150645420288e-06, "loss": 0.3217, "step": 23506 }, { "epoch": 2.389894265961773, "grad_norm": 0.27331405878067017, "learning_rate": 6.243806918484474e-06, "loss": 0.3189, "step": 23507 }, { "epoch": 2.389995933306222, "grad_norm": 0.2504052221775055, "learning_rate": 6.243463185282655e-06, "loss": 0.325, "step": 23508 }, { "epoch": 2.390097600650671, "grad_norm": 0.2587401866912842, "learning_rate": 6.243119445816566e-06, "loss": 0.3125, "step": 23509 }, { "epoch": 2.39019926799512, "grad_norm": 0.27797508239746094, "learning_rate": 6.242775700087935e-06, "loss": 0.3319, "step": 23510 }, { "epoch": 2.3903009353395688, "grad_norm": 0.2642025947570801, "learning_rate": 6.2424319480984955e-06, "loss": 0.3568, "step": 23511 }, { "epoch": 2.3904026026840177, "grad_norm": 0.2674225866794586, "learning_rate": 6.24208818984998e-06, "loss": 0.2961, "step": 23512 }, { "epoch": 2.390504270028467, "grad_norm": 0.27874788641929626, "learning_rate": 6.2417444253441186e-06, "loss": 0.331, "step": 23513 }, { "epoch": 2.390605937372916, "grad_norm": 0.2835899591445923, "learning_rate": 6.241400654582644e-06, "loss": 0.337, "step": 23514 }, { "epoch": 2.390707604717365, "grad_norm": 0.28300878405570984, "learning_rate": 6.241056877567287e-06, "loss": 0.344, "step": 23515 }, { "epoch": 2.390809272061814, "grad_norm": 0.27366527915000916, "learning_rate": 6.2407130942997816e-06, "loss": 0.3129, "step": 23516 }, { "epoch": 2.390910939406263, "grad_norm": 0.2956582009792328, "learning_rate": 6.240369304781859e-06, "loss": 0.3432, "step": 23517 }, { "epoch": 2.391012606750712, "grad_norm": 0.27816078066825867, "learning_rate": 6.24002550901525e-06, "loss": 0.3246, "step": 23518 }, { "epoch": 2.3911142740951608, "grad_norm": 0.2624457776546478, "learning_rate": 6.239681707001686e-06, "loss": 0.3572, "step": 23519 }, { "epoch": 2.3912159414396097, "grad_norm": 0.27484381198883057, "learning_rate": 6.239337898742901e-06, "loss": 0.3443, "step": 23520 }, { "epoch": 2.3913176087840586, "grad_norm": 0.27773427963256836, "learning_rate": 6.238994084240626e-06, "loss": 0.3452, "step": 23521 }, { "epoch": 2.3914192761285076, "grad_norm": 0.26999107003211975, "learning_rate": 6.238650263496594e-06, "loss": 0.3175, "step": 23522 }, { "epoch": 2.3915209434729565, "grad_norm": 0.24999313056468964, "learning_rate": 6.2383064365125355e-06, "loss": 0.3174, "step": 23523 }, { "epoch": 2.3916226108174055, "grad_norm": 0.2947148084640503, "learning_rate": 6.237962603290183e-06, "loss": 0.3526, "step": 23524 }, { "epoch": 2.3917242781618544, "grad_norm": 0.29598113894462585, "learning_rate": 6.237618763831271e-06, "loss": 0.3495, "step": 23525 }, { "epoch": 2.3918259455063033, "grad_norm": 0.2699265480041504, "learning_rate": 6.237274918137528e-06, "loss": 0.3526, "step": 23526 }, { "epoch": 2.3919276128507523, "grad_norm": 0.29646843671798706, "learning_rate": 6.2369310662106895e-06, "loss": 0.3719, "step": 23527 }, { "epoch": 2.3920292801952012, "grad_norm": 0.28782346844673157, "learning_rate": 6.236587208052485e-06, "loss": 0.3177, "step": 23528 }, { "epoch": 2.39213094753965, "grad_norm": 0.29328325390815735, "learning_rate": 6.236243343664647e-06, "loss": 0.3308, "step": 23529 }, { "epoch": 2.392232614884099, "grad_norm": 0.3052356243133545, "learning_rate": 6.235899473048911e-06, "loss": 0.3746, "step": 23530 }, { "epoch": 2.392334282228548, "grad_norm": 0.26799023151397705, "learning_rate": 6.235555596207006e-06, "loss": 0.3123, "step": 23531 }, { "epoch": 2.392435949572997, "grad_norm": 0.26182302832603455, "learning_rate": 6.235211713140666e-06, "loss": 0.3438, "step": 23532 }, { "epoch": 2.392537616917446, "grad_norm": 0.2604452073574066, "learning_rate": 6.234867823851622e-06, "loss": 0.3383, "step": 23533 }, { "epoch": 2.392639284261895, "grad_norm": 0.258742094039917, "learning_rate": 6.234523928341609e-06, "loss": 0.3279, "step": 23534 }, { "epoch": 2.392740951606344, "grad_norm": 0.3057372272014618, "learning_rate": 6.234180026612358e-06, "loss": 0.369, "step": 23535 }, { "epoch": 2.392842618950793, "grad_norm": 0.2881997227668762, "learning_rate": 6.233836118665598e-06, "loss": 0.3334, "step": 23536 }, { "epoch": 2.392944286295242, "grad_norm": 0.26019084453582764, "learning_rate": 6.233492204503069e-06, "loss": 0.3385, "step": 23537 }, { "epoch": 2.393045953639691, "grad_norm": 0.2708491384983063, "learning_rate": 6.233148284126497e-06, "loss": 0.3366, "step": 23538 }, { "epoch": 2.39314762098414, "grad_norm": 0.3131278157234192, "learning_rate": 6.232804357537617e-06, "loss": 0.3128, "step": 23539 }, { "epoch": 2.393249288328589, "grad_norm": 0.28906840085983276, "learning_rate": 6.232460424738163e-06, "loss": 0.3379, "step": 23540 }, { "epoch": 2.393350955673038, "grad_norm": 0.2720935642719269, "learning_rate": 6.232116485729864e-06, "loss": 0.344, "step": 23541 }, { "epoch": 2.393452623017487, "grad_norm": 0.28270941972732544, "learning_rate": 6.231772540514457e-06, "loss": 0.3125, "step": 23542 }, { "epoch": 2.393554290361936, "grad_norm": 0.27009668946266174, "learning_rate": 6.231428589093671e-06, "loss": 0.3228, "step": 23543 }, { "epoch": 2.3936559577063847, "grad_norm": 0.26288095116615295, "learning_rate": 6.231084631469242e-06, "loss": 0.3423, "step": 23544 }, { "epoch": 2.3937576250508337, "grad_norm": 0.25902971625328064, "learning_rate": 6.2307406676429e-06, "loss": 0.3297, "step": 23545 }, { "epoch": 2.3938592923952826, "grad_norm": 0.2726503610610962, "learning_rate": 6.230396697616379e-06, "loss": 0.3186, "step": 23546 }, { "epoch": 2.3939609597397316, "grad_norm": 0.29264169931411743, "learning_rate": 6.230052721391413e-06, "loss": 0.3834, "step": 23547 }, { "epoch": 2.3940626270841805, "grad_norm": 0.28597548604011536, "learning_rate": 6.229708738969733e-06, "loss": 0.3581, "step": 23548 }, { "epoch": 2.3941642944286294, "grad_norm": 0.26949405670166016, "learning_rate": 6.229364750353073e-06, "loss": 0.3516, "step": 23549 }, { "epoch": 2.3942659617730784, "grad_norm": 0.2544654309749603, "learning_rate": 6.229020755543165e-06, "loss": 0.3719, "step": 23550 }, { "epoch": 2.3943676291175273, "grad_norm": 0.28111448884010315, "learning_rate": 6.2286767545417414e-06, "loss": 0.3467, "step": 23551 }, { "epoch": 2.3944692964619763, "grad_norm": 0.26592618227005005, "learning_rate": 6.228332747350538e-06, "loss": 0.3094, "step": 23552 }, { "epoch": 2.394570963806425, "grad_norm": 0.29129186272621155, "learning_rate": 6.227988733971285e-06, "loss": 0.3389, "step": 23553 }, { "epoch": 2.3946726311508746, "grad_norm": 0.2781127989292145, "learning_rate": 6.227644714405718e-06, "loss": 0.3189, "step": 23554 }, { "epoch": 2.3947742984953235, "grad_norm": 0.28054460883140564, "learning_rate": 6.2273006886555675e-06, "loss": 0.3318, "step": 23555 }, { "epoch": 2.3948759658397725, "grad_norm": 0.2926449179649353, "learning_rate": 6.226956656722568e-06, "loss": 0.3382, "step": 23556 }, { "epoch": 2.3949776331842214, "grad_norm": 0.27748873829841614, "learning_rate": 6.226612618608453e-06, "loss": 0.3134, "step": 23557 }, { "epoch": 2.3950793005286704, "grad_norm": 0.26400288939476013, "learning_rate": 6.226268574314955e-06, "loss": 0.3689, "step": 23558 }, { "epoch": 2.3951809678731193, "grad_norm": 0.3047163188457489, "learning_rate": 6.225924523843807e-06, "loss": 0.3484, "step": 23559 }, { "epoch": 2.3952826352175682, "grad_norm": 0.3020716905593872, "learning_rate": 6.225580467196741e-06, "loss": 0.3479, "step": 23560 }, { "epoch": 2.395384302562017, "grad_norm": 0.26194334030151367, "learning_rate": 6.225236404375494e-06, "loss": 0.3318, "step": 23561 }, { "epoch": 2.395485969906466, "grad_norm": 0.2715064287185669, "learning_rate": 6.224892335381797e-06, "loss": 0.3279, "step": 23562 }, { "epoch": 2.395587637250915, "grad_norm": 0.2687441408634186, "learning_rate": 6.224548260217384e-06, "loss": 0.3018, "step": 23563 }, { "epoch": 2.395689304595364, "grad_norm": 0.27983033657073975, "learning_rate": 6.224204178883986e-06, "loss": 0.3445, "step": 23564 }, { "epoch": 2.395790971939813, "grad_norm": 0.28169184923171997, "learning_rate": 6.223860091383339e-06, "loss": 0.3154, "step": 23565 }, { "epoch": 2.395892639284262, "grad_norm": 0.2826851010322571, "learning_rate": 6.223515997717175e-06, "loss": 0.3305, "step": 23566 }, { "epoch": 2.395994306628711, "grad_norm": 0.25739315152168274, "learning_rate": 6.223171897887231e-06, "loss": 0.3123, "step": 23567 }, { "epoch": 2.3960959739731598, "grad_norm": 0.2754303514957428, "learning_rate": 6.222827791895235e-06, "loss": 0.3044, "step": 23568 }, { "epoch": 2.3961976413176087, "grad_norm": 0.26636993885040283, "learning_rate": 6.222483679742923e-06, "loss": 0.3329, "step": 23569 }, { "epoch": 2.3962993086620576, "grad_norm": 0.2852986752986908, "learning_rate": 6.22213956143203e-06, "loss": 0.3566, "step": 23570 }, { "epoch": 2.3964009760065066, "grad_norm": 0.2629588842391968, "learning_rate": 6.221795436964287e-06, "loss": 0.3244, "step": 23571 }, { "epoch": 2.3965026433509555, "grad_norm": 0.2776177227497101, "learning_rate": 6.22145130634143e-06, "loss": 0.307, "step": 23572 }, { "epoch": 2.3966043106954045, "grad_norm": 0.2810685932636261, "learning_rate": 6.2211071695651926e-06, "loss": 0.3198, "step": 23573 }, { "epoch": 2.3967059780398534, "grad_norm": 0.2928374707698822, "learning_rate": 6.220763026637305e-06, "loss": 0.3216, "step": 23574 }, { "epoch": 2.3968076453843024, "grad_norm": 0.2578064203262329, "learning_rate": 6.2204188775595054e-06, "loss": 0.3393, "step": 23575 }, { "epoch": 2.3969093127287513, "grad_norm": 0.26804372668266296, "learning_rate": 6.220074722333525e-06, "loss": 0.3345, "step": 23576 }, { "epoch": 2.3970109800732007, "grad_norm": 0.263650506734848, "learning_rate": 6.219730560961097e-06, "loss": 0.3122, "step": 23577 }, { "epoch": 2.3971126474176496, "grad_norm": 0.2670903205871582, "learning_rate": 6.219386393443958e-06, "loss": 0.3445, "step": 23578 }, { "epoch": 2.3972143147620986, "grad_norm": 0.26752614974975586, "learning_rate": 6.219042219783838e-06, "loss": 0.3192, "step": 23579 }, { "epoch": 2.3973159821065475, "grad_norm": 0.2607758939266205, "learning_rate": 6.218698039982475e-06, "loss": 0.3263, "step": 23580 }, { "epoch": 2.3974176494509964, "grad_norm": 0.26708149909973145, "learning_rate": 6.2183538540416e-06, "loss": 0.3401, "step": 23581 }, { "epoch": 2.3975193167954454, "grad_norm": 0.2699906527996063, "learning_rate": 6.218009661962948e-06, "loss": 0.3259, "step": 23582 }, { "epoch": 2.3976209841398943, "grad_norm": 0.2627239525318146, "learning_rate": 6.217665463748254e-06, "loss": 0.3657, "step": 23583 }, { "epoch": 2.3977226514843433, "grad_norm": 0.24574318528175354, "learning_rate": 6.217321259399248e-06, "loss": 0.3258, "step": 23584 }, { "epoch": 2.397824318828792, "grad_norm": 0.2809181213378906, "learning_rate": 6.216977048917668e-06, "loss": 0.3651, "step": 23585 }, { "epoch": 2.397925986173241, "grad_norm": 0.2676822543144226, "learning_rate": 6.216632832305247e-06, "loss": 0.3266, "step": 23586 }, { "epoch": 2.39802765351769, "grad_norm": 0.2651866674423218, "learning_rate": 6.216288609563719e-06, "loss": 0.3137, "step": 23587 }, { "epoch": 2.398129320862139, "grad_norm": 0.2733670175075531, "learning_rate": 6.215944380694818e-06, "loss": 0.3493, "step": 23588 }, { "epoch": 2.398230988206588, "grad_norm": 0.26579150557518005, "learning_rate": 6.215600145700278e-06, "loss": 0.3142, "step": 23589 }, { "epoch": 2.398332655551037, "grad_norm": 0.27716004848480225, "learning_rate": 6.215255904581834e-06, "loss": 0.2922, "step": 23590 }, { "epoch": 2.398434322895486, "grad_norm": 0.3148881494998932, "learning_rate": 6.214911657341219e-06, "loss": 0.3374, "step": 23591 }, { "epoch": 2.398535990239935, "grad_norm": 0.28923851251602173, "learning_rate": 6.214567403980167e-06, "loss": 0.3327, "step": 23592 }, { "epoch": 2.3986376575843837, "grad_norm": 0.2582721710205078, "learning_rate": 6.214223144500415e-06, "loss": 0.321, "step": 23593 }, { "epoch": 2.3987393249288327, "grad_norm": 0.2841487526893616, "learning_rate": 6.213878878903694e-06, "loss": 0.3348, "step": 23594 }, { "epoch": 2.398840992273282, "grad_norm": 0.27569010853767395, "learning_rate": 6.213534607191739e-06, "loss": 0.3011, "step": 23595 }, { "epoch": 2.398942659617731, "grad_norm": 0.2808205783367157, "learning_rate": 6.213190329366287e-06, "loss": 0.3381, "step": 23596 }, { "epoch": 2.39904432696218, "grad_norm": 0.2642441391944885, "learning_rate": 6.212846045429068e-06, "loss": 0.3247, "step": 23597 }, { "epoch": 2.399145994306629, "grad_norm": 0.25695163011550903, "learning_rate": 6.212501755381821e-06, "loss": 0.2997, "step": 23598 }, { "epoch": 2.399247661651078, "grad_norm": 0.2907114326953888, "learning_rate": 6.212157459226277e-06, "loss": 0.3302, "step": 23599 }, { "epoch": 2.3993493289955268, "grad_norm": 0.2990424931049347, "learning_rate": 6.211813156964172e-06, "loss": 0.3204, "step": 23600 }, { "epoch": 2.3994509963399757, "grad_norm": 0.26795509457588196, "learning_rate": 6.211468848597241e-06, "loss": 0.3338, "step": 23601 }, { "epoch": 2.3995526636844247, "grad_norm": 0.3081727623939514, "learning_rate": 6.211124534127217e-06, "loss": 0.3469, "step": 23602 }, { "epoch": 2.3996543310288736, "grad_norm": 0.27721667289733887, "learning_rate": 6.210780213555836e-06, "loss": 0.3325, "step": 23603 }, { "epoch": 2.3997559983733225, "grad_norm": 0.2598628103733063, "learning_rate": 6.210435886884832e-06, "loss": 0.3372, "step": 23604 }, { "epoch": 2.3998576657177715, "grad_norm": 0.28803861141204834, "learning_rate": 6.210091554115938e-06, "loss": 0.3483, "step": 23605 }, { "epoch": 2.3999593330622204, "grad_norm": 0.2837977111339569, "learning_rate": 6.209747215250892e-06, "loss": 0.3325, "step": 23606 }, { "epoch": 2.4000610004066694, "grad_norm": 0.27483507990837097, "learning_rate": 6.209402870291427e-06, "loss": 0.3275, "step": 23607 }, { "epoch": 2.4001626677511183, "grad_norm": 0.2871483564376831, "learning_rate": 6.209058519239278e-06, "loss": 0.3388, "step": 23608 }, { "epoch": 2.4002643350955672, "grad_norm": 0.2847156226634979, "learning_rate": 6.208714162096179e-06, "loss": 0.3323, "step": 23609 }, { "epoch": 2.400366002440016, "grad_norm": 0.2735268771648407, "learning_rate": 6.208369798863864e-06, "loss": 0.349, "step": 23610 }, { "epoch": 2.400467669784465, "grad_norm": 0.28669214248657227, "learning_rate": 6.208025429544071e-06, "loss": 0.3402, "step": 23611 }, { "epoch": 2.400569337128914, "grad_norm": 0.28025779128074646, "learning_rate": 6.207681054138532e-06, "loss": 0.3328, "step": 23612 }, { "epoch": 2.400671004473363, "grad_norm": 0.2678922116756439, "learning_rate": 6.207336672648984e-06, "loss": 0.309, "step": 23613 }, { "epoch": 2.400772671817812, "grad_norm": 0.25842711329460144, "learning_rate": 6.2069922850771595e-06, "loss": 0.3444, "step": 23614 }, { "epoch": 2.400874339162261, "grad_norm": 0.2632825970649719, "learning_rate": 6.206647891424794e-06, "loss": 0.3598, "step": 23615 }, { "epoch": 2.40097600650671, "grad_norm": 0.2923130989074707, "learning_rate": 6.2063034916936245e-06, "loss": 0.3178, "step": 23616 }, { "epoch": 2.401077673851159, "grad_norm": 0.2959761619567871, "learning_rate": 6.205959085885385e-06, "loss": 0.3629, "step": 23617 }, { "epoch": 2.401179341195608, "grad_norm": 0.27962586283683777, "learning_rate": 6.20561467400181e-06, "loss": 0.3246, "step": 23618 }, { "epoch": 2.401281008540057, "grad_norm": 0.28678199648857117, "learning_rate": 6.205270256044633e-06, "loss": 0.3412, "step": 23619 }, { "epoch": 2.401382675884506, "grad_norm": 0.24422982335090637, "learning_rate": 6.204925832015592e-06, "loss": 0.3111, "step": 23620 }, { "epoch": 2.401484343228955, "grad_norm": 0.2698848247528076, "learning_rate": 6.2045814019164215e-06, "loss": 0.3396, "step": 23621 }, { "epoch": 2.401586010573404, "grad_norm": 0.2754429280757904, "learning_rate": 6.204236965748856e-06, "loss": 0.3277, "step": 23622 }, { "epoch": 2.401687677917853, "grad_norm": 0.2504537105560303, "learning_rate": 6.203892523514632e-06, "loss": 0.3448, "step": 23623 }, { "epoch": 2.401789345262302, "grad_norm": 0.29291388392448425, "learning_rate": 6.203548075215481e-06, "loss": 0.3349, "step": 23624 }, { "epoch": 2.4018910126067508, "grad_norm": 0.2747510075569153, "learning_rate": 6.203203620853142e-06, "loss": 0.3565, "step": 23625 }, { "epoch": 2.4019926799511997, "grad_norm": 0.2807304561138153, "learning_rate": 6.20285916042935e-06, "loss": 0.3447, "step": 23626 }, { "epoch": 2.4020943472956486, "grad_norm": 0.2717319428920746, "learning_rate": 6.202514693945838e-06, "loss": 0.3267, "step": 23627 }, { "epoch": 2.4021960146400976, "grad_norm": 0.2681960165500641, "learning_rate": 6.202170221404343e-06, "loss": 0.3422, "step": 23628 }, { "epoch": 2.4022976819845465, "grad_norm": 0.2859029769897461, "learning_rate": 6.201825742806601e-06, "loss": 0.3131, "step": 23629 }, { "epoch": 2.4023993493289955, "grad_norm": 0.27350515127182007, "learning_rate": 6.201481258154346e-06, "loss": 0.3381, "step": 23630 }, { "epoch": 2.4025010166734444, "grad_norm": 0.28097638487815857, "learning_rate": 6.201136767449314e-06, "loss": 0.3264, "step": 23631 }, { "epoch": 2.4026026840178933, "grad_norm": 0.2595813572406769, "learning_rate": 6.200792270693239e-06, "loss": 0.3314, "step": 23632 }, { "epoch": 2.4027043513623423, "grad_norm": 0.281257301568985, "learning_rate": 6.200447767887861e-06, "loss": 0.3121, "step": 23633 }, { "epoch": 2.4028060187067912, "grad_norm": 0.26427632570266724, "learning_rate": 6.20010325903491e-06, "loss": 0.34, "step": 23634 }, { "epoch": 2.40290768605124, "grad_norm": 0.28530940413475037, "learning_rate": 6.199758744136124e-06, "loss": 0.3136, "step": 23635 }, { "epoch": 2.4030093533956896, "grad_norm": 0.29003801941871643, "learning_rate": 6.199414223193239e-06, "loss": 0.3404, "step": 23636 }, { "epoch": 2.4031110207401385, "grad_norm": 0.287820041179657, "learning_rate": 6.19906969620799e-06, "loss": 0.3445, "step": 23637 }, { "epoch": 2.4032126880845874, "grad_norm": 0.25982603430747986, "learning_rate": 6.198725163182113e-06, "loss": 0.3556, "step": 23638 }, { "epoch": 2.4033143554290364, "grad_norm": 0.2608700394630432, "learning_rate": 6.1983806241173435e-06, "loss": 0.2842, "step": 23639 }, { "epoch": 2.4034160227734853, "grad_norm": 0.25996944308280945, "learning_rate": 6.198036079015417e-06, "loss": 0.3493, "step": 23640 }, { "epoch": 2.4035176901179343, "grad_norm": 0.26415058970451355, "learning_rate": 6.19769152787807e-06, "loss": 0.3398, "step": 23641 }, { "epoch": 2.403619357462383, "grad_norm": 0.2561096251010895, "learning_rate": 6.197346970707037e-06, "loss": 0.3328, "step": 23642 }, { "epoch": 2.403721024806832, "grad_norm": 0.27481943368911743, "learning_rate": 6.197002407504056e-06, "loss": 0.3163, "step": 23643 }, { "epoch": 2.403822692151281, "grad_norm": 0.25863924622535706, "learning_rate": 6.19665783827086e-06, "loss": 0.3037, "step": 23644 }, { "epoch": 2.40392435949573, "grad_norm": 0.2648666203022003, "learning_rate": 6.196313263009186e-06, "loss": 0.363, "step": 23645 }, { "epoch": 2.404026026840179, "grad_norm": 0.2529437243938446, "learning_rate": 6.19596868172077e-06, "loss": 0.3275, "step": 23646 }, { "epoch": 2.404127694184628, "grad_norm": 0.27653437852859497, "learning_rate": 6.1956240944073474e-06, "loss": 0.318, "step": 23647 }, { "epoch": 2.404229361529077, "grad_norm": 0.2610670328140259, "learning_rate": 6.195279501070657e-06, "loss": 0.3095, "step": 23648 }, { "epoch": 2.404331028873526, "grad_norm": 0.27836334705352783, "learning_rate": 6.194934901712432e-06, "loss": 0.3227, "step": 23649 }, { "epoch": 2.4044326962179747, "grad_norm": 0.2561180293560028, "learning_rate": 6.194590296334407e-06, "loss": 0.3562, "step": 23650 }, { "epoch": 2.4045343635624237, "grad_norm": 0.26189619302749634, "learning_rate": 6.194245684938321e-06, "loss": 0.3454, "step": 23651 }, { "epoch": 2.4046360309068726, "grad_norm": 0.29834499955177307, "learning_rate": 6.193901067525911e-06, "loss": 0.3741, "step": 23652 }, { "epoch": 2.4047376982513216, "grad_norm": 0.27857330441474915, "learning_rate": 6.193556444098908e-06, "loss": 0.3229, "step": 23653 }, { "epoch": 2.4048393655957705, "grad_norm": 0.28752249479293823, "learning_rate": 6.193211814659053e-06, "loss": 0.3617, "step": 23654 }, { "epoch": 2.4049410329402194, "grad_norm": 0.25097280740737915, "learning_rate": 6.19286717920808e-06, "loss": 0.3435, "step": 23655 }, { "epoch": 2.4050427002846684, "grad_norm": 0.25497952103614807, "learning_rate": 6.192522537747725e-06, "loss": 0.3103, "step": 23656 }, { "epoch": 2.4051443676291173, "grad_norm": 0.2685897946357727, "learning_rate": 6.192177890279726e-06, "loss": 0.3408, "step": 23657 }, { "epoch": 2.4052460349735663, "grad_norm": 0.25817757844924927, "learning_rate": 6.191833236805818e-06, "loss": 0.3161, "step": 23658 }, { "epoch": 2.4053477023180156, "grad_norm": 0.2943729758262634, "learning_rate": 6.191488577327737e-06, "loss": 0.3226, "step": 23659 }, { "epoch": 2.4054493696624646, "grad_norm": 0.268322616815567, "learning_rate": 6.191143911847218e-06, "loss": 0.3511, "step": 23660 }, { "epoch": 2.4055510370069135, "grad_norm": 0.2735244929790497, "learning_rate": 6.190799240366001e-06, "loss": 0.3298, "step": 23661 }, { "epoch": 2.4056527043513625, "grad_norm": 0.2736687958240509, "learning_rate": 6.190454562885821e-06, "loss": 0.3367, "step": 23662 }, { "epoch": 2.4057543716958114, "grad_norm": 0.2784781754016876, "learning_rate": 6.190109879408412e-06, "loss": 0.3358, "step": 23663 }, { "epoch": 2.4058560390402604, "grad_norm": 0.2846732437610626, "learning_rate": 6.189765189935513e-06, "loss": 0.3206, "step": 23664 }, { "epoch": 2.4059577063847093, "grad_norm": 0.2850157618522644, "learning_rate": 6.189420494468861e-06, "loss": 0.3377, "step": 23665 }, { "epoch": 2.4060593737291582, "grad_norm": 0.27457472681999207, "learning_rate": 6.1890757930101885e-06, "loss": 0.3834, "step": 23666 }, { "epoch": 2.406161041073607, "grad_norm": 0.26009923219680786, "learning_rate": 6.188731085561237e-06, "loss": 0.3173, "step": 23667 }, { "epoch": 2.406262708418056, "grad_norm": 0.26972365379333496, "learning_rate": 6.1883863721237405e-06, "loss": 0.3418, "step": 23668 }, { "epoch": 2.406364375762505, "grad_norm": 0.24598510563373566, "learning_rate": 6.188041652699435e-06, "loss": 0.3667, "step": 23669 }, { "epoch": 2.406466043106954, "grad_norm": 0.29023340344429016, "learning_rate": 6.1876969272900576e-06, "loss": 0.3625, "step": 23670 }, { "epoch": 2.406567710451403, "grad_norm": 0.3046296238899231, "learning_rate": 6.187352195897346e-06, "loss": 0.328, "step": 23671 }, { "epoch": 2.406669377795852, "grad_norm": 0.2728511691093445, "learning_rate": 6.187007458523037e-06, "loss": 0.3301, "step": 23672 }, { "epoch": 2.406771045140301, "grad_norm": 0.2648793160915375, "learning_rate": 6.186662715168865e-06, "loss": 0.3271, "step": 23673 }, { "epoch": 2.4068727124847498, "grad_norm": 0.2615693211555481, "learning_rate": 6.18631796583657e-06, "loss": 0.3315, "step": 23674 }, { "epoch": 2.4069743798291987, "grad_norm": 0.25839686393737793, "learning_rate": 6.185973210527886e-06, "loss": 0.3725, "step": 23675 }, { "epoch": 2.4070760471736476, "grad_norm": 0.25576382875442505, "learning_rate": 6.185628449244549e-06, "loss": 0.3429, "step": 23676 }, { "epoch": 2.407177714518097, "grad_norm": 0.27894341945648193, "learning_rate": 6.185283681988301e-06, "loss": 0.3065, "step": 23677 }, { "epoch": 2.407279381862546, "grad_norm": 0.2548038959503174, "learning_rate": 6.1849389087608736e-06, "loss": 0.3443, "step": 23678 }, { "epoch": 2.407381049206995, "grad_norm": 0.28193438053131104, "learning_rate": 6.1845941295640064e-06, "loss": 0.3361, "step": 23679 }, { "epoch": 2.407482716551444, "grad_norm": 0.26585689187049866, "learning_rate": 6.184249344399435e-06, "loss": 0.3271, "step": 23680 }, { "epoch": 2.407584383895893, "grad_norm": 0.2751803994178772, "learning_rate": 6.183904553268896e-06, "loss": 0.3361, "step": 23681 }, { "epoch": 2.4076860512403417, "grad_norm": 0.26666462421417236, "learning_rate": 6.183559756174129e-06, "loss": 0.3351, "step": 23682 }, { "epoch": 2.4077877185847907, "grad_norm": 0.2825351357460022, "learning_rate": 6.1832149531168675e-06, "loss": 0.3553, "step": 23683 }, { "epoch": 2.4078893859292396, "grad_norm": 0.271678626537323, "learning_rate": 6.182870144098851e-06, "loss": 0.3348, "step": 23684 }, { "epoch": 2.4079910532736886, "grad_norm": 0.2707899510860443, "learning_rate": 6.182525329121816e-06, "loss": 0.3298, "step": 23685 }, { "epoch": 2.4080927206181375, "grad_norm": 0.27632221579551697, "learning_rate": 6.182180508187498e-06, "loss": 0.3496, "step": 23686 }, { "epoch": 2.4081943879625864, "grad_norm": 0.2698594927787781, "learning_rate": 6.181835681297638e-06, "loss": 0.3559, "step": 23687 }, { "epoch": 2.4082960553070354, "grad_norm": 0.2747800350189209, "learning_rate": 6.181490848453968e-06, "loss": 0.3289, "step": 23688 }, { "epoch": 2.4083977226514843, "grad_norm": 0.26803675293922424, "learning_rate": 6.18114600965823e-06, "loss": 0.3377, "step": 23689 }, { "epoch": 2.4084993899959333, "grad_norm": 0.26324301958084106, "learning_rate": 6.180801164912158e-06, "loss": 0.3561, "step": 23690 }, { "epoch": 2.408601057340382, "grad_norm": 0.25580906867980957, "learning_rate": 6.18045631421749e-06, "loss": 0.3284, "step": 23691 }, { "epoch": 2.408702724684831, "grad_norm": 0.2850865125656128, "learning_rate": 6.1801114575759645e-06, "loss": 0.3387, "step": 23692 }, { "epoch": 2.40880439202928, "grad_norm": 0.28311601281166077, "learning_rate": 6.179766594989318e-06, "loss": 0.3418, "step": 23693 }, { "epoch": 2.408906059373729, "grad_norm": 0.27924278378486633, "learning_rate": 6.179421726459288e-06, "loss": 0.3179, "step": 23694 }, { "epoch": 2.409007726718178, "grad_norm": 0.25918832421302795, "learning_rate": 6.17907685198761e-06, "loss": 0.3107, "step": 23695 }, { "epoch": 2.409109394062627, "grad_norm": 0.2726231813430786, "learning_rate": 6.178731971576024e-06, "loss": 0.3013, "step": 23696 }, { "epoch": 2.409211061407076, "grad_norm": 0.28782758116722107, "learning_rate": 6.1783870852262675e-06, "loss": 0.3382, "step": 23697 }, { "epoch": 2.409312728751525, "grad_norm": 0.29770249128341675, "learning_rate": 6.178042192940075e-06, "loss": 0.3375, "step": 23698 }, { "epoch": 2.4094143960959737, "grad_norm": 0.2934822142124176, "learning_rate": 6.177697294719187e-06, "loss": 0.3377, "step": 23699 }, { "epoch": 2.409516063440423, "grad_norm": 0.26743897795677185, "learning_rate": 6.17735239056534e-06, "loss": 0.3046, "step": 23700 }, { "epoch": 2.409617730784872, "grad_norm": 0.3004859387874603, "learning_rate": 6.1770074804802706e-06, "loss": 0.3258, "step": 23701 }, { "epoch": 2.409719398129321, "grad_norm": 0.27912044525146484, "learning_rate": 6.176662564465717e-06, "loss": 0.319, "step": 23702 }, { "epoch": 2.40982106547377, "grad_norm": 0.27375850081443787, "learning_rate": 6.176317642523418e-06, "loss": 0.3519, "step": 23703 }, { "epoch": 2.409922732818219, "grad_norm": 0.2472752183675766, "learning_rate": 6.175972714655111e-06, "loss": 0.3155, "step": 23704 }, { "epoch": 2.410024400162668, "grad_norm": 0.25131484866142273, "learning_rate": 6.175627780862531e-06, "loss": 0.3218, "step": 23705 }, { "epoch": 2.4101260675071168, "grad_norm": 0.3085038363933563, "learning_rate": 6.175282841147418e-06, "loss": 0.3174, "step": 23706 }, { "epoch": 2.4102277348515657, "grad_norm": 0.2642809748649597, "learning_rate": 6.17493789551151e-06, "loss": 0.3499, "step": 23707 }, { "epoch": 2.4103294021960147, "grad_norm": 0.26010775566101074, "learning_rate": 6.174592943956545e-06, "loss": 0.333, "step": 23708 }, { "epoch": 2.4104310695404636, "grad_norm": 0.28774845600128174, "learning_rate": 6.174247986484258e-06, "loss": 0.3552, "step": 23709 }, { "epoch": 2.4105327368849125, "grad_norm": 0.2659805715084076, "learning_rate": 6.173903023096391e-06, "loss": 0.3236, "step": 23710 }, { "epoch": 2.4106344042293615, "grad_norm": 0.2728848457336426, "learning_rate": 6.173558053794676e-06, "loss": 0.2958, "step": 23711 }, { "epoch": 2.4107360715738104, "grad_norm": 0.2618612051010132, "learning_rate": 6.173213078580858e-06, "loss": 0.361, "step": 23712 }, { "epoch": 2.4108377389182594, "grad_norm": 0.2645045518875122, "learning_rate": 6.172868097456668e-06, "loss": 0.3391, "step": 23713 }, { "epoch": 2.4109394062627083, "grad_norm": 0.25171053409576416, "learning_rate": 6.17252311042385e-06, "loss": 0.3203, "step": 23714 }, { "epoch": 2.4110410736071572, "grad_norm": 0.2587028443813324, "learning_rate": 6.172178117484139e-06, "loss": 0.31, "step": 23715 }, { "epoch": 2.411142740951606, "grad_norm": 0.2957070767879486, "learning_rate": 6.17183311863927e-06, "loss": 0.3008, "step": 23716 }, { "epoch": 2.411244408296055, "grad_norm": 0.2538191080093384, "learning_rate": 6.171488113890988e-06, "loss": 0.3606, "step": 23717 }, { "epoch": 2.4113460756405045, "grad_norm": 0.27962568402290344, "learning_rate": 6.171143103241025e-06, "loss": 0.3421, "step": 23718 }, { "epoch": 2.4114477429849535, "grad_norm": 0.27450326085090637, "learning_rate": 6.170798086691123e-06, "loss": 0.3346, "step": 23719 }, { "epoch": 2.4115494103294024, "grad_norm": 0.2700725197792053, "learning_rate": 6.1704530642430184e-06, "loss": 0.3144, "step": 23720 }, { "epoch": 2.4116510776738513, "grad_norm": 0.27752596139907837, "learning_rate": 6.170108035898448e-06, "loss": 0.3342, "step": 23721 }, { "epoch": 2.4117527450183003, "grad_norm": 0.2711454927921295, "learning_rate": 6.1697630016591514e-06, "loss": 0.3513, "step": 23722 }, { "epoch": 2.4118544123627492, "grad_norm": 0.2613702416419983, "learning_rate": 6.169417961526867e-06, "loss": 0.3315, "step": 23723 }, { "epoch": 2.411956079707198, "grad_norm": 0.26222747564315796, "learning_rate": 6.169072915503333e-06, "loss": 0.3247, "step": 23724 }, { "epoch": 2.412057747051647, "grad_norm": 0.28119799494743347, "learning_rate": 6.1687278635902895e-06, "loss": 0.3229, "step": 23725 }, { "epoch": 2.412159414396096, "grad_norm": 0.26630276441574097, "learning_rate": 6.16838280578947e-06, "loss": 0.3341, "step": 23726 }, { "epoch": 2.412261081740545, "grad_norm": 0.27649426460266113, "learning_rate": 6.168037742102616e-06, "loss": 0.2992, "step": 23727 }, { "epoch": 2.412362749084994, "grad_norm": 0.26834189891815186, "learning_rate": 6.1676926725314665e-06, "loss": 0.3401, "step": 23728 }, { "epoch": 2.412464416429443, "grad_norm": 0.2587987184524536, "learning_rate": 6.1673475970777585e-06, "loss": 0.3273, "step": 23729 }, { "epoch": 2.412566083773892, "grad_norm": 0.2680034041404724, "learning_rate": 6.167002515743231e-06, "loss": 0.3782, "step": 23730 }, { "epoch": 2.4126677511183408, "grad_norm": 0.26495519280433655, "learning_rate": 6.166657428529621e-06, "loss": 0.3215, "step": 23731 }, { "epoch": 2.4127694184627897, "grad_norm": 0.26357731223106384, "learning_rate": 6.166312335438669e-06, "loss": 0.3142, "step": 23732 }, { "epoch": 2.4128710858072386, "grad_norm": 0.258246511220932, "learning_rate": 6.1659672364721125e-06, "loss": 0.3466, "step": 23733 }, { "epoch": 2.4129727531516876, "grad_norm": 0.2690887451171875, "learning_rate": 6.16562213163169e-06, "loss": 0.3415, "step": 23734 }, { "epoch": 2.4130744204961365, "grad_norm": 0.2835111916065216, "learning_rate": 6.16527702091914e-06, "loss": 0.3482, "step": 23735 }, { "epoch": 2.4131760878405855, "grad_norm": 0.2826481759548187, "learning_rate": 6.164931904336201e-06, "loss": 0.3373, "step": 23736 }, { "epoch": 2.4132777551850344, "grad_norm": 0.27521640062332153, "learning_rate": 6.164586781884612e-06, "loss": 0.3744, "step": 23737 }, { "epoch": 2.4133794225294833, "grad_norm": 0.2889520227909088, "learning_rate": 6.164241653566112e-06, "loss": 0.335, "step": 23738 }, { "epoch": 2.4134810898739323, "grad_norm": 0.2778811752796173, "learning_rate": 6.16389651938244e-06, "loss": 0.2884, "step": 23739 }, { "epoch": 2.4135827572183812, "grad_norm": 0.27238214015960693, "learning_rate": 6.163551379335333e-06, "loss": 0.3025, "step": 23740 }, { "epoch": 2.4136844245628306, "grad_norm": 0.2942279577255249, "learning_rate": 6.16320623342653e-06, "loss": 0.3294, "step": 23741 }, { "epoch": 2.4137860919072796, "grad_norm": 0.2580350935459137, "learning_rate": 6.162861081657771e-06, "loss": 0.3386, "step": 23742 }, { "epoch": 2.4138877592517285, "grad_norm": 0.2746708393096924, "learning_rate": 6.162515924030794e-06, "loss": 0.3669, "step": 23743 }, { "epoch": 2.4139894265961774, "grad_norm": 0.2559562921524048, "learning_rate": 6.162170760547339e-06, "loss": 0.3289, "step": 23744 }, { "epoch": 2.4140910939406264, "grad_norm": 0.2803341746330261, "learning_rate": 6.161825591209143e-06, "loss": 0.3351, "step": 23745 }, { "epoch": 2.4141927612850753, "grad_norm": 0.280443012714386, "learning_rate": 6.161480416017946e-06, "loss": 0.3397, "step": 23746 }, { "epoch": 2.4142944286295243, "grad_norm": 0.27097785472869873, "learning_rate": 6.161135234975486e-06, "loss": 0.3378, "step": 23747 }, { "epoch": 2.414396095973973, "grad_norm": 0.269325315952301, "learning_rate": 6.160790048083504e-06, "loss": 0.3366, "step": 23748 }, { "epoch": 2.414497763318422, "grad_norm": 0.2682051658630371, "learning_rate": 6.160444855343737e-06, "loss": 0.3195, "step": 23749 }, { "epoch": 2.414599430662871, "grad_norm": 0.2723546028137207, "learning_rate": 6.160099656757924e-06, "loss": 0.3421, "step": 23750 }, { "epoch": 2.41470109800732, "grad_norm": 0.2514614462852478, "learning_rate": 6.159754452327805e-06, "loss": 0.3311, "step": 23751 }, { "epoch": 2.414802765351769, "grad_norm": 0.28529366850852966, "learning_rate": 6.159409242055119e-06, "loss": 0.3211, "step": 23752 }, { "epoch": 2.414904432696218, "grad_norm": 0.2626726031303406, "learning_rate": 6.1590640259416045e-06, "loss": 0.3347, "step": 23753 }, { "epoch": 2.415006100040667, "grad_norm": 0.28050047159194946, "learning_rate": 6.158718803989e-06, "loss": 0.3341, "step": 23754 }, { "epoch": 2.415107767385116, "grad_norm": 0.26847025752067566, "learning_rate": 6.158373576199047e-06, "loss": 0.3264, "step": 23755 }, { "epoch": 2.4152094347295647, "grad_norm": 0.26761820912361145, "learning_rate": 6.158028342573482e-06, "loss": 0.3418, "step": 23756 }, { "epoch": 2.4153111020740137, "grad_norm": 0.2764120399951935, "learning_rate": 6.157683103114046e-06, "loss": 0.2962, "step": 23757 }, { "epoch": 2.4154127694184626, "grad_norm": 0.26257434487342834, "learning_rate": 6.157337857822478e-06, "loss": 0.3296, "step": 23758 }, { "epoch": 2.415514436762912, "grad_norm": 0.2925635874271393, "learning_rate": 6.156992606700516e-06, "loss": 0.3323, "step": 23759 }, { "epoch": 2.415616104107361, "grad_norm": 0.29667529463768005, "learning_rate": 6.156647349749901e-06, "loss": 0.3638, "step": 23760 }, { "epoch": 2.41571777145181, "grad_norm": 0.25533488392829895, "learning_rate": 6.156302086972371e-06, "loss": 0.344, "step": 23761 }, { "epoch": 2.415819438796259, "grad_norm": 0.2666844129562378, "learning_rate": 6.155956818369666e-06, "loss": 0.3233, "step": 23762 }, { "epoch": 2.4159211061407078, "grad_norm": 0.25652098655700684, "learning_rate": 6.155611543943526e-06, "loss": 0.3421, "step": 23763 }, { "epoch": 2.4160227734851567, "grad_norm": 0.2760632634162903, "learning_rate": 6.155266263695689e-06, "loss": 0.3307, "step": 23764 }, { "epoch": 2.4161244408296056, "grad_norm": 0.276893675327301, "learning_rate": 6.154920977627895e-06, "loss": 0.381, "step": 23765 }, { "epoch": 2.4162261081740546, "grad_norm": 0.30053895711898804, "learning_rate": 6.154575685741884e-06, "loss": 0.3247, "step": 23766 }, { "epoch": 2.4163277755185035, "grad_norm": 0.2679879069328308, "learning_rate": 6.154230388039394e-06, "loss": 0.3253, "step": 23767 }, { "epoch": 2.4164294428629525, "grad_norm": 0.27347561717033386, "learning_rate": 6.153885084522166e-06, "loss": 0.3291, "step": 23768 }, { "epoch": 2.4165311102074014, "grad_norm": 0.2856716513633728, "learning_rate": 6.153539775191939e-06, "loss": 0.3751, "step": 23769 }, { "epoch": 2.4166327775518504, "grad_norm": 0.28822803497314453, "learning_rate": 6.153194460050455e-06, "loss": 0.298, "step": 23770 }, { "epoch": 2.4167344448962993, "grad_norm": 0.27889078855514526, "learning_rate": 6.152849139099448e-06, "loss": 0.3217, "step": 23771 }, { "epoch": 2.4168361122407482, "grad_norm": 0.28109583258628845, "learning_rate": 6.152503812340662e-06, "loss": 0.3506, "step": 23772 }, { "epoch": 2.416937779585197, "grad_norm": 0.2661608159542084, "learning_rate": 6.152158479775837e-06, "loss": 0.3644, "step": 23773 }, { "epoch": 2.417039446929646, "grad_norm": 0.2882254421710968, "learning_rate": 6.151813141406711e-06, "loss": 0.3447, "step": 23774 }, { "epoch": 2.417141114274095, "grad_norm": 0.2738116979598999, "learning_rate": 6.151467797235023e-06, "loss": 0.3817, "step": 23775 }, { "epoch": 2.417242781618544, "grad_norm": 0.275785893201828, "learning_rate": 6.151122447262514e-06, "loss": 0.3415, "step": 23776 }, { "epoch": 2.417344448962993, "grad_norm": 0.26360562443733215, "learning_rate": 6.150777091490925e-06, "loss": 0.3368, "step": 23777 }, { "epoch": 2.417446116307442, "grad_norm": 0.2742350399494171, "learning_rate": 6.150431729921994e-06, "loss": 0.3335, "step": 23778 }, { "epoch": 2.417547783651891, "grad_norm": 0.2739521563053131, "learning_rate": 6.15008636255746e-06, "loss": 0.3396, "step": 23779 }, { "epoch": 2.4176494509963398, "grad_norm": 0.2747226357460022, "learning_rate": 6.149740989399065e-06, "loss": 0.343, "step": 23780 }, { "epoch": 2.4177511183407887, "grad_norm": 0.2529791295528412, "learning_rate": 6.149395610448549e-06, "loss": 0.3118, "step": 23781 }, { "epoch": 2.417852785685238, "grad_norm": 0.28036242723464966, "learning_rate": 6.14905022570765e-06, "loss": 0.3041, "step": 23782 }, { "epoch": 2.417954453029687, "grad_norm": 0.26984184980392456, "learning_rate": 6.14870483517811e-06, "loss": 0.3265, "step": 23783 }, { "epoch": 2.418056120374136, "grad_norm": 0.2785734534263611, "learning_rate": 6.148359438861666e-06, "loss": 0.3519, "step": 23784 }, { "epoch": 2.418157787718585, "grad_norm": 0.28649285435676575, "learning_rate": 6.148014036760063e-06, "loss": 0.3271, "step": 23785 }, { "epoch": 2.418259455063034, "grad_norm": 0.271174818277359, "learning_rate": 6.147668628875037e-06, "loss": 0.3493, "step": 23786 }, { "epoch": 2.418361122407483, "grad_norm": 0.26520898938179016, "learning_rate": 6.147323215208327e-06, "loss": 0.3084, "step": 23787 }, { "epoch": 2.4184627897519317, "grad_norm": 0.26564139127731323, "learning_rate": 6.146977795761678e-06, "loss": 0.3449, "step": 23788 }, { "epoch": 2.4185644570963807, "grad_norm": 0.29050734639167786, "learning_rate": 6.146632370536826e-06, "loss": 0.3545, "step": 23789 }, { "epoch": 2.4186661244408296, "grad_norm": 0.2438497543334961, "learning_rate": 6.146286939535514e-06, "loss": 0.3263, "step": 23790 }, { "epoch": 2.4187677917852786, "grad_norm": 0.28255581855773926, "learning_rate": 6.14594150275948e-06, "loss": 0.3404, "step": 23791 }, { "epoch": 2.4188694591297275, "grad_norm": 0.2652973234653473, "learning_rate": 6.145596060210464e-06, "loss": 0.3505, "step": 23792 }, { "epoch": 2.4189711264741764, "grad_norm": 0.25794556736946106, "learning_rate": 6.145250611890208e-06, "loss": 0.3372, "step": 23793 }, { "epoch": 2.4190727938186254, "grad_norm": 0.2761172354221344, "learning_rate": 6.144905157800451e-06, "loss": 0.3229, "step": 23794 }, { "epoch": 2.4191744611630743, "grad_norm": 0.2654196321964264, "learning_rate": 6.144559697942936e-06, "loss": 0.352, "step": 23795 }, { "epoch": 2.4192761285075233, "grad_norm": 0.2661899924278259, "learning_rate": 6.1442142323193995e-06, "loss": 0.3143, "step": 23796 }, { "epoch": 2.419377795851972, "grad_norm": 0.29617688059806824, "learning_rate": 6.143868760931583e-06, "loss": 0.3545, "step": 23797 }, { "epoch": 2.419479463196421, "grad_norm": 0.27369701862335205, "learning_rate": 6.14352328378123e-06, "loss": 0.3112, "step": 23798 }, { "epoch": 2.41958113054087, "grad_norm": 0.28493165969848633, "learning_rate": 6.143177800870076e-06, "loss": 0.3239, "step": 23799 }, { "epoch": 2.4196827978853195, "grad_norm": 0.279645174741745, "learning_rate": 6.142832312199864e-06, "loss": 0.32, "step": 23800 }, { "epoch": 2.4197844652297684, "grad_norm": 0.2843448221683502, "learning_rate": 6.142486817772337e-06, "loss": 0.3792, "step": 23801 }, { "epoch": 2.4198861325742174, "grad_norm": 0.28321850299835205, "learning_rate": 6.14214131758923e-06, "loss": 0.2889, "step": 23802 }, { "epoch": 2.4199877999186663, "grad_norm": 0.2719200551509857, "learning_rate": 6.141795811652288e-06, "loss": 0.3472, "step": 23803 }, { "epoch": 2.4200894672631152, "grad_norm": 0.2790778577327728, "learning_rate": 6.14145029996325e-06, "loss": 0.3839, "step": 23804 }, { "epoch": 2.420191134607564, "grad_norm": 0.25343257188796997, "learning_rate": 6.141104782523855e-06, "loss": 0.3398, "step": 23805 }, { "epoch": 2.420292801952013, "grad_norm": 0.26576998829841614, "learning_rate": 6.140759259335848e-06, "loss": 0.3219, "step": 23806 }, { "epoch": 2.420394469296462, "grad_norm": 0.29320186376571655, "learning_rate": 6.1404137304009635e-06, "loss": 0.3676, "step": 23807 }, { "epoch": 2.420496136640911, "grad_norm": 0.27743586897850037, "learning_rate": 6.1400681957209485e-06, "loss": 0.3077, "step": 23808 }, { "epoch": 2.42059780398536, "grad_norm": 0.25652986764907837, "learning_rate": 6.139722655297539e-06, "loss": 0.3297, "step": 23809 }, { "epoch": 2.420699471329809, "grad_norm": 0.2751476764678955, "learning_rate": 6.139377109132479e-06, "loss": 0.299, "step": 23810 }, { "epoch": 2.420801138674258, "grad_norm": 0.29666024446487427, "learning_rate": 6.139031557227508e-06, "loss": 0.3171, "step": 23811 }, { "epoch": 2.4209028060187068, "grad_norm": 0.26971083879470825, "learning_rate": 6.138685999584365e-06, "loss": 0.3171, "step": 23812 }, { "epoch": 2.4210044733631557, "grad_norm": 0.2472836822271347, "learning_rate": 6.138340436204794e-06, "loss": 0.363, "step": 23813 }, { "epoch": 2.4211061407076047, "grad_norm": 0.27129271626472473, "learning_rate": 6.137994867090534e-06, "loss": 0.3268, "step": 23814 }, { "epoch": 2.4212078080520536, "grad_norm": 0.257819265127182, "learning_rate": 6.137649292243326e-06, "loss": 0.3619, "step": 23815 }, { "epoch": 2.4213094753965025, "grad_norm": 0.26257994771003723, "learning_rate": 6.1373037116649125e-06, "loss": 0.3476, "step": 23816 }, { "epoch": 2.4214111427409515, "grad_norm": 0.2667388916015625, "learning_rate": 6.136958125357032e-06, "loss": 0.322, "step": 23817 }, { "epoch": 2.4215128100854004, "grad_norm": 0.2459363043308258, "learning_rate": 6.1366125333214264e-06, "loss": 0.3548, "step": 23818 }, { "epoch": 2.4216144774298494, "grad_norm": 0.2860381305217743, "learning_rate": 6.136266935559838e-06, "loss": 0.336, "step": 23819 }, { "epoch": 2.4217161447742983, "grad_norm": 0.2812401056289673, "learning_rate": 6.135921332074005e-06, "loss": 0.3503, "step": 23820 }, { "epoch": 2.4218178121187472, "grad_norm": 0.27005067467689514, "learning_rate": 6.135575722865673e-06, "loss": 0.3158, "step": 23821 }, { "epoch": 2.421919479463196, "grad_norm": 0.2684169411659241, "learning_rate": 6.135230107936578e-06, "loss": 0.3311, "step": 23822 }, { "epoch": 2.4220211468076456, "grad_norm": 0.25945845246315, "learning_rate": 6.1348844872884635e-06, "loss": 0.355, "step": 23823 }, { "epoch": 2.4221228141520945, "grad_norm": 0.2660190463066101, "learning_rate": 6.134538860923073e-06, "loss": 0.3406, "step": 23824 }, { "epoch": 2.4222244814965435, "grad_norm": 0.2554217576980591, "learning_rate": 6.134193228842142e-06, "loss": 0.3551, "step": 23825 }, { "epoch": 2.4223261488409924, "grad_norm": 0.27869054675102234, "learning_rate": 6.133847591047419e-06, "loss": 0.3383, "step": 23826 }, { "epoch": 2.4224278161854413, "grad_norm": 0.27041512727737427, "learning_rate": 6.133501947540638e-06, "loss": 0.3345, "step": 23827 }, { "epoch": 2.4225294835298903, "grad_norm": 0.26752492785453796, "learning_rate": 6.133156298323543e-06, "loss": 0.371, "step": 23828 }, { "epoch": 2.4226311508743392, "grad_norm": 0.2651067078113556, "learning_rate": 6.13281064339788e-06, "loss": 0.3027, "step": 23829 }, { "epoch": 2.422732818218788, "grad_norm": 0.25986719131469727, "learning_rate": 6.132464982765383e-06, "loss": 0.3249, "step": 23830 }, { "epoch": 2.422834485563237, "grad_norm": 0.261761873960495, "learning_rate": 6.132119316427797e-06, "loss": 0.3006, "step": 23831 }, { "epoch": 2.422936152907686, "grad_norm": 0.26665934920310974, "learning_rate": 6.131773644386863e-06, "loss": 0.3383, "step": 23832 }, { "epoch": 2.423037820252135, "grad_norm": 0.2812231779098511, "learning_rate": 6.131427966644322e-06, "loss": 0.3141, "step": 23833 }, { "epoch": 2.423139487596584, "grad_norm": 0.256893128156662, "learning_rate": 6.131082283201916e-06, "loss": 0.3613, "step": 23834 }, { "epoch": 2.423241154941033, "grad_norm": 0.2529139816761017, "learning_rate": 6.130736594061386e-06, "loss": 0.3304, "step": 23835 }, { "epoch": 2.423342822285482, "grad_norm": 0.28149470686912537, "learning_rate": 6.1303908992244755e-06, "loss": 0.348, "step": 23836 }, { "epoch": 2.4234444896299308, "grad_norm": 0.24773240089416504, "learning_rate": 6.1300451986929225e-06, "loss": 0.3434, "step": 23837 }, { "epoch": 2.4235461569743797, "grad_norm": 0.2570193409919739, "learning_rate": 6.1296994924684704e-06, "loss": 0.3445, "step": 23838 }, { "epoch": 2.4236478243188286, "grad_norm": 0.27642688155174255, "learning_rate": 6.1293537805528616e-06, "loss": 0.3467, "step": 23839 }, { "epoch": 2.4237494916632776, "grad_norm": 0.2639712989330292, "learning_rate": 6.129008062947836e-06, "loss": 0.3415, "step": 23840 }, { "epoch": 2.423851159007727, "grad_norm": 0.2704659104347229, "learning_rate": 6.128662339655137e-06, "loss": 0.3526, "step": 23841 }, { "epoch": 2.423952826352176, "grad_norm": 0.2660583555698395, "learning_rate": 6.128316610676505e-06, "loss": 0.326, "step": 23842 }, { "epoch": 2.424054493696625, "grad_norm": 0.26584142446517944, "learning_rate": 6.12797087601368e-06, "loss": 0.3816, "step": 23843 }, { "epoch": 2.424156161041074, "grad_norm": 0.27376648783683777, "learning_rate": 6.127625135668409e-06, "loss": 0.3285, "step": 23844 }, { "epoch": 2.4242578283855227, "grad_norm": 0.26499831676483154, "learning_rate": 6.127279389642429e-06, "loss": 0.3532, "step": 23845 }, { "epoch": 2.4243594957299717, "grad_norm": 0.27418363094329834, "learning_rate": 6.126933637937483e-06, "loss": 0.3455, "step": 23846 }, { "epoch": 2.4244611630744206, "grad_norm": 0.2550539970397949, "learning_rate": 6.126587880555314e-06, "loss": 0.3574, "step": 23847 }, { "epoch": 2.4245628304188696, "grad_norm": 0.444914847612381, "learning_rate": 6.126242117497662e-06, "loss": 0.3119, "step": 23848 }, { "epoch": 2.4246644977633185, "grad_norm": 0.29156026244163513, "learning_rate": 6.12589634876627e-06, "loss": 0.3126, "step": 23849 }, { "epoch": 2.4247661651077674, "grad_norm": 0.2717324495315552, "learning_rate": 6.125550574362881e-06, "loss": 0.2944, "step": 23850 }, { "epoch": 2.4248678324522164, "grad_norm": 0.25185397267341614, "learning_rate": 6.125204794289234e-06, "loss": 0.3456, "step": 23851 }, { "epoch": 2.4249694997966653, "grad_norm": 0.2750806212425232, "learning_rate": 6.124859008547074e-06, "loss": 0.3483, "step": 23852 }, { "epoch": 2.4250711671411143, "grad_norm": 0.2868618667125702, "learning_rate": 6.1245132171381404e-06, "loss": 0.335, "step": 23853 }, { "epoch": 2.425172834485563, "grad_norm": 0.2772233188152313, "learning_rate": 6.124167420064178e-06, "loss": 0.3462, "step": 23854 }, { "epoch": 2.425274501830012, "grad_norm": 0.28354302048683167, "learning_rate": 6.1238216173269264e-06, "loss": 0.3428, "step": 23855 }, { "epoch": 2.425376169174461, "grad_norm": 0.2416471391916275, "learning_rate": 6.123475808928127e-06, "loss": 0.3257, "step": 23856 }, { "epoch": 2.42547783651891, "grad_norm": 0.2604152262210846, "learning_rate": 6.123129994869526e-06, "loss": 0.3647, "step": 23857 }, { "epoch": 2.425579503863359, "grad_norm": 0.26778584718704224, "learning_rate": 6.1227841751528614e-06, "loss": 0.3152, "step": 23858 }, { "epoch": 2.425681171207808, "grad_norm": 0.285994291305542, "learning_rate": 6.122438349779878e-06, "loss": 0.3101, "step": 23859 }, { "epoch": 2.425782838552257, "grad_norm": 0.2585143446922302, "learning_rate": 6.1220925187523164e-06, "loss": 0.3984, "step": 23860 }, { "epoch": 2.425884505896706, "grad_norm": 0.2672045826911926, "learning_rate": 6.12174668207192e-06, "loss": 0.3429, "step": 23861 }, { "epoch": 2.4259861732411547, "grad_norm": 0.2730999290943146, "learning_rate": 6.12140083974043e-06, "loss": 0.3455, "step": 23862 }, { "epoch": 2.4260878405856037, "grad_norm": 0.25846052169799805, "learning_rate": 6.121054991759587e-06, "loss": 0.3371, "step": 23863 }, { "epoch": 2.426189507930053, "grad_norm": 0.27436718344688416, "learning_rate": 6.120709138131138e-06, "loss": 0.3959, "step": 23864 }, { "epoch": 2.426291175274502, "grad_norm": 0.260897159576416, "learning_rate": 6.120363278856821e-06, "loss": 0.3308, "step": 23865 }, { "epoch": 2.426392842618951, "grad_norm": 0.2691776752471924, "learning_rate": 6.1200174139383814e-06, "loss": 0.349, "step": 23866 }, { "epoch": 2.4264945099634, "grad_norm": 0.2739652097225189, "learning_rate": 6.119671543377561e-06, "loss": 0.3453, "step": 23867 }, { "epoch": 2.426596177307849, "grad_norm": 0.29858148097991943, "learning_rate": 6.119325667176099e-06, "loss": 0.3387, "step": 23868 }, { "epoch": 2.4266978446522978, "grad_norm": 0.26777884364128113, "learning_rate": 6.118979785335741e-06, "loss": 0.3363, "step": 23869 }, { "epoch": 2.4267995119967467, "grad_norm": 0.24969349801540375, "learning_rate": 6.11863389785823e-06, "loss": 0.3175, "step": 23870 }, { "epoch": 2.4269011793411956, "grad_norm": 0.29098963737487793, "learning_rate": 6.1182880047453064e-06, "loss": 0.3067, "step": 23871 }, { "epoch": 2.4270028466856446, "grad_norm": 0.2881939113140106, "learning_rate": 6.117942105998715e-06, "loss": 0.353, "step": 23872 }, { "epoch": 2.4271045140300935, "grad_norm": 0.2813962399959564, "learning_rate": 6.117596201620193e-06, "loss": 0.3247, "step": 23873 }, { "epoch": 2.4272061813745425, "grad_norm": 0.26217973232269287, "learning_rate": 6.117250291611491e-06, "loss": 0.3401, "step": 23874 }, { "epoch": 2.4273078487189914, "grad_norm": 0.25705718994140625, "learning_rate": 6.1169043759743455e-06, "loss": 0.3404, "step": 23875 }, { "epoch": 2.4274095160634404, "grad_norm": 0.2800261676311493, "learning_rate": 6.116558454710502e-06, "loss": 0.3154, "step": 23876 }, { "epoch": 2.4275111834078893, "grad_norm": 0.2637268006801605, "learning_rate": 6.116212527821702e-06, "loss": 0.3305, "step": 23877 }, { "epoch": 2.4276128507523382, "grad_norm": 0.274173766374588, "learning_rate": 6.115866595309687e-06, "loss": 0.3156, "step": 23878 }, { "epoch": 2.427714518096787, "grad_norm": 0.2555799186229706, "learning_rate": 6.115520657176204e-06, "loss": 0.3242, "step": 23879 }, { "epoch": 2.427816185441236, "grad_norm": 0.27393731474876404, "learning_rate": 6.1151747134229926e-06, "loss": 0.3539, "step": 23880 }, { "epoch": 2.427917852785685, "grad_norm": 0.28028684854507446, "learning_rate": 6.114828764051794e-06, "loss": 0.3471, "step": 23881 }, { "epoch": 2.4280195201301344, "grad_norm": 0.27919650077819824, "learning_rate": 6.1144828090643545e-06, "loss": 0.3194, "step": 23882 }, { "epoch": 2.4281211874745834, "grad_norm": 0.2717529833316803, "learning_rate": 6.114136848462414e-06, "loss": 0.3638, "step": 23883 }, { "epoch": 2.4282228548190323, "grad_norm": 0.2671924829483032, "learning_rate": 6.1137908822477186e-06, "loss": 0.3449, "step": 23884 }, { "epoch": 2.4283245221634813, "grad_norm": 0.27788597345352173, "learning_rate": 6.113444910422011e-06, "loss": 0.3251, "step": 23885 }, { "epoch": 2.42842618950793, "grad_norm": 0.2640502452850342, "learning_rate": 6.11309893298703e-06, "loss": 0.3169, "step": 23886 }, { "epoch": 2.428527856852379, "grad_norm": 0.26153963804244995, "learning_rate": 6.112752949944522e-06, "loss": 0.3511, "step": 23887 }, { "epoch": 2.428629524196828, "grad_norm": 0.26647743582725525, "learning_rate": 6.1124069612962255e-06, "loss": 0.3257, "step": 23888 }, { "epoch": 2.428731191541277, "grad_norm": 0.26107990741729736, "learning_rate": 6.112060967043891e-06, "loss": 0.3238, "step": 23889 }, { "epoch": 2.428832858885726, "grad_norm": 0.269731730222702, "learning_rate": 6.111714967189257e-06, "loss": 0.3617, "step": 23890 }, { "epoch": 2.428934526230175, "grad_norm": 0.2549258768558502, "learning_rate": 6.111368961734067e-06, "loss": 0.3406, "step": 23891 }, { "epoch": 2.429036193574624, "grad_norm": 0.273451566696167, "learning_rate": 6.111022950680064e-06, "loss": 0.3322, "step": 23892 }, { "epoch": 2.429137860919073, "grad_norm": 0.2495305836200714, "learning_rate": 6.110676934028991e-06, "loss": 0.3543, "step": 23893 }, { "epoch": 2.4292395282635217, "grad_norm": 0.2809532582759857, "learning_rate": 6.110330911782593e-06, "loss": 0.3328, "step": 23894 }, { "epoch": 2.4293411956079707, "grad_norm": 0.29532551765441895, "learning_rate": 6.1099848839426116e-06, "loss": 0.3518, "step": 23895 }, { "epoch": 2.4294428629524196, "grad_norm": 0.2616422474384308, "learning_rate": 6.109638850510789e-06, "loss": 0.2979, "step": 23896 }, { "epoch": 2.4295445302968686, "grad_norm": 0.25594034790992737, "learning_rate": 6.10929281148887e-06, "loss": 0.3701, "step": 23897 }, { "epoch": 2.4296461976413175, "grad_norm": 0.2632296681404114, "learning_rate": 6.1089467668785965e-06, "loss": 0.3463, "step": 23898 }, { "epoch": 2.4297478649857664, "grad_norm": 0.27077311277389526, "learning_rate": 6.108600716681713e-06, "loss": 0.3214, "step": 23899 }, { "epoch": 2.4298495323302154, "grad_norm": 1.1211777925491333, "learning_rate": 6.108254660899964e-06, "loss": 0.3305, "step": 23900 }, { "epoch": 2.4299511996746643, "grad_norm": 0.2562989592552185, "learning_rate": 6.107908599535091e-06, "loss": 0.3273, "step": 23901 }, { "epoch": 2.4300528670191133, "grad_norm": 0.2595568299293518, "learning_rate": 6.107562532588837e-06, "loss": 0.3225, "step": 23902 }, { "epoch": 2.430154534363562, "grad_norm": 0.2746380865573883, "learning_rate": 6.107216460062946e-06, "loss": 0.2944, "step": 23903 }, { "epoch": 2.430256201708011, "grad_norm": 0.29354700446128845, "learning_rate": 6.1068703819591614e-06, "loss": 0.3321, "step": 23904 }, { "epoch": 2.4303578690524605, "grad_norm": 0.2699172794818878, "learning_rate": 6.106524298279228e-06, "loss": 0.3419, "step": 23905 }, { "epoch": 2.4304595363969095, "grad_norm": 0.2777971923351288, "learning_rate": 6.106178209024887e-06, "loss": 0.3692, "step": 23906 }, { "epoch": 2.4305612037413584, "grad_norm": 0.28953343629837036, "learning_rate": 6.105832114197885e-06, "loss": 0.3391, "step": 23907 }, { "epoch": 2.4306628710858074, "grad_norm": 0.26913437247276306, "learning_rate": 6.105486013799962e-06, "loss": 0.3276, "step": 23908 }, { "epoch": 2.4307645384302563, "grad_norm": 0.2926061451435089, "learning_rate": 6.105139907832862e-06, "loss": 0.3421, "step": 23909 }, { "epoch": 2.4308662057747052, "grad_norm": 0.2721569538116455, "learning_rate": 6.104793796298331e-06, "loss": 0.3166, "step": 23910 }, { "epoch": 2.430967873119154, "grad_norm": 0.27725711464881897, "learning_rate": 6.104447679198112e-06, "loss": 0.3349, "step": 23911 }, { "epoch": 2.431069540463603, "grad_norm": 0.26110216975212097, "learning_rate": 6.104101556533948e-06, "loss": 0.3472, "step": 23912 }, { "epoch": 2.431171207808052, "grad_norm": 0.26981741189956665, "learning_rate": 6.103755428307582e-06, "loss": 0.3545, "step": 23913 }, { "epoch": 2.431272875152501, "grad_norm": 0.26297527551651, "learning_rate": 6.103409294520758e-06, "loss": 0.2824, "step": 23914 }, { "epoch": 2.43137454249695, "grad_norm": 0.2705947160720825, "learning_rate": 6.103063155175222e-06, "loss": 0.3608, "step": 23915 }, { "epoch": 2.431476209841399, "grad_norm": 0.2643454670906067, "learning_rate": 6.102717010272714e-06, "loss": 0.297, "step": 23916 }, { "epoch": 2.431577877185848, "grad_norm": 0.2582874596118927, "learning_rate": 6.102370859814981e-06, "loss": 0.3136, "step": 23917 }, { "epoch": 2.4316795445302968, "grad_norm": 0.2719062566757202, "learning_rate": 6.102024703803765e-06, "loss": 0.332, "step": 23918 }, { "epoch": 2.4317812118747457, "grad_norm": 0.25776928663253784, "learning_rate": 6.10167854224081e-06, "loss": 0.3883, "step": 23919 }, { "epoch": 2.4318828792191947, "grad_norm": 0.26062002778053284, "learning_rate": 6.101332375127862e-06, "loss": 0.3242, "step": 23920 }, { "epoch": 2.4319845465636436, "grad_norm": 0.2700953483581543, "learning_rate": 6.100986202466661e-06, "loss": 0.3492, "step": 23921 }, { "epoch": 2.4320862139080925, "grad_norm": 0.2701702415943146, "learning_rate": 6.100640024258956e-06, "loss": 0.3427, "step": 23922 }, { "epoch": 2.432187881252542, "grad_norm": 0.26991957426071167, "learning_rate": 6.100293840506486e-06, "loss": 0.3313, "step": 23923 }, { "epoch": 2.432289548596991, "grad_norm": 0.29148009419441223, "learning_rate": 6.099947651210997e-06, "loss": 0.3268, "step": 23924 }, { "epoch": 2.43239121594144, "grad_norm": 0.27979016304016113, "learning_rate": 6.099601456374234e-06, "loss": 0.3317, "step": 23925 }, { "epoch": 2.4324928832858888, "grad_norm": 0.25450339913368225, "learning_rate": 6.099255255997939e-06, "loss": 0.3198, "step": 23926 }, { "epoch": 2.4325945506303377, "grad_norm": 0.2588402330875397, "learning_rate": 6.0989090500838586e-06, "loss": 0.2891, "step": 23927 }, { "epoch": 2.4326962179747866, "grad_norm": 0.277444452047348, "learning_rate": 6.098562838633734e-06, "loss": 0.3294, "step": 23928 }, { "epoch": 2.4327978853192356, "grad_norm": 0.2762186825275421, "learning_rate": 6.098216621649312e-06, "loss": 0.3523, "step": 23929 }, { "epoch": 2.4328995526636845, "grad_norm": 0.28909358382225037, "learning_rate": 6.097870399132337e-06, "loss": 0.3489, "step": 23930 }, { "epoch": 2.4330012200081335, "grad_norm": 0.27944034337997437, "learning_rate": 6.097524171084549e-06, "loss": 0.3169, "step": 23931 }, { "epoch": 2.4331028873525824, "grad_norm": 0.26852184534072876, "learning_rate": 6.097177937507697e-06, "loss": 0.3428, "step": 23932 }, { "epoch": 2.4332045546970313, "grad_norm": 0.28001564741134644, "learning_rate": 6.0968316984035225e-06, "loss": 0.3505, "step": 23933 }, { "epoch": 2.4333062220414803, "grad_norm": 0.2682499289512634, "learning_rate": 6.096485453773769e-06, "loss": 0.3323, "step": 23934 }, { "epoch": 2.4334078893859292, "grad_norm": 0.27898314595222473, "learning_rate": 6.0961392036201865e-06, "loss": 0.3313, "step": 23935 }, { "epoch": 2.433509556730378, "grad_norm": 0.2781897485256195, "learning_rate": 6.095792947944511e-06, "loss": 0.3358, "step": 23936 }, { "epoch": 2.433611224074827, "grad_norm": 0.25852081179618835, "learning_rate": 6.095446686748494e-06, "loss": 0.293, "step": 23937 }, { "epoch": 2.433712891419276, "grad_norm": 0.2877980172634125, "learning_rate": 6.095100420033876e-06, "loss": 0.3721, "step": 23938 }, { "epoch": 2.433814558763725, "grad_norm": 0.27831873297691345, "learning_rate": 6.0947541478024005e-06, "loss": 0.3025, "step": 23939 }, { "epoch": 2.433916226108174, "grad_norm": 0.2837575078010559, "learning_rate": 6.0944078700558165e-06, "loss": 0.3159, "step": 23940 }, { "epoch": 2.434017893452623, "grad_norm": 0.25599318742752075, "learning_rate": 6.094061586795863e-06, "loss": 0.333, "step": 23941 }, { "epoch": 2.434119560797072, "grad_norm": 0.26962536573410034, "learning_rate": 6.0937152980242895e-06, "loss": 0.3304, "step": 23942 }, { "epoch": 2.4342212281415208, "grad_norm": 0.27998730540275574, "learning_rate": 6.093369003742838e-06, "loss": 0.3671, "step": 23943 }, { "epoch": 2.4343228954859697, "grad_norm": 0.25735482573509216, "learning_rate": 6.093022703953251e-06, "loss": 0.3343, "step": 23944 }, { "epoch": 2.4344245628304186, "grad_norm": 0.26266705989837646, "learning_rate": 6.0926763986572775e-06, "loss": 0.3188, "step": 23945 }, { "epoch": 2.434526230174868, "grad_norm": 0.27849850058555603, "learning_rate": 6.092330087856659e-06, "loss": 0.3218, "step": 23946 }, { "epoch": 2.434627897519317, "grad_norm": 0.287447988986969, "learning_rate": 6.091983771553143e-06, "loss": 0.3366, "step": 23947 }, { "epoch": 2.434729564863766, "grad_norm": 0.2725074887275696, "learning_rate": 6.0916374497484705e-06, "loss": 0.3059, "step": 23948 }, { "epoch": 2.434831232208215, "grad_norm": 0.25403910875320435, "learning_rate": 6.091291122444387e-06, "loss": 0.3108, "step": 23949 }, { "epoch": 2.434932899552664, "grad_norm": 0.2790333330631256, "learning_rate": 6.090944789642641e-06, "loss": 0.3569, "step": 23950 }, { "epoch": 2.4350345668971127, "grad_norm": 0.26859092712402344, "learning_rate": 6.090598451344972e-06, "loss": 0.3476, "step": 23951 }, { "epoch": 2.4351362342415617, "grad_norm": 0.25114360451698303, "learning_rate": 6.0902521075531286e-06, "loss": 0.3294, "step": 23952 }, { "epoch": 2.4352379015860106, "grad_norm": 0.27201688289642334, "learning_rate": 6.089905758268854e-06, "loss": 0.3201, "step": 23953 }, { "epoch": 2.4353395689304596, "grad_norm": 0.26043012738227844, "learning_rate": 6.089559403493891e-06, "loss": 0.2981, "step": 23954 }, { "epoch": 2.4354412362749085, "grad_norm": 0.29358944296836853, "learning_rate": 6.089213043229989e-06, "loss": 0.3134, "step": 23955 }, { "epoch": 2.4355429036193574, "grad_norm": 0.24784241616725922, "learning_rate": 6.088866677478891e-06, "loss": 0.3144, "step": 23956 }, { "epoch": 2.4356445709638064, "grad_norm": 0.2574087083339691, "learning_rate": 6.08852030624234e-06, "loss": 0.3416, "step": 23957 }, { "epoch": 2.4357462383082553, "grad_norm": 0.2829851806163788, "learning_rate": 6.088173929522083e-06, "loss": 0.3217, "step": 23958 }, { "epoch": 2.4358479056527043, "grad_norm": 0.25890353322029114, "learning_rate": 6.087827547319862e-06, "loss": 0.3107, "step": 23959 }, { "epoch": 2.435949572997153, "grad_norm": 0.2669731080532074, "learning_rate": 6.0874811596374276e-06, "loss": 0.3501, "step": 23960 }, { "epoch": 2.436051240341602, "grad_norm": 0.25963544845581055, "learning_rate": 6.087134766476521e-06, "loss": 0.3608, "step": 23961 }, { "epoch": 2.436152907686051, "grad_norm": 0.26531723141670227, "learning_rate": 6.086788367838888e-06, "loss": 0.3092, "step": 23962 }, { "epoch": 2.4362545750305, "grad_norm": 0.29796913266181946, "learning_rate": 6.086441963726272e-06, "loss": 0.3604, "step": 23963 }, { "epoch": 2.4363562423749494, "grad_norm": 0.3288052976131439, "learning_rate": 6.086095554140418e-06, "loss": 0.372, "step": 23964 }, { "epoch": 2.4364579097193984, "grad_norm": 0.29540693759918213, "learning_rate": 6.085749139083076e-06, "loss": 0.3006, "step": 23965 }, { "epoch": 2.4365595770638473, "grad_norm": 0.2773120403289795, "learning_rate": 6.0854027185559875e-06, "loss": 0.3297, "step": 23966 }, { "epoch": 2.4366612444082962, "grad_norm": 0.2692664563655853, "learning_rate": 6.0850562925608966e-06, "loss": 0.3336, "step": 23967 }, { "epoch": 2.436762911752745, "grad_norm": 0.2774163484573364, "learning_rate": 6.084709861099551e-06, "loss": 0.3363, "step": 23968 }, { "epoch": 2.436864579097194, "grad_norm": 0.2833249270915985, "learning_rate": 6.0843634241736934e-06, "loss": 0.3294, "step": 23969 }, { "epoch": 2.436966246441643, "grad_norm": 0.2801581919193268, "learning_rate": 6.084016981785073e-06, "loss": 0.3378, "step": 23970 }, { "epoch": 2.437067913786092, "grad_norm": 0.26730453968048096, "learning_rate": 6.083670533935431e-06, "loss": 0.328, "step": 23971 }, { "epoch": 2.437169581130541, "grad_norm": 0.25890985131263733, "learning_rate": 6.083324080626515e-06, "loss": 0.351, "step": 23972 }, { "epoch": 2.43727124847499, "grad_norm": 0.25043773651123047, "learning_rate": 6.082977621860069e-06, "loss": 0.3229, "step": 23973 }, { "epoch": 2.437372915819439, "grad_norm": 0.2673824429512024, "learning_rate": 6.08263115763784e-06, "loss": 0.3331, "step": 23974 }, { "epoch": 2.4374745831638878, "grad_norm": 0.2772568464279175, "learning_rate": 6.082284687961571e-06, "loss": 0.3365, "step": 23975 }, { "epoch": 2.4375762505083367, "grad_norm": 0.2663544714450836, "learning_rate": 6.081938212833011e-06, "loss": 0.3101, "step": 23976 }, { "epoch": 2.4376779178527856, "grad_norm": 0.2569992244243622, "learning_rate": 6.081591732253901e-06, "loss": 0.3102, "step": 23977 }, { "epoch": 2.4377795851972346, "grad_norm": 0.2802484631538391, "learning_rate": 6.0812452462259904e-06, "loss": 0.3496, "step": 23978 }, { "epoch": 2.4378812525416835, "grad_norm": 0.2667793333530426, "learning_rate": 6.0808987547510235e-06, "loss": 0.3368, "step": 23979 }, { "epoch": 2.4379829198861325, "grad_norm": 0.26231881976127625, "learning_rate": 6.080552257830744e-06, "loss": 0.3458, "step": 23980 }, { "epoch": 2.4380845872305814, "grad_norm": 0.27155163884162903, "learning_rate": 6.080205755466901e-06, "loss": 0.3358, "step": 23981 }, { "epoch": 2.4381862545750304, "grad_norm": 0.24896933138370514, "learning_rate": 6.079859247661236e-06, "loss": 0.3031, "step": 23982 }, { "epoch": 2.4382879219194793, "grad_norm": 0.27198120951652527, "learning_rate": 6.079512734415499e-06, "loss": 0.3099, "step": 23983 }, { "epoch": 2.4383895892639282, "grad_norm": 0.28692546486854553, "learning_rate": 6.079166215731432e-06, "loss": 0.324, "step": 23984 }, { "epoch": 2.438491256608377, "grad_norm": 0.27140361070632935, "learning_rate": 6.078819691610781e-06, "loss": 0.3476, "step": 23985 }, { "epoch": 2.438592923952826, "grad_norm": 0.25900986790657043, "learning_rate": 6.078473162055295e-06, "loss": 0.3256, "step": 23986 }, { "epoch": 2.4386945912972755, "grad_norm": 0.26153889298439026, "learning_rate": 6.078126627066716e-06, "loss": 0.3427, "step": 23987 }, { "epoch": 2.4387962586417244, "grad_norm": 0.25825658440589905, "learning_rate": 6.077780086646793e-06, "loss": 0.3497, "step": 23988 }, { "epoch": 2.4388979259861734, "grad_norm": 0.2653386890888214, "learning_rate": 6.077433540797269e-06, "loss": 0.333, "step": 23989 }, { "epoch": 2.4389995933306223, "grad_norm": 0.27424731850624084, "learning_rate": 6.077086989519889e-06, "loss": 0.3426, "step": 23990 }, { "epoch": 2.4391012606750713, "grad_norm": 0.2639999985694885, "learning_rate": 6.0767404328164024e-06, "loss": 0.3129, "step": 23991 }, { "epoch": 2.43920292801952, "grad_norm": 0.2631419003009796, "learning_rate": 6.076393870688554e-06, "loss": 0.3534, "step": 23992 }, { "epoch": 2.439304595363969, "grad_norm": 0.2846032381057739, "learning_rate": 6.0760473031380885e-06, "loss": 0.3424, "step": 23993 }, { "epoch": 2.439406262708418, "grad_norm": 0.2677096128463745, "learning_rate": 6.075700730166751e-06, "loss": 0.3375, "step": 23994 }, { "epoch": 2.439507930052867, "grad_norm": 0.2600611746311188, "learning_rate": 6.07535415177629e-06, "loss": 0.3334, "step": 23995 }, { "epoch": 2.439609597397316, "grad_norm": 0.2597168982028961, "learning_rate": 6.075007567968452e-06, "loss": 0.3115, "step": 23996 }, { "epoch": 2.439711264741765, "grad_norm": 0.27731794118881226, "learning_rate": 6.074660978744979e-06, "loss": 0.3293, "step": 23997 }, { "epoch": 2.439812932086214, "grad_norm": 0.26268938183784485, "learning_rate": 6.0743143841076216e-06, "loss": 0.3196, "step": 23998 }, { "epoch": 2.439914599430663, "grad_norm": 0.24631944298744202, "learning_rate": 6.0739677840581215e-06, "loss": 0.3386, "step": 23999 }, { "epoch": 2.4400162667751117, "grad_norm": 0.27845457196235657, "learning_rate": 6.0736211785982265e-06, "loss": 0.3201, "step": 24000 }, { "epoch": 2.4401179341195607, "grad_norm": 0.25544843077659607, "learning_rate": 6.0732745677296855e-06, "loss": 0.3367, "step": 24001 }, { "epoch": 2.4402196014640096, "grad_norm": 0.28335392475128174, "learning_rate": 6.07292795145424e-06, "loss": 0.346, "step": 24002 }, { "epoch": 2.4403212688084586, "grad_norm": 0.2707659602165222, "learning_rate": 6.0725813297736405e-06, "loss": 0.3253, "step": 24003 }, { "epoch": 2.4404229361529075, "grad_norm": 0.3001762330532074, "learning_rate": 6.07223470268963e-06, "loss": 0.3365, "step": 24004 }, { "epoch": 2.440524603497357, "grad_norm": 0.26960489153862, "learning_rate": 6.071888070203955e-06, "loss": 0.3388, "step": 24005 }, { "epoch": 2.440626270841806, "grad_norm": 0.2799000144004822, "learning_rate": 6.071541432318364e-06, "loss": 0.3319, "step": 24006 }, { "epoch": 2.4407279381862548, "grad_norm": 0.28579792380332947, "learning_rate": 6.071194789034601e-06, "loss": 0.3308, "step": 24007 }, { "epoch": 2.4408296055307037, "grad_norm": 0.26453089714050293, "learning_rate": 6.070848140354415e-06, "loss": 0.3547, "step": 24008 }, { "epoch": 2.4409312728751527, "grad_norm": 0.2655375599861145, "learning_rate": 6.070501486279548e-06, "loss": 0.3525, "step": 24009 }, { "epoch": 2.4410329402196016, "grad_norm": 0.27133554220199585, "learning_rate": 6.070154826811751e-06, "loss": 0.3382, "step": 24010 }, { "epoch": 2.4411346075640505, "grad_norm": 0.25130411982536316, "learning_rate": 6.069808161952767e-06, "loss": 0.345, "step": 24011 }, { "epoch": 2.4412362749084995, "grad_norm": 0.2797001004219055, "learning_rate": 6.069461491704344e-06, "loss": 0.3767, "step": 24012 }, { "epoch": 2.4413379422529484, "grad_norm": 0.28716689348220825, "learning_rate": 6.069114816068229e-06, "loss": 0.3306, "step": 24013 }, { "epoch": 2.4414396095973974, "grad_norm": 0.26560160517692566, "learning_rate": 6.068768135046168e-06, "loss": 0.3347, "step": 24014 }, { "epoch": 2.4415412769418463, "grad_norm": 0.28172677755355835, "learning_rate": 6.068421448639904e-06, "loss": 0.3306, "step": 24015 }, { "epoch": 2.4416429442862952, "grad_norm": 0.2774488627910614, "learning_rate": 6.06807475685119e-06, "loss": 0.3174, "step": 24016 }, { "epoch": 2.441744611630744, "grad_norm": 0.280937135219574, "learning_rate": 6.067728059681766e-06, "loss": 0.3326, "step": 24017 }, { "epoch": 2.441846278975193, "grad_norm": 0.2831207811832428, "learning_rate": 6.067381357133385e-06, "loss": 0.3333, "step": 24018 }, { "epoch": 2.441947946319642, "grad_norm": 0.2740345299243927, "learning_rate": 6.06703464920779e-06, "loss": 0.3343, "step": 24019 }, { "epoch": 2.442049613664091, "grad_norm": 0.27986234426498413, "learning_rate": 6.066687935906726e-06, "loss": 0.377, "step": 24020 }, { "epoch": 2.44215128100854, "grad_norm": 0.26228490471839905, "learning_rate": 6.066341217231943e-06, "loss": 0.3341, "step": 24021 }, { "epoch": 2.442252948352989, "grad_norm": 0.280720055103302, "learning_rate": 6.065994493185185e-06, "loss": 0.3171, "step": 24022 }, { "epoch": 2.442354615697438, "grad_norm": 0.2672365605831146, "learning_rate": 6.065647763768202e-06, "loss": 0.3281, "step": 24023 }, { "epoch": 2.4424562830418868, "grad_norm": 0.2576858401298523, "learning_rate": 6.065301028982739e-06, "loss": 0.312, "step": 24024 }, { "epoch": 2.4425579503863357, "grad_norm": 0.2707865536212921, "learning_rate": 6.064954288830539e-06, "loss": 0.317, "step": 24025 }, { "epoch": 2.4426596177307847, "grad_norm": 0.2935815453529358, "learning_rate": 6.064607543313356e-06, "loss": 0.3461, "step": 24026 }, { "epoch": 2.4427612850752336, "grad_norm": 0.2704116404056549, "learning_rate": 6.064260792432931e-06, "loss": 0.3294, "step": 24027 }, { "epoch": 2.442862952419683, "grad_norm": 0.2598588466644287, "learning_rate": 6.063914036191015e-06, "loss": 0.3346, "step": 24028 }, { "epoch": 2.442964619764132, "grad_norm": 0.2797585427761078, "learning_rate": 6.063567274589352e-06, "loss": 0.3571, "step": 24029 }, { "epoch": 2.443066287108581, "grad_norm": 0.26541468501091003, "learning_rate": 6.063220507629689e-06, "loss": 0.3512, "step": 24030 }, { "epoch": 2.44316795445303, "grad_norm": 0.2639105021953583, "learning_rate": 6.062873735313774e-06, "loss": 0.343, "step": 24031 }, { "epoch": 2.4432696217974788, "grad_norm": 0.3208297789096832, "learning_rate": 6.062526957643356e-06, "loss": 0.326, "step": 24032 }, { "epoch": 2.4433712891419277, "grad_norm": 0.2757161855697632, "learning_rate": 6.062180174620177e-06, "loss": 0.3125, "step": 24033 }, { "epoch": 2.4434729564863766, "grad_norm": 0.29307126998901367, "learning_rate": 6.0618333862459884e-06, "loss": 0.332, "step": 24034 }, { "epoch": 2.4435746238308256, "grad_norm": 0.25322186946868896, "learning_rate": 6.061486592522533e-06, "loss": 0.3336, "step": 24035 }, { "epoch": 2.4436762911752745, "grad_norm": 0.26581117510795593, "learning_rate": 6.061139793451563e-06, "loss": 0.3225, "step": 24036 }, { "epoch": 2.4437779585197235, "grad_norm": 0.2627091407775879, "learning_rate": 6.060792989034823e-06, "loss": 0.3004, "step": 24037 }, { "epoch": 2.4438796258641724, "grad_norm": 0.27890637516975403, "learning_rate": 6.060446179274058e-06, "loss": 0.356, "step": 24038 }, { "epoch": 2.4439812932086213, "grad_norm": 0.2647382616996765, "learning_rate": 6.060099364171019e-06, "loss": 0.3114, "step": 24039 }, { "epoch": 2.4440829605530703, "grad_norm": 0.2500374913215637, "learning_rate": 6.059752543727448e-06, "loss": 0.3356, "step": 24040 }, { "epoch": 2.4441846278975192, "grad_norm": 0.26135605573654175, "learning_rate": 6.059405717945099e-06, "loss": 0.3107, "step": 24041 }, { "epoch": 2.444286295241968, "grad_norm": 0.26342564821243286, "learning_rate": 6.059058886825716e-06, "loss": 0.3278, "step": 24042 }, { "epoch": 2.444387962586417, "grad_norm": 0.2926199436187744, "learning_rate": 6.058712050371044e-06, "loss": 0.3739, "step": 24043 }, { "epoch": 2.444489629930866, "grad_norm": 0.26832273602485657, "learning_rate": 6.0583652085828335e-06, "loss": 0.3034, "step": 24044 }, { "epoch": 2.444591297275315, "grad_norm": 0.2665168344974518, "learning_rate": 6.058018361462828e-06, "loss": 0.3857, "step": 24045 }, { "epoch": 2.4446929646197644, "grad_norm": 0.2598457336425781, "learning_rate": 6.057671509012781e-06, "loss": 0.3367, "step": 24046 }, { "epoch": 2.4447946319642133, "grad_norm": 0.30330848693847656, "learning_rate": 6.057324651234435e-06, "loss": 0.3571, "step": 24047 }, { "epoch": 2.4448962993086623, "grad_norm": 0.264170378446579, "learning_rate": 6.0569777881295385e-06, "loss": 0.3393, "step": 24048 }, { "epoch": 2.444997966653111, "grad_norm": 0.2727862298488617, "learning_rate": 6.0566309196998396e-06, "loss": 0.3187, "step": 24049 }, { "epoch": 2.44509963399756, "grad_norm": 0.2670772969722748, "learning_rate": 6.056284045947084e-06, "loss": 0.3449, "step": 24050 }, { "epoch": 2.445201301342009, "grad_norm": 0.2814488708972931, "learning_rate": 6.05593716687302e-06, "loss": 0.3227, "step": 24051 }, { "epoch": 2.445302968686458, "grad_norm": 0.29106849431991577, "learning_rate": 6.055590282479398e-06, "loss": 0.3446, "step": 24052 }, { "epoch": 2.445404636030907, "grad_norm": 0.2792207598686218, "learning_rate": 6.055243392767961e-06, "loss": 0.3224, "step": 24053 }, { "epoch": 2.445506303375356, "grad_norm": 0.2898435592651367, "learning_rate": 6.05489649774046e-06, "loss": 0.3252, "step": 24054 }, { "epoch": 2.445607970719805, "grad_norm": 0.2770290672779083, "learning_rate": 6.05454959739864e-06, "loss": 0.3152, "step": 24055 }, { "epoch": 2.445709638064254, "grad_norm": 0.2762916386127472, "learning_rate": 6.054202691744249e-06, "loss": 0.3552, "step": 24056 }, { "epoch": 2.4458113054087027, "grad_norm": 0.26625362038612366, "learning_rate": 6.053855780779036e-06, "loss": 0.3175, "step": 24057 }, { "epoch": 2.4459129727531517, "grad_norm": 0.2867972254753113, "learning_rate": 6.053508864504749e-06, "loss": 0.3045, "step": 24058 }, { "epoch": 2.4460146400976006, "grad_norm": 0.292739599943161, "learning_rate": 6.053161942923135e-06, "loss": 0.3222, "step": 24059 }, { "epoch": 2.4461163074420496, "grad_norm": 0.2695670425891876, "learning_rate": 6.05281501603594e-06, "loss": 0.3198, "step": 24060 }, { "epoch": 2.4462179747864985, "grad_norm": 0.2569814920425415, "learning_rate": 6.052468083844912e-06, "loss": 0.3363, "step": 24061 }, { "epoch": 2.4463196421309474, "grad_norm": 0.26203158497810364, "learning_rate": 6.052121146351803e-06, "loss": 0.3283, "step": 24062 }, { "epoch": 2.4464213094753964, "grad_norm": 0.2809590995311737, "learning_rate": 6.051774203558356e-06, "loss": 0.3719, "step": 24063 }, { "epoch": 2.4465229768198453, "grad_norm": 0.2925008535385132, "learning_rate": 6.05142725546632e-06, "loss": 0.3017, "step": 24064 }, { "epoch": 2.4466246441642943, "grad_norm": 0.40299683809280396, "learning_rate": 6.0510803020774445e-06, "loss": 0.3344, "step": 24065 }, { "epoch": 2.446726311508743, "grad_norm": 0.2630298435688019, "learning_rate": 6.050733343393475e-06, "loss": 0.3294, "step": 24066 }, { "epoch": 2.446827978853192, "grad_norm": 0.2941434383392334, "learning_rate": 6.050386379416162e-06, "loss": 0.3689, "step": 24067 }, { "epoch": 2.446929646197641, "grad_norm": 0.26271069049835205, "learning_rate": 6.05003941014725e-06, "loss": 0.3075, "step": 24068 }, { "epoch": 2.4470313135420905, "grad_norm": 0.24242083728313446, "learning_rate": 6.049692435588492e-06, "loss": 0.3472, "step": 24069 }, { "epoch": 2.4471329808865394, "grad_norm": 0.258339524269104, "learning_rate": 6.049345455741631e-06, "loss": 0.3245, "step": 24070 }, { "epoch": 2.4472346482309884, "grad_norm": 0.2590476870536804, "learning_rate": 6.048998470608416e-06, "loss": 0.3695, "step": 24071 }, { "epoch": 2.4473363155754373, "grad_norm": 0.26425427198410034, "learning_rate": 6.048651480190598e-06, "loss": 0.3205, "step": 24072 }, { "epoch": 2.4474379829198862, "grad_norm": 0.2713732123374939, "learning_rate": 6.048304484489922e-06, "loss": 0.3438, "step": 24073 }, { "epoch": 2.447539650264335, "grad_norm": 0.28441157937049866, "learning_rate": 6.047957483508138e-06, "loss": 0.3275, "step": 24074 }, { "epoch": 2.447641317608784, "grad_norm": 0.2603166699409485, "learning_rate": 6.047610477246992e-06, "loss": 0.3293, "step": 24075 }, { "epoch": 2.447742984953233, "grad_norm": 0.2842315137386322, "learning_rate": 6.047263465708233e-06, "loss": 0.3433, "step": 24076 }, { "epoch": 2.447844652297682, "grad_norm": 0.26384177803993225, "learning_rate": 6.046916448893611e-06, "loss": 0.3141, "step": 24077 }, { "epoch": 2.447946319642131, "grad_norm": 0.28069064021110535, "learning_rate": 6.046569426804872e-06, "loss": 0.3253, "step": 24078 }, { "epoch": 2.44804798698658, "grad_norm": 0.24875564873218536, "learning_rate": 6.046222399443766e-06, "loss": 0.3127, "step": 24079 }, { "epoch": 2.448149654331029, "grad_norm": 0.2748425006866455, "learning_rate": 6.045875366812039e-06, "loss": 0.3324, "step": 24080 }, { "epoch": 2.4482513216754778, "grad_norm": 0.2676677405834198, "learning_rate": 6.045528328911441e-06, "loss": 0.3458, "step": 24081 }, { "epoch": 2.4483529890199267, "grad_norm": 0.2636326849460602, "learning_rate": 6.045181285743719e-06, "loss": 0.3235, "step": 24082 }, { "epoch": 2.4484546563643756, "grad_norm": 0.27425578236579895, "learning_rate": 6.044834237310623e-06, "loss": 0.3443, "step": 24083 }, { "epoch": 2.4485563237088246, "grad_norm": 0.2742574214935303, "learning_rate": 6.044487183613901e-06, "loss": 0.3583, "step": 24084 }, { "epoch": 2.4486579910532735, "grad_norm": 0.2906442880630493, "learning_rate": 6.044140124655299e-06, "loss": 0.3559, "step": 24085 }, { "epoch": 2.4487596583977225, "grad_norm": 0.2711208164691925, "learning_rate": 6.043793060436567e-06, "loss": 0.3166, "step": 24086 }, { "epoch": 2.448861325742172, "grad_norm": 0.28623875975608826, "learning_rate": 6.043445990959456e-06, "loss": 0.3227, "step": 24087 }, { "epoch": 2.448962993086621, "grad_norm": 0.28065893054008484, "learning_rate": 6.043098916225709e-06, "loss": 0.3332, "step": 24088 }, { "epoch": 2.4490646604310697, "grad_norm": 0.28336870670318604, "learning_rate": 6.04275183623708e-06, "loss": 0.3491, "step": 24089 }, { "epoch": 2.4491663277755187, "grad_norm": 0.28545182943344116, "learning_rate": 6.042404750995313e-06, "loss": 0.3169, "step": 24090 }, { "epoch": 2.4492679951199676, "grad_norm": 0.2831972539424896, "learning_rate": 6.0420576605021595e-06, "loss": 0.3282, "step": 24091 }, { "epoch": 2.4493696624644166, "grad_norm": 0.2620425820350647, "learning_rate": 6.041710564759369e-06, "loss": 0.3248, "step": 24092 }, { "epoch": 2.4494713298088655, "grad_norm": 0.27364829182624817, "learning_rate": 6.041363463768685e-06, "loss": 0.3504, "step": 24093 }, { "epoch": 2.4495729971533144, "grad_norm": 0.2725420594215393, "learning_rate": 6.041016357531862e-06, "loss": 0.3352, "step": 24094 }, { "epoch": 2.4496746644977634, "grad_norm": 0.2642212510108948, "learning_rate": 6.040669246050644e-06, "loss": 0.3103, "step": 24095 }, { "epoch": 2.4497763318422123, "grad_norm": 0.2760367691516876, "learning_rate": 6.0403221293267825e-06, "loss": 0.3509, "step": 24096 }, { "epoch": 2.4498779991866613, "grad_norm": 0.2770862579345703, "learning_rate": 6.039975007362026e-06, "loss": 0.3143, "step": 24097 }, { "epoch": 2.44997966653111, "grad_norm": 0.29048091173171997, "learning_rate": 6.039627880158121e-06, "loss": 0.3312, "step": 24098 }, { "epoch": 2.450081333875559, "grad_norm": 0.2857813239097595, "learning_rate": 6.03928074771682e-06, "loss": 0.3512, "step": 24099 }, { "epoch": 2.450183001220008, "grad_norm": 0.24849295616149902, "learning_rate": 6.038933610039869e-06, "loss": 0.3348, "step": 24100 }, { "epoch": 2.450284668564457, "grad_norm": 0.263821005821228, "learning_rate": 6.038586467129015e-06, "loss": 0.3266, "step": 24101 }, { "epoch": 2.450386335908906, "grad_norm": 0.26481765508651733, "learning_rate": 6.038239318986011e-06, "loss": 0.3481, "step": 24102 }, { "epoch": 2.450488003253355, "grad_norm": 0.260905921459198, "learning_rate": 6.037892165612604e-06, "loss": 0.2853, "step": 24103 }, { "epoch": 2.450589670597804, "grad_norm": 0.27624186873435974, "learning_rate": 6.0375450070105436e-06, "loss": 0.3722, "step": 24104 }, { "epoch": 2.450691337942253, "grad_norm": 0.2806086540222168, "learning_rate": 6.037197843181577e-06, "loss": 0.3911, "step": 24105 }, { "epoch": 2.4507930052867017, "grad_norm": 0.2816134989261627, "learning_rate": 6.036850674127453e-06, "loss": 0.361, "step": 24106 }, { "epoch": 2.4508946726311507, "grad_norm": 0.2855869233608246, "learning_rate": 6.036503499849924e-06, "loss": 0.3514, "step": 24107 }, { "epoch": 2.4509963399755996, "grad_norm": 0.2799818813800812, "learning_rate": 6.036156320350737e-06, "loss": 0.3654, "step": 24108 }, { "epoch": 2.4510980073200486, "grad_norm": 0.27182358503341675, "learning_rate": 6.035809135631638e-06, "loss": 0.3493, "step": 24109 }, { "epoch": 2.451199674664498, "grad_norm": 0.2859039306640625, "learning_rate": 6.035461945694381e-06, "loss": 0.3295, "step": 24110 }, { "epoch": 2.451301342008947, "grad_norm": 0.280793696641922, "learning_rate": 6.035114750540711e-06, "loss": 0.3411, "step": 24111 }, { "epoch": 2.451403009353396, "grad_norm": 0.27415356040000916, "learning_rate": 6.03476755017238e-06, "loss": 0.3527, "step": 24112 }, { "epoch": 2.4515046766978448, "grad_norm": 0.2642415165901184, "learning_rate": 6.034420344591136e-06, "loss": 0.3439, "step": 24113 }, { "epoch": 2.4516063440422937, "grad_norm": 0.27878502011299133, "learning_rate": 6.034073133798728e-06, "loss": 0.3305, "step": 24114 }, { "epoch": 2.4517080113867427, "grad_norm": 0.265280544757843, "learning_rate": 6.033725917796906e-06, "loss": 0.3156, "step": 24115 }, { "epoch": 2.4518096787311916, "grad_norm": 0.27659520506858826, "learning_rate": 6.033378696587415e-06, "loss": 0.3123, "step": 24116 }, { "epoch": 2.4519113460756405, "grad_norm": 0.27373605966567993, "learning_rate": 6.033031470172012e-06, "loss": 0.3374, "step": 24117 }, { "epoch": 2.4520130134200895, "grad_norm": 0.2557627260684967, "learning_rate": 6.03268423855244e-06, "loss": 0.3656, "step": 24118 }, { "epoch": 2.4521146807645384, "grad_norm": 0.25608035922050476, "learning_rate": 6.03233700173045e-06, "loss": 0.3307, "step": 24119 }, { "epoch": 2.4522163481089874, "grad_norm": 0.27877384424209595, "learning_rate": 6.031989759707792e-06, "loss": 0.3467, "step": 24120 }, { "epoch": 2.4523180154534363, "grad_norm": 0.2787874937057495, "learning_rate": 6.031642512486215e-06, "loss": 0.344, "step": 24121 }, { "epoch": 2.4524196827978852, "grad_norm": 0.2732751667499542, "learning_rate": 6.031295260067468e-06, "loss": 0.3334, "step": 24122 }, { "epoch": 2.452521350142334, "grad_norm": 0.27393436431884766, "learning_rate": 6.030948002453301e-06, "loss": 0.3256, "step": 24123 }, { "epoch": 2.452623017486783, "grad_norm": 0.2846882939338684, "learning_rate": 6.030600739645462e-06, "loss": 0.3502, "step": 24124 }, { "epoch": 2.452724684831232, "grad_norm": 0.27700093388557434, "learning_rate": 6.030253471645702e-06, "loss": 0.3417, "step": 24125 }, { "epoch": 2.452826352175681, "grad_norm": 0.28728073835372925, "learning_rate": 6.029906198455769e-06, "loss": 0.3154, "step": 24126 }, { "epoch": 2.45292801952013, "grad_norm": 0.2874355912208557, "learning_rate": 6.0295589200774145e-06, "loss": 0.3309, "step": 24127 }, { "epoch": 2.4530296868645793, "grad_norm": 0.2793281376361847, "learning_rate": 6.029211636512386e-06, "loss": 0.3469, "step": 24128 }, { "epoch": 2.4531313542090283, "grad_norm": 0.26772913336753845, "learning_rate": 6.028864347762434e-06, "loss": 0.3527, "step": 24129 }, { "epoch": 2.4532330215534772, "grad_norm": 0.27090156078338623, "learning_rate": 6.0285170538293095e-06, "loss": 0.2899, "step": 24130 }, { "epoch": 2.453334688897926, "grad_norm": 0.28218555450439453, "learning_rate": 6.028169754714758e-06, "loss": 0.3364, "step": 24131 }, { "epoch": 2.453436356242375, "grad_norm": 0.26316511631011963, "learning_rate": 6.027822450420533e-06, "loss": 0.3661, "step": 24132 }, { "epoch": 2.453538023586824, "grad_norm": 0.27754291892051697, "learning_rate": 6.0274751409483825e-06, "loss": 0.3381, "step": 24133 }, { "epoch": 2.453639690931273, "grad_norm": 0.24977119266986847, "learning_rate": 6.027127826300056e-06, "loss": 0.3308, "step": 24134 }, { "epoch": 2.453741358275722, "grad_norm": 0.27622750401496887, "learning_rate": 6.026780506477305e-06, "loss": 0.3211, "step": 24135 }, { "epoch": 2.453843025620171, "grad_norm": 0.3039509057998657, "learning_rate": 6.026433181481876e-06, "loss": 0.3081, "step": 24136 }, { "epoch": 2.45394469296462, "grad_norm": 0.27275487780570984, "learning_rate": 6.026085851315521e-06, "loss": 0.339, "step": 24137 }, { "epoch": 2.4540463603090688, "grad_norm": 0.277565598487854, "learning_rate": 6.02573851597999e-06, "loss": 0.3359, "step": 24138 }, { "epoch": 2.4541480276535177, "grad_norm": 0.2950965166091919, "learning_rate": 6.025391175477031e-06, "loss": 0.356, "step": 24139 }, { "epoch": 2.4542496949979666, "grad_norm": 0.268057644367218, "learning_rate": 6.025043829808397e-06, "loss": 0.3688, "step": 24140 }, { "epoch": 2.4543513623424156, "grad_norm": 0.2631826102733612, "learning_rate": 6.024696478975833e-06, "loss": 0.3363, "step": 24141 }, { "epoch": 2.4544530296868645, "grad_norm": 0.24705365300178528, "learning_rate": 6.024349122981092e-06, "loss": 0.3468, "step": 24142 }, { "epoch": 2.4545546970313135, "grad_norm": 0.29342907667160034, "learning_rate": 6.024001761825926e-06, "loss": 0.355, "step": 24143 }, { "epoch": 2.4546563643757624, "grad_norm": 0.2874755263328552, "learning_rate": 6.023654395512081e-06, "loss": 0.3405, "step": 24144 }, { "epoch": 2.4547580317202113, "grad_norm": 0.2860828936100006, "learning_rate": 6.0233070240413094e-06, "loss": 0.3541, "step": 24145 }, { "epoch": 2.4548596990646603, "grad_norm": 0.28118205070495605, "learning_rate": 6.022959647415358e-06, "loss": 0.3314, "step": 24146 }, { "epoch": 2.4549613664091092, "grad_norm": 0.28812700510025024, "learning_rate": 6.022612265635981e-06, "loss": 0.3186, "step": 24147 }, { "epoch": 2.455063033753558, "grad_norm": 0.2841840982437134, "learning_rate": 6.022264878704925e-06, "loss": 0.3305, "step": 24148 }, { "epoch": 2.455164701098007, "grad_norm": 0.2728998064994812, "learning_rate": 6.021917486623943e-06, "loss": 0.3345, "step": 24149 }, { "epoch": 2.455266368442456, "grad_norm": 0.25701427459716797, "learning_rate": 6.021570089394783e-06, "loss": 0.35, "step": 24150 }, { "epoch": 2.4553680357869054, "grad_norm": 0.27616456151008606, "learning_rate": 6.0212226870191945e-06, "loss": 0.3345, "step": 24151 }, { "epoch": 2.4554697031313544, "grad_norm": 0.26417985558509827, "learning_rate": 6.020875279498929e-06, "loss": 0.3174, "step": 24152 }, { "epoch": 2.4555713704758033, "grad_norm": 0.27626365423202515, "learning_rate": 6.020527866835738e-06, "loss": 0.3049, "step": 24153 }, { "epoch": 2.4556730378202523, "grad_norm": 0.2648420035839081, "learning_rate": 6.020180449031368e-06, "loss": 0.3585, "step": 24154 }, { "epoch": 2.455774705164701, "grad_norm": 0.27547529339790344, "learning_rate": 6.0198330260875736e-06, "loss": 0.3349, "step": 24155 }, { "epoch": 2.45587637250915, "grad_norm": 0.26216667890548706, "learning_rate": 6.019485598006101e-06, "loss": 0.3332, "step": 24156 }, { "epoch": 2.455978039853599, "grad_norm": 0.26872575283050537, "learning_rate": 6.0191381647887025e-06, "loss": 0.3638, "step": 24157 }, { "epoch": 2.456079707198048, "grad_norm": 0.2805272340774536, "learning_rate": 6.0187907264371295e-06, "loss": 0.3236, "step": 24158 }, { "epoch": 2.456181374542497, "grad_norm": 0.244331493973732, "learning_rate": 6.018443282953129e-06, "loss": 0.3258, "step": 24159 }, { "epoch": 2.456283041886946, "grad_norm": 0.28374364972114563, "learning_rate": 6.018095834338456e-06, "loss": 0.3154, "step": 24160 }, { "epoch": 2.456384709231395, "grad_norm": 0.2803134024143219, "learning_rate": 6.017748380594856e-06, "loss": 0.3188, "step": 24161 }, { "epoch": 2.456486376575844, "grad_norm": 0.27904990315437317, "learning_rate": 6.017400921724081e-06, "loss": 0.3216, "step": 24162 }, { "epoch": 2.4565880439202927, "grad_norm": 0.2560305893421173, "learning_rate": 6.017053457727883e-06, "loss": 0.3426, "step": 24163 }, { "epoch": 2.4566897112647417, "grad_norm": 0.2675405442714691, "learning_rate": 6.016705988608012e-06, "loss": 0.3465, "step": 24164 }, { "epoch": 2.4567913786091906, "grad_norm": 0.2710406184196472, "learning_rate": 6.016358514366216e-06, "loss": 0.3122, "step": 24165 }, { "epoch": 2.4568930459536396, "grad_norm": 0.26655086874961853, "learning_rate": 6.016011035004248e-06, "loss": 0.3438, "step": 24166 }, { "epoch": 2.4569947132980885, "grad_norm": 0.2691866457462311, "learning_rate": 6.015663550523858e-06, "loss": 0.2869, "step": 24167 }, { "epoch": 2.4570963806425374, "grad_norm": 0.2830233871936798, "learning_rate": 6.015316060926797e-06, "loss": 0.3243, "step": 24168 }, { "epoch": 2.457198047986987, "grad_norm": 0.2790306508541107, "learning_rate": 6.014968566214814e-06, "loss": 0.3363, "step": 24169 }, { "epoch": 2.4572997153314358, "grad_norm": 0.25902512669563293, "learning_rate": 6.014621066389662e-06, "loss": 0.3236, "step": 24170 }, { "epoch": 2.4574013826758847, "grad_norm": 0.2729203701019287, "learning_rate": 6.014273561453089e-06, "loss": 0.3306, "step": 24171 }, { "epoch": 2.4575030500203336, "grad_norm": 0.293273001909256, "learning_rate": 6.013926051406847e-06, "loss": 0.3244, "step": 24172 }, { "epoch": 2.4576047173647826, "grad_norm": 0.25423112511634827, "learning_rate": 6.013578536252686e-06, "loss": 0.3159, "step": 24173 }, { "epoch": 2.4577063847092315, "grad_norm": 0.313200980424881, "learning_rate": 6.013231015992358e-06, "loss": 0.3456, "step": 24174 }, { "epoch": 2.4578080520536805, "grad_norm": 0.2853405177593231, "learning_rate": 6.012883490627614e-06, "loss": 0.3432, "step": 24175 }, { "epoch": 2.4579097193981294, "grad_norm": 0.25197795033454895, "learning_rate": 6.012535960160201e-06, "loss": 0.3298, "step": 24176 }, { "epoch": 2.4580113867425784, "grad_norm": 0.2550302743911743, "learning_rate": 6.012188424591874e-06, "loss": 0.3446, "step": 24177 }, { "epoch": 2.4581130540870273, "grad_norm": 0.2770443558692932, "learning_rate": 6.011840883924382e-06, "loss": 0.3226, "step": 24178 }, { "epoch": 2.4582147214314762, "grad_norm": 0.31542304158210754, "learning_rate": 6.011493338159475e-06, "loss": 0.3256, "step": 24179 }, { "epoch": 2.458316388775925, "grad_norm": 0.2720894515514374, "learning_rate": 6.011145787298908e-06, "loss": 0.3233, "step": 24180 }, { "epoch": 2.458418056120374, "grad_norm": 0.30173152685165405, "learning_rate": 6.010798231344426e-06, "loss": 0.3369, "step": 24181 }, { "epoch": 2.458519723464823, "grad_norm": 0.27909186482429504, "learning_rate": 6.010450670297782e-06, "loss": 0.3529, "step": 24182 }, { "epoch": 2.458621390809272, "grad_norm": 0.2638431191444397, "learning_rate": 6.01010310416073e-06, "loss": 0.3187, "step": 24183 }, { "epoch": 2.458723058153721, "grad_norm": 0.2770497798919678, "learning_rate": 6.00975553293502e-06, "loss": 0.3561, "step": 24184 }, { "epoch": 2.45882472549817, "grad_norm": 0.261238157749176, "learning_rate": 6.009407956622398e-06, "loss": 0.3286, "step": 24185 }, { "epoch": 2.458926392842619, "grad_norm": 0.2672688961029053, "learning_rate": 6.00906037522462e-06, "loss": 0.3282, "step": 24186 }, { "epoch": 2.4590280601870678, "grad_norm": 0.3271288275718689, "learning_rate": 6.008712788743435e-06, "loss": 0.3761, "step": 24187 }, { "epoch": 2.4591297275315167, "grad_norm": 0.28604060411453247, "learning_rate": 6.008365197180596e-06, "loss": 0.3303, "step": 24188 }, { "epoch": 2.4592313948759656, "grad_norm": 0.2634184658527374, "learning_rate": 6.008017600537852e-06, "loss": 0.3293, "step": 24189 }, { "epoch": 2.4593330622204146, "grad_norm": 0.2817397117614746, "learning_rate": 6.007669998816955e-06, "loss": 0.3265, "step": 24190 }, { "epoch": 2.4594347295648635, "grad_norm": 0.28533414006233215, "learning_rate": 6.007322392019656e-06, "loss": 0.3392, "step": 24191 }, { "epoch": 2.459536396909313, "grad_norm": 0.2882687449455261, "learning_rate": 6.0069747801477054e-06, "loss": 0.3164, "step": 24192 }, { "epoch": 2.459638064253762, "grad_norm": 0.2649627923965454, "learning_rate": 6.006627163202856e-06, "loss": 0.329, "step": 24193 }, { "epoch": 2.459739731598211, "grad_norm": 0.2813849151134491, "learning_rate": 6.0062795411868595e-06, "loss": 0.3352, "step": 24194 }, { "epoch": 2.4598413989426597, "grad_norm": 0.2734518051147461, "learning_rate": 6.0059319141014634e-06, "loss": 0.3383, "step": 24195 }, { "epoch": 2.4599430662871087, "grad_norm": 0.2703927457332611, "learning_rate": 6.0055842819484226e-06, "loss": 0.3264, "step": 24196 }, { "epoch": 2.4600447336315576, "grad_norm": 0.28009697794914246, "learning_rate": 6.005236644729485e-06, "loss": 0.3647, "step": 24197 }, { "epoch": 2.4601464009760066, "grad_norm": 0.26323744654655457, "learning_rate": 6.004889002446407e-06, "loss": 0.3455, "step": 24198 }, { "epoch": 2.4602480683204555, "grad_norm": 0.29525190591812134, "learning_rate": 6.004541355100937e-06, "loss": 0.3504, "step": 24199 }, { "epoch": 2.4603497356649044, "grad_norm": 0.2806246876716614, "learning_rate": 6.004193702694825e-06, "loss": 0.3676, "step": 24200 }, { "epoch": 2.4604514030093534, "grad_norm": 0.26369786262512207, "learning_rate": 6.003846045229824e-06, "loss": 0.3281, "step": 24201 }, { "epoch": 2.4605530703538023, "grad_norm": 0.2593955397605896, "learning_rate": 6.003498382707685e-06, "loss": 0.3228, "step": 24202 }, { "epoch": 2.4606547376982513, "grad_norm": 0.2746739387512207, "learning_rate": 6.00315071513016e-06, "loss": 0.346, "step": 24203 }, { "epoch": 2.4607564050427, "grad_norm": 0.2689974009990692, "learning_rate": 6.002803042498999e-06, "loss": 0.3527, "step": 24204 }, { "epoch": 2.460858072387149, "grad_norm": 0.27337053418159485, "learning_rate": 6.002455364815955e-06, "loss": 0.3289, "step": 24205 }, { "epoch": 2.460959739731598, "grad_norm": 0.26494985818862915, "learning_rate": 6.002107682082781e-06, "loss": 0.3347, "step": 24206 }, { "epoch": 2.461061407076047, "grad_norm": 0.2613410949707031, "learning_rate": 6.001759994301225e-06, "loss": 0.312, "step": 24207 }, { "epoch": 2.461163074420496, "grad_norm": 0.27099138498306274, "learning_rate": 6.001412301473039e-06, "loss": 0.3252, "step": 24208 }, { "epoch": 2.461264741764945, "grad_norm": 0.2841986417770386, "learning_rate": 6.0010646035999785e-06, "loss": 0.3137, "step": 24209 }, { "epoch": 2.4613664091093943, "grad_norm": 0.2644908130168915, "learning_rate": 6.000716900683791e-06, "loss": 0.3258, "step": 24210 }, { "epoch": 2.4614680764538432, "grad_norm": 0.27413326501846313, "learning_rate": 6.000369192726229e-06, "loss": 0.333, "step": 24211 }, { "epoch": 2.461569743798292, "grad_norm": 0.2521538734436035, "learning_rate": 6.000021479729045e-06, "loss": 0.3428, "step": 24212 }, { "epoch": 2.461671411142741, "grad_norm": 0.2591508626937866, "learning_rate": 5.999673761693991e-06, "loss": 0.3235, "step": 24213 }, { "epoch": 2.46177307848719, "grad_norm": 0.2777111530303955, "learning_rate": 5.999326038622819e-06, "loss": 0.2987, "step": 24214 }, { "epoch": 2.461874745831639, "grad_norm": 0.2673054039478302, "learning_rate": 5.998978310517278e-06, "loss": 0.3312, "step": 24215 }, { "epoch": 2.461976413176088, "grad_norm": 0.270675927400589, "learning_rate": 5.998630577379123e-06, "loss": 0.3116, "step": 24216 }, { "epoch": 2.462078080520537, "grad_norm": 0.27511125802993774, "learning_rate": 5.998282839210103e-06, "loss": 0.3333, "step": 24217 }, { "epoch": 2.462179747864986, "grad_norm": 0.30854663252830505, "learning_rate": 5.997935096011973e-06, "loss": 0.3695, "step": 24218 }, { "epoch": 2.4622814152094348, "grad_norm": 0.26633965969085693, "learning_rate": 5.9975873477864836e-06, "loss": 0.3287, "step": 24219 }, { "epoch": 2.4623830825538837, "grad_norm": 0.26295211911201477, "learning_rate": 5.9972395945353835e-06, "loss": 0.335, "step": 24220 }, { "epoch": 2.4624847498983327, "grad_norm": 0.2813107669353485, "learning_rate": 5.99689183626043e-06, "loss": 0.3445, "step": 24221 }, { "epoch": 2.4625864172427816, "grad_norm": 0.2632227838039398, "learning_rate": 5.99654407296337e-06, "loss": 0.3603, "step": 24222 }, { "epoch": 2.4626880845872305, "grad_norm": 0.2610626220703125, "learning_rate": 5.99619630464596e-06, "loss": 0.3501, "step": 24223 }, { "epoch": 2.4627897519316795, "grad_norm": 0.26505857706069946, "learning_rate": 5.995848531309948e-06, "loss": 0.3489, "step": 24224 }, { "epoch": 2.4628914192761284, "grad_norm": 0.2712011933326721, "learning_rate": 5.9955007529570886e-06, "loss": 0.3379, "step": 24225 }, { "epoch": 2.4629930866205774, "grad_norm": 0.24240590631961823, "learning_rate": 5.995152969589133e-06, "loss": 0.3335, "step": 24226 }, { "epoch": 2.4630947539650263, "grad_norm": 0.2686910629272461, "learning_rate": 5.994805181207832e-06, "loss": 0.3683, "step": 24227 }, { "epoch": 2.4631964213094752, "grad_norm": 0.2598804235458374, "learning_rate": 5.99445738781494e-06, "loss": 0.3195, "step": 24228 }, { "epoch": 2.463298088653924, "grad_norm": 0.25850123167037964, "learning_rate": 5.9941095894122085e-06, "loss": 0.2997, "step": 24229 }, { "epoch": 2.463399755998373, "grad_norm": 0.23902174830436707, "learning_rate": 5.9937617860013885e-06, "loss": 0.3107, "step": 24230 }, { "epoch": 2.463501423342822, "grad_norm": 0.27225130796432495, "learning_rate": 5.993413977584233e-06, "loss": 0.3581, "step": 24231 }, { "epoch": 2.463603090687271, "grad_norm": 0.29129159450531006, "learning_rate": 5.993066164162493e-06, "loss": 0.3486, "step": 24232 }, { "epoch": 2.4637047580317204, "grad_norm": 0.2599914073944092, "learning_rate": 5.992718345737923e-06, "loss": 0.323, "step": 24233 }, { "epoch": 2.4638064253761693, "grad_norm": 0.2587442100048065, "learning_rate": 5.992370522312273e-06, "loss": 0.3324, "step": 24234 }, { "epoch": 2.4639080927206183, "grad_norm": 0.2475072145462036, "learning_rate": 5.992022693887296e-06, "loss": 0.3326, "step": 24235 }, { "epoch": 2.4640097600650672, "grad_norm": 0.24661745131015778, "learning_rate": 5.991674860464745e-06, "loss": 0.3065, "step": 24236 }, { "epoch": 2.464111427409516, "grad_norm": 0.2674981951713562, "learning_rate": 5.991327022046371e-06, "loss": 0.3366, "step": 24237 }, { "epoch": 2.464213094753965, "grad_norm": 0.27505308389663696, "learning_rate": 5.9909791786339275e-06, "loss": 0.3365, "step": 24238 }, { "epoch": 2.464314762098414, "grad_norm": 0.25826334953308105, "learning_rate": 5.990631330229167e-06, "loss": 0.3557, "step": 24239 }, { "epoch": 2.464416429442863, "grad_norm": 0.26140207052230835, "learning_rate": 5.990283476833841e-06, "loss": 0.3334, "step": 24240 }, { "epoch": 2.464518096787312, "grad_norm": 0.2592734098434448, "learning_rate": 5.989935618449703e-06, "loss": 0.3256, "step": 24241 }, { "epoch": 2.464619764131761, "grad_norm": 0.27732008695602417, "learning_rate": 5.989587755078503e-06, "loss": 0.325, "step": 24242 }, { "epoch": 2.46472143147621, "grad_norm": 0.26681381464004517, "learning_rate": 5.989239886721995e-06, "loss": 0.3373, "step": 24243 }, { "epoch": 2.4648230988206588, "grad_norm": 0.3033931255340576, "learning_rate": 5.988892013381933e-06, "loss": 0.2919, "step": 24244 }, { "epoch": 2.4649247661651077, "grad_norm": 0.2851487398147583, "learning_rate": 5.988544135060066e-06, "loss": 0.3215, "step": 24245 }, { "epoch": 2.4650264335095566, "grad_norm": 0.29290884733200073, "learning_rate": 5.98819625175815e-06, "loss": 0.3295, "step": 24246 }, { "epoch": 2.4651281008540056, "grad_norm": 0.26516541838645935, "learning_rate": 5.987848363477936e-06, "loss": 0.373, "step": 24247 }, { "epoch": 2.4652297681984545, "grad_norm": 0.2670498490333557, "learning_rate": 5.987500470221176e-06, "loss": 0.3326, "step": 24248 }, { "epoch": 2.4653314355429035, "grad_norm": 0.2819006145000458, "learning_rate": 5.987152571989624e-06, "loss": 0.3315, "step": 24249 }, { "epoch": 2.4654331028873524, "grad_norm": 0.2714257836341858, "learning_rate": 5.986804668785031e-06, "loss": 0.3329, "step": 24250 }, { "epoch": 2.465534770231802, "grad_norm": 0.26671281456947327, "learning_rate": 5.986456760609153e-06, "loss": 0.3187, "step": 24251 }, { "epoch": 2.4656364375762507, "grad_norm": 0.26538559794425964, "learning_rate": 5.986108847463738e-06, "loss": 0.333, "step": 24252 }, { "epoch": 2.4657381049206997, "grad_norm": 0.26016566157341003, "learning_rate": 5.985760929350542e-06, "loss": 0.3667, "step": 24253 }, { "epoch": 2.4658397722651486, "grad_norm": 0.27306678891181946, "learning_rate": 5.985413006271316e-06, "loss": 0.3692, "step": 24254 }, { "epoch": 2.4659414396095976, "grad_norm": 0.2800285816192627, "learning_rate": 5.985065078227813e-06, "loss": 0.3302, "step": 24255 }, { "epoch": 2.4660431069540465, "grad_norm": 0.2603621780872345, "learning_rate": 5.984717145221787e-06, "loss": 0.3223, "step": 24256 }, { "epoch": 2.4661447742984954, "grad_norm": 0.25947147607803345, "learning_rate": 5.9843692072549896e-06, "loss": 0.3369, "step": 24257 }, { "epoch": 2.4662464416429444, "grad_norm": 0.25985512137413025, "learning_rate": 5.984021264329174e-06, "loss": 0.3204, "step": 24258 }, { "epoch": 2.4663481089873933, "grad_norm": 0.2940797209739685, "learning_rate": 5.983673316446094e-06, "loss": 0.3756, "step": 24259 }, { "epoch": 2.4664497763318423, "grad_norm": 0.2703147530555725, "learning_rate": 5.983325363607502e-06, "loss": 0.3331, "step": 24260 }, { "epoch": 2.466551443676291, "grad_norm": 0.26490822434425354, "learning_rate": 5.982977405815149e-06, "loss": 0.3469, "step": 24261 }, { "epoch": 2.46665311102074, "grad_norm": 0.26360219717025757, "learning_rate": 5.98262944307079e-06, "loss": 0.3711, "step": 24262 }, { "epoch": 2.466754778365189, "grad_norm": 0.28589367866516113, "learning_rate": 5.982281475376177e-06, "loss": 0.3299, "step": 24263 }, { "epoch": 2.466856445709638, "grad_norm": 0.26229408383369446, "learning_rate": 5.981933502733064e-06, "loss": 0.3362, "step": 24264 }, { "epoch": 2.466958113054087, "grad_norm": 0.2492072880268097, "learning_rate": 5.981585525143203e-06, "loss": 0.3352, "step": 24265 }, { "epoch": 2.467059780398536, "grad_norm": 0.2680465877056122, "learning_rate": 5.981237542608348e-06, "loss": 0.3895, "step": 24266 }, { "epoch": 2.467161447742985, "grad_norm": 0.2645309567451477, "learning_rate": 5.98088955513025e-06, "loss": 0.2984, "step": 24267 }, { "epoch": 2.467263115087434, "grad_norm": 0.2774480879306793, "learning_rate": 5.980541562710664e-06, "loss": 0.3684, "step": 24268 }, { "epoch": 2.4673647824318827, "grad_norm": 0.28031325340270996, "learning_rate": 5.980193565351344e-06, "loss": 0.3682, "step": 24269 }, { "epoch": 2.4674664497763317, "grad_norm": 0.27781692147254944, "learning_rate": 5.979845563054041e-06, "loss": 0.3481, "step": 24270 }, { "epoch": 2.4675681171207806, "grad_norm": 0.27679750323295593, "learning_rate": 5.979497555820508e-06, "loss": 0.3153, "step": 24271 }, { "epoch": 2.4676697844652296, "grad_norm": 0.2676474153995514, "learning_rate": 5.979149543652499e-06, "loss": 0.2888, "step": 24272 }, { "epoch": 2.4677714518096785, "grad_norm": 0.2567923367023468, "learning_rate": 5.978801526551768e-06, "loss": 0.3009, "step": 24273 }, { "epoch": 2.467873119154128, "grad_norm": 0.27742403745651245, "learning_rate": 5.978453504520068e-06, "loss": 0.3374, "step": 24274 }, { "epoch": 2.467974786498577, "grad_norm": 0.25820645689964294, "learning_rate": 5.9781054775591526e-06, "loss": 0.3454, "step": 24275 }, { "epoch": 2.4680764538430258, "grad_norm": 0.256540447473526, "learning_rate": 5.9777574456707725e-06, "loss": 0.3168, "step": 24276 }, { "epoch": 2.4681781211874747, "grad_norm": 0.25174593925476074, "learning_rate": 5.977409408856682e-06, "loss": 0.3287, "step": 24277 }, { "epoch": 2.4682797885319236, "grad_norm": 0.2560163140296936, "learning_rate": 5.977061367118637e-06, "loss": 0.3417, "step": 24278 }, { "epoch": 2.4683814558763726, "grad_norm": 0.28249892592430115, "learning_rate": 5.976713320458388e-06, "loss": 0.3277, "step": 24279 }, { "epoch": 2.4684831232208215, "grad_norm": 0.27062734961509705, "learning_rate": 5.976365268877691e-06, "loss": 0.3201, "step": 24280 }, { "epoch": 2.4685847905652705, "grad_norm": 0.28209537267684937, "learning_rate": 5.976017212378295e-06, "loss": 0.3525, "step": 24281 }, { "epoch": 2.4686864579097194, "grad_norm": 0.2899013161659241, "learning_rate": 5.975669150961958e-06, "loss": 0.3185, "step": 24282 }, { "epoch": 2.4687881252541684, "grad_norm": 0.2844249904155731, "learning_rate": 5.975321084630432e-06, "loss": 0.3918, "step": 24283 }, { "epoch": 2.4688897925986173, "grad_norm": 0.2800544500350952, "learning_rate": 5.97497301338547e-06, "loss": 0.3205, "step": 24284 }, { "epoch": 2.4689914599430662, "grad_norm": 0.2705628275871277, "learning_rate": 5.974624937228826e-06, "loss": 0.341, "step": 24285 }, { "epoch": 2.469093127287515, "grad_norm": 0.30253008008003235, "learning_rate": 5.974276856162251e-06, "loss": 0.363, "step": 24286 }, { "epoch": 2.469194794631964, "grad_norm": 0.25649651885032654, "learning_rate": 5.973928770187502e-06, "loss": 0.3381, "step": 24287 }, { "epoch": 2.469296461976413, "grad_norm": 0.29889753460884094, "learning_rate": 5.973580679306333e-06, "loss": 0.3077, "step": 24288 }, { "epoch": 2.469398129320862, "grad_norm": 0.2575264871120453, "learning_rate": 5.973232583520493e-06, "loss": 0.3137, "step": 24289 }, { "epoch": 2.469499796665311, "grad_norm": 0.26454266905784607, "learning_rate": 5.97288448283174e-06, "loss": 0.3462, "step": 24290 }, { "epoch": 2.46960146400976, "grad_norm": 0.2575978636741638, "learning_rate": 5.972536377241826e-06, "loss": 0.3365, "step": 24291 }, { "epoch": 2.4697031313542093, "grad_norm": 0.26781409978866577, "learning_rate": 5.9721882667525055e-06, "loss": 0.3506, "step": 24292 }, { "epoch": 2.469804798698658, "grad_norm": 0.3011239469051361, "learning_rate": 5.971840151365531e-06, "loss": 0.2944, "step": 24293 }, { "epoch": 2.469906466043107, "grad_norm": 0.2699289917945862, "learning_rate": 5.971492031082655e-06, "loss": 0.3418, "step": 24294 }, { "epoch": 2.470008133387556, "grad_norm": 0.24689513444900513, "learning_rate": 5.971143905905636e-06, "loss": 0.3251, "step": 24295 }, { "epoch": 2.470109800732005, "grad_norm": 0.29644447565078735, "learning_rate": 5.970795775836223e-06, "loss": 0.3257, "step": 24296 }, { "epoch": 2.470211468076454, "grad_norm": 0.2727862596511841, "learning_rate": 5.970447640876172e-06, "loss": 0.3697, "step": 24297 }, { "epoch": 2.470313135420903, "grad_norm": 0.2476845383644104, "learning_rate": 5.970099501027237e-06, "loss": 0.3635, "step": 24298 }, { "epoch": 2.470414802765352, "grad_norm": 0.27005651593208313, "learning_rate": 5.96975135629117e-06, "loss": 0.3199, "step": 24299 }, { "epoch": 2.470516470109801, "grad_norm": 0.2662839889526367, "learning_rate": 5.969403206669727e-06, "loss": 0.3133, "step": 24300 }, { "epoch": 2.4706181374542497, "grad_norm": 0.2966189384460449, "learning_rate": 5.969055052164661e-06, "loss": 0.3182, "step": 24301 }, { "epoch": 2.4707198047986987, "grad_norm": 0.2556934952735901, "learning_rate": 5.9687068927777255e-06, "loss": 0.3259, "step": 24302 }, { "epoch": 2.4708214721431476, "grad_norm": 0.30904874205589294, "learning_rate": 5.968358728510675e-06, "loss": 0.3209, "step": 24303 }, { "epoch": 2.4709231394875966, "grad_norm": 0.26036337018013, "learning_rate": 5.968010559365264e-06, "loss": 0.3332, "step": 24304 }, { "epoch": 2.4710248068320455, "grad_norm": 0.28524887561798096, "learning_rate": 5.967662385343245e-06, "loss": 0.3406, "step": 24305 }, { "epoch": 2.4711264741764944, "grad_norm": 0.29720500111579895, "learning_rate": 5.967314206446373e-06, "loss": 0.3467, "step": 24306 }, { "epoch": 2.4712281415209434, "grad_norm": 0.2831908166408539, "learning_rate": 5.966966022676403e-06, "loss": 0.3237, "step": 24307 }, { "epoch": 2.4713298088653923, "grad_norm": 0.2924364507198334, "learning_rate": 5.9666178340350865e-06, "loss": 0.3324, "step": 24308 }, { "epoch": 2.4714314762098413, "grad_norm": 0.2652755379676819, "learning_rate": 5.966269640524179e-06, "loss": 0.3417, "step": 24309 }, { "epoch": 2.47153314355429, "grad_norm": 0.26903653144836426, "learning_rate": 5.965921442145436e-06, "loss": 0.3233, "step": 24310 }, { "epoch": 2.471634810898739, "grad_norm": 0.25223517417907715, "learning_rate": 5.9655732389006084e-06, "loss": 0.3377, "step": 24311 }, { "epoch": 2.471736478243188, "grad_norm": 0.2700118124485016, "learning_rate": 5.965225030791455e-06, "loss": 0.3418, "step": 24312 }, { "epoch": 2.471838145587637, "grad_norm": 0.275015652179718, "learning_rate": 5.964876817819725e-06, "loss": 0.3136, "step": 24313 }, { "epoch": 2.471939812932086, "grad_norm": 0.29964151978492737, "learning_rate": 5.964528599987175e-06, "loss": 0.3304, "step": 24314 }, { "epoch": 2.4720414802765354, "grad_norm": 0.2747390866279602, "learning_rate": 5.96418037729556e-06, "loss": 0.3433, "step": 24315 }, { "epoch": 2.4721431476209843, "grad_norm": 0.3153764009475708, "learning_rate": 5.9638321497466325e-06, "loss": 0.3056, "step": 24316 }, { "epoch": 2.4722448149654332, "grad_norm": 0.2806152403354645, "learning_rate": 5.9634839173421485e-06, "loss": 0.3411, "step": 24317 }, { "epoch": 2.472346482309882, "grad_norm": 0.27876412868499756, "learning_rate": 5.963135680083862e-06, "loss": 0.3459, "step": 24318 }, { "epoch": 2.472448149654331, "grad_norm": 0.27149033546447754, "learning_rate": 5.962787437973525e-06, "loss": 0.329, "step": 24319 }, { "epoch": 2.47254981699878, "grad_norm": 0.27337104082107544, "learning_rate": 5.9624391910128955e-06, "loss": 0.378, "step": 24320 }, { "epoch": 2.472651484343229, "grad_norm": 0.28951510787010193, "learning_rate": 5.9620909392037245e-06, "loss": 0.3277, "step": 24321 }, { "epoch": 2.472753151687678, "grad_norm": 0.26034122705459595, "learning_rate": 5.96174268254777e-06, "loss": 0.3256, "step": 24322 }, { "epoch": 2.472854819032127, "grad_norm": 0.2749759256839752, "learning_rate": 5.961394421046782e-06, "loss": 0.3426, "step": 24323 }, { "epoch": 2.472956486376576, "grad_norm": 0.26048222184181213, "learning_rate": 5.961046154702519e-06, "loss": 0.3332, "step": 24324 }, { "epoch": 2.4730581537210248, "grad_norm": 0.2791706919670105, "learning_rate": 5.960697883516733e-06, "loss": 0.3255, "step": 24325 }, { "epoch": 2.4731598210654737, "grad_norm": 0.2670440673828125, "learning_rate": 5.9603496074911796e-06, "loss": 0.3025, "step": 24326 }, { "epoch": 2.4732614884099227, "grad_norm": 0.26900941133499146, "learning_rate": 5.960001326627614e-06, "loss": 0.3306, "step": 24327 }, { "epoch": 2.4733631557543716, "grad_norm": 0.2824454605579376, "learning_rate": 5.959653040927789e-06, "loss": 0.3464, "step": 24328 }, { "epoch": 2.4734648230988205, "grad_norm": 0.2589382529258728, "learning_rate": 5.95930475039346e-06, "loss": 0.3135, "step": 24329 }, { "epoch": 2.4735664904432695, "grad_norm": 0.28000807762145996, "learning_rate": 5.958956455026381e-06, "loss": 0.3395, "step": 24330 }, { "epoch": 2.4736681577877184, "grad_norm": 0.26640868186950684, "learning_rate": 5.958608154828308e-06, "loss": 0.3075, "step": 24331 }, { "epoch": 2.4737698251321674, "grad_norm": 0.2635195851325989, "learning_rate": 5.9582598498009956e-06, "loss": 0.3202, "step": 24332 }, { "epoch": 2.4738714924766168, "grad_norm": 0.27401793003082275, "learning_rate": 5.957911539946197e-06, "loss": 0.3303, "step": 24333 }, { "epoch": 2.4739731598210657, "grad_norm": 0.27379652857780457, "learning_rate": 5.957563225265668e-06, "loss": 0.3178, "step": 24334 }, { "epoch": 2.4740748271655146, "grad_norm": 0.2727377712726593, "learning_rate": 5.957214905761163e-06, "loss": 0.3304, "step": 24335 }, { "epoch": 2.4741764945099636, "grad_norm": 0.2670213580131531, "learning_rate": 5.9568665814344376e-06, "loss": 0.3495, "step": 24336 }, { "epoch": 2.4742781618544125, "grad_norm": 0.2791844308376312, "learning_rate": 5.956518252287244e-06, "loss": 0.3438, "step": 24337 }, { "epoch": 2.4743798291988615, "grad_norm": 0.2827976644039154, "learning_rate": 5.95616991832134e-06, "loss": 0.3033, "step": 24338 }, { "epoch": 2.4744814965433104, "grad_norm": 0.25363942980766296, "learning_rate": 5.955821579538478e-06, "loss": 0.3331, "step": 24339 }, { "epoch": 2.4745831638877593, "grad_norm": 0.2513871192932129, "learning_rate": 5.9554732359404145e-06, "loss": 0.3217, "step": 24340 }, { "epoch": 2.4746848312322083, "grad_norm": 0.264494389295578, "learning_rate": 5.9551248875289044e-06, "loss": 0.3071, "step": 24341 }, { "epoch": 2.4747864985766572, "grad_norm": 0.2589305341243744, "learning_rate": 5.954776534305702e-06, "loss": 0.3353, "step": 24342 }, { "epoch": 2.474888165921106, "grad_norm": 0.2528735399246216, "learning_rate": 5.954428176272562e-06, "loss": 0.3342, "step": 24343 }, { "epoch": 2.474989833265555, "grad_norm": 0.2464125007390976, "learning_rate": 5.9540798134312395e-06, "loss": 0.3093, "step": 24344 }, { "epoch": 2.475091500610004, "grad_norm": 0.2534831166267395, "learning_rate": 5.953731445783489e-06, "loss": 0.3719, "step": 24345 }, { "epoch": 2.475193167954453, "grad_norm": 0.2816017270088196, "learning_rate": 5.953383073331068e-06, "loss": 0.3378, "step": 24346 }, { "epoch": 2.475294835298902, "grad_norm": 0.2733699679374695, "learning_rate": 5.953034696075729e-06, "loss": 0.309, "step": 24347 }, { "epoch": 2.475396502643351, "grad_norm": 0.2757660448551178, "learning_rate": 5.952686314019227e-06, "loss": 0.3291, "step": 24348 }, { "epoch": 2.4754981699878, "grad_norm": 0.2699984014034271, "learning_rate": 5.952337927163318e-06, "loss": 0.3616, "step": 24349 }, { "epoch": 2.4755998373322488, "grad_norm": 0.25704577565193176, "learning_rate": 5.951989535509757e-06, "loss": 0.3454, "step": 24350 }, { "epoch": 2.4757015046766977, "grad_norm": 0.2628842890262604, "learning_rate": 5.951641139060299e-06, "loss": 0.3169, "step": 24351 }, { "epoch": 2.4758031720211466, "grad_norm": 0.2859499752521515, "learning_rate": 5.9512927378166985e-06, "loss": 0.3721, "step": 24352 }, { "epoch": 2.4759048393655956, "grad_norm": 0.25576063990592957, "learning_rate": 5.950944331780711e-06, "loss": 0.3208, "step": 24353 }, { "epoch": 2.4760065067100445, "grad_norm": 0.2627907395362854, "learning_rate": 5.950595920954094e-06, "loss": 0.3242, "step": 24354 }, { "epoch": 2.4761081740544935, "grad_norm": 0.24940001964569092, "learning_rate": 5.950247505338599e-06, "loss": 0.3283, "step": 24355 }, { "epoch": 2.476209841398943, "grad_norm": 0.28158023953437805, "learning_rate": 5.9498990849359825e-06, "loss": 0.3438, "step": 24356 }, { "epoch": 2.476311508743392, "grad_norm": 0.2663474678993225, "learning_rate": 5.949550659748002e-06, "loss": 0.3537, "step": 24357 }, { "epoch": 2.4764131760878407, "grad_norm": 0.25081172585487366, "learning_rate": 5.949202229776408e-06, "loss": 0.3183, "step": 24358 }, { "epoch": 2.4765148434322897, "grad_norm": 0.25467145442962646, "learning_rate": 5.9488537950229615e-06, "loss": 0.307, "step": 24359 }, { "epoch": 2.4766165107767386, "grad_norm": 0.29435813426971436, "learning_rate": 5.948505355489413e-06, "loss": 0.3423, "step": 24360 }, { "epoch": 2.4767181781211876, "grad_norm": 0.2669159770011902, "learning_rate": 5.948156911177521e-06, "loss": 0.3282, "step": 24361 }, { "epoch": 2.4768198454656365, "grad_norm": 0.2644214332103729, "learning_rate": 5.94780846208904e-06, "loss": 0.3249, "step": 24362 }, { "epoch": 2.4769215128100854, "grad_norm": 0.27856096625328064, "learning_rate": 5.947460008225725e-06, "loss": 0.3216, "step": 24363 }, { "epoch": 2.4770231801545344, "grad_norm": 0.2908416986465454, "learning_rate": 5.947111549589331e-06, "loss": 0.3296, "step": 24364 }, { "epoch": 2.4771248474989833, "grad_norm": 0.26780974864959717, "learning_rate": 5.946763086181615e-06, "loss": 0.3312, "step": 24365 }, { "epoch": 2.4772265148434323, "grad_norm": 0.2719991207122803, "learning_rate": 5.946414618004332e-06, "loss": 0.3114, "step": 24366 }, { "epoch": 2.477328182187881, "grad_norm": 0.2721617817878723, "learning_rate": 5.9460661450592364e-06, "loss": 0.3023, "step": 24367 }, { "epoch": 2.47742984953233, "grad_norm": 0.26822206377983093, "learning_rate": 5.9457176673480834e-06, "loss": 0.3495, "step": 24368 }, { "epoch": 2.477531516876779, "grad_norm": 0.26671239733695984, "learning_rate": 5.945369184872631e-06, "loss": 0.3705, "step": 24369 }, { "epoch": 2.477633184221228, "grad_norm": 0.27774912118911743, "learning_rate": 5.945020697634632e-06, "loss": 0.3363, "step": 24370 }, { "epoch": 2.477734851565677, "grad_norm": 0.2803821265697479, "learning_rate": 5.944672205635844e-06, "loss": 0.3233, "step": 24371 }, { "epoch": 2.477836518910126, "grad_norm": 0.29972290992736816, "learning_rate": 5.944323708878022e-06, "loss": 0.3193, "step": 24372 }, { "epoch": 2.477938186254575, "grad_norm": 0.2884495258331299, "learning_rate": 5.943975207362921e-06, "loss": 0.3585, "step": 24373 }, { "epoch": 2.4780398535990242, "grad_norm": 0.2551836669445038, "learning_rate": 5.943626701092297e-06, "loss": 0.3221, "step": 24374 }, { "epoch": 2.478141520943473, "grad_norm": 0.2965610921382904, "learning_rate": 5.943278190067907e-06, "loss": 0.3229, "step": 24375 }, { "epoch": 2.478243188287922, "grad_norm": 0.25917932391166687, "learning_rate": 5.9429296742915066e-06, "loss": 0.3285, "step": 24376 }, { "epoch": 2.478344855632371, "grad_norm": 0.27309948205947876, "learning_rate": 5.942581153764848e-06, "loss": 0.3342, "step": 24377 }, { "epoch": 2.47844652297682, "grad_norm": 0.297924280166626, "learning_rate": 5.942232628489692e-06, "loss": 0.3233, "step": 24378 }, { "epoch": 2.478548190321269, "grad_norm": 0.2705158591270447, "learning_rate": 5.9418840984677896e-06, "loss": 0.328, "step": 24379 }, { "epoch": 2.478649857665718, "grad_norm": 0.2639962434768677, "learning_rate": 5.9415355637009e-06, "loss": 0.3429, "step": 24380 }, { "epoch": 2.478751525010167, "grad_norm": 0.26662933826446533, "learning_rate": 5.941187024190778e-06, "loss": 0.3353, "step": 24381 }, { "epoch": 2.4788531923546158, "grad_norm": 0.27689021825790405, "learning_rate": 5.940838479939179e-06, "loss": 0.3226, "step": 24382 }, { "epoch": 2.4789548596990647, "grad_norm": 0.2528928518295288, "learning_rate": 5.940489930947861e-06, "loss": 0.3486, "step": 24383 }, { "epoch": 2.4790565270435136, "grad_norm": 0.28168195486068726, "learning_rate": 5.940141377218577e-06, "loss": 0.3575, "step": 24384 }, { "epoch": 2.4791581943879626, "grad_norm": 0.2751533091068268, "learning_rate": 5.939792818753083e-06, "loss": 0.3235, "step": 24385 }, { "epoch": 2.4792598617324115, "grad_norm": 0.25447583198547363, "learning_rate": 5.9394442555531375e-06, "loss": 0.3108, "step": 24386 }, { "epoch": 2.4793615290768605, "grad_norm": 0.26117730140686035, "learning_rate": 5.939095687620495e-06, "loss": 0.3052, "step": 24387 }, { "epoch": 2.4794631964213094, "grad_norm": 0.27603331208229065, "learning_rate": 5.938747114956912e-06, "loss": 0.3259, "step": 24388 }, { "epoch": 2.4795648637657584, "grad_norm": 0.26599740982055664, "learning_rate": 5.938398537564143e-06, "loss": 0.3036, "step": 24389 }, { "epoch": 2.4796665311102073, "grad_norm": 0.2730512320995331, "learning_rate": 5.938049955443945e-06, "loss": 0.3387, "step": 24390 }, { "epoch": 2.4797681984546562, "grad_norm": 0.2745625674724579, "learning_rate": 5.937701368598075e-06, "loss": 0.3822, "step": 24391 }, { "epoch": 2.479869865799105, "grad_norm": 0.26947134733200073, "learning_rate": 5.937352777028288e-06, "loss": 0.2919, "step": 24392 }, { "epoch": 2.479971533143554, "grad_norm": 0.2643203139305115, "learning_rate": 5.937004180736341e-06, "loss": 0.3573, "step": 24393 }, { "epoch": 2.480073200488003, "grad_norm": 0.2463165521621704, "learning_rate": 5.936655579723989e-06, "loss": 0.3077, "step": 24394 }, { "epoch": 2.480174867832452, "grad_norm": 0.2760569453239441, "learning_rate": 5.9363069739929894e-06, "loss": 0.3117, "step": 24395 }, { "epoch": 2.480276535176901, "grad_norm": 0.27095112204551697, "learning_rate": 5.935958363545098e-06, "loss": 0.3214, "step": 24396 }, { "epoch": 2.4803782025213503, "grad_norm": 0.2530531883239746, "learning_rate": 5.935609748382069e-06, "loss": 0.3523, "step": 24397 }, { "epoch": 2.4804798698657993, "grad_norm": 0.2729642391204834, "learning_rate": 5.935261128505663e-06, "loss": 0.3284, "step": 24398 }, { "epoch": 2.480581537210248, "grad_norm": 0.2702537178993225, "learning_rate": 5.934912503917631e-06, "loss": 0.3277, "step": 24399 }, { "epoch": 2.480683204554697, "grad_norm": 0.2487107217311859, "learning_rate": 5.934563874619733e-06, "loss": 0.2937, "step": 24400 }, { "epoch": 2.480784871899146, "grad_norm": 0.279605895280838, "learning_rate": 5.9342152406137254e-06, "loss": 0.3143, "step": 24401 }, { "epoch": 2.480886539243595, "grad_norm": 0.2615916430950165, "learning_rate": 5.933866601901363e-06, "loss": 0.3312, "step": 24402 }, { "epoch": 2.480988206588044, "grad_norm": 0.25939467549324036, "learning_rate": 5.933517958484403e-06, "loss": 0.353, "step": 24403 }, { "epoch": 2.481089873932493, "grad_norm": 0.279615193605423, "learning_rate": 5.9331693103645995e-06, "loss": 0.3177, "step": 24404 }, { "epoch": 2.481191541276942, "grad_norm": 0.29166194796562195, "learning_rate": 5.9328206575437105e-06, "loss": 0.3234, "step": 24405 }, { "epoch": 2.481293208621391, "grad_norm": 0.27785155177116394, "learning_rate": 5.932472000023496e-06, "loss": 0.37, "step": 24406 }, { "epoch": 2.4813948759658397, "grad_norm": 0.2656790018081665, "learning_rate": 5.932123337805706e-06, "loss": 0.2925, "step": 24407 }, { "epoch": 2.4814965433102887, "grad_norm": 0.2602877616882324, "learning_rate": 5.931774670892102e-06, "loss": 0.3351, "step": 24408 }, { "epoch": 2.4815982106547376, "grad_norm": 0.29676613211631775, "learning_rate": 5.931425999284438e-06, "loss": 0.3206, "step": 24409 }, { "epoch": 2.4816998779991866, "grad_norm": 0.28702297806739807, "learning_rate": 5.9310773229844695e-06, "loss": 0.3341, "step": 24410 }, { "epoch": 2.4818015453436355, "grad_norm": 0.2796980142593384, "learning_rate": 5.930728641993958e-06, "loss": 0.3223, "step": 24411 }, { "epoch": 2.4819032126880844, "grad_norm": 0.2760281264781952, "learning_rate": 5.9303799563146555e-06, "loss": 0.3339, "step": 24412 }, { "epoch": 2.4820048800325334, "grad_norm": 0.29716402292251587, "learning_rate": 5.93003126594832e-06, "loss": 0.3196, "step": 24413 }, { "epoch": 2.4821065473769823, "grad_norm": 0.28750038146972656, "learning_rate": 5.929682570896707e-06, "loss": 0.3505, "step": 24414 }, { "epoch": 2.4822082147214317, "grad_norm": 0.25259774923324585, "learning_rate": 5.929333871161573e-06, "loss": 0.3003, "step": 24415 }, { "epoch": 2.4823098820658807, "grad_norm": 0.2729564905166626, "learning_rate": 5.928985166744678e-06, "loss": 0.354, "step": 24416 }, { "epoch": 2.4824115494103296, "grad_norm": 0.2520734965801239, "learning_rate": 5.928636457647776e-06, "loss": 0.3339, "step": 24417 }, { "epoch": 2.4825132167547785, "grad_norm": 0.28153690695762634, "learning_rate": 5.928287743872624e-06, "loss": 0.3292, "step": 24418 }, { "epoch": 2.4826148840992275, "grad_norm": 0.28228098154067993, "learning_rate": 5.927939025420979e-06, "loss": 0.3128, "step": 24419 }, { "epoch": 2.4827165514436764, "grad_norm": 0.2660180330276489, "learning_rate": 5.927590302294597e-06, "loss": 0.3287, "step": 24420 }, { "epoch": 2.4828182187881254, "grad_norm": 0.26962798833847046, "learning_rate": 5.927241574495237e-06, "loss": 0.3153, "step": 24421 }, { "epoch": 2.4829198861325743, "grad_norm": 0.273512601852417, "learning_rate": 5.926892842024654e-06, "loss": 0.3185, "step": 24422 }, { "epoch": 2.4830215534770232, "grad_norm": 0.24986396729946136, "learning_rate": 5.926544104884604e-06, "loss": 0.3252, "step": 24423 }, { "epoch": 2.483123220821472, "grad_norm": 0.2638491690158844, "learning_rate": 5.926195363076846e-06, "loss": 0.3526, "step": 24424 }, { "epoch": 2.483224888165921, "grad_norm": 0.27828726172447205, "learning_rate": 5.925846616603135e-06, "loss": 0.3224, "step": 24425 }, { "epoch": 2.48332655551037, "grad_norm": 0.24087700247764587, "learning_rate": 5.92549786546523e-06, "loss": 0.2992, "step": 24426 }, { "epoch": 2.483428222854819, "grad_norm": 0.2640396058559418, "learning_rate": 5.925149109664886e-06, "loss": 0.3021, "step": 24427 }, { "epoch": 2.483529890199268, "grad_norm": 0.25142499804496765, "learning_rate": 5.9248003492038596e-06, "loss": 0.3272, "step": 24428 }, { "epoch": 2.483631557543717, "grad_norm": 0.2658419609069824, "learning_rate": 5.92445158408391e-06, "loss": 0.3429, "step": 24429 }, { "epoch": 2.483733224888166, "grad_norm": 0.27195486426353455, "learning_rate": 5.924102814306793e-06, "loss": 0.3076, "step": 24430 }, { "epoch": 2.4838348922326148, "grad_norm": 0.2790919542312622, "learning_rate": 5.923754039874264e-06, "loss": 0.2926, "step": 24431 }, { "epoch": 2.4839365595770637, "grad_norm": 0.28445589542388916, "learning_rate": 5.9234052607880845e-06, "loss": 0.3422, "step": 24432 }, { "epoch": 2.4840382269215127, "grad_norm": 0.2607628107070923, "learning_rate": 5.923056477050007e-06, "loss": 0.3488, "step": 24433 }, { "epoch": 2.4841398942659616, "grad_norm": 0.26328423619270325, "learning_rate": 5.922707688661789e-06, "loss": 0.3306, "step": 24434 }, { "epoch": 2.4842415616104105, "grad_norm": 0.27710551023483276, "learning_rate": 5.922358895625192e-06, "loss": 0.3647, "step": 24435 }, { "epoch": 2.4843432289548595, "grad_norm": 0.27564936876296997, "learning_rate": 5.922010097941968e-06, "loss": 0.353, "step": 24436 }, { "epoch": 2.4844448962993084, "grad_norm": 0.2729929983615875, "learning_rate": 5.921661295613877e-06, "loss": 0.3345, "step": 24437 }, { "epoch": 2.484546563643758, "grad_norm": 0.26430243253707886, "learning_rate": 5.921312488642675e-06, "loss": 0.3431, "step": 24438 }, { "epoch": 2.4846482309882068, "grad_norm": 0.2723495364189148, "learning_rate": 5.9209636770301205e-06, "loss": 0.3049, "step": 24439 }, { "epoch": 2.4847498983326557, "grad_norm": 0.2789922058582306, "learning_rate": 5.920614860777969e-06, "loss": 0.364, "step": 24440 }, { "epoch": 2.4848515656771046, "grad_norm": 0.27782368659973145, "learning_rate": 5.9202660398879786e-06, "loss": 0.3214, "step": 24441 }, { "epoch": 2.4849532330215536, "grad_norm": 0.26355278491973877, "learning_rate": 5.919917214361907e-06, "loss": 0.3275, "step": 24442 }, { "epoch": 2.4850549003660025, "grad_norm": 0.29024550318717957, "learning_rate": 5.9195683842015105e-06, "loss": 0.3261, "step": 24443 }, { "epoch": 2.4851565677104515, "grad_norm": 0.2888856828212738, "learning_rate": 5.9192195494085465e-06, "loss": 0.299, "step": 24444 }, { "epoch": 2.4852582350549004, "grad_norm": 0.2723613977432251, "learning_rate": 5.9188707099847755e-06, "loss": 0.3368, "step": 24445 }, { "epoch": 2.4853599023993493, "grad_norm": 0.253755122423172, "learning_rate": 5.91852186593195e-06, "loss": 0.3256, "step": 24446 }, { "epoch": 2.4854615697437983, "grad_norm": 0.2605114281177521, "learning_rate": 5.918173017251831e-06, "loss": 0.3144, "step": 24447 }, { "epoch": 2.4855632370882472, "grad_norm": 0.2620302140712738, "learning_rate": 5.917824163946172e-06, "loss": 0.3293, "step": 24448 }, { "epoch": 2.485664904432696, "grad_norm": 0.2605564594268799, "learning_rate": 5.9174753060167345e-06, "loss": 0.3504, "step": 24449 }, { "epoch": 2.485766571777145, "grad_norm": 0.2709662914276123, "learning_rate": 5.917126443465275e-06, "loss": 0.34, "step": 24450 }, { "epoch": 2.485868239121594, "grad_norm": 0.27083829045295715, "learning_rate": 5.91677757629355e-06, "loss": 0.3395, "step": 24451 }, { "epoch": 2.485969906466043, "grad_norm": 0.2918221056461334, "learning_rate": 5.916428704503318e-06, "loss": 0.3317, "step": 24452 }, { "epoch": 2.486071573810492, "grad_norm": 0.2550944983959198, "learning_rate": 5.916079828096335e-06, "loss": 0.3208, "step": 24453 }, { "epoch": 2.486173241154941, "grad_norm": 0.26207494735717773, "learning_rate": 5.915730947074358e-06, "loss": 0.3333, "step": 24454 }, { "epoch": 2.48627490849939, "grad_norm": 0.28343522548675537, "learning_rate": 5.91538206143915e-06, "loss": 0.3803, "step": 24455 }, { "epoch": 2.486376575843839, "grad_norm": 0.25206759572029114, "learning_rate": 5.915033171192461e-06, "loss": 0.3468, "step": 24456 }, { "epoch": 2.486478243188288, "grad_norm": 0.2539266347885132, "learning_rate": 5.914684276336056e-06, "loss": 0.3156, "step": 24457 }, { "epoch": 2.486579910532737, "grad_norm": 0.2633521258831024, "learning_rate": 5.914335376871686e-06, "loss": 0.307, "step": 24458 }, { "epoch": 2.486681577877186, "grad_norm": 0.26201799511909485, "learning_rate": 5.9139864728011124e-06, "loss": 0.2955, "step": 24459 }, { "epoch": 2.486783245221635, "grad_norm": 0.27281492948532104, "learning_rate": 5.913637564126093e-06, "loss": 0.3427, "step": 24460 }, { "epoch": 2.486884912566084, "grad_norm": 0.27421271800994873, "learning_rate": 5.913288650848383e-06, "loss": 0.3282, "step": 24461 }, { "epoch": 2.486986579910533, "grad_norm": 0.2578149735927582, "learning_rate": 5.912939732969744e-06, "loss": 0.3349, "step": 24462 }, { "epoch": 2.487088247254982, "grad_norm": 0.26059332489967346, "learning_rate": 5.912590810491929e-06, "loss": 0.3404, "step": 24463 }, { "epoch": 2.4871899145994307, "grad_norm": 0.2738742232322693, "learning_rate": 5.9122418834167e-06, "loss": 0.3378, "step": 24464 }, { "epoch": 2.4872915819438797, "grad_norm": 0.24820969998836517, "learning_rate": 5.911892951745814e-06, "loss": 0.3346, "step": 24465 }, { "epoch": 2.4873932492883286, "grad_norm": 0.2572653889656067, "learning_rate": 5.911544015481025e-06, "loss": 0.3401, "step": 24466 }, { "epoch": 2.4874949166327776, "grad_norm": 0.2806653678417206, "learning_rate": 5.911195074624096e-06, "loss": 0.3371, "step": 24467 }, { "epoch": 2.4875965839772265, "grad_norm": 0.28081125020980835, "learning_rate": 5.910846129176783e-06, "loss": 0.3441, "step": 24468 }, { "epoch": 2.4876982513216754, "grad_norm": 0.26211580634117126, "learning_rate": 5.9104971791408426e-06, "loss": 0.3172, "step": 24469 }, { "epoch": 2.4877999186661244, "grad_norm": 0.26989179849624634, "learning_rate": 5.910148224518035e-06, "loss": 0.3266, "step": 24470 }, { "epoch": 2.4879015860105733, "grad_norm": 0.27495524287223816, "learning_rate": 5.9097992653101165e-06, "loss": 0.333, "step": 24471 }, { "epoch": 2.4880032533550223, "grad_norm": 0.2776161730289459, "learning_rate": 5.909450301518846e-06, "loss": 0.3333, "step": 24472 }, { "epoch": 2.488104920699471, "grad_norm": 0.2661733329296112, "learning_rate": 5.909101333145979e-06, "loss": 0.3354, "step": 24473 }, { "epoch": 2.48820658804392, "grad_norm": 0.28743648529052734, "learning_rate": 5.908752360193277e-06, "loss": 0.3375, "step": 24474 }, { "epoch": 2.488308255388369, "grad_norm": 0.2663751244544983, "learning_rate": 5.9084033826624965e-06, "loss": 0.3314, "step": 24475 }, { "epoch": 2.488409922732818, "grad_norm": 0.2720644474029541, "learning_rate": 5.908054400555395e-06, "loss": 0.346, "step": 24476 }, { "epoch": 2.488511590077267, "grad_norm": 0.26524680852890015, "learning_rate": 5.9077054138737325e-06, "loss": 0.3653, "step": 24477 }, { "epoch": 2.488613257421716, "grad_norm": 0.2604314982891083, "learning_rate": 5.907356422619265e-06, "loss": 0.3237, "step": 24478 }, { "epoch": 2.4887149247661653, "grad_norm": 0.27283504605293274, "learning_rate": 5.907007426793752e-06, "loss": 0.3063, "step": 24479 }, { "epoch": 2.4888165921106142, "grad_norm": 0.2826569378376007, "learning_rate": 5.90665842639895e-06, "loss": 0.3419, "step": 24480 }, { "epoch": 2.488918259455063, "grad_norm": 0.26404500007629395, "learning_rate": 5.906309421436618e-06, "loss": 0.3519, "step": 24481 }, { "epoch": 2.489019926799512, "grad_norm": 0.27004340291023254, "learning_rate": 5.905960411908517e-06, "loss": 0.3194, "step": 24482 }, { "epoch": 2.489121594143961, "grad_norm": 0.2726347744464874, "learning_rate": 5.9056113978163996e-06, "loss": 0.3169, "step": 24483 }, { "epoch": 2.48922326148841, "grad_norm": 0.2605656087398529, "learning_rate": 5.90526237916203e-06, "loss": 0.3392, "step": 24484 }, { "epoch": 2.489324928832859, "grad_norm": 0.2638237178325653, "learning_rate": 5.904913355947162e-06, "loss": 0.3653, "step": 24485 }, { "epoch": 2.489426596177308, "grad_norm": 0.27583107352256775, "learning_rate": 5.904564328173556e-06, "loss": 0.3366, "step": 24486 }, { "epoch": 2.489528263521757, "grad_norm": 0.26852232217788696, "learning_rate": 5.90421529584297e-06, "loss": 0.3056, "step": 24487 }, { "epoch": 2.4896299308662058, "grad_norm": 0.2803838551044464, "learning_rate": 5.903866258957163e-06, "loss": 0.3551, "step": 24488 }, { "epoch": 2.4897315982106547, "grad_norm": 0.26790934801101685, "learning_rate": 5.903517217517892e-06, "loss": 0.3659, "step": 24489 }, { "epoch": 2.4898332655551036, "grad_norm": 0.2816566526889801, "learning_rate": 5.903168171526914e-06, "loss": 0.3277, "step": 24490 }, { "epoch": 2.4899349328995526, "grad_norm": 0.2918875515460968, "learning_rate": 5.902819120985991e-06, "loss": 0.3642, "step": 24491 }, { "epoch": 2.4900366002440015, "grad_norm": 0.28245505690574646, "learning_rate": 5.90247006589688e-06, "loss": 0.3654, "step": 24492 }, { "epoch": 2.4901382675884505, "grad_norm": 0.2876732647418976, "learning_rate": 5.90212100626134e-06, "loss": 0.34, "step": 24493 }, { "epoch": 2.4902399349328994, "grad_norm": 0.26247748732566833, "learning_rate": 5.901771942081127e-06, "loss": 0.3483, "step": 24494 }, { "epoch": 2.4903416022773484, "grad_norm": 0.2636774480342865, "learning_rate": 5.901422873358001e-06, "loss": 0.351, "step": 24495 }, { "epoch": 2.4904432696217973, "grad_norm": 0.2766069769859314, "learning_rate": 5.901073800093722e-06, "loss": 0.3683, "step": 24496 }, { "epoch": 2.4905449369662467, "grad_norm": 0.2787644565105438, "learning_rate": 5.900724722290048e-06, "loss": 0.342, "step": 24497 }, { "epoch": 2.4906466043106956, "grad_norm": 0.28665322065353394, "learning_rate": 5.900375639948737e-06, "loss": 0.3151, "step": 24498 }, { "epoch": 2.4907482716551446, "grad_norm": 0.28568461537361145, "learning_rate": 5.900026553071546e-06, "loss": 0.3086, "step": 24499 }, { "epoch": 2.4908499389995935, "grad_norm": 0.2955235540866852, "learning_rate": 5.899677461660235e-06, "loss": 0.3576, "step": 24500 }, { "epoch": 2.4909516063440424, "grad_norm": 0.26535141468048096, "learning_rate": 5.8993283657165635e-06, "loss": 0.3448, "step": 24501 }, { "epoch": 2.4910532736884914, "grad_norm": 0.26817837357521057, "learning_rate": 5.89897926524229e-06, "loss": 0.3634, "step": 24502 }, { "epoch": 2.4911549410329403, "grad_norm": 0.26804301142692566, "learning_rate": 5.8986301602391715e-06, "loss": 0.3324, "step": 24503 }, { "epoch": 2.4912566083773893, "grad_norm": 0.29339131712913513, "learning_rate": 5.898281050708969e-06, "loss": 0.3111, "step": 24504 }, { "epoch": 2.491358275721838, "grad_norm": 0.26953697204589844, "learning_rate": 5.897931936653438e-06, "loss": 0.3078, "step": 24505 }, { "epoch": 2.491459943066287, "grad_norm": 0.26731568574905396, "learning_rate": 5.897582818074341e-06, "loss": 0.3167, "step": 24506 }, { "epoch": 2.491561610410736, "grad_norm": 0.2609438896179199, "learning_rate": 5.8972336949734345e-06, "loss": 0.3385, "step": 24507 }, { "epoch": 2.491663277755185, "grad_norm": 0.2914449870586395, "learning_rate": 5.896884567352477e-06, "loss": 0.306, "step": 24508 }, { "epoch": 2.491764945099634, "grad_norm": 0.2563045918941498, "learning_rate": 5.89653543521323e-06, "loss": 0.2877, "step": 24509 }, { "epoch": 2.491866612444083, "grad_norm": 0.26699018478393555, "learning_rate": 5.896186298557449e-06, "loss": 0.3469, "step": 24510 }, { "epoch": 2.491968279788532, "grad_norm": 0.27355965971946716, "learning_rate": 5.895837157386895e-06, "loss": 0.3271, "step": 24511 }, { "epoch": 2.492069947132981, "grad_norm": 0.2913425862789154, "learning_rate": 5.8954880117033266e-06, "loss": 0.3396, "step": 24512 }, { "epoch": 2.4921716144774297, "grad_norm": 0.2624325454235077, "learning_rate": 5.895138861508503e-06, "loss": 0.3051, "step": 24513 }, { "epoch": 2.4922732818218787, "grad_norm": 0.26050829887390137, "learning_rate": 5.89478970680418e-06, "loss": 0.3336, "step": 24514 }, { "epoch": 2.4923749491663276, "grad_norm": 0.28032755851745605, "learning_rate": 5.894440547592121e-06, "loss": 0.3593, "step": 24515 }, { "epoch": 2.4924766165107766, "grad_norm": 0.2607801556587219, "learning_rate": 5.894091383874084e-06, "loss": 0.3129, "step": 24516 }, { "epoch": 2.4925782838552255, "grad_norm": 0.2516401410102844, "learning_rate": 5.893742215651825e-06, "loss": 0.3408, "step": 24517 }, { "epoch": 2.4926799511996744, "grad_norm": 0.2625632882118225, "learning_rate": 5.8933930429271065e-06, "loss": 0.3106, "step": 24518 }, { "epoch": 2.4927816185441234, "grad_norm": 0.2718575894832611, "learning_rate": 5.893043865701685e-06, "loss": 0.3202, "step": 24519 }, { "epoch": 2.4928832858885728, "grad_norm": 0.2819112539291382, "learning_rate": 5.892694683977322e-06, "loss": 0.3477, "step": 24520 }, { "epoch": 2.4929849532330217, "grad_norm": 0.26788046956062317, "learning_rate": 5.892345497755775e-06, "loss": 0.3184, "step": 24521 }, { "epoch": 2.4930866205774707, "grad_norm": 0.25829973816871643, "learning_rate": 5.891996307038802e-06, "loss": 0.3538, "step": 24522 }, { "epoch": 2.4931882879219196, "grad_norm": 0.26468417048454285, "learning_rate": 5.891647111828166e-06, "loss": 0.3977, "step": 24523 }, { "epoch": 2.4932899552663685, "grad_norm": 0.3030809164047241, "learning_rate": 5.891297912125622e-06, "loss": 0.3658, "step": 24524 }, { "epoch": 2.4933916226108175, "grad_norm": 0.274030476808548, "learning_rate": 5.890948707932931e-06, "loss": 0.3219, "step": 24525 }, { "epoch": 2.4934932899552664, "grad_norm": 0.27796855568885803, "learning_rate": 5.8905994992518525e-06, "loss": 0.3549, "step": 24526 }, { "epoch": 2.4935949572997154, "grad_norm": 0.26622775197029114, "learning_rate": 5.890250286084145e-06, "loss": 0.3333, "step": 24527 }, { "epoch": 2.4936966246441643, "grad_norm": 0.2994823455810547, "learning_rate": 5.889901068431569e-06, "loss": 0.3274, "step": 24528 }, { "epoch": 2.4937982919886132, "grad_norm": 0.27582648396492004, "learning_rate": 5.8895518462958825e-06, "loss": 0.3512, "step": 24529 }, { "epoch": 2.493899959333062, "grad_norm": 0.2798798978328705, "learning_rate": 5.889202619678844e-06, "loss": 0.3414, "step": 24530 }, { "epoch": 2.494001626677511, "grad_norm": 0.2784939706325531, "learning_rate": 5.888853388582216e-06, "loss": 0.3351, "step": 24531 }, { "epoch": 2.49410329402196, "grad_norm": 0.26793891191482544, "learning_rate": 5.888504153007754e-06, "loss": 0.3052, "step": 24532 }, { "epoch": 2.494204961366409, "grad_norm": 0.2660483717918396, "learning_rate": 5.8881549129572215e-06, "loss": 0.3321, "step": 24533 }, { "epoch": 2.494306628710858, "grad_norm": 0.2844582498073578, "learning_rate": 5.8878056684323735e-06, "loss": 0.3317, "step": 24534 }, { "epoch": 2.494408296055307, "grad_norm": 0.2509802579879761, "learning_rate": 5.887456419434971e-06, "loss": 0.3091, "step": 24535 }, { "epoch": 2.494509963399756, "grad_norm": 0.2840343415737152, "learning_rate": 5.887107165966777e-06, "loss": 0.3271, "step": 24536 }, { "epoch": 2.4946116307442048, "grad_norm": 0.2826431095600128, "learning_rate": 5.886757908029546e-06, "loss": 0.3277, "step": 24537 }, { "epoch": 2.494713298088654, "grad_norm": 0.2660878002643585, "learning_rate": 5.886408645625039e-06, "loss": 0.3351, "step": 24538 }, { "epoch": 2.494814965433103, "grad_norm": 0.27473270893096924, "learning_rate": 5.886059378755016e-06, "loss": 0.3196, "step": 24539 }, { "epoch": 2.494916632777552, "grad_norm": 0.26252883672714233, "learning_rate": 5.885710107421236e-06, "loss": 0.3242, "step": 24540 }, { "epoch": 2.495018300122001, "grad_norm": 0.25824570655822754, "learning_rate": 5.8853608316254605e-06, "loss": 0.3729, "step": 24541 }, { "epoch": 2.49511996746645, "grad_norm": 0.29327574372291565, "learning_rate": 5.8850115513694475e-06, "loss": 0.3181, "step": 24542 }, { "epoch": 2.495221634810899, "grad_norm": 0.28425267338752747, "learning_rate": 5.884662266654955e-06, "loss": 0.3059, "step": 24543 }, { "epoch": 2.495323302155348, "grad_norm": 0.27517321705818176, "learning_rate": 5.884312977483746e-06, "loss": 0.3467, "step": 24544 }, { "epoch": 2.4954249694997968, "grad_norm": 0.2727173864841461, "learning_rate": 5.883963683857576e-06, "loss": 0.3459, "step": 24545 }, { "epoch": 2.4955266368442457, "grad_norm": 0.2711615264415741, "learning_rate": 5.883614385778209e-06, "loss": 0.3663, "step": 24546 }, { "epoch": 2.4956283041886946, "grad_norm": 0.27338290214538574, "learning_rate": 5.883265083247403e-06, "loss": 0.3336, "step": 24547 }, { "epoch": 2.4957299715331436, "grad_norm": 0.2682245075702667, "learning_rate": 5.882915776266917e-06, "loss": 0.3122, "step": 24548 }, { "epoch": 2.4958316388775925, "grad_norm": 0.28217989206314087, "learning_rate": 5.88256646483851e-06, "loss": 0.3128, "step": 24549 }, { "epoch": 2.4959333062220415, "grad_norm": 0.28217077255249023, "learning_rate": 5.882217148963943e-06, "loss": 0.401, "step": 24550 }, { "epoch": 2.4960349735664904, "grad_norm": 0.2636471390724182, "learning_rate": 5.881867828644977e-06, "loss": 0.353, "step": 24551 }, { "epoch": 2.4961366409109393, "grad_norm": 0.2879447042942047, "learning_rate": 5.881518503883369e-06, "loss": 0.3489, "step": 24552 }, { "epoch": 2.4962383082553883, "grad_norm": 0.2681141793727875, "learning_rate": 5.881169174680882e-06, "loss": 0.3403, "step": 24553 }, { "epoch": 2.4963399755998372, "grad_norm": 0.26598548889160156, "learning_rate": 5.880819841039273e-06, "loss": 0.3373, "step": 24554 }, { "epoch": 2.496441642944286, "grad_norm": 0.2762308120727539, "learning_rate": 5.880470502960302e-06, "loss": 0.2894, "step": 24555 }, { "epoch": 2.496543310288735, "grad_norm": 0.27195703983306885, "learning_rate": 5.880121160445732e-06, "loss": 0.3264, "step": 24556 }, { "epoch": 2.496644977633184, "grad_norm": 0.27495276927948, "learning_rate": 5.8797718134973195e-06, "loss": 0.333, "step": 24557 }, { "epoch": 2.496746644977633, "grad_norm": 0.2536524832248688, "learning_rate": 5.879422462116826e-06, "loss": 0.3599, "step": 24558 }, { "epoch": 2.496848312322082, "grad_norm": 0.26592251658439636, "learning_rate": 5.87907310630601e-06, "loss": 0.3354, "step": 24559 }, { "epoch": 2.496949979666531, "grad_norm": 0.26812416315078735, "learning_rate": 5.878723746066633e-06, "loss": 0.3033, "step": 24560 }, { "epoch": 2.4970516470109803, "grad_norm": 0.28132957220077515, "learning_rate": 5.878374381400456e-06, "loss": 0.3062, "step": 24561 }, { "epoch": 2.497153314355429, "grad_norm": 0.25592562556266785, "learning_rate": 5.8780250123092365e-06, "loss": 0.3172, "step": 24562 }, { "epoch": 2.497254981699878, "grad_norm": 0.2894366383552551, "learning_rate": 5.877675638794737e-06, "loss": 0.3618, "step": 24563 }, { "epoch": 2.497356649044327, "grad_norm": 0.26080644130706787, "learning_rate": 5.877326260858715e-06, "loss": 0.3415, "step": 24564 }, { "epoch": 2.497458316388776, "grad_norm": 0.2812459468841553, "learning_rate": 5.876976878502931e-06, "loss": 0.3376, "step": 24565 }, { "epoch": 2.497559983733225, "grad_norm": 0.2818918526172638, "learning_rate": 5.876627491729147e-06, "loss": 0.3342, "step": 24566 }, { "epoch": 2.497661651077674, "grad_norm": 0.25698626041412354, "learning_rate": 5.87627810053912e-06, "loss": 0.3086, "step": 24567 }, { "epoch": 2.497763318422123, "grad_norm": 0.2766992151737213, "learning_rate": 5.875928704934615e-06, "loss": 0.3549, "step": 24568 }, { "epoch": 2.497864985766572, "grad_norm": 0.278128981590271, "learning_rate": 5.87557930491739e-06, "loss": 0.3177, "step": 24569 }, { "epoch": 2.4979666531110207, "grad_norm": 0.2848217189311981, "learning_rate": 5.875229900489201e-06, "loss": 0.3482, "step": 24570 }, { "epoch": 2.4980683204554697, "grad_norm": 0.2769263982772827, "learning_rate": 5.874880491651813e-06, "loss": 0.3266, "step": 24571 }, { "epoch": 2.4981699877999186, "grad_norm": 0.34175920486450195, "learning_rate": 5.8745310784069855e-06, "loss": 0.3343, "step": 24572 }, { "epoch": 2.4982716551443676, "grad_norm": 0.27761921286582947, "learning_rate": 5.874181660756479e-06, "loss": 0.337, "step": 24573 }, { "epoch": 2.4983733224888165, "grad_norm": 0.26737532019615173, "learning_rate": 5.873832238702053e-06, "loss": 0.3041, "step": 24574 }, { "epoch": 2.4984749898332654, "grad_norm": 0.2605631947517395, "learning_rate": 5.8734828122454656e-06, "loss": 0.3224, "step": 24575 }, { "epoch": 2.4985766571777144, "grad_norm": 0.2879325747489929, "learning_rate": 5.87313338138848e-06, "loss": 0.3251, "step": 24576 }, { "epoch": 2.4986783245221633, "grad_norm": 0.2845813035964966, "learning_rate": 5.872783946132857e-06, "loss": 0.3462, "step": 24577 }, { "epoch": 2.4987799918666123, "grad_norm": 0.2677544951438904, "learning_rate": 5.872434506480357e-06, "loss": 0.3903, "step": 24578 }, { "epoch": 2.4988816592110616, "grad_norm": 0.2683736979961395, "learning_rate": 5.872085062432738e-06, "loss": 0.3191, "step": 24579 }, { "epoch": 2.4989833265555106, "grad_norm": 0.26822254061698914, "learning_rate": 5.871735613991762e-06, "loss": 0.3231, "step": 24580 }, { "epoch": 2.4990849938999595, "grad_norm": 0.27246925234794617, "learning_rate": 5.8713861611591875e-06, "loss": 0.3108, "step": 24581 }, { "epoch": 2.4991866612444085, "grad_norm": 0.26382675766944885, "learning_rate": 5.871036703936779e-06, "loss": 0.3669, "step": 24582 }, { "epoch": 2.4992883285888574, "grad_norm": 0.28681039810180664, "learning_rate": 5.870687242326292e-06, "loss": 0.334, "step": 24583 }, { "epoch": 2.4993899959333064, "grad_norm": 0.26303571462631226, "learning_rate": 5.870337776329493e-06, "loss": 0.3033, "step": 24584 }, { "epoch": 2.4994916632777553, "grad_norm": 0.28364118933677673, "learning_rate": 5.869988305948136e-06, "loss": 0.3284, "step": 24585 }, { "epoch": 2.4995933306222042, "grad_norm": 0.27320602536201477, "learning_rate": 5.8696388311839845e-06, "loss": 0.341, "step": 24586 }, { "epoch": 2.499694997966653, "grad_norm": 0.26976144313812256, "learning_rate": 5.869289352038802e-06, "loss": 0.3393, "step": 24587 }, { "epoch": 2.499796665311102, "grad_norm": 0.25565072894096375, "learning_rate": 5.868939868514344e-06, "loss": 0.326, "step": 24588 }, { "epoch": 2.499898332655551, "grad_norm": 0.26812201738357544, "learning_rate": 5.868590380612375e-06, "loss": 0.3567, "step": 24589 }, { "epoch": 2.5, "grad_norm": 0.2661104202270508, "learning_rate": 5.8682408883346535e-06, "loss": 0.3015, "step": 24590 }, { "epoch": 2.500101667344449, "grad_norm": 0.26207560300827026, "learning_rate": 5.86789139168294e-06, "loss": 0.3309, "step": 24591 }, { "epoch": 2.500203334688898, "grad_norm": 0.27355965971946716, "learning_rate": 5.867541890658996e-06, "loss": 0.3228, "step": 24592 }, { "epoch": 2.500305002033347, "grad_norm": 0.2685122787952423, "learning_rate": 5.867192385264582e-06, "loss": 0.3314, "step": 24593 }, { "epoch": 2.5004066693777958, "grad_norm": 0.25393301248550415, "learning_rate": 5.8668428755014605e-06, "loss": 0.313, "step": 24594 }, { "epoch": 2.5005083367222447, "grad_norm": 0.25938311219215393, "learning_rate": 5.86649336137139e-06, "loss": 0.3324, "step": 24595 }, { "epoch": 2.5006100040666936, "grad_norm": 0.2652864158153534, "learning_rate": 5.8661438428761294e-06, "loss": 0.3207, "step": 24596 }, { "epoch": 2.5007116714111426, "grad_norm": 0.28797855973243713, "learning_rate": 5.865794320017445e-06, "loss": 0.324, "step": 24597 }, { "epoch": 2.5008133387555915, "grad_norm": 0.28137990832328796, "learning_rate": 5.865444792797092e-06, "loss": 0.3273, "step": 24598 }, { "epoch": 2.5009150061000405, "grad_norm": 0.28934213519096375, "learning_rate": 5.865095261216837e-06, "loss": 0.351, "step": 24599 }, { "epoch": 2.5010166734444894, "grad_norm": 0.2601226270198822, "learning_rate": 5.8647457252784355e-06, "loss": 0.3256, "step": 24600 }, { "epoch": 2.5011183407889384, "grad_norm": 0.2734578251838684, "learning_rate": 5.864396184983651e-06, "loss": 0.3489, "step": 24601 }, { "epoch": 2.5012200081333873, "grad_norm": 0.2861713469028473, "learning_rate": 5.864046640334244e-06, "loss": 0.3423, "step": 24602 }, { "epoch": 2.5013216754778367, "grad_norm": 0.27442219853401184, "learning_rate": 5.863697091331974e-06, "loss": 0.315, "step": 24603 }, { "epoch": 2.5014233428222856, "grad_norm": 0.2749858498573303, "learning_rate": 5.863347537978605e-06, "loss": 0.3389, "step": 24604 }, { "epoch": 2.5015250101667346, "grad_norm": 0.27727678418159485, "learning_rate": 5.862997980275896e-06, "loss": 0.3522, "step": 24605 }, { "epoch": 2.5016266775111835, "grad_norm": 0.2647351026535034, "learning_rate": 5.862648418225607e-06, "loss": 0.3277, "step": 24606 }, { "epoch": 2.5017283448556324, "grad_norm": 0.2829749882221222, "learning_rate": 5.862298851829502e-06, "loss": 0.3631, "step": 24607 }, { "epoch": 2.5018300122000814, "grad_norm": 0.2627379596233368, "learning_rate": 5.86194928108934e-06, "loss": 0.3243, "step": 24608 }, { "epoch": 2.5019316795445303, "grad_norm": 0.2763608396053314, "learning_rate": 5.861599706006882e-06, "loss": 0.3118, "step": 24609 }, { "epoch": 2.5020333468889793, "grad_norm": 0.28484654426574707, "learning_rate": 5.861250126583889e-06, "loss": 0.3245, "step": 24610 }, { "epoch": 2.502135014233428, "grad_norm": 0.2639389932155609, "learning_rate": 5.860900542822123e-06, "loss": 0.3587, "step": 24611 }, { "epoch": 2.502236681577877, "grad_norm": 0.2667648494243622, "learning_rate": 5.860550954723345e-06, "loss": 0.2981, "step": 24612 }, { "epoch": 2.502338348922326, "grad_norm": 0.2794550657272339, "learning_rate": 5.860201362289315e-06, "loss": 0.3653, "step": 24613 }, { "epoch": 2.502440016266775, "grad_norm": 0.28365635871887207, "learning_rate": 5.859851765521796e-06, "loss": 0.2975, "step": 24614 }, { "epoch": 2.502541683611224, "grad_norm": 0.27623459696769714, "learning_rate": 5.859502164422547e-06, "loss": 0.3196, "step": 24615 }, { "epoch": 2.502643350955673, "grad_norm": 0.2712181508541107, "learning_rate": 5.859152558993331e-06, "loss": 0.3242, "step": 24616 }, { "epoch": 2.502745018300122, "grad_norm": 0.29746586084365845, "learning_rate": 5.858802949235908e-06, "loss": 0.3499, "step": 24617 }, { "epoch": 2.502846685644571, "grad_norm": 0.2882075309753418, "learning_rate": 5.8584533351520404e-06, "loss": 0.3446, "step": 24618 }, { "epoch": 2.50294835298902, "grad_norm": 0.27974388003349304, "learning_rate": 5.8581037167434885e-06, "loss": 0.3336, "step": 24619 }, { "epoch": 2.503050020333469, "grad_norm": 0.2547944486141205, "learning_rate": 5.857754094012014e-06, "loss": 0.3203, "step": 24620 }, { "epoch": 2.503151687677918, "grad_norm": 0.2906869649887085, "learning_rate": 5.857404466959377e-06, "loss": 0.3293, "step": 24621 }, { "epoch": 2.503253355022367, "grad_norm": 0.27741020917892456, "learning_rate": 5.857054835587344e-06, "loss": 0.3258, "step": 24622 }, { "epoch": 2.503355022366816, "grad_norm": 0.26613205671310425, "learning_rate": 5.856705199897669e-06, "loss": 0.3335, "step": 24623 }, { "epoch": 2.503456689711265, "grad_norm": 0.252452552318573, "learning_rate": 5.856355559892118e-06, "loss": 0.3598, "step": 24624 }, { "epoch": 2.503558357055714, "grad_norm": 0.26374807953834534, "learning_rate": 5.85600591557245e-06, "loss": 0.356, "step": 24625 }, { "epoch": 2.5036600244001628, "grad_norm": 0.2522854506969452, "learning_rate": 5.8556562669404286e-06, "loss": 0.335, "step": 24626 }, { "epoch": 2.5037616917446117, "grad_norm": 0.26648879051208496, "learning_rate": 5.855306613997814e-06, "loss": 0.3196, "step": 24627 }, { "epoch": 2.5038633590890607, "grad_norm": 0.27399709820747375, "learning_rate": 5.854956956746367e-06, "loss": 0.3301, "step": 24628 }, { "epoch": 2.5039650264335096, "grad_norm": 0.2846065163612366, "learning_rate": 5.854607295187852e-06, "loss": 0.3133, "step": 24629 }, { "epoch": 2.5040666937779585, "grad_norm": 0.31583914160728455, "learning_rate": 5.854257629324027e-06, "loss": 0.3148, "step": 24630 }, { "epoch": 2.5041683611224075, "grad_norm": 0.2757527530193329, "learning_rate": 5.853907959156655e-06, "loss": 0.3507, "step": 24631 }, { "epoch": 2.5042700284668564, "grad_norm": 0.27021780610084534, "learning_rate": 5.853558284687499e-06, "loss": 0.3392, "step": 24632 }, { "epoch": 2.5043716958113054, "grad_norm": 0.28519347310066223, "learning_rate": 5.853208605918319e-06, "loss": 0.3629, "step": 24633 }, { "epoch": 2.5044733631557543, "grad_norm": 0.2613309919834137, "learning_rate": 5.852858922850877e-06, "loss": 0.3501, "step": 24634 }, { "epoch": 2.5045750305002032, "grad_norm": 0.2930423617362976, "learning_rate": 5.852509235486934e-06, "loss": 0.329, "step": 24635 }, { "epoch": 2.504676697844652, "grad_norm": 0.27688682079315186, "learning_rate": 5.85215954382825e-06, "loss": 0.3229, "step": 24636 }, { "epoch": 2.504778365189101, "grad_norm": 0.2736699879169464, "learning_rate": 5.8518098478765904e-06, "loss": 0.3333, "step": 24637 }, { "epoch": 2.50488003253355, "grad_norm": 0.2637014389038086, "learning_rate": 5.8514601476337155e-06, "loss": 0.3238, "step": 24638 }, { "epoch": 2.504981699877999, "grad_norm": 0.2579363286495209, "learning_rate": 5.851110443101387e-06, "loss": 0.3604, "step": 24639 }, { "epoch": 2.505083367222448, "grad_norm": 0.2683710753917694, "learning_rate": 5.850760734281367e-06, "loss": 0.3528, "step": 24640 }, { "epoch": 2.505185034566897, "grad_norm": 0.2611527442932129, "learning_rate": 5.850411021175413e-06, "loss": 0.3469, "step": 24641 }, { "epoch": 2.505286701911346, "grad_norm": 0.2682400941848755, "learning_rate": 5.850061303785294e-06, "loss": 0.348, "step": 24642 }, { "epoch": 2.505388369255795, "grad_norm": 0.27134081721305847, "learning_rate": 5.849711582112766e-06, "loss": 0.31, "step": 24643 }, { "epoch": 2.505490036600244, "grad_norm": 0.2672989070415497, "learning_rate": 5.8493618561595945e-06, "loss": 0.339, "step": 24644 }, { "epoch": 2.505591703944693, "grad_norm": 0.2795894145965576, "learning_rate": 5.84901212592754e-06, "loss": 0.3149, "step": 24645 }, { "epoch": 2.505693371289142, "grad_norm": 0.2690049707889557, "learning_rate": 5.8486623914183615e-06, "loss": 0.3235, "step": 24646 }, { "epoch": 2.505795038633591, "grad_norm": 0.2678960859775543, "learning_rate": 5.848312652633826e-06, "loss": 0.3188, "step": 24647 }, { "epoch": 2.50589670597804, "grad_norm": 0.2734396159648895, "learning_rate": 5.8479629095756925e-06, "loss": 0.3322, "step": 24648 }, { "epoch": 2.505998373322489, "grad_norm": 0.284018337726593, "learning_rate": 5.847613162245723e-06, "loss": 0.3579, "step": 24649 }, { "epoch": 2.506100040666938, "grad_norm": 0.26758450269699097, "learning_rate": 5.847263410645681e-06, "loss": 0.354, "step": 24650 }, { "epoch": 2.5062017080113868, "grad_norm": 0.26053014397621155, "learning_rate": 5.846913654777325e-06, "loss": 0.3339, "step": 24651 }, { "epoch": 2.5063033753558357, "grad_norm": 0.26191067695617676, "learning_rate": 5.846563894642422e-06, "loss": 0.3327, "step": 24652 }, { "epoch": 2.5064050427002846, "grad_norm": 0.25635379552841187, "learning_rate": 5.8462141302427286e-06, "loss": 0.3274, "step": 24653 }, { "epoch": 2.5065067100447336, "grad_norm": 0.278741717338562, "learning_rate": 5.845864361580012e-06, "loss": 0.3438, "step": 24654 }, { "epoch": 2.5066083773891825, "grad_norm": 0.26065030694007874, "learning_rate": 5.8455145886560306e-06, "loss": 0.3515, "step": 24655 }, { "epoch": 2.5067100447336315, "grad_norm": 0.271676629781723, "learning_rate": 5.845164811472546e-06, "loss": 0.3667, "step": 24656 }, { "epoch": 2.5068117120780804, "grad_norm": 0.27904024720191956, "learning_rate": 5.844815030031324e-06, "loss": 0.3545, "step": 24657 }, { "epoch": 2.5069133794225293, "grad_norm": 0.2658582329750061, "learning_rate": 5.844465244334125e-06, "loss": 0.3223, "step": 24658 }, { "epoch": 2.5070150467669783, "grad_norm": 0.26303842663764954, "learning_rate": 5.84411545438271e-06, "loss": 0.3145, "step": 24659 }, { "epoch": 2.5071167141114277, "grad_norm": 0.2408628612756729, "learning_rate": 5.843765660178842e-06, "loss": 0.3642, "step": 24660 }, { "epoch": 2.5072183814558766, "grad_norm": 0.25263938307762146, "learning_rate": 5.843415861724282e-06, "loss": 0.3349, "step": 24661 }, { "epoch": 2.5073200488003256, "grad_norm": 0.2577890455722809, "learning_rate": 5.8430660590207945e-06, "loss": 0.3411, "step": 24662 }, { "epoch": 2.5074217161447745, "grad_norm": 0.26527896523475647, "learning_rate": 5.842716252070141e-06, "loss": 0.3383, "step": 24663 }, { "epoch": 2.5075233834892234, "grad_norm": 0.2843596637248993, "learning_rate": 5.8423664408740825e-06, "loss": 0.3281, "step": 24664 }, { "epoch": 2.5076250508336724, "grad_norm": 0.2589034140110016, "learning_rate": 5.842016625434383e-06, "loss": 0.323, "step": 24665 }, { "epoch": 2.5077267181781213, "grad_norm": 0.2687768340110779, "learning_rate": 5.8416668057528015e-06, "loss": 0.3274, "step": 24666 }, { "epoch": 2.5078283855225703, "grad_norm": 0.272721529006958, "learning_rate": 5.841316981831104e-06, "loss": 0.3185, "step": 24667 }, { "epoch": 2.507930052867019, "grad_norm": 0.31139031052589417, "learning_rate": 5.840967153671052e-06, "loss": 0.3449, "step": 24668 }, { "epoch": 2.508031720211468, "grad_norm": 0.2701047658920288, "learning_rate": 5.8406173212744066e-06, "loss": 0.3098, "step": 24669 }, { "epoch": 2.508133387555917, "grad_norm": 0.2670108377933502, "learning_rate": 5.840267484642932e-06, "loss": 0.3381, "step": 24670 }, { "epoch": 2.508235054900366, "grad_norm": 0.27551835775375366, "learning_rate": 5.839917643778388e-06, "loss": 0.3092, "step": 24671 }, { "epoch": 2.508336722244815, "grad_norm": 0.2731264531612396, "learning_rate": 5.8395677986825386e-06, "loss": 0.3456, "step": 24672 }, { "epoch": 2.508438389589264, "grad_norm": 0.259654700756073, "learning_rate": 5.839217949357148e-06, "loss": 0.3559, "step": 24673 }, { "epoch": 2.508540056933713, "grad_norm": 0.2665677070617676, "learning_rate": 5.838868095803974e-06, "loss": 0.3134, "step": 24674 }, { "epoch": 2.508641724278162, "grad_norm": 0.26643049716949463, "learning_rate": 5.838518238024784e-06, "loss": 0.3379, "step": 24675 }, { "epoch": 2.5087433916226107, "grad_norm": 0.2821142375469208, "learning_rate": 5.838168376021338e-06, "loss": 0.303, "step": 24676 }, { "epoch": 2.5088450589670597, "grad_norm": 0.24954411387443542, "learning_rate": 5.837818509795397e-06, "loss": 0.2989, "step": 24677 }, { "epoch": 2.5089467263115086, "grad_norm": 0.2558542788028717, "learning_rate": 5.837468639348729e-06, "loss": 0.3, "step": 24678 }, { "epoch": 2.5090483936559576, "grad_norm": 0.26267170906066895, "learning_rate": 5.837118764683091e-06, "loss": 0.3248, "step": 24679 }, { "epoch": 2.5091500610004065, "grad_norm": 0.2565699517726898, "learning_rate": 5.836768885800248e-06, "loss": 0.3904, "step": 24680 }, { "epoch": 2.5092517283448554, "grad_norm": 0.2706022560596466, "learning_rate": 5.836419002701961e-06, "loss": 0.3411, "step": 24681 }, { "epoch": 2.5093533956893044, "grad_norm": 0.2353268265724182, "learning_rate": 5.836069115389995e-06, "loss": 0.3461, "step": 24682 }, { "epoch": 2.5094550630337533, "grad_norm": 0.26145797967910767, "learning_rate": 5.8357192238661105e-06, "loss": 0.3651, "step": 24683 }, { "epoch": 2.5095567303782023, "grad_norm": 0.2702300250530243, "learning_rate": 5.8353693281320725e-06, "loss": 0.3324, "step": 24684 }, { "epoch": 2.5096583977226516, "grad_norm": 0.25573262572288513, "learning_rate": 5.835019428189642e-06, "loss": 0.3489, "step": 24685 }, { "epoch": 2.5097600650671006, "grad_norm": 0.263263463973999, "learning_rate": 5.8346695240405816e-06, "loss": 0.3224, "step": 24686 }, { "epoch": 2.5098617324115495, "grad_norm": 0.2880997359752655, "learning_rate": 5.834319615686655e-06, "loss": 0.3633, "step": 24687 }, { "epoch": 2.5099633997559985, "grad_norm": 0.2761646807193756, "learning_rate": 5.833969703129624e-06, "loss": 0.3032, "step": 24688 }, { "epoch": 2.5100650671004474, "grad_norm": 0.28323179483413696, "learning_rate": 5.833619786371252e-06, "loss": 0.3159, "step": 24689 }, { "epoch": 2.5101667344448964, "grad_norm": 0.26852837204933167, "learning_rate": 5.833269865413301e-06, "loss": 0.3156, "step": 24690 }, { "epoch": 2.5102684017893453, "grad_norm": 0.29660892486572266, "learning_rate": 5.832919940257536e-06, "loss": 0.3047, "step": 24691 }, { "epoch": 2.5103700691337942, "grad_norm": 0.2740638852119446, "learning_rate": 5.832570010905716e-06, "loss": 0.3463, "step": 24692 }, { "epoch": 2.510471736478243, "grad_norm": 0.26407137513160706, "learning_rate": 5.8322200773596075e-06, "loss": 0.321, "step": 24693 }, { "epoch": 2.510573403822692, "grad_norm": 0.2730940282344818, "learning_rate": 5.831870139620972e-06, "loss": 0.3279, "step": 24694 }, { "epoch": 2.510675071167141, "grad_norm": 0.29782554507255554, "learning_rate": 5.831520197691572e-06, "loss": 0.3156, "step": 24695 }, { "epoch": 2.51077673851159, "grad_norm": 0.2779240310192108, "learning_rate": 5.831170251573172e-06, "loss": 0.3072, "step": 24696 }, { "epoch": 2.510878405856039, "grad_norm": 0.28154250979423523, "learning_rate": 5.830820301267533e-06, "loss": 0.3564, "step": 24697 }, { "epoch": 2.510980073200488, "grad_norm": 0.2686408460140228, "learning_rate": 5.830470346776418e-06, "loss": 0.3209, "step": 24698 }, { "epoch": 2.511081740544937, "grad_norm": 0.28110527992248535, "learning_rate": 5.830120388101591e-06, "loss": 0.3493, "step": 24699 }, { "epoch": 2.5111834078893858, "grad_norm": 0.27344584465026855, "learning_rate": 5.8297704252448165e-06, "loss": 0.3511, "step": 24700 }, { "epoch": 2.511285075233835, "grad_norm": 0.2748095691204071, "learning_rate": 5.829420458207854e-06, "loss": 0.3215, "step": 24701 }, { "epoch": 2.511386742578284, "grad_norm": 0.2868034541606903, "learning_rate": 5.829070486992467e-06, "loss": 0.3201, "step": 24702 }, { "epoch": 2.511488409922733, "grad_norm": 0.29155251383781433, "learning_rate": 5.828720511600423e-06, "loss": 0.3215, "step": 24703 }, { "epoch": 2.511590077267182, "grad_norm": 0.2652066946029663, "learning_rate": 5.82837053203348e-06, "loss": 0.333, "step": 24704 }, { "epoch": 2.511691744611631, "grad_norm": 0.28401100635528564, "learning_rate": 5.828020548293405e-06, "loss": 0.3787, "step": 24705 }, { "epoch": 2.51179341195608, "grad_norm": 0.27007582783699036, "learning_rate": 5.827670560381957e-06, "loss": 0.3049, "step": 24706 }, { "epoch": 2.511895079300529, "grad_norm": 0.32303518056869507, "learning_rate": 5.827320568300903e-06, "loss": 0.3278, "step": 24707 }, { "epoch": 2.5119967466449777, "grad_norm": 0.2930184006690979, "learning_rate": 5.826970572052004e-06, "loss": 0.3507, "step": 24708 }, { "epoch": 2.5120984139894267, "grad_norm": 0.27763062715530396, "learning_rate": 5.826620571637024e-06, "loss": 0.3121, "step": 24709 }, { "epoch": 2.5122000813338756, "grad_norm": 0.26222848892211914, "learning_rate": 5.826270567057725e-06, "loss": 0.3161, "step": 24710 }, { "epoch": 2.5123017486783246, "grad_norm": 0.26891326904296875, "learning_rate": 5.825920558315874e-06, "loss": 0.3381, "step": 24711 }, { "epoch": 2.5124034160227735, "grad_norm": 0.27072426676750183, "learning_rate": 5.8255705454132275e-06, "loss": 0.324, "step": 24712 }, { "epoch": 2.5125050833672224, "grad_norm": 0.2958281636238098, "learning_rate": 5.8252205283515564e-06, "loss": 0.3449, "step": 24713 }, { "epoch": 2.5126067507116714, "grad_norm": 0.30754175782203674, "learning_rate": 5.824870507132617e-06, "loss": 0.3063, "step": 24714 }, { "epoch": 2.5127084180561203, "grad_norm": 0.3007722496986389, "learning_rate": 5.824520481758179e-06, "loss": 0.3266, "step": 24715 }, { "epoch": 2.5128100854005693, "grad_norm": 0.27995866537094116, "learning_rate": 5.8241704522300025e-06, "loss": 0.329, "step": 24716 }, { "epoch": 2.512911752745018, "grad_norm": 0.27850332856178284, "learning_rate": 5.823820418549848e-06, "loss": 0.3474, "step": 24717 }, { "epoch": 2.513013420089467, "grad_norm": 0.2780989706516266, "learning_rate": 5.823470380719485e-06, "loss": 0.3136, "step": 24718 }, { "epoch": 2.513115087433916, "grad_norm": 0.2397066056728363, "learning_rate": 5.823120338740673e-06, "loss": 0.3405, "step": 24719 }, { "epoch": 2.513216754778365, "grad_norm": 0.27402982115745544, "learning_rate": 5.822770292615176e-06, "loss": 0.314, "step": 24720 }, { "epoch": 2.513318422122814, "grad_norm": 0.27068477869033813, "learning_rate": 5.822420242344759e-06, "loss": 0.3228, "step": 24721 }, { "epoch": 2.513420089467263, "grad_norm": 0.2744634449481964, "learning_rate": 5.822070187931182e-06, "loss": 0.3489, "step": 24722 }, { "epoch": 2.513521756811712, "grad_norm": 0.2605521082878113, "learning_rate": 5.821720129376214e-06, "loss": 0.3499, "step": 24723 }, { "epoch": 2.513623424156161, "grad_norm": 0.26046720147132874, "learning_rate": 5.821370066681612e-06, "loss": 0.3353, "step": 24724 }, { "epoch": 2.5137250915006097, "grad_norm": 0.24768315255641937, "learning_rate": 5.8210199998491455e-06, "loss": 0.3176, "step": 24725 }, { "epoch": 2.513826758845059, "grad_norm": 0.26146069169044495, "learning_rate": 5.820669928880575e-06, "loss": 0.3159, "step": 24726 }, { "epoch": 2.513928426189508, "grad_norm": 0.28402480483055115, "learning_rate": 5.820319853777662e-06, "loss": 0.3612, "step": 24727 }, { "epoch": 2.514030093533957, "grad_norm": 0.2767688035964966, "learning_rate": 5.819969774542175e-06, "loss": 0.3505, "step": 24728 }, { "epoch": 2.514131760878406, "grad_norm": 0.2675758898258209, "learning_rate": 5.8196196911758725e-06, "loss": 0.3308, "step": 24729 }, { "epoch": 2.514233428222855, "grad_norm": 0.2541550397872925, "learning_rate": 5.819269603680524e-06, "loss": 0.3566, "step": 24730 }, { "epoch": 2.514335095567304, "grad_norm": 0.28243017196655273, "learning_rate": 5.818919512057889e-06, "loss": 0.344, "step": 24731 }, { "epoch": 2.5144367629117528, "grad_norm": 0.2577800154685974, "learning_rate": 5.818569416309729e-06, "loss": 0.3227, "step": 24732 }, { "epoch": 2.5145384302562017, "grad_norm": 0.25903284549713135, "learning_rate": 5.818219316437814e-06, "loss": 0.3269, "step": 24733 }, { "epoch": 2.5146400976006507, "grad_norm": 0.2676180899143219, "learning_rate": 5.817869212443905e-06, "loss": 0.3016, "step": 24734 }, { "epoch": 2.5147417649450996, "grad_norm": 0.279718279838562, "learning_rate": 5.817519104329763e-06, "loss": 0.3431, "step": 24735 }, { "epoch": 2.5148434322895485, "grad_norm": 0.2716343104839325, "learning_rate": 5.8171689920971565e-06, "loss": 0.3424, "step": 24736 }, { "epoch": 2.5149450996339975, "grad_norm": 0.27682560682296753, "learning_rate": 5.816818875747844e-06, "loss": 0.3218, "step": 24737 }, { "epoch": 2.5150467669784464, "grad_norm": 0.2847691476345062, "learning_rate": 5.816468755283593e-06, "loss": 0.3221, "step": 24738 }, { "epoch": 2.5151484343228954, "grad_norm": 0.2715722620487213, "learning_rate": 5.816118630706168e-06, "loss": 0.3354, "step": 24739 }, { "epoch": 2.5152501016673443, "grad_norm": 0.2829625606536865, "learning_rate": 5.815768502017329e-06, "loss": 0.3274, "step": 24740 }, { "epoch": 2.5153517690117932, "grad_norm": 0.27985554933547974, "learning_rate": 5.815418369218845e-06, "loss": 0.3728, "step": 24741 }, { "epoch": 2.5154534363562426, "grad_norm": 0.26528286933898926, "learning_rate": 5.815068232312474e-06, "loss": 0.3221, "step": 24742 }, { "epoch": 2.5155551037006916, "grad_norm": 0.2782685458660126, "learning_rate": 5.814718091299984e-06, "loss": 0.3556, "step": 24743 }, { "epoch": 2.5156567710451405, "grad_norm": 0.29615452885627747, "learning_rate": 5.814367946183139e-06, "loss": 0.3656, "step": 24744 }, { "epoch": 2.5157584383895895, "grad_norm": 0.28002479672431946, "learning_rate": 5.814017796963701e-06, "loss": 0.3146, "step": 24745 }, { "epoch": 2.5158601057340384, "grad_norm": 0.27487775683403015, "learning_rate": 5.813667643643435e-06, "loss": 0.3428, "step": 24746 }, { "epoch": 2.5159617730784873, "grad_norm": 0.26272258162498474, "learning_rate": 5.813317486224104e-06, "loss": 0.3551, "step": 24747 }, { "epoch": 2.5160634404229363, "grad_norm": 0.26843130588531494, "learning_rate": 5.812967324707474e-06, "loss": 0.3262, "step": 24748 }, { "epoch": 2.5161651077673852, "grad_norm": 0.30431419610977173, "learning_rate": 5.8126171590953074e-06, "loss": 0.3318, "step": 24749 }, { "epoch": 2.516266775111834, "grad_norm": 0.2801443040370941, "learning_rate": 5.812266989389368e-06, "loss": 0.3412, "step": 24750 }, { "epoch": 2.516368442456283, "grad_norm": 0.2805638611316681, "learning_rate": 5.81191681559142e-06, "loss": 0.3845, "step": 24751 }, { "epoch": 2.516470109800732, "grad_norm": 0.27523985505104065, "learning_rate": 5.81156663770323e-06, "loss": 0.332, "step": 24752 }, { "epoch": 2.516571777145181, "grad_norm": 0.2580459713935852, "learning_rate": 5.811216455726558e-06, "loss": 0.3222, "step": 24753 }, { "epoch": 2.51667344448963, "grad_norm": 0.27284669876098633, "learning_rate": 5.810866269663173e-06, "loss": 0.3192, "step": 24754 }, { "epoch": 2.516775111834079, "grad_norm": 0.2844996154308319, "learning_rate": 5.810516079514834e-06, "loss": 0.3574, "step": 24755 }, { "epoch": 2.516876779178528, "grad_norm": 0.2457742542028427, "learning_rate": 5.8101658852833085e-06, "loss": 0.3009, "step": 24756 }, { "epoch": 2.5169784465229768, "grad_norm": 0.2795042097568512, "learning_rate": 5.8098156869703595e-06, "loss": 0.3397, "step": 24757 }, { "epoch": 2.5170801138674257, "grad_norm": 0.2800198197364807, "learning_rate": 5.809465484577751e-06, "loss": 0.3324, "step": 24758 }, { "epoch": 2.5171817812118746, "grad_norm": 0.2664869725704193, "learning_rate": 5.809115278107249e-06, "loss": 0.3384, "step": 24759 }, { "epoch": 2.5172834485563236, "grad_norm": 0.27949315309524536, "learning_rate": 5.808765067560615e-06, "loss": 0.3279, "step": 24760 }, { "epoch": 2.5173851159007725, "grad_norm": 0.2775738835334778, "learning_rate": 5.8084148529396164e-06, "loss": 0.3256, "step": 24761 }, { "epoch": 2.5174867832452215, "grad_norm": 0.25491049885749817, "learning_rate": 5.808064634246015e-06, "loss": 0.3405, "step": 24762 }, { "epoch": 2.5175884505896704, "grad_norm": 0.30354419350624084, "learning_rate": 5.8077144114815765e-06, "loss": 0.3687, "step": 24763 }, { "epoch": 2.5176901179341193, "grad_norm": 0.24590639770030975, "learning_rate": 5.807364184648064e-06, "loss": 0.3171, "step": 24764 }, { "epoch": 2.5177917852785683, "grad_norm": 0.2575588524341583, "learning_rate": 5.807013953747243e-06, "loss": 0.3312, "step": 24765 }, { "epoch": 2.5178934526230172, "grad_norm": 0.25044187903404236, "learning_rate": 5.8066637187808785e-06, "loss": 0.3029, "step": 24766 }, { "epoch": 2.5179951199674666, "grad_norm": 0.29114866256713867, "learning_rate": 5.806313479750732e-06, "loss": 0.3854, "step": 24767 }, { "epoch": 2.5180967873119156, "grad_norm": 0.2872999310493469, "learning_rate": 5.805963236658571e-06, "loss": 0.3262, "step": 24768 }, { "epoch": 2.5181984546563645, "grad_norm": 0.29535529017448425, "learning_rate": 5.8056129895061585e-06, "loss": 0.331, "step": 24769 }, { "epoch": 2.5183001220008134, "grad_norm": 0.2586806118488312, "learning_rate": 5.805262738295259e-06, "loss": 0.3551, "step": 24770 }, { "epoch": 2.5184017893452624, "grad_norm": 0.2694934606552124, "learning_rate": 5.804912483027637e-06, "loss": 0.3253, "step": 24771 }, { "epoch": 2.5185034566897113, "grad_norm": 0.26803088188171387, "learning_rate": 5.804562223705057e-06, "loss": 0.3552, "step": 24772 }, { "epoch": 2.5186051240341603, "grad_norm": 0.2818352282047272, "learning_rate": 5.8042119603292845e-06, "loss": 0.3528, "step": 24773 }, { "epoch": 2.518706791378609, "grad_norm": 0.2749304473400116, "learning_rate": 5.803861692902083e-06, "loss": 0.3392, "step": 24774 }, { "epoch": 2.518808458723058, "grad_norm": 0.27316033840179443, "learning_rate": 5.8035114214252176e-06, "loss": 0.3087, "step": 24775 }, { "epoch": 2.518910126067507, "grad_norm": 0.27886682748794556, "learning_rate": 5.8031611459004525e-06, "loss": 0.3501, "step": 24776 }, { "epoch": 2.519011793411956, "grad_norm": 0.2924136221408844, "learning_rate": 5.802810866329551e-06, "loss": 0.3494, "step": 24777 }, { "epoch": 2.519113460756405, "grad_norm": 0.3025222420692444, "learning_rate": 5.802460582714281e-06, "loss": 0.3405, "step": 24778 }, { "epoch": 2.519215128100854, "grad_norm": 0.2873210310935974, "learning_rate": 5.8021102950564044e-06, "loss": 0.3293, "step": 24779 }, { "epoch": 2.519316795445303, "grad_norm": 0.28187665343284607, "learning_rate": 5.801760003357687e-06, "loss": 0.3011, "step": 24780 }, { "epoch": 2.519418462789752, "grad_norm": 0.2699050009250641, "learning_rate": 5.801409707619894e-06, "loss": 0.3313, "step": 24781 }, { "epoch": 2.5195201301342007, "grad_norm": 0.2691195011138916, "learning_rate": 5.8010594078447875e-06, "loss": 0.3382, "step": 24782 }, { "epoch": 2.51962179747865, "grad_norm": 0.26262152194976807, "learning_rate": 5.800709104034135e-06, "loss": 0.3266, "step": 24783 }, { "epoch": 2.519723464823099, "grad_norm": 0.2745489180088043, "learning_rate": 5.800358796189701e-06, "loss": 0.3368, "step": 24784 }, { "epoch": 2.519825132167548, "grad_norm": 0.2741967439651489, "learning_rate": 5.800008484313249e-06, "loss": 0.3304, "step": 24785 }, { "epoch": 2.519926799511997, "grad_norm": 0.27413254976272583, "learning_rate": 5.799658168406545e-06, "loss": 0.3517, "step": 24786 }, { "epoch": 2.520028466856446, "grad_norm": 0.24450041353702545, "learning_rate": 5.799307848471353e-06, "loss": 0.333, "step": 24787 }, { "epoch": 2.520130134200895, "grad_norm": 0.27074506878852844, "learning_rate": 5.798957524509436e-06, "loss": 0.3229, "step": 24788 }, { "epoch": 2.5202318015453438, "grad_norm": 0.26357096433639526, "learning_rate": 5.798607196522562e-06, "loss": 0.3195, "step": 24789 }, { "epoch": 2.5203334688897927, "grad_norm": 0.2657946050167084, "learning_rate": 5.798256864512495e-06, "loss": 0.3444, "step": 24790 }, { "epoch": 2.5204351362342416, "grad_norm": 0.2739770710468292, "learning_rate": 5.797906528481001e-06, "loss": 0.3377, "step": 24791 }, { "epoch": 2.5205368035786906, "grad_norm": 0.2733859717845917, "learning_rate": 5.797556188429842e-06, "loss": 0.3709, "step": 24792 }, { "epoch": 2.5206384709231395, "grad_norm": 0.27060988545417786, "learning_rate": 5.797205844360784e-06, "loss": 0.3266, "step": 24793 }, { "epoch": 2.5207401382675885, "grad_norm": 0.2720624506473541, "learning_rate": 5.796855496275593e-06, "loss": 0.3525, "step": 24794 }, { "epoch": 2.5208418056120374, "grad_norm": 0.260772705078125, "learning_rate": 5.796505144176033e-06, "loss": 0.3427, "step": 24795 }, { "epoch": 2.5209434729564864, "grad_norm": 0.2849208414554596, "learning_rate": 5.79615478806387e-06, "loss": 0.3809, "step": 24796 }, { "epoch": 2.5210451403009353, "grad_norm": 0.27998724579811096, "learning_rate": 5.7958044279408695e-06, "loss": 0.3307, "step": 24797 }, { "epoch": 2.5211468076453842, "grad_norm": 0.27709197998046875, "learning_rate": 5.795454063808792e-06, "loss": 0.3336, "step": 24798 }, { "epoch": 2.521248474989833, "grad_norm": 0.25780102610588074, "learning_rate": 5.795103695669409e-06, "loss": 0.3408, "step": 24799 }, { "epoch": 2.521350142334282, "grad_norm": 0.28059419989585876, "learning_rate": 5.794753323524481e-06, "loss": 0.3422, "step": 24800 }, { "epoch": 2.521451809678731, "grad_norm": 0.2836453914642334, "learning_rate": 5.794402947375776e-06, "loss": 0.3333, "step": 24801 }, { "epoch": 2.52155347702318, "grad_norm": 0.2759964168071747, "learning_rate": 5.794052567225057e-06, "loss": 0.3364, "step": 24802 }, { "epoch": 2.521655144367629, "grad_norm": 0.2885522246360779, "learning_rate": 5.793702183074088e-06, "loss": 0.3222, "step": 24803 }, { "epoch": 2.521756811712078, "grad_norm": 0.28242385387420654, "learning_rate": 5.793351794924639e-06, "loss": 0.2916, "step": 24804 }, { "epoch": 2.521858479056527, "grad_norm": 0.2894778251647949, "learning_rate": 5.793001402778471e-06, "loss": 0.3255, "step": 24805 }, { "epoch": 2.5219601464009758, "grad_norm": 0.2816920280456543, "learning_rate": 5.792651006637351e-06, "loss": 0.3362, "step": 24806 }, { "epoch": 2.5220618137454247, "grad_norm": 0.2677803039550781, "learning_rate": 5.792300606503043e-06, "loss": 0.3181, "step": 24807 }, { "epoch": 2.522163481089874, "grad_norm": 0.3253401815891266, "learning_rate": 5.791950202377311e-06, "loss": 0.3364, "step": 24808 }, { "epoch": 2.522265148434323, "grad_norm": 0.2651654779911041, "learning_rate": 5.791599794261925e-06, "loss": 0.3155, "step": 24809 }, { "epoch": 2.522366815778772, "grad_norm": 0.257739782333374, "learning_rate": 5.791249382158647e-06, "loss": 0.3259, "step": 24810 }, { "epoch": 2.522468483123221, "grad_norm": 0.2702980935573578, "learning_rate": 5.7908989660692405e-06, "loss": 0.3301, "step": 24811 }, { "epoch": 2.52257015046767, "grad_norm": 0.25013408064842224, "learning_rate": 5.7905485459954756e-06, "loss": 0.3467, "step": 24812 }, { "epoch": 2.522671817812119, "grad_norm": 0.2633974552154541, "learning_rate": 5.7901981219391125e-06, "loss": 0.3315, "step": 24813 }, { "epoch": 2.5227734851565677, "grad_norm": 0.29000425338745117, "learning_rate": 5.78984769390192e-06, "loss": 0.3512, "step": 24814 }, { "epoch": 2.5228751525010167, "grad_norm": 0.2994738519191742, "learning_rate": 5.789497261885664e-06, "loss": 0.3565, "step": 24815 }, { "epoch": 2.5229768198454656, "grad_norm": 0.2878793179988861, "learning_rate": 5.789146825892107e-06, "loss": 0.3464, "step": 24816 }, { "epoch": 2.5230784871899146, "grad_norm": 0.27065566182136536, "learning_rate": 5.7887963859230155e-06, "loss": 0.3026, "step": 24817 }, { "epoch": 2.5231801545343635, "grad_norm": 0.26731741428375244, "learning_rate": 5.788445941980154e-06, "loss": 0.3045, "step": 24818 }, { "epoch": 2.5232818218788124, "grad_norm": 0.28141191601753235, "learning_rate": 5.788095494065292e-06, "loss": 0.3017, "step": 24819 }, { "epoch": 2.5233834892232614, "grad_norm": 0.28848645091056824, "learning_rate": 5.787745042180192e-06, "loss": 0.3251, "step": 24820 }, { "epoch": 2.5234851565677103, "grad_norm": 0.2509855329990387, "learning_rate": 5.787394586326618e-06, "loss": 0.3353, "step": 24821 }, { "epoch": 2.5235868239121593, "grad_norm": 0.2795606851577759, "learning_rate": 5.787044126506339e-06, "loss": 0.3259, "step": 24822 }, { "epoch": 2.523688491256608, "grad_norm": 0.3030094504356384, "learning_rate": 5.786693662721116e-06, "loss": 0.3168, "step": 24823 }, { "epoch": 2.5237901586010576, "grad_norm": 0.26709267497062683, "learning_rate": 5.78634319497272e-06, "loss": 0.3526, "step": 24824 }, { "epoch": 2.5238918259455065, "grad_norm": 0.2697611153125763, "learning_rate": 5.785992723262913e-06, "loss": 0.3206, "step": 24825 }, { "epoch": 2.5239934932899555, "grad_norm": 0.28541281819343567, "learning_rate": 5.7856422475934615e-06, "loss": 0.331, "step": 24826 }, { "epoch": 2.5240951606344044, "grad_norm": 0.26609620451927185, "learning_rate": 5.7852917679661315e-06, "loss": 0.3255, "step": 24827 }, { "epoch": 2.5241968279788534, "grad_norm": 0.2749798893928528, "learning_rate": 5.784941284382687e-06, "loss": 0.3209, "step": 24828 }, { "epoch": 2.5242984953233023, "grad_norm": 0.2711254358291626, "learning_rate": 5.784590796844895e-06, "loss": 0.336, "step": 24829 }, { "epoch": 2.5244001626677512, "grad_norm": 0.2573060989379883, "learning_rate": 5.784240305354522e-06, "loss": 0.3319, "step": 24830 }, { "epoch": 2.5245018300122, "grad_norm": 0.2710634768009186, "learning_rate": 5.7838898099133315e-06, "loss": 0.312, "step": 24831 }, { "epoch": 2.524603497356649, "grad_norm": 0.2572791278362274, "learning_rate": 5.7835393105230906e-06, "loss": 0.3275, "step": 24832 }, { "epoch": 2.524705164701098, "grad_norm": 0.2813665568828583, "learning_rate": 5.783188807185564e-06, "loss": 0.3086, "step": 24833 }, { "epoch": 2.524806832045547, "grad_norm": 0.26719826459884644, "learning_rate": 5.7828382999025204e-06, "loss": 0.3336, "step": 24834 }, { "epoch": 2.524908499389996, "grad_norm": 0.27541252970695496, "learning_rate": 5.782487788675723e-06, "loss": 0.3506, "step": 24835 }, { "epoch": 2.525010166734445, "grad_norm": 0.2835683226585388, "learning_rate": 5.782137273506937e-06, "loss": 0.351, "step": 24836 }, { "epoch": 2.525111834078894, "grad_norm": 0.26702550053596497, "learning_rate": 5.781786754397931e-06, "loss": 0.3269, "step": 24837 }, { "epoch": 2.5252135014233428, "grad_norm": 0.28438952565193176, "learning_rate": 5.7814362313504676e-06, "loss": 0.3049, "step": 24838 }, { "epoch": 2.5253151687677917, "grad_norm": 0.28055575489997864, "learning_rate": 5.781085704366314e-06, "loss": 0.33, "step": 24839 }, { "epoch": 2.5254168361122407, "grad_norm": 0.28087538480758667, "learning_rate": 5.780735173447238e-06, "loss": 0.3719, "step": 24840 }, { "epoch": 2.5255185034566896, "grad_norm": 0.2749898433685303, "learning_rate": 5.780384638595001e-06, "loss": 0.3457, "step": 24841 }, { "epoch": 2.5256201708011385, "grad_norm": 0.2744056284427643, "learning_rate": 5.780034099811374e-06, "loss": 0.308, "step": 24842 }, { "epoch": 2.5257218381455875, "grad_norm": 0.25265786051750183, "learning_rate": 5.779683557098121e-06, "loss": 0.3358, "step": 24843 }, { "epoch": 2.5258235054900364, "grad_norm": 0.2724958062171936, "learning_rate": 5.779333010457005e-06, "loss": 0.3441, "step": 24844 }, { "epoch": 2.5259251728344854, "grad_norm": 0.28321197628974915, "learning_rate": 5.7789824598897955e-06, "loss": 0.3412, "step": 24845 }, { "epoch": 2.5260268401789343, "grad_norm": 0.28602278232574463, "learning_rate": 5.778631905398258e-06, "loss": 0.3335, "step": 24846 }, { "epoch": 2.5261285075233832, "grad_norm": 0.28476548194885254, "learning_rate": 5.778281346984158e-06, "loss": 0.3373, "step": 24847 }, { "epoch": 2.526230174867832, "grad_norm": 0.26759734749794006, "learning_rate": 5.777930784649262e-06, "loss": 0.3287, "step": 24848 }, { "epoch": 2.5263318422122816, "grad_norm": 0.24829977750778198, "learning_rate": 5.7775802183953336e-06, "loss": 0.3338, "step": 24849 }, { "epoch": 2.5264335095567305, "grad_norm": 0.27865275740623474, "learning_rate": 5.777229648224143e-06, "loss": 0.3499, "step": 24850 }, { "epoch": 2.5265351769011795, "grad_norm": 0.27391108870506287, "learning_rate": 5.776879074137453e-06, "loss": 0.345, "step": 24851 }, { "epoch": 2.5266368442456284, "grad_norm": 0.2693919837474823, "learning_rate": 5.776528496137032e-06, "loss": 0.3463, "step": 24852 }, { "epoch": 2.5267385115900773, "grad_norm": 0.2746433913707733, "learning_rate": 5.776177914224643e-06, "loss": 0.3405, "step": 24853 }, { "epoch": 2.5268401789345263, "grad_norm": 0.23985624313354492, "learning_rate": 5.775827328402055e-06, "loss": 0.298, "step": 24854 }, { "epoch": 2.5269418462789752, "grad_norm": 0.24794456362724304, "learning_rate": 5.775476738671034e-06, "loss": 0.3426, "step": 24855 }, { "epoch": 2.527043513623424, "grad_norm": 0.2845272421836853, "learning_rate": 5.775126145033344e-06, "loss": 0.314, "step": 24856 }, { "epoch": 2.527145180967873, "grad_norm": 0.262350469827652, "learning_rate": 5.7747755474907545e-06, "loss": 0.3275, "step": 24857 }, { "epoch": 2.527246848312322, "grad_norm": 0.293814092874527, "learning_rate": 5.774424946045029e-06, "loss": 0.322, "step": 24858 }, { "epoch": 2.527348515656771, "grad_norm": 0.26216408610343933, "learning_rate": 5.774074340697934e-06, "loss": 0.3145, "step": 24859 }, { "epoch": 2.52745018300122, "grad_norm": 0.2559986710548401, "learning_rate": 5.773723731451237e-06, "loss": 0.349, "step": 24860 }, { "epoch": 2.527551850345669, "grad_norm": 0.2958940267562866, "learning_rate": 5.7733731183067045e-06, "loss": 0.3678, "step": 24861 }, { "epoch": 2.527653517690118, "grad_norm": 0.258637934923172, "learning_rate": 5.773022501266101e-06, "loss": 0.3154, "step": 24862 }, { "epoch": 2.5277551850345668, "grad_norm": 0.28551724553108215, "learning_rate": 5.772671880331195e-06, "loss": 0.3528, "step": 24863 }, { "epoch": 2.5278568523790157, "grad_norm": 0.26242977380752563, "learning_rate": 5.77232125550375e-06, "loss": 0.3222, "step": 24864 }, { "epoch": 2.527958519723465, "grad_norm": 0.2654552757740021, "learning_rate": 5.771970626785535e-06, "loss": 0.3421, "step": 24865 }, { "epoch": 2.528060187067914, "grad_norm": 0.29828861355781555, "learning_rate": 5.771619994178314e-06, "loss": 0.3262, "step": 24866 }, { "epoch": 2.528161854412363, "grad_norm": 0.28422680497169495, "learning_rate": 5.771269357683858e-06, "loss": 0.3672, "step": 24867 }, { "epoch": 2.528263521756812, "grad_norm": 0.27457818388938904, "learning_rate": 5.770918717303928e-06, "loss": 0.3852, "step": 24868 }, { "epoch": 2.528365189101261, "grad_norm": 0.2954500913619995, "learning_rate": 5.7705680730402925e-06, "loss": 0.3318, "step": 24869 }, { "epoch": 2.52846685644571, "grad_norm": 0.2452416718006134, "learning_rate": 5.77021742489472e-06, "loss": 0.3311, "step": 24870 }, { "epoch": 2.5285685237901587, "grad_norm": 0.25343427062034607, "learning_rate": 5.7698667728689726e-06, "loss": 0.3258, "step": 24871 }, { "epoch": 2.5286701911346077, "grad_norm": 0.26750004291534424, "learning_rate": 5.769516116964821e-06, "loss": 0.3387, "step": 24872 }, { "epoch": 2.5287718584790566, "grad_norm": 0.25322845578193665, "learning_rate": 5.769165457184031e-06, "loss": 0.3275, "step": 24873 }, { "epoch": 2.5288735258235056, "grad_norm": 0.2721487581729889, "learning_rate": 5.768814793528364e-06, "loss": 0.3554, "step": 24874 }, { "epoch": 2.5289751931679545, "grad_norm": 0.2698325514793396, "learning_rate": 5.7684641259995956e-06, "loss": 0.3216, "step": 24875 }, { "epoch": 2.5290768605124034, "grad_norm": 0.2627808451652527, "learning_rate": 5.768113454599484e-06, "loss": 0.3361, "step": 24876 }, { "epoch": 2.5291785278568524, "grad_norm": 0.2699491083621979, "learning_rate": 5.767762779329802e-06, "loss": 0.3433, "step": 24877 }, { "epoch": 2.5292801952013013, "grad_norm": 0.2888844907283783, "learning_rate": 5.767412100192313e-06, "loss": 0.3512, "step": 24878 }, { "epoch": 2.5293818625457503, "grad_norm": 0.26795655488967896, "learning_rate": 5.767061417188783e-06, "loss": 0.2963, "step": 24879 }, { "epoch": 2.529483529890199, "grad_norm": 0.2773483395576477, "learning_rate": 5.766710730320981e-06, "loss": 0.3223, "step": 24880 }, { "epoch": 2.529585197234648, "grad_norm": 0.2812030613422394, "learning_rate": 5.766360039590672e-06, "loss": 0.3215, "step": 24881 }, { "epoch": 2.529686864579097, "grad_norm": 0.28406858444213867, "learning_rate": 5.766009344999624e-06, "loss": 0.343, "step": 24882 }, { "epoch": 2.529788531923546, "grad_norm": 0.27206167578697205, "learning_rate": 5.765658646549603e-06, "loss": 0.3554, "step": 24883 }, { "epoch": 2.529890199267995, "grad_norm": 0.2631026804447174, "learning_rate": 5.765307944242373e-06, "loss": 0.3521, "step": 24884 }, { "epoch": 2.529991866612444, "grad_norm": 0.26806002855300903, "learning_rate": 5.764957238079707e-06, "loss": 0.3343, "step": 24885 }, { "epoch": 2.530093533956893, "grad_norm": 0.2561981976032257, "learning_rate": 5.764606528063366e-06, "loss": 0.3355, "step": 24886 }, { "epoch": 2.530195201301342, "grad_norm": 0.25154122710227966, "learning_rate": 5.76425581419512e-06, "loss": 0.3248, "step": 24887 }, { "epoch": 2.5302968686457907, "grad_norm": 0.28294679522514343, "learning_rate": 5.763905096476735e-06, "loss": 0.3161, "step": 24888 }, { "epoch": 2.5303985359902397, "grad_norm": 0.28793635964393616, "learning_rate": 5.763554374909975e-06, "loss": 0.3319, "step": 24889 }, { "epoch": 2.530500203334689, "grad_norm": 0.2767667770385742, "learning_rate": 5.763203649496614e-06, "loss": 0.3529, "step": 24890 }, { "epoch": 2.530601870679138, "grad_norm": 0.3002171218395233, "learning_rate": 5.762852920238412e-06, "loss": 0.3188, "step": 24891 }, { "epoch": 2.530703538023587, "grad_norm": 0.2673937976360321, "learning_rate": 5.762502187137138e-06, "loss": 0.3163, "step": 24892 }, { "epoch": 2.530805205368036, "grad_norm": 0.2557694911956787, "learning_rate": 5.76215145019456e-06, "loss": 0.3244, "step": 24893 }, { "epoch": 2.530906872712485, "grad_norm": 0.2790610194206238, "learning_rate": 5.761800709412442e-06, "loss": 0.3286, "step": 24894 }, { "epoch": 2.5310085400569338, "grad_norm": 0.2801915109157562, "learning_rate": 5.761449964792556e-06, "loss": 0.3551, "step": 24895 }, { "epoch": 2.5311102074013827, "grad_norm": 0.2742654085159302, "learning_rate": 5.7610992163366654e-06, "loss": 0.3248, "step": 24896 }, { "epoch": 2.5312118747458316, "grad_norm": 0.2612874507904053, "learning_rate": 5.760748464046536e-06, "loss": 0.3214, "step": 24897 }, { "epoch": 2.5313135420902806, "grad_norm": 0.272632360458374, "learning_rate": 5.760397707923938e-06, "loss": 0.34, "step": 24898 }, { "epoch": 2.5314152094347295, "grad_norm": 0.28490209579467773, "learning_rate": 5.7600469479706354e-06, "loss": 0.3185, "step": 24899 }, { "epoch": 2.5315168767791785, "grad_norm": 0.2573513090610504, "learning_rate": 5.759696184188399e-06, "loss": 0.348, "step": 24900 }, { "epoch": 2.5316185441236274, "grad_norm": 0.28188860416412354, "learning_rate": 5.759345416578994e-06, "loss": 0.3555, "step": 24901 }, { "epoch": 2.5317202114680764, "grad_norm": 0.2646671235561371, "learning_rate": 5.7589946451441855e-06, "loss": 0.3072, "step": 24902 }, { "epoch": 2.5318218788125253, "grad_norm": 0.25615549087524414, "learning_rate": 5.758643869885744e-06, "loss": 0.3121, "step": 24903 }, { "epoch": 2.5319235461569742, "grad_norm": 0.2744896113872528, "learning_rate": 5.758293090805433e-06, "loss": 0.325, "step": 24904 }, { "epoch": 2.532025213501423, "grad_norm": 0.2781103253364563, "learning_rate": 5.757942307905022e-06, "loss": 0.3461, "step": 24905 }, { "epoch": 2.5321268808458726, "grad_norm": 0.2922663986682892, "learning_rate": 5.75759152118628e-06, "loss": 0.3501, "step": 24906 }, { "epoch": 2.5322285481903215, "grad_norm": 0.2596236765384674, "learning_rate": 5.75724073065097e-06, "loss": 0.331, "step": 24907 }, { "epoch": 2.5323302155347704, "grad_norm": 0.26263463497161865, "learning_rate": 5.7568899363008625e-06, "loss": 0.3136, "step": 24908 }, { "epoch": 2.5324318828792194, "grad_norm": 0.2723209261894226, "learning_rate": 5.756539138137721e-06, "loss": 0.341, "step": 24909 }, { "epoch": 2.5325335502236683, "grad_norm": 0.28545570373535156, "learning_rate": 5.7561883361633165e-06, "loss": 0.3558, "step": 24910 }, { "epoch": 2.5326352175681173, "grad_norm": 0.25497084856033325, "learning_rate": 5.755837530379415e-06, "loss": 0.3168, "step": 24911 }, { "epoch": 2.532736884912566, "grad_norm": 0.2622317969799042, "learning_rate": 5.755486720787784e-06, "loss": 0.3181, "step": 24912 }, { "epoch": 2.532838552257015, "grad_norm": 0.2729264497756958, "learning_rate": 5.75513590739019e-06, "loss": 0.3485, "step": 24913 }, { "epoch": 2.532940219601464, "grad_norm": 0.29767462611198425, "learning_rate": 5.754785090188401e-06, "loss": 0.3322, "step": 24914 }, { "epoch": 2.533041886945913, "grad_norm": 0.2731243073940277, "learning_rate": 5.754434269184182e-06, "loss": 0.3032, "step": 24915 }, { "epoch": 2.533143554290362, "grad_norm": 0.26575666666030884, "learning_rate": 5.754083444379306e-06, "loss": 0.3377, "step": 24916 }, { "epoch": 2.533245221634811, "grad_norm": 0.2606641948223114, "learning_rate": 5.753732615775534e-06, "loss": 0.3467, "step": 24917 }, { "epoch": 2.53334688897926, "grad_norm": 0.2710684537887573, "learning_rate": 5.753381783374637e-06, "loss": 0.2997, "step": 24918 }, { "epoch": 2.533448556323709, "grad_norm": 0.289977103471756, "learning_rate": 5.753030947178382e-06, "loss": 0.3378, "step": 24919 }, { "epoch": 2.5335502236681577, "grad_norm": 0.27453216910362244, "learning_rate": 5.752680107188535e-06, "loss": 0.3246, "step": 24920 }, { "epoch": 2.5336518910126067, "grad_norm": 0.2886914014816284, "learning_rate": 5.752329263406866e-06, "loss": 0.3372, "step": 24921 }, { "epoch": 2.5337535583570556, "grad_norm": 0.30777665972709656, "learning_rate": 5.75197841583514e-06, "loss": 0.3383, "step": 24922 }, { "epoch": 2.5338552257015046, "grad_norm": 0.2849099040031433, "learning_rate": 5.751627564475128e-06, "loss": 0.3506, "step": 24923 }, { "epoch": 2.5339568930459535, "grad_norm": 0.27474674582481384, "learning_rate": 5.751276709328592e-06, "loss": 0.3328, "step": 24924 }, { "epoch": 2.5340585603904024, "grad_norm": 0.2616085410118103, "learning_rate": 5.750925850397302e-06, "loss": 0.31, "step": 24925 }, { "epoch": 2.5341602277348514, "grad_norm": 0.2782042622566223, "learning_rate": 5.7505749876830275e-06, "loss": 0.3432, "step": 24926 }, { "epoch": 2.5342618950793003, "grad_norm": 0.2940080761909485, "learning_rate": 5.750224121187534e-06, "loss": 0.322, "step": 24927 }, { "epoch": 2.5343635624237493, "grad_norm": 0.2651751637458801, "learning_rate": 5.74987325091259e-06, "loss": 0.3171, "step": 24928 }, { "epoch": 2.534465229768198, "grad_norm": 0.29207664728164673, "learning_rate": 5.7495223768599625e-06, "loss": 0.3195, "step": 24929 }, { "epoch": 2.534566897112647, "grad_norm": 0.31526604294776917, "learning_rate": 5.74917149903142e-06, "loss": 0.3475, "step": 24930 }, { "epoch": 2.5346685644570965, "grad_norm": 0.2902719974517822, "learning_rate": 5.748820617428729e-06, "loss": 0.3237, "step": 24931 }, { "epoch": 2.5347702318015455, "grad_norm": 0.25999438762664795, "learning_rate": 5.748469732053658e-06, "loss": 0.3289, "step": 24932 }, { "epoch": 2.5348718991459944, "grad_norm": 0.28157225251197815, "learning_rate": 5.748118842907974e-06, "loss": 0.3311, "step": 24933 }, { "epoch": 2.5349735664904434, "grad_norm": 0.28417494893074036, "learning_rate": 5.747767949993445e-06, "loss": 0.3684, "step": 24934 }, { "epoch": 2.5350752338348923, "grad_norm": 0.2872820794582367, "learning_rate": 5.747417053311838e-06, "loss": 0.3473, "step": 24935 }, { "epoch": 2.5351769011793412, "grad_norm": 0.26661011576652527, "learning_rate": 5.747066152864923e-06, "loss": 0.2957, "step": 24936 }, { "epoch": 2.53527856852379, "grad_norm": 0.27134183049201965, "learning_rate": 5.746715248654465e-06, "loss": 0.3024, "step": 24937 }, { "epoch": 2.535380235868239, "grad_norm": 0.27874675393104553, "learning_rate": 5.746364340682235e-06, "loss": 0.3157, "step": 24938 }, { "epoch": 2.535481903212688, "grad_norm": 0.27462247014045715, "learning_rate": 5.746013428949997e-06, "loss": 0.3493, "step": 24939 }, { "epoch": 2.535583570557137, "grad_norm": 0.27867060899734497, "learning_rate": 5.74566251345952e-06, "loss": 0.357, "step": 24940 }, { "epoch": 2.535685237901586, "grad_norm": 0.2898765206336975, "learning_rate": 5.745311594212575e-06, "loss": 0.3387, "step": 24941 }, { "epoch": 2.535786905246035, "grad_norm": 0.26566362380981445, "learning_rate": 5.744960671210925e-06, "loss": 0.3482, "step": 24942 }, { "epoch": 2.535888572590484, "grad_norm": 0.30920788645744324, "learning_rate": 5.744609744456342e-06, "loss": 0.3555, "step": 24943 }, { "epoch": 2.5359902399349328, "grad_norm": 0.27529671788215637, "learning_rate": 5.744258813950592e-06, "loss": 0.3265, "step": 24944 }, { "epoch": 2.5360919072793817, "grad_norm": 0.27097487449645996, "learning_rate": 5.743907879695439e-06, "loss": 0.3406, "step": 24945 }, { "epoch": 2.5361935746238307, "grad_norm": 0.2669452428817749, "learning_rate": 5.743556941692659e-06, "loss": 0.3549, "step": 24946 }, { "epoch": 2.53629524196828, "grad_norm": 0.2554948031902313, "learning_rate": 5.743205999944015e-06, "loss": 0.3489, "step": 24947 }, { "epoch": 2.536396909312729, "grad_norm": 0.30474767088890076, "learning_rate": 5.742855054451275e-06, "loss": 0.3614, "step": 24948 }, { "epoch": 2.536498576657178, "grad_norm": 0.2847587466239929, "learning_rate": 5.74250410521621e-06, "loss": 0.3565, "step": 24949 }, { "epoch": 2.536600244001627, "grad_norm": 0.25813278555870056, "learning_rate": 5.742153152240581e-06, "loss": 0.3404, "step": 24950 }, { "epoch": 2.536701911346076, "grad_norm": 0.2723420262336731, "learning_rate": 5.741802195526166e-06, "loss": 0.3258, "step": 24951 }, { "epoch": 2.5368035786905248, "grad_norm": 0.2767994999885559, "learning_rate": 5.741451235074725e-06, "loss": 0.314, "step": 24952 }, { "epoch": 2.5369052460349737, "grad_norm": 0.2863185405731201, "learning_rate": 5.7411002708880305e-06, "loss": 0.3483, "step": 24953 }, { "epoch": 2.5370069133794226, "grad_norm": 0.27313321828842163, "learning_rate": 5.740749302967848e-06, "loss": 0.3489, "step": 24954 }, { "epoch": 2.5371085807238716, "grad_norm": 0.25298139452934265, "learning_rate": 5.740398331315945e-06, "loss": 0.3345, "step": 24955 }, { "epoch": 2.5372102480683205, "grad_norm": 0.27518096566200256, "learning_rate": 5.740047355934093e-06, "loss": 0.3479, "step": 24956 }, { "epoch": 2.5373119154127695, "grad_norm": 0.2820976674556732, "learning_rate": 5.739696376824059e-06, "loss": 0.3929, "step": 24957 }, { "epoch": 2.5374135827572184, "grad_norm": 0.2899649143218994, "learning_rate": 5.739345393987608e-06, "loss": 0.3422, "step": 24958 }, { "epoch": 2.5375152501016673, "grad_norm": 0.2690281569957733, "learning_rate": 5.7389944074265134e-06, "loss": 0.3101, "step": 24959 }, { "epoch": 2.5376169174461163, "grad_norm": 0.2754282057285309, "learning_rate": 5.738643417142538e-06, "loss": 0.3404, "step": 24960 }, { "epoch": 2.5377185847905652, "grad_norm": 0.26301339268684387, "learning_rate": 5.738292423137454e-06, "loss": 0.3603, "step": 24961 }, { "epoch": 2.537820252135014, "grad_norm": 0.2638605833053589, "learning_rate": 5.737941425413028e-06, "loss": 0.3158, "step": 24962 }, { "epoch": 2.537921919479463, "grad_norm": 0.2728877067565918, "learning_rate": 5.737590423971029e-06, "loss": 0.3193, "step": 24963 }, { "epoch": 2.538023586823912, "grad_norm": 0.26675570011138916, "learning_rate": 5.737239418813225e-06, "loss": 0.3224, "step": 24964 }, { "epoch": 2.538125254168361, "grad_norm": 0.2629038393497467, "learning_rate": 5.736888409941381e-06, "loss": 0.3467, "step": 24965 }, { "epoch": 2.53822692151281, "grad_norm": 0.2569040358066559, "learning_rate": 5.736537397357271e-06, "loss": 0.3026, "step": 24966 }, { "epoch": 2.538328588857259, "grad_norm": 0.2890385091304779, "learning_rate": 5.736186381062661e-06, "loss": 0.3396, "step": 24967 }, { "epoch": 2.538430256201708, "grad_norm": 0.2657014727592468, "learning_rate": 5.735835361059318e-06, "loss": 0.3409, "step": 24968 }, { "epoch": 2.5385319235461568, "grad_norm": 0.2510853111743927, "learning_rate": 5.735484337349012e-06, "loss": 0.3151, "step": 24969 }, { "epoch": 2.5386335908906057, "grad_norm": 0.27315637469291687, "learning_rate": 5.735133309933508e-06, "loss": 0.3618, "step": 24970 }, { "epoch": 2.5387352582350546, "grad_norm": 0.2636108696460724, "learning_rate": 5.734782278814581e-06, "loss": 0.3553, "step": 24971 }, { "epoch": 2.538836925579504, "grad_norm": 0.24991874396800995, "learning_rate": 5.734431243993993e-06, "loss": 0.3213, "step": 24972 }, { "epoch": 2.538938592923953, "grad_norm": 0.2673521339893341, "learning_rate": 5.734080205473516e-06, "loss": 0.3162, "step": 24973 }, { "epoch": 2.539040260268402, "grad_norm": 0.2543559670448303, "learning_rate": 5.733729163254918e-06, "loss": 0.3089, "step": 24974 }, { "epoch": 2.539141927612851, "grad_norm": 0.2611965239048004, "learning_rate": 5.733378117339963e-06, "loss": 0.3328, "step": 24975 }, { "epoch": 2.5392435949573, "grad_norm": 0.2613045871257782, "learning_rate": 5.733027067730427e-06, "loss": 0.322, "step": 24976 }, { "epoch": 2.5393452623017487, "grad_norm": 0.25081199407577515, "learning_rate": 5.732676014428075e-06, "loss": 0.3365, "step": 24977 }, { "epoch": 2.5394469296461977, "grad_norm": 0.253080278635025, "learning_rate": 5.732324957434673e-06, "loss": 0.3052, "step": 24978 }, { "epoch": 2.5395485969906466, "grad_norm": 0.26773688197135925, "learning_rate": 5.731973896751995e-06, "loss": 0.3469, "step": 24979 }, { "epoch": 2.5396502643350956, "grad_norm": 0.26627588272094727, "learning_rate": 5.731622832381804e-06, "loss": 0.3363, "step": 24980 }, { "epoch": 2.5397519316795445, "grad_norm": 0.27080410718917847, "learning_rate": 5.73127176432587e-06, "loss": 0.3272, "step": 24981 }, { "epoch": 2.5398535990239934, "grad_norm": 0.2796337306499481, "learning_rate": 5.730920692585965e-06, "loss": 0.3789, "step": 24982 }, { "epoch": 2.5399552663684424, "grad_norm": 0.27284619212150574, "learning_rate": 5.730569617163854e-06, "loss": 0.3064, "step": 24983 }, { "epoch": 2.5400569337128913, "grad_norm": 0.2820349335670471, "learning_rate": 5.730218538061308e-06, "loss": 0.3282, "step": 24984 }, { "epoch": 2.5401586010573403, "grad_norm": 0.2737152576446533, "learning_rate": 5.729867455280094e-06, "loss": 0.327, "step": 24985 }, { "epoch": 2.540260268401789, "grad_norm": 0.2659364938735962, "learning_rate": 5.72951636882198e-06, "loss": 0.3391, "step": 24986 }, { "epoch": 2.540361935746238, "grad_norm": 0.2748071253299713, "learning_rate": 5.7291652786887375e-06, "loss": 0.3386, "step": 24987 }, { "epoch": 2.5404636030906875, "grad_norm": 0.27068793773651123, "learning_rate": 5.728814184882133e-06, "loss": 0.3508, "step": 24988 }, { "epoch": 2.5405652704351365, "grad_norm": 0.2711400091648102, "learning_rate": 5.728463087403937e-06, "loss": 0.3148, "step": 24989 }, { "epoch": 2.5406669377795854, "grad_norm": 0.2773456275463104, "learning_rate": 5.728111986255915e-06, "loss": 0.3186, "step": 24990 }, { "epoch": 2.5407686051240344, "grad_norm": 0.2695949673652649, "learning_rate": 5.727760881439839e-06, "loss": 0.3015, "step": 24991 }, { "epoch": 2.5408702724684833, "grad_norm": 0.2629362940788269, "learning_rate": 5.727409772957477e-06, "loss": 0.3277, "step": 24992 }, { "epoch": 2.5409719398129322, "grad_norm": 0.26616764068603516, "learning_rate": 5.727058660810596e-06, "loss": 0.2982, "step": 24993 }, { "epoch": 2.541073607157381, "grad_norm": 0.2533576786518097, "learning_rate": 5.726707545000968e-06, "loss": 0.3199, "step": 24994 }, { "epoch": 2.54117527450183, "grad_norm": 0.26768696308135986, "learning_rate": 5.726356425530358e-06, "loss": 0.3276, "step": 24995 }, { "epoch": 2.541276941846279, "grad_norm": 0.25667276978492737, "learning_rate": 5.726005302400538e-06, "loss": 0.3247, "step": 24996 }, { "epoch": 2.541378609190728, "grad_norm": 0.2632982134819031, "learning_rate": 5.725654175613277e-06, "loss": 0.3122, "step": 24997 }, { "epoch": 2.541480276535177, "grad_norm": 0.27093932032585144, "learning_rate": 5.725303045170341e-06, "loss": 0.3302, "step": 24998 }, { "epoch": 2.541581943879626, "grad_norm": 0.2611387073993683, "learning_rate": 5.724951911073502e-06, "loss": 0.3394, "step": 24999 }, { "epoch": 2.541683611224075, "grad_norm": 0.28111591935157776, "learning_rate": 5.724600773324527e-06, "loss": 0.3498, "step": 25000 }, { "epoch": 2.5417852785685238, "grad_norm": 0.2621655762195587, "learning_rate": 5.724249631925184e-06, "loss": 0.324, "step": 25001 }, { "epoch": 2.5418869459129727, "grad_norm": 0.28873834013938904, "learning_rate": 5.723898486877246e-06, "loss": 0.3087, "step": 25002 }, { "epoch": 2.5419886132574216, "grad_norm": 0.2843886911869049, "learning_rate": 5.723547338182478e-06, "loss": 0.3406, "step": 25003 }, { "epoch": 2.5420902806018706, "grad_norm": 0.27809393405914307, "learning_rate": 5.723196185842651e-06, "loss": 0.3292, "step": 25004 }, { "epoch": 2.5421919479463195, "grad_norm": 0.26004892587661743, "learning_rate": 5.722845029859533e-06, "loss": 0.3088, "step": 25005 }, { "epoch": 2.5422936152907685, "grad_norm": 0.25820258259773254, "learning_rate": 5.722493870234893e-06, "loss": 0.3198, "step": 25006 }, { "epoch": 2.5423952826352174, "grad_norm": 0.2839438021183014, "learning_rate": 5.722142706970502e-06, "loss": 0.3362, "step": 25007 }, { "epoch": 2.5424969499796664, "grad_norm": 0.2831180989742279, "learning_rate": 5.721791540068127e-06, "loss": 0.3665, "step": 25008 }, { "epoch": 2.5425986173241153, "grad_norm": 0.2685728371143341, "learning_rate": 5.721440369529537e-06, "loss": 0.3361, "step": 25009 }, { "epoch": 2.5427002846685642, "grad_norm": 0.2689988613128662, "learning_rate": 5.721089195356503e-06, "loss": 0.3786, "step": 25010 }, { "epoch": 2.542801952013013, "grad_norm": 0.27889901399612427, "learning_rate": 5.720738017550791e-06, "loss": 0.3399, "step": 25011 }, { "epoch": 2.542903619357462, "grad_norm": 0.27449846267700195, "learning_rate": 5.7203868361141745e-06, "loss": 0.3169, "step": 25012 }, { "epoch": 2.5430052867019115, "grad_norm": 0.24966321885585785, "learning_rate": 5.720035651048419e-06, "loss": 0.3068, "step": 25013 }, { "epoch": 2.5431069540463604, "grad_norm": 0.26800164580345154, "learning_rate": 5.719684462355296e-06, "loss": 0.3739, "step": 25014 }, { "epoch": 2.5432086213908094, "grad_norm": 0.2667123079299927, "learning_rate": 5.719333270036573e-06, "loss": 0.3475, "step": 25015 }, { "epoch": 2.5433102887352583, "grad_norm": 0.24208344519138336, "learning_rate": 5.71898207409402e-06, "loss": 0.3305, "step": 25016 }, { "epoch": 2.5434119560797073, "grad_norm": 0.2738974392414093, "learning_rate": 5.718630874529406e-06, "loss": 0.3539, "step": 25017 }, { "epoch": 2.543513623424156, "grad_norm": 0.28266748785972595, "learning_rate": 5.7182796713445014e-06, "loss": 0.3598, "step": 25018 }, { "epoch": 2.543615290768605, "grad_norm": 0.2726495563983917, "learning_rate": 5.7179284645410735e-06, "loss": 0.3398, "step": 25019 }, { "epoch": 2.543716958113054, "grad_norm": 0.2793887257575989, "learning_rate": 5.717577254120893e-06, "loss": 0.3268, "step": 25020 }, { "epoch": 2.543818625457503, "grad_norm": 0.28373533487319946, "learning_rate": 5.717226040085728e-06, "loss": 0.3396, "step": 25021 }, { "epoch": 2.543920292801952, "grad_norm": 0.280269980430603, "learning_rate": 5.716874822437351e-06, "loss": 0.3816, "step": 25022 }, { "epoch": 2.544021960146401, "grad_norm": 0.28809961676597595, "learning_rate": 5.716523601177526e-06, "loss": 0.3267, "step": 25023 }, { "epoch": 2.54412362749085, "grad_norm": 0.27724167704582214, "learning_rate": 5.716172376308027e-06, "loss": 0.2974, "step": 25024 }, { "epoch": 2.544225294835299, "grad_norm": 0.27554377913475037, "learning_rate": 5.715821147830621e-06, "loss": 0.3476, "step": 25025 }, { "epoch": 2.5443269621797477, "grad_norm": 0.27556687593460083, "learning_rate": 5.715469915747078e-06, "loss": 0.2933, "step": 25026 }, { "epoch": 2.5444286295241967, "grad_norm": 0.26901376247406006, "learning_rate": 5.715118680059169e-06, "loss": 0.3145, "step": 25027 }, { "epoch": 2.5445302968686456, "grad_norm": 0.3020607829093933, "learning_rate": 5.714767440768661e-06, "loss": 0.3241, "step": 25028 }, { "epoch": 2.544631964213095, "grad_norm": 0.26791876554489136, "learning_rate": 5.714416197877325e-06, "loss": 0.2955, "step": 25029 }, { "epoch": 2.544733631557544, "grad_norm": 0.27793464064598083, "learning_rate": 5.714064951386929e-06, "loss": 0.3334, "step": 25030 }, { "epoch": 2.544835298901993, "grad_norm": 0.27152755856513977, "learning_rate": 5.713713701299244e-06, "loss": 0.3288, "step": 25031 }, { "epoch": 2.544936966246442, "grad_norm": 0.2528654932975769, "learning_rate": 5.7133624476160395e-06, "loss": 0.3584, "step": 25032 }, { "epoch": 2.5450386335908908, "grad_norm": 0.26641732454299927, "learning_rate": 5.713011190339085e-06, "loss": 0.3237, "step": 25033 }, { "epoch": 2.5451403009353397, "grad_norm": 0.27959853410720825, "learning_rate": 5.712659929470149e-06, "loss": 0.3429, "step": 25034 }, { "epoch": 2.5452419682797887, "grad_norm": 0.26964426040649414, "learning_rate": 5.712308665011001e-06, "loss": 0.3375, "step": 25035 }, { "epoch": 2.5453436356242376, "grad_norm": 0.270136296749115, "learning_rate": 5.71195739696341e-06, "loss": 0.3118, "step": 25036 }, { "epoch": 2.5454453029686865, "grad_norm": 0.2765811085700989, "learning_rate": 5.71160612532915e-06, "loss": 0.3336, "step": 25037 }, { "epoch": 2.5455469703131355, "grad_norm": 0.29032236337661743, "learning_rate": 5.7112548501099866e-06, "loss": 0.3478, "step": 25038 }, { "epoch": 2.5456486376575844, "grad_norm": 0.27339500188827515, "learning_rate": 5.710903571307689e-06, "loss": 0.3084, "step": 25039 }, { "epoch": 2.5457503050020334, "grad_norm": 0.3007728159427643, "learning_rate": 5.710552288924028e-06, "loss": 0.3351, "step": 25040 }, { "epoch": 2.5458519723464823, "grad_norm": 0.28767162561416626, "learning_rate": 5.710201002960773e-06, "loss": 0.3637, "step": 25041 }, { "epoch": 2.5459536396909312, "grad_norm": 0.28896647691726685, "learning_rate": 5.7098497134196965e-06, "loss": 0.3303, "step": 25042 }, { "epoch": 2.54605530703538, "grad_norm": 0.26836198568344116, "learning_rate": 5.709498420302565e-06, "loss": 0.3401, "step": 25043 }, { "epoch": 2.546156974379829, "grad_norm": 0.30471134185791016, "learning_rate": 5.709147123611147e-06, "loss": 0.3606, "step": 25044 }, { "epoch": 2.546258641724278, "grad_norm": 0.28204765915870667, "learning_rate": 5.7087958233472175e-06, "loss": 0.2956, "step": 25045 }, { "epoch": 2.546360309068727, "grad_norm": 0.2540664076805115, "learning_rate": 5.708444519512539e-06, "loss": 0.3534, "step": 25046 }, { "epoch": 2.546461976413176, "grad_norm": 0.24884507060050964, "learning_rate": 5.7080932121088894e-06, "loss": 0.3339, "step": 25047 }, { "epoch": 2.546563643757625, "grad_norm": 0.26464125514030457, "learning_rate": 5.707741901138033e-06, "loss": 0.35, "step": 25048 }, { "epoch": 2.546665311102074, "grad_norm": 0.27631381154060364, "learning_rate": 5.707390586601741e-06, "loss": 0.3248, "step": 25049 }, { "epoch": 2.5467669784465228, "grad_norm": 0.27533191442489624, "learning_rate": 5.707039268501784e-06, "loss": 0.3322, "step": 25050 }, { "epoch": 2.5468686457909717, "grad_norm": 0.26006820797920227, "learning_rate": 5.706687946839928e-06, "loss": 0.3577, "step": 25051 }, { "epoch": 2.5469703131354207, "grad_norm": 0.2745732069015503, "learning_rate": 5.706336621617951e-06, "loss": 0.3717, "step": 25052 }, { "epoch": 2.5470719804798696, "grad_norm": 0.2566136121749878, "learning_rate": 5.7059852928376145e-06, "loss": 0.3071, "step": 25053 }, { "epoch": 2.547173647824319, "grad_norm": 0.2545970678329468, "learning_rate": 5.705633960500693e-06, "loss": 0.364, "step": 25054 }, { "epoch": 2.547275315168768, "grad_norm": 0.26900163292884827, "learning_rate": 5.7052826246089564e-06, "loss": 0.3403, "step": 25055 }, { "epoch": 2.547376982513217, "grad_norm": 0.26750868558883667, "learning_rate": 5.704931285164171e-06, "loss": 0.3353, "step": 25056 }, { "epoch": 2.547478649857666, "grad_norm": 0.28912368416786194, "learning_rate": 5.7045799421681105e-06, "loss": 0.3396, "step": 25057 }, { "epoch": 2.5475803172021148, "grad_norm": 0.2811097800731659, "learning_rate": 5.704228595622544e-06, "loss": 0.3435, "step": 25058 }, { "epoch": 2.5476819845465637, "grad_norm": 0.26299145817756653, "learning_rate": 5.7038772455292415e-06, "loss": 0.3528, "step": 25059 }, { "epoch": 2.5477836518910126, "grad_norm": 0.28302350640296936, "learning_rate": 5.703525891889971e-06, "loss": 0.3193, "step": 25060 }, { "epoch": 2.5478853192354616, "grad_norm": 0.2638068199157715, "learning_rate": 5.703174534706506e-06, "loss": 0.3278, "step": 25061 }, { "epoch": 2.5479869865799105, "grad_norm": 0.28424403071403503, "learning_rate": 5.702823173980614e-06, "loss": 0.3311, "step": 25062 }, { "epoch": 2.5480886539243595, "grad_norm": 0.30111172795295715, "learning_rate": 5.702471809714066e-06, "loss": 0.373, "step": 25063 }, { "epoch": 2.5481903212688084, "grad_norm": 0.2540677487850189, "learning_rate": 5.702120441908632e-06, "loss": 0.3551, "step": 25064 }, { "epoch": 2.5482919886132573, "grad_norm": 0.27170535922050476, "learning_rate": 5.701769070566083e-06, "loss": 0.3076, "step": 25065 }, { "epoch": 2.5483936559577063, "grad_norm": 0.26087313890457153, "learning_rate": 5.701417695688188e-06, "loss": 0.3383, "step": 25066 }, { "epoch": 2.5484953233021552, "grad_norm": 0.2878233790397644, "learning_rate": 5.7010663172767155e-06, "loss": 0.3527, "step": 25067 }, { "epoch": 2.548596990646604, "grad_norm": 0.2760574221611023, "learning_rate": 5.700714935333439e-06, "loss": 0.3094, "step": 25068 }, { "epoch": 2.5486986579910536, "grad_norm": 0.3098098039627075, "learning_rate": 5.700363549860126e-06, "loss": 0.3308, "step": 25069 }, { "epoch": 2.5488003253355025, "grad_norm": 0.27800247073173523, "learning_rate": 5.700012160858549e-06, "loss": 0.3588, "step": 25070 }, { "epoch": 2.5489019926799514, "grad_norm": 0.2553849518299103, "learning_rate": 5.6996607683304765e-06, "loss": 0.3451, "step": 25071 }, { "epoch": 2.5490036600244004, "grad_norm": 0.2812579870223999, "learning_rate": 5.699309372277679e-06, "loss": 0.3089, "step": 25072 }, { "epoch": 2.5491053273688493, "grad_norm": 0.28364330530166626, "learning_rate": 5.698957972701928e-06, "loss": 0.3255, "step": 25073 }, { "epoch": 2.5492069947132983, "grad_norm": 0.28367218375205994, "learning_rate": 5.698606569604992e-06, "loss": 0.3657, "step": 25074 }, { "epoch": 2.549308662057747, "grad_norm": 0.2853313386440277, "learning_rate": 5.698255162988644e-06, "loss": 0.3402, "step": 25075 }, { "epoch": 2.549410329402196, "grad_norm": 0.2802075743675232, "learning_rate": 5.69790375285465e-06, "loss": 0.3552, "step": 25076 }, { "epoch": 2.549511996746645, "grad_norm": 0.25989091396331787, "learning_rate": 5.697552339204784e-06, "loss": 0.3343, "step": 25077 }, { "epoch": 2.549613664091094, "grad_norm": 0.28444111347198486, "learning_rate": 5.697200922040815e-06, "loss": 0.3736, "step": 25078 }, { "epoch": 2.549715331435543, "grad_norm": 0.26012852787971497, "learning_rate": 5.696849501364512e-06, "loss": 0.3321, "step": 25079 }, { "epoch": 2.549816998779992, "grad_norm": 0.2627638876438141, "learning_rate": 5.696498077177649e-06, "loss": 0.3264, "step": 25080 }, { "epoch": 2.549918666124441, "grad_norm": 0.2881788909435272, "learning_rate": 5.696146649481993e-06, "loss": 0.3569, "step": 25081 }, { "epoch": 2.55002033346889, "grad_norm": 0.2619134187698364, "learning_rate": 5.695795218279315e-06, "loss": 0.3029, "step": 25082 }, { "epoch": 2.5501220008133387, "grad_norm": 0.28677183389663696, "learning_rate": 5.695443783571388e-06, "loss": 0.3358, "step": 25083 }, { "epoch": 2.5502236681577877, "grad_norm": 0.28367626667022705, "learning_rate": 5.695092345359978e-06, "loss": 0.3358, "step": 25084 }, { "epoch": 2.5503253355022366, "grad_norm": 0.29916542768478394, "learning_rate": 5.69474090364686e-06, "loss": 0.2952, "step": 25085 }, { "epoch": 2.5504270028466856, "grad_norm": 0.263884037733078, "learning_rate": 5.6943894584338026e-06, "loss": 0.3504, "step": 25086 }, { "epoch": 2.5505286701911345, "grad_norm": 0.2855117619037628, "learning_rate": 5.694038009722574e-06, "loss": 0.3609, "step": 25087 }, { "epoch": 2.5506303375355834, "grad_norm": 0.2639024555683136, "learning_rate": 5.693686557514949e-06, "loss": 0.3563, "step": 25088 }, { "epoch": 2.5507320048800324, "grad_norm": 0.30052804946899414, "learning_rate": 5.693335101812695e-06, "loss": 0.3247, "step": 25089 }, { "epoch": 2.5508336722244813, "grad_norm": 0.2720715403556824, "learning_rate": 5.692983642617584e-06, "loss": 0.3121, "step": 25090 }, { "epoch": 2.5509353395689303, "grad_norm": 0.2874367833137512, "learning_rate": 5.692632179931386e-06, "loss": 0.328, "step": 25091 }, { "epoch": 2.551037006913379, "grad_norm": 0.29543107748031616, "learning_rate": 5.69228071375587e-06, "loss": 0.323, "step": 25092 }, { "epoch": 2.551138674257828, "grad_norm": 0.2698494493961334, "learning_rate": 5.69192924409281e-06, "loss": 0.3199, "step": 25093 }, { "epoch": 2.551240341602277, "grad_norm": 0.26043522357940674, "learning_rate": 5.6915777709439745e-06, "loss": 0.3579, "step": 25094 }, { "epoch": 2.5513420089467265, "grad_norm": 0.28523242473602295, "learning_rate": 5.691226294311135e-06, "loss": 0.3115, "step": 25095 }, { "epoch": 2.5514436762911754, "grad_norm": 0.293111652135849, "learning_rate": 5.6908748141960605e-06, "loss": 0.3401, "step": 25096 }, { "epoch": 2.5515453436356244, "grad_norm": 0.2701210379600525, "learning_rate": 5.690523330600524e-06, "loss": 0.3146, "step": 25097 }, { "epoch": 2.5516470109800733, "grad_norm": 0.25912633538246155, "learning_rate": 5.690171843526294e-06, "loss": 0.3229, "step": 25098 }, { "epoch": 2.5517486783245222, "grad_norm": 0.29006338119506836, "learning_rate": 5.689820352975143e-06, "loss": 0.3295, "step": 25099 }, { "epoch": 2.551850345668971, "grad_norm": 0.25767889618873596, "learning_rate": 5.689468858948841e-06, "loss": 0.3375, "step": 25100 }, { "epoch": 2.55195201301342, "grad_norm": 0.2806803584098816, "learning_rate": 5.689117361449159e-06, "loss": 0.3237, "step": 25101 }, { "epoch": 2.552053680357869, "grad_norm": 0.25285840034484863, "learning_rate": 5.688765860477866e-06, "loss": 0.3066, "step": 25102 }, { "epoch": 2.552155347702318, "grad_norm": 0.2757473886013031, "learning_rate": 5.688414356036736e-06, "loss": 0.3343, "step": 25103 }, { "epoch": 2.552257015046767, "grad_norm": 0.28126466274261475, "learning_rate": 5.688062848127538e-06, "loss": 0.3407, "step": 25104 }, { "epoch": 2.552358682391216, "grad_norm": 0.254839688539505, "learning_rate": 5.687711336752042e-06, "loss": 0.313, "step": 25105 }, { "epoch": 2.552460349735665, "grad_norm": 0.2747095823287964, "learning_rate": 5.68735982191202e-06, "loss": 0.3335, "step": 25106 }, { "epoch": 2.5525620170801138, "grad_norm": 0.26864832639694214, "learning_rate": 5.687008303609241e-06, "loss": 0.3137, "step": 25107 }, { "epoch": 2.5526636844245627, "grad_norm": 0.2957584857940674, "learning_rate": 5.686656781845481e-06, "loss": 0.3293, "step": 25108 }, { "epoch": 2.5527653517690116, "grad_norm": 0.28485819697380066, "learning_rate": 5.686305256622505e-06, "loss": 0.3216, "step": 25109 }, { "epoch": 2.552867019113461, "grad_norm": 0.29070669412612915, "learning_rate": 5.685953727942086e-06, "loss": 0.3419, "step": 25110 }, { "epoch": 2.55296868645791, "grad_norm": 0.25708383321762085, "learning_rate": 5.685602195805996e-06, "loss": 0.3246, "step": 25111 }, { "epoch": 2.553070353802359, "grad_norm": 0.2605331242084503, "learning_rate": 5.685250660216004e-06, "loss": 0.3536, "step": 25112 }, { "epoch": 2.553172021146808, "grad_norm": 0.24971304833889008, "learning_rate": 5.684899121173883e-06, "loss": 0.3661, "step": 25113 }, { "epoch": 2.553273688491257, "grad_norm": 0.30675506591796875, "learning_rate": 5.684547578681404e-06, "loss": 0.3422, "step": 25114 }, { "epoch": 2.5533753558357057, "grad_norm": 0.2575954496860504, "learning_rate": 5.684196032740335e-06, "loss": 0.3217, "step": 25115 }, { "epoch": 2.5534770231801547, "grad_norm": 0.2671121656894684, "learning_rate": 5.68384448335245e-06, "loss": 0.3291, "step": 25116 }, { "epoch": 2.5535786905246036, "grad_norm": 0.260873407125473, "learning_rate": 5.683492930519518e-06, "loss": 0.3505, "step": 25117 }, { "epoch": 2.5536803578690526, "grad_norm": 0.2825425863265991, "learning_rate": 5.683141374243311e-06, "loss": 0.3613, "step": 25118 }, { "epoch": 2.5537820252135015, "grad_norm": 0.28034868836402893, "learning_rate": 5.682789814525602e-06, "loss": 0.356, "step": 25119 }, { "epoch": 2.5538836925579504, "grad_norm": 0.2800302803516388, "learning_rate": 5.682438251368159e-06, "loss": 0.3387, "step": 25120 }, { "epoch": 2.5539853599023994, "grad_norm": 0.2640661895275116, "learning_rate": 5.682086684772754e-06, "loss": 0.3426, "step": 25121 }, { "epoch": 2.5540870272468483, "grad_norm": 0.2593868672847748, "learning_rate": 5.681735114741158e-06, "loss": 0.3491, "step": 25122 }, { "epoch": 2.5541886945912973, "grad_norm": 0.2673630714416504, "learning_rate": 5.681383541275144e-06, "loss": 0.3615, "step": 25123 }, { "epoch": 2.554290361935746, "grad_norm": 0.2645221948623657, "learning_rate": 5.681031964376481e-06, "loss": 0.3328, "step": 25124 }, { "epoch": 2.554392029280195, "grad_norm": 0.2717890739440918, "learning_rate": 5.6806803840469405e-06, "loss": 0.3143, "step": 25125 }, { "epoch": 2.554493696624644, "grad_norm": 0.2574450373649597, "learning_rate": 5.680328800288293e-06, "loss": 0.3326, "step": 25126 }, { "epoch": 2.554595363969093, "grad_norm": 0.28766047954559326, "learning_rate": 5.6799772131023115e-06, "loss": 0.3522, "step": 25127 }, { "epoch": 2.554697031313542, "grad_norm": 0.2667376697063446, "learning_rate": 5.679625622490767e-06, "loss": 0.3355, "step": 25128 }, { "epoch": 2.554798698657991, "grad_norm": 0.26249629259109497, "learning_rate": 5.67927402845543e-06, "loss": 0.3545, "step": 25129 }, { "epoch": 2.55490036600244, "grad_norm": 0.2796195447444916, "learning_rate": 5.678922430998072e-06, "loss": 0.3525, "step": 25130 }, { "epoch": 2.555002033346889, "grad_norm": 0.25226789712905884, "learning_rate": 5.678570830120465e-06, "loss": 0.3046, "step": 25131 }, { "epoch": 2.5551037006913377, "grad_norm": 0.2702237069606781, "learning_rate": 5.678219225824377e-06, "loss": 0.3393, "step": 25132 }, { "epoch": 2.5552053680357867, "grad_norm": 0.28940436244010925, "learning_rate": 5.6778676181115826e-06, "loss": 0.3511, "step": 25133 }, { "epoch": 2.5553070353802356, "grad_norm": 0.2809617519378662, "learning_rate": 5.6775160069838534e-06, "loss": 0.346, "step": 25134 }, { "epoch": 2.5554087027246846, "grad_norm": 0.2737455666065216, "learning_rate": 5.677164392442958e-06, "loss": 0.3429, "step": 25135 }, { "epoch": 2.555510370069134, "grad_norm": 0.2687351107597351, "learning_rate": 5.67681277449067e-06, "loss": 0.3202, "step": 25136 }, { "epoch": 2.555612037413583, "grad_norm": 0.2683992087841034, "learning_rate": 5.67646115312876e-06, "loss": 0.3147, "step": 25137 }, { "epoch": 2.555713704758032, "grad_norm": 0.2842414379119873, "learning_rate": 5.676109528358998e-06, "loss": 0.3372, "step": 25138 }, { "epoch": 2.5558153721024808, "grad_norm": 0.28245049715042114, "learning_rate": 5.67575790018316e-06, "loss": 0.3241, "step": 25139 }, { "epoch": 2.5559170394469297, "grad_norm": 0.2737075686454773, "learning_rate": 5.675406268603012e-06, "loss": 0.3482, "step": 25140 }, { "epoch": 2.5560187067913787, "grad_norm": 0.2538659870624542, "learning_rate": 5.675054633620329e-06, "loss": 0.318, "step": 25141 }, { "epoch": 2.5561203741358276, "grad_norm": 0.2872644066810608, "learning_rate": 5.67470299523688e-06, "loss": 0.3158, "step": 25142 }, { "epoch": 2.5562220414802765, "grad_norm": 0.2742379307746887, "learning_rate": 5.674351353454437e-06, "loss": 0.339, "step": 25143 }, { "epoch": 2.5563237088247255, "grad_norm": 0.2663888931274414, "learning_rate": 5.6739997082747745e-06, "loss": 0.3215, "step": 25144 }, { "epoch": 2.5564253761691744, "grad_norm": 0.2676559388637543, "learning_rate": 5.67364805969966e-06, "loss": 0.3367, "step": 25145 }, { "epoch": 2.5565270435136234, "grad_norm": 0.2695792615413666, "learning_rate": 5.673296407730867e-06, "loss": 0.3242, "step": 25146 }, { "epoch": 2.5566287108580723, "grad_norm": 0.2707742154598236, "learning_rate": 5.672944752370166e-06, "loss": 0.3234, "step": 25147 }, { "epoch": 2.5567303782025212, "grad_norm": 0.25345516204833984, "learning_rate": 5.672593093619331e-06, "loss": 0.3321, "step": 25148 }, { "epoch": 2.55683204554697, "grad_norm": 0.27371829748153687, "learning_rate": 5.672241431480131e-06, "loss": 0.3401, "step": 25149 }, { "epoch": 2.556933712891419, "grad_norm": 0.2622746527194977, "learning_rate": 5.671889765954338e-06, "loss": 0.3418, "step": 25150 }, { "epoch": 2.5570353802358685, "grad_norm": 0.28546634316444397, "learning_rate": 5.671538097043725e-06, "loss": 0.3632, "step": 25151 }, { "epoch": 2.5571370475803175, "grad_norm": 0.3014249801635742, "learning_rate": 5.671186424750061e-06, "loss": 0.339, "step": 25152 }, { "epoch": 2.5572387149247664, "grad_norm": 0.28576093912124634, "learning_rate": 5.6708347490751205e-06, "loss": 0.3315, "step": 25153 }, { "epoch": 2.5573403822692153, "grad_norm": 0.2751140892505646, "learning_rate": 5.6704830700206735e-06, "loss": 0.323, "step": 25154 }, { "epoch": 2.5574420496136643, "grad_norm": 0.27864059805870056, "learning_rate": 5.670131387588492e-06, "loss": 0.3266, "step": 25155 }, { "epoch": 2.5575437169581132, "grad_norm": 0.28374847769737244, "learning_rate": 5.669779701780349e-06, "loss": 0.3134, "step": 25156 }, { "epoch": 2.557645384302562, "grad_norm": 0.2851818799972534, "learning_rate": 5.669428012598014e-06, "loss": 0.3472, "step": 25157 }, { "epoch": 2.557747051647011, "grad_norm": 0.2881539762020111, "learning_rate": 5.669076320043259e-06, "loss": 0.3422, "step": 25158 }, { "epoch": 2.55784871899146, "grad_norm": 0.25548434257507324, "learning_rate": 5.668724624117859e-06, "loss": 0.3346, "step": 25159 }, { "epoch": 2.557950386335909, "grad_norm": 0.26757827401161194, "learning_rate": 5.6683729248235806e-06, "loss": 0.3347, "step": 25160 }, { "epoch": 2.558052053680358, "grad_norm": 0.2681151330471039, "learning_rate": 5.6680212221622e-06, "loss": 0.3154, "step": 25161 }, { "epoch": 2.558153721024807, "grad_norm": 0.2837401032447815, "learning_rate": 5.667669516135485e-06, "loss": 0.3397, "step": 25162 }, { "epoch": 2.558255388369256, "grad_norm": 0.2740592360496521, "learning_rate": 5.6673178067452115e-06, "loss": 0.3377, "step": 25163 }, { "epoch": 2.5583570557137048, "grad_norm": 0.2685950994491577, "learning_rate": 5.66696609399315e-06, "loss": 0.338, "step": 25164 }, { "epoch": 2.5584587230581537, "grad_norm": 0.26970648765563965, "learning_rate": 5.66661437788107e-06, "loss": 0.3334, "step": 25165 }, { "epoch": 2.5585603904026026, "grad_norm": 0.271075040102005, "learning_rate": 5.666262658410748e-06, "loss": 0.3388, "step": 25166 }, { "epoch": 2.5586620577470516, "grad_norm": 0.26197025179862976, "learning_rate": 5.66591093558395e-06, "loss": 0.3275, "step": 25167 }, { "epoch": 2.5587637250915005, "grad_norm": 0.24174752831459045, "learning_rate": 5.665559209402452e-06, "loss": 0.3311, "step": 25168 }, { "epoch": 2.5588653924359495, "grad_norm": 0.27111420035362244, "learning_rate": 5.665207479868026e-06, "loss": 0.3595, "step": 25169 }, { "epoch": 2.5589670597803984, "grad_norm": 0.2952468693256378, "learning_rate": 5.6648557469824404e-06, "loss": 0.3383, "step": 25170 }, { "epoch": 2.5590687271248473, "grad_norm": 0.2658085525035858, "learning_rate": 5.6645040107474715e-06, "loss": 0.3674, "step": 25171 }, { "epoch": 2.5591703944692963, "grad_norm": 0.26917630434036255, "learning_rate": 5.664152271164889e-06, "loss": 0.3188, "step": 25172 }, { "epoch": 2.5592720618137452, "grad_norm": 0.27152779698371887, "learning_rate": 5.663800528236464e-06, "loss": 0.3497, "step": 25173 }, { "epoch": 2.559373729158194, "grad_norm": 0.2662631869316101, "learning_rate": 5.6634487819639714e-06, "loss": 0.361, "step": 25174 }, { "epoch": 2.559475396502643, "grad_norm": 0.25477173924446106, "learning_rate": 5.6630970323491795e-06, "loss": 0.3554, "step": 25175 }, { "epoch": 2.559577063847092, "grad_norm": 0.2673705518245697, "learning_rate": 5.662745279393864e-06, "loss": 0.3354, "step": 25176 }, { "epoch": 2.5596787311915414, "grad_norm": 0.29628807306289673, "learning_rate": 5.662393523099795e-06, "loss": 0.3327, "step": 25177 }, { "epoch": 2.5597803985359904, "grad_norm": 0.26730093359947205, "learning_rate": 5.662041763468744e-06, "loss": 0.3603, "step": 25178 }, { "epoch": 2.5598820658804393, "grad_norm": 0.26679477095603943, "learning_rate": 5.661690000502486e-06, "loss": 0.3059, "step": 25179 }, { "epoch": 2.5599837332248883, "grad_norm": 0.2468063235282898, "learning_rate": 5.661338234202789e-06, "loss": 0.314, "step": 25180 }, { "epoch": 2.560085400569337, "grad_norm": 0.2741405665874481, "learning_rate": 5.660986464571428e-06, "loss": 0.324, "step": 25181 }, { "epoch": 2.560187067913786, "grad_norm": 0.26023420691490173, "learning_rate": 5.660634691610174e-06, "loss": 0.3362, "step": 25182 }, { "epoch": 2.560288735258235, "grad_norm": 0.25547975301742554, "learning_rate": 5.660282915320798e-06, "loss": 0.3164, "step": 25183 }, { "epoch": 2.560390402602684, "grad_norm": 0.2655569612979889, "learning_rate": 5.659931135705076e-06, "loss": 0.2989, "step": 25184 }, { "epoch": 2.560492069947133, "grad_norm": 0.27457624673843384, "learning_rate": 5.659579352764777e-06, "loss": 0.3543, "step": 25185 }, { "epoch": 2.560593737291582, "grad_norm": 0.26422736048698425, "learning_rate": 5.659227566501674e-06, "loss": 0.3083, "step": 25186 }, { "epoch": 2.560695404636031, "grad_norm": 0.24748636782169342, "learning_rate": 5.658875776917539e-06, "loss": 0.3133, "step": 25187 }, { "epoch": 2.56079707198048, "grad_norm": 0.2535903751850128, "learning_rate": 5.658523984014143e-06, "loss": 0.3012, "step": 25188 }, { "epoch": 2.5608987393249287, "grad_norm": 0.27654027938842773, "learning_rate": 5.658172187793262e-06, "loss": 0.348, "step": 25189 }, { "epoch": 2.5610004066693777, "grad_norm": 0.26032716035842896, "learning_rate": 5.6578203882566655e-06, "loss": 0.3355, "step": 25190 }, { "epoch": 2.5611020740138266, "grad_norm": 0.27326151728630066, "learning_rate": 5.657468585406125e-06, "loss": 0.3064, "step": 25191 }, { "epoch": 2.561203741358276, "grad_norm": 0.2727733254432678, "learning_rate": 5.657116779243413e-06, "loss": 0.3374, "step": 25192 }, { "epoch": 2.561305408702725, "grad_norm": 0.28129273653030396, "learning_rate": 5.6567649697703045e-06, "loss": 0.3224, "step": 25193 }, { "epoch": 2.561407076047174, "grad_norm": 0.2450878918170929, "learning_rate": 5.656413156988571e-06, "loss": 0.3844, "step": 25194 }, { "epoch": 2.561508743391623, "grad_norm": 0.26436275243759155, "learning_rate": 5.656061340899983e-06, "loss": 0.3209, "step": 25195 }, { "epoch": 2.5616104107360718, "grad_norm": 0.2550230622291565, "learning_rate": 5.655709521506314e-06, "loss": 0.3267, "step": 25196 }, { "epoch": 2.5617120780805207, "grad_norm": 0.26538294553756714, "learning_rate": 5.655357698809336e-06, "loss": 0.3062, "step": 25197 }, { "epoch": 2.5618137454249696, "grad_norm": 0.24467705190181732, "learning_rate": 5.655005872810822e-06, "loss": 0.3045, "step": 25198 }, { "epoch": 2.5619154127694186, "grad_norm": 0.26166146993637085, "learning_rate": 5.654654043512544e-06, "loss": 0.3191, "step": 25199 }, { "epoch": 2.5620170801138675, "grad_norm": 0.24643220007419586, "learning_rate": 5.654302210916275e-06, "loss": 0.2976, "step": 25200 }, { "epoch": 2.5621187474583165, "grad_norm": 0.2708503305912018, "learning_rate": 5.653950375023786e-06, "loss": 0.289, "step": 25201 }, { "epoch": 2.5622204148027654, "grad_norm": 0.2695540487766266, "learning_rate": 5.65359853583685e-06, "loss": 0.3461, "step": 25202 }, { "epoch": 2.5623220821472144, "grad_norm": 0.269424706697464, "learning_rate": 5.65324669335724e-06, "loss": 0.318, "step": 25203 }, { "epoch": 2.5624237494916633, "grad_norm": 0.2461579293012619, "learning_rate": 5.65289484758673e-06, "loss": 0.339, "step": 25204 }, { "epoch": 2.5625254168361122, "grad_norm": 0.26050662994384766, "learning_rate": 5.652542998527091e-06, "loss": 0.3255, "step": 25205 }, { "epoch": 2.562627084180561, "grad_norm": 0.2709624171257019, "learning_rate": 5.6521911461800925e-06, "loss": 0.344, "step": 25206 }, { "epoch": 2.56272875152501, "grad_norm": 0.2584737241268158, "learning_rate": 5.651839290547512e-06, "loss": 0.3299, "step": 25207 }, { "epoch": 2.562830418869459, "grad_norm": 0.24130704998970032, "learning_rate": 5.651487431631121e-06, "loss": 0.3257, "step": 25208 }, { "epoch": 2.562932086213908, "grad_norm": 0.28568699955940247, "learning_rate": 5.6511355694326896e-06, "loss": 0.317, "step": 25209 }, { "epoch": 2.563033753558357, "grad_norm": 0.27679798007011414, "learning_rate": 5.650783703953993e-06, "loss": 0.3423, "step": 25210 }, { "epoch": 2.563135420902806, "grad_norm": 0.25178763270378113, "learning_rate": 5.650431835196801e-06, "loss": 0.322, "step": 25211 }, { "epoch": 2.563237088247255, "grad_norm": 0.2731693387031555, "learning_rate": 5.650079963162889e-06, "loss": 0.3045, "step": 25212 }, { "epoch": 2.5633387555917038, "grad_norm": 0.2602176070213318, "learning_rate": 5.64972808785403e-06, "loss": 0.3121, "step": 25213 }, { "epoch": 2.5634404229361527, "grad_norm": 0.2908225953578949, "learning_rate": 5.6493762092719925e-06, "loss": 0.3229, "step": 25214 }, { "epoch": 2.5635420902806016, "grad_norm": 0.27196750044822693, "learning_rate": 5.649024327418555e-06, "loss": 0.329, "step": 25215 }, { "epoch": 2.5636437576250506, "grad_norm": 0.2885423004627228, "learning_rate": 5.648672442295485e-06, "loss": 0.3256, "step": 25216 }, { "epoch": 2.5637454249694995, "grad_norm": 0.27651113271713257, "learning_rate": 5.6483205539045585e-06, "loss": 0.3093, "step": 25217 }, { "epoch": 2.563847092313949, "grad_norm": 0.2830907702445984, "learning_rate": 5.647968662247547e-06, "loss": 0.3516, "step": 25218 }, { "epoch": 2.563948759658398, "grad_norm": 0.2747589647769928, "learning_rate": 5.647616767326223e-06, "loss": 0.315, "step": 25219 }, { "epoch": 2.564050427002847, "grad_norm": 0.26288557052612305, "learning_rate": 5.64726486914236e-06, "loss": 0.3399, "step": 25220 }, { "epoch": 2.5641520943472957, "grad_norm": 0.2782961130142212, "learning_rate": 5.646912967697728e-06, "loss": 0.3742, "step": 25221 }, { "epoch": 2.5642537616917447, "grad_norm": 0.27664583921432495, "learning_rate": 5.646561062994106e-06, "loss": 0.3221, "step": 25222 }, { "epoch": 2.5643554290361936, "grad_norm": 0.2683885395526886, "learning_rate": 5.646209155033261e-06, "loss": 0.3393, "step": 25223 }, { "epoch": 2.5644570963806426, "grad_norm": 0.26853397488594055, "learning_rate": 5.645857243816968e-06, "loss": 0.3228, "step": 25224 }, { "epoch": 2.5645587637250915, "grad_norm": 0.27180930972099304, "learning_rate": 5.6455053293469995e-06, "loss": 0.3428, "step": 25225 }, { "epoch": 2.5646604310695404, "grad_norm": 0.264304518699646, "learning_rate": 5.645153411625129e-06, "loss": 0.3515, "step": 25226 }, { "epoch": 2.5647620984139894, "grad_norm": 0.2794102430343628, "learning_rate": 5.644801490653129e-06, "loss": 0.3916, "step": 25227 }, { "epoch": 2.5648637657584383, "grad_norm": 0.2619718611240387, "learning_rate": 5.644449566432772e-06, "loss": 0.3207, "step": 25228 }, { "epoch": 2.5649654331028873, "grad_norm": 0.2642851769924164, "learning_rate": 5.644097638965831e-06, "loss": 0.3314, "step": 25229 }, { "epoch": 2.565067100447336, "grad_norm": 0.26672956347465515, "learning_rate": 5.64374570825408e-06, "loss": 0.3355, "step": 25230 }, { "epoch": 2.565168767791785, "grad_norm": 0.27843374013900757, "learning_rate": 5.64339377429929e-06, "loss": 0.3558, "step": 25231 }, { "epoch": 2.565270435136234, "grad_norm": 0.2431785762310028, "learning_rate": 5.643041837103237e-06, "loss": 0.3106, "step": 25232 }, { "epoch": 2.5653721024806835, "grad_norm": 0.2592616677284241, "learning_rate": 5.64268989666769e-06, "loss": 0.3788, "step": 25233 }, { "epoch": 2.5654737698251324, "grad_norm": 0.27162405848503113, "learning_rate": 5.642337952994424e-06, "loss": 0.3364, "step": 25234 }, { "epoch": 2.5655754371695814, "grad_norm": 0.26540225744247437, "learning_rate": 5.641986006085214e-06, "loss": 0.3473, "step": 25235 }, { "epoch": 2.5656771045140303, "grad_norm": 0.22885937988758087, "learning_rate": 5.641634055941829e-06, "loss": 0.3211, "step": 25236 }, { "epoch": 2.5657787718584792, "grad_norm": 0.2557426691055298, "learning_rate": 5.641282102566046e-06, "loss": 0.373, "step": 25237 }, { "epoch": 2.565880439202928, "grad_norm": 0.2730903625488281, "learning_rate": 5.640930145959634e-06, "loss": 0.3541, "step": 25238 }, { "epoch": 2.565982106547377, "grad_norm": 0.267924964427948, "learning_rate": 5.640578186124369e-06, "loss": 0.3417, "step": 25239 }, { "epoch": 2.566083773891826, "grad_norm": 0.2426074743270874, "learning_rate": 5.6402262230620246e-06, "loss": 0.3176, "step": 25240 }, { "epoch": 2.566185441236275, "grad_norm": 0.27271464467048645, "learning_rate": 5.63987425677437e-06, "loss": 0.3566, "step": 25241 }, { "epoch": 2.566287108580724, "grad_norm": 0.2437986582517624, "learning_rate": 5.639522287263184e-06, "loss": 0.3358, "step": 25242 }, { "epoch": 2.566388775925173, "grad_norm": 0.29921743273735046, "learning_rate": 5.639170314530235e-06, "loss": 0.3038, "step": 25243 }, { "epoch": 2.566490443269622, "grad_norm": 0.27238598465919495, "learning_rate": 5.638818338577297e-06, "loss": 0.3478, "step": 25244 }, { "epoch": 2.5665921106140708, "grad_norm": 0.2660701274871826, "learning_rate": 5.638466359406146e-06, "loss": 0.3697, "step": 25245 }, { "epoch": 2.5666937779585197, "grad_norm": 0.2421528548002243, "learning_rate": 5.638114377018552e-06, "loss": 0.3163, "step": 25246 }, { "epoch": 2.5667954453029687, "grad_norm": 0.25406304001808167, "learning_rate": 5.6377623914162905e-06, "loss": 0.3332, "step": 25247 }, { "epoch": 2.5668971126474176, "grad_norm": 0.28677690029144287, "learning_rate": 5.637410402601132e-06, "loss": 0.3536, "step": 25248 }, { "epoch": 2.5669987799918665, "grad_norm": 0.2786726951599121, "learning_rate": 5.637058410574853e-06, "loss": 0.3418, "step": 25249 }, { "epoch": 2.5671004473363155, "grad_norm": 0.27868807315826416, "learning_rate": 5.636706415339225e-06, "loss": 0.3258, "step": 25250 }, { "epoch": 2.5672021146807644, "grad_norm": 0.27980712056159973, "learning_rate": 5.636354416896019e-06, "loss": 0.3069, "step": 25251 }, { "epoch": 2.5673037820252134, "grad_norm": 0.27860400080680847, "learning_rate": 5.636002415247014e-06, "loss": 0.3282, "step": 25252 }, { "epoch": 2.5674054493696623, "grad_norm": 0.2681313753128052, "learning_rate": 5.635650410393978e-06, "loss": 0.332, "step": 25253 }, { "epoch": 2.5675071167141112, "grad_norm": 0.2763407826423645, "learning_rate": 5.635298402338686e-06, "loss": 0.353, "step": 25254 }, { "epoch": 2.56760878405856, "grad_norm": 0.2762371599674225, "learning_rate": 5.634946391082913e-06, "loss": 0.3139, "step": 25255 }, { "epoch": 2.567710451403009, "grad_norm": 0.2761053740978241, "learning_rate": 5.634594376628429e-06, "loss": 0.3363, "step": 25256 }, { "epoch": 2.567812118747458, "grad_norm": 0.2907620370388031, "learning_rate": 5.634242358977013e-06, "loss": 0.3653, "step": 25257 }, { "epoch": 2.567913786091907, "grad_norm": 0.25877949595451355, "learning_rate": 5.633890338130431e-06, "loss": 0.3396, "step": 25258 }, { "epoch": 2.5680154534363564, "grad_norm": 0.2802846431732178, "learning_rate": 5.633538314090461e-06, "loss": 0.3576, "step": 25259 }, { "epoch": 2.5681171207808053, "grad_norm": 0.25637340545654297, "learning_rate": 5.633186286858876e-06, "loss": 0.344, "step": 25260 }, { "epoch": 2.5682187881252543, "grad_norm": 0.24452239274978638, "learning_rate": 5.63283425643745e-06, "loss": 0.3256, "step": 25261 }, { "epoch": 2.5683204554697032, "grad_norm": 0.28973957896232605, "learning_rate": 5.632482222827954e-06, "loss": 0.3384, "step": 25262 }, { "epoch": 2.568422122814152, "grad_norm": 0.28740525245666504, "learning_rate": 5.632130186032163e-06, "loss": 0.3519, "step": 25263 }, { "epoch": 2.568523790158601, "grad_norm": 0.2669665813446045, "learning_rate": 5.63177814605185e-06, "loss": 0.3293, "step": 25264 }, { "epoch": 2.56862545750305, "grad_norm": 0.2625885307788849, "learning_rate": 5.631426102888789e-06, "loss": 0.3346, "step": 25265 }, { "epoch": 2.568727124847499, "grad_norm": 0.2828516662120819, "learning_rate": 5.631074056544755e-06, "loss": 0.3441, "step": 25266 }, { "epoch": 2.568828792191948, "grad_norm": 0.25461533665657043, "learning_rate": 5.630722007021519e-06, "loss": 0.3412, "step": 25267 }, { "epoch": 2.568930459536397, "grad_norm": 0.24924921989440918, "learning_rate": 5.630369954320853e-06, "loss": 0.3067, "step": 25268 }, { "epoch": 2.569032126880846, "grad_norm": 0.27372756600379944, "learning_rate": 5.630017898444535e-06, "loss": 0.3376, "step": 25269 }, { "epoch": 2.5691337942252948, "grad_norm": 0.27259561419487, "learning_rate": 5.629665839394337e-06, "loss": 0.3233, "step": 25270 }, { "epoch": 2.5692354615697437, "grad_norm": 0.28437939286231995, "learning_rate": 5.629313777172033e-06, "loss": 0.3197, "step": 25271 }, { "epoch": 2.5693371289141926, "grad_norm": 0.30167120695114136, "learning_rate": 5.6289617117793935e-06, "loss": 0.3533, "step": 25272 }, { "epoch": 2.5694387962586416, "grad_norm": 0.29457002878189087, "learning_rate": 5.628609643218195e-06, "loss": 0.3405, "step": 25273 }, { "epoch": 2.569540463603091, "grad_norm": 0.2711765468120575, "learning_rate": 5.62825757149021e-06, "loss": 0.3399, "step": 25274 }, { "epoch": 2.56964213094754, "grad_norm": 0.2564777731895447, "learning_rate": 5.627905496597214e-06, "loss": 0.3336, "step": 25275 }, { "epoch": 2.569743798291989, "grad_norm": 0.2703285813331604, "learning_rate": 5.62755341854098e-06, "loss": 0.3452, "step": 25276 }, { "epoch": 2.569845465636438, "grad_norm": 0.2743607461452484, "learning_rate": 5.627201337323278e-06, "loss": 0.3647, "step": 25277 }, { "epoch": 2.5699471329808867, "grad_norm": 0.28300222754478455, "learning_rate": 5.626849252945886e-06, "loss": 0.3488, "step": 25278 }, { "epoch": 2.5700488003253357, "grad_norm": 0.258686363697052, "learning_rate": 5.626497165410577e-06, "loss": 0.3442, "step": 25279 }, { "epoch": 2.5701504676697846, "grad_norm": 0.2571510672569275, "learning_rate": 5.626145074719125e-06, "loss": 0.332, "step": 25280 }, { "epoch": 2.5702521350142336, "grad_norm": 0.24618270993232727, "learning_rate": 5.625792980873303e-06, "loss": 0.3021, "step": 25281 }, { "epoch": 2.5703538023586825, "grad_norm": 0.2696055769920349, "learning_rate": 5.625440883874882e-06, "loss": 0.333, "step": 25282 }, { "epoch": 2.5704554697031314, "grad_norm": 0.2579698860645294, "learning_rate": 5.62508878372564e-06, "loss": 0.3157, "step": 25283 }, { "epoch": 2.5705571370475804, "grad_norm": 0.25369855761528015, "learning_rate": 5.62473668042735e-06, "loss": 0.3667, "step": 25284 }, { "epoch": 2.5706588043920293, "grad_norm": 0.25355759263038635, "learning_rate": 5.624384573981784e-06, "loss": 0.3332, "step": 25285 }, { "epoch": 2.5707604717364783, "grad_norm": 0.27777764201164246, "learning_rate": 5.6240324643907175e-06, "loss": 0.349, "step": 25286 }, { "epoch": 2.570862139080927, "grad_norm": 0.271331250667572, "learning_rate": 5.6236803516559224e-06, "loss": 0.3007, "step": 25287 }, { "epoch": 2.570963806425376, "grad_norm": 0.25580036640167236, "learning_rate": 5.6233282357791755e-06, "loss": 0.3221, "step": 25288 }, { "epoch": 2.571065473769825, "grad_norm": 0.2582644820213318, "learning_rate": 5.622976116762249e-06, "loss": 0.323, "step": 25289 }, { "epoch": 2.571167141114274, "grad_norm": 0.276813268661499, "learning_rate": 5.622623994606916e-06, "loss": 0.3459, "step": 25290 }, { "epoch": 2.571268808458723, "grad_norm": 0.2879083454608917, "learning_rate": 5.622271869314952e-06, "loss": 0.3182, "step": 25291 }, { "epoch": 2.571370475803172, "grad_norm": 0.26882749795913696, "learning_rate": 5.62191974088813e-06, "loss": 0.3625, "step": 25292 }, { "epoch": 2.571472143147621, "grad_norm": 0.2803241014480591, "learning_rate": 5.621567609328224e-06, "loss": 0.3569, "step": 25293 }, { "epoch": 2.57157381049207, "grad_norm": 0.2656439244747162, "learning_rate": 5.621215474637007e-06, "loss": 0.2956, "step": 25294 }, { "epoch": 2.5716754778365187, "grad_norm": 0.28769516944885254, "learning_rate": 5.620863336816256e-06, "loss": 0.329, "step": 25295 }, { "epoch": 2.5717771451809677, "grad_norm": 0.28371891379356384, "learning_rate": 5.620511195867743e-06, "loss": 0.3305, "step": 25296 }, { "epoch": 2.5718788125254166, "grad_norm": 0.27868446707725525, "learning_rate": 5.62015905179324e-06, "loss": 0.32, "step": 25297 }, { "epoch": 2.5719804798698656, "grad_norm": 0.2633911967277527, "learning_rate": 5.6198069045945236e-06, "loss": 0.3592, "step": 25298 }, { "epoch": 2.5720821472143145, "grad_norm": 0.26071828603744507, "learning_rate": 5.619454754273369e-06, "loss": 0.3304, "step": 25299 }, { "epoch": 2.572183814558764, "grad_norm": 0.2778937518596649, "learning_rate": 5.619102600831548e-06, "loss": 0.3536, "step": 25300 }, { "epoch": 2.572285481903213, "grad_norm": 0.28868675231933594, "learning_rate": 5.618750444270835e-06, "loss": 0.3126, "step": 25301 }, { "epoch": 2.5723871492476618, "grad_norm": 0.2761223316192627, "learning_rate": 5.618398284593003e-06, "loss": 0.3514, "step": 25302 }, { "epoch": 2.5724888165921107, "grad_norm": 0.2641330063343048, "learning_rate": 5.618046121799828e-06, "loss": 0.3367, "step": 25303 }, { "epoch": 2.5725904839365596, "grad_norm": 0.2661069631576538, "learning_rate": 5.617693955893085e-06, "loss": 0.3405, "step": 25304 }, { "epoch": 2.5726921512810086, "grad_norm": 0.28773266077041626, "learning_rate": 5.617341786874545e-06, "loss": 0.3454, "step": 25305 }, { "epoch": 2.5727938186254575, "grad_norm": 0.2789122462272644, "learning_rate": 5.616989614745986e-06, "loss": 0.3668, "step": 25306 }, { "epoch": 2.5728954859699065, "grad_norm": 0.2706350088119507, "learning_rate": 5.6166374395091775e-06, "loss": 0.2997, "step": 25307 }, { "epoch": 2.5729971533143554, "grad_norm": 0.26623064279556274, "learning_rate": 5.616285261165897e-06, "loss": 0.3127, "step": 25308 }, { "epoch": 2.5730988206588044, "grad_norm": 0.26050111651420593, "learning_rate": 5.615933079717918e-06, "loss": 0.2972, "step": 25309 }, { "epoch": 2.5732004880032533, "grad_norm": 0.2689879834651947, "learning_rate": 5.6155808951670145e-06, "loss": 0.3013, "step": 25310 }, { "epoch": 2.5733021553477022, "grad_norm": 0.26779839396476746, "learning_rate": 5.6152287075149606e-06, "loss": 0.3456, "step": 25311 }, { "epoch": 2.573403822692151, "grad_norm": 0.2906937897205353, "learning_rate": 5.61487651676353e-06, "loss": 0.3405, "step": 25312 }, { "epoch": 2.5735054900366, "grad_norm": 0.26330840587615967, "learning_rate": 5.6145243229144976e-06, "loss": 0.3411, "step": 25313 }, { "epoch": 2.573607157381049, "grad_norm": 0.2736530601978302, "learning_rate": 5.614172125969639e-06, "loss": 0.3293, "step": 25314 }, { "epoch": 2.5737088247254984, "grad_norm": 0.2949794828891754, "learning_rate": 5.613819925930725e-06, "loss": 0.3297, "step": 25315 }, { "epoch": 2.5738104920699474, "grad_norm": 0.29403895139694214, "learning_rate": 5.613467722799536e-06, "loss": 0.3412, "step": 25316 }, { "epoch": 2.5739121594143963, "grad_norm": 0.26295459270477295, "learning_rate": 5.6131155165778396e-06, "loss": 0.3212, "step": 25317 }, { "epoch": 2.5740138267588453, "grad_norm": 0.2610834240913391, "learning_rate": 5.612763307267414e-06, "loss": 0.3458, "step": 25318 }, { "epoch": 2.574115494103294, "grad_norm": 0.2623271644115448, "learning_rate": 5.61241109487003e-06, "loss": 0.328, "step": 25319 }, { "epoch": 2.574217161447743, "grad_norm": 0.2589645981788635, "learning_rate": 5.612058879387466e-06, "loss": 0.3081, "step": 25320 }, { "epoch": 2.574318828792192, "grad_norm": 0.2867206633090973, "learning_rate": 5.611706660821497e-06, "loss": 0.335, "step": 25321 }, { "epoch": 2.574420496136641, "grad_norm": 0.25778329372406006, "learning_rate": 5.611354439173891e-06, "loss": 0.3346, "step": 25322 }, { "epoch": 2.57452216348109, "grad_norm": 0.27785784006118774, "learning_rate": 5.61100221444643e-06, "loss": 0.3418, "step": 25323 }, { "epoch": 2.574623830825539, "grad_norm": 0.2890944182872772, "learning_rate": 5.610649986640884e-06, "loss": 0.3432, "step": 25324 }, { "epoch": 2.574725498169988, "grad_norm": 0.29433169960975647, "learning_rate": 5.610297755759028e-06, "loss": 0.3182, "step": 25325 }, { "epoch": 2.574827165514437, "grad_norm": 0.2606205344200134, "learning_rate": 5.609945521802638e-06, "loss": 0.3301, "step": 25326 }, { "epoch": 2.5749288328588857, "grad_norm": 0.305081307888031, "learning_rate": 5.609593284773486e-06, "loss": 0.3362, "step": 25327 }, { "epoch": 2.5750305002033347, "grad_norm": 0.2772798240184784, "learning_rate": 5.6092410446733495e-06, "loss": 0.3345, "step": 25328 }, { "epoch": 2.5751321675477836, "grad_norm": 0.28390392661094666, "learning_rate": 5.608888801504e-06, "loss": 0.3281, "step": 25329 }, { "epoch": 2.5752338348922326, "grad_norm": 0.2986249625682831, "learning_rate": 5.608536555267213e-06, "loss": 0.3371, "step": 25330 }, { "epoch": 2.5753355022366815, "grad_norm": 0.2895021438598633, "learning_rate": 5.6081843059647655e-06, "loss": 0.3237, "step": 25331 }, { "epoch": 2.5754371695811304, "grad_norm": 0.25579342246055603, "learning_rate": 5.607832053598429e-06, "loss": 0.3527, "step": 25332 }, { "epoch": 2.5755388369255794, "grad_norm": 0.26795265078544617, "learning_rate": 5.607479798169979e-06, "loss": 0.3227, "step": 25333 }, { "epoch": 2.5756405042700283, "grad_norm": 0.2967180907726288, "learning_rate": 5.60712753968119e-06, "loss": 0.3426, "step": 25334 }, { "epoch": 2.5757421716144773, "grad_norm": 0.2891603708267212, "learning_rate": 5.606775278133836e-06, "loss": 0.3501, "step": 25335 }, { "epoch": 2.575843838958926, "grad_norm": 0.269320547580719, "learning_rate": 5.6064230135296935e-06, "loss": 0.3372, "step": 25336 }, { "epoch": 2.575945506303375, "grad_norm": 0.2592747211456299, "learning_rate": 5.606070745870537e-06, "loss": 0.3123, "step": 25337 }, { "epoch": 2.576047173647824, "grad_norm": 0.2632635235786438, "learning_rate": 5.605718475158137e-06, "loss": 0.3212, "step": 25338 }, { "epoch": 2.576148840992273, "grad_norm": 0.29496803879737854, "learning_rate": 5.605366201394273e-06, "loss": 0.3151, "step": 25339 }, { "epoch": 2.576250508336722, "grad_norm": 0.2615273892879486, "learning_rate": 5.605013924580718e-06, "loss": 0.3489, "step": 25340 }, { "epoch": 2.5763521756811714, "grad_norm": 0.2658556401729584, "learning_rate": 5.604661644719247e-06, "loss": 0.3003, "step": 25341 }, { "epoch": 2.5764538430256203, "grad_norm": 0.2654648423194885, "learning_rate": 5.604309361811634e-06, "loss": 0.3083, "step": 25342 }, { "epoch": 2.5765555103700692, "grad_norm": 0.26240846514701843, "learning_rate": 5.603957075859654e-06, "loss": 0.3078, "step": 25343 }, { "epoch": 2.576657177714518, "grad_norm": 0.27944716811180115, "learning_rate": 5.603604786865081e-06, "loss": 0.3814, "step": 25344 }, { "epoch": 2.576758845058967, "grad_norm": 0.28269797563552856, "learning_rate": 5.603252494829691e-06, "loss": 0.3423, "step": 25345 }, { "epoch": 2.576860512403416, "grad_norm": 0.26268690824508667, "learning_rate": 5.602900199755259e-06, "loss": 0.3151, "step": 25346 }, { "epoch": 2.576962179747865, "grad_norm": 0.2791081666946411, "learning_rate": 5.602547901643559e-06, "loss": 0.3272, "step": 25347 }, { "epoch": 2.577063847092314, "grad_norm": 0.2889091372489929, "learning_rate": 5.602195600496365e-06, "loss": 0.3439, "step": 25348 }, { "epoch": 2.577165514436763, "grad_norm": 0.2762152850627899, "learning_rate": 5.601843296315453e-06, "loss": 0.3596, "step": 25349 }, { "epoch": 2.577267181781212, "grad_norm": 0.2681901156902313, "learning_rate": 5.601490989102596e-06, "loss": 0.3411, "step": 25350 }, { "epoch": 2.5773688491256608, "grad_norm": 0.2696269750595093, "learning_rate": 5.601138678859574e-06, "loss": 0.3108, "step": 25351 }, { "epoch": 2.5774705164701097, "grad_norm": 0.30217650532722473, "learning_rate": 5.600786365588155e-06, "loss": 0.3052, "step": 25352 }, { "epoch": 2.5775721838145587, "grad_norm": 0.3061513900756836, "learning_rate": 5.600434049290119e-06, "loss": 0.338, "step": 25353 }, { "epoch": 2.5776738511590076, "grad_norm": 0.2799004316329956, "learning_rate": 5.600081729967237e-06, "loss": 0.33, "step": 25354 }, { "epoch": 2.5777755185034565, "grad_norm": 0.263180673122406, "learning_rate": 5.599729407621287e-06, "loss": 0.3284, "step": 25355 }, { "epoch": 2.577877185847906, "grad_norm": 0.267945259809494, "learning_rate": 5.599377082254043e-06, "loss": 0.3624, "step": 25356 }, { "epoch": 2.577978853192355, "grad_norm": 0.2720387279987335, "learning_rate": 5.599024753867279e-06, "loss": 0.3375, "step": 25357 }, { "epoch": 2.578080520536804, "grad_norm": 0.2702554762363434, "learning_rate": 5.598672422462772e-06, "loss": 0.313, "step": 25358 }, { "epoch": 2.5781821878812528, "grad_norm": 0.2875056266784668, "learning_rate": 5.5983200880422935e-06, "loss": 0.3532, "step": 25359 }, { "epoch": 2.5782838552257017, "grad_norm": 0.26275625824928284, "learning_rate": 5.597967750607622e-06, "loss": 0.3028, "step": 25360 }, { "epoch": 2.5783855225701506, "grad_norm": 0.2798190414905548, "learning_rate": 5.597615410160531e-06, "loss": 0.3741, "step": 25361 }, { "epoch": 2.5784871899145996, "grad_norm": 0.26809605956077576, "learning_rate": 5.597263066702796e-06, "loss": 0.3188, "step": 25362 }, { "epoch": 2.5785888572590485, "grad_norm": 0.2715347707271576, "learning_rate": 5.5969107202361906e-06, "loss": 0.3492, "step": 25363 }, { "epoch": 2.5786905246034975, "grad_norm": 0.26956236362457275, "learning_rate": 5.59655837076249e-06, "loss": 0.354, "step": 25364 }, { "epoch": 2.5787921919479464, "grad_norm": 0.27097418904304504, "learning_rate": 5.596206018283472e-06, "loss": 0.3293, "step": 25365 }, { "epoch": 2.5788938592923953, "grad_norm": 0.25602173805236816, "learning_rate": 5.5958536628009085e-06, "loss": 0.3135, "step": 25366 }, { "epoch": 2.5789955266368443, "grad_norm": 0.2640257775783539, "learning_rate": 5.595501304316577e-06, "loss": 0.3177, "step": 25367 }, { "epoch": 2.5790971939812932, "grad_norm": 0.2669486105442047, "learning_rate": 5.59514894283225e-06, "loss": 0.3153, "step": 25368 }, { "epoch": 2.579198861325742, "grad_norm": 0.27093780040740967, "learning_rate": 5.594796578349705e-06, "loss": 0.3137, "step": 25369 }, { "epoch": 2.579300528670191, "grad_norm": 0.2786270081996918, "learning_rate": 5.594444210870716e-06, "loss": 0.3317, "step": 25370 }, { "epoch": 2.57940219601464, "grad_norm": 0.272916316986084, "learning_rate": 5.594091840397059e-06, "loss": 0.335, "step": 25371 }, { "epoch": 2.579503863359089, "grad_norm": 0.26454073190689087, "learning_rate": 5.593739466930508e-06, "loss": 0.3359, "step": 25372 }, { "epoch": 2.579605530703538, "grad_norm": 0.27156829833984375, "learning_rate": 5.593387090472838e-06, "loss": 0.3118, "step": 25373 }, { "epoch": 2.579707198047987, "grad_norm": 0.2838296592235565, "learning_rate": 5.593034711025825e-06, "loss": 0.3078, "step": 25374 }, { "epoch": 2.579808865392436, "grad_norm": 0.2657557427883148, "learning_rate": 5.592682328591246e-06, "loss": 0.4049, "step": 25375 }, { "epoch": 2.5799105327368848, "grad_norm": 0.2697666883468628, "learning_rate": 5.5923299431708725e-06, "loss": 0.3396, "step": 25376 }, { "epoch": 2.5800122000813337, "grad_norm": 0.2564023435115814, "learning_rate": 5.5919775547664815e-06, "loss": 0.3107, "step": 25377 }, { "epoch": 2.5801138674257826, "grad_norm": 0.2851390838623047, "learning_rate": 5.5916251633798476e-06, "loss": 0.357, "step": 25378 }, { "epoch": 2.5802155347702316, "grad_norm": 0.254966676235199, "learning_rate": 5.591272769012748e-06, "loss": 0.3115, "step": 25379 }, { "epoch": 2.5803172021146805, "grad_norm": 0.2929714322090149, "learning_rate": 5.590920371666957e-06, "loss": 0.341, "step": 25380 }, { "epoch": 2.5804188694591295, "grad_norm": 0.2465730756521225, "learning_rate": 5.590567971344249e-06, "loss": 0.3237, "step": 25381 }, { "epoch": 2.580520536803579, "grad_norm": 0.2913305461406708, "learning_rate": 5.5902155680464e-06, "loss": 0.3445, "step": 25382 }, { "epoch": 2.580622204148028, "grad_norm": 0.2531541585922241, "learning_rate": 5.589863161775184e-06, "loss": 0.3039, "step": 25383 }, { "epoch": 2.5807238714924767, "grad_norm": 0.2657729685306549, "learning_rate": 5.589510752532378e-06, "loss": 0.3374, "step": 25384 }, { "epoch": 2.5808255388369257, "grad_norm": 0.27821075916290283, "learning_rate": 5.589158340319758e-06, "loss": 0.3005, "step": 25385 }, { "epoch": 2.5809272061813746, "grad_norm": 0.24490490555763245, "learning_rate": 5.588805925139098e-06, "loss": 0.3448, "step": 25386 }, { "epoch": 2.5810288735258236, "grad_norm": 0.27054354548454285, "learning_rate": 5.588453506992174e-06, "loss": 0.304, "step": 25387 }, { "epoch": 2.5811305408702725, "grad_norm": 0.24688076972961426, "learning_rate": 5.58810108588076e-06, "loss": 0.3209, "step": 25388 }, { "epoch": 2.5812322082147214, "grad_norm": 0.2522628903388977, "learning_rate": 5.587748661806632e-06, "loss": 0.3197, "step": 25389 }, { "epoch": 2.5813338755591704, "grad_norm": 0.277267187833786, "learning_rate": 5.587396234771568e-06, "loss": 0.3401, "step": 25390 }, { "epoch": 2.5814355429036193, "grad_norm": 0.25036731362342834, "learning_rate": 5.58704380477734e-06, "loss": 0.3117, "step": 25391 }, { "epoch": 2.5815372102480683, "grad_norm": 0.2941508889198303, "learning_rate": 5.586691371825725e-06, "loss": 0.3362, "step": 25392 }, { "epoch": 2.581638877592517, "grad_norm": 0.28371283411979675, "learning_rate": 5.5863389359184985e-06, "loss": 0.362, "step": 25393 }, { "epoch": 2.581740544936966, "grad_norm": 0.2873346507549286, "learning_rate": 5.585986497057435e-06, "loss": 0.3337, "step": 25394 }, { "epoch": 2.581842212281415, "grad_norm": 0.2622782289981842, "learning_rate": 5.585634055244312e-06, "loss": 0.3338, "step": 25395 }, { "epoch": 2.581943879625864, "grad_norm": 0.2767082750797272, "learning_rate": 5.585281610480903e-06, "loss": 0.3424, "step": 25396 }, { "epoch": 2.5820455469703134, "grad_norm": 0.2681522071361542, "learning_rate": 5.584929162768985e-06, "loss": 0.3259, "step": 25397 }, { "epoch": 2.5821472143147624, "grad_norm": 0.25913336873054504, "learning_rate": 5.584576712110332e-06, "loss": 0.339, "step": 25398 }, { "epoch": 2.5822488816592113, "grad_norm": 0.24494776129722595, "learning_rate": 5.584224258506721e-06, "loss": 0.3208, "step": 25399 }, { "epoch": 2.5823505490036602, "grad_norm": 0.24933914840221405, "learning_rate": 5.583871801959927e-06, "loss": 0.3623, "step": 25400 }, { "epoch": 2.582452216348109, "grad_norm": 0.30361637473106384, "learning_rate": 5.5835193424717246e-06, "loss": 0.2947, "step": 25401 }, { "epoch": 2.582553883692558, "grad_norm": 0.26318782567977905, "learning_rate": 5.583166880043892e-06, "loss": 0.2966, "step": 25402 }, { "epoch": 2.582655551037007, "grad_norm": 0.2768472135066986, "learning_rate": 5.5828144146782025e-06, "loss": 0.3427, "step": 25403 }, { "epoch": 2.582757218381456, "grad_norm": 0.2550280690193176, "learning_rate": 5.582461946376432e-06, "loss": 0.3306, "step": 25404 }, { "epoch": 2.582858885725905, "grad_norm": 0.2686421275138855, "learning_rate": 5.5821094751403585e-06, "loss": 0.3377, "step": 25405 }, { "epoch": 2.582960553070354, "grad_norm": 0.2765764892101288, "learning_rate": 5.581757000971754e-06, "loss": 0.3364, "step": 25406 }, { "epoch": 2.583062220414803, "grad_norm": 0.2889334559440613, "learning_rate": 5.5814045238723965e-06, "loss": 0.3301, "step": 25407 }, { "epoch": 2.5831638877592518, "grad_norm": 0.27142953872680664, "learning_rate": 5.581052043844061e-06, "loss": 0.3296, "step": 25408 }, { "epoch": 2.5832655551037007, "grad_norm": 0.28192538022994995, "learning_rate": 5.580699560888524e-06, "loss": 0.3421, "step": 25409 }, { "epoch": 2.5833672224481496, "grad_norm": 0.2665644884109497, "learning_rate": 5.580347075007559e-06, "loss": 0.3064, "step": 25410 }, { "epoch": 2.5834688897925986, "grad_norm": 0.28024721145629883, "learning_rate": 5.5799945862029444e-06, "loss": 0.3209, "step": 25411 }, { "epoch": 2.5835705571370475, "grad_norm": 0.26582446694374084, "learning_rate": 5.579642094476455e-06, "loss": 0.2992, "step": 25412 }, { "epoch": 2.5836722244814965, "grad_norm": 0.2903927266597748, "learning_rate": 5.579289599829868e-06, "loss": 0.3649, "step": 25413 }, { "epoch": 2.5837738918259454, "grad_norm": 0.26878267526626587, "learning_rate": 5.5789371022649555e-06, "loss": 0.2994, "step": 25414 }, { "epoch": 2.5838755591703944, "grad_norm": 0.2906201183795929, "learning_rate": 5.578584601783495e-06, "loss": 0.3461, "step": 25415 }, { "epoch": 2.5839772265148433, "grad_norm": 0.29626595973968506, "learning_rate": 5.578232098387264e-06, "loss": 0.3963, "step": 25416 }, { "epoch": 2.5840788938592922, "grad_norm": 0.2899947166442871, "learning_rate": 5.577879592078037e-06, "loss": 0.346, "step": 25417 }, { "epoch": 2.584180561203741, "grad_norm": 0.2644951641559601, "learning_rate": 5.577527082857591e-06, "loss": 0.3354, "step": 25418 }, { "epoch": 2.58428222854819, "grad_norm": 0.2662928104400635, "learning_rate": 5.577174570727699e-06, "loss": 0.3408, "step": 25419 }, { "epoch": 2.584383895892639, "grad_norm": 0.28961825370788574, "learning_rate": 5.5768220556901385e-06, "loss": 0.3399, "step": 25420 }, { "epoch": 2.584485563237088, "grad_norm": 0.2825857102870941, "learning_rate": 5.576469537746686e-06, "loss": 0.3376, "step": 25421 }, { "epoch": 2.584587230581537, "grad_norm": 0.2671045958995819, "learning_rate": 5.5761170168991184e-06, "loss": 0.3254, "step": 25422 }, { "epoch": 2.5846888979259863, "grad_norm": 0.2616237998008728, "learning_rate": 5.57576449314921e-06, "loss": 0.3347, "step": 25423 }, { "epoch": 2.5847905652704353, "grad_norm": 0.27153822779655457, "learning_rate": 5.575411966498735e-06, "loss": 0.3749, "step": 25424 }, { "epoch": 2.584892232614884, "grad_norm": 0.27072176337242126, "learning_rate": 5.5750594369494714e-06, "loss": 0.3523, "step": 25425 }, { "epoch": 2.584993899959333, "grad_norm": 0.2695401608943939, "learning_rate": 5.574706904503196e-06, "loss": 0.3651, "step": 25426 }, { "epoch": 2.585095567303782, "grad_norm": 0.2565598785877228, "learning_rate": 5.5743543691616845e-06, "loss": 0.322, "step": 25427 }, { "epoch": 2.585197234648231, "grad_norm": 0.2524973154067993, "learning_rate": 5.574001830926712e-06, "loss": 0.3314, "step": 25428 }, { "epoch": 2.58529890199268, "grad_norm": 0.2771819531917572, "learning_rate": 5.573649289800054e-06, "loss": 0.3156, "step": 25429 }, { "epoch": 2.585400569337129, "grad_norm": 0.25199243426322937, "learning_rate": 5.573296745783486e-06, "loss": 0.3254, "step": 25430 }, { "epoch": 2.585502236681578, "grad_norm": 0.25654909014701843, "learning_rate": 5.572944198878787e-06, "loss": 0.3554, "step": 25431 }, { "epoch": 2.585603904026027, "grad_norm": 0.27420225739479065, "learning_rate": 5.572591649087731e-06, "loss": 0.3263, "step": 25432 }, { "epoch": 2.5857055713704757, "grad_norm": 0.26032423973083496, "learning_rate": 5.572239096412094e-06, "loss": 0.359, "step": 25433 }, { "epoch": 2.5858072387149247, "grad_norm": 0.2663137912750244, "learning_rate": 5.571886540853652e-06, "loss": 0.3528, "step": 25434 }, { "epoch": 2.5859089060593736, "grad_norm": 0.28435200452804565, "learning_rate": 5.571533982414181e-06, "loss": 0.3721, "step": 25435 }, { "epoch": 2.5860105734038226, "grad_norm": 0.2772703468799591, "learning_rate": 5.571181421095459e-06, "loss": 0.364, "step": 25436 }, { "epoch": 2.5861122407482715, "grad_norm": 0.27017563581466675, "learning_rate": 5.570828856899261e-06, "loss": 0.3509, "step": 25437 }, { "epoch": 2.586213908092721, "grad_norm": 0.25038084387779236, "learning_rate": 5.5704762898273624e-06, "loss": 0.3327, "step": 25438 }, { "epoch": 2.58631557543717, "grad_norm": 0.3114759027957916, "learning_rate": 5.57012371988154e-06, "loss": 0.3633, "step": 25439 }, { "epoch": 2.5864172427816188, "grad_norm": 0.2705901265144348, "learning_rate": 5.569771147063568e-06, "loss": 0.3455, "step": 25440 }, { "epoch": 2.5865189101260677, "grad_norm": 0.2477731853723526, "learning_rate": 5.569418571375227e-06, "loss": 0.3082, "step": 25441 }, { "epoch": 2.5866205774705167, "grad_norm": 0.26905104517936707, "learning_rate": 5.569065992818289e-06, "loss": 0.3701, "step": 25442 }, { "epoch": 2.5867222448149656, "grad_norm": 0.27380549907684326, "learning_rate": 5.568713411394532e-06, "loss": 0.3222, "step": 25443 }, { "epoch": 2.5868239121594145, "grad_norm": 0.256460040807724, "learning_rate": 5.568360827105732e-06, "loss": 0.3264, "step": 25444 }, { "epoch": 2.5869255795038635, "grad_norm": 0.2555142641067505, "learning_rate": 5.568008239953665e-06, "loss": 0.319, "step": 25445 }, { "epoch": 2.5870272468483124, "grad_norm": 0.2762482762336731, "learning_rate": 5.567655649940108e-06, "loss": 0.2967, "step": 25446 }, { "epoch": 2.5871289141927614, "grad_norm": 0.27823176980018616, "learning_rate": 5.567303057066836e-06, "loss": 0.3423, "step": 25447 }, { "epoch": 2.5872305815372103, "grad_norm": 0.2984520196914673, "learning_rate": 5.5669504613356275e-06, "loss": 0.359, "step": 25448 }, { "epoch": 2.5873322488816592, "grad_norm": 0.26144537329673767, "learning_rate": 5.566597862748256e-06, "loss": 0.3438, "step": 25449 }, { "epoch": 2.587433916226108, "grad_norm": 0.255044549703598, "learning_rate": 5.566245261306499e-06, "loss": 0.3454, "step": 25450 }, { "epoch": 2.587535583570557, "grad_norm": 0.2734702527523041, "learning_rate": 5.565892657012133e-06, "loss": 0.3436, "step": 25451 }, { "epoch": 2.587637250915006, "grad_norm": 0.25392332673072815, "learning_rate": 5.565540049866934e-06, "loss": 0.337, "step": 25452 }, { "epoch": 2.587738918259455, "grad_norm": 0.2724229097366333, "learning_rate": 5.56518743987268e-06, "loss": 0.3324, "step": 25453 }, { "epoch": 2.587840585603904, "grad_norm": 0.26407119631767273, "learning_rate": 5.5648348270311444e-06, "loss": 0.3306, "step": 25454 }, { "epoch": 2.587942252948353, "grad_norm": 0.2619727551937103, "learning_rate": 5.564482211344107e-06, "loss": 0.3382, "step": 25455 }, { "epoch": 2.588043920292802, "grad_norm": 0.2977044880390167, "learning_rate": 5.564129592813341e-06, "loss": 0.3523, "step": 25456 }, { "epoch": 2.5881455876372508, "grad_norm": 0.2666870951652527, "learning_rate": 5.563776971440624e-06, "loss": 0.329, "step": 25457 }, { "epoch": 2.5882472549816997, "grad_norm": 0.2745746076107025, "learning_rate": 5.5634243472277336e-06, "loss": 0.3399, "step": 25458 }, { "epoch": 2.5883489223261487, "grad_norm": 0.2797430753707886, "learning_rate": 5.563071720176445e-06, "loss": 0.3206, "step": 25459 }, { "epoch": 2.5884505896705976, "grad_norm": 0.26257196068763733, "learning_rate": 5.562719090288534e-06, "loss": 0.3545, "step": 25460 }, { "epoch": 2.5885522570150465, "grad_norm": 0.2830032706260681, "learning_rate": 5.562366457565779e-06, "loss": 0.3264, "step": 25461 }, { "epoch": 2.5886539243594955, "grad_norm": 0.2669801712036133, "learning_rate": 5.562013822009955e-06, "loss": 0.3286, "step": 25462 }, { "epoch": 2.5887555917039444, "grad_norm": 0.3021560609340668, "learning_rate": 5.561661183622839e-06, "loss": 0.3379, "step": 25463 }, { "epoch": 2.588857259048394, "grad_norm": 0.2777198553085327, "learning_rate": 5.561308542406208e-06, "loss": 0.3302, "step": 25464 }, { "epoch": 2.5889589263928428, "grad_norm": 0.2717422842979431, "learning_rate": 5.560955898361836e-06, "loss": 0.3459, "step": 25465 }, { "epoch": 2.5890605937372917, "grad_norm": 0.29883885383605957, "learning_rate": 5.560603251491503e-06, "loss": 0.3516, "step": 25466 }, { "epoch": 2.5891622610817406, "grad_norm": 0.27492034435272217, "learning_rate": 5.560250601796984e-06, "loss": 0.3318, "step": 25467 }, { "epoch": 2.5892639284261896, "grad_norm": 0.26468485593795776, "learning_rate": 5.559897949280055e-06, "loss": 0.3342, "step": 25468 }, { "epoch": 2.5893655957706385, "grad_norm": 0.2727787494659424, "learning_rate": 5.559545293942494e-06, "loss": 0.3326, "step": 25469 }, { "epoch": 2.5894672631150875, "grad_norm": 0.2618706524372101, "learning_rate": 5.559192635786076e-06, "loss": 0.3477, "step": 25470 }, { "epoch": 2.5895689304595364, "grad_norm": 0.28287333250045776, "learning_rate": 5.558839974812579e-06, "loss": 0.3961, "step": 25471 }, { "epoch": 2.5896705978039853, "grad_norm": 0.29867714643478394, "learning_rate": 5.558487311023778e-06, "loss": 0.3445, "step": 25472 }, { "epoch": 2.5897722651484343, "grad_norm": 0.2616073787212372, "learning_rate": 5.558134644421452e-06, "loss": 0.3254, "step": 25473 }, { "epoch": 2.5898739324928832, "grad_norm": 0.26099544763565063, "learning_rate": 5.557781975007376e-06, "loss": 0.3133, "step": 25474 }, { "epoch": 2.589975599837332, "grad_norm": 0.2808869481086731, "learning_rate": 5.557429302783326e-06, "loss": 0.3358, "step": 25475 }, { "epoch": 2.590077267181781, "grad_norm": 0.2589324712753296, "learning_rate": 5.557076627751082e-06, "loss": 0.3308, "step": 25476 }, { "epoch": 2.59017893452623, "grad_norm": 0.2971981465816498, "learning_rate": 5.556723949912415e-06, "loss": 0.3396, "step": 25477 }, { "epoch": 2.590280601870679, "grad_norm": 0.28549739718437195, "learning_rate": 5.556371269269107e-06, "loss": 0.323, "step": 25478 }, { "epoch": 2.5903822692151284, "grad_norm": 0.28093063831329346, "learning_rate": 5.556018585822932e-06, "loss": 0.3514, "step": 25479 }, { "epoch": 2.5904839365595773, "grad_norm": 0.27491602301597595, "learning_rate": 5.555665899575667e-06, "loss": 0.3245, "step": 25480 }, { "epoch": 2.5905856039040263, "grad_norm": 0.266409307718277, "learning_rate": 5.555313210529092e-06, "loss": 0.3557, "step": 25481 }, { "epoch": 2.590687271248475, "grad_norm": 0.25901275873184204, "learning_rate": 5.554960518684979e-06, "loss": 0.3221, "step": 25482 }, { "epoch": 2.590788938592924, "grad_norm": 0.28574034571647644, "learning_rate": 5.554607824045108e-06, "loss": 0.334, "step": 25483 }, { "epoch": 2.590890605937373, "grad_norm": 0.2647695541381836, "learning_rate": 5.554255126611252e-06, "loss": 0.3142, "step": 25484 }, { "epoch": 2.590992273281822, "grad_norm": 0.2550944983959198, "learning_rate": 5.553902426385191e-06, "loss": 0.3313, "step": 25485 }, { "epoch": 2.591093940626271, "grad_norm": 0.27005472779273987, "learning_rate": 5.553549723368703e-06, "loss": 0.3029, "step": 25486 }, { "epoch": 2.59119560797072, "grad_norm": 0.28765755891799927, "learning_rate": 5.553197017563562e-06, "loss": 0.3656, "step": 25487 }, { "epoch": 2.591297275315169, "grad_norm": 0.3239435851573944, "learning_rate": 5.552844308971547e-06, "loss": 0.3571, "step": 25488 }, { "epoch": 2.591398942659618, "grad_norm": 0.2457207441329956, "learning_rate": 5.552491597594433e-06, "loss": 0.3439, "step": 25489 }, { "epoch": 2.5915006100040667, "grad_norm": 0.28269633650779724, "learning_rate": 5.552138883433996e-06, "loss": 0.3232, "step": 25490 }, { "epoch": 2.5916022773485157, "grad_norm": 0.2733376622200012, "learning_rate": 5.551786166492017e-06, "loss": 0.3423, "step": 25491 }, { "epoch": 2.5917039446929646, "grad_norm": 0.26906803250312805, "learning_rate": 5.551433446770268e-06, "loss": 0.368, "step": 25492 }, { "epoch": 2.5918056120374136, "grad_norm": 0.28763824701309204, "learning_rate": 5.551080724270531e-06, "loss": 0.3195, "step": 25493 }, { "epoch": 2.5919072793818625, "grad_norm": 0.2817988395690918, "learning_rate": 5.5507279989945795e-06, "loss": 0.3399, "step": 25494 }, { "epoch": 2.5920089467263114, "grad_norm": 0.279011994600296, "learning_rate": 5.550375270944189e-06, "loss": 0.3591, "step": 25495 }, { "epoch": 2.5921106140707604, "grad_norm": 0.2725693881511688, "learning_rate": 5.55002254012114e-06, "loss": 0.317, "step": 25496 }, { "epoch": 2.5922122814152093, "grad_norm": 0.2569465935230255, "learning_rate": 5.549669806527209e-06, "loss": 0.3468, "step": 25497 }, { "epoch": 2.5923139487596583, "grad_norm": 0.25230348110198975, "learning_rate": 5.549317070164172e-06, "loss": 0.3091, "step": 25498 }, { "epoch": 2.592415616104107, "grad_norm": 0.28185421228408813, "learning_rate": 5.548964331033807e-06, "loss": 0.3141, "step": 25499 }, { "epoch": 2.592517283448556, "grad_norm": 0.29205089807510376, "learning_rate": 5.548611589137887e-06, "loss": 0.3442, "step": 25500 }, { "epoch": 2.592618950793005, "grad_norm": 0.2728408873081207, "learning_rate": 5.548258844478195e-06, "loss": 0.3193, "step": 25501 }, { "epoch": 2.592720618137454, "grad_norm": 0.2571149170398712, "learning_rate": 5.547906097056503e-06, "loss": 0.3203, "step": 25502 }, { "epoch": 2.592822285481903, "grad_norm": 0.2511899471282959, "learning_rate": 5.547553346874592e-06, "loss": 0.3295, "step": 25503 }, { "epoch": 2.592923952826352, "grad_norm": 0.28337323665618896, "learning_rate": 5.547200593934239e-06, "loss": 0.3382, "step": 25504 }, { "epoch": 2.5930256201708013, "grad_norm": 0.2633025050163269, "learning_rate": 5.546847838237216e-06, "loss": 0.3344, "step": 25505 }, { "epoch": 2.5931272875152502, "grad_norm": 0.2767382264137268, "learning_rate": 5.546495079785304e-06, "loss": 0.3078, "step": 25506 }, { "epoch": 2.593228954859699, "grad_norm": 0.2638356685638428, "learning_rate": 5.546142318580282e-06, "loss": 0.3588, "step": 25507 }, { "epoch": 2.593330622204148, "grad_norm": 0.252419114112854, "learning_rate": 5.545789554623922e-06, "loss": 0.3037, "step": 25508 }, { "epoch": 2.593432289548597, "grad_norm": 0.2669374942779541, "learning_rate": 5.545436787918007e-06, "loss": 0.3443, "step": 25509 }, { "epoch": 2.593533956893046, "grad_norm": 0.2610624432563782, "learning_rate": 5.5450840184643095e-06, "loss": 0.3577, "step": 25510 }, { "epoch": 2.593635624237495, "grad_norm": 0.2900503873825073, "learning_rate": 5.544731246264608e-06, "loss": 0.3059, "step": 25511 }, { "epoch": 2.593737291581944, "grad_norm": 0.261459618806839, "learning_rate": 5.544378471320679e-06, "loss": 0.3488, "step": 25512 }, { "epoch": 2.593838958926393, "grad_norm": 0.2753323018550873, "learning_rate": 5.544025693634302e-06, "loss": 0.3066, "step": 25513 }, { "epoch": 2.5939406262708418, "grad_norm": 0.26302844285964966, "learning_rate": 5.543672913207253e-06, "loss": 0.3214, "step": 25514 }, { "epoch": 2.5940422936152907, "grad_norm": 0.2618762254714966, "learning_rate": 5.543320130041309e-06, "loss": 0.3203, "step": 25515 }, { "epoch": 2.5941439609597396, "grad_norm": 0.2761867940425873, "learning_rate": 5.542967344138246e-06, "loss": 0.3078, "step": 25516 }, { "epoch": 2.5942456283041886, "grad_norm": 0.26433444023132324, "learning_rate": 5.542614555499845e-06, "loss": 0.3366, "step": 25517 }, { "epoch": 2.5943472956486375, "grad_norm": 0.28109705448150635, "learning_rate": 5.5422617641278775e-06, "loss": 0.3539, "step": 25518 }, { "epoch": 2.5944489629930865, "grad_norm": 0.2837965190410614, "learning_rate": 5.541908970024127e-06, "loss": 0.3037, "step": 25519 }, { "epoch": 2.594550630337536, "grad_norm": 0.2719365656375885, "learning_rate": 5.541556173190365e-06, "loss": 0.3433, "step": 25520 }, { "epoch": 2.594652297681985, "grad_norm": 0.2649787366390228, "learning_rate": 5.541203373628375e-06, "loss": 0.3465, "step": 25521 }, { "epoch": 2.5947539650264337, "grad_norm": 0.29415348172187805, "learning_rate": 5.54085057133993e-06, "loss": 0.3096, "step": 25522 }, { "epoch": 2.5948556323708827, "grad_norm": 0.28645452857017517, "learning_rate": 5.540497766326807e-06, "loss": 0.319, "step": 25523 }, { "epoch": 2.5949572997153316, "grad_norm": 0.2601011097431183, "learning_rate": 5.5401449585907855e-06, "loss": 0.3109, "step": 25524 }, { "epoch": 2.5950589670597806, "grad_norm": 0.2651671767234802, "learning_rate": 5.539792148133641e-06, "loss": 0.3269, "step": 25525 }, { "epoch": 2.5951606344042295, "grad_norm": 0.26466310024261475, "learning_rate": 5.539439334957153e-06, "loss": 0.326, "step": 25526 }, { "epoch": 2.5952623017486784, "grad_norm": 0.27300044894218445, "learning_rate": 5.539086519063098e-06, "loss": 0.3547, "step": 25527 }, { "epoch": 2.5953639690931274, "grad_norm": 0.2961079180240631, "learning_rate": 5.538733700453254e-06, "loss": 0.3179, "step": 25528 }, { "epoch": 2.5954656364375763, "grad_norm": 0.293847918510437, "learning_rate": 5.538380879129397e-06, "loss": 0.3456, "step": 25529 }, { "epoch": 2.5955673037820253, "grad_norm": 0.2651279866695404, "learning_rate": 5.538028055093304e-06, "loss": 0.3246, "step": 25530 }, { "epoch": 2.595668971126474, "grad_norm": 0.251516729593277, "learning_rate": 5.537675228346754e-06, "loss": 0.3119, "step": 25531 }, { "epoch": 2.595770638470923, "grad_norm": 0.27964136004447937, "learning_rate": 5.537322398891525e-06, "loss": 0.3324, "step": 25532 }, { "epoch": 2.595872305815372, "grad_norm": 0.2715386152267456, "learning_rate": 5.536969566729391e-06, "loss": 0.3534, "step": 25533 }, { "epoch": 2.595973973159821, "grad_norm": 0.26210370659828186, "learning_rate": 5.536616731862134e-06, "loss": 0.3522, "step": 25534 }, { "epoch": 2.59607564050427, "grad_norm": 0.2581787705421448, "learning_rate": 5.53626389429153e-06, "loss": 0.3442, "step": 25535 }, { "epoch": 2.596177307848719, "grad_norm": 0.2700856029987335, "learning_rate": 5.535911054019354e-06, "loss": 0.3602, "step": 25536 }, { "epoch": 2.596278975193168, "grad_norm": 0.2526087462902069, "learning_rate": 5.535558211047387e-06, "loss": 0.3326, "step": 25537 }, { "epoch": 2.596380642537617, "grad_norm": 0.26315632462501526, "learning_rate": 5.535205365377404e-06, "loss": 0.3193, "step": 25538 }, { "epoch": 2.5964823098820657, "grad_norm": 0.25515368580818176, "learning_rate": 5.534852517011185e-06, "loss": 0.3365, "step": 25539 }, { "epoch": 2.5965839772265147, "grad_norm": 0.26504650712013245, "learning_rate": 5.534499665950505e-06, "loss": 0.3325, "step": 25540 }, { "epoch": 2.5966856445709636, "grad_norm": 0.27531343698501587, "learning_rate": 5.534146812197141e-06, "loss": 0.3035, "step": 25541 }, { "epoch": 2.5967873119154126, "grad_norm": 0.2601003348827362, "learning_rate": 5.5337939557528756e-06, "loss": 0.3304, "step": 25542 }, { "epoch": 2.5968889792598615, "grad_norm": 0.2817520499229431, "learning_rate": 5.53344109661948e-06, "loss": 0.3073, "step": 25543 }, { "epoch": 2.5969906466043104, "grad_norm": 0.269672155380249, "learning_rate": 5.533088234798738e-06, "loss": 0.328, "step": 25544 }, { "epoch": 2.5970923139487594, "grad_norm": 0.2666322886943817, "learning_rate": 5.532735370292421e-06, "loss": 0.3063, "step": 25545 }, { "epoch": 2.5971939812932088, "grad_norm": 0.2680196762084961, "learning_rate": 5.532382503102312e-06, "loss": 0.3316, "step": 25546 }, { "epoch": 2.5972956486376577, "grad_norm": 0.2528418302536011, "learning_rate": 5.532029633230187e-06, "loss": 0.3358, "step": 25547 }, { "epoch": 2.5973973159821067, "grad_norm": 0.268901526927948, "learning_rate": 5.531676760677821e-06, "loss": 0.3293, "step": 25548 }, { "epoch": 2.5974989833265556, "grad_norm": 0.2774601876735687, "learning_rate": 5.531323885446995e-06, "loss": 0.3339, "step": 25549 }, { "epoch": 2.5976006506710045, "grad_norm": 0.2961856424808502, "learning_rate": 5.530971007539485e-06, "loss": 0.3358, "step": 25550 }, { "epoch": 2.5977023180154535, "grad_norm": 0.2786388099193573, "learning_rate": 5.53061812695707e-06, "loss": 0.3422, "step": 25551 }, { "epoch": 2.5978039853599024, "grad_norm": 0.25742319226264954, "learning_rate": 5.530265243701527e-06, "loss": 0.3521, "step": 25552 }, { "epoch": 2.5979056527043514, "grad_norm": 0.26120442152023315, "learning_rate": 5.529912357774633e-06, "loss": 0.3482, "step": 25553 }, { "epoch": 2.5980073200488003, "grad_norm": 0.28127947449684143, "learning_rate": 5.529559469178167e-06, "loss": 0.3464, "step": 25554 }, { "epoch": 2.5981089873932492, "grad_norm": 0.26243889331817627, "learning_rate": 5.529206577913906e-06, "loss": 0.3553, "step": 25555 }, { "epoch": 2.598210654737698, "grad_norm": 0.24622435867786407, "learning_rate": 5.528853683983627e-06, "loss": 0.3427, "step": 25556 }, { "epoch": 2.598312322082147, "grad_norm": 0.2673795819282532, "learning_rate": 5.528500787389111e-06, "loss": 0.3468, "step": 25557 }, { "epoch": 2.598413989426596, "grad_norm": 0.2912805378437042, "learning_rate": 5.528147888132132e-06, "loss": 0.3419, "step": 25558 }, { "epoch": 2.598515656771045, "grad_norm": 0.31257930397987366, "learning_rate": 5.5277949862144705e-06, "loss": 0.3635, "step": 25559 }, { "epoch": 2.598617324115494, "grad_norm": 0.2630275785923004, "learning_rate": 5.5274420816379025e-06, "loss": 0.3465, "step": 25560 }, { "epoch": 2.5987189914599433, "grad_norm": 0.26321956515312195, "learning_rate": 5.5270891744042066e-06, "loss": 0.3566, "step": 25561 }, { "epoch": 2.5988206588043923, "grad_norm": 0.26241546869277954, "learning_rate": 5.526736264515161e-06, "loss": 0.3367, "step": 25562 }, { "epoch": 2.598922326148841, "grad_norm": 0.25349393486976624, "learning_rate": 5.526383351972543e-06, "loss": 0.3349, "step": 25563 }, { "epoch": 2.59902399349329, "grad_norm": 0.26733872294425964, "learning_rate": 5.526030436778131e-06, "loss": 0.3523, "step": 25564 }, { "epoch": 2.599125660837739, "grad_norm": 0.2641267478466034, "learning_rate": 5.525677518933704e-06, "loss": 0.342, "step": 25565 }, { "epoch": 2.599227328182188, "grad_norm": 0.2531397044658661, "learning_rate": 5.525324598441036e-06, "loss": 0.3118, "step": 25566 }, { "epoch": 2.599328995526637, "grad_norm": 0.2658684551715851, "learning_rate": 5.524971675301909e-06, "loss": 0.3225, "step": 25567 }, { "epoch": 2.599430662871086, "grad_norm": 0.26528066396713257, "learning_rate": 5.5246187495180984e-06, "loss": 0.3473, "step": 25568 }, { "epoch": 2.599532330215535, "grad_norm": 0.25789356231689453, "learning_rate": 5.524265821091385e-06, "loss": 0.3173, "step": 25569 }, { "epoch": 2.599633997559984, "grad_norm": 0.2684805393218994, "learning_rate": 5.523912890023544e-06, "loss": 0.351, "step": 25570 }, { "epoch": 2.5997356649044328, "grad_norm": 0.28895333409309387, "learning_rate": 5.523559956316353e-06, "loss": 0.3635, "step": 25571 }, { "epoch": 2.5998373322488817, "grad_norm": 0.26734405755996704, "learning_rate": 5.5232070199715934e-06, "loss": 0.3484, "step": 25572 }, { "epoch": 2.5999389995933306, "grad_norm": 0.27954983711242676, "learning_rate": 5.52285408099104e-06, "loss": 0.345, "step": 25573 }, { "epoch": 2.6000406669377796, "grad_norm": 0.2935657501220703, "learning_rate": 5.522501139376472e-06, "loss": 0.3194, "step": 25574 }, { "epoch": 2.6001423342822285, "grad_norm": 0.27992749214172363, "learning_rate": 5.522148195129669e-06, "loss": 0.3442, "step": 25575 }, { "epoch": 2.6002440016266775, "grad_norm": 0.2673935890197754, "learning_rate": 5.521795248252405e-06, "loss": 0.3212, "step": 25576 }, { "epoch": 2.6003456689711264, "grad_norm": 0.2585172653198242, "learning_rate": 5.521442298746463e-06, "loss": 0.3547, "step": 25577 }, { "epoch": 2.6004473363155753, "grad_norm": 0.2693099081516266, "learning_rate": 5.521089346613617e-06, "loss": 0.3119, "step": 25578 }, { "epoch": 2.6005490036600243, "grad_norm": 0.30757343769073486, "learning_rate": 5.520736391855648e-06, "loss": 0.3602, "step": 25579 }, { "epoch": 2.6006506710044732, "grad_norm": 0.28763121366500854, "learning_rate": 5.520383434474332e-06, "loss": 0.3831, "step": 25580 }, { "epoch": 2.600752338348922, "grad_norm": 0.2547074258327484, "learning_rate": 5.520030474471447e-06, "loss": 0.3288, "step": 25581 }, { "epoch": 2.600854005693371, "grad_norm": 0.25943684577941895, "learning_rate": 5.519677511848774e-06, "loss": 0.3721, "step": 25582 }, { "epoch": 2.60095567303782, "grad_norm": 0.27685093879699707, "learning_rate": 5.519324546608088e-06, "loss": 0.3152, "step": 25583 }, { "epoch": 2.601057340382269, "grad_norm": 0.28519585728645325, "learning_rate": 5.518971578751169e-06, "loss": 0.3424, "step": 25584 }, { "epoch": 2.601159007726718, "grad_norm": 0.2692842483520508, "learning_rate": 5.518618608279794e-06, "loss": 0.3368, "step": 25585 }, { "epoch": 2.601260675071167, "grad_norm": 0.2809993624687195, "learning_rate": 5.51826563519574e-06, "loss": 0.3086, "step": 25586 }, { "epoch": 2.6013623424156163, "grad_norm": 0.26261430978775024, "learning_rate": 5.517912659500789e-06, "loss": 0.3408, "step": 25587 }, { "epoch": 2.601464009760065, "grad_norm": 0.2650892436504364, "learning_rate": 5.517559681196718e-06, "loss": 0.3384, "step": 25588 }, { "epoch": 2.601565677104514, "grad_norm": 0.31041938066482544, "learning_rate": 5.517206700285302e-06, "loss": 0.3571, "step": 25589 }, { "epoch": 2.601667344448963, "grad_norm": 0.297748863697052, "learning_rate": 5.516853716768323e-06, "loss": 0.3249, "step": 25590 }, { "epoch": 2.601769011793412, "grad_norm": 0.2561883330345154, "learning_rate": 5.516500730647556e-06, "loss": 0.2929, "step": 25591 }, { "epoch": 2.601870679137861, "grad_norm": 0.2799752950668335, "learning_rate": 5.516147741924784e-06, "loss": 0.3079, "step": 25592 }, { "epoch": 2.60197234648231, "grad_norm": 0.26906710863113403, "learning_rate": 5.515794750601781e-06, "loss": 0.3384, "step": 25593 }, { "epoch": 2.602074013826759, "grad_norm": 0.28601887822151184, "learning_rate": 5.515441756680325e-06, "loss": 0.3542, "step": 25594 }, { "epoch": 2.602175681171208, "grad_norm": 0.25737857818603516, "learning_rate": 5.515088760162198e-06, "loss": 0.3397, "step": 25595 }, { "epoch": 2.6022773485156567, "grad_norm": 0.26852914690971375, "learning_rate": 5.514735761049176e-06, "loss": 0.3519, "step": 25596 }, { "epoch": 2.6023790158601057, "grad_norm": 0.2643892467021942, "learning_rate": 5.514382759343036e-06, "loss": 0.3612, "step": 25597 }, { "epoch": 2.6024806832045546, "grad_norm": 0.2570754885673523, "learning_rate": 5.51402975504556e-06, "loss": 0.3312, "step": 25598 }, { "epoch": 2.6025823505490036, "grad_norm": 0.2580625116825104, "learning_rate": 5.513676748158523e-06, "loss": 0.3458, "step": 25599 }, { "epoch": 2.6026840178934525, "grad_norm": 0.29427212476730347, "learning_rate": 5.513323738683704e-06, "loss": 0.3544, "step": 25600 }, { "epoch": 2.6027856852379014, "grad_norm": 0.26581454277038574, "learning_rate": 5.512970726622884e-06, "loss": 0.3352, "step": 25601 }, { "epoch": 2.602887352582351, "grad_norm": 0.2518477737903595, "learning_rate": 5.512617711977837e-06, "loss": 0.2966, "step": 25602 }, { "epoch": 2.6029890199267998, "grad_norm": 0.24580422043800354, "learning_rate": 5.512264694750346e-06, "loss": 0.3385, "step": 25603 }, { "epoch": 2.6030906872712487, "grad_norm": 0.26301440596580505, "learning_rate": 5.5119116749421856e-06, "loss": 0.3018, "step": 25604 }, { "epoch": 2.6031923546156976, "grad_norm": 0.25387269258499146, "learning_rate": 5.511558652555137e-06, "loss": 0.3146, "step": 25605 }, { "epoch": 2.6032940219601466, "grad_norm": 0.2950826585292816, "learning_rate": 5.511205627590977e-06, "loss": 0.3444, "step": 25606 }, { "epoch": 2.6033956893045955, "grad_norm": 0.27494120597839355, "learning_rate": 5.510852600051483e-06, "loss": 0.3299, "step": 25607 }, { "epoch": 2.6034973566490445, "grad_norm": 0.2744990885257721, "learning_rate": 5.510499569938438e-06, "loss": 0.3402, "step": 25608 }, { "epoch": 2.6035990239934934, "grad_norm": 0.2631710469722748, "learning_rate": 5.510146537253614e-06, "loss": 0.351, "step": 25609 }, { "epoch": 2.6037006913379424, "grad_norm": 0.2796623706817627, "learning_rate": 5.509793501998796e-06, "loss": 0.357, "step": 25610 }, { "epoch": 2.6038023586823913, "grad_norm": 0.27593398094177246, "learning_rate": 5.509440464175758e-06, "loss": 0.3308, "step": 25611 }, { "epoch": 2.6039040260268402, "grad_norm": 0.26017001271247864, "learning_rate": 5.5090874237862805e-06, "loss": 0.3578, "step": 25612 }, { "epoch": 2.604005693371289, "grad_norm": 0.28140372037887573, "learning_rate": 5.508734380832141e-06, "loss": 0.3574, "step": 25613 }, { "epoch": 2.604107360715738, "grad_norm": 0.2772383391857147, "learning_rate": 5.508381335315119e-06, "loss": 0.3374, "step": 25614 }, { "epoch": 2.604209028060187, "grad_norm": 0.2678217887878418, "learning_rate": 5.5080282872369925e-06, "loss": 0.3612, "step": 25615 }, { "epoch": 2.604310695404636, "grad_norm": 0.2712008059024811, "learning_rate": 5.507675236599541e-06, "loss": 0.3133, "step": 25616 }, { "epoch": 2.604412362749085, "grad_norm": 0.25181522965431213, "learning_rate": 5.507322183404541e-06, "loss": 0.3174, "step": 25617 }, { "epoch": 2.604514030093534, "grad_norm": 0.24656762182712555, "learning_rate": 5.506969127653774e-06, "loss": 0.3174, "step": 25618 }, { "epoch": 2.604615697437983, "grad_norm": 0.264274001121521, "learning_rate": 5.506616069349015e-06, "loss": 0.3287, "step": 25619 }, { "epoch": 2.6047173647824318, "grad_norm": 0.26827096939086914, "learning_rate": 5.506263008492046e-06, "loss": 0.3536, "step": 25620 }, { "epoch": 2.6048190321268807, "grad_norm": 0.26821959018707275, "learning_rate": 5.505909945084644e-06, "loss": 0.3398, "step": 25621 }, { "epoch": 2.6049206994713296, "grad_norm": 0.24995988607406616, "learning_rate": 5.5055568791285875e-06, "loss": 0.3529, "step": 25622 }, { "epoch": 2.6050223668157786, "grad_norm": 0.30067047476768494, "learning_rate": 5.505203810625657e-06, "loss": 0.3222, "step": 25623 }, { "epoch": 2.6051240341602275, "grad_norm": 0.2871939241886139, "learning_rate": 5.504850739577627e-06, "loss": 0.3425, "step": 25624 }, { "epoch": 2.6052257015046765, "grad_norm": 0.2658213675022125, "learning_rate": 5.504497665986282e-06, "loss": 0.3356, "step": 25625 }, { "epoch": 2.6053273688491254, "grad_norm": 0.2705894410610199, "learning_rate": 5.504144589853395e-06, "loss": 0.3001, "step": 25626 }, { "epoch": 2.6054290361935744, "grad_norm": 0.28241556882858276, "learning_rate": 5.503791511180749e-06, "loss": 0.3275, "step": 25627 }, { "epoch": 2.6055307035380237, "grad_norm": 0.288301557302475, "learning_rate": 5.503438429970121e-06, "loss": 0.3356, "step": 25628 }, { "epoch": 2.6056323708824727, "grad_norm": 0.2977510392665863, "learning_rate": 5.503085346223289e-06, "loss": 0.3407, "step": 25629 }, { "epoch": 2.6057340382269216, "grad_norm": 0.26076823472976685, "learning_rate": 5.502732259942034e-06, "loss": 0.3098, "step": 25630 }, { "epoch": 2.6058357055713706, "grad_norm": 0.27190962433815, "learning_rate": 5.5023791711281315e-06, "loss": 0.3289, "step": 25631 }, { "epoch": 2.6059373729158195, "grad_norm": 0.29626551270484924, "learning_rate": 5.5020260797833634e-06, "loss": 0.3199, "step": 25632 }, { "epoch": 2.6060390402602684, "grad_norm": 0.29453790187835693, "learning_rate": 5.5016729859095075e-06, "loss": 0.3339, "step": 25633 }, { "epoch": 2.6061407076047174, "grad_norm": 0.2656539976596832, "learning_rate": 5.501319889508341e-06, "loss": 0.3428, "step": 25634 }, { "epoch": 2.6062423749491663, "grad_norm": 0.2618473768234253, "learning_rate": 5.500966790581645e-06, "loss": 0.3176, "step": 25635 }, { "epoch": 2.6063440422936153, "grad_norm": 0.28407689929008484, "learning_rate": 5.500613689131196e-06, "loss": 0.3114, "step": 25636 }, { "epoch": 2.606445709638064, "grad_norm": 0.2497505098581314, "learning_rate": 5.5002605851587765e-06, "loss": 0.3011, "step": 25637 }, { "epoch": 2.606547376982513, "grad_norm": 0.26109254360198975, "learning_rate": 5.4999074786661625e-06, "loss": 0.3435, "step": 25638 }, { "epoch": 2.606649044326962, "grad_norm": 0.27840667963027954, "learning_rate": 5.4995543696551315e-06, "loss": 0.3507, "step": 25639 }, { "epoch": 2.606750711671411, "grad_norm": 0.2755070924758911, "learning_rate": 5.4992012581274665e-06, "loss": 0.3398, "step": 25640 }, { "epoch": 2.60685237901586, "grad_norm": 0.25221535563468933, "learning_rate": 5.498848144084944e-06, "loss": 0.3445, "step": 25641 }, { "epoch": 2.606954046360309, "grad_norm": 0.24553464353084564, "learning_rate": 5.49849502752934e-06, "loss": 0.3104, "step": 25642 }, { "epoch": 2.6070557137047583, "grad_norm": 0.2684628367424011, "learning_rate": 5.49814190846244e-06, "loss": 0.3474, "step": 25643 }, { "epoch": 2.6071573810492072, "grad_norm": 0.26561975479125977, "learning_rate": 5.497788786886018e-06, "loss": 0.3534, "step": 25644 }, { "epoch": 2.607259048393656, "grad_norm": 0.2613207697868347, "learning_rate": 5.497435662801856e-06, "loss": 0.3593, "step": 25645 }, { "epoch": 2.607360715738105, "grad_norm": 0.2568790316581726, "learning_rate": 5.49708253621173e-06, "loss": 0.3153, "step": 25646 }, { "epoch": 2.607462383082554, "grad_norm": 0.26566794514656067, "learning_rate": 5.496729407117419e-06, "loss": 0.3253, "step": 25647 }, { "epoch": 2.607564050427003, "grad_norm": 0.267050176858902, "learning_rate": 5.496376275520705e-06, "loss": 0.3104, "step": 25648 }, { "epoch": 2.607665717771452, "grad_norm": 0.25315427780151367, "learning_rate": 5.496023141423364e-06, "loss": 0.3323, "step": 25649 }, { "epoch": 2.607767385115901, "grad_norm": 0.3190235495567322, "learning_rate": 5.495670004827178e-06, "loss": 0.315, "step": 25650 }, { "epoch": 2.60786905246035, "grad_norm": 0.2587541937828064, "learning_rate": 5.495316865733924e-06, "loss": 0.3031, "step": 25651 }, { "epoch": 2.6079707198047988, "grad_norm": 0.26904287934303284, "learning_rate": 5.4949637241453805e-06, "loss": 0.3082, "step": 25652 }, { "epoch": 2.6080723871492477, "grad_norm": 0.263332724571228, "learning_rate": 5.494610580063327e-06, "loss": 0.3522, "step": 25653 }, { "epoch": 2.6081740544936967, "grad_norm": 0.27672505378723145, "learning_rate": 5.494257433489544e-06, "loss": 0.3351, "step": 25654 }, { "epoch": 2.6082757218381456, "grad_norm": 0.282272607088089, "learning_rate": 5.493904284425809e-06, "loss": 0.3147, "step": 25655 }, { "epoch": 2.6083773891825945, "grad_norm": 0.27124205231666565, "learning_rate": 5.493551132873903e-06, "loss": 0.3172, "step": 25656 }, { "epoch": 2.6084790565270435, "grad_norm": 0.2658667266368866, "learning_rate": 5.493197978835601e-06, "loss": 0.3404, "step": 25657 }, { "epoch": 2.6085807238714924, "grad_norm": 0.26964080333709717, "learning_rate": 5.492844822312686e-06, "loss": 0.3206, "step": 25658 }, { "epoch": 2.6086823912159414, "grad_norm": 0.2874061167240143, "learning_rate": 5.492491663306937e-06, "loss": 0.3448, "step": 25659 }, { "epoch": 2.6087840585603903, "grad_norm": 0.2902132272720337, "learning_rate": 5.492138501820131e-06, "loss": 0.3255, "step": 25660 }, { "epoch": 2.6088857259048392, "grad_norm": 0.29444223642349243, "learning_rate": 5.491785337854049e-06, "loss": 0.3457, "step": 25661 }, { "epoch": 2.608987393249288, "grad_norm": 0.2553868591785431, "learning_rate": 5.491432171410467e-06, "loss": 0.3203, "step": 25662 }, { "epoch": 2.609089060593737, "grad_norm": 0.25255492329597473, "learning_rate": 5.491079002491169e-06, "loss": 0.2997, "step": 25663 }, { "epoch": 2.609190727938186, "grad_norm": 0.25806164741516113, "learning_rate": 5.490725831097931e-06, "loss": 0.3557, "step": 25664 }, { "epoch": 2.609292395282635, "grad_norm": 0.2792217433452606, "learning_rate": 5.490372657232532e-06, "loss": 0.3363, "step": 25665 }, { "epoch": 2.609394062627084, "grad_norm": 0.2611536979675293, "learning_rate": 5.490019480896754e-06, "loss": 0.333, "step": 25666 }, { "epoch": 2.609495729971533, "grad_norm": 0.26462534070014954, "learning_rate": 5.4896663020923716e-06, "loss": 0.3303, "step": 25667 }, { "epoch": 2.609597397315982, "grad_norm": 0.3008977770805359, "learning_rate": 5.489313120821169e-06, "loss": 0.3418, "step": 25668 }, { "epoch": 2.6096990646604312, "grad_norm": 0.2747044861316681, "learning_rate": 5.488959937084924e-06, "loss": 0.3432, "step": 25669 }, { "epoch": 2.60980073200488, "grad_norm": 0.25441598892211914, "learning_rate": 5.488606750885412e-06, "loss": 0.3486, "step": 25670 }, { "epoch": 2.609902399349329, "grad_norm": 0.2752818167209625, "learning_rate": 5.488253562224417e-06, "loss": 0.3174, "step": 25671 }, { "epoch": 2.610004066693778, "grad_norm": 0.28516536951065063, "learning_rate": 5.487900371103715e-06, "loss": 0.3361, "step": 25672 }, { "epoch": 2.610105734038227, "grad_norm": 0.2656139135360718, "learning_rate": 5.487547177525089e-06, "loss": 0.3558, "step": 25673 }, { "epoch": 2.610207401382676, "grad_norm": 0.28133219480514526, "learning_rate": 5.487193981490317e-06, "loss": 0.3533, "step": 25674 }, { "epoch": 2.610309068727125, "grad_norm": 0.2827426791191101, "learning_rate": 5.486840783001175e-06, "loss": 0.3276, "step": 25675 }, { "epoch": 2.610410736071574, "grad_norm": 0.2581414580345154, "learning_rate": 5.486487582059446e-06, "loss": 0.3606, "step": 25676 }, { "epoch": 2.6105124034160228, "grad_norm": 0.265598863363266, "learning_rate": 5.486134378666906e-06, "loss": 0.3178, "step": 25677 }, { "epoch": 2.6106140707604717, "grad_norm": 0.29729798436164856, "learning_rate": 5.4857811728253395e-06, "loss": 0.3353, "step": 25678 }, { "epoch": 2.6107157381049206, "grad_norm": 0.26476243138313293, "learning_rate": 5.485427964536522e-06, "loss": 0.3401, "step": 25679 }, { "epoch": 2.6108174054493696, "grad_norm": 0.27833759784698486, "learning_rate": 5.485074753802233e-06, "loss": 0.3362, "step": 25680 }, { "epoch": 2.6109190727938185, "grad_norm": 0.2607002854347229, "learning_rate": 5.484721540624254e-06, "loss": 0.3317, "step": 25681 }, { "epoch": 2.6110207401382675, "grad_norm": 0.27427852153778076, "learning_rate": 5.484368325004362e-06, "loss": 0.3071, "step": 25682 }, { "epoch": 2.6111224074827164, "grad_norm": 0.251733660697937, "learning_rate": 5.4840151069443374e-06, "loss": 0.3468, "step": 25683 }, { "epoch": 2.611224074827166, "grad_norm": 0.2552911341190338, "learning_rate": 5.48366188644596e-06, "loss": 0.3269, "step": 25684 }, { "epoch": 2.6113257421716147, "grad_norm": 0.24340102076530457, "learning_rate": 5.4833086635110085e-06, "loss": 0.3916, "step": 25685 }, { "epoch": 2.6114274095160637, "grad_norm": 0.29134613275527954, "learning_rate": 5.482955438141264e-06, "loss": 0.3063, "step": 25686 }, { "epoch": 2.6115290768605126, "grad_norm": 0.2546899616718292, "learning_rate": 5.482602210338503e-06, "loss": 0.3289, "step": 25687 }, { "epoch": 2.6116307442049616, "grad_norm": 0.269334614276886, "learning_rate": 5.482248980104509e-06, "loss": 0.3017, "step": 25688 }, { "epoch": 2.6117324115494105, "grad_norm": 0.27368152141571045, "learning_rate": 5.4818957474410584e-06, "loss": 0.3433, "step": 25689 }, { "epoch": 2.6118340788938594, "grad_norm": 0.262373149394989, "learning_rate": 5.48154251234993e-06, "loss": 0.3147, "step": 25690 }, { "epoch": 2.6119357462383084, "grad_norm": 0.2777891457080841, "learning_rate": 5.481189274832908e-06, "loss": 0.3285, "step": 25691 }, { "epoch": 2.6120374135827573, "grad_norm": 0.28048214316368103, "learning_rate": 5.480836034891766e-06, "loss": 0.3491, "step": 25692 }, { "epoch": 2.6121390809272063, "grad_norm": 0.28575339913368225, "learning_rate": 5.4804827925282865e-06, "loss": 0.3402, "step": 25693 }, { "epoch": 2.612240748271655, "grad_norm": 0.27644580602645874, "learning_rate": 5.48012954774425e-06, "loss": 0.3167, "step": 25694 }, { "epoch": 2.612342415616104, "grad_norm": 0.2464359849691391, "learning_rate": 5.479776300541434e-06, "loss": 0.3119, "step": 25695 }, { "epoch": 2.612444082960553, "grad_norm": 0.24853086471557617, "learning_rate": 5.479423050921621e-06, "loss": 0.3349, "step": 25696 }, { "epoch": 2.612545750305002, "grad_norm": 0.2685369849205017, "learning_rate": 5.479069798886586e-06, "loss": 0.3318, "step": 25697 }, { "epoch": 2.612647417649451, "grad_norm": 0.2541504204273224, "learning_rate": 5.478716544438112e-06, "loss": 0.3317, "step": 25698 }, { "epoch": 2.6127490849939, "grad_norm": 0.26948192715644836, "learning_rate": 5.478363287577978e-06, "loss": 0.3279, "step": 25699 }, { "epoch": 2.612850752338349, "grad_norm": 0.2524186968803406, "learning_rate": 5.478010028307963e-06, "loss": 0.3232, "step": 25700 }, { "epoch": 2.612952419682798, "grad_norm": 0.26755890250205994, "learning_rate": 5.477656766629849e-06, "loss": 0.3269, "step": 25701 }, { "epoch": 2.6130540870272467, "grad_norm": 0.2719729244709015, "learning_rate": 5.4773035025454116e-06, "loss": 0.3196, "step": 25702 }, { "epoch": 2.6131557543716957, "grad_norm": 0.27909255027770996, "learning_rate": 5.476950236056433e-06, "loss": 0.3592, "step": 25703 }, { "epoch": 2.6132574217161446, "grad_norm": 0.27594202756881714, "learning_rate": 5.476596967164694e-06, "loss": 0.3194, "step": 25704 }, { "epoch": 2.6133590890605936, "grad_norm": 0.24725447595119476, "learning_rate": 5.47624369587197e-06, "loss": 0.3278, "step": 25705 }, { "epoch": 2.6134607564050425, "grad_norm": 0.2692319452762604, "learning_rate": 5.475890422180046e-06, "loss": 0.3446, "step": 25706 }, { "epoch": 2.6135624237494914, "grad_norm": 0.2694965600967407, "learning_rate": 5.475537146090697e-06, "loss": 0.3184, "step": 25707 }, { "epoch": 2.6136640910939404, "grad_norm": 0.28158068656921387, "learning_rate": 5.475183867605705e-06, "loss": 0.3367, "step": 25708 }, { "epoch": 2.6137657584383893, "grad_norm": 0.2656289041042328, "learning_rate": 5.474830586726851e-06, "loss": 0.4147, "step": 25709 }, { "epoch": 2.6138674257828387, "grad_norm": 0.2556171119213104, "learning_rate": 5.474477303455913e-06, "loss": 0.3497, "step": 25710 }, { "epoch": 2.6139690931272876, "grad_norm": 0.2781803011894226, "learning_rate": 5.474124017794672e-06, "loss": 0.3275, "step": 25711 }, { "epoch": 2.6140707604717366, "grad_norm": 0.26897087693214417, "learning_rate": 5.473770729744905e-06, "loss": 0.3218, "step": 25712 }, { "epoch": 2.6141724278161855, "grad_norm": 0.28260937333106995, "learning_rate": 5.473417439308395e-06, "loss": 0.34, "step": 25713 }, { "epoch": 2.6142740951606345, "grad_norm": 0.2753918170928955, "learning_rate": 5.47306414648692e-06, "loss": 0.3321, "step": 25714 }, { "epoch": 2.6143757625050834, "grad_norm": 0.2960081100463867, "learning_rate": 5.472710851282259e-06, "loss": 0.3058, "step": 25715 }, { "epoch": 2.6144774298495324, "grad_norm": 0.26259222626686096, "learning_rate": 5.472357553696196e-06, "loss": 0.35, "step": 25716 }, { "epoch": 2.6145790971939813, "grad_norm": 0.271085649728775, "learning_rate": 5.472004253730506e-06, "loss": 0.334, "step": 25717 }, { "epoch": 2.6146807645384302, "grad_norm": 0.26624077558517456, "learning_rate": 5.47165095138697e-06, "loss": 0.3317, "step": 25718 }, { "epoch": 2.614782431882879, "grad_norm": 0.26516082882881165, "learning_rate": 5.471297646667369e-06, "loss": 0.3145, "step": 25719 }, { "epoch": 2.614884099227328, "grad_norm": 0.2887488007545471, "learning_rate": 5.470944339573483e-06, "loss": 0.3242, "step": 25720 }, { "epoch": 2.614985766571777, "grad_norm": 0.2778054177761078, "learning_rate": 5.470591030107092e-06, "loss": 0.3272, "step": 25721 }, { "epoch": 2.615087433916226, "grad_norm": 0.2606601417064667, "learning_rate": 5.470237718269975e-06, "loss": 0.3359, "step": 25722 }, { "epoch": 2.615189101260675, "grad_norm": 0.2581822872161865, "learning_rate": 5.46988440406391e-06, "loss": 0.3614, "step": 25723 }, { "epoch": 2.615290768605124, "grad_norm": 0.2696126103401184, "learning_rate": 5.469531087490682e-06, "loss": 0.3395, "step": 25724 }, { "epoch": 2.6153924359495733, "grad_norm": 0.27153918147087097, "learning_rate": 5.4691777685520655e-06, "loss": 0.3548, "step": 25725 }, { "epoch": 2.615494103294022, "grad_norm": 0.29939186573028564, "learning_rate": 5.4688244472498446e-06, "loss": 0.3478, "step": 25726 }, { "epoch": 2.615595770638471, "grad_norm": 0.2729816138744354, "learning_rate": 5.468471123585798e-06, "loss": 0.3364, "step": 25727 }, { "epoch": 2.61569743798292, "grad_norm": 0.25859761238098145, "learning_rate": 5.468117797561702e-06, "loss": 0.3155, "step": 25728 }, { "epoch": 2.615799105327369, "grad_norm": 0.24337345361709595, "learning_rate": 5.467764469179343e-06, "loss": 0.3322, "step": 25729 }, { "epoch": 2.615900772671818, "grad_norm": 0.2806481719017029, "learning_rate": 5.467411138440496e-06, "loss": 0.3275, "step": 25730 }, { "epoch": 2.616002440016267, "grad_norm": 0.24664543569087982, "learning_rate": 5.467057805346944e-06, "loss": 0.3302, "step": 25731 }, { "epoch": 2.616104107360716, "grad_norm": 0.2914583384990692, "learning_rate": 5.466704469900466e-06, "loss": 0.3176, "step": 25732 }, { "epoch": 2.616205774705165, "grad_norm": 0.27327364683151245, "learning_rate": 5.466351132102839e-06, "loss": 0.3272, "step": 25733 }, { "epoch": 2.6163074420496137, "grad_norm": 0.2643086016178131, "learning_rate": 5.465997791955848e-06, "loss": 0.3269, "step": 25734 }, { "epoch": 2.6164091093940627, "grad_norm": 0.2505994141101837, "learning_rate": 5.465644449461272e-06, "loss": 0.3464, "step": 25735 }, { "epoch": 2.6165107767385116, "grad_norm": 0.24487942457199097, "learning_rate": 5.465291104620888e-06, "loss": 0.3284, "step": 25736 }, { "epoch": 2.6166124440829606, "grad_norm": 0.2675507366657257, "learning_rate": 5.464937757436479e-06, "loss": 0.3094, "step": 25737 }, { "epoch": 2.6167141114274095, "grad_norm": 0.29102450609207153, "learning_rate": 5.464584407909822e-06, "loss": 0.3639, "step": 25738 }, { "epoch": 2.6168157787718584, "grad_norm": 0.2649047374725342, "learning_rate": 5.464231056042702e-06, "loss": 0.3273, "step": 25739 }, { "epoch": 2.6169174461163074, "grad_norm": 0.2667420506477356, "learning_rate": 5.463877701836895e-06, "loss": 0.3207, "step": 25740 }, { "epoch": 2.6170191134607563, "grad_norm": 0.28041332960128784, "learning_rate": 5.463524345294182e-06, "loss": 0.2978, "step": 25741 }, { "epoch": 2.6171207808052053, "grad_norm": 0.26621973514556885, "learning_rate": 5.4631709864163455e-06, "loss": 0.3282, "step": 25742 }, { "epoch": 2.617222448149654, "grad_norm": 0.29307281970977783, "learning_rate": 5.46281762520516e-06, "loss": 0.3598, "step": 25743 }, { "epoch": 2.617324115494103, "grad_norm": 0.261464923620224, "learning_rate": 5.462464261662412e-06, "loss": 0.3327, "step": 25744 }, { "epoch": 2.617425782838552, "grad_norm": 0.26279398798942566, "learning_rate": 5.4621108957898786e-06, "loss": 0.3548, "step": 25745 }, { "epoch": 2.617527450183001, "grad_norm": 0.2580380439758301, "learning_rate": 5.46175752758934e-06, "loss": 0.327, "step": 25746 }, { "epoch": 2.61762911752745, "grad_norm": 0.2576411962509155, "learning_rate": 5.461404157062577e-06, "loss": 0.3154, "step": 25747 }, { "epoch": 2.617730784871899, "grad_norm": 0.27309948205947876, "learning_rate": 5.461050784211368e-06, "loss": 0.3229, "step": 25748 }, { "epoch": 2.617832452216348, "grad_norm": 0.23497499525547028, "learning_rate": 5.460697409037497e-06, "loss": 0.3136, "step": 25749 }, { "epoch": 2.617934119560797, "grad_norm": 0.29069676995277405, "learning_rate": 5.46034403154274e-06, "loss": 0.327, "step": 25750 }, { "epoch": 2.618035786905246, "grad_norm": 0.28655946254730225, "learning_rate": 5.459990651728881e-06, "loss": 0.3294, "step": 25751 }, { "epoch": 2.618137454249695, "grad_norm": 0.283243328332901, "learning_rate": 5.4596372695976976e-06, "loss": 0.3761, "step": 25752 }, { "epoch": 2.618239121594144, "grad_norm": 0.2617165148258209, "learning_rate": 5.45928388515097e-06, "loss": 0.3334, "step": 25753 }, { "epoch": 2.618340788938593, "grad_norm": 0.2681322395801544, "learning_rate": 5.458930498390479e-06, "loss": 0.3093, "step": 25754 }, { "epoch": 2.618442456283042, "grad_norm": 0.2719676196575165, "learning_rate": 5.458577109318007e-06, "loss": 0.3368, "step": 25755 }, { "epoch": 2.618544123627491, "grad_norm": 0.2566301226615906, "learning_rate": 5.458223717935331e-06, "loss": 0.3554, "step": 25756 }, { "epoch": 2.61864579097194, "grad_norm": 0.27238601446151733, "learning_rate": 5.4578703242442345e-06, "loss": 0.3384, "step": 25757 }, { "epoch": 2.6187474583163888, "grad_norm": 0.28348299860954285, "learning_rate": 5.4575169282464945e-06, "loss": 0.3463, "step": 25758 }, { "epoch": 2.6188491256608377, "grad_norm": 0.30497100949287415, "learning_rate": 5.457163529943893e-06, "loss": 0.3263, "step": 25759 }, { "epoch": 2.6189507930052867, "grad_norm": 0.266217976808548, "learning_rate": 5.4568101293382116e-06, "loss": 0.334, "step": 25760 }, { "epoch": 2.6190524603497356, "grad_norm": 0.2829258441925049, "learning_rate": 5.456456726431229e-06, "loss": 0.3368, "step": 25761 }, { "epoch": 2.6191541276941845, "grad_norm": 0.2850056290626526, "learning_rate": 5.456103321224725e-06, "loss": 0.3482, "step": 25762 }, { "epoch": 2.6192557950386335, "grad_norm": 0.2520999312400818, "learning_rate": 5.455749913720481e-06, "loss": 0.3304, "step": 25763 }, { "epoch": 2.6193574623830824, "grad_norm": 0.263126403093338, "learning_rate": 5.455396503920278e-06, "loss": 0.3184, "step": 25764 }, { "epoch": 2.6194591297275314, "grad_norm": 0.2727906107902527, "learning_rate": 5.455043091825895e-06, "loss": 0.3011, "step": 25765 }, { "epoch": 2.6195607970719808, "grad_norm": 0.2576979100704193, "learning_rate": 5.454689677439113e-06, "loss": 0.3366, "step": 25766 }, { "epoch": 2.6196624644164297, "grad_norm": 0.2751666307449341, "learning_rate": 5.454336260761713e-06, "loss": 0.3337, "step": 25767 }, { "epoch": 2.6197641317608786, "grad_norm": 0.2763483226299286, "learning_rate": 5.453982841795475e-06, "loss": 0.3357, "step": 25768 }, { "epoch": 2.6198657991053276, "grad_norm": 0.24617281556129456, "learning_rate": 5.453629420542178e-06, "loss": 0.3206, "step": 25769 }, { "epoch": 2.6199674664497765, "grad_norm": 0.2883400022983551, "learning_rate": 5.453275997003605e-06, "loss": 0.3276, "step": 25770 }, { "epoch": 2.6200691337942255, "grad_norm": 0.286507785320282, "learning_rate": 5.452922571181535e-06, "loss": 0.3515, "step": 25771 }, { "epoch": 2.6201708011386744, "grad_norm": 0.28631648421287537, "learning_rate": 5.452569143077748e-06, "loss": 0.3385, "step": 25772 }, { "epoch": 2.6202724684831233, "grad_norm": 0.26241534948349, "learning_rate": 5.452215712694025e-06, "loss": 0.316, "step": 25773 }, { "epoch": 2.6203741358275723, "grad_norm": 0.2720681130886078, "learning_rate": 5.451862280032149e-06, "loss": 0.3095, "step": 25774 }, { "epoch": 2.6204758031720212, "grad_norm": 0.28205522894859314, "learning_rate": 5.451508845093897e-06, "loss": 0.3101, "step": 25775 }, { "epoch": 2.62057747051647, "grad_norm": 0.2610929608345032, "learning_rate": 5.451155407881049e-06, "loss": 0.2978, "step": 25776 }, { "epoch": 2.620679137860919, "grad_norm": 0.26774102449417114, "learning_rate": 5.45080196839539e-06, "loss": 0.2911, "step": 25777 }, { "epoch": 2.620780805205368, "grad_norm": 0.2760162353515625, "learning_rate": 5.450448526638696e-06, "loss": 0.3063, "step": 25778 }, { "epoch": 2.620882472549817, "grad_norm": 0.25815239548683167, "learning_rate": 5.45009508261275e-06, "loss": 0.3288, "step": 25779 }, { "epoch": 2.620984139894266, "grad_norm": 0.25635915994644165, "learning_rate": 5.449741636319333e-06, "loss": 0.3422, "step": 25780 }, { "epoch": 2.621085807238715, "grad_norm": 0.2657451033592224, "learning_rate": 5.449388187760222e-06, "loss": 0.3158, "step": 25781 }, { "epoch": 2.621187474583164, "grad_norm": 0.2526172399520874, "learning_rate": 5.449034736937202e-06, "loss": 0.2898, "step": 25782 }, { "epoch": 2.6212891419276128, "grad_norm": 0.2977982461452484, "learning_rate": 5.448681283852051e-06, "loss": 0.3411, "step": 25783 }, { "epoch": 2.6213908092720617, "grad_norm": 0.3095259964466095, "learning_rate": 5.44832782850655e-06, "loss": 0.3668, "step": 25784 }, { "epoch": 2.6214924766165106, "grad_norm": 0.28571808338165283, "learning_rate": 5.44797437090248e-06, "loss": 0.3446, "step": 25785 }, { "epoch": 2.6215941439609596, "grad_norm": 0.28491124510765076, "learning_rate": 5.447620911041623e-06, "loss": 0.3594, "step": 25786 }, { "epoch": 2.6216958113054085, "grad_norm": 0.26605361700057983, "learning_rate": 5.447267448925757e-06, "loss": 0.3344, "step": 25787 }, { "epoch": 2.6217974786498575, "grad_norm": 0.2844627797603607, "learning_rate": 5.4469139845566635e-06, "loss": 0.3264, "step": 25788 }, { "epoch": 2.6218991459943064, "grad_norm": 0.2786499261856079, "learning_rate": 5.446560517936123e-06, "loss": 0.3166, "step": 25789 }, { "epoch": 2.6220008133387553, "grad_norm": 0.2986462414264679, "learning_rate": 5.446207049065919e-06, "loss": 0.3247, "step": 25790 }, { "epoch": 2.6221024806832043, "grad_norm": 0.2575642168521881, "learning_rate": 5.4458535779478285e-06, "loss": 0.3735, "step": 25791 }, { "epoch": 2.6222041480276537, "grad_norm": 0.280965656042099, "learning_rate": 5.445500104583634e-06, "loss": 0.3766, "step": 25792 }, { "epoch": 2.6223058153721026, "grad_norm": 0.2770288288593292, "learning_rate": 5.445146628975116e-06, "loss": 0.3085, "step": 25793 }, { "epoch": 2.6224074827165516, "grad_norm": 0.2717093527317047, "learning_rate": 5.444793151124054e-06, "loss": 0.3365, "step": 25794 }, { "epoch": 2.6225091500610005, "grad_norm": 0.2725970447063446, "learning_rate": 5.444439671032231e-06, "loss": 0.3111, "step": 25795 }, { "epoch": 2.6226108174054494, "grad_norm": 0.2730730473995209, "learning_rate": 5.444086188701426e-06, "loss": 0.3274, "step": 25796 }, { "epoch": 2.6227124847498984, "grad_norm": 0.2700171172618866, "learning_rate": 5.4437327041334205e-06, "loss": 0.3206, "step": 25797 }, { "epoch": 2.6228141520943473, "grad_norm": 0.277383953332901, "learning_rate": 5.443379217329996e-06, "loss": 0.3597, "step": 25798 }, { "epoch": 2.6229158194387963, "grad_norm": 0.26287421584129333, "learning_rate": 5.44302572829293e-06, "loss": 0.3335, "step": 25799 }, { "epoch": 2.623017486783245, "grad_norm": 0.2714303731918335, "learning_rate": 5.442672237024008e-06, "loss": 0.3483, "step": 25800 }, { "epoch": 2.623119154127694, "grad_norm": 0.2732263505458832, "learning_rate": 5.4423187435250066e-06, "loss": 0.3074, "step": 25801 }, { "epoch": 2.623220821472143, "grad_norm": 0.2872193455696106, "learning_rate": 5.441965247797711e-06, "loss": 0.3359, "step": 25802 }, { "epoch": 2.623322488816592, "grad_norm": 0.3022441565990448, "learning_rate": 5.441611749843898e-06, "loss": 0.3516, "step": 25803 }, { "epoch": 2.623424156161041, "grad_norm": 0.282709538936615, "learning_rate": 5.441258249665349e-06, "loss": 0.3759, "step": 25804 }, { "epoch": 2.62352582350549, "grad_norm": 0.24018408358097076, "learning_rate": 5.440904747263847e-06, "loss": 0.3209, "step": 25805 }, { "epoch": 2.623627490849939, "grad_norm": 0.25008732080459595, "learning_rate": 5.440551242641171e-06, "loss": 0.3303, "step": 25806 }, { "epoch": 2.6237291581943882, "grad_norm": 0.2720007002353668, "learning_rate": 5.4401977357991035e-06, "loss": 0.3266, "step": 25807 }, { "epoch": 2.623830825538837, "grad_norm": 0.26154324412345886, "learning_rate": 5.439844226739424e-06, "loss": 0.3345, "step": 25808 }, { "epoch": 2.623932492883286, "grad_norm": 0.2839494049549103, "learning_rate": 5.439490715463912e-06, "loss": 0.3486, "step": 25809 }, { "epoch": 2.624034160227735, "grad_norm": 0.2696051597595215, "learning_rate": 5.439137201974353e-06, "loss": 0.3232, "step": 25810 }, { "epoch": 2.624135827572184, "grad_norm": 0.25923627614974976, "learning_rate": 5.438783686272524e-06, "loss": 0.3408, "step": 25811 }, { "epoch": 2.624237494916633, "grad_norm": 0.2634279727935791, "learning_rate": 5.438430168360207e-06, "loss": 0.3383, "step": 25812 }, { "epoch": 2.624339162261082, "grad_norm": 0.25870898365974426, "learning_rate": 5.438076648239183e-06, "loss": 0.3523, "step": 25813 }, { "epoch": 2.624440829605531, "grad_norm": 0.25659361481666565, "learning_rate": 5.437723125911232e-06, "loss": 0.3398, "step": 25814 }, { "epoch": 2.6245424969499798, "grad_norm": 0.25875723361968994, "learning_rate": 5.4373696013781375e-06, "loss": 0.3392, "step": 25815 }, { "epoch": 2.6246441642944287, "grad_norm": 0.27020835876464844, "learning_rate": 5.4370160746416786e-06, "loss": 0.3666, "step": 25816 }, { "epoch": 2.6247458316388776, "grad_norm": 0.2616709768772125, "learning_rate": 5.436662545703636e-06, "loss": 0.3313, "step": 25817 }, { "epoch": 2.6248474989833266, "grad_norm": 0.26981857419013977, "learning_rate": 5.436309014565792e-06, "loss": 0.3192, "step": 25818 }, { "epoch": 2.6249491663277755, "grad_norm": 0.27414023876190186, "learning_rate": 5.435955481229925e-06, "loss": 0.3197, "step": 25819 }, { "epoch": 2.6250508336722245, "grad_norm": 0.2610689103603363, "learning_rate": 5.4356019456978195e-06, "loss": 0.3237, "step": 25820 }, { "epoch": 2.6251525010166734, "grad_norm": 0.2664659023284912, "learning_rate": 5.4352484079712554e-06, "loss": 0.3177, "step": 25821 }, { "epoch": 2.6252541683611224, "grad_norm": 0.26155710220336914, "learning_rate": 5.434894868052012e-06, "loss": 0.3064, "step": 25822 }, { "epoch": 2.6253558357055713, "grad_norm": 0.26830217242240906, "learning_rate": 5.434541325941874e-06, "loss": 0.3275, "step": 25823 }, { "epoch": 2.6254575030500202, "grad_norm": 0.269641637802124, "learning_rate": 5.434187781642616e-06, "loss": 0.3365, "step": 25824 }, { "epoch": 2.625559170394469, "grad_norm": 0.2635703980922699, "learning_rate": 5.433834235156026e-06, "loss": 0.3156, "step": 25825 }, { "epoch": 2.625660837738918, "grad_norm": 0.2769971787929535, "learning_rate": 5.433480686483882e-06, "loss": 0.3415, "step": 25826 }, { "epoch": 2.625762505083367, "grad_norm": 0.27619364857673645, "learning_rate": 5.4331271356279635e-06, "loss": 0.3227, "step": 25827 }, { "epoch": 2.625864172427816, "grad_norm": 0.2589545249938965, "learning_rate": 5.432773582590057e-06, "loss": 0.2973, "step": 25828 }, { "epoch": 2.625965839772265, "grad_norm": 0.26053279638290405, "learning_rate": 5.432420027371936e-06, "loss": 0.3593, "step": 25829 }, { "epoch": 2.626067507116714, "grad_norm": 0.24407830834388733, "learning_rate": 5.4320664699753875e-06, "loss": 0.3617, "step": 25830 }, { "epoch": 2.626169174461163, "grad_norm": 0.282615602016449, "learning_rate": 5.431712910402191e-06, "loss": 0.2957, "step": 25831 }, { "epoch": 2.6262708418056118, "grad_norm": 0.2998599112033844, "learning_rate": 5.431359348654127e-06, "loss": 0.3626, "step": 25832 }, { "epoch": 2.626372509150061, "grad_norm": 0.29371219873428345, "learning_rate": 5.431005784732977e-06, "loss": 0.3285, "step": 25833 }, { "epoch": 2.62647417649451, "grad_norm": 0.2826572358608246, "learning_rate": 5.430652218640523e-06, "loss": 0.309, "step": 25834 }, { "epoch": 2.626575843838959, "grad_norm": 0.26670506596565247, "learning_rate": 5.430298650378545e-06, "loss": 0.3258, "step": 25835 }, { "epoch": 2.626677511183408, "grad_norm": 0.30316996574401855, "learning_rate": 5.429945079948825e-06, "loss": 0.3566, "step": 25836 }, { "epoch": 2.626779178527857, "grad_norm": 0.26196539402008057, "learning_rate": 5.429591507353143e-06, "loss": 0.3326, "step": 25837 }, { "epoch": 2.626880845872306, "grad_norm": 0.254160612821579, "learning_rate": 5.429237932593282e-06, "loss": 0.3237, "step": 25838 }, { "epoch": 2.626982513216755, "grad_norm": 0.24987706542015076, "learning_rate": 5.428884355671021e-06, "loss": 0.3342, "step": 25839 }, { "epoch": 2.6270841805612037, "grad_norm": 0.26907140016555786, "learning_rate": 5.428530776588144e-06, "loss": 0.3021, "step": 25840 }, { "epoch": 2.6271858479056527, "grad_norm": 0.2910909354686737, "learning_rate": 5.42817719534643e-06, "loss": 0.3268, "step": 25841 }, { "epoch": 2.6272875152501016, "grad_norm": 0.2834815979003906, "learning_rate": 5.4278236119476615e-06, "loss": 0.3162, "step": 25842 }, { "epoch": 2.6273891825945506, "grad_norm": 0.2849014103412628, "learning_rate": 5.42747002639362e-06, "loss": 0.3687, "step": 25843 }, { "epoch": 2.6274908499389995, "grad_norm": 0.2586974501609802, "learning_rate": 5.427116438686084e-06, "loss": 0.3399, "step": 25844 }, { "epoch": 2.6275925172834484, "grad_norm": 0.2549508810043335, "learning_rate": 5.426762848826837e-06, "loss": 0.3051, "step": 25845 }, { "epoch": 2.6276941846278974, "grad_norm": 0.2754859924316406, "learning_rate": 5.426409256817663e-06, "loss": 0.3019, "step": 25846 }, { "epoch": 2.6277958519723463, "grad_norm": 0.28080374002456665, "learning_rate": 5.4260556626603375e-06, "loss": 0.3192, "step": 25847 }, { "epoch": 2.6278975193167957, "grad_norm": 0.27304607629776, "learning_rate": 5.4257020663566475e-06, "loss": 0.3298, "step": 25848 }, { "epoch": 2.6279991866612447, "grad_norm": 0.2926872670650482, "learning_rate": 5.425348467908369e-06, "loss": 0.3416, "step": 25849 }, { "epoch": 2.6281008540056936, "grad_norm": 0.28119131922721863, "learning_rate": 5.424994867317287e-06, "loss": 0.3058, "step": 25850 }, { "epoch": 2.6282025213501425, "grad_norm": 0.25149351358413696, "learning_rate": 5.424641264585183e-06, "loss": 0.3447, "step": 25851 }, { "epoch": 2.6283041886945915, "grad_norm": 0.279413640499115, "learning_rate": 5.424287659713836e-06, "loss": 0.335, "step": 25852 }, { "epoch": 2.6284058560390404, "grad_norm": 0.2737917900085449, "learning_rate": 5.423934052705029e-06, "loss": 0.3398, "step": 25853 }, { "epoch": 2.6285075233834894, "grad_norm": 0.25458189845085144, "learning_rate": 5.423580443560542e-06, "loss": 0.3318, "step": 25854 }, { "epoch": 2.6286091907279383, "grad_norm": 0.25298961997032166, "learning_rate": 5.423226832282158e-06, "loss": 0.2975, "step": 25855 }, { "epoch": 2.6287108580723872, "grad_norm": 0.28409919142723083, "learning_rate": 5.422873218871659e-06, "loss": 0.3486, "step": 25856 }, { "epoch": 2.628812525416836, "grad_norm": 0.26080936193466187, "learning_rate": 5.422519603330823e-06, "loss": 0.3927, "step": 25857 }, { "epoch": 2.628914192761285, "grad_norm": 0.2562851011753082, "learning_rate": 5.422165985661436e-06, "loss": 0.3472, "step": 25858 }, { "epoch": 2.629015860105734, "grad_norm": 0.26431217789649963, "learning_rate": 5.421812365865276e-06, "loss": 0.3112, "step": 25859 }, { "epoch": 2.629117527450183, "grad_norm": 0.27121293544769287, "learning_rate": 5.4214587439441246e-06, "loss": 0.3597, "step": 25860 }, { "epoch": 2.629219194794632, "grad_norm": 0.24687443673610687, "learning_rate": 5.421105119899765e-06, "loss": 0.3284, "step": 25861 }, { "epoch": 2.629320862139081, "grad_norm": 0.2623695135116577, "learning_rate": 5.4207514937339786e-06, "loss": 0.2967, "step": 25862 }, { "epoch": 2.62942252948353, "grad_norm": 0.27590519189834595, "learning_rate": 5.4203978654485455e-06, "loss": 0.323, "step": 25863 }, { "epoch": 2.6295241968279788, "grad_norm": 0.2700684368610382, "learning_rate": 5.420044235045248e-06, "loss": 0.3597, "step": 25864 }, { "epoch": 2.6296258641724277, "grad_norm": 0.25774502754211426, "learning_rate": 5.419690602525866e-06, "loss": 0.3283, "step": 25865 }, { "epoch": 2.6297275315168767, "grad_norm": 0.2653031051158905, "learning_rate": 5.419336967892186e-06, "loss": 0.3359, "step": 25866 }, { "epoch": 2.6298291988613256, "grad_norm": 0.2487611472606659, "learning_rate": 5.418983331145983e-06, "loss": 0.3567, "step": 25867 }, { "epoch": 2.6299308662057745, "grad_norm": 0.28704139590263367, "learning_rate": 5.418629692289042e-06, "loss": 0.3396, "step": 25868 }, { "epoch": 2.6300325335502235, "grad_norm": 0.26654863357543945, "learning_rate": 5.418276051323145e-06, "loss": 0.3128, "step": 25869 }, { "epoch": 2.6301342008946724, "grad_norm": 0.28461000323295593, "learning_rate": 5.4179224082500715e-06, "loss": 0.3541, "step": 25870 }, { "epoch": 2.6302358682391214, "grad_norm": 0.23786607384681702, "learning_rate": 5.4175687630716055e-06, "loss": 0.3387, "step": 25871 }, { "epoch": 2.6303375355835703, "grad_norm": 0.25583508610725403, "learning_rate": 5.417215115789526e-06, "loss": 0.3336, "step": 25872 }, { "epoch": 2.6304392029280192, "grad_norm": 0.28026044368743896, "learning_rate": 5.416861466405617e-06, "loss": 0.3175, "step": 25873 }, { "epoch": 2.6305408702724686, "grad_norm": 0.2633841335773468, "learning_rate": 5.4165078149216586e-06, "loss": 0.3382, "step": 25874 }, { "epoch": 2.6306425376169176, "grad_norm": 0.2602929174900055, "learning_rate": 5.416154161339432e-06, "loss": 0.342, "step": 25875 }, { "epoch": 2.6307442049613665, "grad_norm": 0.28369393944740295, "learning_rate": 5.415800505660721e-06, "loss": 0.3573, "step": 25876 }, { "epoch": 2.6308458723058155, "grad_norm": 0.27489522099494934, "learning_rate": 5.415446847887304e-06, "loss": 0.342, "step": 25877 }, { "epoch": 2.6309475396502644, "grad_norm": 0.26380157470703125, "learning_rate": 5.415093188020968e-06, "loss": 0.3362, "step": 25878 }, { "epoch": 2.6310492069947133, "grad_norm": 0.27284958958625793, "learning_rate": 5.41473952606349e-06, "loss": 0.356, "step": 25879 }, { "epoch": 2.6311508743391623, "grad_norm": 0.2631687819957733, "learning_rate": 5.414385862016649e-06, "loss": 0.334, "step": 25880 }, { "epoch": 2.6312525416836112, "grad_norm": 0.26414188742637634, "learning_rate": 5.414032195882234e-06, "loss": 0.3322, "step": 25881 }, { "epoch": 2.63135420902806, "grad_norm": 0.27448153495788574, "learning_rate": 5.4136785276620215e-06, "loss": 0.3674, "step": 25882 }, { "epoch": 2.631455876372509, "grad_norm": 0.26669394969940186, "learning_rate": 5.413324857357797e-06, "loss": 0.3176, "step": 25883 }, { "epoch": 2.631557543716958, "grad_norm": 0.27754637598991394, "learning_rate": 5.4129711849713394e-06, "loss": 0.3085, "step": 25884 }, { "epoch": 2.631659211061407, "grad_norm": 0.2587730288505554, "learning_rate": 5.412617510504429e-06, "loss": 0.3348, "step": 25885 }, { "epoch": 2.631760878405856, "grad_norm": 0.2730351984500885, "learning_rate": 5.412263833958852e-06, "loss": 0.3205, "step": 25886 }, { "epoch": 2.631862545750305, "grad_norm": 0.26512569189071655, "learning_rate": 5.411910155336388e-06, "loss": 0.3276, "step": 25887 }, { "epoch": 2.631964213094754, "grad_norm": 0.2913666367530823, "learning_rate": 5.411556474638817e-06, "loss": 0.3409, "step": 25888 }, { "epoch": 2.632065880439203, "grad_norm": 0.2855745255947113, "learning_rate": 5.411202791867923e-06, "loss": 0.3263, "step": 25889 }, { "epoch": 2.632167547783652, "grad_norm": 0.27342143654823303, "learning_rate": 5.410849107025485e-06, "loss": 0.3207, "step": 25890 }, { "epoch": 2.632269215128101, "grad_norm": 0.25131234526634216, "learning_rate": 5.41049542011329e-06, "loss": 0.3023, "step": 25891 }, { "epoch": 2.63237088247255, "grad_norm": 0.2669370770454407, "learning_rate": 5.410141731133116e-06, "loss": 0.3495, "step": 25892 }, { "epoch": 2.632472549816999, "grad_norm": 0.27731987833976746, "learning_rate": 5.409788040086744e-06, "loss": 0.3526, "step": 25893 }, { "epoch": 2.632574217161448, "grad_norm": 0.2626909017562866, "learning_rate": 5.409434346975959e-06, "loss": 0.3264, "step": 25894 }, { "epoch": 2.632675884505897, "grad_norm": 0.27063223719596863, "learning_rate": 5.409080651802538e-06, "loss": 0.3285, "step": 25895 }, { "epoch": 2.632777551850346, "grad_norm": 0.2741696834564209, "learning_rate": 5.408726954568268e-06, "loss": 0.3194, "step": 25896 }, { "epoch": 2.6328792191947947, "grad_norm": 0.2690858840942383, "learning_rate": 5.40837325527493e-06, "loss": 0.3789, "step": 25897 }, { "epoch": 2.6329808865392437, "grad_norm": 0.25437039136886597, "learning_rate": 5.408019553924303e-06, "loss": 0.3229, "step": 25898 }, { "epoch": 2.6330825538836926, "grad_norm": 0.25049257278442383, "learning_rate": 5.407665850518171e-06, "loss": 0.3444, "step": 25899 }, { "epoch": 2.6331842212281416, "grad_norm": 0.2608456313610077, "learning_rate": 5.407312145058313e-06, "loss": 0.3126, "step": 25900 }, { "epoch": 2.6332858885725905, "grad_norm": 0.24710381031036377, "learning_rate": 5.406958437546517e-06, "loss": 0.3036, "step": 25901 }, { "epoch": 2.6333875559170394, "grad_norm": 0.2879420816898346, "learning_rate": 5.406604727984559e-06, "loss": 0.3497, "step": 25902 }, { "epoch": 2.6334892232614884, "grad_norm": 0.2589864432811737, "learning_rate": 5.406251016374223e-06, "loss": 0.336, "step": 25903 }, { "epoch": 2.6335908906059373, "grad_norm": 0.28510355949401855, "learning_rate": 5.405897302717292e-06, "loss": 0.3085, "step": 25904 }, { "epoch": 2.6336925579503863, "grad_norm": 0.26566529273986816, "learning_rate": 5.405543587015546e-06, "loss": 0.3334, "step": 25905 }, { "epoch": 2.633794225294835, "grad_norm": 0.24558372795581818, "learning_rate": 5.4051898692707676e-06, "loss": 0.3145, "step": 25906 }, { "epoch": 2.633895892639284, "grad_norm": 0.2633604109287262, "learning_rate": 5.404836149484739e-06, "loss": 0.3336, "step": 25907 }, { "epoch": 2.633997559983733, "grad_norm": 0.24777649343013763, "learning_rate": 5.404482427659242e-06, "loss": 0.3357, "step": 25908 }, { "epoch": 2.634099227328182, "grad_norm": 0.2540799677371979, "learning_rate": 5.40412870379606e-06, "loss": 0.3425, "step": 25909 }, { "epoch": 2.634200894672631, "grad_norm": 0.2642848491668701, "learning_rate": 5.4037749778969715e-06, "loss": 0.3271, "step": 25910 }, { "epoch": 2.63430256201708, "grad_norm": 0.25685667991638184, "learning_rate": 5.403421249963762e-06, "loss": 0.3343, "step": 25911 }, { "epoch": 2.634404229361529, "grad_norm": 0.26185134053230286, "learning_rate": 5.403067519998212e-06, "loss": 0.3098, "step": 25912 }, { "epoch": 2.634505896705978, "grad_norm": 0.2749323844909668, "learning_rate": 5.402713788002103e-06, "loss": 0.3436, "step": 25913 }, { "epoch": 2.6346075640504267, "grad_norm": 0.2781286835670471, "learning_rate": 5.4023600539772185e-06, "loss": 0.3247, "step": 25914 }, { "epoch": 2.634709231394876, "grad_norm": 0.2548685073852539, "learning_rate": 5.402006317925339e-06, "loss": 0.3365, "step": 25915 }, { "epoch": 2.634810898739325, "grad_norm": 0.28577762842178345, "learning_rate": 5.401652579848246e-06, "loss": 0.3312, "step": 25916 }, { "epoch": 2.634912566083774, "grad_norm": 0.2448398470878601, "learning_rate": 5.401298839747725e-06, "loss": 0.3252, "step": 25917 }, { "epoch": 2.635014233428223, "grad_norm": 0.24575094878673553, "learning_rate": 5.400945097625555e-06, "loss": 0.3333, "step": 25918 }, { "epoch": 2.635115900772672, "grad_norm": 0.2604221999645233, "learning_rate": 5.400591353483519e-06, "loss": 0.3735, "step": 25919 }, { "epoch": 2.635217568117121, "grad_norm": 0.2838479280471802, "learning_rate": 5.400237607323399e-06, "loss": 0.3217, "step": 25920 }, { "epoch": 2.6353192354615698, "grad_norm": 0.2651898264884949, "learning_rate": 5.399883859146976e-06, "loss": 0.3065, "step": 25921 }, { "epoch": 2.6354209028060187, "grad_norm": 0.2629939317703247, "learning_rate": 5.399530108956035e-06, "loss": 0.3433, "step": 25922 }, { "epoch": 2.6355225701504676, "grad_norm": 0.2944152355194092, "learning_rate": 5.399176356752355e-06, "loss": 0.3373, "step": 25923 }, { "epoch": 2.6356242374949166, "grad_norm": 0.269429475069046, "learning_rate": 5.39882260253772e-06, "loss": 0.3141, "step": 25924 }, { "epoch": 2.6357259048393655, "grad_norm": 0.27465489506721497, "learning_rate": 5.39846884631391e-06, "loss": 0.3397, "step": 25925 }, { "epoch": 2.6358275721838145, "grad_norm": 0.254415899515152, "learning_rate": 5.398115088082709e-06, "loss": 0.3384, "step": 25926 }, { "epoch": 2.6359292395282634, "grad_norm": 0.25504934787750244, "learning_rate": 5.397761327845901e-06, "loss": 0.3717, "step": 25927 }, { "epoch": 2.6360309068727124, "grad_norm": 0.26964348554611206, "learning_rate": 5.397407565605264e-06, "loss": 0.337, "step": 25928 }, { "epoch": 2.6361325742171613, "grad_norm": 0.2706908583641052, "learning_rate": 5.397053801362583e-06, "loss": 0.3334, "step": 25929 }, { "epoch": 2.6362342415616107, "grad_norm": 0.29288026690483093, "learning_rate": 5.396700035119639e-06, "loss": 0.3308, "step": 25930 }, { "epoch": 2.6363359089060596, "grad_norm": 0.2486075460910797, "learning_rate": 5.396346266878213e-06, "loss": 0.3308, "step": 25931 }, { "epoch": 2.6364375762505086, "grad_norm": 0.2932862639427185, "learning_rate": 5.395992496640091e-06, "loss": 0.3374, "step": 25932 }, { "epoch": 2.6365392435949575, "grad_norm": 0.2628808617591858, "learning_rate": 5.395638724407052e-06, "loss": 0.3791, "step": 25933 }, { "epoch": 2.6366409109394064, "grad_norm": 0.2850305736064911, "learning_rate": 5.395284950180879e-06, "loss": 0.3438, "step": 25934 }, { "epoch": 2.6367425782838554, "grad_norm": 0.29908233880996704, "learning_rate": 5.394931173963354e-06, "loss": 0.3502, "step": 25935 }, { "epoch": 2.6368442456283043, "grad_norm": 0.2667495608329773, "learning_rate": 5.39457739575626e-06, "loss": 0.35, "step": 25936 }, { "epoch": 2.6369459129727533, "grad_norm": 0.2756314277648926, "learning_rate": 5.394223615561379e-06, "loss": 0.3405, "step": 25937 }, { "epoch": 2.637047580317202, "grad_norm": 0.2732734680175781, "learning_rate": 5.393869833380493e-06, "loss": 0.3828, "step": 25938 }, { "epoch": 2.637149247661651, "grad_norm": 0.27937230467796326, "learning_rate": 5.393516049215385e-06, "loss": 0.2923, "step": 25939 }, { "epoch": 2.6372509150061, "grad_norm": 0.2870662212371826, "learning_rate": 5.393162263067836e-06, "loss": 0.3463, "step": 25940 }, { "epoch": 2.637352582350549, "grad_norm": 0.2797277569770813, "learning_rate": 5.392808474939628e-06, "loss": 0.3415, "step": 25941 }, { "epoch": 2.637454249694998, "grad_norm": 0.29391542077064514, "learning_rate": 5.392454684832546e-06, "loss": 0.2999, "step": 25942 }, { "epoch": 2.637555917039447, "grad_norm": 0.2581673860549927, "learning_rate": 5.3921008927483696e-06, "loss": 0.3547, "step": 25943 }, { "epoch": 2.637657584383896, "grad_norm": 0.2942982614040375, "learning_rate": 5.391747098688883e-06, "loss": 0.3314, "step": 25944 }, { "epoch": 2.637759251728345, "grad_norm": 0.26394498348236084, "learning_rate": 5.391393302655866e-06, "loss": 0.3295, "step": 25945 }, { "epoch": 2.6378609190727937, "grad_norm": 0.2829861044883728, "learning_rate": 5.391039504651103e-06, "loss": 0.3714, "step": 25946 }, { "epoch": 2.6379625864172427, "grad_norm": 0.2904389500617981, "learning_rate": 5.3906857046763775e-06, "loss": 0.2878, "step": 25947 }, { "epoch": 2.6380642537616916, "grad_norm": 0.27264395356178284, "learning_rate": 5.3903319027334685e-06, "loss": 0.363, "step": 25948 }, { "epoch": 2.6381659211061406, "grad_norm": 0.25585317611694336, "learning_rate": 5.389978098824161e-06, "loss": 0.3597, "step": 25949 }, { "epoch": 2.6382675884505895, "grad_norm": 0.2893621325492859, "learning_rate": 5.389624292950236e-06, "loss": 0.3625, "step": 25950 }, { "epoch": 2.6383692557950384, "grad_norm": 0.2794283628463745, "learning_rate": 5.389270485113477e-06, "loss": 0.3391, "step": 25951 }, { "epoch": 2.6384709231394874, "grad_norm": 0.25355345010757446, "learning_rate": 5.388916675315665e-06, "loss": 0.3207, "step": 25952 }, { "epoch": 2.6385725904839363, "grad_norm": 0.27451443672180176, "learning_rate": 5.388562863558584e-06, "loss": 0.3417, "step": 25953 }, { "epoch": 2.6386742578283853, "grad_norm": 0.2694396376609802, "learning_rate": 5.3882090498440154e-06, "loss": 0.3222, "step": 25954 }, { "epoch": 2.638775925172834, "grad_norm": 0.25067853927612305, "learning_rate": 5.3878552341737414e-06, "loss": 0.3383, "step": 25955 }, { "epoch": 2.6388775925172836, "grad_norm": 0.2578713595867157, "learning_rate": 5.387501416549545e-06, "loss": 0.3041, "step": 25956 }, { "epoch": 2.6389792598617325, "grad_norm": 0.26571595668792725, "learning_rate": 5.387147596973209e-06, "loss": 0.3387, "step": 25957 }, { "epoch": 2.6390809272061815, "grad_norm": 0.2760174572467804, "learning_rate": 5.386793775446515e-06, "loss": 0.3464, "step": 25958 }, { "epoch": 2.6391825945506304, "grad_norm": 0.24981728196144104, "learning_rate": 5.386439951971247e-06, "loss": 0.3131, "step": 25959 }, { "epoch": 2.6392842618950794, "grad_norm": 0.27096179127693176, "learning_rate": 5.386086126549185e-06, "loss": 0.3461, "step": 25960 }, { "epoch": 2.6393859292395283, "grad_norm": 0.2759849727153778, "learning_rate": 5.385732299182112e-06, "loss": 0.3272, "step": 25961 }, { "epoch": 2.6394875965839772, "grad_norm": 0.2819390892982483, "learning_rate": 5.385378469871814e-06, "loss": 0.3301, "step": 25962 }, { "epoch": 2.639589263928426, "grad_norm": 0.29552075266838074, "learning_rate": 5.38502463862007e-06, "loss": 0.3452, "step": 25963 }, { "epoch": 2.639690931272875, "grad_norm": 0.2566530406475067, "learning_rate": 5.384670805428662e-06, "loss": 0.3319, "step": 25964 }, { "epoch": 2.639792598617324, "grad_norm": 0.2932371199131012, "learning_rate": 5.384316970299375e-06, "loss": 0.3384, "step": 25965 }, { "epoch": 2.639894265961773, "grad_norm": 0.2686707377433777, "learning_rate": 5.383963133233989e-06, "loss": 0.3371, "step": 25966 }, { "epoch": 2.639995933306222, "grad_norm": 0.2708721458911896, "learning_rate": 5.38360929423429e-06, "loss": 0.3095, "step": 25967 }, { "epoch": 2.640097600650671, "grad_norm": 0.2779243588447571, "learning_rate": 5.383255453302059e-06, "loss": 0.2981, "step": 25968 }, { "epoch": 2.64019926799512, "grad_norm": 0.26672014594078064, "learning_rate": 5.382901610439076e-06, "loss": 0.3555, "step": 25969 }, { "epoch": 2.6403009353395688, "grad_norm": 0.2677456736564636, "learning_rate": 5.382547765647126e-06, "loss": 0.3184, "step": 25970 }, { "epoch": 2.640402602684018, "grad_norm": 0.2557514011859894, "learning_rate": 5.382193918927992e-06, "loss": 0.3255, "step": 25971 }, { "epoch": 2.640504270028467, "grad_norm": 0.2502491772174835, "learning_rate": 5.381840070283456e-06, "loss": 0.3268, "step": 25972 }, { "epoch": 2.640605937372916, "grad_norm": 0.26523372530937195, "learning_rate": 5.381486219715301e-06, "loss": 0.3491, "step": 25973 }, { "epoch": 2.640707604717365, "grad_norm": 0.26764729619026184, "learning_rate": 5.381132367225307e-06, "loss": 0.326, "step": 25974 }, { "epoch": 2.640809272061814, "grad_norm": 0.24889498949050903, "learning_rate": 5.38077851281526e-06, "loss": 0.3015, "step": 25975 }, { "epoch": 2.640910939406263, "grad_norm": 0.26854562759399414, "learning_rate": 5.38042465648694e-06, "loss": 0.3664, "step": 25976 }, { "epoch": 2.641012606750712, "grad_norm": 0.2448781579732895, "learning_rate": 5.380070798242133e-06, "loss": 0.3261, "step": 25977 }, { "epoch": 2.6411142740951608, "grad_norm": 0.2580863833427429, "learning_rate": 5.379716938082619e-06, "loss": 0.3332, "step": 25978 }, { "epoch": 2.6412159414396097, "grad_norm": 0.2581072449684143, "learning_rate": 5.379363076010181e-06, "loss": 0.3355, "step": 25979 }, { "epoch": 2.6413176087840586, "grad_norm": 0.2525712549686432, "learning_rate": 5.379009212026602e-06, "loss": 0.372, "step": 25980 }, { "epoch": 2.6414192761285076, "grad_norm": 0.2818935215473175, "learning_rate": 5.3786553461336655e-06, "loss": 0.3148, "step": 25981 }, { "epoch": 2.6415209434729565, "grad_norm": 0.2525371313095093, "learning_rate": 5.378301478333152e-06, "loss": 0.3457, "step": 25982 }, { "epoch": 2.6416226108174055, "grad_norm": 0.2654965817928314, "learning_rate": 5.3779476086268464e-06, "loss": 0.3282, "step": 25983 }, { "epoch": 2.6417242781618544, "grad_norm": 0.25320619344711304, "learning_rate": 5.3775937370165294e-06, "loss": 0.3443, "step": 25984 }, { "epoch": 2.6418259455063033, "grad_norm": 0.2663690447807312, "learning_rate": 5.3772398635039865e-06, "loss": 0.324, "step": 25985 }, { "epoch": 2.6419276128507523, "grad_norm": 0.2668921947479248, "learning_rate": 5.376885988090998e-06, "loss": 0.3307, "step": 25986 }, { "epoch": 2.6420292801952012, "grad_norm": 0.2517523169517517, "learning_rate": 5.3765321107793465e-06, "loss": 0.3186, "step": 25987 }, { "epoch": 2.64213094753965, "grad_norm": 0.2678854465484619, "learning_rate": 5.376178231570818e-06, "loss": 0.3257, "step": 25988 }, { "epoch": 2.642232614884099, "grad_norm": 0.270009309053421, "learning_rate": 5.375824350467191e-06, "loss": 0.3407, "step": 25989 }, { "epoch": 2.642334282228548, "grad_norm": 0.25338926911354065, "learning_rate": 5.375470467470252e-06, "loss": 0.3109, "step": 25990 }, { "epoch": 2.642435949572997, "grad_norm": 0.2777509093284607, "learning_rate": 5.375116582581781e-06, "loss": 0.4087, "step": 25991 }, { "epoch": 2.642537616917446, "grad_norm": 0.26217931509017944, "learning_rate": 5.37476269580356e-06, "loss": 0.3109, "step": 25992 }, { "epoch": 2.642639284261895, "grad_norm": 0.2794347107410431, "learning_rate": 5.374408807137377e-06, "loss": 0.3225, "step": 25993 }, { "epoch": 2.642740951606344, "grad_norm": 0.27946481108665466, "learning_rate": 5.374054916585009e-06, "loss": 0.3212, "step": 25994 }, { "epoch": 2.6428426189507928, "grad_norm": 0.26118534803390503, "learning_rate": 5.373701024148242e-06, "loss": 0.3474, "step": 25995 }, { "epoch": 2.6429442862952417, "grad_norm": 0.2818569839000702, "learning_rate": 5.373347129828857e-06, "loss": 0.3303, "step": 25996 }, { "epoch": 2.643045953639691, "grad_norm": 0.3174735903739929, "learning_rate": 5.3729932336286394e-06, "loss": 0.3424, "step": 25997 }, { "epoch": 2.64314762098414, "grad_norm": 0.2645666301250458, "learning_rate": 5.372639335549371e-06, "loss": 0.3299, "step": 25998 }, { "epoch": 2.643249288328589, "grad_norm": 0.28130602836608887, "learning_rate": 5.3722854355928324e-06, "loss": 0.3171, "step": 25999 }, { "epoch": 2.643350955673038, "grad_norm": 0.2704228460788727, "learning_rate": 5.37193153376081e-06, "loss": 0.3504, "step": 26000 }, { "epoch": 2.643452623017487, "grad_norm": 0.27026069164276123, "learning_rate": 5.371577630055083e-06, "loss": 0.3348, "step": 26001 }, { "epoch": 2.643554290361936, "grad_norm": 0.27680426836013794, "learning_rate": 5.3712237244774365e-06, "loss": 0.3467, "step": 26002 }, { "epoch": 2.6436559577063847, "grad_norm": 0.2649843990802765, "learning_rate": 5.370869817029654e-06, "loss": 0.3621, "step": 26003 }, { "epoch": 2.6437576250508337, "grad_norm": 0.28247523307800293, "learning_rate": 5.3705159077135176e-06, "loss": 0.3325, "step": 26004 }, { "epoch": 2.6438592923952826, "grad_norm": 0.2519785165786743, "learning_rate": 5.370161996530811e-06, "loss": 0.3245, "step": 26005 }, { "epoch": 2.6439609597397316, "grad_norm": 0.2684269845485687, "learning_rate": 5.369808083483314e-06, "loss": 0.3373, "step": 26006 }, { "epoch": 2.6440626270841805, "grad_norm": 0.2903437316417694, "learning_rate": 5.369454168572812e-06, "loss": 0.3368, "step": 26007 }, { "epoch": 2.6441642944286294, "grad_norm": 0.2946176826953888, "learning_rate": 5.369100251801089e-06, "loss": 0.3526, "step": 26008 }, { "epoch": 2.6442659617730784, "grad_norm": 0.3001915514469147, "learning_rate": 5.368746333169926e-06, "loss": 0.3089, "step": 26009 }, { "epoch": 2.6443676291175273, "grad_norm": 0.25808748602867126, "learning_rate": 5.368392412681107e-06, "loss": 0.328, "step": 26010 }, { "epoch": 2.6444692964619763, "grad_norm": 0.25651922821998596, "learning_rate": 5.368038490336413e-06, "loss": 0.3171, "step": 26011 }, { "epoch": 2.6445709638064256, "grad_norm": 0.25552842020988464, "learning_rate": 5.3676845661376306e-06, "loss": 0.3064, "step": 26012 }, { "epoch": 2.6446726311508746, "grad_norm": 0.27873465418815613, "learning_rate": 5.36733064008654e-06, "loss": 0.3455, "step": 26013 }, { "epoch": 2.6447742984953235, "grad_norm": 0.2881523072719574, "learning_rate": 5.366976712184925e-06, "loss": 0.3043, "step": 26014 }, { "epoch": 2.6448759658397725, "grad_norm": 0.27568528056144714, "learning_rate": 5.366622782434569e-06, "loss": 0.3751, "step": 26015 }, { "epoch": 2.6449776331842214, "grad_norm": 0.27383264899253845, "learning_rate": 5.366268850837253e-06, "loss": 0.3299, "step": 26016 }, { "epoch": 2.6450793005286704, "grad_norm": 0.2758805751800537, "learning_rate": 5.365914917394763e-06, "loss": 0.3268, "step": 26017 }, { "epoch": 2.6451809678731193, "grad_norm": 0.26470157504081726, "learning_rate": 5.365560982108879e-06, "loss": 0.3275, "step": 26018 }, { "epoch": 2.6452826352175682, "grad_norm": 0.2627790868282318, "learning_rate": 5.365207044981387e-06, "loss": 0.3349, "step": 26019 }, { "epoch": 2.645384302562017, "grad_norm": 0.2835509479045868, "learning_rate": 5.364853106014069e-06, "loss": 0.3419, "step": 26020 }, { "epoch": 2.645485969906466, "grad_norm": 0.2639153301715851, "learning_rate": 5.3644991652087075e-06, "loss": 0.3102, "step": 26021 }, { "epoch": 2.645587637250915, "grad_norm": 0.26829519867897034, "learning_rate": 5.364145222567084e-06, "loss": 0.3692, "step": 26022 }, { "epoch": 2.645689304595364, "grad_norm": 0.26819974184036255, "learning_rate": 5.363791278090986e-06, "loss": 0.3229, "step": 26023 }, { "epoch": 2.645790971939813, "grad_norm": 0.2464446723461151, "learning_rate": 5.363437331782193e-06, "loss": 0.3375, "step": 26024 }, { "epoch": 2.645892639284262, "grad_norm": 0.25840410590171814, "learning_rate": 5.363083383642489e-06, "loss": 0.3504, "step": 26025 }, { "epoch": 2.645994306628711, "grad_norm": 0.2570910155773163, "learning_rate": 5.362729433673657e-06, "loss": 0.3368, "step": 26026 }, { "epoch": 2.6460959739731598, "grad_norm": 0.288998544216156, "learning_rate": 5.3623754818774795e-06, "loss": 0.3446, "step": 26027 }, { "epoch": 2.6461976413176087, "grad_norm": 0.2723217308521271, "learning_rate": 5.362021528255743e-06, "loss": 0.3197, "step": 26028 }, { "epoch": 2.6462993086620576, "grad_norm": 0.25600069761276245, "learning_rate": 5.3616675728102255e-06, "loss": 0.3114, "step": 26029 }, { "epoch": 2.6464009760065066, "grad_norm": 0.28881028294563293, "learning_rate": 5.361313615542715e-06, "loss": 0.3174, "step": 26030 }, { "epoch": 2.6465026433509555, "grad_norm": 0.277809202671051, "learning_rate": 5.360959656454991e-06, "loss": 0.3488, "step": 26031 }, { "epoch": 2.6466043106954045, "grad_norm": 0.2853372395038605, "learning_rate": 5.3606056955488375e-06, "loss": 0.334, "step": 26032 }, { "epoch": 2.6467059780398534, "grad_norm": 0.25625887513160706, "learning_rate": 5.360251732826038e-06, "loss": 0.3037, "step": 26033 }, { "epoch": 2.6468076453843024, "grad_norm": 0.26223209500312805, "learning_rate": 5.359897768288377e-06, "loss": 0.325, "step": 26034 }, { "epoch": 2.6469093127287513, "grad_norm": 0.283571720123291, "learning_rate": 5.359543801937637e-06, "loss": 0.3257, "step": 26035 }, { "epoch": 2.6470109800732002, "grad_norm": 0.2742062211036682, "learning_rate": 5.3591898337756e-06, "loss": 0.3662, "step": 26036 }, { "epoch": 2.647112647417649, "grad_norm": 0.2622760236263275, "learning_rate": 5.358835863804049e-06, "loss": 0.3352, "step": 26037 }, { "epoch": 2.6472143147620986, "grad_norm": 0.25678470730781555, "learning_rate": 5.358481892024769e-06, "loss": 0.3282, "step": 26038 }, { "epoch": 2.6473159821065475, "grad_norm": 0.2687285542488098, "learning_rate": 5.358127918439544e-06, "loss": 0.3804, "step": 26039 }, { "epoch": 2.6474176494509964, "grad_norm": 0.2677253782749176, "learning_rate": 5.357773943050154e-06, "loss": 0.3087, "step": 26040 }, { "epoch": 2.6475193167954454, "grad_norm": 0.27860817313194275, "learning_rate": 5.357419965858383e-06, "loss": 0.3774, "step": 26041 }, { "epoch": 2.6476209841398943, "grad_norm": 0.2772500514984131, "learning_rate": 5.357065986866017e-06, "loss": 0.3243, "step": 26042 }, { "epoch": 2.6477226514843433, "grad_norm": 0.2626150846481323, "learning_rate": 5.356712006074837e-06, "loss": 0.3191, "step": 26043 }, { "epoch": 2.647824318828792, "grad_norm": 0.29258373379707336, "learning_rate": 5.356358023486626e-06, "loss": 0.3655, "step": 26044 }, { "epoch": 2.647925986173241, "grad_norm": 0.2549138069152832, "learning_rate": 5.356004039103169e-06, "loss": 0.3234, "step": 26045 }, { "epoch": 2.64802765351769, "grad_norm": 0.30192285776138306, "learning_rate": 5.3556500529262465e-06, "loss": 0.3484, "step": 26046 }, { "epoch": 2.648129320862139, "grad_norm": 0.26119720935821533, "learning_rate": 5.355296064957645e-06, "loss": 0.3181, "step": 26047 }, { "epoch": 2.648230988206588, "grad_norm": 0.2615206241607666, "learning_rate": 5.354942075199146e-06, "loss": 0.3265, "step": 26048 }, { "epoch": 2.648332655551037, "grad_norm": 0.2627463638782501, "learning_rate": 5.354588083652534e-06, "loss": 0.3681, "step": 26049 }, { "epoch": 2.648434322895486, "grad_norm": 0.27370864152908325, "learning_rate": 5.35423409031959e-06, "loss": 0.3576, "step": 26050 }, { "epoch": 2.648535990239935, "grad_norm": 0.254120796918869, "learning_rate": 5.3538800952020995e-06, "loss": 0.3688, "step": 26051 }, { "epoch": 2.6486376575843837, "grad_norm": 0.28512218594551086, "learning_rate": 5.3535260983018445e-06, "loss": 0.3523, "step": 26052 }, { "epoch": 2.648739324928833, "grad_norm": 0.2675498127937317, "learning_rate": 5.35317209962061e-06, "loss": 0.3234, "step": 26053 }, { "epoch": 2.648840992273282, "grad_norm": 0.27074626088142395, "learning_rate": 5.3528180991601785e-06, "loss": 0.3435, "step": 26054 }, { "epoch": 2.648942659617731, "grad_norm": 0.2760411500930786, "learning_rate": 5.352464096922333e-06, "loss": 0.3474, "step": 26055 }, { "epoch": 2.64904432696218, "grad_norm": 0.2720508277416229, "learning_rate": 5.352110092908857e-06, "loss": 0.3284, "step": 26056 }, { "epoch": 2.649145994306629, "grad_norm": 0.2918837368488312, "learning_rate": 5.351756087121534e-06, "loss": 0.3572, "step": 26057 }, { "epoch": 2.649247661651078, "grad_norm": 0.283574640750885, "learning_rate": 5.351402079562147e-06, "loss": 0.3062, "step": 26058 }, { "epoch": 2.6493493289955268, "grad_norm": 0.26446107029914856, "learning_rate": 5.351048070232481e-06, "loss": 0.3368, "step": 26059 }, { "epoch": 2.6494509963399757, "grad_norm": 0.25428536534309387, "learning_rate": 5.350694059134317e-06, "loss": 0.3321, "step": 26060 }, { "epoch": 2.6495526636844247, "grad_norm": 0.2637936472892761, "learning_rate": 5.3503400462694395e-06, "loss": 0.3257, "step": 26061 }, { "epoch": 2.6496543310288736, "grad_norm": 0.24849067628383636, "learning_rate": 5.349986031639634e-06, "loss": 0.3411, "step": 26062 }, { "epoch": 2.6497559983733225, "grad_norm": 0.26396605372428894, "learning_rate": 5.34963201524668e-06, "loss": 0.3269, "step": 26063 }, { "epoch": 2.6498576657177715, "grad_norm": 0.26461631059646606, "learning_rate": 5.349277997092365e-06, "loss": 0.2974, "step": 26064 }, { "epoch": 2.6499593330622204, "grad_norm": 0.27093032002449036, "learning_rate": 5.348923977178469e-06, "loss": 0.3324, "step": 26065 }, { "epoch": 2.6500610004066694, "grad_norm": 0.2535475790500641, "learning_rate": 5.3485699555067776e-06, "loss": 0.3197, "step": 26066 }, { "epoch": 2.6501626677511183, "grad_norm": 0.2767893970012665, "learning_rate": 5.348215932079073e-06, "loss": 0.3348, "step": 26067 }, { "epoch": 2.6502643350955672, "grad_norm": 0.26978668570518494, "learning_rate": 5.347861906897138e-06, "loss": 0.3265, "step": 26068 }, { "epoch": 2.650366002440016, "grad_norm": 0.24497872591018677, "learning_rate": 5.34750787996276e-06, "loss": 0.3383, "step": 26069 }, { "epoch": 2.650467669784465, "grad_norm": 0.26926639676094055, "learning_rate": 5.347153851277717e-06, "loss": 0.3397, "step": 26070 }, { "epoch": 2.650569337128914, "grad_norm": 0.2909664809703827, "learning_rate": 5.346799820843799e-06, "loss": 0.3261, "step": 26071 }, { "epoch": 2.650671004473363, "grad_norm": 0.26132234930992126, "learning_rate": 5.346445788662783e-06, "loss": 0.3348, "step": 26072 }, { "epoch": 2.650772671817812, "grad_norm": 0.25494512915611267, "learning_rate": 5.346091754736457e-06, "loss": 0.3418, "step": 26073 }, { "epoch": 2.650874339162261, "grad_norm": 0.2582370638847351, "learning_rate": 5.345737719066603e-06, "loss": 0.3651, "step": 26074 }, { "epoch": 2.65097600650671, "grad_norm": 0.2510104179382324, "learning_rate": 5.345383681655003e-06, "loss": 0.3614, "step": 26075 }, { "epoch": 2.651077673851159, "grad_norm": 0.2688155770301819, "learning_rate": 5.3450296425034444e-06, "loss": 0.3266, "step": 26076 }, { "epoch": 2.6511793411956077, "grad_norm": 0.26697903871536255, "learning_rate": 5.344675601613706e-06, "loss": 0.3324, "step": 26077 }, { "epoch": 2.6512810085400567, "grad_norm": 0.2678934931755066, "learning_rate": 5.344321558987574e-06, "loss": 0.3125, "step": 26078 }, { "epoch": 2.651382675884506, "grad_norm": 0.26284170150756836, "learning_rate": 5.3439675146268346e-06, "loss": 0.313, "step": 26079 }, { "epoch": 2.651484343228955, "grad_norm": 0.2607915997505188, "learning_rate": 5.343613468533265e-06, "loss": 0.3115, "step": 26080 }, { "epoch": 2.651586010573404, "grad_norm": 0.25595733523368835, "learning_rate": 5.343259420708656e-06, "loss": 0.3281, "step": 26081 }, { "epoch": 2.651687677917853, "grad_norm": 0.2740762233734131, "learning_rate": 5.342905371154785e-06, "loss": 0.3334, "step": 26082 }, { "epoch": 2.651789345262302, "grad_norm": 0.2670579254627228, "learning_rate": 5.342551319873438e-06, "loss": 0.3592, "step": 26083 }, { "epoch": 2.6518910126067508, "grad_norm": 0.27397429943084717, "learning_rate": 5.3421972668664015e-06, "loss": 0.3779, "step": 26084 }, { "epoch": 2.6519926799511997, "grad_norm": 0.2858998477458954, "learning_rate": 5.341843212135453e-06, "loss": 0.3334, "step": 26085 }, { "epoch": 2.6520943472956486, "grad_norm": 0.259303480386734, "learning_rate": 5.3414891556823835e-06, "loss": 0.3194, "step": 26086 }, { "epoch": 2.6521960146400976, "grad_norm": 0.26090407371520996, "learning_rate": 5.3411350975089695e-06, "loss": 0.348, "step": 26087 }, { "epoch": 2.6522976819845465, "grad_norm": 0.2647506594657898, "learning_rate": 5.340781037616999e-06, "loss": 0.3338, "step": 26088 }, { "epoch": 2.6523993493289955, "grad_norm": 0.24910061061382294, "learning_rate": 5.340426976008255e-06, "loss": 0.3084, "step": 26089 }, { "epoch": 2.6525010166734444, "grad_norm": 0.26917433738708496, "learning_rate": 5.340072912684521e-06, "loss": 0.332, "step": 26090 }, { "epoch": 2.6526026840178933, "grad_norm": 0.28639912605285645, "learning_rate": 5.3397188476475806e-06, "loss": 0.316, "step": 26091 }, { "epoch": 2.6527043513623423, "grad_norm": 0.27596476674079895, "learning_rate": 5.339364780899217e-06, "loss": 0.3217, "step": 26092 }, { "epoch": 2.6528060187067912, "grad_norm": 0.28364038467407227, "learning_rate": 5.339010712441213e-06, "loss": 0.332, "step": 26093 }, { "epoch": 2.6529076860512406, "grad_norm": 0.247283473610878, "learning_rate": 5.338656642275355e-06, "loss": 0.3347, "step": 26094 }, { "epoch": 2.6530093533956896, "grad_norm": 0.24744249880313873, "learning_rate": 5.338302570403425e-06, "loss": 0.3242, "step": 26095 }, { "epoch": 2.6531110207401385, "grad_norm": 0.27617958188056946, "learning_rate": 5.337948496827208e-06, "loss": 0.3489, "step": 26096 }, { "epoch": 2.6532126880845874, "grad_norm": 0.2659539580345154, "learning_rate": 5.3375944215484855e-06, "loss": 0.3282, "step": 26097 }, { "epoch": 2.6533143554290364, "grad_norm": 0.29090988636016846, "learning_rate": 5.337240344569043e-06, "loss": 0.3522, "step": 26098 }, { "epoch": 2.6534160227734853, "grad_norm": 0.3153422474861145, "learning_rate": 5.336886265890664e-06, "loss": 0.337, "step": 26099 }, { "epoch": 2.6535176901179343, "grad_norm": 0.27403441071510315, "learning_rate": 5.3365321855151305e-06, "loss": 0.326, "step": 26100 }, { "epoch": 2.653619357462383, "grad_norm": 0.25567370653152466, "learning_rate": 5.3361781034442295e-06, "loss": 0.3254, "step": 26101 }, { "epoch": 2.653721024806832, "grad_norm": 0.27109622955322266, "learning_rate": 5.335824019679743e-06, "loss": 0.3369, "step": 26102 }, { "epoch": 2.653822692151281, "grad_norm": 0.2768201231956482, "learning_rate": 5.335469934223454e-06, "loss": 0.3355, "step": 26103 }, { "epoch": 2.65392435949573, "grad_norm": 0.25808966159820557, "learning_rate": 5.335115847077148e-06, "loss": 0.3297, "step": 26104 }, { "epoch": 2.654026026840179, "grad_norm": 0.26406919956207275, "learning_rate": 5.334761758242607e-06, "loss": 0.3643, "step": 26105 }, { "epoch": 2.654127694184628, "grad_norm": 0.2656460702419281, "learning_rate": 5.334407667721617e-06, "loss": 0.3166, "step": 26106 }, { "epoch": 2.654229361529077, "grad_norm": 0.28120821714401245, "learning_rate": 5.334053575515961e-06, "loss": 0.3066, "step": 26107 }, { "epoch": 2.654331028873526, "grad_norm": 0.26378223299980164, "learning_rate": 5.33369948162742e-06, "loss": 0.3065, "step": 26108 }, { "epoch": 2.6544326962179747, "grad_norm": 0.2581094205379486, "learning_rate": 5.333345386057783e-06, "loss": 0.3257, "step": 26109 }, { "epoch": 2.6545343635624237, "grad_norm": 0.27061060070991516, "learning_rate": 5.332991288808829e-06, "loss": 0.3253, "step": 26110 }, { "epoch": 2.6546360309068726, "grad_norm": 0.259531170129776, "learning_rate": 5.332637189882347e-06, "loss": 0.3239, "step": 26111 }, { "epoch": 2.6547376982513216, "grad_norm": 0.2618267834186554, "learning_rate": 5.332283089280116e-06, "loss": 0.3347, "step": 26112 }, { "epoch": 2.6548393655957705, "grad_norm": 0.2715437114238739, "learning_rate": 5.33192898700392e-06, "loss": 0.3198, "step": 26113 }, { "epoch": 2.6549410329402194, "grad_norm": 0.245988667011261, "learning_rate": 5.331574883055548e-06, "loss": 0.3379, "step": 26114 }, { "epoch": 2.6550427002846684, "grad_norm": 0.2809995114803314, "learning_rate": 5.3312207774367795e-06, "loss": 0.3338, "step": 26115 }, { "epoch": 2.6551443676291173, "grad_norm": 0.24844059348106384, "learning_rate": 5.3308666701493985e-06, "loss": 0.3325, "step": 26116 }, { "epoch": 2.6552460349735663, "grad_norm": 0.2782694697380066, "learning_rate": 5.330512561195189e-06, "loss": 0.3706, "step": 26117 }, { "epoch": 2.655347702318015, "grad_norm": 0.26549994945526123, "learning_rate": 5.330158450575938e-06, "loss": 0.3045, "step": 26118 }, { "epoch": 2.655449369662464, "grad_norm": 0.27992284297943115, "learning_rate": 5.329804338293426e-06, "loss": 0.3285, "step": 26119 }, { "epoch": 2.6555510370069135, "grad_norm": 0.2736950218677521, "learning_rate": 5.3294502243494395e-06, "loss": 0.329, "step": 26120 }, { "epoch": 2.6556527043513625, "grad_norm": 0.2949356138706207, "learning_rate": 5.329096108745759e-06, "loss": 0.3399, "step": 26121 }, { "epoch": 2.6557543716958114, "grad_norm": 0.2801549434661865, "learning_rate": 5.328741991484171e-06, "loss": 0.3297, "step": 26122 }, { "epoch": 2.6558560390402604, "grad_norm": 0.27267783880233765, "learning_rate": 5.328387872566458e-06, "loss": 0.3399, "step": 26123 }, { "epoch": 2.6559577063847093, "grad_norm": 0.2551802098751068, "learning_rate": 5.328033751994408e-06, "loss": 0.3455, "step": 26124 }, { "epoch": 2.6560593737291582, "grad_norm": 0.277130663394928, "learning_rate": 5.327679629769801e-06, "loss": 0.363, "step": 26125 }, { "epoch": 2.656161041073607, "grad_norm": 0.24995654821395874, "learning_rate": 5.3273255058944205e-06, "loss": 0.3658, "step": 26126 }, { "epoch": 2.656262708418056, "grad_norm": 0.25825899839401245, "learning_rate": 5.326971380370052e-06, "loss": 0.3261, "step": 26127 }, { "epoch": 2.656364375762505, "grad_norm": 0.27028223872184753, "learning_rate": 5.326617253198479e-06, "loss": 0.35, "step": 26128 }, { "epoch": 2.656466043106954, "grad_norm": 0.2734176218509674, "learning_rate": 5.326263124381487e-06, "loss": 0.3444, "step": 26129 }, { "epoch": 2.656567710451403, "grad_norm": 0.2628110349178314, "learning_rate": 5.3259089939208595e-06, "loss": 0.3144, "step": 26130 }, { "epoch": 2.656669377795852, "grad_norm": 0.25713682174682617, "learning_rate": 5.3255548618183795e-06, "loss": 0.3088, "step": 26131 }, { "epoch": 2.656771045140301, "grad_norm": 0.25947749614715576, "learning_rate": 5.32520072807583e-06, "loss": 0.324, "step": 26132 }, { "epoch": 2.6568727124847498, "grad_norm": 0.2798951268196106, "learning_rate": 5.324846592694999e-06, "loss": 0.3391, "step": 26133 }, { "epoch": 2.6569743798291987, "grad_norm": 0.28378599882125854, "learning_rate": 5.324492455677665e-06, "loss": 0.3465, "step": 26134 }, { "epoch": 2.657076047173648, "grad_norm": 0.2688038945198059, "learning_rate": 5.324138317025618e-06, "loss": 0.3589, "step": 26135 }, { "epoch": 2.657177714518097, "grad_norm": 0.28024405241012573, "learning_rate": 5.323784176740637e-06, "loss": 0.3302, "step": 26136 }, { "epoch": 2.657279381862546, "grad_norm": 0.2658536732196808, "learning_rate": 5.3234300348245095e-06, "loss": 0.347, "step": 26137 }, { "epoch": 2.657381049206995, "grad_norm": 0.2704530656337738, "learning_rate": 5.323075891279018e-06, "loss": 0.3213, "step": 26138 }, { "epoch": 2.657482716551444, "grad_norm": 0.26734659075737, "learning_rate": 5.322721746105947e-06, "loss": 0.3096, "step": 26139 }, { "epoch": 2.657584383895893, "grad_norm": 0.26145797967910767, "learning_rate": 5.322367599307081e-06, "loss": 0.3277, "step": 26140 }, { "epoch": 2.6576860512403417, "grad_norm": 0.2675299644470215, "learning_rate": 5.322013450884203e-06, "loss": 0.3187, "step": 26141 }, { "epoch": 2.6577877185847907, "grad_norm": 0.2779546082019806, "learning_rate": 5.3216593008390975e-06, "loss": 0.3285, "step": 26142 }, { "epoch": 2.6578893859292396, "grad_norm": 0.25960788130760193, "learning_rate": 5.32130514917355e-06, "loss": 0.3284, "step": 26143 }, { "epoch": 2.6579910532736886, "grad_norm": 0.26422685384750366, "learning_rate": 5.320950995889342e-06, "loss": 0.3291, "step": 26144 }, { "epoch": 2.6580927206181375, "grad_norm": 0.2737506628036499, "learning_rate": 5.32059684098826e-06, "loss": 0.3518, "step": 26145 }, { "epoch": 2.6581943879625864, "grad_norm": 0.26156947016716003, "learning_rate": 5.320242684472088e-06, "loss": 0.328, "step": 26146 }, { "epoch": 2.6582960553070354, "grad_norm": 0.2725352346897125, "learning_rate": 5.3198885263426085e-06, "loss": 0.3728, "step": 26147 }, { "epoch": 2.6583977226514843, "grad_norm": 0.28183895349502563, "learning_rate": 5.319534366601606e-06, "loss": 0.3528, "step": 26148 }, { "epoch": 2.6584993899959333, "grad_norm": 0.2564438581466675, "learning_rate": 5.3191802052508665e-06, "loss": 0.3233, "step": 26149 }, { "epoch": 2.658601057340382, "grad_norm": 0.2922244668006897, "learning_rate": 5.318826042292173e-06, "loss": 0.346, "step": 26150 }, { "epoch": 2.658702724684831, "grad_norm": 0.24321968853473663, "learning_rate": 5.3184718777273094e-06, "loss": 0.3595, "step": 26151 }, { "epoch": 2.65880439202928, "grad_norm": 0.26879116892814636, "learning_rate": 5.3181177115580595e-06, "loss": 0.314, "step": 26152 }, { "epoch": 2.658906059373729, "grad_norm": 0.2631934583187103, "learning_rate": 5.31776354378621e-06, "loss": 0.3447, "step": 26153 }, { "epoch": 2.659007726718178, "grad_norm": 0.28262951970100403, "learning_rate": 5.3174093744135406e-06, "loss": 0.326, "step": 26154 }, { "epoch": 2.659109394062627, "grad_norm": 0.25792598724365234, "learning_rate": 5.31705520344184e-06, "loss": 0.3223, "step": 26155 }, { "epoch": 2.659211061407076, "grad_norm": 0.2662597596645355, "learning_rate": 5.31670103087289e-06, "loss": 0.3448, "step": 26156 }, { "epoch": 2.659312728751525, "grad_norm": 0.25938209891319275, "learning_rate": 5.316346856708475e-06, "loss": 0.3291, "step": 26157 }, { "epoch": 2.6594143960959737, "grad_norm": 0.26051512360572815, "learning_rate": 5.315992680950381e-06, "loss": 0.3351, "step": 26158 }, { "epoch": 2.6595160634404227, "grad_norm": 0.2566111385822296, "learning_rate": 5.31563850360039e-06, "loss": 0.3729, "step": 26159 }, { "epoch": 2.6596177307848716, "grad_norm": 0.265460729598999, "learning_rate": 5.3152843246602885e-06, "loss": 0.3134, "step": 26160 }, { "epoch": 2.659719398129321, "grad_norm": 0.26028966903686523, "learning_rate": 5.314930144131858e-06, "loss": 0.3248, "step": 26161 }, { "epoch": 2.65982106547377, "grad_norm": 0.29605183005332947, "learning_rate": 5.314575962016885e-06, "loss": 0.3291, "step": 26162 }, { "epoch": 2.659922732818219, "grad_norm": 0.23568987846374512, "learning_rate": 5.314221778317154e-06, "loss": 0.3121, "step": 26163 }, { "epoch": 2.660024400162668, "grad_norm": 0.2809450030326843, "learning_rate": 5.313867593034445e-06, "loss": 0.3153, "step": 26164 }, { "epoch": 2.6601260675071168, "grad_norm": 0.2653796970844269, "learning_rate": 5.31351340617055e-06, "loss": 0.3201, "step": 26165 }, { "epoch": 2.6602277348515657, "grad_norm": 0.28703606128692627, "learning_rate": 5.313159217727246e-06, "loss": 0.3428, "step": 26166 }, { "epoch": 2.6603294021960147, "grad_norm": 0.26286935806274414, "learning_rate": 5.312805027706321e-06, "loss": 0.33, "step": 26167 }, { "epoch": 2.6604310695404636, "grad_norm": 0.29540884494781494, "learning_rate": 5.312450836109558e-06, "loss": 0.316, "step": 26168 }, { "epoch": 2.6605327368849125, "grad_norm": 0.2644258439540863, "learning_rate": 5.3120966429387436e-06, "loss": 0.3131, "step": 26169 }, { "epoch": 2.6606344042293615, "grad_norm": 0.248589888215065, "learning_rate": 5.311742448195659e-06, "loss": 0.308, "step": 26170 }, { "epoch": 2.6607360715738104, "grad_norm": 0.27968093752861023, "learning_rate": 5.311388251882091e-06, "loss": 0.3109, "step": 26171 }, { "epoch": 2.6608377389182594, "grad_norm": 0.2796548902988434, "learning_rate": 5.311034053999823e-06, "loss": 0.358, "step": 26172 }, { "epoch": 2.6609394062627083, "grad_norm": 0.25484055280685425, "learning_rate": 5.310679854550639e-06, "loss": 0.3065, "step": 26173 }, { "epoch": 2.6610410736071572, "grad_norm": 0.2705589234828949, "learning_rate": 5.310325653536322e-06, "loss": 0.3383, "step": 26174 }, { "epoch": 2.661142740951606, "grad_norm": 0.28548291325569153, "learning_rate": 5.30997145095866e-06, "loss": 0.3728, "step": 26175 }, { "epoch": 2.6612444082960556, "grad_norm": 0.27459457516670227, "learning_rate": 5.309617246819437e-06, "loss": 0.3155, "step": 26176 }, { "epoch": 2.6613460756405045, "grad_norm": 0.2522667348384857, "learning_rate": 5.309263041120434e-06, "loss": 0.3513, "step": 26177 }, { "epoch": 2.6614477429849535, "grad_norm": 0.27680453658103943, "learning_rate": 5.308908833863437e-06, "loss": 0.322, "step": 26178 }, { "epoch": 2.6615494103294024, "grad_norm": 0.27677276730537415, "learning_rate": 5.308554625050231e-06, "loss": 0.3395, "step": 26179 }, { "epoch": 2.6616510776738513, "grad_norm": 0.2557581961154938, "learning_rate": 5.3082004146826004e-06, "loss": 0.3521, "step": 26180 }, { "epoch": 2.6617527450183003, "grad_norm": 0.25779858231544495, "learning_rate": 5.307846202762329e-06, "loss": 0.313, "step": 26181 }, { "epoch": 2.6618544123627492, "grad_norm": 0.28088292479515076, "learning_rate": 5.307491989291202e-06, "loss": 0.3337, "step": 26182 }, { "epoch": 2.661956079707198, "grad_norm": 0.24969364702701569, "learning_rate": 5.307137774271003e-06, "loss": 0.3329, "step": 26183 }, { "epoch": 2.662057747051647, "grad_norm": 0.2610408663749695, "learning_rate": 5.306783557703517e-06, "loss": 0.321, "step": 26184 }, { "epoch": 2.662159414396096, "grad_norm": 0.2564660310745239, "learning_rate": 5.306429339590529e-06, "loss": 0.3023, "step": 26185 }, { "epoch": 2.662261081740545, "grad_norm": 0.2678010165691376, "learning_rate": 5.306075119933822e-06, "loss": 0.3342, "step": 26186 }, { "epoch": 2.662362749084994, "grad_norm": 0.2643973231315613, "learning_rate": 5.3057208987351815e-06, "loss": 0.3327, "step": 26187 }, { "epoch": 2.662464416429443, "grad_norm": 0.256873220205307, "learning_rate": 5.3053666759963915e-06, "loss": 0.3061, "step": 26188 }, { "epoch": 2.662566083773892, "grad_norm": 0.2790527939796448, "learning_rate": 5.305012451719237e-06, "loss": 0.335, "step": 26189 }, { "epoch": 2.6626677511183408, "grad_norm": 0.2940075993537903, "learning_rate": 5.3046582259055026e-06, "loss": 0.3466, "step": 26190 }, { "epoch": 2.6627694184627897, "grad_norm": 0.2707168459892273, "learning_rate": 5.304303998556972e-06, "loss": 0.3317, "step": 26191 }, { "epoch": 2.6628710858072386, "grad_norm": 0.29143333435058594, "learning_rate": 5.303949769675431e-06, "loss": 0.295, "step": 26192 }, { "epoch": 2.6629727531516876, "grad_norm": 0.26220792531967163, "learning_rate": 5.303595539262661e-06, "loss": 0.3425, "step": 26193 }, { "epoch": 2.6630744204961365, "grad_norm": 0.2534583806991577, "learning_rate": 5.303241307320451e-06, "loss": 0.3176, "step": 26194 }, { "epoch": 2.6631760878405855, "grad_norm": 0.27822765707969666, "learning_rate": 5.302887073850583e-06, "loss": 0.3054, "step": 26195 }, { "epoch": 2.6632777551850344, "grad_norm": 0.25170600414276123, "learning_rate": 5.302532838854842e-06, "loss": 0.3045, "step": 26196 }, { "epoch": 2.6633794225294833, "grad_norm": 0.2601478695869446, "learning_rate": 5.302178602335012e-06, "loss": 0.314, "step": 26197 }, { "epoch": 2.6634810898739323, "grad_norm": 0.2835440933704376, "learning_rate": 5.301824364292878e-06, "loss": 0.357, "step": 26198 }, { "epoch": 2.6635827572183812, "grad_norm": 0.2589118182659149, "learning_rate": 5.301470124730223e-06, "loss": 0.3574, "step": 26199 }, { "epoch": 2.66368442456283, "grad_norm": 0.2610994875431061, "learning_rate": 5.301115883648837e-06, "loss": 0.3485, "step": 26200 }, { "epoch": 2.663786091907279, "grad_norm": 0.27415144443511963, "learning_rate": 5.3007616410504995e-06, "loss": 0.3335, "step": 26201 }, { "epoch": 2.6638877592517285, "grad_norm": 0.2978349030017853, "learning_rate": 5.300407396936994e-06, "loss": 0.3653, "step": 26202 }, { "epoch": 2.6639894265961774, "grad_norm": 0.2836759090423584, "learning_rate": 5.300053151310108e-06, "loss": 0.3357, "step": 26203 }, { "epoch": 2.6640910939406264, "grad_norm": 0.2779964804649353, "learning_rate": 5.299698904171626e-06, "loss": 0.3045, "step": 26204 }, { "epoch": 2.6641927612850753, "grad_norm": 0.2734106779098511, "learning_rate": 5.299344655523334e-06, "loss": 0.3314, "step": 26205 }, { "epoch": 2.6642944286295243, "grad_norm": 0.25147631764411926, "learning_rate": 5.298990405367014e-06, "loss": 0.3369, "step": 26206 }, { "epoch": 2.664396095973973, "grad_norm": 0.27734336256980896, "learning_rate": 5.29863615370445e-06, "loss": 0.3184, "step": 26207 }, { "epoch": 2.664497763318422, "grad_norm": 0.2668226659297943, "learning_rate": 5.298281900537428e-06, "loss": 0.3712, "step": 26208 }, { "epoch": 2.664599430662871, "grad_norm": 0.2845154404640198, "learning_rate": 5.297927645867734e-06, "loss": 0.3267, "step": 26209 }, { "epoch": 2.66470109800732, "grad_norm": 0.2653072774410248, "learning_rate": 5.29757338969715e-06, "loss": 0.3533, "step": 26210 }, { "epoch": 2.664802765351769, "grad_norm": 0.2661494314670563, "learning_rate": 5.297219132027464e-06, "loss": 0.2958, "step": 26211 }, { "epoch": 2.664904432696218, "grad_norm": 0.2529955208301544, "learning_rate": 5.296864872860456e-06, "loss": 0.3396, "step": 26212 }, { "epoch": 2.665006100040667, "grad_norm": 0.2758569121360779, "learning_rate": 5.296510612197915e-06, "loss": 0.3294, "step": 26213 }, { "epoch": 2.665107767385116, "grad_norm": 0.28708142042160034, "learning_rate": 5.296156350041624e-06, "loss": 0.3404, "step": 26214 }, { "epoch": 2.6652094347295647, "grad_norm": 0.2517937123775482, "learning_rate": 5.295802086393367e-06, "loss": 0.32, "step": 26215 }, { "epoch": 2.6653111020740137, "grad_norm": 0.28972330689430237, "learning_rate": 5.29544782125493e-06, "loss": 0.3272, "step": 26216 }, { "epoch": 2.665412769418463, "grad_norm": 0.26063522696495056, "learning_rate": 5.295093554628096e-06, "loss": 0.324, "step": 26217 }, { "epoch": 2.665514436762912, "grad_norm": 0.2627808451652527, "learning_rate": 5.294739286514652e-06, "loss": 0.3087, "step": 26218 }, { "epoch": 2.665616104107361, "grad_norm": 0.2572389245033264, "learning_rate": 5.294385016916382e-06, "loss": 0.3288, "step": 26219 }, { "epoch": 2.66571777145181, "grad_norm": 0.25677576661109924, "learning_rate": 5.294030745835068e-06, "loss": 0.3414, "step": 26220 }, { "epoch": 2.665819438796259, "grad_norm": 0.26347965002059937, "learning_rate": 5.2936764732724985e-06, "loss": 0.3093, "step": 26221 }, { "epoch": 2.6659211061407078, "grad_norm": 0.25232774019241333, "learning_rate": 5.293322199230456e-06, "loss": 0.3429, "step": 26222 }, { "epoch": 2.6660227734851567, "grad_norm": 0.2578570544719696, "learning_rate": 5.2929679237107265e-06, "loss": 0.3289, "step": 26223 }, { "epoch": 2.6661244408296056, "grad_norm": 0.28113794326782227, "learning_rate": 5.2926136467150945e-06, "loss": 0.3255, "step": 26224 }, { "epoch": 2.6662261081740546, "grad_norm": 0.2724849581718445, "learning_rate": 5.292259368245344e-06, "loss": 0.316, "step": 26225 }, { "epoch": 2.6663277755185035, "grad_norm": 0.28413569927215576, "learning_rate": 5.291905088303261e-06, "loss": 0.3351, "step": 26226 }, { "epoch": 2.6664294428629525, "grad_norm": 0.2739609479904175, "learning_rate": 5.291550806890629e-06, "loss": 0.3425, "step": 26227 }, { "epoch": 2.6665311102074014, "grad_norm": 0.2698490619659424, "learning_rate": 5.291196524009233e-06, "loss": 0.3263, "step": 26228 }, { "epoch": 2.6666327775518504, "grad_norm": 0.2491374909877777, "learning_rate": 5.290842239660858e-06, "loss": 0.3077, "step": 26229 }, { "epoch": 2.6667344448962993, "grad_norm": 0.25336503982543945, "learning_rate": 5.29048795384729e-06, "loss": 0.3209, "step": 26230 }, { "epoch": 2.6668361122407482, "grad_norm": 0.26772570610046387, "learning_rate": 5.290133666570312e-06, "loss": 0.3489, "step": 26231 }, { "epoch": 2.666937779585197, "grad_norm": 0.25161677598953247, "learning_rate": 5.289779377831708e-06, "loss": 0.3296, "step": 26232 }, { "epoch": 2.667039446929646, "grad_norm": 0.2582075595855713, "learning_rate": 5.289425087633266e-06, "loss": 0.3167, "step": 26233 }, { "epoch": 2.667141114274095, "grad_norm": 0.2880358099937439, "learning_rate": 5.28907079597677e-06, "loss": 0.313, "step": 26234 }, { "epoch": 2.667242781618544, "grad_norm": 0.25392282009124756, "learning_rate": 5.288716502864003e-06, "loss": 0.3478, "step": 26235 }, { "epoch": 2.667344448962993, "grad_norm": 0.29057326912879944, "learning_rate": 5.288362208296751e-06, "loss": 0.3272, "step": 26236 }, { "epoch": 2.667446116307442, "grad_norm": 0.26924729347229004, "learning_rate": 5.288007912276799e-06, "loss": 0.356, "step": 26237 }, { "epoch": 2.667547783651891, "grad_norm": 0.26028522849082947, "learning_rate": 5.287653614805931e-06, "loss": 0.3093, "step": 26238 }, { "epoch": 2.6676494509963398, "grad_norm": 0.28679153323173523, "learning_rate": 5.287299315885933e-06, "loss": 0.3304, "step": 26239 }, { "epoch": 2.6677511183407887, "grad_norm": 0.27901455760002136, "learning_rate": 5.286945015518589e-06, "loss": 0.3111, "step": 26240 }, { "epoch": 2.6678527856852376, "grad_norm": 0.25548797845840454, "learning_rate": 5.286590713705685e-06, "loss": 0.3095, "step": 26241 }, { "epoch": 2.6679544530296866, "grad_norm": 0.2687302827835083, "learning_rate": 5.286236410449004e-06, "loss": 0.3222, "step": 26242 }, { "epoch": 2.668056120374136, "grad_norm": 0.2712855041027069, "learning_rate": 5.285882105750332e-06, "loss": 0.2921, "step": 26243 }, { "epoch": 2.668157787718585, "grad_norm": 0.29097533226013184, "learning_rate": 5.285527799611454e-06, "loss": 0.3403, "step": 26244 }, { "epoch": 2.668259455063034, "grad_norm": 0.28652408719062805, "learning_rate": 5.285173492034155e-06, "loss": 0.3186, "step": 26245 }, { "epoch": 2.668361122407483, "grad_norm": 0.2799600660800934, "learning_rate": 5.28481918302022e-06, "loss": 0.3291, "step": 26246 }, { "epoch": 2.6684627897519317, "grad_norm": 0.2663154602050781, "learning_rate": 5.2844648725714334e-06, "loss": 0.3464, "step": 26247 }, { "epoch": 2.6685644570963807, "grad_norm": 0.26558107137680054, "learning_rate": 5.284110560689579e-06, "loss": 0.3284, "step": 26248 }, { "epoch": 2.6686661244408296, "grad_norm": 0.27933362126350403, "learning_rate": 5.2837562473764455e-06, "loss": 0.3572, "step": 26249 }, { "epoch": 2.6687677917852786, "grad_norm": 0.2805612087249756, "learning_rate": 5.2834019326338135e-06, "loss": 0.3505, "step": 26250 }, { "epoch": 2.6688694591297275, "grad_norm": 0.2739179730415344, "learning_rate": 5.283047616463473e-06, "loss": 0.3254, "step": 26251 }, { "epoch": 2.6689711264741764, "grad_norm": 0.27886754274368286, "learning_rate": 5.282693298867202e-06, "loss": 0.3039, "step": 26252 }, { "epoch": 2.6690727938186254, "grad_norm": 0.2863791584968567, "learning_rate": 5.282338979846792e-06, "loss": 0.357, "step": 26253 }, { "epoch": 2.6691744611630743, "grad_norm": 0.2868202328681946, "learning_rate": 5.281984659404024e-06, "loss": 0.3444, "step": 26254 }, { "epoch": 2.6692761285075233, "grad_norm": 0.2771136164665222, "learning_rate": 5.281630337540684e-06, "loss": 0.3068, "step": 26255 }, { "epoch": 2.669377795851972, "grad_norm": 0.25274255871772766, "learning_rate": 5.281276014258559e-06, "loss": 0.3293, "step": 26256 }, { "epoch": 2.669479463196421, "grad_norm": 0.2896193563938141, "learning_rate": 5.2809216895594305e-06, "loss": 0.3019, "step": 26257 }, { "epoch": 2.6695811305408705, "grad_norm": 0.2727867066860199, "learning_rate": 5.280567363445086e-06, "loss": 0.3302, "step": 26258 }, { "epoch": 2.6696827978853195, "grad_norm": 0.25131431221961975, "learning_rate": 5.28021303591731e-06, "loss": 0.3561, "step": 26259 }, { "epoch": 2.6697844652297684, "grad_norm": 0.25058603286743164, "learning_rate": 5.279858706977886e-06, "loss": 0.3664, "step": 26260 }, { "epoch": 2.6698861325742174, "grad_norm": 0.2742481827735901, "learning_rate": 5.2795043766286024e-06, "loss": 0.335, "step": 26261 }, { "epoch": 2.6699877999186663, "grad_norm": 0.2727254331111908, "learning_rate": 5.279150044871241e-06, "loss": 0.343, "step": 26262 }, { "epoch": 2.6700894672631152, "grad_norm": 0.2858320474624634, "learning_rate": 5.278795711707588e-06, "loss": 0.29, "step": 26263 }, { "epoch": 2.670191134607564, "grad_norm": 0.2742702066898346, "learning_rate": 5.278441377139429e-06, "loss": 0.3542, "step": 26264 }, { "epoch": 2.670292801952013, "grad_norm": 0.2659723162651062, "learning_rate": 5.278087041168548e-06, "loss": 0.2947, "step": 26265 }, { "epoch": 2.670394469296462, "grad_norm": 0.27018770575523376, "learning_rate": 5.277732703796731e-06, "loss": 0.3169, "step": 26266 }, { "epoch": 2.670496136640911, "grad_norm": 0.2619917690753937, "learning_rate": 5.277378365025762e-06, "loss": 0.3546, "step": 26267 }, { "epoch": 2.67059780398536, "grad_norm": 0.2814467251300812, "learning_rate": 5.277024024857428e-06, "loss": 0.3553, "step": 26268 }, { "epoch": 2.670699471329809, "grad_norm": 0.2701677978038788, "learning_rate": 5.276669683293511e-06, "loss": 0.3566, "step": 26269 }, { "epoch": 2.670801138674258, "grad_norm": 0.25871822237968445, "learning_rate": 5.276315340335798e-06, "loss": 0.323, "step": 26270 }, { "epoch": 2.6709028060187068, "grad_norm": 0.2784667909145355, "learning_rate": 5.275960995986076e-06, "loss": 0.3266, "step": 26271 }, { "epoch": 2.6710044733631557, "grad_norm": 0.285945326089859, "learning_rate": 5.275606650246128e-06, "loss": 0.3896, "step": 26272 }, { "epoch": 2.6711061407076047, "grad_norm": 0.2855134904384613, "learning_rate": 5.275252303117737e-06, "loss": 0.3503, "step": 26273 }, { "epoch": 2.6712078080520536, "grad_norm": 0.2859079837799072, "learning_rate": 5.274897954602691e-06, "loss": 0.328, "step": 26274 }, { "epoch": 2.6713094753965025, "grad_norm": 0.2688637375831604, "learning_rate": 5.274543604702774e-06, "loss": 0.375, "step": 26275 }, { "epoch": 2.6714111427409515, "grad_norm": 0.3000681698322296, "learning_rate": 5.274189253419773e-06, "loss": 0.3191, "step": 26276 }, { "epoch": 2.6715128100854004, "grad_norm": 0.2636919915676117, "learning_rate": 5.273834900755471e-06, "loss": 0.3311, "step": 26277 }, { "epoch": 2.6716144774298494, "grad_norm": 0.2769506275653839, "learning_rate": 5.273480546711654e-06, "loss": 0.3298, "step": 26278 }, { "epoch": 2.6717161447742983, "grad_norm": 0.25325140357017517, "learning_rate": 5.2731261912901065e-06, "loss": 0.3237, "step": 26279 }, { "epoch": 2.6718178121187472, "grad_norm": 0.28360989689826965, "learning_rate": 5.272771834492614e-06, "loss": 0.3064, "step": 26280 }, { "epoch": 2.671919479463196, "grad_norm": 0.2627345323562622, "learning_rate": 5.272417476320964e-06, "loss": 0.333, "step": 26281 }, { "epoch": 2.672021146807645, "grad_norm": 0.27841585874557495, "learning_rate": 5.272063116776938e-06, "loss": 0.3183, "step": 26282 }, { "epoch": 2.672122814152094, "grad_norm": 0.27934008836746216, "learning_rate": 5.2717087558623214e-06, "loss": 0.3116, "step": 26283 }, { "epoch": 2.6722244814965435, "grad_norm": 0.26089543104171753, "learning_rate": 5.271354393578901e-06, "loss": 0.359, "step": 26284 }, { "epoch": 2.6723261488409924, "grad_norm": 0.2644316256046295, "learning_rate": 5.271000029928463e-06, "loss": 0.3234, "step": 26285 }, { "epoch": 2.6724278161854413, "grad_norm": 0.2710648477077484, "learning_rate": 5.27064566491279e-06, "loss": 0.3354, "step": 26286 }, { "epoch": 2.6725294835298903, "grad_norm": 0.2782680094242096, "learning_rate": 5.27029129853367e-06, "loss": 0.3263, "step": 26287 }, { "epoch": 2.6726311508743392, "grad_norm": 0.2503233253955841, "learning_rate": 5.2699369307928864e-06, "loss": 0.3198, "step": 26288 }, { "epoch": 2.672732818218788, "grad_norm": 0.25725480914115906, "learning_rate": 5.2695825616922225e-06, "loss": 0.3153, "step": 26289 }, { "epoch": 2.672834485563237, "grad_norm": 0.25532177090644836, "learning_rate": 5.269228191233468e-06, "loss": 0.3407, "step": 26290 }, { "epoch": 2.672936152907686, "grad_norm": 0.26850321888923645, "learning_rate": 5.268873819418405e-06, "loss": 0.3546, "step": 26291 }, { "epoch": 2.673037820252135, "grad_norm": 0.26302090287208557, "learning_rate": 5.268519446248821e-06, "loss": 0.3652, "step": 26292 }, { "epoch": 2.673139487596584, "grad_norm": 0.24942274391651154, "learning_rate": 5.2681650717264975e-06, "loss": 0.3395, "step": 26293 }, { "epoch": 2.673241154941033, "grad_norm": 0.24922654032707214, "learning_rate": 5.267810695853222e-06, "loss": 0.3142, "step": 26294 }, { "epoch": 2.673342822285482, "grad_norm": 0.2610485851764679, "learning_rate": 5.267456318630782e-06, "loss": 0.3418, "step": 26295 }, { "epoch": 2.6734444896299308, "grad_norm": 0.2467806041240692, "learning_rate": 5.267101940060959e-06, "loss": 0.3385, "step": 26296 }, { "epoch": 2.6735461569743797, "grad_norm": 0.25870487093925476, "learning_rate": 5.266747560145542e-06, "loss": 0.3386, "step": 26297 }, { "epoch": 2.6736478243188286, "grad_norm": 0.27371665835380554, "learning_rate": 5.266393178886312e-06, "loss": 0.3486, "step": 26298 }, { "epoch": 2.673749491663278, "grad_norm": 0.2807593047618866, "learning_rate": 5.266038796285057e-06, "loss": 0.3315, "step": 26299 }, { "epoch": 2.673851159007727, "grad_norm": 0.2670641839504242, "learning_rate": 5.2656844123435625e-06, "loss": 0.3384, "step": 26300 }, { "epoch": 2.673952826352176, "grad_norm": 0.2827577590942383, "learning_rate": 5.265330027063612e-06, "loss": 0.3419, "step": 26301 }, { "epoch": 2.674054493696625, "grad_norm": 0.2611091136932373, "learning_rate": 5.264975640446993e-06, "loss": 0.2935, "step": 26302 }, { "epoch": 2.674156161041074, "grad_norm": 0.24945567548274994, "learning_rate": 5.264621252495489e-06, "loss": 0.3339, "step": 26303 }, { "epoch": 2.6742578283855227, "grad_norm": 0.286476731300354, "learning_rate": 5.264266863210885e-06, "loss": 0.3236, "step": 26304 }, { "epoch": 2.6743594957299717, "grad_norm": 0.26203665137290955, "learning_rate": 5.2639124725949695e-06, "loss": 0.3505, "step": 26305 }, { "epoch": 2.6744611630744206, "grad_norm": 0.2879820168018341, "learning_rate": 5.263558080649524e-06, "loss": 0.3298, "step": 26306 }, { "epoch": 2.6745628304188696, "grad_norm": 0.2631511390209198, "learning_rate": 5.263203687376337e-06, "loss": 0.3244, "step": 26307 }, { "epoch": 2.6746644977633185, "grad_norm": 0.26855921745300293, "learning_rate": 5.262849292777191e-06, "loss": 0.3048, "step": 26308 }, { "epoch": 2.6747661651077674, "grad_norm": 0.27084922790527344, "learning_rate": 5.262494896853873e-06, "loss": 0.3459, "step": 26309 }, { "epoch": 2.6748678324522164, "grad_norm": 0.26717695593833923, "learning_rate": 5.2621404996081684e-06, "loss": 0.3056, "step": 26310 }, { "epoch": 2.6749694997966653, "grad_norm": 0.2603544592857361, "learning_rate": 5.261786101041862e-06, "loss": 0.315, "step": 26311 }, { "epoch": 2.6750711671411143, "grad_norm": 0.27287766337394714, "learning_rate": 5.261431701156741e-06, "loss": 0.3281, "step": 26312 }, { "epoch": 2.675172834485563, "grad_norm": 0.30095669627189636, "learning_rate": 5.261077299954587e-06, "loss": 0.3451, "step": 26313 }, { "epoch": 2.675274501830012, "grad_norm": 0.2935415804386139, "learning_rate": 5.260722897437187e-06, "loss": 0.3278, "step": 26314 }, { "epoch": 2.675376169174461, "grad_norm": 0.26300913095474243, "learning_rate": 5.2603684936063295e-06, "loss": 0.3394, "step": 26315 }, { "epoch": 2.67547783651891, "grad_norm": 0.259503036737442, "learning_rate": 5.260014088463796e-06, "loss": 0.3022, "step": 26316 }, { "epoch": 2.675579503863359, "grad_norm": 0.26445502042770386, "learning_rate": 5.259659682011375e-06, "loss": 0.3102, "step": 26317 }, { "epoch": 2.675681171207808, "grad_norm": 0.2743648290634155, "learning_rate": 5.259305274250848e-06, "loss": 0.3129, "step": 26318 }, { "epoch": 2.675782838552257, "grad_norm": 0.26240888237953186, "learning_rate": 5.258950865184003e-06, "loss": 0.3453, "step": 26319 }, { "epoch": 2.675884505896706, "grad_norm": 0.25226524472236633, "learning_rate": 5.258596454812626e-06, "loss": 0.325, "step": 26320 }, { "epoch": 2.6759861732411547, "grad_norm": 0.2873877286911011, "learning_rate": 5.258242043138501e-06, "loss": 0.3247, "step": 26321 }, { "epoch": 2.6760878405856037, "grad_norm": 0.2517543435096741, "learning_rate": 5.257887630163414e-06, "loss": 0.2998, "step": 26322 }, { "epoch": 2.6761895079300526, "grad_norm": 0.2502043545246124, "learning_rate": 5.25753321588915e-06, "loss": 0.3316, "step": 26323 }, { "epoch": 2.6762911752745016, "grad_norm": 0.2728354334831238, "learning_rate": 5.257178800317495e-06, "loss": 0.3196, "step": 26324 }, { "epoch": 2.676392842618951, "grad_norm": 0.25801604986190796, "learning_rate": 5.2568243834502355e-06, "loss": 0.3307, "step": 26325 }, { "epoch": 2.6764945099634, "grad_norm": 0.24359621107578278, "learning_rate": 5.256469965289155e-06, "loss": 0.3168, "step": 26326 }, { "epoch": 2.676596177307849, "grad_norm": 0.2601107060909271, "learning_rate": 5.2561155458360405e-06, "loss": 0.31, "step": 26327 }, { "epoch": 2.6766978446522978, "grad_norm": 0.2658224105834961, "learning_rate": 5.255761125092674e-06, "loss": 0.3721, "step": 26328 }, { "epoch": 2.6767995119967467, "grad_norm": 0.30128079652786255, "learning_rate": 5.255406703060846e-06, "loss": 0.3415, "step": 26329 }, { "epoch": 2.6769011793411956, "grad_norm": 0.273458868265152, "learning_rate": 5.25505227974234e-06, "loss": 0.3231, "step": 26330 }, { "epoch": 2.6770028466856446, "grad_norm": 0.2707659900188446, "learning_rate": 5.254697855138941e-06, "loss": 0.336, "step": 26331 }, { "epoch": 2.6771045140300935, "grad_norm": 0.26322516798973083, "learning_rate": 5.254343429252434e-06, "loss": 0.3495, "step": 26332 }, { "epoch": 2.6772061813745425, "grad_norm": 0.26420795917510986, "learning_rate": 5.253989002084605e-06, "loss": 0.3381, "step": 26333 }, { "epoch": 2.6773078487189914, "grad_norm": 0.2801569402217865, "learning_rate": 5.253634573637241e-06, "loss": 0.3076, "step": 26334 }, { "epoch": 2.6774095160634404, "grad_norm": 0.2689964771270752, "learning_rate": 5.253280143912125e-06, "loss": 0.3607, "step": 26335 }, { "epoch": 2.6775111834078893, "grad_norm": 0.2789406180381775, "learning_rate": 5.252925712911044e-06, "loss": 0.3254, "step": 26336 }, { "epoch": 2.6776128507523382, "grad_norm": 0.2631625533103943, "learning_rate": 5.252571280635785e-06, "loss": 0.2901, "step": 26337 }, { "epoch": 2.677714518096787, "grad_norm": 0.25605571269989014, "learning_rate": 5.2522168470881305e-06, "loss": 0.3307, "step": 26338 }, { "epoch": 2.677816185441236, "grad_norm": 0.2792074382305145, "learning_rate": 5.251862412269867e-06, "loss": 0.3352, "step": 26339 }, { "epoch": 2.6779178527856855, "grad_norm": 0.264450341463089, "learning_rate": 5.251507976182782e-06, "loss": 0.314, "step": 26340 }, { "epoch": 2.6780195201301344, "grad_norm": 0.2603711485862732, "learning_rate": 5.251153538828659e-06, "loss": 0.3507, "step": 26341 }, { "epoch": 2.6781211874745834, "grad_norm": 0.26340416073799133, "learning_rate": 5.250799100209284e-06, "loss": 0.3178, "step": 26342 }, { "epoch": 2.6782228548190323, "grad_norm": 0.2700127065181732, "learning_rate": 5.2504446603264435e-06, "loss": 0.338, "step": 26343 }, { "epoch": 2.6783245221634813, "grad_norm": 0.24558277428150177, "learning_rate": 5.250090219181919e-06, "loss": 0.3356, "step": 26344 }, { "epoch": 2.67842618950793, "grad_norm": 0.2892758250236511, "learning_rate": 5.2497357767775035e-06, "loss": 0.323, "step": 26345 }, { "epoch": 2.678527856852379, "grad_norm": 0.26680707931518555, "learning_rate": 5.2493813331149754e-06, "loss": 0.3504, "step": 26346 }, { "epoch": 2.678629524196828, "grad_norm": 0.24407529830932617, "learning_rate": 5.249026888196126e-06, "loss": 0.3158, "step": 26347 }, { "epoch": 2.678731191541277, "grad_norm": 0.2602709233760834, "learning_rate": 5.248672442022739e-06, "loss": 0.3164, "step": 26348 }, { "epoch": 2.678832858885726, "grad_norm": 0.2892911434173584, "learning_rate": 5.248317994596597e-06, "loss": 0.334, "step": 26349 }, { "epoch": 2.678934526230175, "grad_norm": 0.2765919268131256, "learning_rate": 5.247963545919488e-06, "loss": 0.3066, "step": 26350 }, { "epoch": 2.679036193574624, "grad_norm": 0.26629650592803955, "learning_rate": 5.247609095993197e-06, "loss": 0.3373, "step": 26351 }, { "epoch": 2.679137860919073, "grad_norm": 0.27684029936790466, "learning_rate": 5.247254644819512e-06, "loss": 0.352, "step": 26352 }, { "epoch": 2.6792395282635217, "grad_norm": 0.2691507637500763, "learning_rate": 5.2469001924002175e-06, "loss": 0.344, "step": 26353 }, { "epoch": 2.6793411956079707, "grad_norm": 0.26834505796432495, "learning_rate": 5.246545738737096e-06, "loss": 0.3143, "step": 26354 }, { "epoch": 2.6794428629524196, "grad_norm": 0.25179415941238403, "learning_rate": 5.246191283831936e-06, "loss": 0.3258, "step": 26355 }, { "epoch": 2.6795445302968686, "grad_norm": 0.25542524456977844, "learning_rate": 5.245836827686523e-06, "loss": 0.3241, "step": 26356 }, { "epoch": 2.6796461976413175, "grad_norm": 0.26683497428894043, "learning_rate": 5.245482370302645e-06, "loss": 0.3169, "step": 26357 }, { "epoch": 2.6797478649857664, "grad_norm": 0.2560548484325409, "learning_rate": 5.245127911682083e-06, "loss": 0.3634, "step": 26358 }, { "epoch": 2.6798495323302154, "grad_norm": 0.26081016659736633, "learning_rate": 5.244773451826625e-06, "loss": 0.3311, "step": 26359 }, { "epoch": 2.6799511996746643, "grad_norm": 0.2706013321876526, "learning_rate": 5.244418990738056e-06, "loss": 0.3445, "step": 26360 }, { "epoch": 2.6800528670191133, "grad_norm": 0.2701461613178253, "learning_rate": 5.244064528418164e-06, "loss": 0.3575, "step": 26361 }, { "epoch": 2.680154534363562, "grad_norm": 0.2786963880062103, "learning_rate": 5.243710064868731e-06, "loss": 0.3697, "step": 26362 }, { "epoch": 2.680256201708011, "grad_norm": 0.2480081021785736, "learning_rate": 5.243355600091547e-06, "loss": 0.3243, "step": 26363 }, { "epoch": 2.68035786905246, "grad_norm": 0.26029613614082336, "learning_rate": 5.243001134088393e-06, "loss": 0.3377, "step": 26364 }, { "epoch": 2.680459536396909, "grad_norm": 0.2587895095348358, "learning_rate": 5.2426466668610565e-06, "loss": 0.3099, "step": 26365 }, { "epoch": 2.6805612037413584, "grad_norm": 0.2581225037574768, "learning_rate": 5.242292198411325e-06, "loss": 0.3244, "step": 26366 }, { "epoch": 2.6806628710858074, "grad_norm": 0.2663898766040802, "learning_rate": 5.241937728740984e-06, "loss": 0.3515, "step": 26367 }, { "epoch": 2.6807645384302563, "grad_norm": 0.2525903582572937, "learning_rate": 5.241583257851817e-06, "loss": 0.3398, "step": 26368 }, { "epoch": 2.6808662057747052, "grad_norm": 0.2575686275959015, "learning_rate": 5.2412287857456105e-06, "loss": 0.2945, "step": 26369 }, { "epoch": 2.680967873119154, "grad_norm": 0.27390196919441223, "learning_rate": 5.240874312424151e-06, "loss": 0.3437, "step": 26370 }, { "epoch": 2.681069540463603, "grad_norm": 0.2508716583251953, "learning_rate": 5.240519837889224e-06, "loss": 0.355, "step": 26371 }, { "epoch": 2.681171207808052, "grad_norm": 0.2622741758823395, "learning_rate": 5.2401653621426155e-06, "loss": 0.3365, "step": 26372 }, { "epoch": 2.681272875152501, "grad_norm": 0.2511630058288574, "learning_rate": 5.2398108851861116e-06, "loss": 0.3592, "step": 26373 }, { "epoch": 2.68137454249695, "grad_norm": 0.2701376676559448, "learning_rate": 5.239456407021497e-06, "loss": 0.3059, "step": 26374 }, { "epoch": 2.681476209841399, "grad_norm": 0.29713770747184753, "learning_rate": 5.239101927650557e-06, "loss": 0.3322, "step": 26375 }, { "epoch": 2.681577877185848, "grad_norm": 0.2573820650577545, "learning_rate": 5.2387474470750795e-06, "loss": 0.3007, "step": 26376 }, { "epoch": 2.6816795445302968, "grad_norm": 0.2622281312942505, "learning_rate": 5.238392965296848e-06, "loss": 0.3591, "step": 26377 }, { "epoch": 2.6817812118747457, "grad_norm": 0.27969875931739807, "learning_rate": 5.238038482317651e-06, "loss": 0.3535, "step": 26378 }, { "epoch": 2.6818828792191947, "grad_norm": 0.31410571932792664, "learning_rate": 5.237683998139271e-06, "loss": 0.3407, "step": 26379 }, { "epoch": 2.6819845465636436, "grad_norm": 0.25943100452423096, "learning_rate": 5.237329512763496e-06, "loss": 0.3035, "step": 26380 }, { "epoch": 2.682086213908093, "grad_norm": 0.2509474456310272, "learning_rate": 5.236975026192113e-06, "loss": 0.3391, "step": 26381 }, { "epoch": 2.682187881252542, "grad_norm": 0.2551364004611969, "learning_rate": 5.2366205384269035e-06, "loss": 0.32, "step": 26382 }, { "epoch": 2.682289548596991, "grad_norm": 0.26791417598724365, "learning_rate": 5.236266049469657e-06, "loss": 0.3079, "step": 26383 }, { "epoch": 2.68239121594144, "grad_norm": 0.2694686949253082, "learning_rate": 5.235911559322159e-06, "loss": 0.338, "step": 26384 }, { "epoch": 2.6824928832858888, "grad_norm": 0.2677296996116638, "learning_rate": 5.235557067986193e-06, "loss": 0.3227, "step": 26385 }, { "epoch": 2.6825945506303377, "grad_norm": 0.26940447092056274, "learning_rate": 5.235202575463548e-06, "loss": 0.3779, "step": 26386 }, { "epoch": 2.6826962179747866, "grad_norm": 0.2851253151893616, "learning_rate": 5.234848081756008e-06, "loss": 0.3178, "step": 26387 }, { "epoch": 2.6827978853192356, "grad_norm": 0.2659223973751068, "learning_rate": 5.234493586865359e-06, "loss": 0.3018, "step": 26388 }, { "epoch": 2.6828995526636845, "grad_norm": 0.2779427468776703, "learning_rate": 5.234139090793387e-06, "loss": 0.3164, "step": 26389 }, { "epoch": 2.6830012200081335, "grad_norm": 0.26392555236816406, "learning_rate": 5.2337845935418776e-06, "loss": 0.3254, "step": 26390 }, { "epoch": 2.6831028873525824, "grad_norm": 0.2681161165237427, "learning_rate": 5.2334300951126174e-06, "loss": 0.3279, "step": 26391 }, { "epoch": 2.6832045546970313, "grad_norm": 0.2871718108654022, "learning_rate": 5.233075595507392e-06, "loss": 0.3252, "step": 26392 }, { "epoch": 2.6833062220414803, "grad_norm": 0.2665562927722931, "learning_rate": 5.232721094727987e-06, "loss": 0.3483, "step": 26393 }, { "epoch": 2.6834078893859292, "grad_norm": 0.2826997935771942, "learning_rate": 5.232366592776188e-06, "loss": 0.3375, "step": 26394 }, { "epoch": 2.683509556730378, "grad_norm": 0.2887393832206726, "learning_rate": 5.232012089653781e-06, "loss": 0.3512, "step": 26395 }, { "epoch": 2.683611224074827, "grad_norm": 0.2653193175792694, "learning_rate": 5.231657585362554e-06, "loss": 0.3359, "step": 26396 }, { "epoch": 2.683712891419276, "grad_norm": 0.2894952595233917, "learning_rate": 5.23130307990429e-06, "loss": 0.3305, "step": 26397 }, { "epoch": 2.683814558763725, "grad_norm": 0.29485177993774414, "learning_rate": 5.230948573280776e-06, "loss": 0.3132, "step": 26398 }, { "epoch": 2.683916226108174, "grad_norm": 0.2574101388454437, "learning_rate": 5.230594065493797e-06, "loss": 0.3019, "step": 26399 }, { "epoch": 2.684017893452623, "grad_norm": 0.27977579832077026, "learning_rate": 5.230239556545141e-06, "loss": 0.3326, "step": 26400 }, { "epoch": 2.684119560797072, "grad_norm": 0.2729508578777313, "learning_rate": 5.229885046436594e-06, "loss": 0.3541, "step": 26401 }, { "epoch": 2.6842212281415208, "grad_norm": 0.28770771622657776, "learning_rate": 5.2295305351699385e-06, "loss": 0.3571, "step": 26402 }, { "epoch": 2.6843228954859697, "grad_norm": 0.28437721729278564, "learning_rate": 5.229176022746965e-06, "loss": 0.3361, "step": 26403 }, { "epoch": 2.6844245628304186, "grad_norm": 0.2920398414134979, "learning_rate": 5.228821509169455e-06, "loss": 0.3458, "step": 26404 }, { "epoch": 2.6845262301748676, "grad_norm": 0.28282269835472107, "learning_rate": 5.228466994439196e-06, "loss": 0.3127, "step": 26405 }, { "epoch": 2.6846278975193165, "grad_norm": 0.2659226059913635, "learning_rate": 5.228112478557978e-06, "loss": 0.3117, "step": 26406 }, { "epoch": 2.684729564863766, "grad_norm": 0.25699231028556824, "learning_rate": 5.227757961527581e-06, "loss": 0.3275, "step": 26407 }, { "epoch": 2.684831232208215, "grad_norm": 0.28549641370773315, "learning_rate": 5.227403443349794e-06, "loss": 0.3326, "step": 26408 }, { "epoch": 2.684932899552664, "grad_norm": 0.2667638063430786, "learning_rate": 5.227048924026402e-06, "loss": 0.3364, "step": 26409 }, { "epoch": 2.6850345668971127, "grad_norm": 0.28265058994293213, "learning_rate": 5.226694403559191e-06, "loss": 0.3107, "step": 26410 }, { "epoch": 2.6851362342415617, "grad_norm": 0.2861107885837555, "learning_rate": 5.22633988194995e-06, "loss": 0.3454, "step": 26411 }, { "epoch": 2.6852379015860106, "grad_norm": 0.2625071406364441, "learning_rate": 5.225985359200461e-06, "loss": 0.3509, "step": 26412 }, { "epoch": 2.6853395689304596, "grad_norm": 0.28816208243370056, "learning_rate": 5.225630835312512e-06, "loss": 0.3594, "step": 26413 }, { "epoch": 2.6854412362749085, "grad_norm": 0.2906912565231323, "learning_rate": 5.2252763102878875e-06, "loss": 0.3173, "step": 26414 }, { "epoch": 2.6855429036193574, "grad_norm": 0.2666899561882019, "learning_rate": 5.224921784128374e-06, "loss": 0.33, "step": 26415 }, { "epoch": 2.6856445709638064, "grad_norm": 0.2896825969219208, "learning_rate": 5.2245672568357595e-06, "loss": 0.381, "step": 26416 }, { "epoch": 2.6857462383082553, "grad_norm": 0.2672673761844635, "learning_rate": 5.224212728411829e-06, "loss": 0.3246, "step": 26417 }, { "epoch": 2.6858479056527043, "grad_norm": 0.27610957622528076, "learning_rate": 5.223858198858367e-06, "loss": 0.3642, "step": 26418 }, { "epoch": 2.685949572997153, "grad_norm": 0.2603621482849121, "learning_rate": 5.223503668177161e-06, "loss": 0.3077, "step": 26419 }, { "epoch": 2.686051240341602, "grad_norm": 0.28386247158050537, "learning_rate": 5.2231491363699945e-06, "loss": 0.3292, "step": 26420 }, { "epoch": 2.686152907686051, "grad_norm": 0.2828083634376526, "learning_rate": 5.222794603438658e-06, "loss": 0.3252, "step": 26421 }, { "epoch": 2.6862545750305005, "grad_norm": 0.2749565839767456, "learning_rate": 5.222440069384934e-06, "loss": 0.3379, "step": 26422 }, { "epoch": 2.6863562423749494, "grad_norm": 0.2577608823776245, "learning_rate": 5.222085534210611e-06, "loss": 0.3079, "step": 26423 }, { "epoch": 2.6864579097193984, "grad_norm": 0.28175392746925354, "learning_rate": 5.221730997917475e-06, "loss": 0.3235, "step": 26424 }, { "epoch": 2.6865595770638473, "grad_norm": 0.2709294557571411, "learning_rate": 5.221376460507307e-06, "loss": 0.3332, "step": 26425 }, { "epoch": 2.6866612444082962, "grad_norm": 0.2937886416912079, "learning_rate": 5.2210219219819e-06, "loss": 0.3651, "step": 26426 }, { "epoch": 2.686762911752745, "grad_norm": 0.2695595622062683, "learning_rate": 5.220667382343035e-06, "loss": 0.3276, "step": 26427 }, { "epoch": 2.686864579097194, "grad_norm": 0.2592166066169739, "learning_rate": 5.220312841592503e-06, "loss": 0.3452, "step": 26428 }, { "epoch": 2.686966246441643, "grad_norm": 0.26539063453674316, "learning_rate": 5.219958299732086e-06, "loss": 0.3186, "step": 26429 }, { "epoch": 2.687067913786092, "grad_norm": 0.27868059277534485, "learning_rate": 5.219603756763569e-06, "loss": 0.3605, "step": 26430 }, { "epoch": 2.687169581130541, "grad_norm": 0.25519633293151855, "learning_rate": 5.219249212688744e-06, "loss": 0.3142, "step": 26431 }, { "epoch": 2.68727124847499, "grad_norm": 0.24216777086257935, "learning_rate": 5.218894667509391e-06, "loss": 0.3001, "step": 26432 }, { "epoch": 2.687372915819439, "grad_norm": 0.2511918544769287, "learning_rate": 5.2185401212273016e-06, "loss": 0.3533, "step": 26433 }, { "epoch": 2.6874745831638878, "grad_norm": 0.2725752294063568, "learning_rate": 5.218185573844257e-06, "loss": 0.3258, "step": 26434 }, { "epoch": 2.6875762505083367, "grad_norm": 0.28696301579475403, "learning_rate": 5.217831025362044e-06, "loss": 0.34, "step": 26435 }, { "epoch": 2.6876779178527856, "grad_norm": 0.25104954838752747, "learning_rate": 5.2174764757824525e-06, "loss": 0.2926, "step": 26436 }, { "epoch": 2.6877795851972346, "grad_norm": 0.28128692507743835, "learning_rate": 5.217121925107266e-06, "loss": 0.3266, "step": 26437 }, { "epoch": 2.6878812525416835, "grad_norm": 0.2908017039299011, "learning_rate": 5.216767373338269e-06, "loss": 0.3262, "step": 26438 }, { "epoch": 2.6879829198861325, "grad_norm": 0.27362388372421265, "learning_rate": 5.2164128204772515e-06, "loss": 0.3432, "step": 26439 }, { "epoch": 2.6880845872305814, "grad_norm": 0.2587568461894989, "learning_rate": 5.216058266525996e-06, "loss": 0.3161, "step": 26440 }, { "epoch": 2.6881862545750304, "grad_norm": 0.26626095175743103, "learning_rate": 5.21570371148629e-06, "loss": 0.3473, "step": 26441 }, { "epoch": 2.6882879219194793, "grad_norm": 0.2735077142715454, "learning_rate": 5.2153491553599225e-06, "loss": 0.3191, "step": 26442 }, { "epoch": 2.6883895892639282, "grad_norm": 0.27040067315101624, "learning_rate": 5.214994598148675e-06, "loss": 0.3408, "step": 26443 }, { "epoch": 2.688491256608377, "grad_norm": 0.2780095040798187, "learning_rate": 5.214640039854337e-06, "loss": 0.3423, "step": 26444 }, { "epoch": 2.688592923952826, "grad_norm": 0.29899996519088745, "learning_rate": 5.214285480478692e-06, "loss": 0.35, "step": 26445 }, { "epoch": 2.688694591297275, "grad_norm": 0.2796541154384613, "learning_rate": 5.2139309200235286e-06, "loss": 0.3436, "step": 26446 }, { "epoch": 2.688796258641724, "grad_norm": 0.2537171542644501, "learning_rate": 5.213576358490633e-06, "loss": 0.3263, "step": 26447 }, { "epoch": 2.6888979259861734, "grad_norm": 0.2616485059261322, "learning_rate": 5.213221795881789e-06, "loss": 0.332, "step": 26448 }, { "epoch": 2.6889995933306223, "grad_norm": 0.2791570723056793, "learning_rate": 5.212867232198785e-06, "loss": 0.3598, "step": 26449 }, { "epoch": 2.6891012606750713, "grad_norm": 0.2619635760784149, "learning_rate": 5.212512667443406e-06, "loss": 0.3186, "step": 26450 }, { "epoch": 2.68920292801952, "grad_norm": 0.27108290791511536, "learning_rate": 5.212158101617438e-06, "loss": 0.3557, "step": 26451 }, { "epoch": 2.689304595363969, "grad_norm": 0.28868523240089417, "learning_rate": 5.21180353472267e-06, "loss": 0.3303, "step": 26452 }, { "epoch": 2.689406262708418, "grad_norm": 0.2819359004497528, "learning_rate": 5.211448966760884e-06, "loss": 0.3272, "step": 26453 }, { "epoch": 2.689507930052867, "grad_norm": 0.27180400490760803, "learning_rate": 5.21109439773387e-06, "loss": 0.3243, "step": 26454 }, { "epoch": 2.689609597397316, "grad_norm": 0.25148534774780273, "learning_rate": 5.210739827643412e-06, "loss": 0.3015, "step": 26455 }, { "epoch": 2.689711264741765, "grad_norm": 0.2616289556026459, "learning_rate": 5.210385256491296e-06, "loss": 0.3196, "step": 26456 }, { "epoch": 2.689812932086214, "grad_norm": 0.2960910499095917, "learning_rate": 5.21003068427931e-06, "loss": 0.3038, "step": 26457 }, { "epoch": 2.689914599430663, "grad_norm": 0.26709234714508057, "learning_rate": 5.20967611100924e-06, "loss": 0.3192, "step": 26458 }, { "epoch": 2.6900162667751117, "grad_norm": 0.27231472730636597, "learning_rate": 5.209321536682871e-06, "loss": 0.3651, "step": 26459 }, { "epoch": 2.6901179341195607, "grad_norm": 0.2576054036617279, "learning_rate": 5.208966961301989e-06, "loss": 0.3416, "step": 26460 }, { "epoch": 2.6902196014640096, "grad_norm": 0.2820584774017334, "learning_rate": 5.208612384868381e-06, "loss": 0.3178, "step": 26461 }, { "epoch": 2.6903212688084586, "grad_norm": 0.2719299793243408, "learning_rate": 5.208257807383834e-06, "loss": 0.3385, "step": 26462 }, { "epoch": 2.690422936152908, "grad_norm": 0.277087539434433, "learning_rate": 5.207903228850134e-06, "loss": 0.319, "step": 26463 }, { "epoch": 2.690524603497357, "grad_norm": 0.2609526515007019, "learning_rate": 5.207548649269067e-06, "loss": 0.339, "step": 26464 }, { "epoch": 2.690626270841806, "grad_norm": 0.26921287178993225, "learning_rate": 5.2071940686424185e-06, "loss": 0.3229, "step": 26465 }, { "epoch": 2.6907279381862548, "grad_norm": 0.28043943643569946, "learning_rate": 5.2068394869719755e-06, "loss": 0.3477, "step": 26466 }, { "epoch": 2.6908296055307037, "grad_norm": 0.28842753171920776, "learning_rate": 5.206484904259525e-06, "loss": 0.3741, "step": 26467 }, { "epoch": 2.6909312728751527, "grad_norm": 0.2646521031856537, "learning_rate": 5.206130320506852e-06, "loss": 0.3449, "step": 26468 }, { "epoch": 2.6910329402196016, "grad_norm": 0.29171743988990784, "learning_rate": 5.2057757357157445e-06, "loss": 0.3159, "step": 26469 }, { "epoch": 2.6911346075640505, "grad_norm": 0.27608469128608704, "learning_rate": 5.205421149887986e-06, "loss": 0.3588, "step": 26470 }, { "epoch": 2.6912362749084995, "grad_norm": 0.27781176567077637, "learning_rate": 5.205066563025365e-06, "loss": 0.3173, "step": 26471 }, { "epoch": 2.6913379422529484, "grad_norm": 0.2615041136741638, "learning_rate": 5.204711975129669e-06, "loss": 0.3026, "step": 26472 }, { "epoch": 2.6914396095973974, "grad_norm": 0.299842894077301, "learning_rate": 5.2043573862026805e-06, "loss": 0.3259, "step": 26473 }, { "epoch": 2.6915412769418463, "grad_norm": 0.26847824454307556, "learning_rate": 5.20400279624619e-06, "loss": 0.3494, "step": 26474 }, { "epoch": 2.6916429442862952, "grad_norm": 0.25090596079826355, "learning_rate": 5.20364820526198e-06, "loss": 0.319, "step": 26475 }, { "epoch": 2.691744611630744, "grad_norm": 0.2570672333240509, "learning_rate": 5.203293613251841e-06, "loss": 0.3221, "step": 26476 }, { "epoch": 2.691846278975193, "grad_norm": 0.2691543400287628, "learning_rate": 5.202939020217556e-06, "loss": 0.3214, "step": 26477 }, { "epoch": 2.691947946319642, "grad_norm": 0.2565113306045532, "learning_rate": 5.202584426160912e-06, "loss": 0.3237, "step": 26478 }, { "epoch": 2.692049613664091, "grad_norm": 0.2825825810432434, "learning_rate": 5.202229831083696e-06, "loss": 0.3159, "step": 26479 }, { "epoch": 2.69215128100854, "grad_norm": 0.26321282982826233, "learning_rate": 5.2018752349876945e-06, "loss": 0.3484, "step": 26480 }, { "epoch": 2.692252948352989, "grad_norm": 0.25276118516921997, "learning_rate": 5.201520637874693e-06, "loss": 0.3225, "step": 26481 }, { "epoch": 2.692354615697438, "grad_norm": 0.26732417941093445, "learning_rate": 5.2011660397464796e-06, "loss": 0.3216, "step": 26482 }, { "epoch": 2.6924562830418868, "grad_norm": 0.2620948851108551, "learning_rate": 5.2008114406048385e-06, "loss": 0.3235, "step": 26483 }, { "epoch": 2.6925579503863357, "grad_norm": 0.2559925317764282, "learning_rate": 5.200456840451557e-06, "loss": 0.339, "step": 26484 }, { "epoch": 2.6926596177307847, "grad_norm": 0.2726428210735321, "learning_rate": 5.200102239288421e-06, "loss": 0.3409, "step": 26485 }, { "epoch": 2.6927612850752336, "grad_norm": 0.2607235908508301, "learning_rate": 5.199747637117219e-06, "loss": 0.3406, "step": 26486 }, { "epoch": 2.6928629524196825, "grad_norm": 0.2823900580406189, "learning_rate": 5.199393033939735e-06, "loss": 0.3172, "step": 26487 }, { "epoch": 2.6929646197641315, "grad_norm": 0.27039834856987, "learning_rate": 5.199038429757756e-06, "loss": 0.3656, "step": 26488 }, { "epoch": 2.693066287108581, "grad_norm": 0.27800071239471436, "learning_rate": 5.19868382457307e-06, "loss": 0.3193, "step": 26489 }, { "epoch": 2.69316795445303, "grad_norm": 0.28100624680519104, "learning_rate": 5.198329218387461e-06, "loss": 0.3407, "step": 26490 }, { "epoch": 2.6932696217974788, "grad_norm": 0.26560714840888977, "learning_rate": 5.1979746112027165e-06, "loss": 0.3465, "step": 26491 }, { "epoch": 2.6933712891419277, "grad_norm": 0.27711060643196106, "learning_rate": 5.197620003020623e-06, "loss": 0.3307, "step": 26492 }, { "epoch": 2.6934729564863766, "grad_norm": 0.26797324419021606, "learning_rate": 5.1972653938429675e-06, "loss": 0.3227, "step": 26493 }, { "epoch": 2.6935746238308256, "grad_norm": 0.27126559615135193, "learning_rate": 5.196910783671536e-06, "loss": 0.3106, "step": 26494 }, { "epoch": 2.6936762911752745, "grad_norm": 0.28160569071769714, "learning_rate": 5.196556172508115e-06, "loss": 0.3392, "step": 26495 }, { "epoch": 2.6937779585197235, "grad_norm": 0.2841210961341858, "learning_rate": 5.196201560354488e-06, "loss": 0.3561, "step": 26496 }, { "epoch": 2.6938796258641724, "grad_norm": 0.30576327443122864, "learning_rate": 5.1958469472124465e-06, "loss": 0.3581, "step": 26497 }, { "epoch": 2.6939812932086213, "grad_norm": 0.2608354389667511, "learning_rate": 5.195492333083774e-06, "loss": 0.3239, "step": 26498 }, { "epoch": 2.6940829605530703, "grad_norm": 0.2528426945209503, "learning_rate": 5.1951377179702576e-06, "loss": 0.3217, "step": 26499 }, { "epoch": 2.6941846278975192, "grad_norm": 0.28040382266044617, "learning_rate": 5.194783101873685e-06, "loss": 0.3408, "step": 26500 }, { "epoch": 2.694286295241968, "grad_norm": 0.2872982621192932, "learning_rate": 5.194428484795839e-06, "loss": 0.3365, "step": 26501 }, { "epoch": 2.694387962586417, "grad_norm": 0.28538256883621216, "learning_rate": 5.19407386673851e-06, "loss": 0.3491, "step": 26502 }, { "epoch": 2.694489629930866, "grad_norm": 0.26425445079803467, "learning_rate": 5.193719247703483e-06, "loss": 0.3353, "step": 26503 }, { "epoch": 2.6945912972753154, "grad_norm": 0.2745843529701233, "learning_rate": 5.193364627692544e-06, "loss": 0.3654, "step": 26504 }, { "epoch": 2.6946929646197644, "grad_norm": 0.2850058972835541, "learning_rate": 5.193010006707482e-06, "loss": 0.3943, "step": 26505 }, { "epoch": 2.6947946319642133, "grad_norm": 0.26040321588516235, "learning_rate": 5.1926553847500775e-06, "loss": 0.3286, "step": 26506 }, { "epoch": 2.6948962993086623, "grad_norm": 0.28274261951446533, "learning_rate": 5.192300761822123e-06, "loss": 0.3333, "step": 26507 }, { "epoch": 2.694997966653111, "grad_norm": 0.29972970485687256, "learning_rate": 5.191946137925403e-06, "loss": 0.3228, "step": 26508 }, { "epoch": 2.69509963399756, "grad_norm": 0.2704082131385803, "learning_rate": 5.191591513061706e-06, "loss": 0.3302, "step": 26509 }, { "epoch": 2.695201301342009, "grad_norm": 0.2562547028064728, "learning_rate": 5.191236887232815e-06, "loss": 0.3259, "step": 26510 }, { "epoch": 2.695302968686458, "grad_norm": 0.2730276584625244, "learning_rate": 5.190882260440515e-06, "loss": 0.3173, "step": 26511 }, { "epoch": 2.695404636030907, "grad_norm": 0.2839842438697815, "learning_rate": 5.1905276326866e-06, "loss": 0.306, "step": 26512 }, { "epoch": 2.695506303375356, "grad_norm": 0.2793418765068054, "learning_rate": 5.190173003972849e-06, "loss": 0.3424, "step": 26513 }, { "epoch": 2.695607970719805, "grad_norm": 0.25226134061813354, "learning_rate": 5.189818374301053e-06, "loss": 0.3247, "step": 26514 }, { "epoch": 2.695709638064254, "grad_norm": 0.27577611804008484, "learning_rate": 5.189463743672998e-06, "loss": 0.3375, "step": 26515 }, { "epoch": 2.6958113054087027, "grad_norm": 0.28184232115745544, "learning_rate": 5.1891091120904665e-06, "loss": 0.3475, "step": 26516 }, { "epoch": 2.6959129727531517, "grad_norm": 0.251880407333374, "learning_rate": 5.188754479555251e-06, "loss": 0.2965, "step": 26517 }, { "epoch": 2.6960146400976006, "grad_norm": 0.2679257094860077, "learning_rate": 5.1883998460691355e-06, "loss": 0.337, "step": 26518 }, { "epoch": 2.6961163074420496, "grad_norm": 0.2697327733039856, "learning_rate": 5.188045211633905e-06, "loss": 0.337, "step": 26519 }, { "epoch": 2.6962179747864985, "grad_norm": 0.2792464792728424, "learning_rate": 5.1876905762513486e-06, "loss": 0.3176, "step": 26520 }, { "epoch": 2.6963196421309474, "grad_norm": 0.2865857481956482, "learning_rate": 5.187335939923249e-06, "loss": 0.3251, "step": 26521 }, { "epoch": 2.6964213094753964, "grad_norm": 0.24362389743328094, "learning_rate": 5.186981302651399e-06, "loss": 0.3262, "step": 26522 }, { "epoch": 2.6965229768198453, "grad_norm": 0.26472631096839905, "learning_rate": 5.1866266644375795e-06, "loss": 0.3465, "step": 26523 }, { "epoch": 2.6966246441642943, "grad_norm": 0.2516910433769226, "learning_rate": 5.186272025283579e-06, "loss": 0.3287, "step": 26524 }, { "epoch": 2.696726311508743, "grad_norm": 0.2540837228298187, "learning_rate": 5.185917385191186e-06, "loss": 0.333, "step": 26525 }, { "epoch": 2.696827978853192, "grad_norm": 0.26609116792678833, "learning_rate": 5.185562744162183e-06, "loss": 0.3485, "step": 26526 }, { "epoch": 2.696929646197641, "grad_norm": 0.24128040671348572, "learning_rate": 5.1852081021983614e-06, "loss": 0.3357, "step": 26527 }, { "epoch": 2.69703131354209, "grad_norm": 0.2501336336135864, "learning_rate": 5.184853459301505e-06, "loss": 0.3024, "step": 26528 }, { "epoch": 2.697132980886539, "grad_norm": 0.24828524887561798, "learning_rate": 5.1844988154734e-06, "loss": 0.3037, "step": 26529 }, { "epoch": 2.6972346482309884, "grad_norm": 0.2549777925014496, "learning_rate": 5.184144170715834e-06, "loss": 0.3421, "step": 26530 }, { "epoch": 2.6973363155754373, "grad_norm": 0.2753804624080658, "learning_rate": 5.183789525030593e-06, "loss": 0.314, "step": 26531 }, { "epoch": 2.6974379829198862, "grad_norm": 0.2655317783355713, "learning_rate": 5.183434878419464e-06, "loss": 0.3151, "step": 26532 }, { "epoch": 2.697539650264335, "grad_norm": 0.2933405339717865, "learning_rate": 5.1830802308842355e-06, "loss": 0.3259, "step": 26533 }, { "epoch": 2.697641317608784, "grad_norm": 0.26310133934020996, "learning_rate": 5.18272558242669e-06, "loss": 0.2957, "step": 26534 }, { "epoch": 2.697742984953233, "grad_norm": 0.2726207375526428, "learning_rate": 5.182370933048618e-06, "loss": 0.3059, "step": 26535 }, { "epoch": 2.697844652297682, "grad_norm": 0.25771674513816833, "learning_rate": 5.182016282751804e-06, "loss": 0.3309, "step": 26536 }, { "epoch": 2.697946319642131, "grad_norm": 0.2840818464756012, "learning_rate": 5.181661631538034e-06, "loss": 0.3205, "step": 26537 }, { "epoch": 2.69804798698658, "grad_norm": 0.2823384702205658, "learning_rate": 5.181306979409097e-06, "loss": 0.3269, "step": 26538 }, { "epoch": 2.698149654331029, "grad_norm": 0.25645145773887634, "learning_rate": 5.18095232636678e-06, "loss": 0.3153, "step": 26539 }, { "epoch": 2.6982513216754778, "grad_norm": 0.2970687448978424, "learning_rate": 5.180597672412867e-06, "loss": 0.3215, "step": 26540 }, { "epoch": 2.6983529890199267, "grad_norm": 0.28726813197135925, "learning_rate": 5.180243017549145e-06, "loss": 0.3426, "step": 26541 }, { "epoch": 2.6984546563643756, "grad_norm": 0.25573453307151794, "learning_rate": 5.179888361777401e-06, "loss": 0.309, "step": 26542 }, { "epoch": 2.6985563237088246, "grad_norm": 0.26665306091308594, "learning_rate": 5.179533705099425e-06, "loss": 0.3388, "step": 26543 }, { "epoch": 2.6986579910532735, "grad_norm": 0.2391091138124466, "learning_rate": 5.179179047516998e-06, "loss": 0.3419, "step": 26544 }, { "epoch": 2.698759658397723, "grad_norm": 0.2659287452697754, "learning_rate": 5.178824389031911e-06, "loss": 0.3559, "step": 26545 }, { "epoch": 2.698861325742172, "grad_norm": 0.2685618996620178, "learning_rate": 5.1784697296459484e-06, "loss": 0.3744, "step": 26546 }, { "epoch": 2.698962993086621, "grad_norm": 0.25433772802352905, "learning_rate": 5.178115069360899e-06, "loss": 0.3845, "step": 26547 }, { "epoch": 2.6990646604310697, "grad_norm": 0.2638736963272095, "learning_rate": 5.177760408178547e-06, "loss": 0.3344, "step": 26548 }, { "epoch": 2.6991663277755187, "grad_norm": 0.2713163495063782, "learning_rate": 5.177405746100681e-06, "loss": 0.3409, "step": 26549 }, { "epoch": 2.6992679951199676, "grad_norm": 0.28071150183677673, "learning_rate": 5.1770510831290875e-06, "loss": 0.3501, "step": 26550 }, { "epoch": 2.6993696624644166, "grad_norm": 0.2835385203361511, "learning_rate": 5.176696419265551e-06, "loss": 0.3606, "step": 26551 }, { "epoch": 2.6994713298088655, "grad_norm": 0.2572622299194336, "learning_rate": 5.176341754511861e-06, "loss": 0.3347, "step": 26552 }, { "epoch": 2.6995729971533144, "grad_norm": 0.28567296266555786, "learning_rate": 5.175987088869803e-06, "loss": 0.3175, "step": 26553 }, { "epoch": 2.6996746644977634, "grad_norm": 0.2514275908470154, "learning_rate": 5.175632422341164e-06, "loss": 0.3361, "step": 26554 }, { "epoch": 2.6997763318422123, "grad_norm": 0.26260924339294434, "learning_rate": 5.175277754927731e-06, "loss": 0.3197, "step": 26555 }, { "epoch": 2.6998779991866613, "grad_norm": 0.27501851320266724, "learning_rate": 5.17492308663129e-06, "loss": 0.3524, "step": 26556 }, { "epoch": 2.69997966653111, "grad_norm": 0.2855803668498993, "learning_rate": 5.174568417453627e-06, "loss": 0.3513, "step": 26557 }, { "epoch": 2.700081333875559, "grad_norm": 0.2752683162689209, "learning_rate": 5.174213747396531e-06, "loss": 0.3099, "step": 26558 }, { "epoch": 2.700183001220008, "grad_norm": 0.2786113917827606, "learning_rate": 5.173859076461787e-06, "loss": 0.3299, "step": 26559 }, { "epoch": 2.700284668564457, "grad_norm": 0.28645020723342896, "learning_rate": 5.173504404651183e-06, "loss": 0.3391, "step": 26560 }, { "epoch": 2.700386335908906, "grad_norm": 0.27921172976493835, "learning_rate": 5.173149731966503e-06, "loss": 0.332, "step": 26561 }, { "epoch": 2.700488003253355, "grad_norm": 0.26625943183898926, "learning_rate": 5.172795058409537e-06, "loss": 0.3298, "step": 26562 }, { "epoch": 2.700589670597804, "grad_norm": 0.26804056763648987, "learning_rate": 5.172440383982072e-06, "loss": 0.341, "step": 26563 }, { "epoch": 2.700691337942253, "grad_norm": 0.2955705225467682, "learning_rate": 5.172085708685892e-06, "loss": 0.3603, "step": 26564 }, { "epoch": 2.7007930052867017, "grad_norm": 0.28738364577293396, "learning_rate": 5.171731032522787e-06, "loss": 0.3546, "step": 26565 }, { "epoch": 2.7008946726311507, "grad_norm": 0.28270164132118225, "learning_rate": 5.17137635549454e-06, "loss": 0.3082, "step": 26566 }, { "epoch": 2.7009963399755996, "grad_norm": 0.24807681143283844, "learning_rate": 5.171021677602939e-06, "loss": 0.3477, "step": 26567 }, { "epoch": 2.7010980073200486, "grad_norm": 0.2520498037338257, "learning_rate": 5.1706669988497725e-06, "loss": 0.3162, "step": 26568 }, { "epoch": 2.7011996746644975, "grad_norm": 0.28847965598106384, "learning_rate": 5.170312319236825e-06, "loss": 0.3287, "step": 26569 }, { "epoch": 2.7013013420089464, "grad_norm": 0.2617189586162567, "learning_rate": 5.169957638765887e-06, "loss": 0.3814, "step": 26570 }, { "epoch": 2.701403009353396, "grad_norm": 0.28579607605934143, "learning_rate": 5.169602957438742e-06, "loss": 0.3303, "step": 26571 }, { "epoch": 2.7015046766978448, "grad_norm": 0.26287710666656494, "learning_rate": 5.169248275257175e-06, "loss": 0.3395, "step": 26572 }, { "epoch": 2.7016063440422937, "grad_norm": 0.2605651617050171, "learning_rate": 5.168893592222977e-06, "loss": 0.3229, "step": 26573 }, { "epoch": 2.7017080113867427, "grad_norm": 0.26748791337013245, "learning_rate": 5.168538908337935e-06, "loss": 0.3089, "step": 26574 }, { "epoch": 2.7018096787311916, "grad_norm": 0.26631635427474976, "learning_rate": 5.168184223603833e-06, "loss": 0.3359, "step": 26575 }, { "epoch": 2.7019113460756405, "grad_norm": 0.25031590461730957, "learning_rate": 5.167829538022458e-06, "loss": 0.3159, "step": 26576 }, { "epoch": 2.7020130134200895, "grad_norm": 0.26018425822257996, "learning_rate": 5.167474851595597e-06, "loss": 0.3568, "step": 26577 }, { "epoch": 2.7021146807645384, "grad_norm": 0.2869550883769989, "learning_rate": 5.167120164325039e-06, "loss": 0.3196, "step": 26578 }, { "epoch": 2.7022163481089874, "grad_norm": 0.26423266530036926, "learning_rate": 5.166765476212569e-06, "loss": 0.3033, "step": 26579 }, { "epoch": 2.7023180154534363, "grad_norm": 0.2693585753440857, "learning_rate": 5.166410787259974e-06, "loss": 0.3586, "step": 26580 }, { "epoch": 2.7024196827978852, "grad_norm": 0.2649095952510834, "learning_rate": 5.166056097469042e-06, "loss": 0.3195, "step": 26581 }, { "epoch": 2.702521350142334, "grad_norm": 0.2641445994377136, "learning_rate": 5.165701406841557e-06, "loss": 0.3096, "step": 26582 }, { "epoch": 2.702623017486783, "grad_norm": 0.2720543444156647, "learning_rate": 5.165346715379308e-06, "loss": 0.3456, "step": 26583 }, { "epoch": 2.702724684831232, "grad_norm": 0.2608509659767151, "learning_rate": 5.1649920230840825e-06, "loss": 0.3166, "step": 26584 }, { "epoch": 2.702826352175681, "grad_norm": 0.25028809905052185, "learning_rate": 5.164637329957667e-06, "loss": 0.3358, "step": 26585 }, { "epoch": 2.7029280195201304, "grad_norm": 0.2530263662338257, "learning_rate": 5.164282636001847e-06, "loss": 0.3359, "step": 26586 }, { "epoch": 2.7030296868645793, "grad_norm": 0.27244171500205994, "learning_rate": 5.1639279412184086e-06, "loss": 0.3165, "step": 26587 }, { "epoch": 2.7031313542090283, "grad_norm": 0.2722276449203491, "learning_rate": 5.1635732456091414e-06, "loss": 0.3249, "step": 26588 }, { "epoch": 2.7032330215534772, "grad_norm": 0.26826369762420654, "learning_rate": 5.163218549175832e-06, "loss": 0.3539, "step": 26589 }, { "epoch": 2.703334688897926, "grad_norm": 0.2785101532936096, "learning_rate": 5.162863851920265e-06, "loss": 0.3364, "step": 26590 }, { "epoch": 2.703436356242375, "grad_norm": 0.288783460855484, "learning_rate": 5.16250915384423e-06, "loss": 0.3505, "step": 26591 }, { "epoch": 2.703538023586824, "grad_norm": 0.27598121762275696, "learning_rate": 5.162154454949509e-06, "loss": 0.3011, "step": 26592 }, { "epoch": 2.703639690931273, "grad_norm": 0.26714304089546204, "learning_rate": 5.161799755237896e-06, "loss": 0.3417, "step": 26593 }, { "epoch": 2.703741358275722, "grad_norm": 0.267257422208786, "learning_rate": 5.161445054711175e-06, "loss": 0.3442, "step": 26594 }, { "epoch": 2.703843025620171, "grad_norm": 0.2493649125099182, "learning_rate": 5.16109035337113e-06, "loss": 0.338, "step": 26595 }, { "epoch": 2.70394469296462, "grad_norm": 0.265708327293396, "learning_rate": 5.16073565121955e-06, "loss": 0.3371, "step": 26596 }, { "epoch": 2.7040463603090688, "grad_norm": 0.2689301073551178, "learning_rate": 5.160380948258221e-06, "loss": 0.3562, "step": 26597 }, { "epoch": 2.7041480276535177, "grad_norm": 0.2931581735610962, "learning_rate": 5.160026244488934e-06, "loss": 0.3283, "step": 26598 }, { "epoch": 2.7042496949979666, "grad_norm": 0.25346773862838745, "learning_rate": 5.159671539913471e-06, "loss": 0.3105, "step": 26599 }, { "epoch": 2.7043513623424156, "grad_norm": 0.2556058168411255, "learning_rate": 5.159316834533621e-06, "loss": 0.313, "step": 26600 }, { "epoch": 2.7044530296868645, "grad_norm": 0.291189581155777, "learning_rate": 5.158962128351171e-06, "loss": 0.338, "step": 26601 }, { "epoch": 2.7045546970313135, "grad_norm": 0.28782594203948975, "learning_rate": 5.158607421367906e-06, "loss": 0.3198, "step": 26602 }, { "epoch": 2.7046563643757624, "grad_norm": 0.2716861367225647, "learning_rate": 5.158252713585617e-06, "loss": 0.3382, "step": 26603 }, { "epoch": 2.7047580317202113, "grad_norm": 0.2667860984802246, "learning_rate": 5.157898005006086e-06, "loss": 0.3351, "step": 26604 }, { "epoch": 2.7048596990646603, "grad_norm": 0.2735165059566498, "learning_rate": 5.157543295631104e-06, "loss": 0.303, "step": 26605 }, { "epoch": 2.7049613664091092, "grad_norm": 0.25966471433639526, "learning_rate": 5.157188585462457e-06, "loss": 0.3191, "step": 26606 }, { "epoch": 2.705063033753558, "grad_norm": 0.2662599980831146, "learning_rate": 5.156833874501929e-06, "loss": 0.3288, "step": 26607 }, { "epoch": 2.705164701098007, "grad_norm": 0.2647028863430023, "learning_rate": 5.15647916275131e-06, "loss": 0.3495, "step": 26608 }, { "epoch": 2.705266368442456, "grad_norm": 0.2556050419807434, "learning_rate": 5.156124450212388e-06, "loss": 0.3263, "step": 26609 }, { "epoch": 2.705368035786905, "grad_norm": 0.26023682951927185, "learning_rate": 5.155769736886945e-06, "loss": 0.3686, "step": 26610 }, { "epoch": 2.7054697031313544, "grad_norm": 0.2904953062534332, "learning_rate": 5.155415022776773e-06, "loss": 0.3676, "step": 26611 }, { "epoch": 2.7055713704758033, "grad_norm": 0.2649611234664917, "learning_rate": 5.155060307883657e-06, "loss": 0.3206, "step": 26612 }, { "epoch": 2.7056730378202523, "grad_norm": 0.2561512589454651, "learning_rate": 5.154705592209382e-06, "loss": 0.3554, "step": 26613 }, { "epoch": 2.705774705164701, "grad_norm": 0.2587706744670868, "learning_rate": 5.1543508757557405e-06, "loss": 0.3472, "step": 26614 }, { "epoch": 2.70587637250915, "grad_norm": 0.29338905215263367, "learning_rate": 5.153996158524513e-06, "loss": 0.3471, "step": 26615 }, { "epoch": 2.705978039853599, "grad_norm": 0.27030661702156067, "learning_rate": 5.153641440517492e-06, "loss": 0.3659, "step": 26616 }, { "epoch": 2.706079707198048, "grad_norm": 0.267610102891922, "learning_rate": 5.15328672173646e-06, "loss": 0.3373, "step": 26617 }, { "epoch": 2.706181374542497, "grad_norm": 0.25424838066101074, "learning_rate": 5.1529320021832065e-06, "loss": 0.3189, "step": 26618 }, { "epoch": 2.706283041886946, "grad_norm": 0.2614518105983734, "learning_rate": 5.152577281859518e-06, "loss": 0.3345, "step": 26619 }, { "epoch": 2.706384709231395, "grad_norm": 0.2518737316131592, "learning_rate": 5.152222560767181e-06, "loss": 0.3475, "step": 26620 }, { "epoch": 2.706486376575844, "grad_norm": 0.2796059548854828, "learning_rate": 5.151867838907983e-06, "loss": 0.3379, "step": 26621 }, { "epoch": 2.7065880439202927, "grad_norm": 0.2598276138305664, "learning_rate": 5.151513116283711e-06, "loss": 0.3508, "step": 26622 }, { "epoch": 2.7066897112647417, "grad_norm": 0.26367729902267456, "learning_rate": 5.151158392896152e-06, "loss": 0.2972, "step": 26623 }, { "epoch": 2.7067913786091906, "grad_norm": 0.2605751156806946, "learning_rate": 5.150803668747093e-06, "loss": 0.3111, "step": 26624 }, { "epoch": 2.7068930459536396, "grad_norm": 0.2332795411348343, "learning_rate": 5.150448943838321e-06, "loss": 0.318, "step": 26625 }, { "epoch": 2.7069947132980885, "grad_norm": 0.26212117075920105, "learning_rate": 5.150094218171623e-06, "loss": 0.3255, "step": 26626 }, { "epoch": 2.707096380642538, "grad_norm": 0.24638782441616058, "learning_rate": 5.149739491748787e-06, "loss": 0.3162, "step": 26627 }, { "epoch": 2.707198047986987, "grad_norm": 0.26306599378585815, "learning_rate": 5.149384764571597e-06, "loss": 0.3706, "step": 26628 }, { "epoch": 2.7072997153314358, "grad_norm": 0.2602775990962982, "learning_rate": 5.149030036641844e-06, "loss": 0.3112, "step": 26629 }, { "epoch": 2.7074013826758847, "grad_norm": 0.26347512006759644, "learning_rate": 5.14867530796131e-06, "loss": 0.3534, "step": 26630 }, { "epoch": 2.7075030500203336, "grad_norm": 0.2629857361316681, "learning_rate": 5.148320578531788e-06, "loss": 0.33, "step": 26631 }, { "epoch": 2.7076047173647826, "grad_norm": 0.2557956576347351, "learning_rate": 5.147965848355061e-06, "loss": 0.3468, "step": 26632 }, { "epoch": 2.7077063847092315, "grad_norm": 0.2763995826244354, "learning_rate": 5.147611117432918e-06, "loss": 0.3315, "step": 26633 }, { "epoch": 2.7078080520536805, "grad_norm": 0.28413474559783936, "learning_rate": 5.147256385767145e-06, "loss": 0.3189, "step": 26634 }, { "epoch": 2.7079097193981294, "grad_norm": 0.2773066759109497, "learning_rate": 5.146901653359529e-06, "loss": 0.365, "step": 26635 }, { "epoch": 2.7080113867425784, "grad_norm": 0.2803177535533905, "learning_rate": 5.146546920211858e-06, "loss": 0.3261, "step": 26636 }, { "epoch": 2.7081130540870273, "grad_norm": 0.26992687582969666, "learning_rate": 5.146192186325918e-06, "loss": 0.3329, "step": 26637 }, { "epoch": 2.7082147214314762, "grad_norm": 0.3047000765800476, "learning_rate": 5.145837451703496e-06, "loss": 0.3634, "step": 26638 }, { "epoch": 2.708316388775925, "grad_norm": 0.2695969343185425, "learning_rate": 5.14548271634638e-06, "loss": 0.3346, "step": 26639 }, { "epoch": 2.708418056120374, "grad_norm": 0.26271870732307434, "learning_rate": 5.145127980256357e-06, "loss": 0.3525, "step": 26640 }, { "epoch": 2.708519723464823, "grad_norm": 0.26596102118492126, "learning_rate": 5.144773243435214e-06, "loss": 0.3163, "step": 26641 }, { "epoch": 2.708621390809272, "grad_norm": 0.2886020243167877, "learning_rate": 5.144418505884738e-06, "loss": 0.3184, "step": 26642 }, { "epoch": 2.708723058153721, "grad_norm": 0.2637617588043213, "learning_rate": 5.144063767606713e-06, "loss": 0.3207, "step": 26643 }, { "epoch": 2.70882472549817, "grad_norm": 0.24199332296848297, "learning_rate": 5.143709028602932e-06, "loss": 0.3365, "step": 26644 }, { "epoch": 2.708926392842619, "grad_norm": 0.2852723300457001, "learning_rate": 5.143354288875177e-06, "loss": 0.317, "step": 26645 }, { "epoch": 2.7090280601870678, "grad_norm": 0.2670579254627228, "learning_rate": 5.1429995484252395e-06, "loss": 0.3285, "step": 26646 }, { "epoch": 2.7091297275315167, "grad_norm": 0.2701493799686432, "learning_rate": 5.142644807254904e-06, "loss": 0.3104, "step": 26647 }, { "epoch": 2.7092313948759656, "grad_norm": 0.26190096139907837, "learning_rate": 5.142290065365956e-06, "loss": 0.3208, "step": 26648 }, { "epoch": 2.7093330622204146, "grad_norm": 0.2583969235420227, "learning_rate": 5.141935322760186e-06, "loss": 0.3646, "step": 26649 }, { "epoch": 2.7094347295648635, "grad_norm": 0.2461482286453247, "learning_rate": 5.141580579439378e-06, "loss": 0.3255, "step": 26650 }, { "epoch": 2.7095363969093125, "grad_norm": 0.2690521478652954, "learning_rate": 5.141225835405322e-06, "loss": 0.343, "step": 26651 }, { "epoch": 2.709638064253762, "grad_norm": 0.2707248330116272, "learning_rate": 5.140871090659805e-06, "loss": 0.3737, "step": 26652 }, { "epoch": 2.709739731598211, "grad_norm": 0.26773229241371155, "learning_rate": 5.140516345204609e-06, "loss": 0.3192, "step": 26653 }, { "epoch": 2.7098413989426597, "grad_norm": 0.2617669105529785, "learning_rate": 5.140161599041529e-06, "loss": 0.3511, "step": 26654 }, { "epoch": 2.7099430662871087, "grad_norm": 0.26456567645072937, "learning_rate": 5.139806852172345e-06, "loss": 0.3338, "step": 26655 }, { "epoch": 2.7100447336315576, "grad_norm": 0.26872560381889343, "learning_rate": 5.13945210459885e-06, "loss": 0.3388, "step": 26656 }, { "epoch": 2.7101464009760066, "grad_norm": 0.2571702003479004, "learning_rate": 5.139097356322829e-06, "loss": 0.3211, "step": 26657 }, { "epoch": 2.7102480683204555, "grad_norm": 0.27763819694519043, "learning_rate": 5.138742607346064e-06, "loss": 0.3088, "step": 26658 }, { "epoch": 2.7103497356649044, "grad_norm": 0.27867332100868225, "learning_rate": 5.1383878576703505e-06, "loss": 0.3534, "step": 26659 }, { "epoch": 2.7104514030093534, "grad_norm": 0.24848324060440063, "learning_rate": 5.138033107297471e-06, "loss": 0.3275, "step": 26660 }, { "epoch": 2.7105530703538023, "grad_norm": 0.28512638807296753, "learning_rate": 5.1376783562292145e-06, "loss": 0.3308, "step": 26661 }, { "epoch": 2.7106547376982513, "grad_norm": 0.28433090448379517, "learning_rate": 5.137323604467366e-06, "loss": 0.3448, "step": 26662 }, { "epoch": 2.7107564050427, "grad_norm": 0.2649289071559906, "learning_rate": 5.136968852013713e-06, "loss": 0.3413, "step": 26663 }, { "epoch": 2.710858072387149, "grad_norm": 0.2577327489852905, "learning_rate": 5.136614098870045e-06, "loss": 0.3329, "step": 26664 }, { "epoch": 2.710959739731598, "grad_norm": 0.2623461186885834, "learning_rate": 5.136259345038147e-06, "loss": 0.3349, "step": 26665 }, { "epoch": 2.711061407076047, "grad_norm": 0.2630843222141266, "learning_rate": 5.1359045905198065e-06, "loss": 0.3773, "step": 26666 }, { "epoch": 2.711163074420496, "grad_norm": 0.2605358958244324, "learning_rate": 5.135549835316812e-06, "loss": 0.3422, "step": 26667 }, { "epoch": 2.7112647417649454, "grad_norm": 0.267971009016037, "learning_rate": 5.135195079430948e-06, "loss": 0.3402, "step": 26668 }, { "epoch": 2.7113664091093943, "grad_norm": 0.26623305678367615, "learning_rate": 5.134840322864006e-06, "loss": 0.3678, "step": 26669 }, { "epoch": 2.7114680764538432, "grad_norm": 0.2667574882507324, "learning_rate": 5.134485565617768e-06, "loss": 0.3785, "step": 26670 }, { "epoch": 2.711569743798292, "grad_norm": 0.2669367492198944, "learning_rate": 5.134130807694024e-06, "loss": 0.3457, "step": 26671 }, { "epoch": 2.711671411142741, "grad_norm": 0.27796924114227295, "learning_rate": 5.1337760490945625e-06, "loss": 0.374, "step": 26672 }, { "epoch": 2.71177307848719, "grad_norm": 0.2574755549430847, "learning_rate": 5.1334212898211666e-06, "loss": 0.3181, "step": 26673 }, { "epoch": 2.711874745831639, "grad_norm": 0.26913347840309143, "learning_rate": 5.133066529875628e-06, "loss": 0.3193, "step": 26674 }, { "epoch": 2.711976413176088, "grad_norm": 0.2800719141960144, "learning_rate": 5.1327117692597315e-06, "loss": 0.3485, "step": 26675 }, { "epoch": 2.712078080520537, "grad_norm": 0.2738626301288605, "learning_rate": 5.132357007975264e-06, "loss": 0.3024, "step": 26676 }, { "epoch": 2.712179747864986, "grad_norm": 0.2760933041572571, "learning_rate": 5.1320022460240134e-06, "loss": 0.3394, "step": 26677 }, { "epoch": 2.7122814152094348, "grad_norm": 0.25941383838653564, "learning_rate": 5.131647483407766e-06, "loss": 0.3131, "step": 26678 }, { "epoch": 2.7123830825538837, "grad_norm": 0.29762765765190125, "learning_rate": 5.131292720128311e-06, "loss": 0.317, "step": 26679 }, { "epoch": 2.7124847498983327, "grad_norm": 0.263889342546463, "learning_rate": 5.130937956187435e-06, "loss": 0.3287, "step": 26680 }, { "epoch": 2.7125864172427816, "grad_norm": 0.25837671756744385, "learning_rate": 5.1305831915869245e-06, "loss": 0.3234, "step": 26681 }, { "epoch": 2.7126880845872305, "grad_norm": 0.26539579033851624, "learning_rate": 5.1302284263285675e-06, "loss": 0.3528, "step": 26682 }, { "epoch": 2.7127897519316795, "grad_norm": 0.2510351240634918, "learning_rate": 5.129873660414149e-06, "loss": 0.3239, "step": 26683 }, { "epoch": 2.7128914192761284, "grad_norm": 0.27318015694618225, "learning_rate": 5.129518893845458e-06, "loss": 0.317, "step": 26684 }, { "epoch": 2.7129930866205774, "grad_norm": 0.27853304147720337, "learning_rate": 5.129164126624282e-06, "loss": 0.3485, "step": 26685 }, { "epoch": 2.7130947539650263, "grad_norm": 0.27526533603668213, "learning_rate": 5.128809358752408e-06, "loss": 0.3437, "step": 26686 }, { "epoch": 2.7131964213094752, "grad_norm": 0.26817265152931213, "learning_rate": 5.128454590231623e-06, "loss": 0.3566, "step": 26687 }, { "epoch": 2.713298088653924, "grad_norm": 0.26582929491996765, "learning_rate": 5.128099821063715e-06, "loss": 0.3654, "step": 26688 }, { "epoch": 2.713399755998373, "grad_norm": 0.25613611936569214, "learning_rate": 5.127745051250469e-06, "loss": 0.3166, "step": 26689 }, { "epoch": 2.713501423342822, "grad_norm": 0.26359230279922485, "learning_rate": 5.127390280793675e-06, "loss": 0.3343, "step": 26690 }, { "epoch": 2.713603090687271, "grad_norm": 0.26600223779678345, "learning_rate": 5.127035509695118e-06, "loss": 0.3292, "step": 26691 }, { "epoch": 2.71370475803172, "grad_norm": 0.2615593373775482, "learning_rate": 5.126680737956588e-06, "loss": 0.3477, "step": 26692 }, { "epoch": 2.7138064253761693, "grad_norm": 0.2631354331970215, "learning_rate": 5.126325965579868e-06, "loss": 0.319, "step": 26693 }, { "epoch": 2.7139080927206183, "grad_norm": 0.2586156129837036, "learning_rate": 5.125971192566749e-06, "loss": 0.3411, "step": 26694 }, { "epoch": 2.7140097600650672, "grad_norm": 0.27841705083847046, "learning_rate": 5.125616418919019e-06, "loss": 0.3682, "step": 26695 }, { "epoch": 2.714111427409516, "grad_norm": 0.290841668844223, "learning_rate": 5.1252616446384605e-06, "loss": 0.348, "step": 26696 }, { "epoch": 2.714213094753965, "grad_norm": 0.2513844668865204, "learning_rate": 5.124906869726865e-06, "loss": 0.3299, "step": 26697 }, { "epoch": 2.714314762098414, "grad_norm": 0.26936784386634827, "learning_rate": 5.124552094186019e-06, "loss": 0.3354, "step": 26698 }, { "epoch": 2.714416429442863, "grad_norm": 0.25739091634750366, "learning_rate": 5.124197318017707e-06, "loss": 0.309, "step": 26699 }, { "epoch": 2.714518096787312, "grad_norm": 0.2580898702144623, "learning_rate": 5.12384254122372e-06, "loss": 0.3085, "step": 26700 }, { "epoch": 2.714619764131761, "grad_norm": 0.27025821805000305, "learning_rate": 5.123487763805843e-06, "loss": 0.3681, "step": 26701 }, { "epoch": 2.71472143147621, "grad_norm": 0.2542698383331299, "learning_rate": 5.123132985765865e-06, "loss": 0.335, "step": 26702 }, { "epoch": 2.7148230988206588, "grad_norm": 0.25815701484680176, "learning_rate": 5.122778207105572e-06, "loss": 0.3315, "step": 26703 }, { "epoch": 2.7149247661651077, "grad_norm": 0.26575204730033875, "learning_rate": 5.1224234278267506e-06, "loss": 0.3495, "step": 26704 }, { "epoch": 2.7150264335095566, "grad_norm": 0.2674158811569214, "learning_rate": 5.12206864793119e-06, "loss": 0.3392, "step": 26705 }, { "epoch": 2.7151281008540056, "grad_norm": 0.27024638652801514, "learning_rate": 5.1217138674206756e-06, "loss": 0.332, "step": 26706 }, { "epoch": 2.7152297681984545, "grad_norm": 0.2605170011520386, "learning_rate": 5.1213590862969975e-06, "loss": 0.3309, "step": 26707 }, { "epoch": 2.7153314355429035, "grad_norm": 0.2708545923233032, "learning_rate": 5.1210043045619396e-06, "loss": 0.3774, "step": 26708 }, { "epoch": 2.715433102887353, "grad_norm": 0.2591760456562042, "learning_rate": 5.120649522217291e-06, "loss": 0.3597, "step": 26709 }, { "epoch": 2.715534770231802, "grad_norm": 0.24814023077487946, "learning_rate": 5.12029473926484e-06, "loss": 0.3139, "step": 26710 }, { "epoch": 2.7156364375762507, "grad_norm": 0.2476726770401001, "learning_rate": 5.11993995570637e-06, "loss": 0.3481, "step": 26711 }, { "epoch": 2.7157381049206997, "grad_norm": 0.25495484471321106, "learning_rate": 5.119585171543674e-06, "loss": 0.3, "step": 26712 }, { "epoch": 2.7158397722651486, "grad_norm": 0.2768979072570801, "learning_rate": 5.119230386778534e-06, "loss": 0.3162, "step": 26713 }, { "epoch": 2.7159414396095976, "grad_norm": 0.2701084315776825, "learning_rate": 5.118875601412741e-06, "loss": 0.3493, "step": 26714 }, { "epoch": 2.7160431069540465, "grad_norm": 0.279185026884079, "learning_rate": 5.1185208154480804e-06, "loss": 0.3362, "step": 26715 }, { "epoch": 2.7161447742984954, "grad_norm": 0.2551960349082947, "learning_rate": 5.11816602888634e-06, "loss": 0.3729, "step": 26716 }, { "epoch": 2.7162464416429444, "grad_norm": 0.24754683673381805, "learning_rate": 5.117811241729308e-06, "loss": 0.3191, "step": 26717 }, { "epoch": 2.7163481089873933, "grad_norm": 0.25789716839790344, "learning_rate": 5.117456453978771e-06, "loss": 0.3483, "step": 26718 }, { "epoch": 2.7164497763318423, "grad_norm": 0.267326682806015, "learning_rate": 5.117101665636515e-06, "loss": 0.3111, "step": 26719 }, { "epoch": 2.716551443676291, "grad_norm": 0.27186641097068787, "learning_rate": 5.116746876704329e-06, "loss": 0.348, "step": 26720 }, { "epoch": 2.71665311102074, "grad_norm": 0.2793078124523163, "learning_rate": 5.116392087184e-06, "loss": 0.3647, "step": 26721 }, { "epoch": 2.716754778365189, "grad_norm": 0.26522669196128845, "learning_rate": 5.116037297077316e-06, "loss": 0.3346, "step": 26722 }, { "epoch": 2.716856445709638, "grad_norm": 0.2918529808521271, "learning_rate": 5.115682506386065e-06, "loss": 0.3588, "step": 26723 }, { "epoch": 2.716958113054087, "grad_norm": 0.2814828157424927, "learning_rate": 5.115327715112029e-06, "loss": 0.3428, "step": 26724 }, { "epoch": 2.717059780398536, "grad_norm": 0.26396074891090393, "learning_rate": 5.114972923257004e-06, "loss": 0.3451, "step": 26725 }, { "epoch": 2.717161447742985, "grad_norm": 0.2691837251186371, "learning_rate": 5.114618130822769e-06, "loss": 0.3164, "step": 26726 }, { "epoch": 2.717263115087434, "grad_norm": 0.27299654483795166, "learning_rate": 5.114263337811118e-06, "loss": 0.3144, "step": 26727 }, { "epoch": 2.7173647824318827, "grad_norm": 0.2657633423805237, "learning_rate": 5.113908544223834e-06, "loss": 0.3604, "step": 26728 }, { "epoch": 2.7174664497763317, "grad_norm": 0.26061323285102844, "learning_rate": 5.113553750062704e-06, "loss": 0.3512, "step": 26729 }, { "epoch": 2.7175681171207806, "grad_norm": 0.2519100606441498, "learning_rate": 5.1131989553295205e-06, "loss": 0.3613, "step": 26730 }, { "epoch": 2.7176697844652296, "grad_norm": 0.2603939473628998, "learning_rate": 5.112844160026066e-06, "loss": 0.3444, "step": 26731 }, { "epoch": 2.7177714518096785, "grad_norm": 0.266783744096756, "learning_rate": 5.112489364154129e-06, "loss": 0.2909, "step": 26732 }, { "epoch": 2.7178731191541274, "grad_norm": 0.2643372416496277, "learning_rate": 5.112134567715499e-06, "loss": 0.324, "step": 26733 }, { "epoch": 2.717974786498577, "grad_norm": 0.2951499819755554, "learning_rate": 5.11177977071196e-06, "loss": 0.2987, "step": 26734 }, { "epoch": 2.7180764538430258, "grad_norm": 0.2793133556842804, "learning_rate": 5.1114249731453015e-06, "loss": 0.3463, "step": 26735 }, { "epoch": 2.7181781211874747, "grad_norm": 0.28195369243621826, "learning_rate": 5.111070175017311e-06, "loss": 0.3237, "step": 26736 }, { "epoch": 2.7182797885319236, "grad_norm": 0.26668474078178406, "learning_rate": 5.110715376329774e-06, "loss": 0.3351, "step": 26737 }, { "epoch": 2.7183814558763726, "grad_norm": 0.2679630219936371, "learning_rate": 5.110360577084482e-06, "loss": 0.2993, "step": 26738 }, { "epoch": 2.7184831232208215, "grad_norm": 0.2870246469974518, "learning_rate": 5.110005777283216e-06, "loss": 0.3111, "step": 26739 }, { "epoch": 2.7185847905652705, "grad_norm": 0.275318443775177, "learning_rate": 5.10965097692777e-06, "loss": 0.3301, "step": 26740 }, { "epoch": 2.7186864579097194, "grad_norm": 0.25876832008361816, "learning_rate": 5.109296176019928e-06, "loss": 0.3367, "step": 26741 }, { "epoch": 2.7187881252541684, "grad_norm": 0.26149117946624756, "learning_rate": 5.108941374561477e-06, "loss": 0.2934, "step": 26742 }, { "epoch": 2.7188897925986173, "grad_norm": 0.2841203808784485, "learning_rate": 5.108586572554206e-06, "loss": 0.3131, "step": 26743 }, { "epoch": 2.7189914599430662, "grad_norm": 0.3196965754032135, "learning_rate": 5.1082317699998995e-06, "loss": 0.3409, "step": 26744 }, { "epoch": 2.719093127287515, "grad_norm": 0.24254989624023438, "learning_rate": 5.107876966900349e-06, "loss": 0.3141, "step": 26745 }, { "epoch": 2.719194794631964, "grad_norm": 0.2783415615558624, "learning_rate": 5.10752216325734e-06, "loss": 0.3121, "step": 26746 }, { "epoch": 2.719296461976413, "grad_norm": 0.2740396559238434, "learning_rate": 5.107167359072659e-06, "loss": 0.3181, "step": 26747 }, { "epoch": 2.719398129320862, "grad_norm": 0.2898746132850647, "learning_rate": 5.106812554348095e-06, "loss": 0.3139, "step": 26748 }, { "epoch": 2.719499796665311, "grad_norm": 0.2603086531162262, "learning_rate": 5.1064577490854335e-06, "loss": 0.319, "step": 26749 }, { "epoch": 2.7196014640097603, "grad_norm": 0.25824692845344543, "learning_rate": 5.106102943286465e-06, "loss": 0.3187, "step": 26750 }, { "epoch": 2.7197031313542093, "grad_norm": 0.27098166942596436, "learning_rate": 5.105748136952975e-06, "loss": 0.3528, "step": 26751 }, { "epoch": 2.719804798698658, "grad_norm": 0.3041226863861084, "learning_rate": 5.10539333008675e-06, "loss": 0.3108, "step": 26752 }, { "epoch": 2.719906466043107, "grad_norm": 0.25958123803138733, "learning_rate": 5.10503852268958e-06, "loss": 0.3132, "step": 26753 }, { "epoch": 2.720008133387556, "grad_norm": 0.24624183773994446, "learning_rate": 5.1046837147632474e-06, "loss": 0.328, "step": 26754 }, { "epoch": 2.720109800732005, "grad_norm": 0.27520912885665894, "learning_rate": 5.104328906309546e-06, "loss": 0.3249, "step": 26755 }, { "epoch": 2.720211468076454, "grad_norm": 0.2428564727306366, "learning_rate": 5.10397409733026e-06, "loss": 0.3248, "step": 26756 }, { "epoch": 2.720313135420903, "grad_norm": 0.264882892370224, "learning_rate": 5.1036192878271776e-06, "loss": 0.3127, "step": 26757 }, { "epoch": 2.720414802765352, "grad_norm": 0.2713262438774109, "learning_rate": 5.103264477802085e-06, "loss": 0.3185, "step": 26758 }, { "epoch": 2.720516470109801, "grad_norm": 0.27605193853378296, "learning_rate": 5.10290966725677e-06, "loss": 0.3561, "step": 26759 }, { "epoch": 2.7206181374542497, "grad_norm": 0.26921719312667847, "learning_rate": 5.102554856193021e-06, "loss": 0.3289, "step": 26760 }, { "epoch": 2.7207198047986987, "grad_norm": 0.2694341242313385, "learning_rate": 5.1022000446126265e-06, "loss": 0.359, "step": 26761 }, { "epoch": 2.7208214721431476, "grad_norm": 0.2570916712284088, "learning_rate": 5.101845232517371e-06, "loss": 0.3133, "step": 26762 }, { "epoch": 2.7209231394875966, "grad_norm": 0.23688799142837524, "learning_rate": 5.101490419909043e-06, "loss": 0.3594, "step": 26763 }, { "epoch": 2.7210248068320455, "grad_norm": 0.2743927538394928, "learning_rate": 5.101135606789431e-06, "loss": 0.3633, "step": 26764 }, { "epoch": 2.7211264741764944, "grad_norm": 0.2549409568309784, "learning_rate": 5.100780793160322e-06, "loss": 0.3279, "step": 26765 }, { "epoch": 2.7212281415209434, "grad_norm": 0.2852976322174072, "learning_rate": 5.100425979023503e-06, "loss": 0.3112, "step": 26766 }, { "epoch": 2.7213298088653923, "grad_norm": 0.2857252359390259, "learning_rate": 5.100071164380762e-06, "loss": 0.3301, "step": 26767 }, { "epoch": 2.7214314762098413, "grad_norm": 0.27931544184684753, "learning_rate": 5.0997163492338865e-06, "loss": 0.3516, "step": 26768 }, { "epoch": 2.72153314355429, "grad_norm": 0.2529611885547638, "learning_rate": 5.099361533584663e-06, "loss": 0.3079, "step": 26769 }, { "epoch": 2.721634810898739, "grad_norm": 0.2654303014278412, "learning_rate": 5.09900671743488e-06, "loss": 0.3041, "step": 26770 }, { "epoch": 2.721736478243188, "grad_norm": 0.25577881932258606, "learning_rate": 5.098651900786326e-06, "loss": 0.353, "step": 26771 }, { "epoch": 2.721838145587637, "grad_norm": 0.24666127562522888, "learning_rate": 5.098297083640785e-06, "loss": 0.2903, "step": 26772 }, { "epoch": 2.721939812932086, "grad_norm": 0.2669157087802887, "learning_rate": 5.097942266000048e-06, "loss": 0.2948, "step": 26773 }, { "epoch": 2.722041480276535, "grad_norm": 0.25887030363082886, "learning_rate": 5.097587447865901e-06, "loss": 0.3451, "step": 26774 }, { "epoch": 2.7221431476209843, "grad_norm": 0.2662813663482666, "learning_rate": 5.097232629240129e-06, "loss": 0.3271, "step": 26775 }, { "epoch": 2.7222448149654332, "grad_norm": 0.2546162009239197, "learning_rate": 5.096877810124527e-06, "loss": 0.3357, "step": 26776 }, { "epoch": 2.722346482309882, "grad_norm": 0.2688637971878052, "learning_rate": 5.0965229905208744e-06, "loss": 0.3745, "step": 26777 }, { "epoch": 2.722448149654331, "grad_norm": 0.2580564320087433, "learning_rate": 5.096168170430963e-06, "loss": 0.3024, "step": 26778 }, { "epoch": 2.72254981699878, "grad_norm": 0.25450313091278076, "learning_rate": 5.095813349856579e-06, "loss": 0.3623, "step": 26779 }, { "epoch": 2.722651484343229, "grad_norm": 0.29492121934890747, "learning_rate": 5.0954585287995086e-06, "loss": 0.3778, "step": 26780 }, { "epoch": 2.722753151687678, "grad_norm": 0.24495092034339905, "learning_rate": 5.095103707261543e-06, "loss": 0.315, "step": 26781 }, { "epoch": 2.722854819032127, "grad_norm": 0.2819216847419739, "learning_rate": 5.094748885244466e-06, "loss": 0.3422, "step": 26782 }, { "epoch": 2.722956486376576, "grad_norm": 0.27915552258491516, "learning_rate": 5.0943940627500686e-06, "loss": 0.2996, "step": 26783 }, { "epoch": 2.7230581537210248, "grad_norm": 0.26009538769721985, "learning_rate": 5.094039239780134e-06, "loss": 0.3178, "step": 26784 }, { "epoch": 2.7231598210654737, "grad_norm": 0.2610244154930115, "learning_rate": 5.093684416336453e-06, "loss": 0.3302, "step": 26785 }, { "epoch": 2.7232614884099227, "grad_norm": 0.2526492774486542, "learning_rate": 5.093329592420813e-06, "loss": 0.3192, "step": 26786 }, { "epoch": 2.7233631557543716, "grad_norm": 0.27432677149772644, "learning_rate": 5.0929747680349995e-06, "loss": 0.325, "step": 26787 }, { "epoch": 2.7234648230988205, "grad_norm": 0.26361268758773804, "learning_rate": 5.092619943180802e-06, "loss": 0.325, "step": 26788 }, { "epoch": 2.7235664904432695, "grad_norm": 0.27039361000061035, "learning_rate": 5.092265117860007e-06, "loss": 0.317, "step": 26789 }, { "epoch": 2.7236681577877184, "grad_norm": 0.2615499794483185, "learning_rate": 5.0919102920744025e-06, "loss": 0.3148, "step": 26790 }, { "epoch": 2.723769825132168, "grad_norm": 0.2802261710166931, "learning_rate": 5.091555465825776e-06, "loss": 0.3096, "step": 26791 }, { "epoch": 2.7238714924766168, "grad_norm": 0.28042805194854736, "learning_rate": 5.091200639115913e-06, "loss": 0.337, "step": 26792 }, { "epoch": 2.7239731598210657, "grad_norm": 0.29058465361595154, "learning_rate": 5.090845811946605e-06, "loss": 0.3573, "step": 26793 }, { "epoch": 2.7240748271655146, "grad_norm": 0.27603551745414734, "learning_rate": 5.090490984319636e-06, "loss": 0.3355, "step": 26794 }, { "epoch": 2.7241764945099636, "grad_norm": 0.2640867531299591, "learning_rate": 5.090136156236796e-06, "loss": 0.3405, "step": 26795 }, { "epoch": 2.7242781618544125, "grad_norm": 0.28305330872535706, "learning_rate": 5.089781327699871e-06, "loss": 0.3326, "step": 26796 }, { "epoch": 2.7243798291988615, "grad_norm": 0.26393356919288635, "learning_rate": 5.089426498710649e-06, "loss": 0.3503, "step": 26797 }, { "epoch": 2.7244814965433104, "grad_norm": 0.26604709029197693, "learning_rate": 5.089071669270917e-06, "loss": 0.3301, "step": 26798 }, { "epoch": 2.7245831638877593, "grad_norm": 0.286606103181839, "learning_rate": 5.088716839382464e-06, "loss": 0.3384, "step": 26799 }, { "epoch": 2.7246848312322083, "grad_norm": 0.26200270652770996, "learning_rate": 5.0883620090470755e-06, "loss": 0.3292, "step": 26800 }, { "epoch": 2.7247864985766572, "grad_norm": 0.25398197770118713, "learning_rate": 5.088007178266542e-06, "loss": 0.3041, "step": 26801 }, { "epoch": 2.724888165921106, "grad_norm": 0.27209389209747314, "learning_rate": 5.087652347042648e-06, "loss": 0.3322, "step": 26802 }, { "epoch": 2.724989833265555, "grad_norm": 0.27347037196159363, "learning_rate": 5.087297515377183e-06, "loss": 0.301, "step": 26803 }, { "epoch": 2.725091500610004, "grad_norm": 0.25695133209228516, "learning_rate": 5.086942683271933e-06, "loss": 0.3036, "step": 26804 }, { "epoch": 2.725193167954453, "grad_norm": 0.2764163017272949, "learning_rate": 5.086587850728686e-06, "loss": 0.3231, "step": 26805 }, { "epoch": 2.725294835298902, "grad_norm": 0.26589474081993103, "learning_rate": 5.0862330177492315e-06, "loss": 0.3592, "step": 26806 }, { "epoch": 2.725396502643351, "grad_norm": 0.2700217664241791, "learning_rate": 5.085878184335354e-06, "loss": 0.3583, "step": 26807 }, { "epoch": 2.7254981699878, "grad_norm": 0.3008551001548767, "learning_rate": 5.085523350488845e-06, "loss": 0.3398, "step": 26808 }, { "epoch": 2.7255998373322488, "grad_norm": 0.27994048595428467, "learning_rate": 5.085168516211487e-06, "loss": 0.333, "step": 26809 }, { "epoch": 2.7257015046766977, "grad_norm": 0.25273218750953674, "learning_rate": 5.0848136815050705e-06, "loss": 0.3296, "step": 26810 }, { "epoch": 2.7258031720211466, "grad_norm": 0.2708298861980438, "learning_rate": 5.084458846371385e-06, "loss": 0.3318, "step": 26811 }, { "epoch": 2.7259048393655956, "grad_norm": 0.28169336915016174, "learning_rate": 5.084104010812214e-06, "loss": 0.3491, "step": 26812 }, { "epoch": 2.7260065067100445, "grad_norm": 0.28870296478271484, "learning_rate": 5.083749174829349e-06, "loss": 0.3, "step": 26813 }, { "epoch": 2.7261081740544935, "grad_norm": 0.2694677412509918, "learning_rate": 5.0833943384245735e-06, "loss": 0.3754, "step": 26814 }, { "epoch": 2.7262098413989424, "grad_norm": 0.2691514492034912, "learning_rate": 5.083039501599677e-06, "loss": 0.3566, "step": 26815 }, { "epoch": 2.726311508743392, "grad_norm": 0.27389541268348694, "learning_rate": 5.082684664356448e-06, "loss": 0.3059, "step": 26816 }, { "epoch": 2.7264131760878407, "grad_norm": 0.28086695075035095, "learning_rate": 5.0823298266966744e-06, "loss": 0.325, "step": 26817 }, { "epoch": 2.7265148434322897, "grad_norm": 0.262798011302948, "learning_rate": 5.081974988622142e-06, "loss": 0.3281, "step": 26818 }, { "epoch": 2.7266165107767386, "grad_norm": 0.2707827687263489, "learning_rate": 5.081620150134639e-06, "loss": 0.37, "step": 26819 }, { "epoch": 2.7267181781211876, "grad_norm": 0.2813529670238495, "learning_rate": 5.081265311235951e-06, "loss": 0.2954, "step": 26820 }, { "epoch": 2.7268198454656365, "grad_norm": 0.26901066303253174, "learning_rate": 5.08091047192787e-06, "loss": 0.3335, "step": 26821 }, { "epoch": 2.7269215128100854, "grad_norm": 0.2617475390434265, "learning_rate": 5.080555632212182e-06, "loss": 0.3564, "step": 26822 }, { "epoch": 2.7270231801545344, "grad_norm": 0.2660468816757202, "learning_rate": 5.080200792090672e-06, "loss": 0.3394, "step": 26823 }, { "epoch": 2.7271248474989833, "grad_norm": 0.2700698971748352, "learning_rate": 5.07984595156513e-06, "loss": 0.3459, "step": 26824 }, { "epoch": 2.7272265148434323, "grad_norm": 0.27053433656692505, "learning_rate": 5.079491110637343e-06, "loss": 0.3451, "step": 26825 }, { "epoch": 2.727328182187881, "grad_norm": 0.2585596442222595, "learning_rate": 5.079136269309099e-06, "loss": 0.3258, "step": 26826 }, { "epoch": 2.72742984953233, "grad_norm": 0.28586041927337646, "learning_rate": 5.0787814275821844e-06, "loss": 0.291, "step": 26827 }, { "epoch": 2.727531516876779, "grad_norm": 0.27329742908477783, "learning_rate": 5.078426585458388e-06, "loss": 0.326, "step": 26828 }, { "epoch": 2.727633184221228, "grad_norm": 0.2944609224796295, "learning_rate": 5.078071742939498e-06, "loss": 0.3128, "step": 26829 }, { "epoch": 2.727734851565677, "grad_norm": 0.28292277455329895, "learning_rate": 5.0777169000272994e-06, "loss": 0.3497, "step": 26830 }, { "epoch": 2.727836518910126, "grad_norm": 0.2723129391670227, "learning_rate": 5.077362056723584e-06, "loss": 0.3461, "step": 26831 }, { "epoch": 2.7279381862545753, "grad_norm": 0.26526233553886414, "learning_rate": 5.077007213030136e-06, "loss": 0.3098, "step": 26832 }, { "epoch": 2.7280398535990242, "grad_norm": 0.27990013360977173, "learning_rate": 5.076652368948744e-06, "loss": 0.356, "step": 26833 }, { "epoch": 2.728141520943473, "grad_norm": 0.2805377244949341, "learning_rate": 5.076297524481195e-06, "loss": 0.3437, "step": 26834 }, { "epoch": 2.728243188287922, "grad_norm": 0.26046597957611084, "learning_rate": 5.075942679629276e-06, "loss": 0.3134, "step": 26835 }, { "epoch": 2.728344855632371, "grad_norm": 0.25618577003479004, "learning_rate": 5.075587834394777e-06, "loss": 0.3355, "step": 26836 }, { "epoch": 2.72844652297682, "grad_norm": 0.2713698446750641, "learning_rate": 5.075232988779486e-06, "loss": 0.3366, "step": 26837 }, { "epoch": 2.728548190321269, "grad_norm": 0.26675286889076233, "learning_rate": 5.074878142785188e-06, "loss": 0.3123, "step": 26838 }, { "epoch": 2.728649857665718, "grad_norm": 0.2885797917842865, "learning_rate": 5.0745232964136715e-06, "loss": 0.3522, "step": 26839 }, { "epoch": 2.728751525010167, "grad_norm": 0.27287015318870544, "learning_rate": 5.074168449666724e-06, "loss": 0.3382, "step": 26840 }, { "epoch": 2.7288531923546158, "grad_norm": 0.2653907835483551, "learning_rate": 5.073813602546133e-06, "loss": 0.3145, "step": 26841 }, { "epoch": 2.7289548596990647, "grad_norm": 0.2695329487323761, "learning_rate": 5.0734587550536885e-06, "loss": 0.3157, "step": 26842 }, { "epoch": 2.7290565270435136, "grad_norm": 0.26483479142189026, "learning_rate": 5.073103907191175e-06, "loss": 0.3069, "step": 26843 }, { "epoch": 2.7291581943879626, "grad_norm": 0.29014450311660767, "learning_rate": 5.0727490589603825e-06, "loss": 0.3409, "step": 26844 }, { "epoch": 2.7292598617324115, "grad_norm": 0.278313010931015, "learning_rate": 5.072394210363095e-06, "loss": 0.3312, "step": 26845 }, { "epoch": 2.7293615290768605, "grad_norm": 0.28216102719306946, "learning_rate": 5.072039361401105e-06, "loss": 0.3435, "step": 26846 }, { "epoch": 2.7294631964213094, "grad_norm": 0.24839134514331818, "learning_rate": 5.071684512076197e-06, "loss": 0.3237, "step": 26847 }, { "epoch": 2.7295648637657584, "grad_norm": 0.2795485854148865, "learning_rate": 5.071329662390159e-06, "loss": 0.3116, "step": 26848 }, { "epoch": 2.7296665311102073, "grad_norm": 0.26022255420684814, "learning_rate": 5.07097481234478e-06, "loss": 0.3064, "step": 26849 }, { "epoch": 2.7297681984546562, "grad_norm": 0.259705126285553, "learning_rate": 5.0706199619418465e-06, "loss": 0.3614, "step": 26850 }, { "epoch": 2.729869865799105, "grad_norm": 0.28443631529808044, "learning_rate": 5.070265111183146e-06, "loss": 0.3663, "step": 26851 }, { "epoch": 2.729971533143554, "grad_norm": 0.26933011412620544, "learning_rate": 5.069910260070467e-06, "loss": 0.3171, "step": 26852 }, { "epoch": 2.730073200488003, "grad_norm": 0.28210848569869995, "learning_rate": 5.069555408605596e-06, "loss": 0.3159, "step": 26853 }, { "epoch": 2.730174867832452, "grad_norm": 0.31572070717811584, "learning_rate": 5.069200556790323e-06, "loss": 0.3466, "step": 26854 }, { "epoch": 2.730276535176901, "grad_norm": 0.267410010099411, "learning_rate": 5.068845704626433e-06, "loss": 0.3282, "step": 26855 }, { "epoch": 2.73037820252135, "grad_norm": 0.25229716300964355, "learning_rate": 5.068490852115714e-06, "loss": 0.3016, "step": 26856 }, { "epoch": 2.7304798698657993, "grad_norm": 0.2762991487979889, "learning_rate": 5.068135999259956e-06, "loss": 0.3276, "step": 26857 }, { "epoch": 2.730581537210248, "grad_norm": 0.27911314368247986, "learning_rate": 5.067781146060942e-06, "loss": 0.3242, "step": 26858 }, { "epoch": 2.730683204554697, "grad_norm": 0.26409924030303955, "learning_rate": 5.067426292520466e-06, "loss": 0.3518, "step": 26859 }, { "epoch": 2.730784871899146, "grad_norm": 0.25857147574424744, "learning_rate": 5.06707143864031e-06, "loss": 0.3055, "step": 26860 }, { "epoch": 2.730886539243595, "grad_norm": 0.2707936465740204, "learning_rate": 5.066716584422266e-06, "loss": 0.3517, "step": 26861 }, { "epoch": 2.730988206588044, "grad_norm": 0.2599721848964691, "learning_rate": 5.06636172986812e-06, "loss": 0.3465, "step": 26862 }, { "epoch": 2.731089873932493, "grad_norm": 0.2731396555900574, "learning_rate": 5.0660068749796576e-06, "loss": 0.3045, "step": 26863 }, { "epoch": 2.731191541276942, "grad_norm": 0.28598523139953613, "learning_rate": 5.065652019758669e-06, "loss": 0.3557, "step": 26864 }, { "epoch": 2.731293208621391, "grad_norm": 0.25461941957473755, "learning_rate": 5.065297164206942e-06, "loss": 0.3289, "step": 26865 }, { "epoch": 2.7313948759658397, "grad_norm": 0.26304692029953003, "learning_rate": 5.06494230832626e-06, "loss": 0.3129, "step": 26866 }, { "epoch": 2.7314965433102887, "grad_norm": 0.2744934856891632, "learning_rate": 5.064587452118418e-06, "loss": 0.3257, "step": 26867 }, { "epoch": 2.7315982106547376, "grad_norm": 0.2837775945663452, "learning_rate": 5.064232595585198e-06, "loss": 0.3671, "step": 26868 }, { "epoch": 2.7316998779991866, "grad_norm": 0.2592684030532837, "learning_rate": 5.06387773872839e-06, "loss": 0.3527, "step": 26869 }, { "epoch": 2.7318015453436355, "grad_norm": 0.26708704233169556, "learning_rate": 5.0635228815497805e-06, "loss": 0.2984, "step": 26870 }, { "epoch": 2.7319032126880844, "grad_norm": 0.256795197725296, "learning_rate": 5.063168024051158e-06, "loss": 0.3546, "step": 26871 }, { "epoch": 2.7320048800325334, "grad_norm": 0.24529427289962769, "learning_rate": 5.06281316623431e-06, "loss": 0.3152, "step": 26872 }, { "epoch": 2.7321065473769828, "grad_norm": 0.3048578202724457, "learning_rate": 5.062458308101025e-06, "loss": 0.3536, "step": 26873 }, { "epoch": 2.7322082147214317, "grad_norm": 0.27671605348587036, "learning_rate": 5.06210344965309e-06, "loss": 0.3281, "step": 26874 }, { "epoch": 2.7323098820658807, "grad_norm": 0.29213470220565796, "learning_rate": 5.061748590892291e-06, "loss": 0.3351, "step": 26875 }, { "epoch": 2.7324115494103296, "grad_norm": 0.247811421751976, "learning_rate": 5.061393731820417e-06, "loss": 0.3176, "step": 26876 }, { "epoch": 2.7325132167547785, "grad_norm": 0.27692151069641113, "learning_rate": 5.061038872439259e-06, "loss": 0.3144, "step": 26877 }, { "epoch": 2.7326148840992275, "grad_norm": 0.2694816291332245, "learning_rate": 5.060684012750599e-06, "loss": 0.3308, "step": 26878 }, { "epoch": 2.7327165514436764, "grad_norm": 0.28098002076148987, "learning_rate": 5.06032915275623e-06, "loss": 0.332, "step": 26879 }, { "epoch": 2.7328182187881254, "grad_norm": 0.28885501623153687, "learning_rate": 5.059974292457933e-06, "loss": 0.3448, "step": 26880 }, { "epoch": 2.7329198861325743, "grad_norm": 0.2583230137825012, "learning_rate": 5.059619431857502e-06, "loss": 0.3329, "step": 26881 }, { "epoch": 2.7330215534770232, "grad_norm": 0.25809386372566223, "learning_rate": 5.059264570956724e-06, "loss": 0.3362, "step": 26882 }, { "epoch": 2.733123220821472, "grad_norm": 0.2665543556213379, "learning_rate": 5.0589097097573825e-06, "loss": 0.3421, "step": 26883 }, { "epoch": 2.733224888165921, "grad_norm": 0.27008965611457825, "learning_rate": 5.058554848261269e-06, "loss": 0.3159, "step": 26884 }, { "epoch": 2.73332655551037, "grad_norm": 0.2498740404844284, "learning_rate": 5.05819998647017e-06, "loss": 0.3207, "step": 26885 }, { "epoch": 2.733428222854819, "grad_norm": 0.2764629125595093, "learning_rate": 5.0578451243858725e-06, "loss": 0.3648, "step": 26886 }, { "epoch": 2.733529890199268, "grad_norm": 0.27457261085510254, "learning_rate": 5.057490262010168e-06, "loss": 0.3229, "step": 26887 }, { "epoch": 2.733631557543717, "grad_norm": 0.30270206928253174, "learning_rate": 5.057135399344839e-06, "loss": 0.3591, "step": 26888 }, { "epoch": 2.733733224888166, "grad_norm": 0.28067508339881897, "learning_rate": 5.056780536391674e-06, "loss": 0.2992, "step": 26889 }, { "epoch": 2.7338348922326148, "grad_norm": 0.24582166969776154, "learning_rate": 5.056425673152464e-06, "loss": 0.3621, "step": 26890 }, { "epoch": 2.7339365595770637, "grad_norm": 0.2720217704772949, "learning_rate": 5.056070809628995e-06, "loss": 0.3293, "step": 26891 }, { "epoch": 2.7340382269215127, "grad_norm": 0.2696380317211151, "learning_rate": 5.0557159458230546e-06, "loss": 0.3479, "step": 26892 }, { "epoch": 2.7341398942659616, "grad_norm": 0.27471596002578735, "learning_rate": 5.0553610817364304e-06, "loss": 0.3226, "step": 26893 }, { "epoch": 2.7342415616104105, "grad_norm": 0.26956915855407715, "learning_rate": 5.055006217370909e-06, "loss": 0.3531, "step": 26894 }, { "epoch": 2.7343432289548595, "grad_norm": 0.2726573348045349, "learning_rate": 5.05465135272828e-06, "loss": 0.3405, "step": 26895 }, { "epoch": 2.7344448962993084, "grad_norm": 0.262978732585907, "learning_rate": 5.054296487810331e-06, "loss": 0.335, "step": 26896 }, { "epoch": 2.7345465636437574, "grad_norm": 0.2688968777656555, "learning_rate": 5.053941622618849e-06, "loss": 0.3109, "step": 26897 }, { "epoch": 2.7346482309882068, "grad_norm": 0.26466089487075806, "learning_rate": 5.0535867571556215e-06, "loss": 0.3109, "step": 26898 }, { "epoch": 2.7347498983326557, "grad_norm": 0.2801225483417511, "learning_rate": 5.053231891422437e-06, "loss": 0.3276, "step": 26899 }, { "epoch": 2.7348515656771046, "grad_norm": 0.2916603088378906, "learning_rate": 5.0528770254210824e-06, "loss": 0.3384, "step": 26900 }, { "epoch": 2.7349532330215536, "grad_norm": 0.26999273896217346, "learning_rate": 5.052522159153346e-06, "loss": 0.3059, "step": 26901 }, { "epoch": 2.7350549003660025, "grad_norm": 0.26225337386131287, "learning_rate": 5.052167292621017e-06, "loss": 0.3251, "step": 26902 }, { "epoch": 2.7351565677104515, "grad_norm": 0.25635674595832825, "learning_rate": 5.05181242582588e-06, "loss": 0.3584, "step": 26903 }, { "epoch": 2.7352582350549004, "grad_norm": 0.27537742257118225, "learning_rate": 5.0514575587697245e-06, "loss": 0.3311, "step": 26904 }, { "epoch": 2.7353599023993493, "grad_norm": 0.27155348658561707, "learning_rate": 5.051102691454338e-06, "loss": 0.3008, "step": 26905 }, { "epoch": 2.7354615697437983, "grad_norm": 0.26707786321640015, "learning_rate": 5.050747823881509e-06, "loss": 0.3149, "step": 26906 }, { "epoch": 2.7355632370882472, "grad_norm": 0.28406673669815063, "learning_rate": 5.050392956053024e-06, "loss": 0.3153, "step": 26907 }, { "epoch": 2.735664904432696, "grad_norm": 0.2797762155532837, "learning_rate": 5.050038087970672e-06, "loss": 0.3354, "step": 26908 }, { "epoch": 2.735766571777145, "grad_norm": 0.25843507051467896, "learning_rate": 5.049683219636239e-06, "loss": 0.3355, "step": 26909 }, { "epoch": 2.735868239121594, "grad_norm": 0.2685236930847168, "learning_rate": 5.049328351051514e-06, "loss": 0.342, "step": 26910 }, { "epoch": 2.735969906466043, "grad_norm": 0.24385303258895874, "learning_rate": 5.048973482218284e-06, "loss": 0.2975, "step": 26911 }, { "epoch": 2.736071573810492, "grad_norm": 0.26332783699035645, "learning_rate": 5.048618613138338e-06, "loss": 0.3147, "step": 26912 }, { "epoch": 2.736173241154941, "grad_norm": 0.25821515917778015, "learning_rate": 5.048263743813464e-06, "loss": 0.3302, "step": 26913 }, { "epoch": 2.7362749084993903, "grad_norm": 0.2833062410354614, "learning_rate": 5.047908874245447e-06, "loss": 0.3199, "step": 26914 }, { "epoch": 2.736376575843839, "grad_norm": 0.2704201340675354, "learning_rate": 5.047554004436077e-06, "loss": 0.3389, "step": 26915 }, { "epoch": 2.736478243188288, "grad_norm": 0.2634645402431488, "learning_rate": 5.047199134387141e-06, "loss": 0.3127, "step": 26916 }, { "epoch": 2.736579910532737, "grad_norm": 0.2722625136375427, "learning_rate": 5.046844264100427e-06, "loss": 0.3162, "step": 26917 }, { "epoch": 2.736681577877186, "grad_norm": 0.2673637270927429, "learning_rate": 5.046489393577724e-06, "loss": 0.3264, "step": 26918 }, { "epoch": 2.736783245221635, "grad_norm": 0.27392998337745667, "learning_rate": 5.046134522820816e-06, "loss": 0.3252, "step": 26919 }, { "epoch": 2.736884912566084, "grad_norm": 0.27471020817756653, "learning_rate": 5.045779651831495e-06, "loss": 0.3404, "step": 26920 }, { "epoch": 2.736986579910533, "grad_norm": 0.25251129269599915, "learning_rate": 5.0454247806115465e-06, "loss": 0.3122, "step": 26921 }, { "epoch": 2.737088247254982, "grad_norm": 0.2955188453197479, "learning_rate": 5.0450699091627584e-06, "loss": 0.3453, "step": 26922 }, { "epoch": 2.7371899145994307, "grad_norm": 0.27329105138778687, "learning_rate": 5.044715037486921e-06, "loss": 0.3844, "step": 26923 }, { "epoch": 2.7372915819438797, "grad_norm": 0.2703685760498047, "learning_rate": 5.044360165585816e-06, "loss": 0.3137, "step": 26924 }, { "epoch": 2.7373932492883286, "grad_norm": 0.26047641038894653, "learning_rate": 5.044005293461238e-06, "loss": 0.331, "step": 26925 }, { "epoch": 2.7374949166327776, "grad_norm": 0.2613377273082733, "learning_rate": 5.0436504211149705e-06, "loss": 0.3365, "step": 26926 }, { "epoch": 2.7375965839772265, "grad_norm": 0.27914199233055115, "learning_rate": 5.043295548548804e-06, "loss": 0.3397, "step": 26927 }, { "epoch": 2.7376982513216754, "grad_norm": 0.26429086923599243, "learning_rate": 5.042940675764524e-06, "loss": 0.2847, "step": 26928 }, { "epoch": 2.7377999186661244, "grad_norm": 0.26456257700920105, "learning_rate": 5.0425858027639184e-06, "loss": 0.3221, "step": 26929 }, { "epoch": 2.7379015860105733, "grad_norm": 0.25812652707099915, "learning_rate": 5.042230929548777e-06, "loss": 0.3292, "step": 26930 }, { "epoch": 2.7380032533550223, "grad_norm": 0.25321245193481445, "learning_rate": 5.041876056120885e-06, "loss": 0.3326, "step": 26931 }, { "epoch": 2.738104920699471, "grad_norm": 0.2712918221950531, "learning_rate": 5.041521182482031e-06, "loss": 0.3231, "step": 26932 }, { "epoch": 2.73820658804392, "grad_norm": 0.27299800515174866, "learning_rate": 5.0411663086340045e-06, "loss": 0.3173, "step": 26933 }, { "epoch": 2.738308255388369, "grad_norm": 0.2974301874637604, "learning_rate": 5.040811434578591e-06, "loss": 0.3298, "step": 26934 }, { "epoch": 2.738409922732818, "grad_norm": 0.2484588325023651, "learning_rate": 5.04045656031758e-06, "loss": 0.3551, "step": 26935 }, { "epoch": 2.738511590077267, "grad_norm": 0.2892102003097534, "learning_rate": 5.0401016858527575e-06, "loss": 0.3247, "step": 26936 }, { "epoch": 2.738613257421716, "grad_norm": 0.26408928632736206, "learning_rate": 5.039746811185912e-06, "loss": 0.3176, "step": 26937 }, { "epoch": 2.738714924766165, "grad_norm": 0.27978870272636414, "learning_rate": 5.0393919363188325e-06, "loss": 0.2974, "step": 26938 }, { "epoch": 2.7388165921106142, "grad_norm": 0.25568583607673645, "learning_rate": 5.039037061253306e-06, "loss": 0.362, "step": 26939 }, { "epoch": 2.738918259455063, "grad_norm": 0.271731436252594, "learning_rate": 5.03868218599112e-06, "loss": 0.3488, "step": 26940 }, { "epoch": 2.739019926799512, "grad_norm": 0.2700163722038269, "learning_rate": 5.038327310534061e-06, "loss": 0.3296, "step": 26941 }, { "epoch": 2.739121594143961, "grad_norm": 0.27878811955451965, "learning_rate": 5.037972434883919e-06, "loss": 0.3215, "step": 26942 }, { "epoch": 2.73922326148841, "grad_norm": 0.2969546914100647, "learning_rate": 5.037617559042482e-06, "loss": 0.3375, "step": 26943 }, { "epoch": 2.739324928832859, "grad_norm": 0.27862751483917236, "learning_rate": 5.037262683011534e-06, "loss": 0.3226, "step": 26944 }, { "epoch": 2.739426596177308, "grad_norm": 0.2693004906177521, "learning_rate": 5.0369078067928675e-06, "loss": 0.3445, "step": 26945 }, { "epoch": 2.739528263521757, "grad_norm": 0.2583831250667572, "learning_rate": 5.0365529303882675e-06, "loss": 0.3454, "step": 26946 }, { "epoch": 2.7396299308662058, "grad_norm": 0.28469395637512207, "learning_rate": 5.036198053799523e-06, "loss": 0.3404, "step": 26947 }, { "epoch": 2.7397315982106547, "grad_norm": 0.2595915198326111, "learning_rate": 5.0358431770284226e-06, "loss": 0.3335, "step": 26948 }, { "epoch": 2.7398332655551036, "grad_norm": 0.24914677441120148, "learning_rate": 5.0354883000767495e-06, "loss": 0.3543, "step": 26949 }, { "epoch": 2.7399349328995526, "grad_norm": 0.2670827805995941, "learning_rate": 5.035133422946298e-06, "loss": 0.3586, "step": 26950 }, { "epoch": 2.7400366002440015, "grad_norm": 0.25979533791542053, "learning_rate": 5.034778545638851e-06, "loss": 0.3477, "step": 26951 }, { "epoch": 2.7401382675884505, "grad_norm": 0.2711302936077118, "learning_rate": 5.0344236681561974e-06, "loss": 0.3279, "step": 26952 }, { "epoch": 2.7402399349328994, "grad_norm": 0.2636861801147461, "learning_rate": 5.0340687905001275e-06, "loss": 0.3806, "step": 26953 }, { "epoch": 2.7403416022773484, "grad_norm": 0.26434993743896484, "learning_rate": 5.033713912672424e-06, "loss": 0.325, "step": 26954 }, { "epoch": 2.7404432696217977, "grad_norm": 0.24802182614803314, "learning_rate": 5.033359034674881e-06, "loss": 0.3322, "step": 26955 }, { "epoch": 2.7405449369662467, "grad_norm": 0.2788778841495514, "learning_rate": 5.033004156509281e-06, "loss": 0.3437, "step": 26956 }, { "epoch": 2.7406466043106956, "grad_norm": 0.24750390648841858, "learning_rate": 5.0326492781774146e-06, "loss": 0.3246, "step": 26957 }, { "epoch": 2.7407482716551446, "grad_norm": 0.2694326639175415, "learning_rate": 5.03229439968107e-06, "loss": 0.3267, "step": 26958 }, { "epoch": 2.7408499389995935, "grad_norm": 0.24641205370426178, "learning_rate": 5.031939521022032e-06, "loss": 0.305, "step": 26959 }, { "epoch": 2.7409516063440424, "grad_norm": 0.2721370756626129, "learning_rate": 5.031584642202093e-06, "loss": 0.3609, "step": 26960 }, { "epoch": 2.7410532736884914, "grad_norm": 0.26633137464523315, "learning_rate": 5.0312297632230355e-06, "loss": 0.3427, "step": 26961 }, { "epoch": 2.7411549410329403, "grad_norm": 0.2766405940055847, "learning_rate": 5.030874884086651e-06, "loss": 0.3531, "step": 26962 }, { "epoch": 2.7412566083773893, "grad_norm": 0.26920607686042786, "learning_rate": 5.030520004794726e-06, "loss": 0.3226, "step": 26963 }, { "epoch": 2.741358275721838, "grad_norm": 0.2560337483882904, "learning_rate": 5.030165125349049e-06, "loss": 0.3166, "step": 26964 }, { "epoch": 2.741459943066287, "grad_norm": 0.2558865249156952, "learning_rate": 5.029810245751407e-06, "loss": 0.3567, "step": 26965 }, { "epoch": 2.741561610410736, "grad_norm": 0.25758475065231323, "learning_rate": 5.029455366003586e-06, "loss": 0.3509, "step": 26966 }, { "epoch": 2.741663277755185, "grad_norm": 0.26292747259140015, "learning_rate": 5.029100486107378e-06, "loss": 0.3119, "step": 26967 }, { "epoch": 2.741764945099634, "grad_norm": 0.2562539279460907, "learning_rate": 5.028745606064569e-06, "loss": 0.3441, "step": 26968 }, { "epoch": 2.741866612444083, "grad_norm": 0.25105297565460205, "learning_rate": 5.028390725876946e-06, "loss": 0.3721, "step": 26969 }, { "epoch": 2.741968279788532, "grad_norm": 0.26948028802871704, "learning_rate": 5.028035845546296e-06, "loss": 0.3387, "step": 26970 }, { "epoch": 2.742069947132981, "grad_norm": 0.2599669098854065, "learning_rate": 5.027680965074409e-06, "loss": 0.3318, "step": 26971 }, { "epoch": 2.7421716144774297, "grad_norm": 0.250301718711853, "learning_rate": 5.0273260844630715e-06, "loss": 0.3445, "step": 26972 }, { "epoch": 2.7422732818218787, "grad_norm": 0.2619178891181946, "learning_rate": 5.026971203714073e-06, "loss": 0.3134, "step": 26973 }, { "epoch": 2.7423749491663276, "grad_norm": 0.2686549425125122, "learning_rate": 5.0266163228291995e-06, "loss": 0.3149, "step": 26974 }, { "epoch": 2.7424766165107766, "grad_norm": 0.29418841004371643, "learning_rate": 5.0262614418102385e-06, "loss": 0.3, "step": 26975 }, { "epoch": 2.7425782838552255, "grad_norm": 0.2743094563484192, "learning_rate": 5.025906560658979e-06, "loss": 0.3288, "step": 26976 }, { "epoch": 2.7426799511996744, "grad_norm": 0.2701960504055023, "learning_rate": 5.025551679377208e-06, "loss": 0.309, "step": 26977 }, { "epoch": 2.7427816185441234, "grad_norm": 0.2680610120296478, "learning_rate": 5.0251967979667135e-06, "loss": 0.3231, "step": 26978 }, { "epoch": 2.7428832858885723, "grad_norm": 0.25083139538764954, "learning_rate": 5.024841916429285e-06, "loss": 0.3178, "step": 26979 }, { "epoch": 2.7429849532330217, "grad_norm": 0.2761250436306, "learning_rate": 5.024487034766707e-06, "loss": 0.3286, "step": 26980 }, { "epoch": 2.7430866205774707, "grad_norm": 0.2642345428466797, "learning_rate": 5.024132152980771e-06, "loss": 0.3167, "step": 26981 }, { "epoch": 2.7431882879219196, "grad_norm": 0.26173514127731323, "learning_rate": 5.023777271073261e-06, "loss": 0.3414, "step": 26982 }, { "epoch": 2.7432899552663685, "grad_norm": 0.2807981073856354, "learning_rate": 5.023422389045969e-06, "loss": 0.334, "step": 26983 }, { "epoch": 2.7433916226108175, "grad_norm": 0.27306631207466125, "learning_rate": 5.02306750690068e-06, "loss": 0.3196, "step": 26984 }, { "epoch": 2.7434932899552664, "grad_norm": 0.24979177117347717, "learning_rate": 5.0227126246391815e-06, "loss": 0.3213, "step": 26985 }, { "epoch": 2.7435949572997154, "grad_norm": 0.25819042325019836, "learning_rate": 5.022357742263261e-06, "loss": 0.3524, "step": 26986 }, { "epoch": 2.7436966246441643, "grad_norm": 0.2810101807117462, "learning_rate": 5.02200285977471e-06, "loss": 0.3478, "step": 26987 }, { "epoch": 2.7437982919886132, "grad_norm": 0.27458086609840393, "learning_rate": 5.021647977175312e-06, "loss": 0.3355, "step": 26988 }, { "epoch": 2.743899959333062, "grad_norm": 0.29965800046920776, "learning_rate": 5.021293094466858e-06, "loss": 0.3593, "step": 26989 }, { "epoch": 2.744001626677511, "grad_norm": 0.27068057656288147, "learning_rate": 5.020938211651133e-06, "loss": 0.3353, "step": 26990 }, { "epoch": 2.74410329402196, "grad_norm": 0.2868996262550354, "learning_rate": 5.020583328729927e-06, "loss": 0.3517, "step": 26991 }, { "epoch": 2.744204961366409, "grad_norm": 0.25980573892593384, "learning_rate": 5.020228445705027e-06, "loss": 0.3458, "step": 26992 }, { "epoch": 2.744306628710858, "grad_norm": 0.2636463940143585, "learning_rate": 5.0198735625782216e-06, "loss": 0.2902, "step": 26993 }, { "epoch": 2.744408296055307, "grad_norm": 0.2976381778717041, "learning_rate": 5.019518679351298e-06, "loss": 0.3654, "step": 26994 }, { "epoch": 2.744509963399756, "grad_norm": 0.23590345680713654, "learning_rate": 5.019163796026043e-06, "loss": 0.3361, "step": 26995 }, { "epoch": 2.744611630744205, "grad_norm": 0.2673724293708801, "learning_rate": 5.018808912604245e-06, "loss": 0.3302, "step": 26996 }, { "epoch": 2.744713298088654, "grad_norm": 0.2722996175289154, "learning_rate": 5.018454029087694e-06, "loss": 0.3381, "step": 26997 }, { "epoch": 2.744814965433103, "grad_norm": 0.25448867678642273, "learning_rate": 5.018099145478174e-06, "loss": 0.3303, "step": 26998 }, { "epoch": 2.744916632777552, "grad_norm": 0.2667812705039978, "learning_rate": 5.017744261777476e-06, "loss": 0.3461, "step": 26999 }, { "epoch": 2.745018300122001, "grad_norm": 0.2731283903121948, "learning_rate": 5.017389377987386e-06, "loss": 0.3, "step": 27000 }, { "epoch": 2.74511996746645, "grad_norm": 0.24521790444850922, "learning_rate": 5.017034494109693e-06, "loss": 0.3224, "step": 27001 }, { "epoch": 2.745221634810899, "grad_norm": 0.25893282890319824, "learning_rate": 5.0166796101461845e-06, "loss": 0.3216, "step": 27002 }, { "epoch": 2.745323302155348, "grad_norm": 0.2709163427352905, "learning_rate": 5.016324726098647e-06, "loss": 0.3676, "step": 27003 }, { "epoch": 2.7454249694997968, "grad_norm": 0.3283638060092926, "learning_rate": 5.015969841968871e-06, "loss": 0.3494, "step": 27004 }, { "epoch": 2.7455266368442457, "grad_norm": 0.2773682475090027, "learning_rate": 5.015614957758641e-06, "loss": 0.2963, "step": 27005 }, { "epoch": 2.7456283041886946, "grad_norm": 0.2456984668970108, "learning_rate": 5.015260073469748e-06, "loss": 0.3194, "step": 27006 }, { "epoch": 2.7457299715331436, "grad_norm": 0.26799851655960083, "learning_rate": 5.014905189103977e-06, "loss": 0.3436, "step": 27007 }, { "epoch": 2.7458316388775925, "grad_norm": 0.25992563366889954, "learning_rate": 5.014550304663117e-06, "loss": 0.3279, "step": 27008 }, { "epoch": 2.7459333062220415, "grad_norm": 0.27582696080207825, "learning_rate": 5.014195420148958e-06, "loss": 0.3331, "step": 27009 }, { "epoch": 2.7460349735664904, "grad_norm": 0.2608417272567749, "learning_rate": 5.013840535563283e-06, "loss": 0.2961, "step": 27010 }, { "epoch": 2.7461366409109393, "grad_norm": 0.2548601031303406, "learning_rate": 5.013485650907885e-06, "loss": 0.3287, "step": 27011 }, { "epoch": 2.7462383082553883, "grad_norm": 0.2936866879463196, "learning_rate": 5.013130766184549e-06, "loss": 0.307, "step": 27012 }, { "epoch": 2.7463399755998372, "grad_norm": 0.24824322760105133, "learning_rate": 5.0127758813950635e-06, "loss": 0.3081, "step": 27013 }, { "epoch": 2.746441642944286, "grad_norm": 0.27328333258628845, "learning_rate": 5.012420996541216e-06, "loss": 0.3214, "step": 27014 }, { "epoch": 2.746543310288735, "grad_norm": 0.2586548328399658, "learning_rate": 5.012066111624794e-06, "loss": 0.3704, "step": 27015 }, { "epoch": 2.746644977633184, "grad_norm": 0.2732107937335968, "learning_rate": 5.011711226647587e-06, "loss": 0.3187, "step": 27016 }, { "epoch": 2.746746644977633, "grad_norm": 0.2996940016746521, "learning_rate": 5.01135634161138e-06, "loss": 0.353, "step": 27017 }, { "epoch": 2.746848312322082, "grad_norm": 0.27403613924980164, "learning_rate": 5.0110014565179625e-06, "loss": 0.3202, "step": 27018 }, { "epoch": 2.746949979666531, "grad_norm": 0.27210214734077454, "learning_rate": 5.010646571369123e-06, "loss": 0.3567, "step": 27019 }, { "epoch": 2.74705164701098, "grad_norm": 0.29061391949653625, "learning_rate": 5.0102916861666485e-06, "loss": 0.3005, "step": 27020 }, { "epoch": 2.747153314355429, "grad_norm": 0.260280042886734, "learning_rate": 5.009936800912329e-06, "loss": 0.3343, "step": 27021 }, { "epoch": 2.747254981699878, "grad_norm": 0.282577782869339, "learning_rate": 5.009581915607947e-06, "loss": 0.3301, "step": 27022 }, { "epoch": 2.747356649044327, "grad_norm": 0.28172487020492554, "learning_rate": 5.0092270302552934e-06, "loss": 0.3627, "step": 27023 }, { "epoch": 2.747458316388776, "grad_norm": 0.2541796565055847, "learning_rate": 5.008872144856159e-06, "loss": 0.35, "step": 27024 }, { "epoch": 2.747559983733225, "grad_norm": 0.2581893503665924, "learning_rate": 5.008517259412327e-06, "loss": 0.3045, "step": 27025 }, { "epoch": 2.747661651077674, "grad_norm": 0.264024019241333, "learning_rate": 5.008162373925588e-06, "loss": 0.3353, "step": 27026 }, { "epoch": 2.747763318422123, "grad_norm": 0.30565345287323, "learning_rate": 5.0078074883977275e-06, "loss": 0.3153, "step": 27027 }, { "epoch": 2.747864985766572, "grad_norm": 0.26834291219711304, "learning_rate": 5.007452602830536e-06, "loss": 0.319, "step": 27028 }, { "epoch": 2.7479666531110207, "grad_norm": 0.23915468156337738, "learning_rate": 5.0070977172258e-06, "loss": 0.3352, "step": 27029 }, { "epoch": 2.7480683204554697, "grad_norm": 0.2512165307998657, "learning_rate": 5.006742831585307e-06, "loss": 0.3348, "step": 27030 }, { "epoch": 2.7481699877999186, "grad_norm": 0.2836999297142029, "learning_rate": 5.0063879459108465e-06, "loss": 0.3261, "step": 27031 }, { "epoch": 2.7482716551443676, "grad_norm": 0.26627859473228455, "learning_rate": 5.006033060204203e-06, "loss": 0.318, "step": 27032 }, { "epoch": 2.7483733224888165, "grad_norm": 0.25954028964042664, "learning_rate": 5.005678174467167e-06, "loss": 0.3387, "step": 27033 }, { "epoch": 2.7484749898332654, "grad_norm": 0.2737533450126648, "learning_rate": 5.005323288701527e-06, "loss": 0.3079, "step": 27034 }, { "epoch": 2.7485766571777144, "grad_norm": 0.26928821206092834, "learning_rate": 5.004968402909067e-06, "loss": 0.3128, "step": 27035 }, { "epoch": 2.7486783245221633, "grad_norm": 0.27464303374290466, "learning_rate": 5.00461351709158e-06, "loss": 0.3483, "step": 27036 }, { "epoch": 2.7487799918666127, "grad_norm": 0.2610885798931122, "learning_rate": 5.00425863125085e-06, "loss": 0.3601, "step": 27037 }, { "epoch": 2.7488816592110616, "grad_norm": 0.2800824046134949, "learning_rate": 5.003903745388665e-06, "loss": 0.3362, "step": 27038 }, { "epoch": 2.7489833265555106, "grad_norm": 0.25018489360809326, "learning_rate": 5.003548859506816e-06, "loss": 0.3353, "step": 27039 }, { "epoch": 2.7490849938999595, "grad_norm": 0.27918756008148193, "learning_rate": 5.003193973607089e-06, "loss": 0.3275, "step": 27040 }, { "epoch": 2.7491866612444085, "grad_norm": 0.2689777910709381, "learning_rate": 5.002839087691269e-06, "loss": 0.3088, "step": 27041 }, { "epoch": 2.7492883285888574, "grad_norm": 0.27523136138916016, "learning_rate": 5.002484201761148e-06, "loss": 0.3271, "step": 27042 }, { "epoch": 2.7493899959333064, "grad_norm": 0.2696932554244995, "learning_rate": 5.0021293158185105e-06, "loss": 0.3178, "step": 27043 }, { "epoch": 2.7494916632777553, "grad_norm": 0.2681240737438202, "learning_rate": 5.001774429865148e-06, "loss": 0.3377, "step": 27044 }, { "epoch": 2.7495933306222042, "grad_norm": 0.2833450734615326, "learning_rate": 5.001419543902846e-06, "loss": 0.3291, "step": 27045 }, { "epoch": 2.749694997966653, "grad_norm": 0.25332534313201904, "learning_rate": 5.0010646579333925e-06, "loss": 0.3287, "step": 27046 }, { "epoch": 2.749796665311102, "grad_norm": 0.2663305997848511, "learning_rate": 5.000709771958574e-06, "loss": 0.3166, "step": 27047 }, { "epoch": 2.749898332655551, "grad_norm": 0.28351524472236633, "learning_rate": 5.000354885980181e-06, "loss": 0.3496, "step": 27048 }, { "epoch": 2.75, "grad_norm": 0.28166523575782776, "learning_rate": 5e-06, "loss": 0.3182, "step": 27049 }, { "epoch": 2.750101667344449, "grad_norm": 0.26935482025146484, "learning_rate": 4.999645114019819e-06, "loss": 0.3276, "step": 27050 }, { "epoch": 2.750203334688898, "grad_norm": 0.270290344953537, "learning_rate": 4.999290228041428e-06, "loss": 0.3111, "step": 27051 }, { "epoch": 2.750305002033347, "grad_norm": 0.2510065734386444, "learning_rate": 4.99893534206661e-06, "loss": 0.3041, "step": 27052 }, { "epoch": 2.7504066693777958, "grad_norm": 0.28736811876296997, "learning_rate": 4.9985804560971565e-06, "loss": 0.3342, "step": 27053 }, { "epoch": 2.7505083367222447, "grad_norm": 0.2743052840232849, "learning_rate": 4.998225570134854e-06, "loss": 0.3103, "step": 27054 }, { "epoch": 2.7506100040666936, "grad_norm": 0.2710787355899811, "learning_rate": 4.99787068418149e-06, "loss": 0.3169, "step": 27055 }, { "epoch": 2.7507116714111426, "grad_norm": 0.2687532305717468, "learning_rate": 4.997515798238855e-06, "loss": 0.3414, "step": 27056 }, { "epoch": 2.7508133387555915, "grad_norm": 0.2886829078197479, "learning_rate": 4.997160912308732e-06, "loss": 0.3556, "step": 27057 }, { "epoch": 2.7509150061000405, "grad_norm": 0.2514428496360779, "learning_rate": 4.996806026392914e-06, "loss": 0.3118, "step": 27058 }, { "epoch": 2.7510166734444894, "grad_norm": 0.2416660487651825, "learning_rate": 4.9964511404931855e-06, "loss": 0.3278, "step": 27059 }, { "epoch": 2.7511183407889384, "grad_norm": 0.25195181369781494, "learning_rate": 4.996096254611335e-06, "loss": 0.3343, "step": 27060 }, { "epoch": 2.7512200081333873, "grad_norm": 0.27569735050201416, "learning_rate": 4.995741368749152e-06, "loss": 0.3137, "step": 27061 }, { "epoch": 2.7513216754778367, "grad_norm": 0.27067285776138306, "learning_rate": 4.995386482908421e-06, "loss": 0.3042, "step": 27062 }, { "epoch": 2.7514233428222856, "grad_norm": 0.2825765311717987, "learning_rate": 4.995031597090934e-06, "loss": 0.3124, "step": 27063 }, { "epoch": 2.7515250101667346, "grad_norm": 0.27817490696907043, "learning_rate": 4.9946767112984755e-06, "loss": 0.3418, "step": 27064 }, { "epoch": 2.7516266775111835, "grad_norm": 0.2595234513282776, "learning_rate": 4.994321825532833e-06, "loss": 0.3173, "step": 27065 }, { "epoch": 2.7517283448556324, "grad_norm": 0.2668565511703491, "learning_rate": 4.993966939795799e-06, "loss": 0.3465, "step": 27066 }, { "epoch": 2.7518300122000814, "grad_norm": 0.25628840923309326, "learning_rate": 4.993612054089155e-06, "loss": 0.3357, "step": 27067 }, { "epoch": 2.7519316795445303, "grad_norm": 0.2557319700717926, "learning_rate": 4.993257168414694e-06, "loss": 0.348, "step": 27068 }, { "epoch": 2.7520333468889793, "grad_norm": 0.2524261176586151, "learning_rate": 4.9929022827742005e-06, "loss": 0.3458, "step": 27069 }, { "epoch": 2.752135014233428, "grad_norm": 0.2687361240386963, "learning_rate": 4.992547397169464e-06, "loss": 0.337, "step": 27070 }, { "epoch": 2.752236681577877, "grad_norm": 0.2788742184638977, "learning_rate": 4.992192511602273e-06, "loss": 0.3342, "step": 27071 }, { "epoch": 2.752338348922326, "grad_norm": 0.2749168574810028, "learning_rate": 4.991837626074414e-06, "loss": 0.3402, "step": 27072 }, { "epoch": 2.752440016266775, "grad_norm": 0.2696112096309662, "learning_rate": 4.991482740587674e-06, "loss": 0.321, "step": 27073 }, { "epoch": 2.752541683611224, "grad_norm": 0.2717476785182953, "learning_rate": 4.991127855143843e-06, "loss": 0.3077, "step": 27074 }, { "epoch": 2.752643350955673, "grad_norm": 0.2700469493865967, "learning_rate": 4.990772969744706e-06, "loss": 0.3279, "step": 27075 }, { "epoch": 2.752745018300122, "grad_norm": 0.27062898874282837, "learning_rate": 4.990418084392055e-06, "loss": 0.3275, "step": 27076 }, { "epoch": 2.752846685644571, "grad_norm": 0.2627062201499939, "learning_rate": 4.990063199087674e-06, "loss": 0.3438, "step": 27077 }, { "epoch": 2.75294835298902, "grad_norm": 0.2480342835187912, "learning_rate": 4.989708313833352e-06, "loss": 0.307, "step": 27078 }, { "epoch": 2.753050020333469, "grad_norm": 0.2848871350288391, "learning_rate": 4.989353428630878e-06, "loss": 0.3288, "step": 27079 }, { "epoch": 2.753151687677918, "grad_norm": 0.2649403214454651, "learning_rate": 4.9889985434820375e-06, "loss": 0.3188, "step": 27080 }, { "epoch": 2.753253355022367, "grad_norm": 0.2860822379589081, "learning_rate": 4.988643658388622e-06, "loss": 0.3557, "step": 27081 }, { "epoch": 2.753355022366816, "grad_norm": 0.2714882493019104, "learning_rate": 4.988288773352415e-06, "loss": 0.3198, "step": 27082 }, { "epoch": 2.753456689711265, "grad_norm": 0.27033156156539917, "learning_rate": 4.987933888375208e-06, "loss": 0.3683, "step": 27083 }, { "epoch": 2.753558357055714, "grad_norm": 0.24859976768493652, "learning_rate": 4.987579003458786e-06, "loss": 0.3312, "step": 27084 }, { "epoch": 2.7536600244001628, "grad_norm": 0.2602357864379883, "learning_rate": 4.987224118604937e-06, "loss": 0.3259, "step": 27085 }, { "epoch": 2.7537616917446117, "grad_norm": 0.2774669826030731, "learning_rate": 4.986869233815452e-06, "loss": 0.3634, "step": 27086 }, { "epoch": 2.7538633590890607, "grad_norm": 0.2716588079929352, "learning_rate": 4.9865143490921156e-06, "loss": 0.3174, "step": 27087 }, { "epoch": 2.7539650264335096, "grad_norm": 0.2804897427558899, "learning_rate": 4.986159464436718e-06, "loss": 0.3273, "step": 27088 }, { "epoch": 2.7540666937779585, "grad_norm": 0.2834257483482361, "learning_rate": 4.985804579851044e-06, "loss": 0.3625, "step": 27089 }, { "epoch": 2.7541683611224075, "grad_norm": 0.27299967408180237, "learning_rate": 4.985449695336883e-06, "loss": 0.3214, "step": 27090 }, { "epoch": 2.7542700284668564, "grad_norm": 0.25604861974716187, "learning_rate": 4.985094810896025e-06, "loss": 0.32, "step": 27091 }, { "epoch": 2.7543716958113054, "grad_norm": 0.2636747360229492, "learning_rate": 4.984739926530254e-06, "loss": 0.299, "step": 27092 }, { "epoch": 2.7544733631557543, "grad_norm": 0.2692825496196747, "learning_rate": 4.984385042241361e-06, "loss": 0.3399, "step": 27093 }, { "epoch": 2.7545750305002032, "grad_norm": 0.2982636094093323, "learning_rate": 4.984030158031131e-06, "loss": 0.3401, "step": 27094 }, { "epoch": 2.754676697844652, "grad_norm": 0.2629989981651306, "learning_rate": 4.983675273901354e-06, "loss": 0.3438, "step": 27095 }, { "epoch": 2.754778365189101, "grad_norm": 0.26842668652534485, "learning_rate": 4.983320389853817e-06, "loss": 0.3469, "step": 27096 }, { "epoch": 2.75488003253355, "grad_norm": 0.24418513476848602, "learning_rate": 4.982965505890308e-06, "loss": 0.3345, "step": 27097 }, { "epoch": 2.754981699877999, "grad_norm": 0.2708331048488617, "learning_rate": 4.982610622012617e-06, "loss": 0.3554, "step": 27098 }, { "epoch": 2.755083367222448, "grad_norm": 0.2660263776779175, "learning_rate": 4.982255738222525e-06, "loss": 0.2956, "step": 27099 }, { "epoch": 2.755185034566897, "grad_norm": 0.2642459571361542, "learning_rate": 4.981900854521826e-06, "loss": 0.3298, "step": 27100 }, { "epoch": 2.755286701911346, "grad_norm": 0.2740810513496399, "learning_rate": 4.981545970912308e-06, "loss": 0.3034, "step": 27101 }, { "epoch": 2.755388369255795, "grad_norm": 0.25235000252723694, "learning_rate": 4.981191087395755e-06, "loss": 0.3484, "step": 27102 }, { "epoch": 2.755490036600244, "grad_norm": 0.25918957591056824, "learning_rate": 4.980836203973959e-06, "loss": 0.3173, "step": 27103 }, { "epoch": 2.755591703944693, "grad_norm": 0.30465203523635864, "learning_rate": 4.9804813206487036e-06, "loss": 0.3866, "step": 27104 }, { "epoch": 2.755693371289142, "grad_norm": 0.263528048992157, "learning_rate": 4.980126437421779e-06, "loss": 0.3119, "step": 27105 }, { "epoch": 2.755795038633591, "grad_norm": 0.24805904924869537, "learning_rate": 4.979771554294973e-06, "loss": 0.3461, "step": 27106 }, { "epoch": 2.75589670597804, "grad_norm": 0.2721361815929413, "learning_rate": 4.979416671270073e-06, "loss": 0.2984, "step": 27107 }, { "epoch": 2.755998373322489, "grad_norm": 0.29461905360221863, "learning_rate": 4.979061788348869e-06, "loss": 0.3415, "step": 27108 }, { "epoch": 2.756100040666938, "grad_norm": 0.2683199942111969, "learning_rate": 4.978706905533143e-06, "loss": 0.3105, "step": 27109 }, { "epoch": 2.7562017080113868, "grad_norm": 0.25733211636543274, "learning_rate": 4.9783520228246885e-06, "loss": 0.3027, "step": 27110 }, { "epoch": 2.7563033753558357, "grad_norm": 0.27719447016716003, "learning_rate": 4.977997140225292e-06, "loss": 0.3612, "step": 27111 }, { "epoch": 2.7564050427002846, "grad_norm": 0.2691681385040283, "learning_rate": 4.977642257736739e-06, "loss": 0.3522, "step": 27112 }, { "epoch": 2.7565067100447336, "grad_norm": 0.2645912766456604, "learning_rate": 4.977287375360822e-06, "loss": 0.3182, "step": 27113 }, { "epoch": 2.7566083773891825, "grad_norm": 0.265607625246048, "learning_rate": 4.976932493099322e-06, "loss": 0.3421, "step": 27114 }, { "epoch": 2.7567100447336315, "grad_norm": 0.2667123079299927, "learning_rate": 4.976577610954032e-06, "loss": 0.3171, "step": 27115 }, { "epoch": 2.7568117120780804, "grad_norm": 0.2677789032459259, "learning_rate": 4.976222728926741e-06, "loss": 0.2945, "step": 27116 }, { "epoch": 2.7569133794225293, "grad_norm": 0.26184096932411194, "learning_rate": 4.97586784701923e-06, "loss": 0.321, "step": 27117 }, { "epoch": 2.7570150467669783, "grad_norm": 0.2611648440361023, "learning_rate": 4.9755129652332944e-06, "loss": 0.3143, "step": 27118 }, { "epoch": 2.7571167141114277, "grad_norm": 0.26573196053504944, "learning_rate": 4.975158083570716e-06, "loss": 0.3894, "step": 27119 }, { "epoch": 2.7572183814558766, "grad_norm": 0.27836787700653076, "learning_rate": 4.9748032020332865e-06, "loss": 0.3207, "step": 27120 }, { "epoch": 2.7573200488003256, "grad_norm": 0.25961875915527344, "learning_rate": 4.974448320622794e-06, "loss": 0.3218, "step": 27121 }, { "epoch": 2.7574217161447745, "grad_norm": 0.25595125555992126, "learning_rate": 4.974093439341021e-06, "loss": 0.2986, "step": 27122 }, { "epoch": 2.7575233834892234, "grad_norm": 0.25444120168685913, "learning_rate": 4.973738558189764e-06, "loss": 0.3271, "step": 27123 }, { "epoch": 2.7576250508336724, "grad_norm": 0.26279041171073914, "learning_rate": 4.973383677170802e-06, "loss": 0.3377, "step": 27124 }, { "epoch": 2.7577267181781213, "grad_norm": 0.27561503648757935, "learning_rate": 4.973028796285927e-06, "loss": 0.3532, "step": 27125 }, { "epoch": 2.7578283855225703, "grad_norm": 0.2740356922149658, "learning_rate": 4.97267391553693e-06, "loss": 0.352, "step": 27126 }, { "epoch": 2.757930052867019, "grad_norm": 0.2630993127822876, "learning_rate": 4.972319034925591e-06, "loss": 0.3573, "step": 27127 }, { "epoch": 2.758031720211468, "grad_norm": 0.28248193860054016, "learning_rate": 4.971964154453706e-06, "loss": 0.3251, "step": 27128 }, { "epoch": 2.758133387555917, "grad_norm": 0.25160396099090576, "learning_rate": 4.971609274123056e-06, "loss": 0.3194, "step": 27129 }, { "epoch": 2.758235054900366, "grad_norm": 0.24961186945438385, "learning_rate": 4.971254393935432e-06, "loss": 0.3384, "step": 27130 }, { "epoch": 2.758336722244815, "grad_norm": 0.2750031054019928, "learning_rate": 4.970899513892623e-06, "loss": 0.3209, "step": 27131 }, { "epoch": 2.758438389589264, "grad_norm": 0.248885840177536, "learning_rate": 4.970544633996414e-06, "loss": 0.3334, "step": 27132 }, { "epoch": 2.758540056933713, "grad_norm": 0.2605620324611664, "learning_rate": 4.970189754248596e-06, "loss": 0.3381, "step": 27133 }, { "epoch": 2.758641724278162, "grad_norm": 0.2600421607494354, "learning_rate": 4.969834874650952e-06, "loss": 0.3019, "step": 27134 }, { "epoch": 2.7587433916226107, "grad_norm": 0.26316508650779724, "learning_rate": 4.969479995205274e-06, "loss": 0.3244, "step": 27135 }, { "epoch": 2.7588450589670597, "grad_norm": 0.2518293559551239, "learning_rate": 4.969125115913351e-06, "loss": 0.3051, "step": 27136 }, { "epoch": 2.7589467263115086, "grad_norm": 0.2541283965110779, "learning_rate": 4.9687702367769645e-06, "loss": 0.3174, "step": 27137 }, { "epoch": 2.7590483936559576, "grad_norm": 0.24590712785720825, "learning_rate": 4.9684153577979095e-06, "loss": 0.3412, "step": 27138 }, { "epoch": 2.7591500610004065, "grad_norm": 0.2667340338230133, "learning_rate": 4.968060478977969e-06, "loss": 0.3368, "step": 27139 }, { "epoch": 2.7592517283448554, "grad_norm": 0.26628053188323975, "learning_rate": 4.96770560031893e-06, "loss": 0.3207, "step": 27140 }, { "epoch": 2.7593533956893044, "grad_norm": 0.268297016620636, "learning_rate": 4.967350721822586e-06, "loss": 0.3179, "step": 27141 }, { "epoch": 2.7594550630337533, "grad_norm": 0.26493754982948303, "learning_rate": 4.966995843490719e-06, "loss": 0.3182, "step": 27142 }, { "epoch": 2.7595567303782023, "grad_norm": 0.3188357949256897, "learning_rate": 4.966640965325121e-06, "loss": 0.3485, "step": 27143 }, { "epoch": 2.7596583977226516, "grad_norm": 0.2550068199634552, "learning_rate": 4.966286087327577e-06, "loss": 0.3381, "step": 27144 }, { "epoch": 2.7597600650671006, "grad_norm": 0.2507435381412506, "learning_rate": 4.965931209499876e-06, "loss": 0.3069, "step": 27145 }, { "epoch": 2.7598617324115495, "grad_norm": 0.2617136538028717, "learning_rate": 4.965576331843804e-06, "loss": 0.3231, "step": 27146 }, { "epoch": 2.7599633997559985, "grad_norm": 0.25141188502311707, "learning_rate": 4.96522145436115e-06, "loss": 0.3151, "step": 27147 }, { "epoch": 2.7600650671004474, "grad_norm": 0.2650716006755829, "learning_rate": 4.964866577053704e-06, "loss": 0.3275, "step": 27148 }, { "epoch": 2.7601667344448964, "grad_norm": 0.2653629183769226, "learning_rate": 4.964511699923251e-06, "loss": 0.3734, "step": 27149 }, { "epoch": 2.7602684017893453, "grad_norm": 0.2935962677001953, "learning_rate": 4.964156822971581e-06, "loss": 0.3574, "step": 27150 }, { "epoch": 2.7603700691337942, "grad_norm": 0.28671354055404663, "learning_rate": 4.963801946200478e-06, "loss": 0.3436, "step": 27151 }, { "epoch": 2.760471736478243, "grad_norm": 0.27556565403938293, "learning_rate": 4.9634470696117324e-06, "loss": 0.3374, "step": 27152 }, { "epoch": 2.760573403822692, "grad_norm": 0.24973271787166595, "learning_rate": 4.963092193207134e-06, "loss": 0.3639, "step": 27153 }, { "epoch": 2.760675071167141, "grad_norm": 0.26843807101249695, "learning_rate": 4.9627373169884665e-06, "loss": 0.3412, "step": 27154 }, { "epoch": 2.76077673851159, "grad_norm": 0.2471611201763153, "learning_rate": 4.962382440957521e-06, "loss": 0.3218, "step": 27155 }, { "epoch": 2.760878405856039, "grad_norm": 0.2826171815395355, "learning_rate": 4.9620275651160825e-06, "loss": 0.3242, "step": 27156 }, { "epoch": 2.760980073200488, "grad_norm": 0.2679153382778168, "learning_rate": 4.96167268946594e-06, "loss": 0.3385, "step": 27157 }, { "epoch": 2.761081740544937, "grad_norm": 0.29124119877815247, "learning_rate": 4.961317814008883e-06, "loss": 0.3291, "step": 27158 }, { "epoch": 2.7611834078893858, "grad_norm": 0.27632173895835876, "learning_rate": 4.960962938746696e-06, "loss": 0.3568, "step": 27159 }, { "epoch": 2.761285075233835, "grad_norm": 0.2633279860019684, "learning_rate": 4.960608063681169e-06, "loss": 0.312, "step": 27160 }, { "epoch": 2.761386742578284, "grad_norm": 0.2813170254230499, "learning_rate": 4.960253188814089e-06, "loss": 0.3487, "step": 27161 }, { "epoch": 2.761488409922733, "grad_norm": 0.26563605666160583, "learning_rate": 4.959898314147244e-06, "loss": 0.364, "step": 27162 }, { "epoch": 2.761590077267182, "grad_norm": 0.28063952922821045, "learning_rate": 4.9595434396824225e-06, "loss": 0.3564, "step": 27163 }, { "epoch": 2.761691744611631, "grad_norm": 0.2714196741580963, "learning_rate": 4.9591885654214106e-06, "loss": 0.3342, "step": 27164 }, { "epoch": 2.76179341195608, "grad_norm": 0.2725609838962555, "learning_rate": 4.958833691365998e-06, "loss": 0.3366, "step": 27165 }, { "epoch": 2.761895079300529, "grad_norm": 0.2549504339694977, "learning_rate": 4.9584788175179706e-06, "loss": 0.3088, "step": 27166 }, { "epoch": 2.7619967466449777, "grad_norm": 0.2535223662853241, "learning_rate": 4.958123943879117e-06, "loss": 0.3532, "step": 27167 }, { "epoch": 2.7620984139894267, "grad_norm": 0.28388485312461853, "learning_rate": 4.957769070451226e-06, "loss": 0.3249, "step": 27168 }, { "epoch": 2.7622000813338756, "grad_norm": 0.27312517166137695, "learning_rate": 4.957414197236083e-06, "loss": 0.3353, "step": 27169 }, { "epoch": 2.7623017486783246, "grad_norm": 0.26531827449798584, "learning_rate": 4.957059324235478e-06, "loss": 0.374, "step": 27170 }, { "epoch": 2.7624034160227735, "grad_norm": 0.25110647082328796, "learning_rate": 4.956704451451198e-06, "loss": 0.3452, "step": 27171 }, { "epoch": 2.7625050833672224, "grad_norm": 0.2550225555896759, "learning_rate": 4.95634957888503e-06, "loss": 0.3317, "step": 27172 }, { "epoch": 2.7626067507116714, "grad_norm": 0.27026504278182983, "learning_rate": 4.9559947065387635e-06, "loss": 0.3174, "step": 27173 }, { "epoch": 2.7627084180561203, "grad_norm": 0.2702741026878357, "learning_rate": 4.955639834414185e-06, "loss": 0.3325, "step": 27174 }, { "epoch": 2.7628100854005693, "grad_norm": 0.3567042350769043, "learning_rate": 4.955284962513082e-06, "loss": 0.3395, "step": 27175 }, { "epoch": 2.762911752745018, "grad_norm": 0.2579345703125, "learning_rate": 4.954930090837242e-06, "loss": 0.3359, "step": 27176 }, { "epoch": 2.763013420089467, "grad_norm": 0.2637925446033478, "learning_rate": 4.954575219388455e-06, "loss": 0.3078, "step": 27177 }, { "epoch": 2.763115087433916, "grad_norm": 0.2666440010070801, "learning_rate": 4.954220348168507e-06, "loss": 0.3314, "step": 27178 }, { "epoch": 2.763216754778365, "grad_norm": 0.25655290484428406, "learning_rate": 4.953865477179185e-06, "loss": 0.3231, "step": 27179 }, { "epoch": 2.763318422122814, "grad_norm": 0.26436811685562134, "learning_rate": 4.95351060642228e-06, "loss": 0.3419, "step": 27180 }, { "epoch": 2.763420089467263, "grad_norm": 0.274893581867218, "learning_rate": 4.953155735899575e-06, "loss": 0.3217, "step": 27181 }, { "epoch": 2.763521756811712, "grad_norm": 0.259880930185318, "learning_rate": 4.952800865612861e-06, "loss": 0.3184, "step": 27182 }, { "epoch": 2.763623424156161, "grad_norm": 0.2526319921016693, "learning_rate": 4.952445995563925e-06, "loss": 0.3354, "step": 27183 }, { "epoch": 2.7637250915006097, "grad_norm": 0.2675553262233734, "learning_rate": 4.952091125754555e-06, "loss": 0.3367, "step": 27184 }, { "epoch": 2.763826758845059, "grad_norm": 0.27149516344070435, "learning_rate": 4.951736256186539e-06, "loss": 0.3204, "step": 27185 }, { "epoch": 2.763928426189508, "grad_norm": 0.2774961590766907, "learning_rate": 4.9513813868616635e-06, "loss": 0.3243, "step": 27186 }, { "epoch": 2.764030093533957, "grad_norm": 0.26716887950897217, "learning_rate": 4.951026517781717e-06, "loss": 0.3188, "step": 27187 }, { "epoch": 2.764131760878406, "grad_norm": 0.2563121020793915, "learning_rate": 4.9506716489484875e-06, "loss": 0.323, "step": 27188 }, { "epoch": 2.764233428222855, "grad_norm": 0.2995438873767853, "learning_rate": 4.950316780363763e-06, "loss": 0.2805, "step": 27189 }, { "epoch": 2.764335095567304, "grad_norm": 0.2581411898136139, "learning_rate": 4.94996191202933e-06, "loss": 0.3751, "step": 27190 }, { "epoch": 2.7644367629117528, "grad_norm": 0.26747241616249084, "learning_rate": 4.949607043946977e-06, "loss": 0.3213, "step": 27191 }, { "epoch": 2.7645384302562017, "grad_norm": 0.2665998339653015, "learning_rate": 4.949252176118492e-06, "loss": 0.3436, "step": 27192 }, { "epoch": 2.7646400976006507, "grad_norm": 0.2650568187236786, "learning_rate": 4.948897308545663e-06, "loss": 0.3652, "step": 27193 }, { "epoch": 2.7647417649450996, "grad_norm": 0.2836722135543823, "learning_rate": 4.948542441230276e-06, "loss": 0.3703, "step": 27194 }, { "epoch": 2.7648434322895485, "grad_norm": 0.27315622568130493, "learning_rate": 4.948187574174122e-06, "loss": 0.3439, "step": 27195 }, { "epoch": 2.7649450996339975, "grad_norm": 0.25491514801979065, "learning_rate": 4.947832707378984e-06, "loss": 0.3151, "step": 27196 }, { "epoch": 2.7650467669784464, "grad_norm": 0.2675582468509674, "learning_rate": 4.947477840846654e-06, "loss": 0.3344, "step": 27197 }, { "epoch": 2.7651484343228954, "grad_norm": 0.2570292055606842, "learning_rate": 4.947122974578918e-06, "loss": 0.354, "step": 27198 }, { "epoch": 2.7652501016673443, "grad_norm": 0.25720784068107605, "learning_rate": 4.946768108577564e-06, "loss": 0.3433, "step": 27199 }, { "epoch": 2.7653517690117932, "grad_norm": 0.2681853473186493, "learning_rate": 4.94641324284438e-06, "loss": 0.3433, "step": 27200 }, { "epoch": 2.7654534363562426, "grad_norm": 0.2600622773170471, "learning_rate": 4.946058377381153e-06, "loss": 0.3556, "step": 27201 }, { "epoch": 2.7655551037006916, "grad_norm": 0.2650917172431946, "learning_rate": 4.94570351218967e-06, "loss": 0.3455, "step": 27202 }, { "epoch": 2.7656567710451405, "grad_norm": 0.27989083528518677, "learning_rate": 4.945348647271721e-06, "loss": 0.329, "step": 27203 }, { "epoch": 2.7657584383895895, "grad_norm": 0.2644319534301758, "learning_rate": 4.944993782629092e-06, "loss": 0.3073, "step": 27204 }, { "epoch": 2.7658601057340384, "grad_norm": 0.267206609249115, "learning_rate": 4.944638918263572e-06, "loss": 0.3206, "step": 27205 }, { "epoch": 2.7659617730784873, "grad_norm": 0.26923397183418274, "learning_rate": 4.944284054176947e-06, "loss": 0.3457, "step": 27206 }, { "epoch": 2.7660634404229363, "grad_norm": 0.28898313641548157, "learning_rate": 4.943929190371006e-06, "loss": 0.3344, "step": 27207 }, { "epoch": 2.7661651077673852, "grad_norm": 0.25811678171157837, "learning_rate": 4.943574326847538e-06, "loss": 0.3389, "step": 27208 }, { "epoch": 2.766266775111834, "grad_norm": 0.25716426968574524, "learning_rate": 4.943219463608327e-06, "loss": 0.3448, "step": 27209 }, { "epoch": 2.766368442456283, "grad_norm": 0.2479882538318634, "learning_rate": 4.942864600655164e-06, "loss": 0.3245, "step": 27210 }, { "epoch": 2.766470109800732, "grad_norm": 0.24817796051502228, "learning_rate": 4.942509737989835e-06, "loss": 0.3319, "step": 27211 }, { "epoch": 2.766571777145181, "grad_norm": 0.25407108664512634, "learning_rate": 4.9421548756141275e-06, "loss": 0.3243, "step": 27212 }, { "epoch": 2.76667344448963, "grad_norm": 0.2672809064388275, "learning_rate": 4.941800013529831e-06, "loss": 0.3654, "step": 27213 }, { "epoch": 2.766775111834079, "grad_norm": 0.2655864953994751, "learning_rate": 4.941445151738732e-06, "loss": 0.3246, "step": 27214 }, { "epoch": 2.766876779178528, "grad_norm": 0.23406225442886353, "learning_rate": 4.941090290242619e-06, "loss": 0.3263, "step": 27215 }, { "epoch": 2.7669784465229768, "grad_norm": 0.2577935457229614, "learning_rate": 4.940735429043279e-06, "loss": 0.3376, "step": 27216 }, { "epoch": 2.7670801138674257, "grad_norm": 0.27375367283821106, "learning_rate": 4.940380568142498e-06, "loss": 0.3181, "step": 27217 }, { "epoch": 2.7671817812118746, "grad_norm": 0.2909163236618042, "learning_rate": 4.9400257075420675e-06, "loss": 0.3463, "step": 27218 }, { "epoch": 2.7672834485563236, "grad_norm": 0.2970130145549774, "learning_rate": 4.939670847243773e-06, "loss": 0.3431, "step": 27219 }, { "epoch": 2.7673851159007725, "grad_norm": 0.2626165449619293, "learning_rate": 4.939315987249402e-06, "loss": 0.3445, "step": 27220 }, { "epoch": 2.7674867832452215, "grad_norm": 0.26014453172683716, "learning_rate": 4.938961127560743e-06, "loss": 0.349, "step": 27221 }, { "epoch": 2.7675884505896704, "grad_norm": 0.28668278455734253, "learning_rate": 4.938606268179582e-06, "loss": 0.3503, "step": 27222 }, { "epoch": 2.7676901179341193, "grad_norm": 0.2712262272834778, "learning_rate": 4.93825140910771e-06, "loss": 0.3427, "step": 27223 }, { "epoch": 2.7677917852785683, "grad_norm": 0.28526970744132996, "learning_rate": 4.937896550346912e-06, "loss": 0.3014, "step": 27224 }, { "epoch": 2.7678934526230172, "grad_norm": 0.26101040840148926, "learning_rate": 4.937541691898977e-06, "loss": 0.3286, "step": 27225 }, { "epoch": 2.7679951199674666, "grad_norm": 0.26266080141067505, "learning_rate": 4.937186833765691e-06, "loss": 0.3402, "step": 27226 }, { "epoch": 2.7680967873119156, "grad_norm": 0.2830319404602051, "learning_rate": 4.936831975948842e-06, "loss": 0.3439, "step": 27227 }, { "epoch": 2.7681984546563645, "grad_norm": 0.27973079681396484, "learning_rate": 4.936477118450221e-06, "loss": 0.3236, "step": 27228 }, { "epoch": 2.7683001220008134, "grad_norm": 0.2536112070083618, "learning_rate": 4.936122261271611e-06, "loss": 0.3293, "step": 27229 }, { "epoch": 2.7684017893452624, "grad_norm": 0.271419882774353, "learning_rate": 4.935767404414803e-06, "loss": 0.3372, "step": 27230 }, { "epoch": 2.7685034566897113, "grad_norm": 0.27671560645103455, "learning_rate": 4.9354125478815835e-06, "loss": 0.3248, "step": 27231 }, { "epoch": 2.7686051240341603, "grad_norm": 0.2732761800289154, "learning_rate": 4.935057691673739e-06, "loss": 0.3362, "step": 27232 }, { "epoch": 2.768706791378609, "grad_norm": 0.24508757889270782, "learning_rate": 4.934702835793061e-06, "loss": 0.3163, "step": 27233 }, { "epoch": 2.768808458723058, "grad_norm": 0.27104708552360535, "learning_rate": 4.934347980241332e-06, "loss": 0.3459, "step": 27234 }, { "epoch": 2.768910126067507, "grad_norm": 0.25550395250320435, "learning_rate": 4.933993125020344e-06, "loss": 0.3279, "step": 27235 }, { "epoch": 2.769011793411956, "grad_norm": 0.25551503896713257, "learning_rate": 4.933638270131882e-06, "loss": 0.3414, "step": 27236 }, { "epoch": 2.769113460756405, "grad_norm": 0.26967617869377136, "learning_rate": 4.933283415577734e-06, "loss": 0.3423, "step": 27237 }, { "epoch": 2.769215128100854, "grad_norm": 0.258694052696228, "learning_rate": 4.93292856135969e-06, "loss": 0.3243, "step": 27238 }, { "epoch": 2.769316795445303, "grad_norm": 0.2905120551586151, "learning_rate": 4.932573707479535e-06, "loss": 0.3549, "step": 27239 }, { "epoch": 2.769418462789752, "grad_norm": 0.27275940775871277, "learning_rate": 4.9322188539390585e-06, "loss": 0.3499, "step": 27240 }, { "epoch": 2.7695201301342007, "grad_norm": 0.26939353346824646, "learning_rate": 4.931864000740046e-06, "loss": 0.3123, "step": 27241 }, { "epoch": 2.76962179747865, "grad_norm": 0.2638494074344635, "learning_rate": 4.931509147884287e-06, "loss": 0.3313, "step": 27242 }, { "epoch": 2.769723464823099, "grad_norm": 0.2520413100719452, "learning_rate": 4.931154295373569e-06, "loss": 0.3297, "step": 27243 }, { "epoch": 2.769825132167548, "grad_norm": 0.27112820744514465, "learning_rate": 4.930799443209678e-06, "loss": 0.3432, "step": 27244 }, { "epoch": 2.769926799511997, "grad_norm": 0.2791329026222229, "learning_rate": 4.9304445913944056e-06, "loss": 0.3526, "step": 27245 }, { "epoch": 2.770028466856446, "grad_norm": 0.26242077350616455, "learning_rate": 4.9300897399295346e-06, "loss": 0.2951, "step": 27246 }, { "epoch": 2.770130134200895, "grad_norm": 0.2895351052284241, "learning_rate": 4.929734888816855e-06, "loss": 0.3505, "step": 27247 }, { "epoch": 2.7702318015453438, "grad_norm": 0.279903769493103, "learning_rate": 4.929380038058155e-06, "loss": 0.3311, "step": 27248 }, { "epoch": 2.7703334688897927, "grad_norm": 0.2568734288215637, "learning_rate": 4.929025187655221e-06, "loss": 0.323, "step": 27249 }, { "epoch": 2.7704351362342416, "grad_norm": 0.27052757143974304, "learning_rate": 4.928670337609844e-06, "loss": 0.3108, "step": 27250 }, { "epoch": 2.7705368035786906, "grad_norm": 0.26421117782592773, "learning_rate": 4.9283154879238045e-06, "loss": 0.3255, "step": 27251 }, { "epoch": 2.7706384709231395, "grad_norm": 0.2735801637172699, "learning_rate": 4.927960638598896e-06, "loss": 0.3345, "step": 27252 }, { "epoch": 2.7707401382675885, "grad_norm": 0.24565474689006805, "learning_rate": 4.927605789636906e-06, "loss": 0.2834, "step": 27253 }, { "epoch": 2.7708418056120374, "grad_norm": 0.2694908678531647, "learning_rate": 4.92725094103962e-06, "loss": 0.299, "step": 27254 }, { "epoch": 2.7709434729564864, "grad_norm": 0.25489142537117004, "learning_rate": 4.9268960928088275e-06, "loss": 0.3461, "step": 27255 }, { "epoch": 2.7710451403009353, "grad_norm": 0.2829594612121582, "learning_rate": 4.926541244946313e-06, "loss": 0.3165, "step": 27256 }, { "epoch": 2.7711468076453842, "grad_norm": 0.25223401188850403, "learning_rate": 4.926186397453867e-06, "loss": 0.3111, "step": 27257 }, { "epoch": 2.771248474989833, "grad_norm": 0.2658270299434662, "learning_rate": 4.925831550333278e-06, "loss": 0.3498, "step": 27258 }, { "epoch": 2.771350142334282, "grad_norm": 0.271280974149704, "learning_rate": 4.92547670358633e-06, "loss": 0.3334, "step": 27259 }, { "epoch": 2.771451809678731, "grad_norm": 0.2730107605457306, "learning_rate": 4.925121857214815e-06, "loss": 0.3483, "step": 27260 }, { "epoch": 2.77155347702318, "grad_norm": 0.2641219198703766, "learning_rate": 4.924767011220515e-06, "loss": 0.3243, "step": 27261 }, { "epoch": 2.771655144367629, "grad_norm": 0.2505798041820526, "learning_rate": 4.924412165605223e-06, "loss": 0.3585, "step": 27262 }, { "epoch": 2.771756811712078, "grad_norm": 0.27864503860473633, "learning_rate": 4.9240573203707245e-06, "loss": 0.3447, "step": 27263 }, { "epoch": 2.771858479056527, "grad_norm": 0.27230575680732727, "learning_rate": 4.9237024755188066e-06, "loss": 0.3288, "step": 27264 }, { "epoch": 2.7719601464009758, "grad_norm": 0.28061485290527344, "learning_rate": 4.9233476310512595e-06, "loss": 0.301, "step": 27265 }, { "epoch": 2.7720618137454247, "grad_norm": 0.29285240173339844, "learning_rate": 4.9229927869698654e-06, "loss": 0.3091, "step": 27266 }, { "epoch": 2.772163481089874, "grad_norm": 0.25776588916778564, "learning_rate": 4.922637943276417e-06, "loss": 0.3059, "step": 27267 }, { "epoch": 2.772265148434323, "grad_norm": 0.28825974464416504, "learning_rate": 4.922283099972701e-06, "loss": 0.353, "step": 27268 }, { "epoch": 2.772366815778772, "grad_norm": 0.26589435338974, "learning_rate": 4.9219282570605025e-06, "loss": 0.3365, "step": 27269 }, { "epoch": 2.772468483123221, "grad_norm": 0.27423903346061707, "learning_rate": 4.921573414541614e-06, "loss": 0.3206, "step": 27270 }, { "epoch": 2.77257015046767, "grad_norm": 0.26406130194664, "learning_rate": 4.921218572417816e-06, "loss": 0.3063, "step": 27271 }, { "epoch": 2.772671817812119, "grad_norm": 0.27611202001571655, "learning_rate": 4.920863730690902e-06, "loss": 0.3285, "step": 27272 }, { "epoch": 2.7727734851565677, "grad_norm": 0.27079638838768005, "learning_rate": 4.92050888936266e-06, "loss": 0.2939, "step": 27273 }, { "epoch": 2.7728751525010167, "grad_norm": 0.2807498872280121, "learning_rate": 4.920154048434871e-06, "loss": 0.331, "step": 27274 }, { "epoch": 2.7729768198454656, "grad_norm": 0.26513466238975525, "learning_rate": 4.919799207909331e-06, "loss": 0.3364, "step": 27275 }, { "epoch": 2.7730784871899146, "grad_norm": 0.2834254801273346, "learning_rate": 4.91944436778782e-06, "loss": 0.3771, "step": 27276 }, { "epoch": 2.7731801545343635, "grad_norm": 0.29381147027015686, "learning_rate": 4.91908952807213e-06, "loss": 0.3479, "step": 27277 }, { "epoch": 2.7732818218788124, "grad_norm": 0.26457440853118896, "learning_rate": 4.9187346887640505e-06, "loss": 0.357, "step": 27278 }, { "epoch": 2.7733834892232614, "grad_norm": 0.276531457901001, "learning_rate": 4.918379849865363e-06, "loss": 0.3416, "step": 27279 }, { "epoch": 2.7734851565677103, "grad_norm": 0.2819327712059021, "learning_rate": 4.918025011377861e-06, "loss": 0.3153, "step": 27280 }, { "epoch": 2.7735868239121593, "grad_norm": 0.2786824405193329, "learning_rate": 4.917670173303327e-06, "loss": 0.3104, "step": 27281 }, { "epoch": 2.773688491256608, "grad_norm": 0.2576340436935425, "learning_rate": 4.917315335643552e-06, "loss": 0.3436, "step": 27282 }, { "epoch": 2.7737901586010576, "grad_norm": 0.26130345463752747, "learning_rate": 4.916960498400324e-06, "loss": 0.3307, "step": 27283 }, { "epoch": 2.7738918259455065, "grad_norm": 0.269684374332428, "learning_rate": 4.916605661575427e-06, "loss": 0.3573, "step": 27284 }, { "epoch": 2.7739934932899555, "grad_norm": 0.29174521565437317, "learning_rate": 4.916250825170654e-06, "loss": 0.3282, "step": 27285 }, { "epoch": 2.7740951606344044, "grad_norm": 0.2763127386569977, "learning_rate": 4.915895989187788e-06, "loss": 0.374, "step": 27286 }, { "epoch": 2.7741968279788534, "grad_norm": 0.2722875773906708, "learning_rate": 4.915541153628616e-06, "loss": 0.3444, "step": 27287 }, { "epoch": 2.7742984953233023, "grad_norm": 0.26266810297966003, "learning_rate": 4.91518631849493e-06, "loss": 0.3569, "step": 27288 }, { "epoch": 2.7744001626677512, "grad_norm": 0.29163116216659546, "learning_rate": 4.914831483788513e-06, "loss": 0.3165, "step": 27289 }, { "epoch": 2.7745018300122, "grad_norm": 0.2740251421928406, "learning_rate": 4.914476649511158e-06, "loss": 0.3277, "step": 27290 }, { "epoch": 2.774603497356649, "grad_norm": 0.2804586887359619, "learning_rate": 4.9141218156646466e-06, "loss": 0.3391, "step": 27291 }, { "epoch": 2.774705164701098, "grad_norm": 0.29880425333976746, "learning_rate": 4.9137669822507685e-06, "loss": 0.3636, "step": 27292 }, { "epoch": 2.774806832045547, "grad_norm": 0.2612132132053375, "learning_rate": 4.913412149271316e-06, "loss": 0.3077, "step": 27293 }, { "epoch": 2.774908499389996, "grad_norm": 0.27755582332611084, "learning_rate": 4.913057316728067e-06, "loss": 0.3664, "step": 27294 }, { "epoch": 2.775010166734445, "grad_norm": 0.2798178493976593, "learning_rate": 4.91270248462282e-06, "loss": 0.3441, "step": 27295 }, { "epoch": 2.775111834078894, "grad_norm": 0.257474422454834, "learning_rate": 4.912347652957354e-06, "loss": 0.3389, "step": 27296 }, { "epoch": 2.7752135014233428, "grad_norm": 0.28299054503440857, "learning_rate": 4.911992821733458e-06, "loss": 0.3351, "step": 27297 }, { "epoch": 2.7753151687677917, "grad_norm": 0.2786909341812134, "learning_rate": 4.911637990952925e-06, "loss": 0.3194, "step": 27298 }, { "epoch": 2.7754168361122407, "grad_norm": 0.25580018758773804, "learning_rate": 4.911283160617537e-06, "loss": 0.3219, "step": 27299 }, { "epoch": 2.7755185034566896, "grad_norm": 0.26615390181541443, "learning_rate": 4.910928330729085e-06, "loss": 0.3609, "step": 27300 }, { "epoch": 2.7756201708011385, "grad_norm": 0.26544827222824097, "learning_rate": 4.910573501289353e-06, "loss": 0.3286, "step": 27301 }, { "epoch": 2.7757218381455875, "grad_norm": 0.279742032289505, "learning_rate": 4.910218672300129e-06, "loss": 0.3113, "step": 27302 }, { "epoch": 2.7758235054900364, "grad_norm": 0.27076205611228943, "learning_rate": 4.909863843763206e-06, "loss": 0.353, "step": 27303 }, { "epoch": 2.7759251728344854, "grad_norm": 0.2510222792625427, "learning_rate": 4.9095090156803635e-06, "loss": 0.3533, "step": 27304 }, { "epoch": 2.7760268401789343, "grad_norm": 0.28387150168418884, "learning_rate": 4.909154188053398e-06, "loss": 0.3502, "step": 27305 }, { "epoch": 2.7761285075233832, "grad_norm": 0.2623436748981476, "learning_rate": 4.908799360884088e-06, "loss": 0.319, "step": 27306 }, { "epoch": 2.776230174867832, "grad_norm": 0.26354843378067017, "learning_rate": 4.908444534174225e-06, "loss": 0.3069, "step": 27307 }, { "epoch": 2.7763318422122816, "grad_norm": 0.27778178453445435, "learning_rate": 4.908089707925599e-06, "loss": 0.3565, "step": 27308 }, { "epoch": 2.7764335095567305, "grad_norm": 0.2725735008716583, "learning_rate": 4.907734882139995e-06, "loss": 0.303, "step": 27309 }, { "epoch": 2.7765351769011795, "grad_norm": 0.27231329679489136, "learning_rate": 4.9073800568192e-06, "loss": 0.3298, "step": 27310 }, { "epoch": 2.7766368442456284, "grad_norm": 0.27609944343566895, "learning_rate": 4.907025231965002e-06, "loss": 0.3345, "step": 27311 }, { "epoch": 2.7767385115900773, "grad_norm": 0.27475404739379883, "learning_rate": 4.9066704075791874e-06, "loss": 0.3116, "step": 27312 }, { "epoch": 2.7768401789345263, "grad_norm": 0.2671974003314972, "learning_rate": 4.906315583663548e-06, "loss": 0.3755, "step": 27313 }, { "epoch": 2.7769418462789752, "grad_norm": 0.2831522226333618, "learning_rate": 4.905960760219867e-06, "loss": 0.3187, "step": 27314 }, { "epoch": 2.777043513623424, "grad_norm": 0.25859639048576355, "learning_rate": 4.905605937249934e-06, "loss": 0.3437, "step": 27315 }, { "epoch": 2.777145180967873, "grad_norm": 0.26966822147369385, "learning_rate": 4.9052511147555345e-06, "loss": 0.3512, "step": 27316 }, { "epoch": 2.777246848312322, "grad_norm": 0.33102619647979736, "learning_rate": 4.904896292738457e-06, "loss": 0.3357, "step": 27317 }, { "epoch": 2.777348515656771, "grad_norm": 0.2766266167163849, "learning_rate": 4.904541471200492e-06, "loss": 0.3171, "step": 27318 }, { "epoch": 2.77745018300122, "grad_norm": 0.24950185418128967, "learning_rate": 4.904186650143423e-06, "loss": 0.3054, "step": 27319 }, { "epoch": 2.777551850345669, "grad_norm": 0.2599300742149353, "learning_rate": 4.90383182956904e-06, "loss": 0.3098, "step": 27320 }, { "epoch": 2.777653517690118, "grad_norm": 0.2739540636539459, "learning_rate": 4.903477009479127e-06, "loss": 0.3216, "step": 27321 }, { "epoch": 2.7777551850345668, "grad_norm": 0.2853865623474121, "learning_rate": 4.903122189875474e-06, "loss": 0.3222, "step": 27322 }, { "epoch": 2.7778568523790157, "grad_norm": 0.2619663178920746, "learning_rate": 4.9027673707598715e-06, "loss": 0.3255, "step": 27323 }, { "epoch": 2.777958519723465, "grad_norm": 0.2725066840648651, "learning_rate": 4.902412552134101e-06, "loss": 0.3436, "step": 27324 }, { "epoch": 2.778060187067914, "grad_norm": 0.2714538276195526, "learning_rate": 4.9020577339999545e-06, "loss": 0.3234, "step": 27325 }, { "epoch": 2.778161854412363, "grad_norm": 0.27339276671409607, "learning_rate": 4.9017029163592165e-06, "loss": 0.37, "step": 27326 }, { "epoch": 2.778263521756812, "grad_norm": 0.25176137685775757, "learning_rate": 4.901348099213676e-06, "loss": 0.3063, "step": 27327 }, { "epoch": 2.778365189101261, "grad_norm": 0.26256075501441956, "learning_rate": 4.900993282565122e-06, "loss": 0.3219, "step": 27328 }, { "epoch": 2.77846685644571, "grad_norm": 0.3055890202522278, "learning_rate": 4.900638466415338e-06, "loss": 0.3178, "step": 27329 }, { "epoch": 2.7785685237901587, "grad_norm": 0.2613770067691803, "learning_rate": 4.900283650766116e-06, "loss": 0.3229, "step": 27330 }, { "epoch": 2.7786701911346077, "grad_norm": 0.2954365611076355, "learning_rate": 4.8999288356192395e-06, "loss": 0.3426, "step": 27331 }, { "epoch": 2.7787718584790566, "grad_norm": 0.2868542969226837, "learning_rate": 4.899574020976499e-06, "loss": 0.3243, "step": 27332 }, { "epoch": 2.7788735258235056, "grad_norm": 0.30212467908859253, "learning_rate": 4.89921920683968e-06, "loss": 0.3506, "step": 27333 }, { "epoch": 2.7789751931679545, "grad_norm": 0.2513023316860199, "learning_rate": 4.89886439321057e-06, "loss": 0.3295, "step": 27334 }, { "epoch": 2.7790768605124034, "grad_norm": 0.25467467308044434, "learning_rate": 4.89850958009096e-06, "loss": 0.3184, "step": 27335 }, { "epoch": 2.7791785278568524, "grad_norm": 0.28652992844581604, "learning_rate": 4.898154767482632e-06, "loss": 0.3615, "step": 27336 }, { "epoch": 2.7792801952013013, "grad_norm": 0.25507187843322754, "learning_rate": 4.897799955387377e-06, "loss": 0.3333, "step": 27337 }, { "epoch": 2.7793818625457503, "grad_norm": 0.26499733328819275, "learning_rate": 4.89744514380698e-06, "loss": 0.3177, "step": 27338 }, { "epoch": 2.779483529890199, "grad_norm": 0.28970715403556824, "learning_rate": 4.897090332743231e-06, "loss": 0.3665, "step": 27339 }, { "epoch": 2.779585197234648, "grad_norm": 0.2727598249912262, "learning_rate": 4.896735522197918e-06, "loss": 0.3567, "step": 27340 }, { "epoch": 2.779686864579097, "grad_norm": 0.27959558367729187, "learning_rate": 4.896380712172825e-06, "loss": 0.3675, "step": 27341 }, { "epoch": 2.779788531923546, "grad_norm": 0.26989632844924927, "learning_rate": 4.896025902669742e-06, "loss": 0.3392, "step": 27342 }, { "epoch": 2.779890199267995, "grad_norm": 0.26775825023651123, "learning_rate": 4.895671093690455e-06, "loss": 0.3439, "step": 27343 }, { "epoch": 2.779991866612444, "grad_norm": 0.2659546136856079, "learning_rate": 4.895316285236753e-06, "loss": 0.326, "step": 27344 }, { "epoch": 2.780093533956893, "grad_norm": 0.26208558678627014, "learning_rate": 4.894961477310423e-06, "loss": 0.3063, "step": 27345 }, { "epoch": 2.780195201301342, "grad_norm": 0.2794775068759918, "learning_rate": 4.8946066699132515e-06, "loss": 0.3462, "step": 27346 }, { "epoch": 2.7802968686457907, "grad_norm": 0.27575039863586426, "learning_rate": 4.8942518630470276e-06, "loss": 0.3296, "step": 27347 }, { "epoch": 2.7803985359902397, "grad_norm": 0.2836315929889679, "learning_rate": 4.8938970567135365e-06, "loss": 0.3293, "step": 27348 }, { "epoch": 2.780500203334689, "grad_norm": 0.26762914657592773, "learning_rate": 4.893542250914567e-06, "loss": 0.3303, "step": 27349 }, { "epoch": 2.780601870679138, "grad_norm": 0.2897054851055145, "learning_rate": 4.893187445651907e-06, "loss": 0.2935, "step": 27350 }, { "epoch": 2.780703538023587, "grad_norm": 0.2834137976169586, "learning_rate": 4.8928326409273425e-06, "loss": 0.3197, "step": 27351 }, { "epoch": 2.780805205368036, "grad_norm": 0.2803972363471985, "learning_rate": 4.892477836742662e-06, "loss": 0.3052, "step": 27352 }, { "epoch": 2.780906872712485, "grad_norm": 0.27162766456604004, "learning_rate": 4.892123033099653e-06, "loss": 0.3099, "step": 27353 }, { "epoch": 2.7810085400569338, "grad_norm": 0.261005163192749, "learning_rate": 4.891768230000101e-06, "loss": 0.3229, "step": 27354 }, { "epoch": 2.7811102074013827, "grad_norm": 0.24371911585330963, "learning_rate": 4.891413427445797e-06, "loss": 0.3195, "step": 27355 }, { "epoch": 2.7812118747458316, "grad_norm": 0.26583847403526306, "learning_rate": 4.891058625438525e-06, "loss": 0.3353, "step": 27356 }, { "epoch": 2.7813135420902806, "grad_norm": 0.2536853551864624, "learning_rate": 4.890703823980074e-06, "loss": 0.3177, "step": 27357 }, { "epoch": 2.7814152094347295, "grad_norm": 0.258392333984375, "learning_rate": 4.890349023072232e-06, "loss": 0.3328, "step": 27358 }, { "epoch": 2.7815168767791785, "grad_norm": 0.26134005188941956, "learning_rate": 4.889994222716784e-06, "loss": 0.3365, "step": 27359 }, { "epoch": 2.7816185441236274, "grad_norm": 0.2600839138031006, "learning_rate": 4.8896394229155204e-06, "loss": 0.3495, "step": 27360 }, { "epoch": 2.7817202114680764, "grad_norm": 0.2518833875656128, "learning_rate": 4.889284623670226e-06, "loss": 0.3098, "step": 27361 }, { "epoch": 2.7818218788125253, "grad_norm": 0.2894623279571533, "learning_rate": 4.888929824982691e-06, "loss": 0.3135, "step": 27362 }, { "epoch": 2.7819235461569742, "grad_norm": 0.2730473577976227, "learning_rate": 4.8885750268547e-06, "loss": 0.3238, "step": 27363 }, { "epoch": 2.782025213501423, "grad_norm": 0.2826433479785919, "learning_rate": 4.888220229288041e-06, "loss": 0.3526, "step": 27364 }, { "epoch": 2.7821268808458726, "grad_norm": 0.26950395107269287, "learning_rate": 4.887865432284503e-06, "loss": 0.3354, "step": 27365 }, { "epoch": 2.7822285481903215, "grad_norm": 0.2503575086593628, "learning_rate": 4.887510635845872e-06, "loss": 0.3555, "step": 27366 }, { "epoch": 2.7823302155347704, "grad_norm": 0.2478218972682953, "learning_rate": 4.887155839973936e-06, "loss": 0.3098, "step": 27367 }, { "epoch": 2.7824318828792194, "grad_norm": 0.24507439136505127, "learning_rate": 4.886801044670481e-06, "loss": 0.3142, "step": 27368 }, { "epoch": 2.7825335502236683, "grad_norm": 0.2556462585926056, "learning_rate": 4.886446249937295e-06, "loss": 0.3479, "step": 27369 }, { "epoch": 2.7826352175681173, "grad_norm": 0.25700241327285767, "learning_rate": 4.886091455776168e-06, "loss": 0.3152, "step": 27370 }, { "epoch": 2.782736884912566, "grad_norm": 0.27688273787498474, "learning_rate": 4.885736662188884e-06, "loss": 0.3391, "step": 27371 }, { "epoch": 2.782838552257015, "grad_norm": 0.2738073766231537, "learning_rate": 4.885381869177232e-06, "loss": 0.3554, "step": 27372 }, { "epoch": 2.782940219601464, "grad_norm": 0.27697572112083435, "learning_rate": 4.885027076742998e-06, "loss": 0.3399, "step": 27373 }, { "epoch": 2.783041886945913, "grad_norm": 0.25966593623161316, "learning_rate": 4.884672284887971e-06, "loss": 0.33, "step": 27374 }, { "epoch": 2.783143554290362, "grad_norm": 0.25493571162223816, "learning_rate": 4.884317493613938e-06, "loss": 0.3261, "step": 27375 }, { "epoch": 2.783245221634811, "grad_norm": 0.2725219130516052, "learning_rate": 4.883962702922685e-06, "loss": 0.3474, "step": 27376 }, { "epoch": 2.78334688897926, "grad_norm": 0.30541694164276123, "learning_rate": 4.883607912816001e-06, "loss": 0.3711, "step": 27377 }, { "epoch": 2.783448556323709, "grad_norm": 0.25605878233909607, "learning_rate": 4.883253123295672e-06, "loss": 0.312, "step": 27378 }, { "epoch": 2.7835502236681577, "grad_norm": 0.26238635182380676, "learning_rate": 4.882898334363486e-06, "loss": 0.3146, "step": 27379 }, { "epoch": 2.7836518910126067, "grad_norm": 0.279430091381073, "learning_rate": 4.882543546021231e-06, "loss": 0.3369, "step": 27380 }, { "epoch": 2.7837535583570556, "grad_norm": 0.2615794241428375, "learning_rate": 4.882188758270694e-06, "loss": 0.3201, "step": 27381 }, { "epoch": 2.7838552257015046, "grad_norm": 0.2698224186897278, "learning_rate": 4.881833971113662e-06, "loss": 0.328, "step": 27382 }, { "epoch": 2.7839568930459535, "grad_norm": 0.26584792137145996, "learning_rate": 4.881479184551921e-06, "loss": 0.3287, "step": 27383 }, { "epoch": 2.7840585603904024, "grad_norm": 0.2811015844345093, "learning_rate": 4.881124398587259e-06, "loss": 0.3438, "step": 27384 }, { "epoch": 2.7841602277348514, "grad_norm": 0.28202152252197266, "learning_rate": 4.8807696132214675e-06, "loss": 0.3358, "step": 27385 }, { "epoch": 2.7842618950793003, "grad_norm": 0.26844897866249084, "learning_rate": 4.880414828456328e-06, "loss": 0.3207, "step": 27386 }, { "epoch": 2.7843635624237493, "grad_norm": 0.2619313895702362, "learning_rate": 4.880060044293631e-06, "loss": 0.3256, "step": 27387 }, { "epoch": 2.784465229768198, "grad_norm": 0.25864335894584656, "learning_rate": 4.879705260735163e-06, "loss": 0.3421, "step": 27388 }, { "epoch": 2.784566897112647, "grad_norm": 0.2667810618877411, "learning_rate": 4.87935047778271e-06, "loss": 0.3387, "step": 27389 }, { "epoch": 2.7846685644570965, "grad_norm": 0.2451535314321518, "learning_rate": 4.878995695438062e-06, "loss": 0.3382, "step": 27390 }, { "epoch": 2.7847702318015455, "grad_norm": 0.2696271240711212, "learning_rate": 4.878640913703004e-06, "loss": 0.339, "step": 27391 }, { "epoch": 2.7848718991459944, "grad_norm": 0.2638515830039978, "learning_rate": 4.878286132579325e-06, "loss": 0.3192, "step": 27392 }, { "epoch": 2.7849735664904434, "grad_norm": 0.2597903311252594, "learning_rate": 4.877931352068811e-06, "loss": 0.3238, "step": 27393 }, { "epoch": 2.7850752338348923, "grad_norm": 0.2568494975566864, "learning_rate": 4.87757657217325e-06, "loss": 0.3238, "step": 27394 }, { "epoch": 2.7851769011793412, "grad_norm": 0.24633720517158508, "learning_rate": 4.8772217928944295e-06, "loss": 0.3536, "step": 27395 }, { "epoch": 2.78527856852379, "grad_norm": 0.2576524019241333, "learning_rate": 4.876867014234136e-06, "loss": 0.2955, "step": 27396 }, { "epoch": 2.785380235868239, "grad_norm": 0.26282617449760437, "learning_rate": 4.876512236194158e-06, "loss": 0.3493, "step": 27397 }, { "epoch": 2.785481903212688, "grad_norm": 0.26492831110954285, "learning_rate": 4.876157458776281e-06, "loss": 0.3031, "step": 27398 }, { "epoch": 2.785583570557137, "grad_norm": 0.2583805322647095, "learning_rate": 4.875802681982294e-06, "loss": 0.3147, "step": 27399 }, { "epoch": 2.785685237901586, "grad_norm": 0.2494657188653946, "learning_rate": 4.875447905813983e-06, "loss": 0.3068, "step": 27400 }, { "epoch": 2.785786905246035, "grad_norm": 0.2536846101284027, "learning_rate": 4.8750931302731355e-06, "loss": 0.2998, "step": 27401 }, { "epoch": 2.785888572590484, "grad_norm": 0.28203797340393066, "learning_rate": 4.874738355361542e-06, "loss": 0.3707, "step": 27402 }, { "epoch": 2.7859902399349328, "grad_norm": 0.2846023142337799, "learning_rate": 4.874383581080983e-06, "loss": 0.3302, "step": 27403 }, { "epoch": 2.7860919072793817, "grad_norm": 0.25437602400779724, "learning_rate": 4.874028807433251e-06, "loss": 0.3341, "step": 27404 }, { "epoch": 2.7861935746238307, "grad_norm": 0.2708711624145508, "learning_rate": 4.873674034420133e-06, "loss": 0.3604, "step": 27405 }, { "epoch": 2.78629524196828, "grad_norm": 0.255085289478302, "learning_rate": 4.873319262043414e-06, "loss": 0.3239, "step": 27406 }, { "epoch": 2.786396909312729, "grad_norm": 0.2795572578907013, "learning_rate": 4.872964490304885e-06, "loss": 0.3513, "step": 27407 }, { "epoch": 2.786498576657178, "grad_norm": 0.27868300676345825, "learning_rate": 4.872609719206326e-06, "loss": 0.3899, "step": 27408 }, { "epoch": 2.786600244001627, "grad_norm": 0.27863025665283203, "learning_rate": 4.872254948749532e-06, "loss": 0.3673, "step": 27409 }, { "epoch": 2.786701911346076, "grad_norm": 0.2645558714866638, "learning_rate": 4.871900178936288e-06, "loss": 0.3182, "step": 27410 }, { "epoch": 2.7868035786905248, "grad_norm": 0.2638254463672638, "learning_rate": 4.871545409768378e-06, "loss": 0.3365, "step": 27411 }, { "epoch": 2.7869052460349737, "grad_norm": 0.2766314744949341, "learning_rate": 4.871190641247595e-06, "loss": 0.3495, "step": 27412 }, { "epoch": 2.7870069133794226, "grad_norm": 0.2791179418563843, "learning_rate": 4.870835873375719e-06, "loss": 0.3641, "step": 27413 }, { "epoch": 2.7871085807238716, "grad_norm": 0.25206458568573, "learning_rate": 4.870481106154543e-06, "loss": 0.3442, "step": 27414 }, { "epoch": 2.7872102480683205, "grad_norm": 0.2809889018535614, "learning_rate": 4.870126339585853e-06, "loss": 0.3277, "step": 27415 }, { "epoch": 2.7873119154127695, "grad_norm": 0.26324108242988586, "learning_rate": 4.869771573671434e-06, "loss": 0.3149, "step": 27416 }, { "epoch": 2.7874135827572184, "grad_norm": 0.2634700834751129, "learning_rate": 4.869416808413078e-06, "loss": 0.3485, "step": 27417 }, { "epoch": 2.7875152501016673, "grad_norm": 0.28058886528015137, "learning_rate": 4.869062043812566e-06, "loss": 0.3451, "step": 27418 }, { "epoch": 2.7876169174461163, "grad_norm": 0.26684993505477905, "learning_rate": 4.868707279871689e-06, "loss": 0.3446, "step": 27419 }, { "epoch": 2.7877185847905652, "grad_norm": 0.26468804478645325, "learning_rate": 4.868352516592236e-06, "loss": 0.3026, "step": 27420 }, { "epoch": 2.787820252135014, "grad_norm": 0.2710409164428711, "learning_rate": 4.867997753975987e-06, "loss": 0.3292, "step": 27421 }, { "epoch": 2.787921919479463, "grad_norm": 0.27176326513290405, "learning_rate": 4.867642992024739e-06, "loss": 0.3536, "step": 27422 }, { "epoch": 2.788023586823912, "grad_norm": 0.2748090326786041, "learning_rate": 4.86728823074027e-06, "loss": 0.308, "step": 27423 }, { "epoch": 2.788125254168361, "grad_norm": 0.271866112947464, "learning_rate": 4.866933470124373e-06, "loss": 0.3538, "step": 27424 }, { "epoch": 2.78822692151281, "grad_norm": 0.2803073525428772, "learning_rate": 4.866578710178835e-06, "loss": 0.3218, "step": 27425 }, { "epoch": 2.788328588857259, "grad_norm": 0.26780351996421814, "learning_rate": 4.866223950905438e-06, "loss": 0.3521, "step": 27426 }, { "epoch": 2.788430256201708, "grad_norm": 0.27012506127357483, "learning_rate": 4.865869192305978e-06, "loss": 0.3152, "step": 27427 }, { "epoch": 2.7885319235461568, "grad_norm": 0.2676179111003876, "learning_rate": 4.865514434382233e-06, "loss": 0.3494, "step": 27428 }, { "epoch": 2.7886335908906057, "grad_norm": 0.26824307441711426, "learning_rate": 4.865159677135995e-06, "loss": 0.3312, "step": 27429 }, { "epoch": 2.7887352582350546, "grad_norm": 0.2550050914287567, "learning_rate": 4.864804920569053e-06, "loss": 0.3084, "step": 27430 }, { "epoch": 2.788836925579504, "grad_norm": 0.27745816111564636, "learning_rate": 4.864450164683189e-06, "loss": 0.3225, "step": 27431 }, { "epoch": 2.788938592923953, "grad_norm": 0.27278560400009155, "learning_rate": 4.864095409480196e-06, "loss": 0.3334, "step": 27432 }, { "epoch": 2.789040260268402, "grad_norm": 0.2743821442127228, "learning_rate": 4.863740654961854e-06, "loss": 0.3051, "step": 27433 }, { "epoch": 2.789141927612851, "grad_norm": 0.2663883566856384, "learning_rate": 4.8633859011299555e-06, "loss": 0.3097, "step": 27434 }, { "epoch": 2.7892435949573, "grad_norm": 0.2553579807281494, "learning_rate": 4.863031147986289e-06, "loss": 0.3254, "step": 27435 }, { "epoch": 2.7893452623017487, "grad_norm": 0.27129945158958435, "learning_rate": 4.862676395532635e-06, "loss": 0.3402, "step": 27436 }, { "epoch": 2.7894469296461977, "grad_norm": 0.2696293592453003, "learning_rate": 4.862321643770789e-06, "loss": 0.3241, "step": 27437 }, { "epoch": 2.7895485969906466, "grad_norm": 0.29072004556655884, "learning_rate": 4.861966892702531e-06, "loss": 0.3635, "step": 27438 }, { "epoch": 2.7896502643350956, "grad_norm": 0.2727075517177582, "learning_rate": 4.8616121423296495e-06, "loss": 0.3534, "step": 27439 }, { "epoch": 2.7897519316795445, "grad_norm": 0.2507421672344208, "learning_rate": 4.8612573926539365e-06, "loss": 0.3406, "step": 27440 }, { "epoch": 2.7898535990239934, "grad_norm": 0.27095741033554077, "learning_rate": 4.860902643677173e-06, "loss": 0.3336, "step": 27441 }, { "epoch": 2.7899552663684424, "grad_norm": 0.26760098338127136, "learning_rate": 4.860547895401152e-06, "loss": 0.3592, "step": 27442 }, { "epoch": 2.7900569337128913, "grad_norm": 0.27149179577827454, "learning_rate": 4.860193147827656e-06, "loss": 0.3443, "step": 27443 }, { "epoch": 2.7901586010573403, "grad_norm": 0.2679193913936615, "learning_rate": 4.859838400958472e-06, "loss": 0.3109, "step": 27444 }, { "epoch": 2.790260268401789, "grad_norm": 0.2442411482334137, "learning_rate": 4.859483654795392e-06, "loss": 0.3196, "step": 27445 }, { "epoch": 2.790361935746238, "grad_norm": 0.2604195475578308, "learning_rate": 4.859128909340198e-06, "loss": 0.3517, "step": 27446 }, { "epoch": 2.7904636030906875, "grad_norm": 0.2631361484527588, "learning_rate": 4.8587741645946805e-06, "loss": 0.2997, "step": 27447 }, { "epoch": 2.7905652704351365, "grad_norm": 0.27694690227508545, "learning_rate": 4.858419420560624e-06, "loss": 0.3135, "step": 27448 }, { "epoch": 2.7906669377795854, "grad_norm": 0.2526024878025055, "learning_rate": 4.858064677239815e-06, "loss": 0.3531, "step": 27449 }, { "epoch": 2.7907686051240344, "grad_norm": 0.2622297704219818, "learning_rate": 4.8577099346340464e-06, "loss": 0.3496, "step": 27450 }, { "epoch": 2.7908702724684833, "grad_norm": 0.27489879727363586, "learning_rate": 4.857355192745097e-06, "loss": 0.3508, "step": 27451 }, { "epoch": 2.7909719398129322, "grad_norm": 0.2620929479598999, "learning_rate": 4.857000451574763e-06, "loss": 0.3163, "step": 27452 }, { "epoch": 2.791073607157381, "grad_norm": 0.270540714263916, "learning_rate": 4.8566457111248236e-06, "loss": 0.3518, "step": 27453 }, { "epoch": 2.79117527450183, "grad_norm": 0.2655404210090637, "learning_rate": 4.856290971397069e-06, "loss": 0.338, "step": 27454 }, { "epoch": 2.791276941846279, "grad_norm": 0.2816630005836487, "learning_rate": 4.855936232393288e-06, "loss": 0.3696, "step": 27455 }, { "epoch": 2.791378609190728, "grad_norm": 0.25756213068962097, "learning_rate": 4.855581494115264e-06, "loss": 0.3115, "step": 27456 }, { "epoch": 2.791480276535177, "grad_norm": 0.30251792073249817, "learning_rate": 4.855226756564789e-06, "loss": 0.321, "step": 27457 }, { "epoch": 2.791581943879626, "grad_norm": 0.26941177248954773, "learning_rate": 4.854872019743645e-06, "loss": 0.3265, "step": 27458 }, { "epoch": 2.791683611224075, "grad_norm": 0.240650936961174, "learning_rate": 4.85451728365362e-06, "loss": 0.3094, "step": 27459 }, { "epoch": 2.7917852785685238, "grad_norm": 0.2578270137310028, "learning_rate": 4.854162548296506e-06, "loss": 0.3413, "step": 27460 }, { "epoch": 2.7918869459129727, "grad_norm": 0.25982359051704407, "learning_rate": 4.853807813674084e-06, "loss": 0.2947, "step": 27461 }, { "epoch": 2.7919886132574216, "grad_norm": 0.28000035881996155, "learning_rate": 4.853453079788145e-06, "loss": 0.3224, "step": 27462 }, { "epoch": 2.7920902806018706, "grad_norm": 0.24817347526550293, "learning_rate": 4.853098346640472e-06, "loss": 0.3342, "step": 27463 }, { "epoch": 2.7921919479463195, "grad_norm": 0.2353610247373581, "learning_rate": 4.8527436142328555e-06, "loss": 0.3271, "step": 27464 }, { "epoch": 2.7922936152907685, "grad_norm": 0.27781161665916443, "learning_rate": 4.852388882567084e-06, "loss": 0.3096, "step": 27465 }, { "epoch": 2.7923952826352174, "grad_norm": 0.2681238353252411, "learning_rate": 4.8520341516449396e-06, "loss": 0.323, "step": 27466 }, { "epoch": 2.7924969499796664, "grad_norm": 0.2614116668701172, "learning_rate": 4.851679421468214e-06, "loss": 0.376, "step": 27467 }, { "epoch": 2.7925986173241153, "grad_norm": 0.2635039985179901, "learning_rate": 4.851324692038691e-06, "loss": 0.3142, "step": 27468 }, { "epoch": 2.7927002846685642, "grad_norm": 0.2767758369445801, "learning_rate": 4.850969963358157e-06, "loss": 0.3367, "step": 27469 }, { "epoch": 2.792801952013013, "grad_norm": 0.28496888279914856, "learning_rate": 4.850615235428406e-06, "loss": 0.3222, "step": 27470 }, { "epoch": 2.792903619357462, "grad_norm": 0.2522895038127899, "learning_rate": 4.850260508251216e-06, "loss": 0.3288, "step": 27471 }, { "epoch": 2.7930052867019115, "grad_norm": 0.26947298645973206, "learning_rate": 4.849905781828379e-06, "loss": 0.3371, "step": 27472 }, { "epoch": 2.7931069540463604, "grad_norm": 0.27240046858787537, "learning_rate": 4.84955105616168e-06, "loss": 0.3142, "step": 27473 }, { "epoch": 2.7932086213908094, "grad_norm": 0.2869722843170166, "learning_rate": 4.849196331252907e-06, "loss": 0.3347, "step": 27474 }, { "epoch": 2.7933102887352583, "grad_norm": 0.2832402288913727, "learning_rate": 4.8488416071038495e-06, "loss": 0.3562, "step": 27475 }, { "epoch": 2.7934119560797073, "grad_norm": 0.26310858130455017, "learning_rate": 4.84848688371629e-06, "loss": 0.3713, "step": 27476 }, { "epoch": 2.793513623424156, "grad_norm": 0.2547641694545746, "learning_rate": 4.8481321610920194e-06, "loss": 0.3731, "step": 27477 }, { "epoch": 2.793615290768605, "grad_norm": 0.2375342696905136, "learning_rate": 4.847777439232821e-06, "loss": 0.3227, "step": 27478 }, { "epoch": 2.793716958113054, "grad_norm": 0.2546128034591675, "learning_rate": 4.847422718140484e-06, "loss": 0.3321, "step": 27479 }, { "epoch": 2.793818625457503, "grad_norm": 0.26208338141441345, "learning_rate": 4.847067997816796e-06, "loss": 0.3606, "step": 27480 }, { "epoch": 2.793920292801952, "grad_norm": 0.2593030333518982, "learning_rate": 4.846713278263541e-06, "loss": 0.3322, "step": 27481 }, { "epoch": 2.794021960146401, "grad_norm": 0.2587222158908844, "learning_rate": 4.846358559482511e-06, "loss": 0.302, "step": 27482 }, { "epoch": 2.79412362749085, "grad_norm": 0.28967344760894775, "learning_rate": 4.8460038414754876e-06, "loss": 0.3417, "step": 27483 }, { "epoch": 2.794225294835299, "grad_norm": 0.2698211073875427, "learning_rate": 4.845649124244261e-06, "loss": 0.3873, "step": 27484 }, { "epoch": 2.7943269621797477, "grad_norm": 0.24342438578605652, "learning_rate": 4.845294407790619e-06, "loss": 0.3418, "step": 27485 }, { "epoch": 2.7944286295241967, "grad_norm": 0.25207236409187317, "learning_rate": 4.844939692116345e-06, "loss": 0.3218, "step": 27486 }, { "epoch": 2.7945302968686456, "grad_norm": 0.25801774859428406, "learning_rate": 4.844584977223229e-06, "loss": 0.334, "step": 27487 }, { "epoch": 2.794631964213095, "grad_norm": 0.2836851179599762, "learning_rate": 4.844230263113056e-06, "loss": 0.3128, "step": 27488 }, { "epoch": 2.794733631557544, "grad_norm": 0.27445098757743835, "learning_rate": 4.843875549787614e-06, "loss": 0.3314, "step": 27489 }, { "epoch": 2.794835298901993, "grad_norm": 0.26248887181282043, "learning_rate": 4.843520837248691e-06, "loss": 0.3347, "step": 27490 }, { "epoch": 2.794936966246442, "grad_norm": 0.2569406032562256, "learning_rate": 4.843166125498072e-06, "loss": 0.3133, "step": 27491 }, { "epoch": 2.7950386335908908, "grad_norm": 0.2624199688434601, "learning_rate": 4.842811414537546e-06, "loss": 0.3149, "step": 27492 }, { "epoch": 2.7951403009353397, "grad_norm": 0.27566856145858765, "learning_rate": 4.8424567043688975e-06, "loss": 0.3124, "step": 27493 }, { "epoch": 2.7952419682797887, "grad_norm": 0.2759767472743988, "learning_rate": 4.842101994993914e-06, "loss": 0.3209, "step": 27494 }, { "epoch": 2.7953436356242376, "grad_norm": 0.25715771317481995, "learning_rate": 4.841747286414385e-06, "loss": 0.3179, "step": 27495 }, { "epoch": 2.7954453029686865, "grad_norm": 0.2687798738479614, "learning_rate": 4.841392578632094e-06, "loss": 0.3161, "step": 27496 }, { "epoch": 2.7955469703131355, "grad_norm": 0.27887943387031555, "learning_rate": 4.841037871648831e-06, "loss": 0.306, "step": 27497 }, { "epoch": 2.7956486376575844, "grad_norm": 0.2764398455619812, "learning_rate": 4.8406831654663805e-06, "loss": 0.3288, "step": 27498 }, { "epoch": 2.7957503050020334, "grad_norm": 0.2726873755455017, "learning_rate": 4.84032846008653e-06, "loss": 0.3296, "step": 27499 }, { "epoch": 2.7958519723464823, "grad_norm": 0.2710472345352173, "learning_rate": 4.8399737555110675e-06, "loss": 0.346, "step": 27500 }, { "epoch": 2.7959536396909312, "grad_norm": 0.2764686048030853, "learning_rate": 4.839619051741778e-06, "loss": 0.324, "step": 27501 }, { "epoch": 2.79605530703538, "grad_norm": 0.27150213718414307, "learning_rate": 4.839264348780452e-06, "loss": 0.3287, "step": 27502 }, { "epoch": 2.796156974379829, "grad_norm": 0.2819134593009949, "learning_rate": 4.838909646628872e-06, "loss": 0.3348, "step": 27503 }, { "epoch": 2.796258641724278, "grad_norm": 0.2693386375904083, "learning_rate": 4.838554945288827e-06, "loss": 0.34, "step": 27504 }, { "epoch": 2.796360309068727, "grad_norm": 0.30048084259033203, "learning_rate": 4.8382002447621045e-06, "loss": 0.3406, "step": 27505 }, { "epoch": 2.796461976413176, "grad_norm": 0.2589351236820221, "learning_rate": 4.83784554505049e-06, "loss": 0.342, "step": 27506 }, { "epoch": 2.796563643757625, "grad_norm": 0.26052239537239075, "learning_rate": 4.837490846155773e-06, "loss": 0.3595, "step": 27507 }, { "epoch": 2.796665311102074, "grad_norm": 0.28531283140182495, "learning_rate": 4.837136148079736e-06, "loss": 0.3453, "step": 27508 }, { "epoch": 2.7967669784465228, "grad_norm": 0.2618292570114136, "learning_rate": 4.836781450824169e-06, "loss": 0.3312, "step": 27509 }, { "epoch": 2.7968686457909717, "grad_norm": 0.25538158416748047, "learning_rate": 4.83642675439086e-06, "loss": 0.3125, "step": 27510 }, { "epoch": 2.7969703131354207, "grad_norm": 0.2728992998600006, "learning_rate": 4.836072058781592e-06, "loss": 0.3445, "step": 27511 }, { "epoch": 2.7970719804798696, "grad_norm": 0.3034769594669342, "learning_rate": 4.835717363998156e-06, "loss": 0.3516, "step": 27512 }, { "epoch": 2.797173647824319, "grad_norm": 0.26408571004867554, "learning_rate": 4.835362670042335e-06, "loss": 0.304, "step": 27513 }, { "epoch": 2.797275315168768, "grad_norm": 0.2768518328666687, "learning_rate": 4.835007976915918e-06, "loss": 0.2848, "step": 27514 }, { "epoch": 2.797376982513217, "grad_norm": 0.28594350814819336, "learning_rate": 4.834653284620693e-06, "loss": 0.3183, "step": 27515 }, { "epoch": 2.797478649857666, "grad_norm": 0.26613757014274597, "learning_rate": 4.834298593158444e-06, "loss": 0.342, "step": 27516 }, { "epoch": 2.7975803172021148, "grad_norm": 0.2696695923805237, "learning_rate": 4.833943902530961e-06, "loss": 0.3215, "step": 27517 }, { "epoch": 2.7976819845465637, "grad_norm": 0.28021588921546936, "learning_rate": 4.833589212740027e-06, "loss": 0.3572, "step": 27518 }, { "epoch": 2.7977836518910126, "grad_norm": 0.28668734431266785, "learning_rate": 4.833234523787433e-06, "loss": 0.3294, "step": 27519 }, { "epoch": 2.7978853192354616, "grad_norm": 0.27074265480041504, "learning_rate": 4.832879835674962e-06, "loss": 0.3218, "step": 27520 }, { "epoch": 2.7979869865799105, "grad_norm": 0.274371474981308, "learning_rate": 4.832525148404403e-06, "loss": 0.3139, "step": 27521 }, { "epoch": 2.7980886539243595, "grad_norm": 0.29980161786079407, "learning_rate": 4.832170461977544e-06, "loss": 0.3335, "step": 27522 }, { "epoch": 2.7981903212688084, "grad_norm": 0.24340759217739105, "learning_rate": 4.83181577639617e-06, "loss": 0.3653, "step": 27523 }, { "epoch": 2.7982919886132573, "grad_norm": 0.23784145712852478, "learning_rate": 4.831461091662068e-06, "loss": 0.3099, "step": 27524 }, { "epoch": 2.7983936559577063, "grad_norm": 0.262688547372818, "learning_rate": 4.831106407777023e-06, "loss": 0.3455, "step": 27525 }, { "epoch": 2.7984953233021552, "grad_norm": 0.2548656761646271, "learning_rate": 4.830751724742825e-06, "loss": 0.3726, "step": 27526 }, { "epoch": 2.798596990646604, "grad_norm": 0.2467205673456192, "learning_rate": 4.830397042561261e-06, "loss": 0.3546, "step": 27527 }, { "epoch": 2.7986986579910536, "grad_norm": 0.2895098924636841, "learning_rate": 4.830042361234115e-06, "loss": 0.3334, "step": 27528 }, { "epoch": 2.7988003253355025, "grad_norm": 0.28651347756385803, "learning_rate": 4.829687680763176e-06, "loss": 0.3344, "step": 27529 }, { "epoch": 2.7989019926799514, "grad_norm": 0.27508124709129333, "learning_rate": 4.829333001150229e-06, "loss": 0.3388, "step": 27530 }, { "epoch": 2.7990036600244004, "grad_norm": 0.268587201833725, "learning_rate": 4.828978322397062e-06, "loss": 0.3245, "step": 27531 }, { "epoch": 2.7991053273688493, "grad_norm": 0.26485970616340637, "learning_rate": 4.828623644505462e-06, "loss": 0.3171, "step": 27532 }, { "epoch": 2.7992069947132983, "grad_norm": 0.2791292071342468, "learning_rate": 4.828268967477215e-06, "loss": 0.3379, "step": 27533 }, { "epoch": 2.799308662057747, "grad_norm": 0.27009114623069763, "learning_rate": 4.827914291314109e-06, "loss": 0.3604, "step": 27534 }, { "epoch": 2.799410329402196, "grad_norm": 0.2570183575153351, "learning_rate": 4.8275596160179285e-06, "loss": 0.3193, "step": 27535 }, { "epoch": 2.799511996746645, "grad_norm": 0.2620939612388611, "learning_rate": 4.827204941590462e-06, "loss": 0.3696, "step": 27536 }, { "epoch": 2.799613664091094, "grad_norm": 0.2762613296508789, "learning_rate": 4.8268502680334975e-06, "loss": 0.364, "step": 27537 }, { "epoch": 2.799715331435543, "grad_norm": 0.2790120542049408, "learning_rate": 4.826495595348819e-06, "loss": 0.3408, "step": 27538 }, { "epoch": 2.799816998779992, "grad_norm": 0.26379039883613586, "learning_rate": 4.826140923538215e-06, "loss": 0.3448, "step": 27539 }, { "epoch": 2.799918666124441, "grad_norm": 0.26741963624954224, "learning_rate": 4.8257862526034704e-06, "loss": 0.3297, "step": 27540 }, { "epoch": 2.80002033346889, "grad_norm": 0.24542760848999023, "learning_rate": 4.8254315825463735e-06, "loss": 0.3303, "step": 27541 }, { "epoch": 2.8001220008133387, "grad_norm": 0.2718326151371002, "learning_rate": 4.825076913368712e-06, "loss": 0.3228, "step": 27542 }, { "epoch": 2.8002236681577877, "grad_norm": 0.2816115617752075, "learning_rate": 4.82472224507227e-06, "loss": 0.3239, "step": 27543 }, { "epoch": 2.8003253355022366, "grad_norm": 0.27467358112335205, "learning_rate": 4.824367577658838e-06, "loss": 0.3276, "step": 27544 }, { "epoch": 2.8004270028466856, "grad_norm": 0.251921683549881, "learning_rate": 4.824012911130198e-06, "loss": 0.3157, "step": 27545 }, { "epoch": 2.8005286701911345, "grad_norm": 0.2767926752567291, "learning_rate": 4.82365824548814e-06, "loss": 0.3411, "step": 27546 }, { "epoch": 2.8006303375355834, "grad_norm": 0.2958117723464966, "learning_rate": 4.82330358073445e-06, "loss": 0.3241, "step": 27547 }, { "epoch": 2.8007320048800324, "grad_norm": 0.2843698263168335, "learning_rate": 4.822948916870914e-06, "loss": 0.3435, "step": 27548 }, { "epoch": 2.8008336722244813, "grad_norm": 0.24512603878974915, "learning_rate": 4.822594253899321e-06, "loss": 0.3255, "step": 27549 }, { "epoch": 2.8009353395689303, "grad_norm": 0.263659805059433, "learning_rate": 4.8222395918214546e-06, "loss": 0.324, "step": 27550 }, { "epoch": 2.801037006913379, "grad_norm": 0.2489360272884369, "learning_rate": 4.821884930639102e-06, "loss": 0.3141, "step": 27551 }, { "epoch": 2.801138674257828, "grad_norm": 0.2765127420425415, "learning_rate": 4.821530270354052e-06, "loss": 0.3442, "step": 27552 }, { "epoch": 2.801240341602277, "grad_norm": 0.2637065649032593, "learning_rate": 4.82117561096809e-06, "loss": 0.3623, "step": 27553 }, { "epoch": 2.8013420089467265, "grad_norm": 0.26607388257980347, "learning_rate": 4.820820952483005e-06, "loss": 0.321, "step": 27554 }, { "epoch": 2.8014436762911754, "grad_norm": 0.25338512659072876, "learning_rate": 4.820466294900577e-06, "loss": 0.319, "step": 27555 }, { "epoch": 2.8015453436356244, "grad_norm": 0.2651461958885193, "learning_rate": 4.8201116382225985e-06, "loss": 0.3293, "step": 27556 }, { "epoch": 2.8016470109800733, "grad_norm": 0.23724529147148132, "learning_rate": 4.819756982450857e-06, "loss": 0.301, "step": 27557 }, { "epoch": 2.8017486783245222, "grad_norm": 0.2521461248397827, "learning_rate": 4.819402327587135e-06, "loss": 0.3737, "step": 27558 }, { "epoch": 2.801850345668971, "grad_norm": 0.25092214345932007, "learning_rate": 4.819047673633223e-06, "loss": 0.3174, "step": 27559 }, { "epoch": 2.80195201301342, "grad_norm": 0.2801785469055176, "learning_rate": 4.8186930205909035e-06, "loss": 0.3534, "step": 27560 }, { "epoch": 2.802053680357869, "grad_norm": 0.2667393088340759, "learning_rate": 4.818338368461966e-06, "loss": 0.3563, "step": 27561 }, { "epoch": 2.802155347702318, "grad_norm": 0.2700570225715637, "learning_rate": 4.817983717248198e-06, "loss": 0.3321, "step": 27562 }, { "epoch": 2.802257015046767, "grad_norm": 0.27329498529434204, "learning_rate": 4.817629066951384e-06, "loss": 0.364, "step": 27563 }, { "epoch": 2.802358682391216, "grad_norm": 0.2642212510108948, "learning_rate": 4.817274417573313e-06, "loss": 0.3598, "step": 27564 }, { "epoch": 2.802460349735665, "grad_norm": 0.2683736979961395, "learning_rate": 4.816919769115766e-06, "loss": 0.3492, "step": 27565 }, { "epoch": 2.8025620170801138, "grad_norm": 0.26182126998901367, "learning_rate": 4.816565121580536e-06, "loss": 0.3313, "step": 27566 }, { "epoch": 2.8026636844245627, "grad_norm": 0.2726362645626068, "learning_rate": 4.816210474969408e-06, "loss": 0.3484, "step": 27567 }, { "epoch": 2.8027653517690116, "grad_norm": 0.266684353351593, "learning_rate": 4.815855829284167e-06, "loss": 0.2975, "step": 27568 }, { "epoch": 2.802867019113461, "grad_norm": 0.26155945658683777, "learning_rate": 4.815501184526603e-06, "loss": 0.3144, "step": 27569 }, { "epoch": 2.80296868645791, "grad_norm": 0.268705815076828, "learning_rate": 4.815146540698496e-06, "loss": 0.3536, "step": 27570 }, { "epoch": 2.803070353802359, "grad_norm": 0.25827014446258545, "learning_rate": 4.814791897801639e-06, "loss": 0.3368, "step": 27571 }, { "epoch": 2.803172021146808, "grad_norm": 0.2722069323062897, "learning_rate": 4.814437255837818e-06, "loss": 0.3636, "step": 27572 }, { "epoch": 2.803273688491257, "grad_norm": 0.2712419629096985, "learning_rate": 4.814082614808816e-06, "loss": 0.3193, "step": 27573 }, { "epoch": 2.8033753558357057, "grad_norm": 0.26065686345100403, "learning_rate": 4.813727974716423e-06, "loss": 0.3476, "step": 27574 }, { "epoch": 2.8034770231801547, "grad_norm": 0.2577855885028839, "learning_rate": 4.813373335562422e-06, "loss": 0.3289, "step": 27575 }, { "epoch": 2.8035786905246036, "grad_norm": 0.2559923231601715, "learning_rate": 4.813018697348602e-06, "loss": 0.313, "step": 27576 }, { "epoch": 2.8036803578690526, "grad_norm": 0.2726074755191803, "learning_rate": 4.812664060076752e-06, "loss": 0.3341, "step": 27577 }, { "epoch": 2.8037820252135015, "grad_norm": 0.25071457028388977, "learning_rate": 4.812309423748653e-06, "loss": 0.3326, "step": 27578 }, { "epoch": 2.8038836925579504, "grad_norm": 0.27221331000328064, "learning_rate": 4.8119547883660975e-06, "loss": 0.37, "step": 27579 }, { "epoch": 2.8039853599023994, "grad_norm": 0.2681429088115692, "learning_rate": 4.811600153930866e-06, "loss": 0.3474, "step": 27580 }, { "epoch": 2.8040870272468483, "grad_norm": 0.2620788514614105, "learning_rate": 4.81124552044475e-06, "loss": 0.3021, "step": 27581 }, { "epoch": 2.8041886945912973, "grad_norm": 0.2623383104801178, "learning_rate": 4.810890887909535e-06, "loss": 0.3487, "step": 27582 }, { "epoch": 2.804290361935746, "grad_norm": 0.26730671525001526, "learning_rate": 4.810536256327004e-06, "loss": 0.344, "step": 27583 }, { "epoch": 2.804392029280195, "grad_norm": 0.2854035496711731, "learning_rate": 4.810181625698949e-06, "loss": 0.3075, "step": 27584 }, { "epoch": 2.804493696624644, "grad_norm": 0.23934943974018097, "learning_rate": 4.809826996027152e-06, "loss": 0.319, "step": 27585 }, { "epoch": 2.804595363969093, "grad_norm": 0.26085859537124634, "learning_rate": 4.809472367313401e-06, "loss": 0.3284, "step": 27586 }, { "epoch": 2.804697031313542, "grad_norm": 0.24771876633167267, "learning_rate": 4.809117739559486e-06, "loss": 0.3288, "step": 27587 }, { "epoch": 2.804798698657991, "grad_norm": 0.24989546835422516, "learning_rate": 4.808763112767188e-06, "loss": 0.3163, "step": 27588 }, { "epoch": 2.80490036600244, "grad_norm": 0.28032028675079346, "learning_rate": 4.8084084869382974e-06, "loss": 0.3127, "step": 27589 }, { "epoch": 2.805002033346889, "grad_norm": 0.24043166637420654, "learning_rate": 4.8080538620745985e-06, "loss": 0.3152, "step": 27590 }, { "epoch": 2.8051037006913377, "grad_norm": 0.2628173232078552, "learning_rate": 4.807699238177877e-06, "loss": 0.3009, "step": 27591 }, { "epoch": 2.8052053680357867, "grad_norm": 0.2683340609073639, "learning_rate": 4.807344615249923e-06, "loss": 0.3504, "step": 27592 }, { "epoch": 2.8053070353802356, "grad_norm": 0.2678798735141754, "learning_rate": 4.80698999329252e-06, "loss": 0.3551, "step": 27593 }, { "epoch": 2.8054087027246846, "grad_norm": 0.2727328836917877, "learning_rate": 4.806635372307458e-06, "loss": 0.3445, "step": 27594 }, { "epoch": 2.805510370069134, "grad_norm": 0.25738006830215454, "learning_rate": 4.806280752296519e-06, "loss": 0.316, "step": 27595 }, { "epoch": 2.805612037413583, "grad_norm": 0.2741974890232086, "learning_rate": 4.805926133261491e-06, "loss": 0.3577, "step": 27596 }, { "epoch": 2.805713704758032, "grad_norm": 0.26199841499328613, "learning_rate": 4.805571515204162e-06, "loss": 0.343, "step": 27597 }, { "epoch": 2.8058153721024808, "grad_norm": 0.25910836458206177, "learning_rate": 4.805216898126317e-06, "loss": 0.3421, "step": 27598 }, { "epoch": 2.8059170394469297, "grad_norm": 0.27861467003822327, "learning_rate": 4.804862282029745e-06, "loss": 0.3421, "step": 27599 }, { "epoch": 2.8060187067913787, "grad_norm": 0.26669013500213623, "learning_rate": 4.804507666916228e-06, "loss": 0.3271, "step": 27600 }, { "epoch": 2.8061203741358276, "grad_norm": 0.25086596608161926, "learning_rate": 4.804153052787554e-06, "loss": 0.3112, "step": 27601 }, { "epoch": 2.8062220414802765, "grad_norm": 0.2561172842979431, "learning_rate": 4.803798439645514e-06, "loss": 0.3473, "step": 27602 }, { "epoch": 2.8063237088247255, "grad_norm": 0.2908517122268677, "learning_rate": 4.803443827491887e-06, "loss": 0.3247, "step": 27603 }, { "epoch": 2.8064253761691744, "grad_norm": 0.2609405815601349, "learning_rate": 4.803089216328467e-06, "loss": 0.3115, "step": 27604 }, { "epoch": 2.8065270435136234, "grad_norm": 0.2729720175266266, "learning_rate": 4.802734606157035e-06, "loss": 0.3383, "step": 27605 }, { "epoch": 2.8066287108580723, "grad_norm": 0.25889649987220764, "learning_rate": 4.8023799969793774e-06, "loss": 0.31, "step": 27606 }, { "epoch": 2.8067303782025212, "grad_norm": 0.26473119854927063, "learning_rate": 4.802025388797286e-06, "loss": 0.3188, "step": 27607 }, { "epoch": 2.80683204554697, "grad_norm": 0.27425727248191833, "learning_rate": 4.801670781612541e-06, "loss": 0.3365, "step": 27608 }, { "epoch": 2.806933712891419, "grad_norm": 0.28398725390434265, "learning_rate": 4.8013161754269326e-06, "loss": 0.3046, "step": 27609 }, { "epoch": 2.8070353802358685, "grad_norm": 0.30742597579956055, "learning_rate": 4.800961570242245e-06, "loss": 0.3837, "step": 27610 }, { "epoch": 2.8071370475803175, "grad_norm": 0.27042868733406067, "learning_rate": 4.800606966060265e-06, "loss": 0.3095, "step": 27611 }, { "epoch": 2.8072387149247664, "grad_norm": 0.2700646221637726, "learning_rate": 4.800252362882783e-06, "loss": 0.355, "step": 27612 }, { "epoch": 2.8073403822692153, "grad_norm": 0.26473918557167053, "learning_rate": 4.79989776071158e-06, "loss": 0.3122, "step": 27613 }, { "epoch": 2.8074420496136643, "grad_norm": 0.26811233162879944, "learning_rate": 4.799543159548445e-06, "loss": 0.3734, "step": 27614 }, { "epoch": 2.8075437169581132, "grad_norm": 0.23446053266525269, "learning_rate": 4.799188559395164e-06, "loss": 0.3387, "step": 27615 }, { "epoch": 2.807645384302562, "grad_norm": 0.2594754695892334, "learning_rate": 4.798833960253521e-06, "loss": 0.3347, "step": 27616 }, { "epoch": 2.807747051647011, "grad_norm": 0.2861208915710449, "learning_rate": 4.7984793621253086e-06, "loss": 0.345, "step": 27617 }, { "epoch": 2.80784871899146, "grad_norm": 0.2673588991165161, "learning_rate": 4.798124765012307e-06, "loss": 0.3248, "step": 27618 }, { "epoch": 2.807950386335909, "grad_norm": 0.24240723252296448, "learning_rate": 4.797770168916306e-06, "loss": 0.3357, "step": 27619 }, { "epoch": 2.808052053680358, "grad_norm": 0.30333754420280457, "learning_rate": 4.79741557383909e-06, "loss": 0.3194, "step": 27620 }, { "epoch": 2.808153721024807, "grad_norm": 0.2742112874984741, "learning_rate": 4.797060979782444e-06, "loss": 0.3593, "step": 27621 }, { "epoch": 2.808255388369256, "grad_norm": 0.2858765721321106, "learning_rate": 4.796706386748161e-06, "loss": 0.3023, "step": 27622 }, { "epoch": 2.8083570557137048, "grad_norm": 0.26707446575164795, "learning_rate": 4.79635179473802e-06, "loss": 0.3629, "step": 27623 }, { "epoch": 2.8084587230581537, "grad_norm": 0.24965357780456543, "learning_rate": 4.795997203753812e-06, "loss": 0.3253, "step": 27624 }, { "epoch": 2.8085603904026026, "grad_norm": 0.2852986454963684, "learning_rate": 4.79564261379732e-06, "loss": 0.318, "step": 27625 }, { "epoch": 2.8086620577470516, "grad_norm": 0.26466479897499084, "learning_rate": 4.795288024870332e-06, "loss": 0.3293, "step": 27626 }, { "epoch": 2.8087637250915005, "grad_norm": 0.2799956798553467, "learning_rate": 4.794933436974636e-06, "loss": 0.3466, "step": 27627 }, { "epoch": 2.8088653924359495, "grad_norm": 0.24706688523292542, "learning_rate": 4.794578850112015e-06, "loss": 0.3208, "step": 27628 }, { "epoch": 2.8089670597803984, "grad_norm": 0.2946968972682953, "learning_rate": 4.794224264284259e-06, "loss": 0.3277, "step": 27629 }, { "epoch": 2.8090687271248473, "grad_norm": 0.27165353298187256, "learning_rate": 4.793869679493149e-06, "loss": 0.3149, "step": 27630 }, { "epoch": 2.8091703944692963, "grad_norm": 0.25397932529449463, "learning_rate": 4.793515095740476e-06, "loss": 0.3618, "step": 27631 }, { "epoch": 2.8092720618137452, "grad_norm": 0.28383609652519226, "learning_rate": 4.793160513028026e-06, "loss": 0.3645, "step": 27632 }, { "epoch": 2.809373729158194, "grad_norm": 0.2812533974647522, "learning_rate": 4.792805931357583e-06, "loss": 0.3306, "step": 27633 }, { "epoch": 2.809475396502643, "grad_norm": 0.27383390069007874, "learning_rate": 4.792451350730935e-06, "loss": 0.3405, "step": 27634 }, { "epoch": 2.809577063847092, "grad_norm": 0.2628396153450012, "learning_rate": 4.792096771149868e-06, "loss": 0.325, "step": 27635 }, { "epoch": 2.8096787311915414, "grad_norm": 0.2574270963668823, "learning_rate": 4.7917421926161665e-06, "loss": 0.3167, "step": 27636 }, { "epoch": 2.8097803985359904, "grad_norm": 0.27951470017433167, "learning_rate": 4.7913876151316205e-06, "loss": 0.3426, "step": 27637 }, { "epoch": 2.8098820658804393, "grad_norm": 0.2676795721054077, "learning_rate": 4.791033038698013e-06, "loss": 0.3329, "step": 27638 }, { "epoch": 2.8099837332248883, "grad_norm": 0.2645367383956909, "learning_rate": 4.7906784633171326e-06, "loss": 0.3434, "step": 27639 }, { "epoch": 2.810085400569337, "grad_norm": 0.2511725425720215, "learning_rate": 4.7903238889907625e-06, "loss": 0.342, "step": 27640 }, { "epoch": 2.810187067913786, "grad_norm": 0.26640257239341736, "learning_rate": 4.789969315720691e-06, "loss": 0.3542, "step": 27641 }, { "epoch": 2.810288735258235, "grad_norm": 0.2610166668891907, "learning_rate": 4.789614743508706e-06, "loss": 0.3275, "step": 27642 }, { "epoch": 2.810390402602684, "grad_norm": 0.25987493991851807, "learning_rate": 4.78926017235659e-06, "loss": 0.343, "step": 27643 }, { "epoch": 2.810492069947133, "grad_norm": 0.25744953751564026, "learning_rate": 4.788905602266132e-06, "loss": 0.3225, "step": 27644 }, { "epoch": 2.810593737291582, "grad_norm": 0.2593154013156891, "learning_rate": 4.7885510332391165e-06, "loss": 0.3272, "step": 27645 }, { "epoch": 2.810695404636031, "grad_norm": 0.2769303023815155, "learning_rate": 4.788196465277331e-06, "loss": 0.3097, "step": 27646 }, { "epoch": 2.81079707198048, "grad_norm": 0.2501067519187927, "learning_rate": 4.787841898382563e-06, "loss": 0.3537, "step": 27647 }, { "epoch": 2.8108987393249287, "grad_norm": 0.2667465806007385, "learning_rate": 4.787487332556595e-06, "loss": 0.3528, "step": 27648 }, { "epoch": 2.8110004066693777, "grad_norm": 0.2726072072982788, "learning_rate": 4.787132767801217e-06, "loss": 0.2975, "step": 27649 }, { "epoch": 2.8111020740138266, "grad_norm": 0.267770379781723, "learning_rate": 4.7867782041182125e-06, "loss": 0.3073, "step": 27650 }, { "epoch": 2.811203741358276, "grad_norm": 0.265779972076416, "learning_rate": 4.7864236415093685e-06, "loss": 0.3303, "step": 27651 }, { "epoch": 2.811305408702725, "grad_norm": 0.26622653007507324, "learning_rate": 4.786069079976472e-06, "loss": 0.3358, "step": 27652 }, { "epoch": 2.811407076047174, "grad_norm": 0.2675390839576721, "learning_rate": 4.7857145195213086e-06, "loss": 0.3548, "step": 27653 }, { "epoch": 2.811508743391623, "grad_norm": 0.27314862608909607, "learning_rate": 4.7853599601456655e-06, "loss": 0.3314, "step": 27654 }, { "epoch": 2.8116104107360718, "grad_norm": 0.2912556231021881, "learning_rate": 4.785005401851326e-06, "loss": 0.3289, "step": 27655 }, { "epoch": 2.8117120780805207, "grad_norm": 0.2857140898704529, "learning_rate": 4.784650844640078e-06, "loss": 0.3155, "step": 27656 }, { "epoch": 2.8118137454249696, "grad_norm": 0.259275883436203, "learning_rate": 4.78429628851371e-06, "loss": 0.3529, "step": 27657 }, { "epoch": 2.8119154127694186, "grad_norm": 0.2641391456127167, "learning_rate": 4.783941733474005e-06, "loss": 0.3337, "step": 27658 }, { "epoch": 2.8120170801138675, "grad_norm": 0.2663562595844269, "learning_rate": 4.783587179522751e-06, "loss": 0.3143, "step": 27659 }, { "epoch": 2.8121187474583165, "grad_norm": 0.30539900064468384, "learning_rate": 4.7832326266617316e-06, "loss": 0.3261, "step": 27660 }, { "epoch": 2.8122204148027654, "grad_norm": 0.3158193528652191, "learning_rate": 4.782878074892736e-06, "loss": 0.3549, "step": 27661 }, { "epoch": 2.8123220821472144, "grad_norm": 0.28368332982063293, "learning_rate": 4.782523524217549e-06, "loss": 0.3407, "step": 27662 }, { "epoch": 2.8124237494916633, "grad_norm": 0.2847398817539215, "learning_rate": 4.7821689746379565e-06, "loss": 0.3457, "step": 27663 }, { "epoch": 2.8125254168361122, "grad_norm": 0.27618762850761414, "learning_rate": 4.781814426155745e-06, "loss": 0.3508, "step": 27664 }, { "epoch": 2.812627084180561, "grad_norm": 0.2903442680835724, "learning_rate": 4.781459878772701e-06, "loss": 0.3441, "step": 27665 }, { "epoch": 2.81272875152501, "grad_norm": 0.2655829191207886, "learning_rate": 4.781105332490608e-06, "loss": 0.3247, "step": 27666 }, { "epoch": 2.812830418869459, "grad_norm": 0.2827718257904053, "learning_rate": 4.780750787311257e-06, "loss": 0.3855, "step": 27667 }, { "epoch": 2.812932086213908, "grad_norm": 0.29358217120170593, "learning_rate": 4.780396243236431e-06, "loss": 0.3591, "step": 27668 }, { "epoch": 2.813033753558357, "grad_norm": 0.25874775648117065, "learning_rate": 4.780041700267916e-06, "loss": 0.3236, "step": 27669 }, { "epoch": 2.813135420902806, "grad_norm": 0.24482762813568115, "learning_rate": 4.779687158407499e-06, "loss": 0.3263, "step": 27670 }, { "epoch": 2.813237088247255, "grad_norm": 0.25518542528152466, "learning_rate": 4.779332617656965e-06, "loss": 0.3093, "step": 27671 }, { "epoch": 2.8133387555917038, "grad_norm": 0.24907754361629486, "learning_rate": 4.778978078018102e-06, "loss": 0.341, "step": 27672 }, { "epoch": 2.8134404229361527, "grad_norm": 0.2457965910434723, "learning_rate": 4.778623539492693e-06, "loss": 0.3144, "step": 27673 }, { "epoch": 2.8135420902806016, "grad_norm": 0.2760974168777466, "learning_rate": 4.778269002082529e-06, "loss": 0.347, "step": 27674 }, { "epoch": 2.8136437576250506, "grad_norm": 0.27668142318725586, "learning_rate": 4.77791446578939e-06, "loss": 0.3461, "step": 27675 }, { "epoch": 2.8137454249694995, "grad_norm": 0.2711029052734375, "learning_rate": 4.777559930615066e-06, "loss": 0.3396, "step": 27676 }, { "epoch": 2.813847092313949, "grad_norm": 0.2642321288585663, "learning_rate": 4.777205396561343e-06, "loss": 0.3559, "step": 27677 }, { "epoch": 2.813948759658398, "grad_norm": 0.26687002182006836, "learning_rate": 4.776850863630006e-06, "loss": 0.3095, "step": 27678 }, { "epoch": 2.814050427002847, "grad_norm": 0.2623163163661957, "learning_rate": 4.776496331822842e-06, "loss": 0.3205, "step": 27679 }, { "epoch": 2.8141520943472957, "grad_norm": 0.26221203804016113, "learning_rate": 4.776141801141635e-06, "loss": 0.3139, "step": 27680 }, { "epoch": 2.8142537616917447, "grad_norm": 0.2496688961982727, "learning_rate": 4.775787271588172e-06, "loss": 0.3246, "step": 27681 }, { "epoch": 2.8143554290361936, "grad_norm": 0.2527313530445099, "learning_rate": 4.775432743164241e-06, "loss": 0.312, "step": 27682 }, { "epoch": 2.8144570963806426, "grad_norm": 0.27342694997787476, "learning_rate": 4.775078215871625e-06, "loss": 0.3372, "step": 27683 }, { "epoch": 2.8145587637250915, "grad_norm": 0.2559821605682373, "learning_rate": 4.774723689712114e-06, "loss": 0.3423, "step": 27684 }, { "epoch": 2.8146604310695404, "grad_norm": 0.25744470953941345, "learning_rate": 4.77436916468749e-06, "loss": 0.3534, "step": 27685 }, { "epoch": 2.8147620984139894, "grad_norm": 0.26820921897888184, "learning_rate": 4.77401464079954e-06, "loss": 0.3639, "step": 27686 }, { "epoch": 2.8148637657584383, "grad_norm": 0.2621898353099823, "learning_rate": 4.773660118050051e-06, "loss": 0.2979, "step": 27687 }, { "epoch": 2.8149654331028873, "grad_norm": 0.2636702358722687, "learning_rate": 4.773305596440808e-06, "loss": 0.3651, "step": 27688 }, { "epoch": 2.815067100447336, "grad_norm": 0.3231227695941925, "learning_rate": 4.7729510759735995e-06, "loss": 0.3272, "step": 27689 }, { "epoch": 2.815168767791785, "grad_norm": 0.2447250932455063, "learning_rate": 4.772596556650207e-06, "loss": 0.3117, "step": 27690 }, { "epoch": 2.815270435136234, "grad_norm": 0.27277377247810364, "learning_rate": 4.77224203847242e-06, "loss": 0.3138, "step": 27691 }, { "epoch": 2.8153721024806835, "grad_norm": 0.26267340779304504, "learning_rate": 4.771887521442024e-06, "loss": 0.2965, "step": 27692 }, { "epoch": 2.8154737698251324, "grad_norm": 0.26163041591644287, "learning_rate": 4.771533005560804e-06, "loss": 0.3369, "step": 27693 }, { "epoch": 2.8155754371695814, "grad_norm": 0.27172940969467163, "learning_rate": 4.771178490830547e-06, "loss": 0.3063, "step": 27694 }, { "epoch": 2.8156771045140303, "grad_norm": 0.2774404287338257, "learning_rate": 4.770823977253038e-06, "loss": 0.3423, "step": 27695 }, { "epoch": 2.8157787718584792, "grad_norm": 0.2664056718349457, "learning_rate": 4.770469464830062e-06, "loss": 0.3622, "step": 27696 }, { "epoch": 2.815880439202928, "grad_norm": 0.26407870650291443, "learning_rate": 4.770114953563408e-06, "loss": 0.3337, "step": 27697 }, { "epoch": 2.815982106547377, "grad_norm": 0.2686687707901001, "learning_rate": 4.7697604434548595e-06, "loss": 0.3493, "step": 27698 }, { "epoch": 2.816083773891826, "grad_norm": 0.2623235285282135, "learning_rate": 4.769405934506204e-06, "loss": 0.3379, "step": 27699 }, { "epoch": 2.816185441236275, "grad_norm": 0.26796412467956543, "learning_rate": 4.769051426719225e-06, "loss": 0.3552, "step": 27700 }, { "epoch": 2.816287108580724, "grad_norm": 0.28832390904426575, "learning_rate": 4.768696920095711e-06, "loss": 0.3692, "step": 27701 }, { "epoch": 2.816388775925173, "grad_norm": 0.27216193079948425, "learning_rate": 4.768342414637448e-06, "loss": 0.3333, "step": 27702 }, { "epoch": 2.816490443269622, "grad_norm": 0.24646663665771484, "learning_rate": 4.767987910346219e-06, "loss": 0.3439, "step": 27703 }, { "epoch": 2.8165921106140708, "grad_norm": 0.2720484137535095, "learning_rate": 4.767633407223814e-06, "loss": 0.3382, "step": 27704 }, { "epoch": 2.8166937779585197, "grad_norm": 0.2761135697364807, "learning_rate": 4.7672789052720146e-06, "loss": 0.345, "step": 27705 }, { "epoch": 2.8167954453029687, "grad_norm": 0.2531774044036865, "learning_rate": 4.766924404492611e-06, "loss": 0.3294, "step": 27706 }, { "epoch": 2.8168971126474176, "grad_norm": 0.2699624300003052, "learning_rate": 4.766569904887383e-06, "loss": 0.323, "step": 27707 }, { "epoch": 2.8169987799918665, "grad_norm": 0.2679976224899292, "learning_rate": 4.766215406458123e-06, "loss": 0.3013, "step": 27708 }, { "epoch": 2.8171004473363155, "grad_norm": 0.2730898857116699, "learning_rate": 4.765860909206614e-06, "loss": 0.3188, "step": 27709 }, { "epoch": 2.8172021146807644, "grad_norm": 0.2615768313407898, "learning_rate": 4.7655064131346425e-06, "loss": 0.2957, "step": 27710 }, { "epoch": 2.8173037820252134, "grad_norm": 0.2608357071876526, "learning_rate": 4.765151918243995e-06, "loss": 0.3258, "step": 27711 }, { "epoch": 2.8174054493696623, "grad_norm": 0.2665652930736542, "learning_rate": 4.764797424536453e-06, "loss": 0.3154, "step": 27712 }, { "epoch": 2.8175071167141112, "grad_norm": 0.25443482398986816, "learning_rate": 4.7644429320138074e-06, "loss": 0.3517, "step": 27713 }, { "epoch": 2.81760878405856, "grad_norm": 0.27570003271102905, "learning_rate": 4.7640884406778435e-06, "loss": 0.3331, "step": 27714 }, { "epoch": 2.817710451403009, "grad_norm": 0.2541620433330536, "learning_rate": 4.763733950530344e-06, "loss": 0.3439, "step": 27715 }, { "epoch": 2.817812118747458, "grad_norm": 0.28136759996414185, "learning_rate": 4.763379461573099e-06, "loss": 0.3431, "step": 27716 }, { "epoch": 2.817913786091907, "grad_norm": 0.25297781825065613, "learning_rate": 4.76302497380789e-06, "loss": 0.3549, "step": 27717 }, { "epoch": 2.8180154534363564, "grad_norm": 0.2697578966617584, "learning_rate": 4.762670487236505e-06, "loss": 0.3209, "step": 27718 }, { "epoch": 2.8181171207808053, "grad_norm": 0.2560054063796997, "learning_rate": 4.76231600186073e-06, "loss": 0.3384, "step": 27719 }, { "epoch": 2.8182187881252543, "grad_norm": 0.26126858592033386, "learning_rate": 4.761961517682351e-06, "loss": 0.3142, "step": 27720 }, { "epoch": 2.8183204554697032, "grad_norm": 0.2823691964149475, "learning_rate": 4.761607034703154e-06, "loss": 0.3175, "step": 27721 }, { "epoch": 2.818422122814152, "grad_norm": 0.27636536955833435, "learning_rate": 4.761252552924922e-06, "loss": 0.3386, "step": 27722 }, { "epoch": 2.818523790158601, "grad_norm": 0.25673243403434753, "learning_rate": 4.760898072349444e-06, "loss": 0.3115, "step": 27723 }, { "epoch": 2.81862545750305, "grad_norm": 0.29401251673698425, "learning_rate": 4.760543592978506e-06, "loss": 0.3333, "step": 27724 }, { "epoch": 2.818727124847499, "grad_norm": 0.28332728147506714, "learning_rate": 4.760189114813889e-06, "loss": 0.3437, "step": 27725 }, { "epoch": 2.818828792191948, "grad_norm": 0.27352961897850037, "learning_rate": 4.759834637857387e-06, "loss": 0.334, "step": 27726 }, { "epoch": 2.818930459536397, "grad_norm": 0.27085521817207336, "learning_rate": 4.759480162110776e-06, "loss": 0.3275, "step": 27727 }, { "epoch": 2.819032126880846, "grad_norm": 0.2571296691894531, "learning_rate": 4.75912568757585e-06, "loss": 0.3118, "step": 27728 }, { "epoch": 2.8191337942252948, "grad_norm": 0.25417184829711914, "learning_rate": 4.758771214254392e-06, "loss": 0.3472, "step": 27729 }, { "epoch": 2.8192354615697437, "grad_norm": 0.27945008873939514, "learning_rate": 4.758416742148185e-06, "loss": 0.3206, "step": 27730 }, { "epoch": 2.8193371289141926, "grad_norm": 0.25458666682243347, "learning_rate": 4.7580622712590196e-06, "loss": 0.3142, "step": 27731 }, { "epoch": 2.8194387962586416, "grad_norm": 0.25890037417411804, "learning_rate": 4.757707801588676e-06, "loss": 0.3204, "step": 27732 }, { "epoch": 2.819540463603091, "grad_norm": 0.2551298439502716, "learning_rate": 4.7573533331389435e-06, "loss": 0.3302, "step": 27733 }, { "epoch": 2.81964213094754, "grad_norm": 0.25233685970306396, "learning_rate": 4.756998865911611e-06, "loss": 0.2882, "step": 27734 }, { "epoch": 2.819743798291989, "grad_norm": 0.2718753218650818, "learning_rate": 4.756644399908456e-06, "loss": 0.3339, "step": 27735 }, { "epoch": 2.819845465636438, "grad_norm": 0.26075032353401184, "learning_rate": 4.756289935131272e-06, "loss": 0.3188, "step": 27736 }, { "epoch": 2.8199471329808867, "grad_norm": 0.2711741030216217, "learning_rate": 4.755935471581838e-06, "loss": 0.3468, "step": 27737 }, { "epoch": 2.8200488003253357, "grad_norm": 0.27809956669807434, "learning_rate": 4.755581009261944e-06, "loss": 0.3125, "step": 27738 }, { "epoch": 2.8201504676697846, "grad_norm": 0.2866188585758209, "learning_rate": 4.7552265481733774e-06, "loss": 0.3283, "step": 27739 }, { "epoch": 2.8202521350142336, "grad_norm": 0.2774731516838074, "learning_rate": 4.754872088317918e-06, "loss": 0.3154, "step": 27740 }, { "epoch": 2.8203538023586825, "grad_norm": 0.2513309717178345, "learning_rate": 4.754517629697358e-06, "loss": 0.3353, "step": 27741 }, { "epoch": 2.8204554697031314, "grad_norm": 0.25677892565727234, "learning_rate": 4.7541631723134775e-06, "loss": 0.31, "step": 27742 }, { "epoch": 2.8205571370475804, "grad_norm": 0.26526883244514465, "learning_rate": 4.753808716168064e-06, "loss": 0.33, "step": 27743 }, { "epoch": 2.8206588043920293, "grad_norm": 0.27084580063819885, "learning_rate": 4.753454261262907e-06, "loss": 0.3005, "step": 27744 }, { "epoch": 2.8207604717364783, "grad_norm": 0.26555851101875305, "learning_rate": 4.753099807599785e-06, "loss": 0.3435, "step": 27745 }, { "epoch": 2.820862139080927, "grad_norm": 0.25703558325767517, "learning_rate": 4.7527453551804906e-06, "loss": 0.3318, "step": 27746 }, { "epoch": 2.820963806425376, "grad_norm": 0.2773464322090149, "learning_rate": 4.752390904006805e-06, "loss": 0.3562, "step": 27747 }, { "epoch": 2.821065473769825, "grad_norm": 0.26374009251594543, "learning_rate": 4.752036454080513e-06, "loss": 0.3414, "step": 27748 }, { "epoch": 2.821167141114274, "grad_norm": 0.2845635712146759, "learning_rate": 4.751682005403407e-06, "loss": 0.3588, "step": 27749 }, { "epoch": 2.821268808458723, "grad_norm": 0.2653970718383789, "learning_rate": 4.751327557977263e-06, "loss": 0.3299, "step": 27750 }, { "epoch": 2.821370475803172, "grad_norm": 0.2980603575706482, "learning_rate": 4.750973111803876e-06, "loss": 0.3377, "step": 27751 }, { "epoch": 2.821472143147621, "grad_norm": 0.2647099196910858, "learning_rate": 4.750618666885025e-06, "loss": 0.3256, "step": 27752 }, { "epoch": 2.82157381049207, "grad_norm": 0.2865431010723114, "learning_rate": 4.750264223222497e-06, "loss": 0.3253, "step": 27753 }, { "epoch": 2.8216754778365187, "grad_norm": 0.26967760920524597, "learning_rate": 4.749909780818082e-06, "loss": 0.3484, "step": 27754 }, { "epoch": 2.8217771451809677, "grad_norm": 0.2604808807373047, "learning_rate": 4.749555339673558e-06, "loss": 0.3052, "step": 27755 }, { "epoch": 2.8218788125254166, "grad_norm": 0.2550209164619446, "learning_rate": 4.749200899790718e-06, "loss": 0.3292, "step": 27756 }, { "epoch": 2.8219804798698656, "grad_norm": 0.2797539234161377, "learning_rate": 4.748846461171343e-06, "loss": 0.3348, "step": 27757 }, { "epoch": 2.8220821472143145, "grad_norm": 0.28960272669792175, "learning_rate": 4.748492023817218e-06, "loss": 0.38, "step": 27758 }, { "epoch": 2.822183814558764, "grad_norm": 0.2728864848613739, "learning_rate": 4.748137587730134e-06, "loss": 0.3629, "step": 27759 }, { "epoch": 2.822285481903213, "grad_norm": 0.27971139550209045, "learning_rate": 4.747783152911871e-06, "loss": 0.3165, "step": 27760 }, { "epoch": 2.8223871492476618, "grad_norm": 0.2635589838027954, "learning_rate": 4.7474287193642176e-06, "loss": 0.339, "step": 27761 }, { "epoch": 2.8224888165921107, "grad_norm": 0.26933446526527405, "learning_rate": 4.7470742870889565e-06, "loss": 0.3543, "step": 27762 }, { "epoch": 2.8225904839365596, "grad_norm": 0.26057541370391846, "learning_rate": 4.746719856087874e-06, "loss": 0.3273, "step": 27763 }, { "epoch": 2.8226921512810086, "grad_norm": 0.2487877607345581, "learning_rate": 4.746365426362761e-06, "loss": 0.3358, "step": 27764 }, { "epoch": 2.8227938186254575, "grad_norm": 0.2735539674758911, "learning_rate": 4.746010997915396e-06, "loss": 0.3109, "step": 27765 }, { "epoch": 2.8228954859699065, "grad_norm": 0.2648093104362488, "learning_rate": 4.745656570747568e-06, "loss": 0.3239, "step": 27766 }, { "epoch": 2.8229971533143554, "grad_norm": 0.25278979539871216, "learning_rate": 4.745302144861062e-06, "loss": 0.3144, "step": 27767 }, { "epoch": 2.8230988206588044, "grad_norm": 0.2770857512950897, "learning_rate": 4.74494772025766e-06, "loss": 0.3173, "step": 27768 }, { "epoch": 2.8232004880032533, "grad_norm": 0.2669549286365509, "learning_rate": 4.744593296939156e-06, "loss": 0.3509, "step": 27769 }, { "epoch": 2.8233021553477022, "grad_norm": 0.2864850163459778, "learning_rate": 4.744238874907327e-06, "loss": 0.3439, "step": 27770 }, { "epoch": 2.823403822692151, "grad_norm": 0.28235334157943726, "learning_rate": 4.743884454163963e-06, "loss": 0.3772, "step": 27771 }, { "epoch": 2.8235054900366, "grad_norm": 0.27664101123809814, "learning_rate": 4.743530034710847e-06, "loss": 0.3281, "step": 27772 }, { "epoch": 2.823607157381049, "grad_norm": 0.27463915944099426, "learning_rate": 4.743175616549765e-06, "loss": 0.3259, "step": 27773 }, { "epoch": 2.8237088247254984, "grad_norm": 0.28009089827537537, "learning_rate": 4.742821199682506e-06, "loss": 0.3172, "step": 27774 }, { "epoch": 2.8238104920699474, "grad_norm": 0.2646598815917969, "learning_rate": 4.742466784110851e-06, "loss": 0.3132, "step": 27775 }, { "epoch": 2.8239121594143963, "grad_norm": 0.2625223994255066, "learning_rate": 4.742112369836588e-06, "loss": 0.3331, "step": 27776 }, { "epoch": 2.8240138267588453, "grad_norm": 0.25351816415786743, "learning_rate": 4.741757956861501e-06, "loss": 0.3486, "step": 27777 }, { "epoch": 2.824115494103294, "grad_norm": 0.34967854619026184, "learning_rate": 4.741403545187374e-06, "loss": 0.3616, "step": 27778 }, { "epoch": 2.824217161447743, "grad_norm": 0.2673623561859131, "learning_rate": 4.741049134815999e-06, "loss": 0.3341, "step": 27779 }, { "epoch": 2.824318828792192, "grad_norm": 0.26834240555763245, "learning_rate": 4.740694725749154e-06, "loss": 0.3247, "step": 27780 }, { "epoch": 2.824420496136641, "grad_norm": 0.2780051529407501, "learning_rate": 4.740340317988628e-06, "loss": 0.3204, "step": 27781 }, { "epoch": 2.82452216348109, "grad_norm": 0.2953015863895416, "learning_rate": 4.7399859115362055e-06, "loss": 0.3765, "step": 27782 }, { "epoch": 2.824623830825539, "grad_norm": 0.2874147593975067, "learning_rate": 4.739631506393672e-06, "loss": 0.3157, "step": 27783 }, { "epoch": 2.824725498169988, "grad_norm": 0.26756370067596436, "learning_rate": 4.739277102562813e-06, "loss": 0.3242, "step": 27784 }, { "epoch": 2.824827165514437, "grad_norm": 0.265161395072937, "learning_rate": 4.738922700045416e-06, "loss": 0.3252, "step": 27785 }, { "epoch": 2.8249288328588857, "grad_norm": 0.26097598671913147, "learning_rate": 4.738568298843263e-06, "loss": 0.3579, "step": 27786 }, { "epoch": 2.8250305002033347, "grad_norm": 0.2701999247074127, "learning_rate": 4.738213898958139e-06, "loss": 0.3499, "step": 27787 }, { "epoch": 2.8251321675477836, "grad_norm": 0.25743812322616577, "learning_rate": 4.737859500391832e-06, "loss": 0.3312, "step": 27788 }, { "epoch": 2.8252338348922326, "grad_norm": 0.2772805988788605, "learning_rate": 4.737505103146129e-06, "loss": 0.3181, "step": 27789 }, { "epoch": 2.8253355022366815, "grad_norm": 0.26263517141342163, "learning_rate": 4.7371507072228105e-06, "loss": 0.3755, "step": 27790 }, { "epoch": 2.8254371695811304, "grad_norm": 0.28031080961227417, "learning_rate": 4.736796312623666e-06, "loss": 0.3664, "step": 27791 }, { "epoch": 2.8255388369255794, "grad_norm": 0.24965402483940125, "learning_rate": 4.736441919350477e-06, "loss": 0.3661, "step": 27792 }, { "epoch": 2.8256405042700283, "grad_norm": 0.252424418926239, "learning_rate": 4.736087527405032e-06, "loss": 0.3553, "step": 27793 }, { "epoch": 2.8257421716144773, "grad_norm": 0.2706339955329895, "learning_rate": 4.735733136789116e-06, "loss": 0.309, "step": 27794 }, { "epoch": 2.825843838958926, "grad_norm": 0.2660425305366516, "learning_rate": 4.735378747504513e-06, "loss": 0.3052, "step": 27795 }, { "epoch": 2.825945506303375, "grad_norm": 0.2696062922477722, "learning_rate": 4.7350243595530095e-06, "loss": 0.3685, "step": 27796 }, { "epoch": 2.826047173647824, "grad_norm": 0.26411062479019165, "learning_rate": 4.7346699729363895e-06, "loss": 0.3563, "step": 27797 }, { "epoch": 2.826148840992273, "grad_norm": 0.25221338868141174, "learning_rate": 4.734315587656439e-06, "loss": 0.2892, "step": 27798 }, { "epoch": 2.826250508336722, "grad_norm": 0.2661474645137787, "learning_rate": 4.733961203714945e-06, "loss": 0.3318, "step": 27799 }, { "epoch": 2.8263521756811714, "grad_norm": 0.2605784237384796, "learning_rate": 4.7336068211136896e-06, "loss": 0.3116, "step": 27800 }, { "epoch": 2.8264538430256203, "grad_norm": 0.2984568774700165, "learning_rate": 4.7332524398544614e-06, "loss": 0.3099, "step": 27801 }, { "epoch": 2.8265555103700692, "grad_norm": 0.2602458894252777, "learning_rate": 4.732898059939042e-06, "loss": 0.3527, "step": 27802 }, { "epoch": 2.826657177714518, "grad_norm": 0.2657984793186188, "learning_rate": 4.732543681369219e-06, "loss": 0.3186, "step": 27803 }, { "epoch": 2.826758845058967, "grad_norm": 0.2722117304801941, "learning_rate": 4.7321893041467785e-06, "loss": 0.3268, "step": 27804 }, { "epoch": 2.826860512403416, "grad_norm": 0.28069454431533813, "learning_rate": 4.731834928273503e-06, "loss": 0.3389, "step": 27805 }, { "epoch": 2.826962179747865, "grad_norm": 0.25403594970703125, "learning_rate": 4.731480553751182e-06, "loss": 0.3179, "step": 27806 }, { "epoch": 2.827063847092314, "grad_norm": 0.27568817138671875, "learning_rate": 4.7311261805815965e-06, "loss": 0.3216, "step": 27807 }, { "epoch": 2.827165514436763, "grad_norm": 0.2536556124687195, "learning_rate": 4.730771808766533e-06, "loss": 0.3298, "step": 27808 }, { "epoch": 2.827267181781212, "grad_norm": 0.2651956081390381, "learning_rate": 4.730417438307778e-06, "loss": 0.344, "step": 27809 }, { "epoch": 2.8273688491256608, "grad_norm": 0.2554732859134674, "learning_rate": 4.730063069207116e-06, "loss": 0.3134, "step": 27810 }, { "epoch": 2.8274705164701097, "grad_norm": 0.2567623257637024, "learning_rate": 4.729708701466332e-06, "loss": 0.2928, "step": 27811 }, { "epoch": 2.8275721838145587, "grad_norm": 0.2548861801624298, "learning_rate": 4.7293543350872105e-06, "loss": 0.3248, "step": 27812 }, { "epoch": 2.8276738511590076, "grad_norm": 0.28316423296928406, "learning_rate": 4.7289999700715375e-06, "loss": 0.3518, "step": 27813 }, { "epoch": 2.8277755185034565, "grad_norm": 0.26082301139831543, "learning_rate": 4.7286456064211e-06, "loss": 0.3181, "step": 27814 }, { "epoch": 2.827877185847906, "grad_norm": 0.25167185068130493, "learning_rate": 4.72829124413768e-06, "loss": 0.3488, "step": 27815 }, { "epoch": 2.827978853192355, "grad_norm": 0.28600069880485535, "learning_rate": 4.727936883223065e-06, "loss": 0.331, "step": 27816 }, { "epoch": 2.828080520536804, "grad_norm": 0.2702038586139679, "learning_rate": 4.727582523679039e-06, "loss": 0.3204, "step": 27817 }, { "epoch": 2.8281821878812528, "grad_norm": 0.26637107133865356, "learning_rate": 4.727228165507386e-06, "loss": 0.3365, "step": 27818 }, { "epoch": 2.8282838552257017, "grad_norm": 0.26986998319625854, "learning_rate": 4.726873808709894e-06, "loss": 0.3768, "step": 27819 }, { "epoch": 2.8283855225701506, "grad_norm": 0.2573445439338684, "learning_rate": 4.7265194532883475e-06, "loss": 0.3387, "step": 27820 }, { "epoch": 2.8284871899145996, "grad_norm": 0.27424925565719604, "learning_rate": 4.726165099244531e-06, "loss": 0.3003, "step": 27821 }, { "epoch": 2.8285888572590485, "grad_norm": 0.2571616768836975, "learning_rate": 4.725810746580228e-06, "loss": 0.3204, "step": 27822 }, { "epoch": 2.8286905246034975, "grad_norm": 0.2502425014972687, "learning_rate": 4.725456395297227e-06, "loss": 0.3482, "step": 27823 }, { "epoch": 2.8287921919479464, "grad_norm": 0.27774468064308167, "learning_rate": 4.7251020453973105e-06, "loss": 0.3745, "step": 27824 }, { "epoch": 2.8288938592923953, "grad_norm": 0.2821744680404663, "learning_rate": 4.7247476968822645e-06, "loss": 0.3387, "step": 27825 }, { "epoch": 2.8289955266368443, "grad_norm": 0.2753390073776245, "learning_rate": 4.724393349753875e-06, "loss": 0.3384, "step": 27826 }, { "epoch": 2.8290971939812932, "grad_norm": 0.25766241550445557, "learning_rate": 4.724039004013926e-06, "loss": 0.3465, "step": 27827 }, { "epoch": 2.829198861325742, "grad_norm": 0.2547943592071533, "learning_rate": 4.7236846596642014e-06, "loss": 0.311, "step": 27828 }, { "epoch": 2.829300528670191, "grad_norm": 0.27147772908210754, "learning_rate": 4.72333031670649e-06, "loss": 0.3078, "step": 27829 }, { "epoch": 2.82940219601464, "grad_norm": 0.26856812834739685, "learning_rate": 4.7229759751425736e-06, "loss": 0.3057, "step": 27830 }, { "epoch": 2.829503863359089, "grad_norm": 0.2656565010547638, "learning_rate": 4.722621634974239e-06, "loss": 0.3313, "step": 27831 }, { "epoch": 2.829605530703538, "grad_norm": 0.2798367440700531, "learning_rate": 4.722267296203271e-06, "loss": 0.3227, "step": 27832 }, { "epoch": 2.829707198047987, "grad_norm": 0.2741851806640625, "learning_rate": 4.721912958831453e-06, "loss": 0.3167, "step": 27833 }, { "epoch": 2.829808865392436, "grad_norm": 0.2737729847431183, "learning_rate": 4.721558622860573e-06, "loss": 0.332, "step": 27834 }, { "epoch": 2.8299105327368848, "grad_norm": 0.2613731920719147, "learning_rate": 4.721204288292413e-06, "loss": 0.3209, "step": 27835 }, { "epoch": 2.8300122000813337, "grad_norm": 0.2599339187145233, "learning_rate": 4.720849955128761e-06, "loss": 0.3295, "step": 27836 }, { "epoch": 2.8301138674257826, "grad_norm": 0.2847656011581421, "learning_rate": 4.720495623371399e-06, "loss": 0.3395, "step": 27837 }, { "epoch": 2.8302155347702316, "grad_norm": 0.26001837849617004, "learning_rate": 4.720141293022114e-06, "loss": 0.3318, "step": 27838 }, { "epoch": 2.8303172021146805, "grad_norm": 0.27616021037101746, "learning_rate": 4.7197869640826914e-06, "loss": 0.3217, "step": 27839 }, { "epoch": 2.8304188694591295, "grad_norm": 0.24711431562900543, "learning_rate": 4.7194326365549145e-06, "loss": 0.3322, "step": 27840 }, { "epoch": 2.830520536803579, "grad_norm": 0.271396279335022, "learning_rate": 4.719078310440571e-06, "loss": 0.3406, "step": 27841 }, { "epoch": 2.830622204148028, "grad_norm": 0.29621806740760803, "learning_rate": 4.718723985741443e-06, "loss": 0.3333, "step": 27842 }, { "epoch": 2.8307238714924767, "grad_norm": 0.2635299861431122, "learning_rate": 4.718369662459316e-06, "loss": 0.3749, "step": 27843 }, { "epoch": 2.8308255388369257, "grad_norm": 0.25281888246536255, "learning_rate": 4.718015340595978e-06, "loss": 0.322, "step": 27844 }, { "epoch": 2.8309272061813746, "grad_norm": 0.2865552008152008, "learning_rate": 4.71766102015321e-06, "loss": 0.3528, "step": 27845 }, { "epoch": 2.8310288735258236, "grad_norm": 0.25611814856529236, "learning_rate": 4.7173067011328e-06, "loss": 0.303, "step": 27846 }, { "epoch": 2.8311305408702725, "grad_norm": 0.27669915556907654, "learning_rate": 4.71695238353653e-06, "loss": 0.3152, "step": 27847 }, { "epoch": 2.8312322082147214, "grad_norm": 0.2549094259738922, "learning_rate": 4.7165980673661865e-06, "loss": 0.3137, "step": 27848 }, { "epoch": 2.8313338755591704, "grad_norm": 0.27039211988449097, "learning_rate": 4.716243752623556e-06, "loss": 0.3275, "step": 27849 }, { "epoch": 2.8314355429036193, "grad_norm": 0.25558775663375854, "learning_rate": 4.715889439310421e-06, "loss": 0.3465, "step": 27850 }, { "epoch": 2.8315372102480683, "grad_norm": 0.28146129846572876, "learning_rate": 4.715535127428568e-06, "loss": 0.3537, "step": 27851 }, { "epoch": 2.831638877592517, "grad_norm": 0.26333165168762207, "learning_rate": 4.715180816979782e-06, "loss": 0.3235, "step": 27852 }, { "epoch": 2.831740544936966, "grad_norm": 0.24888837337493896, "learning_rate": 4.714826507965845e-06, "loss": 0.3071, "step": 27853 }, { "epoch": 2.831842212281415, "grad_norm": 0.2679678201675415, "learning_rate": 4.714472200388547e-06, "loss": 0.3207, "step": 27854 }, { "epoch": 2.831943879625864, "grad_norm": 0.26854562759399414, "learning_rate": 4.714117894249669e-06, "loss": 0.3405, "step": 27855 }, { "epoch": 2.8320455469703134, "grad_norm": 0.2669455111026764, "learning_rate": 4.713763589550998e-06, "loss": 0.3343, "step": 27856 }, { "epoch": 2.8321472143147624, "grad_norm": 0.2628517746925354, "learning_rate": 4.713409286294317e-06, "loss": 0.3035, "step": 27857 }, { "epoch": 2.8322488816592113, "grad_norm": 0.2694709599018097, "learning_rate": 4.713054984481412e-06, "loss": 0.3304, "step": 27858 }, { "epoch": 2.8323505490036602, "grad_norm": 0.2461552917957306, "learning_rate": 4.712700684114068e-06, "loss": 0.3217, "step": 27859 }, { "epoch": 2.832452216348109, "grad_norm": 0.2667686939239502, "learning_rate": 4.7123463851940694e-06, "loss": 0.2945, "step": 27860 }, { "epoch": 2.832553883692558, "grad_norm": 0.2630535066127777, "learning_rate": 4.711992087723203e-06, "loss": 0.3305, "step": 27861 }, { "epoch": 2.832655551037007, "grad_norm": 0.26592111587524414, "learning_rate": 4.711637791703249e-06, "loss": 0.2986, "step": 27862 }, { "epoch": 2.832757218381456, "grad_norm": 0.2856639623641968, "learning_rate": 4.711283497135997e-06, "loss": 0.3488, "step": 27863 }, { "epoch": 2.832858885725905, "grad_norm": 0.2653272747993469, "learning_rate": 4.710929204023231e-06, "loss": 0.3465, "step": 27864 }, { "epoch": 2.832960553070354, "grad_norm": 0.2715091407299042, "learning_rate": 4.710574912366734e-06, "loss": 0.317, "step": 27865 }, { "epoch": 2.833062220414803, "grad_norm": 0.27850452065467834, "learning_rate": 4.710220622168293e-06, "loss": 0.3328, "step": 27866 }, { "epoch": 2.8331638877592518, "grad_norm": 0.2769072949886322, "learning_rate": 4.70986633342969e-06, "loss": 0.3104, "step": 27867 }, { "epoch": 2.8332655551037007, "grad_norm": 0.2776549458503723, "learning_rate": 4.709512046152711e-06, "loss": 0.3549, "step": 27868 }, { "epoch": 2.8333672224481496, "grad_norm": 0.27151787281036377, "learning_rate": 4.709157760339143e-06, "loss": 0.3201, "step": 27869 }, { "epoch": 2.8334688897925986, "grad_norm": 0.2818654477596283, "learning_rate": 4.708803475990768e-06, "loss": 0.3352, "step": 27870 }, { "epoch": 2.8335705571370475, "grad_norm": 0.2567504644393921, "learning_rate": 4.708449193109373e-06, "loss": 0.3541, "step": 27871 }, { "epoch": 2.8336722244814965, "grad_norm": 0.27003762125968933, "learning_rate": 4.70809491169674e-06, "loss": 0.3332, "step": 27872 }, { "epoch": 2.8337738918259454, "grad_norm": 0.2641984522342682, "learning_rate": 4.707740631754657e-06, "loss": 0.3171, "step": 27873 }, { "epoch": 2.8338755591703944, "grad_norm": 0.2641505300998688, "learning_rate": 4.707386353284907e-06, "loss": 0.3571, "step": 27874 }, { "epoch": 2.8339772265148433, "grad_norm": 0.2555549740791321, "learning_rate": 4.707032076289274e-06, "loss": 0.372, "step": 27875 }, { "epoch": 2.8340788938592922, "grad_norm": 0.2518356144428253, "learning_rate": 4.7066778007695465e-06, "loss": 0.3286, "step": 27876 }, { "epoch": 2.834180561203741, "grad_norm": 0.2422001212835312, "learning_rate": 4.706323526727502e-06, "loss": 0.3167, "step": 27877 }, { "epoch": 2.83428222854819, "grad_norm": 0.25278207659721375, "learning_rate": 4.7059692541649325e-06, "loss": 0.3353, "step": 27878 }, { "epoch": 2.834383895892639, "grad_norm": 0.2782725691795349, "learning_rate": 4.705614983083621e-06, "loss": 0.3209, "step": 27879 }, { "epoch": 2.834485563237088, "grad_norm": 0.2552995979785919, "learning_rate": 4.705260713485349e-06, "loss": 0.3297, "step": 27880 }, { "epoch": 2.834587230581537, "grad_norm": 0.2532493770122528, "learning_rate": 4.704906445371906e-06, "loss": 0.3097, "step": 27881 }, { "epoch": 2.8346888979259863, "grad_norm": 0.24730151891708374, "learning_rate": 4.704552178745071e-06, "loss": 0.3281, "step": 27882 }, { "epoch": 2.8347905652704353, "grad_norm": 0.25126996636390686, "learning_rate": 4.704197913606633e-06, "loss": 0.3271, "step": 27883 }, { "epoch": 2.834892232614884, "grad_norm": 0.250169038772583, "learning_rate": 4.703843649958378e-06, "loss": 0.3334, "step": 27884 }, { "epoch": 2.834993899959333, "grad_norm": 0.2608771324157715, "learning_rate": 4.703489387802086e-06, "loss": 0.3377, "step": 27885 }, { "epoch": 2.835095567303782, "grad_norm": 0.25512731075286865, "learning_rate": 4.703135127139546e-06, "loss": 0.334, "step": 27886 }, { "epoch": 2.835197234648231, "grad_norm": 0.25114384293556213, "learning_rate": 4.702780867972538e-06, "loss": 0.3684, "step": 27887 }, { "epoch": 2.83529890199268, "grad_norm": 0.29201459884643555, "learning_rate": 4.70242661030285e-06, "loss": 0.3027, "step": 27888 }, { "epoch": 2.835400569337129, "grad_norm": 0.27906185388565063, "learning_rate": 4.702072354132267e-06, "loss": 0.3282, "step": 27889 }, { "epoch": 2.835502236681578, "grad_norm": 0.26924630999565125, "learning_rate": 4.701718099462572e-06, "loss": 0.3223, "step": 27890 }, { "epoch": 2.835603904026027, "grad_norm": 0.2709257900714874, "learning_rate": 4.701363846295553e-06, "loss": 0.3337, "step": 27891 }, { "epoch": 2.8357055713704757, "grad_norm": 0.27158552408218384, "learning_rate": 4.701009594632988e-06, "loss": 0.3274, "step": 27892 }, { "epoch": 2.8358072387149247, "grad_norm": 0.2963733971118927, "learning_rate": 4.700655344476669e-06, "loss": 0.36, "step": 27893 }, { "epoch": 2.8359089060593736, "grad_norm": 0.24520227313041687, "learning_rate": 4.700301095828375e-06, "loss": 0.3111, "step": 27894 }, { "epoch": 2.8360105734038226, "grad_norm": 0.2765887677669525, "learning_rate": 4.699946848689892e-06, "loss": 0.313, "step": 27895 }, { "epoch": 2.8361122407482715, "grad_norm": 0.266169935464859, "learning_rate": 4.6995926030630085e-06, "loss": 0.3601, "step": 27896 }, { "epoch": 2.836213908092721, "grad_norm": 0.2887918949127197, "learning_rate": 4.699238358949503e-06, "loss": 0.3794, "step": 27897 }, { "epoch": 2.83631557543717, "grad_norm": 0.27706047892570496, "learning_rate": 4.698884116351166e-06, "loss": 0.3518, "step": 27898 }, { "epoch": 2.8364172427816188, "grad_norm": 0.26283568143844604, "learning_rate": 4.6985298752697775e-06, "loss": 0.339, "step": 27899 }, { "epoch": 2.8365189101260677, "grad_norm": 0.27100077271461487, "learning_rate": 4.698175635707123e-06, "loss": 0.3285, "step": 27900 }, { "epoch": 2.8366205774705167, "grad_norm": 0.2697254419326782, "learning_rate": 4.697821397664991e-06, "loss": 0.3564, "step": 27901 }, { "epoch": 2.8367222448149656, "grad_norm": 0.2978053092956543, "learning_rate": 4.697467161145159e-06, "loss": 0.3571, "step": 27902 }, { "epoch": 2.8368239121594145, "grad_norm": 0.2575637698173523, "learning_rate": 4.69711292614942e-06, "loss": 0.291, "step": 27903 }, { "epoch": 2.8369255795038635, "grad_norm": 0.2729351222515106, "learning_rate": 4.696758692679551e-06, "loss": 0.3413, "step": 27904 }, { "epoch": 2.8370272468483124, "grad_norm": 0.2626492381095886, "learning_rate": 4.696404460737339e-06, "loss": 0.3403, "step": 27905 }, { "epoch": 2.8371289141927614, "grad_norm": 0.2933133542537689, "learning_rate": 4.696050230324572e-06, "loss": 0.33, "step": 27906 }, { "epoch": 2.8372305815372103, "grad_norm": 0.288898229598999, "learning_rate": 4.695696001443029e-06, "loss": 0.3139, "step": 27907 }, { "epoch": 2.8373322488816592, "grad_norm": 0.268705815076828, "learning_rate": 4.6953417740945e-06, "loss": 0.3172, "step": 27908 }, { "epoch": 2.837433916226108, "grad_norm": 0.29397153854370117, "learning_rate": 4.694987548280765e-06, "loss": 0.3621, "step": 27909 }, { "epoch": 2.837535583570557, "grad_norm": 0.28292933106422424, "learning_rate": 4.694633324003609e-06, "loss": 0.3255, "step": 27910 }, { "epoch": 2.837637250915006, "grad_norm": 0.29717162251472473, "learning_rate": 4.694279101264821e-06, "loss": 0.3479, "step": 27911 }, { "epoch": 2.837738918259455, "grad_norm": 0.26882001757621765, "learning_rate": 4.69392488006618e-06, "loss": 0.3588, "step": 27912 }, { "epoch": 2.837840585603904, "grad_norm": 0.28566840291023254, "learning_rate": 4.693570660409474e-06, "loss": 0.3788, "step": 27913 }, { "epoch": 2.837942252948353, "grad_norm": 0.25140881538391113, "learning_rate": 4.693216442296485e-06, "loss": 0.3059, "step": 27914 }, { "epoch": 2.838043920292802, "grad_norm": 0.288245290517807, "learning_rate": 4.692862225728997e-06, "loss": 0.3113, "step": 27915 }, { "epoch": 2.8381455876372508, "grad_norm": 0.2664780020713806, "learning_rate": 4.692508010708801e-06, "loss": 0.3016, "step": 27916 }, { "epoch": 2.8382472549816997, "grad_norm": 0.2901361584663391, "learning_rate": 4.692153797237673e-06, "loss": 0.3463, "step": 27917 }, { "epoch": 2.8383489223261487, "grad_norm": 0.299841970205307, "learning_rate": 4.691799585317402e-06, "loss": 0.3601, "step": 27918 }, { "epoch": 2.8384505896705976, "grad_norm": 0.25182703137397766, "learning_rate": 4.691445374949771e-06, "loss": 0.3264, "step": 27919 }, { "epoch": 2.8385522570150465, "grad_norm": 0.28645822405815125, "learning_rate": 4.691091166136564e-06, "loss": 0.331, "step": 27920 }, { "epoch": 2.8386539243594955, "grad_norm": 0.26997658610343933, "learning_rate": 4.690736958879569e-06, "loss": 0.3362, "step": 27921 }, { "epoch": 2.8387555917039444, "grad_norm": 0.24906539916992188, "learning_rate": 4.690382753180566e-06, "loss": 0.3094, "step": 27922 }, { "epoch": 2.838857259048394, "grad_norm": 0.26133018732070923, "learning_rate": 4.690028549041341e-06, "loss": 0.3263, "step": 27923 }, { "epoch": 2.8389589263928428, "grad_norm": 0.27222883701324463, "learning_rate": 4.689674346463679e-06, "loss": 0.3608, "step": 27924 }, { "epoch": 2.8390605937372917, "grad_norm": 0.28562331199645996, "learning_rate": 4.689320145449362e-06, "loss": 0.3403, "step": 27925 }, { "epoch": 2.8391622610817406, "grad_norm": 0.2740527093410492, "learning_rate": 4.68896594600018e-06, "loss": 0.3162, "step": 27926 }, { "epoch": 2.8392639284261896, "grad_norm": 0.27326515316963196, "learning_rate": 4.6886117481179105e-06, "loss": 0.3179, "step": 27927 }, { "epoch": 2.8393655957706385, "grad_norm": 0.2798304557800293, "learning_rate": 4.688257551804343e-06, "loss": 0.3609, "step": 27928 }, { "epoch": 2.8394672631150875, "grad_norm": 0.2668902277946472, "learning_rate": 4.687903357061258e-06, "loss": 0.3591, "step": 27929 }, { "epoch": 2.8395689304595364, "grad_norm": 0.25087565183639526, "learning_rate": 4.687549163890441e-06, "loss": 0.3305, "step": 27930 }, { "epoch": 2.8396705978039853, "grad_norm": 0.2546628713607788, "learning_rate": 4.687194972293681e-06, "loss": 0.3298, "step": 27931 }, { "epoch": 2.8397722651484343, "grad_norm": 0.2674461603164673, "learning_rate": 4.686840782272756e-06, "loss": 0.3291, "step": 27932 }, { "epoch": 2.8398739324928832, "grad_norm": 0.26798468828201294, "learning_rate": 4.6864865938294535e-06, "loss": 0.3321, "step": 27933 }, { "epoch": 2.839975599837332, "grad_norm": 0.29836443066596985, "learning_rate": 4.686132406965556e-06, "loss": 0.3574, "step": 27934 }, { "epoch": 2.840077267181781, "grad_norm": 0.2836330235004425, "learning_rate": 4.685778221682848e-06, "loss": 0.3262, "step": 27935 }, { "epoch": 2.84017893452623, "grad_norm": 0.2636905014514923, "learning_rate": 4.685424037983117e-06, "loss": 0.3447, "step": 27936 }, { "epoch": 2.840280601870679, "grad_norm": 0.28937041759490967, "learning_rate": 4.685069855868143e-06, "loss": 0.3528, "step": 27937 }, { "epoch": 2.8403822692151284, "grad_norm": 0.2692089378833771, "learning_rate": 4.684715675339714e-06, "loss": 0.3762, "step": 27938 }, { "epoch": 2.8404839365595773, "grad_norm": 0.26904189586639404, "learning_rate": 4.684361496399611e-06, "loss": 0.3119, "step": 27939 }, { "epoch": 2.8405856039040263, "grad_norm": 0.29537275433540344, "learning_rate": 4.6840073190496195e-06, "loss": 0.354, "step": 27940 }, { "epoch": 2.840687271248475, "grad_norm": 0.2772676646709442, "learning_rate": 4.683653143291526e-06, "loss": 0.3103, "step": 27941 }, { "epoch": 2.840788938592924, "grad_norm": 0.26270192861557007, "learning_rate": 4.683298969127111e-06, "loss": 0.3212, "step": 27942 }, { "epoch": 2.840890605937373, "grad_norm": 0.28275421261787415, "learning_rate": 4.682944796558162e-06, "loss": 0.3546, "step": 27943 }, { "epoch": 2.840992273281822, "grad_norm": 0.2707546651363373, "learning_rate": 4.68259062558646e-06, "loss": 0.3344, "step": 27944 }, { "epoch": 2.841093940626271, "grad_norm": 0.29286810755729675, "learning_rate": 4.682236456213792e-06, "loss": 0.3448, "step": 27945 }, { "epoch": 2.84119560797072, "grad_norm": 0.2955927550792694, "learning_rate": 4.681882288441942e-06, "loss": 0.3078, "step": 27946 }, { "epoch": 2.841297275315169, "grad_norm": 0.27003395557403564, "learning_rate": 4.681528122272692e-06, "loss": 0.3007, "step": 27947 }, { "epoch": 2.841398942659618, "grad_norm": 0.26879388093948364, "learning_rate": 4.6811739577078295e-06, "loss": 0.2966, "step": 27948 }, { "epoch": 2.8415006100040667, "grad_norm": 0.2614217698574066, "learning_rate": 4.680819794749135e-06, "loss": 0.3314, "step": 27949 }, { "epoch": 2.8416022773485157, "grad_norm": 0.26944875717163086, "learning_rate": 4.680465633398395e-06, "loss": 0.3367, "step": 27950 }, { "epoch": 2.8417039446929646, "grad_norm": 0.25887569785118103, "learning_rate": 4.680111473657394e-06, "loss": 0.3173, "step": 27951 }, { "epoch": 2.8418056120374136, "grad_norm": 0.29090601205825806, "learning_rate": 4.679757315527914e-06, "loss": 0.3125, "step": 27952 }, { "epoch": 2.8419072793818625, "grad_norm": 0.26303353905677795, "learning_rate": 4.679403159011741e-06, "loss": 0.3109, "step": 27953 }, { "epoch": 2.8420089467263114, "grad_norm": 0.279881089925766, "learning_rate": 4.67904900411066e-06, "loss": 0.3702, "step": 27954 }, { "epoch": 2.8421106140707604, "grad_norm": 0.27137112617492676, "learning_rate": 4.678694850826452e-06, "loss": 0.3298, "step": 27955 }, { "epoch": 2.8422122814152093, "grad_norm": 0.2688293159008026, "learning_rate": 4.678340699160904e-06, "loss": 0.3019, "step": 27956 }, { "epoch": 2.8423139487596583, "grad_norm": 0.2549576163291931, "learning_rate": 4.677986549115798e-06, "loss": 0.3231, "step": 27957 }, { "epoch": 2.842415616104107, "grad_norm": 0.26509881019592285, "learning_rate": 4.677632400692921e-06, "loss": 0.3367, "step": 27958 }, { "epoch": 2.842517283448556, "grad_norm": 0.24911077320575714, "learning_rate": 4.677278253894054e-06, "loss": 0.344, "step": 27959 }, { "epoch": 2.842618950793005, "grad_norm": 0.24858863651752472, "learning_rate": 4.676924108720983e-06, "loss": 0.3221, "step": 27960 }, { "epoch": 2.842720618137454, "grad_norm": 0.2766461670398712, "learning_rate": 4.676569965175492e-06, "loss": 0.3129, "step": 27961 }, { "epoch": 2.842822285481903, "grad_norm": 0.2709004282951355, "learning_rate": 4.676215823259364e-06, "loss": 0.3313, "step": 27962 }, { "epoch": 2.842923952826352, "grad_norm": 0.26952558755874634, "learning_rate": 4.6758616829743845e-06, "loss": 0.3375, "step": 27963 }, { "epoch": 2.8430256201708013, "grad_norm": 0.255195677280426, "learning_rate": 4.675507544322336e-06, "loss": 0.3115, "step": 27964 }, { "epoch": 2.8431272875152502, "grad_norm": 0.25259867310523987, "learning_rate": 4.675153407305003e-06, "loss": 0.2872, "step": 27965 }, { "epoch": 2.843228954859699, "grad_norm": 0.27596962451934814, "learning_rate": 4.6747992719241715e-06, "loss": 0.3255, "step": 27966 }, { "epoch": 2.843330622204148, "grad_norm": 0.26671847701072693, "learning_rate": 4.674445138181622e-06, "loss": 0.3485, "step": 27967 }, { "epoch": 2.843432289548597, "grad_norm": 0.27168983221054077, "learning_rate": 4.674091006079143e-06, "loss": 0.3336, "step": 27968 }, { "epoch": 2.843533956893046, "grad_norm": 0.2626565396785736, "learning_rate": 4.673736875618514e-06, "loss": 0.3366, "step": 27969 }, { "epoch": 2.843635624237495, "grad_norm": 0.2673196792602539, "learning_rate": 4.673382746801522e-06, "loss": 0.3234, "step": 27970 }, { "epoch": 2.843737291581944, "grad_norm": 0.2801857590675354, "learning_rate": 4.67302861962995e-06, "loss": 0.3275, "step": 27971 }, { "epoch": 2.843838958926393, "grad_norm": 0.2735668122768402, "learning_rate": 4.672674494105581e-06, "loss": 0.3314, "step": 27972 }, { "epoch": 2.8439406262708418, "grad_norm": 0.2786691188812256, "learning_rate": 4.6723203702302025e-06, "loss": 0.3506, "step": 27973 }, { "epoch": 2.8440422936152907, "grad_norm": 0.2766610383987427, "learning_rate": 4.671966248005594e-06, "loss": 0.3456, "step": 27974 }, { "epoch": 2.8441439609597396, "grad_norm": 0.2846834063529968, "learning_rate": 4.671612127433541e-06, "loss": 0.3018, "step": 27975 }, { "epoch": 2.8442456283041886, "grad_norm": 0.2595001459121704, "learning_rate": 4.67125800851583e-06, "loss": 0.3302, "step": 27976 }, { "epoch": 2.8443472956486375, "grad_norm": 0.2592724561691284, "learning_rate": 4.670903891254242e-06, "loss": 0.3329, "step": 27977 }, { "epoch": 2.8444489629930865, "grad_norm": 0.25792166590690613, "learning_rate": 4.670549775650564e-06, "loss": 0.3571, "step": 27978 }, { "epoch": 2.844550630337536, "grad_norm": 0.27202266454696655, "learning_rate": 4.670195661706575e-06, "loss": 0.3062, "step": 27979 }, { "epoch": 2.844652297681985, "grad_norm": 0.26903387904167175, "learning_rate": 4.669841549424063e-06, "loss": 0.3459, "step": 27980 }, { "epoch": 2.8447539650264337, "grad_norm": 0.25825023651123047, "learning_rate": 4.669487438804812e-06, "loss": 0.3311, "step": 27981 }, { "epoch": 2.8448556323708827, "grad_norm": 0.27000147104263306, "learning_rate": 4.669133329850603e-06, "loss": 0.3303, "step": 27982 }, { "epoch": 2.8449572997153316, "grad_norm": 0.26717451214790344, "learning_rate": 4.668779222563223e-06, "loss": 0.3584, "step": 27983 }, { "epoch": 2.8450589670597806, "grad_norm": 0.2670895755290985, "learning_rate": 4.668425116944454e-06, "loss": 0.341, "step": 27984 }, { "epoch": 2.8451606344042295, "grad_norm": 0.26123833656311035, "learning_rate": 4.668071012996079e-06, "loss": 0.3382, "step": 27985 }, { "epoch": 2.8452623017486784, "grad_norm": 0.29003283381462097, "learning_rate": 4.6677169107198865e-06, "loss": 0.308, "step": 27986 }, { "epoch": 2.8453639690931274, "grad_norm": 0.2550899088382721, "learning_rate": 4.6673628101176545e-06, "loss": 0.3235, "step": 27987 }, { "epoch": 2.8454656364375763, "grad_norm": 0.26079562306404114, "learning_rate": 4.667008711191171e-06, "loss": 0.3342, "step": 27988 }, { "epoch": 2.8455673037820253, "grad_norm": 0.25942370295524597, "learning_rate": 4.666654613942218e-06, "loss": 0.3371, "step": 27989 }, { "epoch": 2.845668971126474, "grad_norm": 0.25037023425102234, "learning_rate": 4.666300518372579e-06, "loss": 0.3464, "step": 27990 }, { "epoch": 2.845770638470923, "grad_norm": 0.26559242606163025, "learning_rate": 4.6659464244840416e-06, "loss": 0.3459, "step": 27991 }, { "epoch": 2.845872305815372, "grad_norm": 0.2614316940307617, "learning_rate": 4.665592332278384e-06, "loss": 0.3397, "step": 27992 }, { "epoch": 2.845973973159821, "grad_norm": 0.24172037839889526, "learning_rate": 4.665238241757394e-06, "loss": 0.3031, "step": 27993 }, { "epoch": 2.84607564050427, "grad_norm": 0.2624918520450592, "learning_rate": 4.6648841529228535e-06, "loss": 0.3322, "step": 27994 }, { "epoch": 2.846177307848719, "grad_norm": 0.24274073541164398, "learning_rate": 4.664530065776547e-06, "loss": 0.3279, "step": 27995 }, { "epoch": 2.846278975193168, "grad_norm": 0.26482003927230835, "learning_rate": 4.664175980320259e-06, "loss": 0.3462, "step": 27996 }, { "epoch": 2.846380642537617, "grad_norm": 0.2715786099433899, "learning_rate": 4.663821896555771e-06, "loss": 0.3181, "step": 27997 }, { "epoch": 2.8464823098820657, "grad_norm": 0.2472844272851944, "learning_rate": 4.66346781448487e-06, "loss": 0.3243, "step": 27998 }, { "epoch": 2.8465839772265147, "grad_norm": 0.25440868735313416, "learning_rate": 4.663113734109338e-06, "loss": 0.3071, "step": 27999 }, { "epoch": 2.8466856445709636, "grad_norm": 0.2594207525253296, "learning_rate": 4.662759655430958e-06, "loss": 0.3591, "step": 28000 }, { "epoch": 2.8467873119154126, "grad_norm": 0.26269468665122986, "learning_rate": 4.662405578451516e-06, "loss": 0.3272, "step": 28001 }, { "epoch": 2.8468889792598615, "grad_norm": 0.2753060758113861, "learning_rate": 4.662051503172794e-06, "loss": 0.332, "step": 28002 }, { "epoch": 2.8469906466043104, "grad_norm": 0.28454384207725525, "learning_rate": 4.661697429596577e-06, "loss": 0.3242, "step": 28003 }, { "epoch": 2.8470923139487594, "grad_norm": 0.3034392297267914, "learning_rate": 4.661343357724646e-06, "loss": 0.3417, "step": 28004 }, { "epoch": 2.8471939812932088, "grad_norm": 0.25981834530830383, "learning_rate": 4.660989287558787e-06, "loss": 0.3278, "step": 28005 }, { "epoch": 2.8472956486376577, "grad_norm": 0.27205124497413635, "learning_rate": 4.660635219100785e-06, "loss": 0.3209, "step": 28006 }, { "epoch": 2.8473973159821067, "grad_norm": 0.27633461356163025, "learning_rate": 4.660281152352421e-06, "loss": 0.3201, "step": 28007 }, { "epoch": 2.8474989833265556, "grad_norm": 0.2639673948287964, "learning_rate": 4.6599270873154806e-06, "loss": 0.3364, "step": 28008 }, { "epoch": 2.8476006506710045, "grad_norm": 0.2604687213897705, "learning_rate": 4.6595730239917455e-06, "loss": 0.3222, "step": 28009 }, { "epoch": 2.8477023180154535, "grad_norm": 0.2574416697025299, "learning_rate": 4.659218962383001e-06, "loss": 0.31, "step": 28010 }, { "epoch": 2.8478039853599024, "grad_norm": 0.25485941767692566, "learning_rate": 4.658864902491031e-06, "loss": 0.3454, "step": 28011 }, { "epoch": 2.8479056527043514, "grad_norm": 0.25165224075317383, "learning_rate": 4.658510844317618e-06, "loss": 0.3202, "step": 28012 }, { "epoch": 2.8480073200488003, "grad_norm": 0.27366483211517334, "learning_rate": 4.658156787864548e-06, "loss": 0.3139, "step": 28013 }, { "epoch": 2.8481089873932492, "grad_norm": 0.24472394585609436, "learning_rate": 4.657802733133601e-06, "loss": 0.3316, "step": 28014 }, { "epoch": 2.848210654737698, "grad_norm": 0.2611159682273865, "learning_rate": 4.657448680126562e-06, "loss": 0.3333, "step": 28015 }, { "epoch": 2.848312322082147, "grad_norm": 0.2492556869983673, "learning_rate": 4.657094628845217e-06, "loss": 0.3046, "step": 28016 }, { "epoch": 2.848413989426596, "grad_norm": 0.29655054211616516, "learning_rate": 4.656740579291345e-06, "loss": 0.3415, "step": 28017 }, { "epoch": 2.848515656771045, "grad_norm": 0.24491970241069794, "learning_rate": 4.656386531466736e-06, "loss": 0.3641, "step": 28018 }, { "epoch": 2.848617324115494, "grad_norm": 0.26295340061187744, "learning_rate": 4.656032485373168e-06, "loss": 0.3378, "step": 28019 }, { "epoch": 2.8487189914599433, "grad_norm": 0.2510681450366974, "learning_rate": 4.655678441012425e-06, "loss": 0.3607, "step": 28020 }, { "epoch": 2.8488206588043923, "grad_norm": 0.2551077604293823, "learning_rate": 4.655324398386295e-06, "loss": 0.3045, "step": 28021 }, { "epoch": 2.848922326148841, "grad_norm": 0.2509268522262573, "learning_rate": 4.654970357496558e-06, "loss": 0.3258, "step": 28022 }, { "epoch": 2.84902399349329, "grad_norm": 0.28008419275283813, "learning_rate": 4.654616318344999e-06, "loss": 0.3323, "step": 28023 }, { "epoch": 2.849125660837739, "grad_norm": 0.2564491331577301, "learning_rate": 4.654262280933399e-06, "loss": 0.3254, "step": 28024 }, { "epoch": 2.849227328182188, "grad_norm": 0.26460719108581543, "learning_rate": 4.653908245263544e-06, "loss": 0.3375, "step": 28025 }, { "epoch": 2.849328995526637, "grad_norm": 0.24511976540088654, "learning_rate": 4.653554211337218e-06, "loss": 0.3122, "step": 28026 }, { "epoch": 2.849430662871086, "grad_norm": 0.2633606493473053, "learning_rate": 4.653200179156203e-06, "loss": 0.3837, "step": 28027 }, { "epoch": 2.849532330215535, "grad_norm": 0.23504602909088135, "learning_rate": 4.652846148722284e-06, "loss": 0.3172, "step": 28028 }, { "epoch": 2.849633997559984, "grad_norm": 0.26796114444732666, "learning_rate": 4.652492120037242e-06, "loss": 0.3188, "step": 28029 }, { "epoch": 2.8497356649044328, "grad_norm": 0.2580528259277344, "learning_rate": 4.652138093102862e-06, "loss": 0.2993, "step": 28030 }, { "epoch": 2.8498373322488817, "grad_norm": 0.26099589467048645, "learning_rate": 4.651784067920929e-06, "loss": 0.3287, "step": 28031 }, { "epoch": 2.8499389995933306, "grad_norm": 0.27600499987602234, "learning_rate": 4.651430044493224e-06, "loss": 0.3234, "step": 28032 }, { "epoch": 2.8500406669377796, "grad_norm": 0.2750168740749359, "learning_rate": 4.651076022821534e-06, "loss": 0.3383, "step": 28033 }, { "epoch": 2.8501423342822285, "grad_norm": 0.25743991136550903, "learning_rate": 4.6507220029076375e-06, "loss": 0.3437, "step": 28034 }, { "epoch": 2.8502440016266775, "grad_norm": 0.2482653707265854, "learning_rate": 4.65036798475332e-06, "loss": 0.3232, "step": 28035 }, { "epoch": 2.8503456689711264, "grad_norm": 0.2813675105571747, "learning_rate": 4.650013968360368e-06, "loss": 0.3044, "step": 28036 }, { "epoch": 2.8504473363155753, "grad_norm": 0.265842080116272, "learning_rate": 4.649659953730561e-06, "loss": 0.3549, "step": 28037 }, { "epoch": 2.8505490036600243, "grad_norm": 0.24192263185977936, "learning_rate": 4.649305940865685e-06, "loss": 0.2916, "step": 28038 }, { "epoch": 2.8506506710044732, "grad_norm": 0.26691126823425293, "learning_rate": 4.6489519297675206e-06, "loss": 0.3314, "step": 28039 }, { "epoch": 2.850752338348922, "grad_norm": 0.2882497012615204, "learning_rate": 4.648597920437853e-06, "loss": 0.3314, "step": 28040 }, { "epoch": 2.850854005693371, "grad_norm": 0.27361926436424255, "learning_rate": 4.648243912878467e-06, "loss": 0.3498, "step": 28041 }, { "epoch": 2.85095567303782, "grad_norm": 0.27503493428230286, "learning_rate": 4.647889907091144e-06, "loss": 0.3518, "step": 28042 }, { "epoch": 2.851057340382269, "grad_norm": 0.27543550729751587, "learning_rate": 4.64753590307767e-06, "loss": 0.3434, "step": 28043 }, { "epoch": 2.851159007726718, "grad_norm": 0.26380711793899536, "learning_rate": 4.647181900839822e-06, "loss": 0.3256, "step": 28044 }, { "epoch": 2.851260675071167, "grad_norm": 0.25560200214385986, "learning_rate": 4.64682790037939e-06, "loss": 0.3445, "step": 28045 }, { "epoch": 2.8513623424156163, "grad_norm": 0.2585793137550354, "learning_rate": 4.646473901698157e-06, "loss": 0.3441, "step": 28046 }, { "epoch": 2.851464009760065, "grad_norm": 0.26685285568237305, "learning_rate": 4.646119904797901e-06, "loss": 0.3158, "step": 28047 }, { "epoch": 2.851565677104514, "grad_norm": 0.32067862153053284, "learning_rate": 4.645765909680413e-06, "loss": 0.3234, "step": 28048 }, { "epoch": 2.851667344448963, "grad_norm": 0.29121971130371094, "learning_rate": 4.645411916347467e-06, "loss": 0.3089, "step": 28049 }, { "epoch": 2.851769011793412, "grad_norm": 0.2791441082954407, "learning_rate": 4.6450579248008545e-06, "loss": 0.3416, "step": 28050 }, { "epoch": 2.851870679137861, "grad_norm": 0.28416910767555237, "learning_rate": 4.644703935042357e-06, "loss": 0.3275, "step": 28051 }, { "epoch": 2.85197234648231, "grad_norm": 0.25280895829200745, "learning_rate": 4.6443499470737535e-06, "loss": 0.3142, "step": 28052 }, { "epoch": 2.852074013826759, "grad_norm": 0.2750479280948639, "learning_rate": 4.643995960896834e-06, "loss": 0.3476, "step": 28053 }, { "epoch": 2.852175681171208, "grad_norm": 0.2625371813774109, "learning_rate": 4.6436419765133745e-06, "loss": 0.3066, "step": 28054 }, { "epoch": 2.8522773485156567, "grad_norm": 0.26251405477523804, "learning_rate": 4.643287993925163e-06, "loss": 0.3237, "step": 28055 }, { "epoch": 2.8523790158601057, "grad_norm": 0.2698814570903778, "learning_rate": 4.642934013133985e-06, "loss": 0.3326, "step": 28056 }, { "epoch": 2.8524806832045546, "grad_norm": 0.2600215971469879, "learning_rate": 4.642580034141617e-06, "loss": 0.3307, "step": 28057 }, { "epoch": 2.8525823505490036, "grad_norm": 0.26047417521476746, "learning_rate": 4.642226056949849e-06, "loss": 0.304, "step": 28058 }, { "epoch": 2.8526840178934525, "grad_norm": 0.27353957295417786, "learning_rate": 4.641872081560457e-06, "loss": 0.3319, "step": 28059 }, { "epoch": 2.8527856852379014, "grad_norm": 0.28046929836273193, "learning_rate": 4.641518107975231e-06, "loss": 0.3335, "step": 28060 }, { "epoch": 2.852887352582351, "grad_norm": 0.2493368238210678, "learning_rate": 4.641164136195953e-06, "loss": 0.3154, "step": 28061 }, { "epoch": 2.8529890199267998, "grad_norm": 0.27141696214675903, "learning_rate": 4.6408101662244006e-06, "loss": 0.3161, "step": 28062 }, { "epoch": 2.8530906872712487, "grad_norm": 0.25802746415138245, "learning_rate": 4.640456198062366e-06, "loss": 0.3289, "step": 28063 }, { "epoch": 2.8531923546156976, "grad_norm": 0.26675477623939514, "learning_rate": 4.6401022317116246e-06, "loss": 0.3316, "step": 28064 }, { "epoch": 2.8532940219601466, "grad_norm": 0.281818687915802, "learning_rate": 4.639748267173962e-06, "loss": 0.3478, "step": 28065 }, { "epoch": 2.8533956893045955, "grad_norm": 0.2618291676044464, "learning_rate": 4.639394304451165e-06, "loss": 0.3166, "step": 28066 }, { "epoch": 2.8534973566490445, "grad_norm": 0.24607430398464203, "learning_rate": 4.63904034354501e-06, "loss": 0.3133, "step": 28067 }, { "epoch": 2.8535990239934934, "grad_norm": 0.2889423072338104, "learning_rate": 4.638686384457288e-06, "loss": 0.3593, "step": 28068 }, { "epoch": 2.8537006913379424, "grad_norm": 0.25786611437797546, "learning_rate": 4.638332427189775e-06, "loss": 0.3444, "step": 28069 }, { "epoch": 2.8538023586823913, "grad_norm": 0.26357901096343994, "learning_rate": 4.637978471744258e-06, "loss": 0.3725, "step": 28070 }, { "epoch": 2.8539040260268402, "grad_norm": 0.27068060636520386, "learning_rate": 4.637624518122521e-06, "loss": 0.3629, "step": 28071 }, { "epoch": 2.854005693371289, "grad_norm": 0.2687995731830597, "learning_rate": 4.637270566326344e-06, "loss": 0.3361, "step": 28072 }, { "epoch": 2.854107360715738, "grad_norm": 0.25979647040367126, "learning_rate": 4.636916616357513e-06, "loss": 0.3101, "step": 28073 }, { "epoch": 2.854209028060187, "grad_norm": 0.2653893530368805, "learning_rate": 4.636562668217809e-06, "loss": 0.3402, "step": 28074 }, { "epoch": 2.854310695404636, "grad_norm": 0.2810175120830536, "learning_rate": 4.636208721909014e-06, "loss": 0.3172, "step": 28075 }, { "epoch": 2.854412362749085, "grad_norm": 0.27224087715148926, "learning_rate": 4.6358547774329165e-06, "loss": 0.3437, "step": 28076 }, { "epoch": 2.854514030093534, "grad_norm": 0.2854034900665283, "learning_rate": 4.635500834791293e-06, "loss": 0.3633, "step": 28077 }, { "epoch": 2.854615697437983, "grad_norm": 0.2678738236427307, "learning_rate": 4.635146893985933e-06, "loss": 0.3354, "step": 28078 }, { "epoch": 2.8547173647824318, "grad_norm": 0.2627279460430145, "learning_rate": 4.6347929550186144e-06, "loss": 0.3687, "step": 28079 }, { "epoch": 2.8548190321268807, "grad_norm": 0.2539041340351105, "learning_rate": 4.634439017891121e-06, "loss": 0.3066, "step": 28080 }, { "epoch": 2.8549206994713296, "grad_norm": 0.2832319736480713, "learning_rate": 4.634085082605239e-06, "loss": 0.3462, "step": 28081 }, { "epoch": 2.8550223668157786, "grad_norm": 0.26568689942359924, "learning_rate": 4.633731149162748e-06, "loss": 0.3171, "step": 28082 }, { "epoch": 2.8551240341602275, "grad_norm": 0.25738072395324707, "learning_rate": 4.633377217565434e-06, "loss": 0.3295, "step": 28083 }, { "epoch": 2.8552257015046765, "grad_norm": 0.2738702893257141, "learning_rate": 4.633023287815077e-06, "loss": 0.3111, "step": 28084 }, { "epoch": 2.8553273688491254, "grad_norm": 0.265671044588089, "learning_rate": 4.632669359913462e-06, "loss": 0.3392, "step": 28085 }, { "epoch": 2.8554290361935744, "grad_norm": 0.25981152057647705, "learning_rate": 4.632315433862371e-06, "loss": 0.3341, "step": 28086 }, { "epoch": 2.8555307035380237, "grad_norm": 0.2624734342098236, "learning_rate": 4.6319615096635875e-06, "loss": 0.3233, "step": 28087 }, { "epoch": 2.8556323708824727, "grad_norm": 0.26934346556663513, "learning_rate": 4.6316075873188956e-06, "loss": 0.3329, "step": 28088 }, { "epoch": 2.8557340382269216, "grad_norm": 0.25159716606140137, "learning_rate": 4.631253666830076e-06, "loss": 0.3224, "step": 28089 }, { "epoch": 2.8558357055713706, "grad_norm": 0.2675577700138092, "learning_rate": 4.630899748198913e-06, "loss": 0.3344, "step": 28090 }, { "epoch": 2.8559373729158195, "grad_norm": 0.2586612105369568, "learning_rate": 4.630545831427189e-06, "loss": 0.2967, "step": 28091 }, { "epoch": 2.8560390402602684, "grad_norm": 0.2634108066558838, "learning_rate": 4.630191916516688e-06, "loss": 0.3228, "step": 28092 }, { "epoch": 2.8561407076047174, "grad_norm": 0.2799511253833771, "learning_rate": 4.629838003469192e-06, "loss": 0.3313, "step": 28093 }, { "epoch": 2.8562423749491663, "grad_norm": 0.25607606768608093, "learning_rate": 4.629484092286483e-06, "loss": 0.3189, "step": 28094 }, { "epoch": 2.8563440422936153, "grad_norm": 0.27474844455718994, "learning_rate": 4.629130182970348e-06, "loss": 0.3348, "step": 28095 }, { "epoch": 2.856445709638064, "grad_norm": 0.26152434945106506, "learning_rate": 4.628776275522564e-06, "loss": 0.3192, "step": 28096 }, { "epoch": 2.856547376982513, "grad_norm": 0.2779650092124939, "learning_rate": 4.628422369944918e-06, "loss": 0.3434, "step": 28097 }, { "epoch": 2.856649044326962, "grad_norm": 0.2520734965801239, "learning_rate": 4.628068466239193e-06, "loss": 0.3318, "step": 28098 }, { "epoch": 2.856750711671411, "grad_norm": 0.2804226279258728, "learning_rate": 4.627714564407169e-06, "loss": 0.3462, "step": 28099 }, { "epoch": 2.85685237901586, "grad_norm": 0.26022136211395264, "learning_rate": 4.627360664450632e-06, "loss": 0.3073, "step": 28100 }, { "epoch": 2.856954046360309, "grad_norm": 0.2690849006175995, "learning_rate": 4.627006766371362e-06, "loss": 0.3333, "step": 28101 }, { "epoch": 2.8570557137047583, "grad_norm": 0.29545778036117554, "learning_rate": 4.6266528701711435e-06, "loss": 0.3672, "step": 28102 }, { "epoch": 2.8571573810492072, "grad_norm": 0.24641868472099304, "learning_rate": 4.6262989758517605e-06, "loss": 0.341, "step": 28103 }, { "epoch": 2.857259048393656, "grad_norm": 0.2469204217195511, "learning_rate": 4.625945083414993e-06, "loss": 0.3236, "step": 28104 }, { "epoch": 2.857360715738105, "grad_norm": 0.25052499771118164, "learning_rate": 4.6255911928626264e-06, "loss": 0.3227, "step": 28105 }, { "epoch": 2.857462383082554, "grad_norm": 0.27134808897972107, "learning_rate": 4.625237304196441e-06, "loss": 0.3448, "step": 28106 }, { "epoch": 2.857564050427003, "grad_norm": 0.25486600399017334, "learning_rate": 4.6248834174182215e-06, "loss": 0.3155, "step": 28107 }, { "epoch": 2.857665717771452, "grad_norm": 0.2558356821537018, "learning_rate": 4.624529532529751e-06, "loss": 0.3442, "step": 28108 }, { "epoch": 2.857767385115901, "grad_norm": 0.2644995450973511, "learning_rate": 4.624175649532811e-06, "loss": 0.3363, "step": 28109 }, { "epoch": 2.85786905246035, "grad_norm": 0.2607017755508423, "learning_rate": 4.623821768429185e-06, "loss": 0.359, "step": 28110 }, { "epoch": 2.8579707198047988, "grad_norm": 0.2617899179458618, "learning_rate": 4.623467889220654e-06, "loss": 0.3149, "step": 28111 }, { "epoch": 2.8580723871492477, "grad_norm": 0.2722405195236206, "learning_rate": 4.6231140119090035e-06, "loss": 0.3561, "step": 28112 }, { "epoch": 2.8581740544936967, "grad_norm": 0.2526063919067383, "learning_rate": 4.622760136496016e-06, "loss": 0.33, "step": 28113 }, { "epoch": 2.8582757218381456, "grad_norm": 0.2463812679052353, "learning_rate": 4.622406262983471e-06, "loss": 0.3403, "step": 28114 }, { "epoch": 2.8583773891825945, "grad_norm": 0.28909751772880554, "learning_rate": 4.622052391373156e-06, "loss": 0.3392, "step": 28115 }, { "epoch": 2.8584790565270435, "grad_norm": 0.27055174112319946, "learning_rate": 4.62169852166685e-06, "loss": 0.3324, "step": 28116 }, { "epoch": 2.8585807238714924, "grad_norm": 0.24769948422908783, "learning_rate": 4.621344653866336e-06, "loss": 0.3281, "step": 28117 }, { "epoch": 2.8586823912159414, "grad_norm": 0.2621430456638336, "learning_rate": 4.620990787973399e-06, "loss": 0.3272, "step": 28118 }, { "epoch": 2.8587840585603903, "grad_norm": 0.27827221155166626, "learning_rate": 4.62063692398982e-06, "loss": 0.3216, "step": 28119 }, { "epoch": 2.8588857259048392, "grad_norm": 0.24757324159145355, "learning_rate": 4.620283061917383e-06, "loss": 0.3349, "step": 28120 }, { "epoch": 2.858987393249288, "grad_norm": 0.27166348695755005, "learning_rate": 4.619929201757868e-06, "loss": 0.3373, "step": 28121 }, { "epoch": 2.859089060593737, "grad_norm": 0.26500841975212097, "learning_rate": 4.61957534351306e-06, "loss": 0.3816, "step": 28122 }, { "epoch": 2.859190727938186, "grad_norm": 0.27796754240989685, "learning_rate": 4.6192214871847416e-06, "loss": 0.3171, "step": 28123 }, { "epoch": 2.859292395282635, "grad_norm": 0.2503611445426941, "learning_rate": 4.618867632774695e-06, "loss": 0.2731, "step": 28124 }, { "epoch": 2.859394062627084, "grad_norm": 0.2681443691253662, "learning_rate": 4.6185137802847025e-06, "loss": 0.3288, "step": 28125 }, { "epoch": 2.859495729971533, "grad_norm": 0.2619704008102417, "learning_rate": 4.618159929716546e-06, "loss": 0.3212, "step": 28126 }, { "epoch": 2.859597397315982, "grad_norm": 0.2616299092769623, "learning_rate": 4.617806081072008e-06, "loss": 0.3317, "step": 28127 }, { "epoch": 2.8596990646604312, "grad_norm": 0.25821149349212646, "learning_rate": 4.617452234352875e-06, "loss": 0.3296, "step": 28128 }, { "epoch": 2.85980073200488, "grad_norm": 0.26699984073638916, "learning_rate": 4.617098389560926e-06, "loss": 0.3361, "step": 28129 }, { "epoch": 2.859902399349329, "grad_norm": 0.26865914463996887, "learning_rate": 4.616744546697944e-06, "loss": 0.3254, "step": 28130 }, { "epoch": 2.860004066693778, "grad_norm": 0.2587076723575592, "learning_rate": 4.616390705765711e-06, "loss": 0.3382, "step": 28131 }, { "epoch": 2.860105734038227, "grad_norm": 0.261324942111969, "learning_rate": 4.616036866766011e-06, "loss": 0.3328, "step": 28132 }, { "epoch": 2.860207401382676, "grad_norm": 0.2417580932378769, "learning_rate": 4.6156830297006266e-06, "loss": 0.3252, "step": 28133 }, { "epoch": 2.860309068727125, "grad_norm": 0.2418125569820404, "learning_rate": 4.6153291945713385e-06, "loss": 0.3142, "step": 28134 }, { "epoch": 2.860410736071574, "grad_norm": 0.2641640305519104, "learning_rate": 4.614975361379932e-06, "loss": 0.3393, "step": 28135 }, { "epoch": 2.8605124034160228, "grad_norm": 0.25820285081863403, "learning_rate": 4.614621530128187e-06, "loss": 0.3411, "step": 28136 }, { "epoch": 2.8606140707604717, "grad_norm": 0.2504632771015167, "learning_rate": 4.6142677008178876e-06, "loss": 0.3617, "step": 28137 }, { "epoch": 2.8607157381049206, "grad_norm": 0.2753680944442749, "learning_rate": 4.613913873450816e-06, "loss": 0.3327, "step": 28138 }, { "epoch": 2.8608174054493696, "grad_norm": 0.28229930996894836, "learning_rate": 4.613560048028754e-06, "loss": 0.3615, "step": 28139 }, { "epoch": 2.8609190727938185, "grad_norm": 0.25762927532196045, "learning_rate": 4.613206224553486e-06, "loss": 0.3386, "step": 28140 }, { "epoch": 2.8610207401382675, "grad_norm": 0.25875329971313477, "learning_rate": 4.612852403026792e-06, "loss": 0.33, "step": 28141 }, { "epoch": 2.8611224074827164, "grad_norm": 0.2676875591278076, "learning_rate": 4.6124985834504554e-06, "loss": 0.3193, "step": 28142 }, { "epoch": 2.861224074827166, "grad_norm": 0.2670336663722992, "learning_rate": 4.612144765826259e-06, "loss": 0.3138, "step": 28143 }, { "epoch": 2.8613257421716147, "grad_norm": 0.2642691135406494, "learning_rate": 4.611790950155985e-06, "loss": 0.3328, "step": 28144 }, { "epoch": 2.8614274095160637, "grad_norm": 0.2430965155363083, "learning_rate": 4.611437136441418e-06, "loss": 0.3375, "step": 28145 }, { "epoch": 2.8615290768605126, "grad_norm": 0.2744138240814209, "learning_rate": 4.6110833246843356e-06, "loss": 0.3503, "step": 28146 }, { "epoch": 2.8616307442049616, "grad_norm": 0.2543433606624603, "learning_rate": 4.610729514886524e-06, "loss": 0.3168, "step": 28147 }, { "epoch": 2.8617324115494105, "grad_norm": 0.26553356647491455, "learning_rate": 4.610375707049765e-06, "loss": 0.3262, "step": 28148 }, { "epoch": 2.8618340788938594, "grad_norm": 0.26712360978126526, "learning_rate": 4.61002190117584e-06, "loss": 0.3046, "step": 28149 }, { "epoch": 2.8619357462383084, "grad_norm": 0.2821078896522522, "learning_rate": 4.609668097266533e-06, "loss": 0.3531, "step": 28150 }, { "epoch": 2.8620374135827573, "grad_norm": 0.26769769191741943, "learning_rate": 4.609314295323624e-06, "loss": 0.3346, "step": 28151 }, { "epoch": 2.8621390809272063, "grad_norm": 0.2846684455871582, "learning_rate": 4.608960495348897e-06, "loss": 0.3429, "step": 28152 }, { "epoch": 2.862240748271655, "grad_norm": 0.2568392753601074, "learning_rate": 4.608606697344135e-06, "loss": 0.334, "step": 28153 }, { "epoch": 2.862342415616104, "grad_norm": 0.2763161361217499, "learning_rate": 4.608252901311119e-06, "loss": 0.3558, "step": 28154 }, { "epoch": 2.862444082960553, "grad_norm": 0.2504362463951111, "learning_rate": 4.607899107251632e-06, "loss": 0.3372, "step": 28155 }, { "epoch": 2.862545750305002, "grad_norm": 0.28884556889533997, "learning_rate": 4.6075453151674555e-06, "loss": 0.3669, "step": 28156 }, { "epoch": 2.862647417649451, "grad_norm": 0.2569798231124878, "learning_rate": 4.6071915250603725e-06, "loss": 0.3367, "step": 28157 }, { "epoch": 2.8627490849939, "grad_norm": 0.280068039894104, "learning_rate": 4.606837736932166e-06, "loss": 0.3441, "step": 28158 }, { "epoch": 2.862850752338349, "grad_norm": 0.2908164858818054, "learning_rate": 4.606483950784616e-06, "loss": 0.3356, "step": 28159 }, { "epoch": 2.862952419682798, "grad_norm": 0.2638195753097534, "learning_rate": 4.606130166619508e-06, "loss": 0.3155, "step": 28160 }, { "epoch": 2.8630540870272467, "grad_norm": 0.2804388403892517, "learning_rate": 4.6057763844386215e-06, "loss": 0.3477, "step": 28161 }, { "epoch": 2.8631557543716957, "grad_norm": 0.2694084346294403, "learning_rate": 4.605422604243741e-06, "loss": 0.3319, "step": 28162 }, { "epoch": 2.8632574217161446, "grad_norm": 0.27246806025505066, "learning_rate": 4.605068826036647e-06, "loss": 0.3483, "step": 28163 }, { "epoch": 2.8633590890605936, "grad_norm": 0.282817006111145, "learning_rate": 4.604715049819122e-06, "loss": 0.3169, "step": 28164 }, { "epoch": 2.8634607564050425, "grad_norm": 0.2515963613986969, "learning_rate": 4.60436127559295e-06, "loss": 0.3175, "step": 28165 }, { "epoch": 2.8635624237494914, "grad_norm": 0.2695157825946808, "learning_rate": 4.604007503359911e-06, "loss": 0.2993, "step": 28166 }, { "epoch": 2.8636640910939404, "grad_norm": 0.2807581126689911, "learning_rate": 4.6036537331217875e-06, "loss": 0.3154, "step": 28167 }, { "epoch": 2.8637657584383893, "grad_norm": 0.26568999886512756, "learning_rate": 4.603299964880364e-06, "loss": 0.3278, "step": 28168 }, { "epoch": 2.8638674257828387, "grad_norm": 0.2895146310329437, "learning_rate": 4.602946198637418e-06, "loss": 0.317, "step": 28169 }, { "epoch": 2.8639690931272876, "grad_norm": 0.2826939523220062, "learning_rate": 4.602592434394738e-06, "loss": 0.351, "step": 28170 }, { "epoch": 2.8640707604717366, "grad_norm": 0.24953757226467133, "learning_rate": 4.602238672154101e-06, "loss": 0.3307, "step": 28171 }, { "epoch": 2.8641724278161855, "grad_norm": 0.275689035654068, "learning_rate": 4.601884911917291e-06, "loss": 0.3491, "step": 28172 }, { "epoch": 2.8642740951606345, "grad_norm": 0.2755233943462372, "learning_rate": 4.601531153686091e-06, "loss": 0.356, "step": 28173 }, { "epoch": 2.8643757625050834, "grad_norm": 0.26820629835128784, "learning_rate": 4.601177397462282e-06, "loss": 0.3372, "step": 28174 }, { "epoch": 2.8644774298495324, "grad_norm": 0.254168838262558, "learning_rate": 4.600823643247648e-06, "loss": 0.3011, "step": 28175 }, { "epoch": 2.8645790971939813, "grad_norm": 0.2567678987979889, "learning_rate": 4.600469891043967e-06, "loss": 0.3132, "step": 28176 }, { "epoch": 2.8646807645384302, "grad_norm": 0.25085702538490295, "learning_rate": 4.600116140853024e-06, "loss": 0.3084, "step": 28177 }, { "epoch": 2.864782431882879, "grad_norm": 0.275532990694046, "learning_rate": 4.599762392676603e-06, "loss": 0.3612, "step": 28178 }, { "epoch": 2.864884099227328, "grad_norm": 0.27412354946136475, "learning_rate": 4.599408646516482e-06, "loss": 0.3249, "step": 28179 }, { "epoch": 2.864985766571777, "grad_norm": 0.2797088027000427, "learning_rate": 4.599054902374448e-06, "loss": 0.3356, "step": 28180 }, { "epoch": 2.865087433916226, "grad_norm": 0.27363109588623047, "learning_rate": 4.5987011602522766e-06, "loss": 0.3402, "step": 28181 }, { "epoch": 2.865189101260675, "grad_norm": 0.2902378439903259, "learning_rate": 4.598347420151754e-06, "loss": 0.3599, "step": 28182 }, { "epoch": 2.865290768605124, "grad_norm": 0.2656223475933075, "learning_rate": 4.597993682074663e-06, "loss": 0.3128, "step": 28183 }, { "epoch": 2.8653924359495733, "grad_norm": 0.2431158572435379, "learning_rate": 4.597639946022783e-06, "loss": 0.3341, "step": 28184 }, { "epoch": 2.865494103294022, "grad_norm": 0.2924274206161499, "learning_rate": 4.5972862119979e-06, "loss": 0.343, "step": 28185 }, { "epoch": 2.865595770638471, "grad_norm": 0.2646101117134094, "learning_rate": 4.59693248000179e-06, "loss": 0.3264, "step": 28186 }, { "epoch": 2.86569743798292, "grad_norm": 0.2672554850578308, "learning_rate": 4.596578750036239e-06, "loss": 0.3276, "step": 28187 }, { "epoch": 2.865799105327369, "grad_norm": 0.2582530379295349, "learning_rate": 4.596225022103029e-06, "loss": 0.3239, "step": 28188 }, { "epoch": 2.865900772671818, "grad_norm": 0.2798400819301605, "learning_rate": 4.595871296203942e-06, "loss": 0.3498, "step": 28189 }, { "epoch": 2.866002440016267, "grad_norm": 0.26924920082092285, "learning_rate": 4.595517572340761e-06, "loss": 0.3483, "step": 28190 }, { "epoch": 2.866104107360716, "grad_norm": 0.2651553750038147, "learning_rate": 4.5951638505152624e-06, "loss": 0.3288, "step": 28191 }, { "epoch": 2.866205774705165, "grad_norm": 0.26087090373039246, "learning_rate": 4.594810130729233e-06, "loss": 0.3093, "step": 28192 }, { "epoch": 2.8663074420496137, "grad_norm": 0.2630541920661926, "learning_rate": 4.594456412984456e-06, "loss": 0.2941, "step": 28193 }, { "epoch": 2.8664091093940627, "grad_norm": 0.2576480209827423, "learning_rate": 4.59410269728271e-06, "loss": 0.3308, "step": 28194 }, { "epoch": 2.8665107767385116, "grad_norm": 0.260648638010025, "learning_rate": 4.5937489836257796e-06, "loss": 0.3192, "step": 28195 }, { "epoch": 2.8666124440829606, "grad_norm": 0.2495693564414978, "learning_rate": 4.593395272015442e-06, "loss": 0.3075, "step": 28196 }, { "epoch": 2.8667141114274095, "grad_norm": 0.2512297034263611, "learning_rate": 4.593041562453484e-06, "loss": 0.338, "step": 28197 }, { "epoch": 2.8668157787718584, "grad_norm": 0.27642422914505005, "learning_rate": 4.592687854941688e-06, "loss": 0.3284, "step": 28198 }, { "epoch": 2.8669174461163074, "grad_norm": 0.26210567355155945, "learning_rate": 4.592334149481831e-06, "loss": 0.3676, "step": 28199 }, { "epoch": 2.8670191134607563, "grad_norm": 0.28222954273223877, "learning_rate": 4.5919804460757e-06, "loss": 0.3396, "step": 28200 }, { "epoch": 2.8671207808052053, "grad_norm": 0.25495442748069763, "learning_rate": 4.591626744725072e-06, "loss": 0.3364, "step": 28201 }, { "epoch": 2.867222448149654, "grad_norm": 0.2545502185821533, "learning_rate": 4.591273045431732e-06, "loss": 0.3006, "step": 28202 }, { "epoch": 2.867324115494103, "grad_norm": 0.2713170051574707, "learning_rate": 4.5909193481974635e-06, "loss": 0.3327, "step": 28203 }, { "epoch": 2.867425782838552, "grad_norm": 0.25265905261039734, "learning_rate": 4.5905656530240426e-06, "loss": 0.3317, "step": 28204 }, { "epoch": 2.867527450183001, "grad_norm": 0.27188536524772644, "learning_rate": 4.590211959913259e-06, "loss": 0.3466, "step": 28205 }, { "epoch": 2.86762911752745, "grad_norm": 0.254454642534256, "learning_rate": 4.589858268866885e-06, "loss": 0.3072, "step": 28206 }, { "epoch": 2.867730784871899, "grad_norm": 0.28407022356987, "learning_rate": 4.58950457988671e-06, "loss": 0.3641, "step": 28207 }, { "epoch": 2.867832452216348, "grad_norm": 0.27412474155426025, "learning_rate": 4.589150892974516e-06, "loss": 0.3363, "step": 28208 }, { "epoch": 2.867934119560797, "grad_norm": 0.2720959484577179, "learning_rate": 4.588797208132079e-06, "loss": 0.3234, "step": 28209 }, { "epoch": 2.868035786905246, "grad_norm": 0.2621482312679291, "learning_rate": 4.588443525361186e-06, "loss": 0.3589, "step": 28210 }, { "epoch": 2.868137454249695, "grad_norm": 0.37872493267059326, "learning_rate": 4.588089844663614e-06, "loss": 0.3442, "step": 28211 }, { "epoch": 2.868239121594144, "grad_norm": 0.2665870189666748, "learning_rate": 4.587736166041148e-06, "loss": 0.2985, "step": 28212 }, { "epoch": 2.868340788938593, "grad_norm": 0.25243228673934937, "learning_rate": 4.587382489495572e-06, "loss": 0.3272, "step": 28213 }, { "epoch": 2.868442456283042, "grad_norm": 0.2506106495857239, "learning_rate": 4.587028815028662e-06, "loss": 0.34, "step": 28214 }, { "epoch": 2.868544123627491, "grad_norm": 0.2767011523246765, "learning_rate": 4.5866751426422054e-06, "loss": 0.3153, "step": 28215 }, { "epoch": 2.86864579097194, "grad_norm": 0.2636963725090027, "learning_rate": 4.586321472337979e-06, "loss": 0.3291, "step": 28216 }, { "epoch": 2.8687474583163888, "grad_norm": 0.28217047452926636, "learning_rate": 4.585967804117767e-06, "loss": 0.349, "step": 28217 }, { "epoch": 2.8688491256608377, "grad_norm": 0.27433812618255615, "learning_rate": 4.5856141379833524e-06, "loss": 0.3508, "step": 28218 }, { "epoch": 2.8689507930052867, "grad_norm": 0.25608178973197937, "learning_rate": 4.585260473936513e-06, "loss": 0.3576, "step": 28219 }, { "epoch": 2.8690524603497356, "grad_norm": 0.26375746726989746, "learning_rate": 4.584906811979036e-06, "loss": 0.3502, "step": 28220 }, { "epoch": 2.8691541276941845, "grad_norm": 0.26839572191238403, "learning_rate": 4.5845531521126965e-06, "loss": 0.3339, "step": 28221 }, { "epoch": 2.8692557950386335, "grad_norm": 0.2654706835746765, "learning_rate": 4.584199494339279e-06, "loss": 0.344, "step": 28222 }, { "epoch": 2.8693574623830824, "grad_norm": 0.23632608354091644, "learning_rate": 4.583845838660569e-06, "loss": 0.3356, "step": 28223 }, { "epoch": 2.8694591297275314, "grad_norm": 0.27086278796195984, "learning_rate": 4.583492185078342e-06, "loss": 0.343, "step": 28224 }, { "epoch": 2.8695607970719808, "grad_norm": 0.2639563977718353, "learning_rate": 4.583138533594385e-06, "loss": 0.3233, "step": 28225 }, { "epoch": 2.8696624644164297, "grad_norm": 0.26215770840644836, "learning_rate": 4.582784884210475e-06, "loss": 0.363, "step": 28226 }, { "epoch": 2.8697641317608786, "grad_norm": 0.28682348132133484, "learning_rate": 4.5824312369283945e-06, "loss": 0.3049, "step": 28227 }, { "epoch": 2.8698657991053276, "grad_norm": 0.2675509750843048, "learning_rate": 4.58207759174993e-06, "loss": 0.3362, "step": 28228 }, { "epoch": 2.8699674664497765, "grad_norm": 0.26974770426750183, "learning_rate": 4.581723948676855e-06, "loss": 0.3472, "step": 28229 }, { "epoch": 2.8700691337942255, "grad_norm": 0.2791638970375061, "learning_rate": 4.5813703077109595e-06, "loss": 0.3435, "step": 28230 }, { "epoch": 2.8701708011386744, "grad_norm": 0.2553010582923889, "learning_rate": 4.581016668854019e-06, "loss": 0.3551, "step": 28231 }, { "epoch": 2.8702724684831233, "grad_norm": 0.2876153588294983, "learning_rate": 4.580663032107815e-06, "loss": 0.3309, "step": 28232 }, { "epoch": 2.8703741358275723, "grad_norm": 0.27272239327430725, "learning_rate": 4.580309397474134e-06, "loss": 0.3224, "step": 28233 }, { "epoch": 2.8704758031720212, "grad_norm": 0.2653944790363312, "learning_rate": 4.579955764954753e-06, "loss": 0.3145, "step": 28234 }, { "epoch": 2.87057747051647, "grad_norm": 0.2814124822616577, "learning_rate": 4.579602134551456e-06, "loss": 0.3534, "step": 28235 }, { "epoch": 2.870679137860919, "grad_norm": 0.29304879903793335, "learning_rate": 4.579248506266023e-06, "loss": 0.3169, "step": 28236 }, { "epoch": 2.870780805205368, "grad_norm": 0.25153130292892456, "learning_rate": 4.578894880100234e-06, "loss": 0.3093, "step": 28237 }, { "epoch": 2.870882472549817, "grad_norm": 0.2894883155822754, "learning_rate": 4.578541256055877e-06, "loss": 0.3117, "step": 28238 }, { "epoch": 2.870984139894266, "grad_norm": 0.28225621581077576, "learning_rate": 4.578187634134725e-06, "loss": 0.3379, "step": 28239 }, { "epoch": 2.871085807238715, "grad_norm": 0.26422804594039917, "learning_rate": 4.577834014338566e-06, "loss": 0.3268, "step": 28240 }, { "epoch": 2.871187474583164, "grad_norm": 0.2789439558982849, "learning_rate": 4.5774803966691775e-06, "loss": 0.3374, "step": 28241 }, { "epoch": 2.8712891419276128, "grad_norm": 0.262360543012619, "learning_rate": 4.577126781128342e-06, "loss": 0.3366, "step": 28242 }, { "epoch": 2.8713908092720617, "grad_norm": 0.2806945741176605, "learning_rate": 4.576773167717843e-06, "loss": 0.3506, "step": 28243 }, { "epoch": 2.8714924766165106, "grad_norm": 0.279767245054245, "learning_rate": 4.576419556439459e-06, "loss": 0.336, "step": 28244 }, { "epoch": 2.8715941439609596, "grad_norm": 0.2707206606864929, "learning_rate": 4.576065947294973e-06, "loss": 0.3224, "step": 28245 }, { "epoch": 2.8716958113054085, "grad_norm": 0.25066250562667847, "learning_rate": 4.575712340286166e-06, "loss": 0.3046, "step": 28246 }, { "epoch": 2.8717974786498575, "grad_norm": 0.2687474191188812, "learning_rate": 4.575358735414818e-06, "loss": 0.3888, "step": 28247 }, { "epoch": 2.8718991459943064, "grad_norm": 0.28637173771858215, "learning_rate": 4.575005132682714e-06, "loss": 0.2912, "step": 28248 }, { "epoch": 2.8720008133387553, "grad_norm": 0.261385977268219, "learning_rate": 4.5746515320916316e-06, "loss": 0.344, "step": 28249 }, { "epoch": 2.8721024806832043, "grad_norm": 0.26801708340644836, "learning_rate": 4.574297933643355e-06, "loss": 0.3316, "step": 28250 }, { "epoch": 2.8722041480276537, "grad_norm": 0.25304335355758667, "learning_rate": 4.573944337339663e-06, "loss": 0.3426, "step": 28251 }, { "epoch": 2.8723058153721026, "grad_norm": 0.30328232049942017, "learning_rate": 4.573590743182338e-06, "loss": 0.3241, "step": 28252 }, { "epoch": 2.8724074827165516, "grad_norm": 0.2769845128059387, "learning_rate": 4.5732371511731635e-06, "loss": 0.3145, "step": 28253 }, { "epoch": 2.8725091500610005, "grad_norm": 0.2911073565483093, "learning_rate": 4.572883561313917e-06, "loss": 0.3138, "step": 28254 }, { "epoch": 2.8726108174054494, "grad_norm": 0.2649723291397095, "learning_rate": 4.572529973606383e-06, "loss": 0.3513, "step": 28255 }, { "epoch": 2.8727124847498984, "grad_norm": 0.27045542001724243, "learning_rate": 4.57217638805234e-06, "loss": 0.3136, "step": 28256 }, { "epoch": 2.8728141520943473, "grad_norm": 0.27039235830307007, "learning_rate": 4.571822804653571e-06, "loss": 0.347, "step": 28257 }, { "epoch": 2.8729158194387963, "grad_norm": 0.252260684967041, "learning_rate": 4.571469223411858e-06, "loss": 0.3336, "step": 28258 }, { "epoch": 2.873017486783245, "grad_norm": 0.2511025369167328, "learning_rate": 4.57111564432898e-06, "loss": 0.3269, "step": 28259 }, { "epoch": 2.873119154127694, "grad_norm": 0.2679852843284607, "learning_rate": 4.570762067406721e-06, "loss": 0.3436, "step": 28260 }, { "epoch": 2.873220821472143, "grad_norm": 0.25841638445854187, "learning_rate": 4.570408492646858e-06, "loss": 0.3225, "step": 28261 }, { "epoch": 2.873322488816592, "grad_norm": 0.25377917289733887, "learning_rate": 4.5700549200511765e-06, "loss": 0.3491, "step": 28262 }, { "epoch": 2.873424156161041, "grad_norm": 0.26029112935066223, "learning_rate": 4.569701349621457e-06, "loss": 0.3567, "step": 28263 }, { "epoch": 2.87352582350549, "grad_norm": 0.2816654145717621, "learning_rate": 4.569347781359478e-06, "loss": 0.3553, "step": 28264 }, { "epoch": 2.873627490849939, "grad_norm": 0.2775609493255615, "learning_rate": 4.568994215267025e-06, "loss": 0.3153, "step": 28265 }, { "epoch": 2.8737291581943882, "grad_norm": 0.27227625250816345, "learning_rate": 4.568640651345875e-06, "loss": 0.3452, "step": 28266 }, { "epoch": 2.873830825538837, "grad_norm": 0.26018837094306946, "learning_rate": 4.5682870895978105e-06, "loss": 0.3108, "step": 28267 }, { "epoch": 2.873932492883286, "grad_norm": 0.26484090089797974, "learning_rate": 4.567933530024614e-06, "loss": 0.317, "step": 28268 }, { "epoch": 2.874034160227735, "grad_norm": 0.2738729417324066, "learning_rate": 4.5675799726280655e-06, "loss": 0.3175, "step": 28269 }, { "epoch": 2.874135827572184, "grad_norm": 0.2767319679260254, "learning_rate": 4.567226417409947e-06, "loss": 0.3224, "step": 28270 }, { "epoch": 2.874237494916633, "grad_norm": 0.2529323399066925, "learning_rate": 4.566872864372037e-06, "loss": 0.3375, "step": 28271 }, { "epoch": 2.874339162261082, "grad_norm": 0.2511346638202667, "learning_rate": 4.56651931351612e-06, "loss": 0.343, "step": 28272 }, { "epoch": 2.874440829605531, "grad_norm": 0.2731846272945404, "learning_rate": 4.566165764843976e-06, "loss": 0.319, "step": 28273 }, { "epoch": 2.8745424969499798, "grad_norm": 0.2587127089500427, "learning_rate": 4.565812218357385e-06, "loss": 0.3542, "step": 28274 }, { "epoch": 2.8746441642944287, "grad_norm": 0.29585471749305725, "learning_rate": 4.56545867405813e-06, "loss": 0.3543, "step": 28275 }, { "epoch": 2.8747458316388776, "grad_norm": 0.30726000666618347, "learning_rate": 4.565105131947989e-06, "loss": 0.3404, "step": 28276 }, { "epoch": 2.8748474989833266, "grad_norm": 0.2578597962856293, "learning_rate": 4.564751592028746e-06, "loss": 0.342, "step": 28277 }, { "epoch": 2.8749491663277755, "grad_norm": 0.30816519260406494, "learning_rate": 4.564398054302181e-06, "loss": 0.3355, "step": 28278 }, { "epoch": 2.8750508336722245, "grad_norm": 0.2507286071777344, "learning_rate": 4.5640445187700755e-06, "loss": 0.3338, "step": 28279 }, { "epoch": 2.8751525010166734, "grad_norm": 0.27475711703300476, "learning_rate": 4.56369098543421e-06, "loss": 0.3836, "step": 28280 }, { "epoch": 2.8752541683611224, "grad_norm": 0.2809029519557953, "learning_rate": 4.5633374542963655e-06, "loss": 0.3119, "step": 28281 }, { "epoch": 2.8753558357055713, "grad_norm": 0.2670719623565674, "learning_rate": 4.562983925358324e-06, "loss": 0.3434, "step": 28282 }, { "epoch": 2.8754575030500202, "grad_norm": 0.27174144983291626, "learning_rate": 4.562630398621864e-06, "loss": 0.3207, "step": 28283 }, { "epoch": 2.875559170394469, "grad_norm": 0.26354512572288513, "learning_rate": 4.562276874088768e-06, "loss": 0.3617, "step": 28284 }, { "epoch": 2.875660837738918, "grad_norm": 0.254599004983902, "learning_rate": 4.561923351760819e-06, "loss": 0.3276, "step": 28285 }, { "epoch": 2.875762505083367, "grad_norm": 0.2753174304962158, "learning_rate": 4.5615698316397945e-06, "loss": 0.3564, "step": 28286 }, { "epoch": 2.875864172427816, "grad_norm": 0.3016420602798462, "learning_rate": 4.5612163137274775e-06, "loss": 0.3233, "step": 28287 }, { "epoch": 2.875965839772265, "grad_norm": 0.2600044310092926, "learning_rate": 4.560862798025649e-06, "loss": 0.3257, "step": 28288 }, { "epoch": 2.876067507116714, "grad_norm": 0.27344194054603577, "learning_rate": 4.560509284536088e-06, "loss": 0.3559, "step": 28289 }, { "epoch": 2.876169174461163, "grad_norm": 0.28081437945365906, "learning_rate": 4.560155773260578e-06, "loss": 0.3233, "step": 28290 }, { "epoch": 2.8762708418056118, "grad_norm": 0.27182433009147644, "learning_rate": 4.559802264200898e-06, "loss": 0.3343, "step": 28291 }, { "epoch": 2.876372509150061, "grad_norm": 0.2858341336250305, "learning_rate": 4.559448757358831e-06, "loss": 0.349, "step": 28292 }, { "epoch": 2.87647417649451, "grad_norm": 0.25507014989852905, "learning_rate": 4.559095252736154e-06, "loss": 0.3511, "step": 28293 }, { "epoch": 2.876575843838959, "grad_norm": 0.2776225209236145, "learning_rate": 4.558741750334652e-06, "loss": 0.3478, "step": 28294 }, { "epoch": 2.876677511183408, "grad_norm": 0.27965047955513, "learning_rate": 4.5583882501561046e-06, "loss": 0.3395, "step": 28295 }, { "epoch": 2.876779178527857, "grad_norm": 0.29996538162231445, "learning_rate": 4.55803475220229e-06, "loss": 0.3285, "step": 28296 }, { "epoch": 2.876880845872306, "grad_norm": 0.24615435302257538, "learning_rate": 4.557681256474994e-06, "loss": 0.3293, "step": 28297 }, { "epoch": 2.876982513216755, "grad_norm": 0.28829362988471985, "learning_rate": 4.557327762975993e-06, "loss": 0.3692, "step": 28298 }, { "epoch": 2.8770841805612037, "grad_norm": 0.2706739604473114, "learning_rate": 4.556974271707069e-06, "loss": 0.3249, "step": 28299 }, { "epoch": 2.8771858479056527, "grad_norm": 0.2677377462387085, "learning_rate": 4.556620782670006e-06, "loss": 0.3385, "step": 28300 }, { "epoch": 2.8772875152501016, "grad_norm": 0.26455146074295044, "learning_rate": 4.55626729586658e-06, "loss": 0.3506, "step": 28301 }, { "epoch": 2.8773891825945506, "grad_norm": 0.2889254093170166, "learning_rate": 4.555913811298575e-06, "loss": 0.3183, "step": 28302 }, { "epoch": 2.8774908499389995, "grad_norm": 0.26617133617401123, "learning_rate": 4.55556032896777e-06, "loss": 0.3213, "step": 28303 }, { "epoch": 2.8775925172834484, "grad_norm": 0.27067995071411133, "learning_rate": 4.555206848875946e-06, "loss": 0.3184, "step": 28304 }, { "epoch": 2.8776941846278974, "grad_norm": 0.26439782977104187, "learning_rate": 4.5548533710248856e-06, "loss": 0.3903, "step": 28305 }, { "epoch": 2.8777958519723463, "grad_norm": 0.2568764090538025, "learning_rate": 4.554499895416367e-06, "loss": 0.3293, "step": 28306 }, { "epoch": 2.8778975193167957, "grad_norm": 0.2625405192375183, "learning_rate": 4.554146422052173e-06, "loss": 0.3423, "step": 28307 }, { "epoch": 2.8779991866612447, "grad_norm": 0.2557447850704193, "learning_rate": 4.553792950934082e-06, "loss": 0.3099, "step": 28308 }, { "epoch": 2.8781008540056936, "grad_norm": 0.25989750027656555, "learning_rate": 4.553439482063877e-06, "loss": 0.365, "step": 28309 }, { "epoch": 2.8782025213501425, "grad_norm": 0.2830214202404022, "learning_rate": 4.553086015443337e-06, "loss": 0.3231, "step": 28310 }, { "epoch": 2.8783041886945915, "grad_norm": 0.2603853642940521, "learning_rate": 4.552732551074245e-06, "loss": 0.3533, "step": 28311 }, { "epoch": 2.8784058560390404, "grad_norm": 0.26465651392936707, "learning_rate": 4.55237908895838e-06, "loss": 0.3205, "step": 28312 }, { "epoch": 2.8785075233834894, "grad_norm": 0.2649753987789154, "learning_rate": 4.552025629097521e-06, "loss": 0.3232, "step": 28313 }, { "epoch": 2.8786091907279383, "grad_norm": 0.24976636469364166, "learning_rate": 4.551672171493451e-06, "loss": 0.3213, "step": 28314 }, { "epoch": 2.8787108580723872, "grad_norm": 0.2810681164264679, "learning_rate": 4.551318716147951e-06, "loss": 0.3244, "step": 28315 }, { "epoch": 2.878812525416836, "grad_norm": 0.27503976225852966, "learning_rate": 4.5509652630628e-06, "loss": 0.3258, "step": 28316 }, { "epoch": 2.878914192761285, "grad_norm": 0.25898730754852295, "learning_rate": 4.55061181223978e-06, "loss": 0.3626, "step": 28317 }, { "epoch": 2.879015860105734, "grad_norm": 0.2447049617767334, "learning_rate": 4.55025836368067e-06, "loss": 0.3503, "step": 28318 }, { "epoch": 2.879117527450183, "grad_norm": 0.26859402656555176, "learning_rate": 4.549904917387251e-06, "loss": 0.3293, "step": 28319 }, { "epoch": 2.879219194794632, "grad_norm": 0.28700998425483704, "learning_rate": 4.549551473361305e-06, "loss": 0.3362, "step": 28320 }, { "epoch": 2.879320862139081, "grad_norm": 0.2654247581958771, "learning_rate": 4.549198031604611e-06, "loss": 0.3169, "step": 28321 }, { "epoch": 2.87942252948353, "grad_norm": 0.252199649810791, "learning_rate": 4.548844592118952e-06, "loss": 0.3313, "step": 28322 }, { "epoch": 2.8795241968279788, "grad_norm": 0.2688855230808258, "learning_rate": 4.548491154906105e-06, "loss": 0.3558, "step": 28323 }, { "epoch": 2.8796258641724277, "grad_norm": 0.26556310057640076, "learning_rate": 4.548137719967852e-06, "loss": 0.3108, "step": 28324 }, { "epoch": 2.8797275315168767, "grad_norm": 0.2747398316860199, "learning_rate": 4.547784287305976e-06, "loss": 0.3228, "step": 28325 }, { "epoch": 2.8798291988613256, "grad_norm": 0.2765205502510071, "learning_rate": 4.547430856922253e-06, "loss": 0.3375, "step": 28326 }, { "epoch": 2.8799308662057745, "grad_norm": 0.2536887228488922, "learning_rate": 4.547077428818468e-06, "loss": 0.3391, "step": 28327 }, { "epoch": 2.8800325335502235, "grad_norm": 0.2632638216018677, "learning_rate": 4.546724002996397e-06, "loss": 0.3492, "step": 28328 }, { "epoch": 2.8801342008946724, "grad_norm": 0.2799188792705536, "learning_rate": 4.5463705794578225e-06, "loss": 0.3455, "step": 28329 }, { "epoch": 2.8802358682391214, "grad_norm": 0.27691882848739624, "learning_rate": 4.546017158204527e-06, "loss": 0.3335, "step": 28330 }, { "epoch": 2.8803375355835703, "grad_norm": 0.2746542692184448, "learning_rate": 4.545663739238288e-06, "loss": 0.334, "step": 28331 }, { "epoch": 2.8804392029280192, "grad_norm": 0.26227977871894836, "learning_rate": 4.5453103225608894e-06, "loss": 0.3062, "step": 28332 }, { "epoch": 2.8805408702724686, "grad_norm": 0.2358456403017044, "learning_rate": 4.544956908174106e-06, "loss": 0.3264, "step": 28333 }, { "epoch": 2.8806425376169176, "grad_norm": 0.28926926851272583, "learning_rate": 4.544603496079723e-06, "loss": 0.3485, "step": 28334 }, { "epoch": 2.8807442049613665, "grad_norm": 0.2487044334411621, "learning_rate": 4.54425008627952e-06, "loss": 0.3067, "step": 28335 }, { "epoch": 2.8808458723058155, "grad_norm": 0.27871420979499817, "learning_rate": 4.5438966787752755e-06, "loss": 0.3439, "step": 28336 }, { "epoch": 2.8809475396502644, "grad_norm": 0.2655206024646759, "learning_rate": 4.543543273568774e-06, "loss": 0.3363, "step": 28337 }, { "epoch": 2.8810492069947133, "grad_norm": 0.2445383369922638, "learning_rate": 4.543189870661789e-06, "loss": 0.3627, "step": 28338 }, { "epoch": 2.8811508743391623, "grad_norm": 0.24665547907352448, "learning_rate": 4.542836470056107e-06, "loss": 0.3656, "step": 28339 }, { "epoch": 2.8812525416836112, "grad_norm": 0.265062153339386, "learning_rate": 4.542483071753507e-06, "loss": 0.3489, "step": 28340 }, { "epoch": 2.88135420902806, "grad_norm": 0.2658863067626953, "learning_rate": 4.542129675755767e-06, "loss": 0.3269, "step": 28341 }, { "epoch": 2.881455876372509, "grad_norm": 0.2608703672885895, "learning_rate": 4.541776282064671e-06, "loss": 0.3323, "step": 28342 }, { "epoch": 2.881557543716958, "grad_norm": 0.2707423269748688, "learning_rate": 4.541422890681994e-06, "loss": 0.3449, "step": 28343 }, { "epoch": 2.881659211061407, "grad_norm": 0.2671090066432953, "learning_rate": 4.5410695016095205e-06, "loss": 0.3348, "step": 28344 }, { "epoch": 2.881760878405856, "grad_norm": 0.26740095019340515, "learning_rate": 4.540716114849032e-06, "loss": 0.3027, "step": 28345 }, { "epoch": 2.881862545750305, "grad_norm": 0.25115787982940674, "learning_rate": 4.540362730402305e-06, "loss": 0.3359, "step": 28346 }, { "epoch": 2.881964213094754, "grad_norm": 0.29131507873535156, "learning_rate": 4.540009348271123e-06, "loss": 0.3288, "step": 28347 }, { "epoch": 2.882065880439203, "grad_norm": 0.2699272036552429, "learning_rate": 4.539655968457261e-06, "loss": 0.3282, "step": 28348 }, { "epoch": 2.882167547783652, "grad_norm": 0.2716420888900757, "learning_rate": 4.539302590962504e-06, "loss": 0.3379, "step": 28349 }, { "epoch": 2.882269215128101, "grad_norm": 0.2582201361656189, "learning_rate": 4.538949215788634e-06, "loss": 0.3231, "step": 28350 }, { "epoch": 2.88237088247255, "grad_norm": 0.2697266638278961, "learning_rate": 4.538595842937424e-06, "loss": 0.3284, "step": 28351 }, { "epoch": 2.882472549816999, "grad_norm": 0.2876034080982208, "learning_rate": 4.5382424724106635e-06, "loss": 0.3182, "step": 28352 }, { "epoch": 2.882574217161448, "grad_norm": 0.2769283652305603, "learning_rate": 4.537889104210123e-06, "loss": 0.3453, "step": 28353 }, { "epoch": 2.882675884505897, "grad_norm": 0.2865604758262634, "learning_rate": 4.537535738337588e-06, "loss": 0.3466, "step": 28354 }, { "epoch": 2.882777551850346, "grad_norm": 0.26539552211761475, "learning_rate": 4.5371823747948415e-06, "loss": 0.3454, "step": 28355 }, { "epoch": 2.8828792191947947, "grad_norm": 0.2703522741794586, "learning_rate": 4.536829013583657e-06, "loss": 0.3002, "step": 28356 }, { "epoch": 2.8829808865392437, "grad_norm": 0.2521218955516815, "learning_rate": 4.53647565470582e-06, "loss": 0.3269, "step": 28357 }, { "epoch": 2.8830825538836926, "grad_norm": 0.29107198119163513, "learning_rate": 4.536122298163106e-06, "loss": 0.3622, "step": 28358 }, { "epoch": 2.8831842212281416, "grad_norm": 0.263285368680954, "learning_rate": 4.535768943957299e-06, "loss": 0.3289, "step": 28359 }, { "epoch": 2.8832858885725905, "grad_norm": 0.25662901997566223, "learning_rate": 4.535415592090179e-06, "loss": 0.3312, "step": 28360 }, { "epoch": 2.8833875559170394, "grad_norm": 0.2521308660507202, "learning_rate": 4.535062242563522e-06, "loss": 0.3084, "step": 28361 }, { "epoch": 2.8834892232614884, "grad_norm": 0.26327916979789734, "learning_rate": 4.534708895379115e-06, "loss": 0.3224, "step": 28362 }, { "epoch": 2.8835908906059373, "grad_norm": 0.25675731897354126, "learning_rate": 4.534355550538729e-06, "loss": 0.3169, "step": 28363 }, { "epoch": 2.8836925579503863, "grad_norm": 0.2555557191371918, "learning_rate": 4.534002208044151e-06, "loss": 0.3286, "step": 28364 }, { "epoch": 2.883794225294835, "grad_norm": 0.2586996555328369, "learning_rate": 4.5336488678971625e-06, "loss": 0.3422, "step": 28365 }, { "epoch": 2.883895892639284, "grad_norm": 0.2600689232349396, "learning_rate": 4.533295530099536e-06, "loss": 0.3188, "step": 28366 }, { "epoch": 2.883997559983733, "grad_norm": 0.2641296982765198, "learning_rate": 4.532942194653059e-06, "loss": 0.3217, "step": 28367 }, { "epoch": 2.884099227328182, "grad_norm": 0.25937509536743164, "learning_rate": 4.532588861559506e-06, "loss": 0.3466, "step": 28368 }, { "epoch": 2.884200894672631, "grad_norm": 0.28791335225105286, "learning_rate": 4.5322355308206575e-06, "loss": 0.3237, "step": 28369 }, { "epoch": 2.88430256201708, "grad_norm": 0.27037742733955383, "learning_rate": 4.531882202438299e-06, "loss": 0.3263, "step": 28370 }, { "epoch": 2.884404229361529, "grad_norm": 0.26906293630599976, "learning_rate": 4.531528876414204e-06, "loss": 0.3459, "step": 28371 }, { "epoch": 2.884505896705978, "grad_norm": 0.28570428490638733, "learning_rate": 4.531175552750158e-06, "loss": 0.3097, "step": 28372 }, { "epoch": 2.8846075640504267, "grad_norm": 0.261453777551651, "learning_rate": 4.530822231447936e-06, "loss": 0.3359, "step": 28373 }, { "epoch": 2.884709231394876, "grad_norm": 0.2687912881374359, "learning_rate": 4.530468912509318e-06, "loss": 0.3114, "step": 28374 }, { "epoch": 2.884810898739325, "grad_norm": 0.27206769585609436, "learning_rate": 4.530115595936092e-06, "loss": 0.314, "step": 28375 }, { "epoch": 2.884912566083774, "grad_norm": 0.2597389221191406, "learning_rate": 4.5297622817300265e-06, "loss": 0.3647, "step": 28376 }, { "epoch": 2.885014233428223, "grad_norm": 0.26772311329841614, "learning_rate": 4.529408969892911e-06, "loss": 0.3366, "step": 28377 }, { "epoch": 2.885115900772672, "grad_norm": 0.24031192064285278, "learning_rate": 4.529055660426518e-06, "loss": 0.3234, "step": 28378 }, { "epoch": 2.885217568117121, "grad_norm": 0.25152450799942017, "learning_rate": 4.528702353332631e-06, "loss": 0.3162, "step": 28379 }, { "epoch": 2.8853192354615698, "grad_norm": 0.2689886689186096, "learning_rate": 4.528349048613032e-06, "loss": 0.3227, "step": 28380 }, { "epoch": 2.8854209028060187, "grad_norm": 0.26100239157676697, "learning_rate": 4.5279957462694955e-06, "loss": 0.3195, "step": 28381 }, { "epoch": 2.8855225701504676, "grad_norm": 0.2797732949256897, "learning_rate": 4.5276424463038075e-06, "loss": 0.359, "step": 28382 }, { "epoch": 2.8856242374949166, "grad_norm": 0.2597726285457611, "learning_rate": 4.527289148717742e-06, "loss": 0.2951, "step": 28383 }, { "epoch": 2.8857259048393655, "grad_norm": 0.276301771402359, "learning_rate": 4.52693585351308e-06, "loss": 0.3429, "step": 28384 }, { "epoch": 2.8858275721838145, "grad_norm": 0.26630356907844543, "learning_rate": 4.526582560691607e-06, "loss": 0.3261, "step": 28385 }, { "epoch": 2.8859292395282634, "grad_norm": 0.26647940278053284, "learning_rate": 4.5262292702550955e-06, "loss": 0.3289, "step": 28386 }, { "epoch": 2.8860309068727124, "grad_norm": 0.26143354177474976, "learning_rate": 4.525875982205331e-06, "loss": 0.3597, "step": 28387 }, { "epoch": 2.8861325742171613, "grad_norm": 0.255472868680954, "learning_rate": 4.525522696544088e-06, "loss": 0.3324, "step": 28388 }, { "epoch": 2.8862342415616107, "grad_norm": 0.2621309161186218, "learning_rate": 4.525169413273148e-06, "loss": 0.3398, "step": 28389 }, { "epoch": 2.8863359089060596, "grad_norm": 0.24172034859657288, "learning_rate": 4.5248161323942955e-06, "loss": 0.323, "step": 28390 }, { "epoch": 2.8864375762505086, "grad_norm": 0.2622108459472656, "learning_rate": 4.524462853909304e-06, "loss": 0.3032, "step": 28391 }, { "epoch": 2.8865392435949575, "grad_norm": 0.26708462834358215, "learning_rate": 4.524109577819957e-06, "loss": 0.3625, "step": 28392 }, { "epoch": 2.8866409109394064, "grad_norm": 0.25929582118988037, "learning_rate": 4.523756304128031e-06, "loss": 0.3388, "step": 28393 }, { "epoch": 2.8867425782838554, "grad_norm": 0.2763524055480957, "learning_rate": 4.523403032835307e-06, "loss": 0.349, "step": 28394 }, { "epoch": 2.8868442456283043, "grad_norm": 0.2776070535182953, "learning_rate": 4.523049763943569e-06, "loss": 0.3463, "step": 28395 }, { "epoch": 2.8869459129727533, "grad_norm": 0.25793173909187317, "learning_rate": 4.52269649745459e-06, "loss": 0.346, "step": 28396 }, { "epoch": 2.887047580317202, "grad_norm": 0.2711544632911682, "learning_rate": 4.522343233370154e-06, "loss": 0.3421, "step": 28397 }, { "epoch": 2.887149247661651, "grad_norm": 0.28386175632476807, "learning_rate": 4.521989971692038e-06, "loss": 0.3071, "step": 28398 }, { "epoch": 2.8872509150061, "grad_norm": 0.2824304401874542, "learning_rate": 4.521636712422022e-06, "loss": 0.3642, "step": 28399 }, { "epoch": 2.887352582350549, "grad_norm": 0.2740178108215332, "learning_rate": 4.52128345556189e-06, "loss": 0.3428, "step": 28400 }, { "epoch": 2.887454249694998, "grad_norm": 0.2555842995643616, "learning_rate": 4.520930201113416e-06, "loss": 0.3132, "step": 28401 }, { "epoch": 2.887555917039447, "grad_norm": 0.2488541305065155, "learning_rate": 4.520576949078382e-06, "loss": 0.3217, "step": 28402 }, { "epoch": 2.887657584383896, "grad_norm": 0.2707156836986542, "learning_rate": 4.520223699458568e-06, "loss": 0.3525, "step": 28403 }, { "epoch": 2.887759251728345, "grad_norm": 0.2507602274417877, "learning_rate": 4.519870452255751e-06, "loss": 0.3188, "step": 28404 }, { "epoch": 2.8878609190727937, "grad_norm": 0.24571067094802856, "learning_rate": 4.519517207471715e-06, "loss": 0.3469, "step": 28405 }, { "epoch": 2.8879625864172427, "grad_norm": 0.2677720785140991, "learning_rate": 4.519163965108236e-06, "loss": 0.3411, "step": 28406 }, { "epoch": 2.8880642537616916, "grad_norm": 0.2761509418487549, "learning_rate": 4.5188107251670956e-06, "loss": 0.3124, "step": 28407 }, { "epoch": 2.8881659211061406, "grad_norm": 0.24609820544719696, "learning_rate": 4.518457487650071e-06, "loss": 0.2941, "step": 28408 }, { "epoch": 2.8882675884505895, "grad_norm": 0.2680305540561676, "learning_rate": 4.518104252558943e-06, "loss": 0.3239, "step": 28409 }, { "epoch": 2.8883692557950384, "grad_norm": 0.27651089429855347, "learning_rate": 4.517751019895493e-06, "loss": 0.3033, "step": 28410 }, { "epoch": 2.8884709231394874, "grad_norm": 0.27506759762763977, "learning_rate": 4.517397789661498e-06, "loss": 0.2919, "step": 28411 }, { "epoch": 2.8885725904839363, "grad_norm": 0.25211775302886963, "learning_rate": 4.517044561858738e-06, "loss": 0.346, "step": 28412 }, { "epoch": 2.8886742578283853, "grad_norm": 0.25305911898612976, "learning_rate": 4.516691336488992e-06, "loss": 0.3427, "step": 28413 }, { "epoch": 2.888775925172834, "grad_norm": 0.3027682602405548, "learning_rate": 4.516338113554042e-06, "loss": 0.3553, "step": 28414 }, { "epoch": 2.8888775925172836, "grad_norm": 0.29092586040496826, "learning_rate": 4.515984893055664e-06, "loss": 0.335, "step": 28415 }, { "epoch": 2.8889792598617325, "grad_norm": 0.273182213306427, "learning_rate": 4.51563167499564e-06, "loss": 0.303, "step": 28416 }, { "epoch": 2.8890809272061815, "grad_norm": 0.2497500330209732, "learning_rate": 4.5152784593757486e-06, "loss": 0.3329, "step": 28417 }, { "epoch": 2.8891825945506304, "grad_norm": 0.2551760673522949, "learning_rate": 4.514925246197769e-06, "loss": 0.3438, "step": 28418 }, { "epoch": 2.8892842618950794, "grad_norm": 0.28088462352752686, "learning_rate": 4.514572035463479e-06, "loss": 0.3235, "step": 28419 }, { "epoch": 2.8893859292395283, "grad_norm": 0.25945988297462463, "learning_rate": 4.514218827174662e-06, "loss": 0.3344, "step": 28420 }, { "epoch": 2.8894875965839772, "grad_norm": 0.2565089762210846, "learning_rate": 4.513865621333095e-06, "loss": 0.3343, "step": 28421 }, { "epoch": 2.889589263928426, "grad_norm": 0.26518234610557556, "learning_rate": 4.513512417940557e-06, "loss": 0.3002, "step": 28422 }, { "epoch": 2.889690931272875, "grad_norm": 0.2666144073009491, "learning_rate": 4.513159216998827e-06, "loss": 0.3428, "step": 28423 }, { "epoch": 2.889792598617324, "grad_norm": 0.26413699984550476, "learning_rate": 4.512806018509686e-06, "loss": 0.3064, "step": 28424 }, { "epoch": 2.889894265961773, "grad_norm": 0.26808303594589233, "learning_rate": 4.5124528224749125e-06, "loss": 0.3471, "step": 28425 }, { "epoch": 2.889995933306222, "grad_norm": 0.2786409556865692, "learning_rate": 4.512099628896286e-06, "loss": 0.3855, "step": 28426 }, { "epoch": 2.890097600650671, "grad_norm": 0.265358030796051, "learning_rate": 4.511746437775585e-06, "loss": 0.3198, "step": 28427 }, { "epoch": 2.89019926799512, "grad_norm": 0.2726244330406189, "learning_rate": 4.511393249114589e-06, "loss": 0.3017, "step": 28428 }, { "epoch": 2.8903009353395688, "grad_norm": 0.2399720698595047, "learning_rate": 4.511040062915079e-06, "loss": 0.3446, "step": 28429 }, { "epoch": 2.890402602684018, "grad_norm": 0.27023735642433167, "learning_rate": 4.510686879178833e-06, "loss": 0.321, "step": 28430 }, { "epoch": 2.890504270028467, "grad_norm": 0.2689218819141388, "learning_rate": 4.510333697907628e-06, "loss": 0.325, "step": 28431 }, { "epoch": 2.890605937372916, "grad_norm": 0.25003933906555176, "learning_rate": 4.509980519103248e-06, "loss": 0.3124, "step": 28432 }, { "epoch": 2.890707604717365, "grad_norm": 0.25918808579444885, "learning_rate": 4.509627342767469e-06, "loss": 0.3176, "step": 28433 }, { "epoch": 2.890809272061814, "grad_norm": 0.27195364236831665, "learning_rate": 4.50927416890207e-06, "loss": 0.333, "step": 28434 }, { "epoch": 2.890910939406263, "grad_norm": 0.2731674015522003, "learning_rate": 4.508920997508832e-06, "loss": 0.3477, "step": 28435 }, { "epoch": 2.891012606750712, "grad_norm": 0.2657046616077423, "learning_rate": 4.5085678285895325e-06, "loss": 0.3025, "step": 28436 }, { "epoch": 2.8911142740951608, "grad_norm": 0.2524983882904053, "learning_rate": 4.5082146621459534e-06, "loss": 0.3067, "step": 28437 }, { "epoch": 2.8912159414396097, "grad_norm": 0.248393714427948, "learning_rate": 4.50786149817987e-06, "loss": 0.3606, "step": 28438 }, { "epoch": 2.8913176087840586, "grad_norm": 0.26294636726379395, "learning_rate": 4.507508336693064e-06, "loss": 0.3246, "step": 28439 }, { "epoch": 2.8914192761285076, "grad_norm": 0.25619468092918396, "learning_rate": 4.507155177687315e-06, "loss": 0.3331, "step": 28440 }, { "epoch": 2.8915209434729565, "grad_norm": 0.2523435950279236, "learning_rate": 4.506802021164399e-06, "loss": 0.3097, "step": 28441 }, { "epoch": 2.8916226108174055, "grad_norm": 0.253252774477005, "learning_rate": 4.5064488671261e-06, "loss": 0.3497, "step": 28442 }, { "epoch": 2.8917242781618544, "grad_norm": 0.2612999379634857, "learning_rate": 4.5060957155741915e-06, "loss": 0.3841, "step": 28443 }, { "epoch": 2.8918259455063033, "grad_norm": 0.25111231207847595, "learning_rate": 4.505742566510456e-06, "loss": 0.3336, "step": 28444 }, { "epoch": 2.8919276128507523, "grad_norm": 0.27208203077316284, "learning_rate": 4.5053894199366735e-06, "loss": 0.303, "step": 28445 }, { "epoch": 2.8920292801952012, "grad_norm": 0.25131767988204956, "learning_rate": 4.50503627585462e-06, "loss": 0.3354, "step": 28446 }, { "epoch": 2.89213094753965, "grad_norm": 0.2819660007953644, "learning_rate": 4.504683134266078e-06, "loss": 0.3338, "step": 28447 }, { "epoch": 2.892232614884099, "grad_norm": 0.2654416561126709, "learning_rate": 4.504329995172823e-06, "loss": 0.3313, "step": 28448 }, { "epoch": 2.892334282228548, "grad_norm": 0.25568124651908875, "learning_rate": 4.503976858576636e-06, "loss": 0.3415, "step": 28449 }, { "epoch": 2.892435949572997, "grad_norm": 0.27194759249687195, "learning_rate": 4.503623724479297e-06, "loss": 0.3171, "step": 28450 }, { "epoch": 2.892537616917446, "grad_norm": 0.26816973090171814, "learning_rate": 4.503270592882582e-06, "loss": 0.3365, "step": 28451 }, { "epoch": 2.892639284261895, "grad_norm": 0.24518951773643494, "learning_rate": 4.502917463788273e-06, "loss": 0.3301, "step": 28452 }, { "epoch": 2.892740951606344, "grad_norm": 0.2876378893852234, "learning_rate": 4.502564337198146e-06, "loss": 0.3409, "step": 28453 }, { "epoch": 2.8928426189507928, "grad_norm": 0.26874110102653503, "learning_rate": 4.5022112131139825e-06, "loss": 0.3512, "step": 28454 }, { "epoch": 2.8929442862952417, "grad_norm": 0.29950782656669617, "learning_rate": 4.501858091537561e-06, "loss": 0.3259, "step": 28455 }, { "epoch": 2.893045953639691, "grad_norm": 0.2661830484867096, "learning_rate": 4.50150497247066e-06, "loss": 0.3596, "step": 28456 }, { "epoch": 2.89314762098414, "grad_norm": 0.23651480674743652, "learning_rate": 4.501151855915059e-06, "loss": 0.3109, "step": 28457 }, { "epoch": 2.893249288328589, "grad_norm": 0.2734817862510681, "learning_rate": 4.500798741872535e-06, "loss": 0.3421, "step": 28458 }, { "epoch": 2.893350955673038, "grad_norm": 0.24826447665691376, "learning_rate": 4.500445630344869e-06, "loss": 0.331, "step": 28459 }, { "epoch": 2.893452623017487, "grad_norm": 0.25388845801353455, "learning_rate": 4.500092521333839e-06, "loss": 0.3284, "step": 28460 }, { "epoch": 2.893554290361936, "grad_norm": 0.28054583072662354, "learning_rate": 4.499739414841224e-06, "loss": 0.3134, "step": 28461 }, { "epoch": 2.8936559577063847, "grad_norm": 0.27670592069625854, "learning_rate": 4.499386310868804e-06, "loss": 0.3255, "step": 28462 }, { "epoch": 2.8937576250508337, "grad_norm": 0.28078892827033997, "learning_rate": 4.499033209418356e-06, "loss": 0.321, "step": 28463 }, { "epoch": 2.8938592923952826, "grad_norm": 0.2581925690174103, "learning_rate": 4.49868011049166e-06, "loss": 0.3273, "step": 28464 }, { "epoch": 2.8939609597397316, "grad_norm": 0.27267900109291077, "learning_rate": 4.498327014090494e-06, "loss": 0.348, "step": 28465 }, { "epoch": 2.8940626270841805, "grad_norm": 0.2771875262260437, "learning_rate": 4.497973920216637e-06, "loss": 0.3529, "step": 28466 }, { "epoch": 2.8941642944286294, "grad_norm": 0.27090561389923096, "learning_rate": 4.497620828871869e-06, "loss": 0.3482, "step": 28467 }, { "epoch": 2.8942659617730784, "grad_norm": 0.25990527868270874, "learning_rate": 4.497267740057967e-06, "loss": 0.3222, "step": 28468 }, { "epoch": 2.8943676291175273, "grad_norm": 0.26371392607688904, "learning_rate": 4.496914653776712e-06, "loss": 0.3643, "step": 28469 }, { "epoch": 2.8944692964619763, "grad_norm": 0.25915589928627014, "learning_rate": 4.49656157002988e-06, "loss": 0.3551, "step": 28470 }, { "epoch": 2.8945709638064256, "grad_norm": 0.28483644127845764, "learning_rate": 4.496208488819252e-06, "loss": 0.3459, "step": 28471 }, { "epoch": 2.8946726311508746, "grad_norm": 0.2625872790813446, "learning_rate": 4.4958554101466056e-06, "loss": 0.322, "step": 28472 }, { "epoch": 2.8947742984953235, "grad_norm": 0.2556951344013214, "learning_rate": 4.495502334013719e-06, "loss": 0.3415, "step": 28473 }, { "epoch": 2.8948759658397725, "grad_norm": 0.27439993619918823, "learning_rate": 4.495149260422374e-06, "loss": 0.3114, "step": 28474 }, { "epoch": 2.8949776331842214, "grad_norm": 0.25565430521965027, "learning_rate": 4.4947961893743456e-06, "loss": 0.3263, "step": 28475 }, { "epoch": 2.8950793005286704, "grad_norm": 0.268041729927063, "learning_rate": 4.4944431208714125e-06, "loss": 0.3425, "step": 28476 }, { "epoch": 2.8951809678731193, "grad_norm": 0.2588825523853302, "learning_rate": 4.494090054915358e-06, "loss": 0.3384, "step": 28477 }, { "epoch": 2.8952826352175682, "grad_norm": 0.2620672881603241, "learning_rate": 4.4937369915079545e-06, "loss": 0.3296, "step": 28478 }, { "epoch": 2.895384302562017, "grad_norm": 0.24904441833496094, "learning_rate": 4.493383930650987e-06, "loss": 0.3099, "step": 28479 }, { "epoch": 2.895485969906466, "grad_norm": 0.2698703408241272, "learning_rate": 4.493030872346228e-06, "loss": 0.34, "step": 28480 }, { "epoch": 2.895587637250915, "grad_norm": 0.267634779214859, "learning_rate": 4.492677816595459e-06, "loss": 0.328, "step": 28481 }, { "epoch": 2.895689304595364, "grad_norm": 0.25895392894744873, "learning_rate": 4.4923247634004606e-06, "loss": 0.3421, "step": 28482 }, { "epoch": 2.895790971939813, "grad_norm": 0.2508528232574463, "learning_rate": 4.491971712763008e-06, "loss": 0.3143, "step": 28483 }, { "epoch": 2.895892639284262, "grad_norm": 0.2545403242111206, "learning_rate": 4.491618664684884e-06, "loss": 0.3531, "step": 28484 }, { "epoch": 2.895994306628711, "grad_norm": 0.2661629021167755, "learning_rate": 4.49126561916786e-06, "loss": 0.3205, "step": 28485 }, { "epoch": 2.8960959739731598, "grad_norm": 0.2742012143135071, "learning_rate": 4.49091257621372e-06, "loss": 0.3198, "step": 28486 }, { "epoch": 2.8961976413176087, "grad_norm": 0.2628095746040344, "learning_rate": 4.490559535824243e-06, "loss": 0.3293, "step": 28487 }, { "epoch": 2.8962993086620576, "grad_norm": 0.2848503887653351, "learning_rate": 4.490206498001205e-06, "loss": 0.3314, "step": 28488 }, { "epoch": 2.8964009760065066, "grad_norm": 0.2710370421409607, "learning_rate": 4.489853462746388e-06, "loss": 0.334, "step": 28489 }, { "epoch": 2.8965026433509555, "grad_norm": 0.2563093900680542, "learning_rate": 4.489500430061564e-06, "loss": 0.3257, "step": 28490 }, { "epoch": 2.8966043106954045, "grad_norm": 0.2561514377593994, "learning_rate": 4.489147399948517e-06, "loss": 0.3323, "step": 28491 }, { "epoch": 2.8967059780398534, "grad_norm": 0.2619606554508209, "learning_rate": 4.488794372409025e-06, "loss": 0.34, "step": 28492 }, { "epoch": 2.8968076453843024, "grad_norm": 0.24613279104232788, "learning_rate": 4.4884413474448645e-06, "loss": 0.3073, "step": 28493 }, { "epoch": 2.8969093127287513, "grad_norm": 0.26563483476638794, "learning_rate": 4.488088325057817e-06, "loss": 0.3318, "step": 28494 }, { "epoch": 2.8970109800732002, "grad_norm": 0.25758421421051025, "learning_rate": 4.487735305249656e-06, "loss": 0.3141, "step": 28495 }, { "epoch": 2.897112647417649, "grad_norm": 0.25282782316207886, "learning_rate": 4.487382288022163e-06, "loss": 0.3102, "step": 28496 }, { "epoch": 2.8972143147620986, "grad_norm": 0.2547301650047302, "learning_rate": 4.48702927337712e-06, "loss": 0.3141, "step": 28497 }, { "epoch": 2.8973159821065475, "grad_norm": 0.25317472219467163, "learning_rate": 4.4866762613162964e-06, "loss": 0.3176, "step": 28498 }, { "epoch": 2.8974176494509964, "grad_norm": 0.2484603077173233, "learning_rate": 4.48632325184148e-06, "loss": 0.34, "step": 28499 }, { "epoch": 2.8975193167954454, "grad_norm": 0.2781256139278412, "learning_rate": 4.485970244954442e-06, "loss": 0.3517, "step": 28500 }, { "epoch": 2.8976209841398943, "grad_norm": 0.24169228971004486, "learning_rate": 4.485617240656964e-06, "loss": 0.3336, "step": 28501 }, { "epoch": 2.8977226514843433, "grad_norm": 0.280643105506897, "learning_rate": 4.485264238950827e-06, "loss": 0.3165, "step": 28502 }, { "epoch": 2.897824318828792, "grad_norm": 0.25956907868385315, "learning_rate": 4.484911239837803e-06, "loss": 0.3139, "step": 28503 }, { "epoch": 2.897925986173241, "grad_norm": 0.25899580121040344, "learning_rate": 4.4845582433196775e-06, "loss": 0.3306, "step": 28504 }, { "epoch": 2.89802765351769, "grad_norm": 0.2606015205383301, "learning_rate": 4.484205249398221e-06, "loss": 0.3297, "step": 28505 }, { "epoch": 2.898129320862139, "grad_norm": 0.2318093627691269, "learning_rate": 4.483852258075217e-06, "loss": 0.3401, "step": 28506 }, { "epoch": 2.898230988206588, "grad_norm": 0.268673837184906, "learning_rate": 4.4834992693524455e-06, "loss": 0.3455, "step": 28507 }, { "epoch": 2.898332655551037, "grad_norm": 0.2671732008457184, "learning_rate": 4.483146283231678e-06, "loss": 0.336, "step": 28508 }, { "epoch": 2.898434322895486, "grad_norm": 0.2624552845954895, "learning_rate": 4.4827932997147e-06, "loss": 0.3164, "step": 28509 }, { "epoch": 2.898535990239935, "grad_norm": 0.271045982837677, "learning_rate": 4.482440318803284e-06, "loss": 0.3372, "step": 28510 }, { "epoch": 2.8986376575843837, "grad_norm": 0.2668929696083069, "learning_rate": 4.482087340499211e-06, "loss": 0.3519, "step": 28511 }, { "epoch": 2.898739324928833, "grad_norm": 0.2757885456085205, "learning_rate": 4.481734364804261e-06, "loss": 0.317, "step": 28512 }, { "epoch": 2.898840992273282, "grad_norm": 0.25040990114212036, "learning_rate": 4.4813813917202075e-06, "loss": 0.3275, "step": 28513 }, { "epoch": 2.898942659617731, "grad_norm": 0.2525652050971985, "learning_rate": 4.481028421248834e-06, "loss": 0.3166, "step": 28514 }, { "epoch": 2.89904432696218, "grad_norm": 0.29591187834739685, "learning_rate": 4.480675453391914e-06, "loss": 0.3395, "step": 28515 }, { "epoch": 2.899145994306629, "grad_norm": 0.25001272559165955, "learning_rate": 4.480322488151227e-06, "loss": 0.3158, "step": 28516 }, { "epoch": 2.899247661651078, "grad_norm": 0.2448795586824417, "learning_rate": 4.479969525528555e-06, "loss": 0.3334, "step": 28517 }, { "epoch": 2.8993493289955268, "grad_norm": 0.2737179398536682, "learning_rate": 4.479616565525669e-06, "loss": 0.3256, "step": 28518 }, { "epoch": 2.8994509963399757, "grad_norm": 0.2608430087566376, "learning_rate": 4.4792636081443545e-06, "loss": 0.3124, "step": 28519 }, { "epoch": 2.8995526636844247, "grad_norm": 0.2649928629398346, "learning_rate": 4.478910653386384e-06, "loss": 0.3142, "step": 28520 }, { "epoch": 2.8996543310288736, "grad_norm": 0.23900987207889557, "learning_rate": 4.478557701253538e-06, "loss": 0.2936, "step": 28521 }, { "epoch": 2.8997559983733225, "grad_norm": 0.25535890460014343, "learning_rate": 4.478204751747596e-06, "loss": 0.3164, "step": 28522 }, { "epoch": 2.8998576657177715, "grad_norm": 0.30070993304252625, "learning_rate": 4.4778518048703315e-06, "loss": 0.364, "step": 28523 }, { "epoch": 2.8999593330622204, "grad_norm": 0.2806788980960846, "learning_rate": 4.477498860623529e-06, "loss": 0.3419, "step": 28524 }, { "epoch": 2.9000610004066694, "grad_norm": 0.2765575647354126, "learning_rate": 4.477145919008961e-06, "loss": 0.3375, "step": 28525 }, { "epoch": 2.9001626677511183, "grad_norm": 0.2594060003757477, "learning_rate": 4.4767929800284065e-06, "loss": 0.3039, "step": 28526 }, { "epoch": 2.9002643350955672, "grad_norm": 0.27855128049850464, "learning_rate": 4.476440043683648e-06, "loss": 0.3441, "step": 28527 }, { "epoch": 2.900366002440016, "grad_norm": 0.29176831245422363, "learning_rate": 4.476087109976457e-06, "loss": 0.3128, "step": 28528 }, { "epoch": 2.900467669784465, "grad_norm": 0.2595626711845398, "learning_rate": 4.475734178908618e-06, "loss": 0.2943, "step": 28529 }, { "epoch": 2.900569337128914, "grad_norm": 0.24863123893737793, "learning_rate": 4.475381250481902e-06, "loss": 0.3428, "step": 28530 }, { "epoch": 2.900671004473363, "grad_norm": 0.26888859272003174, "learning_rate": 4.475028324698092e-06, "loss": 0.3549, "step": 28531 }, { "epoch": 2.900772671817812, "grad_norm": 0.24712508916854858, "learning_rate": 4.474675401558966e-06, "loss": 0.3045, "step": 28532 }, { "epoch": 2.900874339162261, "grad_norm": 0.27256354689598083, "learning_rate": 4.4743224810662975e-06, "loss": 0.3376, "step": 28533 }, { "epoch": 2.90097600650671, "grad_norm": 0.25734585523605347, "learning_rate": 4.473969563221871e-06, "loss": 0.3265, "step": 28534 }, { "epoch": 2.901077673851159, "grad_norm": 0.2539040744304657, "learning_rate": 4.473616648027458e-06, "loss": 0.334, "step": 28535 }, { "epoch": 2.9011793411956077, "grad_norm": 0.2657201886177063, "learning_rate": 4.473263735484839e-06, "loss": 0.3185, "step": 28536 }, { "epoch": 2.9012810085400567, "grad_norm": 0.26147133111953735, "learning_rate": 4.472910825595795e-06, "loss": 0.3052, "step": 28537 }, { "epoch": 2.901382675884506, "grad_norm": 0.27631476521492004, "learning_rate": 4.472557918362099e-06, "loss": 0.3502, "step": 28538 }, { "epoch": 2.901484343228955, "grad_norm": 0.26579639315605164, "learning_rate": 4.472205013785532e-06, "loss": 0.3305, "step": 28539 }, { "epoch": 2.901586010573404, "grad_norm": 0.2573644816875458, "learning_rate": 4.471852111867869e-06, "loss": 0.3316, "step": 28540 }, { "epoch": 2.901687677917853, "grad_norm": 0.2648557424545288, "learning_rate": 4.471499212610889e-06, "loss": 0.3037, "step": 28541 }, { "epoch": 2.901789345262302, "grad_norm": 0.26107290387153625, "learning_rate": 4.471146316016374e-06, "loss": 0.3334, "step": 28542 }, { "epoch": 2.9018910126067508, "grad_norm": 0.27864712476730347, "learning_rate": 4.4707934220860955e-06, "loss": 0.3501, "step": 28543 }, { "epoch": 2.9019926799511997, "grad_norm": 0.2741071879863739, "learning_rate": 4.470440530821835e-06, "loss": 0.3315, "step": 28544 }, { "epoch": 2.9020943472956486, "grad_norm": 0.25542205572128296, "learning_rate": 4.470087642225369e-06, "loss": 0.3635, "step": 28545 }, { "epoch": 2.9021960146400976, "grad_norm": 0.2664588391780853, "learning_rate": 4.469734756298473e-06, "loss": 0.2905, "step": 28546 }, { "epoch": 2.9022976819845465, "grad_norm": 0.27134111523628235, "learning_rate": 4.469381873042932e-06, "loss": 0.3253, "step": 28547 }, { "epoch": 2.9023993493289955, "grad_norm": 0.27911999821662903, "learning_rate": 4.469028992460516e-06, "loss": 0.3616, "step": 28548 }, { "epoch": 2.9025010166734444, "grad_norm": 0.2608094811439514, "learning_rate": 4.468676114553006e-06, "loss": 0.345, "step": 28549 }, { "epoch": 2.9026026840178933, "grad_norm": 0.2634391784667969, "learning_rate": 4.46832323932218e-06, "loss": 0.3302, "step": 28550 }, { "epoch": 2.9027043513623423, "grad_norm": 0.2555156350135803, "learning_rate": 4.467970366769814e-06, "loss": 0.3249, "step": 28551 }, { "epoch": 2.9028060187067912, "grad_norm": 0.2869417071342468, "learning_rate": 4.467617496897689e-06, "loss": 0.3291, "step": 28552 }, { "epoch": 2.9029076860512406, "grad_norm": 0.2425195723772049, "learning_rate": 4.46726462970758e-06, "loss": 0.3052, "step": 28553 }, { "epoch": 2.9030093533956896, "grad_norm": 0.26991528272628784, "learning_rate": 4.466911765201265e-06, "loss": 0.3155, "step": 28554 }, { "epoch": 2.9031110207401385, "grad_norm": 0.2632693946361542, "learning_rate": 4.4665589033805205e-06, "loss": 0.307, "step": 28555 }, { "epoch": 2.9032126880845874, "grad_norm": 0.2539660632610321, "learning_rate": 4.466206044247126e-06, "loss": 0.3106, "step": 28556 }, { "epoch": 2.9033143554290364, "grad_norm": 0.2962973117828369, "learning_rate": 4.46585318780286e-06, "loss": 0.3586, "step": 28557 }, { "epoch": 2.9034160227734853, "grad_norm": 0.2806006371974945, "learning_rate": 4.465500334049498e-06, "loss": 0.3442, "step": 28558 }, { "epoch": 2.9035176901179343, "grad_norm": 0.24837303161621094, "learning_rate": 4.465147482988818e-06, "loss": 0.3278, "step": 28559 }, { "epoch": 2.903619357462383, "grad_norm": 0.2677721381187439, "learning_rate": 4.464794634622598e-06, "loss": 0.3104, "step": 28560 }, { "epoch": 2.903721024806832, "grad_norm": 0.2703692317008972, "learning_rate": 4.464441788952615e-06, "loss": 0.3449, "step": 28561 }, { "epoch": 2.903822692151281, "grad_norm": 0.2856076657772064, "learning_rate": 4.4640889459806475e-06, "loss": 0.3153, "step": 28562 }, { "epoch": 2.90392435949573, "grad_norm": 0.27643534541130066, "learning_rate": 4.463736105708472e-06, "loss": 0.335, "step": 28563 }, { "epoch": 2.904026026840179, "grad_norm": 0.2708924114704132, "learning_rate": 4.4633832681378675e-06, "loss": 0.3073, "step": 28564 }, { "epoch": 2.904127694184628, "grad_norm": 0.2684551477432251, "learning_rate": 4.46303043327061e-06, "loss": 0.3213, "step": 28565 }, { "epoch": 2.904229361529077, "grad_norm": 0.2607196569442749, "learning_rate": 4.462677601108477e-06, "loss": 0.3158, "step": 28566 }, { "epoch": 2.904331028873526, "grad_norm": 0.2686963975429535, "learning_rate": 4.462324771653248e-06, "loss": 0.3261, "step": 28567 }, { "epoch": 2.9044326962179747, "grad_norm": 0.2707406282424927, "learning_rate": 4.461971944906698e-06, "loss": 0.3468, "step": 28568 }, { "epoch": 2.9045343635624237, "grad_norm": 0.24304188787937164, "learning_rate": 4.461619120870606e-06, "loss": 0.3211, "step": 28569 }, { "epoch": 2.9046360309068726, "grad_norm": 0.25052398443222046, "learning_rate": 4.4612662995467485e-06, "loss": 0.3535, "step": 28570 }, { "epoch": 2.9047376982513216, "grad_norm": 0.26327186822891235, "learning_rate": 4.460913480936903e-06, "loss": 0.2995, "step": 28571 }, { "epoch": 2.9048393655957705, "grad_norm": 0.2553645074367523, "learning_rate": 4.460560665042848e-06, "loss": 0.294, "step": 28572 }, { "epoch": 2.9049410329402194, "grad_norm": 0.2739965319633484, "learning_rate": 4.4602078518663596e-06, "loss": 0.3117, "step": 28573 }, { "epoch": 2.9050427002846684, "grad_norm": 0.2590569257736206, "learning_rate": 4.459855041409217e-06, "loss": 0.3042, "step": 28574 }, { "epoch": 2.9051443676291173, "grad_norm": 0.2697213888168335, "learning_rate": 4.459502233673195e-06, "loss": 0.3135, "step": 28575 }, { "epoch": 2.9052460349735663, "grad_norm": 0.25858983397483826, "learning_rate": 4.459149428660072e-06, "loss": 0.3357, "step": 28576 }, { "epoch": 2.905347702318015, "grad_norm": 0.2781774699687958, "learning_rate": 4.458796626371628e-06, "loss": 0.3357, "step": 28577 }, { "epoch": 2.905449369662464, "grad_norm": 0.2623136639595032, "learning_rate": 4.4584438268096355e-06, "loss": 0.303, "step": 28578 }, { "epoch": 2.9055510370069135, "grad_norm": 0.27554014325141907, "learning_rate": 4.458091029975875e-06, "loss": 0.3287, "step": 28579 }, { "epoch": 2.9056527043513625, "grad_norm": 0.2863882780075073, "learning_rate": 4.457738235872124e-06, "loss": 0.3439, "step": 28580 }, { "epoch": 2.9057543716958114, "grad_norm": 0.27006813883781433, "learning_rate": 4.457385444500158e-06, "loss": 0.3581, "step": 28581 }, { "epoch": 2.9058560390402604, "grad_norm": 0.25649383664131165, "learning_rate": 4.4570326558617555e-06, "loss": 0.3195, "step": 28582 }, { "epoch": 2.9059577063847093, "grad_norm": 0.24500003457069397, "learning_rate": 4.456679869958693e-06, "loss": 0.3525, "step": 28583 }, { "epoch": 2.9060593737291582, "grad_norm": 0.2848743796348572, "learning_rate": 4.456327086792749e-06, "loss": 0.3567, "step": 28584 }, { "epoch": 2.906161041073607, "grad_norm": 0.25327426195144653, "learning_rate": 4.455974306365699e-06, "loss": 0.3207, "step": 28585 }, { "epoch": 2.906262708418056, "grad_norm": 0.26138219237327576, "learning_rate": 4.455621528679322e-06, "loss": 0.3193, "step": 28586 }, { "epoch": 2.906364375762505, "grad_norm": 0.2706433832645416, "learning_rate": 4.455268753735395e-06, "loss": 0.3403, "step": 28587 }, { "epoch": 2.906466043106954, "grad_norm": 0.28418511152267456, "learning_rate": 4.454915981535692e-06, "loss": 0.3575, "step": 28588 }, { "epoch": 2.906567710451403, "grad_norm": 0.2632211148738861, "learning_rate": 4.454563212081995e-06, "loss": 0.3353, "step": 28589 }, { "epoch": 2.906669377795852, "grad_norm": 0.26349061727523804, "learning_rate": 4.4542104453760785e-06, "loss": 0.3372, "step": 28590 }, { "epoch": 2.906771045140301, "grad_norm": 0.26481208205223083, "learning_rate": 4.453857681419719e-06, "loss": 0.3373, "step": 28591 }, { "epoch": 2.9068727124847498, "grad_norm": 0.27786895632743835, "learning_rate": 4.4535049202146965e-06, "loss": 0.3429, "step": 28592 }, { "epoch": 2.9069743798291987, "grad_norm": 0.2653968036174774, "learning_rate": 4.453152161762785e-06, "loss": 0.3025, "step": 28593 }, { "epoch": 2.907076047173648, "grad_norm": 0.2795407772064209, "learning_rate": 4.452799406065764e-06, "loss": 0.3535, "step": 28594 }, { "epoch": 2.907177714518097, "grad_norm": 0.26538875699043274, "learning_rate": 4.452446653125409e-06, "loss": 0.3347, "step": 28595 }, { "epoch": 2.907279381862546, "grad_norm": 0.250618040561676, "learning_rate": 4.452093902943497e-06, "loss": 0.3501, "step": 28596 }, { "epoch": 2.907381049206995, "grad_norm": 0.28283894062042236, "learning_rate": 4.451741155521807e-06, "loss": 0.3259, "step": 28597 }, { "epoch": 2.907482716551444, "grad_norm": 0.28168678283691406, "learning_rate": 4.451388410862113e-06, "loss": 0.2953, "step": 28598 }, { "epoch": 2.907584383895893, "grad_norm": 0.2695712447166443, "learning_rate": 4.451035668966196e-06, "loss": 0.3406, "step": 28599 }, { "epoch": 2.9076860512403417, "grad_norm": 0.27944740653038025, "learning_rate": 4.450682929835829e-06, "loss": 0.3481, "step": 28600 }, { "epoch": 2.9077877185847907, "grad_norm": 0.2676016688346863, "learning_rate": 4.450330193472792e-06, "loss": 0.3364, "step": 28601 }, { "epoch": 2.9078893859292396, "grad_norm": 0.2594986855983734, "learning_rate": 4.4499774598788605e-06, "loss": 0.366, "step": 28602 }, { "epoch": 2.9079910532736886, "grad_norm": 0.26265114545822144, "learning_rate": 4.449624729055812e-06, "loss": 0.361, "step": 28603 }, { "epoch": 2.9080927206181375, "grad_norm": 0.27936220169067383, "learning_rate": 4.449272001005423e-06, "loss": 0.3555, "step": 28604 }, { "epoch": 2.9081943879625864, "grad_norm": 0.2621780037879944, "learning_rate": 4.448919275729471e-06, "loss": 0.3146, "step": 28605 }, { "epoch": 2.9082960553070354, "grad_norm": 0.26739129424095154, "learning_rate": 4.448566553229732e-06, "loss": 0.3541, "step": 28606 }, { "epoch": 2.9083977226514843, "grad_norm": 0.2558782398700714, "learning_rate": 4.448213833507985e-06, "loss": 0.3147, "step": 28607 }, { "epoch": 2.9084993899959333, "grad_norm": 0.29640913009643555, "learning_rate": 4.4478611165660046e-06, "loss": 0.3112, "step": 28608 }, { "epoch": 2.908601057340382, "grad_norm": 0.26703858375549316, "learning_rate": 4.4475084024055695e-06, "loss": 0.3008, "step": 28609 }, { "epoch": 2.908702724684831, "grad_norm": 0.278090238571167, "learning_rate": 4.447155691028455e-06, "loss": 0.375, "step": 28610 }, { "epoch": 2.90880439202928, "grad_norm": 0.2631348967552185, "learning_rate": 4.446802982436438e-06, "loss": 0.3269, "step": 28611 }, { "epoch": 2.908906059373729, "grad_norm": 0.28116434812545776, "learning_rate": 4.446450276631298e-06, "loss": 0.3407, "step": 28612 }, { "epoch": 2.909007726718178, "grad_norm": 0.2556265592575073, "learning_rate": 4.446097573614809e-06, "loss": 0.3518, "step": 28613 }, { "epoch": 2.909109394062627, "grad_norm": 0.2673065960407257, "learning_rate": 4.445744873388749e-06, "loss": 0.3105, "step": 28614 }, { "epoch": 2.909211061407076, "grad_norm": 0.28639158606529236, "learning_rate": 4.445392175954895e-06, "loss": 0.3338, "step": 28615 }, { "epoch": 2.909312728751525, "grad_norm": 0.2724461555480957, "learning_rate": 4.445039481315022e-06, "loss": 0.356, "step": 28616 }, { "epoch": 2.9094143960959737, "grad_norm": 0.24332763254642487, "learning_rate": 4.44468678947091e-06, "loss": 0.3443, "step": 28617 }, { "epoch": 2.9095160634404227, "grad_norm": 0.2733975648880005, "learning_rate": 4.4443341004243326e-06, "loss": 0.3308, "step": 28618 }, { "epoch": 2.9096177307848716, "grad_norm": 0.2598229944705963, "learning_rate": 4.4439814141770695e-06, "loss": 0.3326, "step": 28619 }, { "epoch": 2.909719398129321, "grad_norm": 0.28837883472442627, "learning_rate": 4.443628730730894e-06, "loss": 0.3606, "step": 28620 }, { "epoch": 2.90982106547377, "grad_norm": 0.27878350019454956, "learning_rate": 4.443276050087585e-06, "loss": 0.3216, "step": 28621 }, { "epoch": 2.909922732818219, "grad_norm": 0.2603784203529358, "learning_rate": 4.442923372248921e-06, "loss": 0.3119, "step": 28622 }, { "epoch": 2.910024400162668, "grad_norm": 0.2746363878250122, "learning_rate": 4.442570697216674e-06, "loss": 0.3171, "step": 28623 }, { "epoch": 2.9101260675071168, "grad_norm": 0.2614935636520386, "learning_rate": 4.442218024992626e-06, "loss": 0.3436, "step": 28624 }, { "epoch": 2.9102277348515657, "grad_norm": 0.2667508125305176, "learning_rate": 4.441865355578549e-06, "loss": 0.372, "step": 28625 }, { "epoch": 2.9103294021960147, "grad_norm": 0.2568412721157074, "learning_rate": 4.441512688976222e-06, "loss": 0.293, "step": 28626 }, { "epoch": 2.9104310695404636, "grad_norm": 0.27063092589378357, "learning_rate": 4.441160025187422e-06, "loss": 0.3312, "step": 28627 }, { "epoch": 2.9105327368849125, "grad_norm": 0.25165438652038574, "learning_rate": 4.440807364213925e-06, "loss": 0.3045, "step": 28628 }, { "epoch": 2.9106344042293615, "grad_norm": 0.2537234425544739, "learning_rate": 4.4404547060575075e-06, "loss": 0.3289, "step": 28629 }, { "epoch": 2.9107360715738104, "grad_norm": 0.28047311305999756, "learning_rate": 4.4401020507199455e-06, "loss": 0.3653, "step": 28630 }, { "epoch": 2.9108377389182594, "grad_norm": 0.2622472941875458, "learning_rate": 4.439749398203017e-06, "loss": 0.3581, "step": 28631 }, { "epoch": 2.9109394062627083, "grad_norm": 0.26293960213661194, "learning_rate": 4.439396748508498e-06, "loss": 0.3418, "step": 28632 }, { "epoch": 2.9110410736071572, "grad_norm": 0.2749412953853607, "learning_rate": 4.439044101638165e-06, "loss": 0.3381, "step": 28633 }, { "epoch": 2.911142740951606, "grad_norm": 0.269936740398407, "learning_rate": 4.438691457593795e-06, "loss": 0.326, "step": 28634 }, { "epoch": 2.9112444082960556, "grad_norm": 0.3028711974620819, "learning_rate": 4.438338816377162e-06, "loss": 0.3143, "step": 28635 }, { "epoch": 2.9113460756405045, "grad_norm": 0.2784874141216278, "learning_rate": 4.437986177990046e-06, "loss": 0.315, "step": 28636 }, { "epoch": 2.9114477429849535, "grad_norm": 0.2875812351703644, "learning_rate": 4.437633542434222e-06, "loss": 0.3498, "step": 28637 }, { "epoch": 2.9115494103294024, "grad_norm": 0.2702724039554596, "learning_rate": 4.437280909711467e-06, "loss": 0.3583, "step": 28638 }, { "epoch": 2.9116510776738513, "grad_norm": 0.263171523809433, "learning_rate": 4.436928279823557e-06, "loss": 0.3172, "step": 28639 }, { "epoch": 2.9117527450183003, "grad_norm": 0.2813531458377838, "learning_rate": 4.436575652772267e-06, "loss": 0.331, "step": 28640 }, { "epoch": 2.9118544123627492, "grad_norm": 0.27174991369247437, "learning_rate": 4.436223028559376e-06, "loss": 0.3518, "step": 28641 }, { "epoch": 2.911956079707198, "grad_norm": 0.2507537305355072, "learning_rate": 4.43587040718666e-06, "loss": 0.3352, "step": 28642 }, { "epoch": 2.912057747051647, "grad_norm": 0.26629483699798584, "learning_rate": 4.435517788655894e-06, "loss": 0.3069, "step": 28643 }, { "epoch": 2.912159414396096, "grad_norm": 0.2763417661190033, "learning_rate": 4.435165172968856e-06, "loss": 0.3785, "step": 28644 }, { "epoch": 2.912261081740545, "grad_norm": 0.2759813070297241, "learning_rate": 4.434812560127321e-06, "loss": 0.331, "step": 28645 }, { "epoch": 2.912362749084994, "grad_norm": 0.2565924823284149, "learning_rate": 4.4344599501330674e-06, "loss": 0.3219, "step": 28646 }, { "epoch": 2.912464416429443, "grad_norm": 0.2801082134246826, "learning_rate": 4.434107342987868e-06, "loss": 0.3409, "step": 28647 }, { "epoch": 2.912566083773892, "grad_norm": 0.28649982810020447, "learning_rate": 4.433754738693502e-06, "loss": 0.3466, "step": 28648 }, { "epoch": 2.9126677511183408, "grad_norm": 0.26115143299102783, "learning_rate": 4.433402137251747e-06, "loss": 0.3509, "step": 28649 }, { "epoch": 2.9127694184627897, "grad_norm": 0.28182336688041687, "learning_rate": 4.433049538664374e-06, "loss": 0.3571, "step": 28650 }, { "epoch": 2.9128710858072386, "grad_norm": 0.2901113033294678, "learning_rate": 4.432696942933166e-06, "loss": 0.3458, "step": 28651 }, { "epoch": 2.9129727531516876, "grad_norm": 0.2760675251483917, "learning_rate": 4.432344350059894e-06, "loss": 0.3259, "step": 28652 }, { "epoch": 2.9130744204961365, "grad_norm": 0.256540447473526, "learning_rate": 4.431991760046336e-06, "loss": 0.3588, "step": 28653 }, { "epoch": 2.9131760878405855, "grad_norm": 0.24559134244918823, "learning_rate": 4.4316391728942706e-06, "loss": 0.3682, "step": 28654 }, { "epoch": 2.9132777551850344, "grad_norm": 0.27946335077285767, "learning_rate": 4.431286588605469e-06, "loss": 0.317, "step": 28655 }, { "epoch": 2.9133794225294833, "grad_norm": 0.27523136138916016, "learning_rate": 4.430934007181714e-06, "loss": 0.3072, "step": 28656 }, { "epoch": 2.9134810898739323, "grad_norm": 0.25440940260887146, "learning_rate": 4.430581428624775e-06, "loss": 0.3343, "step": 28657 }, { "epoch": 2.9135827572183812, "grad_norm": 0.2526596784591675, "learning_rate": 4.430228852936432e-06, "loss": 0.3162, "step": 28658 }, { "epoch": 2.91368442456283, "grad_norm": 0.2568284273147583, "learning_rate": 4.429876280118463e-06, "loss": 0.3634, "step": 28659 }, { "epoch": 2.913786091907279, "grad_norm": 0.264067679643631, "learning_rate": 4.429523710172639e-06, "loss": 0.3269, "step": 28660 }, { "epoch": 2.9138877592517285, "grad_norm": 0.2773509621620178, "learning_rate": 4.429171143100742e-06, "loss": 0.3399, "step": 28661 }, { "epoch": 2.9139894265961774, "grad_norm": 0.26037952303886414, "learning_rate": 4.4288185789045415e-06, "loss": 0.3432, "step": 28662 }, { "epoch": 2.9140910939406264, "grad_norm": 0.2603069841861725, "learning_rate": 4.428466017585819e-06, "loss": 0.3295, "step": 28663 }, { "epoch": 2.9141927612850753, "grad_norm": 0.24765419960021973, "learning_rate": 4.42811345914635e-06, "loss": 0.3365, "step": 28664 }, { "epoch": 2.9142944286295243, "grad_norm": 0.24776697158813477, "learning_rate": 4.427760903587907e-06, "loss": 0.349, "step": 28665 }, { "epoch": 2.914396095973973, "grad_norm": 0.26662638783454895, "learning_rate": 4.4274083509122724e-06, "loss": 0.3024, "step": 28666 }, { "epoch": 2.914497763318422, "grad_norm": 0.2734197676181793, "learning_rate": 4.427055801121214e-06, "loss": 0.314, "step": 28667 }, { "epoch": 2.914599430662871, "grad_norm": 0.24945755302906036, "learning_rate": 4.426703254216515e-06, "loss": 0.3249, "step": 28668 }, { "epoch": 2.91470109800732, "grad_norm": 0.2505339980125427, "learning_rate": 4.4263507101999496e-06, "loss": 0.3258, "step": 28669 }, { "epoch": 2.914802765351769, "grad_norm": 0.2680293917655945, "learning_rate": 4.42599816907329e-06, "loss": 0.311, "step": 28670 }, { "epoch": 2.914904432696218, "grad_norm": 0.27520981431007385, "learning_rate": 4.425645630838318e-06, "loss": 0.3206, "step": 28671 }, { "epoch": 2.915006100040667, "grad_norm": 0.27911821007728577, "learning_rate": 4.425293095496805e-06, "loss": 0.3409, "step": 28672 }, { "epoch": 2.915107767385116, "grad_norm": 0.2654874324798584, "learning_rate": 4.4249405630505285e-06, "loss": 0.3054, "step": 28673 }, { "epoch": 2.9152094347295647, "grad_norm": 0.25103938579559326, "learning_rate": 4.424588033501267e-06, "loss": 0.3201, "step": 28674 }, { "epoch": 2.9153111020740137, "grad_norm": 0.39898940920829773, "learning_rate": 4.424235506850792e-06, "loss": 0.3294, "step": 28675 }, { "epoch": 2.915412769418463, "grad_norm": 0.2882382869720459, "learning_rate": 4.423882983100884e-06, "loss": 0.4081, "step": 28676 }, { "epoch": 2.915514436762912, "grad_norm": 0.2616855800151825, "learning_rate": 4.423530462253315e-06, "loss": 0.3491, "step": 28677 }, { "epoch": 2.915616104107361, "grad_norm": 0.25893908739089966, "learning_rate": 4.4231779443098615e-06, "loss": 0.2997, "step": 28678 }, { "epoch": 2.91571777145181, "grad_norm": 0.25557366013526917, "learning_rate": 4.422825429272304e-06, "loss": 0.3394, "step": 28679 }, { "epoch": 2.915819438796259, "grad_norm": 0.2749403119087219, "learning_rate": 4.422472917142411e-06, "loss": 0.3467, "step": 28680 }, { "epoch": 2.9159211061407078, "grad_norm": 0.2531704902648926, "learning_rate": 4.422120407921965e-06, "loss": 0.3183, "step": 28681 }, { "epoch": 2.9160227734851567, "grad_norm": 0.2825626730918884, "learning_rate": 4.421767901612738e-06, "loss": 0.3468, "step": 28682 }, { "epoch": 2.9161244408296056, "grad_norm": 0.2764909863471985, "learning_rate": 4.421415398216505e-06, "loss": 0.3516, "step": 28683 }, { "epoch": 2.9162261081740546, "grad_norm": 0.2689156234264374, "learning_rate": 4.421062897735047e-06, "loss": 0.3209, "step": 28684 }, { "epoch": 2.9163277755185035, "grad_norm": 0.2550603747367859, "learning_rate": 4.420710400170134e-06, "loss": 0.3428, "step": 28685 }, { "epoch": 2.9164294428629525, "grad_norm": 0.2717461884021759, "learning_rate": 4.420357905523547e-06, "loss": 0.3125, "step": 28686 }, { "epoch": 2.9165311102074014, "grad_norm": 0.30524173378944397, "learning_rate": 4.420005413797057e-06, "loss": 0.356, "step": 28687 }, { "epoch": 2.9166327775518504, "grad_norm": 0.2583366930484772, "learning_rate": 4.419652924992441e-06, "loss": 0.3328, "step": 28688 }, { "epoch": 2.9167344448962993, "grad_norm": 0.2563474476337433, "learning_rate": 4.419300439111479e-06, "loss": 0.2985, "step": 28689 }, { "epoch": 2.9168361122407482, "grad_norm": 0.2731647193431854, "learning_rate": 4.4189479561559405e-06, "loss": 0.3102, "step": 28690 }, { "epoch": 2.916937779585197, "grad_norm": 0.26145806908607483, "learning_rate": 4.418595476127606e-06, "loss": 0.3153, "step": 28691 }, { "epoch": 2.917039446929646, "grad_norm": 0.29555806517601013, "learning_rate": 4.418242999028249e-06, "loss": 0.3199, "step": 28692 }, { "epoch": 2.917141114274095, "grad_norm": 0.26204290986061096, "learning_rate": 4.417890524859642e-06, "loss": 0.3144, "step": 28693 }, { "epoch": 2.917242781618544, "grad_norm": 0.27240514755249023, "learning_rate": 4.41753805362357e-06, "loss": 0.3164, "step": 28694 }, { "epoch": 2.917344448962993, "grad_norm": 0.27010664343833923, "learning_rate": 4.417185585321798e-06, "loss": 0.3194, "step": 28695 }, { "epoch": 2.917446116307442, "grad_norm": 0.27131012082099915, "learning_rate": 4.41683311995611e-06, "loss": 0.3162, "step": 28696 }, { "epoch": 2.917547783651891, "grad_norm": 0.2583906054496765, "learning_rate": 4.416480657528276e-06, "loss": 0.3037, "step": 28697 }, { "epoch": 2.9176494509963398, "grad_norm": 0.260221391916275, "learning_rate": 4.416128198040073e-06, "loss": 0.3438, "step": 28698 }, { "epoch": 2.9177511183407887, "grad_norm": 0.2601993680000305, "learning_rate": 4.415775741493281e-06, "loss": 0.3212, "step": 28699 }, { "epoch": 2.9178527856852376, "grad_norm": 0.2528631091117859, "learning_rate": 4.41542328788967e-06, "loss": 0.3203, "step": 28700 }, { "epoch": 2.9179544530296866, "grad_norm": 0.26285266876220703, "learning_rate": 4.4150708372310175e-06, "loss": 0.3374, "step": 28701 }, { "epoch": 2.918056120374136, "grad_norm": 0.2548292577266693, "learning_rate": 4.414718389519099e-06, "loss": 0.3465, "step": 28702 }, { "epoch": 2.918157787718585, "grad_norm": 0.25621387362480164, "learning_rate": 4.414365944755688e-06, "loss": 0.3288, "step": 28703 }, { "epoch": 2.918259455063034, "grad_norm": 0.27730119228363037, "learning_rate": 4.414013502942566e-06, "loss": 0.3405, "step": 28704 }, { "epoch": 2.918361122407483, "grad_norm": 0.28949716687202454, "learning_rate": 4.413661064081503e-06, "loss": 0.3401, "step": 28705 }, { "epoch": 2.9184627897519317, "grad_norm": 0.2748531699180603, "learning_rate": 4.413308628174277e-06, "loss": 0.3408, "step": 28706 }, { "epoch": 2.9185644570963807, "grad_norm": 0.2972354590892792, "learning_rate": 4.412956195222662e-06, "loss": 0.3177, "step": 28707 }, { "epoch": 2.9186661244408296, "grad_norm": 0.27820876240730286, "learning_rate": 4.412603765228434e-06, "loss": 0.3175, "step": 28708 }, { "epoch": 2.9187677917852786, "grad_norm": 0.2815397381782532, "learning_rate": 4.412251338193369e-06, "loss": 0.3294, "step": 28709 }, { "epoch": 2.9188694591297275, "grad_norm": 0.269702285528183, "learning_rate": 4.411898914119241e-06, "loss": 0.3289, "step": 28710 }, { "epoch": 2.9189711264741764, "grad_norm": 0.31750544905662537, "learning_rate": 4.411546493007829e-06, "loss": 0.3677, "step": 28711 }, { "epoch": 2.9190727938186254, "grad_norm": 0.2724023163318634, "learning_rate": 4.411194074860904e-06, "loss": 0.3179, "step": 28712 }, { "epoch": 2.9191744611630743, "grad_norm": 0.26634812355041504, "learning_rate": 4.4108416596802435e-06, "loss": 0.306, "step": 28713 }, { "epoch": 2.9192761285075233, "grad_norm": 0.24633723497390747, "learning_rate": 4.410489247467623e-06, "loss": 0.3358, "step": 28714 }, { "epoch": 2.919377795851972, "grad_norm": 0.27705949544906616, "learning_rate": 4.410136838224817e-06, "loss": 0.3686, "step": 28715 }, { "epoch": 2.919479463196421, "grad_norm": 0.2664298713207245, "learning_rate": 4.409784431953603e-06, "loss": 0.3397, "step": 28716 }, { "epoch": 2.9195811305408705, "grad_norm": 0.2851361632347107, "learning_rate": 4.409432028655754e-06, "loss": 0.3052, "step": 28717 }, { "epoch": 2.9196827978853195, "grad_norm": 0.24609223008155823, "learning_rate": 4.409079628333045e-06, "loss": 0.3178, "step": 28718 }, { "epoch": 2.9197844652297684, "grad_norm": 0.2942505180835724, "learning_rate": 4.408727230987254e-06, "loss": 0.3109, "step": 28719 }, { "epoch": 2.9198861325742174, "grad_norm": 0.2727574408054352, "learning_rate": 4.408374836620153e-06, "loss": 0.3461, "step": 28720 }, { "epoch": 2.9199877999186663, "grad_norm": 0.26493921875953674, "learning_rate": 4.408022445233521e-06, "loss": 0.3124, "step": 28721 }, { "epoch": 2.9200894672631152, "grad_norm": 0.27355244755744934, "learning_rate": 4.40767005682913e-06, "loss": 0.3325, "step": 28722 }, { "epoch": 2.920191134607564, "grad_norm": 0.24331344664096832, "learning_rate": 4.4073176714087565e-06, "loss": 0.3106, "step": 28723 }, { "epoch": 2.920292801952013, "grad_norm": 0.2645443081855774, "learning_rate": 4.406965288974176e-06, "loss": 0.3076, "step": 28724 }, { "epoch": 2.920394469296462, "grad_norm": 0.27950426936149597, "learning_rate": 4.406612909527163e-06, "loss": 0.3094, "step": 28725 }, { "epoch": 2.920496136640911, "grad_norm": 0.2661781311035156, "learning_rate": 4.406260533069494e-06, "loss": 0.3351, "step": 28726 }, { "epoch": 2.92059780398536, "grad_norm": 0.26938730478286743, "learning_rate": 4.405908159602943e-06, "loss": 0.3612, "step": 28727 }, { "epoch": 2.920699471329809, "grad_norm": 0.30136239528656006, "learning_rate": 4.405555789129285e-06, "loss": 0.3311, "step": 28728 }, { "epoch": 2.920801138674258, "grad_norm": 0.2524462640285492, "learning_rate": 4.405203421650296e-06, "loss": 0.3114, "step": 28729 }, { "epoch": 2.9209028060187068, "grad_norm": 0.2531964182853699, "learning_rate": 4.40485105716775e-06, "loss": 0.3195, "step": 28730 }, { "epoch": 2.9210044733631557, "grad_norm": 0.27348628640174866, "learning_rate": 4.404498695683425e-06, "loss": 0.3671, "step": 28731 }, { "epoch": 2.9211061407076047, "grad_norm": 0.2720603048801422, "learning_rate": 4.404146337199093e-06, "loss": 0.3455, "step": 28732 }, { "epoch": 2.9212078080520536, "grad_norm": 0.26568666100502014, "learning_rate": 4.403793981716529e-06, "loss": 0.3245, "step": 28733 }, { "epoch": 2.9213094753965025, "grad_norm": 0.2675212323665619, "learning_rate": 4.403441629237511e-06, "loss": 0.3218, "step": 28734 }, { "epoch": 2.9214111427409515, "grad_norm": 0.2506311237812042, "learning_rate": 4.403089279763811e-06, "loss": 0.3322, "step": 28735 }, { "epoch": 2.9215128100854004, "grad_norm": 0.2470420002937317, "learning_rate": 4.402736933297207e-06, "loss": 0.3396, "step": 28736 }, { "epoch": 2.9216144774298494, "grad_norm": 0.2623761296272278, "learning_rate": 4.402384589839471e-06, "loss": 0.3181, "step": 28737 }, { "epoch": 2.9217161447742983, "grad_norm": 0.25294825434684753, "learning_rate": 4.402032249392379e-06, "loss": 0.3309, "step": 28738 }, { "epoch": 2.9218178121187472, "grad_norm": 0.2607433795928955, "learning_rate": 4.401679911957708e-06, "loss": 0.3606, "step": 28739 }, { "epoch": 2.921919479463196, "grad_norm": 0.2619971036911011, "learning_rate": 4.40132757753723e-06, "loss": 0.3376, "step": 28740 }, { "epoch": 2.922021146807645, "grad_norm": 0.26861873269081116, "learning_rate": 4.4009752461327225e-06, "loss": 0.3317, "step": 28741 }, { "epoch": 2.922122814152094, "grad_norm": 0.26418450474739075, "learning_rate": 4.400622917745959e-06, "loss": 0.3399, "step": 28742 }, { "epoch": 2.9222244814965435, "grad_norm": 0.2602410316467285, "learning_rate": 4.400270592378714e-06, "loss": 0.3572, "step": 28743 }, { "epoch": 2.9223261488409924, "grad_norm": 0.2505839169025421, "learning_rate": 4.3999182700327645e-06, "loss": 0.3045, "step": 28744 }, { "epoch": 2.9224278161854413, "grad_norm": 0.28170523047447205, "learning_rate": 4.399565950709883e-06, "loss": 0.3196, "step": 28745 }, { "epoch": 2.9225294835298903, "grad_norm": 0.2789364755153656, "learning_rate": 4.399213634411847e-06, "loss": 0.297, "step": 28746 }, { "epoch": 2.9226311508743392, "grad_norm": 0.276650607585907, "learning_rate": 4.398861321140429e-06, "loss": 0.3524, "step": 28747 }, { "epoch": 2.922732818218788, "grad_norm": 0.267354279756546, "learning_rate": 4.398509010897404e-06, "loss": 0.3583, "step": 28748 }, { "epoch": 2.922834485563237, "grad_norm": 0.26712605357170105, "learning_rate": 4.398156703684549e-06, "loss": 0.3164, "step": 28749 }, { "epoch": 2.922936152907686, "grad_norm": 0.24287092685699463, "learning_rate": 4.3978043995036365e-06, "loss": 0.3089, "step": 28750 }, { "epoch": 2.923037820252135, "grad_norm": 0.2810245454311371, "learning_rate": 4.397452098356444e-06, "loss": 0.3361, "step": 28751 }, { "epoch": 2.923139487596584, "grad_norm": 0.28686586022377014, "learning_rate": 4.397099800244743e-06, "loss": 0.3469, "step": 28752 }, { "epoch": 2.923241154941033, "grad_norm": 0.2605970501899719, "learning_rate": 4.39674750517031e-06, "loss": 0.3243, "step": 28753 }, { "epoch": 2.923342822285482, "grad_norm": 0.2572559416294098, "learning_rate": 4.396395213134921e-06, "loss": 0.3125, "step": 28754 }, { "epoch": 2.9234444896299308, "grad_norm": 0.2606797218322754, "learning_rate": 4.396042924140348e-06, "loss": 0.3168, "step": 28755 }, { "epoch": 2.9235461569743797, "grad_norm": 0.25672540068626404, "learning_rate": 4.3956906381883685e-06, "loss": 0.3317, "step": 28756 }, { "epoch": 2.9236478243188286, "grad_norm": 0.2618471384048462, "learning_rate": 4.3953383552807546e-06, "loss": 0.3613, "step": 28757 }, { "epoch": 2.923749491663278, "grad_norm": 0.24398194253444672, "learning_rate": 4.394986075419283e-06, "loss": 0.3218, "step": 28758 }, { "epoch": 2.923851159007727, "grad_norm": 0.2728451192378998, "learning_rate": 4.394633798605729e-06, "loss": 0.3049, "step": 28759 }, { "epoch": 2.923952826352176, "grad_norm": 0.2631487548351288, "learning_rate": 4.394281524841863e-06, "loss": 0.3205, "step": 28760 }, { "epoch": 2.924054493696625, "grad_norm": 0.25899362564086914, "learning_rate": 4.3939292541294665e-06, "loss": 0.3008, "step": 28761 }, { "epoch": 2.924156161041074, "grad_norm": 0.2676481604576111, "learning_rate": 4.393576986470307e-06, "loss": 0.3437, "step": 28762 }, { "epoch": 2.9242578283855227, "grad_norm": 0.2597469985485077, "learning_rate": 4.393224721866165e-06, "loss": 0.3214, "step": 28763 }, { "epoch": 2.9243594957299717, "grad_norm": 0.2712722420692444, "learning_rate": 4.3928724603188126e-06, "loss": 0.3125, "step": 28764 }, { "epoch": 2.9244611630744206, "grad_norm": 0.27044519782066345, "learning_rate": 4.392520201830023e-06, "loss": 0.3256, "step": 28765 }, { "epoch": 2.9245628304188696, "grad_norm": 0.24826902151107788, "learning_rate": 4.392167946401573e-06, "loss": 0.3235, "step": 28766 }, { "epoch": 2.9246644977633185, "grad_norm": 0.23684947192668915, "learning_rate": 4.391815694035236e-06, "loss": 0.3064, "step": 28767 }, { "epoch": 2.9247661651077674, "grad_norm": 0.26442843675613403, "learning_rate": 4.391463444732787e-06, "loss": 0.3197, "step": 28768 }, { "epoch": 2.9248678324522164, "grad_norm": 0.2816101610660553, "learning_rate": 4.391111198496002e-06, "loss": 0.3446, "step": 28769 }, { "epoch": 2.9249694997966653, "grad_norm": 0.25533416867256165, "learning_rate": 4.390758955326652e-06, "loss": 0.3263, "step": 28770 }, { "epoch": 2.9250711671411143, "grad_norm": 0.27309349179267883, "learning_rate": 4.390406715226515e-06, "loss": 0.3403, "step": 28771 }, { "epoch": 2.925172834485563, "grad_norm": 0.2549278736114502, "learning_rate": 4.390054478197364e-06, "loss": 0.3175, "step": 28772 }, { "epoch": 2.925274501830012, "grad_norm": 0.267323762178421, "learning_rate": 4.389702244240972e-06, "loss": 0.3095, "step": 28773 }, { "epoch": 2.925376169174461, "grad_norm": 0.2760692238807678, "learning_rate": 4.389350013359118e-06, "loss": 0.3419, "step": 28774 }, { "epoch": 2.92547783651891, "grad_norm": 0.26530712842941284, "learning_rate": 4.388997785553571e-06, "loss": 0.3273, "step": 28775 }, { "epoch": 2.925579503863359, "grad_norm": 0.26776692271232605, "learning_rate": 4.3886455608261094e-06, "loss": 0.3121, "step": 28776 }, { "epoch": 2.925681171207808, "grad_norm": 0.2709352672100067, "learning_rate": 4.388293339178505e-06, "loss": 0.3094, "step": 28777 }, { "epoch": 2.925782838552257, "grad_norm": 0.26243147253990173, "learning_rate": 4.387941120612534e-06, "loss": 0.3555, "step": 28778 }, { "epoch": 2.925884505896706, "grad_norm": 0.2713261544704437, "learning_rate": 4.3875889051299705e-06, "loss": 0.3353, "step": 28779 }, { "epoch": 2.9259861732411547, "grad_norm": 0.2568175792694092, "learning_rate": 4.3872366927325884e-06, "loss": 0.2933, "step": 28780 }, { "epoch": 2.9260878405856037, "grad_norm": 0.2527613639831543, "learning_rate": 4.386884483422162e-06, "loss": 0.3221, "step": 28781 }, { "epoch": 2.9261895079300526, "grad_norm": 0.2509021461009979, "learning_rate": 4.386532277200466e-06, "loss": 0.3354, "step": 28782 }, { "epoch": 2.9262911752745016, "grad_norm": 0.27518749237060547, "learning_rate": 4.386180074069274e-06, "loss": 0.3371, "step": 28783 }, { "epoch": 2.926392842618951, "grad_norm": 0.2671879827976227, "learning_rate": 4.3858278740303624e-06, "loss": 0.3163, "step": 28784 }, { "epoch": 2.9264945099634, "grad_norm": 0.2565167546272278, "learning_rate": 4.385475677085502e-06, "loss": 0.306, "step": 28785 }, { "epoch": 2.926596177307849, "grad_norm": 0.2505694329738617, "learning_rate": 4.385123483236472e-06, "loss": 0.3278, "step": 28786 }, { "epoch": 2.9266978446522978, "grad_norm": 0.27566593885421753, "learning_rate": 4.384771292485041e-06, "loss": 0.3553, "step": 28787 }, { "epoch": 2.9267995119967467, "grad_norm": 0.2677208483219147, "learning_rate": 4.384419104832986e-06, "loss": 0.3291, "step": 28788 }, { "epoch": 2.9269011793411956, "grad_norm": 0.2681408226490021, "learning_rate": 4.3840669202820835e-06, "loss": 0.2909, "step": 28789 }, { "epoch": 2.9270028466856446, "grad_norm": 0.25739458203315735, "learning_rate": 4.383714738834104e-06, "loss": 0.3257, "step": 28790 }, { "epoch": 2.9271045140300935, "grad_norm": 0.2839296758174896, "learning_rate": 4.383362560490824e-06, "loss": 0.3491, "step": 28791 }, { "epoch": 2.9272061813745425, "grad_norm": 0.252806156873703, "learning_rate": 4.383010385254016e-06, "loss": 0.3302, "step": 28792 }, { "epoch": 2.9273078487189914, "grad_norm": 0.2859475910663605, "learning_rate": 4.382658213125454e-06, "loss": 0.3247, "step": 28793 }, { "epoch": 2.9274095160634404, "grad_norm": 0.28349655866622925, "learning_rate": 4.382306044106917e-06, "loss": 0.3281, "step": 28794 }, { "epoch": 2.9275111834078893, "grad_norm": 0.27643758058547974, "learning_rate": 4.381953878200172e-06, "loss": 0.3725, "step": 28795 }, { "epoch": 2.9276128507523382, "grad_norm": 0.2724258005619049, "learning_rate": 4.381601715406998e-06, "loss": 0.3508, "step": 28796 }, { "epoch": 2.927714518096787, "grad_norm": 0.2652384340763092, "learning_rate": 4.3812495557291666e-06, "loss": 0.3862, "step": 28797 }, { "epoch": 2.927816185441236, "grad_norm": 0.28232601284980774, "learning_rate": 4.380897399168453e-06, "loss": 0.3164, "step": 28798 }, { "epoch": 2.9279178527856855, "grad_norm": 0.26981785893440247, "learning_rate": 4.380545245726633e-06, "loss": 0.3259, "step": 28799 }, { "epoch": 2.9280195201301344, "grad_norm": 0.2900439500808716, "learning_rate": 4.380193095405476e-06, "loss": 0.3357, "step": 28800 }, { "epoch": 2.9281211874745834, "grad_norm": 0.26196029782295227, "learning_rate": 4.379840948206762e-06, "loss": 0.3471, "step": 28801 }, { "epoch": 2.9282228548190323, "grad_norm": 0.26581665873527527, "learning_rate": 4.379488804132259e-06, "loss": 0.3435, "step": 28802 }, { "epoch": 2.9283245221634813, "grad_norm": 0.259387344121933, "learning_rate": 4.379136663183745e-06, "loss": 0.3012, "step": 28803 }, { "epoch": 2.92842618950793, "grad_norm": 0.27510833740234375, "learning_rate": 4.378784525362993e-06, "loss": 0.3083, "step": 28804 }, { "epoch": 2.928527856852379, "grad_norm": 0.2654626965522766, "learning_rate": 4.378432390671777e-06, "loss": 0.3219, "step": 28805 }, { "epoch": 2.928629524196828, "grad_norm": 0.27724984288215637, "learning_rate": 4.378080259111873e-06, "loss": 0.3469, "step": 28806 }, { "epoch": 2.928731191541277, "grad_norm": 0.27291038632392883, "learning_rate": 4.3777281306850495e-06, "loss": 0.3178, "step": 28807 }, { "epoch": 2.928832858885726, "grad_norm": 0.2671319544315338, "learning_rate": 4.377376005393085e-06, "loss": 0.3211, "step": 28808 }, { "epoch": 2.928934526230175, "grad_norm": 0.25964415073394775, "learning_rate": 4.377023883237753e-06, "loss": 0.3361, "step": 28809 }, { "epoch": 2.929036193574624, "grad_norm": 0.2533830404281616, "learning_rate": 4.376671764220825e-06, "loss": 0.3202, "step": 28810 }, { "epoch": 2.929137860919073, "grad_norm": 0.26024165749549866, "learning_rate": 4.376319648344079e-06, "loss": 0.3615, "step": 28811 }, { "epoch": 2.9292395282635217, "grad_norm": 0.24277198314666748, "learning_rate": 4.375967535609284e-06, "loss": 0.343, "step": 28812 }, { "epoch": 2.9293411956079707, "grad_norm": 0.26959413290023804, "learning_rate": 4.375615426018216e-06, "loss": 0.3145, "step": 28813 }, { "epoch": 2.9294428629524196, "grad_norm": 0.2675729990005493, "learning_rate": 4.375263319572652e-06, "loss": 0.3441, "step": 28814 }, { "epoch": 2.9295445302968686, "grad_norm": 0.2566792666912079, "learning_rate": 4.374911216274361e-06, "loss": 0.3259, "step": 28815 }, { "epoch": 2.9296461976413175, "grad_norm": 0.267953485250473, "learning_rate": 4.37455911612512e-06, "loss": 0.3321, "step": 28816 }, { "epoch": 2.9297478649857664, "grad_norm": 0.24513348937034607, "learning_rate": 4.3742070191267e-06, "loss": 0.2981, "step": 28817 }, { "epoch": 2.9298495323302154, "grad_norm": 0.265508234500885, "learning_rate": 4.373854925280876e-06, "loss": 0.3421, "step": 28818 }, { "epoch": 2.9299511996746643, "grad_norm": 0.2659018039703369, "learning_rate": 4.3735028345894244e-06, "loss": 0.3297, "step": 28819 }, { "epoch": 2.9300528670191133, "grad_norm": 0.2855750620365143, "learning_rate": 4.373150747054114e-06, "loss": 0.3403, "step": 28820 }, { "epoch": 2.930154534363562, "grad_norm": 0.26363301277160645, "learning_rate": 4.372798662676724e-06, "loss": 0.3544, "step": 28821 }, { "epoch": 2.930256201708011, "grad_norm": 0.2625243365764618, "learning_rate": 4.3724465814590226e-06, "loss": 0.3122, "step": 28822 }, { "epoch": 2.93035786905246, "grad_norm": 0.26881536841392517, "learning_rate": 4.372094503402786e-06, "loss": 0.3321, "step": 28823 }, { "epoch": 2.930459536396909, "grad_norm": 0.2780474126338959, "learning_rate": 4.371742428509791e-06, "loss": 0.3511, "step": 28824 }, { "epoch": 2.9305612037413584, "grad_norm": 0.2541878819465637, "learning_rate": 4.371390356781806e-06, "loss": 0.3259, "step": 28825 }, { "epoch": 2.9306628710858074, "grad_norm": 0.2479773610830307, "learning_rate": 4.371038288220609e-06, "loss": 0.2977, "step": 28826 }, { "epoch": 2.9307645384302563, "grad_norm": 0.26838356256484985, "learning_rate": 4.370686222827969e-06, "loss": 0.288, "step": 28827 }, { "epoch": 2.9308662057747052, "grad_norm": 0.27650636434555054, "learning_rate": 4.370334160605663e-06, "loss": 0.3317, "step": 28828 }, { "epoch": 2.930967873119154, "grad_norm": 0.26220571994781494, "learning_rate": 4.369982101555466e-06, "loss": 0.2978, "step": 28829 }, { "epoch": 2.931069540463603, "grad_norm": 0.23962043225765228, "learning_rate": 4.369630045679147e-06, "loss": 0.3512, "step": 28830 }, { "epoch": 2.931171207808052, "grad_norm": 0.2524127662181854, "learning_rate": 4.3692779929784844e-06, "loss": 0.3279, "step": 28831 }, { "epoch": 2.931272875152501, "grad_norm": 0.28596460819244385, "learning_rate": 4.368925943455247e-06, "loss": 0.3556, "step": 28832 }, { "epoch": 2.93137454249695, "grad_norm": 0.26794591546058655, "learning_rate": 4.3685738971112125e-06, "loss": 0.3175, "step": 28833 }, { "epoch": 2.931476209841399, "grad_norm": 0.2554265856742859, "learning_rate": 4.368221853948152e-06, "loss": 0.3628, "step": 28834 }, { "epoch": 2.931577877185848, "grad_norm": 0.2669142484664917, "learning_rate": 4.3678698139678374e-06, "loss": 0.3415, "step": 28835 }, { "epoch": 2.9316795445302968, "grad_norm": 0.24955949187278748, "learning_rate": 4.367517777172049e-06, "loss": 0.3496, "step": 28836 }, { "epoch": 2.9317812118747457, "grad_norm": 0.31455501914024353, "learning_rate": 4.367165743562551e-06, "loss": 0.3311, "step": 28837 }, { "epoch": 2.9318828792191947, "grad_norm": 0.26789167523384094, "learning_rate": 4.366813713141126e-06, "loss": 0.3362, "step": 28838 }, { "epoch": 2.9319845465636436, "grad_norm": 0.2689022719860077, "learning_rate": 4.366461685909541e-06, "loss": 0.3346, "step": 28839 }, { "epoch": 2.932086213908093, "grad_norm": 0.24572722613811493, "learning_rate": 4.36610966186957e-06, "loss": 0.313, "step": 28840 }, { "epoch": 2.932187881252542, "grad_norm": 0.2789537012577057, "learning_rate": 4.365757641022991e-06, "loss": 0.3339, "step": 28841 }, { "epoch": 2.932289548596991, "grad_norm": 0.2801019549369812, "learning_rate": 4.365405623371572e-06, "loss": 0.3248, "step": 28842 }, { "epoch": 2.93239121594144, "grad_norm": 0.2532960772514343, "learning_rate": 4.3650536089170896e-06, "loss": 0.3174, "step": 28843 }, { "epoch": 2.9324928832858888, "grad_norm": 0.2563927471637726, "learning_rate": 4.3647015976613156e-06, "loss": 0.3506, "step": 28844 }, { "epoch": 2.9325945506303377, "grad_norm": 0.2786358892917633, "learning_rate": 4.3643495896060225e-06, "loss": 0.3359, "step": 28845 }, { "epoch": 2.9326962179747866, "grad_norm": 0.2954714298248291, "learning_rate": 4.363997584752988e-06, "loss": 0.3533, "step": 28846 }, { "epoch": 2.9327978853192356, "grad_norm": 0.2680843770503998, "learning_rate": 4.363645583103982e-06, "loss": 0.3209, "step": 28847 }, { "epoch": 2.9328995526636845, "grad_norm": 0.2790643274784088, "learning_rate": 4.363293584660778e-06, "loss": 0.314, "step": 28848 }, { "epoch": 2.9330012200081335, "grad_norm": 0.2770121693611145, "learning_rate": 4.362941589425149e-06, "loss": 0.3252, "step": 28849 }, { "epoch": 2.9331028873525824, "grad_norm": 0.28030839562416077, "learning_rate": 4.362589597398868e-06, "loss": 0.3148, "step": 28850 }, { "epoch": 2.9332045546970313, "grad_norm": 0.26148539781570435, "learning_rate": 4.362237608583712e-06, "loss": 0.3257, "step": 28851 }, { "epoch": 2.9333062220414803, "grad_norm": 0.26452916860580444, "learning_rate": 4.36188562298145e-06, "loss": 0.3476, "step": 28852 }, { "epoch": 2.9334078893859292, "grad_norm": 0.26062169671058655, "learning_rate": 4.361533640593856e-06, "loss": 0.3336, "step": 28853 }, { "epoch": 2.933509556730378, "grad_norm": 0.2648201286792755, "learning_rate": 4.361181661422704e-06, "loss": 0.3369, "step": 28854 }, { "epoch": 2.933611224074827, "grad_norm": 0.29415932297706604, "learning_rate": 4.360829685469766e-06, "loss": 0.3408, "step": 28855 }, { "epoch": 2.933712891419276, "grad_norm": 0.26248323917388916, "learning_rate": 4.360477712736818e-06, "loss": 0.3366, "step": 28856 }, { "epoch": 2.933814558763725, "grad_norm": 0.2583451569080353, "learning_rate": 4.360125743225631e-06, "loss": 0.3311, "step": 28857 }, { "epoch": 2.933916226108174, "grad_norm": 0.2505452632904053, "learning_rate": 4.359773776937978e-06, "loss": 0.314, "step": 28858 }, { "epoch": 2.934017893452623, "grad_norm": 0.2598646581172943, "learning_rate": 4.3594218138756326e-06, "loss": 0.3165, "step": 28859 }, { "epoch": 2.934119560797072, "grad_norm": 0.2672995626926422, "learning_rate": 4.3590698540403675e-06, "loss": 0.3305, "step": 28860 }, { "epoch": 2.9342212281415208, "grad_norm": 0.2747296988964081, "learning_rate": 4.358717897433957e-06, "loss": 0.3257, "step": 28861 }, { "epoch": 2.9343228954859697, "grad_norm": 0.26044023036956787, "learning_rate": 4.358365944058172e-06, "loss": 0.331, "step": 28862 }, { "epoch": 2.9344245628304186, "grad_norm": 0.2877671420574188, "learning_rate": 4.3580139939147895e-06, "loss": 0.3166, "step": 28863 }, { "epoch": 2.9345262301748676, "grad_norm": 0.26973995566368103, "learning_rate": 4.357662047005578e-06, "loss": 0.3328, "step": 28864 }, { "epoch": 2.9346278975193165, "grad_norm": 0.2726001441478729, "learning_rate": 4.357310103332312e-06, "loss": 0.3151, "step": 28865 }, { "epoch": 2.934729564863766, "grad_norm": 0.25301074981689453, "learning_rate": 4.356958162896766e-06, "loss": 0.3272, "step": 28866 }, { "epoch": 2.934831232208215, "grad_norm": 0.2832472026348114, "learning_rate": 4.356606225700711e-06, "loss": 0.3077, "step": 28867 }, { "epoch": 2.934932899552664, "grad_norm": 0.2640712261199951, "learning_rate": 4.356254291745923e-06, "loss": 0.3285, "step": 28868 }, { "epoch": 2.9350345668971127, "grad_norm": 0.27173298597335815, "learning_rate": 4.355902361034171e-06, "loss": 0.3135, "step": 28869 }, { "epoch": 2.9351362342415617, "grad_norm": 0.28474870324134827, "learning_rate": 4.3555504335672295e-06, "loss": 0.2947, "step": 28870 }, { "epoch": 2.9352379015860106, "grad_norm": 0.24591191112995148, "learning_rate": 4.355198509346873e-06, "loss": 0.3557, "step": 28871 }, { "epoch": 2.9353395689304596, "grad_norm": 0.2660604417324066, "learning_rate": 4.3548465883748724e-06, "loss": 0.3379, "step": 28872 }, { "epoch": 2.9354412362749085, "grad_norm": 0.2765660583972931, "learning_rate": 4.354494670653002e-06, "loss": 0.319, "step": 28873 }, { "epoch": 2.9355429036193574, "grad_norm": 0.23813945055007935, "learning_rate": 4.354142756183034e-06, "loss": 0.3474, "step": 28874 }, { "epoch": 2.9356445709638064, "grad_norm": 0.2473050355911255, "learning_rate": 4.353790844966741e-06, "loss": 0.3339, "step": 28875 }, { "epoch": 2.9357462383082553, "grad_norm": 0.2731398046016693, "learning_rate": 4.353438937005897e-06, "loss": 0.3489, "step": 28876 }, { "epoch": 2.9358479056527043, "grad_norm": 0.26192706823349, "learning_rate": 4.3530870323022724e-06, "loss": 0.3358, "step": 28877 }, { "epoch": 2.935949572997153, "grad_norm": 0.26596707105636597, "learning_rate": 4.352735130857643e-06, "loss": 0.3595, "step": 28878 }, { "epoch": 2.936051240341602, "grad_norm": 0.25055745244026184, "learning_rate": 4.352383232673779e-06, "loss": 0.322, "step": 28879 }, { "epoch": 2.936152907686051, "grad_norm": 0.2754800021648407, "learning_rate": 4.352031337752455e-06, "loss": 0.3187, "step": 28880 }, { "epoch": 2.9362545750305005, "grad_norm": 0.28248414397239685, "learning_rate": 4.351679446095444e-06, "loss": 0.3669, "step": 28881 }, { "epoch": 2.9363562423749494, "grad_norm": 0.26034265756607056, "learning_rate": 4.351327557704516e-06, "loss": 0.3239, "step": 28882 }, { "epoch": 2.9364579097193984, "grad_norm": 0.26807552576065063, "learning_rate": 4.350975672581448e-06, "loss": 0.3229, "step": 28883 }, { "epoch": 2.9365595770638473, "grad_norm": 0.2592344284057617, "learning_rate": 4.350623790728008e-06, "loss": 0.3274, "step": 28884 }, { "epoch": 2.9366612444082962, "grad_norm": 0.2795931100845337, "learning_rate": 4.350271912145972e-06, "loss": 0.3207, "step": 28885 }, { "epoch": 2.936762911752745, "grad_norm": 0.26845934987068176, "learning_rate": 4.3499200368371115e-06, "loss": 0.3334, "step": 28886 }, { "epoch": 2.936864579097194, "grad_norm": 0.2585951089859009, "learning_rate": 4.3495681648032e-06, "loss": 0.3604, "step": 28887 }, { "epoch": 2.936966246441643, "grad_norm": 0.2786807119846344, "learning_rate": 4.34921629604601e-06, "loss": 0.363, "step": 28888 }, { "epoch": 2.937067913786092, "grad_norm": 0.25962916016578674, "learning_rate": 4.348864430567312e-06, "loss": 0.3319, "step": 28889 }, { "epoch": 2.937169581130541, "grad_norm": 0.28476065397262573, "learning_rate": 4.348512568368881e-06, "loss": 0.3373, "step": 28890 }, { "epoch": 2.93727124847499, "grad_norm": 0.28457584977149963, "learning_rate": 4.348160709452489e-06, "loss": 0.2904, "step": 28891 }, { "epoch": 2.937372915819439, "grad_norm": 0.2575787901878357, "learning_rate": 4.347808853819908e-06, "loss": 0.3073, "step": 28892 }, { "epoch": 2.9374745831638878, "grad_norm": 0.2671881914138794, "learning_rate": 4.3474570014729125e-06, "loss": 0.3501, "step": 28893 }, { "epoch": 2.9375762505083367, "grad_norm": 0.26645371317863464, "learning_rate": 4.347105152413272e-06, "loss": 0.3132, "step": 28894 }, { "epoch": 2.9376779178527856, "grad_norm": 0.278572678565979, "learning_rate": 4.34675330664276e-06, "loss": 0.3662, "step": 28895 }, { "epoch": 2.9377795851972346, "grad_norm": 0.26673778891563416, "learning_rate": 4.346401464163151e-06, "loss": 0.3347, "step": 28896 }, { "epoch": 2.9378812525416835, "grad_norm": 0.2627313435077667, "learning_rate": 4.346049624976216e-06, "loss": 0.3, "step": 28897 }, { "epoch": 2.9379829198861325, "grad_norm": 0.257609099149704, "learning_rate": 4.3456977890837284e-06, "loss": 0.3176, "step": 28898 }, { "epoch": 2.9380845872305814, "grad_norm": 0.2849372923374176, "learning_rate": 4.345345956487458e-06, "loss": 0.337, "step": 28899 }, { "epoch": 2.9381862545750304, "grad_norm": 0.2788563370704651, "learning_rate": 4.344994127189179e-06, "loss": 0.3302, "step": 28900 }, { "epoch": 2.9382879219194793, "grad_norm": 0.2715068459510803, "learning_rate": 4.344642301190666e-06, "loss": 0.3454, "step": 28901 }, { "epoch": 2.9383895892639282, "grad_norm": 0.2743174433708191, "learning_rate": 4.344290478493688e-06, "loss": 0.3301, "step": 28902 }, { "epoch": 2.938491256608377, "grad_norm": 0.26525041460990906, "learning_rate": 4.343938659100019e-06, "loss": 0.3331, "step": 28903 }, { "epoch": 2.938592923952826, "grad_norm": 0.27934667468070984, "learning_rate": 4.34358684301143e-06, "loss": 0.3358, "step": 28904 }, { "epoch": 2.938694591297275, "grad_norm": 0.2580340504646301, "learning_rate": 4.3432350302296954e-06, "loss": 0.3301, "step": 28905 }, { "epoch": 2.938796258641724, "grad_norm": 0.2748583257198334, "learning_rate": 4.342883220756588e-06, "loss": 0.3731, "step": 28906 }, { "epoch": 2.9388979259861734, "grad_norm": 0.26757505536079407, "learning_rate": 4.342531414593877e-06, "loss": 0.2888, "step": 28907 }, { "epoch": 2.9389995933306223, "grad_norm": 0.2528088390827179, "learning_rate": 4.342179611743337e-06, "loss": 0.3365, "step": 28908 }, { "epoch": 2.9391012606750713, "grad_norm": 0.2538052797317505, "learning_rate": 4.34182781220674e-06, "loss": 0.3129, "step": 28909 }, { "epoch": 2.93920292801952, "grad_norm": 0.26827773451805115, "learning_rate": 4.341476015985857e-06, "loss": 0.3062, "step": 28910 }, { "epoch": 2.939304595363969, "grad_norm": 0.27312323451042175, "learning_rate": 4.341124223082464e-06, "loss": 0.3666, "step": 28911 }, { "epoch": 2.939406262708418, "grad_norm": 0.24841833114624023, "learning_rate": 4.340772433498328e-06, "loss": 0.3193, "step": 28912 }, { "epoch": 2.939507930052867, "grad_norm": 0.28520670533180237, "learning_rate": 4.340420647235225e-06, "loss": 0.3329, "step": 28913 }, { "epoch": 2.939609597397316, "grad_norm": 0.2845534682273865, "learning_rate": 4.340068864294925e-06, "loss": 0.3309, "step": 28914 }, { "epoch": 2.939711264741765, "grad_norm": 0.2723381519317627, "learning_rate": 4.339717084679202e-06, "loss": 0.3144, "step": 28915 }, { "epoch": 2.939812932086214, "grad_norm": 0.29434865713119507, "learning_rate": 4.339365308389828e-06, "loss": 0.3795, "step": 28916 }, { "epoch": 2.939914599430663, "grad_norm": 0.27307045459747314, "learning_rate": 4.339013535428573e-06, "loss": 0.332, "step": 28917 }, { "epoch": 2.9400162667751117, "grad_norm": 0.26220962405204773, "learning_rate": 4.338661765797213e-06, "loss": 0.3142, "step": 28918 }, { "epoch": 2.9401179341195607, "grad_norm": 0.2719477713108063, "learning_rate": 4.338309999497516e-06, "loss": 0.3314, "step": 28919 }, { "epoch": 2.9402196014640096, "grad_norm": 0.24852624535560608, "learning_rate": 4.337958236531256e-06, "loss": 0.331, "step": 28920 }, { "epoch": 2.9403212688084586, "grad_norm": 0.2863868772983551, "learning_rate": 4.337606476900206e-06, "loss": 0.3464, "step": 28921 }, { "epoch": 2.940422936152908, "grad_norm": 0.2790098488330841, "learning_rate": 4.337254720606137e-06, "loss": 0.2913, "step": 28922 }, { "epoch": 2.940524603497357, "grad_norm": 0.24896478652954102, "learning_rate": 4.336902967650821e-06, "loss": 0.329, "step": 28923 }, { "epoch": 2.940626270841806, "grad_norm": 0.2776898741722107, "learning_rate": 4.33655121803603e-06, "loss": 0.3502, "step": 28924 }, { "epoch": 2.9407279381862548, "grad_norm": 0.24810221791267395, "learning_rate": 4.336199471763536e-06, "loss": 0.324, "step": 28925 }, { "epoch": 2.9408296055307037, "grad_norm": 0.2779989242553711, "learning_rate": 4.335847728835113e-06, "loss": 0.3315, "step": 28926 }, { "epoch": 2.9409312728751527, "grad_norm": 0.2556084394454956, "learning_rate": 4.335495989252529e-06, "loss": 0.31, "step": 28927 }, { "epoch": 2.9410329402196016, "grad_norm": 0.2708747088909149, "learning_rate": 4.33514425301756e-06, "loss": 0.3307, "step": 28928 }, { "epoch": 2.9411346075640505, "grad_norm": 0.28075936436653137, "learning_rate": 4.334792520131977e-06, "loss": 0.3254, "step": 28929 }, { "epoch": 2.9412362749084995, "grad_norm": 0.2753935158252716, "learning_rate": 4.334440790597549e-06, "loss": 0.3333, "step": 28930 }, { "epoch": 2.9413379422529484, "grad_norm": 0.2597675621509552, "learning_rate": 4.3340890644160514e-06, "loss": 0.3517, "step": 28931 }, { "epoch": 2.9414396095973974, "grad_norm": 0.26698583364486694, "learning_rate": 4.333737341589254e-06, "loss": 0.3476, "step": 28932 }, { "epoch": 2.9415412769418463, "grad_norm": 0.28474345803260803, "learning_rate": 4.333385622118931e-06, "loss": 0.3574, "step": 28933 }, { "epoch": 2.9416429442862952, "grad_norm": 0.27415260672569275, "learning_rate": 4.3330339060068515e-06, "loss": 0.3434, "step": 28934 }, { "epoch": 2.941744611630744, "grad_norm": 0.2745830714702606, "learning_rate": 4.3326821932547885e-06, "loss": 0.3368, "step": 28935 }, { "epoch": 2.941846278975193, "grad_norm": 0.24626891314983368, "learning_rate": 4.3323304838645154e-06, "loss": 0.3557, "step": 28936 }, { "epoch": 2.941947946319642, "grad_norm": 0.2654520571231842, "learning_rate": 4.331978777837802e-06, "loss": 0.3504, "step": 28937 }, { "epoch": 2.942049613664091, "grad_norm": 0.25535309314727783, "learning_rate": 4.331627075176421e-06, "loss": 0.3188, "step": 28938 }, { "epoch": 2.94215128100854, "grad_norm": 0.28326016664505005, "learning_rate": 4.331275375882143e-06, "loss": 0.3102, "step": 28939 }, { "epoch": 2.942252948352989, "grad_norm": 0.24316316843032837, "learning_rate": 4.330923679956741e-06, "loss": 0.3085, "step": 28940 }, { "epoch": 2.942354615697438, "grad_norm": 0.26659318804740906, "learning_rate": 4.330571987401987e-06, "loss": 0.3315, "step": 28941 }, { "epoch": 2.9424562830418868, "grad_norm": 0.26038044691085815, "learning_rate": 4.330220298219652e-06, "loss": 0.3324, "step": 28942 }, { "epoch": 2.9425579503863357, "grad_norm": 0.2708192765712738, "learning_rate": 4.329868612411509e-06, "loss": 0.3062, "step": 28943 }, { "epoch": 2.9426596177307847, "grad_norm": 0.24510401487350464, "learning_rate": 4.329516929979327e-06, "loss": 0.3314, "step": 28944 }, { "epoch": 2.9427612850752336, "grad_norm": 0.25341320037841797, "learning_rate": 4.32916525092488e-06, "loss": 0.3564, "step": 28945 }, { "epoch": 2.9428629524196825, "grad_norm": 0.2690958082675934, "learning_rate": 4.32881357524994e-06, "loss": 0.3358, "step": 28946 }, { "epoch": 2.9429646197641315, "grad_norm": 0.2559422254562378, "learning_rate": 4.328461902956277e-06, "loss": 0.3637, "step": 28947 }, { "epoch": 2.943066287108581, "grad_norm": 0.24401989579200745, "learning_rate": 4.328110234045664e-06, "loss": 0.3106, "step": 28948 }, { "epoch": 2.94316795445303, "grad_norm": 0.23661884665489197, "learning_rate": 4.327758568519871e-06, "loss": 0.3214, "step": 28949 }, { "epoch": 2.9432696217974788, "grad_norm": 0.24882309138774872, "learning_rate": 4.32740690638067e-06, "loss": 0.3142, "step": 28950 }, { "epoch": 2.9433712891419277, "grad_norm": 0.265283465385437, "learning_rate": 4.327055247629835e-06, "loss": 0.3255, "step": 28951 }, { "epoch": 2.9434729564863766, "grad_norm": 0.2683953642845154, "learning_rate": 4.326703592269134e-06, "loss": 0.3023, "step": 28952 }, { "epoch": 2.9435746238308256, "grad_norm": 0.26526907086372375, "learning_rate": 4.326351940300343e-06, "loss": 0.3189, "step": 28953 }, { "epoch": 2.9436762911752745, "grad_norm": 0.28119879961013794, "learning_rate": 4.326000291725227e-06, "loss": 0.343, "step": 28954 }, { "epoch": 2.9437779585197235, "grad_norm": 0.26549744606018066, "learning_rate": 4.325648646545563e-06, "loss": 0.3298, "step": 28955 }, { "epoch": 2.9438796258641724, "grad_norm": 0.2755764126777649, "learning_rate": 4.325297004763121e-06, "loss": 0.3624, "step": 28956 }, { "epoch": 2.9439812932086213, "grad_norm": 0.26953908801078796, "learning_rate": 4.324945366379673e-06, "loss": 0.31, "step": 28957 }, { "epoch": 2.9440829605530703, "grad_norm": 0.28205692768096924, "learning_rate": 4.324593731396991e-06, "loss": 0.3038, "step": 28958 }, { "epoch": 2.9441846278975192, "grad_norm": 0.2656996548175812, "learning_rate": 4.324242099816842e-06, "loss": 0.3438, "step": 28959 }, { "epoch": 2.944286295241968, "grad_norm": 0.27532240748405457, "learning_rate": 4.3238904716410016e-06, "loss": 0.369, "step": 28960 }, { "epoch": 2.944387962586417, "grad_norm": 0.2854887545108795, "learning_rate": 4.323538846871242e-06, "loss": 0.3304, "step": 28961 }, { "epoch": 2.944489629930866, "grad_norm": 0.273523211479187, "learning_rate": 4.323187225509331e-06, "loss": 0.3471, "step": 28962 }, { "epoch": 2.9445912972753154, "grad_norm": 0.270879328250885, "learning_rate": 4.322835607557045e-06, "loss": 0.3419, "step": 28963 }, { "epoch": 2.9446929646197644, "grad_norm": 0.2851843535900116, "learning_rate": 4.322483993016149e-06, "loss": 0.3178, "step": 28964 }, { "epoch": 2.9447946319642133, "grad_norm": 0.2682216763496399, "learning_rate": 4.322132381888418e-06, "loss": 0.317, "step": 28965 }, { "epoch": 2.9448962993086623, "grad_norm": 0.25983747839927673, "learning_rate": 4.3217807741756244e-06, "loss": 0.3309, "step": 28966 }, { "epoch": 2.944997966653111, "grad_norm": 0.25272348523139954, "learning_rate": 4.321429169879537e-06, "loss": 0.3507, "step": 28967 }, { "epoch": 2.94509963399756, "grad_norm": 0.321325421333313, "learning_rate": 4.321077569001931e-06, "loss": 0.3295, "step": 28968 }, { "epoch": 2.945201301342009, "grad_norm": 0.24476709961891174, "learning_rate": 4.3207259715445705e-06, "loss": 0.3358, "step": 28969 }, { "epoch": 2.945302968686458, "grad_norm": 0.26453396677970886, "learning_rate": 4.3203743775092335e-06, "loss": 0.3146, "step": 28970 }, { "epoch": 2.945404636030907, "grad_norm": 0.2705182433128357, "learning_rate": 4.320022786897689e-06, "loss": 0.3018, "step": 28971 }, { "epoch": 2.945506303375356, "grad_norm": 0.2692021131515503, "learning_rate": 4.319671199711707e-06, "loss": 0.2964, "step": 28972 }, { "epoch": 2.945607970719805, "grad_norm": 0.2537398338317871, "learning_rate": 4.319319615953062e-06, "loss": 0.3144, "step": 28973 }, { "epoch": 2.945709638064254, "grad_norm": 0.2542240023612976, "learning_rate": 4.31896803562352e-06, "loss": 0.3724, "step": 28974 }, { "epoch": 2.9458113054087027, "grad_norm": 0.2734699845314026, "learning_rate": 4.318616458724857e-06, "loss": 0.3175, "step": 28975 }, { "epoch": 2.9459129727531517, "grad_norm": 0.28190895915031433, "learning_rate": 4.318264885258843e-06, "loss": 0.34, "step": 28976 }, { "epoch": 2.9460146400976006, "grad_norm": 0.2432456761598587, "learning_rate": 4.317913315227247e-06, "loss": 0.3248, "step": 28977 }, { "epoch": 2.9461163074420496, "grad_norm": 0.28106972575187683, "learning_rate": 4.317561748631843e-06, "loss": 0.3518, "step": 28978 }, { "epoch": 2.9462179747864985, "grad_norm": 0.2749905586242676, "learning_rate": 4.3172101854743985e-06, "loss": 0.3319, "step": 28979 }, { "epoch": 2.9463196421309474, "grad_norm": 0.2659478187561035, "learning_rate": 4.316858625756688e-06, "loss": 0.3289, "step": 28980 }, { "epoch": 2.9464213094753964, "grad_norm": 0.27586206793785095, "learning_rate": 4.316507069480483e-06, "loss": 0.3333, "step": 28981 }, { "epoch": 2.9465229768198453, "grad_norm": 0.26739391684532166, "learning_rate": 4.316155516647551e-06, "loss": 0.3229, "step": 28982 }, { "epoch": 2.9466246441642943, "grad_norm": 0.2743060886859894, "learning_rate": 4.315803967259667e-06, "loss": 0.3123, "step": 28983 }, { "epoch": 2.946726311508743, "grad_norm": 0.26605644822120667, "learning_rate": 4.315452421318598e-06, "loss": 0.3091, "step": 28984 }, { "epoch": 2.946827978853192, "grad_norm": 0.2699422538280487, "learning_rate": 4.315100878826117e-06, "loss": 0.3291, "step": 28985 }, { "epoch": 2.946929646197641, "grad_norm": 0.27310362458229065, "learning_rate": 4.3147493397839975e-06, "loss": 0.3486, "step": 28986 }, { "epoch": 2.94703131354209, "grad_norm": 0.26062873005867004, "learning_rate": 4.314397804194004e-06, "loss": 0.3813, "step": 28987 }, { "epoch": 2.947132980886539, "grad_norm": 0.2563510537147522, "learning_rate": 4.314046272057916e-06, "loss": 0.3092, "step": 28988 }, { "epoch": 2.9472346482309884, "grad_norm": 0.25839829444885254, "learning_rate": 4.3136947433774955e-06, "loss": 0.383, "step": 28989 }, { "epoch": 2.9473363155754373, "grad_norm": 0.2626133859157562, "learning_rate": 4.31334321815452e-06, "loss": 0.3314, "step": 28990 }, { "epoch": 2.9474379829198862, "grad_norm": 0.2581452429294586, "learning_rate": 4.31299169639076e-06, "loss": 0.3226, "step": 28991 }, { "epoch": 2.947539650264335, "grad_norm": 0.282889723777771, "learning_rate": 4.31264017808798e-06, "loss": 0.3142, "step": 28992 }, { "epoch": 2.947641317608784, "grad_norm": 0.27058595418930054, "learning_rate": 4.31228866324796e-06, "loss": 0.3493, "step": 28993 }, { "epoch": 2.947742984953233, "grad_norm": 0.25924667716026306, "learning_rate": 4.311937151872464e-06, "loss": 0.3496, "step": 28994 }, { "epoch": 2.947844652297682, "grad_norm": 0.2556318938732147, "learning_rate": 4.3115856439632646e-06, "loss": 0.3055, "step": 28995 }, { "epoch": 2.947946319642131, "grad_norm": 0.25806304812431335, "learning_rate": 4.3112341395221354e-06, "loss": 0.3025, "step": 28996 }, { "epoch": 2.94804798698658, "grad_norm": 0.25622591376304626, "learning_rate": 4.310882638550842e-06, "loss": 0.374, "step": 28997 }, { "epoch": 2.948149654331029, "grad_norm": 0.30112168192863464, "learning_rate": 4.310531141051161e-06, "loss": 0.3122, "step": 28998 }, { "epoch": 2.9482513216754778, "grad_norm": 0.2628321647644043, "learning_rate": 4.310179647024859e-06, "loss": 0.3422, "step": 28999 }, { "epoch": 2.9483529890199267, "grad_norm": 0.25789204239845276, "learning_rate": 4.309828156473706e-06, "loss": 0.3535, "step": 29000 }, { "epoch": 2.9484546563643756, "grad_norm": 0.2626521587371826, "learning_rate": 4.3094766693994785e-06, "loss": 0.3288, "step": 29001 }, { "epoch": 2.9485563237088246, "grad_norm": 0.2649405896663666, "learning_rate": 4.3091251858039395e-06, "loss": 0.3498, "step": 29002 }, { "epoch": 2.9486579910532735, "grad_norm": 0.25954586267471313, "learning_rate": 4.308773705688868e-06, "loss": 0.3194, "step": 29003 }, { "epoch": 2.948759658397723, "grad_norm": 0.24995169043540955, "learning_rate": 4.308422229056027e-06, "loss": 0.3073, "step": 29004 }, { "epoch": 2.948861325742172, "grad_norm": 0.2555674612522125, "learning_rate": 4.30807075590719e-06, "loss": 0.3298, "step": 29005 }, { "epoch": 2.948962993086621, "grad_norm": 0.2634740471839905, "learning_rate": 4.307719286244132e-06, "loss": 0.347, "step": 29006 }, { "epoch": 2.9490646604310697, "grad_norm": 0.2628785967826843, "learning_rate": 4.307367820068615e-06, "loss": 0.3249, "step": 29007 }, { "epoch": 2.9491663277755187, "grad_norm": 0.2874685227870941, "learning_rate": 4.307016357382419e-06, "loss": 0.318, "step": 29008 }, { "epoch": 2.9492679951199676, "grad_norm": 0.2783207893371582, "learning_rate": 4.306664898187307e-06, "loss": 0.305, "step": 29009 }, { "epoch": 2.9493696624644166, "grad_norm": 0.2551240026950836, "learning_rate": 4.306313442485052e-06, "loss": 0.3402, "step": 29010 }, { "epoch": 2.9494713298088655, "grad_norm": 0.2452947199344635, "learning_rate": 4.305961990277427e-06, "loss": 0.3275, "step": 29011 }, { "epoch": 2.9495729971533144, "grad_norm": 0.289415568113327, "learning_rate": 4.3056105415662e-06, "loss": 0.3155, "step": 29012 }, { "epoch": 2.9496746644977634, "grad_norm": 0.29478058218955994, "learning_rate": 4.3052590963531415e-06, "loss": 0.3451, "step": 29013 }, { "epoch": 2.9497763318422123, "grad_norm": 0.26758524775505066, "learning_rate": 4.304907654640022e-06, "loss": 0.3325, "step": 29014 }, { "epoch": 2.9498779991866613, "grad_norm": 0.28153881430625916, "learning_rate": 4.304556216428613e-06, "loss": 0.3241, "step": 29015 }, { "epoch": 2.94997966653111, "grad_norm": 0.2790619432926178, "learning_rate": 4.304204781720686e-06, "loss": 0.3156, "step": 29016 }, { "epoch": 2.950081333875559, "grad_norm": 0.25580617785453796, "learning_rate": 4.303853350518009e-06, "loss": 0.3026, "step": 29017 }, { "epoch": 2.950183001220008, "grad_norm": 0.2990552484989166, "learning_rate": 4.303501922822353e-06, "loss": 0.3455, "step": 29018 }, { "epoch": 2.950284668564457, "grad_norm": 0.2720862030982971, "learning_rate": 4.3031504986354885e-06, "loss": 0.3283, "step": 29019 }, { "epoch": 2.950386335908906, "grad_norm": 0.27132266759872437, "learning_rate": 4.302799077959187e-06, "loss": 0.3041, "step": 29020 }, { "epoch": 2.950488003253355, "grad_norm": 0.24935366213321686, "learning_rate": 4.3024476607952184e-06, "loss": 0.356, "step": 29021 }, { "epoch": 2.950589670597804, "grad_norm": 0.2665887475013733, "learning_rate": 4.302096247145351e-06, "loss": 0.3137, "step": 29022 }, { "epoch": 2.950691337942253, "grad_norm": 0.2632980942726135, "learning_rate": 4.301744837011359e-06, "loss": 0.348, "step": 29023 }, { "epoch": 2.9507930052867017, "grad_norm": 0.2707563638687134, "learning_rate": 4.301393430395009e-06, "loss": 0.3278, "step": 29024 }, { "epoch": 2.9508946726311507, "grad_norm": 0.2747338116168976, "learning_rate": 4.301042027298074e-06, "loss": 0.3544, "step": 29025 }, { "epoch": 2.9509963399755996, "grad_norm": 0.27016857266426086, "learning_rate": 4.300690627722323e-06, "loss": 0.3286, "step": 29026 }, { "epoch": 2.9510980073200486, "grad_norm": 0.2560981214046478, "learning_rate": 4.300339231669524e-06, "loss": 0.3556, "step": 29027 }, { "epoch": 2.9511996746644975, "grad_norm": 0.28348180651664734, "learning_rate": 4.299987839141453e-06, "loss": 0.2939, "step": 29028 }, { "epoch": 2.9513013420089464, "grad_norm": 0.2707135081291199, "learning_rate": 4.299636450139876e-06, "loss": 0.3318, "step": 29029 }, { "epoch": 2.951403009353396, "grad_norm": 0.24513737857341766, "learning_rate": 4.299285064666564e-06, "loss": 0.3276, "step": 29030 }, { "epoch": 2.9515046766978448, "grad_norm": 0.2645729184150696, "learning_rate": 4.298933682723286e-06, "loss": 0.3159, "step": 29031 }, { "epoch": 2.9516063440422937, "grad_norm": 0.2837057411670685, "learning_rate": 4.298582304311815e-06, "loss": 0.3465, "step": 29032 }, { "epoch": 2.9517080113867427, "grad_norm": 0.2702332139015198, "learning_rate": 4.298230929433919e-06, "loss": 0.3409, "step": 29033 }, { "epoch": 2.9518096787311916, "grad_norm": 0.293186753988266, "learning_rate": 4.297879558091369e-06, "loss": 0.3367, "step": 29034 }, { "epoch": 2.9519113460756405, "grad_norm": 0.28105464577674866, "learning_rate": 4.297528190285935e-06, "loss": 0.3063, "step": 29035 }, { "epoch": 2.9520130134200895, "grad_norm": 0.2543538808822632, "learning_rate": 4.2971768260193874e-06, "loss": 0.3438, "step": 29036 }, { "epoch": 2.9521146807645384, "grad_norm": 0.25489306449890137, "learning_rate": 4.296825465293495e-06, "loss": 0.3426, "step": 29037 }, { "epoch": 2.9522163481089874, "grad_norm": 0.24395328760147095, "learning_rate": 4.29647410811003e-06, "loss": 0.3555, "step": 29038 }, { "epoch": 2.9523180154534363, "grad_norm": 0.257269948720932, "learning_rate": 4.296122754470761e-06, "loss": 0.3716, "step": 29039 }, { "epoch": 2.9524196827978852, "grad_norm": 0.28117936849594116, "learning_rate": 4.295771404377458e-06, "loss": 0.3368, "step": 29040 }, { "epoch": 2.952521350142334, "grad_norm": 0.26300498843193054, "learning_rate": 4.295420057831891e-06, "loss": 0.3793, "step": 29041 }, { "epoch": 2.952623017486783, "grad_norm": 0.2710687816143036, "learning_rate": 4.29506871483583e-06, "loss": 0.3568, "step": 29042 }, { "epoch": 2.952724684831232, "grad_norm": 0.27737608551979065, "learning_rate": 4.294717375391047e-06, "loss": 0.3348, "step": 29043 }, { "epoch": 2.952826352175681, "grad_norm": 0.2777099609375, "learning_rate": 4.294366039499308e-06, "loss": 0.3232, "step": 29044 }, { "epoch": 2.9529280195201304, "grad_norm": 0.27171826362609863, "learning_rate": 4.294014707162387e-06, "loss": 0.3349, "step": 29045 }, { "epoch": 2.9530296868645793, "grad_norm": 0.28402554988861084, "learning_rate": 4.293663378382052e-06, "loss": 0.3516, "step": 29046 }, { "epoch": 2.9531313542090283, "grad_norm": 0.2786202132701874, "learning_rate": 4.293312053160071e-06, "loss": 0.3017, "step": 29047 }, { "epoch": 2.9532330215534772, "grad_norm": 0.2749137282371521, "learning_rate": 4.292960731498218e-06, "loss": 0.308, "step": 29048 }, { "epoch": 2.953334688897926, "grad_norm": 0.2443026304244995, "learning_rate": 4.29260941339826e-06, "loss": 0.331, "step": 29049 }, { "epoch": 2.953436356242375, "grad_norm": 0.2716521620750427, "learning_rate": 4.2922580988619685e-06, "loss": 0.342, "step": 29050 }, { "epoch": 2.953538023586824, "grad_norm": 0.27569782733917236, "learning_rate": 4.291906787891112e-06, "loss": 0.3193, "step": 29051 }, { "epoch": 2.953639690931273, "grad_norm": 0.27974310517311096, "learning_rate": 4.291555480487461e-06, "loss": 0.3424, "step": 29052 }, { "epoch": 2.953741358275722, "grad_norm": 0.26915401220321655, "learning_rate": 4.291204176652785e-06, "loss": 0.3015, "step": 29053 }, { "epoch": 2.953843025620171, "grad_norm": 0.2678292989730835, "learning_rate": 4.290852876388854e-06, "loss": 0.3184, "step": 29054 }, { "epoch": 2.95394469296462, "grad_norm": 0.24311359226703644, "learning_rate": 4.290501579697437e-06, "loss": 0.3268, "step": 29055 }, { "epoch": 2.9540463603090688, "grad_norm": 0.2646634876728058, "learning_rate": 4.290150286580305e-06, "loss": 0.3285, "step": 29056 }, { "epoch": 2.9541480276535177, "grad_norm": 0.28456801176071167, "learning_rate": 4.2897989970392274e-06, "loss": 0.356, "step": 29057 }, { "epoch": 2.9542496949979666, "grad_norm": 0.27161359786987305, "learning_rate": 4.289447711075974e-06, "loss": 0.3193, "step": 29058 }, { "epoch": 2.9543513623424156, "grad_norm": 0.25736886262893677, "learning_rate": 4.289096428692313e-06, "loss": 0.3046, "step": 29059 }, { "epoch": 2.9544530296868645, "grad_norm": 0.23011019825935364, "learning_rate": 4.288745149890017e-06, "loss": 0.3208, "step": 29060 }, { "epoch": 2.9545546970313135, "grad_norm": 0.26255521178245544, "learning_rate": 4.288393874670852e-06, "loss": 0.3617, "step": 29061 }, { "epoch": 2.9546563643757624, "grad_norm": 0.27356162667274475, "learning_rate": 4.288042603036589e-06, "loss": 0.3431, "step": 29062 }, { "epoch": 2.9547580317202113, "grad_norm": 0.26248788833618164, "learning_rate": 4.287691334989001e-06, "loss": 0.3224, "step": 29063 }, { "epoch": 2.9548596990646603, "grad_norm": 0.2617940306663513, "learning_rate": 4.287340070529853e-06, "loss": 0.3424, "step": 29064 }, { "epoch": 2.9549613664091092, "grad_norm": 0.26101455092430115, "learning_rate": 4.286988809660917e-06, "loss": 0.3268, "step": 29065 }, { "epoch": 2.955063033753558, "grad_norm": 0.2607351541519165, "learning_rate": 4.286637552383961e-06, "loss": 0.316, "step": 29066 }, { "epoch": 2.955164701098007, "grad_norm": 0.2583039104938507, "learning_rate": 4.2862862987007554e-06, "loss": 0.3411, "step": 29067 }, { "epoch": 2.955266368442456, "grad_norm": 0.2694522440433502, "learning_rate": 4.285935048613072e-06, "loss": 0.3625, "step": 29068 }, { "epoch": 2.955368035786905, "grad_norm": 0.26868098974227905, "learning_rate": 4.285583802122676e-06, "loss": 0.3164, "step": 29069 }, { "epoch": 2.9554697031313544, "grad_norm": 0.2500419318675995, "learning_rate": 4.28523255923134e-06, "loss": 0.3377, "step": 29070 }, { "epoch": 2.9555713704758033, "grad_norm": 0.2774094343185425, "learning_rate": 4.2848813199408325e-06, "loss": 0.347, "step": 29071 }, { "epoch": 2.9556730378202523, "grad_norm": 0.27762356400489807, "learning_rate": 4.284530084252922e-06, "loss": 0.3342, "step": 29072 }, { "epoch": 2.955774705164701, "grad_norm": 0.25347012281417847, "learning_rate": 4.284178852169381e-06, "loss": 0.2926, "step": 29073 }, { "epoch": 2.95587637250915, "grad_norm": 0.2971041202545166, "learning_rate": 4.283827623691974e-06, "loss": 0.3285, "step": 29074 }, { "epoch": 2.955978039853599, "grad_norm": 0.2493482232093811, "learning_rate": 4.283476398822476e-06, "loss": 0.3217, "step": 29075 }, { "epoch": 2.956079707198048, "grad_norm": 0.24747750163078308, "learning_rate": 4.283125177562652e-06, "loss": 0.3471, "step": 29076 }, { "epoch": 2.956181374542497, "grad_norm": 0.2772640287876129, "learning_rate": 4.282773959914273e-06, "loss": 0.3468, "step": 29077 }, { "epoch": 2.956283041886946, "grad_norm": 0.2868088483810425, "learning_rate": 4.282422745879109e-06, "loss": 0.2911, "step": 29078 }, { "epoch": 2.956384709231395, "grad_norm": 0.2834721803665161, "learning_rate": 4.282071535458927e-06, "loss": 0.3641, "step": 29079 }, { "epoch": 2.956486376575844, "grad_norm": 0.26611411571502686, "learning_rate": 4.281720328655501e-06, "loss": 0.3062, "step": 29080 }, { "epoch": 2.9565880439202927, "grad_norm": 0.26647812128067017, "learning_rate": 4.2813691254705945e-06, "loss": 0.3389, "step": 29081 }, { "epoch": 2.9566897112647417, "grad_norm": 0.24918602406978607, "learning_rate": 4.28101792590598e-06, "loss": 0.2838, "step": 29082 }, { "epoch": 2.9567913786091906, "grad_norm": 0.268624484539032, "learning_rate": 4.280666729963428e-06, "loss": 0.3559, "step": 29083 }, { "epoch": 2.9568930459536396, "grad_norm": 0.2663549482822418, "learning_rate": 4.280315537644705e-06, "loss": 0.3581, "step": 29084 }, { "epoch": 2.9569947132980885, "grad_norm": 0.26125669479370117, "learning_rate": 4.279964348951582e-06, "loss": 0.3039, "step": 29085 }, { "epoch": 2.957096380642538, "grad_norm": 0.2601338028907776, "learning_rate": 4.279613163885826e-06, "loss": 0.3241, "step": 29086 }, { "epoch": 2.957198047986987, "grad_norm": 0.26480892300605774, "learning_rate": 4.279261982449209e-06, "loss": 0.3174, "step": 29087 }, { "epoch": 2.9572997153314358, "grad_norm": 0.25034964084625244, "learning_rate": 4.278910804643499e-06, "loss": 0.3288, "step": 29088 }, { "epoch": 2.9574013826758847, "grad_norm": 0.27965593338012695, "learning_rate": 4.2785596304704644e-06, "loss": 0.3204, "step": 29089 }, { "epoch": 2.9575030500203336, "grad_norm": 0.2675280272960663, "learning_rate": 4.278208459931875e-06, "loss": 0.3263, "step": 29090 }, { "epoch": 2.9576047173647826, "grad_norm": 0.29354313015937805, "learning_rate": 4.2778572930295e-06, "loss": 0.3312, "step": 29091 }, { "epoch": 2.9577063847092315, "grad_norm": 0.2578545808792114, "learning_rate": 4.277506129765108e-06, "loss": 0.3352, "step": 29092 }, { "epoch": 2.9578080520536805, "grad_norm": 0.2488648146390915, "learning_rate": 4.277154970140469e-06, "loss": 0.3368, "step": 29093 }, { "epoch": 2.9579097193981294, "grad_norm": 0.25641781091690063, "learning_rate": 4.276803814157351e-06, "loss": 0.3426, "step": 29094 }, { "epoch": 2.9580113867425784, "grad_norm": 0.255165696144104, "learning_rate": 4.276452661817524e-06, "loss": 0.3328, "step": 29095 }, { "epoch": 2.9581130540870273, "grad_norm": 0.26595035195350647, "learning_rate": 4.276101513122756e-06, "loss": 0.3459, "step": 29096 }, { "epoch": 2.9582147214314762, "grad_norm": 0.2609390914440155, "learning_rate": 4.275750368074816e-06, "loss": 0.2944, "step": 29097 }, { "epoch": 2.958316388775925, "grad_norm": 0.25760820508003235, "learning_rate": 4.275399226675475e-06, "loss": 0.3668, "step": 29098 }, { "epoch": 2.958418056120374, "grad_norm": 0.25665283203125, "learning_rate": 4.2750480889264996e-06, "loss": 0.3231, "step": 29099 }, { "epoch": 2.958519723464823, "grad_norm": 0.2833980917930603, "learning_rate": 4.27469695482966e-06, "loss": 0.3214, "step": 29100 }, { "epoch": 2.958621390809272, "grad_norm": 0.2686220705509186, "learning_rate": 4.274345824386724e-06, "loss": 0.3641, "step": 29101 }, { "epoch": 2.958723058153721, "grad_norm": 0.2737072706222534, "learning_rate": 4.273994697599462e-06, "loss": 0.3246, "step": 29102 }, { "epoch": 2.95882472549817, "grad_norm": 0.2506025731563568, "learning_rate": 4.2736435744696424e-06, "loss": 0.3105, "step": 29103 }, { "epoch": 2.958926392842619, "grad_norm": 0.26616862416267395, "learning_rate": 4.273292454999033e-06, "loss": 0.3353, "step": 29104 }, { "epoch": 2.9590280601870678, "grad_norm": 0.27937987446784973, "learning_rate": 4.272941339189406e-06, "loss": 0.3291, "step": 29105 }, { "epoch": 2.9591297275315167, "grad_norm": 0.2550259232521057, "learning_rate": 4.272590227042524e-06, "loss": 0.2956, "step": 29106 }, { "epoch": 2.9592313948759656, "grad_norm": 0.2615146040916443, "learning_rate": 4.272239118560162e-06, "loss": 0.3205, "step": 29107 }, { "epoch": 2.9593330622204146, "grad_norm": 0.282730370759964, "learning_rate": 4.271888013744086e-06, "loss": 0.3035, "step": 29108 }, { "epoch": 2.9594347295648635, "grad_norm": 0.26214149594306946, "learning_rate": 4.2715369125960645e-06, "loss": 0.3799, "step": 29109 }, { "epoch": 2.9595363969093125, "grad_norm": 0.25118082761764526, "learning_rate": 4.2711858151178695e-06, "loss": 0.3303, "step": 29110 }, { "epoch": 2.959638064253762, "grad_norm": 0.27476179599761963, "learning_rate": 4.270834721311263e-06, "loss": 0.3324, "step": 29111 }, { "epoch": 2.959739731598211, "grad_norm": 0.25797325372695923, "learning_rate": 4.2704836311780205e-06, "loss": 0.3168, "step": 29112 }, { "epoch": 2.9598413989426597, "grad_norm": 0.24584713578224182, "learning_rate": 4.270132544719908e-06, "loss": 0.3048, "step": 29113 }, { "epoch": 2.9599430662871087, "grad_norm": 0.2493852823972702, "learning_rate": 4.2697814619386935e-06, "loss": 0.3254, "step": 29114 }, { "epoch": 2.9600447336315576, "grad_norm": 0.2666545510292053, "learning_rate": 4.269430382836148e-06, "loss": 0.348, "step": 29115 }, { "epoch": 2.9601464009760066, "grad_norm": 0.2678737938404083, "learning_rate": 4.269079307414036e-06, "loss": 0.3545, "step": 29116 }, { "epoch": 2.9602480683204555, "grad_norm": 0.2805846929550171, "learning_rate": 4.26872823567413e-06, "loss": 0.3403, "step": 29117 }, { "epoch": 2.9603497356649044, "grad_norm": 0.2628815174102783, "learning_rate": 4.268377167618198e-06, "loss": 0.3457, "step": 29118 }, { "epoch": 2.9604514030093534, "grad_norm": 0.2633294463157654, "learning_rate": 4.268026103248008e-06, "loss": 0.3422, "step": 29119 }, { "epoch": 2.9605530703538023, "grad_norm": 0.2900330424308777, "learning_rate": 4.267675042565329e-06, "loss": 0.3692, "step": 29120 }, { "epoch": 2.9606547376982513, "grad_norm": 0.25369858741760254, "learning_rate": 4.2673239855719275e-06, "loss": 0.3169, "step": 29121 }, { "epoch": 2.9607564050427, "grad_norm": 0.26513200998306274, "learning_rate": 4.266972932269574e-06, "loss": 0.2964, "step": 29122 }, { "epoch": 2.960858072387149, "grad_norm": 0.25141236186027527, "learning_rate": 4.266621882660038e-06, "loss": 0.3581, "step": 29123 }, { "epoch": 2.960959739731598, "grad_norm": 0.30061614513397217, "learning_rate": 4.266270836745085e-06, "loss": 0.338, "step": 29124 }, { "epoch": 2.961061407076047, "grad_norm": 0.24773114919662476, "learning_rate": 4.265919794526487e-06, "loss": 0.3249, "step": 29125 }, { "epoch": 2.961163074420496, "grad_norm": 0.24868735671043396, "learning_rate": 4.265568756006008e-06, "loss": 0.3493, "step": 29126 }, { "epoch": 2.9612647417649454, "grad_norm": 0.2604599893093109, "learning_rate": 4.26521772118542e-06, "loss": 0.3142, "step": 29127 }, { "epoch": 2.9613664091093943, "grad_norm": 0.27944526076316833, "learning_rate": 4.2648666900664935e-06, "loss": 0.3327, "step": 29128 }, { "epoch": 2.9614680764538432, "grad_norm": 0.26353684067726135, "learning_rate": 4.2645156626509896e-06, "loss": 0.3516, "step": 29129 }, { "epoch": 2.961569743798292, "grad_norm": 0.27870115637779236, "learning_rate": 4.264164638940685e-06, "loss": 0.3689, "step": 29130 }, { "epoch": 2.961671411142741, "grad_norm": 0.24244435131549835, "learning_rate": 4.26381361893734e-06, "loss": 0.3094, "step": 29131 }, { "epoch": 2.96177307848719, "grad_norm": 0.2624918520450592, "learning_rate": 4.263462602642729e-06, "loss": 0.3455, "step": 29132 }, { "epoch": 2.961874745831639, "grad_norm": 0.26937296986579895, "learning_rate": 4.26311159005862e-06, "loss": 0.3471, "step": 29133 }, { "epoch": 2.961976413176088, "grad_norm": 0.24816414713859558, "learning_rate": 4.262760581186777e-06, "loss": 0.321, "step": 29134 }, { "epoch": 2.962078080520537, "grad_norm": 0.26223307847976685, "learning_rate": 4.262409576028974e-06, "loss": 0.2991, "step": 29135 }, { "epoch": 2.962179747864986, "grad_norm": 0.2630316913127899, "learning_rate": 4.262058574586973e-06, "loss": 0.3481, "step": 29136 }, { "epoch": 2.9622814152094348, "grad_norm": 0.26487231254577637, "learning_rate": 4.261707576862546e-06, "loss": 0.3316, "step": 29137 }, { "epoch": 2.9623830825538837, "grad_norm": 0.25523120164871216, "learning_rate": 4.261356582857464e-06, "loss": 0.3484, "step": 29138 }, { "epoch": 2.9624847498983327, "grad_norm": 0.27553918957710266, "learning_rate": 4.261005592573487e-06, "loss": 0.3295, "step": 29139 }, { "epoch": 2.9625864172427816, "grad_norm": 0.2514394521713257, "learning_rate": 4.260654606012393e-06, "loss": 0.3166, "step": 29140 }, { "epoch": 2.9626880845872305, "grad_norm": 0.2595105767250061, "learning_rate": 4.260303623175943e-06, "loss": 0.3304, "step": 29141 }, { "epoch": 2.9627897519316795, "grad_norm": 0.28240495920181274, "learning_rate": 4.259952644065906e-06, "loss": 0.3372, "step": 29142 }, { "epoch": 2.9628914192761284, "grad_norm": 0.2846929430961609, "learning_rate": 4.2596016686840566e-06, "loss": 0.3543, "step": 29143 }, { "epoch": 2.9629930866205774, "grad_norm": 0.26526182889938354, "learning_rate": 4.259250697032154e-06, "loss": 0.317, "step": 29144 }, { "epoch": 2.9630947539650263, "grad_norm": 0.2733076810836792, "learning_rate": 4.258899729111973e-06, "loss": 0.3696, "step": 29145 }, { "epoch": 2.9631964213094752, "grad_norm": 0.2691251039505005, "learning_rate": 4.258548764925276e-06, "loss": 0.367, "step": 29146 }, { "epoch": 2.963298088653924, "grad_norm": 0.2633046805858612, "learning_rate": 4.258197804473835e-06, "loss": 0.3162, "step": 29147 }, { "epoch": 2.963399755998373, "grad_norm": 0.27270811796188354, "learning_rate": 4.25784684775942e-06, "loss": 0.3294, "step": 29148 }, { "epoch": 2.963501423342822, "grad_norm": 0.2479785978794098, "learning_rate": 4.257495894783792e-06, "loss": 0.3166, "step": 29149 }, { "epoch": 2.963603090687271, "grad_norm": 0.2741738259792328, "learning_rate": 4.257144945548727e-06, "loss": 0.3934, "step": 29150 }, { "epoch": 2.96370475803172, "grad_norm": 0.2712194621562958, "learning_rate": 4.256794000055987e-06, "loss": 0.3322, "step": 29151 }, { "epoch": 2.9638064253761693, "grad_norm": 0.2559763491153717, "learning_rate": 4.256443058307341e-06, "loss": 0.3304, "step": 29152 }, { "epoch": 2.9639080927206183, "grad_norm": 0.25106915831565857, "learning_rate": 4.256092120304562e-06, "loss": 0.3286, "step": 29153 }, { "epoch": 2.9640097600650672, "grad_norm": 0.29169195890426636, "learning_rate": 4.25574118604941e-06, "loss": 0.3388, "step": 29154 }, { "epoch": 2.964111427409516, "grad_norm": 0.27168968319892883, "learning_rate": 4.255390255543661e-06, "loss": 0.3605, "step": 29155 }, { "epoch": 2.964213094753965, "grad_norm": 0.2955761253833771, "learning_rate": 4.255039328789077e-06, "loss": 0.3498, "step": 29156 }, { "epoch": 2.964314762098414, "grad_norm": 0.2732859253883362, "learning_rate": 4.254688405787426e-06, "loss": 0.3527, "step": 29157 }, { "epoch": 2.964416429442863, "grad_norm": 0.2885494828224182, "learning_rate": 4.2543374865404815e-06, "loss": 0.3332, "step": 29158 }, { "epoch": 2.964518096787312, "grad_norm": 0.2742868661880493, "learning_rate": 4.253986571050004e-06, "loss": 0.3507, "step": 29159 }, { "epoch": 2.964619764131761, "grad_norm": 0.24681319296360016, "learning_rate": 4.253635659317768e-06, "loss": 0.3061, "step": 29160 }, { "epoch": 2.96472143147621, "grad_norm": 0.2656618058681488, "learning_rate": 4.253284751345536e-06, "loss": 0.3781, "step": 29161 }, { "epoch": 2.9648230988206588, "grad_norm": 0.26422184705734253, "learning_rate": 4.252933847135077e-06, "loss": 0.329, "step": 29162 }, { "epoch": 2.9649247661651077, "grad_norm": 0.26118460297584534, "learning_rate": 4.252582946688164e-06, "loss": 0.3514, "step": 29163 }, { "epoch": 2.9650264335095566, "grad_norm": 0.2534571588039398, "learning_rate": 4.2522320500065574e-06, "loss": 0.3265, "step": 29164 }, { "epoch": 2.9651281008540056, "grad_norm": 0.2636302709579468, "learning_rate": 4.2518811570920285e-06, "loss": 0.356, "step": 29165 }, { "epoch": 2.9652297681984545, "grad_norm": 0.2969488799571991, "learning_rate": 4.251530267946345e-06, "loss": 0.3021, "step": 29166 }, { "epoch": 2.9653314355429035, "grad_norm": 0.2622949481010437, "learning_rate": 4.251179382571271e-06, "loss": 0.3063, "step": 29167 }, { "epoch": 2.965433102887353, "grad_norm": 0.25896257162094116, "learning_rate": 4.250828500968583e-06, "loss": 0.3098, "step": 29168 }, { "epoch": 2.965534770231802, "grad_norm": 0.26002931594848633, "learning_rate": 4.250477623140039e-06, "loss": 0.3376, "step": 29169 }, { "epoch": 2.9656364375762507, "grad_norm": 0.27167338132858276, "learning_rate": 4.250126749087412e-06, "loss": 0.3489, "step": 29170 }, { "epoch": 2.9657381049206997, "grad_norm": 0.266570121049881, "learning_rate": 4.2497758788124676e-06, "loss": 0.3632, "step": 29171 }, { "epoch": 2.9658397722651486, "grad_norm": 0.27596211433410645, "learning_rate": 4.2494250123169725e-06, "loss": 0.366, "step": 29172 }, { "epoch": 2.9659414396095976, "grad_norm": 0.27308565378189087, "learning_rate": 4.2490741496027e-06, "loss": 0.3255, "step": 29173 }, { "epoch": 2.9660431069540465, "grad_norm": 0.27925679087638855, "learning_rate": 4.24872329067141e-06, "loss": 0.343, "step": 29174 }, { "epoch": 2.9661447742984954, "grad_norm": 0.2819390594959259, "learning_rate": 4.248372435524876e-06, "loss": 0.3513, "step": 29175 }, { "epoch": 2.9662464416429444, "grad_norm": 0.27792757749557495, "learning_rate": 4.248021584164861e-06, "loss": 0.3448, "step": 29176 }, { "epoch": 2.9663481089873933, "grad_norm": 0.2643979489803314, "learning_rate": 4.247670736593134e-06, "loss": 0.304, "step": 29177 }, { "epoch": 2.9664497763318423, "grad_norm": 0.24478144943714142, "learning_rate": 4.247319892811466e-06, "loss": 0.3496, "step": 29178 }, { "epoch": 2.966551443676291, "grad_norm": 0.24430273473262787, "learning_rate": 4.246969052821619e-06, "loss": 0.3193, "step": 29179 }, { "epoch": 2.96665311102074, "grad_norm": 0.2664034068584442, "learning_rate": 4.2466182166253646e-06, "loss": 0.3494, "step": 29180 }, { "epoch": 2.966754778365189, "grad_norm": 0.2833525240421295, "learning_rate": 4.246267384224467e-06, "loss": 0.3107, "step": 29181 }, { "epoch": 2.966856445709638, "grad_norm": 0.2626774311065674, "learning_rate": 4.245916555620696e-06, "loss": 0.3215, "step": 29182 }, { "epoch": 2.966958113054087, "grad_norm": 0.26626408100128174, "learning_rate": 4.2455657308158186e-06, "loss": 0.3106, "step": 29183 }, { "epoch": 2.967059780398536, "grad_norm": 0.27981287240982056, "learning_rate": 4.245214909811601e-06, "loss": 0.3726, "step": 29184 }, { "epoch": 2.967161447742985, "grad_norm": 0.26600441336631775, "learning_rate": 4.2448640926098125e-06, "loss": 0.3318, "step": 29185 }, { "epoch": 2.967263115087434, "grad_norm": 0.2588013708591461, "learning_rate": 4.2445132792122175e-06, "loss": 0.3271, "step": 29186 }, { "epoch": 2.9673647824318827, "grad_norm": 0.2816632091999054, "learning_rate": 4.2441624696205855e-06, "loss": 0.3325, "step": 29187 }, { "epoch": 2.9674664497763317, "grad_norm": 0.2897395193576813, "learning_rate": 4.243811663836685e-06, "loss": 0.3366, "step": 29188 }, { "epoch": 2.9675681171207806, "grad_norm": 0.28240689635276794, "learning_rate": 4.24346086186228e-06, "loss": 0.3637, "step": 29189 }, { "epoch": 2.9676697844652296, "grad_norm": 0.26576483249664307, "learning_rate": 4.243110063699141e-06, "loss": 0.3969, "step": 29190 }, { "epoch": 2.9677714518096785, "grad_norm": 0.25711625814437866, "learning_rate": 4.242759269349032e-06, "loss": 0.3199, "step": 29191 }, { "epoch": 2.9678731191541274, "grad_norm": 0.26361942291259766, "learning_rate": 4.2424084788137216e-06, "loss": 0.3003, "step": 29192 }, { "epoch": 2.967974786498577, "grad_norm": 0.27035775780677795, "learning_rate": 4.2420576920949785e-06, "loss": 0.3052, "step": 29193 }, { "epoch": 2.9680764538430258, "grad_norm": 0.2600240409374237, "learning_rate": 4.241706909194568e-06, "loss": 0.3448, "step": 29194 }, { "epoch": 2.9681781211874747, "grad_norm": 0.2669677138328552, "learning_rate": 4.2413561301142585e-06, "loss": 0.3434, "step": 29195 }, { "epoch": 2.9682797885319236, "grad_norm": 0.2617553770542145, "learning_rate": 4.241005354855816e-06, "loss": 0.3343, "step": 29196 }, { "epoch": 2.9683814558763726, "grad_norm": 0.2796649634838104, "learning_rate": 4.2406545834210076e-06, "loss": 0.3369, "step": 29197 }, { "epoch": 2.9684831232208215, "grad_norm": 0.2782529592514038, "learning_rate": 4.240303815811602e-06, "loss": 0.3391, "step": 29198 }, { "epoch": 2.9685847905652705, "grad_norm": 0.2798035442829132, "learning_rate": 4.2399530520293645e-06, "loss": 0.3251, "step": 29199 }, { "epoch": 2.9686864579097194, "grad_norm": 0.24280159175395966, "learning_rate": 4.239602292076065e-06, "loss": 0.3423, "step": 29200 }, { "epoch": 2.9687881252541684, "grad_norm": 0.2805138826370239, "learning_rate": 4.2392515359534655e-06, "loss": 0.396, "step": 29201 }, { "epoch": 2.9688897925986173, "grad_norm": 0.27629026770591736, "learning_rate": 4.238900783663337e-06, "loss": 0.2995, "step": 29202 }, { "epoch": 2.9689914599430662, "grad_norm": 0.2666533589363098, "learning_rate": 4.238550035207446e-06, "loss": 0.3169, "step": 29203 }, { "epoch": 2.969093127287515, "grad_norm": 0.26861608028411865, "learning_rate": 4.238199290587559e-06, "loss": 0.3241, "step": 29204 }, { "epoch": 2.969194794631964, "grad_norm": 0.25213822722435, "learning_rate": 4.237848549805443e-06, "loss": 0.3328, "step": 29205 }, { "epoch": 2.969296461976413, "grad_norm": 0.26299718022346497, "learning_rate": 4.237497812862864e-06, "loss": 0.3198, "step": 29206 }, { "epoch": 2.969398129320862, "grad_norm": 0.2729227840900421, "learning_rate": 4.23714707976159e-06, "loss": 0.3564, "step": 29207 }, { "epoch": 2.969499796665311, "grad_norm": 0.28267601132392883, "learning_rate": 4.236796350503389e-06, "loss": 0.3126, "step": 29208 }, { "epoch": 2.9696014640097603, "grad_norm": 0.27925360202789307, "learning_rate": 4.236445625090024e-06, "loss": 0.3125, "step": 29209 }, { "epoch": 2.9697031313542093, "grad_norm": 0.2754433751106262, "learning_rate": 4.236094903523267e-06, "loss": 0.3374, "step": 29210 }, { "epoch": 2.969804798698658, "grad_norm": 0.26259467005729675, "learning_rate": 4.235744185804882e-06, "loss": 0.3397, "step": 29211 }, { "epoch": 2.969906466043107, "grad_norm": 0.26255202293395996, "learning_rate": 4.235393471936635e-06, "loss": 0.3336, "step": 29212 }, { "epoch": 2.970008133387556, "grad_norm": 0.2700255811214447, "learning_rate": 4.235042761920295e-06, "loss": 0.3465, "step": 29213 }, { "epoch": 2.970109800732005, "grad_norm": 0.271432489156723, "learning_rate": 4.234692055757627e-06, "loss": 0.36, "step": 29214 }, { "epoch": 2.970211468076454, "grad_norm": 0.2514961063861847, "learning_rate": 4.2343413534503994e-06, "loss": 0.32, "step": 29215 }, { "epoch": 2.970313135420903, "grad_norm": 0.29090264439582825, "learning_rate": 4.233990655000377e-06, "loss": 0.3139, "step": 29216 }, { "epoch": 2.970414802765352, "grad_norm": 0.26614823937416077, "learning_rate": 4.23363996040933e-06, "loss": 0.3382, "step": 29217 }, { "epoch": 2.970516470109801, "grad_norm": 0.2595229744911194, "learning_rate": 4.23328926967902e-06, "loss": 0.3447, "step": 29218 }, { "epoch": 2.9706181374542497, "grad_norm": 0.28270775079727173, "learning_rate": 4.232938582811217e-06, "loss": 0.3267, "step": 29219 }, { "epoch": 2.9707198047986987, "grad_norm": 0.26183679699897766, "learning_rate": 4.2325878998076885e-06, "loss": 0.3275, "step": 29220 }, { "epoch": 2.9708214721431476, "grad_norm": 0.2649582028388977, "learning_rate": 4.2322372206701985e-06, "loss": 0.3166, "step": 29221 }, { "epoch": 2.9709231394875966, "grad_norm": 0.26886293292045593, "learning_rate": 4.231886545400517e-06, "loss": 0.3147, "step": 29222 }, { "epoch": 2.9710248068320455, "grad_norm": 0.2711668908596039, "learning_rate": 4.231535874000406e-06, "loss": 0.337, "step": 29223 }, { "epoch": 2.9711264741764944, "grad_norm": 0.2717337906360626, "learning_rate": 4.231185206471635e-06, "loss": 0.3581, "step": 29224 }, { "epoch": 2.9712281415209434, "grad_norm": 0.26184821128845215, "learning_rate": 4.230834542815972e-06, "loss": 0.3247, "step": 29225 }, { "epoch": 2.9713298088653923, "grad_norm": 0.2782864570617676, "learning_rate": 4.23048388303518e-06, "loss": 0.3511, "step": 29226 }, { "epoch": 2.9714314762098413, "grad_norm": 0.2615368366241455, "learning_rate": 4.230133227131029e-06, "loss": 0.3277, "step": 29227 }, { "epoch": 2.97153314355429, "grad_norm": 0.2536311745643616, "learning_rate": 4.2297825751052825e-06, "loss": 0.3344, "step": 29228 }, { "epoch": 2.971634810898739, "grad_norm": 0.2788231670856476, "learning_rate": 4.229431926959708e-06, "loss": 0.3022, "step": 29229 }, { "epoch": 2.971736478243188, "grad_norm": 0.2553127706050873, "learning_rate": 4.229081282696073e-06, "loss": 0.339, "step": 29230 }, { "epoch": 2.971838145587637, "grad_norm": 0.2621442675590515, "learning_rate": 4.228730642316144e-06, "loss": 0.3642, "step": 29231 }, { "epoch": 2.971939812932086, "grad_norm": 0.27649107575416565, "learning_rate": 4.228380005821687e-06, "loss": 0.353, "step": 29232 }, { "epoch": 2.972041480276535, "grad_norm": 0.26273900270462036, "learning_rate": 4.228029373214467e-06, "loss": 0.3332, "step": 29233 }, { "epoch": 2.9721431476209843, "grad_norm": 0.27441370487213135, "learning_rate": 4.2276787444962505e-06, "loss": 0.3286, "step": 29234 }, { "epoch": 2.9722448149654332, "grad_norm": 0.27645617723464966, "learning_rate": 4.227328119668807e-06, "loss": 0.3356, "step": 29235 }, { "epoch": 2.972346482309882, "grad_norm": 0.2657455503940582, "learning_rate": 4.2269774987339005e-06, "loss": 0.3326, "step": 29236 }, { "epoch": 2.972448149654331, "grad_norm": 0.2627449631690979, "learning_rate": 4.226626881693297e-06, "loss": 0.3034, "step": 29237 }, { "epoch": 2.97254981699878, "grad_norm": 0.30194056034088135, "learning_rate": 4.226276268548764e-06, "loss": 0.3115, "step": 29238 }, { "epoch": 2.972651484343229, "grad_norm": 0.28703275322914124, "learning_rate": 4.225925659302067e-06, "loss": 0.3051, "step": 29239 }, { "epoch": 2.972753151687678, "grad_norm": 0.2782348692417145, "learning_rate": 4.225575053954973e-06, "loss": 0.3421, "step": 29240 }, { "epoch": 2.972854819032127, "grad_norm": 0.24713779985904694, "learning_rate": 4.225224452509246e-06, "loss": 0.3613, "step": 29241 }, { "epoch": 2.972956486376576, "grad_norm": 0.28747713565826416, "learning_rate": 4.224873854966657e-06, "loss": 0.3327, "step": 29242 }, { "epoch": 2.9730581537210248, "grad_norm": 0.25902339816093445, "learning_rate": 4.224523261328968e-06, "loss": 0.353, "step": 29243 }, { "epoch": 2.9731598210654737, "grad_norm": 0.249189555644989, "learning_rate": 4.224172671597946e-06, "loss": 0.3549, "step": 29244 }, { "epoch": 2.9732614884099227, "grad_norm": 0.2552030086517334, "learning_rate": 4.223822085775358e-06, "loss": 0.3272, "step": 29245 }, { "epoch": 2.9733631557543716, "grad_norm": 0.27138182520866394, "learning_rate": 4.223471503862971e-06, "loss": 0.3388, "step": 29246 }, { "epoch": 2.9734648230988205, "grad_norm": 0.24164053797721863, "learning_rate": 4.223120925862549e-06, "loss": 0.3366, "step": 29247 }, { "epoch": 2.9735664904432695, "grad_norm": 0.27568358182907104, "learning_rate": 4.222770351775859e-06, "loss": 0.3131, "step": 29248 }, { "epoch": 2.9736681577877184, "grad_norm": 0.2832760810852051, "learning_rate": 4.222419781604666e-06, "loss": 0.344, "step": 29249 }, { "epoch": 2.973769825132168, "grad_norm": 0.26227694749832153, "learning_rate": 4.222069215350741e-06, "loss": 0.3085, "step": 29250 }, { "epoch": 2.9738714924766168, "grad_norm": 0.26639920473098755, "learning_rate": 4.2217186530158424e-06, "loss": 0.3165, "step": 29251 }, { "epoch": 2.9739731598210657, "grad_norm": 0.26676517724990845, "learning_rate": 4.221368094601743e-06, "loss": 0.337, "step": 29252 }, { "epoch": 2.9740748271655146, "grad_norm": 0.2590191662311554, "learning_rate": 4.221017540110205e-06, "loss": 0.3343, "step": 29253 }, { "epoch": 2.9741764945099636, "grad_norm": 0.2660309672355652, "learning_rate": 4.220666989542995e-06, "loss": 0.3198, "step": 29254 }, { "epoch": 2.9742781618544125, "grad_norm": 0.25557905435562134, "learning_rate": 4.220316442901882e-06, "loss": 0.3209, "step": 29255 }, { "epoch": 2.9743798291988615, "grad_norm": 0.28413066267967224, "learning_rate": 4.219965900188626e-06, "loss": 0.3401, "step": 29256 }, { "epoch": 2.9744814965433104, "grad_norm": 0.2774014174938202, "learning_rate": 4.219615361405001e-06, "loss": 0.3222, "step": 29257 }, { "epoch": 2.9745831638877593, "grad_norm": 0.2500174045562744, "learning_rate": 4.219264826552763e-06, "loss": 0.3421, "step": 29258 }, { "epoch": 2.9746848312322083, "grad_norm": 0.25356507301330566, "learning_rate": 4.218914295633686e-06, "loss": 0.3352, "step": 29259 }, { "epoch": 2.9747864985766572, "grad_norm": 0.2704644501209259, "learning_rate": 4.218563768649534e-06, "loss": 0.3196, "step": 29260 }, { "epoch": 2.974888165921106, "grad_norm": 0.26374882459640503, "learning_rate": 4.21821324560207e-06, "loss": 0.3433, "step": 29261 }, { "epoch": 2.974989833265555, "grad_norm": 0.2819225490093231, "learning_rate": 4.217862726493065e-06, "loss": 0.3426, "step": 29262 }, { "epoch": 2.975091500610004, "grad_norm": 0.2556472718715668, "learning_rate": 4.217512211324278e-06, "loss": 0.3281, "step": 29263 }, { "epoch": 2.975193167954453, "grad_norm": 0.2629229724407196, "learning_rate": 4.21716170009748e-06, "loss": 0.3323, "step": 29264 }, { "epoch": 2.975294835298902, "grad_norm": 0.27116698026657104, "learning_rate": 4.2168111928144365e-06, "loss": 0.3187, "step": 29265 }, { "epoch": 2.975396502643351, "grad_norm": 0.3199075758457184, "learning_rate": 4.21646068947691e-06, "loss": 0.3405, "step": 29266 }, { "epoch": 2.9754981699878, "grad_norm": 0.2798752188682556, "learning_rate": 4.216110190086672e-06, "loss": 0.3344, "step": 29267 }, { "epoch": 2.9755998373322488, "grad_norm": 0.28053954243659973, "learning_rate": 4.215759694645481e-06, "loss": 0.3414, "step": 29268 }, { "epoch": 2.9757015046766977, "grad_norm": 0.25886449217796326, "learning_rate": 4.215409203155106e-06, "loss": 0.3569, "step": 29269 }, { "epoch": 2.9758031720211466, "grad_norm": 0.26973509788513184, "learning_rate": 4.215058715617315e-06, "loss": 0.3076, "step": 29270 }, { "epoch": 2.9759048393655956, "grad_norm": 0.2625274360179901, "learning_rate": 4.21470823203387e-06, "loss": 0.2989, "step": 29271 }, { "epoch": 2.9760065067100445, "grad_norm": 0.2633926272392273, "learning_rate": 4.214357752406542e-06, "loss": 0.3204, "step": 29272 }, { "epoch": 2.9761081740544935, "grad_norm": 0.26272720098495483, "learning_rate": 4.2140072767370884e-06, "loss": 0.305, "step": 29273 }, { "epoch": 2.9762098413989424, "grad_norm": 0.2685689330101013, "learning_rate": 4.213656805027281e-06, "loss": 0.3004, "step": 29274 }, { "epoch": 2.976311508743392, "grad_norm": 0.2646779716014862, "learning_rate": 4.213306337278885e-06, "loss": 0.3053, "step": 29275 }, { "epoch": 2.9764131760878407, "grad_norm": 0.2759847342967987, "learning_rate": 4.212955873493662e-06, "loss": 0.3192, "step": 29276 }, { "epoch": 2.9765148434322897, "grad_norm": 0.26978248357772827, "learning_rate": 4.212605413673384e-06, "loss": 0.3391, "step": 29277 }, { "epoch": 2.9766165107767386, "grad_norm": 0.29102280735969543, "learning_rate": 4.212254957819809e-06, "loss": 0.3263, "step": 29278 }, { "epoch": 2.9767181781211876, "grad_norm": 0.2477715164422989, "learning_rate": 4.211904505934708e-06, "loss": 0.3399, "step": 29279 }, { "epoch": 2.9768198454656365, "grad_norm": 0.2581198811531067, "learning_rate": 4.2115540580198465e-06, "loss": 0.3285, "step": 29280 }, { "epoch": 2.9769215128100854, "grad_norm": 0.271563857793808, "learning_rate": 4.211203614076985e-06, "loss": 0.338, "step": 29281 }, { "epoch": 2.9770231801545344, "grad_norm": 0.268575519323349, "learning_rate": 4.210853174107896e-06, "loss": 0.3422, "step": 29282 }, { "epoch": 2.9771248474989833, "grad_norm": 0.2758171260356903, "learning_rate": 4.210502738114338e-06, "loss": 0.3609, "step": 29283 }, { "epoch": 2.9772265148434323, "grad_norm": 0.2560630738735199, "learning_rate": 4.21015230609808e-06, "loss": 0.325, "step": 29284 }, { "epoch": 2.977328182187881, "grad_norm": 0.2370014786720276, "learning_rate": 4.20980187806089e-06, "loss": 0.3284, "step": 29285 }, { "epoch": 2.97742984953233, "grad_norm": 0.27560365200042725, "learning_rate": 4.209451454004526e-06, "loss": 0.3376, "step": 29286 }, { "epoch": 2.977531516876779, "grad_norm": 0.2513009309768677, "learning_rate": 4.209101033930761e-06, "loss": 0.3245, "step": 29287 }, { "epoch": 2.977633184221228, "grad_norm": 0.26310014724731445, "learning_rate": 4.208750617841355e-06, "loss": 0.309, "step": 29288 }, { "epoch": 2.977734851565677, "grad_norm": 0.26631462574005127, "learning_rate": 4.208400205738076e-06, "loss": 0.3278, "step": 29289 }, { "epoch": 2.977836518910126, "grad_norm": 0.26864850521087646, "learning_rate": 4.20804979762269e-06, "loss": 0.3214, "step": 29290 }, { "epoch": 2.9779381862545753, "grad_norm": 0.25376325845718384, "learning_rate": 4.207699393496959e-06, "loss": 0.3407, "step": 29291 }, { "epoch": 2.9780398535990242, "grad_norm": 0.28097963333129883, "learning_rate": 4.207348993362652e-06, "loss": 0.3045, "step": 29292 }, { "epoch": 2.978141520943473, "grad_norm": 0.26978862285614014, "learning_rate": 4.206998597221531e-06, "loss": 0.3058, "step": 29293 }, { "epoch": 2.978243188287922, "grad_norm": 0.2723196744918823, "learning_rate": 4.206648205075361e-06, "loss": 0.3396, "step": 29294 }, { "epoch": 2.978344855632371, "grad_norm": 0.28341829776763916, "learning_rate": 4.2062978169259125e-06, "loss": 0.3253, "step": 29295 }, { "epoch": 2.97844652297682, "grad_norm": 0.2809285521507263, "learning_rate": 4.205947432774944e-06, "loss": 0.3441, "step": 29296 }, { "epoch": 2.978548190321269, "grad_norm": 0.2512826919555664, "learning_rate": 4.205597052624227e-06, "loss": 0.3162, "step": 29297 }, { "epoch": 2.978649857665718, "grad_norm": 0.2649478018283844, "learning_rate": 4.20524667647552e-06, "loss": 0.346, "step": 29298 }, { "epoch": 2.978751525010167, "grad_norm": 0.2541739344596863, "learning_rate": 4.204896304330591e-06, "loss": 0.3307, "step": 29299 }, { "epoch": 2.9788531923546158, "grad_norm": 0.27322834730148315, "learning_rate": 4.204545936191209e-06, "loss": 0.3501, "step": 29300 }, { "epoch": 2.9789548596990647, "grad_norm": 0.2777858078479767, "learning_rate": 4.204195572059132e-06, "loss": 0.3126, "step": 29301 }, { "epoch": 2.9790565270435136, "grad_norm": 0.2954893708229065, "learning_rate": 4.203845211936132e-06, "loss": 0.3011, "step": 29302 }, { "epoch": 2.9791581943879626, "grad_norm": 0.26948121190071106, "learning_rate": 4.203494855823968e-06, "loss": 0.316, "step": 29303 }, { "epoch": 2.9792598617324115, "grad_norm": 0.2735348641872406, "learning_rate": 4.203144503724407e-06, "loss": 0.3374, "step": 29304 }, { "epoch": 2.9793615290768605, "grad_norm": 0.2682128846645355, "learning_rate": 4.202794155639218e-06, "loss": 0.3376, "step": 29305 }, { "epoch": 2.9794631964213094, "grad_norm": 0.31071749329566956, "learning_rate": 4.2024438115701595e-06, "loss": 0.3307, "step": 29306 }, { "epoch": 2.9795648637657584, "grad_norm": 0.269761323928833, "learning_rate": 4.202093471519002e-06, "loss": 0.3621, "step": 29307 }, { "epoch": 2.9796665311102073, "grad_norm": 0.25759050250053406, "learning_rate": 4.201743135487507e-06, "loss": 0.3171, "step": 29308 }, { "epoch": 2.9797681984546562, "grad_norm": 0.26773351430892944, "learning_rate": 4.201392803477437e-06, "loss": 0.3257, "step": 29309 }, { "epoch": 2.979869865799105, "grad_norm": 0.255928635597229, "learning_rate": 4.201042475490566e-06, "loss": 0.3316, "step": 29310 }, { "epoch": 2.979971533143554, "grad_norm": 0.2759833037853241, "learning_rate": 4.200692151528649e-06, "loss": 0.3253, "step": 29311 }, { "epoch": 2.980073200488003, "grad_norm": 0.275211900472641, "learning_rate": 4.200341831593458e-06, "loss": 0.3069, "step": 29312 }, { "epoch": 2.980174867832452, "grad_norm": 0.2651907801628113, "learning_rate": 4.199991515686753e-06, "loss": 0.348, "step": 29313 }, { "epoch": 2.980276535176901, "grad_norm": 0.25991931557655334, "learning_rate": 4.1996412038103e-06, "loss": 0.3599, "step": 29314 }, { "epoch": 2.98037820252135, "grad_norm": 0.27502161264419556, "learning_rate": 4.199290895965866e-06, "loss": 0.3418, "step": 29315 }, { "epoch": 2.9804798698657993, "grad_norm": 0.2616165280342102, "learning_rate": 4.198940592155214e-06, "loss": 0.3518, "step": 29316 }, { "epoch": 2.980581537210248, "grad_norm": 0.2734721601009369, "learning_rate": 4.1985902923801084e-06, "loss": 0.3062, "step": 29317 }, { "epoch": 2.980683204554697, "grad_norm": 0.26576340198516846, "learning_rate": 4.198239996642315e-06, "loss": 0.3143, "step": 29318 }, { "epoch": 2.980784871899146, "grad_norm": 0.2614213824272156, "learning_rate": 4.1978897049435955e-06, "loss": 0.3374, "step": 29319 }, { "epoch": 2.980886539243595, "grad_norm": 0.2722143530845642, "learning_rate": 4.1975394172857205e-06, "loss": 0.3204, "step": 29320 }, { "epoch": 2.980988206588044, "grad_norm": 0.2919231653213501, "learning_rate": 4.19718913367045e-06, "loss": 0.3138, "step": 29321 }, { "epoch": 2.981089873932493, "grad_norm": 0.2609103322029114, "learning_rate": 4.19683885409955e-06, "loss": 0.3416, "step": 29322 }, { "epoch": 2.981191541276942, "grad_norm": 0.28012239933013916, "learning_rate": 4.196488578574784e-06, "loss": 0.3528, "step": 29323 }, { "epoch": 2.981293208621391, "grad_norm": 0.2934015691280365, "learning_rate": 4.196138307097917e-06, "loss": 0.3117, "step": 29324 }, { "epoch": 2.9813948759658397, "grad_norm": 0.23161081969738007, "learning_rate": 4.195788039670717e-06, "loss": 0.3131, "step": 29325 }, { "epoch": 2.9814965433102887, "grad_norm": 0.2757977545261383, "learning_rate": 4.195437776294944e-06, "loss": 0.3412, "step": 29326 }, { "epoch": 2.9815982106547376, "grad_norm": 0.2761210501194, "learning_rate": 4.195087516972365e-06, "loss": 0.3144, "step": 29327 }, { "epoch": 2.9816998779991866, "grad_norm": 0.27457359433174133, "learning_rate": 4.194737261704743e-06, "loss": 0.3257, "step": 29328 }, { "epoch": 2.9818015453436355, "grad_norm": 0.2748703062534332, "learning_rate": 4.194387010493842e-06, "loss": 0.3208, "step": 29329 }, { "epoch": 2.9819032126880844, "grad_norm": 0.2799684703350067, "learning_rate": 4.194036763341431e-06, "loss": 0.3498, "step": 29330 }, { "epoch": 2.9820048800325334, "grad_norm": 0.28445667028427124, "learning_rate": 4.1936865202492695e-06, "loss": 0.3066, "step": 29331 }, { "epoch": 2.9821065473769828, "grad_norm": 0.28737741708755493, "learning_rate": 4.193336281219124e-06, "loss": 0.3385, "step": 29332 }, { "epoch": 2.9822082147214317, "grad_norm": 0.2861056625843048, "learning_rate": 4.192986046252758e-06, "loss": 0.3716, "step": 29333 }, { "epoch": 2.9823098820658807, "grad_norm": 0.2685370147228241, "learning_rate": 4.192635815351937e-06, "loss": 0.3438, "step": 29334 }, { "epoch": 2.9824115494103296, "grad_norm": 0.276180237531662, "learning_rate": 4.192285588518425e-06, "loss": 0.3729, "step": 29335 }, { "epoch": 2.9825132167547785, "grad_norm": 0.2695278227329254, "learning_rate": 4.191935365753986e-06, "loss": 0.3298, "step": 29336 }, { "epoch": 2.9826148840992275, "grad_norm": 0.2737390100955963, "learning_rate": 4.191585147060386e-06, "loss": 0.3432, "step": 29337 }, { "epoch": 2.9827165514436764, "grad_norm": 0.27966436743736267, "learning_rate": 4.1912349324393855e-06, "loss": 0.34, "step": 29338 }, { "epoch": 2.9828182187881254, "grad_norm": 0.276134729385376, "learning_rate": 4.190884721892752e-06, "loss": 0.3058, "step": 29339 }, { "epoch": 2.9829198861325743, "grad_norm": 0.2591608762741089, "learning_rate": 4.19053451542225e-06, "loss": 0.3141, "step": 29340 }, { "epoch": 2.9830215534770232, "grad_norm": 0.2740788161754608, "learning_rate": 4.190184313029642e-06, "loss": 0.3212, "step": 29341 }, { "epoch": 2.983123220821472, "grad_norm": 0.26665645837783813, "learning_rate": 4.189834114716694e-06, "loss": 0.3269, "step": 29342 }, { "epoch": 2.983224888165921, "grad_norm": 0.2515581548213959, "learning_rate": 4.189483920485168e-06, "loss": 0.3353, "step": 29343 }, { "epoch": 2.98332655551037, "grad_norm": 0.25553351640701294, "learning_rate": 4.189133730336829e-06, "loss": 0.299, "step": 29344 }, { "epoch": 2.983428222854819, "grad_norm": 0.27105075120925903, "learning_rate": 4.1887835442734435e-06, "loss": 0.3293, "step": 29345 }, { "epoch": 2.983529890199268, "grad_norm": 0.27180197834968567, "learning_rate": 4.1884333622967725e-06, "loss": 0.3572, "step": 29346 }, { "epoch": 2.983631557543717, "grad_norm": 0.2672429382801056, "learning_rate": 4.188083184408581e-06, "loss": 0.3509, "step": 29347 }, { "epoch": 2.983733224888166, "grad_norm": 0.26929759979248047, "learning_rate": 4.187733010610634e-06, "loss": 0.3473, "step": 29348 }, { "epoch": 2.9838348922326148, "grad_norm": 0.2627240717411041, "learning_rate": 4.187382840904694e-06, "loss": 0.3048, "step": 29349 }, { "epoch": 2.9839365595770637, "grad_norm": 0.2528021037578583, "learning_rate": 4.187032675292529e-06, "loss": 0.3104, "step": 29350 }, { "epoch": 2.9840382269215127, "grad_norm": 0.28167757391929626, "learning_rate": 4.186682513775897e-06, "loss": 0.3773, "step": 29351 }, { "epoch": 2.9841398942659616, "grad_norm": 0.37165743112564087, "learning_rate": 4.186332356356567e-06, "loss": 0.3962, "step": 29352 }, { "epoch": 2.9842415616104105, "grad_norm": 0.258976548910141, "learning_rate": 4.185982203036301e-06, "loss": 0.3282, "step": 29353 }, { "epoch": 2.9843432289548595, "grad_norm": 0.24060989916324615, "learning_rate": 4.185632053816862e-06, "loss": 0.3239, "step": 29354 }, { "epoch": 2.9844448962993084, "grad_norm": 0.24776646494865417, "learning_rate": 4.185281908700017e-06, "loss": 0.332, "step": 29355 }, { "epoch": 2.9845465636437574, "grad_norm": 0.25981205701828003, "learning_rate": 4.1849317676875266e-06, "loss": 0.2938, "step": 29356 }, { "epoch": 2.9846482309882068, "grad_norm": 0.27127617597579956, "learning_rate": 4.184581630781158e-06, "loss": 0.3064, "step": 29357 }, { "epoch": 2.9847498983326557, "grad_norm": 0.2683664858341217, "learning_rate": 4.184231497982672e-06, "loss": 0.3303, "step": 29358 }, { "epoch": 2.9848515656771046, "grad_norm": 0.24608245491981506, "learning_rate": 4.1838813692938336e-06, "loss": 0.3131, "step": 29359 }, { "epoch": 2.9849532330215536, "grad_norm": 0.25128453969955444, "learning_rate": 4.183531244716408e-06, "loss": 0.3463, "step": 29360 }, { "epoch": 2.9850549003660025, "grad_norm": 0.26105281710624695, "learning_rate": 4.183181124252157e-06, "loss": 0.3313, "step": 29361 }, { "epoch": 2.9851565677104515, "grad_norm": 0.27161988615989685, "learning_rate": 4.182831007902847e-06, "loss": 0.3223, "step": 29362 }, { "epoch": 2.9852582350549004, "grad_norm": 0.2658177316188812, "learning_rate": 4.182480895670238e-06, "loss": 0.3215, "step": 29363 }, { "epoch": 2.9853599023993493, "grad_norm": 0.2762504518032074, "learning_rate": 4.182130787556097e-06, "loss": 0.3664, "step": 29364 }, { "epoch": 2.9854615697437983, "grad_norm": 0.24815131723880768, "learning_rate": 4.1817806835621875e-06, "loss": 0.314, "step": 29365 }, { "epoch": 2.9855632370882472, "grad_norm": 0.27574336528778076, "learning_rate": 4.181430583690271e-06, "loss": 0.3005, "step": 29366 }, { "epoch": 2.985664904432696, "grad_norm": 0.259518563747406, "learning_rate": 4.181080487942114e-06, "loss": 0.3225, "step": 29367 }, { "epoch": 2.985766571777145, "grad_norm": 0.26234671473503113, "learning_rate": 4.180730396319478e-06, "loss": 0.3339, "step": 29368 }, { "epoch": 2.985868239121594, "grad_norm": 0.25206178426742554, "learning_rate": 4.1803803088241274e-06, "loss": 0.3479, "step": 29369 }, { "epoch": 2.985969906466043, "grad_norm": 0.2513401210308075, "learning_rate": 4.180030225457828e-06, "loss": 0.3363, "step": 29370 }, { "epoch": 2.986071573810492, "grad_norm": 0.267970472574234, "learning_rate": 4.179680146222339e-06, "loss": 0.3316, "step": 29371 }, { "epoch": 2.986173241154941, "grad_norm": 0.2691503167152405, "learning_rate": 4.179330071119428e-06, "loss": 0.3457, "step": 29372 }, { "epoch": 2.9862749084993903, "grad_norm": 0.2640065848827362, "learning_rate": 4.178980000150856e-06, "loss": 0.3633, "step": 29373 }, { "epoch": 2.986376575843839, "grad_norm": 0.2858096957206726, "learning_rate": 4.178629933318388e-06, "loss": 0.345, "step": 29374 }, { "epoch": 2.986478243188288, "grad_norm": 0.2625221908092499, "learning_rate": 4.1782798706237884e-06, "loss": 0.333, "step": 29375 }, { "epoch": 2.986579910532737, "grad_norm": 0.291801393032074, "learning_rate": 4.177929812068818e-06, "loss": 0.3211, "step": 29376 }, { "epoch": 2.986681577877186, "grad_norm": 0.2569257318973541, "learning_rate": 4.177579757655243e-06, "loss": 0.3381, "step": 29377 }, { "epoch": 2.986783245221635, "grad_norm": 0.2763083875179291, "learning_rate": 4.177229707384825e-06, "loss": 0.3178, "step": 29378 }, { "epoch": 2.986884912566084, "grad_norm": 0.26076874136924744, "learning_rate": 4.176879661259328e-06, "loss": 0.3098, "step": 29379 }, { "epoch": 2.986986579910533, "grad_norm": 0.28715386986732483, "learning_rate": 4.176529619280516e-06, "loss": 0.3414, "step": 29380 }, { "epoch": 2.987088247254982, "grad_norm": 0.273128867149353, "learning_rate": 4.176179581450152e-06, "loss": 0.34, "step": 29381 }, { "epoch": 2.9871899145994307, "grad_norm": 0.2786627411842346, "learning_rate": 4.175829547770001e-06, "loss": 0.3332, "step": 29382 }, { "epoch": 2.9872915819438797, "grad_norm": 0.271495521068573, "learning_rate": 4.1754795182418225e-06, "loss": 0.3037, "step": 29383 }, { "epoch": 2.9873932492883286, "grad_norm": 0.26388493180274963, "learning_rate": 4.175129492867383e-06, "loss": 0.2938, "step": 29384 }, { "epoch": 2.9874949166327776, "grad_norm": 0.2744564414024353, "learning_rate": 4.174779471648446e-06, "loss": 0.3218, "step": 29385 }, { "epoch": 2.9875965839772265, "grad_norm": 0.2676892876625061, "learning_rate": 4.1744294545867725e-06, "loss": 0.3131, "step": 29386 }, { "epoch": 2.9876982513216754, "grad_norm": 0.26215294003486633, "learning_rate": 4.174079441684129e-06, "loss": 0.3593, "step": 29387 }, { "epoch": 2.9877999186661244, "grad_norm": 0.25977039337158203, "learning_rate": 4.1737294329422754e-06, "loss": 0.3199, "step": 29388 }, { "epoch": 2.9879015860105733, "grad_norm": 0.25176119804382324, "learning_rate": 4.1733794283629765e-06, "loss": 0.3256, "step": 29389 }, { "epoch": 2.9880032533550223, "grad_norm": 0.2902168333530426, "learning_rate": 4.173029427947997e-06, "loss": 0.3387, "step": 29390 }, { "epoch": 2.988104920699471, "grad_norm": 0.28377777338027954, "learning_rate": 4.172679431699098e-06, "loss": 0.3175, "step": 29391 }, { "epoch": 2.98820658804392, "grad_norm": 0.2535376250743866, "learning_rate": 4.172329439618044e-06, "loss": 0.3297, "step": 29392 }, { "epoch": 2.988308255388369, "grad_norm": 0.25552698969841003, "learning_rate": 4.171979451706597e-06, "loss": 0.3483, "step": 29393 }, { "epoch": 2.988409922732818, "grad_norm": 0.2629719078540802, "learning_rate": 4.171629467966521e-06, "loss": 0.3416, "step": 29394 }, { "epoch": 2.988511590077267, "grad_norm": 0.2808475196361542, "learning_rate": 4.171279488399578e-06, "loss": 0.323, "step": 29395 }, { "epoch": 2.988613257421716, "grad_norm": 0.2814679741859436, "learning_rate": 4.170929513007533e-06, "loss": 0.3299, "step": 29396 }, { "epoch": 2.988714924766165, "grad_norm": 0.2506479024887085, "learning_rate": 4.170579541792148e-06, "loss": 0.3103, "step": 29397 }, { "epoch": 2.9888165921106142, "grad_norm": 0.2791460454463959, "learning_rate": 4.170229574755186e-06, "loss": 0.3326, "step": 29398 }, { "epoch": 2.988918259455063, "grad_norm": 0.24891513586044312, "learning_rate": 4.16987961189841e-06, "loss": 0.3384, "step": 29399 }, { "epoch": 2.989019926799512, "grad_norm": 0.2684652507305145, "learning_rate": 4.169529653223583e-06, "loss": 0.3212, "step": 29400 }, { "epoch": 2.989121594143961, "grad_norm": 0.2793971598148346, "learning_rate": 4.169179698732468e-06, "loss": 0.3082, "step": 29401 }, { "epoch": 2.98922326148841, "grad_norm": 0.25996634364128113, "learning_rate": 4.168829748426831e-06, "loss": 0.3587, "step": 29402 }, { "epoch": 2.989324928832859, "grad_norm": 0.2784494161605835, "learning_rate": 4.168479802308429e-06, "loss": 0.3277, "step": 29403 }, { "epoch": 2.989426596177308, "grad_norm": 0.24946489930152893, "learning_rate": 4.16812986037903e-06, "loss": 0.3384, "step": 29404 }, { "epoch": 2.989528263521757, "grad_norm": 0.27073314785957336, "learning_rate": 4.167779922640393e-06, "loss": 0.3605, "step": 29405 }, { "epoch": 2.9896299308662058, "grad_norm": 0.28168439865112305, "learning_rate": 4.1674299890942845e-06, "loss": 0.3228, "step": 29406 }, { "epoch": 2.9897315982106547, "grad_norm": 0.2749080955982208, "learning_rate": 4.167080059742467e-06, "loss": 0.3383, "step": 29407 }, { "epoch": 2.9898332655551036, "grad_norm": 0.28202947974205017, "learning_rate": 4.1667301345867004e-06, "loss": 0.3045, "step": 29408 }, { "epoch": 2.9899349328995526, "grad_norm": 0.2591831684112549, "learning_rate": 4.1663802136287514e-06, "loss": 0.3161, "step": 29409 }, { "epoch": 2.9900366002440015, "grad_norm": 0.23876526951789856, "learning_rate": 4.166030296870377e-06, "loss": 0.3241, "step": 29410 }, { "epoch": 2.9901382675884505, "grad_norm": 0.2698654532432556, "learning_rate": 4.165680384313346e-06, "loss": 0.3156, "step": 29411 }, { "epoch": 2.9902399349328994, "grad_norm": 0.28612956404685974, "learning_rate": 4.16533047595942e-06, "loss": 0.3313, "step": 29412 }, { "epoch": 2.9903416022773484, "grad_norm": 0.2521039843559265, "learning_rate": 4.164980571810359e-06, "loss": 0.3365, "step": 29413 }, { "epoch": 2.9904432696217977, "grad_norm": 0.2819853127002716, "learning_rate": 4.16463067186793e-06, "loss": 0.3093, "step": 29414 }, { "epoch": 2.9905449369662467, "grad_norm": 0.25495001673698425, "learning_rate": 4.16428077613389e-06, "loss": 0.3188, "step": 29415 }, { "epoch": 2.9906466043106956, "grad_norm": 0.26261040568351746, "learning_rate": 4.163930884610006e-06, "loss": 0.3243, "step": 29416 }, { "epoch": 2.9907482716551446, "grad_norm": 0.25760617852211, "learning_rate": 4.16358099729804e-06, "loss": 0.3268, "step": 29417 }, { "epoch": 2.9908499389995935, "grad_norm": 0.26927173137664795, "learning_rate": 4.163231114199754e-06, "loss": 0.3519, "step": 29418 }, { "epoch": 2.9909516063440424, "grad_norm": 0.2691362500190735, "learning_rate": 4.1628812353169125e-06, "loss": 0.3178, "step": 29419 }, { "epoch": 2.9910532736884914, "grad_norm": 0.2781725227832794, "learning_rate": 4.162531360651273e-06, "loss": 0.3522, "step": 29420 }, { "epoch": 2.9911549410329403, "grad_norm": 0.2727411985397339, "learning_rate": 4.162181490204603e-06, "loss": 0.362, "step": 29421 }, { "epoch": 2.9912566083773893, "grad_norm": 0.26967653632164, "learning_rate": 4.161831623978664e-06, "loss": 0.3397, "step": 29422 }, { "epoch": 2.991358275721838, "grad_norm": 0.29474738240242004, "learning_rate": 4.161481761975217e-06, "loss": 0.3337, "step": 29423 }, { "epoch": 2.991459943066287, "grad_norm": 0.2578471601009369, "learning_rate": 4.161131904196028e-06, "loss": 0.3143, "step": 29424 }, { "epoch": 2.991561610410736, "grad_norm": 0.2919696867465973, "learning_rate": 4.160782050642854e-06, "loss": 0.2905, "step": 29425 }, { "epoch": 2.991663277755185, "grad_norm": 0.268694132566452, "learning_rate": 4.160432201317462e-06, "loss": 0.3403, "step": 29426 }, { "epoch": 2.991764945099634, "grad_norm": 0.26022234559059143, "learning_rate": 4.1600823562216145e-06, "loss": 0.3402, "step": 29427 }, { "epoch": 2.991866612444083, "grad_norm": 0.2670893669128418, "learning_rate": 4.15973251535707e-06, "loss": 0.2983, "step": 29428 }, { "epoch": 2.991968279788532, "grad_norm": 0.26804491877555847, "learning_rate": 4.159382678725597e-06, "loss": 0.3292, "step": 29429 }, { "epoch": 2.992069947132981, "grad_norm": 0.2719561457633972, "learning_rate": 4.15903284632895e-06, "loss": 0.3459, "step": 29430 }, { "epoch": 2.9921716144774297, "grad_norm": 0.2724732458591461, "learning_rate": 4.158683018168896e-06, "loss": 0.3106, "step": 29431 }, { "epoch": 2.9922732818218787, "grad_norm": 0.26155561208724976, "learning_rate": 4.158333194247201e-06, "loss": 0.3237, "step": 29432 }, { "epoch": 2.9923749491663276, "grad_norm": 0.24302920699119568, "learning_rate": 4.15798337456562e-06, "loss": 0.3081, "step": 29433 }, { "epoch": 2.9924766165107766, "grad_norm": 0.2732633650302887, "learning_rate": 4.157633559125921e-06, "loss": 0.326, "step": 29434 }, { "epoch": 2.9925782838552255, "grad_norm": 0.2665269672870636, "learning_rate": 4.157283747929861e-06, "loss": 0.3141, "step": 29435 }, { "epoch": 2.9926799511996744, "grad_norm": 0.25040876865386963, "learning_rate": 4.156933940979206e-06, "loss": 0.3554, "step": 29436 }, { "epoch": 2.9927816185441234, "grad_norm": 0.2627262771129608, "learning_rate": 4.15658413827572e-06, "loss": 0.2954, "step": 29437 }, { "epoch": 2.9928832858885723, "grad_norm": 0.28678858280181885, "learning_rate": 4.15623433982116e-06, "loss": 0.3326, "step": 29438 }, { "epoch": 2.9929849532330217, "grad_norm": 0.27188944816589355, "learning_rate": 4.155884545617293e-06, "loss": 0.3144, "step": 29439 }, { "epoch": 2.9930866205774707, "grad_norm": 0.288507342338562, "learning_rate": 4.1555347556658766e-06, "loss": 0.3407, "step": 29440 }, { "epoch": 2.9931882879219196, "grad_norm": 0.24898314476013184, "learning_rate": 4.155184969968676e-06, "loss": 0.3271, "step": 29441 }, { "epoch": 2.9932899552663685, "grad_norm": 0.2683812081813812, "learning_rate": 4.154835188527455e-06, "loss": 0.3136, "step": 29442 }, { "epoch": 2.9933916226108175, "grad_norm": 0.272178053855896, "learning_rate": 4.15448541134397e-06, "loss": 0.3254, "step": 29443 }, { "epoch": 2.9934932899552664, "grad_norm": 0.3149327337741852, "learning_rate": 4.154135638419991e-06, "loss": 0.3033, "step": 29444 }, { "epoch": 2.9935949572997154, "grad_norm": 0.2859426438808441, "learning_rate": 4.153785869757272e-06, "loss": 0.3182, "step": 29445 }, { "epoch": 2.9936966246441643, "grad_norm": 0.2661879062652588, "learning_rate": 4.153436105357579e-06, "loss": 0.3603, "step": 29446 }, { "epoch": 2.9937982919886132, "grad_norm": 0.2511410415172577, "learning_rate": 4.153086345222676e-06, "loss": 0.3507, "step": 29447 }, { "epoch": 2.993899959333062, "grad_norm": 0.2653830349445343, "learning_rate": 4.15273658935432e-06, "loss": 0.3093, "step": 29448 }, { "epoch": 2.994001626677511, "grad_norm": 0.2639584541320801, "learning_rate": 4.1523868377542784e-06, "loss": 0.3391, "step": 29449 }, { "epoch": 2.99410329402196, "grad_norm": 0.28567084670066833, "learning_rate": 4.152037090424309e-06, "loss": 0.3417, "step": 29450 }, { "epoch": 2.994204961366409, "grad_norm": 0.2906436026096344, "learning_rate": 4.1516873473661736e-06, "loss": 0.3363, "step": 29451 }, { "epoch": 2.994306628710858, "grad_norm": 0.24957135319709778, "learning_rate": 4.151337608581639e-06, "loss": 0.3131, "step": 29452 }, { "epoch": 2.994408296055307, "grad_norm": 0.26334691047668457, "learning_rate": 4.1509878740724616e-06, "loss": 0.3402, "step": 29453 }, { "epoch": 2.994509963399756, "grad_norm": 0.272845596075058, "learning_rate": 4.150638143840407e-06, "loss": 0.3174, "step": 29454 }, { "epoch": 2.994611630744205, "grad_norm": 0.24881955981254578, "learning_rate": 4.1502884178872345e-06, "loss": 0.3446, "step": 29455 }, { "epoch": 2.994713298088654, "grad_norm": 0.27760058641433716, "learning_rate": 4.149938696214706e-06, "loss": 0.3193, "step": 29456 }, { "epoch": 2.994814965433103, "grad_norm": 0.251630038022995, "learning_rate": 4.149588978824588e-06, "loss": 0.3144, "step": 29457 }, { "epoch": 2.994916632777552, "grad_norm": 0.24722062051296234, "learning_rate": 4.149239265718635e-06, "loss": 0.3407, "step": 29458 }, { "epoch": 2.995018300122001, "grad_norm": 0.2601366341114044, "learning_rate": 4.1488895568986156e-06, "loss": 0.3081, "step": 29459 }, { "epoch": 2.99511996746645, "grad_norm": 0.25950831174850464, "learning_rate": 4.148539852366286e-06, "loss": 0.3128, "step": 29460 }, { "epoch": 2.995221634810899, "grad_norm": 0.27090615034103394, "learning_rate": 4.1481901521234095e-06, "loss": 0.3413, "step": 29461 }, { "epoch": 2.995323302155348, "grad_norm": 0.25133344531059265, "learning_rate": 4.147840456171752e-06, "loss": 0.3606, "step": 29462 }, { "epoch": 2.9954249694997968, "grad_norm": 0.26436904072761536, "learning_rate": 4.147490764513068e-06, "loss": 0.3368, "step": 29463 }, { "epoch": 2.9955266368442457, "grad_norm": 0.2641963064670563, "learning_rate": 4.147141077149126e-06, "loss": 0.3217, "step": 29464 }, { "epoch": 2.9956283041886946, "grad_norm": 0.25921830534935, "learning_rate": 4.1467913940816825e-06, "loss": 0.314, "step": 29465 }, { "epoch": 2.9957299715331436, "grad_norm": 0.26795342564582825, "learning_rate": 4.1464417153125006e-06, "loss": 0.3282, "step": 29466 }, { "epoch": 2.9958316388775925, "grad_norm": 0.28699252009391785, "learning_rate": 4.146092040843346e-06, "loss": 0.3505, "step": 29467 }, { "epoch": 2.9959333062220415, "grad_norm": 0.270093709230423, "learning_rate": 4.145742370675974e-06, "loss": 0.3064, "step": 29468 }, { "epoch": 2.9960349735664904, "grad_norm": 0.266915500164032, "learning_rate": 4.14539270481215e-06, "loss": 0.3328, "step": 29469 }, { "epoch": 2.9961366409109393, "grad_norm": 0.27747783064842224, "learning_rate": 4.1450430432536335e-06, "loss": 0.328, "step": 29470 }, { "epoch": 2.9962383082553883, "grad_norm": 0.2804363965988159, "learning_rate": 4.144693386002187e-06, "loss": 0.3563, "step": 29471 }, { "epoch": 2.9963399755998372, "grad_norm": 0.2499101459980011, "learning_rate": 4.144343733059574e-06, "loss": 0.3461, "step": 29472 }, { "epoch": 2.996441642944286, "grad_norm": 0.27271756529808044, "learning_rate": 4.1439940844275515e-06, "loss": 0.3369, "step": 29473 }, { "epoch": 2.996543310288735, "grad_norm": 0.2636006772518158, "learning_rate": 4.143644440107885e-06, "loss": 0.3375, "step": 29474 }, { "epoch": 2.996644977633184, "grad_norm": 0.2700725793838501, "learning_rate": 4.143294800102333e-06, "loss": 0.3438, "step": 29475 }, { "epoch": 2.996746644977633, "grad_norm": 0.27300119400024414, "learning_rate": 4.142945164412657e-06, "loss": 0.3144, "step": 29476 }, { "epoch": 2.996848312322082, "grad_norm": 0.2783084511756897, "learning_rate": 4.1425955330406234e-06, "loss": 0.3584, "step": 29477 }, { "epoch": 2.996949979666531, "grad_norm": 0.281911164522171, "learning_rate": 4.142245905987987e-06, "loss": 0.3848, "step": 29478 }, { "epoch": 2.99705164701098, "grad_norm": 0.25592905282974243, "learning_rate": 4.141896283256513e-06, "loss": 0.3217, "step": 29479 }, { "epoch": 2.997153314355429, "grad_norm": 0.24946008622646332, "learning_rate": 4.141546664847962e-06, "loss": 0.3208, "step": 29480 }, { "epoch": 2.997254981699878, "grad_norm": 0.26688891649246216, "learning_rate": 4.141197050764092e-06, "loss": 0.3294, "step": 29481 }, { "epoch": 2.997356649044327, "grad_norm": 0.2879272699356079, "learning_rate": 4.140847441006671e-06, "loss": 0.3275, "step": 29482 }, { "epoch": 2.997458316388776, "grad_norm": 0.2505589723587036, "learning_rate": 4.140497835577455e-06, "loss": 0.3252, "step": 29483 }, { "epoch": 2.997559983733225, "grad_norm": 0.27134257555007935, "learning_rate": 4.140148234478207e-06, "loss": 0.3146, "step": 29484 }, { "epoch": 2.997661651077674, "grad_norm": 0.2529006898403168, "learning_rate": 4.139798637710687e-06, "loss": 0.3241, "step": 29485 }, { "epoch": 2.997763318422123, "grad_norm": 0.2865122854709625, "learning_rate": 4.139449045276657e-06, "loss": 0.3301, "step": 29486 }, { "epoch": 2.997864985766572, "grad_norm": 0.2896263301372528, "learning_rate": 4.1390994571778785e-06, "loss": 0.3622, "step": 29487 }, { "epoch": 2.9979666531110207, "grad_norm": 0.27912184596061707, "learning_rate": 4.138749873416112e-06, "loss": 0.3229, "step": 29488 }, { "epoch": 2.9980683204554697, "grad_norm": 0.256924569606781, "learning_rate": 4.13840029399312e-06, "loss": 0.3503, "step": 29489 }, { "epoch": 2.9981699877999186, "grad_norm": 0.2676566243171692, "learning_rate": 4.138050718910662e-06, "loss": 0.3818, "step": 29490 }, { "epoch": 2.9982716551443676, "grad_norm": 0.27830713987350464, "learning_rate": 4.137701148170499e-06, "loss": 0.3095, "step": 29491 }, { "epoch": 2.9983733224888165, "grad_norm": 0.25317683815956116, "learning_rate": 4.137351581774394e-06, "loss": 0.3037, "step": 29492 }, { "epoch": 2.9984749898332654, "grad_norm": 0.26880770921707153, "learning_rate": 4.137002019724106e-06, "loss": 0.3096, "step": 29493 }, { "epoch": 2.9985766571777144, "grad_norm": 0.2697086036205292, "learning_rate": 4.136652462021396e-06, "loss": 0.3416, "step": 29494 }, { "epoch": 2.9986783245221633, "grad_norm": 0.25478246808052063, "learning_rate": 4.136302908668027e-06, "loss": 0.3045, "step": 29495 }, { "epoch": 2.9987799918666127, "grad_norm": 0.2922338545322418, "learning_rate": 4.1359533596657575e-06, "loss": 0.313, "step": 29496 }, { "epoch": 2.9988816592110616, "grad_norm": 0.28294238448143005, "learning_rate": 4.135603815016351e-06, "loss": 0.3289, "step": 29497 }, { "epoch": 2.9989833265555106, "grad_norm": 0.26285403966903687, "learning_rate": 4.135254274721566e-06, "loss": 0.3277, "step": 29498 }, { "epoch": 2.9990849938999595, "grad_norm": 0.27651920914649963, "learning_rate": 4.134904738783165e-06, "loss": 0.3479, "step": 29499 }, { "epoch": 2.9991866612444085, "grad_norm": 0.2630539536476135, "learning_rate": 4.1345552072029085e-06, "loss": 0.3283, "step": 29500 }, { "epoch": 2.9992883285888574, "grad_norm": 0.2581442594528198, "learning_rate": 4.134205679982557e-06, "loss": 0.3327, "step": 29501 }, { "epoch": 2.9993899959333064, "grad_norm": 0.28366002440452576, "learning_rate": 4.133856157123871e-06, "loss": 0.3618, "step": 29502 }, { "epoch": 2.9994916632777553, "grad_norm": 0.2457103729248047, "learning_rate": 4.133506638628613e-06, "loss": 0.3284, "step": 29503 }, { "epoch": 2.9995933306222042, "grad_norm": 0.24691925942897797, "learning_rate": 4.133157124498542e-06, "loss": 0.3253, "step": 29504 }, { "epoch": 2.999694997966653, "grad_norm": 0.2603922188282013, "learning_rate": 4.132807614735419e-06, "loss": 0.3055, "step": 29505 }, { "epoch": 2.999796665311102, "grad_norm": 0.2780344486236572, "learning_rate": 4.1324581093410055e-06, "loss": 0.3292, "step": 29506 }, { "epoch": 2.999898332655551, "grad_norm": 0.26777204871177673, "learning_rate": 4.132108608317063e-06, "loss": 0.3396, "step": 29507 }, { "epoch": 3.0, "grad_norm": 0.25668802857398987, "learning_rate": 4.131759111665349e-06, "loss": 0.3166, "step": 29508 }, { "epoch": 3.000101667344449, "grad_norm": 0.27334752678871155, "learning_rate": 4.131409619387627e-06, "loss": 0.3116, "step": 29509 }, { "epoch": 3.000203334688898, "grad_norm": 0.308361679315567, "learning_rate": 4.131060131485658e-06, "loss": 0.3113, "step": 29510 }, { "epoch": 3.000305002033347, "grad_norm": 0.28215262293815613, "learning_rate": 4.1307106479612e-06, "loss": 0.311, "step": 29511 }, { "epoch": 3.0004066693777958, "grad_norm": 0.25775274634361267, "learning_rate": 4.130361168816016e-06, "loss": 0.3008, "step": 29512 }, { "epoch": 3.0005083367222447, "grad_norm": 0.2537052631378174, "learning_rate": 4.130011694051866e-06, "loss": 0.2884, "step": 29513 }, { "epoch": 3.0006100040666936, "grad_norm": 0.2863805294036865, "learning_rate": 4.129662223670511e-06, "loss": 0.325, "step": 29514 }, { "epoch": 3.0007116714111426, "grad_norm": 0.2863597571849823, "learning_rate": 4.129312757673709e-06, "loss": 0.2778, "step": 29515 }, { "epoch": 3.0008133387555915, "grad_norm": 0.31219398975372314, "learning_rate": 4.128963296063223e-06, "loss": 0.3389, "step": 29516 }, { "epoch": 3.0009150061000405, "grad_norm": 0.29186275601387024, "learning_rate": 4.128613838840814e-06, "loss": 0.307, "step": 29517 }, { "epoch": 3.0010166734444894, "grad_norm": 0.2834813892841339, "learning_rate": 4.128264386008241e-06, "loss": 0.2846, "step": 29518 }, { "epoch": 3.001118340788939, "grad_norm": 0.2776786983013153, "learning_rate": 4.127914937567264e-06, "loss": 0.3098, "step": 29519 }, { "epoch": 3.0012200081333877, "grad_norm": 0.2728758454322815, "learning_rate": 4.1275654935196456e-06, "loss": 0.2894, "step": 29520 }, { "epoch": 3.0013216754778367, "grad_norm": 0.2611594498157501, "learning_rate": 4.127216053867144e-06, "loss": 0.3284, "step": 29521 }, { "epoch": 3.0014233428222856, "grad_norm": 0.2850554287433624, "learning_rate": 4.126866618611521e-06, "loss": 0.2903, "step": 29522 }, { "epoch": 3.0015250101667346, "grad_norm": 0.2729293406009674, "learning_rate": 4.126517187754535e-06, "loss": 0.3188, "step": 29523 }, { "epoch": 3.0016266775111835, "grad_norm": 0.2687503695487976, "learning_rate": 4.1261677612979504e-06, "loss": 0.3115, "step": 29524 }, { "epoch": 3.0017283448556324, "grad_norm": 0.2659633159637451, "learning_rate": 4.125818339243523e-06, "loss": 0.3035, "step": 29525 }, { "epoch": 3.0018300122000814, "grad_norm": 0.274754136800766, "learning_rate": 4.125468921593015e-06, "loss": 0.3152, "step": 29526 }, { "epoch": 3.0019316795445303, "grad_norm": 0.3005337715148926, "learning_rate": 4.125119508348188e-06, "loss": 0.3486, "step": 29527 }, { "epoch": 3.0020333468889793, "grad_norm": 0.26889824867248535, "learning_rate": 4.124770099510801e-06, "loss": 0.2876, "step": 29528 }, { "epoch": 3.002135014233428, "grad_norm": 0.30016323924064636, "learning_rate": 4.124420695082614e-06, "loss": 0.3145, "step": 29529 }, { "epoch": 3.002236681577877, "grad_norm": 0.2797967791557312, "learning_rate": 4.1240712950653865e-06, "loss": 0.3026, "step": 29530 }, { "epoch": 3.002338348922326, "grad_norm": 0.2580953538417816, "learning_rate": 4.12372189946088e-06, "loss": 0.3446, "step": 29531 }, { "epoch": 3.002440016266775, "grad_norm": 0.2484581470489502, "learning_rate": 4.123372508270856e-06, "loss": 0.3146, "step": 29532 }, { "epoch": 3.002541683611224, "grad_norm": 0.2914942800998688, "learning_rate": 4.123023121497071e-06, "loss": 0.2852, "step": 29533 }, { "epoch": 3.002643350955673, "grad_norm": 0.2743992805480957, "learning_rate": 4.122673739141288e-06, "loss": 0.2988, "step": 29534 }, { "epoch": 3.002745018300122, "grad_norm": 0.29705435037612915, "learning_rate": 4.122324361205266e-06, "loss": 0.3175, "step": 29535 }, { "epoch": 3.002846685644571, "grad_norm": 0.2624146044254303, "learning_rate": 4.121974987690764e-06, "loss": 0.3178, "step": 29536 }, { "epoch": 3.0029483529890197, "grad_norm": 0.29078125953674316, "learning_rate": 4.121625618599546e-06, "loss": 0.3135, "step": 29537 }, { "epoch": 3.0030500203334687, "grad_norm": 0.2908896207809448, "learning_rate": 4.121276253933368e-06, "loss": 0.321, "step": 29538 }, { "epoch": 3.003151687677918, "grad_norm": 0.2613432705402374, "learning_rate": 4.120926893693991e-06, "loss": 0.2878, "step": 29539 }, { "epoch": 3.003253355022367, "grad_norm": 0.2976389527320862, "learning_rate": 4.120577537883176e-06, "loss": 0.2848, "step": 29540 }, { "epoch": 3.003355022366816, "grad_norm": 0.30415278673171997, "learning_rate": 4.120228186502681e-06, "loss": 0.2982, "step": 29541 }, { "epoch": 3.003456689711265, "grad_norm": 0.2810455858707428, "learning_rate": 4.11987883955427e-06, "loss": 0.2737, "step": 29542 }, { "epoch": 3.003558357055714, "grad_norm": 0.2687559425830841, "learning_rate": 4.119529497039699e-06, "loss": 0.2883, "step": 29543 }, { "epoch": 3.0036600244001628, "grad_norm": 0.26290372014045715, "learning_rate": 4.119180158960729e-06, "loss": 0.3235, "step": 29544 }, { "epoch": 3.0037616917446117, "grad_norm": 0.27820685505867004, "learning_rate": 4.118830825319119e-06, "loss": 0.2872, "step": 29545 }, { "epoch": 3.0038633590890607, "grad_norm": 0.257563978433609, "learning_rate": 4.118481496116631e-06, "loss": 0.2769, "step": 29546 }, { "epoch": 3.0039650264335096, "grad_norm": 0.2663654685020447, "learning_rate": 4.118132171355024e-06, "loss": 0.2906, "step": 29547 }, { "epoch": 3.0040666937779585, "grad_norm": 0.2820231318473816, "learning_rate": 4.117782851036058e-06, "loss": 0.3147, "step": 29548 }, { "epoch": 3.0041683611224075, "grad_norm": 0.26611199975013733, "learning_rate": 4.117433535161491e-06, "loss": 0.3098, "step": 29549 }, { "epoch": 3.0042700284668564, "grad_norm": 0.2657054662704468, "learning_rate": 4.117084223733085e-06, "loss": 0.3191, "step": 29550 }, { "epoch": 3.0043716958113054, "grad_norm": 0.26042309403419495, "learning_rate": 4.116734916752598e-06, "loss": 0.297, "step": 29551 }, { "epoch": 3.0044733631557543, "grad_norm": 0.28310543298721313, "learning_rate": 4.116385614221792e-06, "loss": 0.3396, "step": 29552 }, { "epoch": 3.0045750305002032, "grad_norm": 0.28542059659957886, "learning_rate": 4.116036316142424e-06, "loss": 0.3025, "step": 29553 }, { "epoch": 3.004676697844652, "grad_norm": 0.28097665309906006, "learning_rate": 4.1156870225162555e-06, "loss": 0.3034, "step": 29554 }, { "epoch": 3.004778365189101, "grad_norm": 0.284371942281723, "learning_rate": 4.115337733345045e-06, "loss": 0.283, "step": 29555 }, { "epoch": 3.00488003253355, "grad_norm": 0.2902659773826599, "learning_rate": 4.114988448630553e-06, "loss": 0.307, "step": 29556 }, { "epoch": 3.004981699877999, "grad_norm": 0.28913140296936035, "learning_rate": 4.11463916837454e-06, "loss": 0.3081, "step": 29557 }, { "epoch": 3.005083367222448, "grad_norm": 0.24756690859794617, "learning_rate": 4.114289892578764e-06, "loss": 0.292, "step": 29558 }, { "epoch": 3.005185034566897, "grad_norm": 0.2836376428604126, "learning_rate": 4.113940621244985e-06, "loss": 0.3321, "step": 29559 }, { "epoch": 3.0052867019113463, "grad_norm": 0.28767630457878113, "learning_rate": 4.1135913543749625e-06, "loss": 0.3278, "step": 29560 }, { "epoch": 3.0053883692557952, "grad_norm": 0.30764105916023254, "learning_rate": 4.113242091970455e-06, "loss": 0.343, "step": 29561 }, { "epoch": 3.005490036600244, "grad_norm": 0.2631349563598633, "learning_rate": 4.112892834033225e-06, "loss": 0.3117, "step": 29562 }, { "epoch": 3.005591703944693, "grad_norm": 0.2792651951313019, "learning_rate": 4.112543580565029e-06, "loss": 0.2882, "step": 29563 }, { "epoch": 3.005693371289142, "grad_norm": 0.2629472315311432, "learning_rate": 4.112194331567628e-06, "loss": 0.3204, "step": 29564 }, { "epoch": 3.005795038633591, "grad_norm": 0.27814391255378723, "learning_rate": 4.111845087042781e-06, "loss": 0.2949, "step": 29565 }, { "epoch": 3.00589670597804, "grad_norm": 0.26569682359695435, "learning_rate": 4.1114958469922465e-06, "loss": 0.3152, "step": 29566 }, { "epoch": 3.005998373322489, "grad_norm": 0.2853868007659912, "learning_rate": 4.1111466114177856e-06, "loss": 0.2993, "step": 29567 }, { "epoch": 3.006100040666938, "grad_norm": 0.266203910112381, "learning_rate": 4.110797380321156e-06, "loss": 0.3396, "step": 29568 }, { "epoch": 3.0062017080113868, "grad_norm": 0.2809543311595917, "learning_rate": 4.11044815370412e-06, "loss": 0.3045, "step": 29569 }, { "epoch": 3.0063033753558357, "grad_norm": 0.24767783284187317, "learning_rate": 4.110098931568432e-06, "loss": 0.2968, "step": 29570 }, { "epoch": 3.0064050427002846, "grad_norm": 0.27558648586273193, "learning_rate": 4.109749713915855e-06, "loss": 0.3383, "step": 29571 }, { "epoch": 3.0065067100447336, "grad_norm": 0.2529105246067047, "learning_rate": 4.109400500748148e-06, "loss": 0.307, "step": 29572 }, { "epoch": 3.0066083773891825, "grad_norm": 0.26968103647232056, "learning_rate": 4.10905129206707e-06, "loss": 0.3149, "step": 29573 }, { "epoch": 3.0067100447336315, "grad_norm": 0.26199376583099365, "learning_rate": 4.10870208787438e-06, "loss": 0.3208, "step": 29574 }, { "epoch": 3.0068117120780804, "grad_norm": 0.28784623742103577, "learning_rate": 4.1083528881718356e-06, "loss": 0.2774, "step": 29575 }, { "epoch": 3.0069133794225293, "grad_norm": 0.2894403338432312, "learning_rate": 4.1080036929611975e-06, "loss": 0.359, "step": 29576 }, { "epoch": 3.0070150467669783, "grad_norm": 0.2744119167327881, "learning_rate": 4.107654502244227e-06, "loss": 0.2814, "step": 29577 }, { "epoch": 3.0071167141114272, "grad_norm": 0.26074525713920593, "learning_rate": 4.107305316022679e-06, "loss": 0.2949, "step": 29578 }, { "epoch": 3.007218381455876, "grad_norm": 0.2860235273838043, "learning_rate": 4.1069561342983175e-06, "loss": 0.3217, "step": 29579 }, { "epoch": 3.0073200488003256, "grad_norm": 0.2838551998138428, "learning_rate": 4.106606957072895e-06, "loss": 0.2886, "step": 29580 }, { "epoch": 3.0074217161447745, "grad_norm": 0.2656826972961426, "learning_rate": 4.106257784348177e-06, "loss": 0.3329, "step": 29581 }, { "epoch": 3.0075233834892234, "grad_norm": 0.2642820477485657, "learning_rate": 4.105908616125917e-06, "loss": 0.3215, "step": 29582 }, { "epoch": 3.0076250508336724, "grad_norm": 0.25655263662338257, "learning_rate": 4.105559452407879e-06, "loss": 0.3185, "step": 29583 }, { "epoch": 3.0077267181781213, "grad_norm": 0.24477218091487885, "learning_rate": 4.105210293195821e-06, "loss": 0.3217, "step": 29584 }, { "epoch": 3.0078283855225703, "grad_norm": 0.25560262799263, "learning_rate": 4.104861138491499e-06, "loss": 0.3032, "step": 29585 }, { "epoch": 3.007930052867019, "grad_norm": 0.30294162034988403, "learning_rate": 4.104511988296676e-06, "loss": 0.3243, "step": 29586 }, { "epoch": 3.008031720211468, "grad_norm": 0.2577213943004608, "learning_rate": 4.1041628426131054e-06, "loss": 0.3085, "step": 29587 }, { "epoch": 3.008133387555917, "grad_norm": 0.28553739190101624, "learning_rate": 4.103813701442551e-06, "loss": 0.3043, "step": 29588 }, { "epoch": 3.008235054900366, "grad_norm": 0.2887766659259796, "learning_rate": 4.103464564786773e-06, "loss": 0.2988, "step": 29589 }, { "epoch": 3.008336722244815, "grad_norm": 0.28582635521888733, "learning_rate": 4.1031154326475235e-06, "loss": 0.3393, "step": 29590 }, { "epoch": 3.008438389589264, "grad_norm": 0.27168288826942444, "learning_rate": 4.102766305026569e-06, "loss": 0.3324, "step": 29591 }, { "epoch": 3.008540056933713, "grad_norm": 0.26041507720947266, "learning_rate": 4.10241718192566e-06, "loss": 0.3084, "step": 29592 }, { "epoch": 3.008641724278162, "grad_norm": 0.26894959807395935, "learning_rate": 4.102068063346563e-06, "loss": 0.2827, "step": 29593 }, { "epoch": 3.0087433916226107, "grad_norm": 0.25886252522468567, "learning_rate": 4.101718949291034e-06, "loss": 0.2947, "step": 29594 }, { "epoch": 3.0088450589670597, "grad_norm": 0.2653373181819916, "learning_rate": 4.101369839760829e-06, "loss": 0.322, "step": 29595 }, { "epoch": 3.0089467263115086, "grad_norm": 0.26145920157432556, "learning_rate": 4.101020734757713e-06, "loss": 0.3416, "step": 29596 }, { "epoch": 3.0090483936559576, "grad_norm": 0.2735401391983032, "learning_rate": 4.100671634283438e-06, "loss": 0.3132, "step": 29597 }, { "epoch": 3.0091500610004065, "grad_norm": 0.265619695186615, "learning_rate": 4.100322538339766e-06, "loss": 0.324, "step": 29598 }, { "epoch": 3.0092517283448554, "grad_norm": 0.2966286540031433, "learning_rate": 4.099973446928457e-06, "loss": 0.2888, "step": 29599 }, { "epoch": 3.0093533956893044, "grad_norm": 0.2738557755947113, "learning_rate": 4.099624360051265e-06, "loss": 0.3439, "step": 29600 }, { "epoch": 3.0094550630337538, "grad_norm": 0.27288684248924255, "learning_rate": 4.099275277709954e-06, "loss": 0.292, "step": 29601 }, { "epoch": 3.0095567303782027, "grad_norm": 0.28347933292388916, "learning_rate": 4.098926199906279e-06, "loss": 0.3105, "step": 29602 }, { "epoch": 3.0096583977226516, "grad_norm": 0.290615051984787, "learning_rate": 4.098577126641998e-06, "loss": 0.3359, "step": 29603 }, { "epoch": 3.0097600650671006, "grad_norm": 0.27377113699913025, "learning_rate": 4.098228057918875e-06, "loss": 0.2986, "step": 29604 }, { "epoch": 3.0098617324115495, "grad_norm": 0.26734375953674316, "learning_rate": 4.0978789937386615e-06, "loss": 0.3383, "step": 29605 }, { "epoch": 3.0099633997559985, "grad_norm": 0.2870127856731415, "learning_rate": 4.0975299341031226e-06, "loss": 0.2956, "step": 29606 }, { "epoch": 3.0100650671004474, "grad_norm": 0.271482914686203, "learning_rate": 4.097180879014011e-06, "loss": 0.3404, "step": 29607 }, { "epoch": 3.0101667344448964, "grad_norm": 0.3143388032913208, "learning_rate": 4.096831828473086e-06, "loss": 0.345, "step": 29608 }, { "epoch": 3.0102684017893453, "grad_norm": 0.2460726797580719, "learning_rate": 4.096482782482112e-06, "loss": 0.2702, "step": 29609 }, { "epoch": 3.0103700691337942, "grad_norm": 0.2806214988231659, "learning_rate": 4.096133741042839e-06, "loss": 0.2778, "step": 29610 }, { "epoch": 3.010471736478243, "grad_norm": 0.27696165442466736, "learning_rate": 4.0957847041570325e-06, "loss": 0.2998, "step": 29611 }, { "epoch": 3.010573403822692, "grad_norm": 0.27564528584480286, "learning_rate": 4.095435671826446e-06, "loss": 0.2801, "step": 29612 }, { "epoch": 3.010675071167141, "grad_norm": 0.2706082761287689, "learning_rate": 4.095086644052838e-06, "loss": 0.3205, "step": 29613 }, { "epoch": 3.01077673851159, "grad_norm": 0.2650549113750458, "learning_rate": 4.094737620837973e-06, "loss": 0.3429, "step": 29614 }, { "epoch": 3.010878405856039, "grad_norm": 0.26192936301231384, "learning_rate": 4.094388602183601e-06, "loss": 0.3393, "step": 29615 }, { "epoch": 3.010980073200488, "grad_norm": 0.26619625091552734, "learning_rate": 4.094039588091486e-06, "loss": 0.3036, "step": 29616 }, { "epoch": 3.011081740544937, "grad_norm": 0.26475790143013, "learning_rate": 4.0936905785633826e-06, "loss": 0.3124, "step": 29617 }, { "epoch": 3.0111834078893858, "grad_norm": 0.2854062616825104, "learning_rate": 4.09334157360105e-06, "loss": 0.2875, "step": 29618 }, { "epoch": 3.0112850752338347, "grad_norm": 0.2645176649093628, "learning_rate": 4.0929925732062505e-06, "loss": 0.3179, "step": 29619 }, { "epoch": 3.0113867425782836, "grad_norm": 0.2682263255119324, "learning_rate": 4.092643577380737e-06, "loss": 0.2726, "step": 29620 }, { "epoch": 3.011488409922733, "grad_norm": 0.28338372707366943, "learning_rate": 4.09229458612627e-06, "loss": 0.3452, "step": 29621 }, { "epoch": 3.011590077267182, "grad_norm": 0.2712760269641876, "learning_rate": 4.091945599444606e-06, "loss": 0.3162, "step": 29622 }, { "epoch": 3.011691744611631, "grad_norm": 0.2819405198097229, "learning_rate": 4.0915966173375035e-06, "loss": 0.3073, "step": 29623 }, { "epoch": 3.01179341195608, "grad_norm": 0.2672491669654846, "learning_rate": 4.091247639806725e-06, "loss": 0.2996, "step": 29624 }, { "epoch": 3.011895079300529, "grad_norm": 0.2771712839603424, "learning_rate": 4.090898666854022e-06, "loss": 0.2953, "step": 29625 }, { "epoch": 3.0119967466449777, "grad_norm": 0.29524850845336914, "learning_rate": 4.090549698481157e-06, "loss": 0.3017, "step": 29626 }, { "epoch": 3.0120984139894267, "grad_norm": 0.2792363166809082, "learning_rate": 4.090200734689885e-06, "loss": 0.3154, "step": 29627 }, { "epoch": 3.0122000813338756, "grad_norm": 0.276934951543808, "learning_rate": 4.089851775481965e-06, "loss": 0.3367, "step": 29628 }, { "epoch": 3.0123017486783246, "grad_norm": 0.3102705478668213, "learning_rate": 4.089502820859158e-06, "loss": 0.3077, "step": 29629 }, { "epoch": 3.0124034160227735, "grad_norm": 0.27875351905822754, "learning_rate": 4.089153870823218e-06, "loss": 0.3197, "step": 29630 }, { "epoch": 3.0125050833672224, "grad_norm": 0.28892770409584045, "learning_rate": 4.088804925375905e-06, "loss": 0.3272, "step": 29631 }, { "epoch": 3.0126067507116714, "grad_norm": 0.2794051170349121, "learning_rate": 4.088455984518976e-06, "loss": 0.2895, "step": 29632 }, { "epoch": 3.0127084180561203, "grad_norm": 0.2721199691295624, "learning_rate": 4.088107048254187e-06, "loss": 0.3149, "step": 29633 }, { "epoch": 3.0128100854005693, "grad_norm": 0.2586634159088135, "learning_rate": 4.087758116583302e-06, "loss": 0.3031, "step": 29634 }, { "epoch": 3.012911752745018, "grad_norm": 0.2718696892261505, "learning_rate": 4.087409189508073e-06, "loss": 0.3069, "step": 29635 }, { "epoch": 3.013013420089467, "grad_norm": 0.27897563576698303, "learning_rate": 4.08706026703026e-06, "loss": 0.3026, "step": 29636 }, { "epoch": 3.013115087433916, "grad_norm": 0.28723135590553284, "learning_rate": 4.086711349151619e-06, "loss": 0.2981, "step": 29637 }, { "epoch": 3.013216754778365, "grad_norm": 0.284993976354599, "learning_rate": 4.086362435873909e-06, "loss": 0.2986, "step": 29638 }, { "epoch": 3.013318422122814, "grad_norm": 0.27520594000816345, "learning_rate": 4.08601352719889e-06, "loss": 0.2915, "step": 29639 }, { "epoch": 3.013420089467263, "grad_norm": 0.27616941928863525, "learning_rate": 4.085664623128316e-06, "loss": 0.3285, "step": 29640 }, { "epoch": 3.013521756811712, "grad_norm": 0.2953192889690399, "learning_rate": 4.085315723663948e-06, "loss": 0.3348, "step": 29641 }, { "epoch": 3.0136234241561612, "grad_norm": 0.25893744826316833, "learning_rate": 4.08496682880754e-06, "loss": 0.3141, "step": 29642 }, { "epoch": 3.01372509150061, "grad_norm": 0.27727994322776794, "learning_rate": 4.084617938560852e-06, "loss": 0.3037, "step": 29643 }, { "epoch": 3.013826758845059, "grad_norm": 0.27783364057540894, "learning_rate": 4.084269052925643e-06, "loss": 0.2915, "step": 29644 }, { "epoch": 3.013928426189508, "grad_norm": 0.2874389588832855, "learning_rate": 4.083920171903667e-06, "loss": 0.2988, "step": 29645 }, { "epoch": 3.014030093533957, "grad_norm": 0.2668006420135498, "learning_rate": 4.083571295496685e-06, "loss": 0.3099, "step": 29646 }, { "epoch": 3.014131760878406, "grad_norm": 0.25720781087875366, "learning_rate": 4.0832224237064524e-06, "loss": 0.3367, "step": 29647 }, { "epoch": 3.014233428222855, "grad_norm": 0.26761966943740845, "learning_rate": 4.0828735565347265e-06, "loss": 0.3314, "step": 29648 }, { "epoch": 3.014335095567304, "grad_norm": 0.30021005868911743, "learning_rate": 4.082524693983267e-06, "loss": 0.3321, "step": 29649 }, { "epoch": 3.0144367629117528, "grad_norm": 0.25882869958877563, "learning_rate": 4.082175836053829e-06, "loss": 0.3073, "step": 29650 }, { "epoch": 3.0145384302562017, "grad_norm": 0.2738565504550934, "learning_rate": 4.081826982748172e-06, "loss": 0.323, "step": 29651 }, { "epoch": 3.0146400976006507, "grad_norm": 0.29040101170539856, "learning_rate": 4.081478134068052e-06, "loss": 0.2867, "step": 29652 }, { "epoch": 3.0147417649450996, "grad_norm": 0.2885318994522095, "learning_rate": 4.081129290015227e-06, "loss": 0.2951, "step": 29653 }, { "epoch": 3.0148434322895485, "grad_norm": 0.2906208634376526, "learning_rate": 4.080780450591454e-06, "loss": 0.3054, "step": 29654 }, { "epoch": 3.0149450996339975, "grad_norm": 0.2950226664543152, "learning_rate": 4.080431615798491e-06, "loss": 0.2991, "step": 29655 }, { "epoch": 3.0150467669784464, "grad_norm": 0.27757155895233154, "learning_rate": 4.080082785638095e-06, "loss": 0.2867, "step": 29656 }, { "epoch": 3.0151484343228954, "grad_norm": 0.28387758135795593, "learning_rate": 4.079733960112023e-06, "loss": 0.2874, "step": 29657 }, { "epoch": 3.0152501016673443, "grad_norm": 0.2724127769470215, "learning_rate": 4.079385139222033e-06, "loss": 0.3102, "step": 29658 }, { "epoch": 3.0153517690117932, "grad_norm": 0.2702241539955139, "learning_rate": 4.079036322969881e-06, "loss": 0.2916, "step": 29659 }, { "epoch": 3.015453436356242, "grad_norm": 0.28974035382270813, "learning_rate": 4.078687511357326e-06, "loss": 0.3086, "step": 29660 }, { "epoch": 3.015555103700691, "grad_norm": 0.2818461060523987, "learning_rate": 4.078338704386125e-06, "loss": 0.3248, "step": 29661 }, { "epoch": 3.0156567710451405, "grad_norm": 0.2841018736362457, "learning_rate": 4.077989902058033e-06, "loss": 0.3076, "step": 29662 }, { "epoch": 3.0157584383895895, "grad_norm": 0.29036393761634827, "learning_rate": 4.077641104374809e-06, "loss": 0.3142, "step": 29663 }, { "epoch": 3.0158601057340384, "grad_norm": 0.2647801637649536, "learning_rate": 4.0772923113382114e-06, "loss": 0.295, "step": 29664 }, { "epoch": 3.0159617730784873, "grad_norm": 0.27827635407447815, "learning_rate": 4.076943522949995e-06, "loss": 0.2866, "step": 29665 }, { "epoch": 3.0160634404229363, "grad_norm": 0.2902100682258606, "learning_rate": 4.076594739211918e-06, "loss": 0.3371, "step": 29666 }, { "epoch": 3.0161651077673852, "grad_norm": 0.2758884131908417, "learning_rate": 4.076245960125737e-06, "loss": 0.2921, "step": 29667 }, { "epoch": 3.016266775111834, "grad_norm": 0.2999632954597473, "learning_rate": 4.075897185693208e-06, "loss": 0.296, "step": 29668 }, { "epoch": 3.016368442456283, "grad_norm": 0.2969745993614197, "learning_rate": 4.075548415916092e-06, "loss": 0.3127, "step": 29669 }, { "epoch": 3.016470109800732, "grad_norm": 0.2610277235507965, "learning_rate": 4.075199650796141e-06, "loss": 0.295, "step": 29670 }, { "epoch": 3.016571777145181, "grad_norm": 0.26632577180862427, "learning_rate": 4.074850890335117e-06, "loss": 0.2855, "step": 29671 }, { "epoch": 3.01667344448963, "grad_norm": 0.26662981510162354, "learning_rate": 4.074502134534773e-06, "loss": 0.3098, "step": 29672 }, { "epoch": 3.016775111834079, "grad_norm": 0.26064980030059814, "learning_rate": 4.074153383396866e-06, "loss": 0.2963, "step": 29673 }, { "epoch": 3.016876779178528, "grad_norm": 0.25926533341407776, "learning_rate": 4.073804636923156e-06, "loss": 0.2863, "step": 29674 }, { "epoch": 3.0169784465229768, "grad_norm": 0.2907092273235321, "learning_rate": 4.073455895115397e-06, "loss": 0.282, "step": 29675 }, { "epoch": 3.0170801138674257, "grad_norm": 0.277195543050766, "learning_rate": 4.073107157975349e-06, "loss": 0.3044, "step": 29676 }, { "epoch": 3.0171817812118746, "grad_norm": 0.26413223147392273, "learning_rate": 4.072758425504764e-06, "loss": 0.3081, "step": 29677 }, { "epoch": 3.0172834485563236, "grad_norm": 0.2883267104625702, "learning_rate": 4.072409697705403e-06, "loss": 0.339, "step": 29678 }, { "epoch": 3.0173851159007725, "grad_norm": 0.27315565943717957, "learning_rate": 4.072060974579023e-06, "loss": 0.3049, "step": 29679 }, { "epoch": 3.0174867832452215, "grad_norm": 0.27948710322380066, "learning_rate": 4.071712256127377e-06, "loss": 0.3254, "step": 29680 }, { "epoch": 3.0175884505896704, "grad_norm": 0.2907004952430725, "learning_rate": 4.071363542352226e-06, "loss": 0.3064, "step": 29681 }, { "epoch": 3.0176901179341193, "grad_norm": 0.28143683075904846, "learning_rate": 4.071014833255323e-06, "loss": 0.3054, "step": 29682 }, { "epoch": 3.0177917852785687, "grad_norm": 0.26676085591316223, "learning_rate": 4.070666128838428e-06, "loss": 0.3322, "step": 29683 }, { "epoch": 3.0178934526230177, "grad_norm": 0.2819902002811432, "learning_rate": 4.070317429103295e-06, "loss": 0.302, "step": 29684 }, { "epoch": 3.0179951199674666, "grad_norm": 0.26211369037628174, "learning_rate": 4.069968734051683e-06, "loss": 0.3177, "step": 29685 }, { "epoch": 3.0180967873119156, "grad_norm": 0.2765306234359741, "learning_rate": 4.069620043685347e-06, "loss": 0.3146, "step": 29686 }, { "epoch": 3.0181984546563645, "grad_norm": 0.2644635736942291, "learning_rate": 4.069271358006044e-06, "loss": 0.2865, "step": 29687 }, { "epoch": 3.0183001220008134, "grad_norm": 0.2650223672389984, "learning_rate": 4.06892267701553e-06, "loss": 0.342, "step": 29688 }, { "epoch": 3.0184017893452624, "grad_norm": 0.2658819258213043, "learning_rate": 4.068574000715564e-06, "loss": 0.2851, "step": 29689 }, { "epoch": 3.0185034566897113, "grad_norm": 0.25799083709716797, "learning_rate": 4.0682253291078994e-06, "loss": 0.2939, "step": 29690 }, { "epoch": 3.0186051240341603, "grad_norm": 0.2724985182285309, "learning_rate": 4.067876662194296e-06, "loss": 0.3251, "step": 29691 }, { "epoch": 3.018706791378609, "grad_norm": 0.25652897357940674, "learning_rate": 4.067527999976506e-06, "loss": 0.2993, "step": 29692 }, { "epoch": 3.018808458723058, "grad_norm": 0.24813029170036316, "learning_rate": 4.067179342456289e-06, "loss": 0.3007, "step": 29693 }, { "epoch": 3.018910126067507, "grad_norm": 0.28763994574546814, "learning_rate": 4.066830689635402e-06, "loss": 0.3675, "step": 29694 }, { "epoch": 3.019011793411956, "grad_norm": 0.26046106219291687, "learning_rate": 4.0664820415155995e-06, "loss": 0.317, "step": 29695 }, { "epoch": 3.019113460756405, "grad_norm": 0.29045769572257996, "learning_rate": 4.066133398098639e-06, "loss": 0.3016, "step": 29696 }, { "epoch": 3.019215128100854, "grad_norm": 0.28859400749206543, "learning_rate": 4.065784759386275e-06, "loss": 0.3571, "step": 29697 }, { "epoch": 3.019316795445303, "grad_norm": 0.27906620502471924, "learning_rate": 4.065436125380267e-06, "loss": 0.3103, "step": 29698 }, { "epoch": 3.019418462789752, "grad_norm": 0.26997777819633484, "learning_rate": 4.06508749608237e-06, "loss": 0.3008, "step": 29699 }, { "epoch": 3.0195201301342007, "grad_norm": 0.2581869661808014, "learning_rate": 4.064738871494339e-06, "loss": 0.2829, "step": 29700 }, { "epoch": 3.0196217974786497, "grad_norm": 0.24965699017047882, "learning_rate": 4.064390251617932e-06, "loss": 0.3144, "step": 29701 }, { "epoch": 3.0197234648230986, "grad_norm": 0.26578226685523987, "learning_rate": 4.064041636454904e-06, "loss": 0.3145, "step": 29702 }, { "epoch": 3.019825132167548, "grad_norm": 0.26364725828170776, "learning_rate": 4.063693026007011e-06, "loss": 0.2998, "step": 29703 }, { "epoch": 3.019926799511997, "grad_norm": 0.25497061014175415, "learning_rate": 4.063344420276012e-06, "loss": 0.3019, "step": 29704 }, { "epoch": 3.020028466856446, "grad_norm": 0.26048219203948975, "learning_rate": 4.062995819263659e-06, "loss": 0.3212, "step": 29705 }, { "epoch": 3.020130134200895, "grad_norm": 0.27137014269828796, "learning_rate": 4.062647222971713e-06, "loss": 0.2979, "step": 29706 }, { "epoch": 3.0202318015453438, "grad_norm": 0.24880005419254303, "learning_rate": 4.0622986314019255e-06, "loss": 0.2906, "step": 29707 }, { "epoch": 3.0203334688897927, "grad_norm": 0.28338250517845154, "learning_rate": 4.061950044556056e-06, "loss": 0.2915, "step": 29708 }, { "epoch": 3.0204351362342416, "grad_norm": 0.2716805040836334, "learning_rate": 4.061601462435858e-06, "loss": 0.3075, "step": 29709 }, { "epoch": 3.0205368035786906, "grad_norm": 0.2662794888019562, "learning_rate": 4.06125288504309e-06, "loss": 0.288, "step": 29710 }, { "epoch": 3.0206384709231395, "grad_norm": 0.27694588899612427, "learning_rate": 4.060904312379506e-06, "loss": 0.3025, "step": 29711 }, { "epoch": 3.0207401382675885, "grad_norm": 0.2805235683917999, "learning_rate": 4.060555744446863e-06, "loss": 0.3197, "step": 29712 }, { "epoch": 3.0208418056120374, "grad_norm": 0.2662823796272278, "learning_rate": 4.060207181246917e-06, "loss": 0.3144, "step": 29713 }, { "epoch": 3.0209434729564864, "grad_norm": 0.29829636216163635, "learning_rate": 4.059858622781425e-06, "loss": 0.307, "step": 29714 }, { "epoch": 3.0210451403009353, "grad_norm": 0.2605508267879486, "learning_rate": 4.059510069052141e-06, "loss": 0.3213, "step": 29715 }, { "epoch": 3.0211468076453842, "grad_norm": 0.3041941225528717, "learning_rate": 4.059161520060822e-06, "loss": 0.2826, "step": 29716 }, { "epoch": 3.021248474989833, "grad_norm": 0.275823712348938, "learning_rate": 4.058812975809223e-06, "loss": 0.3015, "step": 29717 }, { "epoch": 3.021350142334282, "grad_norm": 0.2667876183986664, "learning_rate": 4.0584644362991e-06, "loss": 0.3084, "step": 29718 }, { "epoch": 3.021451809678731, "grad_norm": 0.2962310016155243, "learning_rate": 4.058115901532211e-06, "loss": 0.2992, "step": 29719 }, { "epoch": 3.02155347702318, "grad_norm": 0.29053622484207153, "learning_rate": 4.057767371510309e-06, "loss": 0.3307, "step": 29720 }, { "epoch": 3.021655144367629, "grad_norm": 0.2659619450569153, "learning_rate": 4.057418846235153e-06, "loss": 0.3502, "step": 29721 }, { "epoch": 3.021756811712078, "grad_norm": 0.28309208154678345, "learning_rate": 4.057070325708495e-06, "loss": 0.3291, "step": 29722 }, { "epoch": 3.021858479056527, "grad_norm": 0.27805230021476746, "learning_rate": 4.056721809932092e-06, "loss": 0.2842, "step": 29723 }, { "epoch": 3.021960146400976, "grad_norm": 0.2942236065864563, "learning_rate": 4.0563732989077035e-06, "loss": 0.2912, "step": 29724 }, { "epoch": 3.022061813745425, "grad_norm": 0.28419551253318787, "learning_rate": 4.05602479263708e-06, "loss": 0.3318, "step": 29725 }, { "epoch": 3.022163481089874, "grad_norm": 0.25936657190322876, "learning_rate": 4.055676291121979e-06, "loss": 0.309, "step": 29726 }, { "epoch": 3.022265148434323, "grad_norm": 0.2688617408275604, "learning_rate": 4.055327794364157e-06, "loss": 0.2773, "step": 29727 }, { "epoch": 3.022366815778772, "grad_norm": 0.2759917080402374, "learning_rate": 4.054979302365368e-06, "loss": 0.3174, "step": 29728 }, { "epoch": 3.022468483123221, "grad_norm": 0.28100717067718506, "learning_rate": 4.054630815127371e-06, "loss": 0.3201, "step": 29729 }, { "epoch": 3.02257015046767, "grad_norm": 0.2647300362586975, "learning_rate": 4.054282332651917e-06, "loss": 0.2985, "step": 29730 }, { "epoch": 3.022671817812119, "grad_norm": 0.27393946051597595, "learning_rate": 4.053933854940767e-06, "loss": 0.3181, "step": 29731 }, { "epoch": 3.0227734851565677, "grad_norm": 0.2508123815059662, "learning_rate": 4.053585381995669e-06, "loss": 0.2911, "step": 29732 }, { "epoch": 3.0228751525010167, "grad_norm": 0.26611536741256714, "learning_rate": 4.053236913818385e-06, "loss": 0.3382, "step": 29733 }, { "epoch": 3.0229768198454656, "grad_norm": 0.27974361181259155, "learning_rate": 4.0528884504106695e-06, "loss": 0.3232, "step": 29734 }, { "epoch": 3.0230784871899146, "grad_norm": 0.2681177854537964, "learning_rate": 4.052539991774276e-06, "loss": 0.3375, "step": 29735 }, { "epoch": 3.0231801545343635, "grad_norm": 0.2853703796863556, "learning_rate": 4.052191537910962e-06, "loss": 0.2735, "step": 29736 }, { "epoch": 3.0232818218788124, "grad_norm": 0.23063091933727264, "learning_rate": 4.0518430888224795e-06, "loss": 0.3032, "step": 29737 }, { "epoch": 3.0233834892232614, "grad_norm": 0.2596696615219116, "learning_rate": 4.051494644510587e-06, "loss": 0.2985, "step": 29738 }, { "epoch": 3.0234851565677103, "grad_norm": 0.30429330468177795, "learning_rate": 4.05114620497704e-06, "loss": 0.3254, "step": 29739 }, { "epoch": 3.0235868239121593, "grad_norm": 0.2527642548084259, "learning_rate": 4.050797770223592e-06, "loss": 0.293, "step": 29740 }, { "epoch": 3.023688491256608, "grad_norm": 0.27419528365135193, "learning_rate": 4.050449340252002e-06, "loss": 0.2879, "step": 29741 }, { "epoch": 3.023790158601057, "grad_norm": 0.2815227806568146, "learning_rate": 4.050100915064018e-06, "loss": 0.3016, "step": 29742 }, { "epoch": 3.023891825945506, "grad_norm": 0.25312602519989014, "learning_rate": 4.049752494661402e-06, "loss": 0.2946, "step": 29743 }, { "epoch": 3.0239934932899555, "grad_norm": 0.2844294607639313, "learning_rate": 4.049404079045907e-06, "loss": 0.3984, "step": 29744 }, { "epoch": 3.0240951606344044, "grad_norm": 0.26283785700798035, "learning_rate": 4.049055668219289e-06, "loss": 0.3135, "step": 29745 }, { "epoch": 3.0241968279788534, "grad_norm": 0.2779468595981598, "learning_rate": 4.048707262183304e-06, "loss": 0.2741, "step": 29746 }, { "epoch": 3.0242984953233023, "grad_norm": 0.2943529784679413, "learning_rate": 4.048358860939702e-06, "loss": 0.3407, "step": 29747 }, { "epoch": 3.0244001626677512, "grad_norm": 0.28946784138679504, "learning_rate": 4.048010464490243e-06, "loss": 0.3187, "step": 29748 }, { "epoch": 3.0245018300122, "grad_norm": 0.2692605257034302, "learning_rate": 4.047662072836685e-06, "loss": 0.3216, "step": 29749 }, { "epoch": 3.024603497356649, "grad_norm": 0.26037025451660156, "learning_rate": 4.047313685980774e-06, "loss": 0.2637, "step": 29750 }, { "epoch": 3.024705164701098, "grad_norm": 0.2803790867328644, "learning_rate": 4.046965303924274e-06, "loss": 0.2899, "step": 29751 }, { "epoch": 3.024806832045547, "grad_norm": 0.27209150791168213, "learning_rate": 4.046616926668933e-06, "loss": 0.3039, "step": 29752 }, { "epoch": 3.024908499389996, "grad_norm": 0.2759367823600769, "learning_rate": 4.046268554216511e-06, "loss": 0.309, "step": 29753 }, { "epoch": 3.025010166734445, "grad_norm": 0.2666233479976654, "learning_rate": 4.045920186568762e-06, "loss": 0.3143, "step": 29754 }, { "epoch": 3.025111834078894, "grad_norm": 0.2572879195213318, "learning_rate": 4.0455718237274385e-06, "loss": 0.3102, "step": 29755 }, { "epoch": 3.0252135014233428, "grad_norm": 0.28484630584716797, "learning_rate": 4.045223465694301e-06, "loss": 0.3048, "step": 29756 }, { "epoch": 3.0253151687677917, "grad_norm": 0.28173333406448364, "learning_rate": 4.044875112471096e-06, "loss": 0.2983, "step": 29757 }, { "epoch": 3.0254168361122407, "grad_norm": 0.30850037932395935, "learning_rate": 4.0445267640595855e-06, "loss": 0.3048, "step": 29758 }, { "epoch": 3.0255185034566896, "grad_norm": 0.2666594088077545, "learning_rate": 4.044178420461524e-06, "loss": 0.2996, "step": 29759 }, { "epoch": 3.0256201708011385, "grad_norm": 0.2694047689437866, "learning_rate": 4.043830081678661e-06, "loss": 0.3357, "step": 29760 }, { "epoch": 3.0257218381455875, "grad_norm": 0.28609752655029297, "learning_rate": 4.043481747712758e-06, "loss": 0.3006, "step": 29761 }, { "epoch": 3.0258235054900364, "grad_norm": 0.2951853573322296, "learning_rate": 4.043133418565564e-06, "loss": 0.3086, "step": 29762 }, { "epoch": 3.0259251728344854, "grad_norm": 0.3023764193058014, "learning_rate": 4.042785094238838e-06, "loss": 0.3218, "step": 29763 }, { "epoch": 3.0260268401789343, "grad_norm": 0.2735486328601837, "learning_rate": 4.042436774734334e-06, "loss": 0.2978, "step": 29764 }, { "epoch": 3.0261285075233837, "grad_norm": 0.2534378170967102, "learning_rate": 4.042088460053803e-06, "loss": 0.3071, "step": 29765 }, { "epoch": 3.0262301748678326, "grad_norm": 0.2765152156352997, "learning_rate": 4.041740150199007e-06, "loss": 0.3264, "step": 29766 }, { "epoch": 3.0263318422122816, "grad_norm": 0.3051145672798157, "learning_rate": 4.0413918451716935e-06, "loss": 0.3146, "step": 29767 }, { "epoch": 3.0264335095567305, "grad_norm": 0.27370432019233704, "learning_rate": 4.041043544973618e-06, "loss": 0.3173, "step": 29768 }, { "epoch": 3.0265351769011795, "grad_norm": 0.2941683828830719, "learning_rate": 4.040695249606542e-06, "loss": 0.2938, "step": 29769 }, { "epoch": 3.0266368442456284, "grad_norm": 0.2480384111404419, "learning_rate": 4.040346959072212e-06, "loss": 0.3328, "step": 29770 }, { "epoch": 3.0267385115900773, "grad_norm": 0.29051533341407776, "learning_rate": 4.039998673372389e-06, "loss": 0.2996, "step": 29771 }, { "epoch": 3.0268401789345263, "grad_norm": 0.26902610063552856, "learning_rate": 4.039650392508821e-06, "loss": 0.2895, "step": 29772 }, { "epoch": 3.0269418462789752, "grad_norm": 0.28582093119621277, "learning_rate": 4.039302116483269e-06, "loss": 0.3146, "step": 29773 }, { "epoch": 3.027043513623424, "grad_norm": 0.27176371216773987, "learning_rate": 4.038953845297483e-06, "loss": 0.2938, "step": 29774 }, { "epoch": 3.027145180967873, "grad_norm": 0.26801934838294983, "learning_rate": 4.038605578953218e-06, "loss": 0.3075, "step": 29775 }, { "epoch": 3.027246848312322, "grad_norm": 0.2756272554397583, "learning_rate": 4.0382573174522334e-06, "loss": 0.35, "step": 29776 }, { "epoch": 3.027348515656771, "grad_norm": 0.2685042917728424, "learning_rate": 4.037909060796276e-06, "loss": 0.3271, "step": 29777 }, { "epoch": 3.02745018300122, "grad_norm": 0.2911195755004883, "learning_rate": 4.037560808987107e-06, "loss": 0.3014, "step": 29778 }, { "epoch": 3.027551850345669, "grad_norm": 0.2660016715526581, "learning_rate": 4.037212562026476e-06, "loss": 0.344, "step": 29779 }, { "epoch": 3.027653517690118, "grad_norm": 0.27765437960624695, "learning_rate": 4.036864319916139e-06, "loss": 0.3155, "step": 29780 }, { "epoch": 3.0277551850345668, "grad_norm": 0.28273454308509827, "learning_rate": 4.036516082657853e-06, "loss": 0.3349, "step": 29781 }, { "epoch": 3.0278568523790157, "grad_norm": 0.28444603085517883, "learning_rate": 4.036167850253368e-06, "loss": 0.3014, "step": 29782 }, { "epoch": 3.0279585197234646, "grad_norm": 0.25407925248146057, "learning_rate": 4.035819622704442e-06, "loss": 0.311, "step": 29783 }, { "epoch": 3.0280601870679136, "grad_norm": 0.26945337653160095, "learning_rate": 4.035471400012827e-06, "loss": 0.311, "step": 29784 }, { "epoch": 3.028161854412363, "grad_norm": 0.29477646946907043, "learning_rate": 4.035123182180276e-06, "loss": 0.3024, "step": 29785 }, { "epoch": 3.028263521756812, "grad_norm": 0.2693392336368561, "learning_rate": 4.034774969208548e-06, "loss": 0.2737, "step": 29786 }, { "epoch": 3.028365189101261, "grad_norm": 0.2787165641784668, "learning_rate": 4.034426761099392e-06, "loss": 0.3042, "step": 29787 }, { "epoch": 3.02846685644571, "grad_norm": 0.27993932366371155, "learning_rate": 4.034078557854567e-06, "loss": 0.2971, "step": 29788 }, { "epoch": 3.0285685237901587, "grad_norm": 0.28062325716018677, "learning_rate": 4.033730359475822e-06, "loss": 0.2878, "step": 29789 }, { "epoch": 3.0286701911346077, "grad_norm": 0.2814997732639313, "learning_rate": 4.033382165964915e-06, "loss": 0.3187, "step": 29790 }, { "epoch": 3.0287718584790566, "grad_norm": 0.25391891598701477, "learning_rate": 4.0330339773236e-06, "loss": 0.3112, "step": 29791 }, { "epoch": 3.0288735258235056, "grad_norm": 0.28375792503356934, "learning_rate": 4.032685793553628e-06, "loss": 0.297, "step": 29792 }, { "epoch": 3.0289751931679545, "grad_norm": 0.27861642837524414, "learning_rate": 4.032337614656757e-06, "loss": 0.3274, "step": 29793 }, { "epoch": 3.0290768605124034, "grad_norm": 0.28109845519065857, "learning_rate": 4.031989440634738e-06, "loss": 0.285, "step": 29794 }, { "epoch": 3.0291785278568524, "grad_norm": 0.300910621881485, "learning_rate": 4.031641271489326e-06, "loss": 0.3034, "step": 29795 }, { "epoch": 3.0292801952013013, "grad_norm": 0.26457229256629944, "learning_rate": 4.031293107222276e-06, "loss": 0.2696, "step": 29796 }, { "epoch": 3.0293818625457503, "grad_norm": 0.26302874088287354, "learning_rate": 4.030944947835341e-06, "loss": 0.3202, "step": 29797 }, { "epoch": 3.029483529890199, "grad_norm": 0.26621803641319275, "learning_rate": 4.030596793330275e-06, "loss": 0.2914, "step": 29798 }, { "epoch": 3.029585197234648, "grad_norm": 0.28596270084381104, "learning_rate": 4.030248643708831e-06, "loss": 0.3046, "step": 29799 }, { "epoch": 3.029686864579097, "grad_norm": 0.26763415336608887, "learning_rate": 4.029900498972765e-06, "loss": 0.3655, "step": 29800 }, { "epoch": 3.029788531923546, "grad_norm": 0.2955368161201477, "learning_rate": 4.0295523591238294e-06, "loss": 0.2973, "step": 29801 }, { "epoch": 3.029890199267995, "grad_norm": 0.25764036178588867, "learning_rate": 4.0292042241637785e-06, "loss": 0.2757, "step": 29802 }, { "epoch": 3.029991866612444, "grad_norm": 0.27654531598091125, "learning_rate": 4.028856094094367e-06, "loss": 0.328, "step": 29803 }, { "epoch": 3.030093533956893, "grad_norm": 0.2758459150791168, "learning_rate": 4.028507968917346e-06, "loss": 0.3037, "step": 29804 }, { "epoch": 3.030195201301342, "grad_norm": 0.3041757345199585, "learning_rate": 4.028159848634471e-06, "loss": 0.2711, "step": 29805 }, { "epoch": 3.030296868645791, "grad_norm": 0.2639009356498718, "learning_rate": 4.027811733247497e-06, "loss": 0.3272, "step": 29806 }, { "epoch": 3.03039853599024, "grad_norm": 0.2621043622493744, "learning_rate": 4.027463622758175e-06, "loss": 0.3241, "step": 29807 }, { "epoch": 3.030500203334689, "grad_norm": 0.28619593381881714, "learning_rate": 4.027115517168262e-06, "loss": 0.3162, "step": 29808 }, { "epoch": 3.030601870679138, "grad_norm": 0.2669454514980316, "learning_rate": 4.026767416479508e-06, "loss": 0.3479, "step": 29809 }, { "epoch": 3.030703538023587, "grad_norm": 0.27319660782814026, "learning_rate": 4.026419320693669e-06, "loss": 0.3003, "step": 29810 }, { "epoch": 3.030805205368036, "grad_norm": 0.2552024722099304, "learning_rate": 4.0260712298124994e-06, "loss": 0.3114, "step": 29811 }, { "epoch": 3.030906872712485, "grad_norm": 0.2904662787914276, "learning_rate": 4.0257231438377495e-06, "loss": 0.2841, "step": 29812 }, { "epoch": 3.0310085400569338, "grad_norm": 0.25759026408195496, "learning_rate": 4.025375062771177e-06, "loss": 0.2979, "step": 29813 }, { "epoch": 3.0311102074013827, "grad_norm": 0.2694590985774994, "learning_rate": 4.0250269866145316e-06, "loss": 0.3079, "step": 29814 }, { "epoch": 3.0312118747458316, "grad_norm": 0.25513312220573425, "learning_rate": 4.0246789153695686e-06, "loss": 0.2891, "step": 29815 }, { "epoch": 3.0313135420902806, "grad_norm": 0.27513188123703003, "learning_rate": 4.024330849038043e-06, "loss": 0.2884, "step": 29816 }, { "epoch": 3.0314152094347295, "grad_norm": 0.2761235535144806, "learning_rate": 4.0239827876217055e-06, "loss": 0.2881, "step": 29817 }, { "epoch": 3.0315168767791785, "grad_norm": 0.28619104623794556, "learning_rate": 4.023634731122311e-06, "loss": 0.3131, "step": 29818 }, { "epoch": 3.0316185441236274, "grad_norm": 0.2661112844944, "learning_rate": 4.023286679541613e-06, "loss": 0.3128, "step": 29819 }, { "epoch": 3.0317202114680764, "grad_norm": 0.30539029836654663, "learning_rate": 4.022938632881364e-06, "loss": 0.3383, "step": 29820 }, { "epoch": 3.0318218788125253, "grad_norm": 0.29669806361198425, "learning_rate": 4.0225905911433185e-06, "loss": 0.2869, "step": 29821 }, { "epoch": 3.0319235461569742, "grad_norm": 0.26862412691116333, "learning_rate": 4.022242554329228e-06, "loss": 0.3158, "step": 29822 }, { "epoch": 3.032025213501423, "grad_norm": 0.2822631895542145, "learning_rate": 4.02189452244085e-06, "loss": 0.3497, "step": 29823 }, { "epoch": 3.032126880845872, "grad_norm": 0.2670523226261139, "learning_rate": 4.021546495479932e-06, "loss": 0.316, "step": 29824 }, { "epoch": 3.032228548190321, "grad_norm": 0.3056676685810089, "learning_rate": 4.021198473448232e-06, "loss": 0.2846, "step": 29825 }, { "epoch": 3.0323302155347704, "grad_norm": 0.2726356089115143, "learning_rate": 4.020850456347502e-06, "loss": 0.3422, "step": 29826 }, { "epoch": 3.0324318828792194, "grad_norm": 0.27786386013031006, "learning_rate": 4.020502444179493e-06, "loss": 0.3102, "step": 29827 }, { "epoch": 3.0325335502236683, "grad_norm": 0.27655288577079773, "learning_rate": 4.020154436945961e-06, "loss": 0.2888, "step": 29828 }, { "epoch": 3.0326352175681173, "grad_norm": 0.2687608003616333, "learning_rate": 4.019806434648657e-06, "loss": 0.301, "step": 29829 }, { "epoch": 3.032736884912566, "grad_norm": 0.2768212854862213, "learning_rate": 4.019458437289336e-06, "loss": 0.3337, "step": 29830 }, { "epoch": 3.032838552257015, "grad_norm": 0.26795169711112976, "learning_rate": 4.019110444869752e-06, "loss": 0.3123, "step": 29831 }, { "epoch": 3.032940219601464, "grad_norm": 0.28522542119026184, "learning_rate": 4.018762457391654e-06, "loss": 0.2993, "step": 29832 }, { "epoch": 3.033041886945913, "grad_norm": 0.26472944021224976, "learning_rate": 4.0184144748567985e-06, "loss": 0.2981, "step": 29833 }, { "epoch": 3.033143554290362, "grad_norm": 0.28118786215782166, "learning_rate": 4.018066497266938e-06, "loss": 0.3184, "step": 29834 }, { "epoch": 3.033245221634811, "grad_norm": 0.2619308829307556, "learning_rate": 4.017718524623824e-06, "loss": 0.298, "step": 29835 }, { "epoch": 3.03334688897926, "grad_norm": 0.2927621603012085, "learning_rate": 4.0173705569292114e-06, "loss": 0.3065, "step": 29836 }, { "epoch": 3.033448556323709, "grad_norm": 0.29018157720565796, "learning_rate": 4.0170225941848525e-06, "loss": 0.357, "step": 29837 }, { "epoch": 3.0335502236681577, "grad_norm": 0.2857239842414856, "learning_rate": 4.016674636392501e-06, "loss": 0.3348, "step": 29838 }, { "epoch": 3.0336518910126067, "grad_norm": 0.28288736939430237, "learning_rate": 4.016326683553907e-06, "loss": 0.3286, "step": 29839 }, { "epoch": 3.0337535583570556, "grad_norm": 0.26962974667549133, "learning_rate": 4.015978735670825e-06, "loss": 0.2977, "step": 29840 }, { "epoch": 3.0338552257015046, "grad_norm": 0.29551780223846436, "learning_rate": 4.015630792745011e-06, "loss": 0.3069, "step": 29841 }, { "epoch": 3.0339568930459535, "grad_norm": 0.2613818049430847, "learning_rate": 4.015282854778214e-06, "loss": 0.2951, "step": 29842 }, { "epoch": 3.0340585603904024, "grad_norm": 0.2613440155982971, "learning_rate": 4.0149349217721885e-06, "loss": 0.3163, "step": 29843 }, { "epoch": 3.0341602277348514, "grad_norm": 0.25923168659210205, "learning_rate": 4.014586993728686e-06, "loss": 0.3294, "step": 29844 }, { "epoch": 3.0342618950793003, "grad_norm": 0.26230987906455994, "learning_rate": 4.014239070649459e-06, "loss": 0.2984, "step": 29845 }, { "epoch": 3.0343635624237493, "grad_norm": 0.2736605107784271, "learning_rate": 4.013891152536263e-06, "loss": 0.2967, "step": 29846 }, { "epoch": 3.0344652297681987, "grad_norm": 0.2897849977016449, "learning_rate": 4.0135432393908486e-06, "loss": 0.3425, "step": 29847 }, { "epoch": 3.0345668971126476, "grad_norm": 0.2689523994922638, "learning_rate": 4.013195331214969e-06, "loss": 0.2917, "step": 29848 }, { "epoch": 3.0346685644570965, "grad_norm": 0.26254114508628845, "learning_rate": 4.012847428010377e-06, "loss": 0.307, "step": 29849 }, { "epoch": 3.0347702318015455, "grad_norm": 0.2684749662876129, "learning_rate": 4.012499529778824e-06, "loss": 0.2903, "step": 29850 }, { "epoch": 3.0348718991459944, "grad_norm": 0.28896594047546387, "learning_rate": 4.012151636522065e-06, "loss": 0.3024, "step": 29851 }, { "epoch": 3.0349735664904434, "grad_norm": 0.2737846076488495, "learning_rate": 4.011803748241851e-06, "loss": 0.2951, "step": 29852 }, { "epoch": 3.0350752338348923, "grad_norm": 0.2548905313014984, "learning_rate": 4.011455864939935e-06, "loss": 0.2711, "step": 29853 }, { "epoch": 3.0351769011793412, "grad_norm": 0.2767552137374878, "learning_rate": 4.011107986618069e-06, "loss": 0.3001, "step": 29854 }, { "epoch": 3.03527856852379, "grad_norm": 0.28134554624557495, "learning_rate": 4.010760113278006e-06, "loss": 0.3119, "step": 29855 }, { "epoch": 3.035380235868239, "grad_norm": 0.27270156145095825, "learning_rate": 4.010412244921499e-06, "loss": 0.3026, "step": 29856 }, { "epoch": 3.035481903212688, "grad_norm": 0.2687148451805115, "learning_rate": 4.010064381550299e-06, "loss": 0.2922, "step": 29857 }, { "epoch": 3.035583570557137, "grad_norm": 0.269388347864151, "learning_rate": 4.0097165231661606e-06, "loss": 0.3059, "step": 29858 }, { "epoch": 3.035685237901586, "grad_norm": 0.2658637762069702, "learning_rate": 4.009368669770834e-06, "loss": 0.3361, "step": 29859 }, { "epoch": 3.035786905246035, "grad_norm": 0.28627172112464905, "learning_rate": 4.009020821366072e-06, "loss": 0.2787, "step": 29860 }, { "epoch": 3.035888572590484, "grad_norm": 0.3000650703907013, "learning_rate": 4.0086729779536295e-06, "loss": 0.3187, "step": 29861 }, { "epoch": 3.0359902399349328, "grad_norm": 0.25492531061172485, "learning_rate": 4.008325139535255e-06, "loss": 0.3166, "step": 29862 }, { "epoch": 3.0360919072793817, "grad_norm": 0.27174249291419983, "learning_rate": 4.0079773061127046e-06, "loss": 0.2951, "step": 29863 }, { "epoch": 3.0361935746238307, "grad_norm": 0.27459800243377686, "learning_rate": 4.007629477687728e-06, "loss": 0.3377, "step": 29864 }, { "epoch": 3.0362952419682796, "grad_norm": 0.2710462510585785, "learning_rate": 4.007281654262078e-06, "loss": 0.2778, "step": 29865 }, { "epoch": 3.0363969093127285, "grad_norm": 0.27656179666519165, "learning_rate": 4.006933835837508e-06, "loss": 0.2991, "step": 29866 }, { "epoch": 3.036498576657178, "grad_norm": 0.27983537316322327, "learning_rate": 4.006586022415768e-06, "loss": 0.34, "step": 29867 }, { "epoch": 3.036600244001627, "grad_norm": 0.2721557021141052, "learning_rate": 4.006238213998613e-06, "loss": 0.3337, "step": 29868 }, { "epoch": 3.036701911346076, "grad_norm": 0.2726065516471863, "learning_rate": 4.005890410587792e-06, "loss": 0.3078, "step": 29869 }, { "epoch": 3.0368035786905248, "grad_norm": 0.2669403851032257, "learning_rate": 4.005542612185061e-06, "loss": 0.3266, "step": 29870 }, { "epoch": 3.0369052460349737, "grad_norm": 0.26941072940826416, "learning_rate": 4.005194818792169e-06, "loss": 0.3042, "step": 29871 }, { "epoch": 3.0370069133794226, "grad_norm": 0.26226016879081726, "learning_rate": 4.0048470304108685e-06, "loss": 0.3315, "step": 29872 }, { "epoch": 3.0371085807238716, "grad_norm": 0.25018396973609924, "learning_rate": 4.004499247042913e-06, "loss": 0.3052, "step": 29873 }, { "epoch": 3.0372102480683205, "grad_norm": 0.2905275821685791, "learning_rate": 4.0041514686900534e-06, "loss": 0.3017, "step": 29874 }, { "epoch": 3.0373119154127695, "grad_norm": 0.2801954448223114, "learning_rate": 4.003803695354041e-06, "loss": 0.2836, "step": 29875 }, { "epoch": 3.0374135827572184, "grad_norm": 0.2826402187347412, "learning_rate": 4.003455927036631e-06, "loss": 0.3264, "step": 29876 }, { "epoch": 3.0375152501016673, "grad_norm": 0.263771116733551, "learning_rate": 4.0031081637395715e-06, "loss": 0.2842, "step": 29877 }, { "epoch": 3.0376169174461163, "grad_norm": 0.29865148663520813, "learning_rate": 4.002760405464617e-06, "loss": 0.3103, "step": 29878 }, { "epoch": 3.0377185847905652, "grad_norm": 0.2704947888851166, "learning_rate": 4.002412652213519e-06, "loss": 0.3051, "step": 29879 }, { "epoch": 3.037820252135014, "grad_norm": 0.25520163774490356, "learning_rate": 4.002064903988028e-06, "loss": 0.3411, "step": 29880 }, { "epoch": 3.037921919479463, "grad_norm": 0.28039929270744324, "learning_rate": 4.001717160789898e-06, "loss": 0.3283, "step": 29881 }, { "epoch": 3.038023586823912, "grad_norm": 0.28551051020622253, "learning_rate": 4.001369422620878e-06, "loss": 0.3258, "step": 29882 }, { "epoch": 3.038125254168361, "grad_norm": 0.2590213418006897, "learning_rate": 4.001021689482724e-06, "loss": 0.3085, "step": 29883 }, { "epoch": 3.03822692151281, "grad_norm": 0.2960209846496582, "learning_rate": 4.0006739613771825e-06, "loss": 0.2813, "step": 29884 }, { "epoch": 3.038328588857259, "grad_norm": 0.2759433686733246, "learning_rate": 4.0003262383060094e-06, "loss": 0.3008, "step": 29885 }, { "epoch": 3.038430256201708, "grad_norm": 0.2995244562625885, "learning_rate": 3.9999785202709555e-06, "loss": 0.2973, "step": 29886 }, { "epoch": 3.0385319235461568, "grad_norm": 0.2829970419406891, "learning_rate": 3.9996308072737715e-06, "loss": 0.3133, "step": 29887 }, { "epoch": 3.038633590890606, "grad_norm": 0.2672833204269409, "learning_rate": 3.9992830993162116e-06, "loss": 0.3195, "step": 29888 }, { "epoch": 3.038735258235055, "grad_norm": 0.298931747674942, "learning_rate": 3.998935396400023e-06, "loss": 0.312, "step": 29889 }, { "epoch": 3.038836925579504, "grad_norm": 0.27738162875175476, "learning_rate": 3.9985876985269606e-06, "loss": 0.336, "step": 29890 }, { "epoch": 3.038938592923953, "grad_norm": 0.27136602997779846, "learning_rate": 3.998240005698777e-06, "loss": 0.3068, "step": 29891 }, { "epoch": 3.039040260268402, "grad_norm": 0.26169711351394653, "learning_rate": 3.997892317917221e-06, "loss": 0.2715, "step": 29892 }, { "epoch": 3.039141927612851, "grad_norm": 0.2792626917362213, "learning_rate": 3.997544635184047e-06, "loss": 0.295, "step": 29893 }, { "epoch": 3.0392435949573, "grad_norm": 0.29038292169570923, "learning_rate": 3.997196957501002e-06, "loss": 0.3099, "step": 29894 }, { "epoch": 3.0393452623017487, "grad_norm": 0.24963484704494476, "learning_rate": 3.996849284869841e-06, "loss": 0.3206, "step": 29895 }, { "epoch": 3.0394469296461977, "grad_norm": 0.29572775959968567, "learning_rate": 3.996501617292316e-06, "loss": 0.3172, "step": 29896 }, { "epoch": 3.0395485969906466, "grad_norm": 0.2902267277240753, "learning_rate": 3.996153954770177e-06, "loss": 0.3137, "step": 29897 }, { "epoch": 3.0396502643350956, "grad_norm": 0.26551878452301025, "learning_rate": 3.995806297305178e-06, "loss": 0.3178, "step": 29898 }, { "epoch": 3.0397519316795445, "grad_norm": 0.2782377302646637, "learning_rate": 3.995458644899065e-06, "loss": 0.2891, "step": 29899 }, { "epoch": 3.0398535990239934, "grad_norm": 0.27643564343452454, "learning_rate": 3.995110997553594e-06, "loss": 0.3204, "step": 29900 }, { "epoch": 3.0399552663684424, "grad_norm": 0.2750076949596405, "learning_rate": 3.994763355270516e-06, "loss": 0.2731, "step": 29901 }, { "epoch": 3.0400569337128913, "grad_norm": 0.26666268706321716, "learning_rate": 3.994415718051578e-06, "loss": 0.2994, "step": 29902 }, { "epoch": 3.0401586010573403, "grad_norm": 0.2629154920578003, "learning_rate": 3.994068085898539e-06, "loss": 0.3325, "step": 29903 }, { "epoch": 3.040260268401789, "grad_norm": 0.29372990131378174, "learning_rate": 3.993720458813142e-06, "loss": 0.3219, "step": 29904 }, { "epoch": 3.040361935746238, "grad_norm": 0.26908016204833984, "learning_rate": 3.993372836797144e-06, "loss": 0.3208, "step": 29905 }, { "epoch": 3.040463603090687, "grad_norm": 0.2910429835319519, "learning_rate": 3.993025219852296e-06, "loss": 0.2848, "step": 29906 }, { "epoch": 3.040565270435136, "grad_norm": 0.3113917410373688, "learning_rate": 3.992677607980345e-06, "loss": 0.313, "step": 29907 }, { "epoch": 3.0406669377795854, "grad_norm": 0.2812384366989136, "learning_rate": 3.992330001183047e-06, "loss": 0.3131, "step": 29908 }, { "epoch": 3.0407686051240344, "grad_norm": 0.29168403148651123, "learning_rate": 3.991982399462149e-06, "loss": 0.29, "step": 29909 }, { "epoch": 3.0408702724684833, "grad_norm": 0.2712760865688324, "learning_rate": 3.991634802819404e-06, "loss": 0.2948, "step": 29910 }, { "epoch": 3.0409719398129322, "grad_norm": 0.2893452048301697, "learning_rate": 3.991287211256567e-06, "loss": 0.3042, "step": 29911 }, { "epoch": 3.041073607157381, "grad_norm": 0.27368786931037903, "learning_rate": 3.990939624775381e-06, "loss": 0.3219, "step": 29912 }, { "epoch": 3.04117527450183, "grad_norm": 0.29258862137794495, "learning_rate": 3.990592043377604e-06, "loss": 0.3258, "step": 29913 }, { "epoch": 3.041276941846279, "grad_norm": 0.2749781310558319, "learning_rate": 3.990244467064982e-06, "loss": 0.3333, "step": 29914 }, { "epoch": 3.041378609190728, "grad_norm": 0.27873530983924866, "learning_rate": 3.989896895839269e-06, "loss": 0.304, "step": 29915 }, { "epoch": 3.041480276535177, "grad_norm": 0.25528615713119507, "learning_rate": 3.9895493297022185e-06, "loss": 0.3224, "step": 29916 }, { "epoch": 3.041581943879626, "grad_norm": 0.2735745906829834, "learning_rate": 3.989201768655574e-06, "loss": 0.2931, "step": 29917 }, { "epoch": 3.041683611224075, "grad_norm": 0.2696535587310791, "learning_rate": 3.988854212701095e-06, "loss": 0.333, "step": 29918 }, { "epoch": 3.0417852785685238, "grad_norm": 0.2863965630531311, "learning_rate": 3.9885066618405255e-06, "loss": 0.343, "step": 29919 }, { "epoch": 3.0418869459129727, "grad_norm": 0.29123538732528687, "learning_rate": 3.9881591160756186e-06, "loss": 0.3111, "step": 29920 }, { "epoch": 3.0419886132574216, "grad_norm": 0.28784194588661194, "learning_rate": 3.987811575408128e-06, "loss": 0.3037, "step": 29921 }, { "epoch": 3.0420902806018706, "grad_norm": 0.25572890043258667, "learning_rate": 3.987464039839799e-06, "loss": 0.312, "step": 29922 }, { "epoch": 3.0421919479463195, "grad_norm": 0.26582348346710205, "learning_rate": 3.9871165093723895e-06, "loss": 0.2857, "step": 29923 }, { "epoch": 3.0422936152907685, "grad_norm": 0.27710840106010437, "learning_rate": 3.9867689840076434e-06, "loss": 0.2818, "step": 29924 }, { "epoch": 3.0423952826352174, "grad_norm": 0.256298691034317, "learning_rate": 3.9864214637473136e-06, "loss": 0.3015, "step": 29925 }, { "epoch": 3.0424969499796664, "grad_norm": 0.2896183729171753, "learning_rate": 3.986073948593155e-06, "loss": 0.3135, "step": 29926 }, { "epoch": 3.0425986173241153, "grad_norm": 0.26473701000213623, "learning_rate": 3.985726438546912e-06, "loss": 0.2939, "step": 29927 }, { "epoch": 3.0427002846685642, "grad_norm": 0.2673640847206116, "learning_rate": 3.98537893361034e-06, "loss": 0.3048, "step": 29928 }, { "epoch": 3.0428019520130136, "grad_norm": 0.27816808223724365, "learning_rate": 3.985031433785187e-06, "loss": 0.2952, "step": 29929 }, { "epoch": 3.0429036193574626, "grad_norm": 0.29093196988105774, "learning_rate": 3.984683939073203e-06, "loss": 0.3032, "step": 29930 }, { "epoch": 3.0430052867019115, "grad_norm": 0.26855167746543884, "learning_rate": 3.984336449476143e-06, "loss": 0.3359, "step": 29931 }, { "epoch": 3.0431069540463604, "grad_norm": 0.29422205686569214, "learning_rate": 3.983988964995752e-06, "loss": 0.3061, "step": 29932 }, { "epoch": 3.0432086213908094, "grad_norm": 0.2735820412635803, "learning_rate": 3.9836414856337855e-06, "loss": 0.327, "step": 29933 }, { "epoch": 3.0433102887352583, "grad_norm": 0.2892466187477112, "learning_rate": 3.983294011391991e-06, "loss": 0.3185, "step": 29934 }, { "epoch": 3.0434119560797073, "grad_norm": 0.2684146463871002, "learning_rate": 3.982946542272117e-06, "loss": 0.3115, "step": 29935 }, { "epoch": 3.043513623424156, "grad_norm": 0.26770126819610596, "learning_rate": 3.98259907827592e-06, "loss": 0.298, "step": 29936 }, { "epoch": 3.043615290768605, "grad_norm": 0.28365397453308105, "learning_rate": 3.982251619405147e-06, "loss": 0.3339, "step": 29937 }, { "epoch": 3.043716958113054, "grad_norm": 0.26895588636398315, "learning_rate": 3.981904165661547e-06, "loss": 0.2813, "step": 29938 }, { "epoch": 3.043818625457503, "grad_norm": 0.270886093378067, "learning_rate": 3.981556717046872e-06, "loss": 0.3323, "step": 29939 }, { "epoch": 3.043920292801952, "grad_norm": 0.29193928837776184, "learning_rate": 3.9812092735628705e-06, "loss": 0.281, "step": 29940 }, { "epoch": 3.044021960146401, "grad_norm": 0.26769426465034485, "learning_rate": 3.980861835211298e-06, "loss": 0.3303, "step": 29941 }, { "epoch": 3.04412362749085, "grad_norm": 0.2744896113872528, "learning_rate": 3.9805144019939e-06, "loss": 0.3129, "step": 29942 }, { "epoch": 3.044225294835299, "grad_norm": 0.2674511969089508, "learning_rate": 3.980166973912429e-06, "loss": 0.2994, "step": 29943 }, { "epoch": 3.0443269621797477, "grad_norm": 0.2663024067878723, "learning_rate": 3.9798195509686325e-06, "loss": 0.3301, "step": 29944 }, { "epoch": 3.0444286295241967, "grad_norm": 0.28872236609458923, "learning_rate": 3.979472133164262e-06, "loss": 0.3109, "step": 29945 }, { "epoch": 3.0445302968686456, "grad_norm": 0.27474188804626465, "learning_rate": 3.979124720501072e-06, "loss": 0.3034, "step": 29946 }, { "epoch": 3.0446319642130946, "grad_norm": 0.2687196433544159, "learning_rate": 3.978777312980807e-06, "loss": 0.3467, "step": 29947 }, { "epoch": 3.0447336315575435, "grad_norm": 0.25447455048561096, "learning_rate": 3.97842991060522e-06, "loss": 0.2977, "step": 29948 }, { "epoch": 3.044835298901993, "grad_norm": 0.2667941153049469, "learning_rate": 3.978082513376059e-06, "loss": 0.2818, "step": 29949 }, { "epoch": 3.044936966246442, "grad_norm": 0.2696453928947449, "learning_rate": 3.977735121295075e-06, "loss": 0.2968, "step": 29950 }, { "epoch": 3.0450386335908908, "grad_norm": 0.2741684019565582, "learning_rate": 3.977387734364021e-06, "loss": 0.3132, "step": 29951 }, { "epoch": 3.0451403009353397, "grad_norm": 0.2711810767650604, "learning_rate": 3.9770403525846425e-06, "loss": 0.3117, "step": 29952 }, { "epoch": 3.0452419682797887, "grad_norm": 0.2558806836605072, "learning_rate": 3.976692975958694e-06, "loss": 0.3213, "step": 29953 }, { "epoch": 3.0453436356242376, "grad_norm": 0.2718046307563782, "learning_rate": 3.976345604487921e-06, "loss": 0.3282, "step": 29954 }, { "epoch": 3.0454453029686865, "grad_norm": 0.2786147892475128, "learning_rate": 3.975998238174074e-06, "loss": 0.3275, "step": 29955 }, { "epoch": 3.0455469703131355, "grad_norm": 0.2913236916065216, "learning_rate": 3.975650877018909e-06, "loss": 0.3265, "step": 29956 }, { "epoch": 3.0456486376575844, "grad_norm": 0.30902183055877686, "learning_rate": 3.975303521024168e-06, "loss": 0.3139, "step": 29957 }, { "epoch": 3.0457503050020334, "grad_norm": 0.2925032079219818, "learning_rate": 3.9749561701916066e-06, "loss": 0.2919, "step": 29958 }, { "epoch": 3.0458519723464823, "grad_norm": 0.2703346610069275, "learning_rate": 3.97460882452297e-06, "loss": 0.3389, "step": 29959 }, { "epoch": 3.0459536396909312, "grad_norm": 0.287727415561676, "learning_rate": 3.974261484020013e-06, "loss": 0.3296, "step": 29960 }, { "epoch": 3.04605530703538, "grad_norm": 0.3086628317832947, "learning_rate": 3.973914148684481e-06, "loss": 0.3114, "step": 29961 }, { "epoch": 3.046156974379829, "grad_norm": 0.27248069643974304, "learning_rate": 3.973566818518126e-06, "loss": 0.3425, "step": 29962 }, { "epoch": 3.046258641724278, "grad_norm": 0.2775156795978546, "learning_rate": 3.973219493522698e-06, "loss": 0.3154, "step": 29963 }, { "epoch": 3.046360309068727, "grad_norm": 0.2597498297691345, "learning_rate": 3.972872173699945e-06, "loss": 0.3155, "step": 29964 }, { "epoch": 3.046461976413176, "grad_norm": 0.2785104513168335, "learning_rate": 3.97252485905162e-06, "loss": 0.2999, "step": 29965 }, { "epoch": 3.046563643757625, "grad_norm": 0.2704618275165558, "learning_rate": 3.972177549579469e-06, "loss": 0.3286, "step": 29966 }, { "epoch": 3.046665311102074, "grad_norm": 0.2672697603702545, "learning_rate": 3.971830245285243e-06, "loss": 0.3037, "step": 29967 }, { "epoch": 3.0467669784465228, "grad_norm": 0.26949846744537354, "learning_rate": 3.971482946170694e-06, "loss": 0.2913, "step": 29968 }, { "epoch": 3.0468686457909717, "grad_norm": 0.26785725355148315, "learning_rate": 3.9711356522375675e-06, "loss": 0.3185, "step": 29969 }, { "epoch": 3.046970313135421, "grad_norm": 0.27437469363212585, "learning_rate": 3.970788363487616e-06, "loss": 0.2775, "step": 29970 }, { "epoch": 3.04707198047987, "grad_norm": 0.27068954706192017, "learning_rate": 3.970441079922587e-06, "loss": 0.2986, "step": 29971 }, { "epoch": 3.047173647824319, "grad_norm": 0.27308860421180725, "learning_rate": 3.9700938015442316e-06, "loss": 0.3273, "step": 29972 }, { "epoch": 3.047275315168768, "grad_norm": 0.28447362780570984, "learning_rate": 3.9697465283543e-06, "loss": 0.2702, "step": 29973 }, { "epoch": 3.047376982513217, "grad_norm": 0.2792959213256836, "learning_rate": 3.969399260354539e-06, "loss": 0.3243, "step": 29974 }, { "epoch": 3.047478649857666, "grad_norm": 0.2779454290866852, "learning_rate": 3.969051997546701e-06, "loss": 0.2991, "step": 29975 }, { "epoch": 3.0475803172021148, "grad_norm": 0.25816553831100464, "learning_rate": 3.968704739932534e-06, "loss": 0.3268, "step": 29976 }, { "epoch": 3.0476819845465637, "grad_norm": 0.28096941113471985, "learning_rate": 3.968357487513786e-06, "loss": 0.3339, "step": 29977 }, { "epoch": 3.0477836518910126, "grad_norm": 0.28661587834358215, "learning_rate": 3.96801024029221e-06, "loss": 0.3186, "step": 29978 }, { "epoch": 3.0478853192354616, "grad_norm": 0.2743726372718811, "learning_rate": 3.967662998269551e-06, "loss": 0.3414, "step": 29979 }, { "epoch": 3.0479869865799105, "grad_norm": 0.2512023150920868, "learning_rate": 3.967315761447562e-06, "loss": 0.2851, "step": 29980 }, { "epoch": 3.0480886539243595, "grad_norm": 0.3209432065486908, "learning_rate": 3.96696852982799e-06, "loss": 0.3408, "step": 29981 }, { "epoch": 3.0481903212688084, "grad_norm": 0.2865387499332428, "learning_rate": 3.966621303412585e-06, "loss": 0.3059, "step": 29982 }, { "epoch": 3.0482919886132573, "grad_norm": 0.3112019896507263, "learning_rate": 3.966274082203098e-06, "loss": 0.3331, "step": 29983 }, { "epoch": 3.0483936559577063, "grad_norm": 0.2588694989681244, "learning_rate": 3.965926866201274e-06, "loss": 0.3081, "step": 29984 }, { "epoch": 3.0484953233021552, "grad_norm": 0.2754348814487457, "learning_rate": 3.965579655408866e-06, "loss": 0.288, "step": 29985 }, { "epoch": 3.048596990646604, "grad_norm": 0.27292758226394653, "learning_rate": 3.9652324498276215e-06, "loss": 0.315, "step": 29986 }, { "epoch": 3.048698657991053, "grad_norm": 0.28204619884490967, "learning_rate": 3.96488524945929e-06, "loss": 0.3031, "step": 29987 }, { "epoch": 3.048800325335502, "grad_norm": 0.28155145049095154, "learning_rate": 3.9645380543056214e-06, "loss": 0.305, "step": 29988 }, { "epoch": 3.048901992679951, "grad_norm": 0.2878870666027069, "learning_rate": 3.964190864368364e-06, "loss": 0.3115, "step": 29989 }, { "epoch": 3.0490036600244004, "grad_norm": 0.28988513350486755, "learning_rate": 3.963843679649267e-06, "loss": 0.2974, "step": 29990 }, { "epoch": 3.0491053273688493, "grad_norm": 0.2903144061565399, "learning_rate": 3.963496500150077e-06, "loss": 0.296, "step": 29991 }, { "epoch": 3.0492069947132983, "grad_norm": 0.29286959767341614, "learning_rate": 3.963149325872548e-06, "loss": 0.34, "step": 29992 }, { "epoch": 3.049308662057747, "grad_norm": 0.2603308856487274, "learning_rate": 3.962802156818425e-06, "loss": 0.3127, "step": 29993 }, { "epoch": 3.049410329402196, "grad_norm": 0.2637760639190674, "learning_rate": 3.962454992989458e-06, "loss": 0.304, "step": 29994 }, { "epoch": 3.049511996746645, "grad_norm": 0.28532201051712036, "learning_rate": 3.962107834387397e-06, "loss": 0.3024, "step": 29995 }, { "epoch": 3.049613664091094, "grad_norm": 0.2719699442386627, "learning_rate": 3.9617606810139895e-06, "loss": 0.3049, "step": 29996 }, { "epoch": 3.049715331435543, "grad_norm": 0.2528579533100128, "learning_rate": 3.961413532870986e-06, "loss": 0.2758, "step": 29997 }, { "epoch": 3.049816998779992, "grad_norm": 0.2685396373271942, "learning_rate": 3.961066389960134e-06, "loss": 0.2882, "step": 29998 }, { "epoch": 3.049918666124441, "grad_norm": 0.2838892340660095, "learning_rate": 3.960719252283182e-06, "loss": 0.3033, "step": 29999 }, { "epoch": 3.05002033346889, "grad_norm": 0.31974440813064575, "learning_rate": 3.96037211984188e-06, "loss": 0.3063, "step": 30000 }, { "epoch": 3.0501220008133387, "grad_norm": 0.30094948410987854, "learning_rate": 3.960024992637975e-06, "loss": 0.3056, "step": 30001 }, { "epoch": 3.0502236681577877, "grad_norm": 0.2779691815376282, "learning_rate": 3.9596778706732175e-06, "loss": 0.35, "step": 30002 }, { "epoch": 3.0503253355022366, "grad_norm": 0.2594257891178131, "learning_rate": 3.959330753949357e-06, "loss": 0.3381, "step": 30003 }, { "epoch": 3.0504270028466856, "grad_norm": 0.26830700039863586, "learning_rate": 3.958983642468139e-06, "loss": 0.3183, "step": 30004 }, { "epoch": 3.0505286701911345, "grad_norm": 0.2791474163532257, "learning_rate": 3.958636536231316e-06, "loss": 0.3347, "step": 30005 }, { "epoch": 3.0506303375355834, "grad_norm": 0.2842674255371094, "learning_rate": 3.958289435240633e-06, "loss": 0.3007, "step": 30006 }, { "epoch": 3.0507320048800324, "grad_norm": 0.2876490354537964, "learning_rate": 3.9579423394978404e-06, "loss": 0.3086, "step": 30007 }, { "epoch": 3.0508336722244813, "grad_norm": 0.26033151149749756, "learning_rate": 3.9575952490046875e-06, "loss": 0.3242, "step": 30008 }, { "epoch": 3.0509353395689303, "grad_norm": 0.26218244433403015, "learning_rate": 3.957248163762921e-06, "loss": 0.2861, "step": 30009 }, { "epoch": 3.051037006913379, "grad_norm": 0.25564539432525635, "learning_rate": 3.956901083774292e-06, "loss": 0.3152, "step": 30010 }, { "epoch": 3.0511386742578286, "grad_norm": 0.29774636030197144, "learning_rate": 3.9565540090405465e-06, "loss": 0.3463, "step": 30011 }, { "epoch": 3.0512403416022775, "grad_norm": 0.27584633231163025, "learning_rate": 3.956206939563433e-06, "loss": 0.2824, "step": 30012 }, { "epoch": 3.0513420089467265, "grad_norm": 0.27006053924560547, "learning_rate": 3.955859875344703e-06, "loss": 0.3332, "step": 30013 }, { "epoch": 3.0514436762911754, "grad_norm": 0.2977123260498047, "learning_rate": 3.955512816386101e-06, "loss": 0.2932, "step": 30014 }, { "epoch": 3.0515453436356244, "grad_norm": 0.27956122159957886, "learning_rate": 3.955165762689379e-06, "loss": 0.3259, "step": 30015 }, { "epoch": 3.0516470109800733, "grad_norm": 0.2720224857330322, "learning_rate": 3.954818714256282e-06, "loss": 0.3028, "step": 30016 }, { "epoch": 3.0517486783245222, "grad_norm": 0.261059045791626, "learning_rate": 3.95447167108856e-06, "loss": 0.3204, "step": 30017 }, { "epoch": 3.051850345668971, "grad_norm": 0.28166332840919495, "learning_rate": 3.954124633187962e-06, "loss": 0.3059, "step": 30018 }, { "epoch": 3.05195201301342, "grad_norm": 0.2811621129512787, "learning_rate": 3.953777600556235e-06, "loss": 0.2843, "step": 30019 }, { "epoch": 3.052053680357869, "grad_norm": 0.2608242630958557, "learning_rate": 3.953430573195129e-06, "loss": 0.2998, "step": 30020 }, { "epoch": 3.052155347702318, "grad_norm": 0.28954166173934937, "learning_rate": 3.9530835511063895e-06, "loss": 0.318, "step": 30021 }, { "epoch": 3.052257015046767, "grad_norm": 0.2652333378791809, "learning_rate": 3.9527365342917665e-06, "loss": 0.3181, "step": 30022 }, { "epoch": 3.052358682391216, "grad_norm": 0.2667381167411804, "learning_rate": 3.952389522753009e-06, "loss": 0.2778, "step": 30023 }, { "epoch": 3.052460349735665, "grad_norm": 0.2844228744506836, "learning_rate": 3.952042516491864e-06, "loss": 0.2812, "step": 30024 }, { "epoch": 3.0525620170801138, "grad_norm": 0.26098138093948364, "learning_rate": 3.9516955155100796e-06, "loss": 0.3267, "step": 30025 }, { "epoch": 3.0526636844245627, "grad_norm": 0.2583467662334442, "learning_rate": 3.951348519809403e-06, "loss": 0.2805, "step": 30026 }, { "epoch": 3.0527653517690116, "grad_norm": 0.2544786036014557, "learning_rate": 3.951001529391584e-06, "loss": 0.2971, "step": 30027 }, { "epoch": 3.0528670191134606, "grad_norm": 0.2740369737148285, "learning_rate": 3.950654544258371e-06, "loss": 0.3048, "step": 30028 }, { "epoch": 3.0529686864579095, "grad_norm": 0.2969023287296295, "learning_rate": 3.95030756441151e-06, "loss": 0.2888, "step": 30029 }, { "epoch": 3.0530703538023585, "grad_norm": 0.2772522568702698, "learning_rate": 3.949960589852752e-06, "loss": 0.3214, "step": 30030 }, { "epoch": 3.053172021146808, "grad_norm": 0.29208141565322876, "learning_rate": 3.94961362058384e-06, "loss": 0.2878, "step": 30031 }, { "epoch": 3.053273688491257, "grad_norm": 0.27688512206077576, "learning_rate": 3.949266656606525e-06, "loss": 0.3326, "step": 30032 }, { "epoch": 3.0533753558357057, "grad_norm": 0.2775430381298065, "learning_rate": 3.948919697922557e-06, "loss": 0.3382, "step": 30033 }, { "epoch": 3.0534770231801547, "grad_norm": 0.2731577754020691, "learning_rate": 3.948572744533681e-06, "loss": 0.3025, "step": 30034 }, { "epoch": 3.0535786905246036, "grad_norm": 0.28524133563041687, "learning_rate": 3.948225796441647e-06, "loss": 0.3025, "step": 30035 }, { "epoch": 3.0536803578690526, "grad_norm": 0.2628609836101532, "learning_rate": 3.947878853648199e-06, "loss": 0.2762, "step": 30036 }, { "epoch": 3.0537820252135015, "grad_norm": 0.27902117371559143, "learning_rate": 3.9475319161550876e-06, "loss": 0.3215, "step": 30037 }, { "epoch": 3.0538836925579504, "grad_norm": 0.26965150237083435, "learning_rate": 3.947184983964062e-06, "loss": 0.2953, "step": 30038 }, { "epoch": 3.0539853599023994, "grad_norm": 0.3047272562980652, "learning_rate": 3.946838057076867e-06, "loss": 0.3288, "step": 30039 }, { "epoch": 3.0540870272468483, "grad_norm": 0.29474571347236633, "learning_rate": 3.946491135495253e-06, "loss": 0.3011, "step": 30040 }, { "epoch": 3.0541886945912973, "grad_norm": 0.289778470993042, "learning_rate": 3.9461442192209644e-06, "loss": 0.3133, "step": 30041 }, { "epoch": 3.054290361935746, "grad_norm": 0.26832103729248047, "learning_rate": 3.945797308255751e-06, "loss": 0.3225, "step": 30042 }, { "epoch": 3.054392029280195, "grad_norm": 0.2694719731807709, "learning_rate": 3.945450402601362e-06, "loss": 0.3273, "step": 30043 }, { "epoch": 3.054493696624644, "grad_norm": 0.26159971952438354, "learning_rate": 3.945103502259542e-06, "loss": 0.3319, "step": 30044 }, { "epoch": 3.054595363969093, "grad_norm": 0.2843317985534668, "learning_rate": 3.944756607232042e-06, "loss": 0.3055, "step": 30045 }, { "epoch": 3.054697031313542, "grad_norm": 0.28042730689048767, "learning_rate": 3.944409717520603e-06, "loss": 0.323, "step": 30046 }, { "epoch": 3.054798698657991, "grad_norm": 0.29111239314079285, "learning_rate": 3.944062833126979e-06, "loss": 0.3057, "step": 30047 }, { "epoch": 3.05490036600244, "grad_norm": 0.2897432744503021, "learning_rate": 3.943715954052917e-06, "loss": 0.3142, "step": 30048 }, { "epoch": 3.055002033346889, "grad_norm": 0.2810891270637512, "learning_rate": 3.943369080300162e-06, "loss": 0.2844, "step": 30049 }, { "epoch": 3.0551037006913377, "grad_norm": 0.2663689851760864, "learning_rate": 3.943022211870464e-06, "loss": 0.2963, "step": 30050 }, { "epoch": 3.0552053680357867, "grad_norm": 0.2696991562843323, "learning_rate": 3.9426753487655665e-06, "loss": 0.3221, "step": 30051 }, { "epoch": 3.055307035380236, "grad_norm": 0.27907291054725647, "learning_rate": 3.94232849098722e-06, "loss": 0.3105, "step": 30052 }, { "epoch": 3.055408702724685, "grad_norm": 0.2541319727897644, "learning_rate": 3.941981638537172e-06, "loss": 0.323, "step": 30053 }, { "epoch": 3.055510370069134, "grad_norm": 0.272274911403656, "learning_rate": 3.941634791417167e-06, "loss": 0.3062, "step": 30054 }, { "epoch": 3.055612037413583, "grad_norm": 0.2771167755126953, "learning_rate": 3.9412879496289585e-06, "loss": 0.2903, "step": 30055 }, { "epoch": 3.055713704758032, "grad_norm": 0.2613256275653839, "learning_rate": 3.940941113174286e-06, "loss": 0.3281, "step": 30056 }, { "epoch": 3.0558153721024808, "grad_norm": 0.29059308767318726, "learning_rate": 3.940594282054901e-06, "loss": 0.3294, "step": 30057 }, { "epoch": 3.0559170394469297, "grad_norm": 0.2913022041320801, "learning_rate": 3.9402474562725525e-06, "loss": 0.2996, "step": 30058 }, { "epoch": 3.0560187067913787, "grad_norm": 0.28978967666625977, "learning_rate": 3.939900635828983e-06, "loss": 0.3009, "step": 30059 }, { "epoch": 3.0561203741358276, "grad_norm": 0.2642284035682678, "learning_rate": 3.939553820725945e-06, "loss": 0.2978, "step": 30060 }, { "epoch": 3.0562220414802765, "grad_norm": 0.2873004376888275, "learning_rate": 3.939207010965179e-06, "loss": 0.3149, "step": 30061 }, { "epoch": 3.0563237088247255, "grad_norm": 0.2800624370574951, "learning_rate": 3.938860206548437e-06, "loss": 0.2867, "step": 30062 }, { "epoch": 3.0564253761691744, "grad_norm": 0.2784019410610199, "learning_rate": 3.938513407477469e-06, "loss": 0.3222, "step": 30063 }, { "epoch": 3.0565270435136234, "grad_norm": 0.2588953673839569, "learning_rate": 3.938166613754013e-06, "loss": 0.2781, "step": 30064 }, { "epoch": 3.0566287108580723, "grad_norm": 0.255228728055954, "learning_rate": 3.937819825379826e-06, "loss": 0.3176, "step": 30065 }, { "epoch": 3.0567303782025212, "grad_norm": 0.2791648507118225, "learning_rate": 3.937473042356645e-06, "loss": 0.3059, "step": 30066 }, { "epoch": 3.05683204554697, "grad_norm": 0.31190821528434753, "learning_rate": 3.937126264686225e-06, "loss": 0.2942, "step": 30067 }, { "epoch": 3.056933712891419, "grad_norm": 0.2690792679786682, "learning_rate": 3.9367794923703125e-06, "loss": 0.3078, "step": 30068 }, { "epoch": 3.057035380235868, "grad_norm": 0.2785329520702362, "learning_rate": 3.9364327254106485e-06, "loss": 0.2962, "step": 30069 }, { "epoch": 3.057137047580317, "grad_norm": 0.2741243839263916, "learning_rate": 3.936085963808988e-06, "loss": 0.3267, "step": 30070 }, { "epoch": 3.057238714924766, "grad_norm": 0.27557656168937683, "learning_rate": 3.935739207567071e-06, "loss": 0.3172, "step": 30071 }, { "epoch": 3.0573403822692153, "grad_norm": 0.30164438486099243, "learning_rate": 3.935392456686645e-06, "loss": 0.3415, "step": 30072 }, { "epoch": 3.0574420496136643, "grad_norm": 0.27620023488998413, "learning_rate": 3.935045711169462e-06, "loss": 0.3135, "step": 30073 }, { "epoch": 3.0575437169581132, "grad_norm": 0.28530070185661316, "learning_rate": 3.934698971017264e-06, "loss": 0.2992, "step": 30074 }, { "epoch": 3.057645384302562, "grad_norm": 0.27225711941719055, "learning_rate": 3.934352236231801e-06, "loss": 0.3516, "step": 30075 }, { "epoch": 3.057747051647011, "grad_norm": 0.2500695586204529, "learning_rate": 3.934005506814816e-06, "loss": 0.3051, "step": 30076 }, { "epoch": 3.05784871899146, "grad_norm": 0.263906866312027, "learning_rate": 3.933658782768058e-06, "loss": 0.3382, "step": 30077 }, { "epoch": 3.057950386335909, "grad_norm": 0.26181310415267944, "learning_rate": 3.933312064093277e-06, "loss": 0.2774, "step": 30078 }, { "epoch": 3.058052053680358, "grad_norm": 0.30263152718544006, "learning_rate": 3.9329653507922115e-06, "loss": 0.3155, "step": 30079 }, { "epoch": 3.058153721024807, "grad_norm": 0.26986223459243774, "learning_rate": 3.932618642866617e-06, "loss": 0.2902, "step": 30080 }, { "epoch": 3.058255388369256, "grad_norm": 0.2755868434906006, "learning_rate": 3.932271940318234e-06, "loss": 0.3253, "step": 30081 }, { "epoch": 3.0583570557137048, "grad_norm": 0.26664987206459045, "learning_rate": 3.931925243148811e-06, "loss": 0.3014, "step": 30082 }, { "epoch": 3.0584587230581537, "grad_norm": 0.28184235095977783, "learning_rate": 3.931578551360097e-06, "loss": 0.2917, "step": 30083 }, { "epoch": 3.0585603904026026, "grad_norm": 0.2623361349105835, "learning_rate": 3.931231864953834e-06, "loss": 0.3131, "step": 30084 }, { "epoch": 3.0586620577470516, "grad_norm": 0.2811509072780609, "learning_rate": 3.930885183931774e-06, "loss": 0.2953, "step": 30085 }, { "epoch": 3.0587637250915005, "grad_norm": 0.2700903117656708, "learning_rate": 3.930538508295657e-06, "loss": 0.3031, "step": 30086 }, { "epoch": 3.0588653924359495, "grad_norm": 0.2589237689971924, "learning_rate": 3.9301918380472334e-06, "loss": 0.3078, "step": 30087 }, { "epoch": 3.0589670597803984, "grad_norm": 0.26846250891685486, "learning_rate": 3.929845173188252e-06, "loss": 0.3432, "step": 30088 }, { "epoch": 3.0590687271248473, "grad_norm": 0.2738886773586273, "learning_rate": 3.929498513720453e-06, "loss": 0.3215, "step": 30089 }, { "epoch": 3.0591703944692963, "grad_norm": 0.2560020983219147, "learning_rate": 3.929151859645588e-06, "loss": 0.295, "step": 30090 }, { "epoch": 3.0592720618137452, "grad_norm": 0.3013874292373657, "learning_rate": 3.9288052109654e-06, "loss": 0.2906, "step": 30091 }, { "epoch": 3.059373729158194, "grad_norm": 0.28625333309173584, "learning_rate": 3.928458567681636e-06, "loss": 0.3156, "step": 30092 }, { "epoch": 3.0594753965026436, "grad_norm": 0.2864522337913513, "learning_rate": 3.928111929796046e-06, "loss": 0.3252, "step": 30093 }, { "epoch": 3.0595770638470925, "grad_norm": 0.28859394788742065, "learning_rate": 3.927765297310372e-06, "loss": 0.319, "step": 30094 }, { "epoch": 3.0596787311915414, "grad_norm": 0.2899543046951294, "learning_rate": 3.927418670226362e-06, "loss": 0.3013, "step": 30095 }, { "epoch": 3.0597803985359904, "grad_norm": 0.26791492104530334, "learning_rate": 3.927072048545761e-06, "loss": 0.3373, "step": 30096 }, { "epoch": 3.0598820658804393, "grad_norm": 0.279054194688797, "learning_rate": 3.926725432270315e-06, "loss": 0.2919, "step": 30097 }, { "epoch": 3.0599837332248883, "grad_norm": 0.28579413890838623, "learning_rate": 3.926378821401775e-06, "loss": 0.3404, "step": 30098 }, { "epoch": 3.060085400569337, "grad_norm": 0.27468162775039673, "learning_rate": 3.926032215941881e-06, "loss": 0.3041, "step": 30099 }, { "epoch": 3.060187067913786, "grad_norm": 0.28063881397247314, "learning_rate": 3.925685615892382e-06, "loss": 0.3378, "step": 30100 }, { "epoch": 3.060288735258235, "grad_norm": 0.27382978796958923, "learning_rate": 3.9253390212550225e-06, "loss": 0.3302, "step": 30101 }, { "epoch": 3.060390402602684, "grad_norm": 0.2779090404510498, "learning_rate": 3.924992432031549e-06, "loss": 0.2921, "step": 30102 }, { "epoch": 3.060492069947133, "grad_norm": 0.271940678358078, "learning_rate": 3.924645848223711e-06, "loss": 0.309, "step": 30103 }, { "epoch": 3.060593737291582, "grad_norm": 0.2687663435935974, "learning_rate": 3.92429926983325e-06, "loss": 0.3237, "step": 30104 }, { "epoch": 3.060695404636031, "grad_norm": 0.2688613533973694, "learning_rate": 3.923952696861914e-06, "loss": 0.3098, "step": 30105 }, { "epoch": 3.06079707198048, "grad_norm": 0.29046106338500977, "learning_rate": 3.923606129311448e-06, "loss": 0.3151, "step": 30106 }, { "epoch": 3.0608987393249287, "grad_norm": 0.2910907566547394, "learning_rate": 3.9232595671835975e-06, "loss": 0.3021, "step": 30107 }, { "epoch": 3.0610004066693777, "grad_norm": 0.2601582407951355, "learning_rate": 3.922913010480112e-06, "loss": 0.3208, "step": 30108 }, { "epoch": 3.0611020740138266, "grad_norm": 0.2623897194862366, "learning_rate": 3.922566459202734e-06, "loss": 0.325, "step": 30109 }, { "epoch": 3.0612037413582756, "grad_norm": 0.2822189927101135, "learning_rate": 3.92221991335321e-06, "loss": 0.311, "step": 30110 }, { "epoch": 3.0613054087027245, "grad_norm": 0.261584609746933, "learning_rate": 3.9218733729332855e-06, "loss": 0.3137, "step": 30111 }, { "epoch": 3.0614070760471734, "grad_norm": 0.27801766991615295, "learning_rate": 3.921526837944706e-06, "loss": 0.2829, "step": 30112 }, { "epoch": 3.061508743391623, "grad_norm": 0.29068270325660706, "learning_rate": 3.92118030838922e-06, "loss": 0.3137, "step": 30113 }, { "epoch": 3.0616104107360718, "grad_norm": 0.2663743495941162, "learning_rate": 3.920833784268569e-06, "loss": 0.319, "step": 30114 }, { "epoch": 3.0617120780805207, "grad_norm": 0.27210554480552673, "learning_rate": 3.920487265584504e-06, "loss": 0.2969, "step": 30115 }, { "epoch": 3.0618137454249696, "grad_norm": 0.29833683371543884, "learning_rate": 3.9201407523387645e-06, "loss": 0.3331, "step": 30116 }, { "epoch": 3.0619154127694186, "grad_norm": 0.2671835124492645, "learning_rate": 3.9197942445331005e-06, "loss": 0.3199, "step": 30117 }, { "epoch": 3.0620170801138675, "grad_norm": 0.2796388864517212, "learning_rate": 3.919447742169257e-06, "loss": 0.3273, "step": 30118 }, { "epoch": 3.0621187474583165, "grad_norm": 0.26807376742362976, "learning_rate": 3.919101245248978e-06, "loss": 0.3105, "step": 30119 }, { "epoch": 3.0622204148027654, "grad_norm": 0.2578774094581604, "learning_rate": 3.918754753774011e-06, "loss": 0.2951, "step": 30120 }, { "epoch": 3.0623220821472144, "grad_norm": 0.26231449842453003, "learning_rate": 3.9184082677461e-06, "loss": 0.3252, "step": 30121 }, { "epoch": 3.0624237494916633, "grad_norm": 0.26737481355667114, "learning_rate": 3.918061787166991e-06, "loss": 0.2959, "step": 30122 }, { "epoch": 3.0625254168361122, "grad_norm": 0.28619590401649475, "learning_rate": 3.91771531203843e-06, "loss": 0.29, "step": 30123 }, { "epoch": 3.062627084180561, "grad_norm": 0.2514793574810028, "learning_rate": 3.917368842362162e-06, "loss": 0.3076, "step": 30124 }, { "epoch": 3.06272875152501, "grad_norm": 0.2829708456993103, "learning_rate": 3.917022378139933e-06, "loss": 0.2817, "step": 30125 }, { "epoch": 3.062830418869459, "grad_norm": 0.2675977349281311, "learning_rate": 3.916675919373487e-06, "loss": 0.3222, "step": 30126 }, { "epoch": 3.062932086213908, "grad_norm": 0.2625488042831421, "learning_rate": 3.91632946606457e-06, "loss": 0.309, "step": 30127 }, { "epoch": 3.063033753558357, "grad_norm": 0.2812146842479706, "learning_rate": 3.91598301821493e-06, "loss": 0.2626, "step": 30128 }, { "epoch": 3.063135420902806, "grad_norm": 0.2824552655220032, "learning_rate": 3.915636575826307e-06, "loss": 0.3113, "step": 30129 }, { "epoch": 3.063237088247255, "grad_norm": 0.2721495032310486, "learning_rate": 3.915290138900451e-06, "loss": 0.3118, "step": 30130 }, { "epoch": 3.0633387555917038, "grad_norm": 0.2870686948299408, "learning_rate": 3.914943707439104e-06, "loss": 0.2989, "step": 30131 }, { "epoch": 3.0634404229361527, "grad_norm": 0.2686498165130615, "learning_rate": 3.914597281444013e-06, "loss": 0.3034, "step": 30132 }, { "epoch": 3.0635420902806016, "grad_norm": 0.28722941875457764, "learning_rate": 3.914250860916925e-06, "loss": 0.3059, "step": 30133 }, { "epoch": 3.063643757625051, "grad_norm": 0.29626578092575073, "learning_rate": 3.913904445859582e-06, "loss": 0.3509, "step": 30134 }, { "epoch": 3.0637454249695, "grad_norm": 0.2596275806427002, "learning_rate": 3.9135580362737305e-06, "loss": 0.3102, "step": 30135 }, { "epoch": 3.063847092313949, "grad_norm": 0.27361440658569336, "learning_rate": 3.913211632161115e-06, "loss": 0.3481, "step": 30136 }, { "epoch": 3.063948759658398, "grad_norm": 0.2866482436656952, "learning_rate": 3.91286523352348e-06, "loss": 0.3075, "step": 30137 }, { "epoch": 3.064050427002847, "grad_norm": 0.2845298647880554, "learning_rate": 3.912518840362573e-06, "loss": 0.3372, "step": 30138 }, { "epoch": 3.0641520943472957, "grad_norm": 0.27734094858169556, "learning_rate": 3.912172452680138e-06, "loss": 0.3171, "step": 30139 }, { "epoch": 3.0642537616917447, "grad_norm": 0.2599627375602722, "learning_rate": 3.911826070477919e-06, "loss": 0.3127, "step": 30140 }, { "epoch": 3.0643554290361936, "grad_norm": 0.29223769903182983, "learning_rate": 3.911479693757661e-06, "loss": 0.3544, "step": 30141 }, { "epoch": 3.0644570963806426, "grad_norm": 0.2659246623516083, "learning_rate": 3.911133322521111e-06, "loss": 0.324, "step": 30142 }, { "epoch": 3.0645587637250915, "grad_norm": 0.28816136717796326, "learning_rate": 3.910786956770012e-06, "loss": 0.2857, "step": 30143 }, { "epoch": 3.0646604310695404, "grad_norm": 0.25819656252861023, "learning_rate": 3.910440596506109e-06, "loss": 0.3049, "step": 30144 }, { "epoch": 3.0647620984139894, "grad_norm": 0.2698293924331665, "learning_rate": 3.9100942417311485e-06, "loss": 0.3151, "step": 30145 }, { "epoch": 3.0648637657584383, "grad_norm": 0.2515006959438324, "learning_rate": 3.909747892446873e-06, "loss": 0.3293, "step": 30146 }, { "epoch": 3.0649654331028873, "grad_norm": 0.27386215329170227, "learning_rate": 3.9094015486550295e-06, "loss": 0.293, "step": 30147 }, { "epoch": 3.065067100447336, "grad_norm": 0.3140586018562317, "learning_rate": 3.909055210357361e-06, "loss": 0.3193, "step": 30148 }, { "epoch": 3.065168767791785, "grad_norm": 0.25283217430114746, "learning_rate": 3.908708877555613e-06, "loss": 0.3022, "step": 30149 }, { "epoch": 3.065270435136234, "grad_norm": 0.24737171828746796, "learning_rate": 3.908362550251532e-06, "loss": 0.3153, "step": 30150 }, { "epoch": 3.065372102480683, "grad_norm": 0.2934269607067108, "learning_rate": 3.908016228446859e-06, "loss": 0.3535, "step": 30151 }, { "epoch": 3.065473769825132, "grad_norm": 0.26723963022232056, "learning_rate": 3.907669912143342e-06, "loss": 0.2981, "step": 30152 }, { "epoch": 3.065575437169581, "grad_norm": 0.261370986700058, "learning_rate": 3.907323601342723e-06, "loss": 0.2928, "step": 30153 }, { "epoch": 3.0656771045140303, "grad_norm": 0.28712254762649536, "learning_rate": 3.906977296046749e-06, "loss": 0.3257, "step": 30154 }, { "epoch": 3.0657787718584792, "grad_norm": 0.28109467029571533, "learning_rate": 3.9066309962571645e-06, "loss": 0.2979, "step": 30155 }, { "epoch": 3.065880439202928, "grad_norm": 0.28181830048561096, "learning_rate": 3.906284701975712e-06, "loss": 0.3091, "step": 30156 }, { "epoch": 3.065982106547377, "grad_norm": 0.2748169004917145, "learning_rate": 3.905938413204138e-06, "loss": 0.2935, "step": 30157 }, { "epoch": 3.066083773891826, "grad_norm": 0.2943355441093445, "learning_rate": 3.905592129944186e-06, "loss": 0.3159, "step": 30158 }, { "epoch": 3.066185441236275, "grad_norm": 0.2718436121940613, "learning_rate": 3.9052458521976e-06, "loss": 0.2764, "step": 30159 }, { "epoch": 3.066287108580724, "grad_norm": 0.3010979890823364, "learning_rate": 3.9048995799661266e-06, "loss": 0.296, "step": 30160 }, { "epoch": 3.066388775925173, "grad_norm": 0.27400872111320496, "learning_rate": 3.904553313251508e-06, "loss": 0.3213, "step": 30161 }, { "epoch": 3.066490443269622, "grad_norm": 0.2693694829940796, "learning_rate": 3.9042070520554895e-06, "loss": 0.3, "step": 30162 }, { "epoch": 3.0665921106140708, "grad_norm": 0.26724973320961, "learning_rate": 3.903860796379816e-06, "loss": 0.2951, "step": 30163 }, { "epoch": 3.0666937779585197, "grad_norm": 0.2829812169075012, "learning_rate": 3.903514546226231e-06, "loss": 0.3261, "step": 30164 }, { "epoch": 3.0667954453029687, "grad_norm": 0.2641383707523346, "learning_rate": 3.903168301596479e-06, "loss": 0.3331, "step": 30165 }, { "epoch": 3.0668971126474176, "grad_norm": 0.27866366505622864, "learning_rate": 3.902822062492304e-06, "loss": 0.299, "step": 30166 }, { "epoch": 3.0669987799918665, "grad_norm": 0.2666763365268707, "learning_rate": 3.902475828915452e-06, "loss": 0.3309, "step": 30167 }, { "epoch": 3.0671004473363155, "grad_norm": 0.32291701436042786, "learning_rate": 3.902129600867665e-06, "loss": 0.2908, "step": 30168 }, { "epoch": 3.0672021146807644, "grad_norm": 0.26622042059898376, "learning_rate": 3.901783378350689e-06, "loss": 0.3057, "step": 30169 }, { "epoch": 3.0673037820252134, "grad_norm": 0.2858833968639374, "learning_rate": 3.901437161366266e-06, "loss": 0.2884, "step": 30170 }, { "epoch": 3.0674054493696623, "grad_norm": 0.26110246777534485, "learning_rate": 3.901090949916142e-06, "loss": 0.2854, "step": 30171 }, { "epoch": 3.0675071167141112, "grad_norm": 0.2897093594074249, "learning_rate": 3.900744744002062e-06, "loss": 0.3471, "step": 30172 }, { "epoch": 3.06760878405856, "grad_norm": 0.2689504027366638, "learning_rate": 3.900398543625767e-06, "loss": 0.3328, "step": 30173 }, { "epoch": 3.067710451403009, "grad_norm": 0.3109839856624603, "learning_rate": 3.900052348789004e-06, "loss": 0.3354, "step": 30174 }, { "epoch": 3.0678121187474585, "grad_norm": 0.2764013707637787, "learning_rate": 3.899706159493516e-06, "loss": 0.3059, "step": 30175 }, { "epoch": 3.0679137860919075, "grad_norm": 0.2797217071056366, "learning_rate": 3.899359975741046e-06, "loss": 0.3111, "step": 30176 }, { "epoch": 3.0680154534363564, "grad_norm": 0.2807963490486145, "learning_rate": 3.89901379753334e-06, "loss": 0.2649, "step": 30177 }, { "epoch": 3.0681171207808053, "grad_norm": 0.29329967498779297, "learning_rate": 3.89866762487214e-06, "loss": 0.3317, "step": 30178 }, { "epoch": 3.0682187881252543, "grad_norm": 0.28198757767677307, "learning_rate": 3.8983214577591895e-06, "loss": 0.2944, "step": 30179 }, { "epoch": 3.0683204554697032, "grad_norm": 0.26131314039230347, "learning_rate": 3.897975296196236e-06, "loss": 0.3194, "step": 30180 }, { "epoch": 3.068422122814152, "grad_norm": 0.28517791628837585, "learning_rate": 3.89762914018502e-06, "loss": 0.3014, "step": 30181 }, { "epoch": 3.068523790158601, "grad_norm": 0.2699129283428192, "learning_rate": 3.897282989727289e-06, "loss": 0.2799, "step": 30182 }, { "epoch": 3.06862545750305, "grad_norm": 0.26526474952697754, "learning_rate": 3.89693684482478e-06, "loss": 0.3206, "step": 30183 }, { "epoch": 3.068727124847499, "grad_norm": 0.2627302408218384, "learning_rate": 3.8965907054792425e-06, "loss": 0.3049, "step": 30184 }, { "epoch": 3.068828792191948, "grad_norm": 0.2548132538795471, "learning_rate": 3.89624457169242e-06, "loss": 0.2578, "step": 30185 }, { "epoch": 3.068930459536397, "grad_norm": 0.2705235481262207, "learning_rate": 3.895898443466053e-06, "loss": 0.3185, "step": 30186 }, { "epoch": 3.069032126880846, "grad_norm": 0.28235945105552673, "learning_rate": 3.895552320801891e-06, "loss": 0.29, "step": 30187 }, { "epoch": 3.0691337942252948, "grad_norm": 0.2684817910194397, "learning_rate": 3.8952062037016695e-06, "loss": 0.3066, "step": 30188 }, { "epoch": 3.0692354615697437, "grad_norm": 0.2785980999469757, "learning_rate": 3.8948600921671385e-06, "loss": 0.2956, "step": 30189 }, { "epoch": 3.0693371289141926, "grad_norm": 0.25529080629348755, "learning_rate": 3.894513986200041e-06, "loss": 0.3393, "step": 30190 }, { "epoch": 3.0694387962586416, "grad_norm": 0.27340060472488403, "learning_rate": 3.894167885802117e-06, "loss": 0.2972, "step": 30191 }, { "epoch": 3.0695404636030905, "grad_norm": 0.25385257601737976, "learning_rate": 3.893821790975115e-06, "loss": 0.2933, "step": 30192 }, { "epoch": 3.0696421309475395, "grad_norm": 0.2639765739440918, "learning_rate": 3.893475701720773e-06, "loss": 0.3218, "step": 30193 }, { "epoch": 3.0697437982919884, "grad_norm": 0.2791183590888977, "learning_rate": 3.893129618040839e-06, "loss": 0.2967, "step": 30194 }, { "epoch": 3.069845465636438, "grad_norm": 0.2766595780849457, "learning_rate": 3.8927835399370555e-06, "loss": 0.3123, "step": 30195 }, { "epoch": 3.0699471329808867, "grad_norm": 0.29163798689842224, "learning_rate": 3.892437467411164e-06, "loss": 0.285, "step": 30196 }, { "epoch": 3.0700488003253357, "grad_norm": 0.28576895594596863, "learning_rate": 3.892091400464912e-06, "loss": 0.2969, "step": 30197 }, { "epoch": 3.0701504676697846, "grad_norm": 0.2830350697040558, "learning_rate": 3.891745339100037e-06, "loss": 0.324, "step": 30198 }, { "epoch": 3.0702521350142336, "grad_norm": 0.28222644329071045, "learning_rate": 3.891399283318287e-06, "loss": 0.3084, "step": 30199 }, { "epoch": 3.0703538023586825, "grad_norm": 0.2753750681877136, "learning_rate": 3.891053233121404e-06, "loss": 0.3038, "step": 30200 }, { "epoch": 3.0704554697031314, "grad_norm": 0.2677927315235138, "learning_rate": 3.890707188511131e-06, "loss": 0.3353, "step": 30201 }, { "epoch": 3.0705571370475804, "grad_norm": 0.26697391271591187, "learning_rate": 3.890361149489214e-06, "loss": 0.3084, "step": 30202 }, { "epoch": 3.0706588043920293, "grad_norm": 0.26788729429244995, "learning_rate": 3.890015116057391e-06, "loss": 0.3153, "step": 30203 }, { "epoch": 3.0707604717364783, "grad_norm": 0.28607627749443054, "learning_rate": 3.889669088217408e-06, "loss": 0.3291, "step": 30204 }, { "epoch": 3.070862139080927, "grad_norm": 0.2885741889476776, "learning_rate": 3.889323065971011e-06, "loss": 0.3205, "step": 30205 }, { "epoch": 3.070963806425376, "grad_norm": 0.2636062800884247, "learning_rate": 3.888977049319937e-06, "loss": 0.3251, "step": 30206 }, { "epoch": 3.071065473769825, "grad_norm": 0.2828027009963989, "learning_rate": 3.888631038265935e-06, "loss": 0.3227, "step": 30207 }, { "epoch": 3.071167141114274, "grad_norm": 0.2830902338027954, "learning_rate": 3.888285032810744e-06, "loss": 0.2981, "step": 30208 }, { "epoch": 3.071268808458723, "grad_norm": 0.28242412209510803, "learning_rate": 3.8879390329561086e-06, "loss": 0.3221, "step": 30209 }, { "epoch": 3.071370475803172, "grad_norm": 0.27911442518234253, "learning_rate": 3.887593038703775e-06, "loss": 0.2985, "step": 30210 }, { "epoch": 3.071472143147621, "grad_norm": 0.2790209650993347, "learning_rate": 3.887247050055481e-06, "loss": 0.2955, "step": 30211 }, { "epoch": 3.07157381049207, "grad_norm": 0.26319798827171326, "learning_rate": 3.886901067012974e-06, "loss": 0.3287, "step": 30212 }, { "epoch": 3.0716754778365187, "grad_norm": 0.2541523575782776, "learning_rate": 3.886555089577991e-06, "loss": 0.3138, "step": 30213 }, { "epoch": 3.0717771451809677, "grad_norm": 0.27452215552330017, "learning_rate": 3.8862091177522805e-06, "loss": 0.317, "step": 30214 }, { "epoch": 3.0718788125254166, "grad_norm": 0.26670584082603455, "learning_rate": 3.885863151537587e-06, "loss": 0.3187, "step": 30215 }, { "epoch": 3.071980479869866, "grad_norm": 0.25403931736946106, "learning_rate": 3.885517190935646e-06, "loss": 0.3215, "step": 30216 }, { "epoch": 3.072082147214315, "grad_norm": 0.27026230096817017, "learning_rate": 3.885171235948208e-06, "loss": 0.3121, "step": 30217 }, { "epoch": 3.072183814558764, "grad_norm": 0.25725632905960083, "learning_rate": 3.884825286577009e-06, "loss": 0.3034, "step": 30218 }, { "epoch": 3.072285481903213, "grad_norm": 0.25414055585861206, "learning_rate": 3.884479342823797e-06, "loss": 0.3318, "step": 30219 }, { "epoch": 3.0723871492476618, "grad_norm": 0.27401822805404663, "learning_rate": 3.884133404690314e-06, "loss": 0.3336, "step": 30220 }, { "epoch": 3.0724888165921107, "grad_norm": 0.2781793177127838, "learning_rate": 3.883787472178299e-06, "loss": 0.2964, "step": 30221 }, { "epoch": 3.0725904839365596, "grad_norm": 0.26779836416244507, "learning_rate": 3.883441545289501e-06, "loss": 0.3434, "step": 30222 }, { "epoch": 3.0726921512810086, "grad_norm": 0.26247307658195496, "learning_rate": 3.883095624025656e-06, "loss": 0.2947, "step": 30223 }, { "epoch": 3.0727938186254575, "grad_norm": 0.2592070698738098, "learning_rate": 3.88274970838851e-06, "loss": 0.2783, "step": 30224 }, { "epoch": 3.0728954859699065, "grad_norm": 0.26975393295288086, "learning_rate": 3.882403798379808e-06, "loss": 0.3225, "step": 30225 }, { "epoch": 3.0729971533143554, "grad_norm": 0.28164467215538025, "learning_rate": 3.882057894001287e-06, "loss": 0.3072, "step": 30226 }, { "epoch": 3.0730988206588044, "grad_norm": 0.29049596190452576, "learning_rate": 3.881711995254696e-06, "loss": 0.3276, "step": 30227 }, { "epoch": 3.0732004880032533, "grad_norm": 0.2653159499168396, "learning_rate": 3.881366102141772e-06, "loss": 0.2696, "step": 30228 }, { "epoch": 3.0733021553477022, "grad_norm": 0.2882682681083679, "learning_rate": 3.881020214664258e-06, "loss": 0.2968, "step": 30229 }, { "epoch": 3.073403822692151, "grad_norm": 0.27401667833328247, "learning_rate": 3.880674332823903e-06, "loss": 0.3193, "step": 30230 }, { "epoch": 3.0735054900366, "grad_norm": 0.29474738240242004, "learning_rate": 3.88032845662244e-06, "loss": 0.3301, "step": 30231 }, { "epoch": 3.073607157381049, "grad_norm": 0.26358646154403687, "learning_rate": 3.87998258606162e-06, "loss": 0.3206, "step": 30232 }, { "epoch": 3.073708824725498, "grad_norm": 0.25399839878082275, "learning_rate": 3.87963672114318e-06, "loss": 0.3252, "step": 30233 }, { "epoch": 3.073810492069947, "grad_norm": 0.2705322504043579, "learning_rate": 3.879290861868863e-06, "loss": 0.3339, "step": 30234 }, { "epoch": 3.073912159414396, "grad_norm": 0.2739425301551819, "learning_rate": 3.878945008240414e-06, "loss": 0.357, "step": 30235 }, { "epoch": 3.0740138267588453, "grad_norm": 0.27866920828819275, "learning_rate": 3.8785991602595715e-06, "loss": 0.341, "step": 30236 }, { "epoch": 3.074115494103294, "grad_norm": 0.2883622348308563, "learning_rate": 3.878253317928083e-06, "loss": 0.3248, "step": 30237 }, { "epoch": 3.074217161447743, "grad_norm": 0.28227564692497253, "learning_rate": 3.877907481247685e-06, "loss": 0.3043, "step": 30238 }, { "epoch": 3.074318828792192, "grad_norm": 0.2691376507282257, "learning_rate": 3.877561650220122e-06, "loss": 0.329, "step": 30239 }, { "epoch": 3.074420496136641, "grad_norm": 0.2630472183227539, "learning_rate": 3.87721582484714e-06, "loss": 0.3152, "step": 30240 }, { "epoch": 3.07452216348109, "grad_norm": 0.27257657051086426, "learning_rate": 3.876870005130475e-06, "loss": 0.3162, "step": 30241 }, { "epoch": 3.074623830825539, "grad_norm": 0.28121769428253174, "learning_rate": 3.876524191071874e-06, "loss": 0.3119, "step": 30242 }, { "epoch": 3.074725498169988, "grad_norm": 0.2656767666339874, "learning_rate": 3.876178382673076e-06, "loss": 0.3111, "step": 30243 }, { "epoch": 3.074827165514437, "grad_norm": 0.28670912981033325, "learning_rate": 3.875832579935822e-06, "loss": 0.3025, "step": 30244 }, { "epoch": 3.0749288328588857, "grad_norm": 0.27502599358558655, "learning_rate": 3.875486782861861e-06, "loss": 0.3193, "step": 30245 }, { "epoch": 3.0750305002033347, "grad_norm": 0.2918727993965149, "learning_rate": 3.875140991452928e-06, "loss": 0.3348, "step": 30246 }, { "epoch": 3.0751321675477836, "grad_norm": 0.26819419860839844, "learning_rate": 3.874795205710768e-06, "loss": 0.328, "step": 30247 }, { "epoch": 3.0752338348922326, "grad_norm": 0.28787288069725037, "learning_rate": 3.874449425637121e-06, "loss": 0.324, "step": 30248 }, { "epoch": 3.0753355022366815, "grad_norm": 0.2896915376186371, "learning_rate": 3.87410365123373e-06, "loss": 0.3267, "step": 30249 }, { "epoch": 3.0754371695811304, "grad_norm": 0.2706798315048218, "learning_rate": 3.87375788250234e-06, "loss": 0.3121, "step": 30250 }, { "epoch": 3.0755388369255794, "grad_norm": 0.264935165643692, "learning_rate": 3.873412119444688e-06, "loss": 0.3082, "step": 30251 }, { "epoch": 3.0756405042700283, "grad_norm": 0.2758117914199829, "learning_rate": 3.8730663620625195e-06, "loss": 0.3118, "step": 30252 }, { "epoch": 3.0757421716144773, "grad_norm": 0.28327494859695435, "learning_rate": 3.8727206103575735e-06, "loss": 0.3075, "step": 30253 }, { "epoch": 3.075843838958926, "grad_norm": 0.2857191264629364, "learning_rate": 3.872374864331592e-06, "loss": 0.2843, "step": 30254 }, { "epoch": 3.075945506303375, "grad_norm": 0.28357410430908203, "learning_rate": 3.872029123986321e-06, "loss": 0.3426, "step": 30255 }, { "epoch": 3.076047173647824, "grad_norm": 0.28661948442459106, "learning_rate": 3.8716833893234975e-06, "loss": 0.295, "step": 30256 }, { "epoch": 3.0761488409922735, "grad_norm": 0.2651664614677429, "learning_rate": 3.8713376603448656e-06, "loss": 0.3294, "step": 30257 }, { "epoch": 3.0762505083367224, "grad_norm": 0.30667343735694885, "learning_rate": 3.870991937052165e-06, "loss": 0.3203, "step": 30258 }, { "epoch": 3.0763521756811714, "grad_norm": 0.28384360671043396, "learning_rate": 3.87064621944714e-06, "loss": 0.3127, "step": 30259 }, { "epoch": 3.0764538430256203, "grad_norm": 0.26248329877853394, "learning_rate": 3.870300507531531e-06, "loss": 0.3119, "step": 30260 }, { "epoch": 3.0765555103700692, "grad_norm": 0.3240894675254822, "learning_rate": 3.869954801307079e-06, "loss": 0.3091, "step": 30261 }, { "epoch": 3.076657177714518, "grad_norm": 0.2816910147666931, "learning_rate": 3.869609100775527e-06, "loss": 0.3106, "step": 30262 }, { "epoch": 3.076758845058967, "grad_norm": 0.2773970067501068, "learning_rate": 3.869263405938615e-06, "loss": 0.3289, "step": 30263 }, { "epoch": 3.076860512403416, "grad_norm": 0.27506664395332336, "learning_rate": 3.868917716798085e-06, "loss": 0.2839, "step": 30264 }, { "epoch": 3.076962179747865, "grad_norm": 0.27225103974342346, "learning_rate": 3.8685720333556795e-06, "loss": 0.3359, "step": 30265 }, { "epoch": 3.077063847092314, "grad_norm": 0.2703542709350586, "learning_rate": 3.868226355613139e-06, "loss": 0.2997, "step": 30266 }, { "epoch": 3.077165514436763, "grad_norm": 0.29644596576690674, "learning_rate": 3.867880683572206e-06, "loss": 0.3104, "step": 30267 }, { "epoch": 3.077267181781212, "grad_norm": 0.27966681122779846, "learning_rate": 3.867535017234619e-06, "loss": 0.3205, "step": 30268 }, { "epoch": 3.0773688491256608, "grad_norm": 0.31221112608909607, "learning_rate": 3.867189356602123e-06, "loss": 0.3287, "step": 30269 }, { "epoch": 3.0774705164701097, "grad_norm": 0.26960036158561707, "learning_rate": 3.8668437016764575e-06, "loss": 0.2933, "step": 30270 }, { "epoch": 3.0775721838145587, "grad_norm": 0.2757295072078705, "learning_rate": 3.866498052459364e-06, "loss": 0.3127, "step": 30271 }, { "epoch": 3.0776738511590076, "grad_norm": 0.2660914659500122, "learning_rate": 3.866152408952584e-06, "loss": 0.2883, "step": 30272 }, { "epoch": 3.0777755185034565, "grad_norm": 0.24898189306259155, "learning_rate": 3.8658067711578586e-06, "loss": 0.3274, "step": 30273 }, { "epoch": 3.0778771858479055, "grad_norm": 0.2621178925037384, "learning_rate": 3.8654611390769295e-06, "loss": 0.3357, "step": 30274 }, { "epoch": 3.0779788531923544, "grad_norm": 0.28948816657066345, "learning_rate": 3.865115512711537e-06, "loss": 0.3038, "step": 30275 }, { "epoch": 3.0780805205368034, "grad_norm": 0.27267956733703613, "learning_rate": 3.864769892063424e-06, "loss": 0.3306, "step": 30276 }, { "epoch": 3.0781821878812528, "grad_norm": 0.2806457579135895, "learning_rate": 3.86442427713433e-06, "loss": 0.3016, "step": 30277 }, { "epoch": 3.0782838552257017, "grad_norm": 0.2674223482608795, "learning_rate": 3.864078667925996e-06, "loss": 0.2987, "step": 30278 }, { "epoch": 3.0783855225701506, "grad_norm": 0.27271538972854614, "learning_rate": 3.863733064440164e-06, "loss": 0.3135, "step": 30279 }, { "epoch": 3.0784871899145996, "grad_norm": 0.2614438533782959, "learning_rate": 3.863387466678575e-06, "loss": 0.3026, "step": 30280 }, { "epoch": 3.0785888572590485, "grad_norm": 0.26713451743125916, "learning_rate": 3.863041874642969e-06, "loss": 0.3109, "step": 30281 }, { "epoch": 3.0786905246034975, "grad_norm": 0.2967962622642517, "learning_rate": 3.86269628833509e-06, "loss": 0.294, "step": 30282 }, { "epoch": 3.0787921919479464, "grad_norm": 0.2875053882598877, "learning_rate": 3.8623507077566745e-06, "loss": 0.3308, "step": 30283 }, { "epoch": 3.0788938592923953, "grad_norm": 0.26548531651496887, "learning_rate": 3.862005132909467e-06, "loss": 0.3123, "step": 30284 }, { "epoch": 3.0789955266368443, "grad_norm": 0.27970245480537415, "learning_rate": 3.861659563795207e-06, "loss": 0.293, "step": 30285 }, { "epoch": 3.0790971939812932, "grad_norm": 0.27708378434181213, "learning_rate": 3.861314000415635e-06, "loss": 0.3045, "step": 30286 }, { "epoch": 3.079198861325742, "grad_norm": 0.275857150554657, "learning_rate": 3.8609684427724945e-06, "loss": 0.2965, "step": 30287 }, { "epoch": 3.079300528670191, "grad_norm": 0.2691081762313843, "learning_rate": 3.8606228908675216e-06, "loss": 0.2974, "step": 30288 }, { "epoch": 3.07940219601464, "grad_norm": 0.2642589509487152, "learning_rate": 3.8602773447024615e-06, "loss": 0.3026, "step": 30289 }, { "epoch": 3.079503863359089, "grad_norm": 0.2871687114238739, "learning_rate": 3.859931804279053e-06, "loss": 0.325, "step": 30290 }, { "epoch": 3.079605530703538, "grad_norm": 0.27185142040252686, "learning_rate": 3.859586269599037e-06, "loss": 0.3066, "step": 30291 }, { "epoch": 3.079707198047987, "grad_norm": 0.27653786540031433, "learning_rate": 3.8592407406641545e-06, "loss": 0.3291, "step": 30292 }, { "epoch": 3.079808865392436, "grad_norm": 0.2676757276058197, "learning_rate": 3.8588952174761455e-06, "loss": 0.3093, "step": 30293 }, { "epoch": 3.0799105327368848, "grad_norm": 0.278357595205307, "learning_rate": 3.858549700036752e-06, "loss": 0.3016, "step": 30294 }, { "epoch": 3.0800122000813337, "grad_norm": 0.27362892031669617, "learning_rate": 3.858204188347714e-06, "loss": 0.3113, "step": 30295 }, { "epoch": 3.0801138674257826, "grad_norm": 0.27483606338500977, "learning_rate": 3.857858682410771e-06, "loss": 0.3282, "step": 30296 }, { "epoch": 3.0802155347702316, "grad_norm": 0.2943533957004547, "learning_rate": 3.857513182227666e-06, "loss": 0.3367, "step": 30297 }, { "epoch": 3.080317202114681, "grad_norm": 0.25755757093429565, "learning_rate": 3.8571676878001364e-06, "loss": 0.3234, "step": 30298 }, { "epoch": 3.08041886945913, "grad_norm": 0.27595674991607666, "learning_rate": 3.8568221991299245e-06, "loss": 0.2947, "step": 30299 }, { "epoch": 3.080520536803579, "grad_norm": 0.2554670572280884, "learning_rate": 3.856476716218773e-06, "loss": 0.3064, "step": 30300 }, { "epoch": 3.080622204148028, "grad_norm": 0.2617419362068176, "learning_rate": 3.856131239068417e-06, "loss": 0.3527, "step": 30301 }, { "epoch": 3.0807238714924767, "grad_norm": 0.27892759442329407, "learning_rate": 3.855785767680602e-06, "loss": 0.3167, "step": 30302 }, { "epoch": 3.0808255388369257, "grad_norm": 0.2613082528114319, "learning_rate": 3.855440302057066e-06, "loss": 0.3592, "step": 30303 }, { "epoch": 3.0809272061813746, "grad_norm": 0.2809749245643616, "learning_rate": 3.8550948421995485e-06, "loss": 0.3108, "step": 30304 }, { "epoch": 3.0810288735258236, "grad_norm": 0.2941652536392212, "learning_rate": 3.854749388109793e-06, "loss": 0.3136, "step": 30305 }, { "epoch": 3.0811305408702725, "grad_norm": 0.2642854154109955, "learning_rate": 3.854403939789537e-06, "loss": 0.3118, "step": 30306 }, { "epoch": 3.0812322082147214, "grad_norm": 0.26553764939308167, "learning_rate": 3.854058497240522e-06, "loss": 0.3068, "step": 30307 }, { "epoch": 3.0813338755591704, "grad_norm": 0.2822057604789734, "learning_rate": 3.853713060464488e-06, "loss": 0.3237, "step": 30308 }, { "epoch": 3.0814355429036193, "grad_norm": 0.24554221332073212, "learning_rate": 3.853367629463174e-06, "loss": 0.304, "step": 30309 }, { "epoch": 3.0815372102480683, "grad_norm": 0.27562734484672546, "learning_rate": 3.853022204238324e-06, "loss": 0.3031, "step": 30310 }, { "epoch": 3.081638877592517, "grad_norm": 0.2561652958393097, "learning_rate": 3.852676784791673e-06, "loss": 0.3092, "step": 30311 }, { "epoch": 3.081740544936966, "grad_norm": 0.2675299644470215, "learning_rate": 3.852331371124965e-06, "loss": 0.2916, "step": 30312 }, { "epoch": 3.081842212281415, "grad_norm": 0.2664542496204376, "learning_rate": 3.851985963239938e-06, "loss": 0.3129, "step": 30313 }, { "epoch": 3.081943879625864, "grad_norm": 0.2826119661331177, "learning_rate": 3.851640561138334e-06, "loss": 0.2914, "step": 30314 }, { "epoch": 3.082045546970313, "grad_norm": 0.3045380115509033, "learning_rate": 3.851295164821892e-06, "loss": 0.2967, "step": 30315 }, { "epoch": 3.082147214314762, "grad_norm": 0.2701888084411621, "learning_rate": 3.850949774292351e-06, "loss": 0.3216, "step": 30316 }, { "epoch": 3.082248881659211, "grad_norm": 0.2687687873840332, "learning_rate": 3.850604389551452e-06, "loss": 0.3076, "step": 30317 }, { "epoch": 3.0823505490036602, "grad_norm": 0.26514437794685364, "learning_rate": 3.8502590106009355e-06, "loss": 0.2942, "step": 30318 }, { "epoch": 3.082452216348109, "grad_norm": 0.2857794463634491, "learning_rate": 3.8499136374425396e-06, "loss": 0.3276, "step": 30319 }, { "epoch": 3.082553883692558, "grad_norm": 0.25079330801963806, "learning_rate": 3.849568270078008e-06, "loss": 0.2973, "step": 30320 }, { "epoch": 3.082655551037007, "grad_norm": 0.27799832820892334, "learning_rate": 3.849222908509076e-06, "loss": 0.3031, "step": 30321 }, { "epoch": 3.082757218381456, "grad_norm": 0.2629339098930359, "learning_rate": 3.848877552737487e-06, "loss": 0.3248, "step": 30322 }, { "epoch": 3.082858885725905, "grad_norm": 0.25658032298088074, "learning_rate": 3.848532202764978e-06, "loss": 0.2967, "step": 30323 }, { "epoch": 3.082960553070354, "grad_norm": 0.2508925497531891, "learning_rate": 3.84818685859329e-06, "loss": 0.3295, "step": 30324 }, { "epoch": 3.083062220414803, "grad_norm": 0.2687525153160095, "learning_rate": 3.847841520224164e-06, "loss": 0.3264, "step": 30325 }, { "epoch": 3.0831638877592518, "grad_norm": 0.2787274718284607, "learning_rate": 3.847496187659338e-06, "loss": 0.333, "step": 30326 }, { "epoch": 3.0832655551037007, "grad_norm": 0.26244765520095825, "learning_rate": 3.847150860900553e-06, "loss": 0.296, "step": 30327 }, { "epoch": 3.0833672224481496, "grad_norm": 0.267733097076416, "learning_rate": 3.846805539949548e-06, "loss": 0.3058, "step": 30328 }, { "epoch": 3.0834688897925986, "grad_norm": 0.28053978085517883, "learning_rate": 3.846460224808061e-06, "loss": 0.3081, "step": 30329 }, { "epoch": 3.0835705571370475, "grad_norm": 0.2882663905620575, "learning_rate": 3.8461149154778345e-06, "loss": 0.3364, "step": 30330 }, { "epoch": 3.0836722244814965, "grad_norm": 0.2621553838253021, "learning_rate": 3.845769611960607e-06, "loss": 0.3126, "step": 30331 }, { "epoch": 3.0837738918259454, "grad_norm": 0.27598661184310913, "learning_rate": 3.8454243142581184e-06, "loss": 0.3478, "step": 30332 }, { "epoch": 3.0838755591703944, "grad_norm": 0.2676445245742798, "learning_rate": 3.845079022372106e-06, "loss": 0.2998, "step": 30333 }, { "epoch": 3.0839772265148433, "grad_norm": 0.2651922404766083, "learning_rate": 3.844733736304314e-06, "loss": 0.2838, "step": 30334 }, { "epoch": 3.0840788938592922, "grad_norm": 0.2727219760417938, "learning_rate": 3.8443884560564755e-06, "loss": 0.3395, "step": 30335 }, { "epoch": 3.084180561203741, "grad_norm": 0.29100072383880615, "learning_rate": 3.844043181630335e-06, "loss": 0.328, "step": 30336 }, { "epoch": 3.08428222854819, "grad_norm": 0.28377920389175415, "learning_rate": 3.84369791302763e-06, "loss": 0.3264, "step": 30337 }, { "epoch": 3.084383895892639, "grad_norm": 0.255500853061676, "learning_rate": 3.8433526502500995e-06, "loss": 0.2906, "step": 30338 }, { "epoch": 3.0844855632370884, "grad_norm": 0.26183149218559265, "learning_rate": 3.843007393299486e-06, "loss": 0.2978, "step": 30339 }, { "epoch": 3.0845872305815374, "grad_norm": 0.283096045255661, "learning_rate": 3.8426621421775235e-06, "loss": 0.3214, "step": 30340 }, { "epoch": 3.0846888979259863, "grad_norm": 0.2693725526332855, "learning_rate": 3.842316896885954e-06, "loss": 0.3695, "step": 30341 }, { "epoch": 3.0847905652704353, "grad_norm": 0.2801133394241333, "learning_rate": 3.841971657426519e-06, "loss": 0.3271, "step": 30342 }, { "epoch": 3.084892232614884, "grad_norm": 0.2840884029865265, "learning_rate": 3.841626423800954e-06, "loss": 0.2993, "step": 30343 }, { "epoch": 3.084993899959333, "grad_norm": 0.27422353625297546, "learning_rate": 3.841281196011002e-06, "loss": 0.3164, "step": 30344 }, { "epoch": 3.085095567303782, "grad_norm": 0.30285581946372986, "learning_rate": 3.840935974058397e-06, "loss": 0.3052, "step": 30345 }, { "epoch": 3.085197234648231, "grad_norm": 0.27126920223236084, "learning_rate": 3.840590757944882e-06, "loss": 0.3139, "step": 30346 }, { "epoch": 3.08529890199268, "grad_norm": 0.25594204664230347, "learning_rate": 3.840245547672196e-06, "loss": 0.2945, "step": 30347 }, { "epoch": 3.085400569337129, "grad_norm": 0.3014966547489166, "learning_rate": 3.8399003432420765e-06, "loss": 0.3375, "step": 30348 }, { "epoch": 3.085502236681578, "grad_norm": 0.2667151987552643, "learning_rate": 3.839555144656266e-06, "loss": 0.2916, "step": 30349 }, { "epoch": 3.085603904026027, "grad_norm": 0.2581605017185211, "learning_rate": 3.839209951916497e-06, "loss": 0.3469, "step": 30350 }, { "epoch": 3.0857055713704757, "grad_norm": 0.2776913344860077, "learning_rate": 3.838864765024515e-06, "loss": 0.3196, "step": 30351 }, { "epoch": 3.0858072387149247, "grad_norm": 0.2828671932220459, "learning_rate": 3.838519583982057e-06, "loss": 0.3261, "step": 30352 }, { "epoch": 3.0859089060593736, "grad_norm": 0.29407206177711487, "learning_rate": 3.838174408790858e-06, "loss": 0.3083, "step": 30353 }, { "epoch": 3.0860105734038226, "grad_norm": 0.25137630105018616, "learning_rate": 3.837829239452664e-06, "loss": 0.3175, "step": 30354 }, { "epoch": 3.0861122407482715, "grad_norm": 0.27704599499702454, "learning_rate": 3.8374840759692065e-06, "loss": 0.3272, "step": 30355 }, { "epoch": 3.0862139080927204, "grad_norm": 0.2887974977493286, "learning_rate": 3.83713891834223e-06, "loss": 0.3219, "step": 30356 }, { "epoch": 3.0863155754371694, "grad_norm": 0.2821120023727417, "learning_rate": 3.8367937665734725e-06, "loss": 0.3133, "step": 30357 }, { "epoch": 3.0864172427816183, "grad_norm": 0.2552165985107422, "learning_rate": 3.8364486206646684e-06, "loss": 0.2839, "step": 30358 }, { "epoch": 3.0865189101260677, "grad_norm": 0.26713815331459045, "learning_rate": 3.836103480617564e-06, "loss": 0.2747, "step": 30359 }, { "epoch": 3.0866205774705167, "grad_norm": 0.27004581689834595, "learning_rate": 3.8357583464338885e-06, "loss": 0.3179, "step": 30360 }, { "epoch": 3.0867222448149656, "grad_norm": 0.2654697299003601, "learning_rate": 3.835413218115388e-06, "loss": 0.3187, "step": 30361 }, { "epoch": 3.0868239121594145, "grad_norm": 0.26741230487823486, "learning_rate": 3.835068095663802e-06, "loss": 0.3376, "step": 30362 }, { "epoch": 3.0869255795038635, "grad_norm": 0.28159773349761963, "learning_rate": 3.8347229790808615e-06, "loss": 0.2938, "step": 30363 }, { "epoch": 3.0870272468483124, "grad_norm": 0.26260268688201904, "learning_rate": 3.834377868368313e-06, "loss": 0.3084, "step": 30364 }, { "epoch": 3.0871289141927614, "grad_norm": 0.2657349705696106, "learning_rate": 3.834032763527889e-06, "loss": 0.3171, "step": 30365 }, { "epoch": 3.0872305815372103, "grad_norm": 0.2700819969177246, "learning_rate": 3.833687664561332e-06, "loss": 0.3031, "step": 30366 }, { "epoch": 3.0873322488816592, "grad_norm": 0.29018935561180115, "learning_rate": 3.833342571470382e-06, "loss": 0.3292, "step": 30367 }, { "epoch": 3.087433916226108, "grad_norm": 0.2772051990032196, "learning_rate": 3.832997484256771e-06, "loss": 0.3089, "step": 30368 }, { "epoch": 3.087535583570557, "grad_norm": 0.2524258494377136, "learning_rate": 3.832652402922244e-06, "loss": 0.296, "step": 30369 }, { "epoch": 3.087637250915006, "grad_norm": 0.2785273790359497, "learning_rate": 3.832307327468534e-06, "loss": 0.2991, "step": 30370 }, { "epoch": 3.087738918259455, "grad_norm": 0.28715917468070984, "learning_rate": 3.831962257897384e-06, "loss": 0.2982, "step": 30371 }, { "epoch": 3.087840585603904, "grad_norm": 0.28042086958885193, "learning_rate": 3.831617194210532e-06, "loss": 0.3088, "step": 30372 }, { "epoch": 3.087942252948353, "grad_norm": 0.27478840947151184, "learning_rate": 3.831272136409712e-06, "loss": 0.2855, "step": 30373 }, { "epoch": 3.088043920292802, "grad_norm": 0.27159374952316284, "learning_rate": 3.830927084496668e-06, "loss": 0.3047, "step": 30374 }, { "epoch": 3.0881455876372508, "grad_norm": 0.29048866033554077, "learning_rate": 3.830582038473134e-06, "loss": 0.3005, "step": 30375 }, { "epoch": 3.0882472549816997, "grad_norm": 0.27524858713150024, "learning_rate": 3.8302369983408485e-06, "loss": 0.2709, "step": 30376 }, { "epoch": 3.0883489223261487, "grad_norm": 0.2797548472881317, "learning_rate": 3.829891964101554e-06, "loss": 0.3111, "step": 30377 }, { "epoch": 3.0884505896705976, "grad_norm": 0.29479530453681946, "learning_rate": 3.829546935756983e-06, "loss": 0.3252, "step": 30378 }, { "epoch": 3.0885522570150465, "grad_norm": 0.2693539559841156, "learning_rate": 3.82920191330888e-06, "loss": 0.3135, "step": 30379 }, { "epoch": 3.088653924359496, "grad_norm": 0.2889100909233093, "learning_rate": 3.8288568967589765e-06, "loss": 0.32, "step": 30380 }, { "epoch": 3.088755591703945, "grad_norm": 0.27075207233428955, "learning_rate": 3.828511886109012e-06, "loss": 0.3103, "step": 30381 }, { "epoch": 3.088857259048394, "grad_norm": 0.29677197337150574, "learning_rate": 3.828166881360731e-06, "loss": 0.3076, "step": 30382 }, { "epoch": 3.0889589263928428, "grad_norm": 0.2582681477069855, "learning_rate": 3.827821882515863e-06, "loss": 0.3453, "step": 30383 }, { "epoch": 3.0890605937372917, "grad_norm": 0.27992701530456543, "learning_rate": 3.827476889576152e-06, "loss": 0.3384, "step": 30384 }, { "epoch": 3.0891622610817406, "grad_norm": 0.2671421766281128, "learning_rate": 3.827131902543333e-06, "loss": 0.3096, "step": 30385 }, { "epoch": 3.0892639284261896, "grad_norm": 0.29340118169784546, "learning_rate": 3.826786921419143e-06, "loss": 0.3051, "step": 30386 }, { "epoch": 3.0893655957706385, "grad_norm": 0.28866538405418396, "learning_rate": 3.826441946205326e-06, "loss": 0.3247, "step": 30387 }, { "epoch": 3.0894672631150875, "grad_norm": 0.2695399522781372, "learning_rate": 3.826096976903611e-06, "loss": 0.2766, "step": 30388 }, { "epoch": 3.0895689304595364, "grad_norm": 0.2794255316257477, "learning_rate": 3.825752013515744e-06, "loss": 0.3488, "step": 30389 }, { "epoch": 3.0896705978039853, "grad_norm": 0.2724889814853668, "learning_rate": 3.825407056043458e-06, "loss": 0.3228, "step": 30390 }, { "epoch": 3.0897722651484343, "grad_norm": 0.26959851384162903, "learning_rate": 3.82506210448849e-06, "loss": 0.3189, "step": 30391 }, { "epoch": 3.0898739324928832, "grad_norm": 0.2643324136734009, "learning_rate": 3.824717158852583e-06, "loss": 0.285, "step": 30392 }, { "epoch": 3.089975599837332, "grad_norm": 0.2785521149635315, "learning_rate": 3.82437221913747e-06, "loss": 0.3065, "step": 30393 }, { "epoch": 3.090077267181781, "grad_norm": 0.2532442808151245, "learning_rate": 3.824027285344891e-06, "loss": 0.3051, "step": 30394 }, { "epoch": 3.09017893452623, "grad_norm": 0.2871389091014862, "learning_rate": 3.823682357476584e-06, "loss": 0.3089, "step": 30395 }, { "epoch": 3.090280601870679, "grad_norm": 0.29170653223991394, "learning_rate": 3.823337435534282e-06, "loss": 0.2889, "step": 30396 }, { "epoch": 3.090382269215128, "grad_norm": 0.2875123620033264, "learning_rate": 3.822992519519731e-06, "loss": 0.3059, "step": 30397 }, { "epoch": 3.090483936559577, "grad_norm": 0.27511754631996155, "learning_rate": 3.8226476094346624e-06, "loss": 0.3232, "step": 30398 }, { "epoch": 3.090585603904026, "grad_norm": 0.28568702936172485, "learning_rate": 3.822302705280816e-06, "loss": 0.3069, "step": 30399 }, { "epoch": 3.090687271248475, "grad_norm": 0.2657099962234497, "learning_rate": 3.821957807059926e-06, "loss": 0.2902, "step": 30400 }, { "epoch": 3.090788938592924, "grad_norm": 0.2532216012477875, "learning_rate": 3.821612914773733e-06, "loss": 0.3054, "step": 30401 }, { "epoch": 3.090890605937373, "grad_norm": 0.2765083312988281, "learning_rate": 3.821268028423978e-06, "loss": 0.3133, "step": 30402 }, { "epoch": 3.090992273281822, "grad_norm": 0.2747586965560913, "learning_rate": 3.820923148012391e-06, "loss": 0.3079, "step": 30403 }, { "epoch": 3.091093940626271, "grad_norm": 0.27475401759147644, "learning_rate": 3.820578273540715e-06, "loss": 0.3248, "step": 30404 }, { "epoch": 3.09119560797072, "grad_norm": 0.26142174005508423, "learning_rate": 3.820233405010684e-06, "loss": 0.3068, "step": 30405 }, { "epoch": 3.091297275315169, "grad_norm": 0.2694244980812073, "learning_rate": 3.819888542424035e-06, "loss": 0.3207, "step": 30406 }, { "epoch": 3.091398942659618, "grad_norm": 0.27061858773231506, "learning_rate": 3.8195436857825115e-06, "loss": 0.3064, "step": 30407 }, { "epoch": 3.0915006100040667, "grad_norm": 0.26463034749031067, "learning_rate": 3.819198835087843e-06, "loss": 0.3087, "step": 30408 }, { "epoch": 3.0916022773485157, "grad_norm": 0.2685924172401428, "learning_rate": 3.818853990341772e-06, "loss": 0.3142, "step": 30409 }, { "epoch": 3.0917039446929646, "grad_norm": 0.268696129322052, "learning_rate": 3.818509151546033e-06, "loss": 0.3079, "step": 30410 }, { "epoch": 3.0918056120374136, "grad_norm": 0.28122058510780334, "learning_rate": 3.8181643187023645e-06, "loss": 0.3335, "step": 30411 }, { "epoch": 3.0919072793818625, "grad_norm": 0.2540554404258728, "learning_rate": 3.817819491812503e-06, "loss": 0.3029, "step": 30412 }, { "epoch": 3.0920089467263114, "grad_norm": 0.29481571912765503, "learning_rate": 3.817474670878186e-06, "loss": 0.325, "step": 30413 }, { "epoch": 3.0921106140707604, "grad_norm": 0.2909155786037445, "learning_rate": 3.817129855901151e-06, "loss": 0.3528, "step": 30414 }, { "epoch": 3.0922122814152093, "grad_norm": 0.2664649188518524, "learning_rate": 3.816785046883134e-06, "loss": 0.3283, "step": 30415 }, { "epoch": 3.0923139487596583, "grad_norm": 0.28077155351638794, "learning_rate": 3.816440243825873e-06, "loss": 0.2946, "step": 30416 }, { "epoch": 3.092415616104107, "grad_norm": 0.2674759030342102, "learning_rate": 3.816095446731106e-06, "loss": 0.3073, "step": 30417 }, { "epoch": 3.092517283448556, "grad_norm": 0.27931809425354004, "learning_rate": 3.815750655600567e-06, "loss": 0.3241, "step": 30418 }, { "epoch": 3.092618950793005, "grad_norm": 0.29722389578819275, "learning_rate": 3.815405870435996e-06, "loss": 0.3022, "step": 30419 }, { "epoch": 3.092720618137454, "grad_norm": 0.2785528004169464, "learning_rate": 3.815061091239128e-06, "loss": 0.3167, "step": 30420 }, { "epoch": 3.0928222854819034, "grad_norm": 0.2661706209182739, "learning_rate": 3.8147163180117e-06, "loss": 0.3174, "step": 30421 }, { "epoch": 3.0929239528263524, "grad_norm": 0.28367528319358826, "learning_rate": 3.814371550755451e-06, "loss": 0.3043, "step": 30422 }, { "epoch": 3.0930256201708013, "grad_norm": 0.2904547452926636, "learning_rate": 3.8140267894721157e-06, "loss": 0.2774, "step": 30423 }, { "epoch": 3.0931272875152502, "grad_norm": 0.27594107389450073, "learning_rate": 3.8136820341634323e-06, "loss": 0.3414, "step": 30424 }, { "epoch": 3.093228954859699, "grad_norm": 0.26531368494033813, "learning_rate": 3.8133372848311355e-06, "loss": 0.2829, "step": 30425 }, { "epoch": 3.093330622204148, "grad_norm": 0.2593326270580292, "learning_rate": 3.8129925414769644e-06, "loss": 0.3705, "step": 30426 }, { "epoch": 3.093432289548597, "grad_norm": 0.27516821026802063, "learning_rate": 3.812647804102655e-06, "loss": 0.2907, "step": 30427 }, { "epoch": 3.093533956893046, "grad_norm": 0.27179285883903503, "learning_rate": 3.8123030727099432e-06, "loss": 0.3631, "step": 30428 }, { "epoch": 3.093635624237495, "grad_norm": 0.2693135738372803, "learning_rate": 3.8119583473005674e-06, "loss": 0.2954, "step": 30429 }, { "epoch": 3.093737291581944, "grad_norm": 0.2696591317653656, "learning_rate": 3.8116136278762616e-06, "loss": 0.3354, "step": 30430 }, { "epoch": 3.093838958926393, "grad_norm": 0.25862258672714233, "learning_rate": 3.8112689144387643e-06, "loss": 0.306, "step": 30431 }, { "epoch": 3.0939406262708418, "grad_norm": 0.2861625850200653, "learning_rate": 3.8109242069898128e-06, "loss": 0.3254, "step": 30432 }, { "epoch": 3.0940422936152907, "grad_norm": 0.2754558026790619, "learning_rate": 3.8105795055311414e-06, "loss": 0.3039, "step": 30433 }, { "epoch": 3.0941439609597396, "grad_norm": 0.25637054443359375, "learning_rate": 3.8102348100644887e-06, "loss": 0.3262, "step": 30434 }, { "epoch": 3.0942456283041886, "grad_norm": 0.263826847076416, "learning_rate": 3.8098901205915893e-06, "loss": 0.2917, "step": 30435 }, { "epoch": 3.0943472956486375, "grad_norm": 0.28566935658454895, "learning_rate": 3.8095454371141806e-06, "loss": 0.3461, "step": 30436 }, { "epoch": 3.0944489629930865, "grad_norm": 0.27709394693374634, "learning_rate": 3.809200759634e-06, "loss": 0.3475, "step": 30437 }, { "epoch": 3.0945506303375354, "grad_norm": 0.2445802539587021, "learning_rate": 3.8088560881527823e-06, "loss": 0.3178, "step": 30438 }, { "epoch": 3.0946522976819844, "grad_norm": 0.27967503666877747, "learning_rate": 3.8085114226722657e-06, "loss": 0.2953, "step": 30439 }, { "epoch": 3.0947539650264333, "grad_norm": 0.27030444145202637, "learning_rate": 3.8081667631941845e-06, "loss": 0.297, "step": 30440 }, { "epoch": 3.0948556323708827, "grad_norm": 0.26650989055633545, "learning_rate": 3.8078221097202756e-06, "loss": 0.3034, "step": 30441 }, { "epoch": 3.0949572997153316, "grad_norm": 0.26815786957740784, "learning_rate": 3.8074774622522766e-06, "loss": 0.3221, "step": 30442 }, { "epoch": 3.0950589670597806, "grad_norm": 0.27584055066108704, "learning_rate": 3.8071328207919216e-06, "loss": 0.3349, "step": 30443 }, { "epoch": 3.0951606344042295, "grad_norm": 0.27694374322891235, "learning_rate": 3.8067881853409493e-06, "loss": 0.3191, "step": 30444 }, { "epoch": 3.0952623017486784, "grad_norm": 0.276825487613678, "learning_rate": 3.8064435559010938e-06, "loss": 0.3538, "step": 30445 }, { "epoch": 3.0953639690931274, "grad_norm": 0.2731108069419861, "learning_rate": 3.8060989324740918e-06, "loss": 0.3111, "step": 30446 }, { "epoch": 3.0954656364375763, "grad_norm": 0.26001742482185364, "learning_rate": 3.8057543150616805e-06, "loss": 0.3093, "step": 30447 }, { "epoch": 3.0955673037820253, "grad_norm": 0.2586134970188141, "learning_rate": 3.805409703665594e-06, "loss": 0.2977, "step": 30448 }, { "epoch": 3.095668971126474, "grad_norm": 0.2668392062187195, "learning_rate": 3.8050650982875707e-06, "loss": 0.3023, "step": 30449 }, { "epoch": 3.095770638470923, "grad_norm": 0.2699272930622101, "learning_rate": 3.8047204989293444e-06, "loss": 0.3148, "step": 30450 }, { "epoch": 3.095872305815372, "grad_norm": 0.2753075957298279, "learning_rate": 3.8043759055926525e-06, "loss": 0.3096, "step": 30451 }, { "epoch": 3.095973973159821, "grad_norm": 0.28859585523605347, "learning_rate": 3.8040313182792314e-06, "loss": 0.2877, "step": 30452 }, { "epoch": 3.09607564050427, "grad_norm": 0.25848522782325745, "learning_rate": 3.8036867369908154e-06, "loss": 0.32, "step": 30453 }, { "epoch": 3.096177307848719, "grad_norm": 0.2697085738182068, "learning_rate": 3.803342161729142e-06, "loss": 0.3132, "step": 30454 }, { "epoch": 3.096278975193168, "grad_norm": 0.27394577860832214, "learning_rate": 3.8029975924959463e-06, "loss": 0.3259, "step": 30455 }, { "epoch": 3.096380642537617, "grad_norm": 0.2660631537437439, "learning_rate": 3.8026530292929635e-06, "loss": 0.3366, "step": 30456 }, { "epoch": 3.0964823098820657, "grad_norm": 0.2744062542915344, "learning_rate": 3.8023084721219315e-06, "loss": 0.3087, "step": 30457 }, { "epoch": 3.0965839772265147, "grad_norm": 0.2682722806930542, "learning_rate": 3.8019639209845837e-06, "loss": 0.3112, "step": 30458 }, { "epoch": 3.0966856445709636, "grad_norm": 0.2844277322292328, "learning_rate": 3.801619375882658e-06, "loss": 0.3266, "step": 30459 }, { "epoch": 3.0967873119154126, "grad_norm": 0.29018840193748474, "learning_rate": 3.801274836817888e-06, "loss": 0.3171, "step": 30460 }, { "epoch": 3.0968889792598615, "grad_norm": 0.2746081054210663, "learning_rate": 3.8009303037920108e-06, "loss": 0.3094, "step": 30461 }, { "epoch": 3.096990646604311, "grad_norm": 0.2601124048233032, "learning_rate": 3.8005857768067626e-06, "loss": 0.3039, "step": 30462 }, { "epoch": 3.09709231394876, "grad_norm": 0.27536845207214355, "learning_rate": 3.8002412558638774e-06, "loss": 0.2959, "step": 30463 }, { "epoch": 3.0971939812932088, "grad_norm": 0.2741813361644745, "learning_rate": 3.7998967409650923e-06, "loss": 0.3156, "step": 30464 }, { "epoch": 3.0972956486376577, "grad_norm": 0.28408002853393555, "learning_rate": 3.7995522321121416e-06, "loss": 0.3096, "step": 30465 }, { "epoch": 3.0973973159821067, "grad_norm": 0.28271013498306274, "learning_rate": 3.799207729306761e-06, "loss": 0.3062, "step": 30466 }, { "epoch": 3.0974989833265556, "grad_norm": 0.30388692021369934, "learning_rate": 3.798863232550688e-06, "loss": 0.2688, "step": 30467 }, { "epoch": 3.0976006506710045, "grad_norm": 0.279593825340271, "learning_rate": 3.7985187418456555e-06, "loss": 0.2832, "step": 30468 }, { "epoch": 3.0977023180154535, "grad_norm": 0.2717774212360382, "learning_rate": 3.798174257193401e-06, "loss": 0.3307, "step": 30469 }, { "epoch": 3.0978039853599024, "grad_norm": 0.2762197256088257, "learning_rate": 3.7978297785956574e-06, "loss": 0.3139, "step": 30470 }, { "epoch": 3.0979056527043514, "grad_norm": 0.27687397599220276, "learning_rate": 3.7974853060541626e-06, "loss": 0.2887, "step": 30471 }, { "epoch": 3.0980073200488003, "grad_norm": 0.3020927906036377, "learning_rate": 3.797140839570652e-06, "loss": 0.3051, "step": 30472 }, { "epoch": 3.0981089873932492, "grad_norm": 0.28881770372390747, "learning_rate": 3.796796379146859e-06, "loss": 0.2993, "step": 30473 }, { "epoch": 3.098210654737698, "grad_norm": 0.2804245352745056, "learning_rate": 3.79645192478452e-06, "loss": 0.3035, "step": 30474 }, { "epoch": 3.098312322082147, "grad_norm": 0.27524667978286743, "learning_rate": 3.79610747648537e-06, "loss": 0.3027, "step": 30475 }, { "epoch": 3.098413989426596, "grad_norm": 0.2972775995731354, "learning_rate": 3.795763034251144e-06, "loss": 0.2855, "step": 30476 }, { "epoch": 3.098515656771045, "grad_norm": 0.2672860026359558, "learning_rate": 3.7954185980835793e-06, "loss": 0.3006, "step": 30477 }, { "epoch": 3.098617324115494, "grad_norm": 0.2886015474796295, "learning_rate": 3.7950741679844084e-06, "loss": 0.3226, "step": 30478 }, { "epoch": 3.098718991459943, "grad_norm": 0.2550058662891388, "learning_rate": 3.7947297439553675e-06, "loss": 0.3207, "step": 30479 }, { "epoch": 3.098820658804392, "grad_norm": 0.2737363874912262, "learning_rate": 3.7943853259981924e-06, "loss": 0.3259, "step": 30480 }, { "epoch": 3.0989223261488408, "grad_norm": 0.2542598247528076, "learning_rate": 3.7940409141146162e-06, "loss": 0.291, "step": 30481 }, { "epoch": 3.09902399349329, "grad_norm": 0.27810823917388916, "learning_rate": 3.7936965083063763e-06, "loss": 0.3153, "step": 30482 }, { "epoch": 3.099125660837739, "grad_norm": 0.2894224226474762, "learning_rate": 3.7933521085752064e-06, "loss": 0.3136, "step": 30483 }, { "epoch": 3.099227328182188, "grad_norm": 0.27351608872413635, "learning_rate": 3.793007714922843e-06, "loss": 0.289, "step": 30484 }, { "epoch": 3.099328995526637, "grad_norm": 0.26773393154144287, "learning_rate": 3.792663327351018e-06, "loss": 0.3102, "step": 30485 }, { "epoch": 3.099430662871086, "grad_norm": 0.28377991914749146, "learning_rate": 3.7923189458614686e-06, "loss": 0.3047, "step": 30486 }, { "epoch": 3.099532330215535, "grad_norm": 0.26897507905960083, "learning_rate": 3.791974570455931e-06, "loss": 0.283, "step": 30487 }, { "epoch": 3.099633997559984, "grad_norm": 0.2756071090698242, "learning_rate": 3.791630201136136e-06, "loss": 0.3124, "step": 30488 }, { "epoch": 3.0997356649044328, "grad_norm": 0.256994366645813, "learning_rate": 3.791285837903823e-06, "loss": 0.2988, "step": 30489 }, { "epoch": 3.0998373322488817, "grad_norm": 0.2672733962535858, "learning_rate": 3.790941480760724e-06, "loss": 0.3042, "step": 30490 }, { "epoch": 3.0999389995933306, "grad_norm": 0.2769027054309845, "learning_rate": 3.790597129708574e-06, "loss": 0.2956, "step": 30491 }, { "epoch": 3.1000406669377796, "grad_norm": 0.27775827050209045, "learning_rate": 3.790252784749109e-06, "loss": 0.2896, "step": 30492 }, { "epoch": 3.1001423342822285, "grad_norm": 0.2813299894332886, "learning_rate": 3.789908445884062e-06, "loss": 0.3354, "step": 30493 }, { "epoch": 3.1002440016266775, "grad_norm": 0.26787787675857544, "learning_rate": 3.78956411311517e-06, "loss": 0.2945, "step": 30494 }, { "epoch": 3.1003456689711264, "grad_norm": 0.28073108196258545, "learning_rate": 3.7892197864441655e-06, "loss": 0.3064, "step": 30495 }, { "epoch": 3.1004473363155753, "grad_norm": 0.2773277759552002, "learning_rate": 3.7888754658727837e-06, "loss": 0.3012, "step": 30496 }, { "epoch": 3.1005490036600243, "grad_norm": 0.29696351289749146, "learning_rate": 3.7885311514027606e-06, "loss": 0.2759, "step": 30497 }, { "epoch": 3.1006506710044732, "grad_norm": 0.2606646716594696, "learning_rate": 3.788186843035829e-06, "loss": 0.339, "step": 30498 }, { "epoch": 3.100752338348922, "grad_norm": 0.26926255226135254, "learning_rate": 3.7878425407737245e-06, "loss": 0.3031, "step": 30499 }, { "epoch": 3.100854005693371, "grad_norm": 0.26559287309646606, "learning_rate": 3.787498244618181e-06, "loss": 0.3006, "step": 30500 }, { "epoch": 3.10095567303782, "grad_norm": 0.2889869213104248, "learning_rate": 3.7871539545709324e-06, "loss": 0.2782, "step": 30501 }, { "epoch": 3.101057340382269, "grad_norm": 0.26715758442878723, "learning_rate": 3.786809670633715e-06, "loss": 0.3511, "step": 30502 }, { "epoch": 3.1011590077267184, "grad_norm": 0.29829952120780945, "learning_rate": 3.7864653928082616e-06, "loss": 0.3265, "step": 30503 }, { "epoch": 3.1012606750711673, "grad_norm": 0.2589881718158722, "learning_rate": 3.786121121096309e-06, "loss": 0.3014, "step": 30504 }, { "epoch": 3.1013623424156163, "grad_norm": 0.26954010128974915, "learning_rate": 3.7857768554995866e-06, "loss": 0.2892, "step": 30505 }, { "epoch": 3.101464009760065, "grad_norm": 0.276946485042572, "learning_rate": 3.7854325960198327e-06, "loss": 0.3243, "step": 30506 }, { "epoch": 3.101565677104514, "grad_norm": 0.27242863178253174, "learning_rate": 3.7850883426587827e-06, "loss": 0.285, "step": 30507 }, { "epoch": 3.101667344448963, "grad_norm": 0.2808881998062134, "learning_rate": 3.784744095418167e-06, "loss": 0.2805, "step": 30508 }, { "epoch": 3.101769011793412, "grad_norm": 0.2520611584186554, "learning_rate": 3.784399854299724e-06, "loss": 0.3101, "step": 30509 }, { "epoch": 3.101870679137861, "grad_norm": 0.29540199041366577, "learning_rate": 3.7840556193051825e-06, "loss": 0.3143, "step": 30510 }, { "epoch": 3.10197234648231, "grad_norm": 0.2756798267364502, "learning_rate": 3.783711390436281e-06, "loss": 0.3056, "step": 30511 }, { "epoch": 3.102074013826759, "grad_norm": 0.25495538115501404, "learning_rate": 3.7833671676947534e-06, "loss": 0.3007, "step": 30512 }, { "epoch": 3.102175681171208, "grad_norm": 0.28826141357421875, "learning_rate": 3.7830229510823324e-06, "loss": 0.2746, "step": 30513 }, { "epoch": 3.1022773485156567, "grad_norm": 0.2738504409790039, "learning_rate": 3.782678740600754e-06, "loss": 0.307, "step": 30514 }, { "epoch": 3.1023790158601057, "grad_norm": 0.27570590376853943, "learning_rate": 3.7823345362517485e-06, "loss": 0.3546, "step": 30515 }, { "epoch": 3.1024806832045546, "grad_norm": 0.2689948081970215, "learning_rate": 3.7819903380370525e-06, "loss": 0.2821, "step": 30516 }, { "epoch": 3.1025823505490036, "grad_norm": 0.29006990790367126, "learning_rate": 3.7816461459584008e-06, "loss": 0.356, "step": 30517 }, { "epoch": 3.1026840178934525, "grad_norm": 0.2753397822380066, "learning_rate": 3.7813019600175256e-06, "loss": 0.2968, "step": 30518 }, { "epoch": 3.1027856852379014, "grad_norm": 0.2653137147426605, "learning_rate": 3.7809577802161633e-06, "loss": 0.3085, "step": 30519 }, { "epoch": 3.1028873525823504, "grad_norm": 0.2715599834918976, "learning_rate": 3.7806136065560435e-06, "loss": 0.3271, "step": 30520 }, { "epoch": 3.1029890199267993, "grad_norm": 0.28804582357406616, "learning_rate": 3.780269439038905e-06, "loss": 0.3049, "step": 30521 }, { "epoch": 3.1030906872712483, "grad_norm": 0.27912449836730957, "learning_rate": 3.779925277666476e-06, "loss": 0.3307, "step": 30522 }, { "epoch": 3.1031923546156976, "grad_norm": 0.27273961901664734, "learning_rate": 3.7795811224404954e-06, "loss": 0.3035, "step": 30523 }, { "epoch": 3.1032940219601466, "grad_norm": 0.2752944231033325, "learning_rate": 3.7792369733626965e-06, "loss": 0.3167, "step": 30524 }, { "epoch": 3.1033956893045955, "grad_norm": 0.2595442235469818, "learning_rate": 3.7788928304348087e-06, "loss": 0.2893, "step": 30525 }, { "epoch": 3.1034973566490445, "grad_norm": 0.2597545385360718, "learning_rate": 3.7785486936585718e-06, "loss": 0.2974, "step": 30526 }, { "epoch": 3.1035990239934934, "grad_norm": 0.2768387198448181, "learning_rate": 3.7782045630357145e-06, "loss": 0.2986, "step": 30527 }, { "epoch": 3.1037006913379424, "grad_norm": 0.280925989151001, "learning_rate": 3.777860438567971e-06, "loss": 0.3028, "step": 30528 }, { "epoch": 3.1038023586823913, "grad_norm": 0.26963692903518677, "learning_rate": 3.7775163202570785e-06, "loss": 0.29, "step": 30529 }, { "epoch": 3.1039040260268402, "grad_norm": 0.27704471349716187, "learning_rate": 3.777172208104766e-06, "loss": 0.3288, "step": 30530 }, { "epoch": 3.104005693371289, "grad_norm": 0.2660185396671295, "learning_rate": 3.7768281021127728e-06, "loss": 0.2824, "step": 30531 }, { "epoch": 3.104107360715738, "grad_norm": 0.29578521847724915, "learning_rate": 3.7764840022828264e-06, "loss": 0.3247, "step": 30532 }, { "epoch": 3.104209028060187, "grad_norm": 0.26191896200180054, "learning_rate": 3.7761399086166617e-06, "loss": 0.3136, "step": 30533 }, { "epoch": 3.104310695404636, "grad_norm": 0.25757086277008057, "learning_rate": 3.7757958211160163e-06, "loss": 0.2744, "step": 30534 }, { "epoch": 3.104412362749085, "grad_norm": 0.2504478991031647, "learning_rate": 3.775451739782618e-06, "loss": 0.2895, "step": 30535 }, { "epoch": 3.104514030093534, "grad_norm": 0.27187687158584595, "learning_rate": 3.7751076646182056e-06, "loss": 0.2699, "step": 30536 }, { "epoch": 3.104615697437983, "grad_norm": 0.24419869482517242, "learning_rate": 3.7747635956245077e-06, "loss": 0.3086, "step": 30537 }, { "epoch": 3.1047173647824318, "grad_norm": 0.30801376700401306, "learning_rate": 3.7744195328032586e-06, "loss": 0.3021, "step": 30538 }, { "epoch": 3.1048190321268807, "grad_norm": 0.29721930623054504, "learning_rate": 3.774075476156196e-06, "loss": 0.2993, "step": 30539 }, { "epoch": 3.1049206994713296, "grad_norm": 0.2624313235282898, "learning_rate": 3.7737314256850467e-06, "loss": 0.287, "step": 30540 }, { "epoch": 3.1050223668157786, "grad_norm": 0.2771340012550354, "learning_rate": 3.77338738139155e-06, "loss": 0.2831, "step": 30541 }, { "epoch": 3.1051240341602275, "grad_norm": 0.2796064615249634, "learning_rate": 3.7730433432774337e-06, "loss": 0.3196, "step": 30542 }, { "epoch": 3.1052257015046765, "grad_norm": 0.2750721275806427, "learning_rate": 3.772699311344433e-06, "loss": 0.3005, "step": 30543 }, { "epoch": 3.105327368849126, "grad_norm": 0.282060831785202, "learning_rate": 3.7723552855942846e-06, "loss": 0.3031, "step": 30544 }, { "epoch": 3.105429036193575, "grad_norm": 0.27844491600990295, "learning_rate": 3.772011266028716e-06, "loss": 0.3207, "step": 30545 }, { "epoch": 3.1055307035380237, "grad_norm": 0.2991596758365631, "learning_rate": 3.7716672526494647e-06, "loss": 0.3123, "step": 30546 }, { "epoch": 3.1056323708824727, "grad_norm": 0.26485517621040344, "learning_rate": 3.77132324545826e-06, "loss": 0.2807, "step": 30547 }, { "epoch": 3.1057340382269216, "grad_norm": 0.26528677344322205, "learning_rate": 3.770979244456836e-06, "loss": 0.3305, "step": 30548 }, { "epoch": 3.1058357055713706, "grad_norm": 0.27584725618362427, "learning_rate": 3.77063524964693e-06, "loss": 0.3147, "step": 30549 }, { "epoch": 3.1059373729158195, "grad_norm": 0.2746331989765167, "learning_rate": 3.770291261030269e-06, "loss": 0.2902, "step": 30550 }, { "epoch": 3.1060390402602684, "grad_norm": 0.27205637097358704, "learning_rate": 3.7699472786085892e-06, "loss": 0.3328, "step": 30551 }, { "epoch": 3.1061407076047174, "grad_norm": 0.2748020589351654, "learning_rate": 3.769603302383622e-06, "loss": 0.2911, "step": 30552 }, { "epoch": 3.1062423749491663, "grad_norm": 0.25875332951545715, "learning_rate": 3.7692593323571e-06, "loss": 0.2776, "step": 30553 }, { "epoch": 3.1063440422936153, "grad_norm": 0.28747043013572693, "learning_rate": 3.7689153685307605e-06, "loss": 0.3502, "step": 30554 }, { "epoch": 3.106445709638064, "grad_norm": 0.2555472254753113, "learning_rate": 3.7685714109063304e-06, "loss": 0.2853, "step": 30555 }, { "epoch": 3.106547376982513, "grad_norm": 0.25528064370155334, "learning_rate": 3.7682274594855455e-06, "loss": 0.294, "step": 30556 }, { "epoch": 3.106649044326962, "grad_norm": 0.26260077953338623, "learning_rate": 3.7678835142701375e-06, "loss": 0.3198, "step": 30557 }, { "epoch": 3.106750711671411, "grad_norm": 0.2721042335033417, "learning_rate": 3.7675395752618382e-06, "loss": 0.3443, "step": 30558 }, { "epoch": 3.10685237901586, "grad_norm": 0.27139797806739807, "learning_rate": 3.7671956424623853e-06, "loss": 0.2749, "step": 30559 }, { "epoch": 3.106954046360309, "grad_norm": 0.2639354467391968, "learning_rate": 3.766851715873505e-06, "loss": 0.3029, "step": 30560 }, { "epoch": 3.107055713704758, "grad_norm": 0.2630579471588135, "learning_rate": 3.766507795496934e-06, "loss": 0.2939, "step": 30561 }, { "epoch": 3.107157381049207, "grad_norm": 0.2829139232635498, "learning_rate": 3.7661638813344025e-06, "loss": 0.3217, "step": 30562 }, { "epoch": 3.1072590483936557, "grad_norm": 0.28178760409355164, "learning_rate": 3.7658199733876446e-06, "loss": 0.2806, "step": 30563 }, { "epoch": 3.107360715738105, "grad_norm": 0.2935233414173126, "learning_rate": 3.7654760716583927e-06, "loss": 0.3049, "step": 30564 }, { "epoch": 3.107462383082554, "grad_norm": 0.2549094259738922, "learning_rate": 3.765132176148378e-06, "loss": 0.3018, "step": 30565 }, { "epoch": 3.107564050427003, "grad_norm": 0.29072046279907227, "learning_rate": 3.764788286859336e-06, "loss": 0.3084, "step": 30566 }, { "epoch": 3.107665717771452, "grad_norm": 0.2880229651927948, "learning_rate": 3.764444403792995e-06, "loss": 0.3038, "step": 30567 }, { "epoch": 3.107767385115901, "grad_norm": 0.2916022539138794, "learning_rate": 3.7641005269510898e-06, "loss": 0.3256, "step": 30568 }, { "epoch": 3.10786905246035, "grad_norm": 0.29564177989959717, "learning_rate": 3.763756656335354e-06, "loss": 0.3004, "step": 30569 }, { "epoch": 3.1079707198047988, "grad_norm": 0.30518239736557007, "learning_rate": 3.7634127919475166e-06, "loss": 0.2822, "step": 30570 }, { "epoch": 3.1080723871492477, "grad_norm": 0.26094257831573486, "learning_rate": 3.7630689337893135e-06, "loss": 0.289, "step": 30571 }, { "epoch": 3.1081740544936967, "grad_norm": 0.30323079228401184, "learning_rate": 3.7627250818624733e-06, "loss": 0.308, "step": 30572 }, { "epoch": 3.1082757218381456, "grad_norm": 0.27390363812446594, "learning_rate": 3.7623812361687305e-06, "loss": 0.2857, "step": 30573 }, { "epoch": 3.1083773891825945, "grad_norm": 0.2642209231853485, "learning_rate": 3.7620373967098177e-06, "loss": 0.3066, "step": 30574 }, { "epoch": 3.1084790565270435, "grad_norm": 0.273162305355072, "learning_rate": 3.7616935634874657e-06, "loss": 0.3499, "step": 30575 }, { "epoch": 3.1085807238714924, "grad_norm": 0.2618342638015747, "learning_rate": 3.761349736503408e-06, "loss": 0.3444, "step": 30576 }, { "epoch": 3.1086823912159414, "grad_norm": 0.276226669549942, "learning_rate": 3.7610059157593752e-06, "loss": 0.3062, "step": 30577 }, { "epoch": 3.1087840585603903, "grad_norm": 0.2711014151573181, "learning_rate": 3.7606621012571e-06, "loss": 0.2914, "step": 30578 }, { "epoch": 3.1088857259048392, "grad_norm": 0.2691701352596283, "learning_rate": 3.7603182929983157e-06, "loss": 0.3065, "step": 30579 }, { "epoch": 3.108987393249288, "grad_norm": 0.2595280110836029, "learning_rate": 3.7599744909847525e-06, "loss": 0.2934, "step": 30580 }, { "epoch": 3.109089060593737, "grad_norm": 0.2750377953052521, "learning_rate": 3.7596306952181436e-06, "loss": 0.3159, "step": 30581 }, { "epoch": 3.109190727938186, "grad_norm": 0.2671017050743103, "learning_rate": 3.7592869057002197e-06, "loss": 0.2643, "step": 30582 }, { "epoch": 3.109292395282635, "grad_norm": 0.265428364276886, "learning_rate": 3.758943122432713e-06, "loss": 0.3175, "step": 30583 }, { "epoch": 3.109394062627084, "grad_norm": 0.2843928933143616, "learning_rate": 3.758599345417358e-06, "loss": 0.31, "step": 30584 }, { "epoch": 3.1094957299715333, "grad_norm": 0.2752234935760498, "learning_rate": 3.758255574655883e-06, "loss": 0.2985, "step": 30585 }, { "epoch": 3.1095973973159823, "grad_norm": 0.277699738740921, "learning_rate": 3.757911810150022e-06, "loss": 0.3084, "step": 30586 }, { "epoch": 3.1096990646604312, "grad_norm": 0.2887864410877228, "learning_rate": 3.7575680519015057e-06, "loss": 0.3036, "step": 30587 }, { "epoch": 3.10980073200488, "grad_norm": 0.27937406301498413, "learning_rate": 3.757224299912066e-06, "loss": 0.3255, "step": 30588 }, { "epoch": 3.109902399349329, "grad_norm": 0.26958391070365906, "learning_rate": 3.7568805541834364e-06, "loss": 0.3215, "step": 30589 }, { "epoch": 3.110004066693778, "grad_norm": 0.2754737138748169, "learning_rate": 3.7565368147173453e-06, "loss": 0.3012, "step": 30590 }, { "epoch": 3.110105734038227, "grad_norm": 0.2605099081993103, "learning_rate": 3.756193081515528e-06, "loss": 0.3183, "step": 30591 }, { "epoch": 3.110207401382676, "grad_norm": 0.25758087635040283, "learning_rate": 3.755849354579713e-06, "loss": 0.3058, "step": 30592 }, { "epoch": 3.110309068727125, "grad_norm": 0.27961039543151855, "learning_rate": 3.7555056339116335e-06, "loss": 0.3234, "step": 30593 }, { "epoch": 3.110410736071574, "grad_norm": 0.2780177593231201, "learning_rate": 3.7551619195130217e-06, "loss": 0.3098, "step": 30594 }, { "epoch": 3.1105124034160228, "grad_norm": 0.2746497392654419, "learning_rate": 3.7548182113856072e-06, "loss": 0.3025, "step": 30595 }, { "epoch": 3.1106140707604717, "grad_norm": 0.31193801760673523, "learning_rate": 3.754474509531124e-06, "loss": 0.3167, "step": 30596 }, { "epoch": 3.1107157381049206, "grad_norm": 0.27576422691345215, "learning_rate": 3.754130813951301e-06, "loss": 0.3211, "step": 30597 }, { "epoch": 3.1108174054493696, "grad_norm": 0.26125383377075195, "learning_rate": 3.7537871246478708e-06, "loss": 0.3211, "step": 30598 }, { "epoch": 3.1109190727938185, "grad_norm": 0.27771347761154175, "learning_rate": 3.753443441622566e-06, "loss": 0.3075, "step": 30599 }, { "epoch": 3.1110207401382675, "grad_norm": 0.30047208070755005, "learning_rate": 3.7530997648771163e-06, "loss": 0.3214, "step": 30600 }, { "epoch": 3.1111224074827164, "grad_norm": 0.27402743697166443, "learning_rate": 3.7527560944132547e-06, "loss": 0.3096, "step": 30601 }, { "epoch": 3.1112240748271653, "grad_norm": 0.294199138879776, "learning_rate": 3.7524124302327103e-06, "loss": 0.3012, "step": 30602 }, { "epoch": 3.1113257421716143, "grad_norm": 0.2801703214645386, "learning_rate": 3.752068772337215e-06, "loss": 0.3595, "step": 30603 }, { "epoch": 3.1114274095160637, "grad_norm": 0.3112107515335083, "learning_rate": 3.751725120728502e-06, "loss": 0.3149, "step": 30604 }, { "epoch": 3.1115290768605126, "grad_norm": 0.2610425353050232, "learning_rate": 3.7513814754083004e-06, "loss": 0.2934, "step": 30605 }, { "epoch": 3.1116307442049616, "grad_norm": 0.24915893375873566, "learning_rate": 3.7510378363783436e-06, "loss": 0.284, "step": 30606 }, { "epoch": 3.1117324115494105, "grad_norm": 0.26627886295318604, "learning_rate": 3.75069420364036e-06, "loss": 0.3091, "step": 30607 }, { "epoch": 3.1118340788938594, "grad_norm": 0.2691424787044525, "learning_rate": 3.7503505771960817e-06, "loss": 0.3323, "step": 30608 }, { "epoch": 3.1119357462383084, "grad_norm": 0.2732393741607666, "learning_rate": 3.750006957047242e-06, "loss": 0.355, "step": 30609 }, { "epoch": 3.1120374135827573, "grad_norm": 0.26391902565956116, "learning_rate": 3.749663343195569e-06, "loss": 0.3409, "step": 30610 }, { "epoch": 3.1121390809272063, "grad_norm": 0.27258697152137756, "learning_rate": 3.749319735642796e-06, "loss": 0.3064, "step": 30611 }, { "epoch": 3.112240748271655, "grad_norm": 0.2717162072658539, "learning_rate": 3.7489761343906512e-06, "loss": 0.3276, "step": 30612 }, { "epoch": 3.112342415616104, "grad_norm": 0.2758818566799164, "learning_rate": 3.748632539440868e-06, "loss": 0.3256, "step": 30613 }, { "epoch": 3.112444082960553, "grad_norm": 0.2560397982597351, "learning_rate": 3.7482889507951777e-06, "loss": 0.3208, "step": 30614 }, { "epoch": 3.112545750305002, "grad_norm": 0.2892637252807617, "learning_rate": 3.747945368455309e-06, "loss": 0.3065, "step": 30615 }, { "epoch": 3.112647417649451, "grad_norm": 0.2622396945953369, "learning_rate": 3.7476017924229948e-06, "loss": 0.2954, "step": 30616 }, { "epoch": 3.1127490849939, "grad_norm": 0.30130961537361145, "learning_rate": 3.7472582226999643e-06, "loss": 0.3199, "step": 30617 }, { "epoch": 3.112850752338349, "grad_norm": 0.2894379198551178, "learning_rate": 3.7469146592879497e-06, "loss": 0.3, "step": 30618 }, { "epoch": 3.112952419682798, "grad_norm": 0.2516297996044159, "learning_rate": 3.746571102188682e-06, "loss": 0.3234, "step": 30619 }, { "epoch": 3.1130540870272467, "grad_norm": 0.29349809885025024, "learning_rate": 3.7462275514038898e-06, "loss": 0.2915, "step": 30620 }, { "epoch": 3.1131557543716957, "grad_norm": 0.2884814143180847, "learning_rate": 3.7458840069353063e-06, "loss": 0.3021, "step": 30621 }, { "epoch": 3.1132574217161446, "grad_norm": 0.2608192265033722, "learning_rate": 3.7455404687846606e-06, "loss": 0.2781, "step": 30622 }, { "epoch": 3.1133590890605936, "grad_norm": 0.2641621232032776, "learning_rate": 3.7451969369536838e-06, "loss": 0.2944, "step": 30623 }, { "epoch": 3.1134607564050425, "grad_norm": 0.2826441824436188, "learning_rate": 3.7448534114441083e-06, "loss": 0.3079, "step": 30624 }, { "epoch": 3.1135624237494914, "grad_norm": 0.2991805970668793, "learning_rate": 3.7445098922576604e-06, "loss": 0.3215, "step": 30625 }, { "epoch": 3.113664091093941, "grad_norm": 0.3091314733028412, "learning_rate": 3.7441663793960758e-06, "loss": 0.3065, "step": 30626 }, { "epoch": 3.1137657584383898, "grad_norm": 0.25998055934906006, "learning_rate": 3.743822872861081e-06, "loss": 0.2958, "step": 30627 }, { "epoch": 3.1138674257828387, "grad_norm": 0.28153079748153687, "learning_rate": 3.743479372654408e-06, "loss": 0.3092, "step": 30628 }, { "epoch": 3.1139690931272876, "grad_norm": 0.28168296813964844, "learning_rate": 3.743135878777788e-06, "loss": 0.2674, "step": 30629 }, { "epoch": 3.1140707604717366, "grad_norm": 0.2800218462944031, "learning_rate": 3.74279239123295e-06, "loss": 0.316, "step": 30630 }, { "epoch": 3.1141724278161855, "grad_norm": 0.25882142782211304, "learning_rate": 3.7424489100216266e-06, "loss": 0.3186, "step": 30631 }, { "epoch": 3.1142740951606345, "grad_norm": 0.29156914353370667, "learning_rate": 3.7421054351455453e-06, "loss": 0.3203, "step": 30632 }, { "epoch": 3.1143757625050834, "grad_norm": 0.27542516589164734, "learning_rate": 3.7417619666064385e-06, "loss": 0.3251, "step": 30633 }, { "epoch": 3.1144774298495324, "grad_norm": 0.2777326703071594, "learning_rate": 3.7414185044060363e-06, "loss": 0.3009, "step": 30634 }, { "epoch": 3.1145790971939813, "grad_norm": 0.2559260129928589, "learning_rate": 3.741075048546068e-06, "loss": 0.3047, "step": 30635 }, { "epoch": 3.1146807645384302, "grad_norm": 0.2806863486766815, "learning_rate": 3.740731599028265e-06, "loss": 0.3007, "step": 30636 }, { "epoch": 3.114782431882879, "grad_norm": 0.2609711289405823, "learning_rate": 3.7403881558543557e-06, "loss": 0.3488, "step": 30637 }, { "epoch": 3.114884099227328, "grad_norm": 0.2697044312953949, "learning_rate": 3.7400447190260724e-06, "loss": 0.2962, "step": 30638 }, { "epoch": 3.114985766571777, "grad_norm": 0.2895154058933258, "learning_rate": 3.739701288545145e-06, "loss": 0.301, "step": 30639 }, { "epoch": 3.115087433916226, "grad_norm": 0.2784340977668762, "learning_rate": 3.7393578644133023e-06, "loss": 0.322, "step": 30640 }, { "epoch": 3.115189101260675, "grad_norm": 0.2903769612312317, "learning_rate": 3.739014446632275e-06, "loss": 0.3005, "step": 30641 }, { "epoch": 3.115290768605124, "grad_norm": 0.26388248801231384, "learning_rate": 3.7386710352037935e-06, "loss": 0.3088, "step": 30642 }, { "epoch": 3.115392435949573, "grad_norm": 0.2741745412349701, "learning_rate": 3.7383276301295866e-06, "loss": 0.317, "step": 30643 }, { "epoch": 3.1154941032940218, "grad_norm": 0.2667674720287323, "learning_rate": 3.7379842314113875e-06, "loss": 0.2884, "step": 30644 }, { "epoch": 3.115595770638471, "grad_norm": 0.2558456063270569, "learning_rate": 3.7376408390509224e-06, "loss": 0.3057, "step": 30645 }, { "epoch": 3.11569743798292, "grad_norm": 0.30488789081573486, "learning_rate": 3.7372974530499234e-06, "loss": 0.3276, "step": 30646 }, { "epoch": 3.115799105327369, "grad_norm": 0.26976168155670166, "learning_rate": 3.736954073410119e-06, "loss": 0.2916, "step": 30647 }, { "epoch": 3.115900772671818, "grad_norm": 0.2773008942604065, "learning_rate": 3.7366107001332398e-06, "loss": 0.3095, "step": 30648 }, { "epoch": 3.116002440016267, "grad_norm": 0.27618008852005005, "learning_rate": 3.7362673332210164e-06, "loss": 0.2765, "step": 30649 }, { "epoch": 3.116104107360716, "grad_norm": 0.2594289481639862, "learning_rate": 3.7359239726751774e-06, "loss": 0.3078, "step": 30650 }, { "epoch": 3.116205774705165, "grad_norm": 0.2721433639526367, "learning_rate": 3.735580618497454e-06, "loss": 0.2922, "step": 30651 }, { "epoch": 3.1163074420496137, "grad_norm": 0.3169783055782318, "learning_rate": 3.7352372706895735e-06, "loss": 0.3065, "step": 30652 }, { "epoch": 3.1164091093940627, "grad_norm": 0.3064645826816559, "learning_rate": 3.734893929253267e-06, "loss": 0.3156, "step": 30653 }, { "epoch": 3.1165107767385116, "grad_norm": 0.2572755217552185, "learning_rate": 3.734550594190266e-06, "loss": 0.3356, "step": 30654 }, { "epoch": 3.1166124440829606, "grad_norm": 0.27262967824935913, "learning_rate": 3.734207265502297e-06, "loss": 0.2861, "step": 30655 }, { "epoch": 3.1167141114274095, "grad_norm": 0.2819765508174896, "learning_rate": 3.733863943191093e-06, "loss": 0.3143, "step": 30656 }, { "epoch": 3.1168157787718584, "grad_norm": 0.26474735140800476, "learning_rate": 3.7335206272583792e-06, "loss": 0.3149, "step": 30657 }, { "epoch": 3.1169174461163074, "grad_norm": 0.2685052752494812, "learning_rate": 3.733177317705887e-06, "loss": 0.3297, "step": 30658 }, { "epoch": 3.1170191134607563, "grad_norm": 0.2712407410144806, "learning_rate": 3.7328340145353485e-06, "loss": 0.3175, "step": 30659 }, { "epoch": 3.1171207808052053, "grad_norm": 0.26917439699172974, "learning_rate": 3.7324907177484903e-06, "loss": 0.2794, "step": 30660 }, { "epoch": 3.117222448149654, "grad_norm": 0.26431187987327576, "learning_rate": 3.7321474273470442e-06, "loss": 0.3245, "step": 30661 }, { "epoch": 3.117324115494103, "grad_norm": 0.2785118520259857, "learning_rate": 3.731804143332735e-06, "loss": 0.2984, "step": 30662 }, { "epoch": 3.117425782838552, "grad_norm": 0.2670263946056366, "learning_rate": 3.731460865707296e-06, "loss": 0.3058, "step": 30663 }, { "epoch": 3.117527450183001, "grad_norm": 0.2681651711463928, "learning_rate": 3.7311175944724574e-06, "loss": 0.3047, "step": 30664 }, { "epoch": 3.11762911752745, "grad_norm": 0.2773779034614563, "learning_rate": 3.7307743296299455e-06, "loss": 0.3079, "step": 30665 }, { "epoch": 3.117730784871899, "grad_norm": 0.2671947181224823, "learning_rate": 3.730431071181493e-06, "loss": 0.3225, "step": 30666 }, { "epoch": 3.1178324522163483, "grad_norm": 0.27240926027297974, "learning_rate": 3.7300878191288236e-06, "loss": 0.3167, "step": 30667 }, { "epoch": 3.1179341195607972, "grad_norm": 0.26126807928085327, "learning_rate": 3.7297445734736713e-06, "loss": 0.3054, "step": 30668 }, { "epoch": 3.118035786905246, "grad_norm": 0.2759537696838379, "learning_rate": 3.7294013342177648e-06, "loss": 0.3033, "step": 30669 }, { "epoch": 3.118137454249695, "grad_norm": 0.2824123501777649, "learning_rate": 3.729058101362832e-06, "loss": 0.3007, "step": 30670 }, { "epoch": 3.118239121594144, "grad_norm": 0.2847413420677185, "learning_rate": 3.728714874910604e-06, "loss": 0.3155, "step": 30671 }, { "epoch": 3.118340788938593, "grad_norm": 0.24743075668811798, "learning_rate": 3.7283716548628056e-06, "loss": 0.3292, "step": 30672 }, { "epoch": 3.118442456283042, "grad_norm": 0.25589215755462646, "learning_rate": 3.728028441221169e-06, "loss": 0.2974, "step": 30673 }, { "epoch": 3.118544123627491, "grad_norm": 0.2951377332210541, "learning_rate": 3.727685233987425e-06, "loss": 0.3147, "step": 30674 }, { "epoch": 3.11864579097194, "grad_norm": 0.2935548722743988, "learning_rate": 3.727342033163298e-06, "loss": 0.2864, "step": 30675 }, { "epoch": 3.1187474583163888, "grad_norm": 0.29520800709724426, "learning_rate": 3.7269988387505227e-06, "loss": 0.3447, "step": 30676 }, { "epoch": 3.1188491256608377, "grad_norm": 0.2837851047515869, "learning_rate": 3.726655650750821e-06, "loss": 0.3269, "step": 30677 }, { "epoch": 3.1189507930052867, "grad_norm": 0.26095113158226013, "learning_rate": 3.726312469165927e-06, "loss": 0.31, "step": 30678 }, { "epoch": 3.1190524603497356, "grad_norm": 0.26758328080177307, "learning_rate": 3.72596929399757e-06, "loss": 0.3105, "step": 30679 }, { "epoch": 3.1191541276941845, "grad_norm": 0.2827758491039276, "learning_rate": 3.7256261252474736e-06, "loss": 0.2985, "step": 30680 }, { "epoch": 3.1192557950386335, "grad_norm": 0.273494154214859, "learning_rate": 3.725282962917373e-06, "loss": 0.3275, "step": 30681 }, { "epoch": 3.1193574623830824, "grad_norm": 0.2950139343738556, "learning_rate": 3.724939807008991e-06, "loss": 0.318, "step": 30682 }, { "epoch": 3.1194591297275314, "grad_norm": 0.2613021433353424, "learning_rate": 3.72459665752406e-06, "loss": 0.3099, "step": 30683 }, { "epoch": 3.1195607970719803, "grad_norm": 0.2682945728302002, "learning_rate": 3.724253514464311e-06, "loss": 0.2859, "step": 30684 }, { "epoch": 3.1196624644164292, "grad_norm": 0.2808602750301361, "learning_rate": 3.7239103778314656e-06, "loss": 0.3104, "step": 30685 }, { "epoch": 3.1197641317608786, "grad_norm": 0.2629505395889282, "learning_rate": 3.7235672476272598e-06, "loss": 0.3018, "step": 30686 }, { "epoch": 3.1198657991053276, "grad_norm": 0.2816856801509857, "learning_rate": 3.7232241238534155e-06, "loss": 0.345, "step": 30687 }, { "epoch": 3.1199674664497765, "grad_norm": 0.2925538122653961, "learning_rate": 3.7228810065116662e-06, "loss": 0.2844, "step": 30688 }, { "epoch": 3.1200691337942255, "grad_norm": 0.2929717004299164, "learning_rate": 3.7225378956037404e-06, "loss": 0.3306, "step": 30689 }, { "epoch": 3.1201708011386744, "grad_norm": 0.30100172758102417, "learning_rate": 3.722194791131363e-06, "loss": 0.3156, "step": 30690 }, { "epoch": 3.1202724684831233, "grad_norm": 0.30379247665405273, "learning_rate": 3.721851693096267e-06, "loss": 0.3346, "step": 30691 }, { "epoch": 3.1203741358275723, "grad_norm": 0.28207647800445557, "learning_rate": 3.721508601500175e-06, "loss": 0.327, "step": 30692 }, { "epoch": 3.1204758031720212, "grad_norm": 0.29110774397850037, "learning_rate": 3.7211655163448202e-06, "loss": 0.2743, "step": 30693 }, { "epoch": 3.12057747051647, "grad_norm": 0.2807839512825012, "learning_rate": 3.720822437631932e-06, "loss": 0.3198, "step": 30694 }, { "epoch": 3.120679137860919, "grad_norm": 0.28687191009521484, "learning_rate": 3.7204793653632334e-06, "loss": 0.2953, "step": 30695 }, { "epoch": 3.120780805205368, "grad_norm": 0.27529504895210266, "learning_rate": 3.7201362995404577e-06, "loss": 0.3122, "step": 30696 }, { "epoch": 3.120882472549817, "grad_norm": 0.27014923095703125, "learning_rate": 3.7197932401653302e-06, "loss": 0.3119, "step": 30697 }, { "epoch": 3.120984139894266, "grad_norm": 0.26923084259033203, "learning_rate": 3.719450187239578e-06, "loss": 0.3351, "step": 30698 }, { "epoch": 3.121085807238715, "grad_norm": 0.29096677899360657, "learning_rate": 3.719107140764936e-06, "loss": 0.3546, "step": 30699 }, { "epoch": 3.121187474583164, "grad_norm": 0.30278563499450684, "learning_rate": 3.718764100743124e-06, "loss": 0.3137, "step": 30700 }, { "epoch": 3.1212891419276128, "grad_norm": 0.28056415915489197, "learning_rate": 3.7184210671758768e-06, "loss": 0.2839, "step": 30701 }, { "epoch": 3.1213908092720617, "grad_norm": 0.26669758558273315, "learning_rate": 3.7180780400649187e-06, "loss": 0.2841, "step": 30702 }, { "epoch": 3.1214924766165106, "grad_norm": 0.2747942805290222, "learning_rate": 3.7177350194119766e-06, "loss": 0.3089, "step": 30703 }, { "epoch": 3.1215941439609596, "grad_norm": 0.29507601261138916, "learning_rate": 3.7173920052187847e-06, "loss": 0.2644, "step": 30704 }, { "epoch": 3.1216958113054085, "grad_norm": 0.26573336124420166, "learning_rate": 3.7170489974870633e-06, "loss": 0.3112, "step": 30705 }, { "epoch": 3.1217974786498575, "grad_norm": 0.29890644550323486, "learning_rate": 3.7167059962185477e-06, "loss": 0.2946, "step": 30706 }, { "epoch": 3.1218991459943064, "grad_norm": 0.258900910615921, "learning_rate": 3.71636300141496e-06, "loss": 0.2878, "step": 30707 }, { "epoch": 3.122000813338756, "grad_norm": 0.289000928401947, "learning_rate": 3.716020013078032e-06, "loss": 0.3207, "step": 30708 }, { "epoch": 3.1221024806832047, "grad_norm": 0.2803412079811096, "learning_rate": 3.7156770312094883e-06, "loss": 0.3077, "step": 30709 }, { "epoch": 3.1222041480276537, "grad_norm": 0.2756330668926239, "learning_rate": 3.715334055811057e-06, "loss": 0.2848, "step": 30710 }, { "epoch": 3.1223058153721026, "grad_norm": 0.24870559573173523, "learning_rate": 3.714991086884472e-06, "loss": 0.2865, "step": 30711 }, { "epoch": 3.1224074827165516, "grad_norm": 0.2943713366985321, "learning_rate": 3.7146481244314537e-06, "loss": 0.2972, "step": 30712 }, { "epoch": 3.1225091500610005, "grad_norm": 0.27365565299987793, "learning_rate": 3.714305168453734e-06, "loss": 0.3122, "step": 30713 }, { "epoch": 3.1226108174054494, "grad_norm": 0.2588374614715576, "learning_rate": 3.713962218953038e-06, "loss": 0.3492, "step": 30714 }, { "epoch": 3.1227124847498984, "grad_norm": 0.27522382140159607, "learning_rate": 3.713619275931095e-06, "loss": 0.3056, "step": 30715 }, { "epoch": 3.1228141520943473, "grad_norm": 0.29339903593063354, "learning_rate": 3.7132763393896334e-06, "loss": 0.3271, "step": 30716 }, { "epoch": 3.1229158194387963, "grad_norm": 0.2636387348175049, "learning_rate": 3.7129334093303793e-06, "loss": 0.3221, "step": 30717 }, { "epoch": 3.123017486783245, "grad_norm": 0.26344093680381775, "learning_rate": 3.7125904857550608e-06, "loss": 0.313, "step": 30718 }, { "epoch": 3.123119154127694, "grad_norm": 0.2967146933078766, "learning_rate": 3.7122475686654045e-06, "loss": 0.3143, "step": 30719 }, { "epoch": 3.123220821472143, "grad_norm": 0.26668787002563477, "learning_rate": 3.71190465806314e-06, "loss": 0.2911, "step": 30720 }, { "epoch": 3.123322488816592, "grad_norm": 0.24849535524845123, "learning_rate": 3.711561753949994e-06, "loss": 0.3138, "step": 30721 }, { "epoch": 3.123424156161041, "grad_norm": 0.2850155532360077, "learning_rate": 3.711218856327693e-06, "loss": 0.2975, "step": 30722 }, { "epoch": 3.12352582350549, "grad_norm": 0.28881123661994934, "learning_rate": 3.710875965197966e-06, "loss": 0.3139, "step": 30723 }, { "epoch": 3.123627490849939, "grad_norm": 0.2730242609977722, "learning_rate": 3.7105330805625383e-06, "loss": 0.2818, "step": 30724 }, { "epoch": 3.123729158194388, "grad_norm": 0.2807281017303467, "learning_rate": 3.710190202423139e-06, "loss": 0.3369, "step": 30725 }, { "epoch": 3.1238308255388367, "grad_norm": 0.28774794936180115, "learning_rate": 3.7098473307814955e-06, "loss": 0.34, "step": 30726 }, { "epoch": 3.123932492883286, "grad_norm": 0.31878721714019775, "learning_rate": 3.7095044656393343e-06, "loss": 0.2802, "step": 30727 }, { "epoch": 3.124034160227735, "grad_norm": 0.2810475528240204, "learning_rate": 3.7091616069983828e-06, "loss": 0.2819, "step": 30728 }, { "epoch": 3.124135827572184, "grad_norm": 0.2604791224002838, "learning_rate": 3.7088187548603682e-06, "loss": 0.3056, "step": 30729 }, { "epoch": 3.124237494916633, "grad_norm": 0.2823379635810852, "learning_rate": 3.7084759092270174e-06, "loss": 0.3337, "step": 30730 }, { "epoch": 3.124339162261082, "grad_norm": 0.28305914998054504, "learning_rate": 3.7081330701000596e-06, "loss": 0.291, "step": 30731 }, { "epoch": 3.124440829605531, "grad_norm": 0.26167988777160645, "learning_rate": 3.7077902374812193e-06, "loss": 0.3242, "step": 30732 }, { "epoch": 3.1245424969499798, "grad_norm": 0.2660330832004547, "learning_rate": 3.7074474113722252e-06, "loss": 0.3096, "step": 30733 }, { "epoch": 3.1246441642944287, "grad_norm": 0.28824669122695923, "learning_rate": 3.707104591774803e-06, "loss": 0.3029, "step": 30734 }, { "epoch": 3.1247458316388776, "grad_norm": 0.28553083539009094, "learning_rate": 3.7067617786906807e-06, "loss": 0.3318, "step": 30735 }, { "epoch": 3.1248474989833266, "grad_norm": 0.2608819007873535, "learning_rate": 3.7064189721215865e-06, "loss": 0.299, "step": 30736 }, { "epoch": 3.1249491663277755, "grad_norm": 0.2883498966693878, "learning_rate": 3.706076172069245e-06, "loss": 0.3217, "step": 30737 }, { "epoch": 3.1250508336722245, "grad_norm": 0.2693191170692444, "learning_rate": 3.7057333785353855e-06, "loss": 0.296, "step": 30738 }, { "epoch": 3.1251525010166734, "grad_norm": 0.2817703187465668, "learning_rate": 3.7053905915217325e-06, "loss": 0.3121, "step": 30739 }, { "epoch": 3.1252541683611224, "grad_norm": 0.27988991141319275, "learning_rate": 3.7050478110300138e-06, "loss": 0.3188, "step": 30740 }, { "epoch": 3.1253558357055713, "grad_norm": 0.2746793329715729, "learning_rate": 3.7047050370619573e-06, "loss": 0.2968, "step": 30741 }, { "epoch": 3.1254575030500202, "grad_norm": 0.28952181339263916, "learning_rate": 3.704362269619288e-06, "loss": 0.3235, "step": 30742 }, { "epoch": 3.125559170394469, "grad_norm": 0.28280892968177795, "learning_rate": 3.704019508703736e-06, "loss": 0.3139, "step": 30743 }, { "epoch": 3.125660837738918, "grad_norm": 0.2759879529476166, "learning_rate": 3.7036767543170236e-06, "loss": 0.3172, "step": 30744 }, { "epoch": 3.125762505083367, "grad_norm": 0.2729407250881195, "learning_rate": 3.7033340064608796e-06, "loss": 0.3116, "step": 30745 }, { "epoch": 3.125864172427816, "grad_norm": 0.279153048992157, "learning_rate": 3.7029912651370325e-06, "loss": 0.2905, "step": 30746 }, { "epoch": 3.125965839772265, "grad_norm": 0.2800799608230591, "learning_rate": 3.7026485303472056e-06, "loss": 0.2782, "step": 30747 }, { "epoch": 3.126067507116714, "grad_norm": 0.2800557613372803, "learning_rate": 3.702305802093128e-06, "loss": 0.3012, "step": 30748 }, { "epoch": 3.1261691744611633, "grad_norm": 0.3054140508174896, "learning_rate": 3.701963080376525e-06, "loss": 0.3112, "step": 30749 }, { "epoch": 3.126270841805612, "grad_norm": 0.2606779932975769, "learning_rate": 3.701620365199123e-06, "loss": 0.3117, "step": 30750 }, { "epoch": 3.126372509150061, "grad_norm": 0.2736991047859192, "learning_rate": 3.7012776565626496e-06, "loss": 0.3049, "step": 30751 }, { "epoch": 3.12647417649451, "grad_norm": 0.2772558629512787, "learning_rate": 3.7009349544688306e-06, "loss": 0.2926, "step": 30752 }, { "epoch": 3.126575843838959, "grad_norm": 0.2680326998233795, "learning_rate": 3.700592258919393e-06, "loss": 0.2954, "step": 30753 }, { "epoch": 3.126677511183408, "grad_norm": 0.2806369662284851, "learning_rate": 3.700249569916062e-06, "loss": 0.3247, "step": 30754 }, { "epoch": 3.126779178527857, "grad_norm": 0.274480938911438, "learning_rate": 3.6999068874605653e-06, "loss": 0.3078, "step": 30755 }, { "epoch": 3.126880845872306, "grad_norm": 0.2657604217529297, "learning_rate": 3.699564211554629e-06, "loss": 0.3116, "step": 30756 }, { "epoch": 3.126982513216755, "grad_norm": 0.27030640840530396, "learning_rate": 3.699221542199979e-06, "loss": 0.2839, "step": 30757 }, { "epoch": 3.1270841805612037, "grad_norm": 0.260066419839859, "learning_rate": 3.6988788793983427e-06, "loss": 0.3141, "step": 30758 }, { "epoch": 3.1271858479056527, "grad_norm": 0.23274046182632446, "learning_rate": 3.6985362231514436e-06, "loss": 0.3069, "step": 30759 }, { "epoch": 3.1272875152501016, "grad_norm": 0.27529463171958923, "learning_rate": 3.6981935734610103e-06, "loss": 0.3069, "step": 30760 }, { "epoch": 3.1273891825945506, "grad_norm": 0.26327064633369446, "learning_rate": 3.697850930328769e-06, "loss": 0.3006, "step": 30761 }, { "epoch": 3.1274908499389995, "grad_norm": 0.2755243480205536, "learning_rate": 3.697508293756445e-06, "loss": 0.3216, "step": 30762 }, { "epoch": 3.1275925172834484, "grad_norm": 0.25524502992630005, "learning_rate": 3.6971656637457655e-06, "loss": 0.3601, "step": 30763 }, { "epoch": 3.1276941846278974, "grad_norm": 0.2647041380405426, "learning_rate": 3.696823040298454e-06, "loss": 0.329, "step": 30764 }, { "epoch": 3.1277958519723463, "grad_norm": 0.2901996076107025, "learning_rate": 3.6964804234162387e-06, "loss": 0.3107, "step": 30765 }, { "epoch": 3.1278975193167953, "grad_norm": 0.27672919631004333, "learning_rate": 3.696137813100847e-06, "loss": 0.3352, "step": 30766 }, { "epoch": 3.127999186661244, "grad_norm": 0.29594025015830994, "learning_rate": 3.695795209354002e-06, "loss": 0.3131, "step": 30767 }, { "epoch": 3.1281008540056936, "grad_norm": 0.2766191065311432, "learning_rate": 3.695452612177431e-06, "loss": 0.2855, "step": 30768 }, { "epoch": 3.1282025213501425, "grad_norm": 0.2540285885334015, "learning_rate": 3.695110021572859e-06, "loss": 0.2918, "step": 30769 }, { "epoch": 3.1283041886945915, "grad_norm": 0.24289122223854065, "learning_rate": 3.6947674375420127e-06, "loss": 0.2904, "step": 30770 }, { "epoch": 3.1284058560390404, "grad_norm": 0.26009753346443176, "learning_rate": 3.6944248600866194e-06, "loss": 0.3048, "step": 30771 }, { "epoch": 3.1285075233834894, "grad_norm": 0.26827722787857056, "learning_rate": 3.694082289208402e-06, "loss": 0.3173, "step": 30772 }, { "epoch": 3.1286091907279383, "grad_norm": 0.2787037789821625, "learning_rate": 3.6937397249090883e-06, "loss": 0.3015, "step": 30773 }, { "epoch": 3.1287108580723872, "grad_norm": 0.2888391613960266, "learning_rate": 3.693397167190403e-06, "loss": 0.3351, "step": 30774 }, { "epoch": 3.128812525416836, "grad_norm": 0.2776581943035126, "learning_rate": 3.6930546160540716e-06, "loss": 0.3226, "step": 30775 }, { "epoch": 3.128914192761285, "grad_norm": 0.28929823637008667, "learning_rate": 3.6927120715018222e-06, "loss": 0.3289, "step": 30776 }, { "epoch": 3.129015860105734, "grad_norm": 0.28011569380760193, "learning_rate": 3.6923695335353773e-06, "loss": 0.3006, "step": 30777 }, { "epoch": 3.129117527450183, "grad_norm": 0.29096609354019165, "learning_rate": 3.692027002156465e-06, "loss": 0.303, "step": 30778 }, { "epoch": 3.129219194794632, "grad_norm": 0.25689154863357544, "learning_rate": 3.6916844773668094e-06, "loss": 0.3023, "step": 30779 }, { "epoch": 3.129320862139081, "grad_norm": 0.28969043493270874, "learning_rate": 3.6913419591681356e-06, "loss": 0.3042, "step": 30780 }, { "epoch": 3.12942252948353, "grad_norm": 0.29220935702323914, "learning_rate": 3.6909994475621712e-06, "loss": 0.3403, "step": 30781 }, { "epoch": 3.1295241968279788, "grad_norm": 0.273337721824646, "learning_rate": 3.69065694255064e-06, "loss": 0.3297, "step": 30782 }, { "epoch": 3.1296258641724277, "grad_norm": 0.29098230600357056, "learning_rate": 3.690314444135269e-06, "loss": 0.3138, "step": 30783 }, { "epoch": 3.1297275315168767, "grad_norm": 0.2680194675922394, "learning_rate": 3.689971952317781e-06, "loss": 0.2706, "step": 30784 }, { "epoch": 3.1298291988613256, "grad_norm": 0.27981793880462646, "learning_rate": 3.6896294670999034e-06, "loss": 0.3018, "step": 30785 }, { "epoch": 3.1299308662057745, "grad_norm": 0.26876482367515564, "learning_rate": 3.6892869884833625e-06, "loss": 0.2895, "step": 30786 }, { "epoch": 3.1300325335502235, "grad_norm": 0.25761252641677856, "learning_rate": 3.688944516469881e-06, "loss": 0.3382, "step": 30787 }, { "epoch": 3.1301342008946724, "grad_norm": 0.28181391954421997, "learning_rate": 3.6886020510611863e-06, "loss": 0.311, "step": 30788 }, { "epoch": 3.1302358682391214, "grad_norm": 0.2753511965274811, "learning_rate": 3.6882595922590014e-06, "loss": 0.3511, "step": 30789 }, { "epoch": 3.1303375355835708, "grad_norm": 0.28192147612571716, "learning_rate": 3.6879171400650536e-06, "loss": 0.2849, "step": 30790 }, { "epoch": 3.1304392029280197, "grad_norm": 0.2870384156703949, "learning_rate": 3.6875746944810685e-06, "loss": 0.2985, "step": 30791 }, { "epoch": 3.1305408702724686, "grad_norm": 0.29165709018707275, "learning_rate": 3.6872322555087693e-06, "loss": 0.2907, "step": 30792 }, { "epoch": 3.1306425376169176, "grad_norm": 0.2761632800102234, "learning_rate": 3.6868898231498827e-06, "loss": 0.2906, "step": 30793 }, { "epoch": 3.1307442049613665, "grad_norm": 0.2899179458618164, "learning_rate": 3.6865473974061316e-06, "loss": 0.2792, "step": 30794 }, { "epoch": 3.1308458723058155, "grad_norm": 0.2621000111103058, "learning_rate": 3.686204978279243e-06, "loss": 0.3121, "step": 30795 }, { "epoch": 3.1309475396502644, "grad_norm": 0.29047831892967224, "learning_rate": 3.6858625657709433e-06, "loss": 0.3005, "step": 30796 }, { "epoch": 3.1310492069947133, "grad_norm": 0.2736803889274597, "learning_rate": 3.6855201598829536e-06, "loss": 0.2988, "step": 30797 }, { "epoch": 3.1311508743391623, "grad_norm": 0.27142566442489624, "learning_rate": 3.6851777606170026e-06, "loss": 0.2946, "step": 30798 }, { "epoch": 3.1312525416836112, "grad_norm": 0.28203678131103516, "learning_rate": 3.684835367974813e-06, "loss": 0.316, "step": 30799 }, { "epoch": 3.13135420902806, "grad_norm": 0.26786819100379944, "learning_rate": 3.6844929819581093e-06, "loss": 0.3149, "step": 30800 }, { "epoch": 3.131455876372509, "grad_norm": 0.27898383140563965, "learning_rate": 3.684150602568619e-06, "loss": 0.332, "step": 30801 }, { "epoch": 3.131557543716958, "grad_norm": 0.2804746627807617, "learning_rate": 3.6838082298080634e-06, "loss": 0.309, "step": 30802 }, { "epoch": 3.131659211061407, "grad_norm": 0.2764888405799866, "learning_rate": 3.6834658636781713e-06, "loss": 0.3142, "step": 30803 }, { "epoch": 3.131760878405856, "grad_norm": 0.26672399044036865, "learning_rate": 3.6831235041806633e-06, "loss": 0.2897, "step": 30804 }, { "epoch": 3.131862545750305, "grad_norm": 0.27857744693756104, "learning_rate": 3.6827811513172672e-06, "loss": 0.3158, "step": 30805 }, { "epoch": 3.131964213094754, "grad_norm": 0.29475343227386475, "learning_rate": 3.682438805089707e-06, "loss": 0.3033, "step": 30806 }, { "epoch": 3.1320658804392028, "grad_norm": 0.26889559626579285, "learning_rate": 3.682096465499706e-06, "loss": 0.3234, "step": 30807 }, { "epoch": 3.1321675477836517, "grad_norm": 0.2761280834674835, "learning_rate": 3.6817541325489927e-06, "loss": 0.3154, "step": 30808 }, { "epoch": 3.132269215128101, "grad_norm": 0.27172955870628357, "learning_rate": 3.6814118062392846e-06, "loss": 0.3331, "step": 30809 }, { "epoch": 3.13237088247255, "grad_norm": 0.28787919878959656, "learning_rate": 3.6810694865723116e-06, "loss": 0.2807, "step": 30810 }, { "epoch": 3.132472549816999, "grad_norm": 0.27791646122932434, "learning_rate": 3.6807271735497985e-06, "loss": 0.3106, "step": 30811 }, { "epoch": 3.132574217161448, "grad_norm": 0.271689236164093, "learning_rate": 3.6803848671734675e-06, "loss": 0.2848, "step": 30812 }, { "epoch": 3.132675884505897, "grad_norm": 0.27732253074645996, "learning_rate": 3.6800425674450458e-06, "loss": 0.304, "step": 30813 }, { "epoch": 3.132777551850346, "grad_norm": 0.2826046645641327, "learning_rate": 3.6797002743662524e-06, "loss": 0.2864, "step": 30814 }, { "epoch": 3.1328792191947947, "grad_norm": 0.28910672664642334, "learning_rate": 3.6793579879388175e-06, "loss": 0.2963, "step": 30815 }, { "epoch": 3.1329808865392437, "grad_norm": 0.27830830216407776, "learning_rate": 3.6790157081644634e-06, "loss": 0.3569, "step": 30816 }, { "epoch": 3.1330825538836926, "grad_norm": 0.2624131739139557, "learning_rate": 3.6786734350449134e-06, "loss": 0.2977, "step": 30817 }, { "epoch": 3.1331842212281416, "grad_norm": 0.25770941376686096, "learning_rate": 3.6783311685818947e-06, "loss": 0.3204, "step": 30818 }, { "epoch": 3.1332858885725905, "grad_norm": 0.2694200277328491, "learning_rate": 3.677988908777126e-06, "loss": 0.3272, "step": 30819 }, { "epoch": 3.1333875559170394, "grad_norm": 0.28793802857398987, "learning_rate": 3.6776466556323365e-06, "loss": 0.3105, "step": 30820 }, { "epoch": 3.1334892232614884, "grad_norm": 0.28778624534606934, "learning_rate": 3.67730440914925e-06, "loss": 0.3107, "step": 30821 }, { "epoch": 3.1335908906059373, "grad_norm": 0.2662918269634247, "learning_rate": 3.6769621693295886e-06, "loss": 0.2973, "step": 30822 }, { "epoch": 3.1336925579503863, "grad_norm": 0.29613932967185974, "learning_rate": 3.6766199361750787e-06, "loss": 0.3087, "step": 30823 }, { "epoch": 3.133794225294835, "grad_norm": 0.26912757754325867, "learning_rate": 3.6762777096874407e-06, "loss": 0.3111, "step": 30824 }, { "epoch": 3.133895892639284, "grad_norm": 0.30925992131233215, "learning_rate": 3.675935489868402e-06, "loss": 0.3377, "step": 30825 }, { "epoch": 3.133997559983733, "grad_norm": 0.28497329354286194, "learning_rate": 3.6755932767196877e-06, "loss": 0.3097, "step": 30826 }, { "epoch": 3.134099227328182, "grad_norm": 0.26877832412719727, "learning_rate": 3.6752510702430167e-06, "loss": 0.2958, "step": 30827 }, { "epoch": 3.134200894672631, "grad_norm": 0.2692039906978607, "learning_rate": 3.674908870440119e-06, "loss": 0.2973, "step": 30828 }, { "epoch": 3.13430256201708, "grad_norm": 0.2720717191696167, "learning_rate": 3.674566677312712e-06, "loss": 0.315, "step": 30829 }, { "epoch": 3.134404229361529, "grad_norm": 0.2942029535770416, "learning_rate": 3.6742244908625248e-06, "loss": 0.303, "step": 30830 }, { "epoch": 3.1345058967059782, "grad_norm": 0.27082476019859314, "learning_rate": 3.6738823110912813e-06, "loss": 0.3293, "step": 30831 }, { "epoch": 3.134607564050427, "grad_norm": 0.29004448652267456, "learning_rate": 3.6735401380007007e-06, "loss": 0.3464, "step": 30832 }, { "epoch": 3.134709231394876, "grad_norm": 0.2682258188724518, "learning_rate": 3.6731979715925125e-06, "loss": 0.2986, "step": 30833 }, { "epoch": 3.134810898739325, "grad_norm": 0.2870232164859772, "learning_rate": 3.6728558118684344e-06, "loss": 0.2881, "step": 30834 }, { "epoch": 3.134912566083774, "grad_norm": 0.27551448345184326, "learning_rate": 3.6725136588301946e-06, "loss": 0.3016, "step": 30835 }, { "epoch": 3.135014233428223, "grad_norm": 0.2610979676246643, "learning_rate": 3.672171512479518e-06, "loss": 0.2991, "step": 30836 }, { "epoch": 3.135115900772672, "grad_norm": 0.26265382766723633, "learning_rate": 3.671829372818122e-06, "loss": 0.3462, "step": 30837 }, { "epoch": 3.135217568117121, "grad_norm": 0.2818070650100708, "learning_rate": 3.6714872398477376e-06, "loss": 0.3174, "step": 30838 }, { "epoch": 3.1353192354615698, "grad_norm": 0.2861645519733429, "learning_rate": 3.6711451135700815e-06, "loss": 0.2989, "step": 30839 }, { "epoch": 3.1354209028060187, "grad_norm": 0.2933026850223541, "learning_rate": 3.6708029939868817e-06, "loss": 0.311, "step": 30840 }, { "epoch": 3.1355225701504676, "grad_norm": 0.2650516629219055, "learning_rate": 3.670460881099862e-06, "loss": 0.2968, "step": 30841 }, { "epoch": 3.1356242374949166, "grad_norm": 0.28337207436561584, "learning_rate": 3.670118774910742e-06, "loss": 0.3125, "step": 30842 }, { "epoch": 3.1357259048393655, "grad_norm": 0.28828051686286926, "learning_rate": 3.6697766754212504e-06, "loss": 0.3002, "step": 30843 }, { "epoch": 3.1358275721838145, "grad_norm": 0.2962283194065094, "learning_rate": 3.6694345826331045e-06, "loss": 0.3027, "step": 30844 }, { "epoch": 3.1359292395282634, "grad_norm": 0.26820117235183716, "learning_rate": 3.6690924965480325e-06, "loss": 0.3135, "step": 30845 }, { "epoch": 3.1360309068727124, "grad_norm": 0.26311421394348145, "learning_rate": 3.668750417167758e-06, "loss": 0.3247, "step": 30846 }, { "epoch": 3.1361325742171613, "grad_norm": 0.28740379214286804, "learning_rate": 3.668408344494e-06, "loss": 0.2659, "step": 30847 }, { "epoch": 3.1362342415616102, "grad_norm": 0.26934167742729187, "learning_rate": 3.6680662785284867e-06, "loss": 0.2994, "step": 30848 }, { "epoch": 3.136335908906059, "grad_norm": 0.2708100974559784, "learning_rate": 3.667724219272937e-06, "loss": 0.3067, "step": 30849 }, { "epoch": 3.1364375762505086, "grad_norm": 0.25426027178764343, "learning_rate": 3.6673821667290753e-06, "loss": 0.3201, "step": 30850 }, { "epoch": 3.1365392435949575, "grad_norm": 0.2712002992630005, "learning_rate": 3.6670401208986283e-06, "loss": 0.3023, "step": 30851 }, { "epoch": 3.1366409109394064, "grad_norm": 0.27647316455841064, "learning_rate": 3.6666980817833137e-06, "loss": 0.3126, "step": 30852 }, { "epoch": 3.1367425782838554, "grad_norm": 0.2879053056240082, "learning_rate": 3.6663560493848603e-06, "loss": 0.3504, "step": 30853 }, { "epoch": 3.1368442456283043, "grad_norm": 0.2856592535972595, "learning_rate": 3.6660140237049862e-06, "loss": 0.3188, "step": 30854 }, { "epoch": 3.1369459129727533, "grad_norm": 0.27607566118240356, "learning_rate": 3.6656720047454152e-06, "loss": 0.3144, "step": 30855 }, { "epoch": 3.137047580317202, "grad_norm": 0.294933557510376, "learning_rate": 3.665329992507875e-06, "loss": 0.3236, "step": 30856 }, { "epoch": 3.137149247661651, "grad_norm": 0.2561124563217163, "learning_rate": 3.664987986994082e-06, "loss": 0.3011, "step": 30857 }, { "epoch": 3.1372509150061, "grad_norm": 0.2739470303058624, "learning_rate": 3.664645988205765e-06, "loss": 0.2767, "step": 30858 }, { "epoch": 3.137352582350549, "grad_norm": 0.30681756138801575, "learning_rate": 3.6643039961446426e-06, "loss": 0.3175, "step": 30859 }, { "epoch": 3.137454249694998, "grad_norm": 0.27234897017478943, "learning_rate": 3.6639620108124376e-06, "loss": 0.2873, "step": 30860 }, { "epoch": 3.137555917039447, "grad_norm": 0.29062479734420776, "learning_rate": 3.6636200322108784e-06, "loss": 0.3283, "step": 30861 }, { "epoch": 3.137657584383896, "grad_norm": 0.2711816430091858, "learning_rate": 3.66327806034168e-06, "loss": 0.3193, "step": 30862 }, { "epoch": 3.137759251728345, "grad_norm": 0.25963377952575684, "learning_rate": 3.662936095206573e-06, "loss": 0.3173, "step": 30863 }, { "epoch": 3.1378609190727937, "grad_norm": 0.2646062672138214, "learning_rate": 3.662594136807273e-06, "loss": 0.283, "step": 30864 }, { "epoch": 3.1379625864172427, "grad_norm": 0.2860093414783478, "learning_rate": 3.6622521851455056e-06, "loss": 0.2887, "step": 30865 }, { "epoch": 3.1380642537616916, "grad_norm": 0.30133751034736633, "learning_rate": 3.6619102402229966e-06, "loss": 0.3123, "step": 30866 }, { "epoch": 3.1381659211061406, "grad_norm": 0.30023908615112305, "learning_rate": 3.6615683020414646e-06, "loss": 0.315, "step": 30867 }, { "epoch": 3.1382675884505895, "grad_norm": 0.26350685954093933, "learning_rate": 3.6612263706026342e-06, "loss": 0.3519, "step": 30868 }, { "epoch": 3.1383692557950384, "grad_norm": 0.2821286618709564, "learning_rate": 3.6608844459082253e-06, "loss": 0.2769, "step": 30869 }, { "epoch": 3.1384709231394874, "grad_norm": 0.26448482275009155, "learning_rate": 3.660542527959962e-06, "loss": 0.3257, "step": 30870 }, { "epoch": 3.1385725904839363, "grad_norm": 0.27166229486465454, "learning_rate": 3.6602006167595707e-06, "loss": 0.3124, "step": 30871 }, { "epoch": 3.1386742578283857, "grad_norm": 0.285971462726593, "learning_rate": 3.659858712308767e-06, "loss": 0.2961, "step": 30872 }, { "epoch": 3.1387759251728347, "grad_norm": 0.27852460741996765, "learning_rate": 3.6595168146092785e-06, "loss": 0.3475, "step": 30873 }, { "epoch": 3.1388775925172836, "grad_norm": 0.2798595130443573, "learning_rate": 3.6591749236628243e-06, "loss": 0.31, "step": 30874 }, { "epoch": 3.1389792598617325, "grad_norm": 0.2712090015411377, "learning_rate": 3.6588330394711275e-06, "loss": 0.3004, "step": 30875 }, { "epoch": 3.1390809272061815, "grad_norm": 0.28747430443763733, "learning_rate": 3.6584911620359143e-06, "loss": 0.3042, "step": 30876 }, { "epoch": 3.1391825945506304, "grad_norm": 0.2879799008369446, "learning_rate": 3.6581492913589014e-06, "loss": 0.3438, "step": 30877 }, { "epoch": 3.1392842618950794, "grad_norm": 0.28635039925575256, "learning_rate": 3.6578074274418152e-06, "loss": 0.3219, "step": 30878 }, { "epoch": 3.1393859292395283, "grad_norm": 0.2940029203891754, "learning_rate": 3.657465570286374e-06, "loss": 0.3209, "step": 30879 }, { "epoch": 3.1394875965839772, "grad_norm": 0.27022436261177063, "learning_rate": 3.6571237198943023e-06, "loss": 0.3276, "step": 30880 }, { "epoch": 3.139589263928426, "grad_norm": 0.27089083194732666, "learning_rate": 3.6567818762673246e-06, "loss": 0.3169, "step": 30881 }, { "epoch": 3.139690931272875, "grad_norm": 0.2725420296192169, "learning_rate": 3.6564400394071593e-06, "loss": 0.2932, "step": 30882 }, { "epoch": 3.139792598617324, "grad_norm": 0.28457799553871155, "learning_rate": 3.6560982093155306e-06, "loss": 0.3269, "step": 30883 }, { "epoch": 3.139894265961773, "grad_norm": 0.2790239155292511, "learning_rate": 3.655756385994158e-06, "loss": 0.3489, "step": 30884 }, { "epoch": 3.139995933306222, "grad_norm": 0.28546300530433655, "learning_rate": 3.6554145694447664e-06, "loss": 0.2925, "step": 30885 }, { "epoch": 3.140097600650671, "grad_norm": 0.28154778480529785, "learning_rate": 3.6550727596690768e-06, "loss": 0.3167, "step": 30886 }, { "epoch": 3.14019926799512, "grad_norm": 0.2600049078464508, "learning_rate": 3.6547309566688104e-06, "loss": 0.3365, "step": 30887 }, { "epoch": 3.1403009353395688, "grad_norm": 0.2651512622833252, "learning_rate": 3.6543891604456903e-06, "loss": 0.3163, "step": 30888 }, { "epoch": 3.1404026026840177, "grad_norm": 0.2703401744365692, "learning_rate": 3.6540473710014375e-06, "loss": 0.3195, "step": 30889 }, { "epoch": 3.1405042700284667, "grad_norm": 0.2421967089176178, "learning_rate": 3.6537055883377737e-06, "loss": 0.3035, "step": 30890 }, { "epoch": 3.140605937372916, "grad_norm": 0.29358944296836853, "learning_rate": 3.6533638124564225e-06, "loss": 0.2916, "step": 30891 }, { "epoch": 3.140707604717365, "grad_norm": 0.2594262361526489, "learning_rate": 3.6530220433591035e-06, "loss": 0.3365, "step": 30892 }, { "epoch": 3.140809272061814, "grad_norm": 0.2545444965362549, "learning_rate": 3.6526802810475403e-06, "loss": 0.3008, "step": 30893 }, { "epoch": 3.140910939406263, "grad_norm": 0.2685134708881378, "learning_rate": 3.6523385255234524e-06, "loss": 0.3177, "step": 30894 }, { "epoch": 3.141012606750712, "grad_norm": 0.2976888418197632, "learning_rate": 3.651996776788564e-06, "loss": 0.2981, "step": 30895 }, { "epoch": 3.1411142740951608, "grad_norm": 0.2641504108905792, "learning_rate": 3.6516550348445946e-06, "loss": 0.3182, "step": 30896 }, { "epoch": 3.1412159414396097, "grad_norm": 0.28185784816741943, "learning_rate": 3.6513132996932666e-06, "loss": 0.295, "step": 30897 }, { "epoch": 3.1413176087840586, "grad_norm": 0.2754441499710083, "learning_rate": 3.6509715713363023e-06, "loss": 0.2866, "step": 30898 }, { "epoch": 3.1414192761285076, "grad_norm": 0.26433444023132324, "learning_rate": 3.6506298497754222e-06, "loss": 0.2908, "step": 30899 }, { "epoch": 3.1415209434729565, "grad_norm": 0.29138705134391785, "learning_rate": 3.6502881350123488e-06, "loss": 0.3083, "step": 30900 }, { "epoch": 3.1416226108174055, "grad_norm": 0.28848549723625183, "learning_rate": 3.649946427048802e-06, "loss": 0.3209, "step": 30901 }, { "epoch": 3.1417242781618544, "grad_norm": 0.27116718888282776, "learning_rate": 3.6496047258865045e-06, "loss": 0.3035, "step": 30902 }, { "epoch": 3.1418259455063033, "grad_norm": 0.25749337673187256, "learning_rate": 3.6492630315271787e-06, "loss": 0.306, "step": 30903 }, { "epoch": 3.1419276128507523, "grad_norm": 0.3137693405151367, "learning_rate": 3.6489213439725437e-06, "loss": 0.2959, "step": 30904 }, { "epoch": 3.1420292801952012, "grad_norm": 0.2938339412212372, "learning_rate": 3.6485796632243232e-06, "loss": 0.2958, "step": 30905 }, { "epoch": 3.14213094753965, "grad_norm": 0.27919745445251465, "learning_rate": 3.6482379892842356e-06, "loss": 0.2933, "step": 30906 }, { "epoch": 3.142232614884099, "grad_norm": 0.27004289627075195, "learning_rate": 3.6478963221540044e-06, "loss": 0.301, "step": 30907 }, { "epoch": 3.142334282228548, "grad_norm": 0.28333768248558044, "learning_rate": 3.64755466183535e-06, "loss": 0.3479, "step": 30908 }, { "epoch": 3.142435949572997, "grad_norm": 0.2729076147079468, "learning_rate": 3.647213008329994e-06, "loss": 0.274, "step": 30909 }, { "epoch": 3.142537616917446, "grad_norm": 0.27602753043174744, "learning_rate": 3.6468713616396576e-06, "loss": 0.3238, "step": 30910 }, { "epoch": 3.142639284261895, "grad_norm": 0.2887323796749115, "learning_rate": 3.6465297217660615e-06, "loss": 0.3191, "step": 30911 }, { "epoch": 3.142740951606344, "grad_norm": 0.286851704120636, "learning_rate": 3.646188088710927e-06, "loss": 0.2781, "step": 30912 }, { "epoch": 3.142842618950793, "grad_norm": 0.2859582006931305, "learning_rate": 3.645846462475976e-06, "loss": 0.3128, "step": 30913 }, { "epoch": 3.142944286295242, "grad_norm": 0.28681156039237976, "learning_rate": 3.645504843062927e-06, "loss": 0.315, "step": 30914 }, { "epoch": 3.143045953639691, "grad_norm": 0.2881728410720825, "learning_rate": 3.6451632304735046e-06, "loss": 0.3034, "step": 30915 }, { "epoch": 3.14314762098414, "grad_norm": 0.2682769298553467, "learning_rate": 3.644821624709427e-06, "loss": 0.3078, "step": 30916 }, { "epoch": 3.143249288328589, "grad_norm": 0.29333096742630005, "learning_rate": 3.6444800257724156e-06, "loss": 0.2863, "step": 30917 }, { "epoch": 3.143350955673038, "grad_norm": 0.29303061962127686, "learning_rate": 3.6441384336641928e-06, "loss": 0.3178, "step": 30918 }, { "epoch": 3.143452623017487, "grad_norm": 0.2710356116294861, "learning_rate": 3.643796848386477e-06, "loss": 0.329, "step": 30919 }, { "epoch": 3.143554290361936, "grad_norm": 0.29217129945755005, "learning_rate": 3.643455269940992e-06, "loss": 0.3116, "step": 30920 }, { "epoch": 3.1436559577063847, "grad_norm": 0.28081607818603516, "learning_rate": 3.643113698329456e-06, "loss": 0.3526, "step": 30921 }, { "epoch": 3.1437576250508337, "grad_norm": 0.28911092877388, "learning_rate": 3.64277213355359e-06, "loss": 0.269, "step": 30922 }, { "epoch": 3.1438592923952826, "grad_norm": 0.25467371940612793, "learning_rate": 3.6424305756151167e-06, "loss": 0.2795, "step": 30923 }, { "epoch": 3.1439609597397316, "grad_norm": 0.2728651463985443, "learning_rate": 3.642089024515755e-06, "loss": 0.3029, "step": 30924 }, { "epoch": 3.1440626270841805, "grad_norm": 0.2721371352672577, "learning_rate": 3.6417474802572266e-06, "loss": 0.2982, "step": 30925 }, { "epoch": 3.1441642944286294, "grad_norm": 0.26428115367889404, "learning_rate": 3.641405942841251e-06, "loss": 0.3014, "step": 30926 }, { "epoch": 3.1442659617730784, "grad_norm": 0.2864501178264618, "learning_rate": 3.6410644122695494e-06, "loss": 0.3491, "step": 30927 }, { "epoch": 3.1443676291175273, "grad_norm": 0.27787303924560547, "learning_rate": 3.6407228885438427e-06, "loss": 0.3222, "step": 30928 }, { "epoch": 3.1444692964619763, "grad_norm": 0.2717897891998291, "learning_rate": 3.640381371665851e-06, "loss": 0.2993, "step": 30929 }, { "epoch": 3.144570963806425, "grad_norm": 0.311177134513855, "learning_rate": 3.6400398616372957e-06, "loss": 0.3153, "step": 30930 }, { "epoch": 3.144672631150874, "grad_norm": 0.26433467864990234, "learning_rate": 3.639698358459895e-06, "loss": 0.3359, "step": 30931 }, { "epoch": 3.1447742984953235, "grad_norm": 0.27280086278915405, "learning_rate": 3.639356862135371e-06, "loss": 0.3399, "step": 30932 }, { "epoch": 3.1448759658397725, "grad_norm": 0.28518855571746826, "learning_rate": 3.6390153726654444e-06, "loss": 0.2939, "step": 30933 }, { "epoch": 3.1449776331842214, "grad_norm": 0.27942654490470886, "learning_rate": 3.638673890051834e-06, "loss": 0.3116, "step": 30934 }, { "epoch": 3.1450793005286704, "grad_norm": 0.28026705980300903, "learning_rate": 3.638332414296263e-06, "loss": 0.2793, "step": 30935 }, { "epoch": 3.1451809678731193, "grad_norm": 0.2796659767627716, "learning_rate": 3.6379909454004473e-06, "loss": 0.3497, "step": 30936 }, { "epoch": 3.1452826352175682, "grad_norm": 0.28248894214630127, "learning_rate": 3.6376494833661102e-06, "loss": 0.3255, "step": 30937 }, { "epoch": 3.145384302562017, "grad_norm": 0.2853117287158966, "learning_rate": 3.6373080281949724e-06, "loss": 0.3285, "step": 30938 }, { "epoch": 3.145485969906466, "grad_norm": 0.2818508744239807, "learning_rate": 3.636966579888752e-06, "loss": 0.3229, "step": 30939 }, { "epoch": 3.145587637250915, "grad_norm": 0.2929649353027344, "learning_rate": 3.636625138449171e-06, "loss": 0.3145, "step": 30940 }, { "epoch": 3.145689304595364, "grad_norm": 0.2795145511627197, "learning_rate": 3.6362837038779476e-06, "loss": 0.2989, "step": 30941 }, { "epoch": 3.145790971939813, "grad_norm": 0.2775927484035492, "learning_rate": 3.6359422761768027e-06, "loss": 0.2967, "step": 30942 }, { "epoch": 3.145892639284262, "grad_norm": 0.29534730315208435, "learning_rate": 3.635600855347457e-06, "loss": 0.3196, "step": 30943 }, { "epoch": 3.145994306628711, "grad_norm": 0.28514859080314636, "learning_rate": 3.63525944139163e-06, "loss": 0.3082, "step": 30944 }, { "epoch": 3.1460959739731598, "grad_norm": 0.28655412793159485, "learning_rate": 3.634918034311041e-06, "loss": 0.3389, "step": 30945 }, { "epoch": 3.1461976413176087, "grad_norm": 0.28726327419281006, "learning_rate": 3.6345766341074106e-06, "loss": 0.2899, "step": 30946 }, { "epoch": 3.1462993086620576, "grad_norm": 0.26478227972984314, "learning_rate": 3.6342352407824588e-06, "loss": 0.2987, "step": 30947 }, { "epoch": 3.1464009760065066, "grad_norm": 0.2993755340576172, "learning_rate": 3.6338938543379064e-06, "loss": 0.2946, "step": 30948 }, { "epoch": 3.1465026433509555, "grad_norm": 0.25863248109817505, "learning_rate": 3.633552474775471e-06, "loss": 0.3212, "step": 30949 }, { "epoch": 3.1466043106954045, "grad_norm": 0.2762256860733032, "learning_rate": 3.633211102096874e-06, "loss": 0.2981, "step": 30950 }, { "epoch": 3.1467059780398534, "grad_norm": 0.2985747754573822, "learning_rate": 3.632869736303834e-06, "loss": 0.2892, "step": 30951 }, { "epoch": 3.1468076453843024, "grad_norm": 0.2710733115673065, "learning_rate": 3.6325283773980715e-06, "loss": 0.3028, "step": 30952 }, { "epoch": 3.1469093127287513, "grad_norm": 0.30926957726478577, "learning_rate": 3.6321870253813075e-06, "loss": 0.3079, "step": 30953 }, { "epoch": 3.1470109800732007, "grad_norm": 0.27293646335601807, "learning_rate": 3.6318456802552583e-06, "loss": 0.2871, "step": 30954 }, { "epoch": 3.1471126474176496, "grad_norm": 0.26712971925735474, "learning_rate": 3.6315043420216465e-06, "loss": 0.2738, "step": 30955 }, { "epoch": 3.1472143147620986, "grad_norm": 0.280989408493042, "learning_rate": 3.6311630106821905e-06, "loss": 0.357, "step": 30956 }, { "epoch": 3.1473159821065475, "grad_norm": 0.2705960273742676, "learning_rate": 3.6308216862386093e-06, "loss": 0.2921, "step": 30957 }, { "epoch": 3.1474176494509964, "grad_norm": 0.2777351140975952, "learning_rate": 3.6304803686926237e-06, "loss": 0.3086, "step": 30958 }, { "epoch": 3.1475193167954454, "grad_norm": 0.2642919719219208, "learning_rate": 3.6301390580459526e-06, "loss": 0.3258, "step": 30959 }, { "epoch": 3.1476209841398943, "grad_norm": 0.2795465886592865, "learning_rate": 3.6297977543003173e-06, "loss": 0.2956, "step": 30960 }, { "epoch": 3.1477226514843433, "grad_norm": 0.2947617769241333, "learning_rate": 3.629456457457431e-06, "loss": 0.344, "step": 30961 }, { "epoch": 3.147824318828792, "grad_norm": 0.29053428769111633, "learning_rate": 3.6291151675190196e-06, "loss": 0.2825, "step": 30962 }, { "epoch": 3.147925986173241, "grad_norm": 0.29690954089164734, "learning_rate": 3.6287738844868004e-06, "loss": 0.3055, "step": 30963 }, { "epoch": 3.14802765351769, "grad_norm": 0.27533087134361267, "learning_rate": 3.628432608362492e-06, "loss": 0.3393, "step": 30964 }, { "epoch": 3.148129320862139, "grad_norm": 0.2859881818294525, "learning_rate": 3.6280913391478166e-06, "loss": 0.3016, "step": 30965 }, { "epoch": 3.148230988206588, "grad_norm": 0.26121118664741516, "learning_rate": 3.627750076844487e-06, "loss": 0.3365, "step": 30966 }, { "epoch": 3.148332655551037, "grad_norm": 0.24297678470611572, "learning_rate": 3.627408821454228e-06, "loss": 0.2937, "step": 30967 }, { "epoch": 3.148434322895486, "grad_norm": 0.2693784832954407, "learning_rate": 3.6270675729787585e-06, "loss": 0.3042, "step": 30968 }, { "epoch": 3.148535990239935, "grad_norm": 0.25407469272613525, "learning_rate": 3.6267263314197953e-06, "loss": 0.2946, "step": 30969 }, { "epoch": 3.1486376575843837, "grad_norm": 0.27427005767822266, "learning_rate": 3.6263850967790605e-06, "loss": 0.3198, "step": 30970 }, { "epoch": 3.1487393249288327, "grad_norm": 0.28264257311820984, "learning_rate": 3.6260438690582673e-06, "loss": 0.3385, "step": 30971 }, { "epoch": 3.1488409922732816, "grad_norm": 0.268657386302948, "learning_rate": 3.62570264825914e-06, "loss": 0.3372, "step": 30972 }, { "epoch": 3.148942659617731, "grad_norm": 0.2775696814060211, "learning_rate": 3.6253614343833977e-06, "loss": 0.2761, "step": 30973 }, { "epoch": 3.14904432696218, "grad_norm": 0.2688736319541931, "learning_rate": 3.625020227432756e-06, "loss": 0.3108, "step": 30974 }, { "epoch": 3.149145994306629, "grad_norm": 0.285160630941391, "learning_rate": 3.6246790274089382e-06, "loss": 0.3422, "step": 30975 }, { "epoch": 3.149247661651078, "grad_norm": 0.2891944646835327, "learning_rate": 3.6243378343136576e-06, "loss": 0.3295, "step": 30976 }, { "epoch": 3.1493493289955268, "grad_norm": 0.2873179316520691, "learning_rate": 3.6239966481486366e-06, "loss": 0.3259, "step": 30977 }, { "epoch": 3.1494509963399757, "grad_norm": 0.2986196279525757, "learning_rate": 3.623655468915596e-06, "loss": 0.2784, "step": 30978 }, { "epoch": 3.1495526636844247, "grad_norm": 0.272860050201416, "learning_rate": 3.6233142966162494e-06, "loss": 0.3159, "step": 30979 }, { "epoch": 3.1496543310288736, "grad_norm": 0.2704867422580719, "learning_rate": 3.622973131252321e-06, "loss": 0.3104, "step": 30980 }, { "epoch": 3.1497559983733225, "grad_norm": 0.271998792886734, "learning_rate": 3.622631972825523e-06, "loss": 0.2968, "step": 30981 }, { "epoch": 3.1498576657177715, "grad_norm": 0.2704934775829315, "learning_rate": 3.6222908213375796e-06, "loss": 0.2781, "step": 30982 }, { "epoch": 3.1499593330622204, "grad_norm": 0.2928936779499054, "learning_rate": 3.6219496767902095e-06, "loss": 0.3253, "step": 30983 }, { "epoch": 3.1500610004066694, "grad_norm": 0.26958006620407104, "learning_rate": 3.621608539185126e-06, "loss": 0.2827, "step": 30984 }, { "epoch": 3.1501626677511183, "grad_norm": 0.2811918258666992, "learning_rate": 3.6212674085240545e-06, "loss": 0.3181, "step": 30985 }, { "epoch": 3.1502643350955672, "grad_norm": 0.27651897072792053, "learning_rate": 3.620926284808707e-06, "loss": 0.3216, "step": 30986 }, { "epoch": 3.150366002440016, "grad_norm": 0.270530641078949, "learning_rate": 3.6205851680408068e-06, "loss": 0.3024, "step": 30987 }, { "epoch": 3.150467669784465, "grad_norm": 0.2839334309101105, "learning_rate": 3.620244058222072e-06, "loss": 0.3229, "step": 30988 }, { "epoch": 3.150569337128914, "grad_norm": 0.2957504093647003, "learning_rate": 3.6199029553542166e-06, "loss": 0.3427, "step": 30989 }, { "epoch": 3.150671004473363, "grad_norm": 0.28264036774635315, "learning_rate": 3.6195618594389658e-06, "loss": 0.3334, "step": 30990 }, { "epoch": 3.150772671817812, "grad_norm": 0.3021172285079956, "learning_rate": 3.619220770478031e-06, "loss": 0.2865, "step": 30991 }, { "epoch": 3.150874339162261, "grad_norm": 0.243349090218544, "learning_rate": 3.6188796884731346e-06, "loss": 0.3217, "step": 30992 }, { "epoch": 3.15097600650671, "grad_norm": 0.2666488289833069, "learning_rate": 3.618538613425997e-06, "loss": 0.3088, "step": 30993 }, { "epoch": 3.151077673851159, "grad_norm": 0.27598729729652405, "learning_rate": 3.6181975453383293e-06, "loss": 0.2953, "step": 30994 }, { "epoch": 3.151179341195608, "grad_norm": 0.27755165100097656, "learning_rate": 3.617856484211858e-06, "loss": 0.3331, "step": 30995 }, { "epoch": 3.151281008540057, "grad_norm": 0.2642131447792053, "learning_rate": 3.617515430048294e-06, "loss": 0.315, "step": 30996 }, { "epoch": 3.151382675884506, "grad_norm": 0.2880783677101135, "learning_rate": 3.6171743828493592e-06, "loss": 0.2956, "step": 30997 }, { "epoch": 3.151484343228955, "grad_norm": 0.2706160545349121, "learning_rate": 3.616833342616774e-06, "loss": 0.2805, "step": 30998 }, { "epoch": 3.151586010573404, "grad_norm": 0.29257336258888245, "learning_rate": 3.6164923093522505e-06, "loss": 0.3044, "step": 30999 }, { "epoch": 3.151687677917853, "grad_norm": 0.2703438997268677, "learning_rate": 3.616151283057513e-06, "loss": 0.3009, "step": 31000 }, { "epoch": 3.151789345262302, "grad_norm": 0.2717495560646057, "learning_rate": 3.615810263734274e-06, "loss": 0.3106, "step": 31001 }, { "epoch": 3.1518910126067508, "grad_norm": 0.2810308635234833, "learning_rate": 3.615469251384254e-06, "loss": 0.2652, "step": 31002 }, { "epoch": 3.1519926799511997, "grad_norm": 0.3012905418872833, "learning_rate": 3.615128246009173e-06, "loss": 0.3414, "step": 31003 }, { "epoch": 3.1520943472956486, "grad_norm": 0.27491840720176697, "learning_rate": 3.614787247610744e-06, "loss": 0.2968, "step": 31004 }, { "epoch": 3.1521960146400976, "grad_norm": 0.2803036570549011, "learning_rate": 3.6144462561906906e-06, "loss": 0.3018, "step": 31005 }, { "epoch": 3.1522976819845465, "grad_norm": 0.2684348523616791, "learning_rate": 3.614105271750726e-06, "loss": 0.2628, "step": 31006 }, { "epoch": 3.1523993493289955, "grad_norm": 0.2755305767059326, "learning_rate": 3.613764294292569e-06, "loss": 0.3366, "step": 31007 }, { "epoch": 3.1525010166734444, "grad_norm": 0.2652372717857361, "learning_rate": 3.61342332381794e-06, "loss": 0.2962, "step": 31008 }, { "epoch": 3.1526026840178933, "grad_norm": 0.2589346468448639, "learning_rate": 3.6130823603285524e-06, "loss": 0.3121, "step": 31009 }, { "epoch": 3.1527043513623423, "grad_norm": 0.2579230070114136, "learning_rate": 3.6127414038261295e-06, "loss": 0.2766, "step": 31010 }, { "epoch": 3.1528060187067912, "grad_norm": 0.2843378186225891, "learning_rate": 3.6124004543123837e-06, "loss": 0.2887, "step": 31011 }, { "epoch": 3.15290768605124, "grad_norm": 0.2995529770851135, "learning_rate": 3.6120595117890335e-06, "loss": 0.2803, "step": 31012 }, { "epoch": 3.153009353395689, "grad_norm": 0.2623721957206726, "learning_rate": 3.6117185762578e-06, "loss": 0.3026, "step": 31013 }, { "epoch": 3.1531110207401385, "grad_norm": 0.2650865316390991, "learning_rate": 3.6113776477203966e-06, "loss": 0.3003, "step": 31014 }, { "epoch": 3.1532126880845874, "grad_norm": 0.2598433494567871, "learning_rate": 3.611036726178545e-06, "loss": 0.3037, "step": 31015 }, { "epoch": 3.1533143554290364, "grad_norm": 0.2783181369304657, "learning_rate": 3.6106958116339586e-06, "loss": 0.3019, "step": 31016 }, { "epoch": 3.1534160227734853, "grad_norm": 0.2779597342014313, "learning_rate": 3.6103549040883545e-06, "loss": 0.3078, "step": 31017 }, { "epoch": 3.1535176901179343, "grad_norm": 0.2778833508491516, "learning_rate": 3.610014003543456e-06, "loss": 0.3019, "step": 31018 }, { "epoch": 3.153619357462383, "grad_norm": 0.28327128291130066, "learning_rate": 3.6096731100009754e-06, "loss": 0.3172, "step": 31019 }, { "epoch": 3.153721024806832, "grad_norm": 0.28829848766326904, "learning_rate": 3.6093322234626316e-06, "loss": 0.298, "step": 31020 }, { "epoch": 3.153822692151281, "grad_norm": 0.2920478880405426, "learning_rate": 3.6089913439301406e-06, "loss": 0.2872, "step": 31021 }, { "epoch": 3.15392435949573, "grad_norm": 0.26835545897483826, "learning_rate": 3.608650471405219e-06, "loss": 0.2885, "step": 31022 }, { "epoch": 3.154026026840179, "grad_norm": 0.2837868630886078, "learning_rate": 3.6083096058895894e-06, "loss": 0.3243, "step": 31023 }, { "epoch": 3.154127694184628, "grad_norm": 0.2617305815219879, "learning_rate": 3.6079687473849636e-06, "loss": 0.2973, "step": 31024 }, { "epoch": 3.154229361529077, "grad_norm": 0.2917870283126831, "learning_rate": 3.6076278958930604e-06, "loss": 0.3553, "step": 31025 }, { "epoch": 3.154331028873526, "grad_norm": 0.2614942193031311, "learning_rate": 3.6072870514155966e-06, "loss": 0.3358, "step": 31026 }, { "epoch": 3.1544326962179747, "grad_norm": 0.2910715341567993, "learning_rate": 3.606946213954289e-06, "loss": 0.3484, "step": 31027 }, { "epoch": 3.1545343635624237, "grad_norm": 0.2635800242424011, "learning_rate": 3.606605383510858e-06, "loss": 0.2603, "step": 31028 }, { "epoch": 3.1546360309068726, "grad_norm": 0.2645335793495178, "learning_rate": 3.6062645600870156e-06, "loss": 0.2991, "step": 31029 }, { "epoch": 3.1547376982513216, "grad_norm": 0.27720770239830017, "learning_rate": 3.6059237436844826e-06, "loss": 0.3219, "step": 31030 }, { "epoch": 3.1548393655957705, "grad_norm": 0.25505703687667847, "learning_rate": 3.605582934304973e-06, "loss": 0.286, "step": 31031 }, { "epoch": 3.1549410329402194, "grad_norm": 0.26182883977890015, "learning_rate": 3.6052421319502052e-06, "loss": 0.3045, "step": 31032 }, { "epoch": 3.1550427002846684, "grad_norm": 0.26560071110725403, "learning_rate": 3.604901336621898e-06, "loss": 0.3135, "step": 31033 }, { "epoch": 3.1551443676291173, "grad_norm": 0.26444926857948303, "learning_rate": 3.6045605483217647e-06, "loss": 0.3044, "step": 31034 }, { "epoch": 3.1552460349735663, "grad_norm": 0.2775900065898895, "learning_rate": 3.6042197670515254e-06, "loss": 0.2938, "step": 31035 }, { "epoch": 3.1553477023180156, "grad_norm": 0.27687251567840576, "learning_rate": 3.6038789928128937e-06, "loss": 0.3044, "step": 31036 }, { "epoch": 3.1554493696624646, "grad_norm": 0.2729071378707886, "learning_rate": 3.603538225607588e-06, "loss": 0.3194, "step": 31037 }, { "epoch": 3.1555510370069135, "grad_norm": 0.270405650138855, "learning_rate": 3.6031974654373253e-06, "loss": 0.3367, "step": 31038 }, { "epoch": 3.1556527043513625, "grad_norm": 0.26537734270095825, "learning_rate": 3.6028567123038218e-06, "loss": 0.3046, "step": 31039 }, { "epoch": 3.1557543716958114, "grad_norm": 0.26802560687065125, "learning_rate": 3.602515966208794e-06, "loss": 0.2963, "step": 31040 }, { "epoch": 3.1558560390402604, "grad_norm": 0.2753463387489319, "learning_rate": 3.6021752271539584e-06, "loss": 0.343, "step": 31041 }, { "epoch": 3.1559577063847093, "grad_norm": 0.2729063630104065, "learning_rate": 3.6018344951410317e-06, "loss": 0.294, "step": 31042 }, { "epoch": 3.1560593737291582, "grad_norm": 0.2766769230365753, "learning_rate": 3.601493770171732e-06, "loss": 0.3132, "step": 31043 }, { "epoch": 3.156161041073607, "grad_norm": 0.2775372266769409, "learning_rate": 3.6011530522477723e-06, "loss": 0.3166, "step": 31044 }, { "epoch": 3.156262708418056, "grad_norm": 0.2674003839492798, "learning_rate": 3.600812341370873e-06, "loss": 0.2913, "step": 31045 }, { "epoch": 3.156364375762505, "grad_norm": 0.2559816241264343, "learning_rate": 3.6004716375427474e-06, "loss": 0.3014, "step": 31046 }, { "epoch": 3.156466043106954, "grad_norm": 0.26992353796958923, "learning_rate": 3.600130940765113e-06, "loss": 0.285, "step": 31047 }, { "epoch": 3.156567710451403, "grad_norm": 0.2636807858943939, "learning_rate": 3.5997902510396874e-06, "loss": 0.3456, "step": 31048 }, { "epoch": 3.156669377795852, "grad_norm": 0.24948161840438843, "learning_rate": 3.5994495683681853e-06, "loss": 0.3173, "step": 31049 }, { "epoch": 3.156771045140301, "grad_norm": 0.2723032832145691, "learning_rate": 3.5991088927523233e-06, "loss": 0.3431, "step": 31050 }, { "epoch": 3.1568727124847498, "grad_norm": 0.2557844817638397, "learning_rate": 3.598768224193817e-06, "loss": 0.2943, "step": 31051 }, { "epoch": 3.1569743798291987, "grad_norm": 0.2804122567176819, "learning_rate": 3.5984275626943844e-06, "loss": 0.3203, "step": 31052 }, { "epoch": 3.1570760471736476, "grad_norm": 0.2652791142463684, "learning_rate": 3.598086908255741e-06, "loss": 0.3101, "step": 31053 }, { "epoch": 3.1571777145180966, "grad_norm": 0.2780153453350067, "learning_rate": 3.5977462608796018e-06, "loss": 0.3107, "step": 31054 }, { "epoch": 3.157279381862546, "grad_norm": 0.26445472240448, "learning_rate": 3.5974056205676846e-06, "loss": 0.3388, "step": 31055 }, { "epoch": 3.157381049206995, "grad_norm": 0.27224022150039673, "learning_rate": 3.5970649873217035e-06, "loss": 0.3346, "step": 31056 }, { "epoch": 3.157482716551444, "grad_norm": 0.25839805603027344, "learning_rate": 3.5967243611433755e-06, "loss": 0.3086, "step": 31057 }, { "epoch": 3.157584383895893, "grad_norm": 0.290994256734848, "learning_rate": 3.5963837420344176e-06, "loss": 0.2917, "step": 31058 }, { "epoch": 3.1576860512403417, "grad_norm": 0.26624050736427307, "learning_rate": 3.5960431299965445e-06, "loss": 0.3372, "step": 31059 }, { "epoch": 3.1577877185847907, "grad_norm": 0.28325390815734863, "learning_rate": 3.5957025250314727e-06, "loss": 0.3403, "step": 31060 }, { "epoch": 3.1578893859292396, "grad_norm": 0.2853241264820099, "learning_rate": 3.5953619271409175e-06, "loss": 0.3122, "step": 31061 }, { "epoch": 3.1579910532736886, "grad_norm": 0.28869327902793884, "learning_rate": 3.595021336326595e-06, "loss": 0.2856, "step": 31062 }, { "epoch": 3.1580927206181375, "grad_norm": 0.2934863269329071, "learning_rate": 3.5946807525902216e-06, "loss": 0.3109, "step": 31063 }, { "epoch": 3.1581943879625864, "grad_norm": 0.27864477038383484, "learning_rate": 3.5943401759335122e-06, "loss": 0.3236, "step": 31064 }, { "epoch": 3.1582960553070354, "grad_norm": 0.2832176387310028, "learning_rate": 3.593999606358184e-06, "loss": 0.3331, "step": 31065 }, { "epoch": 3.1583977226514843, "grad_norm": 0.26363512873649597, "learning_rate": 3.5936590438659503e-06, "loss": 0.3059, "step": 31066 }, { "epoch": 3.1584993899959333, "grad_norm": 0.26193416118621826, "learning_rate": 3.5933184884585283e-06, "loss": 0.3435, "step": 31067 }, { "epoch": 3.158601057340382, "grad_norm": 0.2645198702812195, "learning_rate": 3.592977940137634e-06, "loss": 0.3261, "step": 31068 }, { "epoch": 3.158702724684831, "grad_norm": 0.27263954281806946, "learning_rate": 3.5926373989049823e-06, "loss": 0.3069, "step": 31069 }, { "epoch": 3.15880439202928, "grad_norm": 0.2713750898838043, "learning_rate": 3.59229686476229e-06, "loss": 0.2787, "step": 31070 }, { "epoch": 3.158906059373729, "grad_norm": 0.25693854689598083, "learning_rate": 3.5919563377112696e-06, "loss": 0.2966, "step": 31071 }, { "epoch": 3.159007726718178, "grad_norm": 0.2694780230522156, "learning_rate": 3.5916158177536392e-06, "loss": 0.2979, "step": 31072 }, { "epoch": 3.159109394062627, "grad_norm": 0.25319036841392517, "learning_rate": 3.5912753048911147e-06, "loss": 0.3366, "step": 31073 }, { "epoch": 3.159211061407076, "grad_norm": 0.2644696831703186, "learning_rate": 3.5909347991254095e-06, "loss": 0.3028, "step": 31074 }, { "epoch": 3.159312728751525, "grad_norm": 0.29656171798706055, "learning_rate": 3.5905943004582406e-06, "loss": 0.3219, "step": 31075 }, { "epoch": 3.1594143960959737, "grad_norm": 0.2747676968574524, "learning_rate": 3.590253808891322e-06, "loss": 0.2879, "step": 31076 }, { "epoch": 3.159516063440423, "grad_norm": 0.2744385302066803, "learning_rate": 3.5899133244263695e-06, "loss": 0.2904, "step": 31077 }, { "epoch": 3.159617730784872, "grad_norm": 0.27800291776657104, "learning_rate": 3.5895728470650993e-06, "loss": 0.3227, "step": 31078 }, { "epoch": 3.159719398129321, "grad_norm": 0.303409218788147, "learning_rate": 3.589232376809225e-06, "loss": 0.3072, "step": 31079 }, { "epoch": 3.15982106547377, "grad_norm": 0.284706175327301, "learning_rate": 3.588891913660464e-06, "loss": 0.2986, "step": 31080 }, { "epoch": 3.159922732818219, "grad_norm": 0.28644734621047974, "learning_rate": 3.5885514576205294e-06, "loss": 0.3176, "step": 31081 }, { "epoch": 3.160024400162668, "grad_norm": 0.28585782647132874, "learning_rate": 3.588211008691138e-06, "loss": 0.2955, "step": 31082 }, { "epoch": 3.1601260675071168, "grad_norm": 0.2664308249950409, "learning_rate": 3.587870566874003e-06, "loss": 0.305, "step": 31083 }, { "epoch": 3.1602277348515657, "grad_norm": 0.26588767766952515, "learning_rate": 3.58753013217084e-06, "loss": 0.3029, "step": 31084 }, { "epoch": 3.1603294021960147, "grad_norm": 0.24711599946022034, "learning_rate": 3.5871897045833665e-06, "loss": 0.3132, "step": 31085 }, { "epoch": 3.1604310695404636, "grad_norm": 0.2827766239643097, "learning_rate": 3.5868492841132942e-06, "loss": 0.2827, "step": 31086 }, { "epoch": 3.1605327368849125, "grad_norm": 0.293146550655365, "learning_rate": 3.5865088707623395e-06, "loss": 0.3057, "step": 31087 }, { "epoch": 3.1606344042293615, "grad_norm": 0.2879825830459595, "learning_rate": 3.5861684645322175e-06, "loss": 0.3115, "step": 31088 }, { "epoch": 3.1607360715738104, "grad_norm": 0.2660996615886688, "learning_rate": 3.5858280654246423e-06, "loss": 0.3052, "step": 31089 }, { "epoch": 3.1608377389182594, "grad_norm": 0.25115859508514404, "learning_rate": 3.5854876734413303e-06, "loss": 0.2925, "step": 31090 }, { "epoch": 3.1609394062627083, "grad_norm": 0.2831858992576599, "learning_rate": 3.585147288583994e-06, "loss": 0.3302, "step": 31091 }, { "epoch": 3.1610410736071572, "grad_norm": 0.26096269488334656, "learning_rate": 3.5848069108543503e-06, "loss": 0.3027, "step": 31092 }, { "epoch": 3.161142740951606, "grad_norm": 0.26517143845558167, "learning_rate": 3.5844665402541123e-06, "loss": 0.309, "step": 31093 }, { "epoch": 3.161244408296055, "grad_norm": 0.27094006538391113, "learning_rate": 3.5841261767849956e-06, "loss": 0.3329, "step": 31094 }, { "epoch": 3.161346075640504, "grad_norm": 0.26471203565597534, "learning_rate": 3.5837858204487162e-06, "loss": 0.2886, "step": 31095 }, { "epoch": 3.1614477429849535, "grad_norm": 0.26584354043006897, "learning_rate": 3.5834454712469857e-06, "loss": 0.3297, "step": 31096 }, { "epoch": 3.1615494103294024, "grad_norm": 0.27534377574920654, "learning_rate": 3.583105129181521e-06, "loss": 0.3206, "step": 31097 }, { "epoch": 3.1616510776738513, "grad_norm": 0.29967933893203735, "learning_rate": 3.5827647942540356e-06, "loss": 0.3039, "step": 31098 }, { "epoch": 3.1617527450183003, "grad_norm": 0.2900984287261963, "learning_rate": 3.5824244664662444e-06, "loss": 0.2862, "step": 31099 }, { "epoch": 3.1618544123627492, "grad_norm": 0.27438899874687195, "learning_rate": 3.582084145819863e-06, "loss": 0.3045, "step": 31100 }, { "epoch": 3.161956079707198, "grad_norm": 0.2739527225494385, "learning_rate": 3.5817438323166033e-06, "loss": 0.3345, "step": 31101 }, { "epoch": 3.162057747051647, "grad_norm": 0.2635113596916199, "learning_rate": 3.5814035259581816e-06, "loss": 0.2958, "step": 31102 }, { "epoch": 3.162159414396096, "grad_norm": 0.2788236737251282, "learning_rate": 3.581063226746312e-06, "loss": 0.3025, "step": 31103 }, { "epoch": 3.162261081740545, "grad_norm": 0.27031534910202026, "learning_rate": 3.5807229346827075e-06, "loss": 0.3052, "step": 31104 }, { "epoch": 3.162362749084994, "grad_norm": 0.26522985100746155, "learning_rate": 3.5803826497690853e-06, "loss": 0.326, "step": 31105 }, { "epoch": 3.162464416429443, "grad_norm": 0.2644613981246948, "learning_rate": 3.580042372007157e-06, "loss": 0.2937, "step": 31106 }, { "epoch": 3.162566083773892, "grad_norm": 0.26902955770492554, "learning_rate": 3.579702101398639e-06, "loss": 0.3073, "step": 31107 }, { "epoch": 3.1626677511183408, "grad_norm": 0.27540868520736694, "learning_rate": 3.579361837945242e-06, "loss": 0.3023, "step": 31108 }, { "epoch": 3.1627694184627897, "grad_norm": 0.27519580721855164, "learning_rate": 3.579021581648684e-06, "loss": 0.3109, "step": 31109 }, { "epoch": 3.1628710858072386, "grad_norm": 0.2657044231891632, "learning_rate": 3.578681332510678e-06, "loss": 0.312, "step": 31110 }, { "epoch": 3.1629727531516876, "grad_norm": 0.2658745348453522, "learning_rate": 3.5783410905329365e-06, "loss": 0.2996, "step": 31111 }, { "epoch": 3.1630744204961365, "grad_norm": 0.2778830826282501, "learning_rate": 3.578000855717177e-06, "loss": 0.3075, "step": 31112 }, { "epoch": 3.1631760878405855, "grad_norm": 0.29280275106430054, "learning_rate": 3.5776606280651084e-06, "loss": 0.3154, "step": 31113 }, { "epoch": 3.1632777551850344, "grad_norm": 0.30368998646736145, "learning_rate": 3.5773204075784477e-06, "loss": 0.3121, "step": 31114 }, { "epoch": 3.1633794225294833, "grad_norm": 0.2669990062713623, "learning_rate": 3.5769801942589106e-06, "loss": 0.3051, "step": 31115 }, { "epoch": 3.1634810898739323, "grad_norm": 0.2688181400299072, "learning_rate": 3.576639988108208e-06, "loss": 0.2839, "step": 31116 }, { "epoch": 3.1635827572183812, "grad_norm": 0.2682263255119324, "learning_rate": 3.576299789128057e-06, "loss": 0.2853, "step": 31117 }, { "epoch": 3.1636844245628306, "grad_norm": 0.2564987242221832, "learning_rate": 3.5759595973201653e-06, "loss": 0.3161, "step": 31118 }, { "epoch": 3.1637860919072796, "grad_norm": 0.2718861997127533, "learning_rate": 3.575619412686253e-06, "loss": 0.2994, "step": 31119 }, { "epoch": 3.1638877592517285, "grad_norm": 0.26613250374794006, "learning_rate": 3.575279235228032e-06, "loss": 0.3225, "step": 31120 }, { "epoch": 3.1639894265961774, "grad_norm": 0.27387258410453796, "learning_rate": 3.574939064947215e-06, "loss": 0.3376, "step": 31121 }, { "epoch": 3.1640910939406264, "grad_norm": 0.2602086067199707, "learning_rate": 3.5745989018455186e-06, "loss": 0.2972, "step": 31122 }, { "epoch": 3.1641927612850753, "grad_norm": 0.26598939299583435, "learning_rate": 3.5742587459246504e-06, "loss": 0.298, "step": 31123 }, { "epoch": 3.1642944286295243, "grad_norm": 0.28188520669937134, "learning_rate": 3.573918597186329e-06, "loss": 0.3132, "step": 31124 }, { "epoch": 3.164396095973973, "grad_norm": 0.26000022888183594, "learning_rate": 3.5735784556322684e-06, "loss": 0.2957, "step": 31125 }, { "epoch": 3.164497763318422, "grad_norm": 0.27029502391815186, "learning_rate": 3.5732383212641785e-06, "loss": 0.324, "step": 31126 }, { "epoch": 3.164599430662871, "grad_norm": 0.2797877788543701, "learning_rate": 3.572898194083778e-06, "loss": 0.3237, "step": 31127 }, { "epoch": 3.16470109800732, "grad_norm": 0.28701141476631165, "learning_rate": 3.5725580740927735e-06, "loss": 0.3095, "step": 31128 }, { "epoch": 3.164802765351769, "grad_norm": 0.2824058532714844, "learning_rate": 3.572217961292884e-06, "loss": 0.3126, "step": 31129 }, { "epoch": 3.164904432696218, "grad_norm": 0.29231560230255127, "learning_rate": 3.5718778556858225e-06, "loss": 0.3188, "step": 31130 }, { "epoch": 3.165006100040667, "grad_norm": 0.25745195150375366, "learning_rate": 3.571537757273298e-06, "loss": 0.3105, "step": 31131 }, { "epoch": 3.165107767385116, "grad_norm": 0.2731708288192749, "learning_rate": 3.5711976660570303e-06, "loss": 0.3241, "step": 31132 }, { "epoch": 3.1652094347295647, "grad_norm": 0.27814918756484985, "learning_rate": 3.570857582038726e-06, "loss": 0.3171, "step": 31133 }, { "epoch": 3.1653111020740137, "grad_norm": 0.28860408067703247, "learning_rate": 3.5705175052201026e-06, "loss": 0.3149, "step": 31134 }, { "epoch": 3.1654127694184626, "grad_norm": 0.27240732312202454, "learning_rate": 3.570177435602874e-06, "loss": 0.2977, "step": 31135 }, { "epoch": 3.1655144367629116, "grad_norm": 0.26736658811569214, "learning_rate": 3.5698373731887486e-06, "loss": 0.2934, "step": 31136 }, { "epoch": 3.165616104107361, "grad_norm": 0.27257153391838074, "learning_rate": 3.569497317979446e-06, "loss": 0.2935, "step": 31137 }, { "epoch": 3.16571777145181, "grad_norm": 0.2617243230342865, "learning_rate": 3.569157269976673e-06, "loss": 0.3062, "step": 31138 }, { "epoch": 3.165819438796259, "grad_norm": 0.26148948073387146, "learning_rate": 3.5688172291821466e-06, "loss": 0.2896, "step": 31139 }, { "epoch": 3.1659211061407078, "grad_norm": 0.291723370552063, "learning_rate": 3.5684771955975807e-06, "loss": 0.3289, "step": 31140 }, { "epoch": 3.1660227734851567, "grad_norm": 0.2809046506881714, "learning_rate": 3.5681371692246834e-06, "loss": 0.3114, "step": 31141 }, { "epoch": 3.1661244408296056, "grad_norm": 0.26857244968414307, "learning_rate": 3.567797150065174e-06, "loss": 0.3024, "step": 31142 }, { "epoch": 3.1662261081740546, "grad_norm": 0.2715044319629669, "learning_rate": 3.56745713812076e-06, "loss": 0.3064, "step": 31143 }, { "epoch": 3.1663277755185035, "grad_norm": 0.2666946351528168, "learning_rate": 3.5671171333931566e-06, "loss": 0.2975, "step": 31144 }, { "epoch": 3.1664294428629525, "grad_norm": 0.2501120865345001, "learning_rate": 3.5667771358840784e-06, "loss": 0.3307, "step": 31145 }, { "epoch": 3.1665311102074014, "grad_norm": 0.2793009579181671, "learning_rate": 3.566437145595234e-06, "loss": 0.2894, "step": 31146 }, { "epoch": 3.1666327775518504, "grad_norm": 0.27269259095191956, "learning_rate": 3.566097162528341e-06, "loss": 0.2986, "step": 31147 }, { "epoch": 3.1667344448962993, "grad_norm": 0.26823723316192627, "learning_rate": 3.5657571866851088e-06, "loss": 0.2972, "step": 31148 }, { "epoch": 3.1668361122407482, "grad_norm": 0.2758117616176605, "learning_rate": 3.565417218067249e-06, "loss": 0.2938, "step": 31149 }, { "epoch": 3.166937779585197, "grad_norm": 0.290524959564209, "learning_rate": 3.5650772566764804e-06, "loss": 0.3177, "step": 31150 }, { "epoch": 3.167039446929646, "grad_norm": 0.2856004238128662, "learning_rate": 3.5647373025145072e-06, "loss": 0.3099, "step": 31151 }, { "epoch": 3.167141114274095, "grad_norm": 0.2716177701950073, "learning_rate": 3.5643973555830504e-06, "loss": 0.3038, "step": 31152 }, { "epoch": 3.167242781618544, "grad_norm": 0.2805297374725342, "learning_rate": 3.564057415883816e-06, "loss": 0.3152, "step": 31153 }, { "epoch": 3.167344448962993, "grad_norm": 0.2718210816383362, "learning_rate": 3.5637174834185184e-06, "loss": 0.2733, "step": 31154 }, { "epoch": 3.167446116307442, "grad_norm": 0.2688380479812622, "learning_rate": 3.563377558188874e-06, "loss": 0.3092, "step": 31155 }, { "epoch": 3.167547783651891, "grad_norm": 0.29534590244293213, "learning_rate": 3.563037640196589e-06, "loss": 0.3239, "step": 31156 }, { "epoch": 3.1676494509963398, "grad_norm": 0.2516416013240814, "learning_rate": 3.562697729443382e-06, "loss": 0.2904, "step": 31157 }, { "epoch": 3.1677511183407887, "grad_norm": 0.25596755743026733, "learning_rate": 3.5623578259309597e-06, "loss": 0.292, "step": 31158 }, { "epoch": 3.167852785685238, "grad_norm": 0.2700272500514984, "learning_rate": 3.5620179296610365e-06, "loss": 0.3338, "step": 31159 }, { "epoch": 3.167954453029687, "grad_norm": 0.27251505851745605, "learning_rate": 3.561678040635328e-06, "loss": 0.309, "step": 31160 }, { "epoch": 3.168056120374136, "grad_norm": 0.2717287540435791, "learning_rate": 3.561338158855541e-06, "loss": 0.3204, "step": 31161 }, { "epoch": 3.168157787718585, "grad_norm": 0.29001495242118835, "learning_rate": 3.560998284323393e-06, "loss": 0.3302, "step": 31162 }, { "epoch": 3.168259455063034, "grad_norm": 0.26132187247276306, "learning_rate": 3.5606584170405922e-06, "loss": 0.3097, "step": 31163 }, { "epoch": 3.168361122407483, "grad_norm": 0.27995187044143677, "learning_rate": 3.560318557008851e-06, "loss": 0.2804, "step": 31164 }, { "epoch": 3.1684627897519317, "grad_norm": 0.26478812098503113, "learning_rate": 3.559978704229886e-06, "loss": 0.3208, "step": 31165 }, { "epoch": 3.1685644570963807, "grad_norm": 0.2800195813179016, "learning_rate": 3.559638858705403e-06, "loss": 0.3124, "step": 31166 }, { "epoch": 3.1686661244408296, "grad_norm": 0.2697441577911377, "learning_rate": 3.5592990204371205e-06, "loss": 0.31, "step": 31167 }, { "epoch": 3.1687677917852786, "grad_norm": 0.2850247025489807, "learning_rate": 3.558959189426744e-06, "loss": 0.313, "step": 31168 }, { "epoch": 3.1688694591297275, "grad_norm": 0.2892298102378845, "learning_rate": 3.558619365675988e-06, "loss": 0.3019, "step": 31169 }, { "epoch": 3.1689711264741764, "grad_norm": 0.2572679817676544, "learning_rate": 3.5582795491865697e-06, "loss": 0.3116, "step": 31170 }, { "epoch": 3.1690727938186254, "grad_norm": 0.25823351740837097, "learning_rate": 3.557939739960193e-06, "loss": 0.2972, "step": 31171 }, { "epoch": 3.1691744611630743, "grad_norm": 0.2827582359313965, "learning_rate": 3.5575999379985745e-06, "loss": 0.3469, "step": 31172 }, { "epoch": 3.1692761285075233, "grad_norm": 0.28077903389930725, "learning_rate": 3.557260143303424e-06, "loss": 0.3113, "step": 31173 }, { "epoch": 3.169377795851972, "grad_norm": 0.2870888113975525, "learning_rate": 3.556920355876452e-06, "loss": 0.3282, "step": 31174 }, { "epoch": 3.169479463196421, "grad_norm": 0.27209267020225525, "learning_rate": 3.556580575719376e-06, "loss": 0.3153, "step": 31175 }, { "epoch": 3.16958113054087, "grad_norm": 0.28021395206451416, "learning_rate": 3.5562408028339026e-06, "loss": 0.31, "step": 31176 }, { "epoch": 3.169682797885319, "grad_norm": 0.2910713851451874, "learning_rate": 3.555901037221745e-06, "loss": 0.3117, "step": 31177 }, { "epoch": 3.1697844652297684, "grad_norm": 0.2749067544937134, "learning_rate": 3.555561278884615e-06, "loss": 0.2996, "step": 31178 }, { "epoch": 3.1698861325742174, "grad_norm": 0.2555111050605774, "learning_rate": 3.555221527824222e-06, "loss": 0.3584, "step": 31179 }, { "epoch": 3.1699877999186663, "grad_norm": 0.26351743936538696, "learning_rate": 3.5548817840422835e-06, "loss": 0.3234, "step": 31180 }, { "epoch": 3.1700894672631152, "grad_norm": 0.27195557951927185, "learning_rate": 3.554542047540504e-06, "loss": 0.3221, "step": 31181 }, { "epoch": 3.170191134607564, "grad_norm": 0.27229759097099304, "learning_rate": 3.5542023183205998e-06, "loss": 0.3059, "step": 31182 }, { "epoch": 3.170292801952013, "grad_norm": 0.28310248255729675, "learning_rate": 3.55386259638428e-06, "loss": 0.3235, "step": 31183 }, { "epoch": 3.170394469296462, "grad_norm": 0.28495416045188904, "learning_rate": 3.553522881733255e-06, "loss": 0.2966, "step": 31184 }, { "epoch": 3.170496136640911, "grad_norm": 0.26222851872444153, "learning_rate": 3.5531831743692414e-06, "loss": 0.2981, "step": 31185 }, { "epoch": 3.17059780398536, "grad_norm": 0.30486878752708435, "learning_rate": 3.5528434742939455e-06, "loss": 0.3043, "step": 31186 }, { "epoch": 3.170699471329809, "grad_norm": 0.27664992213249207, "learning_rate": 3.552503781509081e-06, "loss": 0.3451, "step": 31187 }, { "epoch": 3.170801138674258, "grad_norm": 0.28297871351242065, "learning_rate": 3.5521640960163573e-06, "loss": 0.3186, "step": 31188 }, { "epoch": 3.1709028060187068, "grad_norm": 0.27197137475013733, "learning_rate": 3.551824417817487e-06, "loss": 0.3247, "step": 31189 }, { "epoch": 3.1710044733631557, "grad_norm": 0.26662373542785645, "learning_rate": 3.551484746914182e-06, "loss": 0.2922, "step": 31190 }, { "epoch": 3.1711061407076047, "grad_norm": 0.3011142313480377, "learning_rate": 3.5511450833081518e-06, "loss": 0.2791, "step": 31191 }, { "epoch": 3.1712078080520536, "grad_norm": 0.2727128267288208, "learning_rate": 3.5508054270011095e-06, "loss": 0.3058, "step": 31192 }, { "epoch": 3.1713094753965025, "grad_norm": 0.25574344396591187, "learning_rate": 3.550465777994764e-06, "loss": 0.3099, "step": 31193 }, { "epoch": 3.1714111427409515, "grad_norm": 0.2822391986846924, "learning_rate": 3.5501261362908275e-06, "loss": 0.3308, "step": 31194 }, { "epoch": 3.1715128100854004, "grad_norm": 0.26745492219924927, "learning_rate": 3.5497865018910117e-06, "loss": 0.3206, "step": 31195 }, { "epoch": 3.1716144774298494, "grad_norm": 0.29114818572998047, "learning_rate": 3.5494468747970257e-06, "loss": 0.3025, "step": 31196 }, { "epoch": 3.1717161447742983, "grad_norm": 0.28985345363616943, "learning_rate": 3.549107255010583e-06, "loss": 0.3513, "step": 31197 }, { "epoch": 3.1718178121187472, "grad_norm": 0.2971005439758301, "learning_rate": 3.5487676425333927e-06, "loss": 0.2956, "step": 31198 }, { "epoch": 3.171919479463196, "grad_norm": 0.26906222105026245, "learning_rate": 3.5484280373671652e-06, "loss": 0.2693, "step": 31199 }, { "epoch": 3.1720211468076456, "grad_norm": 0.2814045250415802, "learning_rate": 3.5480884395136137e-06, "loss": 0.3089, "step": 31200 }, { "epoch": 3.1721228141520945, "grad_norm": 0.26885002851486206, "learning_rate": 3.5477488489744467e-06, "loss": 0.3001, "step": 31201 }, { "epoch": 3.1722244814965435, "grad_norm": 0.2604648768901825, "learning_rate": 3.547409265751377e-06, "loss": 0.2872, "step": 31202 }, { "epoch": 3.1723261488409924, "grad_norm": 0.25984323024749756, "learning_rate": 3.5470696898461126e-06, "loss": 0.2876, "step": 31203 }, { "epoch": 3.1724278161854413, "grad_norm": 0.2742898464202881, "learning_rate": 3.5467301212603667e-06, "loss": 0.3094, "step": 31204 }, { "epoch": 3.1725294835298903, "grad_norm": 0.27118054032325745, "learning_rate": 3.5463905599958492e-06, "loss": 0.3069, "step": 31205 }, { "epoch": 3.1726311508743392, "grad_norm": 0.2701141834259033, "learning_rate": 3.5460510060542704e-06, "loss": 0.3232, "step": 31206 }, { "epoch": 3.172732818218788, "grad_norm": 0.2676093280315399, "learning_rate": 3.5457114594373417e-06, "loss": 0.3178, "step": 31207 }, { "epoch": 3.172834485563237, "grad_norm": 0.2889162003993988, "learning_rate": 3.545371920146772e-06, "loss": 0.3033, "step": 31208 }, { "epoch": 3.172936152907686, "grad_norm": 0.27552086114883423, "learning_rate": 3.545032388184273e-06, "loss": 0.2914, "step": 31209 }, { "epoch": 3.173037820252135, "grad_norm": 0.2715597450733185, "learning_rate": 3.5446928635515567e-06, "loss": 0.3269, "step": 31210 }, { "epoch": 3.173139487596584, "grad_norm": 0.2737281918525696, "learning_rate": 3.54435334625033e-06, "loss": 0.2982, "step": 31211 }, { "epoch": 3.173241154941033, "grad_norm": 0.24344302713871002, "learning_rate": 3.5440138362823064e-06, "loss": 0.3102, "step": 31212 }, { "epoch": 3.173342822285482, "grad_norm": 0.24566984176635742, "learning_rate": 3.543674333649194e-06, "loss": 0.3022, "step": 31213 }, { "epoch": 3.1734444896299308, "grad_norm": 0.25138387084007263, "learning_rate": 3.543334838352705e-06, "loss": 0.2751, "step": 31214 }, { "epoch": 3.1735461569743797, "grad_norm": 0.25925248861312866, "learning_rate": 3.5429953503945493e-06, "loss": 0.2999, "step": 31215 }, { "epoch": 3.1736478243188286, "grad_norm": 0.26399946212768555, "learning_rate": 3.5426558697764356e-06, "loss": 0.289, "step": 31216 }, { "epoch": 3.1737494916632776, "grad_norm": 0.26593342423439026, "learning_rate": 3.5423163965000774e-06, "loss": 0.3062, "step": 31217 }, { "epoch": 3.1738511590077265, "grad_norm": 0.27930748462677, "learning_rate": 3.541976930567181e-06, "loss": 0.338, "step": 31218 }, { "epoch": 3.173952826352176, "grad_norm": 0.302849143743515, "learning_rate": 3.541637471979458e-06, "loss": 0.3309, "step": 31219 }, { "epoch": 3.174054493696625, "grad_norm": 0.27918797731399536, "learning_rate": 3.5412980207386207e-06, "loss": 0.3115, "step": 31220 }, { "epoch": 3.174156161041074, "grad_norm": 0.28930404782295227, "learning_rate": 3.5409585768463754e-06, "loss": 0.3454, "step": 31221 }, { "epoch": 3.1742578283855227, "grad_norm": 0.2416340559720993, "learning_rate": 3.5406191403044355e-06, "loss": 0.2784, "step": 31222 }, { "epoch": 3.1743594957299717, "grad_norm": 0.26822370290756226, "learning_rate": 3.5402797111145087e-06, "loss": 0.2847, "step": 31223 }, { "epoch": 3.1744611630744206, "grad_norm": 0.2754683196544647, "learning_rate": 3.539940289278306e-06, "loss": 0.3233, "step": 31224 }, { "epoch": 3.1745628304188696, "grad_norm": 0.2897733151912689, "learning_rate": 3.5396008747975374e-06, "loss": 0.2804, "step": 31225 }, { "epoch": 3.1746644977633185, "grad_norm": 0.26671668887138367, "learning_rate": 3.5392614676739124e-06, "loss": 0.305, "step": 31226 }, { "epoch": 3.1747661651077674, "grad_norm": 0.2791934311389923, "learning_rate": 3.538922067909142e-06, "loss": 0.2957, "step": 31227 }, { "epoch": 3.1748678324522164, "grad_norm": 0.2614329755306244, "learning_rate": 3.5385826755049335e-06, "loss": 0.2926, "step": 31228 }, { "epoch": 3.1749694997966653, "grad_norm": 0.2885204255580902, "learning_rate": 3.5382432904629983e-06, "loss": 0.3358, "step": 31229 }, { "epoch": 3.1750711671411143, "grad_norm": 0.2818460762500763, "learning_rate": 3.537903912785047e-06, "loss": 0.2939, "step": 31230 }, { "epoch": 3.175172834485563, "grad_norm": 0.288027822971344, "learning_rate": 3.5375645424727873e-06, "loss": 0.2956, "step": 31231 }, { "epoch": 3.175274501830012, "grad_norm": 0.2852194905281067, "learning_rate": 3.5372251795279312e-06, "loss": 0.3132, "step": 31232 }, { "epoch": 3.175376169174461, "grad_norm": 0.254617303609848, "learning_rate": 3.5368858239521856e-06, "loss": 0.2911, "step": 31233 }, { "epoch": 3.17547783651891, "grad_norm": 0.29533758759498596, "learning_rate": 3.5365464757472623e-06, "loss": 0.3022, "step": 31234 }, { "epoch": 3.175579503863359, "grad_norm": 0.28617650270462036, "learning_rate": 3.53620713491487e-06, "loss": 0.298, "step": 31235 }, { "epoch": 3.175681171207808, "grad_norm": 0.31123557686805725, "learning_rate": 3.5358678014567183e-06, "loss": 0.3312, "step": 31236 }, { "epoch": 3.175782838552257, "grad_norm": 0.30628344416618347, "learning_rate": 3.535528475374517e-06, "loss": 0.3254, "step": 31237 }, { "epoch": 3.175884505896706, "grad_norm": 0.28254878520965576, "learning_rate": 3.5351891566699748e-06, "loss": 0.3336, "step": 31238 }, { "epoch": 3.1759861732411547, "grad_norm": 0.30777427554130554, "learning_rate": 3.534849845344801e-06, "loss": 0.3211, "step": 31239 }, { "epoch": 3.1760878405856037, "grad_norm": 0.283498615026474, "learning_rate": 3.534510541400707e-06, "loss": 0.3021, "step": 31240 }, { "epoch": 3.176189507930053, "grad_norm": 0.2743733525276184, "learning_rate": 3.5341712448393993e-06, "loss": 0.3245, "step": 31241 }, { "epoch": 3.176291175274502, "grad_norm": 0.29982396960258484, "learning_rate": 3.5338319556625893e-06, "loss": 0.3056, "step": 31242 }, { "epoch": 3.176392842618951, "grad_norm": 0.28111276030540466, "learning_rate": 3.533492673871985e-06, "loss": 0.2966, "step": 31243 }, { "epoch": 3.1764945099634, "grad_norm": 0.2670280337333679, "learning_rate": 3.5331533994692956e-06, "loss": 0.3015, "step": 31244 }, { "epoch": 3.176596177307849, "grad_norm": 0.2766873240470886, "learning_rate": 3.5328141324562326e-06, "loss": 0.2915, "step": 31245 }, { "epoch": 3.1766978446522978, "grad_norm": 0.24526138603687286, "learning_rate": 3.5324748728345015e-06, "loss": 0.3002, "step": 31246 }, { "epoch": 3.1767995119967467, "grad_norm": 0.2731839418411255, "learning_rate": 3.532135620605815e-06, "loss": 0.3658, "step": 31247 }, { "epoch": 3.1769011793411956, "grad_norm": 0.29225221276283264, "learning_rate": 3.5317963757718786e-06, "loss": 0.335, "step": 31248 }, { "epoch": 3.1770028466856446, "grad_norm": 0.2867441177368164, "learning_rate": 3.531457138334404e-06, "loss": 0.318, "step": 31249 }, { "epoch": 3.1771045140300935, "grad_norm": 0.2625630795955658, "learning_rate": 3.5311179082950996e-06, "loss": 0.3027, "step": 31250 }, { "epoch": 3.1772061813745425, "grad_norm": 0.26665759086608887, "learning_rate": 3.5307786856556736e-06, "loss": 0.3103, "step": 31251 }, { "epoch": 3.1773078487189914, "grad_norm": 0.2724309265613556, "learning_rate": 3.530439470417837e-06, "loss": 0.3044, "step": 31252 }, { "epoch": 3.1774095160634404, "grad_norm": 0.29361075162887573, "learning_rate": 3.5301002625832946e-06, "loss": 0.2852, "step": 31253 }, { "epoch": 3.1775111834078893, "grad_norm": 0.2851814031600952, "learning_rate": 3.5297610621537594e-06, "loss": 0.298, "step": 31254 }, { "epoch": 3.1776128507523382, "grad_norm": 0.2853033244609833, "learning_rate": 3.5294218691309382e-06, "loss": 0.3072, "step": 31255 }, { "epoch": 3.177714518096787, "grad_norm": 0.2737790048122406, "learning_rate": 3.5290826835165404e-06, "loss": 0.299, "step": 31256 }, { "epoch": 3.177816185441236, "grad_norm": 0.26816117763519287, "learning_rate": 3.5287435053122743e-06, "loss": 0.3327, "step": 31257 }, { "epoch": 3.177917852785685, "grad_norm": 0.2751922309398651, "learning_rate": 3.528404334519848e-06, "loss": 0.3293, "step": 31258 }, { "epoch": 3.178019520130134, "grad_norm": 0.2733873426914215, "learning_rate": 3.528065171140972e-06, "loss": 0.3311, "step": 31259 }, { "epoch": 3.1781211874745834, "grad_norm": 0.28084051609039307, "learning_rate": 3.527726015177354e-06, "loss": 0.3152, "step": 31260 }, { "epoch": 3.1782228548190323, "grad_norm": 0.25941023230552673, "learning_rate": 3.527386866630702e-06, "loss": 0.3112, "step": 31261 }, { "epoch": 3.1783245221634813, "grad_norm": 0.2692010998725891, "learning_rate": 3.527047725502726e-06, "loss": 0.2886, "step": 31262 }, { "epoch": 3.17842618950793, "grad_norm": 0.27052611112594604, "learning_rate": 3.5267085917951314e-06, "loss": 0.3153, "step": 31263 }, { "epoch": 3.178527856852379, "grad_norm": 0.26697349548339844, "learning_rate": 3.52636946550963e-06, "loss": 0.3073, "step": 31264 }, { "epoch": 3.178629524196828, "grad_norm": 0.27818965911865234, "learning_rate": 3.5260303466479293e-06, "loss": 0.3219, "step": 31265 }, { "epoch": 3.178731191541277, "grad_norm": 0.2757372558116913, "learning_rate": 3.5256912352117367e-06, "loss": 0.2818, "step": 31266 }, { "epoch": 3.178832858885726, "grad_norm": 0.272229939699173, "learning_rate": 3.5253521312027626e-06, "loss": 0.2849, "step": 31267 }, { "epoch": 3.178934526230175, "grad_norm": 0.2677685022354126, "learning_rate": 3.525013034622713e-06, "loss": 0.2859, "step": 31268 }, { "epoch": 3.179036193574624, "grad_norm": 0.28894075751304626, "learning_rate": 3.5246739454732993e-06, "loss": 0.287, "step": 31269 }, { "epoch": 3.179137860919073, "grad_norm": 0.3024677336215973, "learning_rate": 3.524334863756225e-06, "loss": 0.3212, "step": 31270 }, { "epoch": 3.1792395282635217, "grad_norm": 0.27433446049690247, "learning_rate": 3.523995789473201e-06, "loss": 0.3191, "step": 31271 }, { "epoch": 3.1793411956079707, "grad_norm": 0.2740534543991089, "learning_rate": 3.5236567226259368e-06, "loss": 0.3102, "step": 31272 }, { "epoch": 3.1794428629524196, "grad_norm": 0.2830857038497925, "learning_rate": 3.5233176632161383e-06, "loss": 0.3174, "step": 31273 }, { "epoch": 3.1795445302968686, "grad_norm": 0.2870769202709198, "learning_rate": 3.5229786112455167e-06, "loss": 0.3226, "step": 31274 }, { "epoch": 3.1796461976413175, "grad_norm": 0.2720222771167755, "learning_rate": 3.5226395667157743e-06, "loss": 0.314, "step": 31275 }, { "epoch": 3.1797478649857664, "grad_norm": 0.2653539478778839, "learning_rate": 3.522300529628624e-06, "loss": 0.3032, "step": 31276 }, { "epoch": 3.1798495323302154, "grad_norm": 0.2714168131351471, "learning_rate": 3.5219614999857737e-06, "loss": 0.3225, "step": 31277 }, { "epoch": 3.1799511996746643, "grad_norm": 0.2829470634460449, "learning_rate": 3.5216224777889286e-06, "loss": 0.2986, "step": 31278 }, { "epoch": 3.1800528670191133, "grad_norm": 0.2608869671821594, "learning_rate": 3.5212834630398007e-06, "loss": 0.302, "step": 31279 }, { "epoch": 3.180154534363562, "grad_norm": 0.27682942152023315, "learning_rate": 3.5209444557400917e-06, "loss": 0.305, "step": 31280 }, { "epoch": 3.180256201708011, "grad_norm": 0.2775419056415558, "learning_rate": 3.5206054558915147e-06, "loss": 0.2898, "step": 31281 }, { "epoch": 3.1803578690524605, "grad_norm": 0.26645031571388245, "learning_rate": 3.5202664634957777e-06, "loss": 0.3067, "step": 31282 }, { "epoch": 3.1804595363969095, "grad_norm": 0.27232563495635986, "learning_rate": 3.5199274785545833e-06, "loss": 0.294, "step": 31283 }, { "epoch": 3.1805612037413584, "grad_norm": 0.25892502069473267, "learning_rate": 3.5195885010696463e-06, "loss": 0.328, "step": 31284 }, { "epoch": 3.1806628710858074, "grad_norm": 0.27810239791870117, "learning_rate": 3.5192495310426666e-06, "loss": 0.3091, "step": 31285 }, { "epoch": 3.1807645384302563, "grad_norm": 0.28003329038619995, "learning_rate": 3.5189105684753575e-06, "loss": 0.3193, "step": 31286 }, { "epoch": 3.1808662057747052, "grad_norm": 0.28015780448913574, "learning_rate": 3.518571613369427e-06, "loss": 0.3211, "step": 31287 }, { "epoch": 3.180967873119154, "grad_norm": 0.27867552638053894, "learning_rate": 3.5182326657265775e-06, "loss": 0.3071, "step": 31288 }, { "epoch": 3.181069540463603, "grad_norm": 0.2619229853153229, "learning_rate": 3.5178937255485235e-06, "loss": 0.3066, "step": 31289 }, { "epoch": 3.181171207808052, "grad_norm": 0.25558415055274963, "learning_rate": 3.5175547928369646e-06, "loss": 0.3113, "step": 31290 }, { "epoch": 3.181272875152501, "grad_norm": 0.27381494641304016, "learning_rate": 3.5172158675936134e-06, "loss": 0.305, "step": 31291 }, { "epoch": 3.18137454249695, "grad_norm": 0.253799170255661, "learning_rate": 3.516876949820179e-06, "loss": 0.2996, "step": 31292 }, { "epoch": 3.181476209841399, "grad_norm": 0.29161885380744934, "learning_rate": 3.5165380395183623e-06, "loss": 0.3116, "step": 31293 }, { "epoch": 3.181577877185848, "grad_norm": 0.26084423065185547, "learning_rate": 3.5161991366898784e-06, "loss": 0.3125, "step": 31294 }, { "epoch": 3.1816795445302968, "grad_norm": 0.29050302505493164, "learning_rate": 3.515860241336427e-06, "loss": 0.3015, "step": 31295 }, { "epoch": 3.1817812118747457, "grad_norm": 0.2703271210193634, "learning_rate": 3.51552135345972e-06, "loss": 0.3231, "step": 31296 }, { "epoch": 3.1818828792191947, "grad_norm": 0.2541502118110657, "learning_rate": 3.515182473061466e-06, "loss": 0.2938, "step": 31297 }, { "epoch": 3.1819845465636436, "grad_norm": 0.25882774591445923, "learning_rate": 3.5148436001433662e-06, "loss": 0.3644, "step": 31298 }, { "epoch": 3.1820862139080925, "grad_norm": 0.28176072239875793, "learning_rate": 3.514504734707135e-06, "loss": 0.2887, "step": 31299 }, { "epoch": 3.1821878812525415, "grad_norm": 0.26231715083122253, "learning_rate": 3.514165876754474e-06, "loss": 0.2826, "step": 31300 }, { "epoch": 3.182289548596991, "grad_norm": 0.2696107029914856, "learning_rate": 3.513827026287091e-06, "loss": 0.3165, "step": 31301 }, { "epoch": 3.18239121594144, "grad_norm": 0.28234538435935974, "learning_rate": 3.513488183306697e-06, "loss": 0.3153, "step": 31302 }, { "epoch": 3.1824928832858888, "grad_norm": 0.29815375804901123, "learning_rate": 3.5131493478149938e-06, "loss": 0.2973, "step": 31303 }, { "epoch": 3.1825945506303377, "grad_norm": 0.28087812662124634, "learning_rate": 3.5128105198136935e-06, "loss": 0.3195, "step": 31304 }, { "epoch": 3.1826962179747866, "grad_norm": 0.27987170219421387, "learning_rate": 3.5124716993044984e-06, "loss": 0.3239, "step": 31305 }, { "epoch": 3.1827978853192356, "grad_norm": 0.26988205313682556, "learning_rate": 3.5121328862891168e-06, "loss": 0.3025, "step": 31306 }, { "epoch": 3.1828995526636845, "grad_norm": 0.27275240421295166, "learning_rate": 3.5117940807692585e-06, "loss": 0.3224, "step": 31307 }, { "epoch": 3.1830012200081335, "grad_norm": 0.29226312041282654, "learning_rate": 3.511455282746625e-06, "loss": 0.2992, "step": 31308 }, { "epoch": 3.1831028873525824, "grad_norm": 0.29138731956481934, "learning_rate": 3.51111649222293e-06, "loss": 0.3259, "step": 31309 }, { "epoch": 3.1832045546970313, "grad_norm": 0.2533535957336426, "learning_rate": 3.5107777091998734e-06, "loss": 0.3076, "step": 31310 }, { "epoch": 3.1833062220414803, "grad_norm": 0.2817883789539337, "learning_rate": 3.5104389336791645e-06, "loss": 0.3265, "step": 31311 }, { "epoch": 3.1834078893859292, "grad_norm": 0.2602236270904541, "learning_rate": 3.510100165662512e-06, "loss": 0.3449, "step": 31312 }, { "epoch": 3.183509556730378, "grad_norm": 0.2846142649650574, "learning_rate": 3.5097614051516192e-06, "loss": 0.3061, "step": 31313 }, { "epoch": 3.183611224074827, "grad_norm": 0.2688777446746826, "learning_rate": 3.5094226521481967e-06, "loss": 0.306, "step": 31314 }, { "epoch": 3.183712891419276, "grad_norm": 0.2701804041862488, "learning_rate": 3.5090839066539466e-06, "loss": 0.3495, "step": 31315 }, { "epoch": 3.183814558763725, "grad_norm": 0.2742321789264679, "learning_rate": 3.5087451686705764e-06, "loss": 0.3258, "step": 31316 }, { "epoch": 3.183916226108174, "grad_norm": 0.2946568429470062, "learning_rate": 3.5084064381997973e-06, "loss": 0.3198, "step": 31317 }, { "epoch": 3.184017893452623, "grad_norm": 0.2742767035961151, "learning_rate": 3.5080677152433083e-06, "loss": 0.3187, "step": 31318 }, { "epoch": 3.184119560797072, "grad_norm": 0.2676624655723572, "learning_rate": 3.507728999802823e-06, "loss": 0.3241, "step": 31319 }, { "epoch": 3.1842212281415208, "grad_norm": 0.2691653072834015, "learning_rate": 3.5073902918800428e-06, "loss": 0.317, "step": 31320 }, { "epoch": 3.1843228954859697, "grad_norm": 0.2665526568889618, "learning_rate": 3.507051591476674e-06, "loss": 0.3081, "step": 31321 }, { "epoch": 3.1844245628304186, "grad_norm": 0.3070223927497864, "learning_rate": 3.5067128985944276e-06, "loss": 0.3171, "step": 31322 }, { "epoch": 3.184526230174868, "grad_norm": 0.28508511185646057, "learning_rate": 3.5063742132350054e-06, "loss": 0.3323, "step": 31323 }, { "epoch": 3.184627897519317, "grad_norm": 0.2791406214237213, "learning_rate": 3.5060355354001153e-06, "loss": 0.302, "step": 31324 }, { "epoch": 3.184729564863766, "grad_norm": 0.2823330760002136, "learning_rate": 3.5056968650914623e-06, "loss": 0.3231, "step": 31325 }, { "epoch": 3.184831232208215, "grad_norm": 0.2783377766609192, "learning_rate": 3.5053582023107524e-06, "loss": 0.3373, "step": 31326 }, { "epoch": 3.184932899552664, "grad_norm": 0.26818862557411194, "learning_rate": 3.5050195470596953e-06, "loss": 0.3003, "step": 31327 }, { "epoch": 3.1850345668971127, "grad_norm": 0.2564186155796051, "learning_rate": 3.5046808993399926e-06, "loss": 0.2871, "step": 31328 }, { "epoch": 3.1851362342415617, "grad_norm": 0.28597113490104675, "learning_rate": 3.5043422591533538e-06, "loss": 0.2981, "step": 31329 }, { "epoch": 3.1852379015860106, "grad_norm": 0.2662563621997833, "learning_rate": 3.504003626501482e-06, "loss": 0.2963, "step": 31330 }, { "epoch": 3.1853395689304596, "grad_norm": 0.27798759937286377, "learning_rate": 3.5036650013860827e-06, "loss": 0.3239, "step": 31331 }, { "epoch": 3.1854412362749085, "grad_norm": 0.2704167664051056, "learning_rate": 3.503326383808868e-06, "loss": 0.2716, "step": 31332 }, { "epoch": 3.1855429036193574, "grad_norm": 0.26014652848243713, "learning_rate": 3.502987773771536e-06, "loss": 0.3047, "step": 31333 }, { "epoch": 3.1856445709638064, "grad_norm": 0.2827107906341553, "learning_rate": 3.502649171275797e-06, "loss": 0.2858, "step": 31334 }, { "epoch": 3.1857462383082553, "grad_norm": 0.2839203476905823, "learning_rate": 3.5023105763233554e-06, "loss": 0.3009, "step": 31335 }, { "epoch": 3.1858479056527043, "grad_norm": 0.2633274793624878, "learning_rate": 3.5019719889159155e-06, "loss": 0.3451, "step": 31336 }, { "epoch": 3.185949572997153, "grad_norm": 0.24949052929878235, "learning_rate": 3.5016334090551885e-06, "loss": 0.3129, "step": 31337 }, { "epoch": 3.186051240341602, "grad_norm": 0.25817927718162537, "learning_rate": 3.5012948367428735e-06, "loss": 0.304, "step": 31338 }, { "epoch": 3.186152907686051, "grad_norm": 0.306549072265625, "learning_rate": 3.50095627198068e-06, "loss": 0.32, "step": 31339 }, { "epoch": 3.1862545750305, "grad_norm": 0.28308171033859253, "learning_rate": 3.5006177147703126e-06, "loss": 0.3106, "step": 31340 }, { "epoch": 3.186356242374949, "grad_norm": 0.28175175189971924, "learning_rate": 3.500279165113476e-06, "loss": 0.2804, "step": 31341 }, { "epoch": 3.1864579097193984, "grad_norm": 0.2782365381717682, "learning_rate": 3.499940623011877e-06, "loss": 0.3163, "step": 31342 }, { "epoch": 3.1865595770638473, "grad_norm": 0.28047317266464233, "learning_rate": 3.499602088467221e-06, "loss": 0.2812, "step": 31343 }, { "epoch": 3.1866612444082962, "grad_norm": 0.2562292218208313, "learning_rate": 3.4992635614812133e-06, "loss": 0.3265, "step": 31344 }, { "epoch": 3.186762911752745, "grad_norm": 0.2802634537220001, "learning_rate": 3.4989250420555582e-06, "loss": 0.3177, "step": 31345 }, { "epoch": 3.186864579097194, "grad_norm": 0.2902587950229645, "learning_rate": 3.498586530191962e-06, "loss": 0.3108, "step": 31346 }, { "epoch": 3.186966246441643, "grad_norm": 0.2540562152862549, "learning_rate": 3.4982480258921314e-06, "loss": 0.304, "step": 31347 }, { "epoch": 3.187067913786092, "grad_norm": 0.2674868404865265, "learning_rate": 3.4979095291577694e-06, "loss": 0.3055, "step": 31348 }, { "epoch": 3.187169581130541, "grad_norm": 0.2803264260292053, "learning_rate": 3.497571039990583e-06, "loss": 0.2966, "step": 31349 }, { "epoch": 3.18727124847499, "grad_norm": 0.3011840879917145, "learning_rate": 3.4972325583922757e-06, "loss": 0.2932, "step": 31350 }, { "epoch": 3.187372915819439, "grad_norm": 0.2735774517059326, "learning_rate": 3.4968940843645537e-06, "loss": 0.3034, "step": 31351 }, { "epoch": 3.1874745831638878, "grad_norm": 0.28476831316947937, "learning_rate": 3.496555617909123e-06, "loss": 0.2814, "step": 31352 }, { "epoch": 3.1875762505083367, "grad_norm": 0.2689000964164734, "learning_rate": 3.496217159027687e-06, "loss": 0.3115, "step": 31353 }, { "epoch": 3.1876779178527856, "grad_norm": 0.2962047755718231, "learning_rate": 3.4958787077219527e-06, "loss": 0.2786, "step": 31354 }, { "epoch": 3.1877795851972346, "grad_norm": 0.2793823778629303, "learning_rate": 3.4955402639936225e-06, "loss": 0.3227, "step": 31355 }, { "epoch": 3.1878812525416835, "grad_norm": 0.28160005807876587, "learning_rate": 3.4952018278444033e-06, "loss": 0.3363, "step": 31356 }, { "epoch": 3.1879829198861325, "grad_norm": 0.2937365472316742, "learning_rate": 3.4948633992760007e-06, "loss": 0.3021, "step": 31357 }, { "epoch": 3.1880845872305814, "grad_norm": 0.2649204730987549, "learning_rate": 3.494524978290118e-06, "loss": 0.2799, "step": 31358 }, { "epoch": 3.1881862545750304, "grad_norm": 0.2747412621974945, "learning_rate": 3.494186564888461e-06, "loss": 0.3129, "step": 31359 }, { "epoch": 3.1882879219194793, "grad_norm": 0.283006489276886, "learning_rate": 3.4938481590727337e-06, "loss": 0.28, "step": 31360 }, { "epoch": 3.1883895892639282, "grad_norm": 0.27052703499794006, "learning_rate": 3.4935097608446413e-06, "loss": 0.3374, "step": 31361 }, { "epoch": 3.188491256608377, "grad_norm": 0.29356858134269714, "learning_rate": 3.4931713702058895e-06, "loss": 0.2911, "step": 31362 }, { "epoch": 3.188592923952826, "grad_norm": 0.2653335630893707, "learning_rate": 3.492832987158181e-06, "loss": 0.3484, "step": 31363 }, { "epoch": 3.1886945912972755, "grad_norm": 0.24534256756305695, "learning_rate": 3.4924946117032238e-06, "loss": 0.2911, "step": 31364 }, { "epoch": 3.1887962586417244, "grad_norm": 0.28575631976127625, "learning_rate": 3.4921562438427194e-06, "loss": 0.298, "step": 31365 }, { "epoch": 3.1888979259861734, "grad_norm": 0.3367456793785095, "learning_rate": 3.4918178835783722e-06, "loss": 0.3193, "step": 31366 }, { "epoch": 3.1889995933306223, "grad_norm": 0.28707441687583923, "learning_rate": 3.4914795309118903e-06, "loss": 0.3282, "step": 31367 }, { "epoch": 3.1891012606750713, "grad_norm": 0.2694408595561981, "learning_rate": 3.491141185844975e-06, "loss": 0.2781, "step": 31368 }, { "epoch": 3.18920292801952, "grad_norm": 0.2521294057369232, "learning_rate": 3.490802848379332e-06, "loss": 0.3214, "step": 31369 }, { "epoch": 3.189304595363969, "grad_norm": 0.2806217670440674, "learning_rate": 3.4904645185166657e-06, "loss": 0.3154, "step": 31370 }, { "epoch": 3.189406262708418, "grad_norm": 0.29003289341926575, "learning_rate": 3.4901261962586796e-06, "loss": 0.3234, "step": 31371 }, { "epoch": 3.189507930052867, "grad_norm": 0.28753501176834106, "learning_rate": 3.489787881607081e-06, "loss": 0.2855, "step": 31372 }, { "epoch": 3.189609597397316, "grad_norm": 0.2817530930042267, "learning_rate": 3.4894495745635696e-06, "loss": 0.2862, "step": 31373 }, { "epoch": 3.189711264741765, "grad_norm": 0.25331607460975647, "learning_rate": 3.4891112751298545e-06, "loss": 0.3141, "step": 31374 }, { "epoch": 3.189812932086214, "grad_norm": 0.2800915241241455, "learning_rate": 3.4887729833076367e-06, "loss": 0.2965, "step": 31375 }, { "epoch": 3.189914599430663, "grad_norm": 0.2564641237258911, "learning_rate": 3.4884346990986206e-06, "loss": 0.2871, "step": 31376 }, { "epoch": 3.1900162667751117, "grad_norm": 0.2684410810470581, "learning_rate": 3.488096422504513e-06, "loss": 0.3046, "step": 31377 }, { "epoch": 3.1901179341195607, "grad_norm": 0.2812940776348114, "learning_rate": 3.4877581535270154e-06, "loss": 0.3197, "step": 31378 }, { "epoch": 3.1902196014640096, "grad_norm": 0.29427534341812134, "learning_rate": 3.4874198921678337e-06, "loss": 0.2641, "step": 31379 }, { "epoch": 3.1903212688084586, "grad_norm": 0.27462631464004517, "learning_rate": 3.48708163842867e-06, "loss": 0.3316, "step": 31380 }, { "epoch": 3.1904229361529075, "grad_norm": 0.26812678575515747, "learning_rate": 3.4867433923112294e-06, "loss": 0.2899, "step": 31381 }, { "epoch": 3.1905246034973564, "grad_norm": 0.30779966711997986, "learning_rate": 3.486405153817217e-06, "loss": 0.3321, "step": 31382 }, { "epoch": 3.190626270841806, "grad_norm": 0.27403467893600464, "learning_rate": 3.4860669229483347e-06, "loss": 0.2705, "step": 31383 }, { "epoch": 3.1907279381862548, "grad_norm": 0.2528582215309143, "learning_rate": 3.485728699706289e-06, "loss": 0.2956, "step": 31384 }, { "epoch": 3.1908296055307037, "grad_norm": 0.2827441990375519, "learning_rate": 3.485390484092781e-06, "loss": 0.2833, "step": 31385 }, { "epoch": 3.1909312728751527, "grad_norm": 0.2655412554740906, "learning_rate": 3.485052276109515e-06, "loss": 0.2975, "step": 31386 }, { "epoch": 3.1910329402196016, "grad_norm": 0.2721364498138428, "learning_rate": 3.4847140757581976e-06, "loss": 0.3134, "step": 31387 }, { "epoch": 3.1911346075640505, "grad_norm": 0.27966490387916565, "learning_rate": 3.4843758830405296e-06, "loss": 0.3183, "step": 31388 }, { "epoch": 3.1912362749084995, "grad_norm": 0.2781411111354828, "learning_rate": 3.4840376979582165e-06, "loss": 0.2899, "step": 31389 }, { "epoch": 3.1913379422529484, "grad_norm": 0.24939323961734772, "learning_rate": 3.48369952051296e-06, "loss": 0.2997, "step": 31390 }, { "epoch": 3.1914396095973974, "grad_norm": 0.27200111746788025, "learning_rate": 3.4833613507064655e-06, "loss": 0.2926, "step": 31391 }, { "epoch": 3.1915412769418463, "grad_norm": 0.2539801001548767, "learning_rate": 3.4830231885404364e-06, "loss": 0.3189, "step": 31392 }, { "epoch": 3.1916429442862952, "grad_norm": 0.28717970848083496, "learning_rate": 3.482685034016576e-06, "loss": 0.3428, "step": 31393 }, { "epoch": 3.191744611630744, "grad_norm": 0.2743767201900482, "learning_rate": 3.482346887136588e-06, "loss": 0.3684, "step": 31394 }, { "epoch": 3.191846278975193, "grad_norm": 0.2743403911590576, "learning_rate": 3.4820087479021747e-06, "loss": 0.3086, "step": 31395 }, { "epoch": 3.191947946319642, "grad_norm": 0.29062244296073914, "learning_rate": 3.481670616315041e-06, "loss": 0.3362, "step": 31396 }, { "epoch": 3.192049613664091, "grad_norm": 0.2876138687133789, "learning_rate": 3.481332492376891e-06, "loss": 0.2912, "step": 31397 }, { "epoch": 3.19215128100854, "grad_norm": 0.28737470507621765, "learning_rate": 3.4809943760894256e-06, "loss": 0.3208, "step": 31398 }, { "epoch": 3.192252948352989, "grad_norm": 0.2735929489135742, "learning_rate": 3.480656267454351e-06, "loss": 0.3417, "step": 31399 }, { "epoch": 3.192354615697438, "grad_norm": 0.2893969416618347, "learning_rate": 3.4803181664733677e-06, "loss": 0.3244, "step": 31400 }, { "epoch": 3.1924562830418868, "grad_norm": 0.2821151316165924, "learning_rate": 3.47998007314818e-06, "loss": 0.3036, "step": 31401 }, { "epoch": 3.1925579503863357, "grad_norm": 0.2852652668952942, "learning_rate": 3.479641987480493e-06, "loss": 0.3145, "step": 31402 }, { "epoch": 3.1926596177307847, "grad_norm": 0.2735009491443634, "learning_rate": 3.479303909472008e-06, "loss": 0.3106, "step": 31403 }, { "epoch": 3.1927612850752336, "grad_norm": 0.2592547833919525, "learning_rate": 3.4789658391244286e-06, "loss": 0.3283, "step": 31404 }, { "epoch": 3.192862952419683, "grad_norm": 0.2718571126461029, "learning_rate": 3.478627776439457e-06, "loss": 0.316, "step": 31405 }, { "epoch": 3.192964619764132, "grad_norm": 0.2908475399017334, "learning_rate": 3.4782897214187973e-06, "loss": 0.3356, "step": 31406 }, { "epoch": 3.193066287108581, "grad_norm": 0.25616079568862915, "learning_rate": 3.4779516740641532e-06, "loss": 0.3162, "step": 31407 }, { "epoch": 3.19316795445303, "grad_norm": 0.29944857954978943, "learning_rate": 3.477613634377226e-06, "loss": 0.318, "step": 31408 }, { "epoch": 3.1932696217974788, "grad_norm": 0.2725810408592224, "learning_rate": 3.477275602359721e-06, "loss": 0.3194, "step": 31409 }, { "epoch": 3.1933712891419277, "grad_norm": 0.2724857032299042, "learning_rate": 3.4769375780133372e-06, "loss": 0.319, "step": 31410 }, { "epoch": 3.1934729564863766, "grad_norm": 0.28260430693626404, "learning_rate": 3.4765995613397812e-06, "loss": 0.3035, "step": 31411 }, { "epoch": 3.1935746238308256, "grad_norm": 0.2972639203071594, "learning_rate": 3.4762615523407554e-06, "loss": 0.3207, "step": 31412 }, { "epoch": 3.1936762911752745, "grad_norm": 0.2628835439682007, "learning_rate": 3.4759235510179614e-06, "loss": 0.3315, "step": 31413 }, { "epoch": 3.1937779585197235, "grad_norm": 0.26667001843452454, "learning_rate": 3.475585557373102e-06, "loss": 0.3071, "step": 31414 }, { "epoch": 3.1938796258641724, "grad_norm": 0.27501431107521057, "learning_rate": 3.4752475714078803e-06, "loss": 0.3103, "step": 31415 }, { "epoch": 3.1939812932086213, "grad_norm": 0.2815523147583008, "learning_rate": 3.4749095931239984e-06, "loss": 0.2881, "step": 31416 }, { "epoch": 3.1940829605530703, "grad_norm": 0.2586270272731781, "learning_rate": 3.474571622523161e-06, "loss": 0.3367, "step": 31417 }, { "epoch": 3.1941846278975192, "grad_norm": 0.2544677257537842, "learning_rate": 3.474233659607068e-06, "loss": 0.3033, "step": 31418 }, { "epoch": 3.194286295241968, "grad_norm": 0.2802739441394806, "learning_rate": 3.4738957043774247e-06, "loss": 0.2988, "step": 31419 }, { "epoch": 3.194387962586417, "grad_norm": 0.2913723289966583, "learning_rate": 3.4735577568359306e-06, "loss": 0.3032, "step": 31420 }, { "epoch": 3.194489629930866, "grad_norm": 0.2646341919898987, "learning_rate": 3.47321981698429e-06, "loss": 0.2872, "step": 31421 }, { "epoch": 3.194591297275315, "grad_norm": 0.2512945830821991, "learning_rate": 3.4728818848242064e-06, "loss": 0.349, "step": 31422 }, { "epoch": 3.194692964619764, "grad_norm": 0.2508392035961151, "learning_rate": 3.4725439603573797e-06, "loss": 0.3099, "step": 31423 }, { "epoch": 3.1947946319642133, "grad_norm": 0.2670324444770813, "learning_rate": 3.4722060435855145e-06, "loss": 0.3114, "step": 31424 }, { "epoch": 3.1948962993086623, "grad_norm": 0.25811389088630676, "learning_rate": 3.471868134510311e-06, "loss": 0.2791, "step": 31425 }, { "epoch": 3.194997966653111, "grad_norm": 0.26666998863220215, "learning_rate": 3.4715302331334733e-06, "loss": 0.2884, "step": 31426 }, { "epoch": 3.19509963399756, "grad_norm": 0.2570646405220032, "learning_rate": 3.471192339456704e-06, "loss": 0.33, "step": 31427 }, { "epoch": 3.195201301342009, "grad_norm": 0.2947961390018463, "learning_rate": 3.470854453481703e-06, "loss": 0.3158, "step": 31428 }, { "epoch": 3.195302968686458, "grad_norm": 0.2695053517818451, "learning_rate": 3.470516575210175e-06, "loss": 0.2993, "step": 31429 }, { "epoch": 3.195404636030907, "grad_norm": 0.28704455494880676, "learning_rate": 3.4701787046438197e-06, "loss": 0.3036, "step": 31430 }, { "epoch": 3.195506303375356, "grad_norm": 0.3190005421638489, "learning_rate": 3.469840841784341e-06, "loss": 0.305, "step": 31431 }, { "epoch": 3.195607970719805, "grad_norm": 0.2794235944747925, "learning_rate": 3.4695029866334417e-06, "loss": 0.3086, "step": 31432 }, { "epoch": 3.195709638064254, "grad_norm": 0.27208763360977173, "learning_rate": 3.469165139192821e-06, "loss": 0.3085, "step": 31433 }, { "epoch": 3.1958113054087027, "grad_norm": 0.26697415113449097, "learning_rate": 3.4688272994641852e-06, "loss": 0.3021, "step": 31434 }, { "epoch": 3.1959129727531517, "grad_norm": 0.25544944405555725, "learning_rate": 3.46848946744923e-06, "loss": 0.3449, "step": 31435 }, { "epoch": 3.1960146400976006, "grad_norm": 0.2457108050584793, "learning_rate": 3.468151643149662e-06, "loss": 0.2917, "step": 31436 }, { "epoch": 3.1961163074420496, "grad_norm": 0.24930141866207123, "learning_rate": 3.4678138265671835e-06, "loss": 0.2901, "step": 31437 }, { "epoch": 3.1962179747864985, "grad_norm": 0.25233587622642517, "learning_rate": 3.467476017703494e-06, "loss": 0.3156, "step": 31438 }, { "epoch": 3.1963196421309474, "grad_norm": 0.2754266560077667, "learning_rate": 3.4671382165602974e-06, "loss": 0.3093, "step": 31439 }, { "epoch": 3.1964213094753964, "grad_norm": 0.2647087872028351, "learning_rate": 3.4668004231392915e-06, "loss": 0.2901, "step": 31440 }, { "epoch": 3.1965229768198453, "grad_norm": 0.2769404649734497, "learning_rate": 3.4664626374421818e-06, "loss": 0.3043, "step": 31441 }, { "epoch": 3.1966246441642943, "grad_norm": 0.2892044484615326, "learning_rate": 3.4661248594706698e-06, "loss": 0.327, "step": 31442 }, { "epoch": 3.196726311508743, "grad_norm": 0.2912551760673523, "learning_rate": 3.4657870892264555e-06, "loss": 0.2951, "step": 31443 }, { "epoch": 3.196827978853192, "grad_norm": 0.2646491229534149, "learning_rate": 3.4654493267112433e-06, "loss": 0.296, "step": 31444 }, { "epoch": 3.196929646197641, "grad_norm": 0.2642935812473297, "learning_rate": 3.4651115719267292e-06, "loss": 0.3314, "step": 31445 }, { "epoch": 3.1970313135420905, "grad_norm": 0.2731260359287262, "learning_rate": 3.4647738248746193e-06, "loss": 0.3335, "step": 31446 }, { "epoch": 3.1971329808865394, "grad_norm": 0.29101553559303284, "learning_rate": 3.464436085556615e-06, "loss": 0.2896, "step": 31447 }, { "epoch": 3.1972346482309884, "grad_norm": 0.2743876576423645, "learning_rate": 3.464098353974417e-06, "loss": 0.3023, "step": 31448 }, { "epoch": 3.1973363155754373, "grad_norm": 0.28043684363365173, "learning_rate": 3.4637606301297272e-06, "loss": 0.2891, "step": 31449 }, { "epoch": 3.1974379829198862, "grad_norm": 0.2870900630950928, "learning_rate": 3.463422914024244e-06, "loss": 0.315, "step": 31450 }, { "epoch": 3.197539650264335, "grad_norm": 0.3071206510066986, "learning_rate": 3.463085205659671e-06, "loss": 0.3185, "step": 31451 }, { "epoch": 3.197641317608784, "grad_norm": 0.266858845949173, "learning_rate": 3.462747505037712e-06, "loss": 0.3037, "step": 31452 }, { "epoch": 3.197742984953233, "grad_norm": 0.2704637944698334, "learning_rate": 3.4624098121600635e-06, "loss": 0.3178, "step": 31453 }, { "epoch": 3.197844652297682, "grad_norm": 0.2748219072818756, "learning_rate": 3.462072127028431e-06, "loss": 0.2967, "step": 31454 }, { "epoch": 3.197946319642131, "grad_norm": 0.2742997109889984, "learning_rate": 3.4617344496445113e-06, "loss": 0.2586, "step": 31455 }, { "epoch": 3.19804798698658, "grad_norm": 0.24828685820102692, "learning_rate": 3.461396780010009e-06, "loss": 0.2768, "step": 31456 }, { "epoch": 3.198149654331029, "grad_norm": 0.29638463258743286, "learning_rate": 3.461059118126626e-06, "loss": 0.3015, "step": 31457 }, { "epoch": 3.1982513216754778, "grad_norm": 0.2828555107116699, "learning_rate": 3.460721463996057e-06, "loss": 0.3498, "step": 31458 }, { "epoch": 3.1983529890199267, "grad_norm": 0.31525474786758423, "learning_rate": 3.460383817620012e-06, "loss": 0.3205, "step": 31459 }, { "epoch": 3.1984546563643756, "grad_norm": 0.2704414129257202, "learning_rate": 3.460046179000185e-06, "loss": 0.3065, "step": 31460 }, { "epoch": 3.1985563237088246, "grad_norm": 0.25435882806777954, "learning_rate": 3.459708548138281e-06, "loss": 0.288, "step": 31461 }, { "epoch": 3.1986579910532735, "grad_norm": 0.2958813011646271, "learning_rate": 3.4593709250359982e-06, "loss": 0.2977, "step": 31462 }, { "epoch": 3.1987596583977225, "grad_norm": 0.2635822892189026, "learning_rate": 3.459033309695037e-06, "loss": 0.3027, "step": 31463 }, { "epoch": 3.1988613257421714, "grad_norm": 0.307914674282074, "learning_rate": 3.458695702117103e-06, "loss": 0.3075, "step": 31464 }, { "epoch": 3.198962993086621, "grad_norm": 0.276840478181839, "learning_rate": 3.458358102303891e-06, "loss": 0.3029, "step": 31465 }, { "epoch": 3.1990646604310697, "grad_norm": 0.27327319979667664, "learning_rate": 3.4580205102571074e-06, "loss": 0.2937, "step": 31466 }, { "epoch": 3.1991663277755187, "grad_norm": 0.28233474493026733, "learning_rate": 3.4576829259784477e-06, "loss": 0.3002, "step": 31467 }, { "epoch": 3.1992679951199676, "grad_norm": 0.27733907103538513, "learning_rate": 3.457345349469614e-06, "loss": 0.2933, "step": 31468 }, { "epoch": 3.1993696624644166, "grad_norm": 0.2599363327026367, "learning_rate": 3.457007780732311e-06, "loss": 0.3091, "step": 31469 }, { "epoch": 3.1994713298088655, "grad_norm": 0.2695011794567108, "learning_rate": 3.4566702197682343e-06, "loss": 0.2973, "step": 31470 }, { "epoch": 3.1995729971533144, "grad_norm": 0.2992824912071228, "learning_rate": 3.4563326665790876e-06, "loss": 0.315, "step": 31471 }, { "epoch": 3.1996746644977634, "grad_norm": 0.26397132873535156, "learning_rate": 3.4559951211665686e-06, "loss": 0.2943, "step": 31472 }, { "epoch": 3.1997763318422123, "grad_norm": 0.27043911814689636, "learning_rate": 3.455657583532378e-06, "loss": 0.306, "step": 31473 }, { "epoch": 3.1998779991866613, "grad_norm": 0.2680128216743469, "learning_rate": 3.4553200536782205e-06, "loss": 0.2991, "step": 31474 }, { "epoch": 3.19997966653111, "grad_norm": 0.2610492706298828, "learning_rate": 3.4549825316057923e-06, "loss": 0.3118, "step": 31475 }, { "epoch": 3.200081333875559, "grad_norm": 0.2607119679450989, "learning_rate": 3.4546450173167955e-06, "loss": 0.3185, "step": 31476 }, { "epoch": 3.200183001220008, "grad_norm": 0.27892830967903137, "learning_rate": 3.454307510812929e-06, "loss": 0.3065, "step": 31477 }, { "epoch": 3.200284668564457, "grad_norm": 0.29742854833602905, "learning_rate": 3.4539700120958925e-06, "loss": 0.3649, "step": 31478 }, { "epoch": 3.200386335908906, "grad_norm": 0.2840805947780609, "learning_rate": 3.453632521167391e-06, "loss": 0.2975, "step": 31479 }, { "epoch": 3.200488003253355, "grad_norm": 0.2722153067588806, "learning_rate": 3.4532950380291193e-06, "loss": 0.3089, "step": 31480 }, { "epoch": 3.200589670597804, "grad_norm": 0.3050794303417206, "learning_rate": 3.4529575626827804e-06, "loss": 0.3002, "step": 31481 }, { "epoch": 3.200691337942253, "grad_norm": 0.26588600873947144, "learning_rate": 3.4526200951300733e-06, "loss": 0.3421, "step": 31482 }, { "epoch": 3.2007930052867017, "grad_norm": 0.28169578313827515, "learning_rate": 3.452282635372697e-06, "loss": 0.3129, "step": 31483 }, { "epoch": 3.2008946726311507, "grad_norm": 0.2873624861240387, "learning_rate": 3.4519451834123564e-06, "loss": 0.3296, "step": 31484 }, { "epoch": 3.2009963399755996, "grad_norm": 0.3038838505744934, "learning_rate": 3.4516077392507454e-06, "loss": 0.3179, "step": 31485 }, { "epoch": 3.2010980073200486, "grad_norm": 0.26907482743263245, "learning_rate": 3.451270302889568e-06, "loss": 0.3232, "step": 31486 }, { "epoch": 3.201199674664498, "grad_norm": 0.26723888516426086, "learning_rate": 3.4509328743305215e-06, "loss": 0.288, "step": 31487 }, { "epoch": 3.201301342008947, "grad_norm": 0.2626645565032959, "learning_rate": 3.450595453575307e-06, "loss": 0.2643, "step": 31488 }, { "epoch": 3.201403009353396, "grad_norm": 0.2570703327655792, "learning_rate": 3.4502580406256262e-06, "loss": 0.2969, "step": 31489 }, { "epoch": 3.2015046766978448, "grad_norm": 0.28949010372161865, "learning_rate": 3.449920635483176e-06, "loss": 0.3171, "step": 31490 }, { "epoch": 3.2016063440422937, "grad_norm": 0.2966468930244446, "learning_rate": 3.449583238149658e-06, "loss": 0.318, "step": 31491 }, { "epoch": 3.2017080113867427, "grad_norm": 0.28162896633148193, "learning_rate": 3.44924584862677e-06, "loss": 0.2943, "step": 31492 }, { "epoch": 3.2018096787311916, "grad_norm": 0.2765140235424042, "learning_rate": 3.4489084669162132e-06, "loss": 0.3028, "step": 31493 }, { "epoch": 3.2019113460756405, "grad_norm": 0.26189231872558594, "learning_rate": 3.448571093019688e-06, "loss": 0.359, "step": 31494 }, { "epoch": 3.2020130134200895, "grad_norm": 0.26031506061553955, "learning_rate": 3.448233726938892e-06, "loss": 0.3186, "step": 31495 }, { "epoch": 3.2021146807645384, "grad_norm": 0.26415833830833435, "learning_rate": 3.447896368675526e-06, "loss": 0.3099, "step": 31496 }, { "epoch": 3.2022163481089874, "grad_norm": 0.28598764538764954, "learning_rate": 3.447559018231289e-06, "loss": 0.311, "step": 31497 }, { "epoch": 3.2023180154534363, "grad_norm": 0.2705754041671753, "learning_rate": 3.44722167560788e-06, "loss": 0.3499, "step": 31498 }, { "epoch": 3.2024196827978852, "grad_norm": 0.27983006834983826, "learning_rate": 3.4468843408070012e-06, "loss": 0.2829, "step": 31499 }, { "epoch": 3.202521350142334, "grad_norm": 0.2535160183906555, "learning_rate": 3.4465470138303486e-06, "loss": 0.3274, "step": 31500 }, { "epoch": 3.202623017486783, "grad_norm": 0.28176331520080566, "learning_rate": 3.446209694679624e-06, "loss": 0.3233, "step": 31501 }, { "epoch": 3.202724684831232, "grad_norm": 0.2740701735019684, "learning_rate": 3.4458723833565244e-06, "loss": 0.2955, "step": 31502 }, { "epoch": 3.202826352175681, "grad_norm": 0.2964545488357544, "learning_rate": 3.4455350798627507e-06, "loss": 0.3323, "step": 31503 }, { "epoch": 3.20292801952013, "grad_norm": 0.2757190763950348, "learning_rate": 3.4451977842000028e-06, "loss": 0.3368, "step": 31504 }, { "epoch": 3.203029686864579, "grad_norm": 0.2624795436859131, "learning_rate": 3.444860496369978e-06, "loss": 0.3115, "step": 31505 }, { "epoch": 3.2031313542090283, "grad_norm": 0.27052491903305054, "learning_rate": 3.444523216374378e-06, "loss": 0.315, "step": 31506 }, { "epoch": 3.2032330215534772, "grad_norm": 0.2612038552761078, "learning_rate": 3.4441859442148984e-06, "loss": 0.2886, "step": 31507 }, { "epoch": 3.203334688897926, "grad_norm": 0.2610913813114166, "learning_rate": 3.443848679893241e-06, "loss": 0.3027, "step": 31508 }, { "epoch": 3.203436356242375, "grad_norm": 0.2932484447956085, "learning_rate": 3.443511423411105e-06, "loss": 0.3407, "step": 31509 }, { "epoch": 3.203538023586824, "grad_norm": 0.28619956970214844, "learning_rate": 3.4431741747701876e-06, "loss": 0.3221, "step": 31510 }, { "epoch": 3.203639690931273, "grad_norm": 0.27871638536453247, "learning_rate": 3.4428369339721896e-06, "loss": 0.3024, "step": 31511 }, { "epoch": 3.203741358275722, "grad_norm": 0.2683030664920807, "learning_rate": 3.442499701018808e-06, "loss": 0.3086, "step": 31512 }, { "epoch": 3.203843025620171, "grad_norm": 0.24111692607402802, "learning_rate": 3.4421624759117435e-06, "loss": 0.3178, "step": 31513 }, { "epoch": 3.20394469296462, "grad_norm": 0.27636855840682983, "learning_rate": 3.4418252586526945e-06, "loss": 0.3046, "step": 31514 }, { "epoch": 3.2040463603090688, "grad_norm": 0.2734081447124481, "learning_rate": 3.4414880492433582e-06, "loss": 0.2952, "step": 31515 }, { "epoch": 3.2041480276535177, "grad_norm": 0.26792195439338684, "learning_rate": 3.4411508476854367e-06, "loss": 0.2766, "step": 31516 }, { "epoch": 3.2042496949979666, "grad_norm": 0.27997809648513794, "learning_rate": 3.4408136539806253e-06, "loss": 0.3355, "step": 31517 }, { "epoch": 3.2043513623424156, "grad_norm": 0.26336175203323364, "learning_rate": 3.440476468130624e-06, "loss": 0.2753, "step": 31518 }, { "epoch": 3.2044530296868645, "grad_norm": 0.29683005809783936, "learning_rate": 3.4401392901371333e-06, "loss": 0.3261, "step": 31519 }, { "epoch": 3.2045546970313135, "grad_norm": 0.290052592754364, "learning_rate": 3.4398021200018484e-06, "loss": 0.3088, "step": 31520 }, { "epoch": 3.2046563643757624, "grad_norm": 0.267562597990036, "learning_rate": 3.4394649577264715e-06, "loss": 0.3472, "step": 31521 }, { "epoch": 3.2047580317202113, "grad_norm": 0.28751641511917114, "learning_rate": 3.4391278033126974e-06, "loss": 0.3441, "step": 31522 }, { "epoch": 3.2048596990646603, "grad_norm": 0.26598623394966125, "learning_rate": 3.4387906567622276e-06, "loss": 0.2923, "step": 31523 }, { "epoch": 3.2049613664091092, "grad_norm": 0.2729869484901428, "learning_rate": 3.4384535180767598e-06, "loss": 0.318, "step": 31524 }, { "epoch": 3.205063033753558, "grad_norm": 0.262008935213089, "learning_rate": 3.438116387257991e-06, "loss": 0.3013, "step": 31525 }, { "epoch": 3.205164701098007, "grad_norm": 0.2799299657344818, "learning_rate": 3.4377792643076213e-06, "loss": 0.2993, "step": 31526 }, { "epoch": 3.205266368442456, "grad_norm": 0.2621058523654938, "learning_rate": 3.437442149227348e-06, "loss": 0.3312, "step": 31527 }, { "epoch": 3.2053680357869054, "grad_norm": 0.26764631271362305, "learning_rate": 3.43710504201887e-06, "loss": 0.3198, "step": 31528 }, { "epoch": 3.2054697031313544, "grad_norm": 0.25290560722351074, "learning_rate": 3.436767942683886e-06, "loss": 0.3356, "step": 31529 }, { "epoch": 3.2055713704758033, "grad_norm": 0.3190860152244568, "learning_rate": 3.4364308512240928e-06, "loss": 0.2991, "step": 31530 }, { "epoch": 3.2056730378202523, "grad_norm": 0.27761775255203247, "learning_rate": 3.4360937676411908e-06, "loss": 0.3217, "step": 31531 }, { "epoch": 3.205774705164701, "grad_norm": 0.2694683372974396, "learning_rate": 3.4357566919368755e-06, "loss": 0.3056, "step": 31532 }, { "epoch": 3.20587637250915, "grad_norm": 0.27818432450294495, "learning_rate": 3.4354196241128458e-06, "loss": 0.2867, "step": 31533 }, { "epoch": 3.205978039853599, "grad_norm": 0.26680541038513184, "learning_rate": 3.435082564170802e-06, "loss": 0.3144, "step": 31534 }, { "epoch": 3.206079707198048, "grad_norm": 0.27993080019950867, "learning_rate": 3.4347455121124384e-06, "loss": 0.3406, "step": 31535 }, { "epoch": 3.206181374542497, "grad_norm": 0.2706533372402191, "learning_rate": 3.434408467939456e-06, "loss": 0.3332, "step": 31536 }, { "epoch": 3.206283041886946, "grad_norm": 0.2785629630088806, "learning_rate": 3.4340714316535517e-06, "loss": 0.3112, "step": 31537 }, { "epoch": 3.206384709231395, "grad_norm": 0.2761799991130829, "learning_rate": 3.433734403256423e-06, "loss": 0.319, "step": 31538 }, { "epoch": 3.206486376575844, "grad_norm": 0.2754649519920349, "learning_rate": 3.433397382749769e-06, "loss": 0.3136, "step": 31539 }, { "epoch": 3.2065880439202927, "grad_norm": 0.26720067858695984, "learning_rate": 3.4330603701352862e-06, "loss": 0.2805, "step": 31540 }, { "epoch": 3.2066897112647417, "grad_norm": 0.27901843190193176, "learning_rate": 3.432723365414673e-06, "loss": 0.2932, "step": 31541 }, { "epoch": 3.2067913786091906, "grad_norm": 0.3087017238140106, "learning_rate": 3.4323863685896265e-06, "loss": 0.3306, "step": 31542 }, { "epoch": 3.2068930459536396, "grad_norm": 0.2877351641654968, "learning_rate": 3.432049379661845e-06, "loss": 0.314, "step": 31543 }, { "epoch": 3.2069947132980885, "grad_norm": 0.274966835975647, "learning_rate": 3.431712398633027e-06, "loss": 0.3021, "step": 31544 }, { "epoch": 3.2070963806425374, "grad_norm": 0.26480165123939514, "learning_rate": 3.431375425504868e-06, "loss": 0.3531, "step": 31545 }, { "epoch": 3.2071980479869864, "grad_norm": 0.2771182358264923, "learning_rate": 3.4310384602790686e-06, "loss": 0.2924, "step": 31546 }, { "epoch": 3.2072997153314358, "grad_norm": 0.29693296551704407, "learning_rate": 3.430701502957323e-06, "loss": 0.3172, "step": 31547 }, { "epoch": 3.2074013826758847, "grad_norm": 0.27033549547195435, "learning_rate": 3.4303645535413306e-06, "loss": 0.3139, "step": 31548 }, { "epoch": 3.2075030500203336, "grad_norm": 0.2747660279273987, "learning_rate": 3.430027612032789e-06, "loss": 0.3138, "step": 31549 }, { "epoch": 3.2076047173647826, "grad_norm": 0.2887779474258423, "learning_rate": 3.429690678433395e-06, "loss": 0.3203, "step": 31550 }, { "epoch": 3.2077063847092315, "grad_norm": 0.26335984468460083, "learning_rate": 3.4293537527448474e-06, "loss": 0.3089, "step": 31551 }, { "epoch": 3.2078080520536805, "grad_norm": 0.2754000723361969, "learning_rate": 3.42901683496884e-06, "loss": 0.3085, "step": 31552 }, { "epoch": 3.2079097193981294, "grad_norm": 0.26843440532684326, "learning_rate": 3.4286799251070737e-06, "loss": 0.2792, "step": 31553 }, { "epoch": 3.2080113867425784, "grad_norm": 0.27391180396080017, "learning_rate": 3.4283430231612448e-06, "loss": 0.2967, "step": 31554 }, { "epoch": 3.2081130540870273, "grad_norm": 0.27637457847595215, "learning_rate": 3.4280061291330495e-06, "loss": 0.295, "step": 31555 }, { "epoch": 3.2082147214314762, "grad_norm": 0.27251318097114563, "learning_rate": 3.427669243024187e-06, "loss": 0.3273, "step": 31556 }, { "epoch": 3.208316388775925, "grad_norm": 0.2758137583732605, "learning_rate": 3.427332364836352e-06, "loss": 0.2923, "step": 31557 }, { "epoch": 3.208418056120374, "grad_norm": 0.2767293453216553, "learning_rate": 3.4269954945712423e-06, "loss": 0.3052, "step": 31558 }, { "epoch": 3.208519723464823, "grad_norm": 0.27589091658592224, "learning_rate": 3.4266586322305573e-06, "loss": 0.31, "step": 31559 }, { "epoch": 3.208621390809272, "grad_norm": 0.2813827693462372, "learning_rate": 3.426321777815991e-06, "loss": 0.298, "step": 31560 }, { "epoch": 3.208723058153721, "grad_norm": 0.3231019973754883, "learning_rate": 3.4259849313292425e-06, "loss": 0.3265, "step": 31561 }, { "epoch": 3.20882472549817, "grad_norm": 0.27133604884147644, "learning_rate": 3.4256480927720073e-06, "loss": 0.2952, "step": 31562 }, { "epoch": 3.208926392842619, "grad_norm": 0.27183306217193604, "learning_rate": 3.4253112621459828e-06, "loss": 0.2948, "step": 31563 }, { "epoch": 3.2090280601870678, "grad_norm": 0.27871257066726685, "learning_rate": 3.4249744394528665e-06, "loss": 0.2927, "step": 31564 }, { "epoch": 3.2091297275315167, "grad_norm": 0.25512829422950745, "learning_rate": 3.4246376246943535e-06, "loss": 0.3284, "step": 31565 }, { "epoch": 3.2092313948759656, "grad_norm": 0.2711549699306488, "learning_rate": 3.424300817872144e-06, "loss": 0.3161, "step": 31566 }, { "epoch": 3.2093330622204146, "grad_norm": 0.2921518385410309, "learning_rate": 3.4239640189879304e-06, "loss": 0.3002, "step": 31567 }, { "epoch": 3.2094347295648635, "grad_norm": 0.24705827236175537, "learning_rate": 3.423627228043412e-06, "loss": 0.2903, "step": 31568 }, { "epoch": 3.209536396909313, "grad_norm": 0.286138117313385, "learning_rate": 3.423290445040286e-06, "loss": 0.2974, "step": 31569 }, { "epoch": 3.209638064253762, "grad_norm": 0.2662559449672699, "learning_rate": 3.4229536699802467e-06, "loss": 0.3209, "step": 31570 }, { "epoch": 3.209739731598211, "grad_norm": 0.286449134349823, "learning_rate": 3.4226169028649936e-06, "loss": 0.3274, "step": 31571 }, { "epoch": 3.2098413989426597, "grad_norm": 0.26954248547554016, "learning_rate": 3.42228014369622e-06, "loss": 0.3071, "step": 31572 }, { "epoch": 3.2099430662871087, "grad_norm": 0.2931863069534302, "learning_rate": 3.4219433924756247e-06, "loss": 0.2997, "step": 31573 }, { "epoch": 3.2100447336315576, "grad_norm": 0.2950306534767151, "learning_rate": 3.421606649204904e-06, "loss": 0.2778, "step": 31574 }, { "epoch": 3.2101464009760066, "grad_norm": 0.2891867160797119, "learning_rate": 3.4212699138857536e-06, "loss": 0.3109, "step": 31575 }, { "epoch": 3.2102480683204555, "grad_norm": 0.2700175344944, "learning_rate": 3.4209331865198707e-06, "loss": 0.2836, "step": 31576 }, { "epoch": 3.2103497356649044, "grad_norm": 0.2811896800994873, "learning_rate": 3.420596467108951e-06, "loss": 0.3129, "step": 31577 }, { "epoch": 3.2104514030093534, "grad_norm": 0.26827141642570496, "learning_rate": 3.4202597556546903e-06, "loss": 0.3092, "step": 31578 }, { "epoch": 3.2105530703538023, "grad_norm": 0.2855365574359894, "learning_rate": 3.4199230521587863e-06, "loss": 0.3019, "step": 31579 }, { "epoch": 3.2106547376982513, "grad_norm": 0.2637193500995636, "learning_rate": 3.4195863566229338e-06, "loss": 0.3112, "step": 31580 }, { "epoch": 3.2107564050427, "grad_norm": 0.25345683097839355, "learning_rate": 3.419249669048831e-06, "loss": 0.2866, "step": 31581 }, { "epoch": 3.210858072387149, "grad_norm": 0.2647593319416046, "learning_rate": 3.418912989438171e-06, "loss": 0.3237, "step": 31582 }, { "epoch": 3.210959739731598, "grad_norm": 0.27400636672973633, "learning_rate": 3.418576317792652e-06, "loss": 0.3346, "step": 31583 }, { "epoch": 3.211061407076047, "grad_norm": 0.2620614767074585, "learning_rate": 3.4182396541139706e-06, "loss": 0.3272, "step": 31584 }, { "epoch": 3.211163074420496, "grad_norm": 0.2896047532558441, "learning_rate": 3.417902998403821e-06, "loss": 0.3085, "step": 31585 }, { "epoch": 3.211264741764945, "grad_norm": 0.2826679050922394, "learning_rate": 3.417566350663902e-06, "loss": 0.3198, "step": 31586 }, { "epoch": 3.211366409109394, "grad_norm": 0.26313361525535583, "learning_rate": 3.417229710895905e-06, "loss": 0.3286, "step": 31587 }, { "epoch": 3.2114680764538432, "grad_norm": 0.2657524049282074, "learning_rate": 3.41689307910153e-06, "loss": 0.3003, "step": 31588 }, { "epoch": 3.211569743798292, "grad_norm": 0.2742559015750885, "learning_rate": 3.4165564552824717e-06, "loss": 0.317, "step": 31589 }, { "epoch": 3.211671411142741, "grad_norm": 0.31004053354263306, "learning_rate": 3.4162198394404246e-06, "loss": 0.3109, "step": 31590 }, { "epoch": 3.21177307848719, "grad_norm": 0.2672511041164398, "learning_rate": 3.4158832315770887e-06, "loss": 0.3018, "step": 31591 }, { "epoch": 3.211874745831639, "grad_norm": 0.27541011571884155, "learning_rate": 3.415546631694152e-06, "loss": 0.2981, "step": 31592 }, { "epoch": 3.211976413176088, "grad_norm": 0.25114715099334717, "learning_rate": 3.4152100397933167e-06, "loss": 0.335, "step": 31593 }, { "epoch": 3.212078080520537, "grad_norm": 0.27520695328712463, "learning_rate": 3.4148734558762777e-06, "loss": 0.3304, "step": 31594 }, { "epoch": 3.212179747864986, "grad_norm": 0.3087407052516937, "learning_rate": 3.4145368799447286e-06, "loss": 0.3329, "step": 31595 }, { "epoch": 3.2122814152094348, "grad_norm": 0.2782611548900604, "learning_rate": 3.4142003120003673e-06, "loss": 0.3183, "step": 31596 }, { "epoch": 3.2123830825538837, "grad_norm": 0.2688508629798889, "learning_rate": 3.413863752044886e-06, "loss": 0.2859, "step": 31597 }, { "epoch": 3.2124847498983327, "grad_norm": 0.27149197459220886, "learning_rate": 3.4135272000799824e-06, "loss": 0.302, "step": 31598 }, { "epoch": 3.2125864172427816, "grad_norm": 0.26532402634620667, "learning_rate": 3.413190656107353e-06, "loss": 0.3122, "step": 31599 }, { "epoch": 3.2126880845872305, "grad_norm": 0.32188844680786133, "learning_rate": 3.4128541201286914e-06, "loss": 0.3197, "step": 31600 }, { "epoch": 3.2127897519316795, "grad_norm": 0.26717275381088257, "learning_rate": 3.412517592145695e-06, "loss": 0.3243, "step": 31601 }, { "epoch": 3.2128914192761284, "grad_norm": 0.2744409143924713, "learning_rate": 3.4121810721600546e-06, "loss": 0.3288, "step": 31602 }, { "epoch": 3.2129930866205774, "grad_norm": 0.2780041992664337, "learning_rate": 3.41184456017347e-06, "loss": 0.2842, "step": 31603 }, { "epoch": 3.2130947539650263, "grad_norm": 0.26382139325141907, "learning_rate": 3.4115080561876367e-06, "loss": 0.3091, "step": 31604 }, { "epoch": 3.2131964213094752, "grad_norm": 0.29609623551368713, "learning_rate": 3.411171560204246e-06, "loss": 0.3131, "step": 31605 }, { "epoch": 3.213298088653924, "grad_norm": 0.2761962115764618, "learning_rate": 3.4108350722249984e-06, "loss": 0.3057, "step": 31606 }, { "epoch": 3.213399755998373, "grad_norm": 0.2585969865322113, "learning_rate": 3.4104985922515825e-06, "loss": 0.296, "step": 31607 }, { "epoch": 3.213501423342822, "grad_norm": 0.2745146155357361, "learning_rate": 3.410162120285698e-06, "loss": 0.32, "step": 31608 }, { "epoch": 3.213603090687271, "grad_norm": 0.28662794828414917, "learning_rate": 3.4098256563290416e-06, "loss": 0.3312, "step": 31609 }, { "epoch": 3.2137047580317204, "grad_norm": 0.24683699011802673, "learning_rate": 3.4094892003833022e-06, "loss": 0.2797, "step": 31610 }, { "epoch": 3.2138064253761693, "grad_norm": 0.2828526496887207, "learning_rate": 3.409152752450181e-06, "loss": 0.3131, "step": 31611 }, { "epoch": 3.2139080927206183, "grad_norm": 0.27673202753067017, "learning_rate": 3.4088163125313677e-06, "loss": 0.2796, "step": 31612 }, { "epoch": 3.2140097600650672, "grad_norm": 0.2684062123298645, "learning_rate": 3.40847988062856e-06, "loss": 0.2924, "step": 31613 }, { "epoch": 3.214111427409516, "grad_norm": 0.27365559339523315, "learning_rate": 3.4081434567434558e-06, "loss": 0.3128, "step": 31614 }, { "epoch": 3.214213094753965, "grad_norm": 0.27909713983535767, "learning_rate": 3.407807040877742e-06, "loss": 0.327, "step": 31615 }, { "epoch": 3.214314762098414, "grad_norm": 0.2704311013221741, "learning_rate": 3.407470633033122e-06, "loss": 0.2905, "step": 31616 }, { "epoch": 3.214416429442863, "grad_norm": 0.28287240862846375, "learning_rate": 3.4071342332112834e-06, "loss": 0.2965, "step": 31617 }, { "epoch": 3.214518096787312, "grad_norm": 0.3003832697868347, "learning_rate": 3.4067978414139245e-06, "loss": 0.3133, "step": 31618 }, { "epoch": 3.214619764131761, "grad_norm": 0.2681601345539093, "learning_rate": 3.4064614576427415e-06, "loss": 0.314, "step": 31619 }, { "epoch": 3.21472143147621, "grad_norm": 0.2580876350402832, "learning_rate": 3.406125081899424e-06, "loss": 0.3248, "step": 31620 }, { "epoch": 3.2148230988206588, "grad_norm": 0.27085912227630615, "learning_rate": 3.4057887141856734e-06, "loss": 0.3226, "step": 31621 }, { "epoch": 3.2149247661651077, "grad_norm": 0.2933235466480255, "learning_rate": 3.4054523545031774e-06, "loss": 0.3127, "step": 31622 }, { "epoch": 3.2150264335095566, "grad_norm": 0.32287800312042236, "learning_rate": 3.4051160028536335e-06, "loss": 0.2887, "step": 31623 }, { "epoch": 3.2151281008540056, "grad_norm": 0.2848403751850128, "learning_rate": 3.4047796592387384e-06, "loss": 0.3257, "step": 31624 }, { "epoch": 3.2152297681984545, "grad_norm": 0.3042222857475281, "learning_rate": 3.4044433236601814e-06, "loss": 0.3715, "step": 31625 }, { "epoch": 3.2153314355429035, "grad_norm": 0.26873019337654114, "learning_rate": 3.4041069961196626e-06, "loss": 0.2894, "step": 31626 }, { "epoch": 3.2154331028873524, "grad_norm": 0.2624153792858124, "learning_rate": 3.403770676618872e-06, "loss": 0.3332, "step": 31627 }, { "epoch": 3.2155347702318013, "grad_norm": 0.2790779769420624, "learning_rate": 3.4034343651595043e-06, "loss": 0.2981, "step": 31628 }, { "epoch": 3.2156364375762507, "grad_norm": 0.26764291524887085, "learning_rate": 3.4030980617432576e-06, "loss": 0.3317, "step": 31629 }, { "epoch": 3.2157381049206997, "grad_norm": 0.278507262468338, "learning_rate": 3.4027617663718206e-06, "loss": 0.3123, "step": 31630 }, { "epoch": 3.2158397722651486, "grad_norm": 0.27412793040275574, "learning_rate": 3.4024254790468935e-06, "loss": 0.292, "step": 31631 }, { "epoch": 3.2159414396095976, "grad_norm": 0.2884068489074707, "learning_rate": 3.402089199770164e-06, "loss": 0.3329, "step": 31632 }, { "epoch": 3.2160431069540465, "grad_norm": 0.2740170359611511, "learning_rate": 3.40175292854333e-06, "loss": 0.2924, "step": 31633 }, { "epoch": 3.2161447742984954, "grad_norm": 0.3015139698982239, "learning_rate": 3.401416665368087e-06, "loss": 0.3385, "step": 31634 }, { "epoch": 3.2162464416429444, "grad_norm": 0.26959073543548584, "learning_rate": 3.4010804102461247e-06, "loss": 0.3158, "step": 31635 }, { "epoch": 3.2163481089873933, "grad_norm": 0.2709292769432068, "learning_rate": 3.4007441631791417e-06, "loss": 0.335, "step": 31636 }, { "epoch": 3.2164497763318423, "grad_norm": 0.25664713978767395, "learning_rate": 3.4004079241688277e-06, "loss": 0.3164, "step": 31637 }, { "epoch": 3.216551443676291, "grad_norm": 0.2847367525100708, "learning_rate": 3.4000716932168777e-06, "loss": 0.2978, "step": 31638 }, { "epoch": 3.21665311102074, "grad_norm": 0.2882636785507202, "learning_rate": 3.3997354703249887e-06, "loss": 0.2933, "step": 31639 }, { "epoch": 3.216754778365189, "grad_norm": 0.28828662633895874, "learning_rate": 3.3993992554948497e-06, "loss": 0.29, "step": 31640 }, { "epoch": 3.216856445709638, "grad_norm": 0.28842630982398987, "learning_rate": 3.3990630487281595e-06, "loss": 0.3183, "step": 31641 }, { "epoch": 3.216958113054087, "grad_norm": 0.2590475380420685, "learning_rate": 3.3987268500266074e-06, "loss": 0.2969, "step": 31642 }, { "epoch": 3.217059780398536, "grad_norm": 0.29975426197052, "learning_rate": 3.398390659391888e-06, "loss": 0.2895, "step": 31643 }, { "epoch": 3.217161447742985, "grad_norm": 0.2634945511817932, "learning_rate": 3.3980544768256985e-06, "loss": 0.3125, "step": 31644 }, { "epoch": 3.217263115087434, "grad_norm": 0.2711414694786072, "learning_rate": 3.3977183023297285e-06, "loss": 0.3187, "step": 31645 }, { "epoch": 3.2173647824318827, "grad_norm": 0.26898038387298584, "learning_rate": 3.3973821359056734e-06, "loss": 0.3248, "step": 31646 }, { "epoch": 3.2174664497763317, "grad_norm": 0.2785089910030365, "learning_rate": 3.3970459775552255e-06, "loss": 0.3152, "step": 31647 }, { "epoch": 3.2175681171207806, "grad_norm": 0.27432939410209656, "learning_rate": 3.3967098272800797e-06, "loss": 0.3187, "step": 31648 }, { "epoch": 3.2176697844652296, "grad_norm": 0.2727403938770294, "learning_rate": 3.3963736850819284e-06, "loss": 0.3064, "step": 31649 }, { "epoch": 3.2177714518096785, "grad_norm": 0.27846553921699524, "learning_rate": 3.3960375509624643e-06, "loss": 0.2954, "step": 31650 }, { "epoch": 3.217873119154128, "grad_norm": 0.2775120139122009, "learning_rate": 3.3957014249233845e-06, "loss": 0.3003, "step": 31651 }, { "epoch": 3.217974786498577, "grad_norm": 0.2463255226612091, "learning_rate": 3.395365306966377e-06, "loss": 0.312, "step": 31652 }, { "epoch": 3.2180764538430258, "grad_norm": 0.2676302492618561, "learning_rate": 3.3950291970931398e-06, "loss": 0.2933, "step": 31653 }, { "epoch": 3.2181781211874747, "grad_norm": 0.2886979579925537, "learning_rate": 3.3946930953053625e-06, "loss": 0.3104, "step": 31654 }, { "epoch": 3.2182797885319236, "grad_norm": 0.2825796902179718, "learning_rate": 3.3943570016047407e-06, "loss": 0.3086, "step": 31655 }, { "epoch": 3.2183814558763726, "grad_norm": 0.27237266302108765, "learning_rate": 3.394020915992967e-06, "loss": 0.3388, "step": 31656 }, { "epoch": 3.2184831232208215, "grad_norm": 0.27008357644081116, "learning_rate": 3.3936848384717335e-06, "loss": 0.2934, "step": 31657 }, { "epoch": 3.2185847905652705, "grad_norm": 0.296867698431015, "learning_rate": 3.3933487690427357e-06, "loss": 0.3033, "step": 31658 }, { "epoch": 3.2186864579097194, "grad_norm": 0.27562636137008667, "learning_rate": 3.3930127077076634e-06, "loss": 0.3465, "step": 31659 }, { "epoch": 3.2187881252541684, "grad_norm": 0.28257688879966736, "learning_rate": 3.392676654468211e-06, "loss": 0.3329, "step": 31660 }, { "epoch": 3.2188897925986173, "grad_norm": 0.27759045362472534, "learning_rate": 3.392340609326073e-06, "loss": 0.3416, "step": 31661 }, { "epoch": 3.2189914599430662, "grad_norm": 0.25370267033576965, "learning_rate": 3.39200457228294e-06, "loss": 0.2987, "step": 31662 }, { "epoch": 3.219093127287515, "grad_norm": 0.2676805257797241, "learning_rate": 3.391668543340507e-06, "loss": 0.3071, "step": 31663 }, { "epoch": 3.219194794631964, "grad_norm": 0.2563457787036896, "learning_rate": 3.3913325225004644e-06, "loss": 0.319, "step": 31664 }, { "epoch": 3.219296461976413, "grad_norm": 0.257485568523407, "learning_rate": 3.390996509764506e-06, "loss": 0.3106, "step": 31665 }, { "epoch": 3.219398129320862, "grad_norm": 0.2714800536632538, "learning_rate": 3.390660505134326e-06, "loss": 0.3137, "step": 31666 }, { "epoch": 3.219499796665311, "grad_norm": 0.28218939900398254, "learning_rate": 3.3903245086116153e-06, "loss": 0.3038, "step": 31667 }, { "epoch": 3.21960146400976, "grad_norm": 0.28281232714653015, "learning_rate": 3.3899885201980677e-06, "loss": 0.2966, "step": 31668 }, { "epoch": 3.219703131354209, "grad_norm": 0.30197596549987793, "learning_rate": 3.389652539895375e-06, "loss": 0.2941, "step": 31669 }, { "epoch": 3.219804798698658, "grad_norm": 0.2649538218975067, "learning_rate": 3.38931656770523e-06, "loss": 0.3114, "step": 31670 }, { "epoch": 3.219906466043107, "grad_norm": 0.2927131652832031, "learning_rate": 3.388980603629326e-06, "loss": 0.2988, "step": 31671 }, { "epoch": 3.220008133387556, "grad_norm": 0.27490732073783875, "learning_rate": 3.3886446476693537e-06, "loss": 0.2698, "step": 31672 }, { "epoch": 3.220109800732005, "grad_norm": 0.2687177062034607, "learning_rate": 3.388308699827008e-06, "loss": 0.3236, "step": 31673 }, { "epoch": 3.220211468076454, "grad_norm": 0.2689780592918396, "learning_rate": 3.387972760103979e-06, "loss": 0.3159, "step": 31674 }, { "epoch": 3.220313135420903, "grad_norm": 0.2805878221988678, "learning_rate": 3.38763682850196e-06, "loss": 0.2784, "step": 31675 }, { "epoch": 3.220414802765352, "grad_norm": 0.29720422625541687, "learning_rate": 3.3873009050226454e-06, "loss": 0.2976, "step": 31676 }, { "epoch": 3.220516470109801, "grad_norm": 0.2762324810028076, "learning_rate": 3.3869649896677236e-06, "loss": 0.3034, "step": 31677 }, { "epoch": 3.2206181374542497, "grad_norm": 0.2884569764137268, "learning_rate": 3.3866290824388903e-06, "loss": 0.3332, "step": 31678 }, { "epoch": 3.2207198047986987, "grad_norm": 0.2786029875278473, "learning_rate": 3.3862931833378353e-06, "loss": 0.3137, "step": 31679 }, { "epoch": 3.2208214721431476, "grad_norm": 0.276830792427063, "learning_rate": 3.385957292366252e-06, "loss": 0.3566, "step": 31680 }, { "epoch": 3.2209231394875966, "grad_norm": 0.2787798047065735, "learning_rate": 3.385621409525833e-06, "loss": 0.3036, "step": 31681 }, { "epoch": 3.2210248068320455, "grad_norm": 0.26878103613853455, "learning_rate": 3.3852855348182684e-06, "loss": 0.2915, "step": 31682 }, { "epoch": 3.2211264741764944, "grad_norm": 0.28116557002067566, "learning_rate": 3.384949668245253e-06, "loss": 0.3387, "step": 31683 }, { "epoch": 3.2212281415209434, "grad_norm": 0.2725590467453003, "learning_rate": 3.384613809808476e-06, "loss": 0.3348, "step": 31684 }, { "epoch": 3.2213298088653923, "grad_norm": 0.2531300485134125, "learning_rate": 3.384277959509631e-06, "loss": 0.2899, "step": 31685 }, { "epoch": 3.2214314762098413, "grad_norm": 0.25690245628356934, "learning_rate": 3.3839421173504104e-06, "loss": 0.3395, "step": 31686 }, { "epoch": 3.22153314355429, "grad_norm": 0.2845574915409088, "learning_rate": 3.383606283332505e-06, "loss": 0.3253, "step": 31687 }, { "epoch": 3.221634810898739, "grad_norm": 0.30089622735977173, "learning_rate": 3.383270457457608e-06, "loss": 0.3056, "step": 31688 }, { "epoch": 3.221736478243188, "grad_norm": 0.26599839329719543, "learning_rate": 3.382934639727409e-06, "loss": 0.3421, "step": 31689 }, { "epoch": 3.221838145587637, "grad_norm": 0.25243380665779114, "learning_rate": 3.3825988301436007e-06, "loss": 0.3181, "step": 31690 }, { "epoch": 3.221939812932086, "grad_norm": 0.260353684425354, "learning_rate": 3.382263028707876e-06, "loss": 0.3099, "step": 31691 }, { "epoch": 3.2220414802765354, "grad_norm": 0.2760796546936035, "learning_rate": 3.3819272354219257e-06, "loss": 0.316, "step": 31692 }, { "epoch": 3.2221431476209843, "grad_norm": 0.24796761572360992, "learning_rate": 3.3815914502874415e-06, "loss": 0.2913, "step": 31693 }, { "epoch": 3.2222448149654332, "grad_norm": 0.2757909297943115, "learning_rate": 3.381255673306114e-06, "loss": 0.2927, "step": 31694 }, { "epoch": 3.222346482309882, "grad_norm": 0.2771354019641876, "learning_rate": 3.380919904479637e-06, "loss": 0.3243, "step": 31695 }, { "epoch": 3.222448149654331, "grad_norm": 0.280726820230484, "learning_rate": 3.380584143809701e-06, "loss": 0.2753, "step": 31696 }, { "epoch": 3.22254981699878, "grad_norm": 0.2722237706184387, "learning_rate": 3.3802483912979957e-06, "loss": 0.2957, "step": 31697 }, { "epoch": 3.222651484343229, "grad_norm": 0.28439149260520935, "learning_rate": 3.379912646946215e-06, "loss": 0.3061, "step": 31698 }, { "epoch": 3.222753151687678, "grad_norm": 0.2621937096118927, "learning_rate": 3.3795769107560494e-06, "loss": 0.3433, "step": 31699 }, { "epoch": 3.222854819032127, "grad_norm": 0.26754873991012573, "learning_rate": 3.3792411827291898e-06, "loss": 0.31, "step": 31700 }, { "epoch": 3.222956486376576, "grad_norm": 0.2871834337711334, "learning_rate": 3.378905462867329e-06, "loss": 0.3267, "step": 31701 }, { "epoch": 3.2230581537210248, "grad_norm": 0.2995906174182892, "learning_rate": 3.378569751172156e-06, "loss": 0.3096, "step": 31702 }, { "epoch": 3.2231598210654737, "grad_norm": 0.29035693407058716, "learning_rate": 3.3782340476453646e-06, "loss": 0.3163, "step": 31703 }, { "epoch": 3.2232614884099227, "grad_norm": 0.27455583214759827, "learning_rate": 3.3778983522886432e-06, "loss": 0.3004, "step": 31704 }, { "epoch": 3.2233631557543716, "grad_norm": 0.29893818497657776, "learning_rate": 3.3775626651036854e-06, "loss": 0.2949, "step": 31705 }, { "epoch": 3.2234648230988205, "grad_norm": 0.2779400050640106, "learning_rate": 3.377226986092181e-06, "loss": 0.3638, "step": 31706 }, { "epoch": 3.2235664904432695, "grad_norm": 0.29749003052711487, "learning_rate": 3.3768913152558215e-06, "loss": 0.3322, "step": 31707 }, { "epoch": 3.2236681577877184, "grad_norm": 0.25440117716789246, "learning_rate": 3.3765556525962985e-06, "loss": 0.2992, "step": 31708 }, { "epoch": 3.2237698251321674, "grad_norm": 0.2806508243083954, "learning_rate": 3.376219998115301e-06, "loss": 0.2891, "step": 31709 }, { "epoch": 3.2238714924766163, "grad_norm": 0.2829698920249939, "learning_rate": 3.375884351814522e-06, "loss": 0.3277, "step": 31710 }, { "epoch": 3.2239731598210657, "grad_norm": 0.3013439476490021, "learning_rate": 3.375548713695652e-06, "loss": 0.3484, "step": 31711 }, { "epoch": 3.2240748271655146, "grad_norm": 0.27776092290878296, "learning_rate": 3.3752130837603815e-06, "loss": 0.3032, "step": 31712 }, { "epoch": 3.2241764945099636, "grad_norm": 0.28832271695137024, "learning_rate": 3.3748774620104014e-06, "loss": 0.2836, "step": 31713 }, { "epoch": 3.2242781618544125, "grad_norm": 0.2754497528076172, "learning_rate": 3.3745418484474013e-06, "loss": 0.3289, "step": 31714 }, { "epoch": 3.2243798291988615, "grad_norm": 0.27911192178726196, "learning_rate": 3.3742062430730737e-06, "loss": 0.2998, "step": 31715 }, { "epoch": 3.2244814965433104, "grad_norm": 0.26656267046928406, "learning_rate": 3.3738706458891094e-06, "loss": 0.3175, "step": 31716 }, { "epoch": 3.2245831638877593, "grad_norm": 0.2608703076839447, "learning_rate": 3.373535056897198e-06, "loss": 0.3167, "step": 31717 }, { "epoch": 3.2246848312322083, "grad_norm": 0.28963422775268555, "learning_rate": 3.3731994760990304e-06, "loss": 0.2857, "step": 31718 }, { "epoch": 3.2247864985766572, "grad_norm": 0.269567608833313, "learning_rate": 3.3728639034962973e-06, "loss": 0.3179, "step": 31719 }, { "epoch": 3.224888165921106, "grad_norm": 0.28153467178344727, "learning_rate": 3.3725283390906887e-06, "loss": 0.338, "step": 31720 }, { "epoch": 3.224989833265555, "grad_norm": 0.2725074291229248, "learning_rate": 3.372192782883896e-06, "loss": 0.3274, "step": 31721 }, { "epoch": 3.225091500610004, "grad_norm": 0.2785966992378235, "learning_rate": 3.371857234877609e-06, "loss": 0.3, "step": 31722 }, { "epoch": 3.225193167954453, "grad_norm": 0.2904382348060608, "learning_rate": 3.3715216950735198e-06, "loss": 0.343, "step": 31723 }, { "epoch": 3.225294835298902, "grad_norm": 0.26389917731285095, "learning_rate": 3.3711861634733156e-06, "loss": 0.3248, "step": 31724 }, { "epoch": 3.225396502643351, "grad_norm": 0.28016409277915955, "learning_rate": 3.370850640078689e-06, "loss": 0.2716, "step": 31725 }, { "epoch": 3.2254981699878, "grad_norm": 0.2573394775390625, "learning_rate": 3.3705151248913298e-06, "loss": 0.3215, "step": 31726 }, { "epoch": 3.2255998373322488, "grad_norm": 0.2808911204338074, "learning_rate": 3.3701796179129275e-06, "loss": 0.3222, "step": 31727 }, { "epoch": 3.2257015046766977, "grad_norm": 0.2841765284538269, "learning_rate": 3.3698441191451746e-06, "loss": 0.3071, "step": 31728 }, { "epoch": 3.2258031720211466, "grad_norm": 0.2493133693933487, "learning_rate": 3.369508628589758e-06, "loss": 0.3242, "step": 31729 }, { "epoch": 3.2259048393655956, "grad_norm": 0.27767622470855713, "learning_rate": 3.3691731462483696e-06, "loss": 0.2861, "step": 31730 }, { "epoch": 3.2260065067100445, "grad_norm": 0.2725366950035095, "learning_rate": 3.3688376721227e-06, "loss": 0.3069, "step": 31731 }, { "epoch": 3.226108174054494, "grad_norm": 0.29348137974739075, "learning_rate": 3.368502206214438e-06, "loss": 0.2971, "step": 31732 }, { "epoch": 3.226209841398943, "grad_norm": 0.29226207733154297, "learning_rate": 3.3681667485252755e-06, "loss": 0.3026, "step": 31733 }, { "epoch": 3.226311508743392, "grad_norm": 0.30160948634147644, "learning_rate": 3.3678312990568993e-06, "loss": 0.3153, "step": 31734 }, { "epoch": 3.2264131760878407, "grad_norm": 0.28124719858169556, "learning_rate": 3.367495857811002e-06, "loss": 0.315, "step": 31735 }, { "epoch": 3.2265148434322897, "grad_norm": 0.28246062994003296, "learning_rate": 3.367160424789273e-06, "loss": 0.3038, "step": 31736 }, { "epoch": 3.2266165107767386, "grad_norm": 0.27664679288864136, "learning_rate": 3.366824999993401e-06, "loss": 0.2874, "step": 31737 }, { "epoch": 3.2267181781211876, "grad_norm": 0.2869482934474945, "learning_rate": 3.3664895834250787e-06, "loss": 0.2798, "step": 31738 }, { "epoch": 3.2268198454656365, "grad_norm": 0.28468793630599976, "learning_rate": 3.3661541750859903e-06, "loss": 0.3375, "step": 31739 }, { "epoch": 3.2269215128100854, "grad_norm": 0.2865588068962097, "learning_rate": 3.3658187749778302e-06, "loss": 0.3434, "step": 31740 }, { "epoch": 3.2270231801545344, "grad_norm": 0.2918863296508789, "learning_rate": 3.3654833831022883e-06, "loss": 0.3045, "step": 31741 }, { "epoch": 3.2271248474989833, "grad_norm": 0.2553440034389496, "learning_rate": 3.3651479994610514e-06, "loss": 0.3254, "step": 31742 }, { "epoch": 3.2272265148434323, "grad_norm": 0.28914377093315125, "learning_rate": 3.3648126240558123e-06, "loss": 0.3227, "step": 31743 }, { "epoch": 3.227328182187881, "grad_norm": 0.26593637466430664, "learning_rate": 3.364477256888255e-06, "loss": 0.2783, "step": 31744 }, { "epoch": 3.22742984953233, "grad_norm": 0.30478495359420776, "learning_rate": 3.3641418979600737e-06, "loss": 0.332, "step": 31745 }, { "epoch": 3.227531516876779, "grad_norm": 0.302031934261322, "learning_rate": 3.3638065472729585e-06, "loss": 0.3343, "step": 31746 }, { "epoch": 3.227633184221228, "grad_norm": 0.26572999358177185, "learning_rate": 3.3634712048285954e-06, "loss": 0.3062, "step": 31747 }, { "epoch": 3.227734851565677, "grad_norm": 0.28154924511909485, "learning_rate": 3.3631358706286784e-06, "loss": 0.3045, "step": 31748 }, { "epoch": 3.227836518910126, "grad_norm": 0.26910659670829773, "learning_rate": 3.36280054467489e-06, "loss": 0.3107, "step": 31749 }, { "epoch": 3.227938186254575, "grad_norm": 0.25822097063064575, "learning_rate": 3.362465226968925e-06, "loss": 0.3239, "step": 31750 }, { "epoch": 3.228039853599024, "grad_norm": 0.2819763422012329, "learning_rate": 3.362129917512472e-06, "loss": 0.2984, "step": 31751 }, { "epoch": 3.228141520943473, "grad_norm": 0.2829287052154541, "learning_rate": 3.3617946163072173e-06, "loss": 0.307, "step": 31752 }, { "epoch": 3.228243188287922, "grad_norm": 0.27936941385269165, "learning_rate": 3.3614593233548554e-06, "loss": 0.2784, "step": 31753 }, { "epoch": 3.228344855632371, "grad_norm": 0.27992162108421326, "learning_rate": 3.3611240386570677e-06, "loss": 0.2977, "step": 31754 }, { "epoch": 3.22844652297682, "grad_norm": 0.26472604274749756, "learning_rate": 3.36078876221555e-06, "loss": 0.3107, "step": 31755 }, { "epoch": 3.228548190321269, "grad_norm": 0.2673584222793579, "learning_rate": 3.36045349403199e-06, "loss": 0.3345, "step": 31756 }, { "epoch": 3.228649857665718, "grad_norm": 0.2803180515766144, "learning_rate": 3.360118234108073e-06, "loss": 0.3067, "step": 31757 }, { "epoch": 3.228751525010167, "grad_norm": 0.2896328270435333, "learning_rate": 3.3597829824454935e-06, "loss": 0.3444, "step": 31758 }, { "epoch": 3.2288531923546158, "grad_norm": 0.2788149416446686, "learning_rate": 3.3594477390459346e-06, "loss": 0.3117, "step": 31759 }, { "epoch": 3.2289548596990647, "grad_norm": 0.2821606397628784, "learning_rate": 3.3591125039110893e-06, "loss": 0.308, "step": 31760 }, { "epoch": 3.2290565270435136, "grad_norm": 0.28449946641921997, "learning_rate": 3.3587772770426472e-06, "loss": 0.3246, "step": 31761 }, { "epoch": 3.2291581943879626, "grad_norm": 0.28722381591796875, "learning_rate": 3.358442058442292e-06, "loss": 0.3212, "step": 31762 }, { "epoch": 3.2292598617324115, "grad_norm": 0.28613728284835815, "learning_rate": 3.358106848111719e-06, "loss": 0.2977, "step": 31763 }, { "epoch": 3.2293615290768605, "grad_norm": 0.2917858362197876, "learning_rate": 3.3577716460526098e-06, "loss": 0.2987, "step": 31764 }, { "epoch": 3.2294631964213094, "grad_norm": 0.295619398355484, "learning_rate": 3.3574364522666586e-06, "loss": 0.2784, "step": 31765 }, { "epoch": 3.2295648637657584, "grad_norm": 0.2999727725982666, "learning_rate": 3.357101266755554e-06, "loss": 0.2857, "step": 31766 }, { "epoch": 3.2296665311102073, "grad_norm": 0.2674018442630768, "learning_rate": 3.35676608952098e-06, "loss": 0.3086, "step": 31767 }, { "epoch": 3.2297681984546562, "grad_norm": 0.26201000809669495, "learning_rate": 3.3564309205646306e-06, "loss": 0.3112, "step": 31768 }, { "epoch": 3.229869865799105, "grad_norm": 0.25294995307922363, "learning_rate": 3.356095759888188e-06, "loss": 0.2962, "step": 31769 }, { "epoch": 3.229971533143554, "grad_norm": 0.27273204922676086, "learning_rate": 3.3557606074933468e-06, "loss": 0.3193, "step": 31770 }, { "epoch": 3.230073200488003, "grad_norm": 0.26847049593925476, "learning_rate": 3.355425463381794e-06, "loss": 0.3189, "step": 31771 }, { "epoch": 3.230174867832452, "grad_norm": 0.25925102829933167, "learning_rate": 3.3550903275552136e-06, "loss": 0.3032, "step": 31772 }, { "epoch": 3.2302765351769014, "grad_norm": 0.26741090416908264, "learning_rate": 3.354755200015301e-06, "loss": 0.3198, "step": 31773 }, { "epoch": 3.2303782025213503, "grad_norm": 0.27679866552352905, "learning_rate": 3.3544200807637383e-06, "loss": 0.3271, "step": 31774 }, { "epoch": 3.2304798698657993, "grad_norm": 0.2966074049472809, "learning_rate": 3.3540849698022154e-06, "loss": 0.293, "step": 31775 }, { "epoch": 3.230581537210248, "grad_norm": 0.2714381515979767, "learning_rate": 3.353749867132424e-06, "loss": 0.2881, "step": 31776 }, { "epoch": 3.230683204554697, "grad_norm": 0.27440503239631653, "learning_rate": 3.3534147727560474e-06, "loss": 0.2999, "step": 31777 }, { "epoch": 3.230784871899146, "grad_norm": 0.27732473611831665, "learning_rate": 3.353079686674778e-06, "loss": 0.305, "step": 31778 }, { "epoch": 3.230886539243595, "grad_norm": 0.2923791706562042, "learning_rate": 3.3527446088903004e-06, "loss": 0.3464, "step": 31779 }, { "epoch": 3.230988206588044, "grad_norm": 0.28188657760620117, "learning_rate": 3.3524095394043022e-06, "loss": 0.3078, "step": 31780 }, { "epoch": 3.231089873932493, "grad_norm": 0.27177703380584717, "learning_rate": 3.3520744782184765e-06, "loss": 0.3168, "step": 31781 }, { "epoch": 3.231191541276942, "grad_norm": 0.27707046270370483, "learning_rate": 3.3517394253345054e-06, "loss": 0.3089, "step": 31782 }, { "epoch": 3.231293208621391, "grad_norm": 0.26725560426712036, "learning_rate": 3.351404380754082e-06, "loss": 0.3001, "step": 31783 }, { "epoch": 3.2313948759658397, "grad_norm": 0.2650304436683655, "learning_rate": 3.35106934447889e-06, "loss": 0.3232, "step": 31784 }, { "epoch": 3.2314965433102887, "grad_norm": 0.2938868999481201, "learning_rate": 3.3507343165106172e-06, "loss": 0.3167, "step": 31785 }, { "epoch": 3.2315982106547376, "grad_norm": 0.28208601474761963, "learning_rate": 3.3503992968509565e-06, "loss": 0.2897, "step": 31786 }, { "epoch": 3.2316998779991866, "grad_norm": 0.287110835313797, "learning_rate": 3.3500642855015886e-06, "loss": 0.3336, "step": 31787 }, { "epoch": 3.2318015453436355, "grad_norm": 0.2824946641921997, "learning_rate": 3.349729282464208e-06, "loss": 0.3067, "step": 31788 }, { "epoch": 3.2319032126880844, "grad_norm": 0.26184365153312683, "learning_rate": 3.349394287740497e-06, "loss": 0.2904, "step": 31789 }, { "epoch": 3.2320048800325334, "grad_norm": 0.28875863552093506, "learning_rate": 3.349059301332145e-06, "loss": 0.2872, "step": 31790 }, { "epoch": 3.2321065473769823, "grad_norm": 0.3010011315345764, "learning_rate": 3.3487243232408424e-06, "loss": 0.3114, "step": 31791 }, { "epoch": 3.2322082147214313, "grad_norm": 0.26756274700164795, "learning_rate": 3.348389353468271e-06, "loss": 0.298, "step": 31792 }, { "epoch": 3.2323098820658807, "grad_norm": 0.2695279121398926, "learning_rate": 3.3480543920161245e-06, "loss": 0.3114, "step": 31793 }, { "epoch": 3.2324115494103296, "grad_norm": 0.26765453815460205, "learning_rate": 3.3477194388860864e-06, "loss": 0.2885, "step": 31794 }, { "epoch": 3.2325132167547785, "grad_norm": 0.2828138470649719, "learning_rate": 3.347384494079843e-06, "loss": 0.3193, "step": 31795 }, { "epoch": 3.2326148840992275, "grad_norm": 0.3035363554954529, "learning_rate": 3.3470495575990873e-06, "loss": 0.3134, "step": 31796 }, { "epoch": 3.2327165514436764, "grad_norm": 0.27071550488471985, "learning_rate": 3.346714629445501e-06, "loss": 0.3381, "step": 31797 }, { "epoch": 3.2328182187881254, "grad_norm": 0.2942301034927368, "learning_rate": 3.346379709620775e-06, "loss": 0.3053, "step": 31798 }, { "epoch": 3.2329198861325743, "grad_norm": 0.2576003074645996, "learning_rate": 3.346044798126593e-06, "loss": 0.3344, "step": 31799 }, { "epoch": 3.2330215534770232, "grad_norm": 0.26968905329704285, "learning_rate": 3.345709894964644e-06, "loss": 0.3352, "step": 31800 }, { "epoch": 3.233123220821472, "grad_norm": 0.2610552906990051, "learning_rate": 3.3453750001366185e-06, "loss": 0.3233, "step": 31801 }, { "epoch": 3.233224888165921, "grad_norm": 0.29287323355674744, "learning_rate": 3.3450401136441985e-06, "loss": 0.3103, "step": 31802 }, { "epoch": 3.23332655551037, "grad_norm": 0.2814491391181946, "learning_rate": 3.3447052354890742e-06, "loss": 0.306, "step": 31803 }, { "epoch": 3.233428222854819, "grad_norm": 0.27709266543388367, "learning_rate": 3.3443703656729296e-06, "loss": 0.2851, "step": 31804 }, { "epoch": 3.233529890199268, "grad_norm": 0.2760288715362549, "learning_rate": 3.3440355041974537e-06, "loss": 0.2997, "step": 31805 }, { "epoch": 3.233631557543717, "grad_norm": 0.28098440170288086, "learning_rate": 3.3437006510643366e-06, "loss": 0.2974, "step": 31806 }, { "epoch": 3.233733224888166, "grad_norm": 0.2617037296295166, "learning_rate": 3.3433658062752593e-06, "loss": 0.323, "step": 31807 }, { "epoch": 3.2338348922326148, "grad_norm": 0.2823063135147095, "learning_rate": 3.343030969831913e-06, "loss": 0.3021, "step": 31808 }, { "epoch": 3.2339365595770637, "grad_norm": 0.27584144473075867, "learning_rate": 3.3426961417359815e-06, "loss": 0.307, "step": 31809 }, { "epoch": 3.2340382269215127, "grad_norm": 0.25823333859443665, "learning_rate": 3.342361321989152e-06, "loss": 0.3094, "step": 31810 }, { "epoch": 3.2341398942659616, "grad_norm": 0.2745665907859802, "learning_rate": 3.3420265105931155e-06, "loss": 0.3053, "step": 31811 }, { "epoch": 3.2342415616104105, "grad_norm": 0.2773056626319885, "learning_rate": 3.341691707549553e-06, "loss": 0.2958, "step": 31812 }, { "epoch": 3.2343432289548595, "grad_norm": 0.3051788806915283, "learning_rate": 3.3413569128601555e-06, "loss": 0.3136, "step": 31813 }, { "epoch": 3.234444896299309, "grad_norm": 0.2682178318500519, "learning_rate": 3.3410221265266058e-06, "loss": 0.2873, "step": 31814 }, { "epoch": 3.234546563643758, "grad_norm": 0.2680720090866089, "learning_rate": 3.3406873485505925e-06, "loss": 0.3106, "step": 31815 }, { "epoch": 3.2346482309882068, "grad_norm": 0.2757274806499481, "learning_rate": 3.340352578933803e-06, "loss": 0.2866, "step": 31816 }, { "epoch": 3.2347498983326557, "grad_norm": 0.28616565465927124, "learning_rate": 3.3400178176779223e-06, "loss": 0.3015, "step": 31817 }, { "epoch": 3.2348515656771046, "grad_norm": 0.27478882670402527, "learning_rate": 3.339683064784638e-06, "loss": 0.327, "step": 31818 }, { "epoch": 3.2349532330215536, "grad_norm": 0.2817910313606262, "learning_rate": 3.3393483202556353e-06, "loss": 0.3084, "step": 31819 }, { "epoch": 3.2350549003660025, "grad_norm": 0.2756956219673157, "learning_rate": 3.339013584092601e-06, "loss": 0.3025, "step": 31820 }, { "epoch": 3.2351565677104515, "grad_norm": 0.26278233528137207, "learning_rate": 3.3386788562972216e-06, "loss": 0.3027, "step": 31821 }, { "epoch": 3.2352582350549004, "grad_norm": 0.2841975688934326, "learning_rate": 3.3383441368711834e-06, "loss": 0.3034, "step": 31822 }, { "epoch": 3.2353599023993493, "grad_norm": 0.2903123199939728, "learning_rate": 3.3380094258161733e-06, "loss": 0.3208, "step": 31823 }, { "epoch": 3.2354615697437983, "grad_norm": 0.2999376356601715, "learning_rate": 3.337674723133876e-06, "loss": 0.3528, "step": 31824 }, { "epoch": 3.2355632370882472, "grad_norm": 0.25144922733306885, "learning_rate": 3.3373400288259776e-06, "loss": 0.3017, "step": 31825 }, { "epoch": 3.235664904432696, "grad_norm": 0.2844179570674896, "learning_rate": 3.3370053428941666e-06, "loss": 0.3007, "step": 31826 }, { "epoch": 3.235766571777145, "grad_norm": 0.29337838292121887, "learning_rate": 3.336670665340127e-06, "loss": 0.2941, "step": 31827 }, { "epoch": 3.235868239121594, "grad_norm": 0.2662319242954254, "learning_rate": 3.3363359961655452e-06, "loss": 0.3181, "step": 31828 }, { "epoch": 3.235969906466043, "grad_norm": 0.2709048092365265, "learning_rate": 3.3360013353721073e-06, "loss": 0.3267, "step": 31829 }, { "epoch": 3.236071573810492, "grad_norm": 0.27063310146331787, "learning_rate": 3.3356666829614986e-06, "loss": 0.2938, "step": 31830 }, { "epoch": 3.236173241154941, "grad_norm": 0.2691097855567932, "learning_rate": 3.3353320389354073e-06, "loss": 0.3323, "step": 31831 }, { "epoch": 3.23627490849939, "grad_norm": 0.2763236165046692, "learning_rate": 3.3349974032955157e-06, "loss": 0.3161, "step": 31832 }, { "epoch": 3.2363765758438388, "grad_norm": 0.2538709342479706, "learning_rate": 3.334662776043513e-06, "loss": 0.2938, "step": 31833 }, { "epoch": 3.236478243188288, "grad_norm": 0.26547926664352417, "learning_rate": 3.3343281571810825e-06, "loss": 0.3363, "step": 31834 }, { "epoch": 3.236579910532737, "grad_norm": 0.25580742955207825, "learning_rate": 3.3339935467099117e-06, "loss": 0.3126, "step": 31835 }, { "epoch": 3.236681577877186, "grad_norm": 0.27871808409690857, "learning_rate": 3.3336589446316847e-06, "loss": 0.3256, "step": 31836 }, { "epoch": 3.236783245221635, "grad_norm": 0.2757994532585144, "learning_rate": 3.3333243509480886e-06, "loss": 0.3182, "step": 31837 }, { "epoch": 3.236884912566084, "grad_norm": 0.2670627534389496, "learning_rate": 3.332989765660808e-06, "loss": 0.3087, "step": 31838 }, { "epoch": 3.236986579910533, "grad_norm": 0.2523241937160492, "learning_rate": 3.332655188771529e-06, "loss": 0.2954, "step": 31839 }, { "epoch": 3.237088247254982, "grad_norm": 0.27203941345214844, "learning_rate": 3.3323206202819367e-06, "loss": 0.3226, "step": 31840 }, { "epoch": 3.2371899145994307, "grad_norm": 0.24783839285373688, "learning_rate": 3.331986060193717e-06, "loss": 0.3237, "step": 31841 }, { "epoch": 3.2372915819438797, "grad_norm": 0.28060901165008545, "learning_rate": 3.331651508508555e-06, "loss": 0.2806, "step": 31842 }, { "epoch": 3.2373932492883286, "grad_norm": 0.2758523225784302, "learning_rate": 3.3313169652281367e-06, "loss": 0.3038, "step": 31843 }, { "epoch": 3.2374949166327776, "grad_norm": 0.28704673051834106, "learning_rate": 3.330982430354146e-06, "loss": 0.331, "step": 31844 }, { "epoch": 3.2375965839772265, "grad_norm": 0.2666175365447998, "learning_rate": 3.330647903888271e-06, "loss": 0.3398, "step": 31845 }, { "epoch": 3.2376982513216754, "grad_norm": 0.28202804923057556, "learning_rate": 3.3303133858321936e-06, "loss": 0.2876, "step": 31846 }, { "epoch": 3.2377999186661244, "grad_norm": 0.2612157166004181, "learning_rate": 3.3299788761876008e-06, "loss": 0.3308, "step": 31847 }, { "epoch": 3.2379015860105733, "grad_norm": 0.3077605962753296, "learning_rate": 3.329644374956178e-06, "loss": 0.2972, "step": 31848 }, { "epoch": 3.2380032533550223, "grad_norm": 0.26606693863868713, "learning_rate": 3.3293098821396093e-06, "loss": 0.2902, "step": 31849 }, { "epoch": 3.238104920699471, "grad_norm": 0.2697252333164215, "learning_rate": 3.328975397739581e-06, "loss": 0.3517, "step": 31850 }, { "epoch": 3.23820658804392, "grad_norm": 0.2779226303100586, "learning_rate": 3.3286409217577776e-06, "loss": 0.3066, "step": 31851 }, { "epoch": 3.238308255388369, "grad_norm": 0.27596181631088257, "learning_rate": 3.328306454195883e-06, "loss": 0.3136, "step": 31852 }, { "epoch": 3.238409922732818, "grad_norm": 0.2651715576648712, "learning_rate": 3.3279719950555843e-06, "loss": 0.3263, "step": 31853 }, { "epoch": 3.238511590077267, "grad_norm": 0.28777796030044556, "learning_rate": 3.327637544338565e-06, "loss": 0.3127, "step": 31854 }, { "epoch": 3.2386132574217164, "grad_norm": 0.28043320775032043, "learning_rate": 3.327303102046511e-06, "loss": 0.3138, "step": 31855 }, { "epoch": 3.2387149247661653, "grad_norm": 0.28047308325767517, "learning_rate": 3.3269686681811053e-06, "loss": 0.3008, "step": 31856 }, { "epoch": 3.2388165921106142, "grad_norm": 0.3037971556186676, "learning_rate": 3.3266342427440345e-06, "loss": 0.3188, "step": 31857 }, { "epoch": 3.238918259455063, "grad_norm": 0.264159619808197, "learning_rate": 3.3262998257369825e-06, "loss": 0.3107, "step": 31858 }, { "epoch": 3.239019926799512, "grad_norm": 0.293208509683609, "learning_rate": 3.3259654171616344e-06, "loss": 0.3225, "step": 31859 }, { "epoch": 3.239121594143961, "grad_norm": 0.2724002003669739, "learning_rate": 3.3256310170196755e-06, "loss": 0.3128, "step": 31860 }, { "epoch": 3.23922326148841, "grad_norm": 0.2796902358531952, "learning_rate": 3.325296625312788e-06, "loss": 0.3307, "step": 31861 }, { "epoch": 3.239324928832859, "grad_norm": 0.294290155172348, "learning_rate": 3.3249622420426587e-06, "loss": 0.3139, "step": 31862 }, { "epoch": 3.239426596177308, "grad_norm": 0.2637377679347992, "learning_rate": 3.324627867210973e-06, "loss": 0.321, "step": 31863 }, { "epoch": 3.239528263521757, "grad_norm": 0.26760098338127136, "learning_rate": 3.3242935008194123e-06, "loss": 0.2953, "step": 31864 }, { "epoch": 3.2396299308662058, "grad_norm": 0.29254844784736633, "learning_rate": 3.3239591428696636e-06, "loss": 0.3254, "step": 31865 }, { "epoch": 3.2397315982106547, "grad_norm": 0.2675633430480957, "learning_rate": 3.3236247933634105e-06, "loss": 0.3167, "step": 31866 }, { "epoch": 3.2398332655551036, "grad_norm": 0.3221170902252197, "learning_rate": 3.323290452302336e-06, "loss": 0.296, "step": 31867 }, { "epoch": 3.2399349328995526, "grad_norm": 0.280451238155365, "learning_rate": 3.322956119688128e-06, "loss": 0.3023, "step": 31868 }, { "epoch": 3.2400366002440015, "grad_norm": 0.26613131165504456, "learning_rate": 3.322621795522466e-06, "loss": 0.2961, "step": 31869 }, { "epoch": 3.2401382675884505, "grad_norm": 0.26325392723083496, "learning_rate": 3.3222874798070394e-06, "loss": 0.2927, "step": 31870 }, { "epoch": 3.2402399349328994, "grad_norm": 0.2660369277000427, "learning_rate": 3.321953172543527e-06, "loss": 0.2897, "step": 31871 }, { "epoch": 3.2403416022773484, "grad_norm": 0.281546413898468, "learning_rate": 3.3216188737336176e-06, "loss": 0.3005, "step": 31872 }, { "epoch": 3.2404432696217973, "grad_norm": 0.2866057753562927, "learning_rate": 3.321284583378993e-06, "loss": 0.29, "step": 31873 }, { "epoch": 3.2405449369662462, "grad_norm": 0.27816319465637207, "learning_rate": 3.3209503014813384e-06, "loss": 0.3142, "step": 31874 }, { "epoch": 3.2406466043106956, "grad_norm": 0.2728060781955719, "learning_rate": 3.3206160280423365e-06, "loss": 0.3425, "step": 31875 }, { "epoch": 3.2407482716551446, "grad_norm": 0.2769918143749237, "learning_rate": 3.3202817630636717e-06, "loss": 0.3308, "step": 31876 }, { "epoch": 3.2408499389995935, "grad_norm": 0.2987910211086273, "learning_rate": 3.319947506547028e-06, "loss": 0.3112, "step": 31877 }, { "epoch": 3.2409516063440424, "grad_norm": 0.24977992475032806, "learning_rate": 3.319613258494091e-06, "loss": 0.3086, "step": 31878 }, { "epoch": 3.2410532736884914, "grad_norm": 0.2757587730884552, "learning_rate": 3.3192790189065417e-06, "loss": 0.294, "step": 31879 }, { "epoch": 3.2411549410329403, "grad_norm": 0.26085996627807617, "learning_rate": 3.3189447877860665e-06, "loss": 0.2962, "step": 31880 }, { "epoch": 3.2412566083773893, "grad_norm": 0.2822834551334381, "learning_rate": 3.318610565134347e-06, "loss": 0.3179, "step": 31881 }, { "epoch": 3.241358275721838, "grad_norm": 0.26107868552207947, "learning_rate": 3.3182763509530675e-06, "loss": 0.2814, "step": 31882 }, { "epoch": 3.241459943066287, "grad_norm": 0.2611747682094574, "learning_rate": 3.3179421452439132e-06, "loss": 0.3019, "step": 31883 }, { "epoch": 3.241561610410736, "grad_norm": 0.2669553756713867, "learning_rate": 3.317607948008566e-06, "loss": 0.3064, "step": 31884 }, { "epoch": 3.241663277755185, "grad_norm": 0.26355162262916565, "learning_rate": 3.317273759248712e-06, "loss": 0.3349, "step": 31885 }, { "epoch": 3.241764945099634, "grad_norm": 0.3010251224040985, "learning_rate": 3.31693957896603e-06, "loss": 0.2966, "step": 31886 }, { "epoch": 3.241866612444083, "grad_norm": 0.27482515573501587, "learning_rate": 3.316605407162207e-06, "loss": 0.3164, "step": 31887 }, { "epoch": 3.241968279788532, "grad_norm": 0.27899205684661865, "learning_rate": 3.3162712438389276e-06, "loss": 0.3, "step": 31888 }, { "epoch": 3.242069947132981, "grad_norm": 0.2597411870956421, "learning_rate": 3.3159370889978727e-06, "loss": 0.3122, "step": 31889 }, { "epoch": 3.2421716144774297, "grad_norm": 0.28733453154563904, "learning_rate": 3.315602942640728e-06, "loss": 0.2887, "step": 31890 }, { "epoch": 3.2422732818218787, "grad_norm": 0.29633086919784546, "learning_rate": 3.315268804769172e-06, "loss": 0.337, "step": 31891 }, { "epoch": 3.2423749491663276, "grad_norm": 0.27205023169517517, "learning_rate": 3.3149346753848934e-06, "loss": 0.3229, "step": 31892 }, { "epoch": 3.2424766165107766, "grad_norm": 0.28089478611946106, "learning_rate": 3.3146005544895733e-06, "loss": 0.3318, "step": 31893 }, { "epoch": 3.2425782838552255, "grad_norm": 0.2644217014312744, "learning_rate": 3.3142664420848945e-06, "loss": 0.3216, "step": 31894 }, { "epoch": 3.2426799511996744, "grad_norm": 0.28205814957618713, "learning_rate": 3.3139323381725435e-06, "loss": 0.3237, "step": 31895 }, { "epoch": 3.242781618544124, "grad_norm": 0.2729160487651825, "learning_rate": 3.3135982427541974e-06, "loss": 0.2931, "step": 31896 }, { "epoch": 3.2428832858885728, "grad_norm": 0.2787308394908905, "learning_rate": 3.313264155831544e-06, "loss": 0.2753, "step": 31897 }, { "epoch": 3.2429849532330217, "grad_norm": 0.2728557288646698, "learning_rate": 3.3129300774062643e-06, "loss": 0.3212, "step": 31898 }, { "epoch": 3.2430866205774707, "grad_norm": 0.2721722722053528, "learning_rate": 3.3125960074800422e-06, "loss": 0.3246, "step": 31899 }, { "epoch": 3.2431882879219196, "grad_norm": 0.28389325737953186, "learning_rate": 3.3122619460545623e-06, "loss": 0.3102, "step": 31900 }, { "epoch": 3.2432899552663685, "grad_norm": 0.2795592248439789, "learning_rate": 3.311927893131503e-06, "loss": 0.2938, "step": 31901 }, { "epoch": 3.2433916226108175, "grad_norm": 0.2790486514568329, "learning_rate": 3.3115938487125503e-06, "loss": 0.3179, "step": 31902 }, { "epoch": 3.2434932899552664, "grad_norm": 0.2670150399208069, "learning_rate": 3.3112598127993876e-06, "loss": 0.3068, "step": 31903 }, { "epoch": 3.2435949572997154, "grad_norm": 0.2706526517868042, "learning_rate": 3.310925785393696e-06, "loss": 0.324, "step": 31904 }, { "epoch": 3.2436966246441643, "grad_norm": 0.29189014434814453, "learning_rate": 3.310591766497161e-06, "loss": 0.3155, "step": 31905 }, { "epoch": 3.2437982919886132, "grad_norm": 0.27842384576797485, "learning_rate": 3.3102577561114598e-06, "loss": 0.3255, "step": 31906 }, { "epoch": 3.243899959333062, "grad_norm": 0.26082444190979004, "learning_rate": 3.3099237542382795e-06, "loss": 0.3122, "step": 31907 }, { "epoch": 3.244001626677511, "grad_norm": 0.26006606221199036, "learning_rate": 3.309589760879304e-06, "loss": 0.3409, "step": 31908 }, { "epoch": 3.24410329402196, "grad_norm": 0.2968853712081909, "learning_rate": 3.3092557760362097e-06, "loss": 0.3211, "step": 31909 }, { "epoch": 3.244204961366409, "grad_norm": 0.2698148488998413, "learning_rate": 3.308921799710687e-06, "loss": 0.3346, "step": 31910 }, { "epoch": 3.244306628710858, "grad_norm": 0.26883983612060547, "learning_rate": 3.3085878319044114e-06, "loss": 0.3159, "step": 31911 }, { "epoch": 3.244408296055307, "grad_norm": 0.3029978573322296, "learning_rate": 3.308253872619069e-06, "loss": 0.3105, "step": 31912 }, { "epoch": 3.244509963399756, "grad_norm": 0.27035853266716003, "learning_rate": 3.3079199218563434e-06, "loss": 0.319, "step": 31913 }, { "epoch": 3.2446116307442048, "grad_norm": 0.2743209898471832, "learning_rate": 3.307585979617912e-06, "loss": 0.3094, "step": 31914 }, { "epoch": 3.2447132980886537, "grad_norm": 0.26444029808044434, "learning_rate": 3.307252045905464e-06, "loss": 0.2985, "step": 31915 }, { "epoch": 3.244814965433103, "grad_norm": 0.2532876133918762, "learning_rate": 3.306918120720675e-06, "loss": 0.3039, "step": 31916 }, { "epoch": 3.244916632777552, "grad_norm": 0.29914864897727966, "learning_rate": 3.30658420406523e-06, "loss": 0.3012, "step": 31917 }, { "epoch": 3.245018300122001, "grad_norm": 0.3016733229160309, "learning_rate": 3.3062502959408145e-06, "loss": 0.2902, "step": 31918 }, { "epoch": 3.24511996746645, "grad_norm": 0.27905386686325073, "learning_rate": 3.3059163963491035e-06, "loss": 0.3216, "step": 31919 }, { "epoch": 3.245221634810899, "grad_norm": 0.2604048550128937, "learning_rate": 3.3055825052917873e-06, "loss": 0.2948, "step": 31920 }, { "epoch": 3.245323302155348, "grad_norm": 0.2724074423313141, "learning_rate": 3.30524862277054e-06, "loss": 0.3294, "step": 31921 }, { "epoch": 3.2454249694997968, "grad_norm": 0.2641374468803406, "learning_rate": 3.3049147487870487e-06, "loss": 0.3222, "step": 31922 }, { "epoch": 3.2455266368442457, "grad_norm": 0.26511430740356445, "learning_rate": 3.304580883342996e-06, "loss": 0.3224, "step": 31923 }, { "epoch": 3.2456283041886946, "grad_norm": 0.3028348386287689, "learning_rate": 3.3042470264400588e-06, "loss": 0.3099, "step": 31924 }, { "epoch": 3.2457299715331436, "grad_norm": 0.27205270528793335, "learning_rate": 3.303913178079925e-06, "loss": 0.3146, "step": 31925 }, { "epoch": 3.2458316388775925, "grad_norm": 0.2866519093513489, "learning_rate": 3.3035793382642724e-06, "loss": 0.2858, "step": 31926 }, { "epoch": 3.2459333062220415, "grad_norm": 0.26675471663475037, "learning_rate": 3.3032455069947823e-06, "loss": 0.3376, "step": 31927 }, { "epoch": 3.2460349735664904, "grad_norm": 0.2659589648246765, "learning_rate": 3.3029116842731412e-06, "loss": 0.3345, "step": 31928 }, { "epoch": 3.2461366409109393, "grad_norm": 0.27029019594192505, "learning_rate": 3.3025778701010243e-06, "loss": 0.2817, "step": 31929 }, { "epoch": 3.2462383082553883, "grad_norm": 0.2824254631996155, "learning_rate": 3.30224406448012e-06, "loss": 0.3114, "step": 31930 }, { "epoch": 3.2463399755998372, "grad_norm": 0.27275264263153076, "learning_rate": 3.301910267412105e-06, "loss": 0.2728, "step": 31931 }, { "epoch": 3.246441642944286, "grad_norm": 0.2696646451950073, "learning_rate": 3.3015764788986616e-06, "loss": 0.299, "step": 31932 }, { "epoch": 3.246543310288735, "grad_norm": 0.28928810358047485, "learning_rate": 3.301242698941475e-06, "loss": 0.2815, "step": 31933 }, { "epoch": 3.246644977633184, "grad_norm": 0.26850008964538574, "learning_rate": 3.300908927542221e-06, "loss": 0.3161, "step": 31934 }, { "epoch": 3.246746644977633, "grad_norm": 0.2942453920841217, "learning_rate": 3.300575164702586e-06, "loss": 0.3108, "step": 31935 }, { "epoch": 3.246848312322082, "grad_norm": 0.2909969091415405, "learning_rate": 3.300241410424249e-06, "loss": 0.3023, "step": 31936 }, { "epoch": 3.2469499796665313, "grad_norm": 0.28406381607055664, "learning_rate": 3.2999076647088906e-06, "loss": 0.2944, "step": 31937 }, { "epoch": 3.2470516470109803, "grad_norm": 0.27316197752952576, "learning_rate": 3.299573927558196e-06, "loss": 0.3348, "step": 31938 }, { "epoch": 3.247153314355429, "grad_norm": 0.28039538860321045, "learning_rate": 3.299240198973841e-06, "loss": 0.3008, "step": 31939 }, { "epoch": 3.247254981699878, "grad_norm": 0.2836918830871582, "learning_rate": 3.298906478957513e-06, "loss": 0.3162, "step": 31940 }, { "epoch": 3.247356649044327, "grad_norm": 0.2965899705886841, "learning_rate": 3.298572767510888e-06, "loss": 0.3093, "step": 31941 }, { "epoch": 3.247458316388776, "grad_norm": 0.2859646677970886, "learning_rate": 3.2982390646356477e-06, "loss": 0.3548, "step": 31942 }, { "epoch": 3.247559983733225, "grad_norm": 0.2781639099121094, "learning_rate": 3.2979053703334773e-06, "loss": 0.3124, "step": 31943 }, { "epoch": 3.247661651077674, "grad_norm": 0.28561636805534363, "learning_rate": 3.2975716846060535e-06, "loss": 0.3005, "step": 31944 }, { "epoch": 3.247763318422123, "grad_norm": 0.2861664295196533, "learning_rate": 3.2972380074550603e-06, "loss": 0.3181, "step": 31945 }, { "epoch": 3.247864985766572, "grad_norm": 0.2624063789844513, "learning_rate": 3.296904338882176e-06, "loss": 0.3364, "step": 31946 }, { "epoch": 3.2479666531110207, "grad_norm": 0.27879422903060913, "learning_rate": 3.296570678889082e-06, "loss": 0.3283, "step": 31947 }, { "epoch": 3.2480683204554697, "grad_norm": 0.2911231219768524, "learning_rate": 3.2962370274774628e-06, "loss": 0.3094, "step": 31948 }, { "epoch": 3.2481699877999186, "grad_norm": 0.2556232213973999, "learning_rate": 3.295903384648995e-06, "loss": 0.3166, "step": 31949 }, { "epoch": 3.2482716551443676, "grad_norm": 0.3203198313713074, "learning_rate": 3.295569750405362e-06, "loss": 0.3029, "step": 31950 }, { "epoch": 3.2483733224888165, "grad_norm": 0.2865382134914398, "learning_rate": 3.295236124748242e-06, "loss": 0.308, "step": 31951 }, { "epoch": 3.2484749898332654, "grad_norm": 0.28501585125923157, "learning_rate": 3.2949025076793173e-06, "loss": 0.331, "step": 31952 }, { "epoch": 3.2485766571777144, "grad_norm": 0.24854448437690735, "learning_rate": 3.2945688992002713e-06, "loss": 0.3273, "step": 31953 }, { "epoch": 3.2486783245221633, "grad_norm": 0.2787397801876068, "learning_rate": 3.294235299312779e-06, "loss": 0.3119, "step": 31954 }, { "epoch": 3.2487799918666123, "grad_norm": 0.2894437611103058, "learning_rate": 3.2939017080185252e-06, "loss": 0.3073, "step": 31955 }, { "epoch": 3.248881659211061, "grad_norm": 0.27757924795150757, "learning_rate": 3.2935681253191887e-06, "loss": 0.3273, "step": 31956 }, { "epoch": 3.2489833265555106, "grad_norm": 0.2790054976940155, "learning_rate": 3.2932345512164497e-06, "loss": 0.3117, "step": 31957 }, { "epoch": 3.2490849938999595, "grad_norm": 0.27012866735458374, "learning_rate": 3.292900985711992e-06, "loss": 0.2963, "step": 31958 }, { "epoch": 3.2491866612444085, "grad_norm": 0.2830806374549866, "learning_rate": 3.2925674288074914e-06, "loss": 0.3102, "step": 31959 }, { "epoch": 3.2492883285888574, "grad_norm": 0.28670424222946167, "learning_rate": 3.292233880504632e-06, "loss": 0.2842, "step": 31960 }, { "epoch": 3.2493899959333064, "grad_norm": 0.2615251839160919, "learning_rate": 3.29190034080509e-06, "loss": 0.2975, "step": 31961 }, { "epoch": 3.2494916632777553, "grad_norm": 0.29504653811454773, "learning_rate": 3.291566809710548e-06, "loss": 0.2897, "step": 31962 }, { "epoch": 3.2495933306222042, "grad_norm": 0.2781682312488556, "learning_rate": 3.2912332872226897e-06, "loss": 0.2913, "step": 31963 }, { "epoch": 3.249694997966653, "grad_norm": 0.2837449610233307, "learning_rate": 3.2908997733431903e-06, "loss": 0.3142, "step": 31964 }, { "epoch": 3.249796665311102, "grad_norm": 0.24766267836093903, "learning_rate": 3.290566268073732e-06, "loss": 0.3209, "step": 31965 }, { "epoch": 3.249898332655551, "grad_norm": 0.27789270877838135, "learning_rate": 3.2902327714159942e-06, "loss": 0.3381, "step": 31966 }, { "epoch": 3.25, "grad_norm": 0.2643634080886841, "learning_rate": 3.289899283371657e-06, "loss": 0.2955, "step": 31967 }, { "epoch": 3.250101667344449, "grad_norm": 0.27872222661972046, "learning_rate": 3.2895658039424016e-06, "loss": 0.2865, "step": 31968 }, { "epoch": 3.250203334688898, "grad_norm": 0.26807618141174316, "learning_rate": 3.2892323331299066e-06, "loss": 0.3037, "step": 31969 }, { "epoch": 3.250305002033347, "grad_norm": 0.2782163619995117, "learning_rate": 3.288898870935854e-06, "loss": 0.3248, "step": 31970 }, { "epoch": 3.2504066693777958, "grad_norm": 0.2792631685733795, "learning_rate": 3.28856541736192e-06, "loss": 0.3359, "step": 31971 }, { "epoch": 3.2505083367222447, "grad_norm": 0.2656925618648529, "learning_rate": 3.288231972409788e-06, "loss": 0.3256, "step": 31972 }, { "epoch": 3.2506100040666936, "grad_norm": 0.29913389682769775, "learning_rate": 3.2878985360811365e-06, "loss": 0.3328, "step": 31973 }, { "epoch": 3.2507116714111426, "grad_norm": 0.28366485238075256, "learning_rate": 3.287565108377645e-06, "loss": 0.2777, "step": 31974 }, { "epoch": 3.2508133387555915, "grad_norm": 0.276429146528244, "learning_rate": 3.287231689300994e-06, "loss": 0.3272, "step": 31975 }, { "epoch": 3.2509150061000405, "grad_norm": 0.26155078411102295, "learning_rate": 3.2868982788528624e-06, "loss": 0.2906, "step": 31976 }, { "epoch": 3.2510166734444894, "grad_norm": 0.2685527205467224, "learning_rate": 3.28656487703493e-06, "loss": 0.2993, "step": 31977 }, { "epoch": 3.2511183407889384, "grad_norm": 0.27845388650894165, "learning_rate": 3.2862314838488773e-06, "loss": 0.2879, "step": 31978 }, { "epoch": 3.2512200081333877, "grad_norm": 0.2913960814476013, "learning_rate": 3.2858980992963825e-06, "loss": 0.2843, "step": 31979 }, { "epoch": 3.2513216754778367, "grad_norm": 0.2702309191226959, "learning_rate": 3.2855647233791264e-06, "loss": 0.3271, "step": 31980 }, { "epoch": 3.2514233428222856, "grad_norm": 0.2813141942024231, "learning_rate": 3.285231356098787e-06, "loss": 0.3337, "step": 31981 }, { "epoch": 3.2515250101667346, "grad_norm": 0.2879506051540375, "learning_rate": 3.2848979974570445e-06, "loss": 0.2921, "step": 31982 }, { "epoch": 3.2516266775111835, "grad_norm": 0.2783198952674866, "learning_rate": 3.2845646474555793e-06, "loss": 0.3104, "step": 31983 }, { "epoch": 3.2517283448556324, "grad_norm": 0.25052329897880554, "learning_rate": 3.2842313060960685e-06, "loss": 0.3094, "step": 31984 }, { "epoch": 3.2518300122000814, "grad_norm": 0.27840182185173035, "learning_rate": 3.283897973380194e-06, "loss": 0.2878, "step": 31985 }, { "epoch": 3.2519316795445303, "grad_norm": 0.29533278942108154, "learning_rate": 3.283564649309633e-06, "loss": 0.3295, "step": 31986 }, { "epoch": 3.2520333468889793, "grad_norm": 0.2646086812019348, "learning_rate": 3.2832313338860645e-06, "loss": 0.3029, "step": 31987 }, { "epoch": 3.252135014233428, "grad_norm": 0.2709885835647583, "learning_rate": 3.2828980271111698e-06, "loss": 0.3015, "step": 31988 }, { "epoch": 3.252236681577877, "grad_norm": 0.2679482102394104, "learning_rate": 3.282564728986627e-06, "loss": 0.3083, "step": 31989 }, { "epoch": 3.252338348922326, "grad_norm": 0.2762719690799713, "learning_rate": 3.2822314395141146e-06, "loss": 0.3437, "step": 31990 }, { "epoch": 3.252440016266775, "grad_norm": 0.2874273955821991, "learning_rate": 3.2818981586953113e-06, "loss": 0.2857, "step": 31991 }, { "epoch": 3.252541683611224, "grad_norm": 0.26167726516723633, "learning_rate": 3.2815648865318973e-06, "loss": 0.2904, "step": 31992 }, { "epoch": 3.252643350955673, "grad_norm": 0.2729592025279999, "learning_rate": 3.2812316230255512e-06, "loss": 0.325, "step": 31993 }, { "epoch": 3.252745018300122, "grad_norm": 0.2889854609966278, "learning_rate": 3.2808983681779517e-06, "loss": 0.3197, "step": 31994 }, { "epoch": 3.252846685644571, "grad_norm": 0.28942516446113586, "learning_rate": 3.2805651219907782e-06, "loss": 0.3105, "step": 31995 }, { "epoch": 3.2529483529890197, "grad_norm": 0.27424684166908264, "learning_rate": 3.2802318844657076e-06, "loss": 0.317, "step": 31996 }, { "epoch": 3.253050020333469, "grad_norm": 0.27570584416389465, "learning_rate": 3.279898655604421e-06, "loss": 0.2851, "step": 31997 }, { "epoch": 3.253151687677918, "grad_norm": 0.2781321108341217, "learning_rate": 3.2795654354085964e-06, "loss": 0.3339, "step": 31998 }, { "epoch": 3.253253355022367, "grad_norm": 0.27243006229400635, "learning_rate": 3.2792322238799118e-06, "loss": 0.3036, "step": 31999 }, { "epoch": 3.253355022366816, "grad_norm": 0.2727726399898529, "learning_rate": 3.2788990210200474e-06, "loss": 0.2972, "step": 32000 }, { "epoch": 3.253456689711265, "grad_norm": 0.2734147310256958, "learning_rate": 3.278565826830679e-06, "loss": 0.309, "step": 32001 }, { "epoch": 3.253558357055714, "grad_norm": 0.2674625515937805, "learning_rate": 3.278232641313488e-06, "loss": 0.3092, "step": 32002 }, { "epoch": 3.2536600244001628, "grad_norm": 0.2554049491882324, "learning_rate": 3.2778994644701517e-06, "loss": 0.2965, "step": 32003 }, { "epoch": 3.2537616917446117, "grad_norm": 0.30368173122406006, "learning_rate": 3.277566296302348e-06, "loss": 0.3647, "step": 32004 }, { "epoch": 3.2538633590890607, "grad_norm": 0.2778864800930023, "learning_rate": 3.277233136811757e-06, "loss": 0.3024, "step": 32005 }, { "epoch": 3.2539650264335096, "grad_norm": 0.2881002724170685, "learning_rate": 3.276899986000056e-06, "loss": 0.304, "step": 32006 }, { "epoch": 3.2540666937779585, "grad_norm": 0.2668248116970062, "learning_rate": 3.2765668438689226e-06, "loss": 0.2771, "step": 32007 }, { "epoch": 3.2541683611224075, "grad_norm": 0.29262498021125793, "learning_rate": 3.276233710420037e-06, "loss": 0.3266, "step": 32008 }, { "epoch": 3.2542700284668564, "grad_norm": 0.28894856572151184, "learning_rate": 3.275900585655075e-06, "loss": 0.3027, "step": 32009 }, { "epoch": 3.2543716958113054, "grad_norm": 0.2815111577510834, "learning_rate": 3.2755674695757174e-06, "loss": 0.3267, "step": 32010 }, { "epoch": 3.2544733631557543, "grad_norm": 0.2640845775604248, "learning_rate": 3.27523436218364e-06, "loss": 0.301, "step": 32011 }, { "epoch": 3.2545750305002032, "grad_norm": 0.27696359157562256, "learning_rate": 3.274901263480522e-06, "loss": 0.3169, "step": 32012 }, { "epoch": 3.254676697844652, "grad_norm": 0.2735770344734192, "learning_rate": 3.2745681734680425e-06, "loss": 0.3312, "step": 32013 }, { "epoch": 3.254778365189101, "grad_norm": 0.280342161655426, "learning_rate": 3.2742350921478777e-06, "loss": 0.2968, "step": 32014 }, { "epoch": 3.25488003253355, "grad_norm": 0.25652796030044556, "learning_rate": 3.2739020195217074e-06, "loss": 0.3176, "step": 32015 }, { "epoch": 3.254981699877999, "grad_norm": 0.2979618310928345, "learning_rate": 3.2735689555912074e-06, "loss": 0.3079, "step": 32016 }, { "epoch": 3.255083367222448, "grad_norm": 0.26108893752098083, "learning_rate": 3.2732359003580566e-06, "loss": 0.3159, "step": 32017 }, { "epoch": 3.255185034566897, "grad_norm": 0.2670917510986328, "learning_rate": 3.2729028538239345e-06, "loss": 0.3136, "step": 32018 }, { "epoch": 3.255286701911346, "grad_norm": 0.2815842926502228, "learning_rate": 3.272569815990516e-06, "loss": 0.3345, "step": 32019 }, { "epoch": 3.2553883692557952, "grad_norm": 0.24523596465587616, "learning_rate": 3.2722367868594814e-06, "loss": 0.3172, "step": 32020 }, { "epoch": 3.255490036600244, "grad_norm": 0.27159255743026733, "learning_rate": 3.271903766432506e-06, "loss": 0.2966, "step": 32021 }, { "epoch": 3.255591703944693, "grad_norm": 0.2746458947658539, "learning_rate": 3.2715707547112702e-06, "loss": 0.2989, "step": 32022 }, { "epoch": 3.255693371289142, "grad_norm": 0.27621588110923767, "learning_rate": 3.271237751697449e-06, "loss": 0.3172, "step": 32023 }, { "epoch": 3.255795038633591, "grad_norm": 0.26771676540374756, "learning_rate": 3.270904757392721e-06, "loss": 0.3081, "step": 32024 }, { "epoch": 3.25589670597804, "grad_norm": 0.26242706179618835, "learning_rate": 3.270571771798765e-06, "loss": 0.3293, "step": 32025 }, { "epoch": 3.255998373322489, "grad_norm": 0.2785063683986664, "learning_rate": 3.2702387949172566e-06, "loss": 0.344, "step": 32026 }, { "epoch": 3.256100040666938, "grad_norm": 0.29002416133880615, "learning_rate": 3.2699058267498753e-06, "loss": 0.3287, "step": 32027 }, { "epoch": 3.2562017080113868, "grad_norm": 0.2834855318069458, "learning_rate": 3.269572867298296e-06, "loss": 0.3078, "step": 32028 }, { "epoch": 3.2563033753558357, "grad_norm": 0.29074469208717346, "learning_rate": 3.2692399165641976e-06, "loss": 0.3228, "step": 32029 }, { "epoch": 3.2564050427002846, "grad_norm": 0.26402953267097473, "learning_rate": 3.268906974549258e-06, "loss": 0.3235, "step": 32030 }, { "epoch": 3.2565067100447336, "grad_norm": 0.25667041540145874, "learning_rate": 3.2685740412551527e-06, "loss": 0.3157, "step": 32031 }, { "epoch": 3.2566083773891825, "grad_norm": 0.26723822951316833, "learning_rate": 3.268241116683561e-06, "loss": 0.3184, "step": 32032 }, { "epoch": 3.2567100447336315, "grad_norm": 0.2729055881500244, "learning_rate": 3.267908200836158e-06, "loss": 0.3122, "step": 32033 }, { "epoch": 3.2568117120780804, "grad_norm": 0.2620794177055359, "learning_rate": 3.267575293714622e-06, "loss": 0.3228, "step": 32034 }, { "epoch": 3.2569133794225293, "grad_norm": 0.2617764174938202, "learning_rate": 3.2672423953206307e-06, "loss": 0.2929, "step": 32035 }, { "epoch": 3.2570150467669783, "grad_norm": 0.28032854199409485, "learning_rate": 3.26690950565586e-06, "loss": 0.2979, "step": 32036 }, { "epoch": 3.2571167141114272, "grad_norm": 0.2585299611091614, "learning_rate": 3.266576624721989e-06, "loss": 0.3201, "step": 32037 }, { "epoch": 3.2572183814558766, "grad_norm": 0.2959655523300171, "learning_rate": 3.26624375252069e-06, "loss": 0.3222, "step": 32038 }, { "epoch": 3.2573200488003256, "grad_norm": 0.26802319288253784, "learning_rate": 3.265910889053644e-06, "loss": 0.3319, "step": 32039 }, { "epoch": 3.2574217161447745, "grad_norm": 0.3048517107963562, "learning_rate": 3.2655780343225276e-06, "loss": 0.3264, "step": 32040 }, { "epoch": 3.2575233834892234, "grad_norm": 0.26639991998672485, "learning_rate": 3.265245188329016e-06, "loss": 0.3419, "step": 32041 }, { "epoch": 3.2576250508336724, "grad_norm": 0.28415346145629883, "learning_rate": 3.2649123510747895e-06, "loss": 0.296, "step": 32042 }, { "epoch": 3.2577267181781213, "grad_norm": 0.26760947704315186, "learning_rate": 3.2645795225615186e-06, "loss": 0.2949, "step": 32043 }, { "epoch": 3.2578283855225703, "grad_norm": 0.2767360210418701, "learning_rate": 3.2642467027908842e-06, "loss": 0.2948, "step": 32044 }, { "epoch": 3.257930052867019, "grad_norm": 0.26827460527420044, "learning_rate": 3.263913891764564e-06, "loss": 0.3406, "step": 32045 }, { "epoch": 3.258031720211468, "grad_norm": 0.2684471011161804, "learning_rate": 3.2635810894842325e-06, "loss": 0.2997, "step": 32046 }, { "epoch": 3.258133387555917, "grad_norm": 0.2747097313404083, "learning_rate": 3.2632482959515686e-06, "loss": 0.32, "step": 32047 }, { "epoch": 3.258235054900366, "grad_norm": 0.29040971398353577, "learning_rate": 3.2629155111682442e-06, "loss": 0.2994, "step": 32048 }, { "epoch": 3.258336722244815, "grad_norm": 0.27979984879493713, "learning_rate": 3.2625827351359395e-06, "loss": 0.2881, "step": 32049 }, { "epoch": 3.258438389589264, "grad_norm": 0.27364447712898254, "learning_rate": 3.262249967856331e-06, "loss": 0.3152, "step": 32050 }, { "epoch": 3.258540056933713, "grad_norm": 0.2833971679210663, "learning_rate": 3.261917209331093e-06, "loss": 0.3437, "step": 32051 }, { "epoch": 3.258641724278162, "grad_norm": 0.26340439915657043, "learning_rate": 3.261584459561905e-06, "loss": 0.3269, "step": 32052 }, { "epoch": 3.2587433916226107, "grad_norm": 0.25847476720809937, "learning_rate": 3.261251718550439e-06, "loss": 0.327, "step": 32053 }, { "epoch": 3.2588450589670597, "grad_norm": 0.2752772271633148, "learning_rate": 3.260918986298374e-06, "loss": 0.3303, "step": 32054 }, { "epoch": 3.2589467263115086, "grad_norm": 0.26981624960899353, "learning_rate": 3.2605862628073877e-06, "loss": 0.3523, "step": 32055 }, { "epoch": 3.2590483936559576, "grad_norm": 0.2626618444919586, "learning_rate": 3.260253548079153e-06, "loss": 0.305, "step": 32056 }, { "epoch": 3.2591500610004065, "grad_norm": 0.2585529088973999, "learning_rate": 3.2599208421153493e-06, "loss": 0.3392, "step": 32057 }, { "epoch": 3.2592517283448554, "grad_norm": 0.25433263182640076, "learning_rate": 3.2595881449176483e-06, "loss": 0.332, "step": 32058 }, { "epoch": 3.2593533956893044, "grad_norm": 0.2632105350494385, "learning_rate": 3.259255456487729e-06, "loss": 0.2902, "step": 32059 }, { "epoch": 3.2594550630337533, "grad_norm": 0.2671244442462921, "learning_rate": 3.25892277682727e-06, "loss": 0.3224, "step": 32060 }, { "epoch": 3.2595567303782027, "grad_norm": 0.26686185598373413, "learning_rate": 3.2585901059379404e-06, "loss": 0.3246, "step": 32061 }, { "epoch": 3.2596583977226516, "grad_norm": 0.2518457770347595, "learning_rate": 3.258257443821423e-06, "loss": 0.3236, "step": 32062 }, { "epoch": 3.2597600650671006, "grad_norm": 0.2802193760871887, "learning_rate": 3.2579247904793886e-06, "loss": 0.3078, "step": 32063 }, { "epoch": 3.2598617324115495, "grad_norm": 0.28696373105049133, "learning_rate": 3.257592145913515e-06, "loss": 0.3277, "step": 32064 }, { "epoch": 3.2599633997559985, "grad_norm": 0.2782191038131714, "learning_rate": 3.257259510125481e-06, "loss": 0.2994, "step": 32065 }, { "epoch": 3.2600650671004474, "grad_norm": 0.2808440029621124, "learning_rate": 3.2569268831169565e-06, "loss": 0.3265, "step": 32066 }, { "epoch": 3.2601667344448964, "grad_norm": 0.29103758931159973, "learning_rate": 3.2565942648896224e-06, "loss": 0.3244, "step": 32067 }, { "epoch": 3.2602684017893453, "grad_norm": 0.278599351644516, "learning_rate": 3.2562616554451497e-06, "loss": 0.2885, "step": 32068 }, { "epoch": 3.2603700691337942, "grad_norm": 0.27650290727615356, "learning_rate": 3.2559290547852175e-06, "loss": 0.3092, "step": 32069 }, { "epoch": 3.260471736478243, "grad_norm": 0.23551161587238312, "learning_rate": 3.2555964629115018e-06, "loss": 0.3109, "step": 32070 }, { "epoch": 3.260573403822692, "grad_norm": 0.25837740302085876, "learning_rate": 3.2552638798256737e-06, "loss": 0.3263, "step": 32071 }, { "epoch": 3.260675071167141, "grad_norm": 0.2763422429561615, "learning_rate": 3.2549313055294146e-06, "loss": 0.3265, "step": 32072 }, { "epoch": 3.26077673851159, "grad_norm": 0.2545057535171509, "learning_rate": 3.254598740024394e-06, "loss": 0.2791, "step": 32073 }, { "epoch": 3.260878405856039, "grad_norm": 0.27823346853256226, "learning_rate": 3.2542661833122915e-06, "loss": 0.2978, "step": 32074 }, { "epoch": 3.260980073200488, "grad_norm": 0.2755107879638672, "learning_rate": 3.2539336353947826e-06, "loss": 0.304, "step": 32075 }, { "epoch": 3.261081740544937, "grad_norm": 0.26091277599334717, "learning_rate": 3.2536010962735377e-06, "loss": 0.3061, "step": 32076 }, { "epoch": 3.2611834078893858, "grad_norm": 0.26148027181625366, "learning_rate": 3.2532685659502393e-06, "loss": 0.3028, "step": 32077 }, { "epoch": 3.2612850752338347, "grad_norm": 0.25957322120666504, "learning_rate": 3.252936044426556e-06, "loss": 0.3128, "step": 32078 }, { "epoch": 3.261386742578284, "grad_norm": 0.2737691104412079, "learning_rate": 3.2526035317041654e-06, "loss": 0.3179, "step": 32079 }, { "epoch": 3.261488409922733, "grad_norm": 0.2683740258216858, "learning_rate": 3.2522710277847458e-06, "loss": 0.3347, "step": 32080 }, { "epoch": 3.261590077267182, "grad_norm": 0.2462693452835083, "learning_rate": 3.2519385326699664e-06, "loss": 0.3251, "step": 32081 }, { "epoch": 3.261691744611631, "grad_norm": 0.263979971408844, "learning_rate": 3.2516060463615086e-06, "loss": 0.327, "step": 32082 }, { "epoch": 3.26179341195608, "grad_norm": 0.27689218521118164, "learning_rate": 3.2512735688610415e-06, "loss": 0.3369, "step": 32083 }, { "epoch": 3.261895079300529, "grad_norm": 0.2634727656841278, "learning_rate": 3.250941100170242e-06, "loss": 0.3241, "step": 32084 }, { "epoch": 3.2619967466449777, "grad_norm": 0.28261274099349976, "learning_rate": 3.2506086402907884e-06, "loss": 0.2852, "step": 32085 }, { "epoch": 3.2620984139894267, "grad_norm": 0.25776228308677673, "learning_rate": 3.2502761892243493e-06, "loss": 0.3023, "step": 32086 }, { "epoch": 3.2622000813338756, "grad_norm": 0.2817462682723999, "learning_rate": 3.249943746972607e-06, "loss": 0.2864, "step": 32087 }, { "epoch": 3.2623017486783246, "grad_norm": 0.25750407576560974, "learning_rate": 3.24961131353723e-06, "loss": 0.2929, "step": 32088 }, { "epoch": 3.2624034160227735, "grad_norm": 0.2582315504550934, "learning_rate": 3.2492788889198935e-06, "loss": 0.32, "step": 32089 }, { "epoch": 3.2625050833672224, "grad_norm": 0.2727019488811493, "learning_rate": 3.2489464731222775e-06, "loss": 0.3209, "step": 32090 }, { "epoch": 3.2626067507116714, "grad_norm": 0.25676029920578003, "learning_rate": 3.2486140661460498e-06, "loss": 0.292, "step": 32091 }, { "epoch": 3.2627084180561203, "grad_norm": 0.28930002450942993, "learning_rate": 3.2482816679928907e-06, "loss": 0.3615, "step": 32092 }, { "epoch": 3.2628100854005693, "grad_norm": 0.26465708017349243, "learning_rate": 3.2479492786644716e-06, "loss": 0.3961, "step": 32093 }, { "epoch": 3.262911752745018, "grad_norm": 0.2979947030544281, "learning_rate": 3.2476168981624657e-06, "loss": 0.2971, "step": 32094 }, { "epoch": 3.263013420089467, "grad_norm": 0.2902865707874298, "learning_rate": 3.2472845264885527e-06, "loss": 0.3101, "step": 32095 }, { "epoch": 3.263115087433916, "grad_norm": 0.27647313475608826, "learning_rate": 3.2469521636444017e-06, "loss": 0.3082, "step": 32096 }, { "epoch": 3.263216754778365, "grad_norm": 0.2525809407234192, "learning_rate": 3.2466198096316902e-06, "loss": 0.3059, "step": 32097 }, { "epoch": 3.263318422122814, "grad_norm": 0.2717840373516083, "learning_rate": 3.2462874644520903e-06, "loss": 0.3102, "step": 32098 }, { "epoch": 3.263420089467263, "grad_norm": 0.2629208266735077, "learning_rate": 3.2459551281072765e-06, "loss": 0.3176, "step": 32099 }, { "epoch": 3.263521756811712, "grad_norm": 0.29368817806243896, "learning_rate": 3.2456228005989277e-06, "loss": 0.2944, "step": 32100 }, { "epoch": 3.263623424156161, "grad_norm": 0.2576427757740021, "learning_rate": 3.2452904819287112e-06, "loss": 0.3247, "step": 32101 }, { "epoch": 3.26372509150061, "grad_norm": 0.2654053866863251, "learning_rate": 3.244958172098306e-06, "loss": 0.3075, "step": 32102 }, { "epoch": 3.263826758845059, "grad_norm": 0.29405683279037476, "learning_rate": 3.2446258711093826e-06, "loss": 0.2961, "step": 32103 }, { "epoch": 3.263928426189508, "grad_norm": 0.28294217586517334, "learning_rate": 3.2442935789636167e-06, "loss": 0.3019, "step": 32104 }, { "epoch": 3.264030093533957, "grad_norm": 0.2729726731777191, "learning_rate": 3.243961295662685e-06, "loss": 0.2951, "step": 32105 }, { "epoch": 3.264131760878406, "grad_norm": 0.2763080596923828, "learning_rate": 3.2436290212082578e-06, "loss": 0.2935, "step": 32106 }, { "epoch": 3.264233428222855, "grad_norm": 0.2788107693195343, "learning_rate": 3.24329675560201e-06, "loss": 0.3271, "step": 32107 }, { "epoch": 3.264335095567304, "grad_norm": 0.27682891488075256, "learning_rate": 3.2429644988456153e-06, "loss": 0.3408, "step": 32108 }, { "epoch": 3.2644367629117528, "grad_norm": 0.2503705620765686, "learning_rate": 3.242632250940747e-06, "loss": 0.3206, "step": 32109 }, { "epoch": 3.2645384302562017, "grad_norm": 0.278646856546402, "learning_rate": 3.2423000118890835e-06, "loss": 0.2959, "step": 32110 }, { "epoch": 3.2646400976006507, "grad_norm": 0.2704467475414276, "learning_rate": 3.2419677816922923e-06, "loss": 0.3251, "step": 32111 }, { "epoch": 3.2647417649450996, "grad_norm": 0.27190664410591125, "learning_rate": 3.24163556035205e-06, "loss": 0.314, "step": 32112 }, { "epoch": 3.2648434322895485, "grad_norm": 0.30836161971092224, "learning_rate": 3.2413033478700296e-06, "loss": 0.3138, "step": 32113 }, { "epoch": 3.2649450996339975, "grad_norm": 0.47802624106407166, "learning_rate": 3.2409711442479036e-06, "loss": 0.3135, "step": 32114 }, { "epoch": 3.2650467669784464, "grad_norm": 0.27046072483062744, "learning_rate": 3.2406389494873502e-06, "loss": 0.2617, "step": 32115 }, { "epoch": 3.2651484343228954, "grad_norm": 0.2754075527191162, "learning_rate": 3.2403067635900374e-06, "loss": 0.312, "step": 32116 }, { "epoch": 3.2652501016673443, "grad_norm": 0.2841517925262451, "learning_rate": 3.2399745865576427e-06, "loss": 0.3197, "step": 32117 }, { "epoch": 3.2653517690117932, "grad_norm": 0.2844831049442291, "learning_rate": 3.2396424183918364e-06, "loss": 0.3359, "step": 32118 }, { "epoch": 3.265453436356242, "grad_norm": 0.2650577425956726, "learning_rate": 3.2393102590942927e-06, "loss": 0.2962, "step": 32119 }, { "epoch": 3.2655551037006916, "grad_norm": 0.2593955397605896, "learning_rate": 3.238978108666687e-06, "loss": 0.2921, "step": 32120 }, { "epoch": 3.2656567710451405, "grad_norm": 0.2890402376651764, "learning_rate": 3.23864596711069e-06, "loss": 0.3055, "step": 32121 }, { "epoch": 3.2657584383895895, "grad_norm": 0.2774890661239624, "learning_rate": 3.2383138344279773e-06, "loss": 0.3438, "step": 32122 }, { "epoch": 3.2658601057340384, "grad_norm": 0.2944548726081848, "learning_rate": 3.2379817106202192e-06, "loss": 0.3123, "step": 32123 }, { "epoch": 3.2659617730784873, "grad_norm": 0.26974719762802124, "learning_rate": 3.237649595689092e-06, "loss": 0.2748, "step": 32124 }, { "epoch": 3.2660634404229363, "grad_norm": 0.3009057641029358, "learning_rate": 3.2373174896362667e-06, "loss": 0.3004, "step": 32125 }, { "epoch": 3.2661651077673852, "grad_norm": 0.27497920393943787, "learning_rate": 3.236985392463417e-06, "loss": 0.3149, "step": 32126 }, { "epoch": 3.266266775111834, "grad_norm": 0.2716684937477112, "learning_rate": 3.2366533041722165e-06, "loss": 0.3127, "step": 32127 }, { "epoch": 3.266368442456283, "grad_norm": 0.2974044680595398, "learning_rate": 3.2363212247643364e-06, "loss": 0.3105, "step": 32128 }, { "epoch": 3.266470109800732, "grad_norm": 0.2859138250350952, "learning_rate": 3.2359891542414513e-06, "loss": 0.3114, "step": 32129 }, { "epoch": 3.266571777145181, "grad_norm": 0.2743116021156311, "learning_rate": 3.2356570926052343e-06, "loss": 0.3122, "step": 32130 }, { "epoch": 3.26667344448963, "grad_norm": 0.26662975549697876, "learning_rate": 3.235325039857357e-06, "loss": 0.3126, "step": 32131 }, { "epoch": 3.266775111834079, "grad_norm": 0.2646999955177307, "learning_rate": 3.2349929959994943e-06, "loss": 0.2847, "step": 32132 }, { "epoch": 3.266876779178528, "grad_norm": 0.279804527759552, "learning_rate": 3.234660961033316e-06, "loss": 0.3355, "step": 32133 }, { "epoch": 3.2669784465229768, "grad_norm": 0.26504889130592346, "learning_rate": 3.234328934960496e-06, "loss": 0.2703, "step": 32134 }, { "epoch": 3.2670801138674257, "grad_norm": 0.2971563935279846, "learning_rate": 3.233996917782708e-06, "loss": 0.2885, "step": 32135 }, { "epoch": 3.2671817812118746, "grad_norm": 0.2931426763534546, "learning_rate": 3.233664909501623e-06, "loss": 0.2948, "step": 32136 }, { "epoch": 3.2672834485563236, "grad_norm": 0.2705688774585724, "learning_rate": 3.2333329101189156e-06, "loss": 0.3004, "step": 32137 }, { "epoch": 3.2673851159007725, "grad_norm": 0.30104243755340576, "learning_rate": 3.2330009196362554e-06, "loss": 0.3184, "step": 32138 }, { "epoch": 3.2674867832452215, "grad_norm": 0.26174843311309814, "learning_rate": 3.2326689380553178e-06, "loss": 0.3147, "step": 32139 }, { "epoch": 3.2675884505896704, "grad_norm": 0.2630540728569031, "learning_rate": 3.2323369653777747e-06, "loss": 0.3399, "step": 32140 }, { "epoch": 3.2676901179341193, "grad_norm": 0.2795010805130005, "learning_rate": 3.2320050016052973e-06, "loss": 0.294, "step": 32141 }, { "epoch": 3.2677917852785683, "grad_norm": 0.269402414560318, "learning_rate": 3.231673046739559e-06, "loss": 0.2902, "step": 32142 }, { "epoch": 3.2678934526230177, "grad_norm": 0.23989525437355042, "learning_rate": 3.2313411007822306e-06, "loss": 0.297, "step": 32143 }, { "epoch": 3.2679951199674666, "grad_norm": 0.27974972128868103, "learning_rate": 3.231009163734986e-06, "loss": 0.3086, "step": 32144 }, { "epoch": 3.2680967873119156, "grad_norm": 0.27297982573509216, "learning_rate": 3.2306772355994977e-06, "loss": 0.2904, "step": 32145 }, { "epoch": 3.2681984546563645, "grad_norm": 0.2718503475189209, "learning_rate": 3.2303453163774356e-06, "loss": 0.2964, "step": 32146 }, { "epoch": 3.2683001220008134, "grad_norm": 0.28688788414001465, "learning_rate": 3.2300134060704745e-06, "loss": 0.3129, "step": 32147 }, { "epoch": 3.2684017893452624, "grad_norm": 0.28148412704467773, "learning_rate": 3.2296815046802843e-06, "loss": 0.2934, "step": 32148 }, { "epoch": 3.2685034566897113, "grad_norm": 0.27600449323654175, "learning_rate": 3.2293496122085375e-06, "loss": 0.2911, "step": 32149 }, { "epoch": 3.2686051240341603, "grad_norm": 0.2754327058792114, "learning_rate": 3.2290177286569086e-06, "loss": 0.2897, "step": 32150 }, { "epoch": 3.268706791378609, "grad_norm": 0.28498712182044983, "learning_rate": 3.228685854027066e-06, "loss": 0.3059, "step": 32151 }, { "epoch": 3.268808458723058, "grad_norm": 0.28059184551239014, "learning_rate": 3.228353988320684e-06, "loss": 0.3227, "step": 32152 }, { "epoch": 3.268910126067507, "grad_norm": 0.2767874002456665, "learning_rate": 3.2280221315394327e-06, "loss": 0.2801, "step": 32153 }, { "epoch": 3.269011793411956, "grad_norm": 0.2835167944431305, "learning_rate": 3.227690283684985e-06, "loss": 0.3028, "step": 32154 }, { "epoch": 3.269113460756405, "grad_norm": 0.2612355947494507, "learning_rate": 3.2273584447590133e-06, "loss": 0.2822, "step": 32155 }, { "epoch": 3.269215128100854, "grad_norm": 0.27253463864326477, "learning_rate": 3.227026614763188e-06, "loss": 0.3224, "step": 32156 }, { "epoch": 3.269316795445303, "grad_norm": 0.2898474931716919, "learning_rate": 3.2266947936991817e-06, "loss": 0.3099, "step": 32157 }, { "epoch": 3.269418462789752, "grad_norm": 0.29010719060897827, "learning_rate": 3.226362981568665e-06, "loss": 0.2986, "step": 32158 }, { "epoch": 3.2695201301342007, "grad_norm": 0.28578123450279236, "learning_rate": 3.2260311783733094e-06, "loss": 0.3099, "step": 32159 }, { "epoch": 3.2696217974786497, "grad_norm": 0.26323190331459045, "learning_rate": 3.225699384114789e-06, "loss": 0.3203, "step": 32160 }, { "epoch": 3.269723464823099, "grad_norm": 0.29240718483924866, "learning_rate": 3.225367598794772e-06, "loss": 0.3399, "step": 32161 }, { "epoch": 3.269825132167548, "grad_norm": 0.2726098895072937, "learning_rate": 3.2250358224149324e-06, "loss": 0.3173, "step": 32162 }, { "epoch": 3.269926799511997, "grad_norm": 0.28848639130592346, "learning_rate": 3.2247040549769393e-06, "loss": 0.3156, "step": 32163 }, { "epoch": 3.270028466856446, "grad_norm": 0.2860959768295288, "learning_rate": 3.224372296482465e-06, "loss": 0.3, "step": 32164 }, { "epoch": 3.270130134200895, "grad_norm": 0.28450992703437805, "learning_rate": 3.2240405469331837e-06, "loss": 0.3157, "step": 32165 }, { "epoch": 3.2702318015453438, "grad_norm": 0.28418615460395813, "learning_rate": 3.223708806330762e-06, "loss": 0.3094, "step": 32166 }, { "epoch": 3.2703334688897927, "grad_norm": 0.2861078679561615, "learning_rate": 3.2233770746768737e-06, "loss": 0.2919, "step": 32167 }, { "epoch": 3.2704351362342416, "grad_norm": 0.2727159857749939, "learning_rate": 3.22304535197319e-06, "loss": 0.3153, "step": 32168 }, { "epoch": 3.2705368035786906, "grad_norm": 0.25861889123916626, "learning_rate": 3.22271363822138e-06, "loss": 0.2985, "step": 32169 }, { "epoch": 3.2706384709231395, "grad_norm": 0.28028374910354614, "learning_rate": 3.222381933423118e-06, "loss": 0.3049, "step": 32170 }, { "epoch": 3.2707401382675885, "grad_norm": 0.2589550316333771, "learning_rate": 3.222050237580072e-06, "loss": 0.2911, "step": 32171 }, { "epoch": 3.2708418056120374, "grad_norm": 0.2758559584617615, "learning_rate": 3.2217185506939162e-06, "loss": 0.2941, "step": 32172 }, { "epoch": 3.2709434729564864, "grad_norm": 0.30261772871017456, "learning_rate": 3.2213868727663183e-06, "loss": 0.2955, "step": 32173 }, { "epoch": 3.2710451403009353, "grad_norm": 0.25644490122795105, "learning_rate": 3.2210552037989505e-06, "loss": 0.3096, "step": 32174 }, { "epoch": 3.2711468076453842, "grad_norm": 0.27729296684265137, "learning_rate": 3.220723543793485e-06, "loss": 0.3288, "step": 32175 }, { "epoch": 3.271248474989833, "grad_norm": 0.2705027461051941, "learning_rate": 3.220391892751591e-06, "loss": 0.3421, "step": 32176 }, { "epoch": 3.271350142334282, "grad_norm": 0.29660624265670776, "learning_rate": 3.2200602506749403e-06, "loss": 0.3179, "step": 32177 }, { "epoch": 3.271451809678731, "grad_norm": 0.28129714727401733, "learning_rate": 3.219728617565202e-06, "loss": 0.3122, "step": 32178 }, { "epoch": 3.27155347702318, "grad_norm": 0.2512449026107788, "learning_rate": 3.2193969934240476e-06, "loss": 0.3095, "step": 32179 }, { "epoch": 3.271655144367629, "grad_norm": 0.28417423367500305, "learning_rate": 3.2190653782531487e-06, "loss": 0.31, "step": 32180 }, { "epoch": 3.271756811712078, "grad_norm": 0.2581150233745575, "learning_rate": 3.2187337720541757e-06, "loss": 0.2975, "step": 32181 }, { "epoch": 3.271858479056527, "grad_norm": 0.2740788459777832, "learning_rate": 3.2184021748287985e-06, "loss": 0.2877, "step": 32182 }, { "epoch": 3.2719601464009758, "grad_norm": 0.2757813036441803, "learning_rate": 3.218070586578687e-06, "loss": 0.3252, "step": 32183 }, { "epoch": 3.272061813745425, "grad_norm": 0.2727643549442291, "learning_rate": 3.2177390073055128e-06, "loss": 0.3244, "step": 32184 }, { "epoch": 3.272163481089874, "grad_norm": 0.300693154335022, "learning_rate": 3.2174074370109463e-06, "loss": 0.2786, "step": 32185 }, { "epoch": 3.272265148434323, "grad_norm": 0.29845455288887024, "learning_rate": 3.217075875696657e-06, "loss": 0.3241, "step": 32186 }, { "epoch": 3.272366815778772, "grad_norm": 0.28210726380348206, "learning_rate": 3.216744323364317e-06, "loss": 0.3268, "step": 32187 }, { "epoch": 3.272468483123221, "grad_norm": 0.27516114711761475, "learning_rate": 3.216412780015593e-06, "loss": 0.2951, "step": 32188 }, { "epoch": 3.27257015046767, "grad_norm": 0.2544116973876953, "learning_rate": 3.216081245652159e-06, "loss": 0.3001, "step": 32189 }, { "epoch": 3.272671817812119, "grad_norm": 0.29641610383987427, "learning_rate": 3.215749720275684e-06, "loss": 0.3314, "step": 32190 }, { "epoch": 3.2727734851565677, "grad_norm": 0.254740834236145, "learning_rate": 3.2154182038878373e-06, "loss": 0.3127, "step": 32191 }, { "epoch": 3.2728751525010167, "grad_norm": 0.26156723499298096, "learning_rate": 3.2150866964902906e-06, "loss": 0.3278, "step": 32192 }, { "epoch": 3.2729768198454656, "grad_norm": 0.2734149098396301, "learning_rate": 3.2147551980847118e-06, "loss": 0.2977, "step": 32193 }, { "epoch": 3.2730784871899146, "grad_norm": 0.26918432116508484, "learning_rate": 3.2144237086727726e-06, "loss": 0.3174, "step": 32194 }, { "epoch": 3.2731801545343635, "grad_norm": 0.28537967801094055, "learning_rate": 3.214092228256143e-06, "loss": 0.3257, "step": 32195 }, { "epoch": 3.2732818218788124, "grad_norm": 0.2863171100616455, "learning_rate": 3.2137607568364908e-06, "loss": 0.3145, "step": 32196 }, { "epoch": 3.2733834892232614, "grad_norm": 0.27149826288223267, "learning_rate": 3.213429294415489e-06, "loss": 0.2969, "step": 32197 }, { "epoch": 3.2734851565677103, "grad_norm": 0.29406094551086426, "learning_rate": 3.213097840994805e-06, "loss": 0.3017, "step": 32198 }, { "epoch": 3.2735868239121593, "grad_norm": 0.25691238045692444, "learning_rate": 3.212766396576109e-06, "loss": 0.3212, "step": 32199 }, { "epoch": 3.273688491256608, "grad_norm": 0.2669920027256012, "learning_rate": 3.2124349611610727e-06, "loss": 0.3323, "step": 32200 }, { "epoch": 3.273790158601057, "grad_norm": 0.2651562988758087, "learning_rate": 3.212103534751363e-06, "loss": 0.3213, "step": 32201 }, { "epoch": 3.2738918259455065, "grad_norm": 0.522884726524353, "learning_rate": 3.2117721173486518e-06, "loss": 0.3451, "step": 32202 }, { "epoch": 3.2739934932899555, "grad_norm": 0.2893720865249634, "learning_rate": 3.2114407089546063e-06, "loss": 0.3148, "step": 32203 }, { "epoch": 3.2740951606344044, "grad_norm": 0.29050031304359436, "learning_rate": 3.211109309570898e-06, "loss": 0.3298, "step": 32204 }, { "epoch": 3.2741968279788534, "grad_norm": 0.26801803708076477, "learning_rate": 3.210777919199197e-06, "loss": 0.3316, "step": 32205 }, { "epoch": 3.2742984953233023, "grad_norm": 0.2757743299007416, "learning_rate": 3.210446537841171e-06, "loss": 0.3116, "step": 32206 }, { "epoch": 3.2744001626677512, "grad_norm": 0.2725924551486969, "learning_rate": 3.21011516549849e-06, "loss": 0.3168, "step": 32207 }, { "epoch": 3.2745018300122, "grad_norm": 0.28574246168136597, "learning_rate": 3.209783802172823e-06, "loss": 0.2827, "step": 32208 }, { "epoch": 3.274603497356649, "grad_norm": 0.2662259638309479, "learning_rate": 3.2094524478658416e-06, "loss": 0.3241, "step": 32209 }, { "epoch": 3.274705164701098, "grad_norm": 0.29475703835487366, "learning_rate": 3.209121102579211e-06, "loss": 0.3007, "step": 32210 }, { "epoch": 3.274806832045547, "grad_norm": 0.26826921105384827, "learning_rate": 3.208789766314603e-06, "loss": 0.3384, "step": 32211 }, { "epoch": 3.274908499389996, "grad_norm": 0.27544069290161133, "learning_rate": 3.2084584390736885e-06, "loss": 0.3085, "step": 32212 }, { "epoch": 3.275010166734445, "grad_norm": 0.2807011902332306, "learning_rate": 3.208127120858132e-06, "loss": 0.3163, "step": 32213 }, { "epoch": 3.275111834078894, "grad_norm": 0.2878936529159546, "learning_rate": 3.2077958116696085e-06, "loss": 0.3041, "step": 32214 }, { "epoch": 3.2752135014233428, "grad_norm": 0.2835024893283844, "learning_rate": 3.2074645115097803e-06, "loss": 0.3037, "step": 32215 }, { "epoch": 3.2753151687677917, "grad_norm": 0.2928016483783722, "learning_rate": 3.2071332203803207e-06, "loss": 0.3066, "step": 32216 }, { "epoch": 3.2754168361122407, "grad_norm": 0.2661246061325073, "learning_rate": 3.2068019382829e-06, "loss": 0.3124, "step": 32217 }, { "epoch": 3.2755185034566896, "grad_norm": 0.2708994150161743, "learning_rate": 3.2064706652191824e-06, "loss": 0.2934, "step": 32218 }, { "epoch": 3.2756201708011385, "grad_norm": 0.30242064595222473, "learning_rate": 3.206139401190842e-06, "loss": 0.3149, "step": 32219 }, { "epoch": 3.2757218381455875, "grad_norm": 0.27695462107658386, "learning_rate": 3.2058081461995427e-06, "loss": 0.3382, "step": 32220 }, { "epoch": 3.2758235054900364, "grad_norm": 0.27039480209350586, "learning_rate": 3.2054769002469555e-06, "loss": 0.3124, "step": 32221 }, { "epoch": 3.2759251728344854, "grad_norm": 0.25659695267677307, "learning_rate": 3.2051456633347523e-06, "loss": 0.2838, "step": 32222 }, { "epoch": 3.2760268401789343, "grad_norm": 0.2576572299003601, "learning_rate": 3.2048144354645948e-06, "loss": 0.3222, "step": 32223 }, { "epoch": 3.2761285075233832, "grad_norm": 0.28887486457824707, "learning_rate": 3.204483216638159e-06, "loss": 0.3067, "step": 32224 }, { "epoch": 3.2762301748678326, "grad_norm": 0.2839452624320984, "learning_rate": 3.2041520068571075e-06, "loss": 0.3186, "step": 32225 }, { "epoch": 3.2763318422122816, "grad_norm": 0.2599394619464874, "learning_rate": 3.203820806123112e-06, "loss": 0.3179, "step": 32226 }, { "epoch": 3.2764335095567305, "grad_norm": 0.26956310868263245, "learning_rate": 3.2034896144378425e-06, "loss": 0.3376, "step": 32227 }, { "epoch": 3.2765351769011795, "grad_norm": 0.2866249680519104, "learning_rate": 3.203158431802962e-06, "loss": 0.32, "step": 32228 }, { "epoch": 3.2766368442456284, "grad_norm": 0.26797863841056824, "learning_rate": 3.202827258220146e-06, "loss": 0.3067, "step": 32229 }, { "epoch": 3.2767385115900773, "grad_norm": 0.31266897916793823, "learning_rate": 3.2024960936910564e-06, "loss": 0.3351, "step": 32230 }, { "epoch": 3.2768401789345263, "grad_norm": 0.27153104543685913, "learning_rate": 3.202164938217364e-06, "loss": 0.3316, "step": 32231 }, { "epoch": 3.2769418462789752, "grad_norm": 0.2585712969303131, "learning_rate": 3.2018337918007403e-06, "loss": 0.291, "step": 32232 }, { "epoch": 3.277043513623424, "grad_norm": 0.29094645380973816, "learning_rate": 3.2015026544428473e-06, "loss": 0.3308, "step": 32233 }, { "epoch": 3.277145180967873, "grad_norm": 0.2844668924808502, "learning_rate": 3.2011715261453604e-06, "loss": 0.3452, "step": 32234 }, { "epoch": 3.277246848312322, "grad_norm": 0.2610050141811371, "learning_rate": 3.200840406909941e-06, "loss": 0.2956, "step": 32235 }, { "epoch": 3.277348515656771, "grad_norm": 0.27294549345970154, "learning_rate": 3.2005092967382595e-06, "loss": 0.2982, "step": 32236 }, { "epoch": 3.27745018300122, "grad_norm": 0.2761627435684204, "learning_rate": 3.200178195631988e-06, "loss": 0.3024, "step": 32237 }, { "epoch": 3.277551850345669, "grad_norm": 0.27973830699920654, "learning_rate": 3.199847103592787e-06, "loss": 0.2896, "step": 32238 }, { "epoch": 3.277653517690118, "grad_norm": 0.2699044644832611, "learning_rate": 3.1995160206223317e-06, "loss": 0.3214, "step": 32239 }, { "epoch": 3.2777551850345668, "grad_norm": 0.26136544346809387, "learning_rate": 3.1991849467222854e-06, "loss": 0.2936, "step": 32240 }, { "epoch": 3.2778568523790157, "grad_norm": 0.29865801334381104, "learning_rate": 3.198853881894316e-06, "loss": 0.3239, "step": 32241 }, { "epoch": 3.2779585197234646, "grad_norm": 0.2752983272075653, "learning_rate": 3.198522826140096e-06, "loss": 0.2732, "step": 32242 }, { "epoch": 3.278060187067914, "grad_norm": 0.30057889223098755, "learning_rate": 3.198191779461286e-06, "loss": 0.3204, "step": 32243 }, { "epoch": 3.278161854412363, "grad_norm": 0.2439834326505661, "learning_rate": 3.197860741859562e-06, "loss": 0.3053, "step": 32244 }, { "epoch": 3.278263521756812, "grad_norm": 0.2551308572292328, "learning_rate": 3.1975297133365846e-06, "loss": 0.297, "step": 32245 }, { "epoch": 3.278365189101261, "grad_norm": 0.2770478427410126, "learning_rate": 3.1971986938940233e-06, "loss": 0.3297, "step": 32246 }, { "epoch": 3.27846685644571, "grad_norm": 0.27407869696617126, "learning_rate": 3.1968676835335495e-06, "loss": 0.2897, "step": 32247 }, { "epoch": 3.2785685237901587, "grad_norm": 0.27108675241470337, "learning_rate": 3.1965366822568256e-06, "loss": 0.3119, "step": 32248 }, { "epoch": 3.2786701911346077, "grad_norm": 0.2685811221599579, "learning_rate": 3.1962056900655223e-06, "loss": 0.2956, "step": 32249 }, { "epoch": 3.2787718584790566, "grad_norm": 0.2740095257759094, "learning_rate": 3.1958747069613063e-06, "loss": 0.3169, "step": 32250 }, { "epoch": 3.2788735258235056, "grad_norm": 0.27924320101737976, "learning_rate": 3.1955437329458427e-06, "loss": 0.3467, "step": 32251 }, { "epoch": 3.2789751931679545, "grad_norm": 0.28520843386650085, "learning_rate": 3.195212768020804e-06, "loss": 0.3156, "step": 32252 }, { "epoch": 3.2790768605124034, "grad_norm": 0.25752708315849304, "learning_rate": 3.194881812187852e-06, "loss": 0.2979, "step": 32253 }, { "epoch": 3.2791785278568524, "grad_norm": 0.29257434606552124, "learning_rate": 3.194550865448659e-06, "loss": 0.3035, "step": 32254 }, { "epoch": 3.2792801952013013, "grad_norm": 0.25593993067741394, "learning_rate": 3.1942199278048874e-06, "loss": 0.3057, "step": 32255 }, { "epoch": 3.2793818625457503, "grad_norm": 0.26062241196632385, "learning_rate": 3.193888999258206e-06, "loss": 0.3, "step": 32256 }, { "epoch": 3.279483529890199, "grad_norm": 0.27337750792503357, "learning_rate": 3.193558079810286e-06, "loss": 0.3214, "step": 32257 }, { "epoch": 3.279585197234648, "grad_norm": 0.2758142650127411, "learning_rate": 3.1932271694627887e-06, "loss": 0.3354, "step": 32258 }, { "epoch": 3.279686864579097, "grad_norm": 0.28539860248565674, "learning_rate": 3.1928962682173846e-06, "loss": 0.3172, "step": 32259 }, { "epoch": 3.279788531923546, "grad_norm": 0.27147868275642395, "learning_rate": 3.192565376075739e-06, "loss": 0.3127, "step": 32260 }, { "epoch": 3.279890199267995, "grad_norm": 0.26498734951019287, "learning_rate": 3.1922344930395177e-06, "loss": 0.2879, "step": 32261 }, { "epoch": 3.279991866612444, "grad_norm": 0.277944415807724, "learning_rate": 3.191903619110394e-06, "loss": 0.3234, "step": 32262 }, { "epoch": 3.280093533956893, "grad_norm": 0.2767873704433441, "learning_rate": 3.1915727542900267e-06, "loss": 0.3282, "step": 32263 }, { "epoch": 3.280195201301342, "grad_norm": 0.26498788595199585, "learning_rate": 3.1912418985800873e-06, "loss": 0.3393, "step": 32264 }, { "epoch": 3.2802968686457907, "grad_norm": 0.2794162631034851, "learning_rate": 3.1909110519822408e-06, "loss": 0.2996, "step": 32265 }, { "epoch": 3.28039853599024, "grad_norm": 0.26849231123924255, "learning_rate": 3.1905802144981545e-06, "loss": 0.2909, "step": 32266 }, { "epoch": 3.280500203334689, "grad_norm": 0.2697339355945587, "learning_rate": 3.190249386129496e-06, "loss": 0.3289, "step": 32267 }, { "epoch": 3.280601870679138, "grad_norm": 0.27963340282440186, "learning_rate": 3.1899185668779297e-06, "loss": 0.3518, "step": 32268 }, { "epoch": 3.280703538023587, "grad_norm": 0.2636207640171051, "learning_rate": 3.189587756745125e-06, "loss": 0.3043, "step": 32269 }, { "epoch": 3.280805205368036, "grad_norm": 0.2805805802345276, "learning_rate": 3.1892569557327457e-06, "loss": 0.3088, "step": 32270 }, { "epoch": 3.280906872712485, "grad_norm": 0.3830529749393463, "learning_rate": 3.18892616384246e-06, "loss": 0.2923, "step": 32271 }, { "epoch": 3.2810085400569338, "grad_norm": 0.2642923593521118, "learning_rate": 3.1885953810759347e-06, "loss": 0.2916, "step": 32272 }, { "epoch": 3.2811102074013827, "grad_norm": 0.28958630561828613, "learning_rate": 3.1882646074348346e-06, "loss": 0.282, "step": 32273 }, { "epoch": 3.2812118747458316, "grad_norm": 0.31149598956108093, "learning_rate": 3.1879338429208275e-06, "loss": 0.318, "step": 32274 }, { "epoch": 3.2813135420902806, "grad_norm": 0.29383140802383423, "learning_rate": 3.1876030875355777e-06, "loss": 0.3196, "step": 32275 }, { "epoch": 3.2814152094347295, "grad_norm": 0.29153555631637573, "learning_rate": 3.1872723412807536e-06, "loss": 0.3196, "step": 32276 }, { "epoch": 3.2815168767791785, "grad_norm": 0.30101874470710754, "learning_rate": 3.1869416041580213e-06, "loss": 0.318, "step": 32277 }, { "epoch": 3.2816185441236274, "grad_norm": 0.27585405111312866, "learning_rate": 3.1866108761690455e-06, "loss": 0.2803, "step": 32278 }, { "epoch": 3.2817202114680764, "grad_norm": 0.2739729583263397, "learning_rate": 3.1862801573154946e-06, "loss": 0.2842, "step": 32279 }, { "epoch": 3.2818218788125253, "grad_norm": 0.2920033931732178, "learning_rate": 3.1859494475990316e-06, "loss": 0.3109, "step": 32280 }, { "epoch": 3.2819235461569742, "grad_norm": 0.2738242447376251, "learning_rate": 3.185618747021324e-06, "loss": 0.3317, "step": 32281 }, { "epoch": 3.282025213501423, "grad_norm": 0.28800565004348755, "learning_rate": 3.185288055584039e-06, "loss": 0.337, "step": 32282 }, { "epoch": 3.282126880845872, "grad_norm": 0.27637845277786255, "learning_rate": 3.184957373288842e-06, "loss": 0.3616, "step": 32283 }, { "epoch": 3.2822285481903215, "grad_norm": 0.2643551528453827, "learning_rate": 3.1846267001373975e-06, "loss": 0.3143, "step": 32284 }, { "epoch": 3.2823302155347704, "grad_norm": 0.2730756103992462, "learning_rate": 3.1842960361313724e-06, "loss": 0.2884, "step": 32285 }, { "epoch": 3.2824318828792194, "grad_norm": 0.29722774028778076, "learning_rate": 3.1839653812724325e-06, "loss": 0.2928, "step": 32286 }, { "epoch": 3.2825335502236683, "grad_norm": 0.26821085810661316, "learning_rate": 3.1836347355622437e-06, "loss": 0.3041, "step": 32287 }, { "epoch": 3.2826352175681173, "grad_norm": 0.2884351313114166, "learning_rate": 3.183304099002471e-06, "loss": 0.2701, "step": 32288 }, { "epoch": 3.282736884912566, "grad_norm": 0.25820690393447876, "learning_rate": 3.1829734715947813e-06, "loss": 0.2785, "step": 32289 }, { "epoch": 3.282838552257015, "grad_norm": 0.24829208850860596, "learning_rate": 3.182642853340838e-06, "loss": 0.3127, "step": 32290 }, { "epoch": 3.282940219601464, "grad_norm": 0.2776338756084442, "learning_rate": 3.1823122442423094e-06, "loss": 0.2798, "step": 32291 }, { "epoch": 3.283041886945913, "grad_norm": 0.2989127039909363, "learning_rate": 3.18198164430086e-06, "loss": 0.3025, "step": 32292 }, { "epoch": 3.283143554290362, "grad_norm": 0.27627503871917725, "learning_rate": 3.181651053518154e-06, "loss": 0.3003, "step": 32293 }, { "epoch": 3.283245221634811, "grad_norm": 0.2849956452846527, "learning_rate": 3.181320471895859e-06, "loss": 0.2841, "step": 32294 }, { "epoch": 3.28334688897926, "grad_norm": 0.2871377468109131, "learning_rate": 3.180989899435638e-06, "loss": 0.3375, "step": 32295 }, { "epoch": 3.283448556323709, "grad_norm": 0.24972409009933472, "learning_rate": 3.180659336139158e-06, "loss": 0.2728, "step": 32296 }, { "epoch": 3.2835502236681577, "grad_norm": 0.28017905354499817, "learning_rate": 3.180328782008085e-06, "loss": 0.3063, "step": 32297 }, { "epoch": 3.2836518910126067, "grad_norm": 0.28566214442253113, "learning_rate": 3.179998237044082e-06, "loss": 0.2701, "step": 32298 }, { "epoch": 3.2837535583570556, "grad_norm": 0.26842090487480164, "learning_rate": 3.179667701248816e-06, "loss": 0.2885, "step": 32299 }, { "epoch": 3.2838552257015046, "grad_norm": 0.2986908555030823, "learning_rate": 3.179337174623951e-06, "loss": 0.3443, "step": 32300 }, { "epoch": 3.2839568930459535, "grad_norm": 0.30031782388687134, "learning_rate": 3.1790066571711526e-06, "loss": 0.3151, "step": 32301 }, { "epoch": 3.2840585603904024, "grad_norm": 0.26422446966171265, "learning_rate": 3.178676148892087e-06, "loss": 0.3394, "step": 32302 }, { "epoch": 3.2841602277348514, "grad_norm": 0.2922723591327667, "learning_rate": 3.178345649788417e-06, "loss": 0.3259, "step": 32303 }, { "epoch": 3.2842618950793003, "grad_norm": 0.27465057373046875, "learning_rate": 3.17801515986181e-06, "loss": 0.3313, "step": 32304 }, { "epoch": 3.2843635624237493, "grad_norm": 0.27101126313209534, "learning_rate": 3.177684679113928e-06, "loss": 0.302, "step": 32305 }, { "epoch": 3.284465229768198, "grad_norm": 0.2677186131477356, "learning_rate": 3.177354207546438e-06, "loss": 0.3048, "step": 32306 }, { "epoch": 3.2845668971126476, "grad_norm": 0.2688145637512207, "learning_rate": 3.177023745161005e-06, "loss": 0.3112, "step": 32307 }, { "epoch": 3.2846685644570965, "grad_norm": 0.29647642374038696, "learning_rate": 3.176693291959293e-06, "loss": 0.3359, "step": 32308 }, { "epoch": 3.2847702318015455, "grad_norm": 0.2814599275588989, "learning_rate": 3.1763628479429677e-06, "loss": 0.2845, "step": 32309 }, { "epoch": 3.2848718991459944, "grad_norm": 0.2712143659591675, "learning_rate": 3.176032413113691e-06, "loss": 0.2897, "step": 32310 }, { "epoch": 3.2849735664904434, "grad_norm": 0.2710023522377014, "learning_rate": 3.175701987473131e-06, "loss": 0.3343, "step": 32311 }, { "epoch": 3.2850752338348923, "grad_norm": 0.2682740390300751, "learning_rate": 3.1753715710229504e-06, "loss": 0.3308, "step": 32312 }, { "epoch": 3.2851769011793412, "grad_norm": 0.24872435629367828, "learning_rate": 3.1750411637648144e-06, "loss": 0.3258, "step": 32313 }, { "epoch": 3.28527856852379, "grad_norm": 0.2681456506252289, "learning_rate": 3.1747107657003877e-06, "loss": 0.2994, "step": 32314 }, { "epoch": 3.285380235868239, "grad_norm": 0.2915743589401245, "learning_rate": 3.1743803768313336e-06, "loss": 0.2952, "step": 32315 }, { "epoch": 3.285481903212688, "grad_norm": 0.28113412857055664, "learning_rate": 3.1740499971593176e-06, "loss": 0.3218, "step": 32316 }, { "epoch": 3.285583570557137, "grad_norm": 0.2792786657810211, "learning_rate": 3.1737196266860044e-06, "loss": 0.3098, "step": 32317 }, { "epoch": 3.285685237901586, "grad_norm": 0.27811068296432495, "learning_rate": 3.173389265413057e-06, "loss": 0.3539, "step": 32318 }, { "epoch": 3.285786905246035, "grad_norm": 0.2997071444988251, "learning_rate": 3.173058913342141e-06, "loss": 0.3049, "step": 32319 }, { "epoch": 3.285888572590484, "grad_norm": 0.28905239701271057, "learning_rate": 3.1727285704749193e-06, "loss": 0.3095, "step": 32320 }, { "epoch": 3.2859902399349328, "grad_norm": 0.28396424651145935, "learning_rate": 3.1723982368130566e-06, "loss": 0.3151, "step": 32321 }, { "epoch": 3.2860919072793817, "grad_norm": 0.2739489674568176, "learning_rate": 3.172067912358219e-06, "loss": 0.306, "step": 32322 }, { "epoch": 3.2861935746238307, "grad_norm": 0.2675100862979889, "learning_rate": 3.1717375971120675e-06, "loss": 0.2992, "step": 32323 }, { "epoch": 3.2862952419682796, "grad_norm": 0.28249120712280273, "learning_rate": 3.1714072910762682e-06, "loss": 0.3226, "step": 32324 }, { "epoch": 3.286396909312729, "grad_norm": 0.25660502910614014, "learning_rate": 3.1710769942524838e-06, "loss": 0.3295, "step": 32325 }, { "epoch": 3.286498576657178, "grad_norm": 0.2782982587814331, "learning_rate": 3.170746706642379e-06, "loss": 0.2883, "step": 32326 }, { "epoch": 3.286600244001627, "grad_norm": 0.2934313714504242, "learning_rate": 3.170416428247618e-06, "loss": 0.3167, "step": 32327 }, { "epoch": 3.286701911346076, "grad_norm": 0.2718956172466278, "learning_rate": 3.170086159069864e-06, "loss": 0.3022, "step": 32328 }, { "epoch": 3.2868035786905248, "grad_norm": 0.2583901882171631, "learning_rate": 3.1697558991107812e-06, "loss": 0.3184, "step": 32329 }, { "epoch": 3.2869052460349737, "grad_norm": 0.2727133631706238, "learning_rate": 3.1694256483720333e-06, "loss": 0.3276, "step": 32330 }, { "epoch": 3.2870069133794226, "grad_norm": 0.2693101167678833, "learning_rate": 3.169095406855283e-06, "loss": 0.3017, "step": 32331 }, { "epoch": 3.2871085807238716, "grad_norm": 0.26895418763160706, "learning_rate": 3.1687651745621962e-06, "loss": 0.299, "step": 32332 }, { "epoch": 3.2872102480683205, "grad_norm": 0.26969262957572937, "learning_rate": 3.1684349514944346e-06, "loss": 0.3437, "step": 32333 }, { "epoch": 3.2873119154127695, "grad_norm": 0.2705816924571991, "learning_rate": 3.1681047376536633e-06, "loss": 0.3214, "step": 32334 }, { "epoch": 3.2874135827572184, "grad_norm": 0.25749990344047546, "learning_rate": 3.1677745330415442e-06, "loss": 0.2962, "step": 32335 }, { "epoch": 3.2875152501016673, "grad_norm": 0.2607686519622803, "learning_rate": 3.1674443376597414e-06, "loss": 0.3036, "step": 32336 }, { "epoch": 3.2876169174461163, "grad_norm": 0.26422181725502014, "learning_rate": 3.1671141515099192e-06, "loss": 0.2998, "step": 32337 }, { "epoch": 3.2877185847905652, "grad_norm": 0.27668794989585876, "learning_rate": 3.16678397459374e-06, "loss": 0.2985, "step": 32338 }, { "epoch": 3.287820252135014, "grad_norm": 0.2853890061378479, "learning_rate": 3.166453806912868e-06, "loss": 0.3276, "step": 32339 }, { "epoch": 3.287921919479463, "grad_norm": 0.2582906484603882, "learning_rate": 3.166123648468965e-06, "loss": 0.3042, "step": 32340 }, { "epoch": 3.288023586823912, "grad_norm": 0.2794177830219269, "learning_rate": 3.165793499263696e-06, "loss": 0.3071, "step": 32341 }, { "epoch": 3.288125254168361, "grad_norm": 0.2675890326499939, "learning_rate": 3.1654633592987237e-06, "loss": 0.2922, "step": 32342 }, { "epoch": 3.28822692151281, "grad_norm": 0.2748227119445801, "learning_rate": 3.1651332285757103e-06, "loss": 0.2965, "step": 32343 }, { "epoch": 3.288328588857259, "grad_norm": 0.2659777104854584, "learning_rate": 3.1648031070963205e-06, "loss": 0.3003, "step": 32344 }, { "epoch": 3.288430256201708, "grad_norm": 0.286074697971344, "learning_rate": 3.1644729948622156e-06, "loss": 0.3121, "step": 32345 }, { "epoch": 3.2885319235461568, "grad_norm": 0.2638265788555145, "learning_rate": 3.16414289187506e-06, "loss": 0.2964, "step": 32346 }, { "epoch": 3.2886335908906057, "grad_norm": 0.2741536498069763, "learning_rate": 3.163812798136517e-06, "loss": 0.3194, "step": 32347 }, { "epoch": 3.288735258235055, "grad_norm": 0.29073572158813477, "learning_rate": 3.163482713648247e-06, "loss": 0.2821, "step": 32348 }, { "epoch": 3.288836925579504, "grad_norm": 0.27692925930023193, "learning_rate": 3.163152638411917e-06, "loss": 0.2924, "step": 32349 }, { "epoch": 3.288938592923953, "grad_norm": 0.29180634021759033, "learning_rate": 3.1628225724291854e-06, "loss": 0.3045, "step": 32350 }, { "epoch": 3.289040260268402, "grad_norm": 0.2932935953140259, "learning_rate": 3.162492515701718e-06, "loss": 0.3316, "step": 32351 }, { "epoch": 3.289141927612851, "grad_norm": 0.26123979687690735, "learning_rate": 3.162162468231177e-06, "loss": 0.3243, "step": 32352 }, { "epoch": 3.2892435949573, "grad_norm": 0.2761789858341217, "learning_rate": 3.161832430019224e-06, "loss": 0.3112, "step": 32353 }, { "epoch": 3.2893452623017487, "grad_norm": 0.2589563727378845, "learning_rate": 3.161502401067523e-06, "loss": 0.3019, "step": 32354 }, { "epoch": 3.2894469296461977, "grad_norm": 0.26931464672088623, "learning_rate": 3.1611723813777356e-06, "loss": 0.3136, "step": 32355 }, { "epoch": 3.2895485969906466, "grad_norm": 0.2635263502597809, "learning_rate": 3.1608423709515247e-06, "loss": 0.3156, "step": 32356 }, { "epoch": 3.2896502643350956, "grad_norm": 0.2868214547634125, "learning_rate": 3.1605123697905536e-06, "loss": 0.2883, "step": 32357 }, { "epoch": 3.2897519316795445, "grad_norm": 0.2766640782356262, "learning_rate": 3.160182377896483e-06, "loss": 0.352, "step": 32358 }, { "epoch": 3.2898535990239934, "grad_norm": 0.2701992392539978, "learning_rate": 3.1598523952709788e-06, "loss": 0.2951, "step": 32359 }, { "epoch": 3.2899552663684424, "grad_norm": 0.2629610598087311, "learning_rate": 3.159522421915697e-06, "loss": 0.3384, "step": 32360 }, { "epoch": 3.2900569337128913, "grad_norm": 0.25855645537376404, "learning_rate": 3.1591924578323062e-06, "loss": 0.3301, "step": 32361 }, { "epoch": 3.2901586010573403, "grad_norm": 0.2715435028076172, "learning_rate": 3.1588625030224663e-06, "loss": 0.2886, "step": 32362 }, { "epoch": 3.290260268401789, "grad_norm": 0.2880430817604065, "learning_rate": 3.158532557487839e-06, "loss": 0.3167, "step": 32363 }, { "epoch": 3.290361935746238, "grad_norm": 0.2827349305152893, "learning_rate": 3.158202621230089e-06, "loss": 0.3511, "step": 32364 }, { "epoch": 3.290463603090687, "grad_norm": 0.30935487151145935, "learning_rate": 3.1578726942508743e-06, "loss": 0.2779, "step": 32365 }, { "epoch": 3.2905652704351365, "grad_norm": 0.2637137174606323, "learning_rate": 3.157542776551859e-06, "loss": 0.3206, "step": 32366 }, { "epoch": 3.2906669377795854, "grad_norm": 0.2680923640727997, "learning_rate": 3.157212868134707e-06, "loss": 0.3098, "step": 32367 }, { "epoch": 3.2907686051240344, "grad_norm": 0.2676059901714325, "learning_rate": 3.156882969001077e-06, "loss": 0.3111, "step": 32368 }, { "epoch": 3.2908702724684833, "grad_norm": 0.2835836112499237, "learning_rate": 3.1565530791526355e-06, "loss": 0.291, "step": 32369 }, { "epoch": 3.2909719398129322, "grad_norm": 0.3068992495536804, "learning_rate": 3.156223198591038e-06, "loss": 0.3344, "step": 32370 }, { "epoch": 3.291073607157381, "grad_norm": 0.2782787084579468, "learning_rate": 3.1558933273179514e-06, "loss": 0.331, "step": 32371 }, { "epoch": 3.29117527450183, "grad_norm": 0.26645731925964355, "learning_rate": 3.1555634653350365e-06, "loss": 0.3317, "step": 32372 }, { "epoch": 3.291276941846279, "grad_norm": 0.2810260057449341, "learning_rate": 3.1552336126439532e-06, "loss": 0.2998, "step": 32373 }, { "epoch": 3.291378609190728, "grad_norm": 0.27951112389564514, "learning_rate": 3.1549037692463672e-06, "loss": 0.3068, "step": 32374 }, { "epoch": 3.291480276535177, "grad_norm": 0.25520041584968567, "learning_rate": 3.154573935143934e-06, "loss": 0.3096, "step": 32375 }, { "epoch": 3.291581943879626, "grad_norm": 0.2978382408618927, "learning_rate": 3.15424411033832e-06, "loss": 0.3114, "step": 32376 }, { "epoch": 3.291683611224075, "grad_norm": 0.28128114342689514, "learning_rate": 3.1539142948311875e-06, "loss": 0.2931, "step": 32377 }, { "epoch": 3.2917852785685238, "grad_norm": 0.28104162216186523, "learning_rate": 3.153584488624194e-06, "loss": 0.2814, "step": 32378 }, { "epoch": 3.2918869459129727, "grad_norm": 0.2930373549461365, "learning_rate": 3.1532546917190056e-06, "loss": 0.2963, "step": 32379 }, { "epoch": 3.2919886132574216, "grad_norm": 0.2589963376522064, "learning_rate": 3.1529249041172777e-06, "loss": 0.3013, "step": 32380 }, { "epoch": 3.2920902806018706, "grad_norm": 0.29210689663887024, "learning_rate": 3.1525951258206768e-06, "loss": 0.3252, "step": 32381 }, { "epoch": 3.2921919479463195, "grad_norm": 0.266055703163147, "learning_rate": 3.1522653568308643e-06, "loss": 0.2895, "step": 32382 }, { "epoch": 3.2922936152907685, "grad_norm": 0.2582925260066986, "learning_rate": 3.1519355971494968e-06, "loss": 0.294, "step": 32383 }, { "epoch": 3.2923952826352174, "grad_norm": 0.2784930467605591, "learning_rate": 3.151605846778242e-06, "loss": 0.3225, "step": 32384 }, { "epoch": 3.2924969499796664, "grad_norm": 0.26009270548820496, "learning_rate": 3.1512761057187535e-06, "loss": 0.309, "step": 32385 }, { "epoch": 3.2925986173241153, "grad_norm": 0.2968190610408783, "learning_rate": 3.1509463739726985e-06, "loss": 0.2964, "step": 32386 }, { "epoch": 3.2927002846685642, "grad_norm": 0.2816814184188843, "learning_rate": 3.1506166515417385e-06, "loss": 0.2848, "step": 32387 }, { "epoch": 3.292801952013013, "grad_norm": 0.2778337597846985, "learning_rate": 3.1502869384275293e-06, "loss": 0.28, "step": 32388 }, { "epoch": 3.2929036193574626, "grad_norm": 0.2873474359512329, "learning_rate": 3.1499572346317375e-06, "loss": 0.316, "step": 32389 }, { "epoch": 3.2930052867019115, "grad_norm": 0.26509925723075867, "learning_rate": 3.149627540156019e-06, "loss": 0.3335, "step": 32390 }, { "epoch": 3.2931069540463604, "grad_norm": 0.30691421031951904, "learning_rate": 3.149297855002038e-06, "loss": 0.2888, "step": 32391 }, { "epoch": 3.2932086213908094, "grad_norm": 0.26691779494285583, "learning_rate": 3.1489681791714565e-06, "loss": 0.3383, "step": 32392 }, { "epoch": 3.2933102887352583, "grad_norm": 0.2787158787250519, "learning_rate": 3.1486385126659303e-06, "loss": 0.3246, "step": 32393 }, { "epoch": 3.2934119560797073, "grad_norm": 0.2762228548526764, "learning_rate": 3.1483088554871266e-06, "loss": 0.2764, "step": 32394 }, { "epoch": 3.293513623424156, "grad_norm": 0.27608317136764526, "learning_rate": 3.1479792076367e-06, "loss": 0.3083, "step": 32395 }, { "epoch": 3.293615290768605, "grad_norm": 0.2534825801849365, "learning_rate": 3.1476495691163165e-06, "loss": 0.3342, "step": 32396 }, { "epoch": 3.293716958113054, "grad_norm": 0.28720712661743164, "learning_rate": 3.147319939927633e-06, "loss": 0.302, "step": 32397 }, { "epoch": 3.293818625457503, "grad_norm": 0.288450688123703, "learning_rate": 3.14699032007231e-06, "loss": 0.3106, "step": 32398 }, { "epoch": 3.293920292801952, "grad_norm": 0.27303192019462585, "learning_rate": 3.146660709552013e-06, "loss": 0.3229, "step": 32399 }, { "epoch": 3.294021960146401, "grad_norm": 0.26376670598983765, "learning_rate": 3.1463311083683954e-06, "loss": 0.3662, "step": 32400 }, { "epoch": 3.29412362749085, "grad_norm": 0.2735348045825958, "learning_rate": 3.1460015165231238e-06, "loss": 0.3187, "step": 32401 }, { "epoch": 3.294225294835299, "grad_norm": 0.2955138385295868, "learning_rate": 3.1456719340178545e-06, "loss": 0.3597, "step": 32402 }, { "epoch": 3.2943269621797477, "grad_norm": 0.2694045603275299, "learning_rate": 3.1453423608542477e-06, "loss": 0.3411, "step": 32403 }, { "epoch": 3.2944286295241967, "grad_norm": 0.2588621973991394, "learning_rate": 3.1450127970339695e-06, "loss": 0.3005, "step": 32404 }, { "epoch": 3.2945302968686456, "grad_norm": 0.26918867230415344, "learning_rate": 3.1446832425586726e-06, "loss": 0.2953, "step": 32405 }, { "epoch": 3.2946319642130946, "grad_norm": 0.2879500389099121, "learning_rate": 3.1443536974300225e-06, "loss": 0.3136, "step": 32406 }, { "epoch": 3.294733631557544, "grad_norm": 0.28149908781051636, "learning_rate": 3.144024161649677e-06, "loss": 0.309, "step": 32407 }, { "epoch": 3.294835298901993, "grad_norm": 0.27354928851127625, "learning_rate": 3.1436946352192945e-06, "loss": 0.31, "step": 32408 }, { "epoch": 3.294936966246442, "grad_norm": 0.24684570729732513, "learning_rate": 3.143365118140541e-06, "loss": 0.3344, "step": 32409 }, { "epoch": 3.2950386335908908, "grad_norm": 0.2761322259902954, "learning_rate": 3.1430356104150707e-06, "loss": 0.3228, "step": 32410 }, { "epoch": 3.2951403009353397, "grad_norm": 0.28969013690948486, "learning_rate": 3.142706112044547e-06, "loss": 0.3109, "step": 32411 }, { "epoch": 3.2952419682797887, "grad_norm": 0.2652382254600525, "learning_rate": 3.1423766230306272e-06, "loss": 0.2921, "step": 32412 }, { "epoch": 3.2953436356242376, "grad_norm": 0.27967074513435364, "learning_rate": 3.1420471433749715e-06, "loss": 0.3012, "step": 32413 }, { "epoch": 3.2954453029686865, "grad_norm": 0.27760395407676697, "learning_rate": 3.141717673079244e-06, "loss": 0.3368, "step": 32414 }, { "epoch": 3.2955469703131355, "grad_norm": 0.24907085299491882, "learning_rate": 3.141388212145099e-06, "loss": 0.3015, "step": 32415 }, { "epoch": 3.2956486376575844, "grad_norm": 0.2897222936153412, "learning_rate": 3.1410587605742e-06, "loss": 0.3182, "step": 32416 }, { "epoch": 3.2957503050020334, "grad_norm": 0.25241824984550476, "learning_rate": 3.140729318368204e-06, "loss": 0.3259, "step": 32417 }, { "epoch": 3.2958519723464823, "grad_norm": 0.25927790999412537, "learning_rate": 3.1403998855287717e-06, "loss": 0.3376, "step": 32418 }, { "epoch": 3.2959536396909312, "grad_norm": 0.2809647023677826, "learning_rate": 3.140070462057564e-06, "loss": 0.3532, "step": 32419 }, { "epoch": 3.29605530703538, "grad_norm": 0.27048689126968384, "learning_rate": 3.1397410479562385e-06, "loss": 0.3467, "step": 32420 }, { "epoch": 3.296156974379829, "grad_norm": 0.2922080159187317, "learning_rate": 3.139411643226456e-06, "loss": 0.3034, "step": 32421 }, { "epoch": 3.296258641724278, "grad_norm": 0.2813980281352997, "learning_rate": 3.1390822478698747e-06, "loss": 0.3231, "step": 32422 }, { "epoch": 3.296360309068727, "grad_norm": 0.26897481083869934, "learning_rate": 3.1387528618881546e-06, "loss": 0.3091, "step": 32423 }, { "epoch": 3.296461976413176, "grad_norm": 0.28117161989212036, "learning_rate": 3.1384234852829563e-06, "loss": 0.3104, "step": 32424 }, { "epoch": 3.296563643757625, "grad_norm": 0.2784225046634674, "learning_rate": 3.1380941180559378e-06, "loss": 0.2964, "step": 32425 }, { "epoch": 3.296665311102074, "grad_norm": 0.2690853476524353, "learning_rate": 3.1377647602087592e-06, "loss": 0.2914, "step": 32426 }, { "epoch": 3.2967669784465228, "grad_norm": 0.2708984911441803, "learning_rate": 3.1374354117430783e-06, "loss": 0.2962, "step": 32427 }, { "epoch": 3.2968686457909717, "grad_norm": 0.2522757053375244, "learning_rate": 3.1371060726605553e-06, "loss": 0.2806, "step": 32428 }, { "epoch": 3.2969703131354207, "grad_norm": 0.28411000967025757, "learning_rate": 3.13677674296285e-06, "loss": 0.3069, "step": 32429 }, { "epoch": 3.29707198047987, "grad_norm": 0.2731003761291504, "learning_rate": 3.1364474226516195e-06, "loss": 0.2771, "step": 32430 }, { "epoch": 3.297173647824319, "grad_norm": 0.269727885723114, "learning_rate": 3.1361181117285255e-06, "loss": 0.2994, "step": 32431 }, { "epoch": 3.297275315168768, "grad_norm": 0.2667193114757538, "learning_rate": 3.1357888101952245e-06, "loss": 0.2907, "step": 32432 }, { "epoch": 3.297376982513217, "grad_norm": 0.27874836325645447, "learning_rate": 3.1354595180533764e-06, "loss": 0.3332, "step": 32433 }, { "epoch": 3.297478649857666, "grad_norm": 0.2811429798603058, "learning_rate": 3.135130235304641e-06, "loss": 0.2678, "step": 32434 }, { "epoch": 3.2975803172021148, "grad_norm": 0.27569636702537537, "learning_rate": 3.134800961950675e-06, "loss": 0.3107, "step": 32435 }, { "epoch": 3.2976819845465637, "grad_norm": 0.2773589491844177, "learning_rate": 3.13447169799314e-06, "loss": 0.3125, "step": 32436 }, { "epoch": 3.2977836518910126, "grad_norm": 0.2919563949108124, "learning_rate": 3.134142443433692e-06, "loss": 0.3032, "step": 32437 }, { "epoch": 3.2978853192354616, "grad_norm": 0.28752070665359497, "learning_rate": 3.1338131982739916e-06, "loss": 0.2896, "step": 32438 }, { "epoch": 3.2979869865799105, "grad_norm": 0.27056509256362915, "learning_rate": 3.1334839625156976e-06, "loss": 0.3207, "step": 32439 }, { "epoch": 3.2980886539243595, "grad_norm": 0.2673628330230713, "learning_rate": 3.133154736160466e-06, "loss": 0.3146, "step": 32440 }, { "epoch": 3.2981903212688084, "grad_norm": 0.2690461277961731, "learning_rate": 3.132825519209959e-06, "loss": 0.3226, "step": 32441 }, { "epoch": 3.2982919886132573, "grad_norm": 0.28472909331321716, "learning_rate": 3.1324963116658326e-06, "loss": 0.2948, "step": 32442 }, { "epoch": 3.2983936559577063, "grad_norm": 0.2772320806980133, "learning_rate": 3.132167113529745e-06, "loss": 0.3178, "step": 32443 }, { "epoch": 3.2984953233021552, "grad_norm": 0.2687324583530426, "learning_rate": 3.131837924803357e-06, "loss": 0.3158, "step": 32444 }, { "epoch": 3.298596990646604, "grad_norm": 0.2795736491680145, "learning_rate": 3.1315087454883254e-06, "loss": 0.2829, "step": 32445 }, { "epoch": 3.298698657991053, "grad_norm": 0.2870353162288666, "learning_rate": 3.1311795755863085e-06, "loss": 0.3135, "step": 32446 }, { "epoch": 3.298800325335502, "grad_norm": 0.25078338384628296, "learning_rate": 3.130850415098965e-06, "loss": 0.3587, "step": 32447 }, { "epoch": 3.2989019926799514, "grad_norm": 0.30252236127853394, "learning_rate": 3.130521264027952e-06, "loss": 0.3096, "step": 32448 }, { "epoch": 3.2990036600244004, "grad_norm": 0.26230528950691223, "learning_rate": 3.13019212237493e-06, "loss": 0.3063, "step": 32449 }, { "epoch": 3.2991053273688493, "grad_norm": 0.2568521499633789, "learning_rate": 3.1298629901415545e-06, "loss": 0.3025, "step": 32450 }, { "epoch": 3.2992069947132983, "grad_norm": 0.262077659368515, "learning_rate": 3.129533867329486e-06, "loss": 0.3078, "step": 32451 }, { "epoch": 3.299308662057747, "grad_norm": 0.2997305393218994, "learning_rate": 3.1292047539403804e-06, "loss": 0.2915, "step": 32452 }, { "epoch": 3.299410329402196, "grad_norm": 0.2662228047847748, "learning_rate": 3.1288756499758966e-06, "loss": 0.3032, "step": 32453 }, { "epoch": 3.299511996746645, "grad_norm": 0.2805935740470886, "learning_rate": 3.128546555437694e-06, "loss": 0.3097, "step": 32454 }, { "epoch": 3.299613664091094, "grad_norm": 0.2553921937942505, "learning_rate": 3.128217470327428e-06, "loss": 0.3404, "step": 32455 }, { "epoch": 3.299715331435543, "grad_norm": 0.2569293975830078, "learning_rate": 3.1278883946467577e-06, "loss": 0.3155, "step": 32456 }, { "epoch": 3.299816998779992, "grad_norm": 0.27515631914138794, "learning_rate": 3.1275593283973404e-06, "loss": 0.3011, "step": 32457 }, { "epoch": 3.299918666124441, "grad_norm": 0.2896491289138794, "learning_rate": 3.1272302715808343e-06, "loss": 0.3328, "step": 32458 }, { "epoch": 3.30002033346889, "grad_norm": 0.26267334818840027, "learning_rate": 3.1269012241988984e-06, "loss": 0.2927, "step": 32459 }, { "epoch": 3.3001220008133387, "grad_norm": 0.28636449575424194, "learning_rate": 3.126572186253188e-06, "loss": 0.3395, "step": 32460 }, { "epoch": 3.3002236681577877, "grad_norm": 0.28710728883743286, "learning_rate": 3.1262431577453623e-06, "loss": 0.3086, "step": 32461 }, { "epoch": 3.3003253355022366, "grad_norm": 0.2774413824081421, "learning_rate": 3.1259141386770773e-06, "loss": 0.3303, "step": 32462 }, { "epoch": 3.3004270028466856, "grad_norm": 0.3095929026603699, "learning_rate": 3.125585129049992e-06, "loss": 0.3721, "step": 32463 }, { "epoch": 3.3005286701911345, "grad_norm": 0.2591013014316559, "learning_rate": 3.1252561288657634e-06, "loss": 0.2868, "step": 32464 }, { "epoch": 3.3006303375355834, "grad_norm": 0.27635657787323, "learning_rate": 3.1249271381260486e-06, "loss": 0.3211, "step": 32465 }, { "epoch": 3.3007320048800324, "grad_norm": 0.28167060017585754, "learning_rate": 3.124598156832507e-06, "loss": 0.3577, "step": 32466 }, { "epoch": 3.3008336722244813, "grad_norm": 0.2661738097667694, "learning_rate": 3.1242691849867924e-06, "loss": 0.3053, "step": 32467 }, { "epoch": 3.3009353395689303, "grad_norm": 0.3135046064853668, "learning_rate": 3.1239402225905635e-06, "loss": 0.3164, "step": 32468 }, { "epoch": 3.301037006913379, "grad_norm": 0.2818226218223572, "learning_rate": 3.1236112696454795e-06, "loss": 0.309, "step": 32469 }, { "epoch": 3.301138674257828, "grad_norm": 0.28854474425315857, "learning_rate": 3.123282326153195e-06, "loss": 0.3554, "step": 32470 }, { "epoch": 3.3012403416022775, "grad_norm": 0.27180585265159607, "learning_rate": 3.1229533921153688e-06, "loss": 0.3364, "step": 32471 }, { "epoch": 3.3013420089467265, "grad_norm": 0.2627742290496826, "learning_rate": 3.122624467533657e-06, "loss": 0.322, "step": 32472 }, { "epoch": 3.3014436762911754, "grad_norm": 0.2710948884487152, "learning_rate": 3.1222955524097163e-06, "loss": 0.3065, "step": 32473 }, { "epoch": 3.3015453436356244, "grad_norm": 0.2683221697807312, "learning_rate": 3.121966646745206e-06, "loss": 0.3061, "step": 32474 }, { "epoch": 3.3016470109800733, "grad_norm": 0.2722647190093994, "learning_rate": 3.1216377505417807e-06, "loss": 0.2946, "step": 32475 }, { "epoch": 3.3017486783245222, "grad_norm": 0.281758576631546, "learning_rate": 3.121308863801098e-06, "loss": 0.3115, "step": 32476 }, { "epoch": 3.301850345668971, "grad_norm": 0.26225531101226807, "learning_rate": 3.1209799865248147e-06, "loss": 0.3274, "step": 32477 }, { "epoch": 3.30195201301342, "grad_norm": 0.26623648405075073, "learning_rate": 3.120651118714587e-06, "loss": 0.3323, "step": 32478 }, { "epoch": 3.302053680357869, "grad_norm": 0.2702302038669586, "learning_rate": 3.120322260372074e-06, "loss": 0.3201, "step": 32479 }, { "epoch": 3.302155347702318, "grad_norm": 0.26804956793785095, "learning_rate": 3.1199934114989296e-06, "loss": 0.2699, "step": 32480 }, { "epoch": 3.302257015046767, "grad_norm": 0.28241366147994995, "learning_rate": 3.1196645720968123e-06, "loss": 0.321, "step": 32481 }, { "epoch": 3.302358682391216, "grad_norm": 0.2633977234363556, "learning_rate": 3.119335742167377e-06, "loss": 0.309, "step": 32482 }, { "epoch": 3.302460349735665, "grad_norm": 0.2684462070465088, "learning_rate": 3.119006921712282e-06, "loss": 0.3522, "step": 32483 }, { "epoch": 3.3025620170801138, "grad_norm": 0.28320974111557007, "learning_rate": 3.118678110733183e-06, "loss": 0.2996, "step": 32484 }, { "epoch": 3.3026636844245627, "grad_norm": 0.2596593201160431, "learning_rate": 3.1183493092317363e-06, "loss": 0.3423, "step": 32485 }, { "epoch": 3.3027653517690116, "grad_norm": 0.2756028175354004, "learning_rate": 3.1180205172096002e-06, "loss": 0.3112, "step": 32486 }, { "epoch": 3.3028670191134606, "grad_norm": 0.2546176314353943, "learning_rate": 3.1176917346684277e-06, "loss": 0.3114, "step": 32487 }, { "epoch": 3.3029686864579095, "grad_norm": 0.2776488661766052, "learning_rate": 3.117362961609877e-06, "loss": 0.3059, "step": 32488 }, { "epoch": 3.303070353802359, "grad_norm": 0.28336337208747864, "learning_rate": 3.1170341980356056e-06, "loss": 0.3143, "step": 32489 }, { "epoch": 3.303172021146808, "grad_norm": 0.25862616300582886, "learning_rate": 3.1167054439472675e-06, "loss": 0.2779, "step": 32490 }, { "epoch": 3.303273688491257, "grad_norm": 0.25419002771377563, "learning_rate": 3.1163766993465205e-06, "loss": 0.2837, "step": 32491 }, { "epoch": 3.3033753558357057, "grad_norm": 0.2945992946624756, "learning_rate": 3.1160479642350193e-06, "loss": 0.3006, "step": 32492 }, { "epoch": 3.3034770231801547, "grad_norm": 0.2972390055656433, "learning_rate": 3.115719238614421e-06, "loss": 0.3149, "step": 32493 }, { "epoch": 3.3035786905246036, "grad_norm": 0.2924647331237793, "learning_rate": 3.1153905224863824e-06, "loss": 0.3038, "step": 32494 }, { "epoch": 3.3036803578690526, "grad_norm": 0.2779349982738495, "learning_rate": 3.115061815852557e-06, "loss": 0.297, "step": 32495 }, { "epoch": 3.3037820252135015, "grad_norm": 0.28288787603378296, "learning_rate": 3.1147331187146037e-06, "loss": 0.2821, "step": 32496 }, { "epoch": 3.3038836925579504, "grad_norm": 0.25459983944892883, "learning_rate": 3.114404431074175e-06, "loss": 0.3111, "step": 32497 }, { "epoch": 3.3039853599023994, "grad_norm": 0.26410549879074097, "learning_rate": 3.1140757529329302e-06, "loss": 0.3122, "step": 32498 }, { "epoch": 3.3040870272468483, "grad_norm": 0.30561918020248413, "learning_rate": 3.1137470842925233e-06, "loss": 0.3025, "step": 32499 }, { "epoch": 3.3041886945912973, "grad_norm": 0.29031455516815186, "learning_rate": 3.1134184251546097e-06, "loss": 0.3158, "step": 32500 }, { "epoch": 3.304290361935746, "grad_norm": 0.28407400846481323, "learning_rate": 3.113089775520847e-06, "loss": 0.3004, "step": 32501 }, { "epoch": 3.304392029280195, "grad_norm": 0.2968928813934326, "learning_rate": 3.112761135392889e-06, "loss": 0.3139, "step": 32502 }, { "epoch": 3.304493696624644, "grad_norm": 0.2765553593635559, "learning_rate": 3.1124325047723914e-06, "loss": 0.3211, "step": 32503 }, { "epoch": 3.304595363969093, "grad_norm": 0.27996042370796204, "learning_rate": 3.1121038836610106e-06, "loss": 0.3025, "step": 32504 }, { "epoch": 3.304697031313542, "grad_norm": 0.2836073637008667, "learning_rate": 3.111775272060402e-06, "loss": 0.3236, "step": 32505 }, { "epoch": 3.304798698657991, "grad_norm": 0.2668163478374481, "learning_rate": 3.111446669972221e-06, "loss": 0.317, "step": 32506 }, { "epoch": 3.30490036600244, "grad_norm": 0.26428723335266113, "learning_rate": 3.1111180773981224e-06, "loss": 0.3279, "step": 32507 }, { "epoch": 3.305002033346889, "grad_norm": 0.2684977948665619, "learning_rate": 3.110789494339762e-06, "loss": 0.3408, "step": 32508 }, { "epoch": 3.3051037006913377, "grad_norm": 0.2832159101963043, "learning_rate": 3.1104609207987963e-06, "loss": 0.3029, "step": 32509 }, { "epoch": 3.3052053680357867, "grad_norm": 0.2711329758167267, "learning_rate": 3.110132356776878e-06, "loss": 0.3169, "step": 32510 }, { "epoch": 3.3053070353802356, "grad_norm": 0.2720808684825897, "learning_rate": 3.109803802275666e-06, "loss": 0.31, "step": 32511 }, { "epoch": 3.305408702724685, "grad_norm": 0.2784508168697357, "learning_rate": 3.10947525729681e-06, "loss": 0.3025, "step": 32512 }, { "epoch": 3.305510370069134, "grad_norm": 0.2886470556259155, "learning_rate": 3.1091467218419702e-06, "loss": 0.3052, "step": 32513 }, { "epoch": 3.305612037413583, "grad_norm": 0.27316802740097046, "learning_rate": 3.1088181959128006e-06, "loss": 0.2998, "step": 32514 }, { "epoch": 3.305713704758032, "grad_norm": 0.2710719108581543, "learning_rate": 3.1084896795109543e-06, "loss": 0.3277, "step": 32515 }, { "epoch": 3.3058153721024808, "grad_norm": 0.2886074483394623, "learning_rate": 3.1081611726380893e-06, "loss": 0.3045, "step": 32516 }, { "epoch": 3.3059170394469297, "grad_norm": 0.26624399423599243, "learning_rate": 3.107832675295856e-06, "loss": 0.2965, "step": 32517 }, { "epoch": 3.3060187067913787, "grad_norm": 0.27973803877830505, "learning_rate": 3.107504187485913e-06, "loss": 0.303, "step": 32518 }, { "epoch": 3.3061203741358276, "grad_norm": 0.2999812960624695, "learning_rate": 3.1071757092099155e-06, "loss": 0.3067, "step": 32519 }, { "epoch": 3.3062220414802765, "grad_norm": 0.2745799720287323, "learning_rate": 3.106847240469515e-06, "loss": 0.3648, "step": 32520 }, { "epoch": 3.3063237088247255, "grad_norm": 0.2860228717327118, "learning_rate": 3.106518781266371e-06, "loss": 0.2791, "step": 32521 }, { "epoch": 3.3064253761691744, "grad_norm": 0.2766273617744446, "learning_rate": 3.1061903316021326e-06, "loss": 0.3114, "step": 32522 }, { "epoch": 3.3065270435136234, "grad_norm": 0.27084892988204956, "learning_rate": 3.1058618914784577e-06, "loss": 0.3148, "step": 32523 }, { "epoch": 3.3066287108580723, "grad_norm": 0.2932496666908264, "learning_rate": 3.1055334608970015e-06, "loss": 0.2989, "step": 32524 }, { "epoch": 3.3067303782025212, "grad_norm": 0.2695891261100769, "learning_rate": 3.105205039859417e-06, "loss": 0.3152, "step": 32525 }, { "epoch": 3.30683204554697, "grad_norm": 0.28102582693099976, "learning_rate": 3.1048766283673608e-06, "loss": 0.3024, "step": 32526 }, { "epoch": 3.306933712891419, "grad_norm": 0.2568284273147583, "learning_rate": 3.1045482264224834e-06, "loss": 0.3128, "step": 32527 }, { "epoch": 3.307035380235868, "grad_norm": 0.2710764408111572, "learning_rate": 3.104219834026442e-06, "loss": 0.3019, "step": 32528 }, { "epoch": 3.307137047580317, "grad_norm": 0.28770992159843445, "learning_rate": 3.1038914511808916e-06, "loss": 0.2918, "step": 32529 }, { "epoch": 3.3072387149247664, "grad_norm": 0.2618403732776642, "learning_rate": 3.1035630778874847e-06, "loss": 0.2841, "step": 32530 }, { "epoch": 3.3073403822692153, "grad_norm": 0.2617557644844055, "learning_rate": 3.103234714147878e-06, "loss": 0.3205, "step": 32531 }, { "epoch": 3.3074420496136643, "grad_norm": 0.262956827878952, "learning_rate": 3.102906359963722e-06, "loss": 0.3174, "step": 32532 }, { "epoch": 3.3075437169581132, "grad_norm": 0.26256898045539856, "learning_rate": 3.102578015336673e-06, "loss": 0.2984, "step": 32533 }, { "epoch": 3.307645384302562, "grad_norm": 0.28834813833236694, "learning_rate": 3.102249680268388e-06, "loss": 0.3068, "step": 32534 }, { "epoch": 3.307747051647011, "grad_norm": 0.2688809633255005, "learning_rate": 3.101921354760514e-06, "loss": 0.2982, "step": 32535 }, { "epoch": 3.30784871899146, "grad_norm": 0.2661561965942383, "learning_rate": 3.1015930388147123e-06, "loss": 0.3086, "step": 32536 }, { "epoch": 3.307950386335909, "grad_norm": 0.27110299468040466, "learning_rate": 3.1012647324326316e-06, "loss": 0.3061, "step": 32537 }, { "epoch": 3.308052053680358, "grad_norm": 0.2631511986255646, "learning_rate": 3.1009364356159284e-06, "loss": 0.2994, "step": 32538 }, { "epoch": 3.308153721024807, "grad_norm": 0.2584712505340576, "learning_rate": 3.1006081483662582e-06, "loss": 0.312, "step": 32539 }, { "epoch": 3.308255388369256, "grad_norm": 0.26434415578842163, "learning_rate": 3.10027987068527e-06, "loss": 0.2748, "step": 32540 }, { "epoch": 3.3083570557137048, "grad_norm": 0.2585547864437103, "learning_rate": 3.099951602574623e-06, "loss": 0.3028, "step": 32541 }, { "epoch": 3.3084587230581537, "grad_norm": 0.2694520950317383, "learning_rate": 3.0996233440359658e-06, "loss": 0.3182, "step": 32542 }, { "epoch": 3.3085603904026026, "grad_norm": 0.3057408332824707, "learning_rate": 3.099295095070955e-06, "loss": 0.35, "step": 32543 }, { "epoch": 3.3086620577470516, "grad_norm": 0.2766798436641693, "learning_rate": 3.0989668556812458e-06, "loss": 0.3228, "step": 32544 }, { "epoch": 3.3087637250915005, "grad_norm": 0.2704707682132721, "learning_rate": 3.0986386258684876e-06, "loss": 0.3114, "step": 32545 }, { "epoch": 3.3088653924359495, "grad_norm": 0.28169628977775574, "learning_rate": 3.0983104056343383e-06, "loss": 0.3457, "step": 32546 }, { "epoch": 3.3089670597803984, "grad_norm": 0.25519245862960815, "learning_rate": 3.097982194980447e-06, "loss": 0.3225, "step": 32547 }, { "epoch": 3.3090687271248473, "grad_norm": 0.2688359320163727, "learning_rate": 3.09765399390847e-06, "loss": 0.2991, "step": 32548 }, { "epoch": 3.3091703944692963, "grad_norm": 0.2665722370147705, "learning_rate": 3.097325802420062e-06, "loss": 0.2916, "step": 32549 }, { "epoch": 3.3092720618137452, "grad_norm": 0.2707735002040863, "learning_rate": 3.0969976205168718e-06, "loss": 0.3465, "step": 32550 }, { "epoch": 3.309373729158194, "grad_norm": 0.2690863311290741, "learning_rate": 3.0966694482005584e-06, "loss": 0.3053, "step": 32551 }, { "epoch": 3.309475396502643, "grad_norm": 0.2670358419418335, "learning_rate": 3.0963412854727694e-06, "loss": 0.3019, "step": 32552 }, { "epoch": 3.3095770638470925, "grad_norm": 0.2741908133029938, "learning_rate": 3.0960131323351606e-06, "loss": 0.3113, "step": 32553 }, { "epoch": 3.3096787311915414, "grad_norm": 0.2772925794124603, "learning_rate": 3.095684988789387e-06, "loss": 0.2851, "step": 32554 }, { "epoch": 3.3097803985359904, "grad_norm": 0.26452627778053284, "learning_rate": 3.0953568548370977e-06, "loss": 0.3202, "step": 32555 }, { "epoch": 3.3098820658804393, "grad_norm": 0.29573097825050354, "learning_rate": 3.0950287304799507e-06, "loss": 0.3257, "step": 32556 }, { "epoch": 3.3099837332248883, "grad_norm": 0.27201706171035767, "learning_rate": 3.0947006157195947e-06, "loss": 0.2976, "step": 32557 }, { "epoch": 3.310085400569337, "grad_norm": 0.27846089005470276, "learning_rate": 3.0943725105576827e-06, "loss": 0.3424, "step": 32558 }, { "epoch": 3.310187067913786, "grad_norm": 0.27387216687202454, "learning_rate": 3.0940444149958726e-06, "loss": 0.3045, "step": 32559 }, { "epoch": 3.310288735258235, "grad_norm": 0.26115185022354126, "learning_rate": 3.0937163290358104e-06, "loss": 0.3154, "step": 32560 }, { "epoch": 3.310390402602684, "grad_norm": 0.25656238198280334, "learning_rate": 3.0933882526791553e-06, "loss": 0.2803, "step": 32561 }, { "epoch": 3.310492069947133, "grad_norm": 0.2738387882709503, "learning_rate": 3.0930601859275557e-06, "loss": 0.2998, "step": 32562 }, { "epoch": 3.310593737291582, "grad_norm": 0.27984172105789185, "learning_rate": 3.092732128782664e-06, "loss": 0.3088, "step": 32563 }, { "epoch": 3.310695404636031, "grad_norm": 0.270513117313385, "learning_rate": 3.0924040812461377e-06, "loss": 0.3206, "step": 32564 }, { "epoch": 3.31079707198048, "grad_norm": 0.2648017406463623, "learning_rate": 3.092076043319624e-06, "loss": 0.3087, "step": 32565 }, { "epoch": 3.3108987393249287, "grad_norm": 0.2704755365848541, "learning_rate": 3.09174801500478e-06, "loss": 0.3188, "step": 32566 }, { "epoch": 3.3110004066693777, "grad_norm": 0.27761268615722656, "learning_rate": 3.091419996303254e-06, "loss": 0.2962, "step": 32567 }, { "epoch": 3.3111020740138266, "grad_norm": 0.26885247230529785, "learning_rate": 3.0910919872167e-06, "loss": 0.299, "step": 32568 }, { "epoch": 3.3112037413582756, "grad_norm": 0.2773188054561615, "learning_rate": 3.0907639877467743e-06, "loss": 0.3448, "step": 32569 }, { "epoch": 3.3113054087027245, "grad_norm": 0.2683735489845276, "learning_rate": 3.0904359978951234e-06, "loss": 0.3163, "step": 32570 }, { "epoch": 3.311407076047174, "grad_norm": 0.28313305974006653, "learning_rate": 3.090108017663403e-06, "loss": 0.2805, "step": 32571 }, { "epoch": 3.311508743391623, "grad_norm": 0.2656373977661133, "learning_rate": 3.0897800470532633e-06, "loss": 0.291, "step": 32572 }, { "epoch": 3.3116104107360718, "grad_norm": 0.24522922933101654, "learning_rate": 3.089452086066358e-06, "loss": 0.3613, "step": 32573 }, { "epoch": 3.3117120780805207, "grad_norm": 0.26635387539863586, "learning_rate": 3.0891241347043412e-06, "loss": 0.3113, "step": 32574 }, { "epoch": 3.3118137454249696, "grad_norm": 0.26479005813598633, "learning_rate": 3.088796192968861e-06, "loss": 0.2902, "step": 32575 }, { "epoch": 3.3119154127694186, "grad_norm": 0.28555768728256226, "learning_rate": 3.0884682608615725e-06, "loss": 0.2971, "step": 32576 }, { "epoch": 3.3120170801138675, "grad_norm": 0.2955958843231201, "learning_rate": 3.0881403383841258e-06, "loss": 0.3194, "step": 32577 }, { "epoch": 3.3121187474583165, "grad_norm": 0.2842646837234497, "learning_rate": 3.0878124255381725e-06, "loss": 0.3151, "step": 32578 }, { "epoch": 3.3122204148027654, "grad_norm": 0.2788938581943512, "learning_rate": 3.0874845223253687e-06, "loss": 0.2875, "step": 32579 }, { "epoch": 3.3123220821472144, "grad_norm": 0.268240362405777, "learning_rate": 3.087156628747361e-06, "loss": 0.3112, "step": 32580 }, { "epoch": 3.3124237494916633, "grad_norm": 0.24178798496723175, "learning_rate": 3.0868287448058052e-06, "loss": 0.3105, "step": 32581 }, { "epoch": 3.3125254168361122, "grad_norm": 0.28529587388038635, "learning_rate": 3.0865008705023507e-06, "loss": 0.332, "step": 32582 }, { "epoch": 3.312627084180561, "grad_norm": 0.27614226937294006, "learning_rate": 3.0861730058386507e-06, "loss": 0.3004, "step": 32583 }, { "epoch": 3.31272875152501, "grad_norm": 0.28109368681907654, "learning_rate": 3.085845150816356e-06, "loss": 0.2859, "step": 32584 }, { "epoch": 3.312830418869459, "grad_norm": 0.27533724904060364, "learning_rate": 3.085517305437118e-06, "loss": 0.3029, "step": 32585 }, { "epoch": 3.312932086213908, "grad_norm": 0.29618924856185913, "learning_rate": 3.0851894697025896e-06, "loss": 0.3197, "step": 32586 }, { "epoch": 3.313033753558357, "grad_norm": 0.2721168100833893, "learning_rate": 3.084861643614421e-06, "loss": 0.3159, "step": 32587 }, { "epoch": 3.313135420902806, "grad_norm": 0.27863550186157227, "learning_rate": 3.0845338271742654e-06, "loss": 0.3004, "step": 32588 }, { "epoch": 3.313237088247255, "grad_norm": 0.28024473786354065, "learning_rate": 3.084206020383772e-06, "loss": 0.2778, "step": 32589 }, { "epoch": 3.3133387555917038, "grad_norm": 0.2694646418094635, "learning_rate": 3.0838782232445936e-06, "loss": 0.2983, "step": 32590 }, { "epoch": 3.3134404229361527, "grad_norm": 0.305006206035614, "learning_rate": 3.0835504357583824e-06, "loss": 0.3136, "step": 32591 }, { "epoch": 3.3135420902806016, "grad_norm": 0.31541913747787476, "learning_rate": 3.0832226579267878e-06, "loss": 0.2852, "step": 32592 }, { "epoch": 3.3136437576250506, "grad_norm": 0.26429474353790283, "learning_rate": 3.082894889751462e-06, "loss": 0.3051, "step": 32593 }, { "epoch": 3.3137454249695, "grad_norm": 0.27949902415275574, "learning_rate": 3.082567131234056e-06, "loss": 0.3106, "step": 32594 }, { "epoch": 3.313847092313949, "grad_norm": 0.26397842168807983, "learning_rate": 3.082239382376221e-06, "loss": 0.3514, "step": 32595 }, { "epoch": 3.313948759658398, "grad_norm": 0.2893233299255371, "learning_rate": 3.081911643179609e-06, "loss": 0.2994, "step": 32596 }, { "epoch": 3.314050427002847, "grad_norm": 0.27694588899612427, "learning_rate": 3.08158391364587e-06, "loss": 0.3055, "step": 32597 }, { "epoch": 3.3141520943472957, "grad_norm": 0.27309831976890564, "learning_rate": 3.0812561937766562e-06, "loss": 0.3078, "step": 32598 }, { "epoch": 3.3142537616917447, "grad_norm": 0.27662187814712524, "learning_rate": 3.0809284835736163e-06, "loss": 0.3435, "step": 32599 }, { "epoch": 3.3143554290361936, "grad_norm": 0.27869006991386414, "learning_rate": 3.0806007830384025e-06, "loss": 0.2993, "step": 32600 }, { "epoch": 3.3144570963806426, "grad_norm": 0.26247429847717285, "learning_rate": 3.0802730921726674e-06, "loss": 0.2923, "step": 32601 }, { "epoch": 3.3145587637250915, "grad_norm": 0.2851637899875641, "learning_rate": 3.079945410978059e-06, "loss": 0.2692, "step": 32602 }, { "epoch": 3.3146604310695404, "grad_norm": 0.26597121357917786, "learning_rate": 3.0796177394562304e-06, "loss": 0.3412, "step": 32603 }, { "epoch": 3.3147620984139894, "grad_norm": 0.2639288902282715, "learning_rate": 3.07929007760883e-06, "loss": 0.2913, "step": 32604 }, { "epoch": 3.3148637657584383, "grad_norm": 0.2691684663295746, "learning_rate": 3.0789624254375104e-06, "loss": 0.3075, "step": 32605 }, { "epoch": 3.3149654331028873, "grad_norm": 0.28040000796318054, "learning_rate": 3.0786347829439223e-06, "loss": 0.315, "step": 32606 }, { "epoch": 3.315067100447336, "grad_norm": 0.2647451162338257, "learning_rate": 3.0783071501297148e-06, "loss": 0.303, "step": 32607 }, { "epoch": 3.315168767791785, "grad_norm": 0.27302286028862, "learning_rate": 3.0779795269965394e-06, "loss": 0.3019, "step": 32608 }, { "epoch": 3.315270435136234, "grad_norm": 0.2714943289756775, "learning_rate": 3.077651913546046e-06, "loss": 0.2765, "step": 32609 }, { "epoch": 3.315372102480683, "grad_norm": 0.2686918079853058, "learning_rate": 3.0773243097798854e-06, "loss": 0.3041, "step": 32610 }, { "epoch": 3.315473769825132, "grad_norm": 0.26630568504333496, "learning_rate": 3.076996715699709e-06, "loss": 0.3102, "step": 32611 }, { "epoch": 3.3155754371695814, "grad_norm": 0.27513816952705383, "learning_rate": 3.076669131307165e-06, "loss": 0.2826, "step": 32612 }, { "epoch": 3.3156771045140303, "grad_norm": 0.2970624268054962, "learning_rate": 3.0763415566039058e-06, "loss": 0.3026, "step": 32613 }, { "epoch": 3.3157787718584792, "grad_norm": 0.2967698276042938, "learning_rate": 3.07601399159158e-06, "loss": 0.3166, "step": 32614 }, { "epoch": 3.315880439202928, "grad_norm": 0.2809945344924927, "learning_rate": 3.0756864362718387e-06, "loss": 0.3046, "step": 32615 }, { "epoch": 3.315982106547377, "grad_norm": 0.2940596640110016, "learning_rate": 3.0753588906463327e-06, "loss": 0.3025, "step": 32616 }, { "epoch": 3.316083773891826, "grad_norm": 0.2700863778591156, "learning_rate": 3.07503135471671e-06, "loss": 0.3196, "step": 32617 }, { "epoch": 3.316185441236275, "grad_norm": 0.27295568585395813, "learning_rate": 3.074703828484623e-06, "loss": 0.2875, "step": 32618 }, { "epoch": 3.316287108580724, "grad_norm": 0.2708294093608856, "learning_rate": 3.0743763119517193e-06, "loss": 0.3091, "step": 32619 }, { "epoch": 3.316388775925173, "grad_norm": 0.2806132435798645, "learning_rate": 3.0740488051196502e-06, "loss": 0.3152, "step": 32620 }, { "epoch": 3.316490443269622, "grad_norm": 0.2774966359138489, "learning_rate": 3.0737213079900674e-06, "loss": 0.3241, "step": 32621 }, { "epoch": 3.3165921106140708, "grad_norm": 0.25609076023101807, "learning_rate": 3.073393820564617e-06, "loss": 0.2942, "step": 32622 }, { "epoch": 3.3166937779585197, "grad_norm": 0.27325499057769775, "learning_rate": 3.073066342844952e-06, "loss": 0.3265, "step": 32623 }, { "epoch": 3.3167954453029687, "grad_norm": 0.2701416015625, "learning_rate": 3.07273887483272e-06, "loss": 0.285, "step": 32624 }, { "epoch": 3.3168971126474176, "grad_norm": 0.2795398235321045, "learning_rate": 3.0724114165295717e-06, "loss": 0.3275, "step": 32625 }, { "epoch": 3.3169987799918665, "grad_norm": 0.27600008249282837, "learning_rate": 3.0720839679371566e-06, "loss": 0.279, "step": 32626 }, { "epoch": 3.3171004473363155, "grad_norm": 0.28348657488822937, "learning_rate": 3.071756529057124e-06, "loss": 0.2666, "step": 32627 }, { "epoch": 3.3172021146807644, "grad_norm": 0.2727034091949463, "learning_rate": 3.071429099891125e-06, "loss": 0.2906, "step": 32628 }, { "epoch": 3.3173037820252134, "grad_norm": 0.29769137501716614, "learning_rate": 3.071101680440807e-06, "loss": 0.3173, "step": 32629 }, { "epoch": 3.3174054493696623, "grad_norm": 0.26014456152915955, "learning_rate": 3.0707742707078196e-06, "loss": 0.2776, "step": 32630 }, { "epoch": 3.3175071167141112, "grad_norm": 0.2956918179988861, "learning_rate": 3.070446870693815e-06, "loss": 0.2956, "step": 32631 }, { "epoch": 3.31760878405856, "grad_norm": 0.25603124499320984, "learning_rate": 3.0701194804004386e-06, "loss": 0.3198, "step": 32632 }, { "epoch": 3.317710451403009, "grad_norm": 0.27846965193748474, "learning_rate": 3.0697920998293427e-06, "loss": 0.3204, "step": 32633 }, { "epoch": 3.317812118747458, "grad_norm": 0.29278329014778137, "learning_rate": 3.0694647289821753e-06, "loss": 0.2918, "step": 32634 }, { "epoch": 3.3179137860919075, "grad_norm": 0.2696005403995514, "learning_rate": 3.069137367860585e-06, "loss": 0.3143, "step": 32635 }, { "epoch": 3.3180154534363564, "grad_norm": 0.2749383747577667, "learning_rate": 3.068810016466224e-06, "loss": 0.3491, "step": 32636 }, { "epoch": 3.3181171207808053, "grad_norm": 0.27584055066108704, "learning_rate": 3.0684826748007366e-06, "loss": 0.3074, "step": 32637 }, { "epoch": 3.3182187881252543, "grad_norm": 0.2841290831565857, "learning_rate": 3.068155342865776e-06, "loss": 0.3154, "step": 32638 }, { "epoch": 3.3183204554697032, "grad_norm": 0.2745989263057709, "learning_rate": 3.0678280206629897e-06, "loss": 0.3295, "step": 32639 }, { "epoch": 3.318422122814152, "grad_norm": 0.3002658784389496, "learning_rate": 3.0675007081940255e-06, "loss": 0.3197, "step": 32640 }, { "epoch": 3.318523790158601, "grad_norm": 0.26395195722579956, "learning_rate": 3.0671734054605352e-06, "loss": 0.3292, "step": 32641 }, { "epoch": 3.31862545750305, "grad_norm": 0.27058112621307373, "learning_rate": 3.0668461124641647e-06, "loss": 0.3388, "step": 32642 }, { "epoch": 3.318727124847499, "grad_norm": 0.2550174295902252, "learning_rate": 3.066518829206565e-06, "loss": 0.3254, "step": 32643 }, { "epoch": 3.318828792191948, "grad_norm": 0.2605797350406647, "learning_rate": 3.0661915556893827e-06, "loss": 0.2805, "step": 32644 }, { "epoch": 3.318930459536397, "grad_norm": 0.26364579796791077, "learning_rate": 3.065864291914269e-06, "loss": 0.3076, "step": 32645 }, { "epoch": 3.319032126880846, "grad_norm": 0.2583085000514984, "learning_rate": 3.065537037882871e-06, "loss": 0.3388, "step": 32646 }, { "epoch": 3.3191337942252948, "grad_norm": 0.2655607759952545, "learning_rate": 3.0652097935968374e-06, "loss": 0.2961, "step": 32647 }, { "epoch": 3.3192354615697437, "grad_norm": 0.2645922601222992, "learning_rate": 3.064882559057818e-06, "loss": 0.3067, "step": 32648 }, { "epoch": 3.3193371289141926, "grad_norm": 0.2658967971801758, "learning_rate": 3.0645553342674594e-06, "loss": 0.3088, "step": 32649 }, { "epoch": 3.3194387962586416, "grad_norm": 0.26305410265922546, "learning_rate": 3.064228119227411e-06, "loss": 0.294, "step": 32650 }, { "epoch": 3.3195404636030905, "grad_norm": 0.2898379862308502, "learning_rate": 3.063900913939323e-06, "loss": 0.2798, "step": 32651 }, { "epoch": 3.3196421309475395, "grad_norm": 0.27348586916923523, "learning_rate": 3.0635737184048398e-06, "loss": 0.3186, "step": 32652 }, { "epoch": 3.319743798291989, "grad_norm": 0.2725040018558502, "learning_rate": 3.063246532625614e-06, "loss": 0.3243, "step": 32653 }, { "epoch": 3.319845465636438, "grad_norm": 0.252003937959671, "learning_rate": 3.0629193566032906e-06, "loss": 0.278, "step": 32654 }, { "epoch": 3.3199471329808867, "grad_norm": 0.2879987061023712, "learning_rate": 3.0625921903395196e-06, "loss": 0.3421, "step": 32655 }, { "epoch": 3.3200488003253357, "grad_norm": 0.2663280665874481, "learning_rate": 3.0622650338359487e-06, "loss": 0.2885, "step": 32656 }, { "epoch": 3.3201504676697846, "grad_norm": 0.27071982622146606, "learning_rate": 3.0619378870942262e-06, "loss": 0.2905, "step": 32657 }, { "epoch": 3.3202521350142336, "grad_norm": 0.3131856322288513, "learning_rate": 3.061610750116e-06, "loss": 0.2935, "step": 32658 }, { "epoch": 3.3203538023586825, "grad_norm": 0.27418893575668335, "learning_rate": 3.0612836229029184e-06, "loss": 0.3149, "step": 32659 }, { "epoch": 3.3204554697031314, "grad_norm": 0.29162123799324036, "learning_rate": 3.0609565054566284e-06, "loss": 0.2923, "step": 32660 }, { "epoch": 3.3205571370475804, "grad_norm": 0.28428709506988525, "learning_rate": 3.06062939777878e-06, "loss": 0.3331, "step": 32661 }, { "epoch": 3.3206588043920293, "grad_norm": 0.2796867787837982, "learning_rate": 3.0603022998710185e-06, "loss": 0.3061, "step": 32662 }, { "epoch": 3.3207604717364783, "grad_norm": 0.2622508108615875, "learning_rate": 3.059975211734996e-06, "loss": 0.3169, "step": 32663 }, { "epoch": 3.320862139080927, "grad_norm": 0.3001188635826111, "learning_rate": 3.059648133372354e-06, "loss": 0.2925, "step": 32664 }, { "epoch": 3.320963806425376, "grad_norm": 0.2641089856624603, "learning_rate": 3.0593210647847437e-06, "loss": 0.2948, "step": 32665 }, { "epoch": 3.321065473769825, "grad_norm": 0.2723952829837799, "learning_rate": 3.0589940059738143e-06, "loss": 0.3122, "step": 32666 }, { "epoch": 3.321167141114274, "grad_norm": 0.2624450623989105, "learning_rate": 3.0586669569412108e-06, "loss": 0.3356, "step": 32667 }, { "epoch": 3.321268808458723, "grad_norm": 0.2762676775455475, "learning_rate": 3.0583399176885843e-06, "loss": 0.2926, "step": 32668 }, { "epoch": 3.321370475803172, "grad_norm": 0.29482704401016235, "learning_rate": 3.0580128882175764e-06, "loss": 0.2882, "step": 32669 }, { "epoch": 3.321472143147621, "grad_norm": 0.26033732295036316, "learning_rate": 3.0576858685298394e-06, "loss": 0.3315, "step": 32670 }, { "epoch": 3.32157381049207, "grad_norm": 0.27222341299057007, "learning_rate": 3.0573588586270197e-06, "loss": 0.3217, "step": 32671 }, { "epoch": 3.3216754778365187, "grad_norm": 0.2706219553947449, "learning_rate": 3.0570318585107637e-06, "loss": 0.312, "step": 32672 }, { "epoch": 3.3217771451809677, "grad_norm": 0.28409355878829956, "learning_rate": 3.056704868182722e-06, "loss": 0.309, "step": 32673 }, { "epoch": 3.3218788125254166, "grad_norm": 0.28873389959335327, "learning_rate": 3.056377887644536e-06, "loss": 0.3055, "step": 32674 }, { "epoch": 3.3219804798698656, "grad_norm": 0.2635495960712433, "learning_rate": 3.0560509168978576e-06, "loss": 0.3072, "step": 32675 }, { "epoch": 3.322082147214315, "grad_norm": 0.27367955446243286, "learning_rate": 3.0557239559443337e-06, "loss": 0.324, "step": 32676 }, { "epoch": 3.322183814558764, "grad_norm": 0.2674145996570587, "learning_rate": 3.0553970047856092e-06, "loss": 0.3095, "step": 32677 }, { "epoch": 3.322285481903213, "grad_norm": 0.27493590116500854, "learning_rate": 3.0550700634233345e-06, "loss": 0.3159, "step": 32678 }, { "epoch": 3.3223871492476618, "grad_norm": 0.26602789759635925, "learning_rate": 3.054743131859152e-06, "loss": 0.2805, "step": 32679 }, { "epoch": 3.3224888165921107, "grad_norm": 0.2709624767303467, "learning_rate": 3.0544162100947115e-06, "loss": 0.3266, "step": 32680 }, { "epoch": 3.3225904839365596, "grad_norm": 0.2545872926712036, "learning_rate": 3.054089298131663e-06, "loss": 0.3044, "step": 32681 }, { "epoch": 3.3226921512810086, "grad_norm": 0.26858434081077576, "learning_rate": 3.0537623959716467e-06, "loss": 0.2746, "step": 32682 }, { "epoch": 3.3227938186254575, "grad_norm": 0.2508670687675476, "learning_rate": 3.0534355036163156e-06, "loss": 0.2814, "step": 32683 }, { "epoch": 3.3228954859699065, "grad_norm": 0.27255159616470337, "learning_rate": 3.053108621067311e-06, "loss": 0.3145, "step": 32684 }, { "epoch": 3.3229971533143554, "grad_norm": 0.3548126518726349, "learning_rate": 3.0527817483262835e-06, "loss": 0.2891, "step": 32685 }, { "epoch": 3.3230988206588044, "grad_norm": 0.2574043571949005, "learning_rate": 3.052454885394881e-06, "loss": 0.329, "step": 32686 }, { "epoch": 3.3232004880032533, "grad_norm": 0.27901652455329895, "learning_rate": 3.052128032274745e-06, "loss": 0.2805, "step": 32687 }, { "epoch": 3.3233021553477022, "grad_norm": 0.25192931294441223, "learning_rate": 3.0518011889675283e-06, "loss": 0.3019, "step": 32688 }, { "epoch": 3.323403822692151, "grad_norm": 0.2925317883491516, "learning_rate": 3.0514743554748706e-06, "loss": 0.2931, "step": 32689 }, { "epoch": 3.3235054900366, "grad_norm": 0.2814478278160095, "learning_rate": 3.051147531798423e-06, "loss": 0.3072, "step": 32690 }, { "epoch": 3.323607157381049, "grad_norm": 0.2797596752643585, "learning_rate": 3.0508207179398336e-06, "loss": 0.3203, "step": 32691 }, { "epoch": 3.323708824725498, "grad_norm": 0.2634052634239197, "learning_rate": 3.050493913900743e-06, "loss": 0.3176, "step": 32692 }, { "epoch": 3.323810492069947, "grad_norm": 0.2852574288845062, "learning_rate": 3.050167119682804e-06, "loss": 0.3183, "step": 32693 }, { "epoch": 3.3239121594143963, "grad_norm": 0.2675072252750397, "learning_rate": 3.049840335287656e-06, "loss": 0.2917, "step": 32694 }, { "epoch": 3.3240138267588453, "grad_norm": 0.312149316072464, "learning_rate": 3.0495135607169505e-06, "loss": 0.3083, "step": 32695 }, { "epoch": 3.324115494103294, "grad_norm": 0.2799187898635864, "learning_rate": 3.049186795972334e-06, "loss": 0.3273, "step": 32696 }, { "epoch": 3.324217161447743, "grad_norm": 0.27382272481918335, "learning_rate": 3.048860041055448e-06, "loss": 0.3287, "step": 32697 }, { "epoch": 3.324318828792192, "grad_norm": 0.26583439111709595, "learning_rate": 3.0485332959679447e-06, "loss": 0.3129, "step": 32698 }, { "epoch": 3.324420496136641, "grad_norm": 0.2782919406890869, "learning_rate": 3.048206560711464e-06, "loss": 0.3262, "step": 32699 }, { "epoch": 3.32452216348109, "grad_norm": 0.2820512056350708, "learning_rate": 3.0478798352876552e-06, "loss": 0.3178, "step": 32700 }, { "epoch": 3.324623830825539, "grad_norm": 0.28225842118263245, "learning_rate": 3.0475531196981666e-06, "loss": 0.2932, "step": 32701 }, { "epoch": 3.324725498169988, "grad_norm": 0.284595251083374, "learning_rate": 3.0472264139446376e-06, "loss": 0.2922, "step": 32702 }, { "epoch": 3.324827165514437, "grad_norm": 0.28589850664138794, "learning_rate": 3.0468997180287212e-06, "loss": 0.288, "step": 32703 }, { "epoch": 3.3249288328588857, "grad_norm": 0.27557969093322754, "learning_rate": 3.046573031952058e-06, "loss": 0.2982, "step": 32704 }, { "epoch": 3.3250305002033347, "grad_norm": 0.27701491117477417, "learning_rate": 3.0462463557162945e-06, "loss": 0.2693, "step": 32705 }, { "epoch": 3.3251321675477836, "grad_norm": 0.26918599009513855, "learning_rate": 3.0459196893230813e-06, "loss": 0.2714, "step": 32706 }, { "epoch": 3.3252338348922326, "grad_norm": 0.29581502079963684, "learning_rate": 3.0455930327740562e-06, "loss": 0.3143, "step": 32707 }, { "epoch": 3.3253355022366815, "grad_norm": 0.3010563850402832, "learning_rate": 3.045266386070873e-06, "loss": 0.3037, "step": 32708 }, { "epoch": 3.3254371695811304, "grad_norm": 0.2713366746902466, "learning_rate": 3.0449397492151704e-06, "loss": 0.3049, "step": 32709 }, { "epoch": 3.3255388369255794, "grad_norm": 0.3002770245075226, "learning_rate": 3.0446131222085963e-06, "loss": 0.2858, "step": 32710 }, { "epoch": 3.3256405042700283, "grad_norm": 0.2646563649177551, "learning_rate": 3.0442865050527992e-06, "loss": 0.3007, "step": 32711 }, { "epoch": 3.3257421716144773, "grad_norm": 0.3110574781894684, "learning_rate": 3.043959897749419e-06, "loss": 0.3023, "step": 32712 }, { "epoch": 3.325843838958926, "grad_norm": 0.30300113558769226, "learning_rate": 3.0436333003001072e-06, "loss": 0.2839, "step": 32713 }, { "epoch": 3.325945506303375, "grad_norm": 0.28142049908638, "learning_rate": 3.043306712706504e-06, "loss": 0.3354, "step": 32714 }, { "epoch": 3.326047173647824, "grad_norm": 0.2636106610298157, "learning_rate": 3.0429801349702556e-06, "loss": 0.31, "step": 32715 }, { "epoch": 3.326148840992273, "grad_norm": 0.29178857803344727, "learning_rate": 3.0426535670930113e-06, "loss": 0.3178, "step": 32716 }, { "epoch": 3.3262505083367224, "grad_norm": 0.2753678858280182, "learning_rate": 3.0423270090764097e-06, "loss": 0.3397, "step": 32717 }, { "epoch": 3.3263521756811714, "grad_norm": 0.2776792049407959, "learning_rate": 3.0420004609221028e-06, "loss": 0.3441, "step": 32718 }, { "epoch": 3.3264538430256203, "grad_norm": 0.27533119916915894, "learning_rate": 3.0416739226317304e-06, "loss": 0.3011, "step": 32719 }, { "epoch": 3.3265555103700692, "grad_norm": 0.25839659571647644, "learning_rate": 3.0413473942069375e-06, "loss": 0.3155, "step": 32720 }, { "epoch": 3.326657177714518, "grad_norm": 0.29554450511932373, "learning_rate": 3.0410208756493755e-06, "loss": 0.2952, "step": 32721 }, { "epoch": 3.326758845058967, "grad_norm": 0.2603740990161896, "learning_rate": 3.0406943669606815e-06, "loss": 0.3429, "step": 32722 }, { "epoch": 3.326860512403416, "grad_norm": 0.2777480185031891, "learning_rate": 3.0403678681425053e-06, "loss": 0.3281, "step": 32723 }, { "epoch": 3.326962179747865, "grad_norm": 0.27051496505737305, "learning_rate": 3.0400413791964888e-06, "loss": 0.3205, "step": 32724 }, { "epoch": 3.327063847092314, "grad_norm": 0.2795027792453766, "learning_rate": 3.0397149001242777e-06, "loss": 0.3061, "step": 32725 }, { "epoch": 3.327165514436763, "grad_norm": 0.26732003688812256, "learning_rate": 3.039388430927519e-06, "loss": 0.2854, "step": 32726 }, { "epoch": 3.327267181781212, "grad_norm": 0.2785322070121765, "learning_rate": 3.039061971607854e-06, "loss": 0.3325, "step": 32727 }, { "epoch": 3.3273688491256608, "grad_norm": 0.2930796444416046, "learning_rate": 3.03873552216693e-06, "loss": 0.3511, "step": 32728 }, { "epoch": 3.3274705164701097, "grad_norm": 0.27561578154563904, "learning_rate": 3.0384090826063893e-06, "loss": 0.2883, "step": 32729 }, { "epoch": 3.3275721838145587, "grad_norm": 0.2537941634654999, "learning_rate": 3.0380826529278762e-06, "loss": 0.3065, "step": 32730 }, { "epoch": 3.3276738511590076, "grad_norm": 0.3016860783100128, "learning_rate": 3.03775623313304e-06, "loss": 0.2943, "step": 32731 }, { "epoch": 3.3277755185034565, "grad_norm": 0.2700863182544708, "learning_rate": 3.0374298232235195e-06, "loss": 0.2969, "step": 32732 }, { "epoch": 3.3278771858479055, "grad_norm": 0.2793479561805725, "learning_rate": 3.0371034232009623e-06, "loss": 0.2946, "step": 32733 }, { "epoch": 3.3279788531923544, "grad_norm": 0.29402032494544983, "learning_rate": 3.03677703306701e-06, "loss": 0.3226, "step": 32734 }, { "epoch": 3.328080520536804, "grad_norm": 0.2648288905620575, "learning_rate": 3.0364506528233078e-06, "loss": 0.2938, "step": 32735 }, { "epoch": 3.3281821878812528, "grad_norm": 0.27233508229255676, "learning_rate": 3.0361242824715032e-06, "loss": 0.3018, "step": 32736 }, { "epoch": 3.3282838552257017, "grad_norm": 0.27831000089645386, "learning_rate": 3.0357979220132363e-06, "loss": 0.2869, "step": 32737 }, { "epoch": 3.3283855225701506, "grad_norm": 0.2638612687587738, "learning_rate": 3.035471571450154e-06, "loss": 0.2915, "step": 32738 }, { "epoch": 3.3284871899145996, "grad_norm": 0.26860710978507996, "learning_rate": 3.0351452307838976e-06, "loss": 0.3285, "step": 32739 }, { "epoch": 3.3285888572590485, "grad_norm": 0.2693324685096741, "learning_rate": 3.034818900016112e-06, "loss": 0.314, "step": 32740 }, { "epoch": 3.3286905246034975, "grad_norm": 0.2885573208332062, "learning_rate": 3.0344925791484436e-06, "loss": 0.2963, "step": 32741 }, { "epoch": 3.3287921919479464, "grad_norm": 0.25879618525505066, "learning_rate": 3.0341662681825336e-06, "loss": 0.3041, "step": 32742 }, { "epoch": 3.3288938592923953, "grad_norm": 0.2823515236377716, "learning_rate": 3.0338399671200274e-06, "loss": 0.3, "step": 32743 }, { "epoch": 3.3289955266368443, "grad_norm": 0.26331713795661926, "learning_rate": 3.0335136759625674e-06, "loss": 0.2992, "step": 32744 }, { "epoch": 3.3290971939812932, "grad_norm": 0.2900259792804718, "learning_rate": 3.0331873947117985e-06, "loss": 0.3087, "step": 32745 }, { "epoch": 3.329198861325742, "grad_norm": 0.26638132333755493, "learning_rate": 3.0328611233693643e-06, "loss": 0.3257, "step": 32746 }, { "epoch": 3.329300528670191, "grad_norm": 0.2604162395000458, "learning_rate": 3.0325348619369077e-06, "loss": 0.3039, "step": 32747 }, { "epoch": 3.32940219601464, "grad_norm": 0.29201051592826843, "learning_rate": 3.032208610416074e-06, "loss": 0.3171, "step": 32748 }, { "epoch": 3.329503863359089, "grad_norm": 0.2625811994075775, "learning_rate": 3.0318823688085045e-06, "loss": 0.2963, "step": 32749 }, { "epoch": 3.329605530703538, "grad_norm": 0.2684252858161926, "learning_rate": 3.031556137115844e-06, "loss": 0.3221, "step": 32750 }, { "epoch": 3.329707198047987, "grad_norm": 0.289596825838089, "learning_rate": 3.0312299153397374e-06, "loss": 0.3172, "step": 32751 }, { "epoch": 3.329808865392436, "grad_norm": 0.2767433524131775, "learning_rate": 3.0309037034818254e-06, "loss": 0.3343, "step": 32752 }, { "epoch": 3.3299105327368848, "grad_norm": 0.29857009649276733, "learning_rate": 3.0305775015437535e-06, "loss": 0.2956, "step": 32753 }, { "epoch": 3.3300122000813337, "grad_norm": 0.2577795386314392, "learning_rate": 3.030251309527163e-06, "loss": 0.3036, "step": 32754 }, { "epoch": 3.3301138674257826, "grad_norm": 0.28552111983299255, "learning_rate": 3.0299251274336984e-06, "loss": 0.3203, "step": 32755 }, { "epoch": 3.3302155347702316, "grad_norm": 0.2704761028289795, "learning_rate": 3.029598955265004e-06, "loss": 0.3364, "step": 32756 }, { "epoch": 3.3303172021146805, "grad_norm": 0.29531019926071167, "learning_rate": 3.0292727930227206e-06, "loss": 0.3045, "step": 32757 }, { "epoch": 3.33041886945913, "grad_norm": 0.23905259370803833, "learning_rate": 3.028946640708493e-06, "loss": 0.31, "step": 32758 }, { "epoch": 3.330520536803579, "grad_norm": 0.26882901787757874, "learning_rate": 3.0286204983239644e-06, "loss": 0.3236, "step": 32759 }, { "epoch": 3.330622204148028, "grad_norm": 0.2864897549152374, "learning_rate": 3.0282943658707758e-06, "loss": 0.3404, "step": 32760 }, { "epoch": 3.3307238714924767, "grad_norm": 0.2756411135196686, "learning_rate": 3.027968243350573e-06, "loss": 0.3102, "step": 32761 }, { "epoch": 3.3308255388369257, "grad_norm": 0.25423845648765564, "learning_rate": 3.027642130764997e-06, "loss": 0.3406, "step": 32762 }, { "epoch": 3.3309272061813746, "grad_norm": 0.2911470830440521, "learning_rate": 3.0273160281156923e-06, "loss": 0.3065, "step": 32763 }, { "epoch": 3.3310288735258236, "grad_norm": 0.27367085218429565, "learning_rate": 3.0269899354042986e-06, "loss": 0.3323, "step": 32764 }, { "epoch": 3.3311305408702725, "grad_norm": 0.2744009494781494, "learning_rate": 3.026663852632462e-06, "loss": 0.3388, "step": 32765 }, { "epoch": 3.3312322082147214, "grad_norm": 0.2518783211708069, "learning_rate": 3.0263377798018243e-06, "loss": 0.3017, "step": 32766 }, { "epoch": 3.3313338755591704, "grad_norm": 0.2741674482822418, "learning_rate": 3.026011716914027e-06, "loss": 0.312, "step": 32767 }, { "epoch": 3.3314355429036193, "grad_norm": 0.2729804813861847, "learning_rate": 3.0256856639707144e-06, "loss": 0.3066, "step": 32768 }, { "epoch": 3.3315372102480683, "grad_norm": 0.2694733738899231, "learning_rate": 3.0253596209735275e-06, "loss": 0.3073, "step": 32769 }, { "epoch": 3.331638877592517, "grad_norm": 0.27128472924232483, "learning_rate": 3.0250335879241106e-06, "loss": 0.2937, "step": 32770 }, { "epoch": 3.331740544936966, "grad_norm": 0.29192283749580383, "learning_rate": 3.024707564824104e-06, "loss": 0.2844, "step": 32771 }, { "epoch": 3.331842212281415, "grad_norm": 0.29601654410362244, "learning_rate": 3.0243815516751516e-06, "loss": 0.3096, "step": 32772 }, { "epoch": 3.331943879625864, "grad_norm": 0.28567221760749817, "learning_rate": 3.024055548478896e-06, "loss": 0.3088, "step": 32773 }, { "epoch": 3.332045546970313, "grad_norm": 0.2655237019062042, "learning_rate": 3.0237295552369783e-06, "loss": 0.2766, "step": 32774 }, { "epoch": 3.332147214314762, "grad_norm": 0.27794843912124634, "learning_rate": 3.0234035719510423e-06, "loss": 0.2999, "step": 32775 }, { "epoch": 3.3322488816592113, "grad_norm": 0.2936716675758362, "learning_rate": 3.023077598622729e-06, "loss": 0.2988, "step": 32776 }, { "epoch": 3.3323505490036602, "grad_norm": 0.2775089740753174, "learning_rate": 3.02275163525368e-06, "loss": 0.2871, "step": 32777 }, { "epoch": 3.332452216348109, "grad_norm": 0.28439393639564514, "learning_rate": 3.02242568184554e-06, "loss": 0.3106, "step": 32778 }, { "epoch": 3.332553883692558, "grad_norm": 0.27315568923950195, "learning_rate": 3.0220997383999486e-06, "loss": 0.3164, "step": 32779 }, { "epoch": 3.332655551037007, "grad_norm": 0.2640717327594757, "learning_rate": 3.021773804918549e-06, "loss": 0.3221, "step": 32780 }, { "epoch": 3.332757218381456, "grad_norm": 0.26426464319229126, "learning_rate": 3.021447881402982e-06, "loss": 0.3019, "step": 32781 }, { "epoch": 3.332858885725905, "grad_norm": 0.2575305700302124, "learning_rate": 3.021121967854891e-06, "loss": 0.3131, "step": 32782 }, { "epoch": 3.332960553070354, "grad_norm": 0.2971656024456024, "learning_rate": 3.020796064275918e-06, "loss": 0.3362, "step": 32783 }, { "epoch": 3.333062220414803, "grad_norm": 0.292964905500412, "learning_rate": 3.020470170667703e-06, "loss": 0.2669, "step": 32784 }, { "epoch": 3.3331638877592518, "grad_norm": 0.2785819470882416, "learning_rate": 3.02014428703189e-06, "loss": 0.3015, "step": 32785 }, { "epoch": 3.3332655551037007, "grad_norm": 0.27118706703186035, "learning_rate": 3.019818413370118e-06, "loss": 0.3283, "step": 32786 }, { "epoch": 3.3333672224481496, "grad_norm": 0.25209057331085205, "learning_rate": 3.0194925496840312e-06, "loss": 0.2925, "step": 32787 }, { "epoch": 3.3334688897925986, "grad_norm": 0.28192973136901855, "learning_rate": 3.0191666959752706e-06, "loss": 0.3283, "step": 32788 }, { "epoch": 3.3335705571370475, "grad_norm": 0.26521265506744385, "learning_rate": 3.018840852245477e-06, "loss": 0.3106, "step": 32789 }, { "epoch": 3.3336722244814965, "grad_norm": 0.2995818853378296, "learning_rate": 3.018515018496293e-06, "loss": 0.3234, "step": 32790 }, { "epoch": 3.3337738918259454, "grad_norm": 0.2877037823200226, "learning_rate": 3.0181891947293586e-06, "loss": 0.34, "step": 32791 }, { "epoch": 3.3338755591703944, "grad_norm": 0.2723434567451477, "learning_rate": 3.0178633809463163e-06, "loss": 0.3251, "step": 32792 }, { "epoch": 3.3339772265148433, "grad_norm": 0.26170727610588074, "learning_rate": 3.017537577148808e-06, "loss": 0.2966, "step": 32793 }, { "epoch": 3.3340788938592922, "grad_norm": 0.2971840500831604, "learning_rate": 3.0172117833384733e-06, "loss": 0.3112, "step": 32794 }, { "epoch": 3.334180561203741, "grad_norm": 0.29693061113357544, "learning_rate": 3.0168859995169552e-06, "loss": 0.3256, "step": 32795 }, { "epoch": 3.33428222854819, "grad_norm": 0.29452160000801086, "learning_rate": 3.016560225685894e-06, "loss": 0.3193, "step": 32796 }, { "epoch": 3.334383895892639, "grad_norm": 0.28308746218681335, "learning_rate": 3.0162344618469304e-06, "loss": 0.2891, "step": 32797 }, { "epoch": 3.334485563237088, "grad_norm": 0.27971741557121277, "learning_rate": 3.015908708001707e-06, "loss": 0.3005, "step": 32798 }, { "epoch": 3.3345872305815374, "grad_norm": 0.2656491696834564, "learning_rate": 3.015582964151863e-06, "loss": 0.3019, "step": 32799 }, { "epoch": 3.3346888979259863, "grad_norm": 0.26563403010368347, "learning_rate": 3.0152572302990423e-06, "loss": 0.3134, "step": 32800 }, { "epoch": 3.3347905652704353, "grad_norm": 0.2830682694911957, "learning_rate": 3.0149315064448823e-06, "loss": 0.299, "step": 32801 }, { "epoch": 3.334892232614884, "grad_norm": 0.30202198028564453, "learning_rate": 3.014605792591026e-06, "loss": 0.3253, "step": 32802 }, { "epoch": 3.334993899959333, "grad_norm": 0.2805527150630951, "learning_rate": 3.0142800887391145e-06, "loss": 0.3376, "step": 32803 }, { "epoch": 3.335095567303782, "grad_norm": 0.289002388715744, "learning_rate": 3.0139543948907875e-06, "loss": 0.3228, "step": 32804 }, { "epoch": 3.335197234648231, "grad_norm": 0.28356632590293884, "learning_rate": 3.013628711047687e-06, "loss": 0.3256, "step": 32805 }, { "epoch": 3.33529890199268, "grad_norm": 0.3109247386455536, "learning_rate": 3.0133030372114524e-06, "loss": 0.3225, "step": 32806 }, { "epoch": 3.335400569337129, "grad_norm": 0.2774536907672882, "learning_rate": 3.0129773733837243e-06, "loss": 0.316, "step": 32807 }, { "epoch": 3.335502236681578, "grad_norm": 0.2990611791610718, "learning_rate": 3.0126517195661452e-06, "loss": 0.2993, "step": 32808 }, { "epoch": 3.335603904026027, "grad_norm": 0.28943097591400146, "learning_rate": 3.012326075760354e-06, "loss": 0.3233, "step": 32809 }, { "epoch": 3.3357055713704757, "grad_norm": 0.292883038520813, "learning_rate": 3.0120004419679926e-06, "loss": 0.3268, "step": 32810 }, { "epoch": 3.3358072387149247, "grad_norm": 0.28581348061561584, "learning_rate": 3.011674818190699e-06, "loss": 0.319, "step": 32811 }, { "epoch": 3.3359089060593736, "grad_norm": 0.2534119486808777, "learning_rate": 3.0113492044301153e-06, "loss": 0.3333, "step": 32812 }, { "epoch": 3.3360105734038226, "grad_norm": 0.24601271748542786, "learning_rate": 3.0110236006878836e-06, "loss": 0.306, "step": 32813 }, { "epoch": 3.3361122407482715, "grad_norm": 0.272111713886261, "learning_rate": 3.01069800696564e-06, "loss": 0.3276, "step": 32814 }, { "epoch": 3.3362139080927204, "grad_norm": 0.2766250967979431, "learning_rate": 3.01037242326503e-06, "loss": 0.3181, "step": 32815 }, { "epoch": 3.3363155754371694, "grad_norm": 0.29015159606933594, "learning_rate": 3.010046849587688e-06, "loss": 0.3133, "step": 32816 }, { "epoch": 3.3364172427816188, "grad_norm": 0.2637999653816223, "learning_rate": 3.0097212859352576e-06, "loss": 0.2974, "step": 32817 }, { "epoch": 3.3365189101260677, "grad_norm": 0.2561357617378235, "learning_rate": 3.0093957323093793e-06, "loss": 0.327, "step": 32818 }, { "epoch": 3.3366205774705167, "grad_norm": 0.27443185448646545, "learning_rate": 3.0090701887116915e-06, "loss": 0.3124, "step": 32819 }, { "epoch": 3.3367222448149656, "grad_norm": 0.27576178312301636, "learning_rate": 3.008744655143837e-06, "loss": 0.2894, "step": 32820 }, { "epoch": 3.3368239121594145, "grad_norm": 0.2839089035987854, "learning_rate": 3.008419131607451e-06, "loss": 0.3108, "step": 32821 }, { "epoch": 3.3369255795038635, "grad_norm": 0.2857576310634613, "learning_rate": 3.0080936181041766e-06, "loss": 0.3186, "step": 32822 }, { "epoch": 3.3370272468483124, "grad_norm": 0.27562761306762695, "learning_rate": 3.0077681146356546e-06, "loss": 0.3038, "step": 32823 }, { "epoch": 3.3371289141927614, "grad_norm": 0.2722741663455963, "learning_rate": 3.0074426212035222e-06, "loss": 0.3106, "step": 32824 }, { "epoch": 3.3372305815372103, "grad_norm": 0.2794138789176941, "learning_rate": 3.0071171378094223e-06, "loss": 0.297, "step": 32825 }, { "epoch": 3.3373322488816592, "grad_norm": 0.26835504174232483, "learning_rate": 3.00679166445499e-06, "loss": 0.3393, "step": 32826 }, { "epoch": 3.337433916226108, "grad_norm": 0.28078749775886536, "learning_rate": 3.0064662011418687e-06, "loss": 0.291, "step": 32827 }, { "epoch": 3.337535583570557, "grad_norm": 0.28896334767341614, "learning_rate": 3.0061407478716977e-06, "loss": 0.3284, "step": 32828 }, { "epoch": 3.337637250915006, "grad_norm": 0.2559005916118622, "learning_rate": 3.0058153046461147e-06, "loss": 0.3607, "step": 32829 }, { "epoch": 3.337738918259455, "grad_norm": 0.2505128085613251, "learning_rate": 3.0054898714667622e-06, "loss": 0.3349, "step": 32830 }, { "epoch": 3.337840585603904, "grad_norm": 0.26494577527046204, "learning_rate": 3.005164448335275e-06, "loss": 0.2844, "step": 32831 }, { "epoch": 3.337942252948353, "grad_norm": 0.2670428156852722, "learning_rate": 3.004839035253296e-06, "loss": 0.2991, "step": 32832 }, { "epoch": 3.338043920292802, "grad_norm": 0.26274892687797546, "learning_rate": 3.0045136322224665e-06, "loss": 0.3156, "step": 32833 }, { "epoch": 3.3381455876372508, "grad_norm": 0.2759246826171875, "learning_rate": 3.00418823924442e-06, "loss": 0.3555, "step": 32834 }, { "epoch": 3.3382472549816997, "grad_norm": 0.28877386450767517, "learning_rate": 3.003862856320801e-06, "loss": 0.3111, "step": 32835 }, { "epoch": 3.3383489223261487, "grad_norm": 0.2904678285121918, "learning_rate": 3.003537483453244e-06, "loss": 0.3189, "step": 32836 }, { "epoch": 3.3384505896705976, "grad_norm": 0.27203652262687683, "learning_rate": 3.0032121206433916e-06, "loss": 0.313, "step": 32837 }, { "epoch": 3.3385522570150465, "grad_norm": 0.3036970794200897, "learning_rate": 3.0028867678928846e-06, "loss": 0.2982, "step": 32838 }, { "epoch": 3.3386539243594955, "grad_norm": 0.25375837087631226, "learning_rate": 3.0025614252033565e-06, "loss": 0.3091, "step": 32839 }, { "epoch": 3.338755591703945, "grad_norm": 0.29436177015304565, "learning_rate": 3.0022360925764515e-06, "loss": 0.3222, "step": 32840 }, { "epoch": 3.338857259048394, "grad_norm": 0.2624620199203491, "learning_rate": 3.001910770013804e-06, "loss": 0.312, "step": 32841 }, { "epoch": 3.3389589263928428, "grad_norm": 0.30640774965286255, "learning_rate": 3.0015854575170554e-06, "loss": 0.3254, "step": 32842 }, { "epoch": 3.3390605937372917, "grad_norm": 0.27929404377937317, "learning_rate": 3.0012601550878474e-06, "loss": 0.3214, "step": 32843 }, { "epoch": 3.3391622610817406, "grad_norm": 0.286684513092041, "learning_rate": 3.000934862727812e-06, "loss": 0.3081, "step": 32844 }, { "epoch": 3.3392639284261896, "grad_norm": 0.2992926836013794, "learning_rate": 3.0006095804385955e-06, "loss": 0.3304, "step": 32845 }, { "epoch": 3.3393655957706385, "grad_norm": 0.2793304920196533, "learning_rate": 3.000284308221829e-06, "loss": 0.3029, "step": 32846 }, { "epoch": 3.3394672631150875, "grad_norm": 0.29373350739479065, "learning_rate": 2.999959046079156e-06, "loss": 0.3186, "step": 32847 }, { "epoch": 3.3395689304595364, "grad_norm": 0.28757166862487793, "learning_rate": 2.9996337940122168e-06, "loss": 0.305, "step": 32848 }, { "epoch": 3.3396705978039853, "grad_norm": 0.27103039622306824, "learning_rate": 2.999308552022644e-06, "loss": 0.3059, "step": 32849 }, { "epoch": 3.3397722651484343, "grad_norm": 0.2747822105884552, "learning_rate": 2.998983320112082e-06, "loss": 0.287, "step": 32850 }, { "epoch": 3.3398739324928832, "grad_norm": 0.30099567770957947, "learning_rate": 2.9986580982821632e-06, "loss": 0.3206, "step": 32851 }, { "epoch": 3.339975599837332, "grad_norm": 0.27979719638824463, "learning_rate": 2.9983328865345305e-06, "loss": 0.2801, "step": 32852 }, { "epoch": 3.340077267181781, "grad_norm": 0.2820754945278168, "learning_rate": 2.9980076848708234e-06, "loss": 0.3322, "step": 32853 }, { "epoch": 3.34017893452623, "grad_norm": 0.25604045391082764, "learning_rate": 2.9976824932926745e-06, "loss": 0.2955, "step": 32854 }, { "epoch": 3.340280601870679, "grad_norm": 0.26859867572784424, "learning_rate": 2.9973573118017287e-06, "loss": 0.3323, "step": 32855 }, { "epoch": 3.340382269215128, "grad_norm": 0.2647816836833954, "learning_rate": 2.9970321403996183e-06, "loss": 0.2943, "step": 32856 }, { "epoch": 3.340483936559577, "grad_norm": 0.2886108160018921, "learning_rate": 2.996706979087983e-06, "loss": 0.3017, "step": 32857 }, { "epoch": 3.3405856039040263, "grad_norm": 0.2769278585910797, "learning_rate": 2.9963818278684655e-06, "loss": 0.2885, "step": 32858 }, { "epoch": 3.340687271248475, "grad_norm": 0.2698970139026642, "learning_rate": 2.996056686742697e-06, "loss": 0.2771, "step": 32859 }, { "epoch": 3.340788938592924, "grad_norm": 0.2954525053501129, "learning_rate": 2.995731555712322e-06, "loss": 0.3234, "step": 32860 }, { "epoch": 3.340890605937373, "grad_norm": 0.29340627789497375, "learning_rate": 2.9954064347789726e-06, "loss": 0.3229, "step": 32861 }, { "epoch": 3.340992273281822, "grad_norm": 0.2832215130329132, "learning_rate": 2.9950813239442885e-06, "loss": 0.3427, "step": 32862 }, { "epoch": 3.341093940626271, "grad_norm": 0.28106939792633057, "learning_rate": 2.994756223209911e-06, "loss": 0.3079, "step": 32863 }, { "epoch": 3.34119560797072, "grad_norm": 0.2574532628059387, "learning_rate": 2.9944311325774723e-06, "loss": 0.3256, "step": 32864 }, { "epoch": 3.341297275315169, "grad_norm": 0.2891143560409546, "learning_rate": 2.9941060520486152e-06, "loss": 0.3322, "step": 32865 }, { "epoch": 3.341398942659618, "grad_norm": 0.2848242223262787, "learning_rate": 2.993780981624974e-06, "loss": 0.3132, "step": 32866 }, { "epoch": 3.3415006100040667, "grad_norm": 0.2840766906738281, "learning_rate": 2.993455921308187e-06, "loss": 0.3011, "step": 32867 }, { "epoch": 3.3416022773485157, "grad_norm": 0.30006173253059387, "learning_rate": 2.993130871099894e-06, "loss": 0.3021, "step": 32868 }, { "epoch": 3.3417039446929646, "grad_norm": 0.27743101119995117, "learning_rate": 2.9928058310017284e-06, "loss": 0.3311, "step": 32869 }, { "epoch": 3.3418056120374136, "grad_norm": 0.2709023952484131, "learning_rate": 2.9924808010153333e-06, "loss": 0.324, "step": 32870 }, { "epoch": 3.3419072793818625, "grad_norm": 0.2699425220489502, "learning_rate": 2.992155781142341e-06, "loss": 0.298, "step": 32871 }, { "epoch": 3.3420089467263114, "grad_norm": 0.24560412764549255, "learning_rate": 2.9918307713843884e-06, "loss": 0.3366, "step": 32872 }, { "epoch": 3.3421106140707604, "grad_norm": 0.27748820185661316, "learning_rate": 2.9915057717431193e-06, "loss": 0.3631, "step": 32873 }, { "epoch": 3.3422122814152093, "grad_norm": 0.2904931306838989, "learning_rate": 2.9911807822201644e-06, "loss": 0.3114, "step": 32874 }, { "epoch": 3.3423139487596583, "grad_norm": 0.28667446970939636, "learning_rate": 2.990855802817165e-06, "loss": 0.2864, "step": 32875 }, { "epoch": 3.342415616104107, "grad_norm": 0.28259095549583435, "learning_rate": 2.990530833535755e-06, "loss": 0.3071, "step": 32876 }, { "epoch": 3.342517283448556, "grad_norm": 0.2816690504550934, "learning_rate": 2.990205874377572e-06, "loss": 0.2994, "step": 32877 }, { "epoch": 3.342618950793005, "grad_norm": 0.28404152393341064, "learning_rate": 2.989880925344257e-06, "loss": 0.3035, "step": 32878 }, { "epoch": 3.342720618137454, "grad_norm": 0.28807201981544495, "learning_rate": 2.9895559864374433e-06, "loss": 0.3036, "step": 32879 }, { "epoch": 3.342822285481903, "grad_norm": 0.2856456935405731, "learning_rate": 2.9892310576587693e-06, "loss": 0.3039, "step": 32880 }, { "epoch": 3.3429239528263524, "grad_norm": 0.25830745697021484, "learning_rate": 2.98890613900987e-06, "loss": 0.2975, "step": 32881 }, { "epoch": 3.3430256201708013, "grad_norm": 0.25695446133613586, "learning_rate": 2.988581230492383e-06, "loss": 0.3334, "step": 32882 }, { "epoch": 3.3431272875152502, "grad_norm": 0.26526814699172974, "learning_rate": 2.9882563321079487e-06, "loss": 0.2672, "step": 32883 }, { "epoch": 3.343228954859699, "grad_norm": 0.2821130156517029, "learning_rate": 2.987931443858199e-06, "loss": 0.2842, "step": 32884 }, { "epoch": 3.343330622204148, "grad_norm": 0.279826283454895, "learning_rate": 2.987606565744773e-06, "loss": 0.3262, "step": 32885 }, { "epoch": 3.343432289548597, "grad_norm": 0.27677634358406067, "learning_rate": 2.987281697769307e-06, "loss": 0.3057, "step": 32886 }, { "epoch": 3.343533956893046, "grad_norm": 0.2777470052242279, "learning_rate": 2.986956839933436e-06, "loss": 0.2844, "step": 32887 }, { "epoch": 3.343635624237495, "grad_norm": 0.2741326689720154, "learning_rate": 2.986631992238801e-06, "loss": 0.3298, "step": 32888 }, { "epoch": 3.343737291581944, "grad_norm": 0.26404839754104614, "learning_rate": 2.9863071546870335e-06, "loss": 0.2872, "step": 32889 }, { "epoch": 3.343838958926393, "grad_norm": 0.2708466351032257, "learning_rate": 2.9859823272797727e-06, "loss": 0.2957, "step": 32890 }, { "epoch": 3.3439406262708418, "grad_norm": 0.27992597222328186, "learning_rate": 2.9856575100186537e-06, "loss": 0.3455, "step": 32891 }, { "epoch": 3.3440422936152907, "grad_norm": 0.2983301281929016, "learning_rate": 2.9853327029053138e-06, "loss": 0.3197, "step": 32892 }, { "epoch": 3.3441439609597396, "grad_norm": 0.2753089666366577, "learning_rate": 2.9850079059413894e-06, "loss": 0.3266, "step": 32893 }, { "epoch": 3.3442456283041886, "grad_norm": 0.28863221406936646, "learning_rate": 2.984683119128516e-06, "loss": 0.3076, "step": 32894 }, { "epoch": 3.3443472956486375, "grad_norm": 0.26473134756088257, "learning_rate": 2.984358342468331e-06, "loss": 0.2947, "step": 32895 }, { "epoch": 3.3444489629930865, "grad_norm": 0.26817402243614197, "learning_rate": 2.9840335759624684e-06, "loss": 0.3141, "step": 32896 }, { "epoch": 3.3445506303375354, "grad_norm": 0.2764768898487091, "learning_rate": 2.983708819612565e-06, "loss": 0.3086, "step": 32897 }, { "epoch": 3.3446522976819844, "grad_norm": 0.2678634524345398, "learning_rate": 2.983384073420259e-06, "loss": 0.3131, "step": 32898 }, { "epoch": 3.3447539650264337, "grad_norm": 0.28868329524993896, "learning_rate": 2.9830593373871843e-06, "loss": 0.343, "step": 32899 }, { "epoch": 3.3448556323708827, "grad_norm": 0.2778221070766449, "learning_rate": 2.982734611514978e-06, "loss": 0.3098, "step": 32900 }, { "epoch": 3.3449572997153316, "grad_norm": 0.2738820016384125, "learning_rate": 2.982409895805274e-06, "loss": 0.3115, "step": 32901 }, { "epoch": 3.3450589670597806, "grad_norm": 0.27954012155532837, "learning_rate": 2.9820851902597093e-06, "loss": 0.313, "step": 32902 }, { "epoch": 3.3451606344042295, "grad_norm": 0.2783470153808594, "learning_rate": 2.9817604948799216e-06, "loss": 0.3017, "step": 32903 }, { "epoch": 3.3452623017486784, "grad_norm": 0.2873532176017761, "learning_rate": 2.9814358096675434e-06, "loss": 0.2936, "step": 32904 }, { "epoch": 3.3453639690931274, "grad_norm": 0.278850793838501, "learning_rate": 2.9811111346242134e-06, "loss": 0.2996, "step": 32905 }, { "epoch": 3.3454656364375763, "grad_norm": 0.2615576386451721, "learning_rate": 2.980786469751564e-06, "loss": 0.274, "step": 32906 }, { "epoch": 3.3455673037820253, "grad_norm": 0.2876570522785187, "learning_rate": 2.9804618150512334e-06, "loss": 0.3218, "step": 32907 }, { "epoch": 3.345668971126474, "grad_norm": 0.2924605906009674, "learning_rate": 2.980137170524857e-06, "loss": 0.3265, "step": 32908 }, { "epoch": 3.345770638470923, "grad_norm": 0.2967449426651001, "learning_rate": 2.9798125361740682e-06, "loss": 0.2992, "step": 32909 }, { "epoch": 3.345872305815372, "grad_norm": 0.26735836267471313, "learning_rate": 2.979487912000505e-06, "loss": 0.2979, "step": 32910 }, { "epoch": 3.345973973159821, "grad_norm": 0.26618415117263794, "learning_rate": 2.9791632980058003e-06, "loss": 0.3028, "step": 32911 }, { "epoch": 3.34607564050427, "grad_norm": 0.2811529338359833, "learning_rate": 2.9788386941915914e-06, "loss": 0.2895, "step": 32912 }, { "epoch": 3.346177307848719, "grad_norm": 0.3046467900276184, "learning_rate": 2.978514100559513e-06, "loss": 0.3125, "step": 32913 }, { "epoch": 3.346278975193168, "grad_norm": 0.2814539968967438, "learning_rate": 2.9781895171111994e-06, "loss": 0.3272, "step": 32914 }, { "epoch": 3.346380642537617, "grad_norm": 0.29340022802352905, "learning_rate": 2.977864943848288e-06, "loss": 0.2957, "step": 32915 }, { "epoch": 3.3464823098820657, "grad_norm": 0.24563300609588623, "learning_rate": 2.9775403807724113e-06, "loss": 0.3245, "step": 32916 }, { "epoch": 3.3465839772265147, "grad_norm": 0.2684054970741272, "learning_rate": 2.9772158278852057e-06, "loss": 0.3589, "step": 32917 }, { "epoch": 3.3466856445709636, "grad_norm": 0.30103760957717896, "learning_rate": 2.976891285188307e-06, "loss": 0.3028, "step": 32918 }, { "epoch": 3.3467873119154126, "grad_norm": 0.2543784976005554, "learning_rate": 2.9765667526833473e-06, "loss": 0.3419, "step": 32919 }, { "epoch": 3.3468889792598615, "grad_norm": 0.2816865146160126, "learning_rate": 2.9762422303719657e-06, "loss": 0.2942, "step": 32920 }, { "epoch": 3.3469906466043104, "grad_norm": 0.2669464945793152, "learning_rate": 2.9759177182557932e-06, "loss": 0.3078, "step": 32921 }, { "epoch": 3.34709231394876, "grad_norm": 0.25768885016441345, "learning_rate": 2.9755932163364664e-06, "loss": 0.2997, "step": 32922 }, { "epoch": 3.3471939812932088, "grad_norm": 0.28391775488853455, "learning_rate": 2.9752687246156208e-06, "loss": 0.3035, "step": 32923 }, { "epoch": 3.3472956486376577, "grad_norm": 0.28514042496681213, "learning_rate": 2.9749442430948895e-06, "loss": 0.3205, "step": 32924 }, { "epoch": 3.3473973159821067, "grad_norm": 0.2734580338001251, "learning_rate": 2.9746197717759086e-06, "loss": 0.306, "step": 32925 }, { "epoch": 3.3474989833265556, "grad_norm": 0.2656095623970032, "learning_rate": 2.974295310660311e-06, "loss": 0.3231, "step": 32926 }, { "epoch": 3.3476006506710045, "grad_norm": 0.27324581146240234, "learning_rate": 2.9739708597497322e-06, "loss": 0.3173, "step": 32927 }, { "epoch": 3.3477023180154535, "grad_norm": 0.28305086493492126, "learning_rate": 2.973646419045808e-06, "loss": 0.278, "step": 32928 }, { "epoch": 3.3478039853599024, "grad_norm": 0.28437933325767517, "learning_rate": 2.973321988550171e-06, "loss": 0.2884, "step": 32929 }, { "epoch": 3.3479056527043514, "grad_norm": 0.25486838817596436, "learning_rate": 2.972997568264456e-06, "loss": 0.2957, "step": 32930 }, { "epoch": 3.3480073200488003, "grad_norm": 0.26192042231559753, "learning_rate": 2.972673158190298e-06, "loss": 0.2861, "step": 32931 }, { "epoch": 3.3481089873932492, "grad_norm": 0.2615462839603424, "learning_rate": 2.9723487583293297e-06, "loss": 0.3412, "step": 32932 }, { "epoch": 3.348210654737698, "grad_norm": 0.2703835666179657, "learning_rate": 2.972024368683188e-06, "loss": 0.332, "step": 32933 }, { "epoch": 3.348312322082147, "grad_norm": 0.2738982141017914, "learning_rate": 2.9716999892535044e-06, "loss": 0.2943, "step": 32934 }, { "epoch": 3.348413989426596, "grad_norm": 0.2659933567047119, "learning_rate": 2.971375620041915e-06, "loss": 0.3087, "step": 32935 }, { "epoch": 3.348515656771045, "grad_norm": 0.2678174674510956, "learning_rate": 2.9710512610500532e-06, "loss": 0.3243, "step": 32936 }, { "epoch": 3.348617324115494, "grad_norm": 0.2980378270149231, "learning_rate": 2.970726912279552e-06, "loss": 0.3476, "step": 32937 }, { "epoch": 3.348718991459943, "grad_norm": 0.26866665482521057, "learning_rate": 2.9704025737320473e-06, "loss": 0.3009, "step": 32938 }, { "epoch": 3.348820658804392, "grad_norm": 0.28918060660362244, "learning_rate": 2.970078245409172e-06, "loss": 0.3029, "step": 32939 }, { "epoch": 3.348922326148841, "grad_norm": 0.2855791747570038, "learning_rate": 2.969753927312561e-06, "loss": 0.2966, "step": 32940 }, { "epoch": 3.34902399349329, "grad_norm": 0.3060361444950104, "learning_rate": 2.9694296194438456e-06, "loss": 0.2806, "step": 32941 }, { "epoch": 3.349125660837739, "grad_norm": 0.2816508412361145, "learning_rate": 2.9691053218046616e-06, "loss": 0.3558, "step": 32942 }, { "epoch": 3.349227328182188, "grad_norm": 0.2962517738342285, "learning_rate": 2.968781034396643e-06, "loss": 0.3428, "step": 32943 }, { "epoch": 3.349328995526637, "grad_norm": 0.26640793681144714, "learning_rate": 2.9684567572214222e-06, "loss": 0.3306, "step": 32944 }, { "epoch": 3.349430662871086, "grad_norm": 0.2860300838947296, "learning_rate": 2.9681324902806346e-06, "loss": 0.3308, "step": 32945 }, { "epoch": 3.349532330215535, "grad_norm": 0.30491915345191956, "learning_rate": 2.9678082335759116e-06, "loss": 0.277, "step": 32946 }, { "epoch": 3.349633997559984, "grad_norm": 0.2660216689109802, "learning_rate": 2.9674839871088878e-06, "loss": 0.3168, "step": 32947 }, { "epoch": 3.3497356649044328, "grad_norm": 0.2724463939666748, "learning_rate": 2.9671597508811976e-06, "loss": 0.3145, "step": 32948 }, { "epoch": 3.3498373322488817, "grad_norm": 0.2950277328491211, "learning_rate": 2.9668355248944725e-06, "loss": 0.349, "step": 32949 }, { "epoch": 3.3499389995933306, "grad_norm": 0.2794724702835083, "learning_rate": 2.9665113091503476e-06, "loss": 0.2993, "step": 32950 }, { "epoch": 3.3500406669377796, "grad_norm": 0.28391599655151367, "learning_rate": 2.966187103650455e-06, "loss": 0.2984, "step": 32951 }, { "epoch": 3.3501423342822285, "grad_norm": 0.2837064266204834, "learning_rate": 2.965862908396428e-06, "loss": 0.2824, "step": 32952 }, { "epoch": 3.3502440016266775, "grad_norm": 0.2820422649383545, "learning_rate": 2.9655387233899013e-06, "loss": 0.3242, "step": 32953 }, { "epoch": 3.3503456689711264, "grad_norm": 0.24844269454479218, "learning_rate": 2.965214548632507e-06, "loss": 0.2859, "step": 32954 }, { "epoch": 3.3504473363155753, "grad_norm": 0.2664334177970886, "learning_rate": 2.964890384125878e-06, "loss": 0.354, "step": 32955 }, { "epoch": 3.3505490036600243, "grad_norm": 0.29890525341033936, "learning_rate": 2.964566229871647e-06, "loss": 0.3067, "step": 32956 }, { "epoch": 3.3506506710044732, "grad_norm": 0.26800796389579773, "learning_rate": 2.9642420858714495e-06, "loss": 0.3133, "step": 32957 }, { "epoch": 3.350752338348922, "grad_norm": 0.2517856955528259, "learning_rate": 2.9639179521269148e-06, "loss": 0.2866, "step": 32958 }, { "epoch": 3.350854005693371, "grad_norm": 0.28398019075393677, "learning_rate": 2.963593828639677e-06, "loss": 0.2934, "step": 32959 }, { "epoch": 3.35095567303782, "grad_norm": 0.2811794579029083, "learning_rate": 2.9632697154113717e-06, "loss": 0.3106, "step": 32960 }, { "epoch": 3.351057340382269, "grad_norm": 0.2616135776042938, "learning_rate": 2.962945612443628e-06, "loss": 0.3491, "step": 32961 }, { "epoch": 3.351159007726718, "grad_norm": 0.28876736760139465, "learning_rate": 2.9626215197380813e-06, "loss": 0.3147, "step": 32962 }, { "epoch": 3.3512606750711673, "grad_norm": 0.2836851179599762, "learning_rate": 2.962297437296362e-06, "loss": 0.3006, "step": 32963 }, { "epoch": 3.3513623424156163, "grad_norm": 0.25969135761260986, "learning_rate": 2.9619733651201042e-06, "loss": 0.3537, "step": 32964 }, { "epoch": 3.351464009760065, "grad_norm": 0.2659086287021637, "learning_rate": 2.9616493032109417e-06, "loss": 0.2861, "step": 32965 }, { "epoch": 3.351565677104514, "grad_norm": 0.2790471911430359, "learning_rate": 2.9613252515705044e-06, "loss": 0.2869, "step": 32966 }, { "epoch": 3.351667344448963, "grad_norm": 0.287274569272995, "learning_rate": 2.9610012102004284e-06, "loss": 0.2984, "step": 32967 }, { "epoch": 3.351769011793412, "grad_norm": 0.26237890124320984, "learning_rate": 2.9606771791023396e-06, "loss": 0.2983, "step": 32968 }, { "epoch": 3.351870679137861, "grad_norm": 0.26522842049598694, "learning_rate": 2.9603531582778765e-06, "loss": 0.3171, "step": 32969 }, { "epoch": 3.35197234648231, "grad_norm": 0.2629009485244751, "learning_rate": 2.9600291477286703e-06, "loss": 0.3137, "step": 32970 }, { "epoch": 3.352074013826759, "grad_norm": 0.2766724228858948, "learning_rate": 2.959705147456351e-06, "loss": 0.3402, "step": 32971 }, { "epoch": 3.352175681171208, "grad_norm": 0.2753167450428009, "learning_rate": 2.9593811574625547e-06, "loss": 0.2926, "step": 32972 }, { "epoch": 3.3522773485156567, "grad_norm": 0.27324944734573364, "learning_rate": 2.9590571777489086e-06, "loss": 0.29, "step": 32973 }, { "epoch": 3.3523790158601057, "grad_norm": 0.28176799416542053, "learning_rate": 2.9587332083170477e-06, "loss": 0.2962, "step": 32974 }, { "epoch": 3.3524806832045546, "grad_norm": 0.2697564363479614, "learning_rate": 2.9584092491686056e-06, "loss": 0.3164, "step": 32975 }, { "epoch": 3.3525823505490036, "grad_norm": 0.297067254781723, "learning_rate": 2.9580853003052107e-06, "loss": 0.2919, "step": 32976 }, { "epoch": 3.3526840178934525, "grad_norm": 0.2816926836967468, "learning_rate": 2.9577613617284994e-06, "loss": 0.3059, "step": 32977 }, { "epoch": 3.3527856852379014, "grad_norm": 0.26958999037742615, "learning_rate": 2.957437433440098e-06, "loss": 0.2946, "step": 32978 }, { "epoch": 3.3528873525823504, "grad_norm": 0.3002736568450928, "learning_rate": 2.9571135154416417e-06, "loss": 0.3155, "step": 32979 }, { "epoch": 3.3529890199267993, "grad_norm": 0.2619341015815735, "learning_rate": 2.956789607734764e-06, "loss": 0.331, "step": 32980 }, { "epoch": 3.3530906872712487, "grad_norm": 0.26944881677627563, "learning_rate": 2.9564657103210936e-06, "loss": 0.2885, "step": 32981 }, { "epoch": 3.3531923546156976, "grad_norm": 0.26164698600769043, "learning_rate": 2.956141823202265e-06, "loss": 0.3258, "step": 32982 }, { "epoch": 3.3532940219601466, "grad_norm": 0.2912639081478119, "learning_rate": 2.955817946379905e-06, "loss": 0.2914, "step": 32983 }, { "epoch": 3.3533956893045955, "grad_norm": 0.28368452191352844, "learning_rate": 2.9554940798556495e-06, "loss": 0.2931, "step": 32984 }, { "epoch": 3.3534973566490445, "grad_norm": 0.26918235421180725, "learning_rate": 2.955170223631132e-06, "loss": 0.3242, "step": 32985 }, { "epoch": 3.3535990239934934, "grad_norm": 0.3110280930995941, "learning_rate": 2.9548463777079765e-06, "loss": 0.3177, "step": 32986 }, { "epoch": 3.3537006913379424, "grad_norm": 0.2776343524456024, "learning_rate": 2.954522542087823e-06, "loss": 0.3136, "step": 32987 }, { "epoch": 3.3538023586823913, "grad_norm": 0.28251755237579346, "learning_rate": 2.954198716772295e-06, "loss": 0.3179, "step": 32988 }, { "epoch": 3.3539040260268402, "grad_norm": 0.2835734486579895, "learning_rate": 2.95387490176303e-06, "loss": 0.3068, "step": 32989 }, { "epoch": 3.354005693371289, "grad_norm": 0.2920664846897125, "learning_rate": 2.9535510970616576e-06, "loss": 0.2935, "step": 32990 }, { "epoch": 3.354107360715738, "grad_norm": 0.28963759541511536, "learning_rate": 2.9532273026698062e-06, "loss": 0.3121, "step": 32991 }, { "epoch": 3.354209028060187, "grad_norm": 0.28588515520095825, "learning_rate": 2.952903518589112e-06, "loss": 0.2974, "step": 32992 }, { "epoch": 3.354310695404636, "grad_norm": 0.2651834189891815, "learning_rate": 2.9525797448212007e-06, "loss": 0.3177, "step": 32993 }, { "epoch": 3.354412362749085, "grad_norm": 0.26985931396484375, "learning_rate": 2.9522559813677067e-06, "loss": 0.3115, "step": 32994 }, { "epoch": 3.354514030093534, "grad_norm": 0.2986661493778229, "learning_rate": 2.951932228230263e-06, "loss": 0.3092, "step": 32995 }, { "epoch": 3.354615697437983, "grad_norm": 0.28018632531166077, "learning_rate": 2.951608485410494e-06, "loss": 0.2931, "step": 32996 }, { "epoch": 3.3547173647824318, "grad_norm": 0.2823295295238495, "learning_rate": 2.9512847529100384e-06, "loss": 0.319, "step": 32997 }, { "epoch": 3.3548190321268807, "grad_norm": 0.2962145507335663, "learning_rate": 2.9509610307305207e-06, "loss": 0.2727, "step": 32998 }, { "epoch": 3.3549206994713296, "grad_norm": 0.28645962476730347, "learning_rate": 2.950637318873575e-06, "loss": 0.3134, "step": 32999 }, { "epoch": 3.3550223668157786, "grad_norm": 0.2727099657058716, "learning_rate": 2.9503136173408335e-06, "loss": 0.2914, "step": 33000 }, { "epoch": 3.3551240341602275, "grad_norm": 0.266317218542099, "learning_rate": 2.949989926133922e-06, "loss": 0.319, "step": 33001 }, { "epoch": 3.3552257015046765, "grad_norm": 0.2809111773967743, "learning_rate": 2.9496662452544772e-06, "loss": 0.3066, "step": 33002 }, { "epoch": 3.3553273688491254, "grad_norm": 0.24677111208438873, "learning_rate": 2.949342574704125e-06, "loss": 0.3191, "step": 33003 }, { "epoch": 3.355429036193575, "grad_norm": 0.26845645904541016, "learning_rate": 2.9490189144844955e-06, "loss": 0.3322, "step": 33004 }, { "epoch": 3.3555307035380237, "grad_norm": 0.2910224199295044, "learning_rate": 2.948695264597225e-06, "loss": 0.2847, "step": 33005 }, { "epoch": 3.3556323708824727, "grad_norm": 0.26268336176872253, "learning_rate": 2.948371625043937e-06, "loss": 0.2981, "step": 33006 }, { "epoch": 3.3557340382269216, "grad_norm": 0.28193196654319763, "learning_rate": 2.9480479958262685e-06, "loss": 0.2844, "step": 33007 }, { "epoch": 3.3558357055713706, "grad_norm": 0.2643698453903198, "learning_rate": 2.947724376945844e-06, "loss": 0.3131, "step": 33008 }, { "epoch": 3.3559373729158195, "grad_norm": 0.27203160524368286, "learning_rate": 2.947400768404296e-06, "loss": 0.3291, "step": 33009 }, { "epoch": 3.3560390402602684, "grad_norm": 0.279678076505661, "learning_rate": 2.9470771702032584e-06, "loss": 0.299, "step": 33010 }, { "epoch": 3.3561407076047174, "grad_norm": 0.2886331379413605, "learning_rate": 2.9467535823443547e-06, "loss": 0.3184, "step": 33011 }, { "epoch": 3.3562423749491663, "grad_norm": 0.2552437484264374, "learning_rate": 2.9464300048292214e-06, "loss": 0.3375, "step": 33012 }, { "epoch": 3.3563440422936153, "grad_norm": 0.28423169255256653, "learning_rate": 2.9461064376594838e-06, "loss": 0.3056, "step": 33013 }, { "epoch": 3.356445709638064, "grad_norm": 0.2711731791496277, "learning_rate": 2.945782880836774e-06, "loss": 0.3253, "step": 33014 }, { "epoch": 3.356547376982513, "grad_norm": 0.26814553141593933, "learning_rate": 2.9454593343627237e-06, "loss": 0.3304, "step": 33015 }, { "epoch": 3.356649044326962, "grad_norm": 0.27360519766807556, "learning_rate": 2.945135798238958e-06, "loss": 0.2885, "step": 33016 }, { "epoch": 3.356750711671411, "grad_norm": 0.2816157937049866, "learning_rate": 2.944812272467113e-06, "loss": 0.317, "step": 33017 }, { "epoch": 3.35685237901586, "grad_norm": 0.29502731561660767, "learning_rate": 2.9444887570488134e-06, "loss": 0.2932, "step": 33018 }, { "epoch": 3.356954046360309, "grad_norm": 0.27807167172431946, "learning_rate": 2.94416525198569e-06, "loss": 0.2912, "step": 33019 }, { "epoch": 3.357055713704758, "grad_norm": 0.2803068459033966, "learning_rate": 2.9438417572793763e-06, "loss": 0.2704, "step": 33020 }, { "epoch": 3.357157381049207, "grad_norm": 0.25156912207603455, "learning_rate": 2.943518272931497e-06, "loss": 0.3119, "step": 33021 }, { "epoch": 3.357259048393656, "grad_norm": 0.27874964475631714, "learning_rate": 2.9431947989436866e-06, "loss": 0.2805, "step": 33022 }, { "epoch": 3.357360715738105, "grad_norm": 0.28166165947914124, "learning_rate": 2.9428713353175697e-06, "loss": 0.2959, "step": 33023 }, { "epoch": 3.357462383082554, "grad_norm": 0.29718589782714844, "learning_rate": 2.9425478820547772e-06, "loss": 0.3291, "step": 33024 }, { "epoch": 3.357564050427003, "grad_norm": 0.2654953598976135, "learning_rate": 2.9422244391569427e-06, "loss": 0.321, "step": 33025 }, { "epoch": 3.357665717771452, "grad_norm": 0.2655298113822937, "learning_rate": 2.9419010066256903e-06, "loss": 0.312, "step": 33026 }, { "epoch": 3.357767385115901, "grad_norm": 0.2669369876384735, "learning_rate": 2.9415775844626527e-06, "loss": 0.296, "step": 33027 }, { "epoch": 3.35786905246035, "grad_norm": 0.29161956906318665, "learning_rate": 2.941254172669458e-06, "loss": 0.325, "step": 33028 }, { "epoch": 3.3579707198047988, "grad_norm": 0.29533734917640686, "learning_rate": 2.940930771247733e-06, "loss": 0.3408, "step": 33029 }, { "epoch": 3.3580723871492477, "grad_norm": 0.29251211881637573, "learning_rate": 2.9406073801991132e-06, "loss": 0.3105, "step": 33030 }, { "epoch": 3.3581740544936967, "grad_norm": 0.2830948233604431, "learning_rate": 2.940283999525222e-06, "loss": 0.3125, "step": 33031 }, { "epoch": 3.3582757218381456, "grad_norm": 0.2817302346229553, "learning_rate": 2.939960629227691e-06, "loss": 0.2885, "step": 33032 }, { "epoch": 3.3583773891825945, "grad_norm": 0.26694539189338684, "learning_rate": 2.9396372693081487e-06, "loss": 0.3047, "step": 33033 }, { "epoch": 3.3584790565270435, "grad_norm": 0.30152764916419983, "learning_rate": 2.9393139197682226e-06, "loss": 0.2754, "step": 33034 }, { "epoch": 3.3585807238714924, "grad_norm": 0.28129246830940247, "learning_rate": 2.9389905806095464e-06, "loss": 0.3126, "step": 33035 }, { "epoch": 3.3586823912159414, "grad_norm": 0.2667907476425171, "learning_rate": 2.938667251833744e-06, "loss": 0.2986, "step": 33036 }, { "epoch": 3.3587840585603903, "grad_norm": 0.25181150436401367, "learning_rate": 2.9383439334424467e-06, "loss": 0.3415, "step": 33037 }, { "epoch": 3.3588857259048392, "grad_norm": 0.2642247974872589, "learning_rate": 2.9380206254372823e-06, "loss": 0.3367, "step": 33038 }, { "epoch": 3.358987393249288, "grad_norm": 0.2852182388305664, "learning_rate": 2.9376973278198788e-06, "loss": 0.3307, "step": 33039 }, { "epoch": 3.359089060593737, "grad_norm": 0.29645079374313354, "learning_rate": 2.9373740405918692e-06, "loss": 0.2967, "step": 33040 }, { "epoch": 3.359190727938186, "grad_norm": 0.2885383069515228, "learning_rate": 2.9370507637548775e-06, "loss": 0.297, "step": 33041 }, { "epoch": 3.359292395282635, "grad_norm": 0.28068897128105164, "learning_rate": 2.9367274973105336e-06, "loss": 0.3417, "step": 33042 }, { "epoch": 3.359394062627084, "grad_norm": 0.28693848848342896, "learning_rate": 2.936404241260466e-06, "loss": 0.3153, "step": 33043 }, { "epoch": 3.359495729971533, "grad_norm": 0.29107171297073364, "learning_rate": 2.936080995606303e-06, "loss": 0.2814, "step": 33044 }, { "epoch": 3.3595973973159823, "grad_norm": 0.26838207244873047, "learning_rate": 2.9357577603496755e-06, "loss": 0.3301, "step": 33045 }, { "epoch": 3.3596990646604312, "grad_norm": 0.2778245210647583, "learning_rate": 2.9354345354922078e-06, "loss": 0.3404, "step": 33046 }, { "epoch": 3.35980073200488, "grad_norm": 0.27273622155189514, "learning_rate": 2.935111321035532e-06, "loss": 0.3186, "step": 33047 }, { "epoch": 3.359902399349329, "grad_norm": 0.2880294620990753, "learning_rate": 2.934788116981273e-06, "loss": 0.3299, "step": 33048 }, { "epoch": 3.360004066693778, "grad_norm": 0.2797060012817383, "learning_rate": 2.934464923331061e-06, "loss": 0.278, "step": 33049 }, { "epoch": 3.360105734038227, "grad_norm": 0.28880566358566284, "learning_rate": 2.934141740086525e-06, "loss": 0.275, "step": 33050 }, { "epoch": 3.360207401382676, "grad_norm": 0.283746600151062, "learning_rate": 2.9338185672492914e-06, "loss": 0.3053, "step": 33051 }, { "epoch": 3.360309068727125, "grad_norm": 0.2754814922809601, "learning_rate": 2.933495404820989e-06, "loss": 0.3082, "step": 33052 }, { "epoch": 3.360410736071574, "grad_norm": 0.30274108052253723, "learning_rate": 2.9331722528032454e-06, "loss": 0.332, "step": 33053 }, { "epoch": 3.3605124034160228, "grad_norm": 0.290500670671463, "learning_rate": 2.9328491111976882e-06, "loss": 0.3369, "step": 33054 }, { "epoch": 3.3606140707604717, "grad_norm": 0.25729572772979736, "learning_rate": 2.932525980005947e-06, "loss": 0.3295, "step": 33055 }, { "epoch": 3.3607157381049206, "grad_norm": 0.2787579894065857, "learning_rate": 2.932202859229648e-06, "loss": 0.2961, "step": 33056 }, { "epoch": 3.3608174054493696, "grad_norm": 0.2865389585494995, "learning_rate": 2.931879748870421e-06, "loss": 0.3091, "step": 33057 }, { "epoch": 3.3609190727938185, "grad_norm": 0.2936701476573944, "learning_rate": 2.931556648929891e-06, "loss": 0.3091, "step": 33058 }, { "epoch": 3.3610207401382675, "grad_norm": 0.2738337814807892, "learning_rate": 2.9312335594096864e-06, "loss": 0.2996, "step": 33059 }, { "epoch": 3.3611224074827164, "grad_norm": 0.27605870366096497, "learning_rate": 2.9309104803114367e-06, "loss": 0.3104, "step": 33060 }, { "epoch": 3.3612240748271653, "grad_norm": 0.2653178870677948, "learning_rate": 2.9305874116367677e-06, "loss": 0.3007, "step": 33061 }, { "epoch": 3.3613257421716143, "grad_norm": 0.2799533009529114, "learning_rate": 2.9302643533873087e-06, "loss": 0.3076, "step": 33062 }, { "epoch": 3.3614274095160637, "grad_norm": 0.26790928840637207, "learning_rate": 2.9299413055646846e-06, "loss": 0.3224, "step": 33063 }, { "epoch": 3.3615290768605126, "grad_norm": 0.2733088731765747, "learning_rate": 2.9296182681705243e-06, "loss": 0.3156, "step": 33064 }, { "epoch": 3.3616307442049616, "grad_norm": 0.25768449902534485, "learning_rate": 2.9292952412064567e-06, "loss": 0.3054, "step": 33065 }, { "epoch": 3.3617324115494105, "grad_norm": 0.24733267724514008, "learning_rate": 2.928972224674106e-06, "loss": 0.2772, "step": 33066 }, { "epoch": 3.3618340788938594, "grad_norm": 0.27179887890815735, "learning_rate": 2.9286492185751026e-06, "loss": 0.2914, "step": 33067 }, { "epoch": 3.3619357462383084, "grad_norm": 0.2634279727935791, "learning_rate": 2.9283262229110708e-06, "loss": 0.2799, "step": 33068 }, { "epoch": 3.3620374135827573, "grad_norm": 0.26419660449028015, "learning_rate": 2.9280032376836394e-06, "loss": 0.3409, "step": 33069 }, { "epoch": 3.3621390809272063, "grad_norm": 0.2593405842781067, "learning_rate": 2.927680262894436e-06, "loss": 0.2983, "step": 33070 }, { "epoch": 3.362240748271655, "grad_norm": 0.2860717177391052, "learning_rate": 2.927357298545086e-06, "loss": 0.3383, "step": 33071 }, { "epoch": 3.362342415616104, "grad_norm": 0.2891457676887512, "learning_rate": 2.9270343446372184e-06, "loss": 0.2994, "step": 33072 }, { "epoch": 3.362444082960553, "grad_norm": 0.29221367835998535, "learning_rate": 2.9267114011724585e-06, "loss": 0.3061, "step": 33073 }, { "epoch": 3.362545750305002, "grad_norm": 0.2722666263580322, "learning_rate": 2.926388468152434e-06, "loss": 0.3205, "step": 33074 }, { "epoch": 3.362647417649451, "grad_norm": 0.2876659035682678, "learning_rate": 2.926065545578772e-06, "loss": 0.3125, "step": 33075 }, { "epoch": 3.3627490849939, "grad_norm": 0.2695639133453369, "learning_rate": 2.9257426334530984e-06, "loss": 0.2843, "step": 33076 }, { "epoch": 3.362850752338349, "grad_norm": 0.24434593319892883, "learning_rate": 2.9254197317770414e-06, "loss": 0.3218, "step": 33077 }, { "epoch": 3.362952419682798, "grad_norm": 0.2604147493839264, "learning_rate": 2.9250968405522262e-06, "loss": 0.2916, "step": 33078 }, { "epoch": 3.3630540870272467, "grad_norm": 0.27552857995033264, "learning_rate": 2.924773959780279e-06, "loss": 0.2864, "step": 33079 }, { "epoch": 3.3631557543716957, "grad_norm": 0.27365079522132874, "learning_rate": 2.92445108946283e-06, "loss": 0.3211, "step": 33080 }, { "epoch": 3.3632574217161446, "grad_norm": 0.2828676402568817, "learning_rate": 2.9241282296015004e-06, "loss": 0.3226, "step": 33081 }, { "epoch": 3.3633590890605936, "grad_norm": 0.25915977358818054, "learning_rate": 2.9238053801979214e-06, "loss": 0.3033, "step": 33082 }, { "epoch": 3.3634607564050425, "grad_norm": 0.2664998173713684, "learning_rate": 2.923482541253717e-06, "loss": 0.3098, "step": 33083 }, { "epoch": 3.3635624237494914, "grad_norm": 0.2848305404186249, "learning_rate": 2.9231597127705135e-06, "loss": 0.3013, "step": 33084 }, { "epoch": 3.3636640910939404, "grad_norm": 0.27556705474853516, "learning_rate": 2.9228368947499384e-06, "loss": 0.3063, "step": 33085 }, { "epoch": 3.3637657584383898, "grad_norm": 0.2616112530231476, "learning_rate": 2.9225140871936177e-06, "loss": 0.3092, "step": 33086 }, { "epoch": 3.3638674257828387, "grad_norm": 0.2688230574131012, "learning_rate": 2.9221912901031768e-06, "loss": 0.2932, "step": 33087 }, { "epoch": 3.3639690931272876, "grad_norm": 0.26802048087120056, "learning_rate": 2.9218685034802424e-06, "loss": 0.2905, "step": 33088 }, { "epoch": 3.3640707604717366, "grad_norm": 0.2756115794181824, "learning_rate": 2.9215457273264404e-06, "loss": 0.2896, "step": 33089 }, { "epoch": 3.3641724278161855, "grad_norm": 0.2775658667087555, "learning_rate": 2.9212229616433984e-06, "loss": 0.3049, "step": 33090 }, { "epoch": 3.3642740951606345, "grad_norm": 0.2777037024497986, "learning_rate": 2.9209002064327398e-06, "loss": 0.3149, "step": 33091 }, { "epoch": 3.3643757625050834, "grad_norm": 0.2810532748699188, "learning_rate": 2.9205774616960924e-06, "loss": 0.2935, "step": 33092 }, { "epoch": 3.3644774298495324, "grad_norm": 0.27227210998535156, "learning_rate": 2.920254727435081e-06, "loss": 0.3147, "step": 33093 }, { "epoch": 3.3645790971939813, "grad_norm": 0.2706008553504944, "learning_rate": 2.9199320036513323e-06, "loss": 0.3169, "step": 33094 }, { "epoch": 3.3646807645384302, "grad_norm": 0.2616179585456848, "learning_rate": 2.9196092903464733e-06, "loss": 0.3025, "step": 33095 }, { "epoch": 3.364782431882879, "grad_norm": 0.2722613513469696, "learning_rate": 2.9192865875221267e-06, "loss": 0.3201, "step": 33096 }, { "epoch": 3.364884099227328, "grad_norm": 0.2663850486278534, "learning_rate": 2.9189638951799216e-06, "loss": 0.2594, "step": 33097 }, { "epoch": 3.364985766571777, "grad_norm": 0.26642942428588867, "learning_rate": 2.9186412133214793e-06, "loss": 0.3125, "step": 33098 }, { "epoch": 3.365087433916226, "grad_norm": 0.2665925920009613, "learning_rate": 2.9183185419484278e-06, "loss": 0.3438, "step": 33099 }, { "epoch": 3.365189101260675, "grad_norm": 0.281974732875824, "learning_rate": 2.9179958810623963e-06, "loss": 0.3203, "step": 33100 }, { "epoch": 3.365290768605124, "grad_norm": 0.2982996702194214, "learning_rate": 2.9176732306650036e-06, "loss": 0.3039, "step": 33101 }, { "epoch": 3.365392435949573, "grad_norm": 0.31674623489379883, "learning_rate": 2.91735059075788e-06, "loss": 0.3211, "step": 33102 }, { "epoch": 3.3654941032940218, "grad_norm": 0.26517003774642944, "learning_rate": 2.9170279613426493e-06, "loss": 0.3186, "step": 33103 }, { "epoch": 3.365595770638471, "grad_norm": 0.27044886350631714, "learning_rate": 2.9167053424209348e-06, "loss": 0.3067, "step": 33104 }, { "epoch": 3.36569743798292, "grad_norm": 0.26654312014579773, "learning_rate": 2.9163827339943655e-06, "loss": 0.3235, "step": 33105 }, { "epoch": 3.365799105327369, "grad_norm": 0.3176705241203308, "learning_rate": 2.9160601360645635e-06, "loss": 0.3084, "step": 33106 }, { "epoch": 3.365900772671818, "grad_norm": 0.296842098236084, "learning_rate": 2.9157375486331585e-06, "loss": 0.284, "step": 33107 }, { "epoch": 3.366002440016267, "grad_norm": 0.2647637128829956, "learning_rate": 2.9154149717017688e-06, "loss": 0.3325, "step": 33108 }, { "epoch": 3.366104107360716, "grad_norm": 0.26717397570610046, "learning_rate": 2.915092405272023e-06, "loss": 0.3253, "step": 33109 }, { "epoch": 3.366205774705165, "grad_norm": 0.2692270278930664, "learning_rate": 2.91476984934555e-06, "loss": 0.3126, "step": 33110 }, { "epoch": 3.3663074420496137, "grad_norm": 0.25615620613098145, "learning_rate": 2.9144473039239674e-06, "loss": 0.2866, "step": 33111 }, { "epoch": 3.3664091093940627, "grad_norm": 0.2842867970466614, "learning_rate": 2.9141247690089054e-06, "loss": 0.285, "step": 33112 }, { "epoch": 3.3665107767385116, "grad_norm": 0.26984769105911255, "learning_rate": 2.913802244601987e-06, "loss": 0.3088, "step": 33113 }, { "epoch": 3.3666124440829606, "grad_norm": 0.2853338420391083, "learning_rate": 2.9134797307048357e-06, "loss": 0.308, "step": 33114 }, { "epoch": 3.3667141114274095, "grad_norm": 0.28618189692497253, "learning_rate": 2.913157227319079e-06, "loss": 0.2995, "step": 33115 }, { "epoch": 3.3668157787718584, "grad_norm": 0.2529822289943695, "learning_rate": 2.9128347344463405e-06, "loss": 0.304, "step": 33116 }, { "epoch": 3.3669174461163074, "grad_norm": 0.2665573060512543, "learning_rate": 2.9125122520882448e-06, "loss": 0.3139, "step": 33117 }, { "epoch": 3.3670191134607563, "grad_norm": 0.2842145562171936, "learning_rate": 2.9121897802464138e-06, "loss": 0.3207, "step": 33118 }, { "epoch": 3.3671207808052053, "grad_norm": 0.2764987349510193, "learning_rate": 2.9118673189224755e-06, "loss": 0.3185, "step": 33119 }, { "epoch": 3.367222448149654, "grad_norm": 0.28632619976997375, "learning_rate": 2.911544868118057e-06, "loss": 0.2889, "step": 33120 }, { "epoch": 3.367324115494103, "grad_norm": 0.28344815969467163, "learning_rate": 2.9112224278347746e-06, "loss": 0.3079, "step": 33121 }, { "epoch": 3.367425782838552, "grad_norm": 0.2750939726829529, "learning_rate": 2.9108999980742595e-06, "loss": 0.3087, "step": 33122 }, { "epoch": 3.367527450183001, "grad_norm": 0.2550063729286194, "learning_rate": 2.910577578838134e-06, "loss": 0.2883, "step": 33123 }, { "epoch": 3.36762911752745, "grad_norm": 0.28066152334213257, "learning_rate": 2.9102551701280203e-06, "loss": 0.2934, "step": 33124 }, { "epoch": 3.367730784871899, "grad_norm": 0.28400781750679016, "learning_rate": 2.909932771945546e-06, "loss": 0.3204, "step": 33125 }, { "epoch": 3.367832452216348, "grad_norm": 0.27097630500793457, "learning_rate": 2.909610384292334e-06, "loss": 0.3353, "step": 33126 }, { "epoch": 3.3679341195607972, "grad_norm": 0.2837918996810913, "learning_rate": 2.909288007170008e-06, "loss": 0.3337, "step": 33127 }, { "epoch": 3.368035786905246, "grad_norm": 0.2731300890445709, "learning_rate": 2.9089656405801908e-06, "loss": 0.3051, "step": 33128 }, { "epoch": 3.368137454249695, "grad_norm": 0.26793965697288513, "learning_rate": 2.9086432845245094e-06, "loss": 0.3051, "step": 33129 }, { "epoch": 3.368239121594144, "grad_norm": 0.26690545678138733, "learning_rate": 2.908320939004586e-06, "loss": 0.3269, "step": 33130 }, { "epoch": 3.368340788938593, "grad_norm": 0.27316272258758545, "learning_rate": 2.9079986040220427e-06, "loss": 0.2886, "step": 33131 }, { "epoch": 3.368442456283042, "grad_norm": 0.2722700536251068, "learning_rate": 2.9076762795785076e-06, "loss": 0.2999, "step": 33132 }, { "epoch": 3.368544123627491, "grad_norm": 0.2748056650161743, "learning_rate": 2.907353965675601e-06, "loss": 0.2949, "step": 33133 }, { "epoch": 3.36864579097194, "grad_norm": 0.25561827421188354, "learning_rate": 2.907031662314947e-06, "loss": 0.3029, "step": 33134 }, { "epoch": 3.3687474583163888, "grad_norm": 0.2764038145542145, "learning_rate": 2.906709369498172e-06, "loss": 0.3032, "step": 33135 }, { "epoch": 3.3688491256608377, "grad_norm": 0.283130407333374, "learning_rate": 2.9063870872268974e-06, "loss": 0.2979, "step": 33136 }, { "epoch": 3.3689507930052867, "grad_norm": 0.2782739996910095, "learning_rate": 2.906064815502747e-06, "loss": 0.3153, "step": 33137 }, { "epoch": 3.3690524603497356, "grad_norm": 0.2873185873031616, "learning_rate": 2.905742554327343e-06, "loss": 0.311, "step": 33138 }, { "epoch": 3.3691541276941845, "grad_norm": 0.24606101214885712, "learning_rate": 2.905420303702312e-06, "loss": 0.2948, "step": 33139 }, { "epoch": 3.3692557950386335, "grad_norm": 0.27837252616882324, "learning_rate": 2.905098063629276e-06, "loss": 0.2949, "step": 33140 }, { "epoch": 3.3693574623830824, "grad_norm": 0.2855270206928253, "learning_rate": 2.9047758341098563e-06, "loss": 0.3218, "step": 33141 }, { "epoch": 3.3694591297275314, "grad_norm": 0.26081663370132446, "learning_rate": 2.9044536151456794e-06, "loss": 0.3054, "step": 33142 }, { "epoch": 3.3695607970719803, "grad_norm": 0.2588953673839569, "learning_rate": 2.9041314067383676e-06, "loss": 0.3151, "step": 33143 }, { "epoch": 3.3696624644164297, "grad_norm": 0.2886943817138672, "learning_rate": 2.903809208889542e-06, "loss": 0.3204, "step": 33144 }, { "epoch": 3.3697641317608786, "grad_norm": 0.25175055861473083, "learning_rate": 2.9034870216008293e-06, "loss": 0.3203, "step": 33145 }, { "epoch": 3.3698657991053276, "grad_norm": 0.26509150862693787, "learning_rate": 2.9031648448738504e-06, "loss": 0.3312, "step": 33146 }, { "epoch": 3.3699674664497765, "grad_norm": 0.2579781711101532, "learning_rate": 2.9028426787102295e-06, "loss": 0.3105, "step": 33147 }, { "epoch": 3.3700691337942255, "grad_norm": 0.2554425001144409, "learning_rate": 2.902520523111586e-06, "loss": 0.2874, "step": 33148 }, { "epoch": 3.3701708011386744, "grad_norm": 0.2835756540298462, "learning_rate": 2.9021983780795476e-06, "loss": 0.2989, "step": 33149 }, { "epoch": 3.3702724684831233, "grad_norm": 0.2734592854976654, "learning_rate": 2.9018762436157354e-06, "loss": 0.3511, "step": 33150 }, { "epoch": 3.3703741358275723, "grad_norm": 0.2773257791996002, "learning_rate": 2.9015541197217707e-06, "loss": 0.3383, "step": 33151 }, { "epoch": 3.3704758031720212, "grad_norm": 0.29057109355926514, "learning_rate": 2.901232006399279e-06, "loss": 0.3089, "step": 33152 }, { "epoch": 3.37057747051647, "grad_norm": 0.29636457562446594, "learning_rate": 2.9009099036498823e-06, "loss": 0.304, "step": 33153 }, { "epoch": 3.370679137860919, "grad_norm": 0.26958027482032776, "learning_rate": 2.9005878114752017e-06, "loss": 0.3261, "step": 33154 }, { "epoch": 3.370780805205368, "grad_norm": 0.2711831033229828, "learning_rate": 2.90026572987686e-06, "loss": 0.304, "step": 33155 }, { "epoch": 3.370882472549817, "grad_norm": 0.26950976252555847, "learning_rate": 2.899943658856482e-06, "loss": 0.3187, "step": 33156 }, { "epoch": 3.370984139894266, "grad_norm": 0.2695302665233612, "learning_rate": 2.899621598415689e-06, "loss": 0.3002, "step": 33157 }, { "epoch": 3.371085807238715, "grad_norm": 0.2571326494216919, "learning_rate": 2.899299548556101e-06, "loss": 0.3163, "step": 33158 }, { "epoch": 3.371187474583164, "grad_norm": 0.27399080991744995, "learning_rate": 2.898977509279345e-06, "loss": 0.2925, "step": 33159 }, { "epoch": 3.3712891419276128, "grad_norm": 0.28626549243927, "learning_rate": 2.8986554805870406e-06, "loss": 0.2983, "step": 33160 }, { "epoch": 3.3713908092720617, "grad_norm": 0.2927905023097992, "learning_rate": 2.8983334624808093e-06, "loss": 0.3235, "step": 33161 }, { "epoch": 3.3714924766165106, "grad_norm": 0.28258055448532104, "learning_rate": 2.898011454962275e-06, "loss": 0.3105, "step": 33162 }, { "epoch": 3.3715941439609596, "grad_norm": 0.25612205266952515, "learning_rate": 2.8976894580330604e-06, "loss": 0.3065, "step": 33163 }, { "epoch": 3.3716958113054085, "grad_norm": 0.29067468643188477, "learning_rate": 2.8973674716947875e-06, "loss": 0.3298, "step": 33164 }, { "epoch": 3.3717974786498575, "grad_norm": 0.2793939709663391, "learning_rate": 2.8970454959490745e-06, "loss": 0.3318, "step": 33165 }, { "epoch": 3.3718991459943064, "grad_norm": 0.3066602051258087, "learning_rate": 2.8967235307975494e-06, "loss": 0.3257, "step": 33166 }, { "epoch": 3.3720008133387553, "grad_norm": 0.28961610794067383, "learning_rate": 2.8964015762418306e-06, "loss": 0.3186, "step": 33167 }, { "epoch": 3.3721024806832047, "grad_norm": 0.3113084137439728, "learning_rate": 2.8960796322835395e-06, "loss": 0.3129, "step": 33168 }, { "epoch": 3.3722041480276537, "grad_norm": 0.2740395665168762, "learning_rate": 2.895757698924301e-06, "loss": 0.3307, "step": 33169 }, { "epoch": 3.3723058153721026, "grad_norm": 0.2657108008861542, "learning_rate": 2.8954357761657347e-06, "loss": 0.301, "step": 33170 }, { "epoch": 3.3724074827165516, "grad_norm": 0.26793813705444336, "learning_rate": 2.8951138640094613e-06, "loss": 0.3058, "step": 33171 }, { "epoch": 3.3725091500610005, "grad_norm": 0.292123407125473, "learning_rate": 2.8947919624571056e-06, "loss": 0.344, "step": 33172 }, { "epoch": 3.3726108174054494, "grad_norm": 0.286349892616272, "learning_rate": 2.894470071510288e-06, "loss": 0.3303, "step": 33173 }, { "epoch": 3.3727124847498984, "grad_norm": 0.2986220121383667, "learning_rate": 2.8941481911706305e-06, "loss": 0.3333, "step": 33174 }, { "epoch": 3.3728141520943473, "grad_norm": 0.2557884752750397, "learning_rate": 2.893826321439751e-06, "loss": 0.3069, "step": 33175 }, { "epoch": 3.3729158194387963, "grad_norm": 0.287308007478714, "learning_rate": 2.8935044623192766e-06, "loss": 0.3131, "step": 33176 }, { "epoch": 3.373017486783245, "grad_norm": 0.2861648499965668, "learning_rate": 2.8931826138108253e-06, "loss": 0.3133, "step": 33177 }, { "epoch": 3.373119154127694, "grad_norm": 0.26491403579711914, "learning_rate": 2.8928607759160183e-06, "loss": 0.3093, "step": 33178 }, { "epoch": 3.373220821472143, "grad_norm": 0.3000796139240265, "learning_rate": 2.8925389486364797e-06, "loss": 0.3305, "step": 33179 }, { "epoch": 3.373322488816592, "grad_norm": 0.2747556269168854, "learning_rate": 2.892217131973829e-06, "loss": 0.3048, "step": 33180 }, { "epoch": 3.373424156161041, "grad_norm": 0.280030757188797, "learning_rate": 2.8918953259296857e-06, "loss": 0.2805, "step": 33181 }, { "epoch": 3.37352582350549, "grad_norm": 0.27024203538894653, "learning_rate": 2.8915735305056743e-06, "loss": 0.2997, "step": 33182 }, { "epoch": 3.373627490849939, "grad_norm": 0.2898506820201874, "learning_rate": 2.8912517457034146e-06, "loss": 0.311, "step": 33183 }, { "epoch": 3.373729158194388, "grad_norm": 0.2940741181373596, "learning_rate": 2.8909299715245274e-06, "loss": 0.3134, "step": 33184 }, { "epoch": 3.373830825538837, "grad_norm": 0.2611275017261505, "learning_rate": 2.890608207970632e-06, "loss": 0.2833, "step": 33185 }, { "epoch": 3.373932492883286, "grad_norm": 0.2828880250453949, "learning_rate": 2.890286455043353e-06, "loss": 0.3048, "step": 33186 }, { "epoch": 3.374034160227735, "grad_norm": 0.27901598811149597, "learning_rate": 2.8899647127443096e-06, "loss": 0.3185, "step": 33187 }, { "epoch": 3.374135827572184, "grad_norm": 0.2914411127567291, "learning_rate": 2.88964298107512e-06, "loss": 0.3167, "step": 33188 }, { "epoch": 3.374237494916633, "grad_norm": 0.29576098918914795, "learning_rate": 2.889321260037411e-06, "loss": 0.3071, "step": 33189 }, { "epoch": 3.374339162261082, "grad_norm": 0.28406640887260437, "learning_rate": 2.888999549632799e-06, "loss": 0.3103, "step": 33190 }, { "epoch": 3.374440829605531, "grad_norm": 0.3052644431591034, "learning_rate": 2.8886778498629038e-06, "loss": 0.3256, "step": 33191 }, { "epoch": 3.3745424969499798, "grad_norm": 0.2748745083808899, "learning_rate": 2.88835616072935e-06, "loss": 0.2929, "step": 33192 }, { "epoch": 3.3746441642944287, "grad_norm": 0.24611811339855194, "learning_rate": 2.8880344822337553e-06, "loss": 0.2893, "step": 33193 }, { "epoch": 3.3747458316388776, "grad_norm": 0.2769239544868469, "learning_rate": 2.8877128143777415e-06, "loss": 0.2842, "step": 33194 }, { "epoch": 3.3748474989833266, "grad_norm": 0.2847383916378021, "learning_rate": 2.8873911571629265e-06, "loss": 0.2779, "step": 33195 }, { "epoch": 3.3749491663277755, "grad_norm": 0.2741450369358063, "learning_rate": 2.887069510590935e-06, "loss": 0.3159, "step": 33196 }, { "epoch": 3.3750508336722245, "grad_norm": 0.2668033540248871, "learning_rate": 2.886747874663385e-06, "loss": 0.2983, "step": 33197 }, { "epoch": 3.3751525010166734, "grad_norm": 0.2658250033855438, "learning_rate": 2.8864262493818952e-06, "loss": 0.299, "step": 33198 }, { "epoch": 3.3752541683611224, "grad_norm": 0.275106817483902, "learning_rate": 2.8861046347480896e-06, "loss": 0.3165, "step": 33199 }, { "epoch": 3.3753558357055713, "grad_norm": 0.2776603102684021, "learning_rate": 2.885783030763587e-06, "loss": 0.294, "step": 33200 }, { "epoch": 3.3754575030500202, "grad_norm": 0.29846876859664917, "learning_rate": 2.8854614374300042e-06, "loss": 0.2847, "step": 33201 }, { "epoch": 3.375559170394469, "grad_norm": 0.2806585133075714, "learning_rate": 2.885139854748967e-06, "loss": 0.306, "step": 33202 }, { "epoch": 3.375660837738918, "grad_norm": 0.2769010663032532, "learning_rate": 2.8848182827220917e-06, "loss": 0.3333, "step": 33203 }, { "epoch": 3.375762505083367, "grad_norm": 0.2628043293952942, "learning_rate": 2.884496721351e-06, "loss": 0.3101, "step": 33204 }, { "epoch": 3.375864172427816, "grad_norm": 0.26320359110832214, "learning_rate": 2.884175170637309e-06, "loss": 0.3076, "step": 33205 }, { "epoch": 3.375965839772265, "grad_norm": 0.2747124433517456, "learning_rate": 2.8838536305826424e-06, "loss": 0.3141, "step": 33206 }, { "epoch": 3.376067507116714, "grad_norm": 0.24031701683998108, "learning_rate": 2.8835321011886184e-06, "loss": 0.3348, "step": 33207 }, { "epoch": 3.376169174461163, "grad_norm": 0.2674742639064789, "learning_rate": 2.8832105824568547e-06, "loss": 0.3258, "step": 33208 }, { "epoch": 3.376270841805612, "grad_norm": 0.28014880418777466, "learning_rate": 2.8828890743889778e-06, "loss": 0.3144, "step": 33209 }, { "epoch": 3.376372509150061, "grad_norm": 0.2624466121196747, "learning_rate": 2.882567576986598e-06, "loss": 0.2997, "step": 33210 }, { "epoch": 3.37647417649451, "grad_norm": 0.27961286902427673, "learning_rate": 2.8822460902513395e-06, "loss": 0.3295, "step": 33211 }, { "epoch": 3.376575843838959, "grad_norm": 0.2714087665081024, "learning_rate": 2.881924614184824e-06, "loss": 0.3375, "step": 33212 }, { "epoch": 3.376677511183408, "grad_norm": 0.27633169293403625, "learning_rate": 2.8816031487886686e-06, "loss": 0.3534, "step": 33213 }, { "epoch": 3.376779178527857, "grad_norm": 0.2941592037677765, "learning_rate": 2.881281694064494e-06, "loss": 0.2848, "step": 33214 }, { "epoch": 3.376880845872306, "grad_norm": 0.2640787959098816, "learning_rate": 2.880960250013916e-06, "loss": 0.3258, "step": 33215 }, { "epoch": 3.376982513216755, "grad_norm": 0.28063011169433594, "learning_rate": 2.880638816638559e-06, "loss": 0.3156, "step": 33216 }, { "epoch": 3.3770841805612037, "grad_norm": 0.29163792729377747, "learning_rate": 2.8803173939400398e-06, "loss": 0.2831, "step": 33217 }, { "epoch": 3.3771858479056527, "grad_norm": 0.2864081859588623, "learning_rate": 2.879995981919976e-06, "loss": 0.2851, "step": 33218 }, { "epoch": 3.3772875152501016, "grad_norm": 0.30444663763046265, "learning_rate": 2.879674580579993e-06, "loss": 0.3127, "step": 33219 }, { "epoch": 3.3773891825945506, "grad_norm": 0.26887762546539307, "learning_rate": 2.8793531899217013e-06, "loss": 0.266, "step": 33220 }, { "epoch": 3.3774908499389995, "grad_norm": 0.297802209854126, "learning_rate": 2.879031809946724e-06, "loss": 0.3087, "step": 33221 }, { "epoch": 3.3775925172834484, "grad_norm": 0.2722737789154053, "learning_rate": 2.8787104406566846e-06, "loss": 0.3304, "step": 33222 }, { "epoch": 3.3776941846278974, "grad_norm": 0.28278595209121704, "learning_rate": 2.8783890820531948e-06, "loss": 0.3324, "step": 33223 }, { "epoch": 3.3777958519723463, "grad_norm": 0.2687983512878418, "learning_rate": 2.878067734137877e-06, "loss": 0.3246, "step": 33224 }, { "epoch": 3.3778975193167953, "grad_norm": 0.30725136399269104, "learning_rate": 2.8777463969123487e-06, "loss": 0.3372, "step": 33225 }, { "epoch": 3.3779991866612447, "grad_norm": 0.25731363892555237, "learning_rate": 2.8774250703782314e-06, "loss": 0.3243, "step": 33226 }, { "epoch": 3.3781008540056936, "grad_norm": 0.26058056950569153, "learning_rate": 2.8771037545371428e-06, "loss": 0.2981, "step": 33227 }, { "epoch": 3.3782025213501425, "grad_norm": 0.2849327027797699, "learning_rate": 2.876782449390698e-06, "loss": 0.3085, "step": 33228 }, { "epoch": 3.3783041886945915, "grad_norm": 0.29744961857795715, "learning_rate": 2.876461154940524e-06, "loss": 0.3057, "step": 33229 }, { "epoch": 3.3784058560390404, "grad_norm": 0.30268895626068115, "learning_rate": 2.8761398711882295e-06, "loss": 0.3138, "step": 33230 }, { "epoch": 3.3785075233834894, "grad_norm": 0.2993411421775818, "learning_rate": 2.875818598135437e-06, "loss": 0.312, "step": 33231 }, { "epoch": 3.3786091907279383, "grad_norm": 0.27036187052726746, "learning_rate": 2.87549733578377e-06, "loss": 0.3457, "step": 33232 }, { "epoch": 3.3787108580723872, "grad_norm": 0.2540659010410309, "learning_rate": 2.8751760841348384e-06, "loss": 0.3314, "step": 33233 }, { "epoch": 3.378812525416836, "grad_norm": 0.27223899960517883, "learning_rate": 2.8748548431902673e-06, "loss": 0.326, "step": 33234 }, { "epoch": 3.378914192761285, "grad_norm": 0.27451291680336, "learning_rate": 2.87453361295167e-06, "loss": 0.3152, "step": 33235 }, { "epoch": 3.379015860105734, "grad_norm": 0.2772601842880249, "learning_rate": 2.8742123934206695e-06, "loss": 0.3127, "step": 33236 }, { "epoch": 3.379117527450183, "grad_norm": 0.27743056416511536, "learning_rate": 2.8738911845988814e-06, "loss": 0.2823, "step": 33237 }, { "epoch": 3.379219194794632, "grad_norm": 0.27493271231651306, "learning_rate": 2.873569986487923e-06, "loss": 0.3044, "step": 33238 }, { "epoch": 3.379320862139081, "grad_norm": 0.2776772379875183, "learning_rate": 2.873248799089417e-06, "loss": 0.3427, "step": 33239 }, { "epoch": 3.37942252948353, "grad_norm": 0.2643776834011078, "learning_rate": 2.8729276224049746e-06, "loss": 0.3348, "step": 33240 }, { "epoch": 3.3795241968279788, "grad_norm": 0.2627602815628052, "learning_rate": 2.8726064564362173e-06, "loss": 0.3086, "step": 33241 }, { "epoch": 3.3796258641724277, "grad_norm": 0.2769271433353424, "learning_rate": 2.8722853011847666e-06, "loss": 0.2889, "step": 33242 }, { "epoch": 3.3797275315168767, "grad_norm": 0.27266421914100647, "learning_rate": 2.8719641566522338e-06, "loss": 0.2761, "step": 33243 }, { "epoch": 3.3798291988613256, "grad_norm": 0.2716112732887268, "learning_rate": 2.8716430228402416e-06, "loss": 0.3225, "step": 33244 }, { "epoch": 3.3799308662057745, "grad_norm": 0.2755638062953949, "learning_rate": 2.8713218997504054e-06, "loss": 0.2946, "step": 33245 }, { "epoch": 3.3800325335502235, "grad_norm": 0.27644848823547363, "learning_rate": 2.8710007873843426e-06, "loss": 0.3217, "step": 33246 }, { "epoch": 3.3801342008946724, "grad_norm": 0.28598612546920776, "learning_rate": 2.870679685743673e-06, "loss": 0.3158, "step": 33247 }, { "epoch": 3.3802358682391214, "grad_norm": 0.2732824385166168, "learning_rate": 2.870358594830012e-06, "loss": 0.3033, "step": 33248 }, { "epoch": 3.3803375355835703, "grad_norm": 0.2578064203262329, "learning_rate": 2.8700375146449817e-06, "loss": 0.3337, "step": 33249 }, { "epoch": 3.3804392029280197, "grad_norm": 0.2708369195461273, "learning_rate": 2.869716445190193e-06, "loss": 0.2798, "step": 33250 }, { "epoch": 3.3805408702724686, "grad_norm": 0.28109118342399597, "learning_rate": 2.8693953864672653e-06, "loss": 0.3267, "step": 33251 }, { "epoch": 3.3806425376169176, "grad_norm": 0.266746461391449, "learning_rate": 2.869074338477822e-06, "loss": 0.3307, "step": 33252 }, { "epoch": 3.3807442049613665, "grad_norm": 0.27525725960731506, "learning_rate": 2.868753301223472e-06, "loss": 0.3024, "step": 33253 }, { "epoch": 3.3808458723058155, "grad_norm": 0.2710264325141907, "learning_rate": 2.868432274705838e-06, "loss": 0.3071, "step": 33254 }, { "epoch": 3.3809475396502644, "grad_norm": 0.2815299332141876, "learning_rate": 2.8681112589265364e-06, "loss": 0.3051, "step": 33255 }, { "epoch": 3.3810492069947133, "grad_norm": 0.2786039412021637, "learning_rate": 2.867790253887181e-06, "loss": 0.3142, "step": 33256 }, { "epoch": 3.3811508743391623, "grad_norm": 0.25891047716140747, "learning_rate": 2.867469259589394e-06, "loss": 0.251, "step": 33257 }, { "epoch": 3.3812525416836112, "grad_norm": 0.2572166323661804, "learning_rate": 2.867148276034788e-06, "loss": 0.3012, "step": 33258 }, { "epoch": 3.38135420902806, "grad_norm": 0.25920039415359497, "learning_rate": 2.866827303224986e-06, "loss": 0.3103, "step": 33259 }, { "epoch": 3.381455876372509, "grad_norm": 0.26794925332069397, "learning_rate": 2.866506341161597e-06, "loss": 0.314, "step": 33260 }, { "epoch": 3.381557543716958, "grad_norm": 0.27213254570961, "learning_rate": 2.8661853898462423e-06, "loss": 0.346, "step": 33261 }, { "epoch": 3.381659211061407, "grad_norm": 0.27993646264076233, "learning_rate": 2.8658644492805425e-06, "loss": 0.3013, "step": 33262 }, { "epoch": 3.381760878405856, "grad_norm": 0.29163670539855957, "learning_rate": 2.8655435194661065e-06, "loss": 0.3319, "step": 33263 }, { "epoch": 3.381862545750305, "grad_norm": 0.26904773712158203, "learning_rate": 2.865222600404557e-06, "loss": 0.3102, "step": 33264 }, { "epoch": 3.381964213094754, "grad_norm": 0.26614344120025635, "learning_rate": 2.864901692097509e-06, "loss": 0.3015, "step": 33265 }, { "epoch": 3.3820658804392028, "grad_norm": 0.2771551311016083, "learning_rate": 2.8645807945465765e-06, "loss": 0.3321, "step": 33266 }, { "epoch": 3.382167547783652, "grad_norm": 0.2964887022972107, "learning_rate": 2.8642599077533805e-06, "loss": 0.3027, "step": 33267 }, { "epoch": 3.382269215128101, "grad_norm": 0.2839038670063019, "learning_rate": 2.863939031719536e-06, "loss": 0.3126, "step": 33268 }, { "epoch": 3.38237088247255, "grad_norm": 0.24811208248138428, "learning_rate": 2.8636181664466583e-06, "loss": 0.3045, "step": 33269 }, { "epoch": 3.382472549816999, "grad_norm": 0.25330421328544617, "learning_rate": 2.8632973119363626e-06, "loss": 0.3151, "step": 33270 }, { "epoch": 3.382574217161448, "grad_norm": 0.2648598551750183, "learning_rate": 2.8629764681902683e-06, "loss": 0.3237, "step": 33271 }, { "epoch": 3.382675884505897, "grad_norm": 0.2703043222427368, "learning_rate": 2.8626556352099937e-06, "loss": 0.3045, "step": 33272 }, { "epoch": 3.382777551850346, "grad_norm": 0.2546193599700928, "learning_rate": 2.8623348129971485e-06, "loss": 0.3344, "step": 33273 }, { "epoch": 3.3828792191947947, "grad_norm": 0.2900599241256714, "learning_rate": 2.862014001553355e-06, "loss": 0.2885, "step": 33274 }, { "epoch": 3.3829808865392437, "grad_norm": 0.28102949261665344, "learning_rate": 2.861693200880226e-06, "loss": 0.3444, "step": 33275 }, { "epoch": 3.3830825538836926, "grad_norm": 0.28489920496940613, "learning_rate": 2.861372410979377e-06, "loss": 0.293, "step": 33276 }, { "epoch": 3.3831842212281416, "grad_norm": 0.27362722158432007, "learning_rate": 2.8610516318524273e-06, "loss": 0.3164, "step": 33277 }, { "epoch": 3.3832858885725905, "grad_norm": 0.28278276324272156, "learning_rate": 2.860730863500992e-06, "loss": 0.3322, "step": 33278 }, { "epoch": 3.3833875559170394, "grad_norm": 0.2916024923324585, "learning_rate": 2.8604101059266853e-06, "loss": 0.3259, "step": 33279 }, { "epoch": 3.3834892232614884, "grad_norm": 0.2930670976638794, "learning_rate": 2.8600893591311224e-06, "loss": 0.3078, "step": 33280 }, { "epoch": 3.3835908906059373, "grad_norm": 0.2747366428375244, "learning_rate": 2.8597686231159227e-06, "loss": 0.3123, "step": 33281 }, { "epoch": 3.3836925579503863, "grad_norm": 0.2970212399959564, "learning_rate": 2.8594478978827e-06, "loss": 0.2737, "step": 33282 }, { "epoch": 3.383794225294835, "grad_norm": 0.2584865689277649, "learning_rate": 2.8591271834330688e-06, "loss": 0.2761, "step": 33283 }, { "epoch": 3.383895892639284, "grad_norm": 0.278088241815567, "learning_rate": 2.8588064797686476e-06, "loss": 0.3199, "step": 33284 }, { "epoch": 3.383997559983733, "grad_norm": 0.2729182541370392, "learning_rate": 2.8584857868910504e-06, "loss": 0.3074, "step": 33285 }, { "epoch": 3.384099227328182, "grad_norm": 0.2965138256549835, "learning_rate": 2.8581651048018912e-06, "loss": 0.323, "step": 33286 }, { "epoch": 3.384200894672631, "grad_norm": 0.25567418336868286, "learning_rate": 2.8578444335027887e-06, "loss": 0.3167, "step": 33287 }, { "epoch": 3.38430256201708, "grad_norm": 0.2846009135246277, "learning_rate": 2.8575237729953577e-06, "loss": 0.2793, "step": 33288 }, { "epoch": 3.384404229361529, "grad_norm": 0.29084324836730957, "learning_rate": 2.857203123281212e-06, "loss": 0.2832, "step": 33289 }, { "epoch": 3.3845058967059782, "grad_norm": 0.2684342563152313, "learning_rate": 2.8568824843619657e-06, "loss": 0.2972, "step": 33290 }, { "epoch": 3.384607564050427, "grad_norm": 0.26089876890182495, "learning_rate": 2.856561856239239e-06, "loss": 0.3367, "step": 33291 }, { "epoch": 3.384709231394876, "grad_norm": 0.29561325907707214, "learning_rate": 2.8562412389146437e-06, "loss": 0.3245, "step": 33292 }, { "epoch": 3.384810898739325, "grad_norm": 0.26475396752357483, "learning_rate": 2.8559206323897935e-06, "loss": 0.3319, "step": 33293 }, { "epoch": 3.384912566083774, "grad_norm": 0.28678926825523376, "learning_rate": 2.8556000366663082e-06, "loss": 0.3136, "step": 33294 }, { "epoch": 3.385014233428223, "grad_norm": 0.2767133116722107, "learning_rate": 2.8552794517458e-06, "loss": 0.3064, "step": 33295 }, { "epoch": 3.385115900772672, "grad_norm": 0.2814802825450897, "learning_rate": 2.854958877629882e-06, "loss": 0.3308, "step": 33296 }, { "epoch": 3.385217568117121, "grad_norm": 0.27256032824516296, "learning_rate": 2.854638314320174e-06, "loss": 0.2912, "step": 33297 }, { "epoch": 3.3853192354615698, "grad_norm": 0.3056032657623291, "learning_rate": 2.8543177618182883e-06, "loss": 0.2716, "step": 33298 }, { "epoch": 3.3854209028060187, "grad_norm": 0.2652159333229065, "learning_rate": 2.8539972201258392e-06, "loss": 0.3186, "step": 33299 }, { "epoch": 3.3855225701504676, "grad_norm": 0.27606990933418274, "learning_rate": 2.8536766892444405e-06, "loss": 0.2889, "step": 33300 }, { "epoch": 3.3856242374949166, "grad_norm": 0.2759774625301361, "learning_rate": 2.8533561691757104e-06, "loss": 0.3002, "step": 33301 }, { "epoch": 3.3857259048393655, "grad_norm": 0.2559540271759033, "learning_rate": 2.8530356599212626e-06, "loss": 0.283, "step": 33302 }, { "epoch": 3.3858275721838145, "grad_norm": 0.2637377679347992, "learning_rate": 2.852715161482708e-06, "loss": 0.2933, "step": 33303 }, { "epoch": 3.3859292395282634, "grad_norm": 0.2778288722038269, "learning_rate": 2.852394673861667e-06, "loss": 0.3294, "step": 33304 }, { "epoch": 3.3860309068727124, "grad_norm": 0.29399779438972473, "learning_rate": 2.852074197059751e-06, "loss": 0.2653, "step": 33305 }, { "epoch": 3.3861325742171613, "grad_norm": 0.2946392297744751, "learning_rate": 2.851753731078572e-06, "loss": 0.3168, "step": 33306 }, { "epoch": 3.3862342415616102, "grad_norm": 0.29033657908439636, "learning_rate": 2.85143327591975e-06, "loss": 0.361, "step": 33307 }, { "epoch": 3.3863359089060596, "grad_norm": 0.2712697386741638, "learning_rate": 2.8511128315848963e-06, "loss": 0.3449, "step": 33308 }, { "epoch": 3.3864375762505086, "grad_norm": 0.26804667711257935, "learning_rate": 2.8507923980756257e-06, "loss": 0.2969, "step": 33309 }, { "epoch": 3.3865392435949575, "grad_norm": 0.2738029360771179, "learning_rate": 2.85047197539355e-06, "loss": 0.2933, "step": 33310 }, { "epoch": 3.3866409109394064, "grad_norm": 0.28330403566360474, "learning_rate": 2.8501515635402876e-06, "loss": 0.3112, "step": 33311 }, { "epoch": 3.3867425782838554, "grad_norm": 0.29678893089294434, "learning_rate": 2.849831162517451e-06, "loss": 0.2552, "step": 33312 }, { "epoch": 3.3868442456283043, "grad_norm": 0.2769334018230438, "learning_rate": 2.8495107723266524e-06, "loss": 0.3255, "step": 33313 }, { "epoch": 3.3869459129727533, "grad_norm": 0.25096359848976135, "learning_rate": 2.8491903929695085e-06, "loss": 0.2957, "step": 33314 }, { "epoch": 3.387047580317202, "grad_norm": 0.25359779596328735, "learning_rate": 2.8488700244476332e-06, "loss": 0.2968, "step": 33315 }, { "epoch": 3.387149247661651, "grad_norm": 0.26971501111984253, "learning_rate": 2.8485496667626374e-06, "loss": 0.3075, "step": 33316 }, { "epoch": 3.3872509150061, "grad_norm": 0.28709810972213745, "learning_rate": 2.848229319916139e-06, "loss": 0.3126, "step": 33317 }, { "epoch": 3.387352582350549, "grad_norm": 0.26140016317367554, "learning_rate": 2.8479089839097498e-06, "loss": 0.2854, "step": 33318 }, { "epoch": 3.387454249694998, "grad_norm": 0.26811492443084717, "learning_rate": 2.8475886587450834e-06, "loss": 0.3129, "step": 33319 }, { "epoch": 3.387555917039447, "grad_norm": 0.2597813606262207, "learning_rate": 2.8472683444237526e-06, "loss": 0.3028, "step": 33320 }, { "epoch": 3.387657584383896, "grad_norm": 0.2379848212003708, "learning_rate": 2.8469480409473737e-06, "loss": 0.3088, "step": 33321 }, { "epoch": 3.387759251728345, "grad_norm": 0.2895139455795288, "learning_rate": 2.8466277483175593e-06, "loss": 0.315, "step": 33322 }, { "epoch": 3.3878609190727937, "grad_norm": 0.2897380292415619, "learning_rate": 2.8463074665359202e-06, "loss": 0.3211, "step": 33323 }, { "epoch": 3.3879625864172427, "grad_norm": 0.2906723618507385, "learning_rate": 2.845987195604075e-06, "loss": 0.3126, "step": 33324 }, { "epoch": 3.3880642537616916, "grad_norm": 0.28930771350860596, "learning_rate": 2.8456669355236334e-06, "loss": 0.3018, "step": 33325 }, { "epoch": 3.3881659211061406, "grad_norm": 0.2838314473628998, "learning_rate": 2.8453466862962085e-06, "loss": 0.3145, "step": 33326 }, { "epoch": 3.3882675884505895, "grad_norm": 0.28212645649909973, "learning_rate": 2.845026447923417e-06, "loss": 0.2844, "step": 33327 }, { "epoch": 3.3883692557950384, "grad_norm": 0.2709366977214813, "learning_rate": 2.84470622040687e-06, "loss": 0.3087, "step": 33328 }, { "epoch": 3.3884709231394874, "grad_norm": 0.27972325682640076, "learning_rate": 2.844386003748181e-06, "loss": 0.3052, "step": 33329 }, { "epoch": 3.3885725904839363, "grad_norm": 0.24667946994304657, "learning_rate": 2.844065797948961e-06, "loss": 0.3089, "step": 33330 }, { "epoch": 3.3886742578283857, "grad_norm": 0.2730691432952881, "learning_rate": 2.8437456030108267e-06, "loss": 0.3271, "step": 33331 }, { "epoch": 3.3887759251728347, "grad_norm": 0.29400768876075745, "learning_rate": 2.8434254189353906e-06, "loss": 0.2849, "step": 33332 }, { "epoch": 3.3888775925172836, "grad_norm": 0.2629041075706482, "learning_rate": 2.8431052457242625e-06, "loss": 0.3101, "step": 33333 }, { "epoch": 3.3889792598617325, "grad_norm": 0.2754073441028595, "learning_rate": 2.8427850833790594e-06, "loss": 0.2613, "step": 33334 }, { "epoch": 3.3890809272061815, "grad_norm": 0.27942195534706116, "learning_rate": 2.842464931901392e-06, "loss": 0.3313, "step": 33335 }, { "epoch": 3.3891825945506304, "grad_norm": 0.25288769602775574, "learning_rate": 2.842144791292874e-06, "loss": 0.3299, "step": 33336 }, { "epoch": 3.3892842618950794, "grad_norm": 0.277893990278244, "learning_rate": 2.8418246615551158e-06, "loss": 0.3028, "step": 33337 }, { "epoch": 3.3893859292395283, "grad_norm": 0.27756160497665405, "learning_rate": 2.841504542689734e-06, "loss": 0.3158, "step": 33338 }, { "epoch": 3.3894875965839772, "grad_norm": 0.25992682576179504, "learning_rate": 2.8411844346983398e-06, "loss": 0.3276, "step": 33339 }, { "epoch": 3.389589263928426, "grad_norm": 0.28226369619369507, "learning_rate": 2.840864337582543e-06, "loss": 0.3343, "step": 33340 }, { "epoch": 3.389690931272875, "grad_norm": 0.27334001660346985, "learning_rate": 2.84054425134396e-06, "loss": 0.3018, "step": 33341 }, { "epoch": 3.389792598617324, "grad_norm": 0.2879883050918579, "learning_rate": 2.8402241759842032e-06, "loss": 0.3123, "step": 33342 }, { "epoch": 3.389894265961773, "grad_norm": 0.3010914921760559, "learning_rate": 2.839904111504881e-06, "loss": 0.3189, "step": 33343 }, { "epoch": 3.389995933306222, "grad_norm": 0.2719273269176483, "learning_rate": 2.83958405790761e-06, "loss": 0.3252, "step": 33344 }, { "epoch": 3.390097600650671, "grad_norm": 0.2856551706790924, "learning_rate": 2.839264015194002e-06, "loss": 0.2881, "step": 33345 }, { "epoch": 3.39019926799512, "grad_norm": 0.2750144600868225, "learning_rate": 2.838943983365668e-06, "loss": 0.3442, "step": 33346 }, { "epoch": 3.3903009353395688, "grad_norm": 0.28205054998397827, "learning_rate": 2.8386239624242184e-06, "loss": 0.3055, "step": 33347 }, { "epoch": 3.3904026026840177, "grad_norm": 0.25289884209632874, "learning_rate": 2.8383039523712706e-06, "loss": 0.3337, "step": 33348 }, { "epoch": 3.390504270028467, "grad_norm": 0.2902168929576874, "learning_rate": 2.837983953208433e-06, "loss": 0.2939, "step": 33349 }, { "epoch": 3.390605937372916, "grad_norm": 0.2937706410884857, "learning_rate": 2.837663964937317e-06, "loss": 0.2675, "step": 33350 }, { "epoch": 3.390707604717365, "grad_norm": 0.2794363796710968, "learning_rate": 2.8373439875595376e-06, "loss": 0.3494, "step": 33351 }, { "epoch": 3.390809272061814, "grad_norm": 0.2816503942012787, "learning_rate": 2.837024021076705e-06, "loss": 0.2885, "step": 33352 }, { "epoch": 3.390910939406263, "grad_norm": 0.284807950258255, "learning_rate": 2.8367040654904305e-06, "loss": 0.3307, "step": 33353 }, { "epoch": 3.391012606750712, "grad_norm": 0.2744492292404175, "learning_rate": 2.836384120802328e-06, "loss": 0.2991, "step": 33354 }, { "epoch": 3.3911142740951608, "grad_norm": 0.26248154044151306, "learning_rate": 2.8360641870140083e-06, "loss": 0.316, "step": 33355 }, { "epoch": 3.3912159414396097, "grad_norm": 0.2824605107307434, "learning_rate": 2.835744264127083e-06, "loss": 0.3071, "step": 33356 }, { "epoch": 3.3913176087840586, "grad_norm": 0.27546465396881104, "learning_rate": 2.8354243521431624e-06, "loss": 0.3005, "step": 33357 }, { "epoch": 3.3914192761285076, "grad_norm": 0.25297117233276367, "learning_rate": 2.8351044510638603e-06, "loss": 0.2946, "step": 33358 }, { "epoch": 3.3915209434729565, "grad_norm": 0.25907957553863525, "learning_rate": 2.834784560890789e-06, "loss": 0.312, "step": 33359 }, { "epoch": 3.3916226108174055, "grad_norm": 0.254984587430954, "learning_rate": 2.834464681625555e-06, "loss": 0.3015, "step": 33360 }, { "epoch": 3.3917242781618544, "grad_norm": 0.28551626205444336, "learning_rate": 2.834144813269778e-06, "loss": 0.3108, "step": 33361 }, { "epoch": 3.3918259455063033, "grad_norm": 0.30041730403900146, "learning_rate": 2.8338249558250608e-06, "loss": 0.3036, "step": 33362 }, { "epoch": 3.3919276128507523, "grad_norm": 0.2662147581577301, "learning_rate": 2.8335051092930186e-06, "loss": 0.3224, "step": 33363 }, { "epoch": 3.3920292801952012, "grad_norm": 0.2817811667919159, "learning_rate": 2.8331852736752653e-06, "loss": 0.305, "step": 33364 }, { "epoch": 3.39213094753965, "grad_norm": 0.2703894376754761, "learning_rate": 2.832865448973409e-06, "loss": 0.3285, "step": 33365 }, { "epoch": 3.392232614884099, "grad_norm": 0.2865636646747589, "learning_rate": 2.8325456351890624e-06, "loss": 0.3118, "step": 33366 }, { "epoch": 3.392334282228548, "grad_norm": 0.27040305733680725, "learning_rate": 2.832225832323833e-06, "loss": 0.2779, "step": 33367 }, { "epoch": 3.392435949572997, "grad_norm": 0.29494181275367737, "learning_rate": 2.8319060403793376e-06, "loss": 0.3161, "step": 33368 }, { "epoch": 3.392537616917446, "grad_norm": 0.2683919370174408, "learning_rate": 2.831586259357183e-06, "loss": 0.3186, "step": 33369 }, { "epoch": 3.392639284261895, "grad_norm": 0.27623432874679565, "learning_rate": 2.83126648925898e-06, "loss": 0.2972, "step": 33370 }, { "epoch": 3.392740951606344, "grad_norm": 0.26735320687294006, "learning_rate": 2.830946730086346e-06, "loss": 0.3374, "step": 33371 }, { "epoch": 3.392842618950793, "grad_norm": 0.2795647978782654, "learning_rate": 2.8306269818408826e-06, "loss": 0.2963, "step": 33372 }, { "epoch": 3.392944286295242, "grad_norm": 0.2686620056629181, "learning_rate": 2.830307244524204e-06, "loss": 0.311, "step": 33373 }, { "epoch": 3.393045953639691, "grad_norm": 0.27008211612701416, "learning_rate": 2.8299875181379264e-06, "loss": 0.2906, "step": 33374 }, { "epoch": 3.39314762098414, "grad_norm": 0.26854655146598816, "learning_rate": 2.8296678026836527e-06, "loss": 0.2791, "step": 33375 }, { "epoch": 3.393249288328589, "grad_norm": 0.282358318567276, "learning_rate": 2.8293480981629983e-06, "loss": 0.3564, "step": 33376 }, { "epoch": 3.393350955673038, "grad_norm": 0.2595851421356201, "learning_rate": 2.8290284045775706e-06, "loss": 0.341, "step": 33377 }, { "epoch": 3.393452623017487, "grad_norm": 0.27823489904403687, "learning_rate": 2.8287087219289833e-06, "loss": 0.276, "step": 33378 }, { "epoch": 3.393554290361936, "grad_norm": 0.3061729967594147, "learning_rate": 2.828389050218846e-06, "loss": 0.3378, "step": 33379 }, { "epoch": 3.3936559577063847, "grad_norm": 0.2875750660896301, "learning_rate": 2.8280693894487664e-06, "loss": 0.3147, "step": 33380 }, { "epoch": 3.3937576250508337, "grad_norm": 0.27943524718284607, "learning_rate": 2.827749739620361e-06, "loss": 0.2771, "step": 33381 }, { "epoch": 3.3938592923952826, "grad_norm": 0.28585782647132874, "learning_rate": 2.8274301007352324e-06, "loss": 0.3557, "step": 33382 }, { "epoch": 3.3939609597397316, "grad_norm": 0.28118517994880676, "learning_rate": 2.827110472794995e-06, "loss": 0.304, "step": 33383 }, { "epoch": 3.3940626270841805, "grad_norm": 0.2575607895851135, "learning_rate": 2.826790855801262e-06, "loss": 0.2969, "step": 33384 }, { "epoch": 3.3941642944286294, "grad_norm": 0.26840245723724365, "learning_rate": 2.826471249755637e-06, "loss": 0.3087, "step": 33385 }, { "epoch": 3.3942659617730784, "grad_norm": 0.26483574509620667, "learning_rate": 2.8261516546597344e-06, "loss": 0.2825, "step": 33386 }, { "epoch": 3.3943676291175273, "grad_norm": 0.25771990418434143, "learning_rate": 2.8258320705151614e-06, "loss": 0.316, "step": 33387 }, { "epoch": 3.3944692964619763, "grad_norm": 0.2596617639064789, "learning_rate": 2.825512497323532e-06, "loss": 0.2948, "step": 33388 }, { "epoch": 3.394570963806425, "grad_norm": 0.2684500813484192, "learning_rate": 2.8251929350864536e-06, "loss": 0.3047, "step": 33389 }, { "epoch": 3.3946726311508746, "grad_norm": 0.27961793541908264, "learning_rate": 2.8248733838055355e-06, "loss": 0.2885, "step": 33390 }, { "epoch": 3.3947742984953235, "grad_norm": 0.2796960473060608, "learning_rate": 2.8245538434823916e-06, "loss": 0.3264, "step": 33391 }, { "epoch": 3.3948759658397725, "grad_norm": 0.27774229645729065, "learning_rate": 2.824234314118624e-06, "loss": 0.3076, "step": 33392 }, { "epoch": 3.3949776331842214, "grad_norm": 0.2523364722728729, "learning_rate": 2.8239147957158476e-06, "loss": 0.3134, "step": 33393 }, { "epoch": 3.3950793005286704, "grad_norm": 0.2644652724266052, "learning_rate": 2.823595288275675e-06, "loss": 0.3341, "step": 33394 }, { "epoch": 3.3951809678731193, "grad_norm": 0.25634413957595825, "learning_rate": 2.8232757917997085e-06, "loss": 0.307, "step": 33395 }, { "epoch": 3.3952826352175682, "grad_norm": 0.27483898401260376, "learning_rate": 2.8229563062895625e-06, "loss": 0.3229, "step": 33396 }, { "epoch": 3.395384302562017, "grad_norm": 0.27556711435317993, "learning_rate": 2.822636831746846e-06, "loss": 0.2768, "step": 33397 }, { "epoch": 3.395485969906466, "grad_norm": 0.2563098669052124, "learning_rate": 2.8223173681731653e-06, "loss": 0.3298, "step": 33398 }, { "epoch": 3.395587637250915, "grad_norm": 0.26956620812416077, "learning_rate": 2.821997915570135e-06, "loss": 0.3216, "step": 33399 }, { "epoch": 3.395689304595364, "grad_norm": 0.2897363603115082, "learning_rate": 2.821678473939359e-06, "loss": 0.3033, "step": 33400 }, { "epoch": 3.395790971939813, "grad_norm": 0.3002592921257019, "learning_rate": 2.8213590432824533e-06, "loss": 0.3393, "step": 33401 }, { "epoch": 3.395892639284262, "grad_norm": 0.30344316363334656, "learning_rate": 2.8210396236010186e-06, "loss": 0.2826, "step": 33402 }, { "epoch": 3.395994306628711, "grad_norm": 0.28789010643959045, "learning_rate": 2.820720214896669e-06, "loss": 0.3341, "step": 33403 }, { "epoch": 3.3960959739731598, "grad_norm": 0.2729097008705139, "learning_rate": 2.820400817171017e-06, "loss": 0.3261, "step": 33404 }, { "epoch": 3.3961976413176087, "grad_norm": 0.27722376585006714, "learning_rate": 2.8200814304256634e-06, "loss": 0.3017, "step": 33405 }, { "epoch": 3.3962993086620576, "grad_norm": 0.2706427574157715, "learning_rate": 2.8197620546622238e-06, "loss": 0.328, "step": 33406 }, { "epoch": 3.3964009760065066, "grad_norm": 0.2756175398826599, "learning_rate": 2.8194426898823045e-06, "loss": 0.3246, "step": 33407 }, { "epoch": 3.3965026433509555, "grad_norm": 0.2835925817489624, "learning_rate": 2.8191233360875135e-06, "loss": 0.306, "step": 33408 }, { "epoch": 3.3966043106954045, "grad_norm": 0.3210009038448334, "learning_rate": 2.8188039932794624e-06, "loss": 0.303, "step": 33409 }, { "epoch": 3.3967059780398534, "grad_norm": 0.2703973352909088, "learning_rate": 2.8184846614597567e-06, "loss": 0.306, "step": 33410 }, { "epoch": 3.3968076453843024, "grad_norm": 0.28700539469718933, "learning_rate": 2.818165340630011e-06, "loss": 0.3152, "step": 33411 }, { "epoch": 3.3969093127287513, "grad_norm": 0.2608952820301056, "learning_rate": 2.8178460307918253e-06, "loss": 0.2854, "step": 33412 }, { "epoch": 3.3970109800732007, "grad_norm": 0.2744807004928589, "learning_rate": 2.8175267319468124e-06, "loss": 0.3012, "step": 33413 }, { "epoch": 3.3971126474176496, "grad_norm": 0.2729802131652832, "learning_rate": 2.817207444096586e-06, "loss": 0.2946, "step": 33414 }, { "epoch": 3.3972143147620986, "grad_norm": 0.2675631642341614, "learning_rate": 2.816888167242746e-06, "loss": 0.3339, "step": 33415 }, { "epoch": 3.3973159821065475, "grad_norm": 0.2683597505092621, "learning_rate": 2.8165689013869056e-06, "loss": 0.3321, "step": 33416 }, { "epoch": 3.3974176494509964, "grad_norm": 0.2674033045768738, "learning_rate": 2.816249646530673e-06, "loss": 0.3007, "step": 33417 }, { "epoch": 3.3975193167954454, "grad_norm": 0.27054286003112793, "learning_rate": 2.815930402675654e-06, "loss": 0.339, "step": 33418 }, { "epoch": 3.3976209841398943, "grad_norm": 0.2562068998813629, "learning_rate": 2.81561116982346e-06, "loss": 0.2847, "step": 33419 }, { "epoch": 3.3977226514843433, "grad_norm": 0.2471841424703598, "learning_rate": 2.8152919479756978e-06, "loss": 0.2928, "step": 33420 }, { "epoch": 3.397824318828792, "grad_norm": 0.2792448401451111, "learning_rate": 2.814972737133976e-06, "loss": 0.3311, "step": 33421 }, { "epoch": 3.397925986173241, "grad_norm": 0.2781045436859131, "learning_rate": 2.8146535372998995e-06, "loss": 0.3318, "step": 33422 }, { "epoch": 3.39802765351769, "grad_norm": 0.2587902545928955, "learning_rate": 2.81433434847508e-06, "loss": 0.3138, "step": 33423 }, { "epoch": 3.398129320862139, "grad_norm": 0.28576725721359253, "learning_rate": 2.8140151706611285e-06, "loss": 0.2943, "step": 33424 }, { "epoch": 3.398230988206588, "grad_norm": 0.2983376085758209, "learning_rate": 2.813696003859645e-06, "loss": 0.2852, "step": 33425 }, { "epoch": 3.398332655551037, "grad_norm": 0.2815648317337036, "learning_rate": 2.8133768480722433e-06, "loss": 0.3227, "step": 33426 }, { "epoch": 3.398434322895486, "grad_norm": 0.2791938781738281, "learning_rate": 2.8130577033005296e-06, "loss": 0.3032, "step": 33427 }, { "epoch": 3.398535990239935, "grad_norm": 0.3005400598049164, "learning_rate": 2.8127385695461095e-06, "loss": 0.3323, "step": 33428 }, { "epoch": 3.3986376575843837, "grad_norm": 0.26949116587638855, "learning_rate": 2.812419446810595e-06, "loss": 0.327, "step": 33429 }, { "epoch": 3.3987393249288327, "grad_norm": 0.2993440628051758, "learning_rate": 2.8121003350955913e-06, "loss": 0.3134, "step": 33430 }, { "epoch": 3.398840992273282, "grad_norm": 0.278815895318985, "learning_rate": 2.8117812344027055e-06, "loss": 0.2959, "step": 33431 }, { "epoch": 3.398942659617731, "grad_norm": 0.26809635758399963, "learning_rate": 2.8114621447335445e-06, "loss": 0.3055, "step": 33432 }, { "epoch": 3.39904432696218, "grad_norm": 0.2991132438182831, "learning_rate": 2.8111430660897192e-06, "loss": 0.3115, "step": 33433 }, { "epoch": 3.399145994306629, "grad_norm": 0.2881051003932953, "learning_rate": 2.810823998472835e-06, "loss": 0.3124, "step": 33434 }, { "epoch": 3.399247661651078, "grad_norm": 0.27179136872291565, "learning_rate": 2.8105049418844973e-06, "loss": 0.2965, "step": 33435 }, { "epoch": 3.3993493289955268, "grad_norm": 0.2738407254219055, "learning_rate": 2.8101858963263168e-06, "loss": 0.2838, "step": 33436 }, { "epoch": 3.3994509963399757, "grad_norm": 0.2667374312877655, "learning_rate": 2.8098668617998997e-06, "loss": 0.3053, "step": 33437 }, { "epoch": 3.3995526636844247, "grad_norm": 0.2689451575279236, "learning_rate": 2.809547838306851e-06, "loss": 0.2909, "step": 33438 }, { "epoch": 3.3996543310288736, "grad_norm": 0.24261189997196198, "learning_rate": 2.809228825848782e-06, "loss": 0.3229, "step": 33439 }, { "epoch": 3.3997559983733225, "grad_norm": 0.26327255368232727, "learning_rate": 2.808909824427297e-06, "loss": 0.2965, "step": 33440 }, { "epoch": 3.3998576657177715, "grad_norm": 0.2529812157154083, "learning_rate": 2.808590834044004e-06, "loss": 0.3177, "step": 33441 }, { "epoch": 3.3999593330622204, "grad_norm": 0.25287455320358276, "learning_rate": 2.8082718547005073e-06, "loss": 0.3094, "step": 33442 }, { "epoch": 3.4000610004066694, "grad_norm": 0.2864581346511841, "learning_rate": 2.8079528863984185e-06, "loss": 0.3115, "step": 33443 }, { "epoch": 3.4001626677511183, "grad_norm": 0.27659741044044495, "learning_rate": 2.807633929139342e-06, "loss": 0.2958, "step": 33444 }, { "epoch": 3.4002643350955672, "grad_norm": 0.2944031357765198, "learning_rate": 2.807314982924883e-06, "loss": 0.3534, "step": 33445 }, { "epoch": 3.400366002440016, "grad_norm": 0.26343032717704773, "learning_rate": 2.8069960477566517e-06, "loss": 0.3281, "step": 33446 }, { "epoch": 3.400467669784465, "grad_norm": 0.2799200117588043, "learning_rate": 2.8066771236362533e-06, "loss": 0.3227, "step": 33447 }, { "epoch": 3.400569337128914, "grad_norm": 0.2865966856479645, "learning_rate": 2.806358210565293e-06, "loss": 0.3179, "step": 33448 }, { "epoch": 3.400671004473363, "grad_norm": 0.2619039714336395, "learning_rate": 2.8060393085453797e-06, "loss": 0.3022, "step": 33449 }, { "epoch": 3.400772671817812, "grad_norm": 0.25566616654396057, "learning_rate": 2.8057204175781194e-06, "loss": 0.2973, "step": 33450 }, { "epoch": 3.400874339162261, "grad_norm": 0.2733983099460602, "learning_rate": 2.8054015376651185e-06, "loss": 0.2888, "step": 33451 }, { "epoch": 3.40097600650671, "grad_norm": 0.28803664445877075, "learning_rate": 2.8050826688079808e-06, "loss": 0.2995, "step": 33452 }, { "epoch": 3.401077673851159, "grad_norm": 0.2880570888519287, "learning_rate": 2.804763811008317e-06, "loss": 0.342, "step": 33453 }, { "epoch": 3.401179341195608, "grad_norm": 0.25756046175956726, "learning_rate": 2.8044449642677317e-06, "loss": 0.3059, "step": 33454 }, { "epoch": 3.401281008540057, "grad_norm": 0.28309786319732666, "learning_rate": 2.8041261285878285e-06, "loss": 0.2937, "step": 33455 }, { "epoch": 3.401382675884506, "grad_norm": 0.2771710753440857, "learning_rate": 2.8038073039702183e-06, "loss": 0.3299, "step": 33456 }, { "epoch": 3.401484343228955, "grad_norm": 0.27998530864715576, "learning_rate": 2.803488490416505e-06, "loss": 0.3043, "step": 33457 }, { "epoch": 3.401586010573404, "grad_norm": 0.27898144721984863, "learning_rate": 2.8031696879282934e-06, "loss": 0.3144, "step": 33458 }, { "epoch": 3.401687677917853, "grad_norm": 0.2470974624156952, "learning_rate": 2.802850896507192e-06, "loss": 0.3134, "step": 33459 }, { "epoch": 3.401789345262302, "grad_norm": 0.3518446385860443, "learning_rate": 2.8025321161548057e-06, "loss": 0.294, "step": 33460 }, { "epoch": 3.4018910126067508, "grad_norm": 0.29710420966148376, "learning_rate": 2.8022133468727408e-06, "loss": 0.3025, "step": 33461 }, { "epoch": 3.4019926799511997, "grad_norm": 0.27291423082351685, "learning_rate": 2.8018945886626005e-06, "loss": 0.3156, "step": 33462 }, { "epoch": 3.4020943472956486, "grad_norm": 0.2860782742500305, "learning_rate": 2.8015758415259947e-06, "loss": 0.3254, "step": 33463 }, { "epoch": 3.4021960146400976, "grad_norm": 0.2731224000453949, "learning_rate": 2.8012571054645276e-06, "loss": 0.2746, "step": 33464 }, { "epoch": 3.4022976819845465, "grad_norm": 0.2868659198284149, "learning_rate": 2.800938380479803e-06, "loss": 0.2929, "step": 33465 }, { "epoch": 3.4023993493289955, "grad_norm": 0.27123740315437317, "learning_rate": 2.8006196665734298e-06, "loss": 0.3174, "step": 33466 }, { "epoch": 3.4025010166734444, "grad_norm": 0.28293418884277344, "learning_rate": 2.8003009637470122e-06, "loss": 0.3158, "step": 33467 }, { "epoch": 3.4026026840178933, "grad_norm": 0.29509177803993225, "learning_rate": 2.799982272002154e-06, "loss": 0.2868, "step": 33468 }, { "epoch": 3.4027043513623423, "grad_norm": 0.29209011793136597, "learning_rate": 2.7996635913404634e-06, "loss": 0.2845, "step": 33469 }, { "epoch": 3.4028060187067912, "grad_norm": 0.2761903405189514, "learning_rate": 2.799344921763546e-06, "loss": 0.2735, "step": 33470 }, { "epoch": 3.40290768605124, "grad_norm": 0.27620425820350647, "learning_rate": 2.799026263273005e-06, "loss": 0.2878, "step": 33471 }, { "epoch": 3.4030093533956896, "grad_norm": 0.26141679286956787, "learning_rate": 2.798707615870445e-06, "loss": 0.2805, "step": 33472 }, { "epoch": 3.4031110207401385, "grad_norm": 0.26526549458503723, "learning_rate": 2.798388979557475e-06, "loss": 0.3123, "step": 33473 }, { "epoch": 3.4032126880845874, "grad_norm": 0.2716827690601349, "learning_rate": 2.798070354335698e-06, "loss": 0.353, "step": 33474 }, { "epoch": 3.4033143554290364, "grad_norm": 0.27071523666381836, "learning_rate": 2.7977517402067167e-06, "loss": 0.3158, "step": 33475 }, { "epoch": 3.4034160227734853, "grad_norm": 0.29965442419052124, "learning_rate": 2.797433137172142e-06, "loss": 0.294, "step": 33476 }, { "epoch": 3.4035176901179343, "grad_norm": 0.2667287290096283, "learning_rate": 2.7971145452335744e-06, "loss": 0.2931, "step": 33477 }, { "epoch": 3.403619357462383, "grad_norm": 0.29015251994132996, "learning_rate": 2.796795964392619e-06, "loss": 0.3213, "step": 33478 }, { "epoch": 3.403721024806832, "grad_norm": 0.2672598361968994, "learning_rate": 2.796477394650884e-06, "loss": 0.3232, "step": 33479 }, { "epoch": 3.403822692151281, "grad_norm": 0.27526506781578064, "learning_rate": 2.7961588360099723e-06, "loss": 0.3097, "step": 33480 }, { "epoch": 3.40392435949573, "grad_norm": 0.28445446491241455, "learning_rate": 2.7958402884714886e-06, "loss": 0.3242, "step": 33481 }, { "epoch": 3.404026026840179, "grad_norm": 0.2922615110874176, "learning_rate": 2.795521752037036e-06, "loss": 0.3191, "step": 33482 }, { "epoch": 3.404127694184628, "grad_norm": 0.2672790288925171, "learning_rate": 2.7952032267082225e-06, "loss": 0.2936, "step": 33483 }, { "epoch": 3.404229361529077, "grad_norm": 0.2944295108318329, "learning_rate": 2.794884712486651e-06, "loss": 0.3117, "step": 33484 }, { "epoch": 3.404331028873526, "grad_norm": 0.2696358561515808, "learning_rate": 2.7945662093739244e-06, "loss": 0.2901, "step": 33485 }, { "epoch": 3.4044326962179747, "grad_norm": 0.2696264386177063, "learning_rate": 2.794247717371652e-06, "loss": 0.3122, "step": 33486 }, { "epoch": 3.4045343635624237, "grad_norm": 0.2736351191997528, "learning_rate": 2.7939292364814347e-06, "loss": 0.2862, "step": 33487 }, { "epoch": 3.4046360309068726, "grad_norm": 0.27702975273132324, "learning_rate": 2.7936107667048764e-06, "loss": 0.3038, "step": 33488 }, { "epoch": 3.4047376982513216, "grad_norm": 0.26743361353874207, "learning_rate": 2.7932923080435837e-06, "loss": 0.3245, "step": 33489 }, { "epoch": 3.4048393655957705, "grad_norm": 0.2767414450645447, "learning_rate": 2.79297386049916e-06, "loss": 0.3025, "step": 33490 }, { "epoch": 3.4049410329402194, "grad_norm": 0.2509147524833679, "learning_rate": 2.79265542407321e-06, "loss": 0.3222, "step": 33491 }, { "epoch": 3.4050427002846684, "grad_norm": 0.27292877435684204, "learning_rate": 2.7923369987673353e-06, "loss": 0.329, "step": 33492 }, { "epoch": 3.4051443676291173, "grad_norm": 0.2866843044757843, "learning_rate": 2.7920185845831443e-06, "loss": 0.3142, "step": 33493 }, { "epoch": 3.4052460349735663, "grad_norm": 0.2658904194831848, "learning_rate": 2.791700181522239e-06, "loss": 0.2771, "step": 33494 }, { "epoch": 3.4053477023180156, "grad_norm": 0.2935781478881836, "learning_rate": 2.791381789586221e-06, "loss": 0.3439, "step": 33495 }, { "epoch": 3.4054493696624646, "grad_norm": 0.2873016893863678, "learning_rate": 2.7910634087766992e-06, "loss": 0.3183, "step": 33496 }, { "epoch": 3.4055510370069135, "grad_norm": 0.27941393852233887, "learning_rate": 2.790745039095275e-06, "loss": 0.37, "step": 33497 }, { "epoch": 3.4056527043513625, "grad_norm": 0.26813405752182007, "learning_rate": 2.7904266805435497e-06, "loss": 0.2943, "step": 33498 }, { "epoch": 3.4057543716958114, "grad_norm": 0.2802925705909729, "learning_rate": 2.7901083331231326e-06, "loss": 0.3343, "step": 33499 }, { "epoch": 3.4058560390402604, "grad_norm": 0.2792593538761139, "learning_rate": 2.7897899968356235e-06, "loss": 0.3259, "step": 33500 }, { "epoch": 3.4059577063847093, "grad_norm": 0.29165738821029663, "learning_rate": 2.789471671682628e-06, "loss": 0.2805, "step": 33501 }, { "epoch": 3.4060593737291582, "grad_norm": 0.25762906670570374, "learning_rate": 2.789153357665747e-06, "loss": 0.2777, "step": 33502 }, { "epoch": 3.406161041073607, "grad_norm": 0.25928255915641785, "learning_rate": 2.788835054786587e-06, "loss": 0.3011, "step": 33503 }, { "epoch": 3.406262708418056, "grad_norm": 0.26959770917892456, "learning_rate": 2.788516763046751e-06, "loss": 0.2997, "step": 33504 }, { "epoch": 3.406364375762505, "grad_norm": 0.2648847997188568, "learning_rate": 2.7881984824478408e-06, "loss": 0.2984, "step": 33505 }, { "epoch": 3.406466043106954, "grad_norm": 0.28163203597068787, "learning_rate": 2.787880212991463e-06, "loss": 0.3288, "step": 33506 }, { "epoch": 3.406567710451403, "grad_norm": 0.29059669375419617, "learning_rate": 2.7875619546792176e-06, "loss": 0.3173, "step": 33507 }, { "epoch": 3.406669377795852, "grad_norm": 0.279582142829895, "learning_rate": 2.7872437075127086e-06, "loss": 0.3103, "step": 33508 }, { "epoch": 3.406771045140301, "grad_norm": 0.26925304532051086, "learning_rate": 2.7869254714935412e-06, "loss": 0.2808, "step": 33509 }, { "epoch": 3.4068727124847498, "grad_norm": 0.27013421058654785, "learning_rate": 2.786607246623318e-06, "loss": 0.2932, "step": 33510 }, { "epoch": 3.4069743798291987, "grad_norm": 0.27647700905799866, "learning_rate": 2.7862890329036414e-06, "loss": 0.3107, "step": 33511 }, { "epoch": 3.4070760471736476, "grad_norm": 0.2575453519821167, "learning_rate": 2.7859708303361124e-06, "loss": 0.319, "step": 33512 }, { "epoch": 3.407177714518097, "grad_norm": 0.2903907597064972, "learning_rate": 2.785652638922338e-06, "loss": 0.3109, "step": 33513 }, { "epoch": 3.407279381862546, "grad_norm": 0.28162682056427, "learning_rate": 2.7853344586639198e-06, "loss": 0.333, "step": 33514 }, { "epoch": 3.407381049206995, "grad_norm": 0.2730045020580292, "learning_rate": 2.7850162895624584e-06, "loss": 0.3032, "step": 33515 }, { "epoch": 3.407482716551444, "grad_norm": 0.28068283200263977, "learning_rate": 2.78469813161956e-06, "loss": 0.3278, "step": 33516 }, { "epoch": 3.407584383895893, "grad_norm": 0.291799396276474, "learning_rate": 2.7843799848368264e-06, "loss": 0.3082, "step": 33517 }, { "epoch": 3.4076860512403417, "grad_norm": 0.27954307198524475, "learning_rate": 2.7840618492158576e-06, "loss": 0.2922, "step": 33518 }, { "epoch": 3.4077877185847907, "grad_norm": 0.2657058537006378, "learning_rate": 2.783743724758261e-06, "loss": 0.2822, "step": 33519 }, { "epoch": 3.4078893859292396, "grad_norm": 0.27202191948890686, "learning_rate": 2.7834256114656366e-06, "loss": 0.2828, "step": 33520 }, { "epoch": 3.4079910532736886, "grad_norm": 0.28934451937675476, "learning_rate": 2.7831075093395875e-06, "loss": 0.294, "step": 33521 }, { "epoch": 3.4080927206181375, "grad_norm": 0.2857823371887207, "learning_rate": 2.782789418381714e-06, "loss": 0.2884, "step": 33522 }, { "epoch": 3.4081943879625864, "grad_norm": 0.2580490708351135, "learning_rate": 2.782471338593624e-06, "loss": 0.301, "step": 33523 }, { "epoch": 3.4082960553070354, "grad_norm": 0.27452242374420166, "learning_rate": 2.7821532699769128e-06, "loss": 0.3203, "step": 33524 }, { "epoch": 3.4083977226514843, "grad_norm": 0.2962401211261749, "learning_rate": 2.7818352125331865e-06, "loss": 0.3214, "step": 33525 }, { "epoch": 3.4084993899959333, "grad_norm": 0.2729206681251526, "learning_rate": 2.7815171662640513e-06, "loss": 0.2993, "step": 33526 }, { "epoch": 3.408601057340382, "grad_norm": 0.27715519070625305, "learning_rate": 2.7811991311711007e-06, "loss": 0.3419, "step": 33527 }, { "epoch": 3.408702724684831, "grad_norm": 0.3057173788547516, "learning_rate": 2.7808811072559442e-06, "loss": 0.2963, "step": 33528 }, { "epoch": 3.40880439202928, "grad_norm": 0.2722799777984619, "learning_rate": 2.7805630945201793e-06, "loss": 0.2918, "step": 33529 }, { "epoch": 3.408906059373729, "grad_norm": 0.2735849618911743, "learning_rate": 2.7802450929654123e-06, "loss": 0.3095, "step": 33530 }, { "epoch": 3.409007726718178, "grad_norm": 0.2766834497451782, "learning_rate": 2.779927102593243e-06, "loss": 0.2955, "step": 33531 }, { "epoch": 3.409109394062627, "grad_norm": 0.2679388225078583, "learning_rate": 2.779609123405271e-06, "loss": 0.334, "step": 33532 }, { "epoch": 3.409211061407076, "grad_norm": 0.2942943274974823, "learning_rate": 2.779291155403105e-06, "loss": 0.3028, "step": 33533 }, { "epoch": 3.409312728751525, "grad_norm": 0.25769299268722534, "learning_rate": 2.7789731985883385e-06, "loss": 0.2959, "step": 33534 }, { "epoch": 3.4094143960959737, "grad_norm": 0.27760937809944153, "learning_rate": 2.7786552529625764e-06, "loss": 0.3033, "step": 33535 }, { "epoch": 3.409516063440423, "grad_norm": 0.2709229588508606, "learning_rate": 2.778337318527426e-06, "loss": 0.3115, "step": 33536 }, { "epoch": 3.409617730784872, "grad_norm": 0.2599602937698364, "learning_rate": 2.7780193952844803e-06, "loss": 0.2821, "step": 33537 }, { "epoch": 3.409719398129321, "grad_norm": 0.2642870247364044, "learning_rate": 2.7777014832353465e-06, "loss": 0.314, "step": 33538 }, { "epoch": 3.40982106547377, "grad_norm": 0.29762986302375793, "learning_rate": 2.7773835823816225e-06, "loss": 0.3116, "step": 33539 }, { "epoch": 3.409922732818219, "grad_norm": 0.271282821893692, "learning_rate": 2.7770656927249138e-06, "loss": 0.3362, "step": 33540 }, { "epoch": 3.410024400162668, "grad_norm": 0.2913125455379486, "learning_rate": 2.77674781426682e-06, "loss": 0.2971, "step": 33541 }, { "epoch": 3.4101260675071168, "grad_norm": 0.2615048885345459, "learning_rate": 2.7764299470089405e-06, "loss": 0.3363, "step": 33542 }, { "epoch": 3.4102277348515657, "grad_norm": 0.29799023270606995, "learning_rate": 2.776112090952883e-06, "loss": 0.2998, "step": 33543 }, { "epoch": 3.4103294021960147, "grad_norm": 0.2651185691356659, "learning_rate": 2.7757942461002394e-06, "loss": 0.3081, "step": 33544 }, { "epoch": 3.4104310695404636, "grad_norm": 0.2838604152202606, "learning_rate": 2.7754764124526162e-06, "loss": 0.2987, "step": 33545 }, { "epoch": 3.4105327368849125, "grad_norm": 0.25920310616493225, "learning_rate": 2.7751585900116184e-06, "loss": 0.2963, "step": 33546 }, { "epoch": 3.4106344042293615, "grad_norm": 0.2937696576118469, "learning_rate": 2.7748407787788387e-06, "loss": 0.3058, "step": 33547 }, { "epoch": 3.4107360715738104, "grad_norm": 0.27233049273490906, "learning_rate": 2.7745229787558846e-06, "loss": 0.3215, "step": 33548 }, { "epoch": 3.4108377389182594, "grad_norm": 0.271636426448822, "learning_rate": 2.7742051899443546e-06, "loss": 0.2995, "step": 33549 }, { "epoch": 3.4109394062627083, "grad_norm": 0.26162394881248474, "learning_rate": 2.773887412345848e-06, "loss": 0.313, "step": 33550 }, { "epoch": 3.4110410736071572, "grad_norm": 0.340243399143219, "learning_rate": 2.7735696459619697e-06, "loss": 0.3352, "step": 33551 }, { "epoch": 3.411142740951606, "grad_norm": 0.2679396867752075, "learning_rate": 2.773251890794316e-06, "loss": 0.2855, "step": 33552 }, { "epoch": 3.411244408296055, "grad_norm": 0.2644546627998352, "learning_rate": 2.772934146844495e-06, "loss": 0.3184, "step": 33553 }, { "epoch": 3.4113460756405045, "grad_norm": 0.290277361869812, "learning_rate": 2.772616414114098e-06, "loss": 0.3336, "step": 33554 }, { "epoch": 3.4114477429849535, "grad_norm": 0.2985551953315735, "learning_rate": 2.7722986926047303e-06, "loss": 0.3247, "step": 33555 }, { "epoch": 3.4115494103294024, "grad_norm": 0.309598445892334, "learning_rate": 2.7719809823179962e-06, "loss": 0.3139, "step": 33556 }, { "epoch": 3.4116510776738513, "grad_norm": 0.25288283824920654, "learning_rate": 2.7716632832554884e-06, "loss": 0.3109, "step": 33557 }, { "epoch": 3.4117527450183003, "grad_norm": 0.2675369679927826, "learning_rate": 2.771345595418814e-06, "loss": 0.2842, "step": 33558 }, { "epoch": 3.4118544123627492, "grad_norm": 0.29511475563049316, "learning_rate": 2.77102791880957e-06, "loss": 0.3034, "step": 33559 }, { "epoch": 3.411956079707198, "grad_norm": 0.28979766368865967, "learning_rate": 2.770710253429356e-06, "loss": 0.3054, "step": 33560 }, { "epoch": 3.412057747051647, "grad_norm": 0.26915645599365234, "learning_rate": 2.7703925992797764e-06, "loss": 0.3385, "step": 33561 }, { "epoch": 3.412159414396096, "grad_norm": 0.26629096269607544, "learning_rate": 2.770074956362429e-06, "loss": 0.336, "step": 33562 }, { "epoch": 3.412261081740545, "grad_norm": 0.2571079134941101, "learning_rate": 2.7697573246789138e-06, "loss": 0.2979, "step": 33563 }, { "epoch": 3.412362749084994, "grad_norm": 0.2726462781429291, "learning_rate": 2.7694397042308298e-06, "loss": 0.3129, "step": 33564 }, { "epoch": 3.412464416429443, "grad_norm": 0.25808513164520264, "learning_rate": 2.769122095019778e-06, "loss": 0.3049, "step": 33565 }, { "epoch": 3.412566083773892, "grad_norm": 0.2764010727405548, "learning_rate": 2.7688044970473625e-06, "loss": 0.2963, "step": 33566 }, { "epoch": 3.4126677511183408, "grad_norm": 0.2824874222278595, "learning_rate": 2.7684869103151763e-06, "loss": 0.2954, "step": 33567 }, { "epoch": 3.4127694184627897, "grad_norm": 0.28504520654678345, "learning_rate": 2.7681693348248245e-06, "loss": 0.3678, "step": 33568 }, { "epoch": 3.4128710858072386, "grad_norm": 0.27437183260917664, "learning_rate": 2.767851770577905e-06, "loss": 0.2888, "step": 33569 }, { "epoch": 3.4129727531516876, "grad_norm": 0.2743001878261566, "learning_rate": 2.767534217576017e-06, "loss": 0.3513, "step": 33570 }, { "epoch": 3.4130744204961365, "grad_norm": 0.2597291171550751, "learning_rate": 2.7672166758207618e-06, "loss": 0.3501, "step": 33571 }, { "epoch": 3.4131760878405855, "grad_norm": 0.26866084337234497, "learning_rate": 2.7668991453137388e-06, "loss": 0.3322, "step": 33572 }, { "epoch": 3.4132777551850344, "grad_norm": 0.27385738492012024, "learning_rate": 2.7665816260565475e-06, "loss": 0.3016, "step": 33573 }, { "epoch": 3.4133794225294833, "grad_norm": 0.2457781732082367, "learning_rate": 2.7662641180507853e-06, "loss": 0.3218, "step": 33574 }, { "epoch": 3.4134810898739323, "grad_norm": 0.27579328417778015, "learning_rate": 2.7659466212980536e-06, "loss": 0.2855, "step": 33575 }, { "epoch": 3.4135827572183812, "grad_norm": 0.32408902049064636, "learning_rate": 2.7656291357999554e-06, "loss": 0.3375, "step": 33576 }, { "epoch": 3.4136844245628306, "grad_norm": 0.29385754466056824, "learning_rate": 2.765311661558083e-06, "loss": 0.2865, "step": 33577 }, { "epoch": 3.4137860919072796, "grad_norm": 0.2501985728740692, "learning_rate": 2.7649941985740416e-06, "loss": 0.3089, "step": 33578 }, { "epoch": 3.4138877592517285, "grad_norm": 0.27043163776397705, "learning_rate": 2.764676746849428e-06, "loss": 0.331, "step": 33579 }, { "epoch": 3.4139894265961774, "grad_norm": 0.2716819643974304, "learning_rate": 2.7643593063858397e-06, "loss": 0.3185, "step": 33580 }, { "epoch": 3.4140910939406264, "grad_norm": 0.2663981020450592, "learning_rate": 2.7640418771848797e-06, "loss": 0.3053, "step": 33581 }, { "epoch": 3.4141927612850753, "grad_norm": 0.2843073904514313, "learning_rate": 2.763724459248146e-06, "loss": 0.2993, "step": 33582 }, { "epoch": 3.4142944286295243, "grad_norm": 0.2932340204715729, "learning_rate": 2.763407052577237e-06, "loss": 0.3272, "step": 33583 }, { "epoch": 3.414396095973973, "grad_norm": 0.27001628279685974, "learning_rate": 2.7630896571737497e-06, "loss": 0.3018, "step": 33584 }, { "epoch": 3.414497763318422, "grad_norm": 0.2909674644470215, "learning_rate": 2.762772273039287e-06, "loss": 0.2807, "step": 33585 }, { "epoch": 3.414599430662871, "grad_norm": 0.26545366644859314, "learning_rate": 2.7624549001754456e-06, "loss": 0.3292, "step": 33586 }, { "epoch": 3.41470109800732, "grad_norm": 0.2798350751399994, "learning_rate": 2.762137538583823e-06, "loss": 0.3658, "step": 33587 }, { "epoch": 3.414802765351769, "grad_norm": 0.25974053144454956, "learning_rate": 2.761820188266022e-06, "loss": 0.3045, "step": 33588 }, { "epoch": 3.414904432696218, "grad_norm": 0.29740849137306213, "learning_rate": 2.761502849223639e-06, "loss": 0.3169, "step": 33589 }, { "epoch": 3.415006100040667, "grad_norm": 0.2664508521556854, "learning_rate": 2.7611855214582705e-06, "loss": 0.3335, "step": 33590 }, { "epoch": 3.415107767385116, "grad_norm": 0.2783259153366089, "learning_rate": 2.7608682049715192e-06, "loss": 0.2916, "step": 33591 }, { "epoch": 3.4152094347295647, "grad_norm": 0.27621495723724365, "learning_rate": 2.7605508997649823e-06, "loss": 0.3115, "step": 33592 }, { "epoch": 3.4153111020740137, "grad_norm": 0.24832959473133087, "learning_rate": 2.7602336058402568e-06, "loss": 0.3291, "step": 33593 }, { "epoch": 3.4154127694184626, "grad_norm": 0.28967049717903137, "learning_rate": 2.759916323198941e-06, "loss": 0.277, "step": 33594 }, { "epoch": 3.415514436762912, "grad_norm": 0.2819764018058777, "learning_rate": 2.7595990518426363e-06, "loss": 0.3123, "step": 33595 }, { "epoch": 3.415616104107361, "grad_norm": 0.2663978636264801, "learning_rate": 2.759281791772939e-06, "loss": 0.29, "step": 33596 }, { "epoch": 3.41571777145181, "grad_norm": 0.2826300263404846, "learning_rate": 2.7589645429914454e-06, "loss": 0.2927, "step": 33597 }, { "epoch": 3.415819438796259, "grad_norm": 0.2684626877307892, "learning_rate": 2.7586473054997576e-06, "loss": 0.3318, "step": 33598 }, { "epoch": 3.4159211061407078, "grad_norm": 0.27778497338294983, "learning_rate": 2.7583300792994726e-06, "loss": 0.3417, "step": 33599 }, { "epoch": 3.4160227734851567, "grad_norm": 0.2699351906776428, "learning_rate": 2.758012864392186e-06, "loss": 0.3038, "step": 33600 }, { "epoch": 3.4161244408296056, "grad_norm": 0.2884250581264496, "learning_rate": 2.7576956607794997e-06, "loss": 0.289, "step": 33601 }, { "epoch": 3.4162261081740546, "grad_norm": 0.2594830393791199, "learning_rate": 2.75737846846301e-06, "loss": 0.3385, "step": 33602 }, { "epoch": 3.4163277755185035, "grad_norm": 0.2774820327758789, "learning_rate": 2.757061287444314e-06, "loss": 0.286, "step": 33603 }, { "epoch": 3.4164294428629525, "grad_norm": 0.24426861107349396, "learning_rate": 2.7567441177250086e-06, "loss": 0.2983, "step": 33604 }, { "epoch": 3.4165311102074014, "grad_norm": 0.2562227249145508, "learning_rate": 2.7564269593066954e-06, "loss": 0.3346, "step": 33605 }, { "epoch": 3.4166327775518504, "grad_norm": 0.29951345920562744, "learning_rate": 2.75610981219097e-06, "loss": 0.3202, "step": 33606 }, { "epoch": 3.4167344448962993, "grad_norm": 0.26567086577415466, "learning_rate": 2.7557926763794285e-06, "loss": 0.3262, "step": 33607 }, { "epoch": 3.4168361122407482, "grad_norm": 0.2834935188293457, "learning_rate": 2.755475551873672e-06, "loss": 0.2831, "step": 33608 }, { "epoch": 3.416937779585197, "grad_norm": 0.2734687626361847, "learning_rate": 2.7551584386752954e-06, "loss": 0.2917, "step": 33609 }, { "epoch": 3.417039446929646, "grad_norm": 0.27298885583877563, "learning_rate": 2.754841336785896e-06, "loss": 0.2953, "step": 33610 }, { "epoch": 3.417141114274095, "grad_norm": 0.27608522772789, "learning_rate": 2.7545242462070743e-06, "loss": 0.299, "step": 33611 }, { "epoch": 3.417242781618544, "grad_norm": 0.29369133710861206, "learning_rate": 2.7542071669404257e-06, "loss": 0.3347, "step": 33612 }, { "epoch": 3.417344448962993, "grad_norm": 0.25981080532073975, "learning_rate": 2.753890098987548e-06, "loss": 0.3248, "step": 33613 }, { "epoch": 3.417446116307442, "grad_norm": 0.29207077622413635, "learning_rate": 2.7535730423500355e-06, "loss": 0.3055, "step": 33614 }, { "epoch": 3.417547783651891, "grad_norm": 0.2974614202976227, "learning_rate": 2.753255997029491e-06, "loss": 0.3368, "step": 33615 }, { "epoch": 3.4176494509963398, "grad_norm": 0.29025793075561523, "learning_rate": 2.752938963027508e-06, "loss": 0.3309, "step": 33616 }, { "epoch": 3.4177511183407887, "grad_norm": 0.27964305877685547, "learning_rate": 2.752621940345683e-06, "loss": 0.324, "step": 33617 }, { "epoch": 3.417852785685238, "grad_norm": 0.27226996421813965, "learning_rate": 2.7523049289856173e-06, "loss": 0.2957, "step": 33618 }, { "epoch": 3.417954453029687, "grad_norm": 0.24711476266384125, "learning_rate": 2.7519879289489042e-06, "loss": 0.3033, "step": 33619 }, { "epoch": 3.418056120374136, "grad_norm": 0.2800953984260559, "learning_rate": 2.75167094023714e-06, "loss": 0.286, "step": 33620 }, { "epoch": 3.418157787718585, "grad_norm": 0.25367453694343567, "learning_rate": 2.751353962851926e-06, "loss": 0.3011, "step": 33621 }, { "epoch": 3.418259455063034, "grad_norm": 0.28022414445877075, "learning_rate": 2.7510369967948557e-06, "loss": 0.3168, "step": 33622 }, { "epoch": 3.418361122407483, "grad_norm": 0.2839255630970001, "learning_rate": 2.750720042067526e-06, "loss": 0.2877, "step": 33623 }, { "epoch": 3.4184627897519317, "grad_norm": 0.2774356007575989, "learning_rate": 2.7504030986715336e-06, "loss": 0.3147, "step": 33624 }, { "epoch": 3.4185644570963807, "grad_norm": 0.2694622874259949, "learning_rate": 2.7500861666084764e-06, "loss": 0.318, "step": 33625 }, { "epoch": 3.4186661244408296, "grad_norm": 0.2602118253707886, "learning_rate": 2.7497692458799517e-06, "loss": 0.3003, "step": 33626 }, { "epoch": 3.4187677917852786, "grad_norm": 0.2710667848587036, "learning_rate": 2.7494523364875517e-06, "loss": 0.3117, "step": 33627 }, { "epoch": 3.4188694591297275, "grad_norm": 0.2955714166164398, "learning_rate": 2.749135438432879e-06, "loss": 0.3206, "step": 33628 }, { "epoch": 3.4189711264741764, "grad_norm": 0.2705429792404175, "learning_rate": 2.7488185517175266e-06, "loss": 0.2893, "step": 33629 }, { "epoch": 3.4190727938186254, "grad_norm": 0.2811605930328369, "learning_rate": 2.7485016763430896e-06, "loss": 0.3558, "step": 33630 }, { "epoch": 3.4191744611630743, "grad_norm": 0.2608281970024109, "learning_rate": 2.7481848123111675e-06, "loss": 0.2966, "step": 33631 }, { "epoch": 3.4192761285075233, "grad_norm": 0.25968828797340393, "learning_rate": 2.747867959623356e-06, "loss": 0.3247, "step": 33632 }, { "epoch": 3.419377795851972, "grad_norm": 0.2831539809703827, "learning_rate": 2.7475511182812498e-06, "loss": 0.2824, "step": 33633 }, { "epoch": 3.419479463196421, "grad_norm": 0.3028753995895386, "learning_rate": 2.7472342882864445e-06, "loss": 0.3229, "step": 33634 }, { "epoch": 3.41958113054087, "grad_norm": 0.26708200573921204, "learning_rate": 2.7469174696405387e-06, "loss": 0.309, "step": 33635 }, { "epoch": 3.4196827978853195, "grad_norm": 0.28651705384254456, "learning_rate": 2.746600662345128e-06, "loss": 0.3088, "step": 33636 }, { "epoch": 3.4197844652297684, "grad_norm": 0.29120737314224243, "learning_rate": 2.7462838664018053e-06, "loss": 0.3215, "step": 33637 }, { "epoch": 3.4198861325742174, "grad_norm": 0.2603270411491394, "learning_rate": 2.7459670818121707e-06, "loss": 0.3106, "step": 33638 }, { "epoch": 3.4199877999186663, "grad_norm": 0.256416380405426, "learning_rate": 2.7456503085778187e-06, "loss": 0.3428, "step": 33639 }, { "epoch": 3.4200894672631152, "grad_norm": 0.26477059721946716, "learning_rate": 2.7453335467003426e-06, "loss": 0.3048, "step": 33640 }, { "epoch": 3.420191134607564, "grad_norm": 0.2878594994544983, "learning_rate": 2.7450167961813425e-06, "loss": 0.2997, "step": 33641 }, { "epoch": 3.420292801952013, "grad_norm": 0.3001195192337036, "learning_rate": 2.7447000570224115e-06, "loss": 0.3345, "step": 33642 }, { "epoch": 3.420394469296462, "grad_norm": 0.2823970913887024, "learning_rate": 2.7443833292251464e-06, "loss": 0.332, "step": 33643 }, { "epoch": 3.420496136640911, "grad_norm": 0.27834394574165344, "learning_rate": 2.74406661279114e-06, "loss": 0.3041, "step": 33644 }, { "epoch": 3.42059780398536, "grad_norm": 0.2743341326713562, "learning_rate": 2.743749907721991e-06, "loss": 0.3282, "step": 33645 }, { "epoch": 3.420699471329809, "grad_norm": 0.2681618928909302, "learning_rate": 2.7434332140192944e-06, "loss": 0.3229, "step": 33646 }, { "epoch": 3.420801138674258, "grad_norm": 0.2874204218387604, "learning_rate": 2.7431165316846427e-06, "loss": 0.3137, "step": 33647 }, { "epoch": 3.4209028060187068, "grad_norm": 0.3007286787033081, "learning_rate": 2.742799860719636e-06, "loss": 0.3077, "step": 33648 }, { "epoch": 3.4210044733631557, "grad_norm": 0.26524412631988525, "learning_rate": 2.742483201125867e-06, "loss": 0.2954, "step": 33649 }, { "epoch": 3.4211061407076047, "grad_norm": 0.2616090774536133, "learning_rate": 2.742166552904929e-06, "loss": 0.2979, "step": 33650 }, { "epoch": 3.4212078080520536, "grad_norm": 0.27914807200431824, "learning_rate": 2.7418499160584215e-06, "loss": 0.276, "step": 33651 }, { "epoch": 3.4213094753965025, "grad_norm": 0.28022029995918274, "learning_rate": 2.7415332905879376e-06, "loss": 0.3051, "step": 33652 }, { "epoch": 3.4214111427409515, "grad_norm": 0.2739681303501129, "learning_rate": 2.741216676495072e-06, "loss": 0.2936, "step": 33653 }, { "epoch": 3.4215128100854004, "grad_norm": 0.3072449266910553, "learning_rate": 2.7409000737814184e-06, "loss": 0.3283, "step": 33654 }, { "epoch": 3.4216144774298494, "grad_norm": 0.2701141834259033, "learning_rate": 2.7405834824485756e-06, "loss": 0.3154, "step": 33655 }, { "epoch": 3.4217161447742983, "grad_norm": 0.26254457235336304, "learning_rate": 2.7402669024981353e-06, "loss": 0.2912, "step": 33656 }, { "epoch": 3.4218178121187472, "grad_norm": 0.2719630300998688, "learning_rate": 2.7399503339316924e-06, "loss": 0.3006, "step": 33657 }, { "epoch": 3.421919479463196, "grad_norm": 0.2863277494907379, "learning_rate": 2.739633776750844e-06, "loss": 0.2756, "step": 33658 }, { "epoch": 3.4220211468076456, "grad_norm": 0.29243841767311096, "learning_rate": 2.7393172309571843e-06, "loss": 0.3381, "step": 33659 }, { "epoch": 3.4221228141520945, "grad_norm": 0.28364694118499756, "learning_rate": 2.739000696552304e-06, "loss": 0.294, "step": 33660 }, { "epoch": 3.4222244814965435, "grad_norm": 0.27033254504203796, "learning_rate": 2.738684173537804e-06, "loss": 0.3172, "step": 33661 }, { "epoch": 3.4223261488409924, "grad_norm": 0.26391735672950745, "learning_rate": 2.7383676619152757e-06, "loss": 0.32, "step": 33662 }, { "epoch": 3.4224278161854413, "grad_norm": 0.26540932059288025, "learning_rate": 2.7380511616863125e-06, "loss": 0.3048, "step": 33663 }, { "epoch": 3.4225294835298903, "grad_norm": 0.2736181318759918, "learning_rate": 2.737734672852509e-06, "loss": 0.3407, "step": 33664 }, { "epoch": 3.4226311508743392, "grad_norm": 0.25965744256973267, "learning_rate": 2.7374181954154623e-06, "loss": 0.2913, "step": 33665 }, { "epoch": 3.422732818218788, "grad_norm": 0.25527387857437134, "learning_rate": 2.7371017293767656e-06, "loss": 0.334, "step": 33666 }, { "epoch": 3.422834485563237, "grad_norm": 0.2807992100715637, "learning_rate": 2.7367852747380096e-06, "loss": 0.3363, "step": 33667 }, { "epoch": 3.422936152907686, "grad_norm": 0.27596715092658997, "learning_rate": 2.7364688315007943e-06, "loss": 0.2912, "step": 33668 }, { "epoch": 3.423037820252135, "grad_norm": 0.29191145300865173, "learning_rate": 2.7361523996667106e-06, "loss": 0.3319, "step": 33669 }, { "epoch": 3.423139487596584, "grad_norm": 0.28362247347831726, "learning_rate": 2.7358359792373516e-06, "loss": 0.3117, "step": 33670 }, { "epoch": 3.423241154941033, "grad_norm": 0.2768176794052124, "learning_rate": 2.735519570214315e-06, "loss": 0.3361, "step": 33671 }, { "epoch": 3.423342822285482, "grad_norm": 0.26612067222595215, "learning_rate": 2.7352031725991925e-06, "loss": 0.333, "step": 33672 }, { "epoch": 3.4234444896299308, "grad_norm": 0.2633504271507263, "learning_rate": 2.734886786393578e-06, "loss": 0.2983, "step": 33673 }, { "epoch": 3.4235461569743797, "grad_norm": 0.25888702273368835, "learning_rate": 2.734570411599064e-06, "loss": 0.3313, "step": 33674 }, { "epoch": 3.4236478243188286, "grad_norm": 0.307329386472702, "learning_rate": 2.7342540482172474e-06, "loss": 0.2983, "step": 33675 }, { "epoch": 3.4237494916632776, "grad_norm": 0.2834303677082062, "learning_rate": 2.733937696249721e-06, "loss": 0.3187, "step": 33676 }, { "epoch": 3.423851159007727, "grad_norm": 0.26546528935432434, "learning_rate": 2.7336213556980763e-06, "loss": 0.3176, "step": 33677 }, { "epoch": 3.423952826352176, "grad_norm": 0.27474653720855713, "learning_rate": 2.7333050265639117e-06, "loss": 0.3277, "step": 33678 }, { "epoch": 3.424054493696625, "grad_norm": 0.2687835097312927, "learning_rate": 2.732988708848814e-06, "loss": 0.315, "step": 33679 }, { "epoch": 3.424156161041074, "grad_norm": 0.30733537673950195, "learning_rate": 2.7326724025543807e-06, "loss": 0.3027, "step": 33680 }, { "epoch": 3.4242578283855227, "grad_norm": 0.26218879222869873, "learning_rate": 2.732356107682207e-06, "loss": 0.326, "step": 33681 }, { "epoch": 3.4243594957299717, "grad_norm": 0.2956392168998718, "learning_rate": 2.7320398242338843e-06, "loss": 0.3161, "step": 33682 }, { "epoch": 3.4244611630744206, "grad_norm": 0.2658097445964813, "learning_rate": 2.7317235522110064e-06, "loss": 0.3115, "step": 33683 }, { "epoch": 3.4245628304188696, "grad_norm": 0.26468250155448914, "learning_rate": 2.7314072916151633e-06, "loss": 0.2897, "step": 33684 }, { "epoch": 3.4246644977633185, "grad_norm": 0.29433634877204895, "learning_rate": 2.731091042447954e-06, "loss": 0.3373, "step": 33685 }, { "epoch": 3.4247661651077674, "grad_norm": 0.2999568581581116, "learning_rate": 2.7307748047109684e-06, "loss": 0.3111, "step": 33686 }, { "epoch": 3.4248678324522164, "grad_norm": 0.2819884419441223, "learning_rate": 2.730458578405798e-06, "loss": 0.31, "step": 33687 }, { "epoch": 3.4249694997966653, "grad_norm": 0.27432024478912354, "learning_rate": 2.7301423635340424e-06, "loss": 0.3341, "step": 33688 }, { "epoch": 3.4250711671411143, "grad_norm": 0.2914027273654938, "learning_rate": 2.7298261600972857e-06, "loss": 0.3293, "step": 33689 }, { "epoch": 3.425172834485563, "grad_norm": 0.25655579566955566, "learning_rate": 2.729509968097126e-06, "loss": 0.3115, "step": 33690 }, { "epoch": 3.425274501830012, "grad_norm": 0.27763834595680237, "learning_rate": 2.729193787535157e-06, "loss": 0.2896, "step": 33691 }, { "epoch": 3.425376169174461, "grad_norm": 0.29217541217803955, "learning_rate": 2.7288776184129702e-06, "loss": 0.3704, "step": 33692 }, { "epoch": 3.42547783651891, "grad_norm": 0.3010931611061096, "learning_rate": 2.7285614607321583e-06, "loss": 0.3154, "step": 33693 }, { "epoch": 3.425579503863359, "grad_norm": 0.2868208885192871, "learning_rate": 2.728245314494312e-06, "loss": 0.3398, "step": 33694 }, { "epoch": 3.425681171207808, "grad_norm": 0.26535579562187195, "learning_rate": 2.7279291797010276e-06, "loss": 0.2902, "step": 33695 }, { "epoch": 3.425782838552257, "grad_norm": 0.2740511894226074, "learning_rate": 2.727613056353896e-06, "loss": 0.3171, "step": 33696 }, { "epoch": 3.425884505896706, "grad_norm": 0.2640402019023895, "learning_rate": 2.727296944454508e-06, "loss": 0.2919, "step": 33697 }, { "epoch": 3.4259861732411547, "grad_norm": 0.2756389081478119, "learning_rate": 2.7269808440044625e-06, "loss": 0.3028, "step": 33698 }, { "epoch": 3.4260878405856037, "grad_norm": 0.2507711350917816, "learning_rate": 2.7266647550053433e-06, "loss": 0.3292, "step": 33699 }, { "epoch": 3.426189507930053, "grad_norm": 0.26648464798927307, "learning_rate": 2.7263486774587466e-06, "loss": 0.3106, "step": 33700 }, { "epoch": 3.426291175274502, "grad_norm": 0.2644055187702179, "learning_rate": 2.726032611366268e-06, "loss": 0.2734, "step": 33701 }, { "epoch": 3.426392842618951, "grad_norm": 0.2717466652393341, "learning_rate": 2.725716556729494e-06, "loss": 0.2977, "step": 33702 }, { "epoch": 3.4264945099634, "grad_norm": 0.2771848440170288, "learning_rate": 2.725400513550021e-06, "loss": 0.3131, "step": 33703 }, { "epoch": 3.426596177307849, "grad_norm": 0.26788297295570374, "learning_rate": 2.7250844818294383e-06, "loss": 0.3101, "step": 33704 }, { "epoch": 3.4266978446522978, "grad_norm": 0.2940327823162079, "learning_rate": 2.7247684615693404e-06, "loss": 0.3183, "step": 33705 }, { "epoch": 3.4267995119967467, "grad_norm": 0.254553884267807, "learning_rate": 2.724452452771319e-06, "loss": 0.3057, "step": 33706 }, { "epoch": 3.4269011793411956, "grad_norm": 0.2712075412273407, "learning_rate": 2.7241364554369632e-06, "loss": 0.3087, "step": 33707 }, { "epoch": 3.4270028466856446, "grad_norm": 0.2741473615169525, "learning_rate": 2.723820469567871e-06, "loss": 0.3407, "step": 33708 }, { "epoch": 3.4271045140300935, "grad_norm": 0.2959310710430145, "learning_rate": 2.7235044951656264e-06, "loss": 0.328, "step": 33709 }, { "epoch": 3.4272061813745425, "grad_norm": 0.26815569400787354, "learning_rate": 2.7231885322318273e-06, "loss": 0.2917, "step": 33710 }, { "epoch": 3.4273078487189914, "grad_norm": 0.25316309928894043, "learning_rate": 2.7228725807680635e-06, "loss": 0.3282, "step": 33711 }, { "epoch": 3.4274095160634404, "grad_norm": 0.26277926564216614, "learning_rate": 2.722556640775924e-06, "loss": 0.3116, "step": 33712 }, { "epoch": 3.4275111834078893, "grad_norm": 0.28948286175727844, "learning_rate": 2.722240712257005e-06, "loss": 0.2747, "step": 33713 }, { "epoch": 3.4276128507523382, "grad_norm": 0.2731017768383026, "learning_rate": 2.7219247952128962e-06, "loss": 0.3269, "step": 33714 }, { "epoch": 3.427714518096787, "grad_norm": 0.2864930331707001, "learning_rate": 2.7216088896451887e-06, "loss": 0.2949, "step": 33715 }, { "epoch": 3.427816185441236, "grad_norm": 0.27366840839385986, "learning_rate": 2.721292995555472e-06, "loss": 0.3012, "step": 33716 }, { "epoch": 3.427917852785685, "grad_norm": 0.2958942949771881, "learning_rate": 2.720977112945341e-06, "loss": 0.326, "step": 33717 }, { "epoch": 3.4280195201301344, "grad_norm": 0.28122255206108093, "learning_rate": 2.7206612418163885e-06, "loss": 0.2921, "step": 33718 }, { "epoch": 3.4281211874745834, "grad_norm": 0.29430094361305237, "learning_rate": 2.7203453821701995e-06, "loss": 0.2875, "step": 33719 }, { "epoch": 3.4282228548190323, "grad_norm": 0.2821894884109497, "learning_rate": 2.7200295340083704e-06, "loss": 0.3348, "step": 33720 }, { "epoch": 3.4283245221634813, "grad_norm": 0.2994508445262909, "learning_rate": 2.719713697332491e-06, "loss": 0.3211, "step": 33721 }, { "epoch": 3.42842618950793, "grad_norm": 0.29072651267051697, "learning_rate": 2.71939787214415e-06, "loss": 0.3099, "step": 33722 }, { "epoch": 3.428527856852379, "grad_norm": 0.2629253566265106, "learning_rate": 2.719082058444943e-06, "loss": 0.2914, "step": 33723 }, { "epoch": 3.428629524196828, "grad_norm": 0.28885337710380554, "learning_rate": 2.7187662562364583e-06, "loss": 0.3357, "step": 33724 }, { "epoch": 3.428731191541277, "grad_norm": 0.26884621381759644, "learning_rate": 2.718450465520286e-06, "loss": 0.2985, "step": 33725 }, { "epoch": 3.428832858885726, "grad_norm": 0.25478291511535645, "learning_rate": 2.7181346862980172e-06, "loss": 0.2973, "step": 33726 }, { "epoch": 3.428934526230175, "grad_norm": 0.29067251086235046, "learning_rate": 2.7178189185712435e-06, "loss": 0.3193, "step": 33727 }, { "epoch": 3.429036193574624, "grad_norm": 0.26533427834510803, "learning_rate": 2.7175031623415593e-06, "loss": 0.3124, "step": 33728 }, { "epoch": 3.429137860919073, "grad_norm": 0.2786823809146881, "learning_rate": 2.7171874176105485e-06, "loss": 0.3203, "step": 33729 }, { "epoch": 3.4292395282635217, "grad_norm": 0.2782413065433502, "learning_rate": 2.7168716843798066e-06, "loss": 0.3076, "step": 33730 }, { "epoch": 3.4293411956079707, "grad_norm": 0.29082798957824707, "learning_rate": 2.7165559626509225e-06, "loss": 0.2927, "step": 33731 }, { "epoch": 3.4294428629524196, "grad_norm": 0.289097398519516, "learning_rate": 2.716240252425485e-06, "loss": 0.3097, "step": 33732 }, { "epoch": 3.4295445302968686, "grad_norm": 0.2971748113632202, "learning_rate": 2.715924553705088e-06, "loss": 0.2995, "step": 33733 }, { "epoch": 3.4296461976413175, "grad_norm": 0.2721746265888214, "learning_rate": 2.715608866491321e-06, "loss": 0.2881, "step": 33734 }, { "epoch": 3.4297478649857664, "grad_norm": 0.2791367769241333, "learning_rate": 2.715293190785773e-06, "loss": 0.3298, "step": 33735 }, { "epoch": 3.4298495323302154, "grad_norm": 0.261030375957489, "learning_rate": 2.7149775265900337e-06, "loss": 0.2984, "step": 33736 }, { "epoch": 3.4299511996746643, "grad_norm": 0.2562180161476135, "learning_rate": 2.7146618739056964e-06, "loss": 0.2959, "step": 33737 }, { "epoch": 3.4300528670191133, "grad_norm": 0.27160176634788513, "learning_rate": 2.7143462327343496e-06, "loss": 0.3304, "step": 33738 }, { "epoch": 3.430154534363562, "grad_norm": 0.26635393500328064, "learning_rate": 2.714030603077581e-06, "loss": 0.3022, "step": 33739 }, { "epoch": 3.430256201708011, "grad_norm": 0.28222915530204773, "learning_rate": 2.713714984936985e-06, "loss": 0.3129, "step": 33740 }, { "epoch": 3.4303578690524605, "grad_norm": 0.2872120141983032, "learning_rate": 2.7133993783141495e-06, "loss": 0.3193, "step": 33741 }, { "epoch": 3.4304595363969095, "grad_norm": 0.28219789266586304, "learning_rate": 2.7130837832106627e-06, "loss": 0.317, "step": 33742 }, { "epoch": 3.4305612037413584, "grad_norm": 0.2752349078655243, "learning_rate": 2.712768199628118e-06, "loss": 0.3058, "step": 33743 }, { "epoch": 3.4306628710858074, "grad_norm": 0.27386412024497986, "learning_rate": 2.7124526275681042e-06, "loss": 0.3109, "step": 33744 }, { "epoch": 3.4307645384302563, "grad_norm": 0.28758689761161804, "learning_rate": 2.71213706703221e-06, "loss": 0.3051, "step": 33745 }, { "epoch": 3.4308662057747052, "grad_norm": 0.2596273124217987, "learning_rate": 2.711821518022024e-06, "loss": 0.3102, "step": 33746 }, { "epoch": 3.430967873119154, "grad_norm": 0.27990561723709106, "learning_rate": 2.7115059805391385e-06, "loss": 0.3408, "step": 33747 }, { "epoch": 3.431069540463603, "grad_norm": 0.26909708976745605, "learning_rate": 2.711190454585143e-06, "loss": 0.3387, "step": 33748 }, { "epoch": 3.431171207808052, "grad_norm": 0.28256940841674805, "learning_rate": 2.710874940161624e-06, "loss": 0.3147, "step": 33749 }, { "epoch": 3.431272875152501, "grad_norm": 0.2676139771938324, "learning_rate": 2.7105594372701748e-06, "loss": 0.2964, "step": 33750 }, { "epoch": 3.43137454249695, "grad_norm": 0.25729450583457947, "learning_rate": 2.710243945912383e-06, "loss": 0.3169, "step": 33751 }, { "epoch": 3.431476209841399, "grad_norm": 0.28291574120521545, "learning_rate": 2.7099284660898365e-06, "loss": 0.3143, "step": 33752 }, { "epoch": 3.431577877185848, "grad_norm": 0.2779521346092224, "learning_rate": 2.709612997804128e-06, "loss": 0.3048, "step": 33753 }, { "epoch": 3.4316795445302968, "grad_norm": 0.26604580879211426, "learning_rate": 2.709297541056845e-06, "loss": 0.3021, "step": 33754 }, { "epoch": 3.4317812118747457, "grad_norm": 0.30155879259109497, "learning_rate": 2.708982095849576e-06, "loss": 0.3049, "step": 33755 }, { "epoch": 3.4318828792191947, "grad_norm": 0.292866051197052, "learning_rate": 2.70866666218391e-06, "loss": 0.3237, "step": 33756 }, { "epoch": 3.4319845465636436, "grad_norm": 0.2838154733181, "learning_rate": 2.7083512400614378e-06, "loss": 0.333, "step": 33757 }, { "epoch": 3.4320862139080925, "grad_norm": 0.2692723572254181, "learning_rate": 2.708035829483748e-06, "loss": 0.2946, "step": 33758 }, { "epoch": 3.432187881252542, "grad_norm": 0.27046602964401245, "learning_rate": 2.7077204304524267e-06, "loss": 0.3118, "step": 33759 }, { "epoch": 3.432289548596991, "grad_norm": 0.26112282276153564, "learning_rate": 2.7074050429690675e-06, "loss": 0.3269, "step": 33760 }, { "epoch": 3.43239121594144, "grad_norm": 0.2780420184135437, "learning_rate": 2.7070896670352563e-06, "loss": 0.2968, "step": 33761 }, { "epoch": 3.4324928832858888, "grad_norm": 0.2850514054298401, "learning_rate": 2.706774302652581e-06, "loss": 0.3194, "step": 33762 }, { "epoch": 3.4325945506303377, "grad_norm": 0.2739311456680298, "learning_rate": 2.7064589498226336e-06, "loss": 0.3331, "step": 33763 }, { "epoch": 3.4326962179747866, "grad_norm": 0.2673742175102234, "learning_rate": 2.7061436085470016e-06, "loss": 0.3157, "step": 33764 }, { "epoch": 3.4327978853192356, "grad_norm": 0.2732607424259186, "learning_rate": 2.705828278827272e-06, "loss": 0.3138, "step": 33765 }, { "epoch": 3.4328995526636845, "grad_norm": 0.28318849205970764, "learning_rate": 2.7055129606650333e-06, "loss": 0.3603, "step": 33766 }, { "epoch": 3.4330012200081335, "grad_norm": 0.27096936106681824, "learning_rate": 2.7051976540618763e-06, "loss": 0.3436, "step": 33767 }, { "epoch": 3.4331028873525824, "grad_norm": 0.2758532464504242, "learning_rate": 2.7048823590193884e-06, "loss": 0.3216, "step": 33768 }, { "epoch": 3.4332045546970313, "grad_norm": 0.2746427357196808, "learning_rate": 2.7045670755391555e-06, "loss": 0.3227, "step": 33769 }, { "epoch": 3.4333062220414803, "grad_norm": 0.2613593339920044, "learning_rate": 2.7042518036227707e-06, "loss": 0.3153, "step": 33770 }, { "epoch": 3.4334078893859292, "grad_norm": 0.2682020962238312, "learning_rate": 2.7039365432718194e-06, "loss": 0.293, "step": 33771 }, { "epoch": 3.433509556730378, "grad_norm": 0.26087138056755066, "learning_rate": 2.7036212944878886e-06, "loss": 0.3217, "step": 33772 }, { "epoch": 3.433611224074827, "grad_norm": 0.27361881732940674, "learning_rate": 2.70330605727257e-06, "loss": 0.3012, "step": 33773 }, { "epoch": 3.433712891419276, "grad_norm": 0.271310955286026, "learning_rate": 2.702990831627449e-06, "loss": 0.3046, "step": 33774 }, { "epoch": 3.433814558763725, "grad_norm": 0.26945996284484863, "learning_rate": 2.7026756175541145e-06, "loss": 0.3418, "step": 33775 }, { "epoch": 3.433916226108174, "grad_norm": 0.27823740243911743, "learning_rate": 2.7023604150541528e-06, "loss": 0.315, "step": 33776 }, { "epoch": 3.434017893452623, "grad_norm": 0.2752263844013214, "learning_rate": 2.702045224129155e-06, "loss": 0.3316, "step": 33777 }, { "epoch": 3.434119560797072, "grad_norm": 0.27404624223709106, "learning_rate": 2.701730044780708e-06, "loss": 0.3066, "step": 33778 }, { "epoch": 3.4342212281415208, "grad_norm": 0.2884683907032013, "learning_rate": 2.7014148770103966e-06, "loss": 0.3234, "step": 33779 }, { "epoch": 3.4343228954859697, "grad_norm": 0.26991915702819824, "learning_rate": 2.701099720819812e-06, "loss": 0.3001, "step": 33780 }, { "epoch": 3.4344245628304186, "grad_norm": 0.28060898184776306, "learning_rate": 2.700784576210541e-06, "loss": 0.3457, "step": 33781 }, { "epoch": 3.434526230174868, "grad_norm": 0.2915431559085846, "learning_rate": 2.7004694431841696e-06, "loss": 0.3125, "step": 33782 }, { "epoch": 3.434627897519317, "grad_norm": 0.2749814987182617, "learning_rate": 2.7001543217422882e-06, "loss": 0.3213, "step": 33783 }, { "epoch": 3.434729564863766, "grad_norm": 0.28200334310531616, "learning_rate": 2.6998392118864822e-06, "loss": 0.3344, "step": 33784 }, { "epoch": 3.434831232208215, "grad_norm": 0.2916148006916046, "learning_rate": 2.6995241136183407e-06, "loss": 0.3041, "step": 33785 }, { "epoch": 3.434932899552664, "grad_norm": 0.2727688252925873, "learning_rate": 2.699209026939448e-06, "loss": 0.3001, "step": 33786 }, { "epoch": 3.4350345668971127, "grad_norm": 0.26047423481941223, "learning_rate": 2.6988939518513946e-06, "loss": 0.2968, "step": 33787 }, { "epoch": 3.4351362342415617, "grad_norm": 0.27166396379470825, "learning_rate": 2.6985788883557673e-06, "loss": 0.2839, "step": 33788 }, { "epoch": 3.4352379015860106, "grad_norm": 0.2646726369857788, "learning_rate": 2.6982638364541504e-06, "loss": 0.3248, "step": 33789 }, { "epoch": 3.4353395689304596, "grad_norm": 0.2755921483039856, "learning_rate": 2.6979487961481355e-06, "loss": 0.2849, "step": 33790 }, { "epoch": 3.4354412362749085, "grad_norm": 0.267687052488327, "learning_rate": 2.6976337674393073e-06, "loss": 0.3036, "step": 33791 }, { "epoch": 3.4355429036193574, "grad_norm": 0.27868783473968506, "learning_rate": 2.6973187503292515e-06, "loss": 0.3152, "step": 33792 }, { "epoch": 3.4356445709638064, "grad_norm": 0.2793326675891876, "learning_rate": 2.6970037448195584e-06, "loss": 0.3305, "step": 33793 }, { "epoch": 3.4357462383082553, "grad_norm": 0.26237547397613525, "learning_rate": 2.6966887509118128e-06, "loss": 0.2945, "step": 33794 }, { "epoch": 3.4358479056527043, "grad_norm": 0.2797049283981323, "learning_rate": 2.696373768607602e-06, "loss": 0.2994, "step": 33795 }, { "epoch": 3.435949572997153, "grad_norm": 0.2940324544906616, "learning_rate": 2.6960587979085113e-06, "loss": 0.2887, "step": 33796 }, { "epoch": 3.436051240341602, "grad_norm": 0.2987493574619293, "learning_rate": 2.6957438388161305e-06, "loss": 0.3044, "step": 33797 }, { "epoch": 3.436152907686051, "grad_norm": 0.2895498275756836, "learning_rate": 2.695428891332045e-06, "loss": 0.3315, "step": 33798 }, { "epoch": 3.4362545750305, "grad_norm": 0.28368714451789856, "learning_rate": 2.695113955457839e-06, "loss": 0.2761, "step": 33799 }, { "epoch": 3.4363562423749494, "grad_norm": 0.30257269740104675, "learning_rate": 2.694799031195102e-06, "loss": 0.3168, "step": 33800 }, { "epoch": 3.4364579097193984, "grad_norm": 0.2821309268474579, "learning_rate": 2.6944841185454206e-06, "loss": 0.3045, "step": 33801 }, { "epoch": 3.4365595770638473, "grad_norm": 0.2738053500652313, "learning_rate": 2.6941692175103784e-06, "loss": 0.3208, "step": 33802 }, { "epoch": 3.4366612444082962, "grad_norm": 0.2678925395011902, "learning_rate": 2.6938543280915665e-06, "loss": 0.3279, "step": 33803 }, { "epoch": 3.436762911752745, "grad_norm": 0.26346030831336975, "learning_rate": 2.693539450290567e-06, "loss": 0.3301, "step": 33804 }, { "epoch": 3.436864579097194, "grad_norm": 0.26314857602119446, "learning_rate": 2.6932245841089684e-06, "loss": 0.347, "step": 33805 }, { "epoch": 3.436966246441643, "grad_norm": 0.2593211531639099, "learning_rate": 2.692909729548354e-06, "loss": 0.3123, "step": 33806 }, { "epoch": 3.437067913786092, "grad_norm": 0.2668429911136627, "learning_rate": 2.6925948866103145e-06, "loss": 0.2967, "step": 33807 }, { "epoch": 3.437169581130541, "grad_norm": 0.27018919587135315, "learning_rate": 2.6922800552964334e-06, "loss": 0.3037, "step": 33808 }, { "epoch": 3.43727124847499, "grad_norm": 0.2658681571483612, "learning_rate": 2.6919652356082946e-06, "loss": 0.3337, "step": 33809 }, { "epoch": 3.437372915819439, "grad_norm": 0.25902146100997925, "learning_rate": 2.6916504275474885e-06, "loss": 0.2756, "step": 33810 }, { "epoch": 3.4374745831638878, "grad_norm": 0.2696306109428406, "learning_rate": 2.6913356311156e-06, "loss": 0.2944, "step": 33811 }, { "epoch": 3.4375762505083367, "grad_norm": 0.2688182294368744, "learning_rate": 2.691020846314211e-06, "loss": 0.3213, "step": 33812 }, { "epoch": 3.4376779178527856, "grad_norm": 0.2865312099456787, "learning_rate": 2.6907060731449123e-06, "loss": 0.3173, "step": 33813 }, { "epoch": 3.4377795851972346, "grad_norm": 0.29310178756713867, "learning_rate": 2.690391311609288e-06, "loss": 0.3314, "step": 33814 }, { "epoch": 3.4378812525416835, "grad_norm": 0.28150874376296997, "learning_rate": 2.6900765617089232e-06, "loss": 0.3107, "step": 33815 }, { "epoch": 3.4379829198861325, "grad_norm": 0.26851412653923035, "learning_rate": 2.689761823445402e-06, "loss": 0.3137, "step": 33816 }, { "epoch": 3.4380845872305814, "grad_norm": 0.2609872817993164, "learning_rate": 2.6894470968203135e-06, "loss": 0.3033, "step": 33817 }, { "epoch": 3.4381862545750304, "grad_norm": 0.26643019914627075, "learning_rate": 2.6891323818352417e-06, "loss": 0.3445, "step": 33818 }, { "epoch": 3.4382879219194793, "grad_norm": 0.2755661904811859, "learning_rate": 2.6888176784917695e-06, "loss": 0.3062, "step": 33819 }, { "epoch": 3.4383895892639282, "grad_norm": 0.28765684366226196, "learning_rate": 2.688502986791487e-06, "loss": 0.2898, "step": 33820 }, { "epoch": 3.438491256608377, "grad_norm": 0.2890729308128357, "learning_rate": 2.6881883067359773e-06, "loss": 0.2929, "step": 33821 }, { "epoch": 3.438592923952826, "grad_norm": 0.25357332825660706, "learning_rate": 2.687873638326822e-06, "loss": 0.3071, "step": 33822 }, { "epoch": 3.4386945912972755, "grad_norm": 0.2646164000034332, "learning_rate": 2.6875589815656135e-06, "loss": 0.3061, "step": 33823 }, { "epoch": 3.4387962586417244, "grad_norm": 0.293601393699646, "learning_rate": 2.687244336453933e-06, "loss": 0.3212, "step": 33824 }, { "epoch": 3.4388979259861734, "grad_norm": 0.26718807220458984, "learning_rate": 2.686929702993366e-06, "loss": 0.3138, "step": 33825 }, { "epoch": 3.4389995933306223, "grad_norm": 0.2673080265522003, "learning_rate": 2.6866150811854953e-06, "loss": 0.2967, "step": 33826 }, { "epoch": 3.4391012606750713, "grad_norm": 0.2918322682380676, "learning_rate": 2.6863004710319096e-06, "loss": 0.2866, "step": 33827 }, { "epoch": 3.43920292801952, "grad_norm": 0.26355791091918945, "learning_rate": 2.685985872534193e-06, "loss": 0.3085, "step": 33828 }, { "epoch": 3.439304595363969, "grad_norm": 0.2623315751552582, "learning_rate": 2.685671285693927e-06, "loss": 0.2966, "step": 33829 }, { "epoch": 3.439406262708418, "grad_norm": 0.2792665958404541, "learning_rate": 2.6853567105127044e-06, "loss": 0.288, "step": 33830 }, { "epoch": 3.439507930052867, "grad_norm": 0.27387914061546326, "learning_rate": 2.6850421469920995e-06, "loss": 0.3019, "step": 33831 }, { "epoch": 3.439609597397316, "grad_norm": 0.26639845967292786, "learning_rate": 2.6847275951337026e-06, "loss": 0.2935, "step": 33832 }, { "epoch": 3.439711264741765, "grad_norm": 0.26753324270248413, "learning_rate": 2.6844130549390997e-06, "loss": 0.3046, "step": 33833 }, { "epoch": 3.439812932086214, "grad_norm": 0.2672165036201477, "learning_rate": 2.6840985264098735e-06, "loss": 0.3185, "step": 33834 }, { "epoch": 3.439914599430663, "grad_norm": 0.26996996998786926, "learning_rate": 2.6837840095476084e-06, "loss": 0.274, "step": 33835 }, { "epoch": 3.4400162667751117, "grad_norm": 0.25093939900398254, "learning_rate": 2.683469504353887e-06, "loss": 0.3368, "step": 33836 }, { "epoch": 3.4401179341195607, "grad_norm": 0.2699538469314575, "learning_rate": 2.6831550108302986e-06, "loss": 0.2868, "step": 33837 }, { "epoch": 3.4402196014640096, "grad_norm": 0.25979331135749817, "learning_rate": 2.6828405289784243e-06, "loss": 0.3607, "step": 33838 }, { "epoch": 3.4403212688084586, "grad_norm": 0.2742740511894226, "learning_rate": 2.6825260587998465e-06, "loss": 0.2821, "step": 33839 }, { "epoch": 3.4404229361529075, "grad_norm": 0.2688828706741333, "learning_rate": 2.682211600296156e-06, "loss": 0.3034, "step": 33840 }, { "epoch": 3.440524603497357, "grad_norm": 0.27283740043640137, "learning_rate": 2.6818971534689283e-06, "loss": 0.344, "step": 33841 }, { "epoch": 3.440626270841806, "grad_norm": 0.24460746347904205, "learning_rate": 2.6815827183197516e-06, "loss": 0.2995, "step": 33842 }, { "epoch": 3.4407279381862548, "grad_norm": 0.26579180359840393, "learning_rate": 2.681268294850213e-06, "loss": 0.3033, "step": 33843 }, { "epoch": 3.4408296055307037, "grad_norm": 0.276479572057724, "learning_rate": 2.680953883061893e-06, "loss": 0.3019, "step": 33844 }, { "epoch": 3.4409312728751527, "grad_norm": 0.28138503432273865, "learning_rate": 2.680639482956377e-06, "loss": 0.351, "step": 33845 }, { "epoch": 3.4410329402196016, "grad_norm": 0.2703275680541992, "learning_rate": 2.6803250945352454e-06, "loss": 0.3336, "step": 33846 }, { "epoch": 3.4411346075640505, "grad_norm": 0.29779911041259766, "learning_rate": 2.680010717800088e-06, "loss": 0.2994, "step": 33847 }, { "epoch": 3.4412362749084995, "grad_norm": 0.2744983434677124, "learning_rate": 2.679696352752484e-06, "loss": 0.321, "step": 33848 }, { "epoch": 3.4413379422529484, "grad_norm": 0.26169154047966003, "learning_rate": 2.6793819993940166e-06, "loss": 0.3226, "step": 33849 }, { "epoch": 3.4414396095973974, "grad_norm": 0.2739088833332062, "learning_rate": 2.6790676577262755e-06, "loss": 0.2857, "step": 33850 }, { "epoch": 3.4415412769418463, "grad_norm": 0.2701171338558197, "learning_rate": 2.678753327750836e-06, "loss": 0.2964, "step": 33851 }, { "epoch": 3.4416429442862952, "grad_norm": 0.2793745696544647, "learning_rate": 2.678439009469285e-06, "loss": 0.2816, "step": 33852 }, { "epoch": 3.441744611630744, "grad_norm": 0.2920621335506439, "learning_rate": 2.6781247028832113e-06, "loss": 0.2814, "step": 33853 }, { "epoch": 3.441846278975193, "grad_norm": 0.2572139501571655, "learning_rate": 2.67781040799419e-06, "loss": 0.2838, "step": 33854 }, { "epoch": 3.441947946319642, "grad_norm": 0.2659529447555542, "learning_rate": 2.677496124803809e-06, "loss": 0.3248, "step": 33855 }, { "epoch": 3.442049613664091, "grad_norm": 0.28801780939102173, "learning_rate": 2.677181853313649e-06, "loss": 0.3032, "step": 33856 }, { "epoch": 3.44215128100854, "grad_norm": 0.2877672612667084, "learning_rate": 2.676867593525297e-06, "loss": 0.3156, "step": 33857 }, { "epoch": 3.442252948352989, "grad_norm": 0.25558948516845703, "learning_rate": 2.6765533454403334e-06, "loss": 0.2937, "step": 33858 }, { "epoch": 3.442354615697438, "grad_norm": 0.25768595933914185, "learning_rate": 2.6762391090603407e-06, "loss": 0.3064, "step": 33859 }, { "epoch": 3.4424562830418868, "grad_norm": 0.27441880106925964, "learning_rate": 2.6759248843869066e-06, "loss": 0.3226, "step": 33860 }, { "epoch": 3.4425579503863357, "grad_norm": 0.29306405782699585, "learning_rate": 2.675610671421607e-06, "loss": 0.3298, "step": 33861 }, { "epoch": 3.4426596177307847, "grad_norm": 0.2653217911720276, "learning_rate": 2.6752964701660288e-06, "loss": 0.3012, "step": 33862 }, { "epoch": 3.4427612850752336, "grad_norm": 0.2452869564294815, "learning_rate": 2.6749822806217572e-06, "loss": 0.2687, "step": 33863 }, { "epoch": 3.442862952419683, "grad_norm": 0.2689795196056366, "learning_rate": 2.67466810279037e-06, "loss": 0.3115, "step": 33864 }, { "epoch": 3.442964619764132, "grad_norm": 0.2648996412754059, "learning_rate": 2.6743539366734534e-06, "loss": 0.316, "step": 33865 }, { "epoch": 3.443066287108581, "grad_norm": 0.2675626575946808, "learning_rate": 2.6740397822725894e-06, "loss": 0.3205, "step": 33866 }, { "epoch": 3.44316795445303, "grad_norm": 0.27536576986312866, "learning_rate": 2.6737256395893578e-06, "loss": 0.3047, "step": 33867 }, { "epoch": 3.4432696217974788, "grad_norm": 0.2926357090473175, "learning_rate": 2.673411508625346e-06, "loss": 0.3094, "step": 33868 }, { "epoch": 3.4433712891419277, "grad_norm": 0.26789233088493347, "learning_rate": 2.673097389382132e-06, "loss": 0.2728, "step": 33869 }, { "epoch": 3.4434729564863766, "grad_norm": 0.28648000955581665, "learning_rate": 2.672783281861304e-06, "loss": 0.3309, "step": 33870 }, { "epoch": 3.4435746238308256, "grad_norm": 0.273803174495697, "learning_rate": 2.6724691860644377e-06, "loss": 0.3371, "step": 33871 }, { "epoch": 3.4436762911752745, "grad_norm": 0.26738786697387695, "learning_rate": 2.672155101993118e-06, "loss": 0.3191, "step": 33872 }, { "epoch": 3.4437779585197235, "grad_norm": 0.2635623514652252, "learning_rate": 2.6718410296489315e-06, "loss": 0.3332, "step": 33873 }, { "epoch": 3.4438796258641724, "grad_norm": 0.2560826539993286, "learning_rate": 2.671526969033453e-06, "loss": 0.3045, "step": 33874 }, { "epoch": 3.4439812932086213, "grad_norm": 0.27277183532714844, "learning_rate": 2.67121292014827e-06, "loss": 0.3163, "step": 33875 }, { "epoch": 3.4440829605530703, "grad_norm": 0.28246474266052246, "learning_rate": 2.6708988829949635e-06, "loss": 0.299, "step": 33876 }, { "epoch": 3.4441846278975192, "grad_norm": 0.2599477767944336, "learning_rate": 2.6705848575751124e-06, "loss": 0.3341, "step": 33877 }, { "epoch": 3.444286295241968, "grad_norm": 0.2710188031196594, "learning_rate": 2.670270843890303e-06, "loss": 0.3054, "step": 33878 }, { "epoch": 3.444387962586417, "grad_norm": 0.25702330470085144, "learning_rate": 2.669956841942114e-06, "loss": 0.3017, "step": 33879 }, { "epoch": 3.444489629930866, "grad_norm": 0.2718578279018402, "learning_rate": 2.669642851732133e-06, "loss": 0.3109, "step": 33880 }, { "epoch": 3.444591297275315, "grad_norm": 0.28390225768089294, "learning_rate": 2.6693288732619327e-06, "loss": 0.2853, "step": 33881 }, { "epoch": 3.4446929646197644, "grad_norm": 0.24590951204299927, "learning_rate": 2.6690149065331e-06, "loss": 0.2794, "step": 33882 }, { "epoch": 3.4447946319642133, "grad_norm": 0.26015686988830566, "learning_rate": 2.6687009515472207e-06, "loss": 0.2977, "step": 33883 }, { "epoch": 3.4448962993086623, "grad_norm": 0.26958802342414856, "learning_rate": 2.668387008305867e-06, "loss": 0.2971, "step": 33884 }, { "epoch": 3.444997966653111, "grad_norm": 0.29413700103759766, "learning_rate": 2.6680730768106284e-06, "loss": 0.339, "step": 33885 }, { "epoch": 3.44509963399756, "grad_norm": 0.26244258880615234, "learning_rate": 2.6677591570630833e-06, "loss": 0.3299, "step": 33886 }, { "epoch": 3.445201301342009, "grad_norm": 0.2860345244407654, "learning_rate": 2.6674452490648114e-06, "loss": 0.288, "step": 33887 }, { "epoch": 3.445302968686458, "grad_norm": 0.26184192299842834, "learning_rate": 2.6671313528173975e-06, "loss": 0.3403, "step": 33888 }, { "epoch": 3.445404636030907, "grad_norm": 0.2615506947040558, "learning_rate": 2.6668174683224217e-06, "loss": 0.3296, "step": 33889 }, { "epoch": 3.445506303375356, "grad_norm": 0.27303221821784973, "learning_rate": 2.6665035955814655e-06, "loss": 0.3272, "step": 33890 }, { "epoch": 3.445607970719805, "grad_norm": 0.2810944616794586, "learning_rate": 2.6661897345961073e-06, "loss": 0.3173, "step": 33891 }, { "epoch": 3.445709638064254, "grad_norm": 0.2665333151817322, "learning_rate": 2.6658758853679306e-06, "loss": 0.3185, "step": 33892 }, { "epoch": 3.4458113054087027, "grad_norm": 0.2819340229034424, "learning_rate": 2.6655620478985207e-06, "loss": 0.3226, "step": 33893 }, { "epoch": 3.4459129727531517, "grad_norm": 0.25856277346611023, "learning_rate": 2.6652482221894505e-06, "loss": 0.3129, "step": 33894 }, { "epoch": 3.4460146400976006, "grad_norm": 0.28604909777641296, "learning_rate": 2.664934408242307e-06, "loss": 0.3201, "step": 33895 }, { "epoch": 3.4461163074420496, "grad_norm": 0.25822684168815613, "learning_rate": 2.6646206060586683e-06, "loss": 0.2948, "step": 33896 }, { "epoch": 3.4462179747864985, "grad_norm": 0.25874751806259155, "learning_rate": 2.6643068156401162e-06, "loss": 0.3045, "step": 33897 }, { "epoch": 3.4463196421309474, "grad_norm": 0.256136953830719, "learning_rate": 2.66399303698823e-06, "loss": 0.3149, "step": 33898 }, { "epoch": 3.4464213094753964, "grad_norm": 0.278703898191452, "learning_rate": 2.6636792701045933e-06, "loss": 0.3148, "step": 33899 }, { "epoch": 3.4465229768198453, "grad_norm": 0.2646854519844055, "learning_rate": 2.6633655149907857e-06, "loss": 0.3216, "step": 33900 }, { "epoch": 3.4466246441642943, "grad_norm": 0.27281948924064636, "learning_rate": 2.663051771648385e-06, "loss": 0.2837, "step": 33901 }, { "epoch": 3.446726311508743, "grad_norm": 0.2572556138038635, "learning_rate": 2.662738040078976e-06, "loss": 0.2943, "step": 33902 }, { "epoch": 3.446827978853192, "grad_norm": 0.2709919512271881, "learning_rate": 2.6624243202841375e-06, "loss": 0.3016, "step": 33903 }, { "epoch": 3.446929646197641, "grad_norm": 0.26426973938941956, "learning_rate": 2.6621106122654477e-06, "loss": 0.3021, "step": 33904 }, { "epoch": 3.4470313135420905, "grad_norm": 0.26916149258613586, "learning_rate": 2.6617969160244914e-06, "loss": 0.3215, "step": 33905 }, { "epoch": 3.4471329808865394, "grad_norm": 0.27883198857307434, "learning_rate": 2.661483231562846e-06, "loss": 0.317, "step": 33906 }, { "epoch": 3.4472346482309884, "grad_norm": 0.28244414925575256, "learning_rate": 2.661169558882093e-06, "loss": 0.3145, "step": 33907 }, { "epoch": 3.4473363155754373, "grad_norm": 0.2809683680534363, "learning_rate": 2.6608558979838095e-06, "loss": 0.2887, "step": 33908 }, { "epoch": 3.4474379829198862, "grad_norm": 0.2682786285877228, "learning_rate": 2.6605422488695803e-06, "loss": 0.279, "step": 33909 }, { "epoch": 3.447539650264335, "grad_norm": 0.27219486236572266, "learning_rate": 2.6602286115409835e-06, "loss": 0.3109, "step": 33910 }, { "epoch": 3.447641317608784, "grad_norm": 0.2520686089992523, "learning_rate": 2.6599149859995965e-06, "loss": 0.3413, "step": 33911 }, { "epoch": 3.447742984953233, "grad_norm": 0.27481481432914734, "learning_rate": 2.659601372247004e-06, "loss": 0.2933, "step": 33912 }, { "epoch": 3.447844652297682, "grad_norm": 0.26668745279312134, "learning_rate": 2.6592877702847835e-06, "loss": 0.2931, "step": 33913 }, { "epoch": 3.447946319642131, "grad_norm": 0.263499915599823, "learning_rate": 2.658974180114513e-06, "loss": 0.3245, "step": 33914 }, { "epoch": 3.44804798698658, "grad_norm": 0.27018511295318604, "learning_rate": 2.658660601737777e-06, "loss": 0.3058, "step": 33915 }, { "epoch": 3.448149654331029, "grad_norm": 0.26763877272605896, "learning_rate": 2.6583470351561514e-06, "loss": 0.2878, "step": 33916 }, { "epoch": 3.4482513216754778, "grad_norm": 0.27110177278518677, "learning_rate": 2.658033480371217e-06, "loss": 0.3346, "step": 33917 }, { "epoch": 3.4483529890199267, "grad_norm": 0.27295687794685364, "learning_rate": 2.657719937384552e-06, "loss": 0.3059, "step": 33918 }, { "epoch": 3.4484546563643756, "grad_norm": 0.28140467405319214, "learning_rate": 2.657406406197739e-06, "loss": 0.3053, "step": 33919 }, { "epoch": 3.4485563237088246, "grad_norm": 0.27889227867126465, "learning_rate": 2.6570928868123564e-06, "loss": 0.2809, "step": 33920 }, { "epoch": 3.4486579910532735, "grad_norm": 0.2788117527961731, "learning_rate": 2.656779379229981e-06, "loss": 0.341, "step": 33921 }, { "epoch": 3.4487596583977225, "grad_norm": 0.2859862744808197, "learning_rate": 2.6564658834521952e-06, "loss": 0.3191, "step": 33922 }, { "epoch": 3.448861325742172, "grad_norm": 0.253558874130249, "learning_rate": 2.6561523994805784e-06, "loss": 0.3312, "step": 33923 }, { "epoch": 3.448962993086621, "grad_norm": 0.2787627577781677, "learning_rate": 2.6558389273167064e-06, "loss": 0.3169, "step": 33924 }, { "epoch": 3.4490646604310697, "grad_norm": 0.27164730429649353, "learning_rate": 2.6555254669621626e-06, "loss": 0.3055, "step": 33925 }, { "epoch": 3.4491663277755187, "grad_norm": 0.29844555258750916, "learning_rate": 2.6552120184185247e-06, "loss": 0.298, "step": 33926 }, { "epoch": 3.4492679951199676, "grad_norm": 0.2935474216938019, "learning_rate": 2.654898581687372e-06, "loss": 0.3177, "step": 33927 }, { "epoch": 3.4493696624644166, "grad_norm": 0.26988494396209717, "learning_rate": 2.6545851567702802e-06, "loss": 0.2907, "step": 33928 }, { "epoch": 3.4494713298088655, "grad_norm": 0.2761480212211609, "learning_rate": 2.654271743668833e-06, "loss": 0.324, "step": 33929 }, { "epoch": 3.4495729971533144, "grad_norm": 0.2739064395427704, "learning_rate": 2.653958342384608e-06, "loss": 0.291, "step": 33930 }, { "epoch": 3.4496746644977634, "grad_norm": 0.29179468750953674, "learning_rate": 2.6536449529191806e-06, "loss": 0.3209, "step": 33931 }, { "epoch": 3.4497763318422123, "grad_norm": 0.27342331409454346, "learning_rate": 2.6533315752741345e-06, "loss": 0.3299, "step": 33932 }, { "epoch": 3.4498779991866613, "grad_norm": 0.28639400005340576, "learning_rate": 2.6530182094510465e-06, "loss": 0.297, "step": 33933 }, { "epoch": 3.44997966653111, "grad_norm": 0.2632182836532593, "learning_rate": 2.6527048554514935e-06, "loss": 0.3004, "step": 33934 }, { "epoch": 3.450081333875559, "grad_norm": 0.2826767861843109, "learning_rate": 2.6523915132770567e-06, "loss": 0.2957, "step": 33935 }, { "epoch": 3.450183001220008, "grad_norm": 0.2808426320552826, "learning_rate": 2.652078182929314e-06, "loss": 0.3145, "step": 33936 }, { "epoch": 3.450284668564457, "grad_norm": 0.2820219397544861, "learning_rate": 2.651764864409843e-06, "loss": 0.313, "step": 33937 }, { "epoch": 3.450386335908906, "grad_norm": 0.2847687005996704, "learning_rate": 2.6514515577202214e-06, "loss": 0.3224, "step": 33938 }, { "epoch": 3.450488003253355, "grad_norm": 0.27344971895217896, "learning_rate": 2.65113826286203e-06, "loss": 0.285, "step": 33939 }, { "epoch": 3.450589670597804, "grad_norm": 0.2700982391834259, "learning_rate": 2.6508249798368467e-06, "loss": 0.3053, "step": 33940 }, { "epoch": 3.450691337942253, "grad_norm": 0.25329354405403137, "learning_rate": 2.6505117086462463e-06, "loss": 0.3102, "step": 33941 }, { "epoch": 3.4507930052867017, "grad_norm": 0.27201974391937256, "learning_rate": 2.650198449291812e-06, "loss": 0.303, "step": 33942 }, { "epoch": 3.4508946726311507, "grad_norm": 0.25776001811027527, "learning_rate": 2.649885201775119e-06, "loss": 0.2988, "step": 33943 }, { "epoch": 3.4509963399755996, "grad_norm": 0.29074451327323914, "learning_rate": 2.6495719660977435e-06, "loss": 0.2815, "step": 33944 }, { "epoch": 3.4510980073200486, "grad_norm": 0.2517411708831787, "learning_rate": 2.6492587422612693e-06, "loss": 0.2916, "step": 33945 }, { "epoch": 3.451199674664498, "grad_norm": 0.2758098542690277, "learning_rate": 2.6489455302672693e-06, "loss": 0.3112, "step": 33946 }, { "epoch": 3.451301342008947, "grad_norm": 0.2995279133319855, "learning_rate": 2.6486323301173243e-06, "loss": 0.3085, "step": 33947 }, { "epoch": 3.451403009353396, "grad_norm": 0.28016236424446106, "learning_rate": 2.6483191418130085e-06, "loss": 0.3189, "step": 33948 }, { "epoch": 3.4515046766978448, "grad_norm": 0.28113991022109985, "learning_rate": 2.6480059653559044e-06, "loss": 0.2739, "step": 33949 }, { "epoch": 3.4516063440422937, "grad_norm": 0.2703876495361328, "learning_rate": 2.6476928007475864e-06, "loss": 0.3075, "step": 33950 }, { "epoch": 3.4517080113867427, "grad_norm": 0.2653801441192627, "learning_rate": 2.647379647989632e-06, "loss": 0.2987, "step": 33951 }, { "epoch": 3.4518096787311916, "grad_norm": 0.2628970742225647, "learning_rate": 2.647066507083621e-06, "loss": 0.34, "step": 33952 }, { "epoch": 3.4519113460756405, "grad_norm": 0.278981477022171, "learning_rate": 2.64675337803113e-06, "loss": 0.3226, "step": 33953 }, { "epoch": 3.4520130134200895, "grad_norm": 0.2874164283275604, "learning_rate": 2.6464402608337347e-06, "loss": 0.3107, "step": 33954 }, { "epoch": 3.4521146807645384, "grad_norm": 0.29578185081481934, "learning_rate": 2.646127155493016e-06, "loss": 0.2888, "step": 33955 }, { "epoch": 3.4522163481089874, "grad_norm": 0.2573045492172241, "learning_rate": 2.6458140620105486e-06, "loss": 0.303, "step": 33956 }, { "epoch": 3.4523180154534363, "grad_norm": 0.26921531558036804, "learning_rate": 2.645500980387911e-06, "loss": 0.3077, "step": 33957 }, { "epoch": 3.4524196827978852, "grad_norm": 0.2560310661792755, "learning_rate": 2.6451879106266787e-06, "loss": 0.3124, "step": 33958 }, { "epoch": 3.452521350142334, "grad_norm": 0.28666552901268005, "learning_rate": 2.6448748527284307e-06, "loss": 0.3076, "step": 33959 }, { "epoch": 3.452623017486783, "grad_norm": 0.2954709827899933, "learning_rate": 2.6445618066947443e-06, "loss": 0.3353, "step": 33960 }, { "epoch": 3.452724684831232, "grad_norm": 0.27947309613227844, "learning_rate": 2.6442487725271936e-06, "loss": 0.2813, "step": 33961 }, { "epoch": 3.452826352175681, "grad_norm": 0.26130566000938416, "learning_rate": 2.64393575022736e-06, "loss": 0.2851, "step": 33962 }, { "epoch": 3.45292801952013, "grad_norm": 0.30430516600608826, "learning_rate": 2.6436227397968183e-06, "loss": 0.3137, "step": 33963 }, { "epoch": 3.4530296868645793, "grad_norm": 0.276455283164978, "learning_rate": 2.6433097412371432e-06, "loss": 0.3046, "step": 33964 }, { "epoch": 3.4531313542090283, "grad_norm": 0.2777072787284851, "learning_rate": 2.6429967545499156e-06, "loss": 0.2972, "step": 33965 }, { "epoch": 3.4532330215534772, "grad_norm": 0.26959216594696045, "learning_rate": 2.64268377973671e-06, "loss": 0.2936, "step": 33966 }, { "epoch": 3.453334688897926, "grad_norm": 0.2569628357887268, "learning_rate": 2.6423708167991035e-06, "loss": 0.3136, "step": 33967 }, { "epoch": 3.453436356242375, "grad_norm": 0.24929474294185638, "learning_rate": 2.6420578657386708e-06, "loss": 0.3022, "step": 33968 }, { "epoch": 3.453538023586824, "grad_norm": 0.27573367953300476, "learning_rate": 2.6417449265569917e-06, "loss": 0.295, "step": 33969 }, { "epoch": 3.453639690931273, "grad_norm": 0.2815012037754059, "learning_rate": 2.6414319992556413e-06, "loss": 0.3113, "step": 33970 }, { "epoch": 3.453741358275722, "grad_norm": 0.26825374364852905, "learning_rate": 2.6411190838361946e-06, "loss": 0.3116, "step": 33971 }, { "epoch": 3.453843025620171, "grad_norm": 0.26106417179107666, "learning_rate": 2.6408061803002306e-06, "loss": 0.2911, "step": 33972 }, { "epoch": 3.45394469296462, "grad_norm": 0.2579612731933594, "learning_rate": 2.640493288649325e-06, "loss": 0.3098, "step": 33973 }, { "epoch": 3.4540463603090688, "grad_norm": 0.28924837708473206, "learning_rate": 2.6401804088850514e-06, "loss": 0.3281, "step": 33974 }, { "epoch": 3.4541480276535177, "grad_norm": 0.26866328716278076, "learning_rate": 2.63986754100899e-06, "loss": 0.2806, "step": 33975 }, { "epoch": 3.4542496949979666, "grad_norm": 0.27470988035202026, "learning_rate": 2.639554685022715e-06, "loss": 0.3225, "step": 33976 }, { "epoch": 3.4543513623424156, "grad_norm": 0.2563038468360901, "learning_rate": 2.6392418409278033e-06, "loss": 0.2936, "step": 33977 }, { "epoch": 3.4544530296868645, "grad_norm": 0.25689688324928284, "learning_rate": 2.638929008725827e-06, "loss": 0.3183, "step": 33978 }, { "epoch": 3.4545546970313135, "grad_norm": 0.27136334776878357, "learning_rate": 2.638616188418368e-06, "loss": 0.2946, "step": 33979 }, { "epoch": 3.4546563643757624, "grad_norm": 0.2518177032470703, "learning_rate": 2.6383033800069997e-06, "loss": 0.306, "step": 33980 }, { "epoch": 3.4547580317202113, "grad_norm": 0.2834533154964447, "learning_rate": 2.637990583493295e-06, "loss": 0.301, "step": 33981 }, { "epoch": 3.4548596990646603, "grad_norm": 0.2778227925300598, "learning_rate": 2.6376777988788372e-06, "loss": 0.2825, "step": 33982 }, { "epoch": 3.4549613664091092, "grad_norm": 0.2663310170173645, "learning_rate": 2.637365026165193e-06, "loss": 0.3072, "step": 33983 }, { "epoch": 3.455063033753558, "grad_norm": 0.27921923995018005, "learning_rate": 2.637052265353942e-06, "loss": 0.3313, "step": 33984 }, { "epoch": 3.455164701098007, "grad_norm": 0.29033952951431274, "learning_rate": 2.6367395164466624e-06, "loss": 0.2938, "step": 33985 }, { "epoch": 3.455266368442456, "grad_norm": 0.28615084290504456, "learning_rate": 2.636426779444928e-06, "loss": 0.3226, "step": 33986 }, { "epoch": 3.4553680357869054, "grad_norm": 0.2794911861419678, "learning_rate": 2.6361140543503133e-06, "loss": 0.2812, "step": 33987 }, { "epoch": 3.4554697031313544, "grad_norm": 0.28477293252944946, "learning_rate": 2.6358013411643925e-06, "loss": 0.311, "step": 33988 }, { "epoch": 3.4555713704758033, "grad_norm": 0.2780570387840271, "learning_rate": 2.6354886398887446e-06, "loss": 0.2959, "step": 33989 }, { "epoch": 3.4556730378202523, "grad_norm": 0.2809768319129944, "learning_rate": 2.635175950524943e-06, "loss": 0.3087, "step": 33990 }, { "epoch": 3.455774705164701, "grad_norm": 0.2586654722690582, "learning_rate": 2.6348632730745605e-06, "loss": 0.3184, "step": 33991 }, { "epoch": 3.45587637250915, "grad_norm": 0.2748626172542572, "learning_rate": 2.634550607539179e-06, "loss": 0.3023, "step": 33992 }, { "epoch": 3.455978039853599, "grad_norm": 0.2734962999820709, "learning_rate": 2.634237953920366e-06, "loss": 0.2964, "step": 33993 }, { "epoch": 3.456079707198048, "grad_norm": 0.2691202759742737, "learning_rate": 2.6339253122197002e-06, "loss": 0.3058, "step": 33994 }, { "epoch": 3.456181374542497, "grad_norm": 0.26312506198883057, "learning_rate": 2.6336126824387577e-06, "loss": 0.3013, "step": 33995 }, { "epoch": 3.456283041886946, "grad_norm": 0.2658742666244507, "learning_rate": 2.6333000645791123e-06, "loss": 0.3242, "step": 33996 }, { "epoch": 3.456384709231395, "grad_norm": 0.26742199063301086, "learning_rate": 2.6329874586423386e-06, "loss": 0.3106, "step": 33997 }, { "epoch": 3.456486376575844, "grad_norm": 0.306800901889801, "learning_rate": 2.6326748646300105e-06, "loss": 0.35, "step": 33998 }, { "epoch": 3.4565880439202927, "grad_norm": 0.2592061758041382, "learning_rate": 2.6323622825437047e-06, "loss": 0.3417, "step": 33999 }, { "epoch": 3.4566897112647417, "grad_norm": 0.27896952629089355, "learning_rate": 2.6320497123849954e-06, "loss": 0.3031, "step": 34000 }, { "epoch": 3.4567913786091906, "grad_norm": 0.26025089621543884, "learning_rate": 2.6317371541554556e-06, "loss": 0.3005, "step": 34001 }, { "epoch": 3.4568930459536396, "grad_norm": 0.2581693232059479, "learning_rate": 2.6314246078566645e-06, "loss": 0.2936, "step": 34002 }, { "epoch": 3.4569947132980885, "grad_norm": 0.3007912337779999, "learning_rate": 2.6311120734901896e-06, "loss": 0.276, "step": 34003 }, { "epoch": 3.4570963806425374, "grad_norm": 0.2795034348964691, "learning_rate": 2.630799551057609e-06, "loss": 0.3189, "step": 34004 }, { "epoch": 3.457198047986987, "grad_norm": 0.2565591633319855, "learning_rate": 2.630487040560502e-06, "loss": 0.3341, "step": 34005 }, { "epoch": 3.4572997153314358, "grad_norm": 0.28424495458602905, "learning_rate": 2.6301745420004333e-06, "loss": 0.2978, "step": 34006 }, { "epoch": 3.4574013826758847, "grad_norm": 0.2604568600654602, "learning_rate": 2.6298620553789843e-06, "loss": 0.3253, "step": 34007 }, { "epoch": 3.4575030500203336, "grad_norm": 0.2830400764942169, "learning_rate": 2.6295495806977254e-06, "loss": 0.2715, "step": 34008 }, { "epoch": 3.4576047173647826, "grad_norm": 0.2819182872772217, "learning_rate": 2.6292371179582344e-06, "loss": 0.3327, "step": 34009 }, { "epoch": 3.4577063847092315, "grad_norm": 0.26513954997062683, "learning_rate": 2.628924667162083e-06, "loss": 0.3206, "step": 34010 }, { "epoch": 3.4578080520536805, "grad_norm": 0.24527357518672943, "learning_rate": 2.6286122283108437e-06, "loss": 0.3161, "step": 34011 }, { "epoch": 3.4579097193981294, "grad_norm": 0.27183204889297485, "learning_rate": 2.6282998014060967e-06, "loss": 0.283, "step": 34012 }, { "epoch": 3.4580113867425784, "grad_norm": 0.25353968143463135, "learning_rate": 2.6279873864494076e-06, "loss": 0.3016, "step": 34013 }, { "epoch": 3.4581130540870273, "grad_norm": 0.26700833439826965, "learning_rate": 2.6276749834423543e-06, "loss": 0.3472, "step": 34014 }, { "epoch": 3.4582147214314762, "grad_norm": 0.2672966420650482, "learning_rate": 2.627362592386515e-06, "loss": 0.3286, "step": 34015 }, { "epoch": 3.458316388775925, "grad_norm": 0.2802635133266449, "learning_rate": 2.627050213283455e-06, "loss": 0.3256, "step": 34016 }, { "epoch": 3.458418056120374, "grad_norm": 0.2929885983467102, "learning_rate": 2.6267378461347537e-06, "loss": 0.299, "step": 34017 }, { "epoch": 3.458519723464823, "grad_norm": 0.26613661646842957, "learning_rate": 2.626425490941984e-06, "loss": 0.3102, "step": 34018 }, { "epoch": 3.458621390809272, "grad_norm": 0.2768900990486145, "learning_rate": 2.6261131477067157e-06, "loss": 0.3041, "step": 34019 }, { "epoch": 3.458723058153721, "grad_norm": 0.25997263193130493, "learning_rate": 2.6258008164305277e-06, "loss": 0.31, "step": 34020 }, { "epoch": 3.45882472549817, "grad_norm": 0.24788238108158112, "learning_rate": 2.625488497114989e-06, "loss": 0.3131, "step": 34021 }, { "epoch": 3.458926392842619, "grad_norm": 0.25478824973106384, "learning_rate": 2.625176189761679e-06, "loss": 0.294, "step": 34022 }, { "epoch": 3.4590280601870678, "grad_norm": 0.27313393354415894, "learning_rate": 2.6248638943721627e-06, "loss": 0.3121, "step": 34023 }, { "epoch": 3.4591297275315167, "grad_norm": 0.2876524329185486, "learning_rate": 2.6245516109480186e-06, "loss": 0.3045, "step": 34024 }, { "epoch": 3.4592313948759656, "grad_norm": 0.29724884033203125, "learning_rate": 2.6242393394908216e-06, "loss": 0.3155, "step": 34025 }, { "epoch": 3.4593330622204146, "grad_norm": 0.25869590044021606, "learning_rate": 2.6239270800021387e-06, "loss": 0.3235, "step": 34026 }, { "epoch": 3.4594347295648635, "grad_norm": 0.25383126735687256, "learning_rate": 2.6236148324835493e-06, "loss": 0.3378, "step": 34027 }, { "epoch": 3.459536396909313, "grad_norm": 0.28883010149002075, "learning_rate": 2.6233025969366232e-06, "loss": 0.3102, "step": 34028 }, { "epoch": 3.459638064253762, "grad_norm": 0.30271291732788086, "learning_rate": 2.6229903733629314e-06, "loss": 0.3032, "step": 34029 }, { "epoch": 3.459739731598211, "grad_norm": 0.2899889349937439, "learning_rate": 2.622678161764052e-06, "loss": 0.3322, "step": 34030 }, { "epoch": 3.4598413989426597, "grad_norm": 0.2640460729598999, "learning_rate": 2.6223659621415524e-06, "loss": 0.3445, "step": 34031 }, { "epoch": 3.4599430662871087, "grad_norm": 0.2752784192562103, "learning_rate": 2.622053774497012e-06, "loss": 0.3045, "step": 34032 }, { "epoch": 3.4600447336315576, "grad_norm": 0.25609761476516724, "learning_rate": 2.621741598831996e-06, "loss": 0.3318, "step": 34033 }, { "epoch": 3.4601464009760066, "grad_norm": 0.2910494804382324, "learning_rate": 2.6214294351480806e-06, "loss": 0.2984, "step": 34034 }, { "epoch": 3.4602480683204555, "grad_norm": 0.2732669711112976, "learning_rate": 2.621117283446843e-06, "loss": 0.3221, "step": 34035 }, { "epoch": 3.4603497356649044, "grad_norm": 0.27182987332344055, "learning_rate": 2.6208051437298465e-06, "loss": 0.3246, "step": 34036 }, { "epoch": 3.4604514030093534, "grad_norm": 0.2653622627258301, "learning_rate": 2.620493015998671e-06, "loss": 0.3385, "step": 34037 }, { "epoch": 3.4605530703538023, "grad_norm": 0.2781563997268677, "learning_rate": 2.620180900254885e-06, "loss": 0.2912, "step": 34038 }, { "epoch": 3.4606547376982513, "grad_norm": 0.2616429328918457, "learning_rate": 2.619868796500061e-06, "loss": 0.354, "step": 34039 }, { "epoch": 3.4607564050427, "grad_norm": 0.2577804625034332, "learning_rate": 2.6195567047357747e-06, "loss": 0.3331, "step": 34040 }, { "epoch": 3.460858072387149, "grad_norm": 0.27029192447662354, "learning_rate": 2.619244624963595e-06, "loss": 0.3149, "step": 34041 }, { "epoch": 3.460959739731598, "grad_norm": 0.27329719066619873, "learning_rate": 2.618932557185096e-06, "loss": 0.3248, "step": 34042 }, { "epoch": 3.461061407076047, "grad_norm": 0.26491665840148926, "learning_rate": 2.6186205014018468e-06, "loss": 0.3236, "step": 34043 }, { "epoch": 3.461163074420496, "grad_norm": 0.2836970388889313, "learning_rate": 2.6183084576154216e-06, "loss": 0.3351, "step": 34044 }, { "epoch": 3.461264741764945, "grad_norm": 0.2439933568239212, "learning_rate": 2.617996425827396e-06, "loss": 0.2913, "step": 34045 }, { "epoch": 3.4613664091093943, "grad_norm": 0.2776704728603363, "learning_rate": 2.617684406039335e-06, "loss": 0.3065, "step": 34046 }, { "epoch": 3.4614680764538432, "grad_norm": 0.2860579490661621, "learning_rate": 2.6173723982528154e-06, "loss": 0.2835, "step": 34047 }, { "epoch": 3.461569743798292, "grad_norm": 0.25946661829948425, "learning_rate": 2.6170604024694066e-06, "loss": 0.2938, "step": 34048 }, { "epoch": 3.461671411142741, "grad_norm": 0.2789469361305237, "learning_rate": 2.616748418690681e-06, "loss": 0.3033, "step": 34049 }, { "epoch": 3.46177307848719, "grad_norm": 0.26616430282592773, "learning_rate": 2.616436446918211e-06, "loss": 0.3026, "step": 34050 }, { "epoch": 3.461874745831639, "grad_norm": 0.26730796694755554, "learning_rate": 2.6161244871535686e-06, "loss": 0.2699, "step": 34051 }, { "epoch": 3.461976413176088, "grad_norm": 0.3028067350387573, "learning_rate": 2.6158125393983247e-06, "loss": 0.2804, "step": 34052 }, { "epoch": 3.462078080520537, "grad_norm": 0.26121434569358826, "learning_rate": 2.6155006036540483e-06, "loss": 0.2734, "step": 34053 }, { "epoch": 3.462179747864986, "grad_norm": 0.2730211913585663, "learning_rate": 2.615188679922313e-06, "loss": 0.3083, "step": 34054 }, { "epoch": 3.4622814152094348, "grad_norm": 0.27953824400901794, "learning_rate": 2.614876768204695e-06, "loss": 0.3233, "step": 34055 }, { "epoch": 3.4623830825538837, "grad_norm": 0.2896111309528351, "learning_rate": 2.6145648685027564e-06, "loss": 0.2885, "step": 34056 }, { "epoch": 3.4624847498983327, "grad_norm": 0.25498902797698975, "learning_rate": 2.614252980818075e-06, "loss": 0.306, "step": 34057 }, { "epoch": 3.4625864172427816, "grad_norm": 0.2656721770763397, "learning_rate": 2.613941105152221e-06, "loss": 0.34, "step": 34058 }, { "epoch": 3.4626880845872305, "grad_norm": 0.26722022891044617, "learning_rate": 2.613629241506762e-06, "loss": 0.3008, "step": 34059 }, { "epoch": 3.4627897519316795, "grad_norm": 0.283753365278244, "learning_rate": 2.613317389883274e-06, "loss": 0.342, "step": 34060 }, { "epoch": 3.4628914192761284, "grad_norm": 0.26139363646507263, "learning_rate": 2.6130055502833264e-06, "loss": 0.3097, "step": 34061 }, { "epoch": 3.4629930866205774, "grad_norm": 0.28282442688941956, "learning_rate": 2.6126937227084887e-06, "loss": 0.3034, "step": 34062 }, { "epoch": 3.4630947539650263, "grad_norm": 0.28705835342407227, "learning_rate": 2.6123819071603314e-06, "loss": 0.3198, "step": 34063 }, { "epoch": 3.4631964213094752, "grad_norm": 0.24678950011730194, "learning_rate": 2.6120701036404285e-06, "loss": 0.3082, "step": 34064 }, { "epoch": 3.463298088653924, "grad_norm": 0.282512366771698, "learning_rate": 2.611758312150349e-06, "loss": 0.2782, "step": 34065 }, { "epoch": 3.463399755998373, "grad_norm": 0.2741912007331848, "learning_rate": 2.6114465326916603e-06, "loss": 0.3299, "step": 34066 }, { "epoch": 3.463501423342822, "grad_norm": 0.28053194284439087, "learning_rate": 2.61113476526594e-06, "loss": 0.3019, "step": 34067 }, { "epoch": 3.463603090687271, "grad_norm": 0.27097785472869873, "learning_rate": 2.6108230098747546e-06, "loss": 0.3211, "step": 34068 }, { "epoch": 3.4637047580317204, "grad_norm": 0.2708659768104553, "learning_rate": 2.610511266519673e-06, "loss": 0.33, "step": 34069 }, { "epoch": 3.4638064253761693, "grad_norm": 0.28317978978157043, "learning_rate": 2.6101995352022703e-06, "loss": 0.3313, "step": 34070 }, { "epoch": 3.4639080927206183, "grad_norm": 0.26003894209861755, "learning_rate": 2.6098878159241135e-06, "loss": 0.3103, "step": 34071 }, { "epoch": 3.4640097600650672, "grad_norm": 0.2680198550224304, "learning_rate": 2.6095761086867744e-06, "loss": 0.3278, "step": 34072 }, { "epoch": 3.464111427409516, "grad_norm": 0.2616557478904724, "learning_rate": 2.609264413491821e-06, "loss": 0.3217, "step": 34073 }, { "epoch": 3.464213094753965, "grad_norm": 0.26746562123298645, "learning_rate": 2.6089527303408268e-06, "loss": 0.3332, "step": 34074 }, { "epoch": 3.464314762098414, "grad_norm": 0.2610747516155243, "learning_rate": 2.608641059235361e-06, "loss": 0.3343, "step": 34075 }, { "epoch": 3.464416429442863, "grad_norm": 0.2742757499217987, "learning_rate": 2.608329400176991e-06, "loss": 0.3273, "step": 34076 }, { "epoch": 3.464518096787312, "grad_norm": 0.2651989758014679, "learning_rate": 2.6080177531672917e-06, "loss": 0.344, "step": 34077 }, { "epoch": 3.464619764131761, "grad_norm": 0.2643122971057892, "learning_rate": 2.60770611820783e-06, "loss": 0.293, "step": 34078 }, { "epoch": 3.46472143147621, "grad_norm": 0.27379319071769714, "learning_rate": 2.607394495300175e-06, "loss": 0.338, "step": 34079 }, { "epoch": 3.4648230988206588, "grad_norm": 0.2659914493560791, "learning_rate": 2.6070828844458995e-06, "loss": 0.3088, "step": 34080 }, { "epoch": 3.4649247661651077, "grad_norm": 0.26281973719596863, "learning_rate": 2.6067712856465717e-06, "loss": 0.311, "step": 34081 }, { "epoch": 3.4650264335095566, "grad_norm": 0.2808278501033783, "learning_rate": 2.6064596989037618e-06, "loss": 0.2814, "step": 34082 }, { "epoch": 3.4651281008540056, "grad_norm": 0.26000210642814636, "learning_rate": 2.606148124219037e-06, "loss": 0.2657, "step": 34083 }, { "epoch": 3.4652297681984545, "grad_norm": 0.265697181224823, "learning_rate": 2.6058365615939705e-06, "loss": 0.3219, "step": 34084 }, { "epoch": 3.4653314355429035, "grad_norm": 0.27577677369117737, "learning_rate": 2.605525011030131e-06, "loss": 0.2919, "step": 34085 }, { "epoch": 3.4654331028873524, "grad_norm": 0.26385268568992615, "learning_rate": 2.605213472529085e-06, "loss": 0.2861, "step": 34086 }, { "epoch": 3.465534770231802, "grad_norm": 0.2776317596435547, "learning_rate": 2.604901946092407e-06, "loss": 0.2973, "step": 34087 }, { "epoch": 3.4656364375762507, "grad_norm": 0.28265923261642456, "learning_rate": 2.6045904317216637e-06, "loss": 0.2898, "step": 34088 }, { "epoch": 3.4657381049206997, "grad_norm": 0.26717817783355713, "learning_rate": 2.6042789294184244e-06, "loss": 0.2958, "step": 34089 }, { "epoch": 3.4658397722651486, "grad_norm": 0.2671881318092346, "learning_rate": 2.603967439184257e-06, "loss": 0.3528, "step": 34090 }, { "epoch": 3.4659414396095976, "grad_norm": 0.2709301710128784, "learning_rate": 2.6036559610207336e-06, "loss": 0.2958, "step": 34091 }, { "epoch": 3.4660431069540465, "grad_norm": 0.3025140166282654, "learning_rate": 2.603344494929422e-06, "loss": 0.3138, "step": 34092 }, { "epoch": 3.4661447742984954, "grad_norm": 0.28321367502212524, "learning_rate": 2.6030330409118897e-06, "loss": 0.2965, "step": 34093 }, { "epoch": 3.4662464416429444, "grad_norm": 0.27597612142562866, "learning_rate": 2.60272159896971e-06, "loss": 0.3034, "step": 34094 }, { "epoch": 3.4663481089873933, "grad_norm": 0.2874353528022766, "learning_rate": 2.602410169104448e-06, "loss": 0.326, "step": 34095 }, { "epoch": 3.4664497763318423, "grad_norm": 0.2927853763103485, "learning_rate": 2.602098751317672e-06, "loss": 0.3273, "step": 34096 }, { "epoch": 3.466551443676291, "grad_norm": 0.26967719197273254, "learning_rate": 2.6017873456109555e-06, "loss": 0.3118, "step": 34097 }, { "epoch": 3.46665311102074, "grad_norm": 0.27677664160728455, "learning_rate": 2.6014759519858634e-06, "loss": 0.3032, "step": 34098 }, { "epoch": 3.466754778365189, "grad_norm": 0.27215060591697693, "learning_rate": 2.6011645704439666e-06, "loss": 0.3407, "step": 34099 }, { "epoch": 3.466856445709638, "grad_norm": 0.28305143117904663, "learning_rate": 2.60085320098683e-06, "loss": 0.3141, "step": 34100 }, { "epoch": 3.466958113054087, "grad_norm": 0.2863675653934479, "learning_rate": 2.600541843616027e-06, "loss": 0.3059, "step": 34101 }, { "epoch": 3.467059780398536, "grad_norm": 0.2665087878704071, "learning_rate": 2.600230498333124e-06, "loss": 0.3318, "step": 34102 }, { "epoch": 3.467161447742985, "grad_norm": 0.27907755970954895, "learning_rate": 2.599919165139687e-06, "loss": 0.344, "step": 34103 }, { "epoch": 3.467263115087434, "grad_norm": 0.2998718023300171, "learning_rate": 2.5996078440372895e-06, "loss": 0.2837, "step": 34104 }, { "epoch": 3.4673647824318827, "grad_norm": 0.27838295698165894, "learning_rate": 2.599296535027497e-06, "loss": 0.2944, "step": 34105 }, { "epoch": 3.4674664497763317, "grad_norm": 0.26259300112724304, "learning_rate": 2.5989852381118757e-06, "loss": 0.3063, "step": 34106 }, { "epoch": 3.4675681171207806, "grad_norm": 0.26573559641838074, "learning_rate": 2.5986739532919986e-06, "loss": 0.3041, "step": 34107 }, { "epoch": 3.4676697844652296, "grad_norm": 0.27259841561317444, "learning_rate": 2.5983626805694316e-06, "loss": 0.3179, "step": 34108 }, { "epoch": 3.4677714518096785, "grad_norm": 0.2697899043560028, "learning_rate": 2.5980514199457426e-06, "loss": 0.3245, "step": 34109 }, { "epoch": 3.467873119154128, "grad_norm": 0.2931244969367981, "learning_rate": 2.5977401714224978e-06, "loss": 0.2717, "step": 34110 }, { "epoch": 3.467974786498577, "grad_norm": 0.26661184430122375, "learning_rate": 2.597428935001269e-06, "loss": 0.308, "step": 34111 }, { "epoch": 3.4680764538430258, "grad_norm": 0.2709587812423706, "learning_rate": 2.5971177106836222e-06, "loss": 0.3008, "step": 34112 }, { "epoch": 3.4681781211874747, "grad_norm": 0.2717013955116272, "learning_rate": 2.596806498471124e-06, "loss": 0.3027, "step": 34113 }, { "epoch": 3.4682797885319236, "grad_norm": 0.271852046251297, "learning_rate": 2.5964952983653445e-06, "loss": 0.2875, "step": 34114 }, { "epoch": 3.4683814558763726, "grad_norm": 0.28402793407440186, "learning_rate": 2.5961841103678507e-06, "loss": 0.3072, "step": 34115 }, { "epoch": 3.4684831232208215, "grad_norm": 0.3047623038291931, "learning_rate": 2.595872934480209e-06, "loss": 0.3174, "step": 34116 }, { "epoch": 3.4685847905652705, "grad_norm": 0.27194544672966003, "learning_rate": 2.59556177070399e-06, "loss": 0.3285, "step": 34117 }, { "epoch": 3.4686864579097194, "grad_norm": 0.27048107981681824, "learning_rate": 2.595250619040759e-06, "loss": 0.3125, "step": 34118 }, { "epoch": 3.4687881252541684, "grad_norm": 0.2814364731311798, "learning_rate": 2.5949394794920835e-06, "loss": 0.3222, "step": 34119 }, { "epoch": 3.4688897925986173, "grad_norm": 0.2843325734138489, "learning_rate": 2.5946283520595304e-06, "loss": 0.3294, "step": 34120 }, { "epoch": 3.4689914599430662, "grad_norm": 0.28160393238067627, "learning_rate": 2.594317236744669e-06, "loss": 0.3223, "step": 34121 }, { "epoch": 3.469093127287515, "grad_norm": 0.28438565135002136, "learning_rate": 2.5940061335490664e-06, "loss": 0.309, "step": 34122 }, { "epoch": 3.469194794631964, "grad_norm": 0.2788810431957245, "learning_rate": 2.5936950424742866e-06, "loss": 0.2947, "step": 34123 }, { "epoch": 3.469296461976413, "grad_norm": 0.29423877596855164, "learning_rate": 2.5933839635219027e-06, "loss": 0.2913, "step": 34124 }, { "epoch": 3.469398129320862, "grad_norm": 0.2809120714664459, "learning_rate": 2.593072896693477e-06, "loss": 0.3289, "step": 34125 }, { "epoch": 3.469499796665311, "grad_norm": 0.29770705103874207, "learning_rate": 2.5927618419905766e-06, "loss": 0.3189, "step": 34126 }, { "epoch": 3.46960146400976, "grad_norm": 0.26367875933647156, "learning_rate": 2.592450799414772e-06, "loss": 0.3045, "step": 34127 }, { "epoch": 3.4697031313542093, "grad_norm": 0.2602749168872833, "learning_rate": 2.592139768967628e-06, "loss": 0.3127, "step": 34128 }, { "epoch": 3.469804798698658, "grad_norm": 0.27345705032348633, "learning_rate": 2.591828750650712e-06, "loss": 0.2998, "step": 34129 }, { "epoch": 3.469906466043107, "grad_norm": 0.2906646430492401, "learning_rate": 2.5915177444655883e-06, "loss": 0.2957, "step": 34130 }, { "epoch": 3.470008133387556, "grad_norm": 0.2669549882411957, "learning_rate": 2.5912067504138277e-06, "loss": 0.3196, "step": 34131 }, { "epoch": 3.470109800732005, "grad_norm": 0.27741754055023193, "learning_rate": 2.590895768496995e-06, "loss": 0.3188, "step": 34132 }, { "epoch": 3.470211468076454, "grad_norm": 0.25997278094291687, "learning_rate": 2.5905847987166555e-06, "loss": 0.3216, "step": 34133 }, { "epoch": 3.470313135420903, "grad_norm": 0.36911919713020325, "learning_rate": 2.5902738410743812e-06, "loss": 0.303, "step": 34134 }, { "epoch": 3.470414802765352, "grad_norm": 0.2819474935531616, "learning_rate": 2.58996289557173e-06, "loss": 0.2934, "step": 34135 }, { "epoch": 3.470516470109801, "grad_norm": 0.28764161467552185, "learning_rate": 2.5896519622102734e-06, "loss": 0.3089, "step": 34136 }, { "epoch": 3.4706181374542497, "grad_norm": 0.2646646499633789, "learning_rate": 2.5893410409915793e-06, "loss": 0.2713, "step": 34137 }, { "epoch": 3.4707198047986987, "grad_norm": 0.2880662679672241, "learning_rate": 2.5890301319172116e-06, "loss": 0.2835, "step": 34138 }, { "epoch": 3.4708214721431476, "grad_norm": 0.28961753845214844, "learning_rate": 2.588719234988738e-06, "loss": 0.2947, "step": 34139 }, { "epoch": 3.4709231394875966, "grad_norm": 0.286959171295166, "learning_rate": 2.5884083502077218e-06, "loss": 0.3016, "step": 34140 }, { "epoch": 3.4710248068320455, "grad_norm": 0.2846871316432953, "learning_rate": 2.588097477575733e-06, "loss": 0.2842, "step": 34141 }, { "epoch": 3.4711264741764944, "grad_norm": 0.27482491731643677, "learning_rate": 2.5877866170943367e-06, "loss": 0.3498, "step": 34142 }, { "epoch": 3.4712281415209434, "grad_norm": 0.26052573323249817, "learning_rate": 2.587475768765095e-06, "loss": 0.2946, "step": 34143 }, { "epoch": 3.4713298088653923, "grad_norm": 0.27790388464927673, "learning_rate": 2.587164932589582e-06, "loss": 0.2883, "step": 34144 }, { "epoch": 3.4714314762098413, "grad_norm": 0.2851625978946686, "learning_rate": 2.5868541085693545e-06, "loss": 0.3196, "step": 34145 }, { "epoch": 3.47153314355429, "grad_norm": 0.2571253776550293, "learning_rate": 2.586543296705982e-06, "loss": 0.3036, "step": 34146 }, { "epoch": 3.471634810898739, "grad_norm": 0.2573167383670807, "learning_rate": 2.5862324970010333e-06, "loss": 0.3231, "step": 34147 }, { "epoch": 3.471736478243188, "grad_norm": 0.27501440048217773, "learning_rate": 2.5859217094560717e-06, "loss": 0.3578, "step": 34148 }, { "epoch": 3.471838145587637, "grad_norm": 0.26879483461380005, "learning_rate": 2.5856109340726626e-06, "loss": 0.3299, "step": 34149 }, { "epoch": 3.471939812932086, "grad_norm": 0.2790355980396271, "learning_rate": 2.58530017085237e-06, "loss": 0.3453, "step": 34150 }, { "epoch": 3.4720414802765354, "grad_norm": 0.25803470611572266, "learning_rate": 2.5849894197967633e-06, "loss": 0.3165, "step": 34151 }, { "epoch": 3.4721431476209843, "grad_norm": 0.2577632665634155, "learning_rate": 2.5846786809074067e-06, "loss": 0.298, "step": 34152 }, { "epoch": 3.4722448149654332, "grad_norm": 0.26449525356292725, "learning_rate": 2.5843679541858617e-06, "loss": 0.3185, "step": 34153 }, { "epoch": 3.472346482309882, "grad_norm": 0.2668522000312805, "learning_rate": 2.5840572396337017e-06, "loss": 0.3099, "step": 34154 }, { "epoch": 3.472448149654331, "grad_norm": 0.27663442492485046, "learning_rate": 2.583746537252483e-06, "loss": 0.3233, "step": 34155 }, { "epoch": 3.47254981699878, "grad_norm": 0.25740402936935425, "learning_rate": 2.583435847043775e-06, "loss": 0.3258, "step": 34156 }, { "epoch": 3.472651484343229, "grad_norm": 0.30481281876564026, "learning_rate": 2.5831251690091467e-06, "loss": 0.3133, "step": 34157 }, { "epoch": 3.472753151687678, "grad_norm": 0.25856563448905945, "learning_rate": 2.5828145031501563e-06, "loss": 0.3346, "step": 34158 }, { "epoch": 3.472854819032127, "grad_norm": 0.27046874165534973, "learning_rate": 2.582503849468373e-06, "loss": 0.291, "step": 34159 }, { "epoch": 3.472956486376576, "grad_norm": 0.2721012532711029, "learning_rate": 2.5821932079653593e-06, "loss": 0.3533, "step": 34160 }, { "epoch": 3.4730581537210248, "grad_norm": 0.28046613931655884, "learning_rate": 2.5818825786426836e-06, "loss": 0.2748, "step": 34161 }, { "epoch": 3.4731598210654737, "grad_norm": 0.2682688236236572, "learning_rate": 2.581571961501909e-06, "loss": 0.2962, "step": 34162 }, { "epoch": 3.4732614884099227, "grad_norm": 0.2613040506839752, "learning_rate": 2.5812613565445983e-06, "loss": 0.3468, "step": 34163 }, { "epoch": 3.4733631557543716, "grad_norm": 0.29078948497772217, "learning_rate": 2.580950763772322e-06, "loss": 0.3224, "step": 34164 }, { "epoch": 3.4734648230988205, "grad_norm": 0.29415345191955566, "learning_rate": 2.5806401831866366e-06, "loss": 0.3441, "step": 34165 }, { "epoch": 3.4735664904432695, "grad_norm": 0.26796138286590576, "learning_rate": 2.580329614789111e-06, "loss": 0.3329, "step": 34166 }, { "epoch": 3.4736681577877184, "grad_norm": 0.26532524824142456, "learning_rate": 2.580019058581314e-06, "loss": 0.2907, "step": 34167 }, { "epoch": 3.4737698251321674, "grad_norm": 0.2642078995704651, "learning_rate": 2.5797085145648014e-06, "loss": 0.3103, "step": 34168 }, { "epoch": 3.4738714924766168, "grad_norm": 0.2836264371871948, "learning_rate": 2.5793979827411453e-06, "loss": 0.3139, "step": 34169 }, { "epoch": 3.4739731598210657, "grad_norm": 0.25825220346450806, "learning_rate": 2.5790874631119057e-06, "loss": 0.3078, "step": 34170 }, { "epoch": 3.4740748271655146, "grad_norm": 0.2727474868297577, "learning_rate": 2.5787769556786467e-06, "loss": 0.3431, "step": 34171 }, { "epoch": 3.4741764945099636, "grad_norm": 0.27083563804626465, "learning_rate": 2.5784664604429367e-06, "loss": 0.2709, "step": 34172 }, { "epoch": 3.4742781618544125, "grad_norm": 0.2632673382759094, "learning_rate": 2.578155977406335e-06, "loss": 0.3025, "step": 34173 }, { "epoch": 3.4743798291988615, "grad_norm": 0.26588746905326843, "learning_rate": 2.5778455065704113e-06, "loss": 0.3051, "step": 34174 }, { "epoch": 3.4744814965433104, "grad_norm": 0.28787147998809814, "learning_rate": 2.5775350479367224e-06, "loss": 0.3112, "step": 34175 }, { "epoch": 3.4745831638877593, "grad_norm": 0.3019507825374603, "learning_rate": 2.5772246015068364e-06, "loss": 0.3071, "step": 34176 }, { "epoch": 3.4746848312322083, "grad_norm": 0.28669747710227966, "learning_rate": 2.5769141672823213e-06, "loss": 0.3073, "step": 34177 }, { "epoch": 3.4747864985766572, "grad_norm": 0.26953935623168945, "learning_rate": 2.5766037452647325e-06, "loss": 0.3199, "step": 34178 }, { "epoch": 3.474888165921106, "grad_norm": 0.2936844527721405, "learning_rate": 2.57629333545564e-06, "loss": 0.3253, "step": 34179 }, { "epoch": 3.474989833265555, "grad_norm": 0.26425275206565857, "learning_rate": 2.5759829378566064e-06, "loss": 0.322, "step": 34180 }, { "epoch": 3.475091500610004, "grad_norm": 0.2866068482398987, "learning_rate": 2.575672552469193e-06, "loss": 0.3192, "step": 34181 }, { "epoch": 3.475193167954453, "grad_norm": 0.2530902028083801, "learning_rate": 2.5753621792949657e-06, "loss": 0.3211, "step": 34182 }, { "epoch": 3.475294835298902, "grad_norm": 0.27403512597084045, "learning_rate": 2.5750518183354865e-06, "loss": 0.301, "step": 34183 }, { "epoch": 3.475396502643351, "grad_norm": 0.25775471329689026, "learning_rate": 2.574741469592324e-06, "loss": 0.3131, "step": 34184 }, { "epoch": 3.4754981699878, "grad_norm": 0.2637418210506439, "learning_rate": 2.5744311330670334e-06, "loss": 0.3047, "step": 34185 }, { "epoch": 3.4755998373322488, "grad_norm": 0.27060312032699585, "learning_rate": 2.5741208087611825e-06, "loss": 0.3491, "step": 34186 }, { "epoch": 3.4757015046766977, "grad_norm": 0.25908035039901733, "learning_rate": 2.573810496676339e-06, "loss": 0.3252, "step": 34187 }, { "epoch": 3.4758031720211466, "grad_norm": 0.2686116099357605, "learning_rate": 2.573500196814057e-06, "loss": 0.2905, "step": 34188 }, { "epoch": 3.4759048393655956, "grad_norm": 0.29139673709869385, "learning_rate": 2.573189909175906e-06, "loss": 0.3419, "step": 34189 }, { "epoch": 3.4760065067100445, "grad_norm": 0.268714964389801, "learning_rate": 2.5728796337634487e-06, "loss": 0.311, "step": 34190 }, { "epoch": 3.4761081740544935, "grad_norm": 0.2679285705089569, "learning_rate": 2.5725693705782438e-06, "loss": 0.3156, "step": 34191 }, { "epoch": 3.476209841398943, "grad_norm": 0.2676485478878021, "learning_rate": 2.5722591196218606e-06, "loss": 0.308, "step": 34192 }, { "epoch": 3.476311508743392, "grad_norm": 0.2661104202270508, "learning_rate": 2.5719488808958582e-06, "loss": 0.2974, "step": 34193 }, { "epoch": 3.4764131760878407, "grad_norm": 0.28717976808547974, "learning_rate": 2.5716386544018014e-06, "loss": 0.3113, "step": 34194 }, { "epoch": 3.4765148434322897, "grad_norm": 0.2652537524700165, "learning_rate": 2.5713284401412488e-06, "loss": 0.3366, "step": 34195 }, { "epoch": 3.4766165107767386, "grad_norm": 0.2866823971271515, "learning_rate": 2.5710182381157673e-06, "loss": 0.3017, "step": 34196 }, { "epoch": 3.4767181781211876, "grad_norm": 0.3071061372756958, "learning_rate": 2.570708048326922e-06, "loss": 0.2758, "step": 34197 }, { "epoch": 3.4768198454656365, "grad_norm": 0.25831031799316406, "learning_rate": 2.570397870776269e-06, "loss": 0.315, "step": 34198 }, { "epoch": 3.4769215128100854, "grad_norm": 0.2566874027252197, "learning_rate": 2.570087705465375e-06, "loss": 0.3082, "step": 34199 }, { "epoch": 3.4770231801545344, "grad_norm": 0.27122563123703003, "learning_rate": 2.569777552395803e-06, "loss": 0.2907, "step": 34200 }, { "epoch": 3.4771248474989833, "grad_norm": 0.26475322246551514, "learning_rate": 2.5694674115691115e-06, "loss": 0.2957, "step": 34201 }, { "epoch": 3.4772265148434323, "grad_norm": 0.3031615614891052, "learning_rate": 2.5691572829868677e-06, "loss": 0.2476, "step": 34202 }, { "epoch": 3.477328182187881, "grad_norm": 0.2731747031211853, "learning_rate": 2.568847166650632e-06, "loss": 0.2995, "step": 34203 }, { "epoch": 3.47742984953233, "grad_norm": 0.29059866070747375, "learning_rate": 2.5685370625619666e-06, "loss": 0.3151, "step": 34204 }, { "epoch": 3.477531516876779, "grad_norm": 0.2718423306941986, "learning_rate": 2.568226970722431e-06, "loss": 0.3316, "step": 34205 }, { "epoch": 3.477633184221228, "grad_norm": 0.2898155152797699, "learning_rate": 2.5679168911335906e-06, "loss": 0.2996, "step": 34206 }, { "epoch": 3.477734851565677, "grad_norm": 0.28338849544525146, "learning_rate": 2.567606823797011e-06, "loss": 0.2883, "step": 34207 }, { "epoch": 3.477836518910126, "grad_norm": 0.2772333025932312, "learning_rate": 2.567296768714246e-06, "loss": 0.3058, "step": 34208 }, { "epoch": 3.477938186254575, "grad_norm": 0.2751365303993225, "learning_rate": 2.566986725886864e-06, "loss": 0.3298, "step": 34209 }, { "epoch": 3.4780398535990242, "grad_norm": 0.27269160747528076, "learning_rate": 2.5666766953164246e-06, "loss": 0.3226, "step": 34210 }, { "epoch": 3.478141520943473, "grad_norm": 0.2742539942264557, "learning_rate": 2.5663666770044874e-06, "loss": 0.3099, "step": 34211 }, { "epoch": 3.478243188287922, "grad_norm": 0.2688153088092804, "learning_rate": 2.566056670952619e-06, "loss": 0.3161, "step": 34212 }, { "epoch": 3.478344855632371, "grad_norm": 0.25406092405319214, "learning_rate": 2.5657466771623784e-06, "loss": 0.2844, "step": 34213 }, { "epoch": 3.47844652297682, "grad_norm": 0.27536994218826294, "learning_rate": 2.5654366956353283e-06, "loss": 0.295, "step": 34214 }, { "epoch": 3.478548190321269, "grad_norm": 0.29977819323539734, "learning_rate": 2.5651267263730274e-06, "loss": 0.2755, "step": 34215 }, { "epoch": 3.478649857665718, "grad_norm": 0.25770094990730286, "learning_rate": 2.5648167693770408e-06, "loss": 0.321, "step": 34216 }, { "epoch": 3.478751525010167, "grad_norm": 0.2889164090156555, "learning_rate": 2.5645068246489293e-06, "loss": 0.3181, "step": 34217 }, { "epoch": 3.4788531923546158, "grad_norm": 0.2685694694519043, "learning_rate": 2.5641968921902505e-06, "loss": 0.3423, "step": 34218 }, { "epoch": 3.4789548596990647, "grad_norm": 0.2575323283672333, "learning_rate": 2.5638869720025715e-06, "loss": 0.3281, "step": 34219 }, { "epoch": 3.4790565270435136, "grad_norm": 0.26112601161003113, "learning_rate": 2.563577064087451e-06, "loss": 0.3169, "step": 34220 }, { "epoch": 3.4791581943879626, "grad_norm": 0.281002938747406, "learning_rate": 2.5632671684464495e-06, "loss": 0.2841, "step": 34221 }, { "epoch": 3.4792598617324115, "grad_norm": 0.26966726779937744, "learning_rate": 2.5629572850811295e-06, "loss": 0.3128, "step": 34222 }, { "epoch": 3.4793615290768605, "grad_norm": 0.26610976457595825, "learning_rate": 2.5626474139930523e-06, "loss": 0.3352, "step": 34223 }, { "epoch": 3.4794631964213094, "grad_norm": 0.2751171290874481, "learning_rate": 2.5623375551837778e-06, "loss": 0.3134, "step": 34224 }, { "epoch": 3.4795648637657584, "grad_norm": 0.2480391412973404, "learning_rate": 2.5620277086548652e-06, "loss": 0.3198, "step": 34225 }, { "epoch": 3.4796665311102073, "grad_norm": 0.2850101888179779, "learning_rate": 2.56171787440788e-06, "loss": 0.3063, "step": 34226 }, { "epoch": 3.4797681984546562, "grad_norm": 0.2587898373603821, "learning_rate": 2.5614080524443807e-06, "loss": 0.32, "step": 34227 }, { "epoch": 3.479869865799105, "grad_norm": 0.27883458137512207, "learning_rate": 2.5610982427659263e-06, "loss": 0.3207, "step": 34228 }, { "epoch": 3.479971533143554, "grad_norm": 0.2730153799057007, "learning_rate": 2.5607884453740815e-06, "loss": 0.2938, "step": 34229 }, { "epoch": 3.480073200488003, "grad_norm": 0.27936363220214844, "learning_rate": 2.5604786602704047e-06, "loss": 0.2985, "step": 34230 }, { "epoch": 3.480174867832452, "grad_norm": 0.2734919488430023, "learning_rate": 2.5601688874564546e-06, "loss": 0.3283, "step": 34231 }, { "epoch": 3.480276535176901, "grad_norm": 0.27885517477989197, "learning_rate": 2.559859126933796e-06, "loss": 0.2746, "step": 34232 }, { "epoch": 3.4803782025213503, "grad_norm": 0.28019046783447266, "learning_rate": 2.5595493787039864e-06, "loss": 0.2861, "step": 34233 }, { "epoch": 3.4804798698657993, "grad_norm": 0.26220014691352844, "learning_rate": 2.5592396427685884e-06, "loss": 0.2911, "step": 34234 }, { "epoch": 3.480581537210248, "grad_norm": 0.27340927720069885, "learning_rate": 2.5589299191291585e-06, "loss": 0.2983, "step": 34235 }, { "epoch": 3.480683204554697, "grad_norm": 0.28788480162620544, "learning_rate": 2.5586202077872613e-06, "loss": 0.331, "step": 34236 }, { "epoch": 3.480784871899146, "grad_norm": 0.26832500100135803, "learning_rate": 2.5583105087444553e-06, "loss": 0.3385, "step": 34237 }, { "epoch": 3.480886539243595, "grad_norm": 0.28134483098983765, "learning_rate": 2.558000822002299e-06, "loss": 0.3173, "step": 34238 }, { "epoch": 3.480988206588044, "grad_norm": 0.27259212732315063, "learning_rate": 2.557691147562357e-06, "loss": 0.3117, "step": 34239 }, { "epoch": 3.481089873932493, "grad_norm": 0.2666432857513428, "learning_rate": 2.5573814854261854e-06, "loss": 0.3205, "step": 34240 }, { "epoch": 3.481191541276942, "grad_norm": 0.2798062562942505, "learning_rate": 2.557071835595344e-06, "loss": 0.3015, "step": 34241 }, { "epoch": 3.481293208621391, "grad_norm": 0.26252126693725586, "learning_rate": 2.5567621980713957e-06, "loss": 0.3119, "step": 34242 }, { "epoch": 3.4813948759658397, "grad_norm": 0.2725338339805603, "learning_rate": 2.556452572855899e-06, "loss": 0.3144, "step": 34243 }, { "epoch": 3.4814965433102887, "grad_norm": 0.2546239495277405, "learning_rate": 2.556142959950414e-06, "loss": 0.3027, "step": 34244 }, { "epoch": 3.4815982106547376, "grad_norm": 0.28479185700416565, "learning_rate": 2.5558333593564977e-06, "loss": 0.2975, "step": 34245 }, { "epoch": 3.4816998779991866, "grad_norm": 0.25706347823143005, "learning_rate": 2.555523771075714e-06, "loss": 0.3522, "step": 34246 }, { "epoch": 3.4818015453436355, "grad_norm": 0.2947149872779846, "learning_rate": 2.555214195109621e-06, "loss": 0.2997, "step": 34247 }, { "epoch": 3.4819032126880844, "grad_norm": 0.26183074712753296, "learning_rate": 2.5549046314597757e-06, "loss": 0.3196, "step": 34248 }, { "epoch": 3.4820048800325334, "grad_norm": 0.2845592200756073, "learning_rate": 2.554595080127742e-06, "loss": 0.3036, "step": 34249 }, { "epoch": 3.4821065473769823, "grad_norm": 0.29668572545051575, "learning_rate": 2.554285541115077e-06, "loss": 0.3097, "step": 34250 }, { "epoch": 3.4822082147214317, "grad_norm": 0.2897147834300995, "learning_rate": 2.5539760144233382e-06, "loss": 0.2807, "step": 34251 }, { "epoch": 3.4823098820658807, "grad_norm": 0.275438517332077, "learning_rate": 2.5536665000540897e-06, "loss": 0.3104, "step": 34252 }, { "epoch": 3.4824115494103296, "grad_norm": 0.2631329894065857, "learning_rate": 2.5533569980088867e-06, "loss": 0.2973, "step": 34253 }, { "epoch": 3.4825132167547785, "grad_norm": 0.28612643480300903, "learning_rate": 2.5530475082892905e-06, "loss": 0.3159, "step": 34254 }, { "epoch": 3.4826148840992275, "grad_norm": 0.30433496832847595, "learning_rate": 2.5527380308968574e-06, "loss": 0.3105, "step": 34255 }, { "epoch": 3.4827165514436764, "grad_norm": 0.27103155851364136, "learning_rate": 2.5524285658331506e-06, "loss": 0.3328, "step": 34256 }, { "epoch": 3.4828182187881254, "grad_norm": 0.28526797890663147, "learning_rate": 2.552119113099727e-06, "loss": 0.3018, "step": 34257 }, { "epoch": 3.4829198861325743, "grad_norm": 0.282382994890213, "learning_rate": 2.5518096726981434e-06, "loss": 0.3286, "step": 34258 }, { "epoch": 3.4830215534770232, "grad_norm": 0.28400832414627075, "learning_rate": 2.551500244629963e-06, "loss": 0.2738, "step": 34259 }, { "epoch": 3.483123220821472, "grad_norm": 0.26425012946128845, "learning_rate": 2.5511908288967423e-06, "loss": 0.3167, "step": 34260 }, { "epoch": 3.483224888165921, "grad_norm": 0.274159699678421, "learning_rate": 2.550881425500039e-06, "loss": 0.3399, "step": 34261 }, { "epoch": 3.48332655551037, "grad_norm": 0.2757096290588379, "learning_rate": 2.550572034441415e-06, "loss": 0.3063, "step": 34262 }, { "epoch": 3.483428222854819, "grad_norm": 0.2748950123786926, "learning_rate": 2.5502626557224265e-06, "loss": 0.3198, "step": 34263 }, { "epoch": 3.483529890199268, "grad_norm": 0.2848437428474426, "learning_rate": 2.5499532893446327e-06, "loss": 0.2966, "step": 34264 }, { "epoch": 3.483631557543717, "grad_norm": 0.27270591259002686, "learning_rate": 2.5496439353095905e-06, "loss": 0.3066, "step": 34265 }, { "epoch": 3.483733224888166, "grad_norm": 0.24927915632724762, "learning_rate": 2.549334593618861e-06, "loss": 0.322, "step": 34266 }, { "epoch": 3.4838348922326148, "grad_norm": 0.2721238434314728, "learning_rate": 2.5490252642740023e-06, "loss": 0.329, "step": 34267 }, { "epoch": 3.4839365595770637, "grad_norm": 0.26687362790107727, "learning_rate": 2.5487159472765687e-06, "loss": 0.331, "step": 34268 }, { "epoch": 3.4840382269215127, "grad_norm": 0.2810845375061035, "learning_rate": 2.5484066426281247e-06, "loss": 0.2921, "step": 34269 }, { "epoch": 3.4841398942659616, "grad_norm": 0.30735307931900024, "learning_rate": 2.5480973503302252e-06, "loss": 0.3039, "step": 34270 }, { "epoch": 3.4842415616104105, "grad_norm": 0.27559366822242737, "learning_rate": 2.547788070384428e-06, "loss": 0.2991, "step": 34271 }, { "epoch": 3.4843432289548595, "grad_norm": 0.2658470571041107, "learning_rate": 2.54747880279229e-06, "loss": 0.3051, "step": 34272 }, { "epoch": 3.4844448962993084, "grad_norm": 0.2793367803096771, "learning_rate": 2.5471695475553738e-06, "loss": 0.3335, "step": 34273 }, { "epoch": 3.484546563643758, "grad_norm": 0.25226330757141113, "learning_rate": 2.5468603046752336e-06, "loss": 0.3227, "step": 34274 }, { "epoch": 3.4846482309882068, "grad_norm": 0.2789627015590668, "learning_rate": 2.546551074153427e-06, "loss": 0.3052, "step": 34275 }, { "epoch": 3.4847498983326557, "grad_norm": 0.2706855535507202, "learning_rate": 2.546241855991515e-06, "loss": 0.3158, "step": 34276 }, { "epoch": 3.4848515656771046, "grad_norm": 0.2620268762111664, "learning_rate": 2.545932650191053e-06, "loss": 0.3185, "step": 34277 }, { "epoch": 3.4849532330215536, "grad_norm": 0.2610619366168976, "learning_rate": 2.5456234567535974e-06, "loss": 0.38, "step": 34278 }, { "epoch": 3.4850549003660025, "grad_norm": 0.2512912154197693, "learning_rate": 2.5453142756807094e-06, "loss": 0.327, "step": 34279 }, { "epoch": 3.4851565677104515, "grad_norm": 0.2648887634277344, "learning_rate": 2.5450051069739446e-06, "loss": 0.2868, "step": 34280 }, { "epoch": 3.4852582350549004, "grad_norm": 0.25126564502716064, "learning_rate": 2.5446959506348612e-06, "loss": 0.2791, "step": 34281 }, { "epoch": 3.4853599023993493, "grad_norm": 0.27466902136802673, "learning_rate": 2.5443868066650136e-06, "loss": 0.3328, "step": 34282 }, { "epoch": 3.4854615697437983, "grad_norm": 0.24199509620666504, "learning_rate": 2.544077675065964e-06, "loss": 0.3263, "step": 34283 }, { "epoch": 3.4855632370882472, "grad_norm": 0.28191882371902466, "learning_rate": 2.543768555839268e-06, "loss": 0.2945, "step": 34284 }, { "epoch": 3.485664904432696, "grad_norm": 0.2810860276222229, "learning_rate": 2.5434594489864793e-06, "loss": 0.2962, "step": 34285 }, { "epoch": 3.485766571777145, "grad_norm": 0.26424840092658997, "learning_rate": 2.5431503545091623e-06, "loss": 0.3302, "step": 34286 }, { "epoch": 3.485868239121594, "grad_norm": 0.2773592472076416, "learning_rate": 2.542841272408866e-06, "loss": 0.3114, "step": 34287 }, { "epoch": 3.485969906466043, "grad_norm": 0.2614912986755371, "learning_rate": 2.542532202687152e-06, "loss": 0.3097, "step": 34288 }, { "epoch": 3.486071573810492, "grad_norm": 0.2757783532142639, "learning_rate": 2.542223145345578e-06, "loss": 0.3255, "step": 34289 }, { "epoch": 3.486173241154941, "grad_norm": 0.29462411999702454, "learning_rate": 2.5419141003857008e-06, "loss": 0.2998, "step": 34290 }, { "epoch": 3.48627490849939, "grad_norm": 0.2929488718509674, "learning_rate": 2.5416050678090755e-06, "loss": 0.2908, "step": 34291 }, { "epoch": 3.486376575843839, "grad_norm": 0.27209824323654175, "learning_rate": 2.5412960476172578e-06, "loss": 0.283, "step": 34292 }, { "epoch": 3.486478243188288, "grad_norm": 0.2795650064945221, "learning_rate": 2.5409870398118082e-06, "loss": 0.3393, "step": 34293 }, { "epoch": 3.486579910532737, "grad_norm": 0.29525652527809143, "learning_rate": 2.540678044394281e-06, "loss": 0.3358, "step": 34294 }, { "epoch": 3.486681577877186, "grad_norm": 0.2755093276500702, "learning_rate": 2.5403690613662322e-06, "loss": 0.3164, "step": 34295 }, { "epoch": 3.486783245221635, "grad_norm": 0.2744717597961426, "learning_rate": 2.5400600907292237e-06, "loss": 0.2829, "step": 34296 }, { "epoch": 3.486884912566084, "grad_norm": 0.26676708459854126, "learning_rate": 2.5397511324848035e-06, "loss": 0.2701, "step": 34297 }, { "epoch": 3.486986579910533, "grad_norm": 0.2521492838859558, "learning_rate": 2.5394421866345333e-06, "loss": 0.2936, "step": 34298 }, { "epoch": 3.487088247254982, "grad_norm": 0.2579568326473236, "learning_rate": 2.5391332531799693e-06, "loss": 0.3101, "step": 34299 }, { "epoch": 3.4871899145994307, "grad_norm": 0.2766564190387726, "learning_rate": 2.538824332122668e-06, "loss": 0.3119, "step": 34300 }, { "epoch": 3.4872915819438797, "grad_norm": 0.25882986187934875, "learning_rate": 2.5385154234641847e-06, "loss": 0.3113, "step": 34301 }, { "epoch": 3.4873932492883286, "grad_norm": 0.29463234543800354, "learning_rate": 2.5382065272060736e-06, "loss": 0.3186, "step": 34302 }, { "epoch": 3.4874949166327776, "grad_norm": 0.2850567698478699, "learning_rate": 2.537897643349895e-06, "loss": 0.2911, "step": 34303 }, { "epoch": 3.4875965839772265, "grad_norm": 0.2741806209087372, "learning_rate": 2.5375887718972032e-06, "loss": 0.3014, "step": 34304 }, { "epoch": 3.4876982513216754, "grad_norm": 0.2711634933948517, "learning_rate": 2.537279912849552e-06, "loss": 0.3146, "step": 34305 }, { "epoch": 3.4877999186661244, "grad_norm": 0.27482369542121887, "learning_rate": 2.5369710662085034e-06, "loss": 0.3089, "step": 34306 }, { "epoch": 3.4879015860105733, "grad_norm": 0.2656547725200653, "learning_rate": 2.536662231975605e-06, "loss": 0.3326, "step": 34307 }, { "epoch": 3.4880032533550223, "grad_norm": 0.2762804627418518, "learning_rate": 2.536353410152417e-06, "loss": 0.3188, "step": 34308 }, { "epoch": 3.488104920699471, "grad_norm": 0.2784262001514435, "learning_rate": 2.5360446007404994e-06, "loss": 0.3207, "step": 34309 }, { "epoch": 3.48820658804392, "grad_norm": 0.26693859696388245, "learning_rate": 2.535735803741399e-06, "loss": 0.2982, "step": 34310 }, { "epoch": 3.488308255388369, "grad_norm": 0.2663346230983734, "learning_rate": 2.5354270191566784e-06, "loss": 0.2999, "step": 34311 }, { "epoch": 3.488409922732818, "grad_norm": 0.25525999069213867, "learning_rate": 2.5351182469878886e-06, "loss": 0.2997, "step": 34312 }, { "epoch": 3.488511590077267, "grad_norm": 0.29977262020111084, "learning_rate": 2.534809487236589e-06, "loss": 0.2928, "step": 34313 }, { "epoch": 3.488613257421716, "grad_norm": 0.2961394786834717, "learning_rate": 2.534500739904334e-06, "loss": 0.2952, "step": 34314 }, { "epoch": 3.4887149247661653, "grad_norm": 0.2700173258781433, "learning_rate": 2.534192004992676e-06, "loss": 0.307, "step": 34315 }, { "epoch": 3.4888165921106142, "grad_norm": 0.26346060633659363, "learning_rate": 2.5338832825031767e-06, "loss": 0.326, "step": 34316 }, { "epoch": 3.488918259455063, "grad_norm": 0.27389147877693176, "learning_rate": 2.533574572437384e-06, "loss": 0.2807, "step": 34317 }, { "epoch": 3.489019926799512, "grad_norm": 0.2834455370903015, "learning_rate": 2.5332658747968564e-06, "loss": 0.2947, "step": 34318 }, { "epoch": 3.489121594143961, "grad_norm": 0.27825629711151123, "learning_rate": 2.5329571895831527e-06, "loss": 0.304, "step": 34319 }, { "epoch": 3.48922326148841, "grad_norm": 0.3176977038383484, "learning_rate": 2.5326485167978203e-06, "loss": 0.3362, "step": 34320 }, { "epoch": 3.489324928832859, "grad_norm": 0.25215622782707214, "learning_rate": 2.5323398564424206e-06, "loss": 0.3401, "step": 34321 }, { "epoch": 3.489426596177308, "grad_norm": 0.26828449964523315, "learning_rate": 2.5320312085185063e-06, "loss": 0.3149, "step": 34322 }, { "epoch": 3.489528263521757, "grad_norm": 0.2706925868988037, "learning_rate": 2.53172257302763e-06, "loss": 0.2804, "step": 34323 }, { "epoch": 3.4896299308662058, "grad_norm": 0.28103306889533997, "learning_rate": 2.5314139499713512e-06, "loss": 0.3218, "step": 34324 }, { "epoch": 3.4897315982106547, "grad_norm": 0.2645244002342224, "learning_rate": 2.5311053393512203e-06, "loss": 0.2988, "step": 34325 }, { "epoch": 3.4898332655551036, "grad_norm": 0.277774840593338, "learning_rate": 2.530796741168798e-06, "loss": 0.3258, "step": 34326 }, { "epoch": 3.4899349328995526, "grad_norm": 0.2742099165916443, "learning_rate": 2.5304881554256303e-06, "loss": 0.338, "step": 34327 }, { "epoch": 3.4900366002440015, "grad_norm": 0.2669455409049988, "learning_rate": 2.5301795821232774e-06, "loss": 0.3017, "step": 34328 }, { "epoch": 3.4901382675884505, "grad_norm": 0.29113835096359253, "learning_rate": 2.529871021263296e-06, "loss": 0.2767, "step": 34329 }, { "epoch": 3.4902399349328994, "grad_norm": 0.2729981243610382, "learning_rate": 2.529562472847234e-06, "loss": 0.3278, "step": 34330 }, { "epoch": 3.4903416022773484, "grad_norm": 0.27286049723625183, "learning_rate": 2.529253936876651e-06, "loss": 0.3135, "step": 34331 }, { "epoch": 3.4904432696217973, "grad_norm": 0.2898476719856262, "learning_rate": 2.5289454133530996e-06, "loss": 0.3253, "step": 34332 }, { "epoch": 3.4905449369662467, "grad_norm": 0.2957916855812073, "learning_rate": 2.5286369022781322e-06, "loss": 0.2828, "step": 34333 }, { "epoch": 3.4906466043106956, "grad_norm": 0.284312903881073, "learning_rate": 2.528328403653306e-06, "loss": 0.3254, "step": 34334 }, { "epoch": 3.4907482716551446, "grad_norm": 0.26256948709487915, "learning_rate": 2.5280199174801723e-06, "loss": 0.2966, "step": 34335 }, { "epoch": 3.4908499389995935, "grad_norm": 0.27715638279914856, "learning_rate": 2.5277114437602913e-06, "loss": 0.3017, "step": 34336 }, { "epoch": 3.4909516063440424, "grad_norm": 0.2690281569957733, "learning_rate": 2.5274029824952082e-06, "loss": 0.327, "step": 34337 }, { "epoch": 3.4910532736884914, "grad_norm": 0.2836575508117676, "learning_rate": 2.5270945336864815e-06, "loss": 0.3021, "step": 34338 }, { "epoch": 3.4911549410329403, "grad_norm": 0.284841388463974, "learning_rate": 2.5267860973356685e-06, "loss": 0.3157, "step": 34339 }, { "epoch": 3.4912566083773893, "grad_norm": 0.2713276445865631, "learning_rate": 2.5264776734443155e-06, "loss": 0.3174, "step": 34340 }, { "epoch": 3.491358275721838, "grad_norm": 0.298855721950531, "learning_rate": 2.526169262013982e-06, "loss": 0.3066, "step": 34341 }, { "epoch": 3.491459943066287, "grad_norm": 0.2664010524749756, "learning_rate": 2.5258608630462207e-06, "loss": 0.3231, "step": 34342 }, { "epoch": 3.491561610410736, "grad_norm": 0.28362947702407837, "learning_rate": 2.525552476542582e-06, "loss": 0.31, "step": 34343 }, { "epoch": 3.491663277755185, "grad_norm": 0.28615012764930725, "learning_rate": 2.5252441025046238e-06, "loss": 0.295, "step": 34344 }, { "epoch": 3.491764945099634, "grad_norm": 0.28708434104919434, "learning_rate": 2.5249357409338977e-06, "loss": 0.3014, "step": 34345 }, { "epoch": 3.491866612444083, "grad_norm": 0.2769494354724884, "learning_rate": 2.524627391831957e-06, "loss": 0.305, "step": 34346 }, { "epoch": 3.491968279788532, "grad_norm": 0.2876725196838379, "learning_rate": 2.5243190552003537e-06, "loss": 0.2895, "step": 34347 }, { "epoch": 3.492069947132981, "grad_norm": 0.28434550762176514, "learning_rate": 2.5240107310406427e-06, "loss": 0.3001, "step": 34348 }, { "epoch": 3.4921716144774297, "grad_norm": 0.2745666801929474, "learning_rate": 2.52370241935438e-06, "loss": 0.3017, "step": 34349 }, { "epoch": 3.4922732818218787, "grad_norm": 0.3109663128852844, "learning_rate": 2.5233941201431135e-06, "loss": 0.2836, "step": 34350 }, { "epoch": 3.4923749491663276, "grad_norm": 0.2804540693759918, "learning_rate": 2.523085833408401e-06, "loss": 0.2901, "step": 34351 }, { "epoch": 3.4924766165107766, "grad_norm": 0.2864525616168976, "learning_rate": 2.5227775591517924e-06, "loss": 0.2947, "step": 34352 }, { "epoch": 3.4925782838552255, "grad_norm": 0.27884799242019653, "learning_rate": 2.5224692973748403e-06, "loss": 0.3082, "step": 34353 }, { "epoch": 3.4926799511996744, "grad_norm": 0.2565155625343323, "learning_rate": 2.5221610480791014e-06, "loss": 0.3124, "step": 34354 }, { "epoch": 3.4927816185441234, "grad_norm": 0.2788967490196228, "learning_rate": 2.5218528112661254e-06, "loss": 0.3472, "step": 34355 }, { "epoch": 3.4928832858885728, "grad_norm": 0.2702440321445465, "learning_rate": 2.5215445869374665e-06, "loss": 0.305, "step": 34356 }, { "epoch": 3.4929849532330217, "grad_norm": 0.2742590606212616, "learning_rate": 2.5212363750946754e-06, "loss": 0.2885, "step": 34357 }, { "epoch": 3.4930866205774707, "grad_norm": 0.27595144510269165, "learning_rate": 2.5209281757393083e-06, "loss": 0.2744, "step": 34358 }, { "epoch": 3.4931882879219196, "grad_norm": 0.2781449854373932, "learning_rate": 2.520619988872916e-06, "loss": 0.3249, "step": 34359 }, { "epoch": 3.4932899552663685, "grad_norm": 0.2769882082939148, "learning_rate": 2.5203118144970484e-06, "loss": 0.2758, "step": 34360 }, { "epoch": 3.4933916226108175, "grad_norm": 0.2779652774333954, "learning_rate": 2.520003652613263e-06, "loss": 0.3326, "step": 34361 }, { "epoch": 3.4934932899552664, "grad_norm": 0.288217693567276, "learning_rate": 2.5196955032231103e-06, "loss": 0.3109, "step": 34362 }, { "epoch": 3.4935949572997154, "grad_norm": 0.25692492723464966, "learning_rate": 2.51938736632814e-06, "loss": 0.3325, "step": 34363 }, { "epoch": 3.4936966246441643, "grad_norm": 0.26049724221229553, "learning_rate": 2.519079241929908e-06, "loss": 0.3233, "step": 34364 }, { "epoch": 3.4937982919886132, "grad_norm": 0.27037450671195984, "learning_rate": 2.518771130029966e-06, "loss": 0.3223, "step": 34365 }, { "epoch": 3.493899959333062, "grad_norm": 0.3109865188598633, "learning_rate": 2.518463030629865e-06, "loss": 0.3071, "step": 34366 }, { "epoch": 3.494001626677511, "grad_norm": 0.29407212138175964, "learning_rate": 2.518154943731156e-06, "loss": 0.3225, "step": 34367 }, { "epoch": 3.49410329402196, "grad_norm": 0.2896822690963745, "learning_rate": 2.517846869335394e-06, "loss": 0.2968, "step": 34368 }, { "epoch": 3.494204961366409, "grad_norm": 0.28012725710868835, "learning_rate": 2.5175388074441298e-06, "loss": 0.3423, "step": 34369 }, { "epoch": 3.494306628710858, "grad_norm": 0.29258447885513306, "learning_rate": 2.517230758058914e-06, "loss": 0.3005, "step": 34370 }, { "epoch": 3.494408296055307, "grad_norm": 0.276286244392395, "learning_rate": 2.5169227211813008e-06, "loss": 0.3081, "step": 34371 }, { "epoch": 3.494509963399756, "grad_norm": 0.28804925084114075, "learning_rate": 2.51661469681284e-06, "loss": 0.3013, "step": 34372 }, { "epoch": 3.4946116307442048, "grad_norm": 0.286391019821167, "learning_rate": 2.516306684955084e-06, "loss": 0.2999, "step": 34373 }, { "epoch": 3.494713298088654, "grad_norm": 0.28059735894203186, "learning_rate": 2.515998685609586e-06, "loss": 0.3111, "step": 34374 }, { "epoch": 3.494814965433103, "grad_norm": 0.27285683155059814, "learning_rate": 2.5156906987778963e-06, "loss": 0.2863, "step": 34375 }, { "epoch": 3.494916632777552, "grad_norm": 0.27527767419815063, "learning_rate": 2.5153827244615657e-06, "loss": 0.3002, "step": 34376 }, { "epoch": 3.495018300122001, "grad_norm": 0.2652118504047394, "learning_rate": 2.515074762662145e-06, "loss": 0.3066, "step": 34377 }, { "epoch": 3.49511996746645, "grad_norm": 0.28122177720069885, "learning_rate": 2.5147668133811896e-06, "loss": 0.3022, "step": 34378 }, { "epoch": 3.495221634810899, "grad_norm": 0.27188554406166077, "learning_rate": 2.514458876620248e-06, "loss": 0.3264, "step": 34379 }, { "epoch": 3.495323302155348, "grad_norm": 0.27208366990089417, "learning_rate": 2.5141509523808704e-06, "loss": 0.2971, "step": 34380 }, { "epoch": 3.4954249694997968, "grad_norm": 0.2879874110221863, "learning_rate": 2.513843040664612e-06, "loss": 0.3083, "step": 34381 }, { "epoch": 3.4955266368442457, "grad_norm": 0.26182547211647034, "learning_rate": 2.51353514147302e-06, "loss": 0.2946, "step": 34382 }, { "epoch": 3.4956283041886946, "grad_norm": 0.2774118483066559, "learning_rate": 2.5132272548076462e-06, "loss": 0.3241, "step": 34383 }, { "epoch": 3.4957299715331436, "grad_norm": 0.29314935207366943, "learning_rate": 2.5129193806700447e-06, "loss": 0.308, "step": 34384 }, { "epoch": 3.4958316388775925, "grad_norm": 0.2787749469280243, "learning_rate": 2.512611519061764e-06, "loss": 0.3075, "step": 34385 }, { "epoch": 3.4959333062220415, "grad_norm": 0.27420639991760254, "learning_rate": 2.512303669984355e-06, "loss": 0.2854, "step": 34386 }, { "epoch": 3.4960349735664904, "grad_norm": 0.2806240916252136, "learning_rate": 2.511995833439367e-06, "loss": 0.288, "step": 34387 }, { "epoch": 3.4961366409109393, "grad_norm": 0.25563499331474304, "learning_rate": 2.5116880094283548e-06, "loss": 0.2858, "step": 34388 }, { "epoch": 3.4962383082553883, "grad_norm": 0.2764660716056824, "learning_rate": 2.5113801979528674e-06, "loss": 0.3418, "step": 34389 }, { "epoch": 3.4963399755998372, "grad_norm": 0.2981998920440674, "learning_rate": 2.5110723990144533e-06, "loss": 0.29, "step": 34390 }, { "epoch": 3.496441642944286, "grad_norm": 0.26243358850479126, "learning_rate": 2.510764612614667e-06, "loss": 0.3068, "step": 34391 }, { "epoch": 3.496543310288735, "grad_norm": 0.26191502809524536, "learning_rate": 2.5104568387550565e-06, "loss": 0.3004, "step": 34392 }, { "epoch": 3.496644977633184, "grad_norm": 0.29361873865127563, "learning_rate": 2.5101490774371706e-06, "loss": 0.3241, "step": 34393 }, { "epoch": 3.496746644977633, "grad_norm": 0.2841724157333374, "learning_rate": 2.5098413286625645e-06, "loss": 0.3172, "step": 34394 }, { "epoch": 3.496848312322082, "grad_norm": 0.272877037525177, "learning_rate": 2.509533592432786e-06, "loss": 0.3481, "step": 34395 }, { "epoch": 3.496949979666531, "grad_norm": 0.26923391222953796, "learning_rate": 2.509225868749386e-06, "loss": 0.3013, "step": 34396 }, { "epoch": 3.4970516470109803, "grad_norm": 0.26675888895988464, "learning_rate": 2.5089181576139116e-06, "loss": 0.3043, "step": 34397 }, { "epoch": 3.497153314355429, "grad_norm": 0.24540291726589203, "learning_rate": 2.508610459027917e-06, "loss": 0.3002, "step": 34398 }, { "epoch": 3.497254981699878, "grad_norm": 0.2829623520374298, "learning_rate": 2.5083027729929522e-06, "loss": 0.3195, "step": 34399 }, { "epoch": 3.497356649044327, "grad_norm": 0.2821900546550751, "learning_rate": 2.5079950995105627e-06, "loss": 0.3008, "step": 34400 }, { "epoch": 3.497458316388776, "grad_norm": 0.24582186341285706, "learning_rate": 2.5076874385823048e-06, "loss": 0.2981, "step": 34401 }, { "epoch": 3.497559983733225, "grad_norm": 0.26669514179229736, "learning_rate": 2.507379790209724e-06, "loss": 0.2932, "step": 34402 }, { "epoch": 3.497661651077674, "grad_norm": 0.2722126543521881, "learning_rate": 2.5070721543943697e-06, "loss": 0.287, "step": 34403 }, { "epoch": 3.497763318422123, "grad_norm": 0.28083011507987976, "learning_rate": 2.5067645311377962e-06, "loss": 0.3167, "step": 34404 }, { "epoch": 3.497864985766572, "grad_norm": 0.2621925175189972, "learning_rate": 2.5064569204415507e-06, "loss": 0.3162, "step": 34405 }, { "epoch": 3.4979666531110207, "grad_norm": 0.29542112350463867, "learning_rate": 2.506149322307182e-06, "loss": 0.2904, "step": 34406 }, { "epoch": 3.4980683204554697, "grad_norm": 0.2863929867744446, "learning_rate": 2.5058417367362385e-06, "loss": 0.2961, "step": 34407 }, { "epoch": 3.4981699877999186, "grad_norm": 0.2736169397830963, "learning_rate": 2.5055341637302736e-06, "loss": 0.3095, "step": 34408 }, { "epoch": 3.4982716551443676, "grad_norm": 0.26003000140190125, "learning_rate": 2.505226603290835e-06, "loss": 0.3252, "step": 34409 }, { "epoch": 3.4983733224888165, "grad_norm": 0.2935764789581299, "learning_rate": 2.5049190554194703e-06, "loss": 0.3067, "step": 34410 }, { "epoch": 3.4984749898332654, "grad_norm": 0.2639920115470886, "learning_rate": 2.5046115201177316e-06, "loss": 0.2691, "step": 34411 }, { "epoch": 3.4985766571777144, "grad_norm": 0.26660677790641785, "learning_rate": 2.5043039973871676e-06, "loss": 0.3165, "step": 34412 }, { "epoch": 3.4986783245221633, "grad_norm": 0.28666186332702637, "learning_rate": 2.5039964872293254e-06, "loss": 0.3078, "step": 34413 }, { "epoch": 3.4987799918666123, "grad_norm": 0.2567962110042572, "learning_rate": 2.503688989645757e-06, "loss": 0.3266, "step": 34414 }, { "epoch": 3.4988816592110616, "grad_norm": 0.2814527750015259, "learning_rate": 2.5033815046380105e-06, "loss": 0.3269, "step": 34415 }, { "epoch": 3.4989833265555106, "grad_norm": 0.29996976256370544, "learning_rate": 2.503074032207634e-06, "loss": 0.3104, "step": 34416 }, { "epoch": 3.4990849938999595, "grad_norm": 0.2777195870876312, "learning_rate": 2.5027665723561768e-06, "loss": 0.3069, "step": 34417 }, { "epoch": 3.4991866612444085, "grad_norm": 0.29009464383125305, "learning_rate": 2.5024591250851887e-06, "loss": 0.3675, "step": 34418 }, { "epoch": 3.4992883285888574, "grad_norm": 0.27611204981803894, "learning_rate": 2.5021516903962185e-06, "loss": 0.3105, "step": 34419 }, { "epoch": 3.4993899959333064, "grad_norm": 0.2764953076839447, "learning_rate": 2.5018442682908127e-06, "loss": 0.3197, "step": 34420 }, { "epoch": 3.4994916632777553, "grad_norm": 0.27009081840515137, "learning_rate": 2.5015368587705236e-06, "loss": 0.3193, "step": 34421 }, { "epoch": 3.4995933306222042, "grad_norm": 0.2785193920135498, "learning_rate": 2.501229461836898e-06, "loss": 0.2993, "step": 34422 }, { "epoch": 3.499694997966653, "grad_norm": 0.30279502272605896, "learning_rate": 2.5009220774914832e-06, "loss": 0.2986, "step": 34423 }, { "epoch": 3.499796665311102, "grad_norm": 0.27419450879096985, "learning_rate": 2.5006147057358303e-06, "loss": 0.3277, "step": 34424 }, { "epoch": 3.499898332655551, "grad_norm": 0.2629578113555908, "learning_rate": 2.5003073465714865e-06, "loss": 0.3129, "step": 34425 }, { "epoch": 3.5, "grad_norm": 0.27997109293937683, "learning_rate": 2.5000000000000015e-06, "loss": 0.3165, "step": 34426 }, { "epoch": 3.500101667344449, "grad_norm": 0.27030953764915466, "learning_rate": 2.4996926660229193e-06, "loss": 0.3019, "step": 34427 }, { "epoch": 3.500203334688898, "grad_norm": 0.2634366750717163, "learning_rate": 2.499385344641794e-06, "loss": 0.3088, "step": 34428 }, { "epoch": 3.500305002033347, "grad_norm": 0.28104913234710693, "learning_rate": 2.49907803585817e-06, "loss": 0.3245, "step": 34429 }, { "epoch": 3.5004066693777958, "grad_norm": 0.2643261253833771, "learning_rate": 2.4987707396735957e-06, "loss": 0.3299, "step": 34430 }, { "epoch": 3.5005083367222447, "grad_norm": 0.2867838740348816, "learning_rate": 2.4984634560896215e-06, "loss": 0.2946, "step": 34431 }, { "epoch": 3.5006100040666936, "grad_norm": 0.26747357845306396, "learning_rate": 2.498156185107794e-06, "loss": 0.3301, "step": 34432 }, { "epoch": 3.5007116714111426, "grad_norm": 0.2657848298549652, "learning_rate": 2.497848926729659e-06, "loss": 0.3283, "step": 34433 }, { "epoch": 3.5008133387555915, "grad_norm": 0.2712179720401764, "learning_rate": 2.497541680956769e-06, "loss": 0.3171, "step": 34434 }, { "epoch": 3.5009150061000405, "grad_norm": 0.2896287441253662, "learning_rate": 2.4972344477906685e-06, "loss": 0.2747, "step": 34435 }, { "epoch": 3.5010166734444894, "grad_norm": 0.28954625129699707, "learning_rate": 2.4969272272329065e-06, "loss": 0.3208, "step": 34436 }, { "epoch": 3.5011183407889384, "grad_norm": 0.28046444058418274, "learning_rate": 2.496620019285028e-06, "loss": 0.3421, "step": 34437 }, { "epoch": 3.5012200081333873, "grad_norm": 0.2681897282600403, "learning_rate": 2.4963128239485856e-06, "loss": 0.3122, "step": 34438 }, { "epoch": 3.5013216754778367, "grad_norm": 0.2630012035369873, "learning_rate": 2.496005641225124e-06, "loss": 0.3126, "step": 34439 }, { "epoch": 3.5014233428222856, "grad_norm": 0.28412938117980957, "learning_rate": 2.4956984711161885e-06, "loss": 0.3158, "step": 34440 }, { "epoch": 3.5015250101667346, "grad_norm": 0.29412901401519775, "learning_rate": 2.4953913136233316e-06, "loss": 0.3206, "step": 34441 }, { "epoch": 3.5016266775111835, "grad_norm": 0.2697278559207916, "learning_rate": 2.495084168748098e-06, "loss": 0.338, "step": 34442 }, { "epoch": 3.5017283448556324, "grad_norm": 0.2784126102924347, "learning_rate": 2.4947770364920325e-06, "loss": 0.322, "step": 34443 }, { "epoch": 3.5018300122000814, "grad_norm": 0.2538858652114868, "learning_rate": 2.4944699168566876e-06, "loss": 0.3072, "step": 34444 }, { "epoch": 3.5019316795445303, "grad_norm": 0.27598536014556885, "learning_rate": 2.494162809843608e-06, "loss": 0.326, "step": 34445 }, { "epoch": 3.5020333468889793, "grad_norm": 0.30993232131004333, "learning_rate": 2.4938557154543404e-06, "loss": 0.3165, "step": 34446 }, { "epoch": 3.502135014233428, "grad_norm": 0.2740611433982849, "learning_rate": 2.4935486336904297e-06, "loss": 0.3201, "step": 34447 }, { "epoch": 3.502236681577877, "grad_norm": 0.2682448625564575, "learning_rate": 2.493241564553428e-06, "loss": 0.2917, "step": 34448 }, { "epoch": 3.502338348922326, "grad_norm": 0.2749035060405731, "learning_rate": 2.4929345080448793e-06, "loss": 0.3309, "step": 34449 }, { "epoch": 3.502440016266775, "grad_norm": 0.26989373564720154, "learning_rate": 2.4926274641663282e-06, "loss": 0.3283, "step": 34450 }, { "epoch": 3.502541683611224, "grad_norm": 0.2609997093677521, "learning_rate": 2.492320432919329e-06, "loss": 0.3397, "step": 34451 }, { "epoch": 3.502643350955673, "grad_norm": 0.2586880326271057, "learning_rate": 2.492013414305419e-06, "loss": 0.3097, "step": 34452 }, { "epoch": 3.502745018300122, "grad_norm": 0.30172744393348694, "learning_rate": 2.4917064083261498e-06, "loss": 0.2803, "step": 34453 }, { "epoch": 3.502846685644571, "grad_norm": 0.27545982599258423, "learning_rate": 2.49139941498307e-06, "loss": 0.3201, "step": 34454 }, { "epoch": 3.50294835298902, "grad_norm": 0.26679152250289917, "learning_rate": 2.491092434277723e-06, "loss": 0.3332, "step": 34455 }, { "epoch": 3.503050020333469, "grad_norm": 0.2818514108657837, "learning_rate": 2.4907854662116565e-06, "loss": 0.2966, "step": 34456 }, { "epoch": 3.503151687677918, "grad_norm": 0.277916818857193, "learning_rate": 2.4904785107864145e-06, "loss": 0.3043, "step": 34457 }, { "epoch": 3.503253355022367, "grad_norm": 0.26691368222236633, "learning_rate": 2.490171568003549e-06, "loss": 0.3271, "step": 34458 }, { "epoch": 3.503355022366816, "grad_norm": 0.2836044430732727, "learning_rate": 2.4898646378645996e-06, "loss": 0.2967, "step": 34459 }, { "epoch": 3.503456689711265, "grad_norm": 0.2633841037750244, "learning_rate": 2.4895577203711154e-06, "loss": 0.318, "step": 34460 }, { "epoch": 3.503558357055714, "grad_norm": 0.2703438997268677, "learning_rate": 2.4892508155246463e-06, "loss": 0.332, "step": 34461 }, { "epoch": 3.5036600244001628, "grad_norm": 0.268166720867157, "learning_rate": 2.4889439233267317e-06, "loss": 0.3238, "step": 34462 }, { "epoch": 3.5037616917446117, "grad_norm": 0.2803539037704468, "learning_rate": 2.488637043778922e-06, "loss": 0.2904, "step": 34463 }, { "epoch": 3.5038633590890607, "grad_norm": 0.26764732599258423, "learning_rate": 2.4883301768827605e-06, "loss": 0.3148, "step": 34464 }, { "epoch": 3.5039650264335096, "grad_norm": 0.301154762506485, "learning_rate": 2.4880233226397973e-06, "loss": 0.2843, "step": 34465 }, { "epoch": 3.5040666937779585, "grad_norm": 0.25998353958129883, "learning_rate": 2.487716481051575e-06, "loss": 0.2904, "step": 34466 }, { "epoch": 3.5041683611224075, "grad_norm": 0.27608877420425415, "learning_rate": 2.487409652119638e-06, "loss": 0.2803, "step": 34467 }, { "epoch": 3.5042700284668564, "grad_norm": 0.2921387553215027, "learning_rate": 2.487102835845538e-06, "loss": 0.3085, "step": 34468 }, { "epoch": 3.5043716958113054, "grad_norm": 0.2948492467403412, "learning_rate": 2.486796032230813e-06, "loss": 0.2946, "step": 34469 }, { "epoch": 3.5044733631557543, "grad_norm": 0.28409773111343384, "learning_rate": 2.4864892412770125e-06, "loss": 0.3012, "step": 34470 }, { "epoch": 3.5045750305002032, "grad_norm": 0.2799505591392517, "learning_rate": 2.4861824629856856e-06, "loss": 0.3234, "step": 34471 }, { "epoch": 3.504676697844652, "grad_norm": 0.2808936834335327, "learning_rate": 2.4858756973583704e-06, "loss": 0.3196, "step": 34472 }, { "epoch": 3.504778365189101, "grad_norm": 0.2739417850971222, "learning_rate": 2.4855689443966176e-06, "loss": 0.3429, "step": 34473 }, { "epoch": 3.50488003253355, "grad_norm": 0.26422882080078125, "learning_rate": 2.485262204101971e-06, "loss": 0.3164, "step": 34474 }, { "epoch": 3.504981699877999, "grad_norm": 0.27012842893600464, "learning_rate": 2.4849554764759743e-06, "loss": 0.307, "step": 34475 }, { "epoch": 3.505083367222448, "grad_norm": 0.28995540738105774, "learning_rate": 2.4846487615201757e-06, "loss": 0.2986, "step": 34476 }, { "epoch": 3.505185034566897, "grad_norm": 0.29027459025382996, "learning_rate": 2.484342059236117e-06, "loss": 0.3228, "step": 34477 }, { "epoch": 3.505286701911346, "grad_norm": 0.2878119647502899, "learning_rate": 2.4840353696253493e-06, "loss": 0.3109, "step": 34478 }, { "epoch": 3.505388369255795, "grad_norm": 0.2418166548013687, "learning_rate": 2.48372869268941e-06, "loss": 0.312, "step": 34479 }, { "epoch": 3.505490036600244, "grad_norm": 0.25890105962753296, "learning_rate": 2.4834220284298473e-06, "loss": 0.2982, "step": 34480 }, { "epoch": 3.505591703944693, "grad_norm": 0.2877568006515503, "learning_rate": 2.4831153768482098e-06, "loss": 0.3203, "step": 34481 }, { "epoch": 3.505693371289142, "grad_norm": 0.277322918176651, "learning_rate": 2.4828087379460354e-06, "loss": 0.2895, "step": 34482 }, { "epoch": 3.505795038633591, "grad_norm": 0.266777902841568, "learning_rate": 2.4825021117248744e-06, "loss": 0.312, "step": 34483 }, { "epoch": 3.50589670597804, "grad_norm": 0.2630491852760315, "learning_rate": 2.4821954981862694e-06, "loss": 0.3249, "step": 34484 }, { "epoch": 3.505998373322489, "grad_norm": 0.2743006646633148, "learning_rate": 2.4818888973317635e-06, "loss": 0.2972, "step": 34485 }, { "epoch": 3.506100040666938, "grad_norm": 0.2532511055469513, "learning_rate": 2.4815823091629045e-06, "loss": 0.3, "step": 34486 }, { "epoch": 3.5062017080113868, "grad_norm": 0.298485666513443, "learning_rate": 2.4812757336812337e-06, "loss": 0.2976, "step": 34487 }, { "epoch": 3.5063033753558357, "grad_norm": 0.25843530893325806, "learning_rate": 2.4809691708883006e-06, "loss": 0.318, "step": 34488 }, { "epoch": 3.5064050427002846, "grad_norm": 0.2858199179172516, "learning_rate": 2.4806626207856427e-06, "loss": 0.2886, "step": 34489 }, { "epoch": 3.5065067100447336, "grad_norm": 0.2771175801753998, "learning_rate": 2.480356083374808e-06, "loss": 0.2938, "step": 34490 }, { "epoch": 3.5066083773891825, "grad_norm": 0.26006653904914856, "learning_rate": 2.480049558657344e-06, "loss": 0.3129, "step": 34491 }, { "epoch": 3.5067100447336315, "grad_norm": 0.2996319830417633, "learning_rate": 2.4797430466347883e-06, "loss": 0.313, "step": 34492 }, { "epoch": 3.5068117120780804, "grad_norm": 0.2737676203250885, "learning_rate": 2.4794365473086893e-06, "loss": 0.3105, "step": 34493 }, { "epoch": 3.5069133794225293, "grad_norm": 0.26338374614715576, "learning_rate": 2.479130060680591e-06, "loss": 0.3091, "step": 34494 }, { "epoch": 3.5070150467669783, "grad_norm": 0.2695484757423401, "learning_rate": 2.4788235867520336e-06, "loss": 0.3065, "step": 34495 }, { "epoch": 3.5071167141114277, "grad_norm": 0.28168168663978577, "learning_rate": 2.478517125524566e-06, "loss": 0.3129, "step": 34496 }, { "epoch": 3.5072183814558766, "grad_norm": 0.2585170567035675, "learning_rate": 2.47821067699973e-06, "loss": 0.3308, "step": 34497 }, { "epoch": 3.5073200488003256, "grad_norm": 0.26472535729408264, "learning_rate": 2.477904241179069e-06, "loss": 0.2857, "step": 34498 }, { "epoch": 3.5074217161447745, "grad_norm": 0.2632318139076233, "learning_rate": 2.4775978180641256e-06, "loss": 0.2881, "step": 34499 }, { "epoch": 3.5075233834892234, "grad_norm": 0.27985528111457825, "learning_rate": 2.4772914076564453e-06, "loss": 0.345, "step": 34500 }, { "epoch": 3.5076250508336724, "grad_norm": 0.2611576318740845, "learning_rate": 2.476985009957575e-06, "loss": 0.2808, "step": 34501 }, { "epoch": 3.5077267181781213, "grad_norm": 0.25553908944129944, "learning_rate": 2.4766786249690505e-06, "loss": 0.3176, "step": 34502 }, { "epoch": 3.5078283855225703, "grad_norm": 0.2528409957885742, "learning_rate": 2.4763722526924217e-06, "loss": 0.3227, "step": 34503 }, { "epoch": 3.507930052867019, "grad_norm": 0.30695095658302307, "learning_rate": 2.47606589312923e-06, "loss": 0.319, "step": 34504 }, { "epoch": 3.508031720211468, "grad_norm": 0.2532925307750702, "learning_rate": 2.475759546281016e-06, "loss": 0.3099, "step": 34505 }, { "epoch": 3.508133387555917, "grad_norm": 0.2907274067401886, "learning_rate": 2.4754532121493276e-06, "loss": 0.3364, "step": 34506 }, { "epoch": 3.508235054900366, "grad_norm": 0.26713746786117554, "learning_rate": 2.4751468907357064e-06, "loss": 0.3106, "step": 34507 }, { "epoch": 3.508336722244815, "grad_norm": 0.296154648065567, "learning_rate": 2.474840582041695e-06, "loss": 0.2898, "step": 34508 }, { "epoch": 3.508438389589264, "grad_norm": 0.2830767035484314, "learning_rate": 2.4745342860688344e-06, "loss": 0.2976, "step": 34509 }, { "epoch": 3.508540056933713, "grad_norm": 0.2830202877521515, "learning_rate": 2.4742280028186723e-06, "loss": 0.3311, "step": 34510 }, { "epoch": 3.508641724278162, "grad_norm": 0.31105148792266846, "learning_rate": 2.4739217322927488e-06, "loss": 0.3532, "step": 34511 }, { "epoch": 3.5087433916226107, "grad_norm": 0.26905256509780884, "learning_rate": 2.4736154744926055e-06, "loss": 0.3541, "step": 34512 }, { "epoch": 3.5088450589670597, "grad_norm": 0.27700087428092957, "learning_rate": 2.473309229419789e-06, "loss": 0.322, "step": 34513 }, { "epoch": 3.5089467263115086, "grad_norm": 0.2646366357803345, "learning_rate": 2.473002997075841e-06, "loss": 0.3026, "step": 34514 }, { "epoch": 3.5090483936559576, "grad_norm": 0.2789374589920044, "learning_rate": 2.4726967774623005e-06, "loss": 0.284, "step": 34515 }, { "epoch": 3.5091500610004065, "grad_norm": 0.26749753952026367, "learning_rate": 2.4723905705807153e-06, "loss": 0.3436, "step": 34516 }, { "epoch": 3.5092517283448554, "grad_norm": 0.270206481218338, "learning_rate": 2.472084376432626e-06, "loss": 0.3046, "step": 34517 }, { "epoch": 3.5093533956893044, "grad_norm": 0.2859956622123718, "learning_rate": 2.4717781950195745e-06, "loss": 0.3376, "step": 34518 }, { "epoch": 3.5094550630337533, "grad_norm": 0.283910870552063, "learning_rate": 2.471472026343102e-06, "loss": 0.3326, "step": 34519 }, { "epoch": 3.5095567303782023, "grad_norm": 0.26599037647247314, "learning_rate": 2.471165870404754e-06, "loss": 0.3314, "step": 34520 }, { "epoch": 3.5096583977226516, "grad_norm": 0.25255677103996277, "learning_rate": 2.4708597272060717e-06, "loss": 0.352, "step": 34521 }, { "epoch": 3.5097600650671006, "grad_norm": 0.2766667604446411, "learning_rate": 2.4705535967485954e-06, "loss": 0.3225, "step": 34522 }, { "epoch": 3.5098617324115495, "grad_norm": 0.29454851150512695, "learning_rate": 2.4702474790338708e-06, "loss": 0.3253, "step": 34523 }, { "epoch": 3.5099633997559985, "grad_norm": 0.28982439637184143, "learning_rate": 2.4699413740634388e-06, "loss": 0.2769, "step": 34524 }, { "epoch": 3.5100650671004474, "grad_norm": 0.3158780038356781, "learning_rate": 2.469635281838838e-06, "loss": 0.3275, "step": 34525 }, { "epoch": 3.5101667344448964, "grad_norm": 0.28761038184165955, "learning_rate": 2.4693292023616154e-06, "loss": 0.2979, "step": 34526 }, { "epoch": 3.5102684017893453, "grad_norm": 0.2780841290950775, "learning_rate": 2.4690231356333115e-06, "loss": 0.3135, "step": 34527 }, { "epoch": 3.5103700691337942, "grad_norm": 0.28853750228881836, "learning_rate": 2.4687170816554673e-06, "loss": 0.3032, "step": 34528 }, { "epoch": 3.510471736478243, "grad_norm": 0.27720385789871216, "learning_rate": 2.468411040429623e-06, "loss": 0.3373, "step": 34529 }, { "epoch": 3.510573403822692, "grad_norm": 0.27325522899627686, "learning_rate": 2.468105011957324e-06, "loss": 0.3072, "step": 34530 }, { "epoch": 3.510675071167141, "grad_norm": 0.2595386207103729, "learning_rate": 2.4677989962401106e-06, "loss": 0.2955, "step": 34531 }, { "epoch": 3.51077673851159, "grad_norm": 0.2920062839984894, "learning_rate": 2.4674929932795217e-06, "loss": 0.3108, "step": 34532 }, { "epoch": 3.510878405856039, "grad_norm": 0.2782355546951294, "learning_rate": 2.467187003077104e-06, "loss": 0.3028, "step": 34533 }, { "epoch": 3.510980073200488, "grad_norm": 0.5406852960586548, "learning_rate": 2.4668810256343956e-06, "loss": 0.3332, "step": 34534 }, { "epoch": 3.511081740544937, "grad_norm": 0.2544827461242676, "learning_rate": 2.4665750609529366e-06, "loss": 0.3207, "step": 34535 }, { "epoch": 3.5111834078893858, "grad_norm": 0.25650766491889954, "learning_rate": 2.4662691090342727e-06, "loss": 0.3206, "step": 34536 }, { "epoch": 3.511285075233835, "grad_norm": 0.2678677439689636, "learning_rate": 2.465963169879943e-06, "loss": 0.3124, "step": 34537 }, { "epoch": 3.511386742578284, "grad_norm": 0.28664952516555786, "learning_rate": 2.4656572434914883e-06, "loss": 0.3214, "step": 34538 }, { "epoch": 3.511488409922733, "grad_norm": 0.26608166098594666, "learning_rate": 2.4653513298704484e-06, "loss": 0.2919, "step": 34539 }, { "epoch": 3.511590077267182, "grad_norm": 0.2767806947231293, "learning_rate": 2.4650454290183684e-06, "loss": 0.3082, "step": 34540 }, { "epoch": 3.511691744611631, "grad_norm": 0.2640267312526703, "learning_rate": 2.464739540936787e-06, "loss": 0.327, "step": 34541 }, { "epoch": 3.51179341195608, "grad_norm": 0.262085884809494, "learning_rate": 2.4644336656272433e-06, "loss": 0.2947, "step": 34542 }, { "epoch": 3.511895079300529, "grad_norm": 0.3026755750179291, "learning_rate": 2.464127803091282e-06, "loss": 0.3261, "step": 34543 }, { "epoch": 3.5119967466449777, "grad_norm": 0.2704995572566986, "learning_rate": 2.4638219533304423e-06, "loss": 0.3234, "step": 34544 }, { "epoch": 3.5120984139894267, "grad_norm": 0.2921508848667145, "learning_rate": 2.4635161163462635e-06, "loss": 0.3193, "step": 34545 }, { "epoch": 3.5122000813338756, "grad_norm": 0.2753424644470215, "learning_rate": 2.463210292140289e-06, "loss": 0.2802, "step": 34546 }, { "epoch": 3.5123017486783246, "grad_norm": 0.28455063700675964, "learning_rate": 2.4629044807140584e-06, "loss": 0.3069, "step": 34547 }, { "epoch": 3.5124034160227735, "grad_norm": 0.25981956720352173, "learning_rate": 2.4625986820691124e-06, "loss": 0.3659, "step": 34548 }, { "epoch": 3.5125050833672224, "grad_norm": 0.28939032554626465, "learning_rate": 2.462292896206989e-06, "loss": 0.3017, "step": 34549 }, { "epoch": 3.5126067507116714, "grad_norm": 0.2761364281177521, "learning_rate": 2.461987123129233e-06, "loss": 0.3167, "step": 34550 }, { "epoch": 3.5127084180561203, "grad_norm": 0.2789658308029175, "learning_rate": 2.4616813628373827e-06, "loss": 0.3025, "step": 34551 }, { "epoch": 3.5128100854005693, "grad_norm": 0.2628830075263977, "learning_rate": 2.4613756153329767e-06, "loss": 0.2961, "step": 34552 }, { "epoch": 3.512911752745018, "grad_norm": 0.2638780474662781, "learning_rate": 2.4610698806175586e-06, "loss": 0.3332, "step": 34553 }, { "epoch": 3.513013420089467, "grad_norm": 0.29139235615730286, "learning_rate": 2.460764158692668e-06, "loss": 0.3115, "step": 34554 }, { "epoch": 3.513115087433916, "grad_norm": 0.2809220552444458, "learning_rate": 2.460458449559841e-06, "loss": 0.3139, "step": 34555 }, { "epoch": 3.513216754778365, "grad_norm": 0.27816635370254517, "learning_rate": 2.460152753220624e-06, "loss": 0.2728, "step": 34556 }, { "epoch": 3.513318422122814, "grad_norm": 0.25165703892707825, "learning_rate": 2.4598470696765533e-06, "loss": 0.3103, "step": 34557 }, { "epoch": 3.513420089467263, "grad_norm": 0.28414857387542725, "learning_rate": 2.459541398929169e-06, "loss": 0.3345, "step": 34558 }, { "epoch": 3.513521756811712, "grad_norm": 0.27296513319015503, "learning_rate": 2.45923574098001e-06, "loss": 0.3207, "step": 34559 }, { "epoch": 3.513623424156161, "grad_norm": 0.27537110447883606, "learning_rate": 2.4589300958306194e-06, "loss": 0.3218, "step": 34560 }, { "epoch": 3.5137250915006097, "grad_norm": 0.27530601620674133, "learning_rate": 2.458624463482535e-06, "loss": 0.3212, "step": 34561 }, { "epoch": 3.513826758845059, "grad_norm": 0.2811334729194641, "learning_rate": 2.4583188439372946e-06, "loss": 0.318, "step": 34562 }, { "epoch": 3.513928426189508, "grad_norm": 0.26856040954589844, "learning_rate": 2.4580132371964423e-06, "loss": 0.2888, "step": 34563 }, { "epoch": 3.514030093533957, "grad_norm": 0.2661450207233429, "learning_rate": 2.4577076432615147e-06, "loss": 0.2795, "step": 34564 }, { "epoch": 3.514131760878406, "grad_norm": 0.2928653955459595, "learning_rate": 2.4574020621340506e-06, "loss": 0.3446, "step": 34565 }, { "epoch": 3.514233428222855, "grad_norm": 0.29945188760757446, "learning_rate": 2.4570964938155924e-06, "loss": 0.2975, "step": 34566 }, { "epoch": 3.514335095567304, "grad_norm": 0.26770496368408203, "learning_rate": 2.456790938307677e-06, "loss": 0.301, "step": 34567 }, { "epoch": 3.5144367629117528, "grad_norm": 0.28358232975006104, "learning_rate": 2.456485395611845e-06, "loss": 0.3173, "step": 34568 }, { "epoch": 3.5145384302562017, "grad_norm": 0.2706177234649658, "learning_rate": 2.4561798657296338e-06, "loss": 0.3283, "step": 34569 }, { "epoch": 3.5146400976006507, "grad_norm": 0.276044100522995, "learning_rate": 2.4558743486625854e-06, "loss": 0.2933, "step": 34570 }, { "epoch": 3.5147417649450996, "grad_norm": 0.2701544463634491, "learning_rate": 2.4555688444122373e-06, "loss": 0.2988, "step": 34571 }, { "epoch": 3.5148434322895485, "grad_norm": 0.3088350296020508, "learning_rate": 2.455263352980127e-06, "loss": 0.3045, "step": 34572 }, { "epoch": 3.5149450996339975, "grad_norm": 0.26654359698295593, "learning_rate": 2.4549578743677966e-06, "loss": 0.3335, "step": 34573 }, { "epoch": 3.5150467669784464, "grad_norm": 0.24712523818016052, "learning_rate": 2.4546524085767843e-06, "loss": 0.3253, "step": 34574 }, { "epoch": 3.5151484343228954, "grad_norm": 0.25959545373916626, "learning_rate": 2.454346955608626e-06, "loss": 0.3496, "step": 34575 }, { "epoch": 3.5152501016673443, "grad_norm": 0.2771783769130707, "learning_rate": 2.454041515464865e-06, "loss": 0.2762, "step": 34576 }, { "epoch": 3.5153517690117932, "grad_norm": 0.2936100959777832, "learning_rate": 2.4537360881470372e-06, "loss": 0.3007, "step": 34577 }, { "epoch": 3.5154534363562426, "grad_norm": 0.29006192088127136, "learning_rate": 2.4534306736566825e-06, "loss": 0.281, "step": 34578 }, { "epoch": 3.5155551037006916, "grad_norm": 0.26749610900878906, "learning_rate": 2.4531252719953367e-06, "loss": 0.2944, "step": 34579 }, { "epoch": 3.5156567710451405, "grad_norm": 0.2950027585029602, "learning_rate": 2.4528198831645428e-06, "loss": 0.3044, "step": 34580 }, { "epoch": 3.5157584383895895, "grad_norm": 0.27876463532447815, "learning_rate": 2.452514507165837e-06, "loss": 0.2846, "step": 34581 }, { "epoch": 3.5158601057340384, "grad_norm": 0.2768632769584656, "learning_rate": 2.4522091440007557e-06, "loss": 0.3329, "step": 34582 }, { "epoch": 3.5159617730784873, "grad_norm": 0.2565246820449829, "learning_rate": 2.451903793670841e-06, "loss": 0.3162, "step": 34583 }, { "epoch": 3.5160634404229363, "grad_norm": 0.3034273386001587, "learning_rate": 2.4515984561776294e-06, "loss": 0.3304, "step": 34584 }, { "epoch": 3.5161651077673852, "grad_norm": 0.28160205483436584, "learning_rate": 2.4512931315226578e-06, "loss": 0.2625, "step": 34585 }, { "epoch": 3.516266775111834, "grad_norm": 0.27457067370414734, "learning_rate": 2.4509878197074676e-06, "loss": 0.2984, "step": 34586 }, { "epoch": 3.516368442456283, "grad_norm": 0.2809676229953766, "learning_rate": 2.450682520733595e-06, "loss": 0.3176, "step": 34587 }, { "epoch": 3.516470109800732, "grad_norm": 0.29071325063705444, "learning_rate": 2.4503772346025776e-06, "loss": 0.3058, "step": 34588 }, { "epoch": 3.516571777145181, "grad_norm": 0.28423675894737244, "learning_rate": 2.4500719613159516e-06, "loss": 0.3349, "step": 34589 }, { "epoch": 3.51667344448963, "grad_norm": 0.2714575231075287, "learning_rate": 2.44976670087526e-06, "loss": 0.3187, "step": 34590 }, { "epoch": 3.516775111834079, "grad_norm": 0.2712518870830536, "learning_rate": 2.4494614532820372e-06, "loss": 0.2986, "step": 34591 }, { "epoch": 3.516876779178528, "grad_norm": 0.26186010241508484, "learning_rate": 2.4491562185378197e-06, "loss": 0.3118, "step": 34592 }, { "epoch": 3.5169784465229768, "grad_norm": 0.2620833218097687, "learning_rate": 2.448850996644149e-06, "loss": 0.3307, "step": 34593 }, { "epoch": 3.5170801138674257, "grad_norm": 0.286760151386261, "learning_rate": 2.4485457876025607e-06, "loss": 0.2958, "step": 34594 }, { "epoch": 3.5171817812118746, "grad_norm": 0.2633487284183502, "learning_rate": 2.44824059141459e-06, "loss": 0.3068, "step": 34595 }, { "epoch": 3.5172834485563236, "grad_norm": 0.2834763526916504, "learning_rate": 2.4479354080817798e-06, "loss": 0.294, "step": 34596 }, { "epoch": 3.5173851159007725, "grad_norm": 0.2499113231897354, "learning_rate": 2.4476302376056637e-06, "loss": 0.3167, "step": 34597 }, { "epoch": 3.5174867832452215, "grad_norm": 0.28162145614624023, "learning_rate": 2.44732507998778e-06, "loss": 0.316, "step": 34598 }, { "epoch": 3.5175884505896704, "grad_norm": 0.2659735381603241, "learning_rate": 2.447019935229664e-06, "loss": 0.3166, "step": 34599 }, { "epoch": 3.5176901179341193, "grad_norm": 0.2791232168674469, "learning_rate": 2.446714803332857e-06, "loss": 0.3061, "step": 34600 }, { "epoch": 3.5177917852785683, "grad_norm": 0.2729451656341553, "learning_rate": 2.4464096842988938e-06, "loss": 0.3073, "step": 34601 }, { "epoch": 3.5178934526230172, "grad_norm": 0.2657322883605957, "learning_rate": 2.4461045781293102e-06, "loss": 0.3149, "step": 34602 }, { "epoch": 3.5179951199674666, "grad_norm": 0.270883709192276, "learning_rate": 2.445799484825649e-06, "loss": 0.2971, "step": 34603 }, { "epoch": 3.5180967873119156, "grad_norm": 0.2912081182003021, "learning_rate": 2.4454944043894384e-06, "loss": 0.2959, "step": 34604 }, { "epoch": 3.5181984546563645, "grad_norm": 0.2903248965740204, "learning_rate": 2.44518933682222e-06, "loss": 0.3134, "step": 34605 }, { "epoch": 3.5183001220008134, "grad_norm": 0.25801265239715576, "learning_rate": 2.4448842821255333e-06, "loss": 0.2832, "step": 34606 }, { "epoch": 3.5184017893452624, "grad_norm": 0.2743915319442749, "learning_rate": 2.4445792403009118e-06, "loss": 0.3378, "step": 34607 }, { "epoch": 3.5185034566897113, "grad_norm": 0.27928370237350464, "learning_rate": 2.444274211349893e-06, "loss": 0.2815, "step": 34608 }, { "epoch": 3.5186051240341603, "grad_norm": 0.28715962171554565, "learning_rate": 2.4439691952740113e-06, "loss": 0.26, "step": 34609 }, { "epoch": 3.518706791378609, "grad_norm": 0.25032857060432434, "learning_rate": 2.443664192074807e-06, "loss": 0.3104, "step": 34610 }, { "epoch": 3.518808458723058, "grad_norm": 0.2523970305919647, "learning_rate": 2.4433592017538156e-06, "loss": 0.3315, "step": 34611 }, { "epoch": 3.518910126067507, "grad_norm": 0.2768362760543823, "learning_rate": 2.443054224312571e-06, "loss": 0.3014, "step": 34612 }, { "epoch": 3.519011793411956, "grad_norm": 0.27929410338401794, "learning_rate": 2.4427492597526154e-06, "loss": 0.3047, "step": 34613 }, { "epoch": 3.519113460756405, "grad_norm": 0.27258458733558655, "learning_rate": 2.4424443080754773e-06, "loss": 0.3047, "step": 34614 }, { "epoch": 3.519215128100854, "grad_norm": 0.28453317284584045, "learning_rate": 2.442139369282697e-06, "loss": 0.2897, "step": 34615 }, { "epoch": 3.519316795445303, "grad_norm": 0.28537794947624207, "learning_rate": 2.441834443375813e-06, "loss": 0.2949, "step": 34616 }, { "epoch": 3.519418462789752, "grad_norm": 0.27625373005867004, "learning_rate": 2.4415295303563586e-06, "loss": 0.3279, "step": 34617 }, { "epoch": 3.5195201301342007, "grad_norm": 0.2862202823162079, "learning_rate": 2.441224630225871e-06, "loss": 0.3175, "step": 34618 }, { "epoch": 3.51962179747865, "grad_norm": 0.26349568367004395, "learning_rate": 2.4409197429858833e-06, "loss": 0.2793, "step": 34619 }, { "epoch": 3.519723464823099, "grad_norm": 0.2757052183151245, "learning_rate": 2.4406148686379362e-06, "loss": 0.3257, "step": 34620 }, { "epoch": 3.519825132167548, "grad_norm": 0.28390541672706604, "learning_rate": 2.4403100071835628e-06, "loss": 0.2886, "step": 34621 }, { "epoch": 3.519926799511997, "grad_norm": 0.26658111810684204, "learning_rate": 2.4400051586242974e-06, "loss": 0.3117, "step": 34622 }, { "epoch": 3.520028466856446, "grad_norm": 0.27319642901420593, "learning_rate": 2.4397003229616816e-06, "loss": 0.3108, "step": 34623 }, { "epoch": 3.520130134200895, "grad_norm": 0.2726501524448395, "learning_rate": 2.4393955001972436e-06, "loss": 0.3003, "step": 34624 }, { "epoch": 3.5202318015453438, "grad_norm": 0.26558536291122437, "learning_rate": 2.4390906903325222e-06, "loss": 0.3139, "step": 34625 }, { "epoch": 3.5203334688897927, "grad_norm": 0.2899872958660126, "learning_rate": 2.438785893369057e-06, "loss": 0.2863, "step": 34626 }, { "epoch": 3.5204351362342416, "grad_norm": 0.30002567172050476, "learning_rate": 2.4384811093083768e-06, "loss": 0.3177, "step": 34627 }, { "epoch": 3.5205368035786906, "grad_norm": 0.24813662469387054, "learning_rate": 2.4381763381520214e-06, "loss": 0.272, "step": 34628 }, { "epoch": 3.5206384709231395, "grad_norm": 0.29146143794059753, "learning_rate": 2.4378715799015225e-06, "loss": 0.3019, "step": 34629 }, { "epoch": 3.5207401382675885, "grad_norm": 0.264222115278244, "learning_rate": 2.4375668345584204e-06, "loss": 0.307, "step": 34630 }, { "epoch": 3.5208418056120374, "grad_norm": 0.2755260467529297, "learning_rate": 2.437262102124247e-06, "loss": 0.307, "step": 34631 }, { "epoch": 3.5209434729564864, "grad_norm": 0.2751905024051666, "learning_rate": 2.4369573826005365e-06, "loss": 0.3187, "step": 34632 }, { "epoch": 3.5210451403009353, "grad_norm": 0.2607375383377075, "learning_rate": 2.43665267598883e-06, "loss": 0.3108, "step": 34633 }, { "epoch": 3.5211468076453842, "grad_norm": 0.30762016773223877, "learning_rate": 2.436347982290654e-06, "loss": 0.316, "step": 34634 }, { "epoch": 3.521248474989833, "grad_norm": 0.2780771851539612, "learning_rate": 2.4360433015075467e-06, "loss": 0.2935, "step": 34635 }, { "epoch": 3.521350142334282, "grad_norm": 0.25960978865623474, "learning_rate": 2.4357386336410486e-06, "loss": 0.2875, "step": 34636 }, { "epoch": 3.521451809678731, "grad_norm": 0.2655390501022339, "learning_rate": 2.435433978692687e-06, "loss": 0.3094, "step": 34637 }, { "epoch": 3.52155347702318, "grad_norm": 0.26051852107048035, "learning_rate": 2.435129336664e-06, "loss": 0.2984, "step": 34638 }, { "epoch": 3.521655144367629, "grad_norm": 0.2729569971561432, "learning_rate": 2.4348247075565205e-06, "loss": 0.3302, "step": 34639 }, { "epoch": 3.521756811712078, "grad_norm": 0.2715558707714081, "learning_rate": 2.4345200913717863e-06, "loss": 0.3141, "step": 34640 }, { "epoch": 3.521858479056527, "grad_norm": 0.2839198410511017, "learning_rate": 2.4342154881113304e-06, "loss": 0.2821, "step": 34641 }, { "epoch": 3.5219601464009758, "grad_norm": 0.29535913467407227, "learning_rate": 2.4339108977766857e-06, "loss": 0.3126, "step": 34642 }, { "epoch": 3.5220618137454247, "grad_norm": 0.2747412919998169, "learning_rate": 2.4336063203693914e-06, "loss": 0.328, "step": 34643 }, { "epoch": 3.522163481089874, "grad_norm": 0.27453377842903137, "learning_rate": 2.433301755890975e-06, "loss": 0.301, "step": 34644 }, { "epoch": 3.522265148434323, "grad_norm": 0.2656758725643158, "learning_rate": 2.432997204342976e-06, "loss": 0.3066, "step": 34645 }, { "epoch": 3.522366815778772, "grad_norm": 0.27454495429992676, "learning_rate": 2.4326926657269275e-06, "loss": 0.3394, "step": 34646 }, { "epoch": 3.522468483123221, "grad_norm": 0.2654215097427368, "learning_rate": 2.432388140044361e-06, "loss": 0.325, "step": 34647 }, { "epoch": 3.52257015046767, "grad_norm": 0.2581061124801636, "learning_rate": 2.432083627296815e-06, "loss": 0.3419, "step": 34648 }, { "epoch": 3.522671817812119, "grad_norm": 0.26688826084136963, "learning_rate": 2.431779127485821e-06, "loss": 0.2924, "step": 34649 }, { "epoch": 3.5227734851565677, "grad_norm": 0.26857006549835205, "learning_rate": 2.431474640612914e-06, "loss": 0.3434, "step": 34650 }, { "epoch": 3.5228751525010167, "grad_norm": 0.27964073419570923, "learning_rate": 2.4311701666796255e-06, "loss": 0.292, "step": 34651 }, { "epoch": 3.5229768198454656, "grad_norm": 0.2662123739719391, "learning_rate": 2.430865705687491e-06, "loss": 0.3191, "step": 34652 }, { "epoch": 3.5230784871899146, "grad_norm": 0.2619725167751312, "learning_rate": 2.4305612576380484e-06, "loss": 0.3243, "step": 34653 }, { "epoch": 3.5231801545343635, "grad_norm": 0.2649025619029999, "learning_rate": 2.4302568225328237e-06, "loss": 0.315, "step": 34654 }, { "epoch": 3.5232818218788124, "grad_norm": 0.2721598446369171, "learning_rate": 2.4299524003733566e-06, "loss": 0.2872, "step": 34655 }, { "epoch": 3.5233834892232614, "grad_norm": 0.28241270780563354, "learning_rate": 2.4296479911611782e-06, "loss": 0.3267, "step": 34656 }, { "epoch": 3.5234851565677103, "grad_norm": 0.2524769604206085, "learning_rate": 2.429343594897821e-06, "loss": 0.3076, "step": 34657 }, { "epoch": 3.5235868239121593, "grad_norm": 0.27094826102256775, "learning_rate": 2.4290392115848216e-06, "loss": 0.3085, "step": 34658 }, { "epoch": 3.523688491256608, "grad_norm": 0.2534143030643463, "learning_rate": 2.4287348412237118e-06, "loss": 0.3352, "step": 34659 }, { "epoch": 3.5237901586010576, "grad_norm": 0.28387731313705444, "learning_rate": 2.4284304838160244e-06, "loss": 0.33, "step": 34660 }, { "epoch": 3.5238918259455065, "grad_norm": 0.2612467110157013, "learning_rate": 2.4281261393632917e-06, "loss": 0.3065, "step": 34661 }, { "epoch": 3.5239934932899555, "grad_norm": 0.2705094516277313, "learning_rate": 2.4278218078670495e-06, "loss": 0.3259, "step": 34662 }, { "epoch": 3.5240951606344044, "grad_norm": 0.2574129104614258, "learning_rate": 2.4275174893288308e-06, "loss": 0.3104, "step": 34663 }, { "epoch": 3.5241968279788534, "grad_norm": 0.29949793219566345, "learning_rate": 2.427213183750165e-06, "loss": 0.2974, "step": 34664 }, { "epoch": 3.5242984953233023, "grad_norm": 0.2663417160511017, "learning_rate": 2.42690889113259e-06, "loss": 0.3176, "step": 34665 }, { "epoch": 3.5244001626677512, "grad_norm": 0.27012598514556885, "learning_rate": 2.4266046114776365e-06, "loss": 0.3224, "step": 34666 }, { "epoch": 3.5245018300122, "grad_norm": 0.26947203278541565, "learning_rate": 2.4263003447868355e-06, "loss": 0.3298, "step": 34667 }, { "epoch": 3.524603497356649, "grad_norm": 0.27136528491973877, "learning_rate": 2.425996091061724e-06, "loss": 0.3228, "step": 34668 }, { "epoch": 3.524705164701098, "grad_norm": 0.29846617579460144, "learning_rate": 2.425691850303832e-06, "loss": 0.3011, "step": 34669 }, { "epoch": 3.524806832045547, "grad_norm": 0.2782747149467468, "learning_rate": 2.425387622514693e-06, "loss": 0.3554, "step": 34670 }, { "epoch": 3.524908499389996, "grad_norm": 0.2731182873249054, "learning_rate": 2.4250834076958375e-06, "loss": 0.2924, "step": 34671 }, { "epoch": 3.525010166734445, "grad_norm": 0.2483101338148117, "learning_rate": 2.424779205848802e-06, "loss": 0.2631, "step": 34672 }, { "epoch": 3.525111834078894, "grad_norm": 0.29186001420021057, "learning_rate": 2.4244750169751164e-06, "loss": 0.2844, "step": 34673 }, { "epoch": 3.5252135014233428, "grad_norm": 0.2551456391811371, "learning_rate": 2.424170841076312e-06, "loss": 0.3197, "step": 34674 }, { "epoch": 3.5253151687677917, "grad_norm": 0.26107051968574524, "learning_rate": 2.4238666781539243e-06, "loss": 0.3706, "step": 34675 }, { "epoch": 3.5254168361122407, "grad_norm": 0.2951945662498474, "learning_rate": 2.4235625282094844e-06, "loss": 0.3038, "step": 34676 }, { "epoch": 3.5255185034566896, "grad_norm": 0.27063941955566406, "learning_rate": 2.4232583912445224e-06, "loss": 0.2925, "step": 34677 }, { "epoch": 3.5256201708011385, "grad_norm": 0.3018706738948822, "learning_rate": 2.422954267260574e-06, "loss": 0.3062, "step": 34678 }, { "epoch": 3.5257218381455875, "grad_norm": 0.2757185697555542, "learning_rate": 2.4226501562591698e-06, "loss": 0.2961, "step": 34679 }, { "epoch": 3.5258235054900364, "grad_norm": 0.28086167573928833, "learning_rate": 2.422346058241842e-06, "loss": 0.3227, "step": 34680 }, { "epoch": 3.5259251728344854, "grad_norm": 0.28132280707359314, "learning_rate": 2.4220419732101196e-06, "loss": 0.3038, "step": 34681 }, { "epoch": 3.5260268401789343, "grad_norm": 0.2790903151035309, "learning_rate": 2.421737901165539e-06, "loss": 0.3006, "step": 34682 }, { "epoch": 3.5261285075233832, "grad_norm": 0.2975374758243561, "learning_rate": 2.4214338421096306e-06, "loss": 0.3198, "step": 34683 }, { "epoch": 3.526230174867832, "grad_norm": 0.2826772630214691, "learning_rate": 2.421129796043924e-06, "loss": 0.3189, "step": 34684 }, { "epoch": 3.5263318422122816, "grad_norm": 0.2715471088886261, "learning_rate": 2.4208257629699538e-06, "loss": 0.3119, "step": 34685 }, { "epoch": 3.5264335095567305, "grad_norm": 0.2594248652458191, "learning_rate": 2.4205217428892514e-06, "loss": 0.3279, "step": 34686 }, { "epoch": 3.5265351769011795, "grad_norm": 0.29492926597595215, "learning_rate": 2.4202177358033453e-06, "loss": 0.3137, "step": 34687 }, { "epoch": 3.5266368442456284, "grad_norm": 0.2874615788459778, "learning_rate": 2.4199137417137706e-06, "loss": 0.312, "step": 34688 }, { "epoch": 3.5267385115900773, "grad_norm": 0.2543271780014038, "learning_rate": 2.4196097606220575e-06, "loss": 0.3089, "step": 34689 }, { "epoch": 3.5268401789345263, "grad_norm": 0.2620607018470764, "learning_rate": 2.4193057925297375e-06, "loss": 0.3141, "step": 34690 }, { "epoch": 3.5269418462789752, "grad_norm": 0.2507783770561218, "learning_rate": 2.4190018374383394e-06, "loss": 0.3297, "step": 34691 }, { "epoch": 3.527043513623424, "grad_norm": 0.29183104634284973, "learning_rate": 2.418697895349399e-06, "loss": 0.2882, "step": 34692 }, { "epoch": 3.527145180967873, "grad_norm": 0.2725815176963806, "learning_rate": 2.4183939662644453e-06, "loss": 0.3403, "step": 34693 }, { "epoch": 3.527246848312322, "grad_norm": 0.26854586601257324, "learning_rate": 2.4180900501850067e-06, "loss": 0.316, "step": 34694 }, { "epoch": 3.527348515656771, "grad_norm": 0.27998051047325134, "learning_rate": 2.4177861471126196e-06, "loss": 0.286, "step": 34695 }, { "epoch": 3.52745018300122, "grad_norm": 0.2700244188308716, "learning_rate": 2.417482257048812e-06, "loss": 0.3458, "step": 34696 }, { "epoch": 3.527551850345669, "grad_norm": 0.28794214129447937, "learning_rate": 2.4171783799951133e-06, "loss": 0.2974, "step": 34697 }, { "epoch": 3.527653517690118, "grad_norm": 0.26026320457458496, "learning_rate": 2.416874515953058e-06, "loss": 0.3043, "step": 34698 }, { "epoch": 3.5277551850345668, "grad_norm": 0.26744940876960754, "learning_rate": 2.4165706649241745e-06, "loss": 0.3069, "step": 34699 }, { "epoch": 3.5278568523790157, "grad_norm": 0.27017682790756226, "learning_rate": 2.4162668269099944e-06, "loss": 0.297, "step": 34700 }, { "epoch": 3.527958519723465, "grad_norm": 0.2817401885986328, "learning_rate": 2.4159630019120463e-06, "loss": 0.2991, "step": 34701 }, { "epoch": 3.528060187067914, "grad_norm": 0.2898271381855011, "learning_rate": 2.4156591899318642e-06, "loss": 0.3127, "step": 34702 }, { "epoch": 3.528161854412363, "grad_norm": 0.2750334143638611, "learning_rate": 2.415355390970977e-06, "loss": 0.3308, "step": 34703 }, { "epoch": 3.528263521756812, "grad_norm": 0.26497480273246765, "learning_rate": 2.415051605030914e-06, "loss": 0.3211, "step": 34704 }, { "epoch": 3.528365189101261, "grad_norm": 0.27578410506248474, "learning_rate": 2.4147478321132075e-06, "loss": 0.2772, "step": 34705 }, { "epoch": 3.52846685644571, "grad_norm": 0.28174465894699097, "learning_rate": 2.4144440722193874e-06, "loss": 0.3133, "step": 34706 }, { "epoch": 3.5285685237901587, "grad_norm": 0.29653704166412354, "learning_rate": 2.414140325350982e-06, "loss": 0.2962, "step": 34707 }, { "epoch": 3.5286701911346077, "grad_norm": 0.2551391124725342, "learning_rate": 2.4138365915095246e-06, "loss": 0.2941, "step": 34708 }, { "epoch": 3.5287718584790566, "grad_norm": 0.25294220447540283, "learning_rate": 2.4135328706965443e-06, "loss": 0.3201, "step": 34709 }, { "epoch": 3.5288735258235056, "grad_norm": 0.27125585079193115, "learning_rate": 2.4132291629135705e-06, "loss": 0.2793, "step": 34710 }, { "epoch": 3.5289751931679545, "grad_norm": 0.26791438460350037, "learning_rate": 2.412925468162132e-06, "loss": 0.2856, "step": 34711 }, { "epoch": 3.5290768605124034, "grad_norm": 0.26913750171661377, "learning_rate": 2.412621786443762e-06, "loss": 0.3101, "step": 34712 }, { "epoch": 3.5291785278568524, "grad_norm": 0.2702227532863617, "learning_rate": 2.4123181177599886e-06, "loss": 0.2981, "step": 34713 }, { "epoch": 3.5292801952013013, "grad_norm": 0.25671443343162537, "learning_rate": 2.412014462112339e-06, "loss": 0.306, "step": 34714 }, { "epoch": 3.5293818625457503, "grad_norm": 0.26071301102638245, "learning_rate": 2.4117108195023482e-06, "loss": 0.3041, "step": 34715 }, { "epoch": 3.529483529890199, "grad_norm": 0.2782866954803467, "learning_rate": 2.411407189931543e-06, "loss": 0.3046, "step": 34716 }, { "epoch": 3.529585197234648, "grad_norm": 0.2820538282394409, "learning_rate": 2.4111035734014515e-06, "loss": 0.2941, "step": 34717 }, { "epoch": 3.529686864579097, "grad_norm": 0.2589264214038849, "learning_rate": 2.4107999699136064e-06, "loss": 0.3184, "step": 34718 }, { "epoch": 3.529788531923546, "grad_norm": 0.2693992853164673, "learning_rate": 2.4104963794695363e-06, "loss": 0.3034, "step": 34719 }, { "epoch": 3.529890199267995, "grad_norm": 0.3323706090450287, "learning_rate": 2.4101928020707698e-06, "loss": 0.2934, "step": 34720 }, { "epoch": 3.529991866612444, "grad_norm": 0.24860455095767975, "learning_rate": 2.4098892377188347e-06, "loss": 0.2946, "step": 34721 }, { "epoch": 3.530093533956893, "grad_norm": 0.28439152240753174, "learning_rate": 2.409585686415264e-06, "loss": 0.28, "step": 34722 }, { "epoch": 3.530195201301342, "grad_norm": 0.2722398042678833, "learning_rate": 2.409282148161585e-06, "loss": 0.2882, "step": 34723 }, { "epoch": 3.5302968686457907, "grad_norm": 0.2601904273033142, "learning_rate": 2.408978622959325e-06, "loss": 0.299, "step": 34724 }, { "epoch": 3.5303985359902397, "grad_norm": 0.2666985094547272, "learning_rate": 2.4086751108100172e-06, "loss": 0.312, "step": 34725 }, { "epoch": 3.530500203334689, "grad_norm": 0.26611390709877014, "learning_rate": 2.4083716117151877e-06, "loss": 0.275, "step": 34726 }, { "epoch": 3.530601870679138, "grad_norm": 0.27328553795814514, "learning_rate": 2.408068125676365e-06, "loss": 0.2826, "step": 34727 }, { "epoch": 3.530703538023587, "grad_norm": 0.2893044054508209, "learning_rate": 2.4077646526950805e-06, "loss": 0.2962, "step": 34728 }, { "epoch": 3.530805205368036, "grad_norm": 0.27800190448760986, "learning_rate": 2.4074611927728617e-06, "loss": 0.2992, "step": 34729 }, { "epoch": 3.530906872712485, "grad_norm": 0.25133267045021057, "learning_rate": 2.4071577459112376e-06, "loss": 0.2989, "step": 34730 }, { "epoch": 3.5310085400569338, "grad_norm": 0.2680336833000183, "learning_rate": 2.4068543121117343e-06, "loss": 0.3162, "step": 34731 }, { "epoch": 3.5311102074013827, "grad_norm": 0.28858131170272827, "learning_rate": 2.406550891375885e-06, "loss": 0.2844, "step": 34732 }, { "epoch": 3.5312118747458316, "grad_norm": 0.2978200316429138, "learning_rate": 2.4062474837052163e-06, "loss": 0.2837, "step": 34733 }, { "epoch": 3.5313135420902806, "grad_norm": 0.26968154311180115, "learning_rate": 2.405944089101254e-06, "loss": 0.3112, "step": 34734 }, { "epoch": 3.5314152094347295, "grad_norm": 0.2910873293876648, "learning_rate": 2.4056407075655307e-06, "loss": 0.3096, "step": 34735 }, { "epoch": 3.5315168767791785, "grad_norm": 0.2973020076751709, "learning_rate": 2.405337339099574e-06, "loss": 0.3226, "step": 34736 }, { "epoch": 3.5316185441236274, "grad_norm": 0.2694109082221985, "learning_rate": 2.405033983704908e-06, "loss": 0.3175, "step": 34737 }, { "epoch": 3.5317202114680764, "grad_norm": 0.2903517484664917, "learning_rate": 2.404730641383067e-06, "loss": 0.3109, "step": 34738 }, { "epoch": 3.5318218788125253, "grad_norm": 0.2900305390357971, "learning_rate": 2.404427312135576e-06, "loss": 0.3037, "step": 34739 }, { "epoch": 3.5319235461569742, "grad_norm": 0.26275596022605896, "learning_rate": 2.404123995963963e-06, "loss": 0.3139, "step": 34740 }, { "epoch": 3.532025213501423, "grad_norm": 0.2663625180721283, "learning_rate": 2.403820692869755e-06, "loss": 0.3116, "step": 34741 }, { "epoch": 3.5321268808458726, "grad_norm": 0.28256115317344666, "learning_rate": 2.403517402854483e-06, "loss": 0.2913, "step": 34742 }, { "epoch": 3.5322285481903215, "grad_norm": 0.25270235538482666, "learning_rate": 2.403214125919674e-06, "loss": 0.3226, "step": 34743 }, { "epoch": 3.5323302155347704, "grad_norm": 0.26561057567596436, "learning_rate": 2.402910862066852e-06, "loss": 0.3285, "step": 34744 }, { "epoch": 3.5324318828792194, "grad_norm": 0.2611527740955353, "learning_rate": 2.402607611297551e-06, "loss": 0.298, "step": 34745 }, { "epoch": 3.5325335502236683, "grad_norm": 0.25662386417388916, "learning_rate": 2.402304373613295e-06, "loss": 0.3142, "step": 34746 }, { "epoch": 3.5326352175681173, "grad_norm": 0.2654815912246704, "learning_rate": 2.4020011490156107e-06, "loss": 0.321, "step": 34747 }, { "epoch": 3.532736884912566, "grad_norm": 0.27728140354156494, "learning_rate": 2.401697937506029e-06, "loss": 0.3107, "step": 34748 }, { "epoch": 3.532838552257015, "grad_norm": 0.2660665810108185, "learning_rate": 2.401394739086075e-06, "loss": 0.3122, "step": 34749 }, { "epoch": 3.532940219601464, "grad_norm": 0.29247164726257324, "learning_rate": 2.4010915537572775e-06, "loss": 0.3143, "step": 34750 }, { "epoch": 3.533041886945913, "grad_norm": 0.28134506940841675, "learning_rate": 2.400788381521161e-06, "loss": 0.3273, "step": 34751 }, { "epoch": 3.533143554290362, "grad_norm": 0.2761215269565582, "learning_rate": 2.4004852223792574e-06, "loss": 0.3043, "step": 34752 }, { "epoch": 3.533245221634811, "grad_norm": 0.25402599573135376, "learning_rate": 2.400182076333091e-06, "loss": 0.2835, "step": 34753 }, { "epoch": 3.53334688897926, "grad_norm": 0.2591429352760315, "learning_rate": 2.399878943384188e-06, "loss": 0.3005, "step": 34754 }, { "epoch": 3.533448556323709, "grad_norm": 0.27220606803894043, "learning_rate": 2.3995758235340805e-06, "loss": 0.2707, "step": 34755 }, { "epoch": 3.5335502236681577, "grad_norm": 0.27845048904418945, "learning_rate": 2.3992727167842883e-06, "loss": 0.324, "step": 34756 }, { "epoch": 3.5336518910126067, "grad_norm": 0.29786956310272217, "learning_rate": 2.3989696231363417e-06, "loss": 0.3133, "step": 34757 }, { "epoch": 3.5337535583570556, "grad_norm": 0.27278339862823486, "learning_rate": 2.3986665425917706e-06, "loss": 0.3135, "step": 34758 }, { "epoch": 3.5338552257015046, "grad_norm": 0.26085159182548523, "learning_rate": 2.398363475152099e-06, "loss": 0.2808, "step": 34759 }, { "epoch": 3.5339568930459535, "grad_norm": 0.2649451494216919, "learning_rate": 2.3980604208188534e-06, "loss": 0.32, "step": 34760 }, { "epoch": 3.5340585603904024, "grad_norm": 0.27044111490249634, "learning_rate": 2.39775737959356e-06, "loss": 0.2973, "step": 34761 }, { "epoch": 3.5341602277348514, "grad_norm": 0.27087655663490295, "learning_rate": 2.397454351477748e-06, "loss": 0.3156, "step": 34762 }, { "epoch": 3.5342618950793003, "grad_norm": 0.2680318057537079, "learning_rate": 2.397151336472943e-06, "loss": 0.3178, "step": 34763 }, { "epoch": 3.5343635624237493, "grad_norm": 0.27586841583251953, "learning_rate": 2.3968483345806685e-06, "loss": 0.3283, "step": 34764 }, { "epoch": 3.534465229768198, "grad_norm": 0.2620123326778412, "learning_rate": 2.3965453458024577e-06, "loss": 0.2974, "step": 34765 }, { "epoch": 3.534566897112647, "grad_norm": 0.27890744805336, "learning_rate": 2.3962423701398292e-06, "loss": 0.3276, "step": 34766 }, { "epoch": 3.5346685644570965, "grad_norm": 0.2808579206466675, "learning_rate": 2.3959394075943127e-06, "loss": 0.2965, "step": 34767 }, { "epoch": 3.5347702318015455, "grad_norm": 0.2676871418952942, "learning_rate": 2.3956364581674363e-06, "loss": 0.3107, "step": 34768 }, { "epoch": 3.5348718991459944, "grad_norm": 0.2648516893386841, "learning_rate": 2.3953335218607245e-06, "loss": 0.2876, "step": 34769 }, { "epoch": 3.5349735664904434, "grad_norm": 0.28543195128440857, "learning_rate": 2.3950305986757034e-06, "loss": 0.2894, "step": 34770 }, { "epoch": 3.5350752338348923, "grad_norm": 0.27314090728759766, "learning_rate": 2.3947276886138975e-06, "loss": 0.2753, "step": 34771 }, { "epoch": 3.5351769011793412, "grad_norm": 0.2695677876472473, "learning_rate": 2.3944247916768366e-06, "loss": 0.3152, "step": 34772 }, { "epoch": 3.53527856852379, "grad_norm": 0.2796207666397095, "learning_rate": 2.394121907866044e-06, "loss": 0.315, "step": 34773 }, { "epoch": 3.535380235868239, "grad_norm": 0.2722914516925812, "learning_rate": 2.393819037183044e-06, "loss": 0.3203, "step": 34774 }, { "epoch": 3.535481903212688, "grad_norm": 0.2752035856246948, "learning_rate": 2.3935161796293683e-06, "loss": 0.3509, "step": 34775 }, { "epoch": 3.535583570557137, "grad_norm": 0.27590852975845337, "learning_rate": 2.3932133352065346e-06, "loss": 0.2983, "step": 34776 }, { "epoch": 3.535685237901586, "grad_norm": 0.27707183361053467, "learning_rate": 2.3929105039160726e-06, "loss": 0.3395, "step": 34777 }, { "epoch": 3.535786905246035, "grad_norm": 0.29338935017585754, "learning_rate": 2.3926076857595124e-06, "loss": 0.3212, "step": 34778 }, { "epoch": 3.535888572590484, "grad_norm": 0.27748391032218933, "learning_rate": 2.3923048807383708e-06, "loss": 0.2732, "step": 34779 }, { "epoch": 3.5359902399349328, "grad_norm": 0.2829585373401642, "learning_rate": 2.3920020888541795e-06, "loss": 0.329, "step": 34780 }, { "epoch": 3.5360919072793817, "grad_norm": 0.29941362142562866, "learning_rate": 2.39169931010846e-06, "loss": 0.3213, "step": 34781 }, { "epoch": 3.5361935746238307, "grad_norm": 0.2656732201576233, "learning_rate": 2.3913965445027414e-06, "loss": 0.3084, "step": 34782 }, { "epoch": 3.53629524196828, "grad_norm": 0.28925859928131104, "learning_rate": 2.3910937920385474e-06, "loss": 0.3446, "step": 34783 }, { "epoch": 3.536396909312729, "grad_norm": 0.2657751739025116, "learning_rate": 2.390791052717401e-06, "loss": 0.3093, "step": 34784 }, { "epoch": 3.536498576657178, "grad_norm": 0.277924507856369, "learning_rate": 2.390488326540833e-06, "loss": 0.2902, "step": 34785 }, { "epoch": 3.536600244001627, "grad_norm": 0.26437586545944214, "learning_rate": 2.390185613510361e-06, "loss": 0.3078, "step": 34786 }, { "epoch": 3.536701911346076, "grad_norm": 0.279598593711853, "learning_rate": 2.3898829136275133e-06, "loss": 0.3365, "step": 34787 }, { "epoch": 3.5368035786905248, "grad_norm": 0.2658558189868927, "learning_rate": 2.389580226893819e-06, "loss": 0.3069, "step": 34788 }, { "epoch": 3.5369052460349737, "grad_norm": 0.29772844910621643, "learning_rate": 2.3892775533107954e-06, "loss": 0.3555, "step": 34789 }, { "epoch": 3.5370069133794226, "grad_norm": 0.26063916087150574, "learning_rate": 2.388974892879973e-06, "loss": 0.3326, "step": 34790 }, { "epoch": 3.5371085807238716, "grad_norm": 0.26776430010795593, "learning_rate": 2.3886722456028733e-06, "loss": 0.3038, "step": 34791 }, { "epoch": 3.5372102480683205, "grad_norm": 0.2922986149787903, "learning_rate": 2.3883696114810235e-06, "loss": 0.3217, "step": 34792 }, { "epoch": 3.5373119154127695, "grad_norm": 0.2700212001800537, "learning_rate": 2.388066990515947e-06, "loss": 0.2902, "step": 34793 }, { "epoch": 3.5374135827572184, "grad_norm": 0.2800062894821167, "learning_rate": 2.387764382709167e-06, "loss": 0.2881, "step": 34794 }, { "epoch": 3.5375152501016673, "grad_norm": 0.30079177021980286, "learning_rate": 2.3874617880622123e-06, "loss": 0.342, "step": 34795 }, { "epoch": 3.5376169174461163, "grad_norm": 0.26422441005706787, "learning_rate": 2.3871592065766014e-06, "loss": 0.2999, "step": 34796 }, { "epoch": 3.5377185847905652, "grad_norm": 0.2811625897884369, "learning_rate": 2.3868566382538604e-06, "loss": 0.3172, "step": 34797 }, { "epoch": 3.537820252135014, "grad_norm": 0.2699195444583893, "learning_rate": 2.386554083095519e-06, "loss": 0.3282, "step": 34798 }, { "epoch": 3.537921919479463, "grad_norm": 0.29070353507995605, "learning_rate": 2.386251541103094e-06, "loss": 0.3197, "step": 34799 }, { "epoch": 3.538023586823912, "grad_norm": 0.2828487157821655, "learning_rate": 2.3859490122781138e-06, "loss": 0.3121, "step": 34800 }, { "epoch": 3.538125254168361, "grad_norm": 0.277032732963562, "learning_rate": 2.385646496622101e-06, "loss": 0.3431, "step": 34801 }, { "epoch": 3.53822692151281, "grad_norm": 0.2807823121547699, "learning_rate": 2.3853439941365785e-06, "loss": 0.3048, "step": 34802 }, { "epoch": 3.538328588857259, "grad_norm": 0.2806128263473511, "learning_rate": 2.3850415048230736e-06, "loss": 0.3042, "step": 34803 }, { "epoch": 3.538430256201708, "grad_norm": 0.2863471210002899, "learning_rate": 2.3847390286831057e-06, "loss": 0.3329, "step": 34804 }, { "epoch": 3.5385319235461568, "grad_norm": 0.25076690316200256, "learning_rate": 2.384436565718205e-06, "loss": 0.2952, "step": 34805 }, { "epoch": 3.5386335908906057, "grad_norm": 0.27568379044532776, "learning_rate": 2.384134115929888e-06, "loss": 0.3033, "step": 34806 }, { "epoch": 3.5387352582350546, "grad_norm": 0.2839156687259674, "learning_rate": 2.3838316793196814e-06, "loss": 0.3293, "step": 34807 }, { "epoch": 3.538836925579504, "grad_norm": 0.25841906666755676, "learning_rate": 2.383529255889113e-06, "loss": 0.3613, "step": 34808 }, { "epoch": 3.538938592923953, "grad_norm": 0.268303781747818, "learning_rate": 2.3832268456396985e-06, "loss": 0.3005, "step": 34809 }, { "epoch": 3.539040260268402, "grad_norm": 0.26651260256767273, "learning_rate": 2.3829244485729676e-06, "loss": 0.3117, "step": 34810 }, { "epoch": 3.539141927612851, "grad_norm": 0.2808510363101959, "learning_rate": 2.382622064690441e-06, "loss": 0.2868, "step": 34811 }, { "epoch": 3.5392435949573, "grad_norm": 0.28405359387397766, "learning_rate": 2.3823196939936406e-06, "loss": 0.3186, "step": 34812 }, { "epoch": 3.5393452623017487, "grad_norm": 0.29414165019989014, "learning_rate": 2.3820173364840937e-06, "loss": 0.3156, "step": 34813 }, { "epoch": 3.5394469296461977, "grad_norm": 0.26480382680892944, "learning_rate": 2.381714992163321e-06, "loss": 0.2921, "step": 34814 }, { "epoch": 3.5395485969906466, "grad_norm": 0.28228452801704407, "learning_rate": 2.3814126610328457e-06, "loss": 0.3118, "step": 34815 }, { "epoch": 3.5396502643350956, "grad_norm": 0.27303966879844666, "learning_rate": 2.38111034309419e-06, "loss": 0.3132, "step": 34816 }, { "epoch": 3.5397519316795445, "grad_norm": 0.2671376168727875, "learning_rate": 2.380808038348878e-06, "loss": 0.3089, "step": 34817 }, { "epoch": 3.5398535990239934, "grad_norm": 0.27510735392570496, "learning_rate": 2.380505746798436e-06, "loss": 0.2838, "step": 34818 }, { "epoch": 3.5399552663684424, "grad_norm": 0.25611525774002075, "learning_rate": 2.3802034684443797e-06, "loss": 0.2936, "step": 34819 }, { "epoch": 3.5400569337128913, "grad_norm": 0.28395992517471313, "learning_rate": 2.3799012032882378e-06, "loss": 0.315, "step": 34820 }, { "epoch": 3.5401586010573403, "grad_norm": 0.2743057310581207, "learning_rate": 2.3795989513315314e-06, "loss": 0.3249, "step": 34821 }, { "epoch": 3.540260268401789, "grad_norm": 0.28104695677757263, "learning_rate": 2.3792967125757806e-06, "loss": 0.2812, "step": 34822 }, { "epoch": 3.540361935746238, "grad_norm": 0.24816085398197174, "learning_rate": 2.3789944870225116e-06, "loss": 0.3215, "step": 34823 }, { "epoch": 3.5404636030906875, "grad_norm": 0.2715923488140106, "learning_rate": 2.3786922746732467e-06, "loss": 0.3383, "step": 34824 }, { "epoch": 3.5405652704351365, "grad_norm": 0.25948309898376465, "learning_rate": 2.378390075529506e-06, "loss": 0.2944, "step": 34825 }, { "epoch": 3.5406669377795854, "grad_norm": 0.2570160925388336, "learning_rate": 2.3780878895928123e-06, "loss": 0.3011, "step": 34826 }, { "epoch": 3.5407686051240344, "grad_norm": 0.26568901538848877, "learning_rate": 2.377785716864688e-06, "loss": 0.3088, "step": 34827 }, { "epoch": 3.5408702724684833, "grad_norm": 0.2575623095035553, "learning_rate": 2.3774835573466597e-06, "loss": 0.3237, "step": 34828 }, { "epoch": 3.5409719398129322, "grad_norm": 0.28294795751571655, "learning_rate": 2.377181411040243e-06, "loss": 0.3114, "step": 34829 }, { "epoch": 3.541073607157381, "grad_norm": 0.27232885360717773, "learning_rate": 2.376879277946965e-06, "loss": 0.3232, "step": 34830 }, { "epoch": 3.54117527450183, "grad_norm": 0.25300654768943787, "learning_rate": 2.376577158068345e-06, "loss": 0.3341, "step": 34831 }, { "epoch": 3.541276941846279, "grad_norm": 0.2746520936489105, "learning_rate": 2.376275051405904e-06, "loss": 0.3123, "step": 34832 }, { "epoch": 3.541378609190728, "grad_norm": 0.2931525707244873, "learning_rate": 2.3759729579611684e-06, "loss": 0.2903, "step": 34833 }, { "epoch": 3.541480276535177, "grad_norm": 0.2638907730579376, "learning_rate": 2.3756708777356575e-06, "loss": 0.2886, "step": 34834 }, { "epoch": 3.541581943879626, "grad_norm": 0.27828454971313477, "learning_rate": 2.3753688107308923e-06, "loss": 0.3216, "step": 34835 }, { "epoch": 3.541683611224075, "grad_norm": 0.2771652638912201, "learning_rate": 2.3750667569483945e-06, "loss": 0.3184, "step": 34836 }, { "epoch": 3.5417852785685238, "grad_norm": 0.2507369816303253, "learning_rate": 2.3747647163896877e-06, "loss": 0.3147, "step": 34837 }, { "epoch": 3.5418869459129727, "grad_norm": 0.2527610659599304, "learning_rate": 2.374462689056292e-06, "loss": 0.295, "step": 34838 }, { "epoch": 3.5419886132574216, "grad_norm": 0.2687436640262604, "learning_rate": 2.374160674949728e-06, "loss": 0.3246, "step": 34839 }, { "epoch": 3.5420902806018706, "grad_norm": 0.27337291836738586, "learning_rate": 2.373858674071521e-06, "loss": 0.3241, "step": 34840 }, { "epoch": 3.5421919479463195, "grad_norm": 0.27193817496299744, "learning_rate": 2.373556686423189e-06, "loss": 0.3006, "step": 34841 }, { "epoch": 3.5422936152907685, "grad_norm": 0.280328631401062, "learning_rate": 2.3732547120062547e-06, "loss": 0.2984, "step": 34842 }, { "epoch": 3.5423952826352174, "grad_norm": 0.27576881647109985, "learning_rate": 2.3729527508222372e-06, "loss": 0.3222, "step": 34843 }, { "epoch": 3.5424969499796664, "grad_norm": 0.2630775272846222, "learning_rate": 2.3726508028726614e-06, "loss": 0.3565, "step": 34844 }, { "epoch": 3.5425986173241153, "grad_norm": 0.2654403746128082, "learning_rate": 2.372348868159046e-06, "loss": 0.2893, "step": 34845 }, { "epoch": 3.5427002846685642, "grad_norm": 0.27882155776023865, "learning_rate": 2.372046946682911e-06, "loss": 0.2926, "step": 34846 }, { "epoch": 3.542801952013013, "grad_norm": 0.2891596257686615, "learning_rate": 2.371745038445781e-06, "loss": 0.3049, "step": 34847 }, { "epoch": 3.542903619357462, "grad_norm": 0.2728583514690399, "learning_rate": 2.3714431434491746e-06, "loss": 0.3218, "step": 34848 }, { "epoch": 3.5430052867019115, "grad_norm": 0.26374807953834534, "learning_rate": 2.371141261694611e-06, "loss": 0.3051, "step": 34849 }, { "epoch": 3.5431069540463604, "grad_norm": 0.25962722301483154, "learning_rate": 2.3708393931836154e-06, "loss": 0.2946, "step": 34850 }, { "epoch": 3.5432086213908094, "grad_norm": 0.26775023341178894, "learning_rate": 2.370537537917706e-06, "loss": 0.292, "step": 34851 }, { "epoch": 3.5433102887352583, "grad_norm": 0.2898675501346588, "learning_rate": 2.3702356958984036e-06, "loss": 0.2812, "step": 34852 }, { "epoch": 3.5434119560797073, "grad_norm": 0.2767758071422577, "learning_rate": 2.369933867127227e-06, "loss": 0.3113, "step": 34853 }, { "epoch": 3.543513623424156, "grad_norm": 0.283515065908432, "learning_rate": 2.3696320516057e-06, "loss": 0.308, "step": 34854 }, { "epoch": 3.543615290768605, "grad_norm": 0.2751953601837158, "learning_rate": 2.369330249335342e-06, "loss": 0.318, "step": 34855 }, { "epoch": 3.543716958113054, "grad_norm": 0.2506486773490906, "learning_rate": 2.3690284603176717e-06, "loss": 0.3019, "step": 34856 }, { "epoch": 3.543818625457503, "grad_norm": 0.3139795958995819, "learning_rate": 2.3687266845542116e-06, "loss": 0.353, "step": 34857 }, { "epoch": 3.543920292801952, "grad_norm": 0.286794513463974, "learning_rate": 2.3684249220464822e-06, "loss": 0.3211, "step": 34858 }, { "epoch": 3.544021960146401, "grad_norm": 0.275343656539917, "learning_rate": 2.368123172796e-06, "loss": 0.2799, "step": 34859 }, { "epoch": 3.54412362749085, "grad_norm": 0.26476770639419556, "learning_rate": 2.3678214368042907e-06, "loss": 0.2877, "step": 34860 }, { "epoch": 3.544225294835299, "grad_norm": 0.2849714159965515, "learning_rate": 2.3675197140728706e-06, "loss": 0.3101, "step": 34861 }, { "epoch": 3.5443269621797477, "grad_norm": 0.2867826521396637, "learning_rate": 2.3672180046032607e-06, "loss": 0.3102, "step": 34862 }, { "epoch": 3.5444286295241967, "grad_norm": 0.3120866119861603, "learning_rate": 2.3669163083969794e-06, "loss": 0.2956, "step": 34863 }, { "epoch": 3.5445302968686456, "grad_norm": 0.27413684129714966, "learning_rate": 2.3666146254555495e-06, "loss": 0.3226, "step": 34864 }, { "epoch": 3.544631964213095, "grad_norm": 0.2828918695449829, "learning_rate": 2.3663129557804894e-06, "loss": 0.3121, "step": 34865 }, { "epoch": 3.544733631557544, "grad_norm": 0.29817140102386475, "learning_rate": 2.366011299373317e-06, "loss": 0.3075, "step": 34866 }, { "epoch": 3.544835298901993, "grad_norm": 0.27698981761932373, "learning_rate": 2.365709656235556e-06, "loss": 0.3018, "step": 34867 }, { "epoch": 3.544936966246442, "grad_norm": 0.2873838543891907, "learning_rate": 2.365408026368723e-06, "loss": 0.3158, "step": 34868 }, { "epoch": 3.5450386335908908, "grad_norm": 0.29040414094924927, "learning_rate": 2.3651064097743366e-06, "loss": 0.3195, "step": 34869 }, { "epoch": 3.5451403009353397, "grad_norm": 0.26354965567588806, "learning_rate": 2.3648048064539204e-06, "loss": 0.3389, "step": 34870 }, { "epoch": 3.5452419682797887, "grad_norm": 0.2895556092262268, "learning_rate": 2.364503216408991e-06, "loss": 0.3513, "step": 34871 }, { "epoch": 3.5453436356242376, "grad_norm": 0.2600604295730591, "learning_rate": 2.364201639641068e-06, "loss": 0.3059, "step": 34872 }, { "epoch": 3.5454453029686865, "grad_norm": 0.29183706641197205, "learning_rate": 2.363900076151669e-06, "loss": 0.3376, "step": 34873 }, { "epoch": 3.5455469703131355, "grad_norm": 0.2980605661869049, "learning_rate": 2.3635985259423167e-06, "loss": 0.2853, "step": 34874 }, { "epoch": 3.5456486376575844, "grad_norm": 0.27973318099975586, "learning_rate": 2.3632969890145286e-06, "loss": 0.3164, "step": 34875 }, { "epoch": 3.5457503050020334, "grad_norm": 0.26051443815231323, "learning_rate": 2.362995465369822e-06, "loss": 0.2792, "step": 34876 }, { "epoch": 3.5458519723464823, "grad_norm": 0.27004680037498474, "learning_rate": 2.362693955009719e-06, "loss": 0.3135, "step": 34877 }, { "epoch": 3.5459536396909312, "grad_norm": 0.27854466438293457, "learning_rate": 2.3623924579357377e-06, "loss": 0.3132, "step": 34878 }, { "epoch": 3.54605530703538, "grad_norm": 0.2702576518058777, "learning_rate": 2.362090974149394e-06, "loss": 0.3148, "step": 34879 }, { "epoch": 3.546156974379829, "grad_norm": 0.248310387134552, "learning_rate": 2.3617895036522117e-06, "loss": 0.3097, "step": 34880 }, { "epoch": 3.546258641724278, "grad_norm": 0.25714752078056335, "learning_rate": 2.3614880464457064e-06, "loss": 0.2954, "step": 34881 }, { "epoch": 3.546360309068727, "grad_norm": 0.28122058510780334, "learning_rate": 2.3611866025313974e-06, "loss": 0.3011, "step": 34882 }, { "epoch": 3.546461976413176, "grad_norm": 0.2745082974433899, "learning_rate": 2.360885171910801e-06, "loss": 0.2976, "step": 34883 }, { "epoch": 3.546563643757625, "grad_norm": 0.29738977551460266, "learning_rate": 2.3605837545854404e-06, "loss": 0.3244, "step": 34884 }, { "epoch": 3.546665311102074, "grad_norm": 0.27680081129074097, "learning_rate": 2.3602823505568312e-06, "loss": 0.2987, "step": 34885 }, { "epoch": 3.5467669784465228, "grad_norm": 0.2611144185066223, "learning_rate": 2.3599809598264906e-06, "loss": 0.2757, "step": 34886 }, { "epoch": 3.5468686457909717, "grad_norm": 0.26120176911354065, "learning_rate": 2.3596795823959408e-06, "loss": 0.2956, "step": 34887 }, { "epoch": 3.5469703131354207, "grad_norm": 0.2550358176231384, "learning_rate": 2.3593782182666974e-06, "loss": 0.3007, "step": 34888 }, { "epoch": 3.5470719804798696, "grad_norm": 0.2999563217163086, "learning_rate": 2.359076867440277e-06, "loss": 0.3021, "step": 34889 }, { "epoch": 3.547173647824319, "grad_norm": 0.25942668318748474, "learning_rate": 2.3587755299182018e-06, "loss": 0.3107, "step": 34890 }, { "epoch": 3.547275315168768, "grad_norm": 0.2842613458633423, "learning_rate": 2.3584742057019877e-06, "loss": 0.3241, "step": 34891 }, { "epoch": 3.547376982513217, "grad_norm": 0.28610000014305115, "learning_rate": 2.358172894793153e-06, "loss": 0.3005, "step": 34892 }, { "epoch": 3.547478649857666, "grad_norm": 0.295964777469635, "learning_rate": 2.357871597193214e-06, "loss": 0.317, "step": 34893 }, { "epoch": 3.5475803172021148, "grad_norm": 0.29845738410949707, "learning_rate": 2.3575703129036914e-06, "loss": 0.3006, "step": 34894 }, { "epoch": 3.5476819845465637, "grad_norm": 0.2672535181045532, "learning_rate": 2.357269041926102e-06, "loss": 0.3025, "step": 34895 }, { "epoch": 3.5477836518910126, "grad_norm": 0.2948245406150818, "learning_rate": 2.3569677842619603e-06, "loss": 0.2975, "step": 34896 }, { "epoch": 3.5478853192354616, "grad_norm": 0.262928307056427, "learning_rate": 2.35666653991279e-06, "loss": 0.3079, "step": 34897 }, { "epoch": 3.5479869865799105, "grad_norm": 0.2986501157283783, "learning_rate": 2.356365308880105e-06, "loss": 0.3015, "step": 34898 }, { "epoch": 3.5480886539243595, "grad_norm": 0.2722293734550476, "learning_rate": 2.356064091165421e-06, "loss": 0.2745, "step": 34899 }, { "epoch": 3.5481903212688084, "grad_norm": 0.26797962188720703, "learning_rate": 2.35576288677026e-06, "loss": 0.2891, "step": 34900 }, { "epoch": 3.5482919886132573, "grad_norm": 0.2773720920085907, "learning_rate": 2.355461695696137e-06, "loss": 0.2993, "step": 34901 }, { "epoch": 3.5483936559577063, "grad_norm": 0.25694042444229126, "learning_rate": 2.3551605179445702e-06, "loss": 0.3399, "step": 34902 }, { "epoch": 3.5484953233021552, "grad_norm": 0.2744804322719574, "learning_rate": 2.3548593535170737e-06, "loss": 0.2741, "step": 34903 }, { "epoch": 3.548596990646604, "grad_norm": 0.2623007893562317, "learning_rate": 2.354558202415169e-06, "loss": 0.3072, "step": 34904 }, { "epoch": 3.5486986579910536, "grad_norm": 0.2592746615409851, "learning_rate": 2.3542570646403723e-06, "loss": 0.3234, "step": 34905 }, { "epoch": 3.5488003253355025, "grad_norm": 0.29912006855010986, "learning_rate": 2.3539559401941968e-06, "loss": 0.306, "step": 34906 }, { "epoch": 3.5489019926799514, "grad_norm": 0.27817049622535706, "learning_rate": 2.3536548290781667e-06, "loss": 0.303, "step": 34907 }, { "epoch": 3.5490036600244004, "grad_norm": 0.2763459086418152, "learning_rate": 2.3533537312937914e-06, "loss": 0.2793, "step": 34908 }, { "epoch": 3.5491053273688493, "grad_norm": 0.25922301411628723, "learning_rate": 2.3530526468425906e-06, "loss": 0.3143, "step": 34909 }, { "epoch": 3.5492069947132983, "grad_norm": 0.2683553397655487, "learning_rate": 2.3527515757260838e-06, "loss": 0.3032, "step": 34910 }, { "epoch": 3.549308662057747, "grad_norm": 0.268500953912735, "learning_rate": 2.352450517945785e-06, "loss": 0.2962, "step": 34911 }, { "epoch": 3.549410329402196, "grad_norm": 0.23870843648910522, "learning_rate": 2.3521494735032114e-06, "loss": 0.3136, "step": 34912 }, { "epoch": 3.549511996746645, "grad_norm": 0.27464255690574646, "learning_rate": 2.3518484423998777e-06, "loss": 0.3207, "step": 34913 }, { "epoch": 3.549613664091094, "grad_norm": 0.2698572874069214, "learning_rate": 2.3515474246373046e-06, "loss": 0.3065, "step": 34914 }, { "epoch": 3.549715331435543, "grad_norm": 0.28318578004837036, "learning_rate": 2.3512464202170055e-06, "loss": 0.3232, "step": 34915 }, { "epoch": 3.549816998779992, "grad_norm": 0.2777763307094574, "learning_rate": 2.350945429140496e-06, "loss": 0.3318, "step": 34916 }, { "epoch": 3.549918666124441, "grad_norm": 0.2694922983646393, "learning_rate": 2.3506444514092973e-06, "loss": 0.3122, "step": 34917 }, { "epoch": 3.55002033346889, "grad_norm": 0.27058666944503784, "learning_rate": 2.3503434870249193e-06, "loss": 0.2923, "step": 34918 }, { "epoch": 3.5501220008133387, "grad_norm": 0.2740131914615631, "learning_rate": 2.3500425359888807e-06, "loss": 0.31, "step": 34919 }, { "epoch": 3.5502236681577877, "grad_norm": 0.26623794436454773, "learning_rate": 2.3497415983027e-06, "loss": 0.3082, "step": 34920 }, { "epoch": 3.5503253355022366, "grad_norm": 0.25627949833869934, "learning_rate": 2.349440673967891e-06, "loss": 0.3606, "step": 34921 }, { "epoch": 3.5504270028466856, "grad_norm": 0.2842141091823578, "learning_rate": 2.34913976298597e-06, "loss": 0.2864, "step": 34922 }, { "epoch": 3.5505286701911345, "grad_norm": 0.28595852851867676, "learning_rate": 2.3488388653584513e-06, "loss": 0.3168, "step": 34923 }, { "epoch": 3.5506303375355834, "grad_norm": 0.2775118350982666, "learning_rate": 2.348537981086855e-06, "loss": 0.3382, "step": 34924 }, { "epoch": 3.5507320048800324, "grad_norm": 0.27101820707321167, "learning_rate": 2.348237110172693e-06, "loss": 0.3021, "step": 34925 }, { "epoch": 3.5508336722244813, "grad_norm": 0.29362115263938904, "learning_rate": 2.347936252617481e-06, "loss": 0.2896, "step": 34926 }, { "epoch": 3.5509353395689303, "grad_norm": 0.28936803340911865, "learning_rate": 2.34763540842274e-06, "loss": 0.3123, "step": 34927 }, { "epoch": 3.551037006913379, "grad_norm": 0.2763393521308899, "learning_rate": 2.347334577589977e-06, "loss": 0.3219, "step": 34928 }, { "epoch": 3.551138674257828, "grad_norm": 0.3224053680896759, "learning_rate": 2.3470337601207126e-06, "loss": 0.3508, "step": 34929 }, { "epoch": 3.551240341602277, "grad_norm": 0.29365891218185425, "learning_rate": 2.3467329560164655e-06, "loss": 0.2949, "step": 34930 }, { "epoch": 3.5513420089467265, "grad_norm": 0.27297019958496094, "learning_rate": 2.346432165278743e-06, "loss": 0.3007, "step": 34931 }, { "epoch": 3.5514436762911754, "grad_norm": 0.27167680859565735, "learning_rate": 2.346131387909067e-06, "loss": 0.307, "step": 34932 }, { "epoch": 3.5515453436356244, "grad_norm": 0.2736847996711731, "learning_rate": 2.345830623908948e-06, "loss": 0.3038, "step": 34933 }, { "epoch": 3.5516470109800733, "grad_norm": 0.2592313885688782, "learning_rate": 2.345529873279906e-06, "loss": 0.296, "step": 34934 }, { "epoch": 3.5517486783245222, "grad_norm": 0.28529712557792664, "learning_rate": 2.3452291360234537e-06, "loss": 0.3415, "step": 34935 }, { "epoch": 3.551850345668971, "grad_norm": 0.264186829328537, "learning_rate": 2.3449284121411038e-06, "loss": 0.2993, "step": 34936 }, { "epoch": 3.55195201301342, "grad_norm": 0.2731248736381531, "learning_rate": 2.3446277016343777e-06, "loss": 0.326, "step": 34937 }, { "epoch": 3.552053680357869, "grad_norm": 0.2820020914077759, "learning_rate": 2.3443270045047824e-06, "loss": 0.3109, "step": 34938 }, { "epoch": 3.552155347702318, "grad_norm": 0.2826762795448303, "learning_rate": 2.3440263207538366e-06, "loss": 0.3409, "step": 34939 }, { "epoch": 3.552257015046767, "grad_norm": 0.29831016063690186, "learning_rate": 2.3437256503830586e-06, "loss": 0.3136, "step": 34940 }, { "epoch": 3.552358682391216, "grad_norm": 0.2706749439239502, "learning_rate": 2.343424993393956e-06, "loss": 0.3072, "step": 34941 }, { "epoch": 3.552460349735665, "grad_norm": 0.28971943259239197, "learning_rate": 2.343124349788049e-06, "loss": 0.2908, "step": 34942 }, { "epoch": 3.5525620170801138, "grad_norm": 0.24678656458854675, "learning_rate": 2.342823719566848e-06, "loss": 0.2948, "step": 34943 }, { "epoch": 3.5526636844245627, "grad_norm": 0.2675515115261078, "learning_rate": 2.342523102731872e-06, "loss": 0.3442, "step": 34944 }, { "epoch": 3.5527653517690116, "grad_norm": 0.2751130163669586, "learning_rate": 2.342222499284632e-06, "loss": 0.3333, "step": 34945 }, { "epoch": 3.552867019113461, "grad_norm": 0.2667860984802246, "learning_rate": 2.3419219092266424e-06, "loss": 0.3117, "step": 34946 }, { "epoch": 3.55296868645791, "grad_norm": 0.266413152217865, "learning_rate": 2.341621332559422e-06, "loss": 0.2972, "step": 34947 }, { "epoch": 3.553070353802359, "grad_norm": 0.285316526889801, "learning_rate": 2.3413207692844788e-06, "loss": 0.2854, "step": 34948 }, { "epoch": 3.553172021146808, "grad_norm": 0.2645067274570465, "learning_rate": 2.3410202194033286e-06, "loss": 0.3003, "step": 34949 }, { "epoch": 3.553273688491257, "grad_norm": 0.24582640826702118, "learning_rate": 2.340719682917491e-06, "loss": 0.3032, "step": 34950 }, { "epoch": 3.5533753558357057, "grad_norm": 0.27507030963897705, "learning_rate": 2.340419159828472e-06, "loss": 0.3129, "step": 34951 }, { "epoch": 3.5534770231801547, "grad_norm": 0.2794002294540405, "learning_rate": 2.340118650137791e-06, "loss": 0.3208, "step": 34952 }, { "epoch": 3.5535786905246036, "grad_norm": 0.2771379053592682, "learning_rate": 2.33981815384696e-06, "loss": 0.3094, "step": 34953 }, { "epoch": 3.5536803578690526, "grad_norm": 0.26068273186683655, "learning_rate": 2.3395176709574913e-06, "loss": 0.3157, "step": 34954 }, { "epoch": 3.5537820252135015, "grad_norm": 0.28844180703163147, "learning_rate": 2.339217201470902e-06, "loss": 0.2959, "step": 34955 }, { "epoch": 3.5538836925579504, "grad_norm": 0.2787286341190338, "learning_rate": 2.3389167453887024e-06, "loss": 0.322, "step": 34956 }, { "epoch": 3.5539853599023994, "grad_norm": 0.2541263699531555, "learning_rate": 2.3386163027124115e-06, "loss": 0.2925, "step": 34957 }, { "epoch": 3.5540870272468483, "grad_norm": 0.27157697081565857, "learning_rate": 2.3383158734435357e-06, "loss": 0.333, "step": 34958 }, { "epoch": 3.5541886945912973, "grad_norm": 0.28118738532066345, "learning_rate": 2.3380154575835917e-06, "loss": 0.3249, "step": 34959 }, { "epoch": 3.554290361935746, "grad_norm": 0.2882872521877289, "learning_rate": 2.3377150551340973e-06, "loss": 0.2889, "step": 34960 }, { "epoch": 3.554392029280195, "grad_norm": 0.27316367626190186, "learning_rate": 2.337414666096558e-06, "loss": 0.3164, "step": 34961 }, { "epoch": 3.554493696624644, "grad_norm": 0.26395687460899353, "learning_rate": 2.3371142904724923e-06, "loss": 0.2808, "step": 34962 }, { "epoch": 3.554595363969093, "grad_norm": 0.27634039521217346, "learning_rate": 2.3368139282634128e-06, "loss": 0.3311, "step": 34963 }, { "epoch": 3.554697031313542, "grad_norm": 0.28415876626968384, "learning_rate": 2.3365135794708294e-06, "loss": 0.3142, "step": 34964 }, { "epoch": 3.554798698657991, "grad_norm": 0.2628996670246124, "learning_rate": 2.3362132440962597e-06, "loss": 0.317, "step": 34965 }, { "epoch": 3.55490036600244, "grad_norm": 0.27837690711021423, "learning_rate": 2.335912922141215e-06, "loss": 0.3393, "step": 34966 }, { "epoch": 3.555002033346889, "grad_norm": 0.3253495693206787, "learning_rate": 2.335612613607208e-06, "loss": 0.3275, "step": 34967 }, { "epoch": 3.5551037006913377, "grad_norm": 0.2731666564941406, "learning_rate": 2.335312318495749e-06, "loss": 0.3142, "step": 34968 }, { "epoch": 3.5552053680357867, "grad_norm": 0.2772783041000366, "learning_rate": 2.3350120368083533e-06, "loss": 0.3057, "step": 34969 }, { "epoch": 3.5553070353802356, "grad_norm": 0.27611231803894043, "learning_rate": 2.3347117685465376e-06, "loss": 0.312, "step": 34970 }, { "epoch": 3.5554087027246846, "grad_norm": 0.2843903601169586, "learning_rate": 2.334411513711807e-06, "loss": 0.294, "step": 34971 }, { "epoch": 3.555510370069134, "grad_norm": 0.2906455099582672, "learning_rate": 2.334111272305679e-06, "loss": 0.3372, "step": 34972 }, { "epoch": 3.555612037413583, "grad_norm": 0.2811879813671112, "learning_rate": 2.333811044329666e-06, "loss": 0.3169, "step": 34973 }, { "epoch": 3.555713704758032, "grad_norm": 0.27914997935295105, "learning_rate": 2.3335108297852766e-06, "loss": 0.3049, "step": 34974 }, { "epoch": 3.5558153721024808, "grad_norm": 0.2857802212238312, "learning_rate": 2.3332106286740286e-06, "loss": 0.3301, "step": 34975 }, { "epoch": 3.5559170394469297, "grad_norm": 0.2947220504283905, "learning_rate": 2.332910440997431e-06, "loss": 0.3031, "step": 34976 }, { "epoch": 3.5560187067913787, "grad_norm": 0.285523384809494, "learning_rate": 2.3326102667569972e-06, "loss": 0.2935, "step": 34977 }, { "epoch": 3.5561203741358276, "grad_norm": 0.2860433757305145, "learning_rate": 2.332310105954237e-06, "loss": 0.2915, "step": 34978 }, { "epoch": 3.5562220414802765, "grad_norm": 0.26790231466293335, "learning_rate": 2.3320099585906646e-06, "loss": 0.3212, "step": 34979 }, { "epoch": 3.5563237088247255, "grad_norm": 0.2661851942539215, "learning_rate": 2.3317098246677962e-06, "loss": 0.2883, "step": 34980 }, { "epoch": 3.5564253761691744, "grad_norm": 0.2776280343532562, "learning_rate": 2.3314097041871353e-06, "loss": 0.3299, "step": 34981 }, { "epoch": 3.5565270435136234, "grad_norm": 0.2660524845123291, "learning_rate": 2.3311095971502e-06, "loss": 0.3249, "step": 34982 }, { "epoch": 3.5566287108580723, "grad_norm": 0.27384111285209656, "learning_rate": 2.3308095035585005e-06, "loss": 0.2829, "step": 34983 }, { "epoch": 3.5567303782025212, "grad_norm": 0.26521924138069153, "learning_rate": 2.330509423413547e-06, "loss": 0.3019, "step": 34984 }, { "epoch": 3.55683204554697, "grad_norm": 0.2809787690639496, "learning_rate": 2.330209356716854e-06, "loss": 0.3225, "step": 34985 }, { "epoch": 3.556933712891419, "grad_norm": 0.2838449776172638, "learning_rate": 2.3299093034699317e-06, "loss": 0.2865, "step": 34986 }, { "epoch": 3.5570353802358685, "grad_norm": 0.2808893322944641, "learning_rate": 2.329609263674292e-06, "loss": 0.292, "step": 34987 }, { "epoch": 3.5571370475803175, "grad_norm": 0.2651686668395996, "learning_rate": 2.3293092373314442e-06, "loss": 0.2833, "step": 34988 }, { "epoch": 3.5572387149247664, "grad_norm": 0.2746618688106537, "learning_rate": 2.3290092244429043e-06, "loss": 0.3348, "step": 34989 }, { "epoch": 3.5573403822692153, "grad_norm": 0.25710025429725647, "learning_rate": 2.3287092250101806e-06, "loss": 0.327, "step": 34990 }, { "epoch": 3.5574420496136643, "grad_norm": 0.26811426877975464, "learning_rate": 2.3284092390347836e-06, "loss": 0.3216, "step": 34991 }, { "epoch": 3.5575437169581132, "grad_norm": 0.28703299164772034, "learning_rate": 2.328109266518227e-06, "loss": 0.2764, "step": 34992 }, { "epoch": 3.557645384302562, "grad_norm": 0.25622573494911194, "learning_rate": 2.3278093074620217e-06, "loss": 0.3407, "step": 34993 }, { "epoch": 3.557747051647011, "grad_norm": 0.28035593032836914, "learning_rate": 2.3275093618676765e-06, "loss": 0.3186, "step": 34994 }, { "epoch": 3.55784871899146, "grad_norm": 0.2826726734638214, "learning_rate": 2.3272094297367054e-06, "loss": 0.2969, "step": 34995 }, { "epoch": 3.557950386335909, "grad_norm": 0.2945026159286499, "learning_rate": 2.326909511070618e-06, "loss": 0.3219, "step": 34996 }, { "epoch": 3.558052053680358, "grad_norm": 0.2760958671569824, "learning_rate": 2.3266096058709255e-06, "loss": 0.3207, "step": 34997 }, { "epoch": 3.558153721024807, "grad_norm": 0.28999146819114685, "learning_rate": 2.3263097141391365e-06, "loss": 0.3089, "step": 34998 }, { "epoch": 3.558255388369256, "grad_norm": 0.29075121879577637, "learning_rate": 2.326009835876766e-06, "loss": 0.3461, "step": 34999 }, { "epoch": 3.5583570557137048, "grad_norm": 0.27277877926826477, "learning_rate": 2.325709971085322e-06, "loss": 0.3556, "step": 35000 }, { "epoch": 3.5584587230581537, "grad_norm": 0.2760413587093353, "learning_rate": 2.325410119766314e-06, "loss": 0.3421, "step": 35001 }, { "epoch": 3.5585603904026026, "grad_norm": 0.2545149624347687, "learning_rate": 2.3251102819212562e-06, "loss": 0.3188, "step": 35002 }, { "epoch": 3.5586620577470516, "grad_norm": 0.2533832788467407, "learning_rate": 2.3248104575516568e-06, "loss": 0.3079, "step": 35003 }, { "epoch": 3.5587637250915005, "grad_norm": 0.2630881667137146, "learning_rate": 2.324510646659025e-06, "loss": 0.3413, "step": 35004 }, { "epoch": 3.5588653924359495, "grad_norm": 0.27655264735221863, "learning_rate": 2.3242108492448747e-06, "loss": 0.3187, "step": 35005 }, { "epoch": 3.5589670597803984, "grad_norm": 0.2817291021347046, "learning_rate": 2.3239110653107142e-06, "loss": 0.3006, "step": 35006 }, { "epoch": 3.5590687271248473, "grad_norm": 0.2765429615974426, "learning_rate": 2.3236112948580534e-06, "loss": 0.2815, "step": 35007 }, { "epoch": 3.5591703944692963, "grad_norm": 0.2622515857219696, "learning_rate": 2.323311537888402e-06, "loss": 0.331, "step": 35008 }, { "epoch": 3.5592720618137452, "grad_norm": 0.2684991955757141, "learning_rate": 2.3230117944032722e-06, "loss": 0.3184, "step": 35009 }, { "epoch": 3.559373729158194, "grad_norm": 0.2663934826850891, "learning_rate": 2.3227120644041727e-06, "loss": 0.3006, "step": 35010 }, { "epoch": 3.559475396502643, "grad_norm": 0.26605895161628723, "learning_rate": 2.3224123478926126e-06, "loss": 0.294, "step": 35011 }, { "epoch": 3.559577063847092, "grad_norm": 0.27784860134124756, "learning_rate": 2.322112644870104e-06, "loss": 0.3115, "step": 35012 }, { "epoch": 3.5596787311915414, "grad_norm": 0.29261696338653564, "learning_rate": 2.321812955338155e-06, "loss": 0.3123, "step": 35013 }, { "epoch": 3.5597803985359904, "grad_norm": 0.2778875529766083, "learning_rate": 2.3215132792982747e-06, "loss": 0.3033, "step": 35014 }, { "epoch": 3.5598820658804393, "grad_norm": 0.2794229984283447, "learning_rate": 2.321213616751976e-06, "loss": 0.3484, "step": 35015 }, { "epoch": 3.5599837332248883, "grad_norm": 0.2587544620037079, "learning_rate": 2.3209139677007665e-06, "loss": 0.3048, "step": 35016 }, { "epoch": 3.560085400569337, "grad_norm": 0.272874653339386, "learning_rate": 2.320614332146155e-06, "loss": 0.3322, "step": 35017 }, { "epoch": 3.560187067913786, "grad_norm": 0.3013145625591278, "learning_rate": 2.3203147100896506e-06, "loss": 0.3006, "step": 35018 }, { "epoch": 3.560288735258235, "grad_norm": 0.26694780588150024, "learning_rate": 2.3200151015327658e-06, "loss": 0.3238, "step": 35019 }, { "epoch": 3.560390402602684, "grad_norm": 0.2626884877681732, "learning_rate": 2.3197155064770076e-06, "loss": 0.3342, "step": 35020 }, { "epoch": 3.560492069947133, "grad_norm": 0.27021265029907227, "learning_rate": 2.3194159249238846e-06, "loss": 0.2996, "step": 35021 }, { "epoch": 3.560593737291582, "grad_norm": 0.2603849470615387, "learning_rate": 2.319116356874909e-06, "loss": 0.2878, "step": 35022 }, { "epoch": 3.560695404636031, "grad_norm": 0.285432368516922, "learning_rate": 2.318816802331587e-06, "loss": 0.323, "step": 35023 }, { "epoch": 3.56079707198048, "grad_norm": 0.2741103172302246, "learning_rate": 2.31851726129543e-06, "loss": 0.3167, "step": 35024 }, { "epoch": 3.5608987393249287, "grad_norm": 0.2761388421058655, "learning_rate": 2.318217733767943e-06, "loss": 0.3223, "step": 35025 }, { "epoch": 3.5610004066693777, "grad_norm": 0.2956558167934418, "learning_rate": 2.31791821975064e-06, "loss": 0.3047, "step": 35026 }, { "epoch": 3.5611020740138266, "grad_norm": 0.28254908323287964, "learning_rate": 2.317618719245028e-06, "loss": 0.3026, "step": 35027 }, { "epoch": 3.561203741358276, "grad_norm": 0.26317286491394043, "learning_rate": 2.3173192322526133e-06, "loss": 0.3409, "step": 35028 }, { "epoch": 3.561305408702725, "grad_norm": 0.253129243850708, "learning_rate": 2.317019758774909e-06, "loss": 0.3311, "step": 35029 }, { "epoch": 3.561407076047174, "grad_norm": 0.2915189862251282, "learning_rate": 2.3167202988134213e-06, "loss": 0.32, "step": 35030 }, { "epoch": 3.561508743391623, "grad_norm": 0.2813120186328888, "learning_rate": 2.316420852369657e-06, "loss": 0.3136, "step": 35031 }, { "epoch": 3.5616104107360718, "grad_norm": 0.2555304765701294, "learning_rate": 2.3161214194451285e-06, "loss": 0.3279, "step": 35032 }, { "epoch": 3.5617120780805207, "grad_norm": 0.2736800014972687, "learning_rate": 2.3158220000413434e-06, "loss": 0.281, "step": 35033 }, { "epoch": 3.5618137454249696, "grad_norm": 0.2841853201389313, "learning_rate": 2.3155225941598086e-06, "loss": 0.3092, "step": 35034 }, { "epoch": 3.5619154127694186, "grad_norm": 0.28086909651756287, "learning_rate": 2.315223201802031e-06, "loss": 0.3204, "step": 35035 }, { "epoch": 3.5620170801138675, "grad_norm": 0.26387494802474976, "learning_rate": 2.314923822969523e-06, "loss": 0.326, "step": 35036 }, { "epoch": 3.5621187474583165, "grad_norm": 0.25564873218536377, "learning_rate": 2.3146244576637904e-06, "loss": 0.3085, "step": 35037 }, { "epoch": 3.5622204148027654, "grad_norm": 0.296812504529953, "learning_rate": 2.3143251058863403e-06, "loss": 0.3271, "step": 35038 }, { "epoch": 3.5623220821472144, "grad_norm": 0.25767961144447327, "learning_rate": 2.3140257676386834e-06, "loss": 0.3195, "step": 35039 }, { "epoch": 3.5624237494916633, "grad_norm": 0.27090415358543396, "learning_rate": 2.3137264429223274e-06, "loss": 0.3119, "step": 35040 }, { "epoch": 3.5625254168361122, "grad_norm": 0.26723480224609375, "learning_rate": 2.3134271317387762e-06, "loss": 0.2872, "step": 35041 }, { "epoch": 3.562627084180561, "grad_norm": 0.2814904749393463, "learning_rate": 2.313127834089543e-06, "loss": 0.285, "step": 35042 }, { "epoch": 3.56272875152501, "grad_norm": 0.2777261435985565, "learning_rate": 2.3128285499761333e-06, "loss": 0.3017, "step": 35043 }, { "epoch": 3.562830418869459, "grad_norm": 0.25932788848876953, "learning_rate": 2.312529279400055e-06, "loss": 0.3295, "step": 35044 }, { "epoch": 3.562932086213908, "grad_norm": 0.2626888155937195, "learning_rate": 2.3122300223628138e-06, "loss": 0.3142, "step": 35045 }, { "epoch": 3.563033753558357, "grad_norm": 0.27153143286705017, "learning_rate": 2.31193077886592e-06, "loss": 0.297, "step": 35046 }, { "epoch": 3.563135420902806, "grad_norm": 0.2835026979446411, "learning_rate": 2.311631548910881e-06, "loss": 0.34, "step": 35047 }, { "epoch": 3.563237088247255, "grad_norm": 0.28252679109573364, "learning_rate": 2.311332332499201e-06, "loss": 0.3014, "step": 35048 }, { "epoch": 3.5633387555917038, "grad_norm": 0.26467418670654297, "learning_rate": 2.3110331296323925e-06, "loss": 0.3164, "step": 35049 }, { "epoch": 3.5634404229361527, "grad_norm": 0.2639031410217285, "learning_rate": 2.3107339403119593e-06, "loss": 0.2718, "step": 35050 }, { "epoch": 3.5635420902806016, "grad_norm": 0.27157461643218994, "learning_rate": 2.3104347645394078e-06, "loss": 0.3485, "step": 35051 }, { "epoch": 3.5636437576250506, "grad_norm": 0.30421072244644165, "learning_rate": 2.3101356023162485e-06, "loss": 0.3062, "step": 35052 }, { "epoch": 3.5637454249694995, "grad_norm": 0.26312556862831116, "learning_rate": 2.309836453643987e-06, "loss": 0.2874, "step": 35053 }, { "epoch": 3.563847092313949, "grad_norm": 0.2795485556125641, "learning_rate": 2.3095373185241306e-06, "loss": 0.2847, "step": 35054 }, { "epoch": 3.563948759658398, "grad_norm": 0.257381409406662, "learning_rate": 2.3092381969581835e-06, "loss": 0.3144, "step": 35055 }, { "epoch": 3.564050427002847, "grad_norm": 0.28287824988365173, "learning_rate": 2.3089390889476566e-06, "loss": 0.3171, "step": 35056 }, { "epoch": 3.5641520943472957, "grad_norm": 0.3075668513774872, "learning_rate": 2.308639994494055e-06, "loss": 0.313, "step": 35057 }, { "epoch": 3.5642537616917447, "grad_norm": 0.2858752906322479, "learning_rate": 2.3083409135988845e-06, "loss": 0.3001, "step": 35058 }, { "epoch": 3.5643554290361936, "grad_norm": 0.2787546217441559, "learning_rate": 2.3080418462636557e-06, "loss": 0.2961, "step": 35059 }, { "epoch": 3.5644570963806426, "grad_norm": 0.2798120975494385, "learning_rate": 2.3077427924898694e-06, "loss": 0.2894, "step": 35060 }, { "epoch": 3.5645587637250915, "grad_norm": 0.28404831886291504, "learning_rate": 2.3074437522790343e-06, "loss": 0.3105, "step": 35061 }, { "epoch": 3.5646604310695404, "grad_norm": 0.27225565910339355, "learning_rate": 2.30714472563266e-06, "loss": 0.2923, "step": 35062 }, { "epoch": 3.5647620984139894, "grad_norm": 0.2666282653808594, "learning_rate": 2.306845712552251e-06, "loss": 0.3238, "step": 35063 }, { "epoch": 3.5648637657584383, "grad_norm": 0.26148754358291626, "learning_rate": 2.3065467130393127e-06, "loss": 0.3206, "step": 35064 }, { "epoch": 3.5649654331028873, "grad_norm": 0.27302974462509155, "learning_rate": 2.3062477270953506e-06, "loss": 0.297, "step": 35065 }, { "epoch": 3.565067100447336, "grad_norm": 0.26619449257850647, "learning_rate": 2.305948754721874e-06, "loss": 0.333, "step": 35066 }, { "epoch": 3.565168767791785, "grad_norm": 0.2594817578792572, "learning_rate": 2.305649795920387e-06, "loss": 0.3303, "step": 35067 }, { "epoch": 3.565270435136234, "grad_norm": 0.28316956758499146, "learning_rate": 2.305350850692395e-06, "loss": 0.3055, "step": 35068 }, { "epoch": 3.5653721024806835, "grad_norm": 0.26518452167510986, "learning_rate": 2.305051919039408e-06, "loss": 0.3062, "step": 35069 }, { "epoch": 3.5654737698251324, "grad_norm": 0.29645606875419617, "learning_rate": 2.304753000962926e-06, "loss": 0.2977, "step": 35070 }, { "epoch": 3.5655754371695814, "grad_norm": 0.25807416439056396, "learning_rate": 2.3044540964644574e-06, "loss": 0.3212, "step": 35071 }, { "epoch": 3.5656771045140303, "grad_norm": 0.2982926070690155, "learning_rate": 2.3041552055455107e-06, "loss": 0.2765, "step": 35072 }, { "epoch": 3.5657787718584792, "grad_norm": 0.2695833146572113, "learning_rate": 2.303856328207589e-06, "loss": 0.2997, "step": 35073 }, { "epoch": 3.565880439202928, "grad_norm": 0.253356009721756, "learning_rate": 2.3035574644521993e-06, "loss": 0.2937, "step": 35074 }, { "epoch": 3.565982106547377, "grad_norm": 0.26206302642822266, "learning_rate": 2.303258614280843e-06, "loss": 0.3011, "step": 35075 }, { "epoch": 3.566083773891826, "grad_norm": 0.2711605131626129, "learning_rate": 2.302959777695032e-06, "loss": 0.3075, "step": 35076 }, { "epoch": 3.566185441236275, "grad_norm": 0.256728857755661, "learning_rate": 2.3026609546962687e-06, "loss": 0.3183, "step": 35077 }, { "epoch": 3.566287108580724, "grad_norm": 0.26829469203948975, "learning_rate": 2.302362145286056e-06, "loss": 0.3113, "step": 35078 }, { "epoch": 3.566388775925173, "grad_norm": 0.2467133104801178, "learning_rate": 2.3020633494659057e-06, "loss": 0.3064, "step": 35079 }, { "epoch": 3.566490443269622, "grad_norm": 0.27782535552978516, "learning_rate": 2.301764567237316e-06, "loss": 0.2914, "step": 35080 }, { "epoch": 3.5665921106140708, "grad_norm": 0.29148826003074646, "learning_rate": 2.301465798601794e-06, "loss": 0.3248, "step": 35081 }, { "epoch": 3.5666937779585197, "grad_norm": 0.27963176369667053, "learning_rate": 2.3011670435608507e-06, "loss": 0.3142, "step": 35082 }, { "epoch": 3.5667954453029687, "grad_norm": 0.24672886729240417, "learning_rate": 2.300868302115983e-06, "loss": 0.3036, "step": 35083 }, { "epoch": 3.5668971126474176, "grad_norm": 0.28814271092414856, "learning_rate": 2.3005695742687005e-06, "loss": 0.3043, "step": 35084 }, { "epoch": 3.5669987799918665, "grad_norm": 0.26757195591926575, "learning_rate": 2.3002708600205054e-06, "loss": 0.3099, "step": 35085 }, { "epoch": 3.5671004473363155, "grad_norm": 0.2718897759914398, "learning_rate": 2.2999721593729057e-06, "loss": 0.3184, "step": 35086 }, { "epoch": 3.5672021146807644, "grad_norm": 0.26956668496131897, "learning_rate": 2.299673472327405e-06, "loss": 0.2885, "step": 35087 }, { "epoch": 3.5673037820252134, "grad_norm": 0.27104634046554565, "learning_rate": 2.299374798885506e-06, "loss": 0.3013, "step": 35088 }, { "epoch": 3.5674054493696623, "grad_norm": 0.2615048587322235, "learning_rate": 2.299076139048719e-06, "loss": 0.3029, "step": 35089 }, { "epoch": 3.5675071167141112, "grad_norm": 0.2718607187271118, "learning_rate": 2.2987774928185396e-06, "loss": 0.3259, "step": 35090 }, { "epoch": 3.56760878405856, "grad_norm": 0.2745952904224396, "learning_rate": 2.298478860196478e-06, "loss": 0.3285, "step": 35091 }, { "epoch": 3.567710451403009, "grad_norm": 0.2504449784755707, "learning_rate": 2.298180241184042e-06, "loss": 0.3129, "step": 35092 }, { "epoch": 3.567812118747458, "grad_norm": 0.2820439636707306, "learning_rate": 2.2978816357827275e-06, "loss": 0.3214, "step": 35093 }, { "epoch": 3.567913786091907, "grad_norm": 0.2794201076030731, "learning_rate": 2.2975830439940455e-06, "loss": 0.3037, "step": 35094 }, { "epoch": 3.5680154534363564, "grad_norm": 0.24312527477741241, "learning_rate": 2.2972844658194983e-06, "loss": 0.3027, "step": 35095 }, { "epoch": 3.5681171207808053, "grad_norm": 0.2927928864955902, "learning_rate": 2.2969859012605873e-06, "loss": 0.2406, "step": 35096 }, { "epoch": 3.5682187881252543, "grad_norm": 0.28501641750335693, "learning_rate": 2.2966873503188207e-06, "loss": 0.2988, "step": 35097 }, { "epoch": 3.5683204554697032, "grad_norm": 0.28897786140441895, "learning_rate": 2.296388812995699e-06, "loss": 0.3187, "step": 35098 }, { "epoch": 3.568422122814152, "grad_norm": 0.2710844576358795, "learning_rate": 2.2960902892927318e-06, "loss": 0.3095, "step": 35099 }, { "epoch": 3.568523790158601, "grad_norm": 0.2803539037704468, "learning_rate": 2.295791779211416e-06, "loss": 0.3163, "step": 35100 }, { "epoch": 3.56862545750305, "grad_norm": 0.2926568388938904, "learning_rate": 2.2954932827532578e-06, "loss": 0.3284, "step": 35101 }, { "epoch": 3.568727124847499, "grad_norm": 0.2637988030910492, "learning_rate": 2.295194799919766e-06, "loss": 0.3067, "step": 35102 }, { "epoch": 3.568828792191948, "grad_norm": 0.2936205565929413, "learning_rate": 2.2948963307124363e-06, "loss": 0.3123, "step": 35103 }, { "epoch": 3.568930459536397, "grad_norm": 0.2533715069293976, "learning_rate": 2.2945978751327775e-06, "loss": 0.3281, "step": 35104 }, { "epoch": 3.569032126880846, "grad_norm": 0.2654716670513153, "learning_rate": 2.2942994331822914e-06, "loss": 0.3209, "step": 35105 }, { "epoch": 3.5691337942252948, "grad_norm": 0.28103259205818176, "learning_rate": 2.294001004862481e-06, "loss": 0.3215, "step": 35106 }, { "epoch": 3.5692354615697437, "grad_norm": 0.28448450565338135, "learning_rate": 2.2937025901748518e-06, "loss": 0.3065, "step": 35107 }, { "epoch": 3.5693371289141926, "grad_norm": 0.27759382128715515, "learning_rate": 2.293404189120904e-06, "loss": 0.2952, "step": 35108 }, { "epoch": 3.5694387962586416, "grad_norm": 0.26608049869537354, "learning_rate": 2.293105801702146e-06, "loss": 0.3289, "step": 35109 }, { "epoch": 3.569540463603091, "grad_norm": 0.2650092542171478, "learning_rate": 2.2928074279200745e-06, "loss": 0.2948, "step": 35110 }, { "epoch": 3.56964213094754, "grad_norm": 0.27335432171821594, "learning_rate": 2.2925090677761956e-06, "loss": 0.2822, "step": 35111 }, { "epoch": 3.569743798291989, "grad_norm": 0.2891707122325897, "learning_rate": 2.2922107212720167e-06, "loss": 0.318, "step": 35112 }, { "epoch": 3.569845465636438, "grad_norm": 0.28749069571495056, "learning_rate": 2.2919123884090327e-06, "loss": 0.2954, "step": 35113 }, { "epoch": 3.5699471329808867, "grad_norm": 0.291713148355484, "learning_rate": 2.2916140691887527e-06, "loss": 0.322, "step": 35114 }, { "epoch": 3.5700488003253357, "grad_norm": 0.27496278285980225, "learning_rate": 2.2913157636126772e-06, "loss": 0.3065, "step": 35115 }, { "epoch": 3.5701504676697846, "grad_norm": 0.27835986018180847, "learning_rate": 2.2910174716823076e-06, "loss": 0.307, "step": 35116 }, { "epoch": 3.5702521350142336, "grad_norm": 0.2664163410663605, "learning_rate": 2.2907191933991495e-06, "loss": 0.3, "step": 35117 }, { "epoch": 3.5703538023586825, "grad_norm": 0.27561625838279724, "learning_rate": 2.290420928764705e-06, "loss": 0.3135, "step": 35118 }, { "epoch": 3.5704554697031314, "grad_norm": 0.287399560213089, "learning_rate": 2.290122677780475e-06, "loss": 0.286, "step": 35119 }, { "epoch": 3.5705571370475804, "grad_norm": 0.27681124210357666, "learning_rate": 2.289824440447962e-06, "loss": 0.3293, "step": 35120 }, { "epoch": 3.5706588043920293, "grad_norm": 0.2719043493270874, "learning_rate": 2.2895262167686693e-06, "loss": 0.3017, "step": 35121 }, { "epoch": 3.5707604717364783, "grad_norm": 0.2649736702442169, "learning_rate": 2.289228006744103e-06, "loss": 0.2863, "step": 35122 }, { "epoch": 3.570862139080927, "grad_norm": 0.26640090346336365, "learning_rate": 2.288929810375758e-06, "loss": 0.3199, "step": 35123 }, { "epoch": 3.570963806425376, "grad_norm": 0.27019384503364563, "learning_rate": 2.288631627665142e-06, "loss": 0.314, "step": 35124 }, { "epoch": 3.571065473769825, "grad_norm": 0.2899222671985626, "learning_rate": 2.288333458613756e-06, "loss": 0.2878, "step": 35125 }, { "epoch": 3.571167141114274, "grad_norm": 0.28560376167297363, "learning_rate": 2.2880353032230994e-06, "loss": 0.3166, "step": 35126 }, { "epoch": 3.571268808458723, "grad_norm": 0.28358107805252075, "learning_rate": 2.2877371614946793e-06, "loss": 0.3587, "step": 35127 }, { "epoch": 3.571370475803172, "grad_norm": 0.2656268775463104, "learning_rate": 2.287439033429994e-06, "loss": 0.3191, "step": 35128 }, { "epoch": 3.571472143147621, "grad_norm": 0.2595437467098236, "learning_rate": 2.287140919030546e-06, "loss": 0.3105, "step": 35129 }, { "epoch": 3.57157381049207, "grad_norm": 0.27378609776496887, "learning_rate": 2.286842818297836e-06, "loss": 0.2906, "step": 35130 }, { "epoch": 3.5716754778365187, "grad_norm": 0.2674550414085388, "learning_rate": 2.286544731233367e-06, "loss": 0.3142, "step": 35131 }, { "epoch": 3.5717771451809677, "grad_norm": 0.2751566469669342, "learning_rate": 2.2862466578386443e-06, "loss": 0.3037, "step": 35132 }, { "epoch": 3.5718788125254166, "grad_norm": 0.26590725779533386, "learning_rate": 2.285948598115163e-06, "loss": 0.2974, "step": 35133 }, { "epoch": 3.5719804798698656, "grad_norm": 0.2857479453086853, "learning_rate": 2.285650552064429e-06, "loss": 0.3191, "step": 35134 }, { "epoch": 3.5720821472143145, "grad_norm": 0.28092440962791443, "learning_rate": 2.2853525196879428e-06, "loss": 0.3344, "step": 35135 }, { "epoch": 3.572183814558764, "grad_norm": 0.28598731756210327, "learning_rate": 2.2850545009872034e-06, "loss": 0.3134, "step": 35136 }, { "epoch": 3.572285481903213, "grad_norm": 0.2581007182598114, "learning_rate": 2.2847564959637164e-06, "loss": 0.2996, "step": 35137 }, { "epoch": 3.5723871492476618, "grad_norm": 0.28558632731437683, "learning_rate": 2.2844585046189805e-06, "loss": 0.3263, "step": 35138 }, { "epoch": 3.5724888165921107, "grad_norm": 0.24724459648132324, "learning_rate": 2.2841605269544975e-06, "loss": 0.3094, "step": 35139 }, { "epoch": 3.5725904839365596, "grad_norm": 0.2600859999656677, "learning_rate": 2.2838625629717664e-06, "loss": 0.3206, "step": 35140 }, { "epoch": 3.5726921512810086, "grad_norm": 0.28134533762931824, "learning_rate": 2.2835646126722926e-06, "loss": 0.3125, "step": 35141 }, { "epoch": 3.5727938186254575, "grad_norm": 0.29490551352500916, "learning_rate": 2.2832666760575746e-06, "loss": 0.3257, "step": 35142 }, { "epoch": 3.5728954859699065, "grad_norm": 0.26657694578170776, "learning_rate": 2.282968753129111e-06, "loss": 0.2995, "step": 35143 }, { "epoch": 3.5729971533143554, "grad_norm": 0.2617611289024353, "learning_rate": 2.2826708438884075e-06, "loss": 0.2748, "step": 35144 }, { "epoch": 3.5730988206588044, "grad_norm": 0.2799907624721527, "learning_rate": 2.282372948336963e-06, "loss": 0.2794, "step": 35145 }, { "epoch": 3.5732004880032533, "grad_norm": 0.2864731550216675, "learning_rate": 2.2820750664762753e-06, "loss": 0.3168, "step": 35146 }, { "epoch": 3.5733021553477022, "grad_norm": 0.2861260175704956, "learning_rate": 2.2817771983078495e-06, "loss": 0.3161, "step": 35147 }, { "epoch": 3.573403822692151, "grad_norm": 0.27752768993377686, "learning_rate": 2.2814793438331846e-06, "loss": 0.3054, "step": 35148 }, { "epoch": 3.5735054900366, "grad_norm": 0.287412166595459, "learning_rate": 2.2811815030537804e-06, "loss": 0.35, "step": 35149 }, { "epoch": 3.573607157381049, "grad_norm": 0.2806816101074219, "learning_rate": 2.280883675971136e-06, "loss": 0.3343, "step": 35150 }, { "epoch": 3.5737088247254984, "grad_norm": 0.31503069400787354, "learning_rate": 2.280585862586755e-06, "loss": 0.3131, "step": 35151 }, { "epoch": 3.5738104920699474, "grad_norm": 0.27099716663360596, "learning_rate": 2.2802880629021368e-06, "loss": 0.3089, "step": 35152 }, { "epoch": 3.5739121594143963, "grad_norm": 0.25772950053215027, "learning_rate": 2.279990276918779e-06, "loss": 0.3122, "step": 35153 }, { "epoch": 3.5740138267588453, "grad_norm": 0.30127596855163574, "learning_rate": 2.279692504638185e-06, "loss": 0.3177, "step": 35154 }, { "epoch": 3.574115494103294, "grad_norm": 0.27516719698905945, "learning_rate": 2.279394746061854e-06, "loss": 0.3279, "step": 35155 }, { "epoch": 3.574217161447743, "grad_norm": 0.2683914601802826, "learning_rate": 2.2790970011912845e-06, "loss": 0.3117, "step": 35156 }, { "epoch": 3.574318828792192, "grad_norm": 0.26879844069480896, "learning_rate": 2.2787992700279794e-06, "loss": 0.3321, "step": 35157 }, { "epoch": 3.574420496136641, "grad_norm": 0.27360981702804565, "learning_rate": 2.2785015525734362e-06, "loss": 0.3351, "step": 35158 }, { "epoch": 3.57452216348109, "grad_norm": 0.2869687080383301, "learning_rate": 2.2782038488291564e-06, "loss": 0.328, "step": 35159 }, { "epoch": 3.574623830825539, "grad_norm": 0.28142663836479187, "learning_rate": 2.277906158796636e-06, "loss": 0.3271, "step": 35160 }, { "epoch": 3.574725498169988, "grad_norm": 0.28511494398117065, "learning_rate": 2.2776084824773802e-06, "loss": 0.3096, "step": 35161 }, { "epoch": 3.574827165514437, "grad_norm": 0.27867406606674194, "learning_rate": 2.2773108198728854e-06, "loss": 0.3167, "step": 35162 }, { "epoch": 3.5749288328588857, "grad_norm": 0.27334874868392944, "learning_rate": 2.27701317098465e-06, "loss": 0.3417, "step": 35163 }, { "epoch": 3.5750305002033347, "grad_norm": 0.2977749705314636, "learning_rate": 2.276715535814177e-06, "loss": 0.3149, "step": 35164 }, { "epoch": 3.5751321675477836, "grad_norm": 0.27748575806617737, "learning_rate": 2.2764179143629637e-06, "loss": 0.3112, "step": 35165 }, { "epoch": 3.5752338348922326, "grad_norm": 0.2507520616054535, "learning_rate": 2.276120306632508e-06, "loss": 0.3188, "step": 35166 }, { "epoch": 3.5753355022366815, "grad_norm": 0.27582651376724243, "learning_rate": 2.2758227126243134e-06, "loss": 0.288, "step": 35167 }, { "epoch": 3.5754371695811304, "grad_norm": 0.2729548215866089, "learning_rate": 2.2755251323398764e-06, "loss": 0.3095, "step": 35168 }, { "epoch": 3.5755388369255794, "grad_norm": 0.26373088359832764, "learning_rate": 2.275227565780696e-06, "loss": 0.3063, "step": 35169 }, { "epoch": 3.5756405042700283, "grad_norm": 0.25325697660446167, "learning_rate": 2.27493001294827e-06, "loss": 0.3339, "step": 35170 }, { "epoch": 3.5757421716144773, "grad_norm": 0.2717563211917877, "learning_rate": 2.2746324738441007e-06, "loss": 0.294, "step": 35171 }, { "epoch": 3.575843838958926, "grad_norm": 0.3021791875362396, "learning_rate": 2.2743349484696854e-06, "loss": 0.3216, "step": 35172 }, { "epoch": 3.575945506303375, "grad_norm": 0.2782069742679596, "learning_rate": 2.2740374368265204e-06, "loss": 0.3033, "step": 35173 }, { "epoch": 3.576047173647824, "grad_norm": 0.26616111397743225, "learning_rate": 2.2737399389161095e-06, "loss": 0.3156, "step": 35174 }, { "epoch": 3.576148840992273, "grad_norm": 0.2759914994239807, "learning_rate": 2.273442454739949e-06, "loss": 0.2875, "step": 35175 }, { "epoch": 3.576250508336722, "grad_norm": 0.2715624272823334, "learning_rate": 2.273144984299536e-06, "loss": 0.3058, "step": 35176 }, { "epoch": 3.5763521756811714, "grad_norm": 0.2520339787006378, "learning_rate": 2.272847527596372e-06, "loss": 0.3001, "step": 35177 }, { "epoch": 3.5764538430256203, "grad_norm": 0.27529969811439514, "learning_rate": 2.2725500846319538e-06, "loss": 0.2909, "step": 35178 }, { "epoch": 3.5765555103700692, "grad_norm": 0.26855865120887756, "learning_rate": 2.2722526554077804e-06, "loss": 0.315, "step": 35179 }, { "epoch": 3.576657177714518, "grad_norm": 0.2648707926273346, "learning_rate": 2.271955239925348e-06, "loss": 0.3025, "step": 35180 }, { "epoch": 3.576758845058967, "grad_norm": 0.26862961053848267, "learning_rate": 2.2716578381861587e-06, "loss": 0.32, "step": 35181 }, { "epoch": 3.576860512403416, "grad_norm": 0.29095566272735596, "learning_rate": 2.2713604501917093e-06, "loss": 0.3157, "step": 35182 }, { "epoch": 3.576962179747865, "grad_norm": 0.29306817054748535, "learning_rate": 2.271063075943495e-06, "loss": 0.3017, "step": 35183 }, { "epoch": 3.577063847092314, "grad_norm": 0.31143030524253845, "learning_rate": 2.2707657154430183e-06, "loss": 0.3202, "step": 35184 }, { "epoch": 3.577165514436763, "grad_norm": 0.30648425221443176, "learning_rate": 2.2704683686917758e-06, "loss": 0.3262, "step": 35185 }, { "epoch": 3.577267181781212, "grad_norm": 0.2558901309967041, "learning_rate": 2.270171035691263e-06, "loss": 0.3157, "step": 35186 }, { "epoch": 3.5773688491256608, "grad_norm": 0.279649019241333, "learning_rate": 2.2698737164429817e-06, "loss": 0.2926, "step": 35187 }, { "epoch": 3.5774705164701097, "grad_norm": 0.2705981731414795, "learning_rate": 2.2695764109484275e-06, "loss": 0.3033, "step": 35188 }, { "epoch": 3.5775721838145587, "grad_norm": 0.2607850730419159, "learning_rate": 2.269279119209099e-06, "loss": 0.2976, "step": 35189 }, { "epoch": 3.5776738511590076, "grad_norm": 0.2900301218032837, "learning_rate": 2.2689818412264914e-06, "loss": 0.3712, "step": 35190 }, { "epoch": 3.5777755185034565, "grad_norm": 0.2683696150779724, "learning_rate": 2.268684577002106e-06, "loss": 0.2983, "step": 35191 }, { "epoch": 3.577877185847906, "grad_norm": 0.2840741276741028, "learning_rate": 2.2683873265374383e-06, "loss": 0.3047, "step": 35192 }, { "epoch": 3.577978853192355, "grad_norm": 0.2591818571090698, "learning_rate": 2.2680900898339847e-06, "loss": 0.3159, "step": 35193 }, { "epoch": 3.578080520536804, "grad_norm": 0.28094717860221863, "learning_rate": 2.2677928668932457e-06, "loss": 0.2921, "step": 35194 }, { "epoch": 3.5781821878812528, "grad_norm": 0.26776817440986633, "learning_rate": 2.2674956577167175e-06, "loss": 0.3037, "step": 35195 }, { "epoch": 3.5782838552257017, "grad_norm": 0.28434979915618896, "learning_rate": 2.2671984623058947e-06, "loss": 0.3327, "step": 35196 }, { "epoch": 3.5783855225701506, "grad_norm": 0.25707027316093445, "learning_rate": 2.266901280662278e-06, "loss": 0.2884, "step": 35197 }, { "epoch": 3.5784871899145996, "grad_norm": 0.2610437870025635, "learning_rate": 2.2666041127873634e-06, "loss": 0.3213, "step": 35198 }, { "epoch": 3.5785888572590485, "grad_norm": 0.28445398807525635, "learning_rate": 2.2663069586826483e-06, "loss": 0.3104, "step": 35199 }, { "epoch": 3.5786905246034975, "grad_norm": 0.2632160186767578, "learning_rate": 2.2660098183496266e-06, "loss": 0.3071, "step": 35200 }, { "epoch": 3.5787921919479464, "grad_norm": 0.26707392930984497, "learning_rate": 2.2657126917898e-06, "loss": 0.3185, "step": 35201 }, { "epoch": 3.5788938592923953, "grad_norm": 0.248297318816185, "learning_rate": 2.2654155790046627e-06, "loss": 0.3152, "step": 35202 }, { "epoch": 3.5789955266368443, "grad_norm": 0.2678861618041992, "learning_rate": 2.26511847999571e-06, "loss": 0.3145, "step": 35203 }, { "epoch": 3.5790971939812932, "grad_norm": 0.26949045062065125, "learning_rate": 2.264821394764443e-06, "loss": 0.3159, "step": 35204 }, { "epoch": 3.579198861325742, "grad_norm": 0.28100356459617615, "learning_rate": 2.2645243233123547e-06, "loss": 0.3263, "step": 35205 }, { "epoch": 3.579300528670191, "grad_norm": 0.3104449212551117, "learning_rate": 2.264227265640942e-06, "loss": 0.2974, "step": 35206 }, { "epoch": 3.57940219601464, "grad_norm": 0.2939590811729431, "learning_rate": 2.263930221751704e-06, "loss": 0.3279, "step": 35207 }, { "epoch": 3.579503863359089, "grad_norm": 0.25297507643699646, "learning_rate": 2.2636331916461346e-06, "loss": 0.3058, "step": 35208 }, { "epoch": 3.579605530703538, "grad_norm": 0.28363823890686035, "learning_rate": 2.2633361753257316e-06, "loss": 0.3065, "step": 35209 }, { "epoch": 3.579707198047987, "grad_norm": 0.2971702218055725, "learning_rate": 2.2630391727919885e-06, "loss": 0.3053, "step": 35210 }, { "epoch": 3.579808865392436, "grad_norm": 0.2827690541744232, "learning_rate": 2.2627421840464077e-06, "loss": 0.3125, "step": 35211 }, { "epoch": 3.5799105327368848, "grad_norm": 0.2807188928127289, "learning_rate": 2.262445209090477e-06, "loss": 0.2716, "step": 35212 }, { "epoch": 3.5800122000813337, "grad_norm": 0.27624592185020447, "learning_rate": 2.2621482479256974e-06, "loss": 0.3308, "step": 35213 }, { "epoch": 3.5801138674257826, "grad_norm": 0.2854222059249878, "learning_rate": 2.261851300553566e-06, "loss": 0.3009, "step": 35214 }, { "epoch": 3.5802155347702316, "grad_norm": 0.2721957862377167, "learning_rate": 2.2615543669755765e-06, "loss": 0.2818, "step": 35215 }, { "epoch": 3.5803172021146805, "grad_norm": 0.27855122089385986, "learning_rate": 2.2612574471932258e-06, "loss": 0.2787, "step": 35216 }, { "epoch": 3.5804188694591295, "grad_norm": 0.276960551738739, "learning_rate": 2.2609605412080075e-06, "loss": 0.3201, "step": 35217 }, { "epoch": 3.580520536803579, "grad_norm": 0.26583132147789, "learning_rate": 2.2606636490214206e-06, "loss": 0.277, "step": 35218 }, { "epoch": 3.580622204148028, "grad_norm": 0.282335102558136, "learning_rate": 2.2603667706349596e-06, "loss": 0.3039, "step": 35219 }, { "epoch": 3.5807238714924767, "grad_norm": 0.27767708897590637, "learning_rate": 2.260069906050118e-06, "loss": 0.3295, "step": 35220 }, { "epoch": 3.5808255388369257, "grad_norm": 0.2740849554538727, "learning_rate": 2.2597730552683965e-06, "loss": 0.3181, "step": 35221 }, { "epoch": 3.5809272061813746, "grad_norm": 0.2693738043308258, "learning_rate": 2.2594762182912837e-06, "loss": 0.3332, "step": 35222 }, { "epoch": 3.5810288735258236, "grad_norm": 0.2677033245563507, "learning_rate": 2.2591793951202785e-06, "loss": 0.3365, "step": 35223 }, { "epoch": 3.5811305408702725, "grad_norm": 0.2655327022075653, "learning_rate": 2.2588825857568776e-06, "loss": 0.3363, "step": 35224 }, { "epoch": 3.5812322082147214, "grad_norm": 0.2733680307865143, "learning_rate": 2.2585857902025753e-06, "loss": 0.3334, "step": 35225 }, { "epoch": 3.5813338755591704, "grad_norm": 0.2894379496574402, "learning_rate": 2.258289008458866e-06, "loss": 0.3468, "step": 35226 }, { "epoch": 3.5814355429036193, "grad_norm": 0.27993661165237427, "learning_rate": 2.2579922405272437e-06, "loss": 0.3157, "step": 35227 }, { "epoch": 3.5815372102480683, "grad_norm": 0.2766342759132385, "learning_rate": 2.2576954864092066e-06, "loss": 0.2942, "step": 35228 }, { "epoch": 3.581638877592517, "grad_norm": 0.29262250661849976, "learning_rate": 2.2573987461062472e-06, "loss": 0.2961, "step": 35229 }, { "epoch": 3.581740544936966, "grad_norm": 0.2831326425075531, "learning_rate": 2.25710201961986e-06, "loss": 0.2917, "step": 35230 }, { "epoch": 3.581842212281415, "grad_norm": 0.28394925594329834, "learning_rate": 2.256805306951545e-06, "loss": 0.3273, "step": 35231 }, { "epoch": 3.581943879625864, "grad_norm": 0.25111570954322815, "learning_rate": 2.2565086081027894e-06, "loss": 0.3112, "step": 35232 }, { "epoch": 3.5820455469703134, "grad_norm": 0.2746710777282715, "learning_rate": 2.2562119230750906e-06, "loss": 0.3107, "step": 35233 }, { "epoch": 3.5821472143147624, "grad_norm": 0.2857118844985962, "learning_rate": 2.2559152518699484e-06, "loss": 0.3283, "step": 35234 }, { "epoch": 3.5822488816592113, "grad_norm": 0.2621178925037384, "learning_rate": 2.2556185944888493e-06, "loss": 0.3123, "step": 35235 }, { "epoch": 3.5823505490036602, "grad_norm": 0.310302197933197, "learning_rate": 2.2553219509332935e-06, "loss": 0.2781, "step": 35236 }, { "epoch": 3.582452216348109, "grad_norm": 0.272844523191452, "learning_rate": 2.2550253212047713e-06, "loss": 0.2913, "step": 35237 }, { "epoch": 3.582553883692558, "grad_norm": 0.2558496594429016, "learning_rate": 2.254728705304781e-06, "loss": 0.3128, "step": 35238 }, { "epoch": 3.582655551037007, "grad_norm": 0.2693452835083008, "learning_rate": 2.2544321032348154e-06, "loss": 0.2871, "step": 35239 }, { "epoch": 3.582757218381456, "grad_norm": 0.25575128197669983, "learning_rate": 2.2541355149963666e-06, "loss": 0.2816, "step": 35240 }, { "epoch": 3.582858885725905, "grad_norm": 0.28674301505088806, "learning_rate": 2.2538389405909344e-06, "loss": 0.3138, "step": 35241 }, { "epoch": 3.582960553070354, "grad_norm": 0.2747049629688263, "learning_rate": 2.2535423800200053e-06, "loss": 0.3068, "step": 35242 }, { "epoch": 3.583062220414803, "grad_norm": 0.28113338351249695, "learning_rate": 2.2532458332850766e-06, "loss": 0.3126, "step": 35243 }, { "epoch": 3.5831638877592518, "grad_norm": 0.25801920890808105, "learning_rate": 2.252949300387646e-06, "loss": 0.3158, "step": 35244 }, { "epoch": 3.5832655551037007, "grad_norm": 0.25685158371925354, "learning_rate": 2.2526527813292017e-06, "loss": 0.2855, "step": 35245 }, { "epoch": 3.5833672224481496, "grad_norm": 0.25684377551078796, "learning_rate": 2.2523562761112404e-06, "loss": 0.3224, "step": 35246 }, { "epoch": 3.5834688897925986, "grad_norm": 0.2651819586753845, "learning_rate": 2.252059784735256e-06, "loss": 0.31, "step": 35247 }, { "epoch": 3.5835705571370475, "grad_norm": 0.26071619987487793, "learning_rate": 2.2517633072027396e-06, "loss": 0.3022, "step": 35248 }, { "epoch": 3.5836722244814965, "grad_norm": 0.26779118180274963, "learning_rate": 2.2514668435151875e-06, "loss": 0.305, "step": 35249 }, { "epoch": 3.5837738918259454, "grad_norm": 0.2538014054298401, "learning_rate": 2.2511703936740915e-06, "loss": 0.3216, "step": 35250 }, { "epoch": 3.5838755591703944, "grad_norm": 0.26006993651390076, "learning_rate": 2.250873957680949e-06, "loss": 0.3076, "step": 35251 }, { "epoch": 3.5839772265148433, "grad_norm": 0.2805316746234894, "learning_rate": 2.2505775355372468e-06, "loss": 0.3073, "step": 35252 }, { "epoch": 3.5840788938592922, "grad_norm": 0.27127110958099365, "learning_rate": 2.250281127244481e-06, "loss": 0.3265, "step": 35253 }, { "epoch": 3.584180561203741, "grad_norm": 0.27305546402931213, "learning_rate": 2.2499847328041495e-06, "loss": 0.3237, "step": 35254 }, { "epoch": 3.58428222854819, "grad_norm": 0.26511242985725403, "learning_rate": 2.2496883522177378e-06, "loss": 0.3111, "step": 35255 }, { "epoch": 3.584383895892639, "grad_norm": 0.2965317666530609, "learning_rate": 2.2493919854867446e-06, "loss": 0.3082, "step": 35256 }, { "epoch": 3.584485563237088, "grad_norm": 0.27883341908454895, "learning_rate": 2.2490956326126607e-06, "loss": 0.3524, "step": 35257 }, { "epoch": 3.584587230581537, "grad_norm": 0.25364989042282104, "learning_rate": 2.2487992935969776e-06, "loss": 0.3206, "step": 35258 }, { "epoch": 3.5846888979259863, "grad_norm": 0.2826610505580902, "learning_rate": 2.2485029684411914e-06, "loss": 0.2835, "step": 35259 }, { "epoch": 3.5847905652704353, "grad_norm": 0.2862958014011383, "learning_rate": 2.248206657146792e-06, "loss": 0.3344, "step": 35260 }, { "epoch": 3.584892232614884, "grad_norm": 0.2594221532344818, "learning_rate": 2.2479103597152766e-06, "loss": 0.3088, "step": 35261 }, { "epoch": 3.584993899959333, "grad_norm": 0.29107388854026794, "learning_rate": 2.2476140761481318e-06, "loss": 0.2708, "step": 35262 }, { "epoch": 3.585095567303782, "grad_norm": 0.28081029653549194, "learning_rate": 2.2473178064468527e-06, "loss": 0.2924, "step": 35263 }, { "epoch": 3.585197234648231, "grad_norm": 0.27536049485206604, "learning_rate": 2.247021550612936e-06, "loss": 0.2985, "step": 35264 }, { "epoch": 3.58529890199268, "grad_norm": 0.2720226049423218, "learning_rate": 2.2467253086478675e-06, "loss": 0.309, "step": 35265 }, { "epoch": 3.585400569337129, "grad_norm": 0.2675703763961792, "learning_rate": 2.246429080553144e-06, "loss": 0.3126, "step": 35266 }, { "epoch": 3.585502236681578, "grad_norm": 0.2938918471336365, "learning_rate": 2.246132866330257e-06, "loss": 0.3219, "step": 35267 }, { "epoch": 3.585603904026027, "grad_norm": 0.2874787449836731, "learning_rate": 2.2458366659806952e-06, "loss": 0.3061, "step": 35268 }, { "epoch": 3.5857055713704757, "grad_norm": 0.26641756296157837, "learning_rate": 2.2455404795059564e-06, "loss": 0.3478, "step": 35269 }, { "epoch": 3.5858072387149247, "grad_norm": 0.2695673704147339, "learning_rate": 2.2452443069075303e-06, "loss": 0.3152, "step": 35270 }, { "epoch": 3.5859089060593736, "grad_norm": 0.27114441990852356, "learning_rate": 2.244948148186908e-06, "loss": 0.288, "step": 35271 }, { "epoch": 3.5860105734038226, "grad_norm": 0.2673444151878357, "learning_rate": 2.244652003345581e-06, "loss": 0.3152, "step": 35272 }, { "epoch": 3.5861122407482715, "grad_norm": 0.28382542729377747, "learning_rate": 2.244355872385042e-06, "loss": 0.315, "step": 35273 }, { "epoch": 3.586213908092721, "grad_norm": 0.25977689027786255, "learning_rate": 2.2440597553067873e-06, "loss": 0.3252, "step": 35274 }, { "epoch": 3.58631557543717, "grad_norm": 0.2753266394138336, "learning_rate": 2.2437636521123008e-06, "loss": 0.2916, "step": 35275 }, { "epoch": 3.5864172427816188, "grad_norm": 0.26130810379981995, "learning_rate": 2.24346756280308e-06, "loss": 0.3235, "step": 35276 }, { "epoch": 3.5865189101260677, "grad_norm": 0.2818962037563324, "learning_rate": 2.2431714873806147e-06, "loss": 0.3091, "step": 35277 }, { "epoch": 3.5866205774705167, "grad_norm": 0.25445839762687683, "learning_rate": 2.242875425846394e-06, "loss": 0.2837, "step": 35278 }, { "epoch": 3.5867222448149656, "grad_norm": 0.27838757634162903, "learning_rate": 2.242579378201914e-06, "loss": 0.2875, "step": 35279 }, { "epoch": 3.5868239121594145, "grad_norm": 0.2655366063117981, "learning_rate": 2.2422833444486634e-06, "loss": 0.2948, "step": 35280 }, { "epoch": 3.5869255795038635, "grad_norm": 0.28752678632736206, "learning_rate": 2.2419873245881344e-06, "loss": 0.3457, "step": 35281 }, { "epoch": 3.5870272468483124, "grad_norm": 0.28936484456062317, "learning_rate": 2.241691318621816e-06, "loss": 0.3074, "step": 35282 }, { "epoch": 3.5871289141927614, "grad_norm": 0.26784130930900574, "learning_rate": 2.2413953265512012e-06, "loss": 0.289, "step": 35283 }, { "epoch": 3.5872305815372103, "grad_norm": 0.3092656135559082, "learning_rate": 2.241099348377785e-06, "loss": 0.3244, "step": 35284 }, { "epoch": 3.5873322488816592, "grad_norm": 0.2621513605117798, "learning_rate": 2.2408033841030507e-06, "loss": 0.2899, "step": 35285 }, { "epoch": 3.587433916226108, "grad_norm": 0.26524242758750916, "learning_rate": 2.2405074337284955e-06, "loss": 0.3215, "step": 35286 }, { "epoch": 3.587535583570557, "grad_norm": 0.2728117108345032, "learning_rate": 2.2402114972556077e-06, "loss": 0.3218, "step": 35287 }, { "epoch": 3.587637250915006, "grad_norm": 0.2728751003742218, "learning_rate": 2.2399155746858763e-06, "loss": 0.3204, "step": 35288 }, { "epoch": 3.587738918259455, "grad_norm": 0.28356480598449707, "learning_rate": 2.239619666020797e-06, "loss": 0.3057, "step": 35289 }, { "epoch": 3.587840585603904, "grad_norm": 0.2757517993450165, "learning_rate": 2.2393237712618575e-06, "loss": 0.2898, "step": 35290 }, { "epoch": 3.587942252948353, "grad_norm": 0.26974067091941833, "learning_rate": 2.239027890410549e-06, "loss": 0.2819, "step": 35291 }, { "epoch": 3.588043920292802, "grad_norm": 0.31502819061279297, "learning_rate": 2.2387320234683597e-06, "loss": 0.2929, "step": 35292 }, { "epoch": 3.5881455876372508, "grad_norm": 0.27930182218551636, "learning_rate": 2.2384361704367846e-06, "loss": 0.3288, "step": 35293 }, { "epoch": 3.5882472549816997, "grad_norm": 0.27434608340263367, "learning_rate": 2.238140331317311e-06, "loss": 0.299, "step": 35294 }, { "epoch": 3.5883489223261487, "grad_norm": 0.2800407409667969, "learning_rate": 2.237844506111429e-06, "loss": 0.3115, "step": 35295 }, { "epoch": 3.5884505896705976, "grad_norm": 0.30414825677871704, "learning_rate": 2.2375486948206313e-06, "loss": 0.324, "step": 35296 }, { "epoch": 3.5885522570150465, "grad_norm": 0.27630922198295593, "learning_rate": 2.2372528974464074e-06, "loss": 0.3074, "step": 35297 }, { "epoch": 3.5886539243594955, "grad_norm": 0.2646423280239105, "learning_rate": 2.236957113990245e-06, "loss": 0.347, "step": 35298 }, { "epoch": 3.5887555917039444, "grad_norm": 0.2970454692840576, "learning_rate": 2.236661344453638e-06, "loss": 0.3518, "step": 35299 }, { "epoch": 3.588857259048394, "grad_norm": 0.27065765857696533, "learning_rate": 2.2363655888380734e-06, "loss": 0.3457, "step": 35300 }, { "epoch": 3.5889589263928428, "grad_norm": 0.27393078804016113, "learning_rate": 2.236069847145043e-06, "loss": 0.2994, "step": 35301 }, { "epoch": 3.5890605937372917, "grad_norm": 0.2857866883277893, "learning_rate": 2.235774119376034e-06, "loss": 0.3257, "step": 35302 }, { "epoch": 3.5891622610817406, "grad_norm": 0.2700377404689789, "learning_rate": 2.23547840553254e-06, "loss": 0.3289, "step": 35303 }, { "epoch": 3.5892639284261896, "grad_norm": 0.2917034327983856, "learning_rate": 2.2351827056160492e-06, "loss": 0.3141, "step": 35304 }, { "epoch": 3.5893655957706385, "grad_norm": 0.2839367687702179, "learning_rate": 2.2348870196280487e-06, "loss": 0.2908, "step": 35305 }, { "epoch": 3.5894672631150875, "grad_norm": 0.2870500981807709, "learning_rate": 2.2345913475700322e-06, "loss": 0.3216, "step": 35306 }, { "epoch": 3.5895689304595364, "grad_norm": 0.29911550879478455, "learning_rate": 2.234295689443487e-06, "loss": 0.3, "step": 35307 }, { "epoch": 3.5896705978039853, "grad_norm": 0.2596920430660248, "learning_rate": 2.234000045249901e-06, "loss": 0.3221, "step": 35308 }, { "epoch": 3.5897722651484343, "grad_norm": 0.27904176712036133, "learning_rate": 2.2337044149907677e-06, "loss": 0.3169, "step": 35309 }, { "epoch": 3.5898739324928832, "grad_norm": 0.26934173703193665, "learning_rate": 2.2334087986675746e-06, "loss": 0.3252, "step": 35310 }, { "epoch": 3.589975599837332, "grad_norm": 0.3123086392879486, "learning_rate": 2.23311319628181e-06, "loss": 0.3236, "step": 35311 }, { "epoch": 3.590077267181781, "grad_norm": 0.29320210218429565, "learning_rate": 2.2328176078349615e-06, "loss": 0.3043, "step": 35312 }, { "epoch": 3.59017893452623, "grad_norm": 0.29060098528862, "learning_rate": 2.2325220333285226e-06, "loss": 0.2933, "step": 35313 }, { "epoch": 3.590280601870679, "grad_norm": 0.27102217078208923, "learning_rate": 2.2322264727639796e-06, "loss": 0.314, "step": 35314 }, { "epoch": 3.5903822692151284, "grad_norm": 0.2650987207889557, "learning_rate": 2.2319309261428207e-06, "loss": 0.3018, "step": 35315 }, { "epoch": 3.5904839365595773, "grad_norm": 0.26461315155029297, "learning_rate": 2.2316353934665376e-06, "loss": 0.3354, "step": 35316 }, { "epoch": 3.5905856039040263, "grad_norm": 0.2709794342517853, "learning_rate": 2.2313398747366173e-06, "loss": 0.3329, "step": 35317 }, { "epoch": 3.590687271248475, "grad_norm": 0.26282018423080444, "learning_rate": 2.231044369954547e-06, "loss": 0.2865, "step": 35318 }, { "epoch": 3.590788938592924, "grad_norm": 0.2725953161716461, "learning_rate": 2.230748879121819e-06, "loss": 0.3081, "step": 35319 }, { "epoch": 3.590890605937373, "grad_norm": 0.27763959765434265, "learning_rate": 2.2304534022399198e-06, "loss": 0.3315, "step": 35320 }, { "epoch": 3.590992273281822, "grad_norm": 0.2606283128261566, "learning_rate": 2.230157939310339e-06, "loss": 0.2781, "step": 35321 }, { "epoch": 3.591093940626271, "grad_norm": 0.26850491762161255, "learning_rate": 2.2298624903345616e-06, "loss": 0.3198, "step": 35322 }, { "epoch": 3.59119560797072, "grad_norm": 0.298799067735672, "learning_rate": 2.229567055314081e-06, "loss": 0.2922, "step": 35323 }, { "epoch": 3.591297275315169, "grad_norm": 0.28989383578300476, "learning_rate": 2.2292716342503836e-06, "loss": 0.3212, "step": 35324 }, { "epoch": 3.591398942659618, "grad_norm": 0.3003354072570801, "learning_rate": 2.2289762271449545e-06, "loss": 0.2778, "step": 35325 }, { "epoch": 3.5915006100040667, "grad_norm": 0.29196420311927795, "learning_rate": 2.2286808339992873e-06, "loss": 0.3002, "step": 35326 }, { "epoch": 3.5916022773485157, "grad_norm": 0.2839623689651489, "learning_rate": 2.228385454814867e-06, "loss": 0.3044, "step": 35327 }, { "epoch": 3.5917039446929646, "grad_norm": 0.27471795678138733, "learning_rate": 2.22809008959318e-06, "loss": 0.28, "step": 35328 }, { "epoch": 3.5918056120374136, "grad_norm": 0.27287980914115906, "learning_rate": 2.227794738335719e-06, "loss": 0.3032, "step": 35329 }, { "epoch": 3.5919072793818625, "grad_norm": 0.2835797071456909, "learning_rate": 2.227499401043969e-06, "loss": 0.3283, "step": 35330 }, { "epoch": 3.5920089467263114, "grad_norm": 0.27130600810050964, "learning_rate": 2.227204077719418e-06, "loss": 0.3063, "step": 35331 }, { "epoch": 3.5921106140707604, "grad_norm": 0.281329870223999, "learning_rate": 2.2269087683635527e-06, "loss": 0.3371, "step": 35332 }, { "epoch": 3.5922122814152093, "grad_norm": 0.28077277541160583, "learning_rate": 2.2266134729778633e-06, "loss": 0.3245, "step": 35333 }, { "epoch": 3.5923139487596583, "grad_norm": 0.2699141204357147, "learning_rate": 2.2263181915638366e-06, "loss": 0.3127, "step": 35334 }, { "epoch": 3.592415616104107, "grad_norm": 0.25455591082572937, "learning_rate": 2.2260229241229575e-06, "loss": 0.3118, "step": 35335 }, { "epoch": 3.592517283448556, "grad_norm": 0.27996018528938293, "learning_rate": 2.2257276706567177e-06, "loss": 0.2947, "step": 35336 }, { "epoch": 3.592618950793005, "grad_norm": 0.28738585114479065, "learning_rate": 2.2254324311666024e-06, "loss": 0.3111, "step": 35337 }, { "epoch": 3.592720618137454, "grad_norm": 0.2670234143733978, "learning_rate": 2.2251372056540977e-06, "loss": 0.3272, "step": 35338 }, { "epoch": 3.592822285481903, "grad_norm": 0.2566204369068146, "learning_rate": 2.2248419941206945e-06, "loss": 0.3023, "step": 35339 }, { "epoch": 3.592923952826352, "grad_norm": 0.28070732951164246, "learning_rate": 2.2245467965678775e-06, "loss": 0.2599, "step": 35340 }, { "epoch": 3.5930256201708013, "grad_norm": 0.28455671668052673, "learning_rate": 2.224251612997134e-06, "loss": 0.3293, "step": 35341 }, { "epoch": 3.5931272875152502, "grad_norm": 0.28417035937309265, "learning_rate": 2.223956443409949e-06, "loss": 0.3121, "step": 35342 }, { "epoch": 3.593228954859699, "grad_norm": 0.27412477135658264, "learning_rate": 2.2236612878078147e-06, "loss": 0.3245, "step": 35343 }, { "epoch": 3.593330622204148, "grad_norm": 0.26821815967559814, "learning_rate": 2.223366146192214e-06, "loss": 0.3171, "step": 35344 }, { "epoch": 3.593432289548597, "grad_norm": 0.27254602313041687, "learning_rate": 2.2230710185646335e-06, "loss": 0.3214, "step": 35345 }, { "epoch": 3.593533956893046, "grad_norm": 0.2824193239212036, "learning_rate": 2.2227759049265634e-06, "loss": 0.3561, "step": 35346 }, { "epoch": 3.593635624237495, "grad_norm": 0.2935619652271271, "learning_rate": 2.222480805279488e-06, "loss": 0.2906, "step": 35347 }, { "epoch": 3.593737291581944, "grad_norm": 0.25003111362457275, "learning_rate": 2.2221857196248927e-06, "loss": 0.2881, "step": 35348 }, { "epoch": 3.593838958926393, "grad_norm": 0.2786480486392975, "learning_rate": 2.2218906479642665e-06, "loss": 0.3178, "step": 35349 }, { "epoch": 3.5939406262708418, "grad_norm": 0.2688157260417938, "learning_rate": 2.221595590299096e-06, "loss": 0.2836, "step": 35350 }, { "epoch": 3.5940422936152907, "grad_norm": 0.29875102639198303, "learning_rate": 2.221300546630866e-06, "loss": 0.2995, "step": 35351 }, { "epoch": 3.5941439609597396, "grad_norm": 0.28262490034103394, "learning_rate": 2.2210055169610627e-06, "loss": 0.3162, "step": 35352 }, { "epoch": 3.5942456283041886, "grad_norm": 0.30203574895858765, "learning_rate": 2.2207105012911744e-06, "loss": 0.293, "step": 35353 }, { "epoch": 3.5943472956486375, "grad_norm": 0.2771929204463959, "learning_rate": 2.220415499622686e-06, "loss": 0.305, "step": 35354 }, { "epoch": 3.5944489629930865, "grad_norm": 0.28693631291389465, "learning_rate": 2.2201205119570825e-06, "loss": 0.3402, "step": 35355 }, { "epoch": 3.594550630337536, "grad_norm": 0.26728492975234985, "learning_rate": 2.219825538295853e-06, "loss": 0.2889, "step": 35356 }, { "epoch": 3.594652297681985, "grad_norm": 0.2809443175792694, "learning_rate": 2.219530578640481e-06, "loss": 0.2994, "step": 35357 }, { "epoch": 3.5947539650264337, "grad_norm": 0.26471346616744995, "learning_rate": 2.219235632992452e-06, "loss": 0.3094, "step": 35358 }, { "epoch": 3.5948556323708827, "grad_norm": 0.2922874391078949, "learning_rate": 2.218940701353255e-06, "loss": 0.2856, "step": 35359 }, { "epoch": 3.5949572997153316, "grad_norm": 0.28474143147468567, "learning_rate": 2.2186457837243737e-06, "loss": 0.2826, "step": 35360 }, { "epoch": 3.5950589670597806, "grad_norm": 0.30126953125, "learning_rate": 2.218350880107294e-06, "loss": 0.3232, "step": 35361 }, { "epoch": 3.5951606344042295, "grad_norm": 0.30135464668273926, "learning_rate": 2.2180559905034995e-06, "loss": 0.3414, "step": 35362 }, { "epoch": 3.5952623017486784, "grad_norm": 0.2729789614677429, "learning_rate": 2.2177611149144796e-06, "loss": 0.3258, "step": 35363 }, { "epoch": 3.5953639690931274, "grad_norm": 0.2768779993057251, "learning_rate": 2.2174662533417183e-06, "loss": 0.2908, "step": 35364 }, { "epoch": 3.5954656364375763, "grad_norm": 0.2874903678894043, "learning_rate": 2.217171405786699e-06, "loss": 0.3303, "step": 35365 }, { "epoch": 3.5955673037820253, "grad_norm": 0.29201143980026245, "learning_rate": 2.2168765722509107e-06, "loss": 0.2862, "step": 35366 }, { "epoch": 3.595668971126474, "grad_norm": 0.25055086612701416, "learning_rate": 2.216581752735836e-06, "loss": 0.3269, "step": 35367 }, { "epoch": 3.595770638470923, "grad_norm": 0.2725992798805237, "learning_rate": 2.2162869472429604e-06, "loss": 0.3274, "step": 35368 }, { "epoch": 3.595872305815372, "grad_norm": 0.26722392439842224, "learning_rate": 2.2159921557737714e-06, "loss": 0.2773, "step": 35369 }, { "epoch": 3.595973973159821, "grad_norm": 0.2891303598880768, "learning_rate": 2.2156973783297514e-06, "loss": 0.2965, "step": 35370 }, { "epoch": 3.59607564050427, "grad_norm": 0.2873413562774658, "learning_rate": 2.215402614912387e-06, "loss": 0.2903, "step": 35371 }, { "epoch": 3.596177307848719, "grad_norm": 0.27779731154441833, "learning_rate": 2.2151078655231604e-06, "loss": 0.325, "step": 35372 }, { "epoch": 3.596278975193168, "grad_norm": 0.2632025480270386, "learning_rate": 2.2148131301635607e-06, "loss": 0.2831, "step": 35373 }, { "epoch": 3.596380642537617, "grad_norm": 0.26587170362472534, "learning_rate": 2.2145184088350707e-06, "loss": 0.3192, "step": 35374 }, { "epoch": 3.5964823098820657, "grad_norm": 0.2649727165699005, "learning_rate": 2.2142237015391727e-06, "loss": 0.3185, "step": 35375 }, { "epoch": 3.5965839772265147, "grad_norm": 0.2679590582847595, "learning_rate": 2.2139290082773557e-06, "loss": 0.2991, "step": 35376 }, { "epoch": 3.5966856445709636, "grad_norm": 0.2574036419391632, "learning_rate": 2.2136343290511025e-06, "loss": 0.3489, "step": 35377 }, { "epoch": 3.5967873119154126, "grad_norm": 0.24009384214878082, "learning_rate": 2.2133396638618955e-06, "loss": 0.3019, "step": 35378 }, { "epoch": 3.5968889792598615, "grad_norm": 0.2856460511684418, "learning_rate": 2.213045012711223e-06, "loss": 0.3269, "step": 35379 }, { "epoch": 3.5969906466043104, "grad_norm": 0.27218249440193176, "learning_rate": 2.212750375600567e-06, "loss": 0.3291, "step": 35380 }, { "epoch": 3.5970923139487594, "grad_norm": 0.271662175655365, "learning_rate": 2.212455752531413e-06, "loss": 0.2855, "step": 35381 }, { "epoch": 3.5971939812932088, "grad_norm": 0.26362916827201843, "learning_rate": 2.2121611435052415e-06, "loss": 0.3143, "step": 35382 }, { "epoch": 3.5972956486376577, "grad_norm": 0.25390762090682983, "learning_rate": 2.2118665485235423e-06, "loss": 0.3038, "step": 35383 }, { "epoch": 3.5973973159821067, "grad_norm": 0.2720188796520233, "learning_rate": 2.2115719675877966e-06, "loss": 0.2884, "step": 35384 }, { "epoch": 3.5974989833265556, "grad_norm": 0.28148800134658813, "learning_rate": 2.2112774006994875e-06, "loss": 0.3037, "step": 35385 }, { "epoch": 3.5976006506710045, "grad_norm": 0.28222063183784485, "learning_rate": 2.210982847860103e-06, "loss": 0.3194, "step": 35386 }, { "epoch": 3.5977023180154535, "grad_norm": 0.2676686942577362, "learning_rate": 2.210688309071121e-06, "loss": 0.2911, "step": 35387 }, { "epoch": 3.5978039853599024, "grad_norm": 0.27716386318206787, "learning_rate": 2.210393784334028e-06, "loss": 0.296, "step": 35388 }, { "epoch": 3.5979056527043514, "grad_norm": 0.2872002124786377, "learning_rate": 2.21009927365031e-06, "loss": 0.2851, "step": 35389 }, { "epoch": 3.5980073200488003, "grad_norm": 0.2796119749546051, "learning_rate": 2.2098047770214497e-06, "loss": 0.3111, "step": 35390 }, { "epoch": 3.5981089873932492, "grad_norm": 0.27793946862220764, "learning_rate": 2.209510294448929e-06, "loss": 0.2989, "step": 35391 }, { "epoch": 3.598210654737698, "grad_norm": 0.2729139029979706, "learning_rate": 2.2092158259342313e-06, "loss": 0.3455, "step": 35392 }, { "epoch": 3.598312322082147, "grad_norm": 0.2930758595466614, "learning_rate": 2.208921371478842e-06, "loss": 0.3141, "step": 35393 }, { "epoch": 3.598413989426596, "grad_norm": 0.28975212574005127, "learning_rate": 2.2086269310842445e-06, "loss": 0.3312, "step": 35394 }, { "epoch": 3.598515656771045, "grad_norm": 0.283578485250473, "learning_rate": 2.208332504751919e-06, "loss": 0.3094, "step": 35395 }, { "epoch": 3.598617324115494, "grad_norm": 0.2748139500617981, "learning_rate": 2.208038092483354e-06, "loss": 0.3279, "step": 35396 }, { "epoch": 3.5987189914599433, "grad_norm": 0.26084551215171814, "learning_rate": 2.207743694280027e-06, "loss": 0.2948, "step": 35397 }, { "epoch": 3.5988206588043923, "grad_norm": 0.25713250041007996, "learning_rate": 2.207449310143425e-06, "loss": 0.2915, "step": 35398 }, { "epoch": 3.598922326148841, "grad_norm": 0.2826769948005676, "learning_rate": 2.207154940075029e-06, "loss": 0.3239, "step": 35399 }, { "epoch": 3.59902399349329, "grad_norm": 0.26886212825775146, "learning_rate": 2.206860584076322e-06, "loss": 0.3219, "step": 35400 }, { "epoch": 3.599125660837739, "grad_norm": 0.288501501083374, "learning_rate": 2.206566242148789e-06, "loss": 0.2969, "step": 35401 }, { "epoch": 3.599227328182188, "grad_norm": 0.27601832151412964, "learning_rate": 2.2062719142939098e-06, "loss": 0.2914, "step": 35402 }, { "epoch": 3.599328995526637, "grad_norm": 0.2873104512691498, "learning_rate": 2.205977600513172e-06, "loss": 0.323, "step": 35403 }, { "epoch": 3.599430662871086, "grad_norm": 0.2609410583972931, "learning_rate": 2.205683300808052e-06, "loss": 0.3318, "step": 35404 }, { "epoch": 3.599532330215535, "grad_norm": 0.2800207734107971, "learning_rate": 2.2053890151800357e-06, "loss": 0.2729, "step": 35405 }, { "epoch": 3.599633997559984, "grad_norm": 0.2895129323005676, "learning_rate": 2.2050947436306088e-06, "loss": 0.3009, "step": 35406 }, { "epoch": 3.5997356649044328, "grad_norm": 0.26094236969947815, "learning_rate": 2.2048004861612466e-06, "loss": 0.2883, "step": 35407 }, { "epoch": 3.5998373322488817, "grad_norm": 0.25630098581314087, "learning_rate": 2.204506242773437e-06, "loss": 0.2874, "step": 35408 }, { "epoch": 3.5999389995933306, "grad_norm": 0.27759963274002075, "learning_rate": 2.2042120134686606e-06, "loss": 0.315, "step": 35409 }, { "epoch": 3.6000406669377796, "grad_norm": 0.2853297293186188, "learning_rate": 2.2039177982483985e-06, "loss": 0.2915, "step": 35410 }, { "epoch": 3.6001423342822285, "grad_norm": 0.2660057842731476, "learning_rate": 2.203623597114135e-06, "loss": 0.2868, "step": 35411 }, { "epoch": 3.6002440016266775, "grad_norm": 0.27062588930130005, "learning_rate": 2.2033294100673504e-06, "loss": 0.3035, "step": 35412 }, { "epoch": 3.6003456689711264, "grad_norm": 0.30561575293540955, "learning_rate": 2.203035237109531e-06, "loss": 0.3153, "step": 35413 }, { "epoch": 3.6004473363155753, "grad_norm": 0.3013584315776825, "learning_rate": 2.2027410782421517e-06, "loss": 0.32, "step": 35414 }, { "epoch": 3.6005490036600243, "grad_norm": 0.28919678926467896, "learning_rate": 2.2024469334666975e-06, "loss": 0.3055, "step": 35415 }, { "epoch": 3.6006506710044732, "grad_norm": 0.23554278910160065, "learning_rate": 2.2021528027846557e-06, "loss": 0.3011, "step": 35416 }, { "epoch": 3.600752338348922, "grad_norm": 0.2658587396144867, "learning_rate": 2.2018586861974983e-06, "loss": 0.3352, "step": 35417 }, { "epoch": 3.600854005693371, "grad_norm": 0.27496615052223206, "learning_rate": 2.2015645837067145e-06, "loss": 0.3112, "step": 35418 }, { "epoch": 3.60095567303782, "grad_norm": 0.2714974582195282, "learning_rate": 2.201270495313783e-06, "loss": 0.3074, "step": 35419 }, { "epoch": 3.601057340382269, "grad_norm": 0.27846625447273254, "learning_rate": 2.2009764210201845e-06, "loss": 0.306, "step": 35420 }, { "epoch": 3.601159007726718, "grad_norm": 0.27516791224479675, "learning_rate": 2.2006823608274025e-06, "loss": 0.3194, "step": 35421 }, { "epoch": 3.601260675071167, "grad_norm": 0.2818305492401123, "learning_rate": 2.200388314736918e-06, "loss": 0.3382, "step": 35422 }, { "epoch": 3.6013623424156163, "grad_norm": 0.2680104076862335, "learning_rate": 2.200094282750212e-06, "loss": 0.2869, "step": 35423 }, { "epoch": 3.601464009760065, "grad_norm": 0.27658823132514954, "learning_rate": 2.1998002648687638e-06, "loss": 0.3466, "step": 35424 }, { "epoch": 3.601565677104514, "grad_norm": 0.2833373546600342, "learning_rate": 2.1995062610940567e-06, "loss": 0.2942, "step": 35425 }, { "epoch": 3.601667344448963, "grad_norm": 0.2526077926158905, "learning_rate": 2.1992122714275744e-06, "loss": 0.2963, "step": 35426 }, { "epoch": 3.601769011793412, "grad_norm": 0.28119346499443054, "learning_rate": 2.198918295870792e-06, "loss": 0.2976, "step": 35427 }, { "epoch": 3.601870679137861, "grad_norm": 0.31111857295036316, "learning_rate": 2.1986243344251957e-06, "loss": 0.3152, "step": 35428 }, { "epoch": 3.60197234648231, "grad_norm": 0.26567867398262024, "learning_rate": 2.1983303870922635e-06, "loss": 0.2887, "step": 35429 }, { "epoch": 3.602074013826759, "grad_norm": 0.2908344566822052, "learning_rate": 2.1980364538734756e-06, "loss": 0.3075, "step": 35430 }, { "epoch": 3.602175681171208, "grad_norm": 0.26986902952194214, "learning_rate": 2.1977425347703164e-06, "loss": 0.3169, "step": 35431 }, { "epoch": 3.6022773485156567, "grad_norm": 0.26748713850975037, "learning_rate": 2.197448629784264e-06, "loss": 0.3126, "step": 35432 }, { "epoch": 3.6023790158601057, "grad_norm": 0.2907527685165405, "learning_rate": 2.197154738916799e-06, "loss": 0.3155, "step": 35433 }, { "epoch": 3.6024806832045546, "grad_norm": 0.26671481132507324, "learning_rate": 2.196860862169401e-06, "loss": 0.3043, "step": 35434 }, { "epoch": 3.6025823505490036, "grad_norm": 0.2512812316417694, "learning_rate": 2.1965669995435517e-06, "loss": 0.3034, "step": 35435 }, { "epoch": 3.6026840178934525, "grad_norm": 0.2867695391178131, "learning_rate": 2.196273151040736e-06, "loss": 0.3071, "step": 35436 }, { "epoch": 3.6027856852379014, "grad_norm": 0.26653748750686646, "learning_rate": 2.1959793166624257e-06, "loss": 0.3007, "step": 35437 }, { "epoch": 3.602887352582351, "grad_norm": 0.2634047269821167, "learning_rate": 2.195685496410107e-06, "loss": 0.3097, "step": 35438 }, { "epoch": 3.6029890199267998, "grad_norm": 0.2630852460861206, "learning_rate": 2.1953916902852586e-06, "loss": 0.2971, "step": 35439 }, { "epoch": 3.6030906872712487, "grad_norm": 0.28421133756637573, "learning_rate": 2.1950978982893585e-06, "loss": 0.3173, "step": 35440 }, { "epoch": 3.6031923546156976, "grad_norm": 0.26393067836761475, "learning_rate": 2.1948041204238903e-06, "loss": 0.3221, "step": 35441 }, { "epoch": 3.6032940219601466, "grad_norm": 0.27465036511421204, "learning_rate": 2.194510356690332e-06, "loss": 0.2679, "step": 35442 }, { "epoch": 3.6033956893045955, "grad_norm": 0.2675480842590332, "learning_rate": 2.1942166070901644e-06, "loss": 0.3517, "step": 35443 }, { "epoch": 3.6034973566490445, "grad_norm": 0.2655291259288788, "learning_rate": 2.1939228716248646e-06, "loss": 0.3059, "step": 35444 }, { "epoch": 3.6035990239934934, "grad_norm": 0.27749529480934143, "learning_rate": 2.1936291502959166e-06, "loss": 0.3199, "step": 35445 }, { "epoch": 3.6037006913379424, "grad_norm": 0.2691628038883209, "learning_rate": 2.193335443104798e-06, "loss": 0.295, "step": 35446 }, { "epoch": 3.6038023586823913, "grad_norm": 0.27096840739250183, "learning_rate": 2.1930417500529866e-06, "loss": 0.3131, "step": 35447 }, { "epoch": 3.6039040260268402, "grad_norm": 0.2569572627544403, "learning_rate": 2.192748071141966e-06, "loss": 0.3138, "step": 35448 }, { "epoch": 3.604005693371289, "grad_norm": 0.2708819806575775, "learning_rate": 2.1924544063732126e-06, "loss": 0.3475, "step": 35449 }, { "epoch": 3.604107360715738, "grad_norm": 0.2647836208343506, "learning_rate": 2.1921607557482062e-06, "loss": 0.3001, "step": 35450 }, { "epoch": 3.604209028060187, "grad_norm": 0.28859180212020874, "learning_rate": 2.1918671192684277e-06, "loss": 0.3137, "step": 35451 }, { "epoch": 3.604310695404636, "grad_norm": 0.2938005328178406, "learning_rate": 2.191573496935355e-06, "loss": 0.2985, "step": 35452 }, { "epoch": 3.604412362749085, "grad_norm": 0.2720162570476532, "learning_rate": 2.191279888750468e-06, "loss": 0.2918, "step": 35453 }, { "epoch": 3.604514030093534, "grad_norm": 0.24721424281597137, "learning_rate": 2.1909862947152443e-06, "loss": 0.2975, "step": 35454 }, { "epoch": 3.604615697437983, "grad_norm": 0.2852206826210022, "learning_rate": 2.190692714831165e-06, "loss": 0.3148, "step": 35455 }, { "epoch": 3.6047173647824318, "grad_norm": 0.2990608513355255, "learning_rate": 2.190399149099709e-06, "loss": 0.3018, "step": 35456 }, { "epoch": 3.6048190321268807, "grad_norm": 0.2844744324684143, "learning_rate": 2.1901055975223515e-06, "loss": 0.314, "step": 35457 }, { "epoch": 3.6049206994713296, "grad_norm": 0.255837619304657, "learning_rate": 2.1898120601005774e-06, "loss": 0.3167, "step": 35458 }, { "epoch": 3.6050223668157786, "grad_norm": 0.28532397747039795, "learning_rate": 2.1895185368358617e-06, "loss": 0.3268, "step": 35459 }, { "epoch": 3.6051240341602275, "grad_norm": 0.27772754430770874, "learning_rate": 2.1892250277296816e-06, "loss": 0.3108, "step": 35460 }, { "epoch": 3.6052257015046765, "grad_norm": 0.2802581191062927, "learning_rate": 2.1889315327835202e-06, "loss": 0.3122, "step": 35461 }, { "epoch": 3.6053273688491254, "grad_norm": 0.2837528884410858, "learning_rate": 2.1886380519988536e-06, "loss": 0.3306, "step": 35462 }, { "epoch": 3.6054290361935744, "grad_norm": 0.29944491386413574, "learning_rate": 2.18834458537716e-06, "loss": 0.3009, "step": 35463 }, { "epoch": 3.6055307035380237, "grad_norm": 0.297977089881897, "learning_rate": 2.188051132919917e-06, "loss": 0.2588, "step": 35464 }, { "epoch": 3.6056323708824727, "grad_norm": 0.2709430158138275, "learning_rate": 2.1877576946286057e-06, "loss": 0.3675, "step": 35465 }, { "epoch": 3.6057340382269216, "grad_norm": 0.2623426914215088, "learning_rate": 2.187464270504703e-06, "loss": 0.3286, "step": 35466 }, { "epoch": 3.6058357055713706, "grad_norm": 0.2546481490135193, "learning_rate": 2.187170860549685e-06, "loss": 0.3079, "step": 35467 }, { "epoch": 3.6059373729158195, "grad_norm": 0.28686434030532837, "learning_rate": 2.1868774647650336e-06, "loss": 0.3053, "step": 35468 }, { "epoch": 3.6060390402602684, "grad_norm": 0.27161872386932373, "learning_rate": 2.186584083152225e-06, "loss": 0.2977, "step": 35469 }, { "epoch": 3.6061407076047174, "grad_norm": 0.25289997458457947, "learning_rate": 2.1862907157127355e-06, "loss": 0.3109, "step": 35470 }, { "epoch": 3.6062423749491663, "grad_norm": 0.2899373769760132, "learning_rate": 2.1859973624480464e-06, "loss": 0.31, "step": 35471 }, { "epoch": 3.6063440422936153, "grad_norm": 0.27724453806877136, "learning_rate": 2.1857040233596336e-06, "loss": 0.2997, "step": 35472 }, { "epoch": 3.606445709638064, "grad_norm": 0.26603540778160095, "learning_rate": 2.185410698448976e-06, "loss": 0.3159, "step": 35473 }, { "epoch": 3.606547376982513, "grad_norm": 0.3074105978012085, "learning_rate": 2.185117387717548e-06, "loss": 0.2792, "step": 35474 }, { "epoch": 3.606649044326962, "grad_norm": 0.2892059087753296, "learning_rate": 2.1848240911668317e-06, "loss": 0.305, "step": 35475 }, { "epoch": 3.606750711671411, "grad_norm": 0.27559009194374084, "learning_rate": 2.1845308087983027e-06, "loss": 0.28, "step": 35476 }, { "epoch": 3.60685237901586, "grad_norm": 0.2611163556575775, "learning_rate": 2.1842375406134365e-06, "loss": 0.3245, "step": 35477 }, { "epoch": 3.606954046360309, "grad_norm": 0.2786523997783661, "learning_rate": 2.183944286613714e-06, "loss": 0.3118, "step": 35478 }, { "epoch": 3.6070557137047583, "grad_norm": 0.27195248007774353, "learning_rate": 2.1836510468006118e-06, "loss": 0.3163, "step": 35479 }, { "epoch": 3.6071573810492072, "grad_norm": 0.28352823853492737, "learning_rate": 2.1833578211756034e-06, "loss": 0.2862, "step": 35480 }, { "epoch": 3.607259048393656, "grad_norm": 0.2575540840625763, "learning_rate": 2.1830646097401714e-06, "loss": 0.3262, "step": 35481 }, { "epoch": 3.607360715738105, "grad_norm": 0.2752617597579956, "learning_rate": 2.1827714124957906e-06, "loss": 0.2934, "step": 35482 }, { "epoch": 3.607462383082554, "grad_norm": 0.2612994611263275, "learning_rate": 2.182478229443938e-06, "loss": 0.2929, "step": 35483 }, { "epoch": 3.607564050427003, "grad_norm": 0.27875378727912903, "learning_rate": 2.1821850605860884e-06, "loss": 0.3082, "step": 35484 }, { "epoch": 3.607665717771452, "grad_norm": 0.277171790599823, "learning_rate": 2.1818919059237225e-06, "loss": 0.3195, "step": 35485 }, { "epoch": 3.607767385115901, "grad_norm": 0.25978511571884155, "learning_rate": 2.181598765458316e-06, "loss": 0.3287, "step": 35486 }, { "epoch": 3.60786905246035, "grad_norm": 0.2718208432197571, "learning_rate": 2.1813056391913435e-06, "loss": 0.2892, "step": 35487 }, { "epoch": 3.6079707198047988, "grad_norm": 0.2657647430896759, "learning_rate": 2.1810125271242845e-06, "loss": 0.2939, "step": 35488 }, { "epoch": 3.6080723871492477, "grad_norm": 0.292275607585907, "learning_rate": 2.1807194292586146e-06, "loss": 0.2834, "step": 35489 }, { "epoch": 3.6081740544936967, "grad_norm": 0.28416672348976135, "learning_rate": 2.180426345595809e-06, "loss": 0.3108, "step": 35490 }, { "epoch": 3.6082757218381456, "grad_norm": 0.27312469482421875, "learning_rate": 2.180133276137347e-06, "loss": 0.3381, "step": 35491 }, { "epoch": 3.6083773891825945, "grad_norm": 0.24194398522377014, "learning_rate": 2.179840220884703e-06, "loss": 0.2991, "step": 35492 }, { "epoch": 3.6084790565270435, "grad_norm": 0.2839911878108978, "learning_rate": 2.179547179839354e-06, "loss": 0.3102, "step": 35493 }, { "epoch": 3.6085807238714924, "grad_norm": 0.28038233518600464, "learning_rate": 2.1792541530027745e-06, "loss": 0.302, "step": 35494 }, { "epoch": 3.6086823912159414, "grad_norm": 0.2971612215042114, "learning_rate": 2.178961140376444e-06, "loss": 0.3063, "step": 35495 }, { "epoch": 3.6087840585603903, "grad_norm": 0.3122391998767853, "learning_rate": 2.178668141961837e-06, "loss": 0.3086, "step": 35496 }, { "epoch": 3.6088857259048392, "grad_norm": 0.28000375628471375, "learning_rate": 2.1783751577604274e-06, "loss": 0.3024, "step": 35497 }, { "epoch": 3.608987393249288, "grad_norm": 0.24820831418037415, "learning_rate": 2.1780821877736954e-06, "loss": 0.3088, "step": 35498 }, { "epoch": 3.609089060593737, "grad_norm": 0.2744608521461487, "learning_rate": 2.177789232003114e-06, "loss": 0.2945, "step": 35499 }, { "epoch": 3.609190727938186, "grad_norm": 0.273111492395401, "learning_rate": 2.177496290450159e-06, "loss": 0.3481, "step": 35500 }, { "epoch": 3.609292395282635, "grad_norm": 0.28001663088798523, "learning_rate": 2.177203363116308e-06, "loss": 0.2954, "step": 35501 }, { "epoch": 3.609394062627084, "grad_norm": 0.27088671922683716, "learning_rate": 2.1769104500030356e-06, "loss": 0.3138, "step": 35502 }, { "epoch": 3.609495729971533, "grad_norm": 0.3163151144981384, "learning_rate": 2.176617551111818e-06, "loss": 0.3177, "step": 35503 }, { "epoch": 3.609597397315982, "grad_norm": 0.26719680428504944, "learning_rate": 2.1763246664441283e-06, "loss": 0.3225, "step": 35504 }, { "epoch": 3.6096990646604312, "grad_norm": 0.2772848308086395, "learning_rate": 2.1760317960014456e-06, "loss": 0.3296, "step": 35505 }, { "epoch": 3.60980073200488, "grad_norm": 0.2777366042137146, "learning_rate": 2.1757389397852437e-06, "loss": 0.3132, "step": 35506 }, { "epoch": 3.609902399349329, "grad_norm": 0.29455408453941345, "learning_rate": 2.175446097796996e-06, "loss": 0.2937, "step": 35507 }, { "epoch": 3.610004066693778, "grad_norm": 0.28091490268707275, "learning_rate": 2.1751532700381818e-06, "loss": 0.3183, "step": 35508 }, { "epoch": 3.610105734038227, "grad_norm": 0.2572627663612366, "learning_rate": 2.1748604565102737e-06, "loss": 0.32, "step": 35509 }, { "epoch": 3.610207401382676, "grad_norm": 0.2562831938266754, "learning_rate": 2.1745676572147455e-06, "loss": 0.2997, "step": 35510 }, { "epoch": 3.610309068727125, "grad_norm": 0.32369545102119446, "learning_rate": 2.1742748721530758e-06, "loss": 0.329, "step": 35511 }, { "epoch": 3.610410736071574, "grad_norm": 0.2567090094089508, "learning_rate": 2.1739821013267383e-06, "loss": 0.2864, "step": 35512 }, { "epoch": 3.6105124034160228, "grad_norm": 0.27733999490737915, "learning_rate": 2.1736893447372066e-06, "loss": 0.2828, "step": 35513 }, { "epoch": 3.6106140707604717, "grad_norm": 0.2894275486469269, "learning_rate": 2.1733966023859548e-06, "loss": 0.312, "step": 35514 }, { "epoch": 3.6107157381049206, "grad_norm": 0.2869970202445984, "learning_rate": 2.1731038742744614e-06, "loss": 0.288, "step": 35515 }, { "epoch": 3.6108174054493696, "grad_norm": 0.2657563090324402, "learning_rate": 2.1728111604041986e-06, "loss": 0.3132, "step": 35516 }, { "epoch": 3.6109190727938185, "grad_norm": 0.24830806255340576, "learning_rate": 2.17251846077664e-06, "loss": 0.2896, "step": 35517 }, { "epoch": 3.6110207401382675, "grad_norm": 0.25764450430870056, "learning_rate": 2.1722257753932623e-06, "loss": 0.3161, "step": 35518 }, { "epoch": 3.6111224074827164, "grad_norm": 0.2553790807723999, "learning_rate": 2.1719331042555402e-06, "loss": 0.29, "step": 35519 }, { "epoch": 3.611224074827166, "grad_norm": 0.29309263825416565, "learning_rate": 2.171640447364945e-06, "loss": 0.3531, "step": 35520 }, { "epoch": 3.6113257421716147, "grad_norm": 0.2948245108127594, "learning_rate": 2.1713478047229545e-06, "loss": 0.3156, "step": 35521 }, { "epoch": 3.6114274095160637, "grad_norm": 0.26490285992622375, "learning_rate": 2.1710551763310415e-06, "loss": 0.3089, "step": 35522 }, { "epoch": 3.6115290768605126, "grad_norm": 0.28206926584243774, "learning_rate": 2.1707625621906806e-06, "loss": 0.2966, "step": 35523 }, { "epoch": 3.6116307442049616, "grad_norm": 0.26949477195739746, "learning_rate": 2.1704699623033436e-06, "loss": 0.3088, "step": 35524 }, { "epoch": 3.6117324115494105, "grad_norm": 0.27686774730682373, "learning_rate": 2.170177376670509e-06, "loss": 0.3068, "step": 35525 }, { "epoch": 3.6118340788938594, "grad_norm": 0.2636173665523529, "learning_rate": 2.1698848052936477e-06, "loss": 0.3171, "step": 35526 }, { "epoch": 3.6119357462383084, "grad_norm": 0.27891048789024353, "learning_rate": 2.1695922481742328e-06, "loss": 0.2857, "step": 35527 }, { "epoch": 3.6120374135827573, "grad_norm": 0.228233203291893, "learning_rate": 2.1692997053137404e-06, "loss": 0.3301, "step": 35528 }, { "epoch": 3.6121390809272063, "grad_norm": 0.2736183702945709, "learning_rate": 2.1690071767136444e-06, "loss": 0.3048, "step": 35529 }, { "epoch": 3.612240748271655, "grad_norm": 0.27080461382865906, "learning_rate": 2.1687146623754153e-06, "loss": 0.3074, "step": 35530 }, { "epoch": 3.612342415616104, "grad_norm": 0.2823924422264099, "learning_rate": 2.1684221623005307e-06, "loss": 0.2755, "step": 35531 }, { "epoch": 3.612444082960553, "grad_norm": 0.2581368684768677, "learning_rate": 2.1681296764904623e-06, "loss": 0.3403, "step": 35532 }, { "epoch": 3.612545750305002, "grad_norm": 0.2678057551383972, "learning_rate": 2.167837204946684e-06, "loss": 0.2995, "step": 35533 }, { "epoch": 3.612647417649451, "grad_norm": 0.26790910959243774, "learning_rate": 2.167544747670667e-06, "loss": 0.3309, "step": 35534 }, { "epoch": 3.6127490849939, "grad_norm": 0.2582615315914154, "learning_rate": 2.1672523046638877e-06, "loss": 0.3222, "step": 35535 }, { "epoch": 3.612850752338349, "grad_norm": 0.28045064210891724, "learning_rate": 2.1669598759278183e-06, "loss": 0.2779, "step": 35536 }, { "epoch": 3.612952419682798, "grad_norm": 0.28301993012428284, "learning_rate": 2.1666674614639305e-06, "loss": 0.3239, "step": 35537 }, { "epoch": 3.6130540870272467, "grad_norm": 0.291561096906662, "learning_rate": 2.1663750612737015e-06, "loss": 0.314, "step": 35538 }, { "epoch": 3.6131557543716957, "grad_norm": 0.2671922743320465, "learning_rate": 2.166082675358599e-06, "loss": 0.2951, "step": 35539 }, { "epoch": 3.6132574217161446, "grad_norm": 0.2644280791282654, "learning_rate": 2.1657903037200976e-06, "loss": 0.3054, "step": 35540 }, { "epoch": 3.6133590890605936, "grad_norm": 0.26903748512268066, "learning_rate": 2.1654979463596737e-06, "loss": 0.2772, "step": 35541 }, { "epoch": 3.6134607564050425, "grad_norm": 0.2598446309566498, "learning_rate": 2.1652056032787972e-06, "loss": 0.3277, "step": 35542 }, { "epoch": 3.6135624237494914, "grad_norm": 0.27310678362846375, "learning_rate": 2.1649132744789415e-06, "loss": 0.3295, "step": 35543 }, { "epoch": 3.6136640910939404, "grad_norm": 0.27691924571990967, "learning_rate": 2.1646209599615763e-06, "loss": 0.3112, "step": 35544 }, { "epoch": 3.6137657584383893, "grad_norm": 0.26871979236602783, "learning_rate": 2.16432865972818e-06, "loss": 0.3058, "step": 35545 }, { "epoch": 3.6138674257828387, "grad_norm": 0.30314865708351135, "learning_rate": 2.164036373780221e-06, "loss": 0.3589, "step": 35546 }, { "epoch": 3.6139690931272876, "grad_norm": 0.2779570519924164, "learning_rate": 2.1637441021191714e-06, "loss": 0.3638, "step": 35547 }, { "epoch": 3.6140707604717366, "grad_norm": 0.27121055126190186, "learning_rate": 2.1634518447465087e-06, "loss": 0.3021, "step": 35548 }, { "epoch": 3.6141724278161855, "grad_norm": 0.2884047329425812, "learning_rate": 2.163159601663698e-06, "loss": 0.3517, "step": 35549 }, { "epoch": 3.6142740951606345, "grad_norm": 0.29856735467910767, "learning_rate": 2.162867372872215e-06, "loss": 0.3024, "step": 35550 }, { "epoch": 3.6143757625050834, "grad_norm": 0.27350783348083496, "learning_rate": 2.1625751583735353e-06, "loss": 0.3676, "step": 35551 }, { "epoch": 3.6144774298495324, "grad_norm": 0.2822556793689728, "learning_rate": 2.1622829581691248e-06, "loss": 0.3242, "step": 35552 }, { "epoch": 3.6145790971939813, "grad_norm": 0.2699891924858093, "learning_rate": 2.1619907722604594e-06, "loss": 0.3218, "step": 35553 }, { "epoch": 3.6146807645384302, "grad_norm": 0.3037971258163452, "learning_rate": 2.161698600649009e-06, "loss": 0.29, "step": 35554 }, { "epoch": 3.614782431882879, "grad_norm": 0.2552793323993683, "learning_rate": 2.1614064433362475e-06, "loss": 0.3109, "step": 35555 }, { "epoch": 3.614884099227328, "grad_norm": 0.28654950857162476, "learning_rate": 2.1611143003236464e-06, "loss": 0.2831, "step": 35556 }, { "epoch": 3.614985766571777, "grad_norm": 0.2827749252319336, "learning_rate": 2.160822171612675e-06, "loss": 0.3356, "step": 35557 }, { "epoch": 3.615087433916226, "grad_norm": 0.27197888493537903, "learning_rate": 2.1605300572048104e-06, "loss": 0.3194, "step": 35558 }, { "epoch": 3.615189101260675, "grad_norm": 0.27550098299980164, "learning_rate": 2.160237957101517e-06, "loss": 0.266, "step": 35559 }, { "epoch": 3.615290768605124, "grad_norm": 0.2882360816001892, "learning_rate": 2.15994587130427e-06, "loss": 0.294, "step": 35560 }, { "epoch": 3.6153924359495733, "grad_norm": 0.26103293895721436, "learning_rate": 2.159653799814544e-06, "loss": 0.3248, "step": 35561 }, { "epoch": 3.615494103294022, "grad_norm": 0.2605048418045044, "learning_rate": 2.1593617426338038e-06, "loss": 0.3352, "step": 35562 }, { "epoch": 3.615595770638471, "grad_norm": 0.2962391674518585, "learning_rate": 2.1590696997635258e-06, "loss": 0.2769, "step": 35563 }, { "epoch": 3.61569743798292, "grad_norm": 0.27154409885406494, "learning_rate": 2.1587776712051776e-06, "loss": 0.3054, "step": 35564 }, { "epoch": 3.615799105327369, "grad_norm": 0.30624881386756897, "learning_rate": 2.158485656960235e-06, "loss": 0.3188, "step": 35565 }, { "epoch": 3.615900772671818, "grad_norm": 0.2851235866546631, "learning_rate": 2.1581936570301655e-06, "loss": 0.3077, "step": 35566 }, { "epoch": 3.616002440016267, "grad_norm": 0.27530330419540405, "learning_rate": 2.15790167141644e-06, "loss": 0.3378, "step": 35567 }, { "epoch": 3.616104107360716, "grad_norm": 0.25294190645217896, "learning_rate": 2.157609700120533e-06, "loss": 0.2734, "step": 35568 }, { "epoch": 3.616205774705165, "grad_norm": 0.2774762511253357, "learning_rate": 2.1573177431439102e-06, "loss": 0.3265, "step": 35569 }, { "epoch": 3.6163074420496137, "grad_norm": 0.26363152265548706, "learning_rate": 2.1570258004880446e-06, "loss": 0.3177, "step": 35570 }, { "epoch": 3.6164091093940627, "grad_norm": 0.2587534487247467, "learning_rate": 2.1567338721544112e-06, "loss": 0.2827, "step": 35571 }, { "epoch": 3.6165107767385116, "grad_norm": 0.26910650730133057, "learning_rate": 2.1564419581444735e-06, "loss": 0.2825, "step": 35572 }, { "epoch": 3.6166124440829606, "grad_norm": 0.26530903577804565, "learning_rate": 2.1561500584597073e-06, "loss": 0.2682, "step": 35573 }, { "epoch": 3.6167141114274095, "grad_norm": 0.26062965393066406, "learning_rate": 2.155858173101581e-06, "loss": 0.2861, "step": 35574 }, { "epoch": 3.6168157787718584, "grad_norm": 0.28440335392951965, "learning_rate": 2.1555663020715634e-06, "loss": 0.2838, "step": 35575 }, { "epoch": 3.6169174461163074, "grad_norm": 0.27899834513664246, "learning_rate": 2.155274445371129e-06, "loss": 0.2942, "step": 35576 }, { "epoch": 3.6170191134607563, "grad_norm": 0.2889142632484436, "learning_rate": 2.154982603001744e-06, "loss": 0.3318, "step": 35577 }, { "epoch": 3.6171207808052053, "grad_norm": 0.27032986283302307, "learning_rate": 2.154690774964884e-06, "loss": 0.3323, "step": 35578 }, { "epoch": 3.617222448149654, "grad_norm": 0.28229251503944397, "learning_rate": 2.1543989612620127e-06, "loss": 0.2789, "step": 35579 }, { "epoch": 3.617324115494103, "grad_norm": 0.2856041193008423, "learning_rate": 2.1541071618946026e-06, "loss": 0.313, "step": 35580 }, { "epoch": 3.617425782838552, "grad_norm": 0.2867896556854248, "learning_rate": 2.1538153768641285e-06, "loss": 0.3079, "step": 35581 }, { "epoch": 3.617527450183001, "grad_norm": 0.28029078245162964, "learning_rate": 2.153523606172052e-06, "loss": 0.317, "step": 35582 }, { "epoch": 3.61762911752745, "grad_norm": 0.26674509048461914, "learning_rate": 2.153231849819849e-06, "loss": 0.341, "step": 35583 }, { "epoch": 3.617730784871899, "grad_norm": 0.2558390498161316, "learning_rate": 2.1529401078089877e-06, "loss": 0.2948, "step": 35584 }, { "epoch": 3.617832452216348, "grad_norm": 0.2593258321285248, "learning_rate": 2.1526483801409375e-06, "loss": 0.3069, "step": 35585 }, { "epoch": 3.617934119560797, "grad_norm": 0.2514316141605377, "learning_rate": 2.152356666817166e-06, "loss": 0.3326, "step": 35586 }, { "epoch": 3.618035786905246, "grad_norm": 0.2840437591075897, "learning_rate": 2.152064967839145e-06, "loss": 0.303, "step": 35587 }, { "epoch": 3.618137454249695, "grad_norm": 0.2629640996456146, "learning_rate": 2.151773283208348e-06, "loss": 0.3086, "step": 35588 }, { "epoch": 3.618239121594144, "grad_norm": 0.28306692838668823, "learning_rate": 2.1514816129262367e-06, "loss": 0.3444, "step": 35589 }, { "epoch": 3.618340788938593, "grad_norm": 0.2697904706001282, "learning_rate": 2.1511899569942856e-06, "loss": 0.3062, "step": 35590 }, { "epoch": 3.618442456283042, "grad_norm": 0.27574121952056885, "learning_rate": 2.1508983154139627e-06, "loss": 0.2826, "step": 35591 }, { "epoch": 3.618544123627491, "grad_norm": 0.27558714151382446, "learning_rate": 2.1506066881867355e-06, "loss": 0.3289, "step": 35592 }, { "epoch": 3.61864579097194, "grad_norm": 0.27244776487350464, "learning_rate": 2.150315075314076e-06, "loss": 0.317, "step": 35593 }, { "epoch": 3.6187474583163888, "grad_norm": 0.25230395793914795, "learning_rate": 2.150023476797452e-06, "loss": 0.2623, "step": 35594 }, { "epoch": 3.6188491256608377, "grad_norm": 0.26775166392326355, "learning_rate": 2.1497318926383325e-06, "loss": 0.32, "step": 35595 }, { "epoch": 3.6189507930052867, "grad_norm": 0.28482693433761597, "learning_rate": 2.149440322838185e-06, "loss": 0.3004, "step": 35596 }, { "epoch": 3.6190524603497356, "grad_norm": 0.26801764965057373, "learning_rate": 2.149148767398481e-06, "loss": 0.3005, "step": 35597 }, { "epoch": 3.6191541276941845, "grad_norm": 0.271851122379303, "learning_rate": 2.1488572263206887e-06, "loss": 0.3135, "step": 35598 }, { "epoch": 3.6192557950386335, "grad_norm": 0.26470494270324707, "learning_rate": 2.148565699606274e-06, "loss": 0.3148, "step": 35599 }, { "epoch": 3.6193574623830824, "grad_norm": 0.26663848757743835, "learning_rate": 2.1482741872567093e-06, "loss": 0.3024, "step": 35600 }, { "epoch": 3.6194591297275314, "grad_norm": 0.28365838527679443, "learning_rate": 2.1479826892734614e-06, "loss": 0.2888, "step": 35601 }, { "epoch": 3.6195607970719808, "grad_norm": 0.2668170928955078, "learning_rate": 2.147691205657997e-06, "loss": 0.3313, "step": 35602 }, { "epoch": 3.6196624644164297, "grad_norm": 0.27785617113113403, "learning_rate": 2.147399736411789e-06, "loss": 0.2954, "step": 35603 }, { "epoch": 3.6197641317608786, "grad_norm": 0.25988444685935974, "learning_rate": 2.147108281536303e-06, "loss": 0.3037, "step": 35604 }, { "epoch": 3.6198657991053276, "grad_norm": 0.286204069852829, "learning_rate": 2.1468168410330075e-06, "loss": 0.3119, "step": 35605 }, { "epoch": 3.6199674664497765, "grad_norm": 0.25615355372428894, "learning_rate": 2.1465254149033693e-06, "loss": 0.3003, "step": 35606 }, { "epoch": 3.6200691337942255, "grad_norm": 0.265708863735199, "learning_rate": 2.1462340031488594e-06, "loss": 0.3167, "step": 35607 }, { "epoch": 3.6201708011386744, "grad_norm": 0.3010318875312805, "learning_rate": 2.1459426057709443e-06, "loss": 0.2998, "step": 35608 }, { "epoch": 3.6202724684831233, "grad_norm": 0.2841794192790985, "learning_rate": 2.145651222771091e-06, "loss": 0.2901, "step": 35609 }, { "epoch": 3.6203741358275723, "grad_norm": 0.29096654057502747, "learning_rate": 2.14535985415077e-06, "loss": 0.3086, "step": 35610 }, { "epoch": 3.6204758031720212, "grad_norm": 0.29298192262649536, "learning_rate": 2.1450684999114475e-06, "loss": 0.3182, "step": 35611 }, { "epoch": 3.62057747051647, "grad_norm": 0.2569989562034607, "learning_rate": 2.144777160054589e-06, "loss": 0.2813, "step": 35612 }, { "epoch": 3.620679137860919, "grad_norm": 0.28128859400749207, "learning_rate": 2.1444858345816677e-06, "loss": 0.3123, "step": 35613 }, { "epoch": 3.620780805205368, "grad_norm": 0.28014716506004333, "learning_rate": 2.1441945234941475e-06, "loss": 0.3294, "step": 35614 }, { "epoch": 3.620882472549817, "grad_norm": 0.2599203884601593, "learning_rate": 2.1439032267934966e-06, "loss": 0.3346, "step": 35615 }, { "epoch": 3.620984139894266, "grad_norm": 0.2819679081439972, "learning_rate": 2.143611944481181e-06, "loss": 0.2933, "step": 35616 }, { "epoch": 3.621085807238715, "grad_norm": 0.2679259181022644, "learning_rate": 2.143320676558671e-06, "loss": 0.2877, "step": 35617 }, { "epoch": 3.621187474583164, "grad_norm": 0.28551486134529114, "learning_rate": 2.1430294230274335e-06, "loss": 0.3064, "step": 35618 }, { "epoch": 3.6212891419276128, "grad_norm": 0.26453495025634766, "learning_rate": 2.1427381838889322e-06, "loss": 0.3354, "step": 35619 }, { "epoch": 3.6213908092720617, "grad_norm": 0.26533767580986023, "learning_rate": 2.142446959144639e-06, "loss": 0.2767, "step": 35620 }, { "epoch": 3.6214924766165106, "grad_norm": 0.2656640112400055, "learning_rate": 2.142155748796019e-06, "loss": 0.3206, "step": 35621 }, { "epoch": 3.6215941439609596, "grad_norm": 0.2780703902244568, "learning_rate": 2.1418645528445375e-06, "loss": 0.3078, "step": 35622 }, { "epoch": 3.6216958113054085, "grad_norm": 0.300506055355072, "learning_rate": 2.141573371291665e-06, "loss": 0.2786, "step": 35623 }, { "epoch": 3.6217974786498575, "grad_norm": 0.299879252910614, "learning_rate": 2.1412822041388664e-06, "loss": 0.3109, "step": 35624 }, { "epoch": 3.6218991459943064, "grad_norm": 0.28046172857284546, "learning_rate": 2.140991051387609e-06, "loss": 0.3221, "step": 35625 }, { "epoch": 3.6220008133387553, "grad_norm": 0.27406129240989685, "learning_rate": 2.140699913039357e-06, "loss": 0.3237, "step": 35626 }, { "epoch": 3.6221024806832043, "grad_norm": 0.2625342905521393, "learning_rate": 2.1404087890955816e-06, "loss": 0.3004, "step": 35627 }, { "epoch": 3.6222041480276537, "grad_norm": 0.28450271487236023, "learning_rate": 2.140117679557747e-06, "loss": 0.3264, "step": 35628 }, { "epoch": 3.6223058153721026, "grad_norm": 0.29021087288856506, "learning_rate": 2.139826584427318e-06, "loss": 0.2852, "step": 35629 }, { "epoch": 3.6224074827165516, "grad_norm": 0.26718446612358093, "learning_rate": 2.1395355037057654e-06, "loss": 0.2831, "step": 35630 }, { "epoch": 3.6225091500610005, "grad_norm": 0.2707647383213043, "learning_rate": 2.1392444373945525e-06, "loss": 0.3293, "step": 35631 }, { "epoch": 3.6226108174054494, "grad_norm": 0.2611662447452545, "learning_rate": 2.1389533854951445e-06, "loss": 0.3416, "step": 35632 }, { "epoch": 3.6227124847498984, "grad_norm": 0.29488274455070496, "learning_rate": 2.1386623480090113e-06, "loss": 0.3365, "step": 35633 }, { "epoch": 3.6228141520943473, "grad_norm": 0.2656278610229492, "learning_rate": 2.1383713249376175e-06, "loss": 0.294, "step": 35634 }, { "epoch": 3.6229158194387963, "grad_norm": 0.31677892804145813, "learning_rate": 2.138080316282429e-06, "loss": 0.3107, "step": 35635 }, { "epoch": 3.623017486783245, "grad_norm": 0.2911395728588104, "learning_rate": 2.13778932204491e-06, "loss": 0.3112, "step": 35636 }, { "epoch": 3.623119154127694, "grad_norm": 0.2596358358860016, "learning_rate": 2.137498342226529e-06, "loss": 0.3129, "step": 35637 }, { "epoch": 3.623220821472143, "grad_norm": 0.2654002606868744, "learning_rate": 2.137207376828753e-06, "loss": 0.3006, "step": 35638 }, { "epoch": 3.623322488816592, "grad_norm": 0.29348835349082947, "learning_rate": 2.1369164258530426e-06, "loss": 0.3104, "step": 35639 }, { "epoch": 3.623424156161041, "grad_norm": 0.2751261591911316, "learning_rate": 2.136625489300869e-06, "loss": 0.3241, "step": 35640 }, { "epoch": 3.62352582350549, "grad_norm": 0.2646092176437378, "learning_rate": 2.1363345671736963e-06, "loss": 0.2987, "step": 35641 }, { "epoch": 3.623627490849939, "grad_norm": 0.2745071053504944, "learning_rate": 2.136043659472987e-06, "loss": 0.3491, "step": 35642 }, { "epoch": 3.6237291581943882, "grad_norm": 0.2631170153617859, "learning_rate": 2.1357527662002114e-06, "loss": 0.3264, "step": 35643 }, { "epoch": 3.623830825538837, "grad_norm": 0.27191439270973206, "learning_rate": 2.1354618873568326e-06, "loss": 0.3058, "step": 35644 }, { "epoch": 3.623932492883286, "grad_norm": 0.2813051640987396, "learning_rate": 2.1351710229443164e-06, "loss": 0.2993, "step": 35645 }, { "epoch": 3.624034160227735, "grad_norm": 0.28518712520599365, "learning_rate": 2.1348801729641257e-06, "loss": 0.3099, "step": 35646 }, { "epoch": 3.624135827572184, "grad_norm": 0.2777999937534332, "learning_rate": 2.1345893374177296e-06, "loss": 0.2974, "step": 35647 }, { "epoch": 3.624237494916633, "grad_norm": 0.2796717882156372, "learning_rate": 2.1342985163065914e-06, "loss": 0.3084, "step": 35648 }, { "epoch": 3.624339162261082, "grad_norm": 0.2772797644138336, "learning_rate": 2.1340077096321743e-06, "loss": 0.292, "step": 35649 }, { "epoch": 3.624440829605531, "grad_norm": 0.29175183176994324, "learning_rate": 2.1337169173959475e-06, "loss": 0.3087, "step": 35650 }, { "epoch": 3.6245424969499798, "grad_norm": 0.2960391342639923, "learning_rate": 2.133426139599374e-06, "loss": 0.321, "step": 35651 }, { "epoch": 3.6246441642944287, "grad_norm": 0.2885330617427826, "learning_rate": 2.1331353762439162e-06, "loss": 0.3056, "step": 35652 }, { "epoch": 3.6247458316388776, "grad_norm": 0.26922160387039185, "learning_rate": 2.132844627331043e-06, "loss": 0.2975, "step": 35653 }, { "epoch": 3.6248474989833266, "grad_norm": 0.2810862064361572, "learning_rate": 2.132553892862217e-06, "loss": 0.2979, "step": 35654 }, { "epoch": 3.6249491663277755, "grad_norm": 0.2774200439453125, "learning_rate": 2.1322631728389033e-06, "loss": 0.3378, "step": 35655 }, { "epoch": 3.6250508336722245, "grad_norm": 0.2754436135292053, "learning_rate": 2.131972467262564e-06, "loss": 0.3444, "step": 35656 }, { "epoch": 3.6251525010166734, "grad_norm": 0.27429696917533875, "learning_rate": 2.1316817761346687e-06, "loss": 0.3156, "step": 35657 }, { "epoch": 3.6252541683611224, "grad_norm": 0.2874314486980438, "learning_rate": 2.1313910994566783e-06, "loss": 0.2871, "step": 35658 }, { "epoch": 3.6253558357055713, "grad_norm": 0.2704067528247833, "learning_rate": 2.131100437230056e-06, "loss": 0.2961, "step": 35659 }, { "epoch": 3.6254575030500202, "grad_norm": 0.2631281614303589, "learning_rate": 2.13080978945627e-06, "loss": 0.2922, "step": 35660 }, { "epoch": 3.625559170394469, "grad_norm": 0.29350367188453674, "learning_rate": 2.130519156136782e-06, "loss": 0.3035, "step": 35661 }, { "epoch": 3.625660837738918, "grad_norm": 0.24098503589630127, "learning_rate": 2.130228537273055e-06, "loss": 0.2802, "step": 35662 }, { "epoch": 3.625762505083367, "grad_norm": 0.27028611302375793, "learning_rate": 2.1299379328665563e-06, "loss": 0.3134, "step": 35663 }, { "epoch": 3.625864172427816, "grad_norm": 0.2758052945137024, "learning_rate": 2.129647342918749e-06, "loss": 0.2861, "step": 35664 }, { "epoch": 3.625965839772265, "grad_norm": 0.28576910495758057, "learning_rate": 2.129356767431096e-06, "loss": 0.2952, "step": 35665 }, { "epoch": 3.626067507116714, "grad_norm": 0.27546021342277527, "learning_rate": 2.1290662064050593e-06, "loss": 0.317, "step": 35666 }, { "epoch": 3.626169174461163, "grad_norm": 0.26049014925956726, "learning_rate": 2.1287756598421068e-06, "loss": 0.3123, "step": 35667 }, { "epoch": 3.6262708418056118, "grad_norm": 0.292018324136734, "learning_rate": 2.1284851277437004e-06, "loss": 0.3346, "step": 35668 }, { "epoch": 3.626372509150061, "grad_norm": 0.2853347659111023, "learning_rate": 2.1281946101113018e-06, "loss": 0.3525, "step": 35669 }, { "epoch": 3.62647417649451, "grad_norm": 0.28933194279670715, "learning_rate": 2.1279041069463775e-06, "loss": 0.3078, "step": 35670 }, { "epoch": 3.626575843838959, "grad_norm": 0.2879911959171295, "learning_rate": 2.12761361825039e-06, "loss": 0.3224, "step": 35671 }, { "epoch": 3.626677511183408, "grad_norm": 0.27626240253448486, "learning_rate": 2.127323144024801e-06, "loss": 0.3025, "step": 35672 }, { "epoch": 3.626779178527857, "grad_norm": 0.2887488901615143, "learning_rate": 2.127032684271077e-06, "loss": 0.3224, "step": 35673 }, { "epoch": 3.626880845872306, "grad_norm": 0.27713996171951294, "learning_rate": 2.1267422389906796e-06, "loss": 0.3026, "step": 35674 }, { "epoch": 3.626982513216755, "grad_norm": 0.2652059495449066, "learning_rate": 2.126451808185072e-06, "loss": 0.2849, "step": 35675 }, { "epoch": 3.6270841805612037, "grad_norm": 0.3026026487350464, "learning_rate": 2.1261613918557158e-06, "loss": 0.3592, "step": 35676 }, { "epoch": 3.6271858479056527, "grad_norm": 0.2822388708591461, "learning_rate": 2.125870990004077e-06, "loss": 0.3386, "step": 35677 }, { "epoch": 3.6272875152501016, "grad_norm": 0.255794882774353, "learning_rate": 2.1255806026316174e-06, "loss": 0.3214, "step": 35678 }, { "epoch": 3.6273891825945506, "grad_norm": 0.28440216183662415, "learning_rate": 2.1252902297397975e-06, "loss": 0.305, "step": 35679 }, { "epoch": 3.6274908499389995, "grad_norm": 0.2778569757938385, "learning_rate": 2.1249998713300845e-06, "loss": 0.3307, "step": 35680 }, { "epoch": 3.6275925172834484, "grad_norm": 0.2979196012020111, "learning_rate": 2.1247095274039387e-06, "loss": 0.3335, "step": 35681 }, { "epoch": 3.6276941846278974, "grad_norm": 0.271494060754776, "learning_rate": 2.1244191979628215e-06, "loss": 0.3322, "step": 35682 }, { "epoch": 3.6277958519723463, "grad_norm": 0.2548814117908478, "learning_rate": 2.1241288830081984e-06, "loss": 0.2919, "step": 35683 }, { "epoch": 3.6278975193167957, "grad_norm": 0.27529096603393555, "learning_rate": 2.123838582541531e-06, "loss": 0.3249, "step": 35684 }, { "epoch": 3.6279991866612447, "grad_norm": 0.2821386456489563, "learning_rate": 2.123548296564281e-06, "loss": 0.3092, "step": 35685 }, { "epoch": 3.6281008540056936, "grad_norm": 0.28235241770744324, "learning_rate": 2.1232580250779094e-06, "loss": 0.3066, "step": 35686 }, { "epoch": 3.6282025213501425, "grad_norm": 0.2699272036552429, "learning_rate": 2.1229677680838823e-06, "loss": 0.3201, "step": 35687 }, { "epoch": 3.6283041886945915, "grad_norm": 0.25796833634376526, "learning_rate": 2.1226775255836596e-06, "loss": 0.3205, "step": 35688 }, { "epoch": 3.6284058560390404, "grad_norm": 0.3057365119457245, "learning_rate": 2.1223872975787024e-06, "loss": 0.3327, "step": 35689 }, { "epoch": 3.6285075233834894, "grad_norm": 0.2867901623249054, "learning_rate": 2.1220970840704774e-06, "loss": 0.3327, "step": 35690 }, { "epoch": 3.6286091907279383, "grad_norm": 0.27105268836021423, "learning_rate": 2.1218068850604397e-06, "loss": 0.2935, "step": 35691 }, { "epoch": 3.6287108580723872, "grad_norm": 0.2906953692436218, "learning_rate": 2.1215167005500555e-06, "loss": 0.2926, "step": 35692 }, { "epoch": 3.628812525416836, "grad_norm": 0.2819988429546356, "learning_rate": 2.121226530540787e-06, "loss": 0.2981, "step": 35693 }, { "epoch": 3.628914192761285, "grad_norm": 0.27215561270713806, "learning_rate": 2.1209363750340963e-06, "loss": 0.2902, "step": 35694 }, { "epoch": 3.629015860105734, "grad_norm": 0.27056005597114563, "learning_rate": 2.120646234031443e-06, "loss": 0.3114, "step": 35695 }, { "epoch": 3.629117527450183, "grad_norm": 0.2962355613708496, "learning_rate": 2.1203561075342882e-06, "loss": 0.3069, "step": 35696 }, { "epoch": 3.629219194794632, "grad_norm": 0.2746340036392212, "learning_rate": 2.1200659955440974e-06, "loss": 0.2859, "step": 35697 }, { "epoch": 3.629320862139081, "grad_norm": 0.29071030020713806, "learning_rate": 2.119775898062329e-06, "loss": 0.2995, "step": 35698 }, { "epoch": 3.62942252948353, "grad_norm": 0.28506338596343994, "learning_rate": 2.1194858150904433e-06, "loss": 0.2766, "step": 35699 }, { "epoch": 3.6295241968279788, "grad_norm": 0.2596369981765747, "learning_rate": 2.119195746629908e-06, "loss": 0.2921, "step": 35700 }, { "epoch": 3.6296258641724277, "grad_norm": 0.29265767335891724, "learning_rate": 2.1189056926821754e-06, "loss": 0.3358, "step": 35701 }, { "epoch": 3.6297275315168767, "grad_norm": 0.26686540246009827, "learning_rate": 2.1186156532487116e-06, "loss": 0.3226, "step": 35702 }, { "epoch": 3.6298291988613256, "grad_norm": 0.2824975848197937, "learning_rate": 2.118325628330981e-06, "loss": 0.3178, "step": 35703 }, { "epoch": 3.6299308662057745, "grad_norm": 0.29346123337745667, "learning_rate": 2.118035617930438e-06, "loss": 0.296, "step": 35704 }, { "epoch": 3.6300325335502235, "grad_norm": 0.25558245182037354, "learning_rate": 2.1177456220485483e-06, "loss": 0.285, "step": 35705 }, { "epoch": 3.6301342008946724, "grad_norm": 0.2582777142524719, "learning_rate": 2.1174556406867693e-06, "loss": 0.3218, "step": 35706 }, { "epoch": 3.6302358682391214, "grad_norm": 0.27379533648490906, "learning_rate": 2.117165673846565e-06, "loss": 0.3063, "step": 35707 }, { "epoch": 3.6303375355835703, "grad_norm": 0.2776729166507721, "learning_rate": 2.1168757215293955e-06, "loss": 0.3237, "step": 35708 }, { "epoch": 3.6304392029280192, "grad_norm": 0.26805737614631653, "learning_rate": 2.116585783736719e-06, "loss": 0.3103, "step": 35709 }, { "epoch": 3.6305408702724686, "grad_norm": 0.28137287497520447, "learning_rate": 2.1162958604700023e-06, "loss": 0.2913, "step": 35710 }, { "epoch": 3.6306425376169176, "grad_norm": 0.2630031704902649, "learning_rate": 2.1160059517306975e-06, "loss": 0.2758, "step": 35711 }, { "epoch": 3.6307442049613665, "grad_norm": 0.26737797260284424, "learning_rate": 2.1157160575202697e-06, "loss": 0.3061, "step": 35712 }, { "epoch": 3.6308458723058155, "grad_norm": 0.2718709409236908, "learning_rate": 2.1154261778401822e-06, "loss": 0.3314, "step": 35713 }, { "epoch": 3.6309475396502644, "grad_norm": 0.28033125400543213, "learning_rate": 2.1151363126918894e-06, "loss": 0.3141, "step": 35714 }, { "epoch": 3.6310492069947133, "grad_norm": 0.29164406657218933, "learning_rate": 2.1148464620768554e-06, "loss": 0.3337, "step": 35715 }, { "epoch": 3.6311508743391623, "grad_norm": 0.28629812598228455, "learning_rate": 2.114556625996538e-06, "loss": 0.3257, "step": 35716 }, { "epoch": 3.6312525416836112, "grad_norm": 0.29099640250205994, "learning_rate": 2.1142668044524e-06, "loss": 0.3627, "step": 35717 }, { "epoch": 3.63135420902806, "grad_norm": 0.270148366689682, "learning_rate": 2.1139769974459e-06, "loss": 0.3071, "step": 35718 }, { "epoch": 3.631455876372509, "grad_norm": 0.2773444354534149, "learning_rate": 2.113687204978496e-06, "loss": 0.2915, "step": 35719 }, { "epoch": 3.631557543716958, "grad_norm": 0.2687775790691376, "learning_rate": 2.1133974270516542e-06, "loss": 0.2958, "step": 35720 }, { "epoch": 3.631659211061407, "grad_norm": 0.297226220369339, "learning_rate": 2.1131076636668268e-06, "loss": 0.3304, "step": 35721 }, { "epoch": 3.631760878405856, "grad_norm": 0.2626543343067169, "learning_rate": 2.1128179148254763e-06, "loss": 0.3345, "step": 35722 }, { "epoch": 3.631862545750305, "grad_norm": 0.2904265522956848, "learning_rate": 2.112528180529067e-06, "loss": 0.3182, "step": 35723 }, { "epoch": 3.631964213094754, "grad_norm": 0.26342976093292236, "learning_rate": 2.112238460779051e-06, "loss": 0.3128, "step": 35724 }, { "epoch": 3.632065880439203, "grad_norm": 0.2839968502521515, "learning_rate": 2.111948755576893e-06, "loss": 0.299, "step": 35725 }, { "epoch": 3.632167547783652, "grad_norm": 0.2554342448711395, "learning_rate": 2.111659064924051e-06, "loss": 0.331, "step": 35726 }, { "epoch": 3.632269215128101, "grad_norm": 0.2679286599159241, "learning_rate": 2.111369388821982e-06, "loss": 0.3152, "step": 35727 }, { "epoch": 3.63237088247255, "grad_norm": 0.28289860486984253, "learning_rate": 2.1110797272721502e-06, "loss": 0.2945, "step": 35728 }, { "epoch": 3.632472549816999, "grad_norm": 0.280868262052536, "learning_rate": 2.11079008027601e-06, "loss": 0.299, "step": 35729 }, { "epoch": 3.632574217161448, "grad_norm": 0.26571542024612427, "learning_rate": 2.1105004478350267e-06, "loss": 0.3227, "step": 35730 }, { "epoch": 3.632675884505897, "grad_norm": 0.26366743445396423, "learning_rate": 2.1102108299506513e-06, "loss": 0.3, "step": 35731 }, { "epoch": 3.632777551850346, "grad_norm": 0.2865017354488373, "learning_rate": 2.1099212266243476e-06, "loss": 0.3463, "step": 35732 }, { "epoch": 3.6328792191947947, "grad_norm": 0.2713559567928314, "learning_rate": 2.1096316378575772e-06, "loss": 0.3172, "step": 35733 }, { "epoch": 3.6329808865392437, "grad_norm": 0.273721843957901, "learning_rate": 2.109342063651792e-06, "loss": 0.323, "step": 35734 }, { "epoch": 3.6330825538836926, "grad_norm": 0.2888433039188385, "learning_rate": 2.1090525040084563e-06, "loss": 0.3309, "step": 35735 }, { "epoch": 3.6331842212281416, "grad_norm": 0.28907710313796997, "learning_rate": 2.1087629589290276e-06, "loss": 0.3063, "step": 35736 }, { "epoch": 3.6332858885725905, "grad_norm": 0.26504844427108765, "learning_rate": 2.1084734284149623e-06, "loss": 0.2948, "step": 35737 }, { "epoch": 3.6333875559170394, "grad_norm": 0.2669893205165863, "learning_rate": 2.1081839124677224e-06, "loss": 0.3143, "step": 35738 }, { "epoch": 3.6334892232614884, "grad_norm": 0.270325243473053, "learning_rate": 2.107894411088762e-06, "loss": 0.2841, "step": 35739 }, { "epoch": 3.6335908906059373, "grad_norm": 0.287935346364975, "learning_rate": 2.107604924279547e-06, "loss": 0.2909, "step": 35740 }, { "epoch": 3.6336925579503863, "grad_norm": 0.2668384313583374, "learning_rate": 2.1073154520415273e-06, "loss": 0.3493, "step": 35741 }, { "epoch": 3.633794225294835, "grad_norm": 0.2546013593673706, "learning_rate": 2.1070259943761643e-06, "loss": 0.2833, "step": 35742 }, { "epoch": 3.633895892639284, "grad_norm": 0.28907760977745056, "learning_rate": 2.1067365512849214e-06, "loss": 0.3139, "step": 35743 }, { "epoch": 3.633997559983733, "grad_norm": 0.29705512523651123, "learning_rate": 2.1064471227692477e-06, "loss": 0.3195, "step": 35744 }, { "epoch": 3.634099227328182, "grad_norm": 0.27983808517456055, "learning_rate": 2.1061577088306083e-06, "loss": 0.3279, "step": 35745 }, { "epoch": 3.634200894672631, "grad_norm": 0.2975797653198242, "learning_rate": 2.1058683094704584e-06, "loss": 0.3327, "step": 35746 }, { "epoch": 3.63430256201708, "grad_norm": 0.283788800239563, "learning_rate": 2.1055789246902548e-06, "loss": 0.2912, "step": 35747 }, { "epoch": 3.634404229361529, "grad_norm": 0.25086188316345215, "learning_rate": 2.1052895544914583e-06, "loss": 0.3241, "step": 35748 }, { "epoch": 3.634505896705978, "grad_norm": 0.2795471251010895, "learning_rate": 2.105000198875525e-06, "loss": 0.313, "step": 35749 }, { "epoch": 3.6346075640504267, "grad_norm": 0.29095029830932617, "learning_rate": 2.1047108578439123e-06, "loss": 0.3181, "step": 35750 }, { "epoch": 3.634709231394876, "grad_norm": 0.2745695412158966, "learning_rate": 2.104421531398077e-06, "loss": 0.3141, "step": 35751 }, { "epoch": 3.634810898739325, "grad_norm": 0.2700924277305603, "learning_rate": 2.1041322195394782e-06, "loss": 0.3109, "step": 35752 }, { "epoch": 3.634912566083774, "grad_norm": 0.27969294786453247, "learning_rate": 2.103842922269576e-06, "loss": 0.2896, "step": 35753 }, { "epoch": 3.635014233428223, "grad_norm": 0.2558334767818451, "learning_rate": 2.1035536395898222e-06, "loss": 0.2901, "step": 35754 }, { "epoch": 3.635115900772672, "grad_norm": 0.2781992554664612, "learning_rate": 2.103264371501678e-06, "loss": 0.3243, "step": 35755 }, { "epoch": 3.635217568117121, "grad_norm": 0.28487512469291687, "learning_rate": 2.1029751180065994e-06, "loss": 0.3121, "step": 35756 }, { "epoch": 3.6353192354615698, "grad_norm": 0.26909884810447693, "learning_rate": 2.102685879106042e-06, "loss": 0.3347, "step": 35757 }, { "epoch": 3.6354209028060187, "grad_norm": 0.2776198983192444, "learning_rate": 2.1023966548014667e-06, "loss": 0.3041, "step": 35758 }, { "epoch": 3.6355225701504676, "grad_norm": 0.27363401651382446, "learning_rate": 2.1021074450943285e-06, "loss": 0.2995, "step": 35759 }, { "epoch": 3.6356242374949166, "grad_norm": 0.2810361981391907, "learning_rate": 2.1018182499860844e-06, "loss": 0.3084, "step": 35760 }, { "epoch": 3.6357259048393655, "grad_norm": 0.28427115082740784, "learning_rate": 2.101529069478189e-06, "loss": 0.2908, "step": 35761 }, { "epoch": 3.6358275721838145, "grad_norm": 0.27785298228263855, "learning_rate": 2.1012399035721037e-06, "loss": 0.2888, "step": 35762 }, { "epoch": 3.6359292395282634, "grad_norm": 0.27532705664634705, "learning_rate": 2.1009507522692827e-06, "loss": 0.2933, "step": 35763 }, { "epoch": 3.6360309068727124, "grad_norm": 0.27617719769477844, "learning_rate": 2.1006616155711813e-06, "loss": 0.2754, "step": 35764 }, { "epoch": 3.6361325742171613, "grad_norm": 0.3077802360057831, "learning_rate": 2.100372493479259e-06, "loss": 0.2998, "step": 35765 }, { "epoch": 3.6362342415616107, "grad_norm": 0.2836149036884308, "learning_rate": 2.1000833859949717e-06, "loss": 0.3146, "step": 35766 }, { "epoch": 3.6363359089060596, "grad_norm": 0.27515020966529846, "learning_rate": 2.0997942931197727e-06, "loss": 0.3174, "step": 35767 }, { "epoch": 3.6364375762505086, "grad_norm": 0.2685616612434387, "learning_rate": 2.099505214855123e-06, "loss": 0.3223, "step": 35768 }, { "epoch": 3.6365392435949575, "grad_norm": 0.27357980608940125, "learning_rate": 2.099216151202477e-06, "loss": 0.2907, "step": 35769 }, { "epoch": 3.6366409109394064, "grad_norm": 0.2813510596752167, "learning_rate": 2.09892710216329e-06, "loss": 0.3345, "step": 35770 }, { "epoch": 3.6367425782838554, "grad_norm": 0.26836299896240234, "learning_rate": 2.098638067739018e-06, "loss": 0.2861, "step": 35771 }, { "epoch": 3.6368442456283043, "grad_norm": 0.2790471315383911, "learning_rate": 2.0983490479311188e-06, "loss": 0.301, "step": 35772 }, { "epoch": 3.6369459129727533, "grad_norm": 0.26630085706710815, "learning_rate": 2.098060042741048e-06, "loss": 0.3159, "step": 35773 }, { "epoch": 3.637047580317202, "grad_norm": 0.2619973421096802, "learning_rate": 2.097771052170259e-06, "loss": 0.2989, "step": 35774 }, { "epoch": 3.637149247661651, "grad_norm": 0.28484630584716797, "learning_rate": 2.097482076220212e-06, "loss": 0.2925, "step": 35775 }, { "epoch": 3.6372509150061, "grad_norm": 0.25278425216674805, "learning_rate": 2.09719311489236e-06, "loss": 0.3593, "step": 35776 }, { "epoch": 3.637352582350549, "grad_norm": 0.27629151940345764, "learning_rate": 2.0969041681881604e-06, "loss": 0.2745, "step": 35777 }, { "epoch": 3.637454249694998, "grad_norm": 0.25577858090400696, "learning_rate": 2.0966152361090646e-06, "loss": 0.3204, "step": 35778 }, { "epoch": 3.637555917039447, "grad_norm": 0.2811835706233978, "learning_rate": 2.0963263186565337e-06, "loss": 0.2784, "step": 35779 }, { "epoch": 3.637657584383896, "grad_norm": 0.26566964387893677, "learning_rate": 2.0960374158320208e-06, "loss": 0.2868, "step": 35780 }, { "epoch": 3.637759251728345, "grad_norm": 0.2785308063030243, "learning_rate": 2.0957485276369794e-06, "loss": 0.3008, "step": 35781 }, { "epoch": 3.6378609190727937, "grad_norm": 0.27428942918777466, "learning_rate": 2.095459654072869e-06, "loss": 0.3245, "step": 35782 }, { "epoch": 3.6379625864172427, "grad_norm": 0.25945067405700684, "learning_rate": 2.0951707951411423e-06, "loss": 0.3067, "step": 35783 }, { "epoch": 3.6380642537616916, "grad_norm": 0.27419567108154297, "learning_rate": 2.0948819508432526e-06, "loss": 0.3091, "step": 35784 }, { "epoch": 3.6381659211061406, "grad_norm": 0.25905829668045044, "learning_rate": 2.0945931211806596e-06, "loss": 0.3563, "step": 35785 }, { "epoch": 3.6382675884505895, "grad_norm": 0.2797049880027771, "learning_rate": 2.0943043061548158e-06, "loss": 0.3138, "step": 35786 }, { "epoch": 3.6383692557950384, "grad_norm": 0.25842440128326416, "learning_rate": 2.0940155057671764e-06, "loss": 0.2846, "step": 35787 }, { "epoch": 3.6384709231394874, "grad_norm": 0.24782799184322357, "learning_rate": 2.0937267200191945e-06, "loss": 0.3056, "step": 35788 }, { "epoch": 3.6385725904839363, "grad_norm": 0.3235745131969452, "learning_rate": 2.093437948912329e-06, "loss": 0.3004, "step": 35789 }, { "epoch": 3.6386742578283853, "grad_norm": 0.29967352747917175, "learning_rate": 2.0931491924480318e-06, "loss": 0.3057, "step": 35790 }, { "epoch": 3.638775925172834, "grad_norm": 0.2696760296821594, "learning_rate": 2.0928604506277567e-06, "loss": 0.3178, "step": 35791 }, { "epoch": 3.6388775925172836, "grad_norm": 0.26800137758255005, "learning_rate": 2.092571723452962e-06, "loss": 0.298, "step": 35792 }, { "epoch": 3.6389792598617325, "grad_norm": 0.27326443791389465, "learning_rate": 2.092283010925099e-06, "loss": 0.2851, "step": 35793 }, { "epoch": 3.6390809272061815, "grad_norm": 0.2824234366416931, "learning_rate": 2.091994313045622e-06, "loss": 0.3007, "step": 35794 }, { "epoch": 3.6391825945506304, "grad_norm": 0.2561190724372864, "learning_rate": 2.091705629815989e-06, "loss": 0.3256, "step": 35795 }, { "epoch": 3.6392842618950794, "grad_norm": 0.2873761057853699, "learning_rate": 2.091416961237651e-06, "loss": 0.3067, "step": 35796 }, { "epoch": 3.6393859292395283, "grad_norm": 0.3036110997200012, "learning_rate": 2.0911283073120635e-06, "loss": 0.3075, "step": 35797 }, { "epoch": 3.6394875965839772, "grad_norm": 0.2601200044155121, "learning_rate": 2.090839668040678e-06, "loss": 0.2846, "step": 35798 }, { "epoch": 3.639589263928426, "grad_norm": 0.26259803771972656, "learning_rate": 2.090551043424954e-06, "loss": 0.2971, "step": 35799 }, { "epoch": 3.639690931272875, "grad_norm": 0.27829408645629883, "learning_rate": 2.0902624334663417e-06, "loss": 0.2831, "step": 35800 }, { "epoch": 3.639792598617324, "grad_norm": 0.2966815233230591, "learning_rate": 2.089973838166294e-06, "loss": 0.299, "step": 35801 }, { "epoch": 3.639894265961773, "grad_norm": 0.2751242518424988, "learning_rate": 2.089685257526268e-06, "loss": 0.3272, "step": 35802 }, { "epoch": 3.639995933306222, "grad_norm": 0.2825014889240265, "learning_rate": 2.0893966915477167e-06, "loss": 0.3079, "step": 35803 }, { "epoch": 3.640097600650671, "grad_norm": 0.28454649448394775, "learning_rate": 2.089108140232091e-06, "loss": 0.3101, "step": 35804 }, { "epoch": 3.64019926799512, "grad_norm": 0.2926074266433716, "learning_rate": 2.088819603580849e-06, "loss": 0.3043, "step": 35805 }, { "epoch": 3.6403009353395688, "grad_norm": 0.2723522186279297, "learning_rate": 2.0885310815954417e-06, "loss": 0.3422, "step": 35806 }, { "epoch": 3.640402602684018, "grad_norm": 0.2691221237182617, "learning_rate": 2.088242574277323e-06, "loss": 0.2955, "step": 35807 }, { "epoch": 3.640504270028467, "grad_norm": 0.2885282635688782, "learning_rate": 2.087954081627945e-06, "loss": 0.3244, "step": 35808 }, { "epoch": 3.640605937372916, "grad_norm": 0.2671537399291992, "learning_rate": 2.087665603648763e-06, "loss": 0.2936, "step": 35809 }, { "epoch": 3.640707604717365, "grad_norm": 0.2665500342845917, "learning_rate": 2.08737714034123e-06, "loss": 0.298, "step": 35810 }, { "epoch": 3.640809272061814, "grad_norm": 0.2608318030834198, "learning_rate": 2.087088691706798e-06, "loss": 0.3107, "step": 35811 }, { "epoch": 3.640910939406263, "grad_norm": 0.27664878964424133, "learning_rate": 2.0868002577469213e-06, "loss": 0.2883, "step": 35812 }, { "epoch": 3.641012606750712, "grad_norm": 0.2791619598865509, "learning_rate": 2.0865118384630532e-06, "loss": 0.3428, "step": 35813 }, { "epoch": 3.6411142740951608, "grad_norm": 0.2841031551361084, "learning_rate": 2.086223433856645e-06, "loss": 0.3192, "step": 35814 }, { "epoch": 3.6412159414396097, "grad_norm": 0.2603676915168762, "learning_rate": 2.0859350439291515e-06, "loss": 0.2991, "step": 35815 }, { "epoch": 3.6413176087840586, "grad_norm": 0.31494954228401184, "learning_rate": 2.0856466686820253e-06, "loss": 0.3314, "step": 35816 }, { "epoch": 3.6414192761285076, "grad_norm": 0.2707655727863312, "learning_rate": 2.085358308116719e-06, "loss": 0.2865, "step": 35817 }, { "epoch": 3.6415209434729565, "grad_norm": 0.2646102011203766, "learning_rate": 2.085069962234682e-06, "loss": 0.3309, "step": 35818 }, { "epoch": 3.6416226108174055, "grad_norm": 0.26637932658195496, "learning_rate": 2.0847816310373727e-06, "loss": 0.2983, "step": 35819 }, { "epoch": 3.6417242781618544, "grad_norm": 0.2645229697227478, "learning_rate": 2.08449331452624e-06, "loss": 0.3135, "step": 35820 }, { "epoch": 3.6418259455063033, "grad_norm": 0.3007107079029083, "learning_rate": 2.0842050127027357e-06, "loss": 0.323, "step": 35821 }, { "epoch": 3.6419276128507523, "grad_norm": 0.2500043511390686, "learning_rate": 2.0839167255683156e-06, "loss": 0.312, "step": 35822 }, { "epoch": 3.6420292801952012, "grad_norm": 0.2670946717262268, "learning_rate": 2.08362845312443e-06, "loss": 0.3133, "step": 35823 }, { "epoch": 3.64213094753965, "grad_norm": 0.27037444710731506, "learning_rate": 2.0833401953725296e-06, "loss": 0.3065, "step": 35824 }, { "epoch": 3.642232614884099, "grad_norm": 0.2682808041572571, "learning_rate": 2.08305195231407e-06, "loss": 0.313, "step": 35825 }, { "epoch": 3.642334282228548, "grad_norm": 0.2788662016391754, "learning_rate": 2.0827637239505006e-06, "loss": 0.3355, "step": 35826 }, { "epoch": 3.642435949572997, "grad_norm": 0.2804165780544281, "learning_rate": 2.082475510283275e-06, "loss": 0.2974, "step": 35827 }, { "epoch": 3.642537616917446, "grad_norm": 0.26518377661705017, "learning_rate": 2.082187311313843e-06, "loss": 0.2958, "step": 35828 }, { "epoch": 3.642639284261895, "grad_norm": 0.26885855197906494, "learning_rate": 2.081899127043659e-06, "loss": 0.2934, "step": 35829 }, { "epoch": 3.642740951606344, "grad_norm": 0.250408798456192, "learning_rate": 2.0816109574741745e-06, "loss": 0.3117, "step": 35830 }, { "epoch": 3.6428426189507928, "grad_norm": 0.2802906632423401, "learning_rate": 2.081322802606838e-06, "loss": 0.2908, "step": 35831 }, { "epoch": 3.6429442862952417, "grad_norm": 0.28205230832099915, "learning_rate": 2.0810346624431056e-06, "loss": 0.3119, "step": 35832 }, { "epoch": 3.643045953639691, "grad_norm": 0.2830190658569336, "learning_rate": 2.0807465369844273e-06, "loss": 0.3116, "step": 35833 }, { "epoch": 3.64314762098414, "grad_norm": 0.27424824237823486, "learning_rate": 2.0804584262322518e-06, "loss": 0.2808, "step": 35834 }, { "epoch": 3.643249288328589, "grad_norm": 0.29115045070648193, "learning_rate": 2.0801703301880355e-06, "loss": 0.3236, "step": 35835 }, { "epoch": 3.643350955673038, "grad_norm": 0.2689242959022522, "learning_rate": 2.0798822488532266e-06, "loss": 0.3166, "step": 35836 }, { "epoch": 3.643452623017487, "grad_norm": 0.25841662287712097, "learning_rate": 2.0795941822292777e-06, "loss": 0.3648, "step": 35837 }, { "epoch": 3.643554290361936, "grad_norm": 0.2591777443885803, "learning_rate": 2.0793061303176367e-06, "loss": 0.3048, "step": 35838 }, { "epoch": 3.6436559577063847, "grad_norm": 0.2780061662197113, "learning_rate": 2.0790180931197597e-06, "loss": 0.3324, "step": 35839 }, { "epoch": 3.6437576250508337, "grad_norm": 0.30626940727233887, "learning_rate": 2.078730070637095e-06, "loss": 0.3481, "step": 35840 }, { "epoch": 3.6438592923952826, "grad_norm": 0.26636937260627747, "learning_rate": 2.0784420628710927e-06, "loss": 0.3202, "step": 35841 }, { "epoch": 3.6439609597397316, "grad_norm": 0.2657780349254608, "learning_rate": 2.0781540698232085e-06, "loss": 0.3082, "step": 35842 }, { "epoch": 3.6440626270841805, "grad_norm": 0.2756422162055969, "learning_rate": 2.0778660914948857e-06, "loss": 0.2738, "step": 35843 }, { "epoch": 3.6441642944286294, "grad_norm": 0.275676429271698, "learning_rate": 2.0775781278875793e-06, "loss": 0.289, "step": 35844 }, { "epoch": 3.6442659617730784, "grad_norm": 0.25996118783950806, "learning_rate": 2.0772901790027415e-06, "loss": 0.3059, "step": 35845 }, { "epoch": 3.6443676291175273, "grad_norm": 0.28951695561408997, "learning_rate": 2.077002244841822e-06, "loss": 0.3204, "step": 35846 }, { "epoch": 3.6444692964619763, "grad_norm": 0.2738356590270996, "learning_rate": 2.0767143254062698e-06, "loss": 0.2854, "step": 35847 }, { "epoch": 3.6445709638064256, "grad_norm": 0.2826119363307953, "learning_rate": 2.076426420697535e-06, "loss": 0.2934, "step": 35848 }, { "epoch": 3.6446726311508746, "grad_norm": 0.30972030758857727, "learning_rate": 2.0761385307170707e-06, "loss": 0.33, "step": 35849 }, { "epoch": 3.6447742984953235, "grad_norm": 0.2800051271915436, "learning_rate": 2.0758506554663257e-06, "loss": 0.3264, "step": 35850 }, { "epoch": 3.6448759658397725, "grad_norm": 0.27303746342658997, "learning_rate": 2.075562794946748e-06, "loss": 0.3182, "step": 35851 }, { "epoch": 3.6449776331842214, "grad_norm": 0.2841741442680359, "learning_rate": 2.0752749491597935e-06, "loss": 0.2953, "step": 35852 }, { "epoch": 3.6450793005286704, "grad_norm": 0.27607056498527527, "learning_rate": 2.0749871181069054e-06, "loss": 0.3195, "step": 35853 }, { "epoch": 3.6451809678731193, "grad_norm": 0.2612350583076477, "learning_rate": 2.0746993017895373e-06, "loss": 0.3092, "step": 35854 }, { "epoch": 3.6452826352175682, "grad_norm": 0.2777920961380005, "learning_rate": 2.0744115002091424e-06, "loss": 0.2943, "step": 35855 }, { "epoch": 3.645384302562017, "grad_norm": 0.2809235453605652, "learning_rate": 2.074123713367164e-06, "loss": 0.3275, "step": 35856 }, { "epoch": 3.645485969906466, "grad_norm": 0.2656012773513794, "learning_rate": 2.073835941265056e-06, "loss": 0.3317, "step": 35857 }, { "epoch": 3.645587637250915, "grad_norm": 0.26359671354293823, "learning_rate": 2.073548183904266e-06, "loss": 0.3307, "step": 35858 }, { "epoch": 3.645689304595364, "grad_norm": 0.290822297334671, "learning_rate": 2.073260441286247e-06, "loss": 0.3243, "step": 35859 }, { "epoch": 3.645790971939813, "grad_norm": 0.26556622982025146, "learning_rate": 2.0729727134124457e-06, "loss": 0.3268, "step": 35860 }, { "epoch": 3.645892639284262, "grad_norm": 0.2516588866710663, "learning_rate": 2.0726850002843103e-06, "loss": 0.3045, "step": 35861 }, { "epoch": 3.645994306628711, "grad_norm": 0.281946063041687, "learning_rate": 2.072397301903296e-06, "loss": 0.2729, "step": 35862 }, { "epoch": 3.6460959739731598, "grad_norm": 0.2727620005607605, "learning_rate": 2.072109618270845e-06, "loss": 0.3035, "step": 35863 }, { "epoch": 3.6461976413176087, "grad_norm": 0.26436325907707214, "learning_rate": 2.07182194938841e-06, "loss": 0.3475, "step": 35864 }, { "epoch": 3.6462993086620576, "grad_norm": 0.2676597237586975, "learning_rate": 2.0715342952574434e-06, "loss": 0.3044, "step": 35865 }, { "epoch": 3.6464009760065066, "grad_norm": 0.24930967390537262, "learning_rate": 2.071246655879388e-06, "loss": 0.3292, "step": 35866 }, { "epoch": 3.6465026433509555, "grad_norm": 0.26693591475486755, "learning_rate": 2.0709590312556975e-06, "loss": 0.3124, "step": 35867 }, { "epoch": 3.6466043106954045, "grad_norm": 0.2643289268016815, "learning_rate": 2.070671421387818e-06, "loss": 0.3183, "step": 35868 }, { "epoch": 3.6467059780398534, "grad_norm": 0.25655078887939453, "learning_rate": 2.0703838262772014e-06, "loss": 0.33, "step": 35869 }, { "epoch": 3.6468076453843024, "grad_norm": 0.2819857597351074, "learning_rate": 2.0700962459252943e-06, "loss": 0.329, "step": 35870 }, { "epoch": 3.6469093127287513, "grad_norm": 0.26962810754776, "learning_rate": 2.069808680333545e-06, "loss": 0.303, "step": 35871 }, { "epoch": 3.6470109800732002, "grad_norm": 0.3021824061870575, "learning_rate": 2.0695211295034062e-06, "loss": 0.3147, "step": 35872 }, { "epoch": 3.647112647417649, "grad_norm": 0.27737846970558167, "learning_rate": 2.0692335934363205e-06, "loss": 0.3117, "step": 35873 }, { "epoch": 3.6472143147620986, "grad_norm": 0.2655254304409027, "learning_rate": 2.068946072133739e-06, "loss": 0.2841, "step": 35874 }, { "epoch": 3.6473159821065475, "grad_norm": 0.2854422628879547, "learning_rate": 2.0686585655971147e-06, "loss": 0.2891, "step": 35875 }, { "epoch": 3.6474176494509964, "grad_norm": 0.26125144958496094, "learning_rate": 2.0683710738278883e-06, "loss": 0.3242, "step": 35876 }, { "epoch": 3.6475193167954454, "grad_norm": 0.26411172747612, "learning_rate": 2.068083596827513e-06, "loss": 0.2944, "step": 35877 }, { "epoch": 3.6476209841398943, "grad_norm": 0.27290233969688416, "learning_rate": 2.067796134597437e-06, "loss": 0.2884, "step": 35878 }, { "epoch": 3.6477226514843433, "grad_norm": 0.2761039435863495, "learning_rate": 2.0675086871391044e-06, "loss": 0.309, "step": 35879 }, { "epoch": 3.647824318828792, "grad_norm": 0.26166123151779175, "learning_rate": 2.0672212544539683e-06, "loss": 0.3295, "step": 35880 }, { "epoch": 3.647925986173241, "grad_norm": 0.26949167251586914, "learning_rate": 2.066933836543473e-06, "loss": 0.3239, "step": 35881 }, { "epoch": 3.64802765351769, "grad_norm": 0.2809370756149292, "learning_rate": 2.066646433409071e-06, "loss": 0.3097, "step": 35882 }, { "epoch": 3.648129320862139, "grad_norm": 0.2626652121543884, "learning_rate": 2.066359045052204e-06, "loss": 0.3063, "step": 35883 }, { "epoch": 3.648230988206588, "grad_norm": 0.2895638942718506, "learning_rate": 2.066071671474323e-06, "loss": 0.2979, "step": 35884 }, { "epoch": 3.648332655551037, "grad_norm": 0.2761469781398773, "learning_rate": 2.0657843126768793e-06, "loss": 0.3201, "step": 35885 }, { "epoch": 3.648434322895486, "grad_norm": 0.27765288949012756, "learning_rate": 2.0654969686613137e-06, "loss": 0.2999, "step": 35886 }, { "epoch": 3.648535990239935, "grad_norm": 0.3047007918357849, "learning_rate": 2.065209639429078e-06, "loss": 0.3156, "step": 35887 }, { "epoch": 3.6486376575843837, "grad_norm": 0.2574450373649597, "learning_rate": 2.0649223249816197e-06, "loss": 0.3133, "step": 35888 }, { "epoch": 3.648739324928833, "grad_norm": 0.2552805542945862, "learning_rate": 2.064635025320383e-06, "loss": 0.2973, "step": 35889 }, { "epoch": 3.648840992273282, "grad_norm": 0.27947038412094116, "learning_rate": 2.0643477404468194e-06, "loss": 0.2942, "step": 35890 }, { "epoch": 3.648942659617731, "grad_norm": 0.27498283982276917, "learning_rate": 2.064060470362374e-06, "loss": 0.3197, "step": 35891 }, { "epoch": 3.64904432696218, "grad_norm": 0.27638304233551025, "learning_rate": 2.063773215068494e-06, "loss": 0.3015, "step": 35892 }, { "epoch": 3.649145994306629, "grad_norm": 0.24670059978961945, "learning_rate": 2.0634859745666256e-06, "loss": 0.2921, "step": 35893 }, { "epoch": 3.649247661651078, "grad_norm": 0.2831137776374817, "learning_rate": 2.063198748858217e-06, "loss": 0.3456, "step": 35894 }, { "epoch": 3.6493493289955268, "grad_norm": 0.26995643973350525, "learning_rate": 2.0629115379447183e-06, "loss": 0.3421, "step": 35895 }, { "epoch": 3.6494509963399757, "grad_norm": 0.2656266391277313, "learning_rate": 2.06262434182757e-06, "loss": 0.2861, "step": 35896 }, { "epoch": 3.6495526636844247, "grad_norm": 0.25961634516716003, "learning_rate": 2.0623371605082243e-06, "loss": 0.3221, "step": 35897 }, { "epoch": 3.6496543310288736, "grad_norm": 0.2917768955230713, "learning_rate": 2.0620499939881257e-06, "loss": 0.3031, "step": 35898 }, { "epoch": 3.6497559983733225, "grad_norm": 0.2947348654270172, "learning_rate": 2.061762842268719e-06, "loss": 0.3098, "step": 35899 }, { "epoch": 3.6498576657177715, "grad_norm": 0.2787127196788788, "learning_rate": 2.061475705351455e-06, "loss": 0.2999, "step": 35900 }, { "epoch": 3.6499593330622204, "grad_norm": 0.28253740072250366, "learning_rate": 2.0611885832377783e-06, "loss": 0.3097, "step": 35901 }, { "epoch": 3.6500610004066694, "grad_norm": 0.2901257872581482, "learning_rate": 2.0609014759291346e-06, "loss": 0.2935, "step": 35902 }, { "epoch": 3.6501626677511183, "grad_norm": 0.29619231820106506, "learning_rate": 2.0606143834269694e-06, "loss": 0.3157, "step": 35903 }, { "epoch": 3.6502643350955672, "grad_norm": 0.2761784493923187, "learning_rate": 2.06032730573273e-06, "loss": 0.2816, "step": 35904 }, { "epoch": 3.650366002440016, "grad_norm": 0.28409361839294434, "learning_rate": 2.060040242847867e-06, "loss": 0.2878, "step": 35905 }, { "epoch": 3.650467669784465, "grad_norm": 0.2572532892227173, "learning_rate": 2.0597531947738192e-06, "loss": 0.3437, "step": 35906 }, { "epoch": 3.650569337128914, "grad_norm": 0.26614904403686523, "learning_rate": 2.0594661615120377e-06, "loss": 0.3073, "step": 35907 }, { "epoch": 3.650671004473363, "grad_norm": 0.2716512680053711, "learning_rate": 2.0591791430639666e-06, "loss": 0.35, "step": 35908 }, { "epoch": 3.650772671817812, "grad_norm": 0.28761154413223267, "learning_rate": 2.0588921394310507e-06, "loss": 0.29, "step": 35909 }, { "epoch": 3.650874339162261, "grad_norm": 0.30438491702079773, "learning_rate": 2.058605150614739e-06, "loss": 0.3365, "step": 35910 }, { "epoch": 3.65097600650671, "grad_norm": 0.29260730743408203, "learning_rate": 2.058318176616476e-06, "loss": 0.3271, "step": 35911 }, { "epoch": 3.651077673851159, "grad_norm": 0.27038583159446716, "learning_rate": 2.058031217437706e-06, "loss": 0.2875, "step": 35912 }, { "epoch": 3.6511793411956077, "grad_norm": 0.2731214761734009, "learning_rate": 2.0577442730798746e-06, "loss": 0.2944, "step": 35913 }, { "epoch": 3.6512810085400567, "grad_norm": 0.2745705842971802, "learning_rate": 2.0574573435444293e-06, "loss": 0.3109, "step": 35914 }, { "epoch": 3.651382675884506, "grad_norm": 0.24872340261936188, "learning_rate": 2.0571704288328158e-06, "loss": 0.2815, "step": 35915 }, { "epoch": 3.651484343228955, "grad_norm": 0.27176597714424133, "learning_rate": 2.056883528946476e-06, "loss": 0.2923, "step": 35916 }, { "epoch": 3.651586010573404, "grad_norm": 0.25739604234695435, "learning_rate": 2.0565966438868597e-06, "loss": 0.3203, "step": 35917 }, { "epoch": 3.651687677917853, "grad_norm": 0.27019649744033813, "learning_rate": 2.0563097736554094e-06, "loss": 0.31, "step": 35918 }, { "epoch": 3.651789345262302, "grad_norm": 0.28644564747810364, "learning_rate": 2.0560229182535697e-06, "loss": 0.2947, "step": 35919 }, { "epoch": 3.6518910126067508, "grad_norm": 0.2654266655445099, "learning_rate": 2.0557360776827886e-06, "loss": 0.2948, "step": 35920 }, { "epoch": 3.6519926799511997, "grad_norm": 0.26909029483795166, "learning_rate": 2.055449251944509e-06, "loss": 0.2951, "step": 35921 }, { "epoch": 3.6520943472956486, "grad_norm": 0.27232837677001953, "learning_rate": 2.0551624410401767e-06, "loss": 0.2947, "step": 35922 }, { "epoch": 3.6521960146400976, "grad_norm": 0.2667050063610077, "learning_rate": 2.0548756449712344e-06, "loss": 0.2945, "step": 35923 }, { "epoch": 3.6522976819845465, "grad_norm": 0.2723117172718048, "learning_rate": 2.054588863739131e-06, "loss": 0.3258, "step": 35924 }, { "epoch": 3.6523993493289955, "grad_norm": 0.2594705820083618, "learning_rate": 2.054302097345308e-06, "loss": 0.2982, "step": 35925 }, { "epoch": 3.6525010166734444, "grad_norm": 0.2917272746562958, "learning_rate": 2.05401534579121e-06, "loss": 0.3481, "step": 35926 }, { "epoch": 3.6526026840178933, "grad_norm": 0.2717561721801758, "learning_rate": 2.0537286090782843e-06, "loss": 0.3242, "step": 35927 }, { "epoch": 3.6527043513623423, "grad_norm": 0.27848827838897705, "learning_rate": 2.053441887207974e-06, "loss": 0.2955, "step": 35928 }, { "epoch": 3.6528060187067912, "grad_norm": 0.259059876203537, "learning_rate": 2.0531551801817205e-06, "loss": 0.2902, "step": 35929 }, { "epoch": 3.6529076860512406, "grad_norm": 0.27282392978668213, "learning_rate": 2.0528684880009736e-06, "loss": 0.3205, "step": 35930 }, { "epoch": 3.6530093533956896, "grad_norm": 0.2674109935760498, "learning_rate": 2.0525818106671747e-06, "loss": 0.3407, "step": 35931 }, { "epoch": 3.6531110207401385, "grad_norm": 0.2840157747268677, "learning_rate": 2.0522951481817678e-06, "loss": 0.297, "step": 35932 }, { "epoch": 3.6532126880845874, "grad_norm": 0.2760482430458069, "learning_rate": 2.052008500546196e-06, "loss": 0.3191, "step": 35933 }, { "epoch": 3.6533143554290364, "grad_norm": 0.2820936143398285, "learning_rate": 2.0517218677619066e-06, "loss": 0.3043, "step": 35934 }, { "epoch": 3.6534160227734853, "grad_norm": 0.2864782214164734, "learning_rate": 2.051435249830342e-06, "loss": 0.3109, "step": 35935 }, { "epoch": 3.6535176901179343, "grad_norm": 0.28141963481903076, "learning_rate": 2.051148646752944e-06, "loss": 0.2948, "step": 35936 }, { "epoch": 3.653619357462383, "grad_norm": 0.3000801205635071, "learning_rate": 2.05086205853116e-06, "loss": 0.3306, "step": 35937 }, { "epoch": 3.653721024806832, "grad_norm": 0.2782679796218872, "learning_rate": 2.050575485166432e-06, "loss": 0.3247, "step": 35938 }, { "epoch": 3.653822692151281, "grad_norm": 0.26169392466545105, "learning_rate": 2.050288926660203e-06, "loss": 0.3047, "step": 35939 }, { "epoch": 3.65392435949573, "grad_norm": 0.27371206879615784, "learning_rate": 2.050002383013918e-06, "loss": 0.2913, "step": 35940 }, { "epoch": 3.654026026840179, "grad_norm": 0.2893833518028259, "learning_rate": 2.049715854229021e-06, "loss": 0.2868, "step": 35941 }, { "epoch": 3.654127694184628, "grad_norm": 0.25540691614151, "learning_rate": 2.0494293403069537e-06, "loss": 0.3299, "step": 35942 }, { "epoch": 3.654229361529077, "grad_norm": 0.28542211651802063, "learning_rate": 2.049142841249158e-06, "loss": 0.3408, "step": 35943 }, { "epoch": 3.654331028873526, "grad_norm": 0.27331817150115967, "learning_rate": 2.0488563570570824e-06, "loss": 0.3285, "step": 35944 }, { "epoch": 3.6544326962179747, "grad_norm": 0.3186785578727722, "learning_rate": 2.0485698877321663e-06, "loss": 0.3138, "step": 35945 }, { "epoch": 3.6545343635624237, "grad_norm": 0.2821533977985382, "learning_rate": 2.0482834332758522e-06, "loss": 0.3041, "step": 35946 }, { "epoch": 3.6546360309068726, "grad_norm": 0.29281899333000183, "learning_rate": 2.0479969936895866e-06, "loss": 0.3098, "step": 35947 }, { "epoch": 3.6547376982513216, "grad_norm": 0.2783430218696594, "learning_rate": 2.0477105689748105e-06, "loss": 0.3141, "step": 35948 }, { "epoch": 3.6548393655957705, "grad_norm": 0.28243911266326904, "learning_rate": 2.0474241591329657e-06, "loss": 0.3235, "step": 35949 }, { "epoch": 3.6549410329402194, "grad_norm": 0.2693553864955902, "learning_rate": 2.0471377641654976e-06, "loss": 0.3117, "step": 35950 }, { "epoch": 3.6550427002846684, "grad_norm": 0.2659550607204437, "learning_rate": 2.046851384073848e-06, "loss": 0.3044, "step": 35951 }, { "epoch": 3.6551443676291173, "grad_norm": 0.25682899355888367, "learning_rate": 2.0465650188594593e-06, "loss": 0.3077, "step": 35952 }, { "epoch": 3.6552460349735663, "grad_norm": 0.2808459997177124, "learning_rate": 2.0462786685237725e-06, "loss": 0.3014, "step": 35953 }, { "epoch": 3.655347702318015, "grad_norm": 0.2879505753517151, "learning_rate": 2.0459923330682336e-06, "loss": 0.3049, "step": 35954 }, { "epoch": 3.655449369662464, "grad_norm": 0.26610568165779114, "learning_rate": 2.045706012494283e-06, "loss": 0.3262, "step": 35955 }, { "epoch": 3.6555510370069135, "grad_norm": 0.26541009545326233, "learning_rate": 2.045419706803362e-06, "loss": 0.3106, "step": 35956 }, { "epoch": 3.6556527043513625, "grad_norm": 0.27521052956581116, "learning_rate": 2.045133415996916e-06, "loss": 0.3491, "step": 35957 }, { "epoch": 3.6557543716958114, "grad_norm": 0.2970390021800995, "learning_rate": 2.0448471400763856e-06, "loss": 0.3061, "step": 35958 }, { "epoch": 3.6558560390402604, "grad_norm": 0.24359068274497986, "learning_rate": 2.044560879043213e-06, "loss": 0.316, "step": 35959 }, { "epoch": 3.6559577063847093, "grad_norm": 0.26248645782470703, "learning_rate": 2.0442746328988387e-06, "loss": 0.2975, "step": 35960 }, { "epoch": 3.6560593737291582, "grad_norm": 0.2939763367176056, "learning_rate": 2.0439884016447085e-06, "loss": 0.3025, "step": 35961 }, { "epoch": 3.656161041073607, "grad_norm": 0.27101650834083557, "learning_rate": 2.0437021852822625e-06, "loss": 0.2845, "step": 35962 }, { "epoch": 3.656262708418056, "grad_norm": 0.2633977234363556, "learning_rate": 2.0434159838129397e-06, "loss": 0.3265, "step": 35963 }, { "epoch": 3.656364375762505, "grad_norm": 0.25559794902801514, "learning_rate": 2.043129797238187e-06, "loss": 0.3126, "step": 35964 }, { "epoch": 3.656466043106954, "grad_norm": 0.25261223316192627, "learning_rate": 2.0428436255594436e-06, "loss": 0.3226, "step": 35965 }, { "epoch": 3.656567710451403, "grad_norm": 0.2845318913459778, "learning_rate": 2.0425574687781497e-06, "loss": 0.278, "step": 35966 }, { "epoch": 3.656669377795852, "grad_norm": 0.2812720239162445, "learning_rate": 2.04227132689575e-06, "loss": 0.3213, "step": 35967 }, { "epoch": 3.656771045140301, "grad_norm": 0.281581848859787, "learning_rate": 2.041985199913684e-06, "loss": 0.2766, "step": 35968 }, { "epoch": 3.6568727124847498, "grad_norm": 0.27394092082977295, "learning_rate": 2.041699087833394e-06, "loss": 0.2858, "step": 35969 }, { "epoch": 3.6569743798291987, "grad_norm": 0.2566016912460327, "learning_rate": 2.0414129906563196e-06, "loss": 0.3256, "step": 35970 }, { "epoch": 3.657076047173648, "grad_norm": 0.2786414921283722, "learning_rate": 2.0411269083839054e-06, "loss": 0.3095, "step": 35971 }, { "epoch": 3.657177714518097, "grad_norm": 0.27343520522117615, "learning_rate": 2.04084084101759e-06, "loss": 0.3381, "step": 35972 }, { "epoch": 3.657279381862546, "grad_norm": 0.2780975103378296, "learning_rate": 2.0405547885588138e-06, "loss": 0.2861, "step": 35973 }, { "epoch": 3.657381049206995, "grad_norm": 0.2785051763057709, "learning_rate": 2.0402687510090213e-06, "loss": 0.286, "step": 35974 }, { "epoch": 3.657482716551444, "grad_norm": 0.28769248723983765, "learning_rate": 2.039982728369651e-06, "loss": 0.2955, "step": 35975 }, { "epoch": 3.657584383895893, "grad_norm": 0.2909875810146332, "learning_rate": 2.039696720642143e-06, "loss": 0.2983, "step": 35976 }, { "epoch": 3.6576860512403417, "grad_norm": 0.26696285605430603, "learning_rate": 2.0394107278279408e-06, "loss": 0.314, "step": 35977 }, { "epoch": 3.6577877185847907, "grad_norm": 0.2687563896179199, "learning_rate": 2.039124749928484e-06, "loss": 0.3311, "step": 35978 }, { "epoch": 3.6578893859292396, "grad_norm": 0.2799776494503021, "learning_rate": 2.038838786945213e-06, "loss": 0.3052, "step": 35979 }, { "epoch": 3.6579910532736886, "grad_norm": 0.2770935893058777, "learning_rate": 2.0385528388795667e-06, "loss": 0.2984, "step": 35980 }, { "epoch": 3.6580927206181375, "grad_norm": 0.28343093395233154, "learning_rate": 2.0382669057329894e-06, "loss": 0.3227, "step": 35981 }, { "epoch": 3.6581943879625864, "grad_norm": 0.27710703015327454, "learning_rate": 2.0379809875069195e-06, "loss": 0.3116, "step": 35982 }, { "epoch": 3.6582960553070354, "grad_norm": 0.24653072655200958, "learning_rate": 2.0376950842027957e-06, "loss": 0.2931, "step": 35983 }, { "epoch": 3.6583977226514843, "grad_norm": 0.25847628712654114, "learning_rate": 2.0374091958220643e-06, "loss": 0.296, "step": 35984 }, { "epoch": 3.6584993899959333, "grad_norm": 0.2629701495170593, "learning_rate": 2.0371233223661576e-06, "loss": 0.3211, "step": 35985 }, { "epoch": 3.658601057340382, "grad_norm": 0.27506154775619507, "learning_rate": 2.0368374638365185e-06, "loss": 0.3401, "step": 35986 }, { "epoch": 3.658702724684831, "grad_norm": 0.27982404828071594, "learning_rate": 2.036551620234591e-06, "loss": 0.2585, "step": 35987 }, { "epoch": 3.65880439202928, "grad_norm": 0.2870788276195526, "learning_rate": 2.0362657915618115e-06, "loss": 0.3145, "step": 35988 }, { "epoch": 3.658906059373729, "grad_norm": 0.29623037576675415, "learning_rate": 2.035979977819621e-06, "loss": 0.3106, "step": 35989 }, { "epoch": 3.659007726718178, "grad_norm": 0.2811305522918701, "learning_rate": 2.035694179009457e-06, "loss": 0.2856, "step": 35990 }, { "epoch": 3.659109394062627, "grad_norm": 0.29134994745254517, "learning_rate": 2.0354083951327636e-06, "loss": 0.3006, "step": 35991 }, { "epoch": 3.659211061407076, "grad_norm": 0.2773302495479584, "learning_rate": 2.0351226261909775e-06, "loss": 0.308, "step": 35992 }, { "epoch": 3.659312728751525, "grad_norm": 0.270377516746521, "learning_rate": 2.034836872185538e-06, "loss": 0.3569, "step": 35993 }, { "epoch": 3.6594143960959737, "grad_norm": 0.30590134859085083, "learning_rate": 2.0345511331178884e-06, "loss": 0.3005, "step": 35994 }, { "epoch": 3.6595160634404227, "grad_norm": 0.276022732257843, "learning_rate": 2.0342654089894624e-06, "loss": 0.3008, "step": 35995 }, { "epoch": 3.6596177307848716, "grad_norm": 0.2549121379852295, "learning_rate": 2.0339796998017024e-06, "loss": 0.3596, "step": 35996 }, { "epoch": 3.659719398129321, "grad_norm": 0.27977269887924194, "learning_rate": 2.03369400555605e-06, "loss": 0.2904, "step": 35997 }, { "epoch": 3.65982106547377, "grad_norm": 0.28544703125953674, "learning_rate": 2.0334083262539424e-06, "loss": 0.2948, "step": 35998 }, { "epoch": 3.659922732818219, "grad_norm": 0.27154409885406494, "learning_rate": 2.033122661896818e-06, "loss": 0.3036, "step": 35999 }, { "epoch": 3.660024400162668, "grad_norm": 0.2928043603897095, "learning_rate": 2.032837012486115e-06, "loss": 0.2787, "step": 36000 }, { "epoch": 3.6601260675071168, "grad_norm": 0.2638242244720459, "learning_rate": 2.032551378023276e-06, "loss": 0.3394, "step": 36001 }, { "epoch": 3.6602277348515657, "grad_norm": 0.25940993428230286, "learning_rate": 2.0322657585097377e-06, "loss": 0.2957, "step": 36002 }, { "epoch": 3.6603294021960147, "grad_norm": 0.295375257730484, "learning_rate": 2.031980153946938e-06, "loss": 0.2674, "step": 36003 }, { "epoch": 3.6604310695404636, "grad_norm": 0.2534918189048767, "learning_rate": 2.03169456433632e-06, "loss": 0.2954, "step": 36004 }, { "epoch": 3.6605327368849125, "grad_norm": 0.27694132924079895, "learning_rate": 2.031408989679316e-06, "loss": 0.3019, "step": 36005 }, { "epoch": 3.6606344042293615, "grad_norm": 0.2714828848838806, "learning_rate": 2.0311234299773685e-06, "loss": 0.313, "step": 36006 }, { "epoch": 3.6607360715738104, "grad_norm": 0.26967906951904297, "learning_rate": 2.0308378852319187e-06, "loss": 0.3158, "step": 36007 }, { "epoch": 3.6608377389182594, "grad_norm": 0.2891857922077179, "learning_rate": 2.0305523554443985e-06, "loss": 0.3211, "step": 36008 }, { "epoch": 3.6609394062627083, "grad_norm": 0.28371918201446533, "learning_rate": 2.030266840616252e-06, "loss": 0.3616, "step": 36009 }, { "epoch": 3.6610410736071572, "grad_norm": 0.2798508405685425, "learning_rate": 2.029981340748913e-06, "loss": 0.2915, "step": 36010 }, { "epoch": 3.661142740951606, "grad_norm": 0.25700893998146057, "learning_rate": 2.029695855843825e-06, "loss": 0.3197, "step": 36011 }, { "epoch": 3.6612444082960556, "grad_norm": 0.31926363706588745, "learning_rate": 2.029410385902423e-06, "loss": 0.3167, "step": 36012 }, { "epoch": 3.6613460756405045, "grad_norm": 0.3041480779647827, "learning_rate": 2.0291249309261435e-06, "loss": 0.3202, "step": 36013 }, { "epoch": 3.6614477429849535, "grad_norm": 0.26120319962501526, "learning_rate": 2.0288394909164306e-06, "loss": 0.315, "step": 36014 }, { "epoch": 3.6615494103294024, "grad_norm": 0.251472145318985, "learning_rate": 2.0285540658747145e-06, "loss": 0.3138, "step": 36015 }, { "epoch": 3.6616510776738513, "grad_norm": 0.2644352912902832, "learning_rate": 2.0282686558024367e-06, "loss": 0.2856, "step": 36016 }, { "epoch": 3.6617527450183003, "grad_norm": 0.2863852381706238, "learning_rate": 2.0279832607010385e-06, "loss": 0.3221, "step": 36017 }, { "epoch": 3.6618544123627492, "grad_norm": 0.29320764541625977, "learning_rate": 2.027697880571952e-06, "loss": 0.3406, "step": 36018 }, { "epoch": 3.661956079707198, "grad_norm": 0.27597302198410034, "learning_rate": 2.027412515416618e-06, "loss": 0.3049, "step": 36019 }, { "epoch": 3.662057747051647, "grad_norm": 0.26570039987564087, "learning_rate": 2.0271271652364715e-06, "loss": 0.3185, "step": 36020 }, { "epoch": 3.662159414396096, "grad_norm": 0.2850258946418762, "learning_rate": 2.026841830032954e-06, "loss": 0.3042, "step": 36021 }, { "epoch": 3.662261081740545, "grad_norm": 0.2664259970188141, "learning_rate": 2.0265565098075005e-06, "loss": 0.2827, "step": 36022 }, { "epoch": 3.662362749084994, "grad_norm": 0.2751409709453583, "learning_rate": 2.026271204561548e-06, "loss": 0.3375, "step": 36023 }, { "epoch": 3.662464416429443, "grad_norm": 0.2811916768550873, "learning_rate": 2.025985914296537e-06, "loss": 0.3088, "step": 36024 }, { "epoch": 3.662566083773892, "grad_norm": 0.27439069747924805, "learning_rate": 2.0257006390138988e-06, "loss": 0.3212, "step": 36025 }, { "epoch": 3.6626677511183408, "grad_norm": 0.27478715777397156, "learning_rate": 2.0254153787150743e-06, "loss": 0.2863, "step": 36026 }, { "epoch": 3.6627694184627897, "grad_norm": 0.2594398558139801, "learning_rate": 2.025130133401504e-06, "loss": 0.3363, "step": 36027 }, { "epoch": 3.6628710858072386, "grad_norm": 0.3091163635253906, "learning_rate": 2.024844903074617e-06, "loss": 0.3127, "step": 36028 }, { "epoch": 3.6629727531516876, "grad_norm": 0.2724902927875519, "learning_rate": 2.0245596877358565e-06, "loss": 0.3091, "step": 36029 }, { "epoch": 3.6630744204961365, "grad_norm": 0.26148486137390137, "learning_rate": 2.024274487386657e-06, "loss": 0.3422, "step": 36030 }, { "epoch": 3.6631760878405855, "grad_norm": 0.28384411334991455, "learning_rate": 2.023989302028454e-06, "loss": 0.2774, "step": 36031 }, { "epoch": 3.6632777551850344, "grad_norm": 0.2867388427257538, "learning_rate": 2.0237041316626877e-06, "loss": 0.3236, "step": 36032 }, { "epoch": 3.6633794225294833, "grad_norm": 0.26728248596191406, "learning_rate": 2.02341897629079e-06, "loss": 0.2988, "step": 36033 }, { "epoch": 3.6634810898739323, "grad_norm": 0.26116496324539185, "learning_rate": 2.0231338359142045e-06, "loss": 0.307, "step": 36034 }, { "epoch": 3.6635827572183812, "grad_norm": 0.2768550515174866, "learning_rate": 2.02284871053436e-06, "loss": 0.2949, "step": 36035 }, { "epoch": 3.66368442456283, "grad_norm": 0.2595169246196747, "learning_rate": 2.0225636001526955e-06, "loss": 0.3232, "step": 36036 }, { "epoch": 3.663786091907279, "grad_norm": 0.2760714590549469, "learning_rate": 2.022278504770652e-06, "loss": 0.3023, "step": 36037 }, { "epoch": 3.6638877592517285, "grad_norm": 0.25921085476875305, "learning_rate": 2.0219934243896578e-06, "loss": 0.3023, "step": 36038 }, { "epoch": 3.6639894265961774, "grad_norm": 0.29418855905532837, "learning_rate": 2.0217083590111554e-06, "loss": 0.2986, "step": 36039 }, { "epoch": 3.6640910939406264, "grad_norm": 0.2702125012874603, "learning_rate": 2.021423308636578e-06, "loss": 0.3052, "step": 36040 }, { "epoch": 3.6641927612850753, "grad_norm": 0.275776207447052, "learning_rate": 2.0211382732673607e-06, "loss": 0.3212, "step": 36041 }, { "epoch": 3.6642944286295243, "grad_norm": 0.28021740913391113, "learning_rate": 2.0208532529049423e-06, "loss": 0.298, "step": 36042 }, { "epoch": 3.664396095973973, "grad_norm": 0.2780473232269287, "learning_rate": 2.0205682475507576e-06, "loss": 0.299, "step": 36043 }, { "epoch": 3.664497763318422, "grad_norm": 0.2687215805053711, "learning_rate": 2.020283257206242e-06, "loss": 0.3195, "step": 36044 }, { "epoch": 3.664599430662871, "grad_norm": 0.27789002656936646, "learning_rate": 2.019998281872829e-06, "loss": 0.347, "step": 36045 }, { "epoch": 3.66470109800732, "grad_norm": 0.2776159644126892, "learning_rate": 2.019713321551957e-06, "loss": 0.3251, "step": 36046 }, { "epoch": 3.664802765351769, "grad_norm": 0.2625858783721924, "learning_rate": 2.019428376245065e-06, "loss": 0.3182, "step": 36047 }, { "epoch": 3.664904432696218, "grad_norm": 0.27507808804512024, "learning_rate": 2.01914344595358e-06, "loss": 0.2998, "step": 36048 }, { "epoch": 3.665006100040667, "grad_norm": 0.2731266915798187, "learning_rate": 2.0188585306789447e-06, "loss": 0.3455, "step": 36049 }, { "epoch": 3.665107767385116, "grad_norm": 0.24200430512428284, "learning_rate": 2.018573630422591e-06, "loss": 0.2882, "step": 36050 }, { "epoch": 3.6652094347295647, "grad_norm": 0.28917646408081055, "learning_rate": 2.018288745185954e-06, "loss": 0.3197, "step": 36051 }, { "epoch": 3.6653111020740137, "grad_norm": 0.27887436747550964, "learning_rate": 2.0180038749704705e-06, "loss": 0.29, "step": 36052 }, { "epoch": 3.665412769418463, "grad_norm": 0.2764941453933716, "learning_rate": 2.0177190197775752e-06, "loss": 0.3056, "step": 36053 }, { "epoch": 3.665514436762912, "grad_norm": 0.26736152172088623, "learning_rate": 2.0174341796087032e-06, "loss": 0.2953, "step": 36054 }, { "epoch": 3.665616104107361, "grad_norm": 0.28056761622428894, "learning_rate": 2.0171493544652864e-06, "loss": 0.3291, "step": 36055 }, { "epoch": 3.66571777145181, "grad_norm": 0.27368125319480896, "learning_rate": 2.0168645443487635e-06, "loss": 0.2829, "step": 36056 }, { "epoch": 3.665819438796259, "grad_norm": 0.2646874487400055, "learning_rate": 2.0165797492605706e-06, "loss": 0.3031, "step": 36057 }, { "epoch": 3.6659211061407078, "grad_norm": 0.2650560736656189, "learning_rate": 2.0162949692021367e-06, "loss": 0.3384, "step": 36058 }, { "epoch": 3.6660227734851567, "grad_norm": 0.2699415981769562, "learning_rate": 2.0160102041749013e-06, "loss": 0.3308, "step": 36059 }, { "epoch": 3.6661244408296056, "grad_norm": 0.2850666344165802, "learning_rate": 2.015725454180298e-06, "loss": 0.3187, "step": 36060 }, { "epoch": 3.6662261081740546, "grad_norm": 0.2719486355781555, "learning_rate": 2.0154407192197583e-06, "loss": 0.2879, "step": 36061 }, { "epoch": 3.6663277755185035, "grad_norm": 0.2726895213127136, "learning_rate": 2.0151559992947212e-06, "loss": 0.2897, "step": 36062 }, { "epoch": 3.6664294428629525, "grad_norm": 0.2914750277996063, "learning_rate": 2.014871294406619e-06, "loss": 0.3426, "step": 36063 }, { "epoch": 3.6665311102074014, "grad_norm": 0.2634788453578949, "learning_rate": 2.014586604556885e-06, "loss": 0.3204, "step": 36064 }, { "epoch": 3.6666327775518504, "grad_norm": 0.28180235624313354, "learning_rate": 2.0143019297469535e-06, "loss": 0.324, "step": 36065 }, { "epoch": 3.6667344448962993, "grad_norm": 0.26713690161705017, "learning_rate": 2.0140172699782608e-06, "loss": 0.3105, "step": 36066 }, { "epoch": 3.6668361122407482, "grad_norm": 0.27313339710235596, "learning_rate": 2.013732625252239e-06, "loss": 0.3201, "step": 36067 }, { "epoch": 3.666937779585197, "grad_norm": 0.27157071232795715, "learning_rate": 2.013447995570321e-06, "loss": 0.28, "step": 36068 }, { "epoch": 3.667039446929646, "grad_norm": 0.2833004593849182, "learning_rate": 2.0131633809339447e-06, "loss": 0.3634, "step": 36069 }, { "epoch": 3.667141114274095, "grad_norm": 0.2945432960987091, "learning_rate": 2.012878781344541e-06, "loss": 0.2895, "step": 36070 }, { "epoch": 3.667242781618544, "grad_norm": 0.26596972346305847, "learning_rate": 2.012594196803542e-06, "loss": 0.3134, "step": 36071 }, { "epoch": 3.667344448962993, "grad_norm": 0.286908894777298, "learning_rate": 2.012309627312386e-06, "loss": 0.3127, "step": 36072 }, { "epoch": 3.667446116307442, "grad_norm": 0.2679166793823242, "learning_rate": 2.012025072872504e-06, "loss": 0.3218, "step": 36073 }, { "epoch": 3.667547783651891, "grad_norm": 0.28468677401542664, "learning_rate": 2.0117405334853297e-06, "loss": 0.3116, "step": 36074 }, { "epoch": 3.6676494509963398, "grad_norm": 0.2548951804637909, "learning_rate": 2.0114560091522937e-06, "loss": 0.295, "step": 36075 }, { "epoch": 3.6677511183407887, "grad_norm": 0.2635885775089264, "learning_rate": 2.011171499874835e-06, "loss": 0.3045, "step": 36076 }, { "epoch": 3.6678527856852376, "grad_norm": 0.2629971206188202, "learning_rate": 2.010887005654384e-06, "loss": 0.294, "step": 36077 }, { "epoch": 3.6679544530296866, "grad_norm": 0.25899550318717957, "learning_rate": 2.0106025264923717e-06, "loss": 0.3267, "step": 36078 }, { "epoch": 3.668056120374136, "grad_norm": 0.26532143354415894, "learning_rate": 2.010318062390236e-06, "loss": 0.3078, "step": 36079 }, { "epoch": 3.668157787718585, "grad_norm": 0.24775996804237366, "learning_rate": 2.0100336133494063e-06, "loss": 0.3394, "step": 36080 }, { "epoch": 3.668259455063034, "grad_norm": 0.2786877751350403, "learning_rate": 2.0097491793713157e-06, "loss": 0.2959, "step": 36081 }, { "epoch": 3.668361122407483, "grad_norm": 0.25948044657707214, "learning_rate": 2.0094647604573995e-06, "loss": 0.2979, "step": 36082 }, { "epoch": 3.6684627897519317, "grad_norm": 0.27984723448753357, "learning_rate": 2.0091803566090896e-06, "loss": 0.3236, "step": 36083 }, { "epoch": 3.6685644570963807, "grad_norm": 0.26551154255867004, "learning_rate": 2.0088959678278182e-06, "loss": 0.3061, "step": 36084 }, { "epoch": 3.6686661244408296, "grad_norm": 0.2539784014225006, "learning_rate": 2.0086115941150165e-06, "loss": 0.3268, "step": 36085 }, { "epoch": 3.6687677917852786, "grad_norm": 0.25714531540870667, "learning_rate": 2.00832723547212e-06, "loss": 0.3416, "step": 36086 }, { "epoch": 3.6688694591297275, "grad_norm": 0.2785838544368744, "learning_rate": 2.0080428919005603e-06, "loss": 0.314, "step": 36087 }, { "epoch": 3.6689711264741764, "grad_norm": 0.2662350833415985, "learning_rate": 2.007758563401768e-06, "loss": 0.2855, "step": 36088 }, { "epoch": 3.6690727938186254, "grad_norm": 0.27136966586112976, "learning_rate": 2.007474249977178e-06, "loss": 0.3036, "step": 36089 }, { "epoch": 3.6691744611630743, "grad_norm": 0.2889425456523895, "learning_rate": 2.0071899516282224e-06, "loss": 0.2973, "step": 36090 }, { "epoch": 3.6692761285075233, "grad_norm": 0.2657240033149719, "learning_rate": 2.0069056683563303e-06, "loss": 0.323, "step": 36091 }, { "epoch": 3.669377795851972, "grad_norm": 0.30156707763671875, "learning_rate": 2.006621400162938e-06, "loss": 0.2875, "step": 36092 }, { "epoch": 3.669479463196421, "grad_norm": 0.285787433385849, "learning_rate": 2.0063371470494764e-06, "loss": 0.2972, "step": 36093 }, { "epoch": 3.6695811305408705, "grad_norm": 0.28010305762290955, "learning_rate": 2.0060529090173764e-06, "loss": 0.3125, "step": 36094 }, { "epoch": 3.6696827978853195, "grad_norm": 0.2584421634674072, "learning_rate": 2.0057686860680685e-06, "loss": 0.3071, "step": 36095 }, { "epoch": 3.6697844652297684, "grad_norm": 0.29904797673225403, "learning_rate": 2.005484478202988e-06, "loss": 0.3123, "step": 36096 }, { "epoch": 3.6698861325742174, "grad_norm": 0.30190256237983704, "learning_rate": 2.005200285423565e-06, "loss": 0.3077, "step": 36097 }, { "epoch": 3.6699877999186663, "grad_norm": 0.29356327652931213, "learning_rate": 2.00491610773123e-06, "loss": 0.3098, "step": 36098 }, { "epoch": 3.6700894672631152, "grad_norm": 0.26162052154541016, "learning_rate": 2.0046319451274175e-06, "loss": 0.3233, "step": 36099 }, { "epoch": 3.670191134607564, "grad_norm": 0.2702552080154419, "learning_rate": 2.0043477976135577e-06, "loss": 0.2993, "step": 36100 }, { "epoch": 3.670292801952013, "grad_norm": 0.2891034483909607, "learning_rate": 2.00406366519108e-06, "loss": 0.3183, "step": 36101 }, { "epoch": 3.670394469296462, "grad_norm": 0.27155736088752747, "learning_rate": 2.003779547861419e-06, "loss": 0.3139, "step": 36102 }, { "epoch": 3.670496136640911, "grad_norm": 0.26787111163139343, "learning_rate": 2.0034954456260055e-06, "loss": 0.3248, "step": 36103 }, { "epoch": 3.67059780398536, "grad_norm": 0.2676413357257843, "learning_rate": 2.003211358486269e-06, "loss": 0.3317, "step": 36104 }, { "epoch": 3.670699471329809, "grad_norm": 0.27492544054985046, "learning_rate": 2.00292728644364e-06, "loss": 0.3203, "step": 36105 }, { "epoch": 3.670801138674258, "grad_norm": 0.3014257252216339, "learning_rate": 2.002643229499553e-06, "loss": 0.3219, "step": 36106 }, { "epoch": 3.6709028060187068, "grad_norm": 0.26942721009254456, "learning_rate": 2.0023591876554366e-06, "loss": 0.3143, "step": 36107 }, { "epoch": 3.6710044733631557, "grad_norm": 0.2783123254776001, "learning_rate": 2.0020751609127207e-06, "loss": 0.3214, "step": 36108 }, { "epoch": 3.6711061407076047, "grad_norm": 0.2769359350204468, "learning_rate": 2.00179114927284e-06, "loss": 0.3084, "step": 36109 }, { "epoch": 3.6712078080520536, "grad_norm": 0.2633773684501648, "learning_rate": 2.0015071527372227e-06, "loss": 0.2992, "step": 36110 }, { "epoch": 3.6713094753965025, "grad_norm": 0.2919727861881256, "learning_rate": 2.0012231713072983e-06, "loss": 0.2927, "step": 36111 }, { "epoch": 3.6714111427409515, "grad_norm": 0.28418025374412537, "learning_rate": 2.0009392049845e-06, "loss": 0.3336, "step": 36112 }, { "epoch": 3.6715128100854004, "grad_norm": 0.27669253945350647, "learning_rate": 2.000655253770258e-06, "loss": 0.2988, "step": 36113 }, { "epoch": 3.6716144774298494, "grad_norm": 0.2867639362812042, "learning_rate": 2.0003713176660016e-06, "loss": 0.3157, "step": 36114 }, { "epoch": 3.6717161447742983, "grad_norm": 0.2942745089530945, "learning_rate": 2.0000873966731605e-06, "loss": 0.281, "step": 36115 }, { "epoch": 3.6718178121187472, "grad_norm": 0.2542840838432312, "learning_rate": 1.9998034907931672e-06, "loss": 0.2825, "step": 36116 }, { "epoch": 3.671919479463196, "grad_norm": 0.2670193910598755, "learning_rate": 1.9995196000274515e-06, "loss": 0.2997, "step": 36117 }, { "epoch": 3.672021146807645, "grad_norm": 0.26317277550697327, "learning_rate": 1.999235724377441e-06, "loss": 0.2983, "step": 36118 }, { "epoch": 3.672122814152094, "grad_norm": 0.3021533787250519, "learning_rate": 1.99895186384457e-06, "loss": 0.2812, "step": 36119 }, { "epoch": 3.6722244814965435, "grad_norm": 0.2754403054714203, "learning_rate": 1.998668018430266e-06, "loss": 0.3148, "step": 36120 }, { "epoch": 3.6723261488409924, "grad_norm": 0.2859385311603546, "learning_rate": 1.9983841881359577e-06, "loss": 0.3007, "step": 36121 }, { "epoch": 3.6724278161854413, "grad_norm": 0.25631463527679443, "learning_rate": 1.9981003729630782e-06, "loss": 0.2986, "step": 36122 }, { "epoch": 3.6725294835298903, "grad_norm": 0.2763650715351105, "learning_rate": 1.9978165729130555e-06, "loss": 0.2941, "step": 36123 }, { "epoch": 3.6726311508743392, "grad_norm": 0.2662992477416992, "learning_rate": 1.9975327879873198e-06, "loss": 0.3305, "step": 36124 }, { "epoch": 3.672732818218788, "grad_norm": 0.2890492379665375, "learning_rate": 1.9972490181872983e-06, "loss": 0.3248, "step": 36125 }, { "epoch": 3.672834485563237, "grad_norm": 0.2743137776851654, "learning_rate": 1.9969652635144247e-06, "loss": 0.3142, "step": 36126 }, { "epoch": 3.672936152907686, "grad_norm": 0.27008140087127686, "learning_rate": 1.9966815239701266e-06, "loss": 0.316, "step": 36127 }, { "epoch": 3.673037820252135, "grad_norm": 0.2759600579738617, "learning_rate": 1.9963977995558313e-06, "loss": 0.3133, "step": 36128 }, { "epoch": 3.673139487596584, "grad_norm": 0.2802593410015106, "learning_rate": 1.996114090272972e-06, "loss": 0.3224, "step": 36129 }, { "epoch": 3.673241154941033, "grad_norm": 0.25690406560897827, "learning_rate": 1.9958303961229762e-06, "loss": 0.3161, "step": 36130 }, { "epoch": 3.673342822285482, "grad_norm": 0.2662777900695801, "learning_rate": 1.9955467171072714e-06, "loss": 0.3214, "step": 36131 }, { "epoch": 3.6734444896299308, "grad_norm": 0.253711462020874, "learning_rate": 1.9952630532272893e-06, "loss": 0.3349, "step": 36132 }, { "epoch": 3.6735461569743797, "grad_norm": 0.28147321939468384, "learning_rate": 1.994979404484458e-06, "loss": 0.2942, "step": 36133 }, { "epoch": 3.6736478243188286, "grad_norm": 0.2762419283390045, "learning_rate": 1.994695770880207e-06, "loss": 0.3248, "step": 36134 }, { "epoch": 3.673749491663278, "grad_norm": 0.26730218529701233, "learning_rate": 1.994412152415962e-06, "loss": 0.3086, "step": 36135 }, { "epoch": 3.673851159007727, "grad_norm": 0.3288514316082001, "learning_rate": 1.9941285490931565e-06, "loss": 0.3331, "step": 36136 }, { "epoch": 3.673952826352176, "grad_norm": 0.2949983775615692, "learning_rate": 1.9938449609132175e-06, "loss": 0.3101, "step": 36137 }, { "epoch": 3.674054493696625, "grad_norm": 0.28243085741996765, "learning_rate": 1.993561387877571e-06, "loss": 0.2854, "step": 36138 }, { "epoch": 3.674156161041074, "grad_norm": 0.2758539915084839, "learning_rate": 1.9932778299876493e-06, "loss": 0.3051, "step": 36139 }, { "epoch": 3.6742578283855227, "grad_norm": 0.2698458731174469, "learning_rate": 1.9929942872448797e-06, "loss": 0.3371, "step": 36140 }, { "epoch": 3.6743594957299717, "grad_norm": 0.2759624719619751, "learning_rate": 1.9927107596506888e-06, "loss": 0.3413, "step": 36141 }, { "epoch": 3.6744611630744206, "grad_norm": 0.2610492408275604, "learning_rate": 1.9924272472065075e-06, "loss": 0.3226, "step": 36142 }, { "epoch": 3.6745628304188696, "grad_norm": 0.2762051224708557, "learning_rate": 1.9921437499137636e-06, "loss": 0.3138, "step": 36143 }, { "epoch": 3.6746644977633185, "grad_norm": 0.2591617703437805, "learning_rate": 1.9918602677738848e-06, "loss": 0.3051, "step": 36144 }, { "epoch": 3.6747661651077674, "grad_norm": 0.2684038579463959, "learning_rate": 1.991576800788297e-06, "loss": 0.3094, "step": 36145 }, { "epoch": 3.6748678324522164, "grad_norm": 0.27268773317337036, "learning_rate": 1.991293348958434e-06, "loss": 0.2955, "step": 36146 }, { "epoch": 3.6749694997966653, "grad_norm": 0.2779141366481781, "learning_rate": 1.991009912285717e-06, "loss": 0.3271, "step": 36147 }, { "epoch": 3.6750711671411143, "grad_norm": 0.2583405673503876, "learning_rate": 1.9907264907715763e-06, "loss": 0.3125, "step": 36148 }, { "epoch": 3.675172834485563, "grad_norm": 0.2656845152378082, "learning_rate": 1.990443084417443e-06, "loss": 0.3196, "step": 36149 }, { "epoch": 3.675274501830012, "grad_norm": 0.28731343150138855, "learning_rate": 1.9901596932247414e-06, "loss": 0.3453, "step": 36150 }, { "epoch": 3.675376169174461, "grad_norm": 0.2704308331012726, "learning_rate": 1.9898763171949003e-06, "loss": 0.2801, "step": 36151 }, { "epoch": 3.67547783651891, "grad_norm": 0.28111350536346436, "learning_rate": 1.9895929563293453e-06, "loss": 0.2945, "step": 36152 }, { "epoch": 3.675579503863359, "grad_norm": 0.2758839428424835, "learning_rate": 1.9893096106295073e-06, "loss": 0.2901, "step": 36153 }, { "epoch": 3.675681171207808, "grad_norm": 0.2978300154209137, "learning_rate": 1.9890262800968124e-06, "loss": 0.3082, "step": 36154 }, { "epoch": 3.675782838552257, "grad_norm": 0.2689482569694519, "learning_rate": 1.9887429647326857e-06, "loss": 0.3122, "step": 36155 }, { "epoch": 3.675884505896706, "grad_norm": 0.2541995942592621, "learning_rate": 1.9884596645385595e-06, "loss": 0.2877, "step": 36156 }, { "epoch": 3.6759861732411547, "grad_norm": 0.28287404775619507, "learning_rate": 1.9881763795158543e-06, "loss": 0.2937, "step": 36157 }, { "epoch": 3.6760878405856037, "grad_norm": 0.2372908741235733, "learning_rate": 1.9878931096660016e-06, "loss": 0.3023, "step": 36158 }, { "epoch": 3.6761895079300526, "grad_norm": 0.269197940826416, "learning_rate": 1.9876098549904303e-06, "loss": 0.3004, "step": 36159 }, { "epoch": 3.6762911752745016, "grad_norm": 0.2661786675453186, "learning_rate": 1.9873266154905614e-06, "loss": 0.2806, "step": 36160 }, { "epoch": 3.676392842618951, "grad_norm": 0.2825928330421448, "learning_rate": 1.987043391167827e-06, "loss": 0.3142, "step": 36161 }, { "epoch": 3.6764945099634, "grad_norm": 0.27719685435295105, "learning_rate": 1.9867601820236504e-06, "loss": 0.2909, "step": 36162 }, { "epoch": 3.676596177307849, "grad_norm": 0.28235703706741333, "learning_rate": 1.9864769880594616e-06, "loss": 0.3083, "step": 36163 }, { "epoch": 3.6766978446522978, "grad_norm": 0.2606980800628662, "learning_rate": 1.9861938092766855e-06, "loss": 0.3217, "step": 36164 }, { "epoch": 3.6767995119967467, "grad_norm": 0.2471434772014618, "learning_rate": 1.985910645676747e-06, "loss": 0.3187, "step": 36165 }, { "epoch": 3.6769011793411956, "grad_norm": 0.28986629843711853, "learning_rate": 1.985627497261078e-06, "loss": 0.3178, "step": 36166 }, { "epoch": 3.6770028466856446, "grad_norm": 0.2685219943523407, "learning_rate": 1.985344364031097e-06, "loss": 0.3163, "step": 36167 }, { "epoch": 3.6771045140300935, "grad_norm": 0.3046092987060547, "learning_rate": 1.9850612459882356e-06, "loss": 0.3107, "step": 36168 }, { "epoch": 3.6772061813745425, "grad_norm": 0.3059818148612976, "learning_rate": 1.984778143133923e-06, "loss": 0.3132, "step": 36169 }, { "epoch": 3.6773078487189914, "grad_norm": 0.2798486053943634, "learning_rate": 1.9844950554695775e-06, "loss": 0.3255, "step": 36170 }, { "epoch": 3.6774095160634404, "grad_norm": 0.29987993836402893, "learning_rate": 1.9842119829966313e-06, "loss": 0.3169, "step": 36171 }, { "epoch": 3.6775111834078893, "grad_norm": 0.25227710604667664, "learning_rate": 1.983928925716507e-06, "loss": 0.3118, "step": 36172 }, { "epoch": 3.6776128507523382, "grad_norm": 0.27130722999572754, "learning_rate": 1.9836458836306334e-06, "loss": 0.3103, "step": 36173 }, { "epoch": 3.677714518096787, "grad_norm": 0.27949777245521545, "learning_rate": 1.9833628567404346e-06, "loss": 0.2931, "step": 36174 }, { "epoch": 3.677816185441236, "grad_norm": 0.27652430534362793, "learning_rate": 1.9830798450473353e-06, "loss": 0.3095, "step": 36175 }, { "epoch": 3.6779178527856855, "grad_norm": 0.27352455258369446, "learning_rate": 1.9827968485527672e-06, "loss": 0.3112, "step": 36176 }, { "epoch": 3.6780195201301344, "grad_norm": 0.27925023436546326, "learning_rate": 1.9825138672581474e-06, "loss": 0.2936, "step": 36177 }, { "epoch": 3.6781211874745834, "grad_norm": 0.29514840245246887, "learning_rate": 1.9822309011649055e-06, "loss": 0.3455, "step": 36178 }, { "epoch": 3.6782228548190323, "grad_norm": 0.27842774987220764, "learning_rate": 1.981947950274471e-06, "loss": 0.3047, "step": 36179 }, { "epoch": 3.6783245221634813, "grad_norm": 0.2659706771373749, "learning_rate": 1.9816650145882622e-06, "loss": 0.2924, "step": 36180 }, { "epoch": 3.67842618950793, "grad_norm": 0.27430427074432373, "learning_rate": 1.9813820941077094e-06, "loss": 0.2945, "step": 36181 }, { "epoch": 3.678527856852379, "grad_norm": 0.2757197916507721, "learning_rate": 1.9810991888342355e-06, "loss": 0.3167, "step": 36182 }, { "epoch": 3.678629524196828, "grad_norm": 0.31852492690086365, "learning_rate": 1.9808162987692658e-06, "loss": 0.2898, "step": 36183 }, { "epoch": 3.678731191541277, "grad_norm": 0.25916123390197754, "learning_rate": 1.980533423914228e-06, "loss": 0.3111, "step": 36184 }, { "epoch": 3.678832858885726, "grad_norm": 0.28994327783584595, "learning_rate": 1.980250564270543e-06, "loss": 0.3344, "step": 36185 }, { "epoch": 3.678934526230175, "grad_norm": 0.2706156373023987, "learning_rate": 1.979967719839641e-06, "loss": 0.3123, "step": 36186 }, { "epoch": 3.679036193574624, "grad_norm": 0.2788650393486023, "learning_rate": 1.979684890622941e-06, "loss": 0.3565, "step": 36187 }, { "epoch": 3.679137860919073, "grad_norm": 0.2913287281990051, "learning_rate": 1.979402076621871e-06, "loss": 0.2636, "step": 36188 }, { "epoch": 3.6792395282635217, "grad_norm": 0.2790496349334717, "learning_rate": 1.979119277837859e-06, "loss": 0.3308, "step": 36189 }, { "epoch": 3.6793411956079707, "grad_norm": 0.2722434103488922, "learning_rate": 1.9788364942723224e-06, "loss": 0.3277, "step": 36190 }, { "epoch": 3.6794428629524196, "grad_norm": 0.27866190671920776, "learning_rate": 1.978553725926691e-06, "loss": 0.3226, "step": 36191 }, { "epoch": 3.6795445302968686, "grad_norm": 0.28382787108421326, "learning_rate": 1.978270972802389e-06, "loss": 0.3384, "step": 36192 }, { "epoch": 3.6796461976413175, "grad_norm": 0.2555769681930542, "learning_rate": 1.977988234900837e-06, "loss": 0.3236, "step": 36193 }, { "epoch": 3.6797478649857664, "grad_norm": 0.27771198749542236, "learning_rate": 1.9777055122234645e-06, "loss": 0.2971, "step": 36194 }, { "epoch": 3.6798495323302154, "grad_norm": 0.27290260791778564, "learning_rate": 1.9774228047716936e-06, "loss": 0.3118, "step": 36195 }, { "epoch": 3.6799511996746643, "grad_norm": 0.26476240158081055, "learning_rate": 1.977140112546948e-06, "loss": 0.3227, "step": 36196 }, { "epoch": 3.6800528670191133, "grad_norm": 0.26509204506874084, "learning_rate": 1.9768574355506506e-06, "loss": 0.2843, "step": 36197 }, { "epoch": 3.680154534363562, "grad_norm": 0.2719897925853729, "learning_rate": 1.976574773784227e-06, "loss": 0.3465, "step": 36198 }, { "epoch": 3.680256201708011, "grad_norm": 0.26571178436279297, "learning_rate": 1.9762921272491043e-06, "loss": 0.3447, "step": 36199 }, { "epoch": 3.68035786905246, "grad_norm": 0.2670597732067108, "learning_rate": 1.9760094959466995e-06, "loss": 0.3179, "step": 36200 }, { "epoch": 3.680459536396909, "grad_norm": 0.2693992853164673, "learning_rate": 1.975726879878443e-06, "loss": 0.3223, "step": 36201 }, { "epoch": 3.6805612037413584, "grad_norm": 0.2760436534881592, "learning_rate": 1.975444279045755e-06, "loss": 0.3197, "step": 36202 }, { "epoch": 3.6806628710858074, "grad_norm": 0.2746359705924988, "learning_rate": 1.975161693450059e-06, "loss": 0.3194, "step": 36203 }, { "epoch": 3.6807645384302563, "grad_norm": 0.2398926466703415, "learning_rate": 1.9748791230927807e-06, "loss": 0.2822, "step": 36204 }, { "epoch": 3.6808662057747052, "grad_norm": 0.25718843936920166, "learning_rate": 1.9745965679753424e-06, "loss": 0.3267, "step": 36205 }, { "epoch": 3.680967873119154, "grad_norm": 0.2499087154865265, "learning_rate": 1.9743140280991684e-06, "loss": 0.3029, "step": 36206 }, { "epoch": 3.681069540463603, "grad_norm": 0.256510853767395, "learning_rate": 1.9740315034656793e-06, "loss": 0.3014, "step": 36207 }, { "epoch": 3.681171207808052, "grad_norm": 0.2990281879901886, "learning_rate": 1.9737489940762998e-06, "loss": 0.3153, "step": 36208 }, { "epoch": 3.681272875152501, "grad_norm": 0.2616034746170044, "learning_rate": 1.9734664999324574e-06, "loss": 0.3042, "step": 36209 }, { "epoch": 3.68137454249695, "grad_norm": 0.2628839910030365, "learning_rate": 1.973184021035569e-06, "loss": 0.3036, "step": 36210 }, { "epoch": 3.681476209841399, "grad_norm": 0.28893405199050903, "learning_rate": 1.9729015573870613e-06, "loss": 0.3175, "step": 36211 }, { "epoch": 3.681577877185848, "grad_norm": 0.26538699865341187, "learning_rate": 1.972619108988356e-06, "loss": 0.3249, "step": 36212 }, { "epoch": 3.6816795445302968, "grad_norm": 0.2746492922306061, "learning_rate": 1.9723366758408745e-06, "loss": 0.3015, "step": 36213 }, { "epoch": 3.6817812118747457, "grad_norm": 0.2809331715106964, "learning_rate": 1.972054257946043e-06, "loss": 0.3259, "step": 36214 }, { "epoch": 3.6818828792191947, "grad_norm": 0.2413925677537918, "learning_rate": 1.9717718553052834e-06, "loss": 0.311, "step": 36215 }, { "epoch": 3.6819845465636436, "grad_norm": 0.25574809312820435, "learning_rate": 1.971489467920017e-06, "loss": 0.3176, "step": 36216 }, { "epoch": 3.682086213908093, "grad_norm": 0.2754411995410919, "learning_rate": 1.9712070957916652e-06, "loss": 0.2962, "step": 36217 }, { "epoch": 3.682187881252542, "grad_norm": 0.28161537647247314, "learning_rate": 1.9709247389216542e-06, "loss": 0.3147, "step": 36218 }, { "epoch": 3.682289548596991, "grad_norm": 0.25572097301483154, "learning_rate": 1.9706423973114042e-06, "loss": 0.2931, "step": 36219 }, { "epoch": 3.68239121594144, "grad_norm": 0.29662469029426575, "learning_rate": 1.9703600709623366e-06, "loss": 0.3032, "step": 36220 }, { "epoch": 3.6824928832858888, "grad_norm": 0.28744444251060486, "learning_rate": 1.9700777598758764e-06, "loss": 0.3108, "step": 36221 }, { "epoch": 3.6825945506303377, "grad_norm": 0.28696131706237793, "learning_rate": 1.9697954640534446e-06, "loss": 0.3134, "step": 36222 }, { "epoch": 3.6826962179747866, "grad_norm": 0.2639980614185333, "learning_rate": 1.9695131834964613e-06, "loss": 0.2948, "step": 36223 }, { "epoch": 3.6827978853192356, "grad_norm": 0.26014280319213867, "learning_rate": 1.9692309182063523e-06, "loss": 0.3483, "step": 36224 }, { "epoch": 3.6828995526636845, "grad_norm": 0.2889227271080017, "learning_rate": 1.968948668184538e-06, "loss": 0.3057, "step": 36225 }, { "epoch": 3.6830012200081335, "grad_norm": 0.2719483971595764, "learning_rate": 1.9686664334324395e-06, "loss": 0.291, "step": 36226 }, { "epoch": 3.6831028873525824, "grad_norm": 0.2690292298793793, "learning_rate": 1.9683842139514775e-06, "loss": 0.2946, "step": 36227 }, { "epoch": 3.6832045546970313, "grad_norm": 0.2737051546573639, "learning_rate": 1.9681020097430777e-06, "loss": 0.3226, "step": 36228 }, { "epoch": 3.6833062220414803, "grad_norm": 0.2821986675262451, "learning_rate": 1.9678198208086585e-06, "loss": 0.3064, "step": 36229 }, { "epoch": 3.6834078893859292, "grad_norm": 0.28946515917778015, "learning_rate": 1.9675376471496416e-06, "loss": 0.3145, "step": 36230 }, { "epoch": 3.683509556730378, "grad_norm": 0.2551482915878296, "learning_rate": 1.967255488767451e-06, "loss": 0.31, "step": 36231 }, { "epoch": 3.683611224074827, "grad_norm": 0.2731323540210724, "learning_rate": 1.9669733456635065e-06, "loss": 0.2991, "step": 36232 }, { "epoch": 3.683712891419276, "grad_norm": 0.2647559642791748, "learning_rate": 1.966691217839227e-06, "loss": 0.3054, "step": 36233 }, { "epoch": 3.683814558763725, "grad_norm": 0.2694217264652252, "learning_rate": 1.9664091052960395e-06, "loss": 0.3089, "step": 36234 }, { "epoch": 3.683916226108174, "grad_norm": 0.2832746207714081, "learning_rate": 1.9661270080353613e-06, "loss": 0.3332, "step": 36235 }, { "epoch": 3.684017893452623, "grad_norm": 0.2863980531692505, "learning_rate": 1.965844926058614e-06, "loss": 0.2893, "step": 36236 }, { "epoch": 3.684119560797072, "grad_norm": 0.2674303948879242, "learning_rate": 1.9655628593672175e-06, "loss": 0.3383, "step": 36237 }, { "epoch": 3.6842212281415208, "grad_norm": 0.27158331871032715, "learning_rate": 1.9652808079625958e-06, "loss": 0.2914, "step": 36238 }, { "epoch": 3.6843228954859697, "grad_norm": 0.26920372247695923, "learning_rate": 1.9649987718461683e-06, "loss": 0.3465, "step": 36239 }, { "epoch": 3.6844245628304186, "grad_norm": 0.2929908335208893, "learning_rate": 1.964716751019354e-06, "loss": 0.311, "step": 36240 }, { "epoch": 3.6845262301748676, "grad_norm": 0.25442999601364136, "learning_rate": 1.964434745483578e-06, "loss": 0.2931, "step": 36241 }, { "epoch": 3.6846278975193165, "grad_norm": 0.2757183313369751, "learning_rate": 1.964152755240257e-06, "loss": 0.2779, "step": 36242 }, { "epoch": 3.684729564863766, "grad_norm": 0.27173563838005066, "learning_rate": 1.963870780290812e-06, "loss": 0.3274, "step": 36243 }, { "epoch": 3.684831232208215, "grad_norm": 0.2776298522949219, "learning_rate": 1.963588820636666e-06, "loss": 0.3166, "step": 36244 }, { "epoch": 3.684932899552664, "grad_norm": 0.2798154056072235, "learning_rate": 1.963306876279238e-06, "loss": 0.2973, "step": 36245 }, { "epoch": 3.6850345668971127, "grad_norm": 0.28589531779289246, "learning_rate": 1.9630249472199487e-06, "loss": 0.3451, "step": 36246 }, { "epoch": 3.6851362342415617, "grad_norm": 0.2897648811340332, "learning_rate": 1.962743033460216e-06, "loss": 0.2989, "step": 36247 }, { "epoch": 3.6852379015860106, "grad_norm": 0.28570979833602905, "learning_rate": 1.9624611350014637e-06, "loss": 0.3057, "step": 36248 }, { "epoch": 3.6853395689304596, "grad_norm": 0.25557032227516174, "learning_rate": 1.9621792518451104e-06, "loss": 0.3091, "step": 36249 }, { "epoch": 3.6854412362749085, "grad_norm": 0.2813371419906616, "learning_rate": 1.961897383992575e-06, "loss": 0.3234, "step": 36250 }, { "epoch": 3.6855429036193574, "grad_norm": 0.26026594638824463, "learning_rate": 1.9616155314452794e-06, "loss": 0.3105, "step": 36251 }, { "epoch": 3.6856445709638064, "grad_norm": 0.26712122559547424, "learning_rate": 1.9613336942046434e-06, "loss": 0.2851, "step": 36252 }, { "epoch": 3.6857462383082553, "grad_norm": 0.2668401300907135, "learning_rate": 1.9610518722720844e-06, "loss": 0.2804, "step": 36253 }, { "epoch": 3.6858479056527043, "grad_norm": 0.2793894112110138, "learning_rate": 1.960770065649025e-06, "loss": 0.3409, "step": 36254 }, { "epoch": 3.685949572997153, "grad_norm": 0.2546689212322235, "learning_rate": 1.9604882743368846e-06, "loss": 0.3007, "step": 36255 }, { "epoch": 3.686051240341602, "grad_norm": 0.27090850472450256, "learning_rate": 1.960206498337082e-06, "loss": 0.2957, "step": 36256 }, { "epoch": 3.686152907686051, "grad_norm": 0.27636462450027466, "learning_rate": 1.9599247376510337e-06, "loss": 0.3341, "step": 36257 }, { "epoch": 3.6862545750305005, "grad_norm": 0.29077550768852234, "learning_rate": 1.959642992280165e-06, "loss": 0.306, "step": 36258 }, { "epoch": 3.6863562423749494, "grad_norm": 0.2708510160446167, "learning_rate": 1.9593612622258924e-06, "loss": 0.2807, "step": 36259 }, { "epoch": 3.6864579097193984, "grad_norm": 0.26214686036109924, "learning_rate": 1.959079547489633e-06, "loss": 0.2986, "step": 36260 }, { "epoch": 3.6865595770638473, "grad_norm": 0.2610126733779907, "learning_rate": 1.9587978480728105e-06, "loss": 0.3315, "step": 36261 }, { "epoch": 3.6866612444082962, "grad_norm": 0.2659080922603607, "learning_rate": 1.9585161639768414e-06, "loss": 0.3193, "step": 36262 }, { "epoch": 3.686762911752745, "grad_norm": 0.27077212929725647, "learning_rate": 1.958234495203143e-06, "loss": 0.2794, "step": 36263 }, { "epoch": 3.686864579097194, "grad_norm": 0.2870262563228607, "learning_rate": 1.9579528417531386e-06, "loss": 0.2951, "step": 36264 }, { "epoch": 3.686966246441643, "grad_norm": 0.29846885800361633, "learning_rate": 1.9576712036282445e-06, "loss": 0.3216, "step": 36265 }, { "epoch": 3.687067913786092, "grad_norm": 0.2645214796066284, "learning_rate": 1.9573895808298805e-06, "loss": 0.3031, "step": 36266 }, { "epoch": 3.687169581130541, "grad_norm": 0.2789204716682434, "learning_rate": 1.957107973359463e-06, "loss": 0.2932, "step": 36267 }, { "epoch": 3.68727124847499, "grad_norm": 0.289455771446228, "learning_rate": 1.9568263812184133e-06, "loss": 0.3019, "step": 36268 }, { "epoch": 3.687372915819439, "grad_norm": 0.2596074342727661, "learning_rate": 1.9565448044081497e-06, "loss": 0.3287, "step": 36269 }, { "epoch": 3.6874745831638878, "grad_norm": 0.2848924994468689, "learning_rate": 1.956263242930089e-06, "loss": 0.3145, "step": 36270 }, { "epoch": 3.6875762505083367, "grad_norm": 0.2739471197128296, "learning_rate": 1.9559816967856515e-06, "loss": 0.3226, "step": 36271 }, { "epoch": 3.6876779178527856, "grad_norm": 0.2538238763809204, "learning_rate": 1.9557001659762553e-06, "loss": 0.3052, "step": 36272 }, { "epoch": 3.6877795851972346, "grad_norm": 0.2604445815086365, "learning_rate": 1.9554186505033163e-06, "loss": 0.3026, "step": 36273 }, { "epoch": 3.6878812525416835, "grad_norm": 0.2864665389060974, "learning_rate": 1.9551371503682565e-06, "loss": 0.3069, "step": 36274 }, { "epoch": 3.6879829198861325, "grad_norm": 0.27409133315086365, "learning_rate": 1.954855665572492e-06, "loss": 0.2927, "step": 36275 }, { "epoch": 3.6880845872305814, "grad_norm": 0.2552240788936615, "learning_rate": 1.9545741961174414e-06, "loss": 0.3016, "step": 36276 }, { "epoch": 3.6881862545750304, "grad_norm": 0.28001388907432556, "learning_rate": 1.9542927420045204e-06, "loss": 0.3135, "step": 36277 }, { "epoch": 3.6882879219194793, "grad_norm": 0.27473098039627075, "learning_rate": 1.9540113032351506e-06, "loss": 0.3252, "step": 36278 }, { "epoch": 3.6883895892639282, "grad_norm": 0.27169400453567505, "learning_rate": 1.9537298798107474e-06, "loss": 0.2787, "step": 36279 }, { "epoch": 3.688491256608377, "grad_norm": 0.30047088861465454, "learning_rate": 1.953448471732728e-06, "loss": 0.323, "step": 36280 }, { "epoch": 3.688592923952826, "grad_norm": 0.2778172492980957, "learning_rate": 1.953167079002513e-06, "loss": 0.2931, "step": 36281 }, { "epoch": 3.688694591297275, "grad_norm": 0.30415821075439453, "learning_rate": 1.952885701621518e-06, "loss": 0.2795, "step": 36282 }, { "epoch": 3.688796258641724, "grad_norm": 0.2530238926410675, "learning_rate": 1.9526043395911595e-06, "loss": 0.3226, "step": 36283 }, { "epoch": 3.6888979259861734, "grad_norm": 0.2690396308898926, "learning_rate": 1.9523229929128575e-06, "loss": 0.2796, "step": 36284 }, { "epoch": 3.6889995933306223, "grad_norm": 0.2593957781791687, "learning_rate": 1.9520416615880276e-06, "loss": 0.2872, "step": 36285 }, { "epoch": 3.6891012606750713, "grad_norm": 0.2714679539203644, "learning_rate": 1.9517603456180877e-06, "loss": 0.3324, "step": 36286 }, { "epoch": 3.68920292801952, "grad_norm": 0.2691195607185364, "learning_rate": 1.9514790450044537e-06, "loss": 0.3175, "step": 36287 }, { "epoch": 3.689304595363969, "grad_norm": 0.2722018361091614, "learning_rate": 1.9511977597485447e-06, "loss": 0.2887, "step": 36288 }, { "epoch": 3.689406262708418, "grad_norm": 0.26788806915283203, "learning_rate": 1.9509164898517774e-06, "loss": 0.2838, "step": 36289 }, { "epoch": 3.689507930052867, "grad_norm": 0.27913153171539307, "learning_rate": 1.950635235315567e-06, "loss": 0.3181, "step": 36290 }, { "epoch": 3.689609597397316, "grad_norm": 0.2708161473274231, "learning_rate": 1.9503539961413325e-06, "loss": 0.3034, "step": 36291 }, { "epoch": 3.689711264741765, "grad_norm": 0.3125421702861786, "learning_rate": 1.9500727723304903e-06, "loss": 0.2907, "step": 36292 }, { "epoch": 3.689812932086214, "grad_norm": 0.25657057762145996, "learning_rate": 1.9497915638844544e-06, "loss": 0.3153, "step": 36293 }, { "epoch": 3.689914599430663, "grad_norm": 0.2598360478878021, "learning_rate": 1.9495103708046464e-06, "loss": 0.3362, "step": 36294 }, { "epoch": 3.6900162667751117, "grad_norm": 0.2845720648765564, "learning_rate": 1.9492291930924793e-06, "loss": 0.3584, "step": 36295 }, { "epoch": 3.6901179341195607, "grad_norm": 0.28582578897476196, "learning_rate": 1.9489480307493714e-06, "loss": 0.2975, "step": 36296 }, { "epoch": 3.6902196014640096, "grad_norm": 0.2799694240093231, "learning_rate": 1.948666883776736e-06, "loss": 0.3339, "step": 36297 }, { "epoch": 3.6903212688084586, "grad_norm": 0.2687373459339142, "learning_rate": 1.9483857521759934e-06, "loss": 0.3065, "step": 36298 }, { "epoch": 3.690422936152908, "grad_norm": 0.2900277376174927, "learning_rate": 1.9481046359485588e-06, "loss": 0.3148, "step": 36299 }, { "epoch": 3.690524603497357, "grad_norm": 0.27974119782447815, "learning_rate": 1.9478235350958457e-06, "loss": 0.3224, "step": 36300 }, { "epoch": 3.690626270841806, "grad_norm": 0.2836381196975708, "learning_rate": 1.947542449619274e-06, "loss": 0.3277, "step": 36301 }, { "epoch": 3.6907279381862548, "grad_norm": 0.2839941084384918, "learning_rate": 1.9472613795202584e-06, "loss": 0.3021, "step": 36302 }, { "epoch": 3.6908296055307037, "grad_norm": 0.2617682218551636, "learning_rate": 1.9469803248002123e-06, "loss": 0.3201, "step": 36303 }, { "epoch": 3.6909312728751527, "grad_norm": 0.2821216881275177, "learning_rate": 1.946699285460556e-06, "loss": 0.3146, "step": 36304 }, { "epoch": 3.6910329402196016, "grad_norm": 0.2725866436958313, "learning_rate": 1.946418261502703e-06, "loss": 0.3166, "step": 36305 }, { "epoch": 3.6911346075640505, "grad_norm": 0.26418671011924744, "learning_rate": 1.9461372529280685e-06, "loss": 0.3193, "step": 36306 }, { "epoch": 3.6912362749084995, "grad_norm": 0.2797839641571045, "learning_rate": 1.945856259738068e-06, "loss": 0.2852, "step": 36307 }, { "epoch": 3.6913379422529484, "grad_norm": 0.2687859535217285, "learning_rate": 1.945575281934119e-06, "loss": 0.3432, "step": 36308 }, { "epoch": 3.6914396095973974, "grad_norm": 0.27729958295822144, "learning_rate": 1.9452943195176367e-06, "loss": 0.304, "step": 36309 }, { "epoch": 3.6915412769418463, "grad_norm": 0.26630786061286926, "learning_rate": 1.9450133724900334e-06, "loss": 0.3249, "step": 36310 }, { "epoch": 3.6916429442862952, "grad_norm": 0.2773562967777252, "learning_rate": 1.9447324408527302e-06, "loss": 0.3434, "step": 36311 }, { "epoch": 3.691744611630744, "grad_norm": 0.2681659758090973, "learning_rate": 1.944451524607135e-06, "loss": 0.3089, "step": 36312 }, { "epoch": 3.691846278975193, "grad_norm": 0.2702327072620392, "learning_rate": 1.9441706237546676e-06, "loss": 0.3357, "step": 36313 }, { "epoch": 3.691947946319642, "grad_norm": 0.27956509590148926, "learning_rate": 1.9438897382967436e-06, "loss": 0.3105, "step": 36314 }, { "epoch": 3.692049613664091, "grad_norm": 0.2825194001197815, "learning_rate": 1.9436088682347775e-06, "loss": 0.2927, "step": 36315 }, { "epoch": 3.69215128100854, "grad_norm": 0.26043152809143066, "learning_rate": 1.943328013570183e-06, "loss": 0.3156, "step": 36316 }, { "epoch": 3.692252948352989, "grad_norm": 0.29419925808906555, "learning_rate": 1.943047174304374e-06, "loss": 0.3228, "step": 36317 }, { "epoch": 3.692354615697438, "grad_norm": 0.26689615845680237, "learning_rate": 1.9427663504387694e-06, "loss": 0.3143, "step": 36318 }, { "epoch": 3.6924562830418868, "grad_norm": 0.27971151471138, "learning_rate": 1.9424855419747808e-06, "loss": 0.3011, "step": 36319 }, { "epoch": 3.6925579503863357, "grad_norm": 0.3005836606025696, "learning_rate": 1.942204748913822e-06, "loss": 0.2991, "step": 36320 }, { "epoch": 3.6926596177307847, "grad_norm": 0.3022550046443939, "learning_rate": 1.941923971257312e-06, "loss": 0.2972, "step": 36321 }, { "epoch": 3.6927612850752336, "grad_norm": 0.28938421607017517, "learning_rate": 1.941643209006659e-06, "loss": 0.3165, "step": 36322 }, { "epoch": 3.6928629524196825, "grad_norm": 0.2758251130580902, "learning_rate": 1.941362462163281e-06, "loss": 0.3039, "step": 36323 }, { "epoch": 3.6929646197641315, "grad_norm": 0.2851264178752899, "learning_rate": 1.9410817307285935e-06, "loss": 0.3107, "step": 36324 }, { "epoch": 3.693066287108581, "grad_norm": 0.2852726876735687, "learning_rate": 1.9408010147040095e-06, "loss": 0.3125, "step": 36325 }, { "epoch": 3.69316795445303, "grad_norm": 0.2636243402957916, "learning_rate": 1.940520314090943e-06, "loss": 0.3069, "step": 36326 }, { "epoch": 3.6932696217974788, "grad_norm": 0.2702608108520508, "learning_rate": 1.9402396288908066e-06, "loss": 0.3378, "step": 36327 }, { "epoch": 3.6933712891419277, "grad_norm": 0.26583030819892883, "learning_rate": 1.939958959105017e-06, "loss": 0.295, "step": 36328 }, { "epoch": 3.6934729564863766, "grad_norm": 0.2617473304271698, "learning_rate": 1.9396783047349878e-06, "loss": 0.3373, "step": 36329 }, { "epoch": 3.6935746238308256, "grad_norm": 0.2623685896396637, "learning_rate": 1.9393976657821294e-06, "loss": 0.3036, "step": 36330 }, { "epoch": 3.6936762911752745, "grad_norm": 0.2605801224708557, "learning_rate": 1.939117042247862e-06, "loss": 0.332, "step": 36331 }, { "epoch": 3.6937779585197235, "grad_norm": 0.27321481704711914, "learning_rate": 1.9388364341335926e-06, "loss": 0.3277, "step": 36332 }, { "epoch": 3.6938796258641724, "grad_norm": 0.2747044563293457, "learning_rate": 1.938555841440739e-06, "loss": 0.2909, "step": 36333 }, { "epoch": 3.6939812932086213, "grad_norm": 0.27282851934432983, "learning_rate": 1.9382752641707138e-06, "loss": 0.2915, "step": 36334 }, { "epoch": 3.6940829605530703, "grad_norm": 0.2955755889415741, "learning_rate": 1.9379947023249286e-06, "loss": 0.3049, "step": 36335 }, { "epoch": 3.6941846278975192, "grad_norm": 0.2619914710521698, "learning_rate": 1.9377141559048e-06, "loss": 0.3419, "step": 36336 }, { "epoch": 3.694286295241968, "grad_norm": 0.2787439227104187, "learning_rate": 1.937433624911738e-06, "loss": 0.3251, "step": 36337 }, { "epoch": 3.694387962586417, "grad_norm": 0.2641422748565674, "learning_rate": 1.9371531093471614e-06, "loss": 0.3075, "step": 36338 }, { "epoch": 3.694489629930866, "grad_norm": 0.2827552258968353, "learning_rate": 1.9368726092124758e-06, "loss": 0.3214, "step": 36339 }, { "epoch": 3.6945912972753154, "grad_norm": 0.27482718229293823, "learning_rate": 1.9365921245090978e-06, "loss": 0.2967, "step": 36340 }, { "epoch": 3.6946929646197644, "grad_norm": 0.2691420316696167, "learning_rate": 1.936311655238445e-06, "loss": 0.2912, "step": 36341 }, { "epoch": 3.6947946319642133, "grad_norm": 0.31089189648628235, "learning_rate": 1.9360312014019224e-06, "loss": 0.2986, "step": 36342 }, { "epoch": 3.6948962993086623, "grad_norm": 0.3000819981098175, "learning_rate": 1.9357507630009474e-06, "loss": 0.3382, "step": 36343 }, { "epoch": 3.694997966653111, "grad_norm": 0.26430413126945496, "learning_rate": 1.9354703400369322e-06, "loss": 0.3155, "step": 36344 }, { "epoch": 3.69509963399756, "grad_norm": 0.2641703486442566, "learning_rate": 1.935189932511288e-06, "loss": 0.3206, "step": 36345 }, { "epoch": 3.695201301342009, "grad_norm": 0.2716587781906128, "learning_rate": 1.9349095404254297e-06, "loss": 0.2953, "step": 36346 }, { "epoch": 3.695302968686458, "grad_norm": 0.2777712047100067, "learning_rate": 1.9346291637807697e-06, "loss": 0.3438, "step": 36347 }, { "epoch": 3.695404636030907, "grad_norm": 0.2723505198955536, "learning_rate": 1.934348802578719e-06, "loss": 0.3212, "step": 36348 }, { "epoch": 3.695506303375356, "grad_norm": 0.2923373878002167, "learning_rate": 1.9340684568206887e-06, "loss": 0.3171, "step": 36349 }, { "epoch": 3.695607970719805, "grad_norm": 0.2643556296825409, "learning_rate": 1.9337881265080926e-06, "loss": 0.3205, "step": 36350 }, { "epoch": 3.695709638064254, "grad_norm": 0.28632181882858276, "learning_rate": 1.9335078116423473e-06, "loss": 0.3055, "step": 36351 }, { "epoch": 3.6958113054087027, "grad_norm": 0.31077712774276733, "learning_rate": 1.9332275122248566e-06, "loss": 0.2987, "step": 36352 }, { "epoch": 3.6959129727531517, "grad_norm": 0.3063160181045532, "learning_rate": 1.9329472282570393e-06, "loss": 0.3208, "step": 36353 }, { "epoch": 3.6960146400976006, "grad_norm": 0.26639652252197266, "learning_rate": 1.9326669597403054e-06, "loss": 0.3176, "step": 36354 }, { "epoch": 3.6961163074420496, "grad_norm": 0.27719780802726746, "learning_rate": 1.932386706676064e-06, "loss": 0.3194, "step": 36355 }, { "epoch": 3.6962179747864985, "grad_norm": 0.2626616358757019, "learning_rate": 1.9321064690657316e-06, "loss": 0.289, "step": 36356 }, { "epoch": 3.6963196421309474, "grad_norm": 0.25675246119499207, "learning_rate": 1.9318262469107175e-06, "loss": 0.3518, "step": 36357 }, { "epoch": 3.6964213094753964, "grad_norm": 0.2810070514678955, "learning_rate": 1.9315460402124344e-06, "loss": 0.318, "step": 36358 }, { "epoch": 3.6965229768198453, "grad_norm": 0.2800094485282898, "learning_rate": 1.9312658489722903e-06, "loss": 0.3293, "step": 36359 }, { "epoch": 3.6966246441642943, "grad_norm": 0.27941054105758667, "learning_rate": 1.9309856731917003e-06, "loss": 0.3187, "step": 36360 }, { "epoch": 3.696726311508743, "grad_norm": 0.29416653513908386, "learning_rate": 1.930705512872078e-06, "loss": 0.2788, "step": 36361 }, { "epoch": 3.696827978853192, "grad_norm": 0.28317663073539734, "learning_rate": 1.9304253680148293e-06, "loss": 0.3036, "step": 36362 }, { "epoch": 3.696929646197641, "grad_norm": 0.269832581281662, "learning_rate": 1.9301452386213693e-06, "loss": 0.3502, "step": 36363 }, { "epoch": 3.69703131354209, "grad_norm": 0.252551406621933, "learning_rate": 1.9298651246931085e-06, "loss": 0.333, "step": 36364 }, { "epoch": 3.697132980886539, "grad_norm": 0.2583552598953247, "learning_rate": 1.929585026231456e-06, "loss": 0.2842, "step": 36365 }, { "epoch": 3.6972346482309884, "grad_norm": 0.27207621932029724, "learning_rate": 1.929304943237826e-06, "loss": 0.3142, "step": 36366 }, { "epoch": 3.6973363155754373, "grad_norm": 0.26193201541900635, "learning_rate": 1.9290248757136276e-06, "loss": 0.3217, "step": 36367 }, { "epoch": 3.6974379829198862, "grad_norm": 0.2686125636100769, "learning_rate": 1.9287448236602723e-06, "loss": 0.3236, "step": 36368 }, { "epoch": 3.697539650264335, "grad_norm": 0.26829200983047485, "learning_rate": 1.928464787079169e-06, "loss": 0.2972, "step": 36369 }, { "epoch": 3.697641317608784, "grad_norm": 0.28895488381385803, "learning_rate": 1.9281847659717324e-06, "loss": 0.2866, "step": 36370 }, { "epoch": 3.697742984953233, "grad_norm": 0.2651216983795166, "learning_rate": 1.9279047603393702e-06, "loss": 0.3201, "step": 36371 }, { "epoch": 3.697844652297682, "grad_norm": 0.25508201122283936, "learning_rate": 1.9276247701834926e-06, "loss": 0.3161, "step": 36372 }, { "epoch": 3.697946319642131, "grad_norm": 0.27719154953956604, "learning_rate": 1.927344795505513e-06, "loss": 0.3474, "step": 36373 }, { "epoch": 3.69804798698658, "grad_norm": 0.28123316168785095, "learning_rate": 1.92706483630684e-06, "loss": 0.3021, "step": 36374 }, { "epoch": 3.698149654331029, "grad_norm": 0.28779810667037964, "learning_rate": 1.926784892588882e-06, "loss": 0.2783, "step": 36375 }, { "epoch": 3.6982513216754778, "grad_norm": 0.2593320608139038, "learning_rate": 1.926504964353054e-06, "loss": 0.3035, "step": 36376 }, { "epoch": 3.6983529890199267, "grad_norm": 0.2820068597793579, "learning_rate": 1.926225051600763e-06, "loss": 0.3217, "step": 36377 }, { "epoch": 3.6984546563643756, "grad_norm": 0.2723686695098877, "learning_rate": 1.9259451543334203e-06, "loss": 0.3116, "step": 36378 }, { "epoch": 3.6985563237088246, "grad_norm": 0.3046577274799347, "learning_rate": 1.9256652725524326e-06, "loss": 0.3213, "step": 36379 }, { "epoch": 3.6986579910532735, "grad_norm": 0.2870469391345978, "learning_rate": 1.9253854062592154e-06, "loss": 0.3238, "step": 36380 }, { "epoch": 3.698759658397723, "grad_norm": 0.24665208160877228, "learning_rate": 1.9251055554551755e-06, "loss": 0.3244, "step": 36381 }, { "epoch": 3.698861325742172, "grad_norm": 0.2803475856781006, "learning_rate": 1.9248257201417216e-06, "loss": 0.2824, "step": 36382 }, { "epoch": 3.698962993086621, "grad_norm": 0.2725428640842438, "learning_rate": 1.9245459003202666e-06, "loss": 0.2885, "step": 36383 }, { "epoch": 3.6990646604310697, "grad_norm": 0.2760639786720276, "learning_rate": 1.924266095992218e-06, "loss": 0.3245, "step": 36384 }, { "epoch": 3.6991663277755187, "grad_norm": 0.26601874828338623, "learning_rate": 1.923986307158985e-06, "loss": 0.2895, "step": 36385 }, { "epoch": 3.6992679951199676, "grad_norm": 0.26424935460090637, "learning_rate": 1.9237065338219796e-06, "loss": 0.3201, "step": 36386 }, { "epoch": 3.6993696624644166, "grad_norm": 0.2843470275402069, "learning_rate": 1.9234267759826094e-06, "loss": 0.2918, "step": 36387 }, { "epoch": 3.6994713298088655, "grad_norm": 0.26368215680122375, "learning_rate": 1.9231470336422836e-06, "loss": 0.3173, "step": 36388 }, { "epoch": 3.6995729971533144, "grad_norm": 0.24225033819675446, "learning_rate": 1.9228673068024104e-06, "loss": 0.3218, "step": 36389 }, { "epoch": 3.6996746644977634, "grad_norm": 0.28360772132873535, "learning_rate": 1.9225875954644024e-06, "loss": 0.3012, "step": 36390 }, { "epoch": 3.6997763318422123, "grad_norm": 0.2929912805557251, "learning_rate": 1.9223078996296667e-06, "loss": 0.3078, "step": 36391 }, { "epoch": 3.6998779991866613, "grad_norm": 0.277814656496048, "learning_rate": 1.9220282192996108e-06, "loss": 0.2874, "step": 36392 }, { "epoch": 3.69997966653111, "grad_norm": 0.26794490218162537, "learning_rate": 1.921748554475647e-06, "loss": 0.2995, "step": 36393 }, { "epoch": 3.700081333875559, "grad_norm": 0.24752646684646606, "learning_rate": 1.921468905159183e-06, "loss": 0.301, "step": 36394 }, { "epoch": 3.700183001220008, "grad_norm": 0.27063316106796265, "learning_rate": 1.9211892713516246e-06, "loss": 0.3202, "step": 36395 }, { "epoch": 3.700284668564457, "grad_norm": 0.2588164210319519, "learning_rate": 1.920909653054385e-06, "loss": 0.345, "step": 36396 }, { "epoch": 3.700386335908906, "grad_norm": 0.26928451657295227, "learning_rate": 1.9206300502688705e-06, "loss": 0.3107, "step": 36397 }, { "epoch": 3.700488003253355, "grad_norm": 0.2554206848144531, "learning_rate": 1.92035046299649e-06, "loss": 0.3266, "step": 36398 }, { "epoch": 3.700589670597804, "grad_norm": 0.2907031178474426, "learning_rate": 1.920070891238651e-06, "loss": 0.2845, "step": 36399 }, { "epoch": 3.700691337942253, "grad_norm": 0.2687835097312927, "learning_rate": 1.9197913349967646e-06, "loss": 0.3055, "step": 36400 }, { "epoch": 3.7007930052867017, "grad_norm": 0.26816612482070923, "learning_rate": 1.919511794272237e-06, "loss": 0.3034, "step": 36401 }, { "epoch": 3.7008946726311507, "grad_norm": 0.2617032527923584, "learning_rate": 1.9192322690664754e-06, "loss": 0.3304, "step": 36402 }, { "epoch": 3.7009963399755996, "grad_norm": 0.2896013557910919, "learning_rate": 1.918952759380891e-06, "loss": 0.3158, "step": 36403 }, { "epoch": 3.7010980073200486, "grad_norm": 0.2855816185474396, "learning_rate": 1.918673265216891e-06, "loss": 0.2857, "step": 36404 }, { "epoch": 3.7011996746644975, "grad_norm": 0.2829728424549103, "learning_rate": 1.9183937865758806e-06, "loss": 0.3049, "step": 36405 }, { "epoch": 3.7013013420089464, "grad_norm": 0.2646823823451996, "learning_rate": 1.918114323459272e-06, "loss": 0.3005, "step": 36406 }, { "epoch": 3.701403009353396, "grad_norm": 0.26589328050613403, "learning_rate": 1.917834875868471e-06, "loss": 0.301, "step": 36407 }, { "epoch": 3.7015046766978448, "grad_norm": 0.266593873500824, "learning_rate": 1.9175554438048853e-06, "loss": 0.2833, "step": 36408 }, { "epoch": 3.7016063440422937, "grad_norm": 0.29482173919677734, "learning_rate": 1.9172760272699213e-06, "loss": 0.2856, "step": 36409 }, { "epoch": 3.7017080113867427, "grad_norm": 0.26386281847953796, "learning_rate": 1.91699662626499e-06, "loss": 0.2966, "step": 36410 }, { "epoch": 3.7018096787311916, "grad_norm": 0.29400834441185, "learning_rate": 1.916717240791497e-06, "loss": 0.342, "step": 36411 }, { "epoch": 3.7019113460756405, "grad_norm": 0.2746525704860687, "learning_rate": 1.9164378708508475e-06, "loss": 0.2794, "step": 36412 }, { "epoch": 3.7020130134200895, "grad_norm": 0.2800891697406769, "learning_rate": 1.9161585164444537e-06, "loss": 0.3342, "step": 36413 }, { "epoch": 3.7021146807645384, "grad_norm": 0.2666289806365967, "learning_rate": 1.9158791775737203e-06, "loss": 0.3074, "step": 36414 }, { "epoch": 3.7022163481089874, "grad_norm": 0.24773617088794708, "learning_rate": 1.915599854240052e-06, "loss": 0.2921, "step": 36415 }, { "epoch": 3.7023180154534363, "grad_norm": 0.2778523564338684, "learning_rate": 1.9153205464448616e-06, "loss": 0.2974, "step": 36416 }, { "epoch": 3.7024196827978852, "grad_norm": 0.25350216031074524, "learning_rate": 1.915041254189553e-06, "loss": 0.2888, "step": 36417 }, { "epoch": 3.702521350142334, "grad_norm": 0.2821838855743408, "learning_rate": 1.914761977475533e-06, "loss": 0.3052, "step": 36418 }, { "epoch": 3.702623017486783, "grad_norm": 0.2639913260936737, "learning_rate": 1.914482716304208e-06, "loss": 0.3431, "step": 36419 }, { "epoch": 3.702724684831232, "grad_norm": 0.28230974078178406, "learning_rate": 1.9142034706769868e-06, "loss": 0.3148, "step": 36420 }, { "epoch": 3.702826352175681, "grad_norm": 0.2851467430591583, "learning_rate": 1.9139242405952755e-06, "loss": 0.3154, "step": 36421 }, { "epoch": 3.7029280195201304, "grad_norm": 0.29438817501068115, "learning_rate": 1.9136450260604793e-06, "loss": 0.3191, "step": 36422 }, { "epoch": 3.7030296868645793, "grad_norm": 0.2917979955673218, "learning_rate": 1.9133658270740074e-06, "loss": 0.3231, "step": 36423 }, { "epoch": 3.7031313542090283, "grad_norm": 0.30687403678894043, "learning_rate": 1.9130866436372647e-06, "loss": 0.3203, "step": 36424 }, { "epoch": 3.7032330215534772, "grad_norm": 0.2968064546585083, "learning_rate": 1.9128074757516564e-06, "loss": 0.3185, "step": 36425 }, { "epoch": 3.703334688897926, "grad_norm": 0.3128959536552429, "learning_rate": 1.912528323418592e-06, "loss": 0.304, "step": 36426 }, { "epoch": 3.703436356242375, "grad_norm": 0.2703120708465576, "learning_rate": 1.9122491866394764e-06, "loss": 0.3008, "step": 36427 }, { "epoch": 3.703538023586824, "grad_norm": 0.296833336353302, "learning_rate": 1.911970065415716e-06, "loss": 0.3125, "step": 36428 }, { "epoch": 3.703639690931273, "grad_norm": 0.26760774850845337, "learning_rate": 1.911690959748714e-06, "loss": 0.3219, "step": 36429 }, { "epoch": 3.703741358275722, "grad_norm": 0.28479817509651184, "learning_rate": 1.9114118696398815e-06, "loss": 0.3126, "step": 36430 }, { "epoch": 3.703843025620171, "grad_norm": 0.27169889211654663, "learning_rate": 1.911132795090622e-06, "loss": 0.3218, "step": 36431 }, { "epoch": 3.70394469296462, "grad_norm": 0.24629221856594086, "learning_rate": 1.9108537361023395e-06, "loss": 0.2841, "step": 36432 }, { "epoch": 3.7040463603090688, "grad_norm": 0.2797887921333313, "learning_rate": 1.9105746926764435e-06, "loss": 0.3297, "step": 36433 }, { "epoch": 3.7041480276535177, "grad_norm": 0.2848232686519623, "learning_rate": 1.910295664814338e-06, "loss": 0.289, "step": 36434 }, { "epoch": 3.7042496949979666, "grad_norm": 0.26539236307144165, "learning_rate": 1.9100166525174268e-06, "loss": 0.2983, "step": 36435 }, { "epoch": 3.7043513623424156, "grad_norm": 0.27309900522232056, "learning_rate": 1.9097376557871194e-06, "loss": 0.278, "step": 36436 }, { "epoch": 3.7044530296868645, "grad_norm": 0.2761930227279663, "learning_rate": 1.909458674624819e-06, "loss": 0.3383, "step": 36437 }, { "epoch": 3.7045546970313135, "grad_norm": 0.28192201256752014, "learning_rate": 1.909179709031932e-06, "loss": 0.3345, "step": 36438 }, { "epoch": 3.7046563643757624, "grad_norm": 0.2529962360858917, "learning_rate": 1.9089007590098607e-06, "loss": 0.3292, "step": 36439 }, { "epoch": 3.7047580317202113, "grad_norm": 0.2717822790145874, "learning_rate": 1.908621824560015e-06, "loss": 0.3085, "step": 36440 }, { "epoch": 3.7048596990646603, "grad_norm": 0.27370285987854004, "learning_rate": 1.9083429056837984e-06, "loss": 0.3051, "step": 36441 }, { "epoch": 3.7049613664091092, "grad_norm": 0.28827643394470215, "learning_rate": 1.908064002382613e-06, "loss": 0.312, "step": 36442 }, { "epoch": 3.705063033753558, "grad_norm": 0.2893516719341278, "learning_rate": 1.907785114657868e-06, "loss": 0.3127, "step": 36443 }, { "epoch": 3.705164701098007, "grad_norm": 0.25924643874168396, "learning_rate": 1.9075062425109675e-06, "loss": 0.2823, "step": 36444 }, { "epoch": 3.705266368442456, "grad_norm": 0.2764444649219513, "learning_rate": 1.9072273859433133e-06, "loss": 0.3008, "step": 36445 }, { "epoch": 3.705368035786905, "grad_norm": 0.2719106376171112, "learning_rate": 1.9069485449563146e-06, "loss": 0.2996, "step": 36446 }, { "epoch": 3.7054697031313544, "grad_norm": 0.2983923852443695, "learning_rate": 1.9066697195513738e-06, "loss": 0.3202, "step": 36447 }, { "epoch": 3.7055713704758033, "grad_norm": 0.2813476324081421, "learning_rate": 1.9063909097298966e-06, "loss": 0.3106, "step": 36448 }, { "epoch": 3.7056730378202523, "grad_norm": 0.2777906358242035, "learning_rate": 1.9061121154932843e-06, "loss": 0.315, "step": 36449 }, { "epoch": 3.705774705164701, "grad_norm": 0.27125075459480286, "learning_rate": 1.9058333368429465e-06, "loss": 0.3399, "step": 36450 }, { "epoch": 3.70587637250915, "grad_norm": 0.27542367577552795, "learning_rate": 1.9055545737802845e-06, "loss": 0.2933, "step": 36451 }, { "epoch": 3.705978039853599, "grad_norm": 0.296409010887146, "learning_rate": 1.9052758263067017e-06, "loss": 0.3057, "step": 36452 }, { "epoch": 3.706079707198048, "grad_norm": 0.2862013578414917, "learning_rate": 1.9049970944236052e-06, "loss": 0.29, "step": 36453 }, { "epoch": 3.706181374542497, "grad_norm": 0.2934480607509613, "learning_rate": 1.904718378132398e-06, "loss": 0.3049, "step": 36454 }, { "epoch": 3.706283041886946, "grad_norm": 0.26171913743019104, "learning_rate": 1.9044396774344826e-06, "loss": 0.3108, "step": 36455 }, { "epoch": 3.706384709231395, "grad_norm": 0.2853178083896637, "learning_rate": 1.904160992331266e-06, "loss": 0.295, "step": 36456 }, { "epoch": 3.706486376575844, "grad_norm": 0.2642253339290619, "learning_rate": 1.903882322824151e-06, "loss": 0.2916, "step": 36457 }, { "epoch": 3.7065880439202927, "grad_norm": 0.29478007555007935, "learning_rate": 1.9036036689145409e-06, "loss": 0.2994, "step": 36458 }, { "epoch": 3.7066897112647417, "grad_norm": 0.26929813623428345, "learning_rate": 1.9033250306038375e-06, "loss": 0.3378, "step": 36459 }, { "epoch": 3.7067913786091906, "grad_norm": 0.2813155949115753, "learning_rate": 1.903046407893449e-06, "loss": 0.3049, "step": 36460 }, { "epoch": 3.7068930459536396, "grad_norm": 0.28714489936828613, "learning_rate": 1.9027678007847766e-06, "loss": 0.3132, "step": 36461 }, { "epoch": 3.7069947132980885, "grad_norm": 0.25538164377212524, "learning_rate": 1.9024892092792224e-06, "loss": 0.3097, "step": 36462 }, { "epoch": 3.707096380642538, "grad_norm": 0.2623039484024048, "learning_rate": 1.9022106333781948e-06, "loss": 0.3089, "step": 36463 }, { "epoch": 3.707198047986987, "grad_norm": 0.2696247696876526, "learning_rate": 1.9019320730830903e-06, "loss": 0.309, "step": 36464 }, { "epoch": 3.7072997153314358, "grad_norm": 0.27782702445983887, "learning_rate": 1.9016535283953157e-06, "loss": 0.3205, "step": 36465 }, { "epoch": 3.7074013826758847, "grad_norm": 0.2573627233505249, "learning_rate": 1.9013749993162768e-06, "loss": 0.3351, "step": 36466 }, { "epoch": 3.7075030500203336, "grad_norm": 0.294234961271286, "learning_rate": 1.9010964858473734e-06, "loss": 0.3247, "step": 36467 }, { "epoch": 3.7076047173647826, "grad_norm": 0.25622811913490295, "learning_rate": 1.9008179879900102e-06, "loss": 0.304, "step": 36468 }, { "epoch": 3.7077063847092315, "grad_norm": 0.26446789503097534, "learning_rate": 1.9005395057455873e-06, "loss": 0.3062, "step": 36469 }, { "epoch": 3.7078080520536805, "grad_norm": 0.2856729328632355, "learning_rate": 1.9002610391155118e-06, "loss": 0.2893, "step": 36470 }, { "epoch": 3.7079097193981294, "grad_norm": 0.2761130630970001, "learning_rate": 1.899982588101184e-06, "loss": 0.2955, "step": 36471 }, { "epoch": 3.7080113867425784, "grad_norm": 0.29195886850357056, "learning_rate": 1.8997041527040062e-06, "loss": 0.3014, "step": 36472 }, { "epoch": 3.7081130540870273, "grad_norm": 0.2563507854938507, "learning_rate": 1.8994257329253858e-06, "loss": 0.3057, "step": 36473 }, { "epoch": 3.7082147214314762, "grad_norm": 0.2659793496131897, "learning_rate": 1.8991473287667178e-06, "loss": 0.3123, "step": 36474 }, { "epoch": 3.708316388775925, "grad_norm": 0.3211289346218109, "learning_rate": 1.8988689402294086e-06, "loss": 0.2893, "step": 36475 }, { "epoch": 3.708418056120374, "grad_norm": 0.2775088846683502, "learning_rate": 1.8985905673148624e-06, "loss": 0.2922, "step": 36476 }, { "epoch": 3.708519723464823, "grad_norm": 0.2721865475177765, "learning_rate": 1.8983122100244804e-06, "loss": 0.2976, "step": 36477 }, { "epoch": 3.708621390809272, "grad_norm": 0.26820996403694153, "learning_rate": 1.8980338683596639e-06, "loss": 0.2912, "step": 36478 }, { "epoch": 3.708723058153721, "grad_norm": 0.3002494275569916, "learning_rate": 1.8977555423218142e-06, "loss": 0.3518, "step": 36479 }, { "epoch": 3.70882472549817, "grad_norm": 0.29344433546066284, "learning_rate": 1.897477231912337e-06, "loss": 0.2701, "step": 36480 }, { "epoch": 3.708926392842619, "grad_norm": 0.2823718190193176, "learning_rate": 1.8971989371326317e-06, "loss": 0.3056, "step": 36481 }, { "epoch": 3.7090280601870678, "grad_norm": 0.25381430983543396, "learning_rate": 1.8969206579840988e-06, "loss": 0.3006, "step": 36482 }, { "epoch": 3.7091297275315167, "grad_norm": 0.2660510241985321, "learning_rate": 1.896642394468146e-06, "loss": 0.3339, "step": 36483 }, { "epoch": 3.7092313948759656, "grad_norm": 0.2817930281162262, "learning_rate": 1.8963641465861681e-06, "loss": 0.3294, "step": 36484 }, { "epoch": 3.7093330622204146, "grad_norm": 0.2883652448654175, "learning_rate": 1.8960859143395699e-06, "loss": 0.3242, "step": 36485 }, { "epoch": 3.7094347295648635, "grad_norm": 0.29689791798591614, "learning_rate": 1.8958076977297568e-06, "loss": 0.3459, "step": 36486 }, { "epoch": 3.7095363969093125, "grad_norm": 0.27447229623794556, "learning_rate": 1.8955294967581235e-06, "loss": 0.3152, "step": 36487 }, { "epoch": 3.709638064253762, "grad_norm": 0.29390349984169006, "learning_rate": 1.8952513114260762e-06, "loss": 0.3284, "step": 36488 }, { "epoch": 3.709739731598211, "grad_norm": 0.27831414341926575, "learning_rate": 1.8949731417350136e-06, "loss": 0.2807, "step": 36489 }, { "epoch": 3.7098413989426597, "grad_norm": 0.279567688703537, "learning_rate": 1.8946949876863402e-06, "loss": 0.3195, "step": 36490 }, { "epoch": 3.7099430662871087, "grad_norm": 0.2827994227409363, "learning_rate": 1.8944168492814551e-06, "loss": 0.3043, "step": 36491 }, { "epoch": 3.7100447336315576, "grad_norm": 0.26081201434135437, "learning_rate": 1.8941387265217586e-06, "loss": 0.3022, "step": 36492 }, { "epoch": 3.7101464009760066, "grad_norm": 0.29914191365242004, "learning_rate": 1.8938606194086567e-06, "loss": 0.3266, "step": 36493 }, { "epoch": 3.7102480683204555, "grad_norm": 0.26318153738975525, "learning_rate": 1.8935825279435432e-06, "loss": 0.3098, "step": 36494 }, { "epoch": 3.7103497356649044, "grad_norm": 0.2580621540546417, "learning_rate": 1.8933044521278226e-06, "loss": 0.3352, "step": 36495 }, { "epoch": 3.7104514030093534, "grad_norm": 0.2590213716030121, "learning_rate": 1.8930263919628995e-06, "loss": 0.3248, "step": 36496 }, { "epoch": 3.7105530703538023, "grad_norm": 0.27341219782829285, "learning_rate": 1.8927483474501673e-06, "loss": 0.3105, "step": 36497 }, { "epoch": 3.7106547376982513, "grad_norm": 0.3050576448440552, "learning_rate": 1.8924703185910325e-06, "loss": 0.3126, "step": 36498 }, { "epoch": 3.7107564050427, "grad_norm": 0.26315242052078247, "learning_rate": 1.892192305386894e-06, "loss": 0.3075, "step": 36499 }, { "epoch": 3.710858072387149, "grad_norm": 0.2679864168167114, "learning_rate": 1.8919143078391506e-06, "loss": 0.3176, "step": 36500 }, { "epoch": 3.710959739731598, "grad_norm": 0.2533818185329437, "learning_rate": 1.8916363259492054e-06, "loss": 0.3114, "step": 36501 }, { "epoch": 3.711061407076047, "grad_norm": 0.259040504693985, "learning_rate": 1.8913583597184566e-06, "loss": 0.2932, "step": 36502 }, { "epoch": 3.711163074420496, "grad_norm": 0.2861746549606323, "learning_rate": 1.8910804091483087e-06, "loss": 0.2833, "step": 36503 }, { "epoch": 3.7112647417649454, "grad_norm": 0.25407326221466064, "learning_rate": 1.8908024742401555e-06, "loss": 0.3478, "step": 36504 }, { "epoch": 3.7113664091093943, "grad_norm": 0.29355835914611816, "learning_rate": 1.8905245549954004e-06, "loss": 0.3313, "step": 36505 }, { "epoch": 3.7114680764538432, "grad_norm": 0.272931843996048, "learning_rate": 1.8902466514154477e-06, "loss": 0.306, "step": 36506 }, { "epoch": 3.711569743798292, "grad_norm": 0.2778456211090088, "learning_rate": 1.8899687635016896e-06, "loss": 0.314, "step": 36507 }, { "epoch": 3.711671411142741, "grad_norm": 0.3189433813095093, "learning_rate": 1.8896908912555312e-06, "loss": 0.3083, "step": 36508 }, { "epoch": 3.71177307848719, "grad_norm": 0.26785996556282043, "learning_rate": 1.8894130346783719e-06, "loss": 0.2706, "step": 36509 }, { "epoch": 3.711874745831639, "grad_norm": 0.25208476185798645, "learning_rate": 1.889135193771608e-06, "loss": 0.3127, "step": 36510 }, { "epoch": 3.711976413176088, "grad_norm": 0.2659815847873688, "learning_rate": 1.8888573685366434e-06, "loss": 0.287, "step": 36511 }, { "epoch": 3.712078080520537, "grad_norm": 0.2559683322906494, "learning_rate": 1.8885795589748745e-06, "loss": 0.3064, "step": 36512 }, { "epoch": 3.712179747864986, "grad_norm": 0.27198758721351624, "learning_rate": 1.8883017650877062e-06, "loss": 0.3004, "step": 36513 }, { "epoch": 3.7122814152094348, "grad_norm": 0.26205796003341675, "learning_rate": 1.8880239868765304e-06, "loss": 0.2948, "step": 36514 }, { "epoch": 3.7123830825538837, "grad_norm": 0.27954763174057007, "learning_rate": 1.8877462243427503e-06, "loss": 0.294, "step": 36515 }, { "epoch": 3.7124847498983327, "grad_norm": 0.25995275378227234, "learning_rate": 1.8874684774877688e-06, "loss": 0.3033, "step": 36516 }, { "epoch": 3.7125864172427816, "grad_norm": 0.2708943486213684, "learning_rate": 1.8871907463129774e-06, "loss": 0.302, "step": 36517 }, { "epoch": 3.7126880845872305, "grad_norm": 0.2904326319694519, "learning_rate": 1.8869130308197813e-06, "loss": 0.2897, "step": 36518 }, { "epoch": 3.7127897519316795, "grad_norm": 0.2750946283340454, "learning_rate": 1.886635331009578e-06, "loss": 0.304, "step": 36519 }, { "epoch": 3.7128914192761284, "grad_norm": 0.28923118114471436, "learning_rate": 1.886357646883764e-06, "loss": 0.3221, "step": 36520 }, { "epoch": 3.7129930866205774, "grad_norm": 0.30259501934051514, "learning_rate": 1.8860799784437422e-06, "loss": 0.2964, "step": 36521 }, { "epoch": 3.7130947539650263, "grad_norm": 0.2742327153682709, "learning_rate": 1.8858023256909092e-06, "loss": 0.3212, "step": 36522 }, { "epoch": 3.7131964213094752, "grad_norm": 0.2973291277885437, "learning_rate": 1.8855246886266637e-06, "loss": 0.3132, "step": 36523 }, { "epoch": 3.713298088653924, "grad_norm": 0.2686883807182312, "learning_rate": 1.8852470672524037e-06, "loss": 0.34, "step": 36524 }, { "epoch": 3.713399755998373, "grad_norm": 0.26375600695610046, "learning_rate": 1.8849694615695302e-06, "loss": 0.2861, "step": 36525 }, { "epoch": 3.713501423342822, "grad_norm": 0.273671418428421, "learning_rate": 1.8846918715794404e-06, "loss": 0.3033, "step": 36526 }, { "epoch": 3.713603090687271, "grad_norm": 0.27192360162734985, "learning_rate": 1.8844142972835305e-06, "loss": 0.3082, "step": 36527 }, { "epoch": 3.71370475803172, "grad_norm": 0.2631875276565552, "learning_rate": 1.8841367386832032e-06, "loss": 0.3269, "step": 36528 }, { "epoch": 3.7138064253761693, "grad_norm": 0.26266056299209595, "learning_rate": 1.8838591957798547e-06, "loss": 0.2885, "step": 36529 }, { "epoch": 3.7139080927206183, "grad_norm": 0.25763240456581116, "learning_rate": 1.8835816685748826e-06, "loss": 0.305, "step": 36530 }, { "epoch": 3.7140097600650672, "grad_norm": 0.2643481492996216, "learning_rate": 1.883304157069684e-06, "loss": 0.2906, "step": 36531 }, { "epoch": 3.714111427409516, "grad_norm": 0.2890720069408417, "learning_rate": 1.88302666126566e-06, "loss": 0.3105, "step": 36532 }, { "epoch": 3.714213094753965, "grad_norm": 0.29212021827697754, "learning_rate": 1.8827491811642073e-06, "loss": 0.3203, "step": 36533 }, { "epoch": 3.714314762098414, "grad_norm": 0.2898169755935669, "learning_rate": 1.8824717167667212e-06, "loss": 0.3205, "step": 36534 }, { "epoch": 3.714416429442863, "grad_norm": 0.25759807229042053, "learning_rate": 1.8821942680746042e-06, "loss": 0.2968, "step": 36535 }, { "epoch": 3.714518096787312, "grad_norm": 0.26161709427833557, "learning_rate": 1.881916835089251e-06, "loss": 0.3228, "step": 36536 }, { "epoch": 3.714619764131761, "grad_norm": 0.2564246654510498, "learning_rate": 1.8816394178120577e-06, "loss": 0.3343, "step": 36537 }, { "epoch": 3.71472143147621, "grad_norm": 0.2714741826057434, "learning_rate": 1.8813620162444268e-06, "loss": 0.3092, "step": 36538 }, { "epoch": 3.7148230988206588, "grad_norm": 0.2512093484401703, "learning_rate": 1.8810846303877522e-06, "loss": 0.3089, "step": 36539 }, { "epoch": 3.7149247661651077, "grad_norm": 0.27189454436302185, "learning_rate": 1.8808072602434323e-06, "loss": 0.3098, "step": 36540 }, { "epoch": 3.7150264335095566, "grad_norm": 0.2650497555732727, "learning_rate": 1.8805299058128624e-06, "loss": 0.2914, "step": 36541 }, { "epoch": 3.7151281008540056, "grad_norm": 0.27516496181488037, "learning_rate": 1.8802525670974431e-06, "loss": 0.304, "step": 36542 }, { "epoch": 3.7152297681984545, "grad_norm": 0.27456140518188477, "learning_rate": 1.8799752440985703e-06, "loss": 0.3063, "step": 36543 }, { "epoch": 3.7153314355429035, "grad_norm": 0.25769054889678955, "learning_rate": 1.8796979368176387e-06, "loss": 0.2986, "step": 36544 }, { "epoch": 3.715433102887353, "grad_norm": 0.26860642433166504, "learning_rate": 1.8794206452560498e-06, "loss": 0.3337, "step": 36545 }, { "epoch": 3.715534770231802, "grad_norm": 0.2535687983036041, "learning_rate": 1.8791433694151974e-06, "loss": 0.3341, "step": 36546 }, { "epoch": 3.7156364375762507, "grad_norm": 0.29160335659980774, "learning_rate": 1.8788661092964777e-06, "loss": 0.3051, "step": 36547 }, { "epoch": 3.7157381049206997, "grad_norm": 0.27811259031295776, "learning_rate": 1.8785888649012906e-06, "loss": 0.2956, "step": 36548 }, { "epoch": 3.7158397722651486, "grad_norm": 0.272627592086792, "learning_rate": 1.8783116362310311e-06, "loss": 0.3116, "step": 36549 }, { "epoch": 3.7159414396095976, "grad_norm": 0.27175024151802063, "learning_rate": 1.8780344232870956e-06, "loss": 0.3289, "step": 36550 }, { "epoch": 3.7160431069540465, "grad_norm": 0.24273721873760223, "learning_rate": 1.8777572260708788e-06, "loss": 0.2989, "step": 36551 }, { "epoch": 3.7161447742984954, "grad_norm": 0.2662935256958008, "learning_rate": 1.8774800445837809e-06, "loss": 0.353, "step": 36552 }, { "epoch": 3.7162464416429444, "grad_norm": 0.31814947724342346, "learning_rate": 1.8772028788271974e-06, "loss": 0.2842, "step": 36553 }, { "epoch": 3.7163481089873933, "grad_norm": 0.245433047413826, "learning_rate": 1.876925728802521e-06, "loss": 0.3162, "step": 36554 }, { "epoch": 3.7164497763318423, "grad_norm": 0.2710944414138794, "learning_rate": 1.8766485945111522e-06, "loss": 0.311, "step": 36555 }, { "epoch": 3.716551443676291, "grad_norm": 0.27980276942253113, "learning_rate": 1.8763714759544866e-06, "loss": 0.3191, "step": 36556 }, { "epoch": 3.71665311102074, "grad_norm": 0.29619377851486206, "learning_rate": 1.8760943731339165e-06, "loss": 0.3167, "step": 36557 }, { "epoch": 3.716754778365189, "grad_norm": 0.2672421634197235, "learning_rate": 1.8758172860508429e-06, "loss": 0.3051, "step": 36558 }, { "epoch": 3.716856445709638, "grad_norm": 0.2796388268470764, "learning_rate": 1.8755402147066593e-06, "loss": 0.3057, "step": 36559 }, { "epoch": 3.716958113054087, "grad_norm": 0.26575756072998047, "learning_rate": 1.8752631591027616e-06, "loss": 0.3337, "step": 36560 }, { "epoch": 3.717059780398536, "grad_norm": 0.2746223211288452, "learning_rate": 1.8749861192405438e-06, "loss": 0.2918, "step": 36561 }, { "epoch": 3.717161447742985, "grad_norm": 0.2931828796863556, "learning_rate": 1.8747090951214047e-06, "loss": 0.319, "step": 36562 }, { "epoch": 3.717263115087434, "grad_norm": 0.27175232768058777, "learning_rate": 1.874432086746739e-06, "loss": 0.2866, "step": 36563 }, { "epoch": 3.7173647824318827, "grad_norm": 0.2647121250629425, "learning_rate": 1.8741550941179397e-06, "loss": 0.3094, "step": 36564 }, { "epoch": 3.7174664497763317, "grad_norm": 0.28522589802742004, "learning_rate": 1.8738781172364061e-06, "loss": 0.3356, "step": 36565 }, { "epoch": 3.7175681171207806, "grad_norm": 0.26289093494415283, "learning_rate": 1.8736011561035317e-06, "loss": 0.3133, "step": 36566 }, { "epoch": 3.7176697844652296, "grad_norm": 0.27012553811073303, "learning_rate": 1.8733242107207095e-06, "loss": 0.3156, "step": 36567 }, { "epoch": 3.7177714518096785, "grad_norm": 0.2906586527824402, "learning_rate": 1.873047281089339e-06, "loss": 0.2993, "step": 36568 }, { "epoch": 3.7178731191541274, "grad_norm": 0.25396353006362915, "learning_rate": 1.8727703672108132e-06, "loss": 0.3336, "step": 36569 }, { "epoch": 3.717974786498577, "grad_norm": 0.2779543399810791, "learning_rate": 1.8724934690865276e-06, "loss": 0.3501, "step": 36570 }, { "epoch": 3.7180764538430258, "grad_norm": 0.2819291949272156, "learning_rate": 1.8722165867178743e-06, "loss": 0.3104, "step": 36571 }, { "epoch": 3.7181781211874747, "grad_norm": 0.26833662390708923, "learning_rate": 1.8719397201062528e-06, "loss": 0.28, "step": 36572 }, { "epoch": 3.7182797885319236, "grad_norm": 0.26229217648506165, "learning_rate": 1.8716628692530552e-06, "loss": 0.2844, "step": 36573 }, { "epoch": 3.7183814558763726, "grad_norm": 0.27280592918395996, "learning_rate": 1.8713860341596752e-06, "loss": 0.3084, "step": 36574 }, { "epoch": 3.7184831232208215, "grad_norm": 0.2709050476551056, "learning_rate": 1.8711092148275107e-06, "loss": 0.2941, "step": 36575 }, { "epoch": 3.7185847905652705, "grad_norm": 0.2846526801586151, "learning_rate": 1.870832411257954e-06, "loss": 0.2733, "step": 36576 }, { "epoch": 3.7186864579097194, "grad_norm": 0.2684518098831177, "learning_rate": 1.8705556234523986e-06, "loss": 0.3285, "step": 36577 }, { "epoch": 3.7187881252541684, "grad_norm": 0.27524039149284363, "learning_rate": 1.8702788514122422e-06, "loss": 0.3132, "step": 36578 }, { "epoch": 3.7188897925986173, "grad_norm": 0.24095715582370758, "learning_rate": 1.870002095138877e-06, "loss": 0.3, "step": 36579 }, { "epoch": 3.7189914599430662, "grad_norm": 0.2545399069786072, "learning_rate": 1.8697253546336975e-06, "loss": 0.3173, "step": 36580 }, { "epoch": 3.719093127287515, "grad_norm": 0.2679796516895294, "learning_rate": 1.8694486298980956e-06, "loss": 0.3025, "step": 36581 }, { "epoch": 3.719194794631964, "grad_norm": 0.25746631622314453, "learning_rate": 1.8691719209334697e-06, "loss": 0.2966, "step": 36582 }, { "epoch": 3.719296461976413, "grad_norm": 0.2876092195510864, "learning_rate": 1.8688952277412114e-06, "loss": 0.3263, "step": 36583 }, { "epoch": 3.719398129320862, "grad_norm": 0.35963499546051025, "learning_rate": 1.868618550322714e-06, "loss": 0.3077, "step": 36584 }, { "epoch": 3.719499796665311, "grad_norm": 0.2822704613208771, "learning_rate": 1.868341888679373e-06, "loss": 0.2966, "step": 36585 }, { "epoch": 3.7196014640097603, "grad_norm": 0.26007190346717834, "learning_rate": 1.8680652428125817e-06, "loss": 0.3067, "step": 36586 }, { "epoch": 3.7197031313542093, "grad_norm": 0.2859278619289398, "learning_rate": 1.8677886127237315e-06, "loss": 0.3068, "step": 36587 }, { "epoch": 3.719804798698658, "grad_norm": 0.2981122136116028, "learning_rate": 1.8675119984142203e-06, "loss": 0.2889, "step": 36588 }, { "epoch": 3.719906466043107, "grad_norm": 0.2508552372455597, "learning_rate": 1.8672353998854388e-06, "loss": 0.3017, "step": 36589 }, { "epoch": 3.720008133387556, "grad_norm": 0.28618496656417847, "learning_rate": 1.866958817138781e-06, "loss": 0.3018, "step": 36590 }, { "epoch": 3.720109800732005, "grad_norm": 0.25633177161216736, "learning_rate": 1.8666822501756383e-06, "loss": 0.2931, "step": 36591 }, { "epoch": 3.720211468076454, "grad_norm": 0.28780925273895264, "learning_rate": 1.8664056989974077e-06, "loss": 0.3236, "step": 36592 }, { "epoch": 3.720313135420903, "grad_norm": 0.28128013014793396, "learning_rate": 1.8661291636054808e-06, "loss": 0.2826, "step": 36593 }, { "epoch": 3.720414802765352, "grad_norm": 0.2495315968990326, "learning_rate": 1.865852644001248e-06, "loss": 0.3016, "step": 36594 }, { "epoch": 3.720516470109801, "grad_norm": 0.27979984879493713, "learning_rate": 1.8655761401861071e-06, "loss": 0.2966, "step": 36595 }, { "epoch": 3.7206181374542497, "grad_norm": 0.2693721055984497, "learning_rate": 1.8652996521614487e-06, "loss": 0.3108, "step": 36596 }, { "epoch": 3.7207198047986987, "grad_norm": 0.2643817663192749, "learning_rate": 1.8650231799286639e-06, "loss": 0.3214, "step": 36597 }, { "epoch": 3.7208214721431476, "grad_norm": 0.27711719274520874, "learning_rate": 1.8647467234891487e-06, "loss": 0.2811, "step": 36598 }, { "epoch": 3.7209231394875966, "grad_norm": 0.2734191119670868, "learning_rate": 1.864470282844295e-06, "loss": 0.3147, "step": 36599 }, { "epoch": 3.7210248068320455, "grad_norm": 0.261958509683609, "learning_rate": 1.8641938579954942e-06, "loss": 0.3263, "step": 36600 }, { "epoch": 3.7211264741764944, "grad_norm": 0.2835584282875061, "learning_rate": 1.8639174489441385e-06, "loss": 0.2877, "step": 36601 }, { "epoch": 3.7212281415209434, "grad_norm": 0.2795165181159973, "learning_rate": 1.8636410556916234e-06, "loss": 0.3068, "step": 36602 }, { "epoch": 3.7213298088653923, "grad_norm": 0.2755429446697235, "learning_rate": 1.863364678239339e-06, "loss": 0.3263, "step": 36603 }, { "epoch": 3.7214314762098413, "grad_norm": 0.27315887808799744, "learning_rate": 1.863088316588676e-06, "loss": 0.305, "step": 36604 }, { "epoch": 3.72153314355429, "grad_norm": 0.2649586796760559, "learning_rate": 1.8628119707410304e-06, "loss": 0.2855, "step": 36605 }, { "epoch": 3.721634810898739, "grad_norm": 0.2842816710472107, "learning_rate": 1.862535640697793e-06, "loss": 0.307, "step": 36606 }, { "epoch": 3.721736478243188, "grad_norm": 0.28180113434791565, "learning_rate": 1.8622593264603533e-06, "loss": 0.3198, "step": 36607 }, { "epoch": 3.721838145587637, "grad_norm": 0.28874409198760986, "learning_rate": 1.8619830280301071e-06, "loss": 0.3104, "step": 36608 }, { "epoch": 3.721939812932086, "grad_norm": 0.2746097445487976, "learning_rate": 1.8617067454084453e-06, "loss": 0.3022, "step": 36609 }, { "epoch": 3.722041480276535, "grad_norm": 0.2814515233039856, "learning_rate": 1.8614304785967585e-06, "loss": 0.3155, "step": 36610 }, { "epoch": 3.7221431476209843, "grad_norm": 0.27358147501945496, "learning_rate": 1.8611542275964378e-06, "loss": 0.3039, "step": 36611 }, { "epoch": 3.7222448149654332, "grad_norm": 0.25758039951324463, "learning_rate": 1.8608779924088777e-06, "loss": 0.3476, "step": 36612 }, { "epoch": 3.722346482309882, "grad_norm": 0.2674780786037445, "learning_rate": 1.8606017730354687e-06, "loss": 0.2923, "step": 36613 }, { "epoch": 3.722448149654331, "grad_norm": 0.26087725162506104, "learning_rate": 1.8603255694775996e-06, "loss": 0.2976, "step": 36614 }, { "epoch": 3.72254981699878, "grad_norm": 0.27451905608177185, "learning_rate": 1.8600493817366682e-06, "loss": 0.3166, "step": 36615 }, { "epoch": 3.722651484343229, "grad_norm": 0.27652889490127563, "learning_rate": 1.859773209814058e-06, "loss": 0.3074, "step": 36616 }, { "epoch": 3.722753151687678, "grad_norm": 0.2987429201602936, "learning_rate": 1.8594970537111646e-06, "loss": 0.3149, "step": 36617 }, { "epoch": 3.722854819032127, "grad_norm": 0.26771318912506104, "learning_rate": 1.85922091342938e-06, "loss": 0.3224, "step": 36618 }, { "epoch": 3.722956486376576, "grad_norm": 0.26144251227378845, "learning_rate": 1.858944788970094e-06, "loss": 0.306, "step": 36619 }, { "epoch": 3.7230581537210248, "grad_norm": 0.2881661653518677, "learning_rate": 1.858668680334698e-06, "loss": 0.3188, "step": 36620 }, { "epoch": 3.7231598210654737, "grad_norm": 0.25177714228630066, "learning_rate": 1.8583925875245812e-06, "loss": 0.2997, "step": 36621 }, { "epoch": 3.7232614884099227, "grad_norm": 0.27490657567977905, "learning_rate": 1.858116510541137e-06, "loss": 0.307, "step": 36622 }, { "epoch": 3.7233631557543716, "grad_norm": 0.28443068265914917, "learning_rate": 1.857840449385756e-06, "loss": 0.3182, "step": 36623 }, { "epoch": 3.7234648230988205, "grad_norm": 0.2562234699726105, "learning_rate": 1.8575644040598256e-06, "loss": 0.2929, "step": 36624 }, { "epoch": 3.7235664904432695, "grad_norm": 0.2839552164077759, "learning_rate": 1.857288374564743e-06, "loss": 0.2766, "step": 36625 }, { "epoch": 3.7236681577877184, "grad_norm": 0.27821674942970276, "learning_rate": 1.8570123609018914e-06, "loss": 0.2721, "step": 36626 }, { "epoch": 3.723769825132168, "grad_norm": 0.2459530383348465, "learning_rate": 1.8567363630726642e-06, "loss": 0.273, "step": 36627 }, { "epoch": 3.7238714924766168, "grad_norm": 0.26556679606437683, "learning_rate": 1.8564603810784559e-06, "loss": 0.2953, "step": 36628 }, { "epoch": 3.7239731598210657, "grad_norm": 0.26812583208084106, "learning_rate": 1.8561844149206498e-06, "loss": 0.2906, "step": 36629 }, { "epoch": 3.7240748271655146, "grad_norm": 0.2648279070854187, "learning_rate": 1.855908464600641e-06, "loss": 0.3024, "step": 36630 }, { "epoch": 3.7241764945099636, "grad_norm": 0.3060706853866577, "learning_rate": 1.855632530119817e-06, "loss": 0.3411, "step": 36631 }, { "epoch": 3.7242781618544125, "grad_norm": 0.2828214764595032, "learning_rate": 1.85535661147957e-06, "loss": 0.3277, "step": 36632 }, { "epoch": 3.7243798291988615, "grad_norm": 0.2561575770378113, "learning_rate": 1.85508070868129e-06, "loss": 0.2946, "step": 36633 }, { "epoch": 3.7244814965433104, "grad_norm": 0.2675752639770508, "learning_rate": 1.8548048217263636e-06, "loss": 0.3165, "step": 36634 }, { "epoch": 3.7245831638877593, "grad_norm": 0.28250637650489807, "learning_rate": 1.8545289506161868e-06, "loss": 0.3251, "step": 36635 }, { "epoch": 3.7246848312322083, "grad_norm": 0.2983081340789795, "learning_rate": 1.854253095352142e-06, "loss": 0.2972, "step": 36636 }, { "epoch": 3.7247864985766572, "grad_norm": 0.28546252846717834, "learning_rate": 1.8539772559356234e-06, "loss": 0.3169, "step": 36637 }, { "epoch": 3.724888165921106, "grad_norm": 0.2691521942615509, "learning_rate": 1.8537014323680225e-06, "loss": 0.3283, "step": 36638 }, { "epoch": 3.724989833265555, "grad_norm": 0.2986094355583191, "learning_rate": 1.8534256246507226e-06, "loss": 0.2853, "step": 36639 }, { "epoch": 3.725091500610004, "grad_norm": 0.2801061272621155, "learning_rate": 1.8531498327851189e-06, "loss": 0.3393, "step": 36640 }, { "epoch": 3.725193167954453, "grad_norm": 0.28411585092544556, "learning_rate": 1.852874056772596e-06, "loss": 0.3195, "step": 36641 }, { "epoch": 3.725294835298902, "grad_norm": 0.27560409903526306, "learning_rate": 1.8525982966145485e-06, "loss": 0.2962, "step": 36642 }, { "epoch": 3.725396502643351, "grad_norm": 0.27666857838630676, "learning_rate": 1.8523225523123621e-06, "loss": 0.3194, "step": 36643 }, { "epoch": 3.7254981699878, "grad_norm": 0.2619195282459259, "learning_rate": 1.852046823867425e-06, "loss": 0.3339, "step": 36644 }, { "epoch": 3.7255998373322488, "grad_norm": 0.2670160233974457, "learning_rate": 1.8517711112811314e-06, "loss": 0.3099, "step": 36645 }, { "epoch": 3.7257015046766977, "grad_norm": 0.2484682947397232, "learning_rate": 1.8514954145548636e-06, "loss": 0.3173, "step": 36646 }, { "epoch": 3.7258031720211466, "grad_norm": 0.2721414864063263, "learning_rate": 1.8512197336900134e-06, "loss": 0.323, "step": 36647 }, { "epoch": 3.7259048393655956, "grad_norm": 0.2766861617565155, "learning_rate": 1.8509440686879736e-06, "loss": 0.3051, "step": 36648 }, { "epoch": 3.7260065067100445, "grad_norm": 0.2702382802963257, "learning_rate": 1.8506684195501261e-06, "loss": 0.3026, "step": 36649 }, { "epoch": 3.7261081740544935, "grad_norm": 0.2660095989704132, "learning_rate": 1.8503927862778643e-06, "loss": 0.2945, "step": 36650 }, { "epoch": 3.7262098413989424, "grad_norm": 0.30609285831451416, "learning_rate": 1.8501171688725745e-06, "loss": 0.3144, "step": 36651 }, { "epoch": 3.726311508743392, "grad_norm": 0.2697826623916626, "learning_rate": 1.8498415673356446e-06, "loss": 0.299, "step": 36652 }, { "epoch": 3.7264131760878407, "grad_norm": 0.28295040130615234, "learning_rate": 1.8495659816684663e-06, "loss": 0.3256, "step": 36653 }, { "epoch": 3.7265148434322897, "grad_norm": 0.2822934091091156, "learning_rate": 1.8492904118724237e-06, "loss": 0.2971, "step": 36654 }, { "epoch": 3.7266165107767386, "grad_norm": 0.27503520250320435, "learning_rate": 1.849014857948911e-06, "loss": 0.3163, "step": 36655 }, { "epoch": 3.7267181781211876, "grad_norm": 0.2757699489593506, "learning_rate": 1.8487393198993093e-06, "loss": 0.3294, "step": 36656 }, { "epoch": 3.7268198454656365, "grad_norm": 0.2707022428512573, "learning_rate": 1.8484637977250096e-06, "loss": 0.3263, "step": 36657 }, { "epoch": 3.7269215128100854, "grad_norm": 0.2731701731681824, "learning_rate": 1.8481882914274047e-06, "loss": 0.322, "step": 36658 }, { "epoch": 3.7270231801545344, "grad_norm": 0.29337823390960693, "learning_rate": 1.8479128010078739e-06, "loss": 0.3108, "step": 36659 }, { "epoch": 3.7271248474989833, "grad_norm": 0.27640658617019653, "learning_rate": 1.8476373264678116e-06, "loss": 0.3057, "step": 36660 }, { "epoch": 3.7272265148434323, "grad_norm": 0.28109678626060486, "learning_rate": 1.847361867808603e-06, "loss": 0.3097, "step": 36661 }, { "epoch": 3.727328182187881, "grad_norm": 0.2697705924510956, "learning_rate": 1.8470864250316344e-06, "loss": 0.3175, "step": 36662 }, { "epoch": 3.72742984953233, "grad_norm": 0.2903011441230774, "learning_rate": 1.8468109981382971e-06, "loss": 0.2806, "step": 36663 }, { "epoch": 3.727531516876779, "grad_norm": 0.2589869201183319, "learning_rate": 1.8465355871299751e-06, "loss": 0.3057, "step": 36664 }, { "epoch": 3.727633184221228, "grad_norm": 0.2829209268093109, "learning_rate": 1.8462601920080602e-06, "loss": 0.3115, "step": 36665 }, { "epoch": 3.727734851565677, "grad_norm": 0.29519733786582947, "learning_rate": 1.8459848127739337e-06, "loss": 0.3037, "step": 36666 }, { "epoch": 3.727836518910126, "grad_norm": 0.28756481409072876, "learning_rate": 1.8457094494289869e-06, "loss": 0.3162, "step": 36667 }, { "epoch": 3.7279381862545753, "grad_norm": 0.27539360523223877, "learning_rate": 1.8454341019746091e-06, "loss": 0.2906, "step": 36668 }, { "epoch": 3.7280398535990242, "grad_norm": 0.29743072390556335, "learning_rate": 1.8451587704121816e-06, "loss": 0.3129, "step": 36669 }, { "epoch": 3.728141520943473, "grad_norm": 0.28352129459381104, "learning_rate": 1.844883454743096e-06, "loss": 0.3026, "step": 36670 }, { "epoch": 3.728243188287922, "grad_norm": 0.2717514932155609, "learning_rate": 1.8446081549687378e-06, "loss": 0.2848, "step": 36671 }, { "epoch": 3.728344855632371, "grad_norm": 0.2753516137599945, "learning_rate": 1.8443328710904923e-06, "loss": 0.3085, "step": 36672 }, { "epoch": 3.72844652297682, "grad_norm": 0.29935818910598755, "learning_rate": 1.8440576031097495e-06, "loss": 0.3033, "step": 36673 }, { "epoch": 3.728548190321269, "grad_norm": 0.2793443500995636, "learning_rate": 1.8437823510278946e-06, "loss": 0.3146, "step": 36674 }, { "epoch": 3.728649857665718, "grad_norm": 0.26991894841194153, "learning_rate": 1.8435071148463146e-06, "loss": 0.3243, "step": 36675 }, { "epoch": 3.728751525010167, "grad_norm": 0.27875974774360657, "learning_rate": 1.8432318945663934e-06, "loss": 0.3134, "step": 36676 }, { "epoch": 3.7288531923546158, "grad_norm": 0.3116583824157715, "learning_rate": 1.8429566901895196e-06, "loss": 0.3029, "step": 36677 }, { "epoch": 3.7289548596990647, "grad_norm": 0.287773460149765, "learning_rate": 1.8426815017170836e-06, "loss": 0.3003, "step": 36678 }, { "epoch": 3.7290565270435136, "grad_norm": 0.2760368287563324, "learning_rate": 1.842406329150464e-06, "loss": 0.341, "step": 36679 }, { "epoch": 3.7291581943879626, "grad_norm": 0.2811812162399292, "learning_rate": 1.8421311724910535e-06, "loss": 0.3254, "step": 36680 }, { "epoch": 3.7292598617324115, "grad_norm": 0.2831714153289795, "learning_rate": 1.8418560317402352e-06, "loss": 0.3227, "step": 36681 }, { "epoch": 3.7293615290768605, "grad_norm": 0.27453723549842834, "learning_rate": 1.841580906899394e-06, "loss": 0.3182, "step": 36682 }, { "epoch": 3.7294631964213094, "grad_norm": 0.27322253584861755, "learning_rate": 1.841305797969919e-06, "loss": 0.3181, "step": 36683 }, { "epoch": 3.7295648637657584, "grad_norm": 0.28675219416618347, "learning_rate": 1.8410307049531956e-06, "loss": 0.3206, "step": 36684 }, { "epoch": 3.7296665311102073, "grad_norm": 0.29220134019851685, "learning_rate": 1.8407556278506084e-06, "loss": 0.2919, "step": 36685 }, { "epoch": 3.7297681984546562, "grad_norm": 0.2653748095035553, "learning_rate": 1.8404805666635417e-06, "loss": 0.3154, "step": 36686 }, { "epoch": 3.729869865799105, "grad_norm": 0.2945561110973358, "learning_rate": 1.840205521393385e-06, "loss": 0.3084, "step": 36687 }, { "epoch": 3.729971533143554, "grad_norm": 0.2688608765602112, "learning_rate": 1.8399304920415218e-06, "loss": 0.3233, "step": 36688 }, { "epoch": 3.730073200488003, "grad_norm": 0.258865088224411, "learning_rate": 1.8396554786093362e-06, "loss": 0.2732, "step": 36689 }, { "epoch": 3.730174867832452, "grad_norm": 0.26884981989860535, "learning_rate": 1.8393804810982168e-06, "loss": 0.3023, "step": 36690 }, { "epoch": 3.730276535176901, "grad_norm": 0.29018634557724, "learning_rate": 1.8391054995095482e-06, "loss": 0.3053, "step": 36691 }, { "epoch": 3.73037820252135, "grad_norm": 0.279794305562973, "learning_rate": 1.8388305338447127e-06, "loss": 0.3145, "step": 36692 }, { "epoch": 3.7304798698657993, "grad_norm": 0.2893781363964081, "learning_rate": 1.8385555841050995e-06, "loss": 0.3111, "step": 36693 }, { "epoch": 3.730581537210248, "grad_norm": 0.28498589992523193, "learning_rate": 1.8382806502920918e-06, "loss": 0.3178, "step": 36694 }, { "epoch": 3.730683204554697, "grad_norm": 0.287066251039505, "learning_rate": 1.838005732407075e-06, "loss": 0.2807, "step": 36695 }, { "epoch": 3.730784871899146, "grad_norm": 0.2933367192745209, "learning_rate": 1.8377308304514324e-06, "loss": 0.3244, "step": 36696 }, { "epoch": 3.730886539243595, "grad_norm": 0.2723165452480316, "learning_rate": 1.837455944426552e-06, "loss": 0.3412, "step": 36697 }, { "epoch": 3.730988206588044, "grad_norm": 0.2919948995113373, "learning_rate": 1.837181074333817e-06, "loss": 0.3346, "step": 36698 }, { "epoch": 3.731089873932493, "grad_norm": 0.28589653968811035, "learning_rate": 1.8369062201746108e-06, "loss": 0.3141, "step": 36699 }, { "epoch": 3.731191541276942, "grad_norm": 0.27764031291007996, "learning_rate": 1.8366313819503207e-06, "loss": 0.2876, "step": 36700 }, { "epoch": 3.731293208621391, "grad_norm": 0.2902752459049225, "learning_rate": 1.8363565596623296e-06, "loss": 0.2961, "step": 36701 }, { "epoch": 3.7313948759658397, "grad_norm": 0.30626776814460754, "learning_rate": 1.8360817533120212e-06, "loss": 0.2876, "step": 36702 }, { "epoch": 3.7314965433102887, "grad_norm": 0.27043578028678894, "learning_rate": 1.8358069629007824e-06, "loss": 0.2946, "step": 36703 }, { "epoch": 3.7315982106547376, "grad_norm": 0.2546095848083496, "learning_rate": 1.8355321884299964e-06, "loss": 0.312, "step": 36704 }, { "epoch": 3.7316998779991866, "grad_norm": 0.2867341637611389, "learning_rate": 1.835257429901048e-06, "loss": 0.2946, "step": 36705 }, { "epoch": 3.7318015453436355, "grad_norm": 0.29630419611930847, "learning_rate": 1.8349826873153175e-06, "loss": 0.3151, "step": 36706 }, { "epoch": 3.7319032126880844, "grad_norm": 0.2550587058067322, "learning_rate": 1.8347079606741947e-06, "loss": 0.3111, "step": 36707 }, { "epoch": 3.7320048800325334, "grad_norm": 0.27419188618659973, "learning_rate": 1.8344332499790608e-06, "loss": 0.3447, "step": 36708 }, { "epoch": 3.7321065473769828, "grad_norm": 0.27162832021713257, "learning_rate": 1.8341585552312979e-06, "loss": 0.3108, "step": 36709 }, { "epoch": 3.7322082147214317, "grad_norm": 0.258561909198761, "learning_rate": 1.8338838764322942e-06, "loss": 0.2856, "step": 36710 }, { "epoch": 3.7323098820658807, "grad_norm": 0.28825706243515015, "learning_rate": 1.833609213583431e-06, "loss": 0.2908, "step": 36711 }, { "epoch": 3.7324115494103296, "grad_norm": 0.2726912796497345, "learning_rate": 1.833334566686092e-06, "loss": 0.3426, "step": 36712 }, { "epoch": 3.7325132167547785, "grad_norm": 0.2685771882534027, "learning_rate": 1.833059935741659e-06, "loss": 0.3164, "step": 36713 }, { "epoch": 3.7326148840992275, "grad_norm": 0.28580376505851746, "learning_rate": 1.8327853207515196e-06, "loss": 0.2754, "step": 36714 }, { "epoch": 3.7327165514436764, "grad_norm": 0.2792842984199524, "learning_rate": 1.8325107217170545e-06, "loss": 0.2972, "step": 36715 }, { "epoch": 3.7328182187881254, "grad_norm": 0.2700808346271515, "learning_rate": 1.8322361386396465e-06, "loss": 0.3311, "step": 36716 }, { "epoch": 3.7329198861325743, "grad_norm": 0.26758331060409546, "learning_rate": 1.8319615715206818e-06, "loss": 0.3157, "step": 36717 }, { "epoch": 3.7330215534770232, "grad_norm": 0.2569194436073303, "learning_rate": 1.8316870203615417e-06, "loss": 0.3396, "step": 36718 }, { "epoch": 3.733123220821472, "grad_norm": 0.27724650502204895, "learning_rate": 1.8314124851636072e-06, "loss": 0.3421, "step": 36719 }, { "epoch": 3.733224888165921, "grad_norm": 0.2814719080924988, "learning_rate": 1.8311379659282663e-06, "loss": 0.3091, "step": 36720 }, { "epoch": 3.73332655551037, "grad_norm": 0.2613470256328583, "learning_rate": 1.8308634626568988e-06, "loss": 0.3104, "step": 36721 }, { "epoch": 3.733428222854819, "grad_norm": 0.29255157709121704, "learning_rate": 1.8305889753508883e-06, "loss": 0.292, "step": 36722 }, { "epoch": 3.733529890199268, "grad_norm": 0.2683151364326477, "learning_rate": 1.8303145040116155e-06, "loss": 0.3221, "step": 36723 }, { "epoch": 3.733631557543717, "grad_norm": 0.27434882521629333, "learning_rate": 1.8300400486404669e-06, "loss": 0.3006, "step": 36724 }, { "epoch": 3.733733224888166, "grad_norm": 0.27609026432037354, "learning_rate": 1.8297656092388228e-06, "loss": 0.3422, "step": 36725 }, { "epoch": 3.7338348922326148, "grad_norm": 0.2872438430786133, "learning_rate": 1.8294911858080645e-06, "loss": 0.3105, "step": 36726 }, { "epoch": 3.7339365595770637, "grad_norm": 0.2642345130443573, "learning_rate": 1.8292167783495784e-06, "loss": 0.3053, "step": 36727 }, { "epoch": 3.7340382269215127, "grad_norm": 0.2500254511833191, "learning_rate": 1.8289423868647444e-06, "loss": 0.3297, "step": 36728 }, { "epoch": 3.7341398942659616, "grad_norm": 0.29178014397621155, "learning_rate": 1.8286680113549433e-06, "loss": 0.2881, "step": 36729 }, { "epoch": 3.7342415616104105, "grad_norm": 0.28061360120773315, "learning_rate": 1.828393651821561e-06, "loss": 0.2787, "step": 36730 }, { "epoch": 3.7343432289548595, "grad_norm": 0.2693237364292145, "learning_rate": 1.8281193082659775e-06, "loss": 0.2959, "step": 36731 }, { "epoch": 3.7344448962993084, "grad_norm": 0.28072595596313477, "learning_rate": 1.8278449806895755e-06, "loss": 0.3061, "step": 36732 }, { "epoch": 3.7345465636437574, "grad_norm": 0.27418023347854614, "learning_rate": 1.827570669093735e-06, "loss": 0.3083, "step": 36733 }, { "epoch": 3.7346482309882068, "grad_norm": 0.2725721001625061, "learning_rate": 1.827296373479841e-06, "loss": 0.3067, "step": 36734 }, { "epoch": 3.7347498983326557, "grad_norm": 0.2963797450065613, "learning_rate": 1.8270220938492738e-06, "loss": 0.3272, "step": 36735 }, { "epoch": 3.7348515656771046, "grad_norm": 0.27277716994285583, "learning_rate": 1.8267478302034142e-06, "loss": 0.321, "step": 36736 }, { "epoch": 3.7349532330215536, "grad_norm": 0.26510924100875854, "learning_rate": 1.8264735825436464e-06, "loss": 0.3027, "step": 36737 }, { "epoch": 3.7350549003660025, "grad_norm": 0.2919389605522156, "learning_rate": 1.8261993508713504e-06, "loss": 0.287, "step": 36738 }, { "epoch": 3.7351565677104515, "grad_norm": 0.30400702357292175, "learning_rate": 1.825925135187906e-06, "loss": 0.3275, "step": 36739 }, { "epoch": 3.7352582350549004, "grad_norm": 0.24901115894317627, "learning_rate": 1.8256509354946983e-06, "loss": 0.3142, "step": 36740 }, { "epoch": 3.7353599023993493, "grad_norm": 0.25197458267211914, "learning_rate": 1.825376751793107e-06, "loss": 0.316, "step": 36741 }, { "epoch": 3.7354615697437983, "grad_norm": 0.2628467381000519, "learning_rate": 1.8251025840845133e-06, "loss": 0.3051, "step": 36742 }, { "epoch": 3.7355632370882472, "grad_norm": 0.2750849425792694, "learning_rate": 1.8248284323702964e-06, "loss": 0.3077, "step": 36743 }, { "epoch": 3.735664904432696, "grad_norm": 0.29137203097343445, "learning_rate": 1.8245542966518409e-06, "loss": 0.3174, "step": 36744 }, { "epoch": 3.735766571777145, "grad_norm": 0.28572919964790344, "learning_rate": 1.8242801769305268e-06, "loss": 0.3148, "step": 36745 }, { "epoch": 3.735868239121594, "grad_norm": 0.26429539918899536, "learning_rate": 1.8240060732077324e-06, "loss": 0.2973, "step": 36746 }, { "epoch": 3.735969906466043, "grad_norm": 0.26211056113243103, "learning_rate": 1.8237319854848424e-06, "loss": 0.3267, "step": 36747 }, { "epoch": 3.736071573810492, "grad_norm": 0.277309387922287, "learning_rate": 1.8234579137632359e-06, "loss": 0.3026, "step": 36748 }, { "epoch": 3.736173241154941, "grad_norm": 0.26041457056999207, "learning_rate": 1.8231838580442918e-06, "loss": 0.2943, "step": 36749 }, { "epoch": 3.7362749084993903, "grad_norm": 0.27553340792655945, "learning_rate": 1.8229098183293942e-06, "loss": 0.2688, "step": 36750 }, { "epoch": 3.736376575843839, "grad_norm": 0.2759137451648712, "learning_rate": 1.822635794619922e-06, "loss": 0.3287, "step": 36751 }, { "epoch": 3.736478243188288, "grad_norm": 0.2740527093410492, "learning_rate": 1.8223617869172556e-06, "loss": 0.2884, "step": 36752 }, { "epoch": 3.736579910532737, "grad_norm": 0.2701527178287506, "learning_rate": 1.8220877952227735e-06, "loss": 0.3108, "step": 36753 }, { "epoch": 3.736681577877186, "grad_norm": 0.2631377875804901, "learning_rate": 1.82181381953786e-06, "loss": 0.2746, "step": 36754 }, { "epoch": 3.736783245221635, "grad_norm": 0.2616705298423767, "learning_rate": 1.8215398598638928e-06, "loss": 0.2923, "step": 36755 }, { "epoch": 3.736884912566084, "grad_norm": 0.2786879241466522, "learning_rate": 1.8212659162022512e-06, "loss": 0.3149, "step": 36756 }, { "epoch": 3.736986579910533, "grad_norm": 0.264680415391922, "learning_rate": 1.8209919885543182e-06, "loss": 0.307, "step": 36757 }, { "epoch": 3.737088247254982, "grad_norm": 0.2601269781589508, "learning_rate": 1.8207180769214716e-06, "loss": 0.3371, "step": 36758 }, { "epoch": 3.7371899145994307, "grad_norm": 0.2682180404663086, "learning_rate": 1.8204441813050905e-06, "loss": 0.3457, "step": 36759 }, { "epoch": 3.7372915819438797, "grad_norm": 0.2734300196170807, "learning_rate": 1.8201703017065575e-06, "loss": 0.3209, "step": 36760 }, { "epoch": 3.7373932492883286, "grad_norm": 0.27692094445228577, "learning_rate": 1.819896438127251e-06, "loss": 0.3237, "step": 36761 }, { "epoch": 3.7374949166327776, "grad_norm": 0.29670318961143494, "learning_rate": 1.8196225905685504e-06, "loss": 0.3138, "step": 36762 }, { "epoch": 3.7375965839772265, "grad_norm": 0.28694844245910645, "learning_rate": 1.8193487590318343e-06, "loss": 0.3326, "step": 36763 }, { "epoch": 3.7376982513216754, "grad_norm": 0.28297150135040283, "learning_rate": 1.8190749435184847e-06, "loss": 0.3266, "step": 36764 }, { "epoch": 3.7377999186661244, "grad_norm": 0.2714354395866394, "learning_rate": 1.8188011440298793e-06, "loss": 0.327, "step": 36765 }, { "epoch": 3.7379015860105733, "grad_norm": 0.2799125611782074, "learning_rate": 1.8185273605673964e-06, "loss": 0.2899, "step": 36766 }, { "epoch": 3.7380032533550223, "grad_norm": 0.2703832983970642, "learning_rate": 1.8182535931324198e-06, "loss": 0.3, "step": 36767 }, { "epoch": 3.738104920699471, "grad_norm": 0.2900756001472473, "learning_rate": 1.8179798417263223e-06, "loss": 0.311, "step": 36768 }, { "epoch": 3.73820658804392, "grad_norm": 0.3302159309387207, "learning_rate": 1.8177061063504865e-06, "loss": 0.3113, "step": 36769 }, { "epoch": 3.738308255388369, "grad_norm": 0.2744643986225128, "learning_rate": 1.8174323870062922e-06, "loss": 0.3203, "step": 36770 }, { "epoch": 3.738409922732818, "grad_norm": 0.2731148898601532, "learning_rate": 1.8171586836951173e-06, "loss": 0.307, "step": 36771 }, { "epoch": 3.738511590077267, "grad_norm": 0.2777894139289856, "learning_rate": 1.816884996418341e-06, "loss": 0.3191, "step": 36772 }, { "epoch": 3.738613257421716, "grad_norm": 0.301695317029953, "learning_rate": 1.8166113251773393e-06, "loss": 0.275, "step": 36773 }, { "epoch": 3.738714924766165, "grad_norm": 0.284587562084198, "learning_rate": 1.8163376699734959e-06, "loss": 0.3277, "step": 36774 }, { "epoch": 3.7388165921106142, "grad_norm": 0.2675107717514038, "learning_rate": 1.8160640308081861e-06, "loss": 0.2748, "step": 36775 }, { "epoch": 3.738918259455063, "grad_norm": 0.2682269513607025, "learning_rate": 1.8157904076827876e-06, "loss": 0.2996, "step": 36776 }, { "epoch": 3.739019926799512, "grad_norm": 0.27997875213623047, "learning_rate": 1.8155168005986835e-06, "loss": 0.3047, "step": 36777 }, { "epoch": 3.739121594143961, "grad_norm": 0.2781432271003723, "learning_rate": 1.815243209557246e-06, "loss": 0.3215, "step": 36778 }, { "epoch": 3.73922326148841, "grad_norm": 0.2593350112438202, "learning_rate": 1.8149696345598561e-06, "loss": 0.3313, "step": 36779 }, { "epoch": 3.739324928832859, "grad_norm": 0.29774507880210876, "learning_rate": 1.8146960756078957e-06, "loss": 0.2843, "step": 36780 }, { "epoch": 3.739426596177308, "grad_norm": 0.2729407846927643, "learning_rate": 1.814422532702736e-06, "loss": 0.302, "step": 36781 }, { "epoch": 3.739528263521757, "grad_norm": 0.26193374395370483, "learning_rate": 1.8141490058457606e-06, "loss": 0.2868, "step": 36782 }, { "epoch": 3.7396299308662058, "grad_norm": 0.2717313766479492, "learning_rate": 1.8138754950383435e-06, "loss": 0.2919, "step": 36783 }, { "epoch": 3.7397315982106547, "grad_norm": 0.2633105218410492, "learning_rate": 1.8136020002818667e-06, "loss": 0.2905, "step": 36784 }, { "epoch": 3.7398332655551036, "grad_norm": 0.25698602199554443, "learning_rate": 1.8133285215777051e-06, "loss": 0.2883, "step": 36785 }, { "epoch": 3.7399349328995526, "grad_norm": 0.2828965485095978, "learning_rate": 1.813055058927236e-06, "loss": 0.2954, "step": 36786 }, { "epoch": 3.7400366002440015, "grad_norm": 0.2963099479675293, "learning_rate": 1.8127816123318415e-06, "loss": 0.3049, "step": 36787 }, { "epoch": 3.7401382675884505, "grad_norm": 0.2893480062484741, "learning_rate": 1.8125081817928918e-06, "loss": 0.3292, "step": 36788 }, { "epoch": 3.7402399349328994, "grad_norm": 0.29252007603645325, "learning_rate": 1.8122347673117691e-06, "loss": 0.3663, "step": 36789 }, { "epoch": 3.7403416022773484, "grad_norm": 0.25653204321861267, "learning_rate": 1.8119613688898534e-06, "loss": 0.3313, "step": 36790 }, { "epoch": 3.7404432696217977, "grad_norm": 0.28257328271865845, "learning_rate": 1.8116879865285153e-06, "loss": 0.2963, "step": 36791 }, { "epoch": 3.7405449369662467, "grad_norm": 0.26549386978149414, "learning_rate": 1.8114146202291373e-06, "loss": 0.2868, "step": 36792 }, { "epoch": 3.7406466043106956, "grad_norm": 0.2878456115722656, "learning_rate": 1.8111412699930926e-06, "loss": 0.3022, "step": 36793 }, { "epoch": 3.7407482716551446, "grad_norm": 0.2658476233482361, "learning_rate": 1.8108679358217623e-06, "loss": 0.2825, "step": 36794 }, { "epoch": 3.7408499389995935, "grad_norm": 0.2855447232723236, "learning_rate": 1.8105946177165218e-06, "loss": 0.327, "step": 36795 }, { "epoch": 3.7409516063440424, "grad_norm": 0.27577972412109375, "learning_rate": 1.810321315678746e-06, "loss": 0.3054, "step": 36796 }, { "epoch": 3.7410532736884914, "grad_norm": 0.2875656187534332, "learning_rate": 1.810048029709816e-06, "loss": 0.2814, "step": 36797 }, { "epoch": 3.7411549410329403, "grad_norm": 0.29313716292381287, "learning_rate": 1.8097747598111031e-06, "loss": 0.267, "step": 36798 }, { "epoch": 3.7412566083773893, "grad_norm": 0.2683716416358948, "learning_rate": 1.809501505983987e-06, "loss": 0.3349, "step": 36799 }, { "epoch": 3.741358275721838, "grad_norm": 0.27137520909309387, "learning_rate": 1.8092282682298473e-06, "loss": 0.2904, "step": 36800 }, { "epoch": 3.741459943066287, "grad_norm": 0.2746977210044861, "learning_rate": 1.8089550465500545e-06, "loss": 0.3214, "step": 36801 }, { "epoch": 3.741561610410736, "grad_norm": 0.2742040157318115, "learning_rate": 1.8086818409459888e-06, "loss": 0.3217, "step": 36802 }, { "epoch": 3.741663277755185, "grad_norm": 0.26933351159095764, "learning_rate": 1.8084086514190258e-06, "loss": 0.3273, "step": 36803 }, { "epoch": 3.741764945099634, "grad_norm": 0.27677688002586365, "learning_rate": 1.80813547797054e-06, "loss": 0.2634, "step": 36804 }, { "epoch": 3.741866612444083, "grad_norm": 0.2791929543018341, "learning_rate": 1.8078623206019107e-06, "loss": 0.3052, "step": 36805 }, { "epoch": 3.741968279788532, "grad_norm": 0.2587529420852661, "learning_rate": 1.8075891793145113e-06, "loss": 0.3369, "step": 36806 }, { "epoch": 3.742069947132981, "grad_norm": 0.26133230328559875, "learning_rate": 1.8073160541097217e-06, "loss": 0.3381, "step": 36807 }, { "epoch": 3.7421716144774297, "grad_norm": 0.2920476496219635, "learning_rate": 1.8070429449889122e-06, "loss": 0.3138, "step": 36808 }, { "epoch": 3.7422732818218787, "grad_norm": 0.2712863087654114, "learning_rate": 1.8067698519534616e-06, "loss": 0.2936, "step": 36809 }, { "epoch": 3.7423749491663276, "grad_norm": 0.2801676392555237, "learning_rate": 1.8064967750047485e-06, "loss": 0.312, "step": 36810 }, { "epoch": 3.7424766165107766, "grad_norm": 0.3042929172515869, "learning_rate": 1.806223714144143e-06, "loss": 0.3414, "step": 36811 }, { "epoch": 3.7425782838552255, "grad_norm": 0.26816973090171814, "learning_rate": 1.805950669373025e-06, "loss": 0.3135, "step": 36812 }, { "epoch": 3.7426799511996744, "grad_norm": 0.26609575748443604, "learning_rate": 1.8056776406927685e-06, "loss": 0.317, "step": 36813 }, { "epoch": 3.7427816185441234, "grad_norm": 0.25041821599006653, "learning_rate": 1.8054046281047472e-06, "loss": 0.3024, "step": 36814 }, { "epoch": 3.7428832858885723, "grad_norm": 0.26568710803985596, "learning_rate": 1.8051316316103395e-06, "loss": 0.2868, "step": 36815 }, { "epoch": 3.7429849532330217, "grad_norm": 0.256396621465683, "learning_rate": 1.8048586512109185e-06, "loss": 0.2907, "step": 36816 }, { "epoch": 3.7430866205774707, "grad_norm": 0.294644832611084, "learning_rate": 1.8045856869078633e-06, "loss": 0.2967, "step": 36817 }, { "epoch": 3.7431882879219196, "grad_norm": 0.281830370426178, "learning_rate": 1.8043127387025421e-06, "loss": 0.3271, "step": 36818 }, { "epoch": 3.7432899552663685, "grad_norm": 0.2936709225177765, "learning_rate": 1.804039806596335e-06, "loss": 0.3138, "step": 36819 }, { "epoch": 3.7433916226108175, "grad_norm": 0.2765337824821472, "learning_rate": 1.8037668905906187e-06, "loss": 0.3196, "step": 36820 }, { "epoch": 3.7434932899552664, "grad_norm": 0.27226319909095764, "learning_rate": 1.8034939906867622e-06, "loss": 0.3128, "step": 36821 }, { "epoch": 3.7435949572997154, "grad_norm": 0.27010297775268555, "learning_rate": 1.8032211068861443e-06, "loss": 0.3195, "step": 36822 }, { "epoch": 3.7436966246441643, "grad_norm": 0.2903895676136017, "learning_rate": 1.8029482391901393e-06, "loss": 0.3008, "step": 36823 }, { "epoch": 3.7437982919886132, "grad_norm": 0.2931561768054962, "learning_rate": 1.8026753876001195e-06, "loss": 0.2913, "step": 36824 }, { "epoch": 3.743899959333062, "grad_norm": 0.2858302891254425, "learning_rate": 1.8024025521174638e-06, "loss": 0.317, "step": 36825 }, { "epoch": 3.744001626677511, "grad_norm": 0.27253586053848267, "learning_rate": 1.8021297327435432e-06, "loss": 0.3185, "step": 36826 }, { "epoch": 3.74410329402196, "grad_norm": 0.2547333836555481, "learning_rate": 1.8018569294797334e-06, "loss": 0.3587, "step": 36827 }, { "epoch": 3.744204961366409, "grad_norm": 0.2533322274684906, "learning_rate": 1.8015841423274067e-06, "loss": 0.2844, "step": 36828 }, { "epoch": 3.744306628710858, "grad_norm": 0.2672654688358307, "learning_rate": 1.8013113712879388e-06, "loss": 0.3046, "step": 36829 }, { "epoch": 3.744408296055307, "grad_norm": 0.2680176794528961, "learning_rate": 1.801038616362708e-06, "loss": 0.3371, "step": 36830 }, { "epoch": 3.744509963399756, "grad_norm": 0.27753809094429016, "learning_rate": 1.8007658775530807e-06, "loss": 0.3056, "step": 36831 }, { "epoch": 3.744611630744205, "grad_norm": 0.28244927525520325, "learning_rate": 1.8004931548604365e-06, "loss": 0.3271, "step": 36832 }, { "epoch": 3.744713298088654, "grad_norm": 0.26057228446006775, "learning_rate": 1.8002204482861474e-06, "loss": 0.3312, "step": 36833 }, { "epoch": 3.744814965433103, "grad_norm": 0.2698807418346405, "learning_rate": 1.7999477578315854e-06, "loss": 0.2958, "step": 36834 }, { "epoch": 3.744916632777552, "grad_norm": 0.25975146889686584, "learning_rate": 1.799675083498128e-06, "loss": 0.3526, "step": 36835 }, { "epoch": 3.745018300122001, "grad_norm": 0.24979987740516663, "learning_rate": 1.7994024252871472e-06, "loss": 0.3167, "step": 36836 }, { "epoch": 3.74511996746645, "grad_norm": 0.26828694343566895, "learning_rate": 1.799129783200016e-06, "loss": 0.2929, "step": 36837 }, { "epoch": 3.745221634810899, "grad_norm": 0.2672945261001587, "learning_rate": 1.7988571572381069e-06, "loss": 0.3271, "step": 36838 }, { "epoch": 3.745323302155348, "grad_norm": 0.2633959949016571, "learning_rate": 1.7985845474027963e-06, "loss": 0.2852, "step": 36839 }, { "epoch": 3.7454249694997968, "grad_norm": 0.2680397629737854, "learning_rate": 1.7983119536954562e-06, "loss": 0.32, "step": 36840 }, { "epoch": 3.7455266368442457, "grad_norm": 0.2748255431652069, "learning_rate": 1.7980393761174581e-06, "loss": 0.3026, "step": 36841 }, { "epoch": 3.7456283041886946, "grad_norm": 0.2525470554828644, "learning_rate": 1.7977668146701783e-06, "loss": 0.3022, "step": 36842 }, { "epoch": 3.7457299715331436, "grad_norm": 0.2569120228290558, "learning_rate": 1.797494269354988e-06, "loss": 0.2912, "step": 36843 }, { "epoch": 3.7458316388775925, "grad_norm": 0.2544786036014557, "learning_rate": 1.797221740173259e-06, "loss": 0.3371, "step": 36844 }, { "epoch": 3.7459333062220415, "grad_norm": 0.26526978611946106, "learning_rate": 1.7969492271263672e-06, "loss": 0.2894, "step": 36845 }, { "epoch": 3.7460349735664904, "grad_norm": 0.2674938142299652, "learning_rate": 1.7966767302156846e-06, "loss": 0.3105, "step": 36846 }, { "epoch": 3.7461366409109393, "grad_norm": 0.2679691016674042, "learning_rate": 1.7964042494425827e-06, "loss": 0.3012, "step": 36847 }, { "epoch": 3.7462383082553883, "grad_norm": 0.26168298721313477, "learning_rate": 1.7961317848084336e-06, "loss": 0.3276, "step": 36848 }, { "epoch": 3.7463399755998372, "grad_norm": 0.26040562987327576, "learning_rate": 1.7958593363146131e-06, "loss": 0.3357, "step": 36849 }, { "epoch": 3.746441642944286, "grad_norm": 0.26842236518859863, "learning_rate": 1.7955869039624918e-06, "loss": 0.306, "step": 36850 }, { "epoch": 3.746543310288735, "grad_norm": 0.26926353573799133, "learning_rate": 1.7953144877534402e-06, "loss": 0.3066, "step": 36851 }, { "epoch": 3.746644977633184, "grad_norm": 0.28506097197532654, "learning_rate": 1.7950420876888348e-06, "loss": 0.3131, "step": 36852 }, { "epoch": 3.746746644977633, "grad_norm": 0.27623501420021057, "learning_rate": 1.7947697037700451e-06, "loss": 0.2957, "step": 36853 }, { "epoch": 3.746848312322082, "grad_norm": 0.2530796229839325, "learning_rate": 1.7944973359984425e-06, "loss": 0.2956, "step": 36854 }, { "epoch": 3.746949979666531, "grad_norm": 0.2812180519104004, "learning_rate": 1.7942249843754024e-06, "loss": 0.3071, "step": 36855 }, { "epoch": 3.74705164701098, "grad_norm": 0.27838054299354553, "learning_rate": 1.7939526489022951e-06, "loss": 0.2785, "step": 36856 }, { "epoch": 3.747153314355429, "grad_norm": 0.28629347681999207, "learning_rate": 1.7936803295804922e-06, "loss": 0.2918, "step": 36857 }, { "epoch": 3.747254981699878, "grad_norm": 0.27206847071647644, "learning_rate": 1.7934080264113647e-06, "loss": 0.3283, "step": 36858 }, { "epoch": 3.747356649044327, "grad_norm": 0.2575748562812805, "learning_rate": 1.7931357393962867e-06, "loss": 0.3101, "step": 36859 }, { "epoch": 3.747458316388776, "grad_norm": 0.25748971104621887, "learning_rate": 1.792863468536629e-06, "loss": 0.3083, "step": 36860 }, { "epoch": 3.747559983733225, "grad_norm": 0.2919256091117859, "learning_rate": 1.7925912138337614e-06, "loss": 0.3205, "step": 36861 }, { "epoch": 3.747661651077674, "grad_norm": 0.27488288283348083, "learning_rate": 1.7923189752890586e-06, "loss": 0.3367, "step": 36862 }, { "epoch": 3.747763318422123, "grad_norm": 0.25026413798332214, "learning_rate": 1.7920467529038904e-06, "loss": 0.2717, "step": 36863 }, { "epoch": 3.747864985766572, "grad_norm": 0.2661398947238922, "learning_rate": 1.7917745466796272e-06, "loss": 0.298, "step": 36864 }, { "epoch": 3.7479666531110207, "grad_norm": 0.28416672348976135, "learning_rate": 1.7915023566176427e-06, "loss": 0.3066, "step": 36865 }, { "epoch": 3.7480683204554697, "grad_norm": 0.2735128700733185, "learning_rate": 1.7912301827193068e-06, "loss": 0.3048, "step": 36866 }, { "epoch": 3.7481699877999186, "grad_norm": 0.2785261571407318, "learning_rate": 1.790958024985991e-06, "loss": 0.2744, "step": 36867 }, { "epoch": 3.7482716551443676, "grad_norm": 0.2685994803905487, "learning_rate": 1.7906858834190643e-06, "loss": 0.3231, "step": 36868 }, { "epoch": 3.7483733224888165, "grad_norm": 0.27474597096443176, "learning_rate": 1.790413758019901e-06, "loss": 0.302, "step": 36869 }, { "epoch": 3.7484749898332654, "grad_norm": 0.2764768600463867, "learning_rate": 1.7901416487898704e-06, "loss": 0.3041, "step": 36870 }, { "epoch": 3.7485766571777144, "grad_norm": 0.2848842144012451, "learning_rate": 1.7898695557303419e-06, "loss": 0.2766, "step": 36871 }, { "epoch": 3.7486783245221633, "grad_norm": 0.27549275755882263, "learning_rate": 1.7895974788426895e-06, "loss": 0.2899, "step": 36872 }, { "epoch": 3.7487799918666127, "grad_norm": 0.2722531259059906, "learning_rate": 1.7893254181282821e-06, "loss": 0.3158, "step": 36873 }, { "epoch": 3.7488816592110616, "grad_norm": 0.26280614733695984, "learning_rate": 1.7890533735884885e-06, "loss": 0.2777, "step": 36874 }, { "epoch": 3.7489833265555106, "grad_norm": 0.2826373279094696, "learning_rate": 1.7887813452246828e-06, "loss": 0.2823, "step": 36875 }, { "epoch": 3.7490849938999595, "grad_norm": 0.26538702845573425, "learning_rate": 1.7885093330382331e-06, "loss": 0.3222, "step": 36876 }, { "epoch": 3.7491866612444085, "grad_norm": 0.2621486783027649, "learning_rate": 1.7882373370305107e-06, "loss": 0.3137, "step": 36877 }, { "epoch": 3.7492883285888574, "grad_norm": 0.25515443086624146, "learning_rate": 1.7879653572028837e-06, "loss": 0.3208, "step": 36878 }, { "epoch": 3.7493899959333064, "grad_norm": 0.2737233638763428, "learning_rate": 1.7876933935567252e-06, "loss": 0.3262, "step": 36879 }, { "epoch": 3.7494916632777553, "grad_norm": 0.2685258090496063, "learning_rate": 1.7874214460934041e-06, "loss": 0.3681, "step": 36880 }, { "epoch": 3.7495933306222042, "grad_norm": 0.29148727655410767, "learning_rate": 1.7871495148142892e-06, "loss": 0.3249, "step": 36881 }, { "epoch": 3.749694997966653, "grad_norm": 0.2754600942134857, "learning_rate": 1.7868775997207528e-06, "loss": 0.2956, "step": 36882 }, { "epoch": 3.749796665311102, "grad_norm": 0.25334402918815613, "learning_rate": 1.7866057008141635e-06, "loss": 0.3125, "step": 36883 }, { "epoch": 3.749898332655551, "grad_norm": 0.25247523188591003, "learning_rate": 1.7863338180958894e-06, "loss": 0.3284, "step": 36884 }, { "epoch": 3.75, "grad_norm": 0.2774636149406433, "learning_rate": 1.7860619515673034e-06, "loss": 0.3035, "step": 36885 }, { "epoch": 3.750101667344449, "grad_norm": 0.2871882915496826, "learning_rate": 1.785790101229774e-06, "loss": 0.3462, "step": 36886 }, { "epoch": 3.750203334688898, "grad_norm": 0.28794053196907043, "learning_rate": 1.7855182670846698e-06, "loss": 0.3021, "step": 36887 }, { "epoch": 3.750305002033347, "grad_norm": 0.2758011817932129, "learning_rate": 1.7852464491333587e-06, "loss": 0.3163, "step": 36888 }, { "epoch": 3.7504066693777958, "grad_norm": 0.27902910113334656, "learning_rate": 1.7849746473772146e-06, "loss": 0.3075, "step": 36889 }, { "epoch": 3.7505083367222447, "grad_norm": 0.2777045965194702, "learning_rate": 1.7847028618176038e-06, "loss": 0.2807, "step": 36890 }, { "epoch": 3.7506100040666936, "grad_norm": 0.2677781283855438, "learning_rate": 1.7844310924558938e-06, "loss": 0.3293, "step": 36891 }, { "epoch": 3.7507116714111426, "grad_norm": 0.2790226638317108, "learning_rate": 1.7841593392934582e-06, "loss": 0.3183, "step": 36892 }, { "epoch": 3.7508133387555915, "grad_norm": 0.2640194892883301, "learning_rate": 1.783887602331663e-06, "loss": 0.3227, "step": 36893 }, { "epoch": 3.7509150061000405, "grad_norm": 0.2781495749950409, "learning_rate": 1.783615881571877e-06, "loss": 0.3401, "step": 36894 }, { "epoch": 3.7510166734444894, "grad_norm": 0.27655792236328125, "learning_rate": 1.7833441770154708e-06, "loss": 0.304, "step": 36895 }, { "epoch": 3.7511183407889384, "grad_norm": 0.28100356459617615, "learning_rate": 1.7830724886638124e-06, "loss": 0.3142, "step": 36896 }, { "epoch": 3.7512200081333873, "grad_norm": 0.27362385392189026, "learning_rate": 1.7828008165182703e-06, "loss": 0.3155, "step": 36897 }, { "epoch": 3.7513216754778367, "grad_norm": 0.2650804817676544, "learning_rate": 1.782529160580212e-06, "loss": 0.3217, "step": 36898 }, { "epoch": 3.7514233428222856, "grad_norm": 0.26268503069877625, "learning_rate": 1.7822575208510084e-06, "loss": 0.3144, "step": 36899 }, { "epoch": 3.7515250101667346, "grad_norm": 0.250588595867157, "learning_rate": 1.781985897332027e-06, "loss": 0.3266, "step": 36900 }, { "epoch": 3.7516266775111835, "grad_norm": 0.28035786747932434, "learning_rate": 1.7817142900246336e-06, "loss": 0.3143, "step": 36901 }, { "epoch": 3.7517283448556324, "grad_norm": 0.25364282727241516, "learning_rate": 1.7814426989302013e-06, "loss": 0.3302, "step": 36902 }, { "epoch": 3.7518300122000814, "grad_norm": 0.2746865153312683, "learning_rate": 1.7811711240500957e-06, "loss": 0.2835, "step": 36903 }, { "epoch": 3.7519316795445303, "grad_norm": 0.2792888283729553, "learning_rate": 1.7808995653856848e-06, "loss": 0.3345, "step": 36904 }, { "epoch": 3.7520333468889793, "grad_norm": 0.26983246207237244, "learning_rate": 1.780628022938335e-06, "loss": 0.3159, "step": 36905 }, { "epoch": 3.752135014233428, "grad_norm": 0.2536674737930298, "learning_rate": 1.7803564967094183e-06, "loss": 0.286, "step": 36906 }, { "epoch": 3.752236681577877, "grad_norm": 0.269080251455307, "learning_rate": 1.7800849867003e-06, "loss": 0.2961, "step": 36907 }, { "epoch": 3.752338348922326, "grad_norm": 0.2522059977054596, "learning_rate": 1.7798134929123474e-06, "loss": 0.295, "step": 36908 }, { "epoch": 3.752440016266775, "grad_norm": 0.2623976171016693, "learning_rate": 1.7795420153469306e-06, "loss": 0.2843, "step": 36909 }, { "epoch": 3.752541683611224, "grad_norm": 0.2593458890914917, "learning_rate": 1.7792705540054157e-06, "loss": 0.3066, "step": 36910 }, { "epoch": 3.752643350955673, "grad_norm": 0.27596497535705566, "learning_rate": 1.7789991088891684e-06, "loss": 0.312, "step": 36911 }, { "epoch": 3.752745018300122, "grad_norm": 0.28778207302093506, "learning_rate": 1.7787276799995606e-06, "loss": 0.3106, "step": 36912 }, { "epoch": 3.752846685644571, "grad_norm": 0.25769954919815063, "learning_rate": 1.778456267337957e-06, "loss": 0.3359, "step": 36913 }, { "epoch": 3.75294835298902, "grad_norm": 0.27759429812431335, "learning_rate": 1.7781848709057249e-06, "loss": 0.3124, "step": 36914 }, { "epoch": 3.753050020333469, "grad_norm": 0.28346383571624756, "learning_rate": 1.77791349070423e-06, "loss": 0.3031, "step": 36915 }, { "epoch": 3.753151687677918, "grad_norm": 0.26128488779067993, "learning_rate": 1.777642126734843e-06, "loss": 0.3356, "step": 36916 }, { "epoch": 3.753253355022367, "grad_norm": 0.2593826353549957, "learning_rate": 1.7773707789989293e-06, "loss": 0.2988, "step": 36917 }, { "epoch": 3.753355022366816, "grad_norm": 0.2772579789161682, "learning_rate": 1.7770994474978543e-06, "loss": 0.303, "step": 36918 }, { "epoch": 3.753456689711265, "grad_norm": 0.2901148200035095, "learning_rate": 1.7768281322329894e-06, "loss": 0.2781, "step": 36919 }, { "epoch": 3.753558357055714, "grad_norm": 0.24512052536010742, "learning_rate": 1.7765568332056955e-06, "loss": 0.3044, "step": 36920 }, { "epoch": 3.7536600244001628, "grad_norm": 0.26757991313934326, "learning_rate": 1.7762855504173422e-06, "loss": 0.3319, "step": 36921 }, { "epoch": 3.7537616917446117, "grad_norm": 0.2685624063014984, "learning_rate": 1.776014283869298e-06, "loss": 0.3164, "step": 36922 }, { "epoch": 3.7538633590890607, "grad_norm": 0.248038649559021, "learning_rate": 1.7757430335629272e-06, "loss": 0.3238, "step": 36923 }, { "epoch": 3.7539650264335096, "grad_norm": 0.26243335008621216, "learning_rate": 1.7754717994995973e-06, "loss": 0.2924, "step": 36924 }, { "epoch": 3.7540666937779585, "grad_norm": 0.2594871520996094, "learning_rate": 1.7752005816806721e-06, "loss": 0.308, "step": 36925 }, { "epoch": 3.7541683611224075, "grad_norm": 0.28276142477989197, "learning_rate": 1.7749293801075217e-06, "loss": 0.332, "step": 36926 }, { "epoch": 3.7542700284668564, "grad_norm": 0.2654176652431488, "learning_rate": 1.774658194781511e-06, "loss": 0.3232, "step": 36927 }, { "epoch": 3.7543716958113054, "grad_norm": 0.27835315465927124, "learning_rate": 1.7743870257040042e-06, "loss": 0.3363, "step": 36928 }, { "epoch": 3.7544733631557543, "grad_norm": 0.2632301449775696, "learning_rate": 1.7741158728763723e-06, "loss": 0.2924, "step": 36929 }, { "epoch": 3.7545750305002032, "grad_norm": 0.25935840606689453, "learning_rate": 1.773844736299975e-06, "loss": 0.3296, "step": 36930 }, { "epoch": 3.754676697844652, "grad_norm": 0.24895958602428436, "learning_rate": 1.7735736159761806e-06, "loss": 0.3267, "step": 36931 }, { "epoch": 3.754778365189101, "grad_norm": 0.2788573205471039, "learning_rate": 1.7733025119063597e-06, "loss": 0.3179, "step": 36932 }, { "epoch": 3.75488003253355, "grad_norm": 0.2854435443878174, "learning_rate": 1.7730314240918701e-06, "loss": 0.2773, "step": 36933 }, { "epoch": 3.754981699877999, "grad_norm": 0.2748127579689026, "learning_rate": 1.772760352534083e-06, "loss": 0.2966, "step": 36934 }, { "epoch": 3.755083367222448, "grad_norm": 0.2613368630409241, "learning_rate": 1.7724892972343605e-06, "loss": 0.2969, "step": 36935 }, { "epoch": 3.755185034566897, "grad_norm": 0.2820718586444855, "learning_rate": 1.7722182581940723e-06, "loss": 0.3142, "step": 36936 }, { "epoch": 3.755286701911346, "grad_norm": 0.26480257511138916, "learning_rate": 1.7719472354145812e-06, "loss": 0.3091, "step": 36937 }, { "epoch": 3.755388369255795, "grad_norm": 0.27843964099884033, "learning_rate": 1.7716762288972506e-06, "loss": 0.3378, "step": 36938 }, { "epoch": 3.755490036600244, "grad_norm": 0.31842127442359924, "learning_rate": 1.7714052386434522e-06, "loss": 0.321, "step": 36939 }, { "epoch": 3.755591703944693, "grad_norm": 0.2756536900997162, "learning_rate": 1.7711342646545433e-06, "loss": 0.3247, "step": 36940 }, { "epoch": 3.755693371289142, "grad_norm": 0.26926136016845703, "learning_rate": 1.770863306931892e-06, "loss": 0.3001, "step": 36941 }, { "epoch": 3.755795038633591, "grad_norm": 0.28312432765960693, "learning_rate": 1.770592365476868e-06, "loss": 0.3459, "step": 36942 }, { "epoch": 3.75589670597804, "grad_norm": 0.25999709963798523, "learning_rate": 1.7703214402908293e-06, "loss": 0.2952, "step": 36943 }, { "epoch": 3.755998373322489, "grad_norm": 0.24604809284210205, "learning_rate": 1.7700505313751448e-06, "loss": 0.3027, "step": 36944 }, { "epoch": 3.756100040666938, "grad_norm": 0.2817979156970978, "learning_rate": 1.7697796387311766e-06, "loss": 0.3067, "step": 36945 }, { "epoch": 3.7562017080113868, "grad_norm": 0.28574827313423157, "learning_rate": 1.7695087623602924e-06, "loss": 0.3172, "step": 36946 }, { "epoch": 3.7563033753558357, "grad_norm": 0.2843996584415436, "learning_rate": 1.7692379022638556e-06, "loss": 0.3035, "step": 36947 }, { "epoch": 3.7564050427002846, "grad_norm": 0.276617169380188, "learning_rate": 1.768967058443229e-06, "loss": 0.3133, "step": 36948 }, { "epoch": 3.7565067100447336, "grad_norm": 0.27446553111076355, "learning_rate": 1.768696230899782e-06, "loss": 0.3321, "step": 36949 }, { "epoch": 3.7566083773891825, "grad_norm": 0.2835865020751953, "learning_rate": 1.7684254196348716e-06, "loss": 0.327, "step": 36950 }, { "epoch": 3.7567100447336315, "grad_norm": 0.29476383328437805, "learning_rate": 1.768154624649866e-06, "loss": 0.295, "step": 36951 }, { "epoch": 3.7568117120780804, "grad_norm": 0.2983756363391876, "learning_rate": 1.7678838459461335e-06, "loss": 0.3089, "step": 36952 }, { "epoch": 3.7569133794225293, "grad_norm": 0.2679460346698761, "learning_rate": 1.76761308352503e-06, "loss": 0.2905, "step": 36953 }, { "epoch": 3.7570150467669783, "grad_norm": 0.2809528708457947, "learning_rate": 1.7673423373879256e-06, "loss": 0.3167, "step": 36954 }, { "epoch": 3.7571167141114277, "grad_norm": 0.28761082887649536, "learning_rate": 1.767071607536182e-06, "loss": 0.2983, "step": 36955 }, { "epoch": 3.7572183814558766, "grad_norm": 0.2787284255027771, "learning_rate": 1.7668008939711624e-06, "loss": 0.3058, "step": 36956 }, { "epoch": 3.7573200488003256, "grad_norm": 0.27457815408706665, "learning_rate": 1.7665301966942329e-06, "loss": 0.315, "step": 36957 }, { "epoch": 3.7574217161447745, "grad_norm": 0.30192315578460693, "learning_rate": 1.766259515706754e-06, "loss": 0.2984, "step": 36958 }, { "epoch": 3.7575233834892234, "grad_norm": 0.29827556014060974, "learning_rate": 1.7659888510100947e-06, "loss": 0.3035, "step": 36959 }, { "epoch": 3.7576250508336724, "grad_norm": 0.27743619680404663, "learning_rate": 1.7657182026056119e-06, "loss": 0.3484, "step": 36960 }, { "epoch": 3.7577267181781213, "grad_norm": 0.2583410143852234, "learning_rate": 1.7654475704946722e-06, "loss": 0.2936, "step": 36961 }, { "epoch": 3.7578283855225703, "grad_norm": 0.29383358359336853, "learning_rate": 1.7651769546786418e-06, "loss": 0.3024, "step": 36962 }, { "epoch": 3.757930052867019, "grad_norm": 0.2877045273780823, "learning_rate": 1.7649063551588785e-06, "loss": 0.3068, "step": 36963 }, { "epoch": 3.758031720211468, "grad_norm": 0.3125942051410675, "learning_rate": 1.7646357719367497e-06, "loss": 0.2878, "step": 36964 }, { "epoch": 3.758133387555917, "grad_norm": 0.26480814814567566, "learning_rate": 1.7643652050136168e-06, "loss": 0.3419, "step": 36965 }, { "epoch": 3.758235054900366, "grad_norm": 0.28540849685668945, "learning_rate": 1.7640946543908416e-06, "loss": 0.2984, "step": 36966 }, { "epoch": 3.758336722244815, "grad_norm": 0.26842907071113586, "learning_rate": 1.7638241200697902e-06, "loss": 0.3082, "step": 36967 }, { "epoch": 3.758438389589264, "grad_norm": 0.2811644375324249, "learning_rate": 1.7635536020518223e-06, "loss": 0.3036, "step": 36968 }, { "epoch": 3.758540056933713, "grad_norm": 0.2714598476886749, "learning_rate": 1.7632831003383056e-06, "loss": 0.3151, "step": 36969 }, { "epoch": 3.758641724278162, "grad_norm": 0.28662109375, "learning_rate": 1.763012614930596e-06, "loss": 0.301, "step": 36970 }, { "epoch": 3.7587433916226107, "grad_norm": 0.27734488248825073, "learning_rate": 1.7627421458300592e-06, "loss": 0.3364, "step": 36971 }, { "epoch": 3.7588450589670597, "grad_norm": 0.25009533762931824, "learning_rate": 1.762471693038062e-06, "loss": 0.3062, "step": 36972 }, { "epoch": 3.7589467263115086, "grad_norm": 0.28810691833496094, "learning_rate": 1.7622012565559598e-06, "loss": 0.3101, "step": 36973 }, { "epoch": 3.7590483936559576, "grad_norm": 0.27700814604759216, "learning_rate": 1.7619308363851195e-06, "loss": 0.3059, "step": 36974 }, { "epoch": 3.7591500610004065, "grad_norm": 0.2547948658466339, "learning_rate": 1.7616604325269026e-06, "loss": 0.3008, "step": 36975 }, { "epoch": 3.7592517283448554, "grad_norm": 0.2611287534236908, "learning_rate": 1.7613900449826693e-06, "loss": 0.2921, "step": 36976 }, { "epoch": 3.7593533956893044, "grad_norm": 0.2809833586215973, "learning_rate": 1.7611196737537844e-06, "loss": 0.3278, "step": 36977 }, { "epoch": 3.7594550630337533, "grad_norm": 0.23991331458091736, "learning_rate": 1.7608493188416097e-06, "loss": 0.3143, "step": 36978 }, { "epoch": 3.7595567303782023, "grad_norm": 0.27132076025009155, "learning_rate": 1.760578980247506e-06, "loss": 0.2933, "step": 36979 }, { "epoch": 3.7596583977226516, "grad_norm": 0.2796791195869446, "learning_rate": 1.760308657972834e-06, "loss": 0.302, "step": 36980 }, { "epoch": 3.7597600650671006, "grad_norm": 0.28277620673179626, "learning_rate": 1.7600383520189567e-06, "loss": 0.3208, "step": 36981 }, { "epoch": 3.7598617324115495, "grad_norm": 0.27006620168685913, "learning_rate": 1.7597680623872398e-06, "loss": 0.2845, "step": 36982 }, { "epoch": 3.7599633997559985, "grad_norm": 0.26609811186790466, "learning_rate": 1.7594977890790383e-06, "loss": 0.3153, "step": 36983 }, { "epoch": 3.7600650671004474, "grad_norm": 0.26069197058677673, "learning_rate": 1.7592275320957187e-06, "loss": 0.298, "step": 36984 }, { "epoch": 3.7601667344448964, "grad_norm": 0.27707111835479736, "learning_rate": 1.7589572914386398e-06, "loss": 0.3163, "step": 36985 }, { "epoch": 3.7602684017893453, "grad_norm": 0.2747346758842468, "learning_rate": 1.7586870671091633e-06, "loss": 0.3081, "step": 36986 }, { "epoch": 3.7603700691337942, "grad_norm": 0.2798760235309601, "learning_rate": 1.758416859108652e-06, "loss": 0.3112, "step": 36987 }, { "epoch": 3.760471736478243, "grad_norm": 0.2644234001636505, "learning_rate": 1.7581466674384667e-06, "loss": 0.2827, "step": 36988 }, { "epoch": 3.760573403822692, "grad_norm": 0.30588066577911377, "learning_rate": 1.757876492099968e-06, "loss": 0.2902, "step": 36989 }, { "epoch": 3.760675071167141, "grad_norm": 0.2829548120498657, "learning_rate": 1.7576063330945154e-06, "loss": 0.2919, "step": 36990 }, { "epoch": 3.76077673851159, "grad_norm": 0.2581413686275482, "learning_rate": 1.7573361904234732e-06, "loss": 0.3371, "step": 36991 }, { "epoch": 3.760878405856039, "grad_norm": 0.25806668400764465, "learning_rate": 1.7570660640882003e-06, "loss": 0.3163, "step": 36992 }, { "epoch": 3.760980073200488, "grad_norm": 0.2777150869369507, "learning_rate": 1.756795954090057e-06, "loss": 0.3015, "step": 36993 }, { "epoch": 3.761081740544937, "grad_norm": 0.2809619903564453, "learning_rate": 1.7565258604304059e-06, "loss": 0.3045, "step": 36994 }, { "epoch": 3.7611834078893858, "grad_norm": 0.27458468079566956, "learning_rate": 1.7562557831106075e-06, "loss": 0.3259, "step": 36995 }, { "epoch": 3.761285075233835, "grad_norm": 0.2937764823436737, "learning_rate": 1.7559857221320193e-06, "loss": 0.3328, "step": 36996 }, { "epoch": 3.761386742578284, "grad_norm": 0.2934713661670685, "learning_rate": 1.755715677496006e-06, "loss": 0.2933, "step": 36997 }, { "epoch": 3.761488409922733, "grad_norm": 0.2713712155818939, "learning_rate": 1.7554456492039262e-06, "loss": 0.2987, "step": 36998 }, { "epoch": 3.761590077267182, "grad_norm": 0.27705156803131104, "learning_rate": 1.7551756372571405e-06, "loss": 0.3296, "step": 36999 }, { "epoch": 3.761691744611631, "grad_norm": 0.2793017029762268, "learning_rate": 1.754905641657007e-06, "loss": 0.3194, "step": 37000 }, { "epoch": 3.76179341195608, "grad_norm": 0.3094002604484558, "learning_rate": 1.7546356624048895e-06, "loss": 0.3266, "step": 37001 }, { "epoch": 3.761895079300529, "grad_norm": 0.27118387818336487, "learning_rate": 1.7543656995021458e-06, "loss": 0.2957, "step": 37002 }, { "epoch": 3.7619967466449777, "grad_norm": 0.2729896605014801, "learning_rate": 1.7540957529501352e-06, "loss": 0.3286, "step": 37003 }, { "epoch": 3.7620984139894267, "grad_norm": 0.280509889125824, "learning_rate": 1.7538258227502204e-06, "loss": 0.318, "step": 37004 }, { "epoch": 3.7622000813338756, "grad_norm": 0.2784450054168701, "learning_rate": 1.7535559089037596e-06, "loss": 0.3138, "step": 37005 }, { "epoch": 3.7623017486783246, "grad_norm": 0.27101990580558777, "learning_rate": 1.7532860114121114e-06, "loss": 0.2892, "step": 37006 }, { "epoch": 3.7624034160227735, "grad_norm": 0.2703055739402771, "learning_rate": 1.753016130276638e-06, "loss": 0.3077, "step": 37007 }, { "epoch": 3.7625050833672224, "grad_norm": 0.2721726596355438, "learning_rate": 1.7527462654986982e-06, "loss": 0.321, "step": 37008 }, { "epoch": 3.7626067507116714, "grad_norm": 0.2754509449005127, "learning_rate": 1.7524764170796504e-06, "loss": 0.3277, "step": 37009 }, { "epoch": 3.7627084180561203, "grad_norm": 0.2959064841270447, "learning_rate": 1.752206585020853e-06, "loss": 0.3197, "step": 37010 }, { "epoch": 3.7628100854005693, "grad_norm": 0.2687591016292572, "learning_rate": 1.7519367693236695e-06, "loss": 0.2969, "step": 37011 }, { "epoch": 3.762911752745018, "grad_norm": 0.2756929099559784, "learning_rate": 1.7516669699894563e-06, "loss": 0.3223, "step": 37012 }, { "epoch": 3.763013420089467, "grad_norm": 0.27942565083503723, "learning_rate": 1.7513971870195713e-06, "loss": 0.3454, "step": 37013 }, { "epoch": 3.763115087433916, "grad_norm": 0.2882128953933716, "learning_rate": 1.7511274204153767e-06, "loss": 0.3152, "step": 37014 }, { "epoch": 3.763216754778365, "grad_norm": 0.2799621820449829, "learning_rate": 1.7508576701782304e-06, "loss": 0.2889, "step": 37015 }, { "epoch": 3.763318422122814, "grad_norm": 0.2835064232349396, "learning_rate": 1.750587936309489e-06, "loss": 0.335, "step": 37016 }, { "epoch": 3.763420089467263, "grad_norm": 0.2773735821247101, "learning_rate": 1.7503182188105155e-06, "loss": 0.3105, "step": 37017 }, { "epoch": 3.763521756811712, "grad_norm": 0.2833656668663025, "learning_rate": 1.7500485176826659e-06, "loss": 0.3408, "step": 37018 }, { "epoch": 3.763623424156161, "grad_norm": 0.26323091983795166, "learning_rate": 1.7497788329273002e-06, "loss": 0.2942, "step": 37019 }, { "epoch": 3.7637250915006097, "grad_norm": 0.2800937294960022, "learning_rate": 1.7495091645457746e-06, "loss": 0.2925, "step": 37020 }, { "epoch": 3.763826758845059, "grad_norm": 0.2668053209781647, "learning_rate": 1.7492395125394512e-06, "loss": 0.2934, "step": 37021 }, { "epoch": 3.763928426189508, "grad_norm": 0.2851126194000244, "learning_rate": 1.7489698769096864e-06, "loss": 0.2851, "step": 37022 }, { "epoch": 3.764030093533957, "grad_norm": 0.27076783776283264, "learning_rate": 1.7487002576578372e-06, "loss": 0.3027, "step": 37023 }, { "epoch": 3.764131760878406, "grad_norm": 0.2711758613586426, "learning_rate": 1.7484306547852653e-06, "loss": 0.3291, "step": 37024 }, { "epoch": 3.764233428222855, "grad_norm": 0.2711881697177887, "learning_rate": 1.7481610682933265e-06, "loss": 0.2908, "step": 37025 }, { "epoch": 3.764335095567304, "grad_norm": 0.26960256695747375, "learning_rate": 1.7478914981833783e-06, "loss": 0.2929, "step": 37026 }, { "epoch": 3.7644367629117528, "grad_norm": 0.2786325514316559, "learning_rate": 1.7476219444567816e-06, "loss": 0.3252, "step": 37027 }, { "epoch": 3.7645384302562017, "grad_norm": 0.2789945900440216, "learning_rate": 1.7473524071148922e-06, "loss": 0.322, "step": 37028 }, { "epoch": 3.7646400976006507, "grad_norm": 0.2772332727909088, "learning_rate": 1.747082886159069e-06, "loss": 0.327, "step": 37029 }, { "epoch": 3.7647417649450996, "grad_norm": 0.2608702480792999, "learning_rate": 1.7468133815906673e-06, "loss": 0.2909, "step": 37030 }, { "epoch": 3.7648434322895485, "grad_norm": 0.26550543308258057, "learning_rate": 1.7465438934110484e-06, "loss": 0.3515, "step": 37031 }, { "epoch": 3.7649450996339975, "grad_norm": 0.27132585644721985, "learning_rate": 1.7462744216215682e-06, "loss": 0.3054, "step": 37032 }, { "epoch": 3.7650467669784464, "grad_norm": 0.2649427652359009, "learning_rate": 1.746004966223583e-06, "loss": 0.2953, "step": 37033 }, { "epoch": 3.7651484343228954, "grad_norm": 0.2716599106788635, "learning_rate": 1.7457355272184522e-06, "loss": 0.286, "step": 37034 }, { "epoch": 3.7652501016673443, "grad_norm": 0.2646951377391815, "learning_rate": 1.7454661046075332e-06, "loss": 0.2988, "step": 37035 }, { "epoch": 3.7653517690117932, "grad_norm": 0.2586825489997864, "learning_rate": 1.7451966983921808e-06, "loss": 0.2833, "step": 37036 }, { "epoch": 3.7654534363562426, "grad_norm": 0.271400511264801, "learning_rate": 1.7449273085737555e-06, "loss": 0.3195, "step": 37037 }, { "epoch": 3.7655551037006916, "grad_norm": 0.2642834186553955, "learning_rate": 1.744657935153613e-06, "loss": 0.2941, "step": 37038 }, { "epoch": 3.7656567710451405, "grad_norm": 0.2766396999359131, "learning_rate": 1.7443885781331104e-06, "loss": 0.2936, "step": 37039 }, { "epoch": 3.7657584383895895, "grad_norm": 0.2642015814781189, "learning_rate": 1.7441192375136028e-06, "loss": 0.2869, "step": 37040 }, { "epoch": 3.7658601057340384, "grad_norm": 0.2668847441673279, "learning_rate": 1.7438499132964504e-06, "loss": 0.3215, "step": 37041 }, { "epoch": 3.7659617730784873, "grad_norm": 0.2564765214920044, "learning_rate": 1.7435806054830078e-06, "loss": 0.3182, "step": 37042 }, { "epoch": 3.7660634404229363, "grad_norm": 0.27656009793281555, "learning_rate": 1.743311314074631e-06, "loss": 0.3098, "step": 37043 }, { "epoch": 3.7661651077673852, "grad_norm": 0.26646411418914795, "learning_rate": 1.7430420390726799e-06, "loss": 0.3077, "step": 37044 }, { "epoch": 3.766266775111834, "grad_norm": 0.2638779282569885, "learning_rate": 1.7427727804785088e-06, "loss": 0.3087, "step": 37045 }, { "epoch": 3.766368442456283, "grad_norm": 0.27404171228408813, "learning_rate": 1.7425035382934723e-06, "loss": 0.3013, "step": 37046 }, { "epoch": 3.766470109800732, "grad_norm": 0.26402467489242554, "learning_rate": 1.7422343125189311e-06, "loss": 0.3063, "step": 37047 }, { "epoch": 3.766571777145181, "grad_norm": 0.27290743589401245, "learning_rate": 1.741965103156239e-06, "loss": 0.2975, "step": 37048 }, { "epoch": 3.76667344448963, "grad_norm": 0.2585994601249695, "learning_rate": 1.7416959102067527e-06, "loss": 0.2892, "step": 37049 }, { "epoch": 3.766775111834079, "grad_norm": 0.2657338082790375, "learning_rate": 1.7414267336718266e-06, "loss": 0.3343, "step": 37050 }, { "epoch": 3.766876779178528, "grad_norm": 0.283194363117218, "learning_rate": 1.7411575735528198e-06, "loss": 0.3074, "step": 37051 }, { "epoch": 3.7669784465229768, "grad_norm": 0.2588462829589844, "learning_rate": 1.7408884298510864e-06, "loss": 0.3276, "step": 37052 }, { "epoch": 3.7670801138674257, "grad_norm": 0.26161080598831177, "learning_rate": 1.7406193025679807e-06, "loss": 0.3145, "step": 37053 }, { "epoch": 3.7671817812118746, "grad_norm": 0.2646310031414032, "learning_rate": 1.7403501917048626e-06, "loss": 0.2865, "step": 37054 }, { "epoch": 3.7672834485563236, "grad_norm": 0.2511507272720337, "learning_rate": 1.7400810972630855e-06, "loss": 0.3334, "step": 37055 }, { "epoch": 3.7673851159007725, "grad_norm": 0.27292323112487793, "learning_rate": 1.7398120192440039e-06, "loss": 0.3059, "step": 37056 }, { "epoch": 3.7674867832452215, "grad_norm": 0.27939730882644653, "learning_rate": 1.7395429576489754e-06, "loss": 0.3262, "step": 37057 }, { "epoch": 3.7675884505896704, "grad_norm": 0.28189951181411743, "learning_rate": 1.739273912479355e-06, "loss": 0.287, "step": 37058 }, { "epoch": 3.7676901179341193, "grad_norm": 0.2430259734392166, "learning_rate": 1.7390048837364981e-06, "loss": 0.2984, "step": 37059 }, { "epoch": 3.7677917852785683, "grad_norm": 0.2855587601661682, "learning_rate": 1.7387358714217578e-06, "loss": 0.2805, "step": 37060 }, { "epoch": 3.7678934526230172, "grad_norm": 0.27831026911735535, "learning_rate": 1.738466875536493e-06, "loss": 0.3062, "step": 37061 }, { "epoch": 3.7679951199674666, "grad_norm": 0.29010435938835144, "learning_rate": 1.7381978960820567e-06, "loss": 0.2962, "step": 37062 }, { "epoch": 3.7680967873119156, "grad_norm": 0.28841766715049744, "learning_rate": 1.7379289330598026e-06, "loss": 0.305, "step": 37063 }, { "epoch": 3.7681984546563645, "grad_norm": 0.26271510124206543, "learning_rate": 1.7376599864710897e-06, "loss": 0.2976, "step": 37064 }, { "epoch": 3.7683001220008134, "grad_norm": 0.28705447912216187, "learning_rate": 1.7373910563172696e-06, "loss": 0.3221, "step": 37065 }, { "epoch": 3.7684017893452624, "grad_norm": 0.26610156893730164, "learning_rate": 1.7371221425996971e-06, "loss": 0.3206, "step": 37066 }, { "epoch": 3.7685034566897113, "grad_norm": 0.25559571385383606, "learning_rate": 1.736853245319729e-06, "loss": 0.3023, "step": 37067 }, { "epoch": 3.7686051240341603, "grad_norm": 0.2664368450641632, "learning_rate": 1.736584364478719e-06, "loss": 0.3236, "step": 37068 }, { "epoch": 3.768706791378609, "grad_norm": 0.290022075176239, "learning_rate": 1.7363155000780218e-06, "loss": 0.3277, "step": 37069 }, { "epoch": 3.768808458723058, "grad_norm": 0.2931760251522064, "learning_rate": 1.7360466521189895e-06, "loss": 0.2922, "step": 37070 }, { "epoch": 3.768910126067507, "grad_norm": 0.2753107249736786, "learning_rate": 1.73577782060298e-06, "loss": 0.3284, "step": 37071 }, { "epoch": 3.769011793411956, "grad_norm": 0.2862902879714966, "learning_rate": 1.7355090055313466e-06, "loss": 0.2949, "step": 37072 }, { "epoch": 3.769113460756405, "grad_norm": 0.27589064836502075, "learning_rate": 1.7352402069054415e-06, "loss": 0.3205, "step": 37073 }, { "epoch": 3.769215128100854, "grad_norm": 0.26471588015556335, "learning_rate": 1.7349714247266213e-06, "loss": 0.2949, "step": 37074 }, { "epoch": 3.769316795445303, "grad_norm": 0.30206722021102905, "learning_rate": 1.7347026589962401e-06, "loss": 0.2923, "step": 37075 }, { "epoch": 3.769418462789752, "grad_norm": 0.2740798890590668, "learning_rate": 1.7344339097156488e-06, "loss": 0.3214, "step": 37076 }, { "epoch": 3.7695201301342007, "grad_norm": 0.27185308933258057, "learning_rate": 1.734165176886205e-06, "loss": 0.2883, "step": 37077 }, { "epoch": 3.76962179747865, "grad_norm": 0.308231383562088, "learning_rate": 1.733896460509261e-06, "loss": 0.3471, "step": 37078 }, { "epoch": 3.769723464823099, "grad_norm": 0.25130608677864075, "learning_rate": 1.7336277605861706e-06, "loss": 0.2841, "step": 37079 }, { "epoch": 3.769825132167548, "grad_norm": 0.24931886792182922, "learning_rate": 1.7333590771182857e-06, "loss": 0.2768, "step": 37080 }, { "epoch": 3.769926799511997, "grad_norm": 0.2715575098991394, "learning_rate": 1.7330904101069634e-06, "loss": 0.3055, "step": 37081 }, { "epoch": 3.770028466856446, "grad_norm": 0.25794079899787903, "learning_rate": 1.732821759553555e-06, "loss": 0.3166, "step": 37082 }, { "epoch": 3.770130134200895, "grad_norm": 0.3039742112159729, "learning_rate": 1.7325531254594125e-06, "loss": 0.3094, "step": 37083 }, { "epoch": 3.7702318015453438, "grad_norm": 0.2621369957923889, "learning_rate": 1.7322845078258943e-06, "loss": 0.3199, "step": 37084 }, { "epoch": 3.7703334688897927, "grad_norm": 0.28733110427856445, "learning_rate": 1.732015906654347e-06, "loss": 0.3324, "step": 37085 }, { "epoch": 3.7704351362342416, "grad_norm": 0.2583378553390503, "learning_rate": 1.7317473219461283e-06, "loss": 0.2944, "step": 37086 }, { "epoch": 3.7705368035786906, "grad_norm": 0.30629703402519226, "learning_rate": 1.7314787537025884e-06, "loss": 0.3244, "step": 37087 }, { "epoch": 3.7706384709231395, "grad_norm": 0.26663094758987427, "learning_rate": 1.7312102019250837e-06, "loss": 0.3073, "step": 37088 }, { "epoch": 3.7707401382675885, "grad_norm": 0.28575655817985535, "learning_rate": 1.7309416666149647e-06, "loss": 0.3213, "step": 37089 }, { "epoch": 3.7708418056120374, "grad_norm": 0.27871260046958923, "learning_rate": 1.7306731477735839e-06, "loss": 0.3089, "step": 37090 }, { "epoch": 3.7709434729564864, "grad_norm": 0.27876806259155273, "learning_rate": 1.7304046454022977e-06, "loss": 0.3064, "step": 37091 }, { "epoch": 3.7710451403009353, "grad_norm": 0.2645188868045807, "learning_rate": 1.7301361595024525e-06, "loss": 0.2933, "step": 37092 }, { "epoch": 3.7711468076453842, "grad_norm": 0.2930300831794739, "learning_rate": 1.7298676900754042e-06, "loss": 0.2836, "step": 37093 }, { "epoch": 3.771248474989833, "grad_norm": 0.28628018498420715, "learning_rate": 1.729599237122509e-06, "loss": 0.2942, "step": 37094 }, { "epoch": 3.771350142334282, "grad_norm": 0.2778410017490387, "learning_rate": 1.729330800645112e-06, "loss": 0.2972, "step": 37095 }, { "epoch": 3.771451809678731, "grad_norm": 0.26443761587142944, "learning_rate": 1.7290623806445711e-06, "loss": 0.3162, "step": 37096 }, { "epoch": 3.77155347702318, "grad_norm": 0.283584862947464, "learning_rate": 1.728793977122235e-06, "loss": 0.3726, "step": 37097 }, { "epoch": 3.771655144367629, "grad_norm": 0.2800217866897583, "learning_rate": 1.7285255900794594e-06, "loss": 0.3342, "step": 37098 }, { "epoch": 3.771756811712078, "grad_norm": 0.27221444249153137, "learning_rate": 1.728257219517594e-06, "loss": 0.3313, "step": 37099 }, { "epoch": 3.771858479056527, "grad_norm": 0.27828216552734375, "learning_rate": 1.7279888654379901e-06, "loss": 0.3112, "step": 37100 }, { "epoch": 3.7719601464009758, "grad_norm": 0.27840515971183777, "learning_rate": 1.7277205278420033e-06, "loss": 0.3084, "step": 37101 }, { "epoch": 3.7720618137454247, "grad_norm": 0.27522388100624084, "learning_rate": 1.7274522067309802e-06, "loss": 0.3081, "step": 37102 }, { "epoch": 3.772163481089874, "grad_norm": 0.30257153511047363, "learning_rate": 1.7271839021062753e-06, "loss": 0.3016, "step": 37103 }, { "epoch": 3.772265148434323, "grad_norm": 0.2807844281196594, "learning_rate": 1.7269156139692428e-06, "loss": 0.3044, "step": 37104 }, { "epoch": 3.772366815778772, "grad_norm": 0.2807562053203583, "learning_rate": 1.7266473423212287e-06, "loss": 0.3347, "step": 37105 }, { "epoch": 3.772468483123221, "grad_norm": 0.2628976106643677, "learning_rate": 1.7263790871635893e-06, "loss": 0.3287, "step": 37106 }, { "epoch": 3.77257015046767, "grad_norm": 0.2521332800388336, "learning_rate": 1.7261108484976736e-06, "loss": 0.2941, "step": 37107 }, { "epoch": 3.772671817812119, "grad_norm": 0.2761857509613037, "learning_rate": 1.7258426263248324e-06, "loss": 0.3122, "step": 37108 }, { "epoch": 3.7727734851565677, "grad_norm": 0.26277488470077515, "learning_rate": 1.7255744206464191e-06, "loss": 0.3031, "step": 37109 }, { "epoch": 3.7728751525010167, "grad_norm": 0.2582596242427826, "learning_rate": 1.7253062314637826e-06, "loss": 0.315, "step": 37110 }, { "epoch": 3.7729768198454656, "grad_norm": 0.25687453150749207, "learning_rate": 1.7250380587782778e-06, "loss": 0.3086, "step": 37111 }, { "epoch": 3.7730784871899146, "grad_norm": 0.27115264534950256, "learning_rate": 1.72476990259125e-06, "loss": 0.2791, "step": 37112 }, { "epoch": 3.7731801545343635, "grad_norm": 0.26592332124710083, "learning_rate": 1.7245017629040533e-06, "loss": 0.3333, "step": 37113 }, { "epoch": 3.7732818218788124, "grad_norm": 0.2800230085849762, "learning_rate": 1.7242336397180416e-06, "loss": 0.2835, "step": 37114 }, { "epoch": 3.7733834892232614, "grad_norm": 0.26995721459388733, "learning_rate": 1.7239655330345589e-06, "loss": 0.3135, "step": 37115 }, { "epoch": 3.7734851565677103, "grad_norm": 0.2857370674610138, "learning_rate": 1.7236974428549613e-06, "loss": 0.3165, "step": 37116 }, { "epoch": 3.7735868239121593, "grad_norm": 0.28196004033088684, "learning_rate": 1.723429369180597e-06, "loss": 0.2985, "step": 37117 }, { "epoch": 3.773688491256608, "grad_norm": 0.2726530432701111, "learning_rate": 1.7231613120128155e-06, "loss": 0.3295, "step": 37118 }, { "epoch": 3.7737901586010576, "grad_norm": 0.31117209792137146, "learning_rate": 1.72289327135297e-06, "loss": 0.3065, "step": 37119 }, { "epoch": 3.7738918259455065, "grad_norm": 0.29454344511032104, "learning_rate": 1.722625247202408e-06, "loss": 0.3003, "step": 37120 }, { "epoch": 3.7739934932899555, "grad_norm": 0.24661390483379364, "learning_rate": 1.722357239562485e-06, "loss": 0.2974, "step": 37121 }, { "epoch": 3.7740951606344044, "grad_norm": 0.2812424898147583, "learning_rate": 1.722089248434543e-06, "loss": 0.3076, "step": 37122 }, { "epoch": 3.7741968279788534, "grad_norm": 0.2808544635772705, "learning_rate": 1.721821273819937e-06, "loss": 0.297, "step": 37123 }, { "epoch": 3.7742984953233023, "grad_norm": 0.270944207906723, "learning_rate": 1.7215533157200198e-06, "loss": 0.3166, "step": 37124 }, { "epoch": 3.7744001626677512, "grad_norm": 0.27660128474235535, "learning_rate": 1.7212853741361345e-06, "loss": 0.3461, "step": 37125 }, { "epoch": 3.7745018300122, "grad_norm": 0.28377383947372437, "learning_rate": 1.7210174490696358e-06, "loss": 0.3113, "step": 37126 }, { "epoch": 3.774603497356649, "grad_norm": 0.2757548689842224, "learning_rate": 1.720749540521872e-06, "loss": 0.2925, "step": 37127 }, { "epoch": 3.774705164701098, "grad_norm": 0.25544047355651855, "learning_rate": 1.7204816484941916e-06, "loss": 0.3393, "step": 37128 }, { "epoch": 3.774806832045547, "grad_norm": 0.2854366898536682, "learning_rate": 1.7202137729879465e-06, "loss": 0.3004, "step": 37129 }, { "epoch": 3.774908499389996, "grad_norm": 0.28594374656677246, "learning_rate": 1.719945914004485e-06, "loss": 0.3079, "step": 37130 }, { "epoch": 3.775010166734445, "grad_norm": 0.2680143117904663, "learning_rate": 1.7196780715451568e-06, "loss": 0.2832, "step": 37131 }, { "epoch": 3.775111834078894, "grad_norm": 0.25174570083618164, "learning_rate": 1.7194102456113088e-06, "loss": 0.3212, "step": 37132 }, { "epoch": 3.7752135014233428, "grad_norm": 0.2622799277305603, "learning_rate": 1.719142436204293e-06, "loss": 0.3024, "step": 37133 }, { "epoch": 3.7753151687677917, "grad_norm": 0.312806636095047, "learning_rate": 1.718874643325461e-06, "loss": 0.2914, "step": 37134 }, { "epoch": 3.7754168361122407, "grad_norm": 0.2655490040779114, "learning_rate": 1.7186068669761558e-06, "loss": 0.3064, "step": 37135 }, { "epoch": 3.7755185034566896, "grad_norm": 0.273777037858963, "learning_rate": 1.7183391071577305e-06, "loss": 0.2987, "step": 37136 }, { "epoch": 3.7756201708011385, "grad_norm": 0.2661524713039398, "learning_rate": 1.7180713638715335e-06, "loss": 0.333, "step": 37137 }, { "epoch": 3.7757218381455875, "grad_norm": 0.25670984387397766, "learning_rate": 1.717803637118911e-06, "loss": 0.3063, "step": 37138 }, { "epoch": 3.7758235054900364, "grad_norm": 0.2526339292526245, "learning_rate": 1.7175359269012155e-06, "loss": 0.3228, "step": 37139 }, { "epoch": 3.7759251728344854, "grad_norm": 0.2646946310997009, "learning_rate": 1.7172682332197944e-06, "loss": 0.3128, "step": 37140 }, { "epoch": 3.7760268401789343, "grad_norm": 0.2670101821422577, "learning_rate": 1.717000556075995e-06, "loss": 0.2911, "step": 37141 }, { "epoch": 3.7761285075233832, "grad_norm": 0.2632650136947632, "learning_rate": 1.716732895471166e-06, "loss": 0.2807, "step": 37142 }, { "epoch": 3.776230174867832, "grad_norm": 0.2899686098098755, "learning_rate": 1.7164652514066572e-06, "loss": 0.3211, "step": 37143 }, { "epoch": 3.7763318422122816, "grad_norm": 0.2534540295600891, "learning_rate": 1.7161976238838168e-06, "loss": 0.3014, "step": 37144 }, { "epoch": 3.7764335095567305, "grad_norm": 0.27470800280570984, "learning_rate": 1.7159300129039902e-06, "loss": 0.2802, "step": 37145 }, { "epoch": 3.7765351769011795, "grad_norm": 0.24560435116291046, "learning_rate": 1.71566241846853e-06, "loss": 0.2862, "step": 37146 }, { "epoch": 3.7766368442456284, "grad_norm": 0.2890012860298157, "learning_rate": 1.7153948405787819e-06, "loss": 0.3125, "step": 37147 }, { "epoch": 3.7767385115900773, "grad_norm": 0.26469147205352783, "learning_rate": 1.7151272792360923e-06, "loss": 0.3149, "step": 37148 }, { "epoch": 3.7768401789345263, "grad_norm": 0.3063724935054779, "learning_rate": 1.7148597344418127e-06, "loss": 0.3084, "step": 37149 }, { "epoch": 3.7769418462789752, "grad_norm": 0.2823210060596466, "learning_rate": 1.7145922061972885e-06, "loss": 0.3193, "step": 37150 }, { "epoch": 3.777043513623424, "grad_norm": 0.3025544285774231, "learning_rate": 1.7143246945038688e-06, "loss": 0.2847, "step": 37151 }, { "epoch": 3.777145180967873, "grad_norm": 0.2929431200027466, "learning_rate": 1.7140571993628984e-06, "loss": 0.3087, "step": 37152 }, { "epoch": 3.777246848312322, "grad_norm": 0.2896542549133301, "learning_rate": 1.7137897207757286e-06, "loss": 0.3244, "step": 37153 }, { "epoch": 3.777348515656771, "grad_norm": 0.27576449513435364, "learning_rate": 1.7135222587437056e-06, "loss": 0.2928, "step": 37154 }, { "epoch": 3.77745018300122, "grad_norm": 0.2631828188896179, "learning_rate": 1.7132548132681747e-06, "loss": 0.2991, "step": 37155 }, { "epoch": 3.777551850345669, "grad_norm": 0.2660306394100189, "learning_rate": 1.7129873843504863e-06, "loss": 0.3196, "step": 37156 }, { "epoch": 3.777653517690118, "grad_norm": 0.2743037939071655, "learning_rate": 1.7127199719919868e-06, "loss": 0.2884, "step": 37157 }, { "epoch": 3.7777551850345668, "grad_norm": 0.2680739164352417, "learning_rate": 1.7124525761940208e-06, "loss": 0.2967, "step": 37158 }, { "epoch": 3.7778568523790157, "grad_norm": 0.2603808641433716, "learning_rate": 1.7121851969579396e-06, "loss": 0.2815, "step": 37159 }, { "epoch": 3.777958519723465, "grad_norm": 0.2699401080608368, "learning_rate": 1.7119178342850878e-06, "loss": 0.3252, "step": 37160 }, { "epoch": 3.778060187067914, "grad_norm": 0.26083457469940186, "learning_rate": 1.7116504881768126e-06, "loss": 0.2982, "step": 37161 }, { "epoch": 3.778161854412363, "grad_norm": 0.2857652008533478, "learning_rate": 1.7113831586344592e-06, "loss": 0.2997, "step": 37162 }, { "epoch": 3.778263521756812, "grad_norm": 0.278024822473526, "learning_rate": 1.7111158456593779e-06, "loss": 0.2959, "step": 37163 }, { "epoch": 3.778365189101261, "grad_norm": 0.3082636892795563, "learning_rate": 1.710848549252913e-06, "loss": 0.3179, "step": 37164 }, { "epoch": 3.77846685644571, "grad_norm": 0.2764816880226135, "learning_rate": 1.7105812694164098e-06, "loss": 0.285, "step": 37165 }, { "epoch": 3.7785685237901587, "grad_norm": 0.27560678124427795, "learning_rate": 1.7103140061512186e-06, "loss": 0.3123, "step": 37166 }, { "epoch": 3.7786701911346077, "grad_norm": 0.2617558538913727, "learning_rate": 1.7100467594586834e-06, "loss": 0.3085, "step": 37167 }, { "epoch": 3.7787718584790566, "grad_norm": 0.29026785492897034, "learning_rate": 1.7097795293401493e-06, "loss": 0.3409, "step": 37168 }, { "epoch": 3.7788735258235056, "grad_norm": 0.26586538553237915, "learning_rate": 1.709512315796965e-06, "loss": 0.3319, "step": 37169 }, { "epoch": 3.7789751931679545, "grad_norm": 0.2983579933643341, "learning_rate": 1.7092451188304765e-06, "loss": 0.2991, "step": 37170 }, { "epoch": 3.7790768605124034, "grad_norm": 0.28846898674964905, "learning_rate": 1.708977938442029e-06, "loss": 0.3373, "step": 37171 }, { "epoch": 3.7791785278568524, "grad_norm": 0.2830597460269928, "learning_rate": 1.7087107746329667e-06, "loss": 0.3161, "step": 37172 }, { "epoch": 3.7792801952013013, "grad_norm": 0.28589966893196106, "learning_rate": 1.708443627404639e-06, "loss": 0.3141, "step": 37173 }, { "epoch": 3.7793818625457503, "grad_norm": 0.2844351828098297, "learning_rate": 1.7081764967583902e-06, "loss": 0.3054, "step": 37174 }, { "epoch": 3.779483529890199, "grad_norm": 0.2886238396167755, "learning_rate": 1.7079093826955645e-06, "loss": 0.3212, "step": 37175 }, { "epoch": 3.779585197234648, "grad_norm": 0.2899487614631653, "learning_rate": 1.7076422852175102e-06, "loss": 0.3122, "step": 37176 }, { "epoch": 3.779686864579097, "grad_norm": 0.25949710607528687, "learning_rate": 1.707375204325572e-06, "loss": 0.2941, "step": 37177 }, { "epoch": 3.779788531923546, "grad_norm": 0.2662949562072754, "learning_rate": 1.7071081400210937e-06, "loss": 0.3211, "step": 37178 }, { "epoch": 3.779890199267995, "grad_norm": 0.24669882655143738, "learning_rate": 1.706841092305423e-06, "loss": 0.3071, "step": 37179 }, { "epoch": 3.779991866612444, "grad_norm": 0.2651495933532715, "learning_rate": 1.7065740611799049e-06, "loss": 0.3137, "step": 37180 }, { "epoch": 3.780093533956893, "grad_norm": 0.26297298073768616, "learning_rate": 1.7063070466458836e-06, "loss": 0.2997, "step": 37181 }, { "epoch": 3.780195201301342, "grad_norm": 0.28036588430404663, "learning_rate": 1.7060400487047035e-06, "loss": 0.328, "step": 37182 }, { "epoch": 3.7802968686457907, "grad_norm": 0.2949558198451996, "learning_rate": 1.7057730673577122e-06, "loss": 0.3242, "step": 37183 }, { "epoch": 3.7803985359902397, "grad_norm": 0.28464722633361816, "learning_rate": 1.7055061026062536e-06, "loss": 0.3244, "step": 37184 }, { "epoch": 3.780500203334689, "grad_norm": 0.26763561367988586, "learning_rate": 1.7052391544516705e-06, "loss": 0.3294, "step": 37185 }, { "epoch": 3.780601870679138, "grad_norm": 0.2772543132305145, "learning_rate": 1.7049722228953113e-06, "loss": 0.3223, "step": 37186 }, { "epoch": 3.780703538023587, "grad_norm": 0.2775110602378845, "learning_rate": 1.7047053079385195e-06, "loss": 0.2966, "step": 37187 }, { "epoch": 3.780805205368036, "grad_norm": 0.276079922914505, "learning_rate": 1.7044384095826372e-06, "loss": 0.2869, "step": 37188 }, { "epoch": 3.780906872712485, "grad_norm": 0.27355822920799255, "learning_rate": 1.7041715278290122e-06, "loss": 0.2998, "step": 37189 }, { "epoch": 3.7810085400569338, "grad_norm": 0.26240119338035583, "learning_rate": 1.7039046626789885e-06, "loss": 0.2864, "step": 37190 }, { "epoch": 3.7811102074013827, "grad_norm": 0.2648555338382721, "learning_rate": 1.70363781413391e-06, "loss": 0.3169, "step": 37191 }, { "epoch": 3.7812118747458316, "grad_norm": 0.26336994767189026, "learning_rate": 1.7033709821951183e-06, "loss": 0.3119, "step": 37192 }, { "epoch": 3.7813135420902806, "grad_norm": 0.2563760280609131, "learning_rate": 1.7031041668639625e-06, "loss": 0.2763, "step": 37193 }, { "epoch": 3.7814152094347295, "grad_norm": 0.2597671151161194, "learning_rate": 1.702837368141785e-06, "loss": 0.3094, "step": 37194 }, { "epoch": 3.7815168767791785, "grad_norm": 0.26832371950149536, "learning_rate": 1.7025705860299263e-06, "loss": 0.2757, "step": 37195 }, { "epoch": 3.7816185441236274, "grad_norm": 0.2740190923213959, "learning_rate": 1.702303820529736e-06, "loss": 0.3184, "step": 37196 }, { "epoch": 3.7817202114680764, "grad_norm": 0.28717684745788574, "learning_rate": 1.702037071642555e-06, "loss": 0.319, "step": 37197 }, { "epoch": 3.7818218788125253, "grad_norm": 0.2643793225288391, "learning_rate": 1.7017703393697261e-06, "loss": 0.2874, "step": 37198 }, { "epoch": 3.7819235461569742, "grad_norm": 0.26227378845214844, "learning_rate": 1.701503623712596e-06, "loss": 0.3131, "step": 37199 }, { "epoch": 3.782025213501423, "grad_norm": 0.27116090059280396, "learning_rate": 1.701236924672507e-06, "loss": 0.321, "step": 37200 }, { "epoch": 3.7821268808458726, "grad_norm": 0.2743876576423645, "learning_rate": 1.7009702422508024e-06, "loss": 0.3127, "step": 37201 }, { "epoch": 3.7822285481903215, "grad_norm": 0.26636645197868347, "learning_rate": 1.7007035764488238e-06, "loss": 0.2927, "step": 37202 }, { "epoch": 3.7823302155347704, "grad_norm": 0.26368218660354614, "learning_rate": 1.7004369272679183e-06, "loss": 0.2872, "step": 37203 }, { "epoch": 3.7824318828792194, "grad_norm": 0.2670592963695526, "learning_rate": 1.7001702947094279e-06, "loss": 0.3033, "step": 37204 }, { "epoch": 3.7825335502236683, "grad_norm": 0.2785010039806366, "learning_rate": 1.6999036787746932e-06, "loss": 0.3277, "step": 37205 }, { "epoch": 3.7826352175681173, "grad_norm": 0.26098859310150146, "learning_rate": 1.6996370794650613e-06, "loss": 0.3129, "step": 37206 }, { "epoch": 3.782736884912566, "grad_norm": 0.2947816252708435, "learning_rate": 1.6993704967818736e-06, "loss": 0.2933, "step": 37207 }, { "epoch": 3.782838552257015, "grad_norm": 0.2654915153980255, "learning_rate": 1.6991039307264712e-06, "loss": 0.308, "step": 37208 }, { "epoch": 3.782940219601464, "grad_norm": 0.2556610107421875, "learning_rate": 1.6988373813002007e-06, "loss": 0.3156, "step": 37209 }, { "epoch": 3.783041886945913, "grad_norm": 0.2615315020084381, "learning_rate": 1.6985708485044029e-06, "loss": 0.3534, "step": 37210 }, { "epoch": 3.783143554290362, "grad_norm": 0.2526664733886719, "learning_rate": 1.6983043323404208e-06, "loss": 0.2868, "step": 37211 }, { "epoch": 3.783245221634811, "grad_norm": 0.27638331055641174, "learning_rate": 1.6980378328095948e-06, "loss": 0.3073, "step": 37212 }, { "epoch": 3.78334688897926, "grad_norm": 0.2570204734802246, "learning_rate": 1.6977713499132714e-06, "loss": 0.3308, "step": 37213 }, { "epoch": 3.783448556323709, "grad_norm": 0.2805899977684021, "learning_rate": 1.6975048836527912e-06, "loss": 0.2908, "step": 37214 }, { "epoch": 3.7835502236681577, "grad_norm": 0.2848620116710663, "learning_rate": 1.697238434029495e-06, "loss": 0.3098, "step": 37215 }, { "epoch": 3.7836518910126067, "grad_norm": 0.2708030641078949, "learning_rate": 1.6969720010447282e-06, "loss": 0.3401, "step": 37216 }, { "epoch": 3.7837535583570556, "grad_norm": 0.2717326581478119, "learning_rate": 1.6967055846998315e-06, "loss": 0.3161, "step": 37217 }, { "epoch": 3.7838552257015046, "grad_norm": 0.29538971185684204, "learning_rate": 1.696439184996146e-06, "loss": 0.3132, "step": 37218 }, { "epoch": 3.7839568930459535, "grad_norm": 0.2835516631603241, "learning_rate": 1.696172801935016e-06, "loss": 0.3036, "step": 37219 }, { "epoch": 3.7840585603904024, "grad_norm": 0.2777635157108307, "learning_rate": 1.6959064355177824e-06, "loss": 0.2849, "step": 37220 }, { "epoch": 3.7841602277348514, "grad_norm": 0.2720741927623749, "learning_rate": 1.6956400857457866e-06, "loss": 0.3201, "step": 37221 }, { "epoch": 3.7842618950793003, "grad_norm": 0.2835812270641327, "learning_rate": 1.6953737526203694e-06, "loss": 0.2875, "step": 37222 }, { "epoch": 3.7843635624237493, "grad_norm": 0.25836417078971863, "learning_rate": 1.695107436142876e-06, "loss": 0.3261, "step": 37223 }, { "epoch": 3.784465229768198, "grad_norm": 0.2858618497848511, "learning_rate": 1.6948411363146456e-06, "loss": 0.3061, "step": 37224 }, { "epoch": 3.784566897112647, "grad_norm": 0.26136183738708496, "learning_rate": 1.6945748531370188e-06, "loss": 0.3247, "step": 37225 }, { "epoch": 3.7846685644570965, "grad_norm": 0.27641165256500244, "learning_rate": 1.6943085866113395e-06, "loss": 0.3064, "step": 37226 }, { "epoch": 3.7847702318015455, "grad_norm": 0.26578906178474426, "learning_rate": 1.6940423367389486e-06, "loss": 0.3295, "step": 37227 }, { "epoch": 3.7848718991459944, "grad_norm": 0.2871968746185303, "learning_rate": 1.6937761035211847e-06, "loss": 0.3211, "step": 37228 }, { "epoch": 3.7849735664904434, "grad_norm": 0.27329736948013306, "learning_rate": 1.693509886959393e-06, "loss": 0.3331, "step": 37229 }, { "epoch": 3.7850752338348923, "grad_norm": 0.2918454110622406, "learning_rate": 1.6932436870549128e-06, "loss": 0.2967, "step": 37230 }, { "epoch": 3.7851769011793412, "grad_norm": 0.27628323435783386, "learning_rate": 1.6929775038090852e-06, "loss": 0.299, "step": 37231 }, { "epoch": 3.78527856852379, "grad_norm": 0.2602217495441437, "learning_rate": 1.692711337223249e-06, "loss": 0.2993, "step": 37232 }, { "epoch": 3.785380235868239, "grad_norm": 0.2504415214061737, "learning_rate": 1.6924451872987492e-06, "loss": 0.2951, "step": 37233 }, { "epoch": 3.785481903212688, "grad_norm": 0.28335145115852356, "learning_rate": 1.6921790540369242e-06, "loss": 0.2981, "step": 37234 }, { "epoch": 3.785583570557137, "grad_norm": 0.28030529618263245, "learning_rate": 1.6919129374391136e-06, "loss": 0.3134, "step": 37235 }, { "epoch": 3.785685237901586, "grad_norm": 0.27342715859413147, "learning_rate": 1.6916468375066625e-06, "loss": 0.3015, "step": 37236 }, { "epoch": 3.785786905246035, "grad_norm": 0.2909674048423767, "learning_rate": 1.6913807542409056e-06, "loss": 0.3153, "step": 37237 }, { "epoch": 3.785888572590484, "grad_norm": 0.27886393666267395, "learning_rate": 1.691114687643186e-06, "loss": 0.3298, "step": 37238 }, { "epoch": 3.7859902399349328, "grad_norm": 0.2867027521133423, "learning_rate": 1.690848637714846e-06, "loss": 0.3455, "step": 37239 }, { "epoch": 3.7860919072793817, "grad_norm": 0.28395673632621765, "learning_rate": 1.6905826044572237e-06, "loss": 0.2706, "step": 37240 }, { "epoch": 3.7861935746238307, "grad_norm": 0.28939908742904663, "learning_rate": 1.6903165878716605e-06, "loss": 0.3141, "step": 37241 }, { "epoch": 3.78629524196828, "grad_norm": 0.29568764567375183, "learning_rate": 1.690050587959494e-06, "loss": 0.3185, "step": 37242 }, { "epoch": 3.786396909312729, "grad_norm": 0.2824150323867798, "learning_rate": 1.6897846047220678e-06, "loss": 0.2962, "step": 37243 }, { "epoch": 3.786498576657178, "grad_norm": 0.2710159420967102, "learning_rate": 1.6895186381607198e-06, "loss": 0.2903, "step": 37244 }, { "epoch": 3.786600244001627, "grad_norm": 0.28163495659828186, "learning_rate": 1.689252688276789e-06, "loss": 0.279, "step": 37245 }, { "epoch": 3.786701911346076, "grad_norm": 0.2770146429538727, "learning_rate": 1.6889867550716194e-06, "loss": 0.307, "step": 37246 }, { "epoch": 3.7868035786905248, "grad_norm": 0.2776454985141754, "learning_rate": 1.6887208385465442e-06, "loss": 0.3046, "step": 37247 }, { "epoch": 3.7869052460349737, "grad_norm": 0.30864155292510986, "learning_rate": 1.688454938702907e-06, "loss": 0.2995, "step": 37248 }, { "epoch": 3.7870069133794226, "grad_norm": 0.27157220244407654, "learning_rate": 1.6881890555420483e-06, "loss": 0.308, "step": 37249 }, { "epoch": 3.7871085807238716, "grad_norm": 0.2673695683479309, "learning_rate": 1.687923189065306e-06, "loss": 0.3112, "step": 37250 }, { "epoch": 3.7872102480683205, "grad_norm": 0.27822884917259216, "learning_rate": 1.6876573392740197e-06, "loss": 0.2934, "step": 37251 }, { "epoch": 3.7873119154127695, "grad_norm": 0.2679363787174225, "learning_rate": 1.6873915061695273e-06, "loss": 0.3241, "step": 37252 }, { "epoch": 3.7874135827572184, "grad_norm": 0.29659414291381836, "learning_rate": 1.68712568975317e-06, "loss": 0.344, "step": 37253 }, { "epoch": 3.7875152501016673, "grad_norm": 0.27327412366867065, "learning_rate": 1.6868598900262873e-06, "loss": 0.331, "step": 37254 }, { "epoch": 3.7876169174461163, "grad_norm": 0.2684704661369324, "learning_rate": 1.6865941069902148e-06, "loss": 0.3407, "step": 37255 }, { "epoch": 3.7877185847905652, "grad_norm": 0.26928940415382385, "learning_rate": 1.6863283406462977e-06, "loss": 0.3122, "step": 37256 }, { "epoch": 3.787820252135014, "grad_norm": 0.25705036520957947, "learning_rate": 1.6860625909958673e-06, "loss": 0.3053, "step": 37257 }, { "epoch": 3.787921919479463, "grad_norm": 0.29339075088500977, "learning_rate": 1.685796858040266e-06, "loss": 0.3211, "step": 37258 }, { "epoch": 3.788023586823912, "grad_norm": 0.25149357318878174, "learning_rate": 1.6855311417808356e-06, "loss": 0.3092, "step": 37259 }, { "epoch": 3.788125254168361, "grad_norm": 0.2879132032394409, "learning_rate": 1.685265442218909e-06, "loss": 0.3092, "step": 37260 }, { "epoch": 3.78822692151281, "grad_norm": 0.2682775557041168, "learning_rate": 1.6849997593558286e-06, "loss": 0.3176, "step": 37261 }, { "epoch": 3.788328588857259, "grad_norm": 0.27113234996795654, "learning_rate": 1.6847340931929302e-06, "loss": 0.2865, "step": 37262 }, { "epoch": 3.788430256201708, "grad_norm": 0.288718044757843, "learning_rate": 1.684468443731555e-06, "loss": 0.3153, "step": 37263 }, { "epoch": 3.7885319235461568, "grad_norm": 0.25527679920196533, "learning_rate": 1.6842028109730403e-06, "loss": 0.3225, "step": 37264 }, { "epoch": 3.7886335908906057, "grad_norm": 0.2910873293876648, "learning_rate": 1.6839371949187221e-06, "loss": 0.3072, "step": 37265 }, { "epoch": 3.7887352582350546, "grad_norm": 0.275896281003952, "learning_rate": 1.6836715955699433e-06, "loss": 0.305, "step": 37266 }, { "epoch": 3.788836925579504, "grad_norm": 0.2582874596118927, "learning_rate": 1.683406012928036e-06, "loss": 0.3047, "step": 37267 }, { "epoch": 3.788938592923953, "grad_norm": 0.2868543863296509, "learning_rate": 1.6831404469943407e-06, "loss": 0.2823, "step": 37268 }, { "epoch": 3.789040260268402, "grad_norm": 0.27784937620162964, "learning_rate": 1.6828748977701991e-06, "loss": 0.2967, "step": 37269 }, { "epoch": 3.789141927612851, "grad_norm": 0.26767969131469727, "learning_rate": 1.6826093652569426e-06, "loss": 0.3295, "step": 37270 }, { "epoch": 3.7892435949573, "grad_norm": 0.2637333571910858, "learning_rate": 1.682343849455913e-06, "loss": 0.292, "step": 37271 }, { "epoch": 3.7893452623017487, "grad_norm": 0.26660650968551636, "learning_rate": 1.6820783503684451e-06, "loss": 0.361, "step": 37272 }, { "epoch": 3.7894469296461977, "grad_norm": 0.2861107885837555, "learning_rate": 1.6818128679958811e-06, "loss": 0.2926, "step": 37273 }, { "epoch": 3.7895485969906466, "grad_norm": 0.2859483063220978, "learning_rate": 1.6815474023395523e-06, "loss": 0.3268, "step": 37274 }, { "epoch": 3.7896502643350956, "grad_norm": 0.25915947556495667, "learning_rate": 1.681281953400799e-06, "loss": 0.3002, "step": 37275 }, { "epoch": 3.7897519316795445, "grad_norm": 0.27636227011680603, "learning_rate": 1.681016521180962e-06, "loss": 0.3137, "step": 37276 }, { "epoch": 3.7898535990239934, "grad_norm": 0.27221354842185974, "learning_rate": 1.680751105681372e-06, "loss": 0.3197, "step": 37277 }, { "epoch": 3.7899552663684424, "grad_norm": 0.2644888460636139, "learning_rate": 1.6804857069033702e-06, "loss": 0.3009, "step": 37278 }, { "epoch": 3.7900569337128913, "grad_norm": 0.29353347420692444, "learning_rate": 1.680220324848293e-06, "loss": 0.3032, "step": 37279 }, { "epoch": 3.7901586010573403, "grad_norm": 0.2722032070159912, "learning_rate": 1.6799549595174753e-06, "loss": 0.2997, "step": 37280 }, { "epoch": 3.790260268401789, "grad_norm": 0.2577393352985382, "learning_rate": 1.6796896109122568e-06, "loss": 0.2864, "step": 37281 }, { "epoch": 3.790361935746238, "grad_norm": 0.25380367040634155, "learning_rate": 1.6794242790339733e-06, "loss": 0.3087, "step": 37282 }, { "epoch": 3.7904636030906875, "grad_norm": 0.28478971123695374, "learning_rate": 1.679158963883961e-06, "loss": 0.2981, "step": 37283 }, { "epoch": 3.7905652704351365, "grad_norm": 0.25866931676864624, "learning_rate": 1.6788936654635552e-06, "loss": 0.2572, "step": 37284 }, { "epoch": 3.7906669377795854, "grad_norm": 0.2896205186843872, "learning_rate": 1.6786283837740941e-06, "loss": 0.3266, "step": 37285 }, { "epoch": 3.7907686051240344, "grad_norm": 0.28737369179725647, "learning_rate": 1.6783631188169168e-06, "loss": 0.3365, "step": 37286 }, { "epoch": 3.7908702724684833, "grad_norm": 0.2528115510940552, "learning_rate": 1.6780978705933538e-06, "loss": 0.3052, "step": 37287 }, { "epoch": 3.7909719398129322, "grad_norm": 0.27797865867614746, "learning_rate": 1.6778326391047456e-06, "loss": 0.3207, "step": 37288 }, { "epoch": 3.791073607157381, "grad_norm": 0.2728256285190582, "learning_rate": 1.6775674243524276e-06, "loss": 0.3118, "step": 37289 }, { "epoch": 3.79117527450183, "grad_norm": 0.2981513440608978, "learning_rate": 1.677302226337733e-06, "loss": 0.3295, "step": 37290 }, { "epoch": 3.791276941846279, "grad_norm": 0.26865214109420776, "learning_rate": 1.6770370450620026e-06, "loss": 0.3008, "step": 37291 }, { "epoch": 3.791378609190728, "grad_norm": 0.2759664058685303, "learning_rate": 1.6767718805265697e-06, "loss": 0.2845, "step": 37292 }, { "epoch": 3.791480276535177, "grad_norm": 0.29516106843948364, "learning_rate": 1.67650673273277e-06, "loss": 0.3066, "step": 37293 }, { "epoch": 3.791581943879626, "grad_norm": 0.2709622085094452, "learning_rate": 1.6762416016819384e-06, "loss": 0.3071, "step": 37294 }, { "epoch": 3.791683611224075, "grad_norm": 0.26674285531044006, "learning_rate": 1.6759764873754131e-06, "loss": 0.3059, "step": 37295 }, { "epoch": 3.7917852785685238, "grad_norm": 0.2771189510822296, "learning_rate": 1.6757113898145283e-06, "loss": 0.3056, "step": 37296 }, { "epoch": 3.7918869459129727, "grad_norm": 0.28172361850738525, "learning_rate": 1.6754463090006184e-06, "loss": 0.287, "step": 37297 }, { "epoch": 3.7919886132574216, "grad_norm": 0.28795167803764343, "learning_rate": 1.6751812449350208e-06, "loss": 0.3345, "step": 37298 }, { "epoch": 3.7920902806018706, "grad_norm": 0.2710367739200592, "learning_rate": 1.6749161976190708e-06, "loss": 0.3075, "step": 37299 }, { "epoch": 3.7921919479463195, "grad_norm": 0.2828757166862488, "learning_rate": 1.6746511670541004e-06, "loss": 0.319, "step": 37300 }, { "epoch": 3.7922936152907685, "grad_norm": 0.28686368465423584, "learning_rate": 1.6743861532414496e-06, "loss": 0.3109, "step": 37301 }, { "epoch": 3.7923952826352174, "grad_norm": 0.26750648021698, "learning_rate": 1.6741211561824505e-06, "loss": 0.3191, "step": 37302 }, { "epoch": 3.7924969499796664, "grad_norm": 0.2766762971878052, "learning_rate": 1.6738561758784394e-06, "loss": 0.3194, "step": 37303 }, { "epoch": 3.7925986173241153, "grad_norm": 0.2586953043937683, "learning_rate": 1.673591212330748e-06, "loss": 0.3042, "step": 37304 }, { "epoch": 3.7927002846685642, "grad_norm": 0.2794444262981415, "learning_rate": 1.6733262655407156e-06, "loss": 0.3128, "step": 37305 }, { "epoch": 3.792801952013013, "grad_norm": 0.274116575717926, "learning_rate": 1.6730613355096747e-06, "loss": 0.2937, "step": 37306 }, { "epoch": 3.792903619357462, "grad_norm": 0.2645489275455475, "learning_rate": 1.6727964222389593e-06, "loss": 0.2986, "step": 37307 }, { "epoch": 3.7930052867019115, "grad_norm": 0.26136884093284607, "learning_rate": 1.6725315257299056e-06, "loss": 0.303, "step": 37308 }, { "epoch": 3.7931069540463604, "grad_norm": 0.2786875069141388, "learning_rate": 1.6722666459838477e-06, "loss": 0.2867, "step": 37309 }, { "epoch": 3.7932086213908094, "grad_norm": 0.2638227641582489, "learning_rate": 1.6720017830021178e-06, "loss": 0.3262, "step": 37310 }, { "epoch": 3.7933102887352583, "grad_norm": 0.2839864492416382, "learning_rate": 1.6717369367860542e-06, "loss": 0.312, "step": 37311 }, { "epoch": 3.7934119560797073, "grad_norm": 0.28100141882896423, "learning_rate": 1.6714721073369888e-06, "loss": 0.3411, "step": 37312 }, { "epoch": 3.793513623424156, "grad_norm": 0.28658923506736755, "learning_rate": 1.6712072946562557e-06, "loss": 0.3087, "step": 37313 }, { "epoch": 3.793615290768605, "grad_norm": 0.2831563353538513, "learning_rate": 1.6709424987451878e-06, "loss": 0.3085, "step": 37314 }, { "epoch": 3.793716958113054, "grad_norm": 0.27368342876434326, "learning_rate": 1.6706777196051222e-06, "loss": 0.2827, "step": 37315 }, { "epoch": 3.793818625457503, "grad_norm": 0.2635936439037323, "learning_rate": 1.6704129572373905e-06, "loss": 0.2919, "step": 37316 }, { "epoch": 3.793920292801952, "grad_norm": 0.28510981798171997, "learning_rate": 1.6701482116433266e-06, "loss": 0.2887, "step": 37317 }, { "epoch": 3.794021960146401, "grad_norm": 0.27350351214408875, "learning_rate": 1.6698834828242654e-06, "loss": 0.3023, "step": 37318 }, { "epoch": 3.79412362749085, "grad_norm": 0.2674453854560852, "learning_rate": 1.6696187707815397e-06, "loss": 0.3121, "step": 37319 }, { "epoch": 3.794225294835299, "grad_norm": 0.2743951976299286, "learning_rate": 1.6693540755164822e-06, "loss": 0.3179, "step": 37320 }, { "epoch": 3.7943269621797477, "grad_norm": 0.28171196579933167, "learning_rate": 1.669089397030429e-06, "loss": 0.3289, "step": 37321 }, { "epoch": 3.7944286295241967, "grad_norm": 0.26794472336769104, "learning_rate": 1.6688247353247116e-06, "loss": 0.3074, "step": 37322 }, { "epoch": 3.7945302968686456, "grad_norm": 0.28017958998680115, "learning_rate": 1.6685600904006638e-06, "loss": 0.3242, "step": 37323 }, { "epoch": 3.794631964213095, "grad_norm": 0.27831244468688965, "learning_rate": 1.6682954622596165e-06, "loss": 0.3027, "step": 37324 }, { "epoch": 3.794733631557544, "grad_norm": 0.26195085048675537, "learning_rate": 1.668030850902907e-06, "loss": 0.3205, "step": 37325 }, { "epoch": 3.794835298901993, "grad_norm": 0.2842785120010376, "learning_rate": 1.6677662563318657e-06, "loss": 0.3133, "step": 37326 }, { "epoch": 3.794936966246442, "grad_norm": 0.2791615426540375, "learning_rate": 1.6675016785478254e-06, "loss": 0.3098, "step": 37327 }, { "epoch": 3.7950386335908908, "grad_norm": 0.28235334157943726, "learning_rate": 1.6672371175521206e-06, "loss": 0.3592, "step": 37328 }, { "epoch": 3.7951403009353397, "grad_norm": 0.26866328716278076, "learning_rate": 1.6669725733460835e-06, "loss": 0.3159, "step": 37329 }, { "epoch": 3.7952419682797887, "grad_norm": 0.29164737462997437, "learning_rate": 1.6667080459310452e-06, "loss": 0.279, "step": 37330 }, { "epoch": 3.7953436356242376, "grad_norm": 0.26448172330856323, "learning_rate": 1.6664435353083408e-06, "loss": 0.3042, "step": 37331 }, { "epoch": 3.7954453029686865, "grad_norm": 0.2621088922023773, "learning_rate": 1.6661790414793022e-06, "loss": 0.3106, "step": 37332 }, { "epoch": 3.7955469703131355, "grad_norm": 0.27332672476768494, "learning_rate": 1.6659145644452608e-06, "loss": 0.3092, "step": 37333 }, { "epoch": 3.7956486376575844, "grad_norm": 0.2568727731704712, "learning_rate": 1.665650104207548e-06, "loss": 0.3062, "step": 37334 }, { "epoch": 3.7957503050020334, "grad_norm": 0.2971149981021881, "learning_rate": 1.6653856607674996e-06, "loss": 0.3167, "step": 37335 }, { "epoch": 3.7958519723464823, "grad_norm": 0.2863031327724457, "learning_rate": 1.6651212341264456e-06, "loss": 0.2875, "step": 37336 }, { "epoch": 3.7959536396909312, "grad_norm": 0.29476749897003174, "learning_rate": 1.6648568242857165e-06, "loss": 0.3115, "step": 37337 }, { "epoch": 3.79605530703538, "grad_norm": 0.274629682302475, "learning_rate": 1.6645924312466482e-06, "loss": 0.3111, "step": 37338 }, { "epoch": 3.796156974379829, "grad_norm": 0.2780078649520874, "learning_rate": 1.66432805501057e-06, "loss": 0.3131, "step": 37339 }, { "epoch": 3.796258641724278, "grad_norm": 0.28249481320381165, "learning_rate": 1.6640636955788132e-06, "loss": 0.3369, "step": 37340 }, { "epoch": 3.796360309068727, "grad_norm": 0.28934338688850403, "learning_rate": 1.6637993529527124e-06, "loss": 0.3157, "step": 37341 }, { "epoch": 3.796461976413176, "grad_norm": 0.2933271527290344, "learning_rate": 1.6635350271335976e-06, "loss": 0.3385, "step": 37342 }, { "epoch": 3.796563643757625, "grad_norm": 0.27839481830596924, "learning_rate": 1.6632707181228008e-06, "loss": 0.3225, "step": 37343 }, { "epoch": 3.796665311102074, "grad_norm": 0.298019677400589, "learning_rate": 1.6630064259216512e-06, "loss": 0.2866, "step": 37344 }, { "epoch": 3.7967669784465228, "grad_norm": 0.27370867133140564, "learning_rate": 1.662742150531484e-06, "loss": 0.2988, "step": 37345 }, { "epoch": 3.7968686457909717, "grad_norm": 0.25737443566322327, "learning_rate": 1.6624778919536284e-06, "loss": 0.2756, "step": 37346 }, { "epoch": 3.7969703131354207, "grad_norm": 0.2800602614879608, "learning_rate": 1.662213650189415e-06, "loss": 0.2845, "step": 37347 }, { "epoch": 3.7970719804798696, "grad_norm": 0.26681581139564514, "learning_rate": 1.6619494252401768e-06, "loss": 0.2931, "step": 37348 }, { "epoch": 3.797173647824319, "grad_norm": 0.2880530059337616, "learning_rate": 1.6616852171072451e-06, "loss": 0.3023, "step": 37349 }, { "epoch": 3.797275315168768, "grad_norm": 0.25373440980911255, "learning_rate": 1.6614210257919477e-06, "loss": 0.2785, "step": 37350 }, { "epoch": 3.797376982513217, "grad_norm": 0.25852710008621216, "learning_rate": 1.66115685129562e-06, "loss": 0.3007, "step": 37351 }, { "epoch": 3.797478649857666, "grad_norm": 0.2713800370693207, "learning_rate": 1.6608926936195901e-06, "loss": 0.309, "step": 37352 }, { "epoch": 3.7975803172021148, "grad_norm": 0.2685990035533905, "learning_rate": 1.66062855276519e-06, "loss": 0.3219, "step": 37353 }, { "epoch": 3.7976819845465637, "grad_norm": 0.26405197381973267, "learning_rate": 1.6603644287337473e-06, "loss": 0.308, "step": 37354 }, { "epoch": 3.7977836518910126, "grad_norm": 0.25650715827941895, "learning_rate": 1.660100321526597e-06, "loss": 0.2879, "step": 37355 }, { "epoch": 3.7978853192354616, "grad_norm": 0.26988688111305237, "learning_rate": 1.6598362311450677e-06, "loss": 0.3284, "step": 37356 }, { "epoch": 3.7979869865799105, "grad_norm": 0.25845423340797424, "learning_rate": 1.6595721575904883e-06, "loss": 0.306, "step": 37357 }, { "epoch": 3.7980886539243595, "grad_norm": 0.2708730101585388, "learning_rate": 1.6593081008641914e-06, "loss": 0.327, "step": 37358 }, { "epoch": 3.7981903212688084, "grad_norm": 0.27698561549186707, "learning_rate": 1.6590440609675074e-06, "loss": 0.3332, "step": 37359 }, { "epoch": 3.7982919886132573, "grad_norm": 0.2587730586528778, "learning_rate": 1.6587800379017631e-06, "loss": 0.3057, "step": 37360 }, { "epoch": 3.7983936559577063, "grad_norm": 0.2865139842033386, "learning_rate": 1.658516031668293e-06, "loss": 0.3181, "step": 37361 }, { "epoch": 3.7984953233021552, "grad_norm": 0.26511815190315247, "learning_rate": 1.6582520422684245e-06, "loss": 0.2954, "step": 37362 }, { "epoch": 3.798596990646604, "grad_norm": 0.2904304563999176, "learning_rate": 1.6579880697034889e-06, "loss": 0.338, "step": 37363 }, { "epoch": 3.7986986579910536, "grad_norm": 0.26471155881881714, "learning_rate": 1.6577241139748129e-06, "loss": 0.3067, "step": 37364 }, { "epoch": 3.7988003253355025, "grad_norm": 0.2699001431465149, "learning_rate": 1.6574601750837305e-06, "loss": 0.3277, "step": 37365 }, { "epoch": 3.7989019926799514, "grad_norm": 0.2662356495857239, "learning_rate": 1.6571962530315694e-06, "loss": 0.3113, "step": 37366 }, { "epoch": 3.7990036600244004, "grad_norm": 0.27833688259124756, "learning_rate": 1.6569323478196576e-06, "loss": 0.2972, "step": 37367 }, { "epoch": 3.7991053273688493, "grad_norm": 0.2949424386024475, "learning_rate": 1.656668459449328e-06, "loss": 0.3036, "step": 37368 }, { "epoch": 3.7992069947132983, "grad_norm": 0.2661893367767334, "learning_rate": 1.656404587921908e-06, "loss": 0.2949, "step": 37369 }, { "epoch": 3.799308662057747, "grad_norm": 0.2816371023654938, "learning_rate": 1.6561407332387252e-06, "loss": 0.3155, "step": 37370 }, { "epoch": 3.799410329402196, "grad_norm": 0.2772766649723053, "learning_rate": 1.655876895401113e-06, "loss": 0.297, "step": 37371 }, { "epoch": 3.799511996746645, "grad_norm": 0.2789324223995209, "learning_rate": 1.6556130744103981e-06, "loss": 0.3003, "step": 37372 }, { "epoch": 3.799613664091094, "grad_norm": 0.2419915497303009, "learning_rate": 1.6553492702679098e-06, "loss": 0.2766, "step": 37373 }, { "epoch": 3.799715331435543, "grad_norm": 0.2886626124382019, "learning_rate": 1.6550854829749753e-06, "loss": 0.3321, "step": 37374 }, { "epoch": 3.799816998779992, "grad_norm": 0.2720024585723877, "learning_rate": 1.6548217125329269e-06, "loss": 0.317, "step": 37375 }, { "epoch": 3.799918666124441, "grad_norm": 0.29754358530044556, "learning_rate": 1.6545579589430915e-06, "loss": 0.3283, "step": 37376 }, { "epoch": 3.80002033346889, "grad_norm": 0.28117331862449646, "learning_rate": 1.654294222206797e-06, "loss": 0.3134, "step": 37377 }, { "epoch": 3.8001220008133387, "grad_norm": 0.2752055525779724, "learning_rate": 1.6540305023253744e-06, "loss": 0.3202, "step": 37378 }, { "epoch": 3.8002236681577877, "grad_norm": 0.25025683641433716, "learning_rate": 1.6537667993001515e-06, "loss": 0.285, "step": 37379 }, { "epoch": 3.8003253355022366, "grad_norm": 0.2687215805053711, "learning_rate": 1.6535031131324542e-06, "loss": 0.3167, "step": 37380 }, { "epoch": 3.8004270028466856, "grad_norm": 0.28074657917022705, "learning_rate": 1.653239443823615e-06, "loss": 0.2997, "step": 37381 }, { "epoch": 3.8005286701911345, "grad_norm": 0.2655026912689209, "learning_rate": 1.6529757913749595e-06, "loss": 0.3301, "step": 37382 }, { "epoch": 3.8006303375355834, "grad_norm": 0.2594855725765228, "learning_rate": 1.6527121557878173e-06, "loss": 0.296, "step": 37383 }, { "epoch": 3.8007320048800324, "grad_norm": 0.2739331126213074, "learning_rate": 1.6524485370635135e-06, "loss": 0.3426, "step": 37384 }, { "epoch": 3.8008336722244813, "grad_norm": 0.2677363157272339, "learning_rate": 1.6521849352033808e-06, "loss": 0.3009, "step": 37385 }, { "epoch": 3.8009353395689303, "grad_norm": 0.2870669662952423, "learning_rate": 1.6519213502087445e-06, "loss": 0.3287, "step": 37386 }, { "epoch": 3.801037006913379, "grad_norm": 0.27156656980514526, "learning_rate": 1.651657782080931e-06, "loss": 0.2707, "step": 37387 }, { "epoch": 3.801138674257828, "grad_norm": 0.24813945591449738, "learning_rate": 1.6513942308212732e-06, "loss": 0.3137, "step": 37388 }, { "epoch": 3.801240341602277, "grad_norm": 0.2510735094547272, "learning_rate": 1.6511306964310925e-06, "loss": 0.3122, "step": 37389 }, { "epoch": 3.8013420089467265, "grad_norm": 0.2658221423625946, "learning_rate": 1.6508671789117193e-06, "loss": 0.268, "step": 37390 }, { "epoch": 3.8014436762911754, "grad_norm": 0.26717016100883484, "learning_rate": 1.6506036782644836e-06, "loss": 0.3269, "step": 37391 }, { "epoch": 3.8015453436356244, "grad_norm": 0.2757902443408966, "learning_rate": 1.65034019449071e-06, "loss": 0.311, "step": 37392 }, { "epoch": 3.8016470109800733, "grad_norm": 0.285979688167572, "learning_rate": 1.6500767275917267e-06, "loss": 0.3453, "step": 37393 }, { "epoch": 3.8017486783245222, "grad_norm": 0.2726108133792877, "learning_rate": 1.6498132775688591e-06, "loss": 0.318, "step": 37394 }, { "epoch": 3.801850345668971, "grad_norm": 0.2732376754283905, "learning_rate": 1.6495498444234375e-06, "loss": 0.3165, "step": 37395 }, { "epoch": 3.80195201301342, "grad_norm": 0.27464401721954346, "learning_rate": 1.6492864281567878e-06, "loss": 0.3184, "step": 37396 }, { "epoch": 3.802053680357869, "grad_norm": 0.2906914949417114, "learning_rate": 1.6490230287702353e-06, "loss": 0.3133, "step": 37397 }, { "epoch": 3.802155347702318, "grad_norm": 0.2797040045261383, "learning_rate": 1.6487596462651117e-06, "loss": 0.3057, "step": 37398 }, { "epoch": 3.802257015046767, "grad_norm": 0.2757810950279236, "learning_rate": 1.6484962806427373e-06, "loss": 0.3128, "step": 37399 }, { "epoch": 3.802358682391216, "grad_norm": 0.2693825960159302, "learning_rate": 1.6482329319044422e-06, "loss": 0.2745, "step": 37400 }, { "epoch": 3.802460349735665, "grad_norm": 0.2930062413215637, "learning_rate": 1.6479696000515549e-06, "loss": 0.3667, "step": 37401 }, { "epoch": 3.8025620170801138, "grad_norm": 0.28032609820365906, "learning_rate": 1.6477062850853997e-06, "loss": 0.3091, "step": 37402 }, { "epoch": 3.8026636844245627, "grad_norm": 0.26462066173553467, "learning_rate": 1.6474429870073045e-06, "loss": 0.2956, "step": 37403 }, { "epoch": 3.8027653517690116, "grad_norm": 0.28090688586235046, "learning_rate": 1.6471797058185924e-06, "loss": 0.265, "step": 37404 }, { "epoch": 3.802867019113461, "grad_norm": 0.26917147636413574, "learning_rate": 1.646916441520594e-06, "loss": 0.3016, "step": 37405 }, { "epoch": 3.80296868645791, "grad_norm": 0.2762310206890106, "learning_rate": 1.6466531941146335e-06, "loss": 0.3405, "step": 37406 }, { "epoch": 3.803070353802359, "grad_norm": 0.2890928089618683, "learning_rate": 1.6463899636020364e-06, "loss": 0.2869, "step": 37407 }, { "epoch": 3.803172021146808, "grad_norm": 0.2678512930870056, "learning_rate": 1.6461267499841326e-06, "loss": 0.2996, "step": 37408 }, { "epoch": 3.803273688491257, "grad_norm": 0.2677428424358368, "learning_rate": 1.6458635532622418e-06, "loss": 0.35, "step": 37409 }, { "epoch": 3.8033753558357057, "grad_norm": 0.2762242257595062, "learning_rate": 1.6456003734376934e-06, "loss": 0.3171, "step": 37410 }, { "epoch": 3.8034770231801547, "grad_norm": 0.2859257161617279, "learning_rate": 1.6453372105118171e-06, "loss": 0.332, "step": 37411 }, { "epoch": 3.8035786905246036, "grad_norm": 0.28828415274620056, "learning_rate": 1.6450740644859308e-06, "loss": 0.3528, "step": 37412 }, { "epoch": 3.8036803578690526, "grad_norm": 0.29120922088623047, "learning_rate": 1.6448109353613655e-06, "loss": 0.3286, "step": 37413 }, { "epoch": 3.8037820252135015, "grad_norm": 0.28412389755249023, "learning_rate": 1.6445478231394445e-06, "loss": 0.2914, "step": 37414 }, { "epoch": 3.8038836925579504, "grad_norm": 0.2726934254169464, "learning_rate": 1.6442847278214952e-06, "loss": 0.3247, "step": 37415 }, { "epoch": 3.8039853599023994, "grad_norm": 0.2879593074321747, "learning_rate": 1.6440216494088424e-06, "loss": 0.2901, "step": 37416 }, { "epoch": 3.8040870272468483, "grad_norm": 0.2625891864299774, "learning_rate": 1.6437585879028095e-06, "loss": 0.3403, "step": 37417 }, { "epoch": 3.8041886945912973, "grad_norm": 0.2797689735889435, "learning_rate": 1.643495543304726e-06, "loss": 0.307, "step": 37418 }, { "epoch": 3.804290361935746, "grad_norm": 0.28633496165275574, "learning_rate": 1.6432325156159112e-06, "loss": 0.3597, "step": 37419 }, { "epoch": 3.804392029280195, "grad_norm": 0.28108301758766174, "learning_rate": 1.6429695048376937e-06, "loss": 0.313, "step": 37420 }, { "epoch": 3.804493696624644, "grad_norm": 0.2652457654476166, "learning_rate": 1.6427065109714014e-06, "loss": 0.3503, "step": 37421 }, { "epoch": 3.804595363969093, "grad_norm": 0.27489277720451355, "learning_rate": 1.642443534018352e-06, "loss": 0.2907, "step": 37422 }, { "epoch": 3.804697031313542, "grad_norm": 0.28459492325782776, "learning_rate": 1.6421805739798764e-06, "loss": 0.2797, "step": 37423 }, { "epoch": 3.804798698657991, "grad_norm": 0.26833376288414, "learning_rate": 1.6419176308572966e-06, "loss": 0.317, "step": 37424 }, { "epoch": 3.80490036600244, "grad_norm": 0.30167385935783386, "learning_rate": 1.6416547046519366e-06, "loss": 0.3143, "step": 37425 }, { "epoch": 3.805002033346889, "grad_norm": 0.272333025932312, "learning_rate": 1.6413917953651238e-06, "loss": 0.2966, "step": 37426 }, { "epoch": 3.8051037006913377, "grad_norm": 0.27812376618385315, "learning_rate": 1.6411289029981793e-06, "loss": 0.3144, "step": 37427 }, { "epoch": 3.8052053680357867, "grad_norm": 0.2808164954185486, "learning_rate": 1.6408660275524319e-06, "loss": 0.3049, "step": 37428 }, { "epoch": 3.8053070353802356, "grad_norm": 0.2795425355434418, "learning_rate": 1.6406031690292002e-06, "loss": 0.3485, "step": 37429 }, { "epoch": 3.8054087027246846, "grad_norm": 0.26056763529777527, "learning_rate": 1.6403403274298119e-06, "loss": 0.3137, "step": 37430 }, { "epoch": 3.805510370069134, "grad_norm": 0.31122884154319763, "learning_rate": 1.6400775027555937e-06, "loss": 0.3437, "step": 37431 }, { "epoch": 3.805612037413583, "grad_norm": 0.28192275762557983, "learning_rate": 1.639814695007863e-06, "loss": 0.2987, "step": 37432 }, { "epoch": 3.805713704758032, "grad_norm": 0.2936387062072754, "learning_rate": 1.6395519041879494e-06, "loss": 0.288, "step": 37433 }, { "epoch": 3.8058153721024808, "grad_norm": 0.3005264699459076, "learning_rate": 1.6392891302971742e-06, "loss": 0.3442, "step": 37434 }, { "epoch": 3.8059170394469297, "grad_norm": 0.27224454283714294, "learning_rate": 1.6390263733368605e-06, "loss": 0.3401, "step": 37435 }, { "epoch": 3.8060187067913787, "grad_norm": 0.27839091420173645, "learning_rate": 1.638763633308335e-06, "loss": 0.302, "step": 37436 }, { "epoch": 3.8061203741358276, "grad_norm": 0.28263846039772034, "learning_rate": 1.6385009102129178e-06, "loss": 0.3069, "step": 37437 }, { "epoch": 3.8062220414802765, "grad_norm": 0.28521743416786194, "learning_rate": 1.6382382040519374e-06, "loss": 0.3068, "step": 37438 }, { "epoch": 3.8063237088247255, "grad_norm": 0.2638581693172455, "learning_rate": 1.6379755148267107e-06, "loss": 0.3053, "step": 37439 }, { "epoch": 3.8064253761691744, "grad_norm": 0.27145007252693176, "learning_rate": 1.6377128425385647e-06, "loss": 0.3076, "step": 37440 }, { "epoch": 3.8065270435136234, "grad_norm": 0.2796473205089569, "learning_rate": 1.6374501871888255e-06, "loss": 0.3352, "step": 37441 }, { "epoch": 3.8066287108580723, "grad_norm": 0.29861581325531006, "learning_rate": 1.6371875487788103e-06, "loss": 0.3034, "step": 37442 }, { "epoch": 3.8067303782025212, "grad_norm": 0.2803226709365845, "learning_rate": 1.6369249273098458e-06, "loss": 0.332, "step": 37443 }, { "epoch": 3.80683204554697, "grad_norm": 0.2807609736919403, "learning_rate": 1.636662322783255e-06, "loss": 0.2627, "step": 37444 }, { "epoch": 3.806933712891419, "grad_norm": 0.27526822686195374, "learning_rate": 1.6363997352003586e-06, "loss": 0.3233, "step": 37445 }, { "epoch": 3.8070353802358685, "grad_norm": 0.26051872968673706, "learning_rate": 1.6361371645624824e-06, "loss": 0.3199, "step": 37446 }, { "epoch": 3.8071370475803175, "grad_norm": 0.30831101536750793, "learning_rate": 1.6358746108709477e-06, "loss": 0.336, "step": 37447 }, { "epoch": 3.8072387149247664, "grad_norm": 0.28167805075645447, "learning_rate": 1.6356120741270776e-06, "loss": 0.3227, "step": 37448 }, { "epoch": 3.8073403822692153, "grad_norm": 0.2852312922477722, "learning_rate": 1.635349554332193e-06, "loss": 0.2967, "step": 37449 }, { "epoch": 3.8074420496136643, "grad_norm": 0.281608909368515, "learning_rate": 1.6350870514876171e-06, "loss": 0.2978, "step": 37450 }, { "epoch": 3.8075437169581132, "grad_norm": 0.28601816296577454, "learning_rate": 1.6348245655946765e-06, "loss": 0.3174, "step": 37451 }, { "epoch": 3.807645384302562, "grad_norm": 0.2775880694389343, "learning_rate": 1.6345620966546865e-06, "loss": 0.3244, "step": 37452 }, { "epoch": 3.807747051647011, "grad_norm": 0.3120155930519104, "learning_rate": 1.6342996446689751e-06, "loss": 0.2918, "step": 37453 }, { "epoch": 3.80784871899146, "grad_norm": 0.27813655138015747, "learning_rate": 1.6340372096388618e-06, "loss": 0.2938, "step": 37454 }, { "epoch": 3.807950386335909, "grad_norm": 0.289337158203125, "learning_rate": 1.6337747915656681e-06, "loss": 0.269, "step": 37455 }, { "epoch": 3.808052053680358, "grad_norm": 0.26685968041419983, "learning_rate": 1.6335123904507183e-06, "loss": 0.2979, "step": 37456 }, { "epoch": 3.808153721024807, "grad_norm": 0.2870977818965912, "learning_rate": 1.6332500062953333e-06, "loss": 0.2792, "step": 37457 }, { "epoch": 3.808255388369256, "grad_norm": 0.27152949571609497, "learning_rate": 1.6329876391008342e-06, "loss": 0.3197, "step": 37458 }, { "epoch": 3.8083570557137048, "grad_norm": 0.2844351828098297, "learning_rate": 1.632725288868542e-06, "loss": 0.3347, "step": 37459 }, { "epoch": 3.8084587230581537, "grad_norm": 0.25024041533470154, "learning_rate": 1.632462955599781e-06, "loss": 0.2966, "step": 37460 }, { "epoch": 3.8085603904026026, "grad_norm": 0.2687787115573883, "learning_rate": 1.6322006392958717e-06, "loss": 0.2868, "step": 37461 }, { "epoch": 3.8086620577470516, "grad_norm": 0.3057146966457367, "learning_rate": 1.6319383399581334e-06, "loss": 0.2834, "step": 37462 }, { "epoch": 3.8087637250915005, "grad_norm": 0.29252415895462036, "learning_rate": 1.6316760575878915e-06, "loss": 0.2842, "step": 37463 }, { "epoch": 3.8088653924359495, "grad_norm": 0.27822232246398926, "learning_rate": 1.6314137921864648e-06, "loss": 0.3278, "step": 37464 }, { "epoch": 3.8089670597803984, "grad_norm": 0.2868397533893585, "learning_rate": 1.631151543755175e-06, "loss": 0.3416, "step": 37465 }, { "epoch": 3.8090687271248473, "grad_norm": 0.3098528981208801, "learning_rate": 1.6308893122953412e-06, "loss": 0.3214, "step": 37466 }, { "epoch": 3.8091703944692963, "grad_norm": 0.2698661983013153, "learning_rate": 1.6306270978082882e-06, "loss": 0.3182, "step": 37467 }, { "epoch": 3.8092720618137452, "grad_norm": 0.2636023759841919, "learning_rate": 1.6303649002953354e-06, "loss": 0.3002, "step": 37468 }, { "epoch": 3.809373729158194, "grad_norm": 0.26400226354599, "learning_rate": 1.6301027197578017e-06, "loss": 0.3026, "step": 37469 }, { "epoch": 3.809475396502643, "grad_norm": 0.27645593881607056, "learning_rate": 1.6298405561970116e-06, "loss": 0.2786, "step": 37470 }, { "epoch": 3.809577063847092, "grad_norm": 0.28913843631744385, "learning_rate": 1.6295784096142835e-06, "loss": 0.3199, "step": 37471 }, { "epoch": 3.8096787311915414, "grad_norm": 0.27085965871810913, "learning_rate": 1.6293162800109369e-06, "loss": 0.3039, "step": 37472 }, { "epoch": 3.8097803985359904, "grad_norm": 0.2750625014305115, "learning_rate": 1.6290541673882959e-06, "loss": 0.3028, "step": 37473 }, { "epoch": 3.8098820658804393, "grad_norm": 0.2597760856151581, "learning_rate": 1.6287920717476784e-06, "loss": 0.2995, "step": 37474 }, { "epoch": 3.8099837332248883, "grad_norm": 0.27408963441848755, "learning_rate": 1.6285299930904057e-06, "loss": 0.3031, "step": 37475 }, { "epoch": 3.810085400569337, "grad_norm": 0.26548728346824646, "learning_rate": 1.6282679314177958e-06, "loss": 0.3084, "step": 37476 }, { "epoch": 3.810187067913786, "grad_norm": 0.27948078513145447, "learning_rate": 1.6280058867311727e-06, "loss": 0.3118, "step": 37477 }, { "epoch": 3.810288735258235, "grad_norm": 0.2808125615119934, "learning_rate": 1.6277438590318544e-06, "loss": 0.3252, "step": 37478 }, { "epoch": 3.810390402602684, "grad_norm": 0.274059534072876, "learning_rate": 1.6274818483211596e-06, "loss": 0.3581, "step": 37479 }, { "epoch": 3.810492069947133, "grad_norm": 0.2491324245929718, "learning_rate": 1.6272198546004113e-06, "loss": 0.3283, "step": 37480 }, { "epoch": 3.810593737291582, "grad_norm": 0.2962545156478882, "learning_rate": 1.6269578778709282e-06, "loss": 0.3237, "step": 37481 }, { "epoch": 3.810695404636031, "grad_norm": 0.2851859927177429, "learning_rate": 1.626695918134028e-06, "loss": 0.2971, "step": 37482 }, { "epoch": 3.81079707198048, "grad_norm": 0.2708197236061096, "learning_rate": 1.6264339753910341e-06, "loss": 0.3214, "step": 37483 }, { "epoch": 3.8108987393249287, "grad_norm": 0.267630398273468, "learning_rate": 1.6261720496432636e-06, "loss": 0.293, "step": 37484 }, { "epoch": 3.8110004066693777, "grad_norm": 0.2789147198200226, "learning_rate": 1.625910140892037e-06, "loss": 0.2956, "step": 37485 }, { "epoch": 3.8111020740138266, "grad_norm": 0.2894437313079834, "learning_rate": 1.625648249138671e-06, "loss": 0.3181, "step": 37486 }, { "epoch": 3.811203741358276, "grad_norm": 0.2743288576602936, "learning_rate": 1.6253863743844895e-06, "loss": 0.3071, "step": 37487 }, { "epoch": 3.811305408702725, "grad_norm": 0.3007447421550751, "learning_rate": 1.6251245166308095e-06, "loss": 0.2698, "step": 37488 }, { "epoch": 3.811407076047174, "grad_norm": 0.3096230626106262, "learning_rate": 1.6248626758789482e-06, "loss": 0.3094, "step": 37489 }, { "epoch": 3.811508743391623, "grad_norm": 0.31090718507766724, "learning_rate": 1.6246008521302288e-06, "loss": 0.326, "step": 37490 }, { "epoch": 3.8116104107360718, "grad_norm": 0.28458553552627563, "learning_rate": 1.6243390453859676e-06, "loss": 0.3095, "step": 37491 }, { "epoch": 3.8117120780805207, "grad_norm": 0.3040449917316437, "learning_rate": 1.6240772556474832e-06, "loss": 0.3466, "step": 37492 }, { "epoch": 3.8118137454249696, "grad_norm": 0.27551233768463135, "learning_rate": 1.6238154829160963e-06, "loss": 0.2726, "step": 37493 }, { "epoch": 3.8119154127694186, "grad_norm": 0.28376132249832153, "learning_rate": 1.6235537271931252e-06, "loss": 0.2866, "step": 37494 }, { "epoch": 3.8120170801138675, "grad_norm": 0.274736613035202, "learning_rate": 1.6232919884798875e-06, "loss": 0.3299, "step": 37495 }, { "epoch": 3.8121187474583165, "grad_norm": 0.2860090136528015, "learning_rate": 1.6230302667777008e-06, "loss": 0.2952, "step": 37496 }, { "epoch": 3.8122204148027654, "grad_norm": 0.2796027660369873, "learning_rate": 1.6227685620878875e-06, "loss": 0.3161, "step": 37497 }, { "epoch": 3.8123220821472144, "grad_norm": 0.2730323076248169, "learning_rate": 1.6225068744117627e-06, "loss": 0.2899, "step": 37498 }, { "epoch": 3.8124237494916633, "grad_norm": 0.2634737491607666, "learning_rate": 1.6222452037506447e-06, "loss": 0.3013, "step": 37499 }, { "epoch": 3.8125254168361122, "grad_norm": 0.2866216003894806, "learning_rate": 1.6219835501058538e-06, "loss": 0.3077, "step": 37500 }, { "epoch": 3.812627084180561, "grad_norm": 0.30060943961143494, "learning_rate": 1.6217219134787077e-06, "loss": 0.3017, "step": 37501 }, { "epoch": 3.81272875152501, "grad_norm": 0.263876348733902, "learning_rate": 1.6214602938705215e-06, "loss": 0.2851, "step": 37502 }, { "epoch": 3.812830418869459, "grad_norm": 0.26258179545402527, "learning_rate": 1.6211986912826173e-06, "loss": 0.3225, "step": 37503 }, { "epoch": 3.812932086213908, "grad_norm": 0.2792413532733917, "learning_rate": 1.6209371057163115e-06, "loss": 0.2962, "step": 37504 }, { "epoch": 3.813033753558357, "grad_norm": 0.2684275507926941, "learning_rate": 1.620675537172921e-06, "loss": 0.3183, "step": 37505 }, { "epoch": 3.813135420902806, "grad_norm": 0.27714967727661133, "learning_rate": 1.6204139856537627e-06, "loss": 0.3156, "step": 37506 }, { "epoch": 3.813237088247255, "grad_norm": 0.26345640420913696, "learning_rate": 1.6201524511601574e-06, "loss": 0.3009, "step": 37507 }, { "epoch": 3.8133387555917038, "grad_norm": 0.2809569537639618, "learning_rate": 1.619890933693421e-06, "loss": 0.3134, "step": 37508 }, { "epoch": 3.8134404229361527, "grad_norm": 0.2623614966869354, "learning_rate": 1.6196294332548689e-06, "loss": 0.3289, "step": 37509 }, { "epoch": 3.8135420902806016, "grad_norm": 0.2591233253479004, "learning_rate": 1.6193679498458221e-06, "loss": 0.3197, "step": 37510 }, { "epoch": 3.8136437576250506, "grad_norm": 0.2772272527217865, "learning_rate": 1.6191064834675963e-06, "loss": 0.3095, "step": 37511 }, { "epoch": 3.8137454249694995, "grad_norm": 0.259626567363739, "learning_rate": 1.6188450341215068e-06, "loss": 0.3314, "step": 37512 }, { "epoch": 3.813847092313949, "grad_norm": 0.2760619521141052, "learning_rate": 1.6185836018088747e-06, "loss": 0.3153, "step": 37513 }, { "epoch": 3.813948759658398, "grad_norm": 0.2791753113269806, "learning_rate": 1.6183221865310139e-06, "loss": 0.3168, "step": 37514 }, { "epoch": 3.814050427002847, "grad_norm": 0.27487581968307495, "learning_rate": 1.618060788289243e-06, "loss": 0.3008, "step": 37515 }, { "epoch": 3.8141520943472957, "grad_norm": 0.27189794182777405, "learning_rate": 1.6177994070848768e-06, "loss": 0.3121, "step": 37516 }, { "epoch": 3.8142537616917447, "grad_norm": 0.26634401082992554, "learning_rate": 1.6175380429192344e-06, "loss": 0.3363, "step": 37517 }, { "epoch": 3.8143554290361936, "grad_norm": 0.2703457474708557, "learning_rate": 1.617276695793632e-06, "loss": 0.3134, "step": 37518 }, { "epoch": 3.8144570963806426, "grad_norm": 0.2709028124809265, "learning_rate": 1.617015365709384e-06, "loss": 0.3058, "step": 37519 }, { "epoch": 3.8145587637250915, "grad_norm": 0.25631633400917053, "learning_rate": 1.61675405266781e-06, "loss": 0.3206, "step": 37520 }, { "epoch": 3.8146604310695404, "grad_norm": 0.27575305104255676, "learning_rate": 1.6164927566702255e-06, "loss": 0.3069, "step": 37521 }, { "epoch": 3.8147620984139894, "grad_norm": 0.2881651222705841, "learning_rate": 1.616231477717945e-06, "loss": 0.2742, "step": 37522 }, { "epoch": 3.8148637657584383, "grad_norm": 0.2505747675895691, "learning_rate": 1.615970215812287e-06, "loss": 0.3051, "step": 37523 }, { "epoch": 3.8149654331028873, "grad_norm": 0.24511949717998505, "learning_rate": 1.6157089709545676e-06, "loss": 0.3113, "step": 37524 }, { "epoch": 3.815067100447336, "grad_norm": 0.29224658012390137, "learning_rate": 1.6154477431461018e-06, "loss": 0.2813, "step": 37525 }, { "epoch": 3.815168767791785, "grad_norm": 0.25406959652900696, "learning_rate": 1.6151865323882048e-06, "loss": 0.3023, "step": 37526 }, { "epoch": 3.815270435136234, "grad_norm": 0.26390355825424194, "learning_rate": 1.614925338682195e-06, "loss": 0.3241, "step": 37527 }, { "epoch": 3.8153721024806835, "grad_norm": 0.2813883423805237, "learning_rate": 1.6146641620293867e-06, "loss": 0.2957, "step": 37528 }, { "epoch": 3.8154737698251324, "grad_norm": 0.255299836397171, "learning_rate": 1.6144030024310947e-06, "loss": 0.3439, "step": 37529 }, { "epoch": 3.8155754371695814, "grad_norm": 0.2846435308456421, "learning_rate": 1.6141418598886371e-06, "loss": 0.3059, "step": 37530 }, { "epoch": 3.8156771045140303, "grad_norm": 0.2636663019657135, "learning_rate": 1.6138807344033286e-06, "loss": 0.3314, "step": 37531 }, { "epoch": 3.8157787718584792, "grad_norm": 0.2691786289215088, "learning_rate": 1.6136196259764825e-06, "loss": 0.3355, "step": 37532 }, { "epoch": 3.815880439202928, "grad_norm": 0.27271363139152527, "learning_rate": 1.6133585346094178e-06, "loss": 0.3026, "step": 37533 }, { "epoch": 3.815982106547377, "grad_norm": 0.24918624758720398, "learning_rate": 1.6130974603034478e-06, "loss": 0.3179, "step": 37534 }, { "epoch": 3.816083773891826, "grad_norm": 0.27723026275634766, "learning_rate": 1.6128364030598876e-06, "loss": 0.3131, "step": 37535 }, { "epoch": 3.816185441236275, "grad_norm": 0.2806990146636963, "learning_rate": 1.6125753628800516e-06, "loss": 0.304, "step": 37536 }, { "epoch": 3.816287108580724, "grad_norm": 0.2810904085636139, "learning_rate": 1.6123143397652575e-06, "loss": 0.2954, "step": 37537 }, { "epoch": 3.816388775925173, "grad_norm": 0.2708747386932373, "learning_rate": 1.6120533337168182e-06, "loss": 0.2938, "step": 37538 }, { "epoch": 3.816490443269622, "grad_norm": 0.2728109359741211, "learning_rate": 1.6117923447360478e-06, "loss": 0.295, "step": 37539 }, { "epoch": 3.8165921106140708, "grad_norm": 0.2877886891365051, "learning_rate": 1.611531372824266e-06, "loss": 0.283, "step": 37540 }, { "epoch": 3.8166937779585197, "grad_norm": 0.2871510088443756, "learning_rate": 1.6112704179827804e-06, "loss": 0.3038, "step": 37541 }, { "epoch": 3.8167954453029687, "grad_norm": 0.28050777316093445, "learning_rate": 1.611009480212909e-06, "loss": 0.3428, "step": 37542 }, { "epoch": 3.8168971126474176, "grad_norm": 0.2776941657066345, "learning_rate": 1.6107485595159684e-06, "loss": 0.2804, "step": 37543 }, { "epoch": 3.8169987799918665, "grad_norm": 0.28988322615623474, "learning_rate": 1.6104876558932709e-06, "loss": 0.313, "step": 37544 }, { "epoch": 3.8171004473363155, "grad_norm": 0.2990434765815735, "learning_rate": 1.610226769346131e-06, "loss": 0.3179, "step": 37545 }, { "epoch": 3.8172021146807644, "grad_norm": 0.2759243845939636, "learning_rate": 1.6099658998758623e-06, "loss": 0.3088, "step": 37546 }, { "epoch": 3.8173037820252134, "grad_norm": 0.2729397118091583, "learning_rate": 1.6097050474837806e-06, "loss": 0.2977, "step": 37547 }, { "epoch": 3.8174054493696623, "grad_norm": 0.2576669454574585, "learning_rate": 1.6094442121711994e-06, "loss": 0.3136, "step": 37548 }, { "epoch": 3.8175071167141112, "grad_norm": 0.2454274743795395, "learning_rate": 1.6091833939394309e-06, "loss": 0.3376, "step": 37549 }, { "epoch": 3.81760878405856, "grad_norm": 0.2664683163166046, "learning_rate": 1.6089225927897938e-06, "loss": 0.2872, "step": 37550 }, { "epoch": 3.817710451403009, "grad_norm": 0.29082706570625305, "learning_rate": 1.6086618087235956e-06, "loss": 0.3064, "step": 37551 }, { "epoch": 3.817812118747458, "grad_norm": 0.26302075386047363, "learning_rate": 1.6084010417421535e-06, "loss": 0.3351, "step": 37552 }, { "epoch": 3.817913786091907, "grad_norm": 0.26807254552841187, "learning_rate": 1.6081402918467825e-06, "loss": 0.3003, "step": 37553 }, { "epoch": 3.8180154534363564, "grad_norm": 0.2691115438938141, "learning_rate": 1.6078795590387951e-06, "loss": 0.3246, "step": 37554 }, { "epoch": 3.8181171207808053, "grad_norm": 0.2666610777378082, "learning_rate": 1.607618843319504e-06, "loss": 0.3179, "step": 37555 }, { "epoch": 3.8182187881252543, "grad_norm": 0.28187674283981323, "learning_rate": 1.6073581446902214e-06, "loss": 0.3431, "step": 37556 }, { "epoch": 3.8183204554697032, "grad_norm": 0.26847946643829346, "learning_rate": 1.607097463152264e-06, "loss": 0.3129, "step": 37557 }, { "epoch": 3.818422122814152, "grad_norm": 0.28846603631973267, "learning_rate": 1.6068367987069433e-06, "loss": 0.3179, "step": 37558 }, { "epoch": 3.818523790158601, "grad_norm": 0.2760021984577179, "learning_rate": 1.6065761513555704e-06, "loss": 0.3237, "step": 37559 }, { "epoch": 3.81862545750305, "grad_norm": 0.29225924611091614, "learning_rate": 1.6063155210994642e-06, "loss": 0.2684, "step": 37560 }, { "epoch": 3.818727124847499, "grad_norm": 0.28016534447669983, "learning_rate": 1.6060549079399306e-06, "loss": 0.2881, "step": 37561 }, { "epoch": 3.818828792191948, "grad_norm": 0.2605822682380676, "learning_rate": 1.6057943118782859e-06, "loss": 0.2988, "step": 37562 }, { "epoch": 3.818930459536397, "grad_norm": 0.2796782851219177, "learning_rate": 1.605533732915846e-06, "loss": 0.2982, "step": 37563 }, { "epoch": 3.819032126880846, "grad_norm": 0.276108980178833, "learning_rate": 1.6052731710539171e-06, "loss": 0.2955, "step": 37564 }, { "epoch": 3.8191337942252948, "grad_norm": 0.2597237229347229, "learning_rate": 1.605012626293817e-06, "loss": 0.2966, "step": 37565 }, { "epoch": 3.8192354615697437, "grad_norm": 0.260527640581131, "learning_rate": 1.6047520986368548e-06, "loss": 0.3068, "step": 37566 }, { "epoch": 3.8193371289141926, "grad_norm": 0.2839202582836151, "learning_rate": 1.6044915880843465e-06, "loss": 0.3198, "step": 37567 }, { "epoch": 3.8194387962586416, "grad_norm": 0.2788274586200714, "learning_rate": 1.6042310946376027e-06, "loss": 0.3122, "step": 37568 }, { "epoch": 3.819540463603091, "grad_norm": 0.28580036759376526, "learning_rate": 1.6039706182979343e-06, "loss": 0.3187, "step": 37569 }, { "epoch": 3.81964213094754, "grad_norm": 0.28504374623298645, "learning_rate": 1.6037101590666576e-06, "loss": 0.3131, "step": 37570 }, { "epoch": 3.819743798291989, "grad_norm": 0.27201035618782043, "learning_rate": 1.603449716945079e-06, "loss": 0.3303, "step": 37571 }, { "epoch": 3.819845465636438, "grad_norm": 0.2866280674934387, "learning_rate": 1.6031892919345138e-06, "loss": 0.3116, "step": 37572 }, { "epoch": 3.8199471329808867, "grad_norm": 0.27333173155784607, "learning_rate": 1.6029288840362766e-06, "loss": 0.3278, "step": 37573 }, { "epoch": 3.8200488003253357, "grad_norm": 0.2718503177165985, "learning_rate": 1.6026684932516735e-06, "loss": 0.2857, "step": 37574 }, { "epoch": 3.8201504676697846, "grad_norm": 0.285564661026001, "learning_rate": 1.6024081195820207e-06, "loss": 0.2871, "step": 37575 }, { "epoch": 3.8202521350142336, "grad_norm": 0.2756052315235138, "learning_rate": 1.6021477630286287e-06, "loss": 0.315, "step": 37576 }, { "epoch": 3.8203538023586825, "grad_norm": 0.27261659502983093, "learning_rate": 1.6018874235928067e-06, "loss": 0.2985, "step": 37577 }, { "epoch": 3.8204554697031314, "grad_norm": 0.27374333143234253, "learning_rate": 1.60162710127587e-06, "loss": 0.2982, "step": 37578 }, { "epoch": 3.8205571370475804, "grad_norm": 0.28006666898727417, "learning_rate": 1.6013667960791267e-06, "loss": 0.3425, "step": 37579 }, { "epoch": 3.8206588043920293, "grad_norm": 0.28520241379737854, "learning_rate": 1.6011065080038935e-06, "loss": 0.3224, "step": 37580 }, { "epoch": 3.8207604717364783, "grad_norm": 0.2584909200668335, "learning_rate": 1.600846237051475e-06, "loss": 0.3342, "step": 37581 }, { "epoch": 3.820862139080927, "grad_norm": 0.26042765378952026, "learning_rate": 1.6005859832231846e-06, "loss": 0.3224, "step": 37582 }, { "epoch": 3.820963806425376, "grad_norm": 0.2821291983127594, "learning_rate": 1.6003257465203376e-06, "loss": 0.3083, "step": 37583 }, { "epoch": 3.821065473769825, "grad_norm": 0.2946784794330597, "learning_rate": 1.600065526944239e-06, "loss": 0.3177, "step": 37584 }, { "epoch": 3.821167141114274, "grad_norm": 0.26936739683151245, "learning_rate": 1.599805324496203e-06, "loss": 0.3128, "step": 37585 }, { "epoch": 3.821268808458723, "grad_norm": 0.29959946870803833, "learning_rate": 1.5995451391775407e-06, "loss": 0.314, "step": 37586 }, { "epoch": 3.821370475803172, "grad_norm": 0.25017213821411133, "learning_rate": 1.5992849709895598e-06, "loss": 0.314, "step": 37587 }, { "epoch": 3.821472143147621, "grad_norm": 0.2938128411769867, "learning_rate": 1.5990248199335745e-06, "loss": 0.2959, "step": 37588 }, { "epoch": 3.82157381049207, "grad_norm": 0.28433164954185486, "learning_rate": 1.598764686010893e-06, "loss": 0.3396, "step": 37589 }, { "epoch": 3.8216754778365187, "grad_norm": 0.27323049306869507, "learning_rate": 1.5985045692228296e-06, "loss": 0.3111, "step": 37590 }, { "epoch": 3.8217771451809677, "grad_norm": 0.26641541719436646, "learning_rate": 1.5982444695706883e-06, "loss": 0.3173, "step": 37591 }, { "epoch": 3.8218788125254166, "grad_norm": 0.2606039345264435, "learning_rate": 1.5979843870557837e-06, "loss": 0.3205, "step": 37592 }, { "epoch": 3.8219804798698656, "grad_norm": 0.27769142389297485, "learning_rate": 1.597724321679428e-06, "loss": 0.3304, "step": 37593 }, { "epoch": 3.8220821472143145, "grad_norm": 0.256253182888031, "learning_rate": 1.5974642734429259e-06, "loss": 0.3183, "step": 37594 }, { "epoch": 3.822183814558764, "grad_norm": 0.2632630169391632, "learning_rate": 1.5972042423475915e-06, "loss": 0.3176, "step": 37595 }, { "epoch": 3.822285481903213, "grad_norm": 0.26517942547798157, "learning_rate": 1.5969442283947335e-06, "loss": 0.2947, "step": 37596 }, { "epoch": 3.8223871492476618, "grad_norm": 0.27496567368507385, "learning_rate": 1.5966842315856601e-06, "loss": 0.3162, "step": 37597 }, { "epoch": 3.8224888165921107, "grad_norm": 0.24853742122650146, "learning_rate": 1.5964242519216844e-06, "loss": 0.2662, "step": 37598 }, { "epoch": 3.8225904839365596, "grad_norm": 0.27948638796806335, "learning_rate": 1.5961642894041146e-06, "loss": 0.2925, "step": 37599 }, { "epoch": 3.8226921512810086, "grad_norm": 0.26346760988235474, "learning_rate": 1.5959043440342603e-06, "loss": 0.3178, "step": 37600 }, { "epoch": 3.8227938186254575, "grad_norm": 0.2967510521411896, "learning_rate": 1.5956444158134293e-06, "loss": 0.3229, "step": 37601 }, { "epoch": 3.8228954859699065, "grad_norm": 0.3976084887981415, "learning_rate": 1.595384504742933e-06, "loss": 0.2964, "step": 37602 }, { "epoch": 3.8229971533143554, "grad_norm": 0.2739972174167633, "learning_rate": 1.5951246108240831e-06, "loss": 0.3152, "step": 37603 }, { "epoch": 3.8230988206588044, "grad_norm": 0.26839613914489746, "learning_rate": 1.5948647340581835e-06, "loss": 0.2839, "step": 37604 }, { "epoch": 3.8232004880032533, "grad_norm": 0.26448413729667664, "learning_rate": 1.5946048744465476e-06, "loss": 0.3123, "step": 37605 }, { "epoch": 3.8233021553477022, "grad_norm": 0.281058669090271, "learning_rate": 1.5943450319904836e-06, "loss": 0.3387, "step": 37606 }, { "epoch": 3.823403822692151, "grad_norm": 0.2660582661628723, "learning_rate": 1.5940852066912977e-06, "loss": 0.3029, "step": 37607 }, { "epoch": 3.8235054900366, "grad_norm": 0.2886941730976105, "learning_rate": 1.5938253985503032e-06, "loss": 0.3014, "step": 37608 }, { "epoch": 3.823607157381049, "grad_norm": 0.25825634598731995, "learning_rate": 1.5935656075688071e-06, "loss": 0.329, "step": 37609 }, { "epoch": 3.8237088247254984, "grad_norm": 0.2563146650791168, "learning_rate": 1.5933058337481179e-06, "loss": 0.3062, "step": 37610 }, { "epoch": 3.8238104920699474, "grad_norm": 0.277883380651474, "learning_rate": 1.5930460770895424e-06, "loss": 0.3067, "step": 37611 }, { "epoch": 3.8239121594143963, "grad_norm": 0.239630788564682, "learning_rate": 1.5927863375943909e-06, "loss": 0.335, "step": 37612 }, { "epoch": 3.8240138267588453, "grad_norm": 0.2806530296802521, "learning_rate": 1.5925266152639756e-06, "loss": 0.2928, "step": 37613 }, { "epoch": 3.824115494103294, "grad_norm": 0.287968248128891, "learning_rate": 1.5922669100995986e-06, "loss": 0.3442, "step": 37614 }, { "epoch": 3.824217161447743, "grad_norm": 0.2534008026123047, "learning_rate": 1.5920072221025728e-06, "loss": 0.2971, "step": 37615 }, { "epoch": 3.824318828792192, "grad_norm": 0.26524052023887634, "learning_rate": 1.5917475512742042e-06, "loss": 0.3196, "step": 37616 }, { "epoch": 3.824420496136641, "grad_norm": 0.2834293246269226, "learning_rate": 1.5914878976158005e-06, "loss": 0.327, "step": 37617 }, { "epoch": 3.82452216348109, "grad_norm": 0.26961103081703186, "learning_rate": 1.5912282611286717e-06, "loss": 0.3189, "step": 37618 }, { "epoch": 3.824623830825539, "grad_norm": 0.2772078216075897, "learning_rate": 1.5909686418141257e-06, "loss": 0.2867, "step": 37619 }, { "epoch": 3.824725498169988, "grad_norm": 0.2571077048778534, "learning_rate": 1.5907090396734686e-06, "loss": 0.2922, "step": 37620 }, { "epoch": 3.824827165514437, "grad_norm": 0.27639856934547424, "learning_rate": 1.5904494547080085e-06, "loss": 0.3094, "step": 37621 }, { "epoch": 3.8249288328588857, "grad_norm": 0.30400270223617554, "learning_rate": 1.590189886919055e-06, "loss": 0.3009, "step": 37622 }, { "epoch": 3.8250305002033347, "grad_norm": 0.2809197008609772, "learning_rate": 1.5899303363079144e-06, "loss": 0.3237, "step": 37623 }, { "epoch": 3.8251321675477836, "grad_norm": 0.2613096535205841, "learning_rate": 1.5896708028758928e-06, "loss": 0.303, "step": 37624 }, { "epoch": 3.8252338348922326, "grad_norm": 0.28842800855636597, "learning_rate": 1.589411286624301e-06, "loss": 0.2945, "step": 37625 }, { "epoch": 3.8253355022366815, "grad_norm": 0.2613798975944519, "learning_rate": 1.5891517875544448e-06, "loss": 0.2791, "step": 37626 }, { "epoch": 3.8254371695811304, "grad_norm": 0.27899685502052307, "learning_rate": 1.5888923056676298e-06, "loss": 0.2915, "step": 37627 }, { "epoch": 3.8255388369255794, "grad_norm": 0.28606387972831726, "learning_rate": 1.5886328409651663e-06, "loss": 0.3279, "step": 37628 }, { "epoch": 3.8256405042700283, "grad_norm": 0.2726554572582245, "learning_rate": 1.5883733934483598e-06, "loss": 0.2932, "step": 37629 }, { "epoch": 3.8257421716144773, "grad_norm": 0.2585473358631134, "learning_rate": 1.5881139631185177e-06, "loss": 0.3032, "step": 37630 }, { "epoch": 3.825843838958926, "grad_norm": 0.2713727653026581, "learning_rate": 1.5878545499769444e-06, "loss": 0.315, "step": 37631 }, { "epoch": 3.825945506303375, "grad_norm": 0.2902902364730835, "learning_rate": 1.5875951540249513e-06, "loss": 0.3228, "step": 37632 }, { "epoch": 3.826047173647824, "grad_norm": 0.2790352702140808, "learning_rate": 1.5873357752638424e-06, "loss": 0.2993, "step": 37633 }, { "epoch": 3.826148840992273, "grad_norm": 0.26962098479270935, "learning_rate": 1.5870764136949235e-06, "loss": 0.2829, "step": 37634 }, { "epoch": 3.826250508336722, "grad_norm": 0.27134761214256287, "learning_rate": 1.5868170693195046e-06, "loss": 0.3071, "step": 37635 }, { "epoch": 3.8263521756811714, "grad_norm": 0.25521355867385864, "learning_rate": 1.5865577421388895e-06, "loss": 0.2854, "step": 37636 }, { "epoch": 3.8264538430256203, "grad_norm": 0.2855844497680664, "learning_rate": 1.586298432154384e-06, "loss": 0.2956, "step": 37637 }, { "epoch": 3.8265555103700692, "grad_norm": 0.26201188564300537, "learning_rate": 1.5860391393672976e-06, "loss": 0.2946, "step": 37638 }, { "epoch": 3.826657177714518, "grad_norm": 0.28006118535995483, "learning_rate": 1.5857798637789345e-06, "loss": 0.2955, "step": 37639 }, { "epoch": 3.826758845058967, "grad_norm": 0.27882423996925354, "learning_rate": 1.5855206053906014e-06, "loss": 0.3261, "step": 37640 }, { "epoch": 3.826860512403416, "grad_norm": 0.27409347891807556, "learning_rate": 1.5852613642036019e-06, "loss": 0.3175, "step": 37641 }, { "epoch": 3.826962179747865, "grad_norm": 0.2763139605522156, "learning_rate": 1.5850021402192462e-06, "loss": 0.3303, "step": 37642 }, { "epoch": 3.827063847092314, "grad_norm": 0.31197410821914673, "learning_rate": 1.584742933438838e-06, "loss": 0.3024, "step": 37643 }, { "epoch": 3.827165514436763, "grad_norm": 0.274901419878006, "learning_rate": 1.5844837438636823e-06, "loss": 0.3082, "step": 37644 }, { "epoch": 3.827267181781212, "grad_norm": 0.2609674632549286, "learning_rate": 1.5842245714950866e-06, "loss": 0.2935, "step": 37645 }, { "epoch": 3.8273688491256608, "grad_norm": 0.2736380696296692, "learning_rate": 1.5839654163343564e-06, "loss": 0.2869, "step": 37646 }, { "epoch": 3.8274705164701097, "grad_norm": 0.2823934853076935, "learning_rate": 1.583706278382796e-06, "loss": 0.3071, "step": 37647 }, { "epoch": 3.8275721838145587, "grad_norm": 0.265593022108078, "learning_rate": 1.583447157641711e-06, "loss": 0.2968, "step": 37648 }, { "epoch": 3.8276738511590076, "grad_norm": 0.30579400062561035, "learning_rate": 1.5831880541124078e-06, "loss": 0.3063, "step": 37649 }, { "epoch": 3.8277755185034565, "grad_norm": 0.262774795293808, "learning_rate": 1.5829289677961918e-06, "loss": 0.312, "step": 37650 }, { "epoch": 3.827877185847906, "grad_norm": 0.2850577235221863, "learning_rate": 1.5826698986943662e-06, "loss": 0.2825, "step": 37651 }, { "epoch": 3.827978853192355, "grad_norm": 0.2906699776649475, "learning_rate": 1.5824108468082389e-06, "loss": 0.3114, "step": 37652 }, { "epoch": 3.828080520536804, "grad_norm": 0.2845620810985565, "learning_rate": 1.5821518121391133e-06, "loss": 0.3095, "step": 37653 }, { "epoch": 3.8281821878812528, "grad_norm": 0.26812058687210083, "learning_rate": 1.5818927946882934e-06, "loss": 0.3458, "step": 37654 }, { "epoch": 3.8282838552257017, "grad_norm": 0.253102570772171, "learning_rate": 1.581633794457087e-06, "loss": 0.3241, "step": 37655 }, { "epoch": 3.8283855225701506, "grad_norm": 0.23866412043571472, "learning_rate": 1.5813748114467974e-06, "loss": 0.2942, "step": 37656 }, { "epoch": 3.8284871899145996, "grad_norm": 0.26138073205947876, "learning_rate": 1.5811158456587288e-06, "loss": 0.3269, "step": 37657 }, { "epoch": 3.8285888572590485, "grad_norm": 0.28481432795524597, "learning_rate": 1.5808568970941846e-06, "loss": 0.3253, "step": 37658 }, { "epoch": 3.8286905246034975, "grad_norm": 0.2808308005332947, "learning_rate": 1.5805979657544722e-06, "loss": 0.3085, "step": 37659 }, { "epoch": 3.8287921919479464, "grad_norm": 0.27996522188186646, "learning_rate": 1.580339051640895e-06, "loss": 0.2879, "step": 37660 }, { "epoch": 3.8288938592923953, "grad_norm": 0.27234020829200745, "learning_rate": 1.580080154754755e-06, "loss": 0.3117, "step": 37661 }, { "epoch": 3.8289955266368443, "grad_norm": 0.25684911012649536, "learning_rate": 1.5798212750973608e-06, "loss": 0.3315, "step": 37662 }, { "epoch": 3.8290971939812932, "grad_norm": 0.2623918056488037, "learning_rate": 1.5795624126700138e-06, "loss": 0.2972, "step": 37663 }, { "epoch": 3.829198861325742, "grad_norm": 0.26530107855796814, "learning_rate": 1.579303567474017e-06, "loss": 0.3092, "step": 37664 }, { "epoch": 3.829300528670191, "grad_norm": 0.30293166637420654, "learning_rate": 1.5790447395106772e-06, "loss": 0.346, "step": 37665 }, { "epoch": 3.82940219601464, "grad_norm": 0.2687651515007019, "learning_rate": 1.578785928781298e-06, "loss": 0.2935, "step": 37666 }, { "epoch": 3.829503863359089, "grad_norm": 0.26378557085990906, "learning_rate": 1.578527135287181e-06, "loss": 0.3206, "step": 37667 }, { "epoch": 3.829605530703538, "grad_norm": 0.2721669375896454, "learning_rate": 1.5782683590296306e-06, "loss": 0.3176, "step": 37668 }, { "epoch": 3.829707198047987, "grad_norm": 0.27901920676231384, "learning_rate": 1.5780096000099527e-06, "loss": 0.3209, "step": 37669 }, { "epoch": 3.829808865392436, "grad_norm": 0.2757568061351776, "learning_rate": 1.5777508582294487e-06, "loss": 0.3012, "step": 37670 }, { "epoch": 3.8299105327368848, "grad_norm": 0.26676639914512634, "learning_rate": 1.5774921336894212e-06, "loss": 0.299, "step": 37671 }, { "epoch": 3.8300122000813337, "grad_norm": 0.270537406206131, "learning_rate": 1.5772334263911766e-06, "loss": 0.3175, "step": 37672 }, { "epoch": 3.8301138674257826, "grad_norm": 0.2767208516597748, "learning_rate": 1.5769747363360166e-06, "loss": 0.3382, "step": 37673 }, { "epoch": 3.8302155347702316, "grad_norm": 0.2816407084465027, "learning_rate": 1.5767160635252432e-06, "loss": 0.3378, "step": 37674 }, { "epoch": 3.8303172021146805, "grad_norm": 0.2584586441516876, "learning_rate": 1.5764574079601619e-06, "loss": 0.3206, "step": 37675 }, { "epoch": 3.8304188694591295, "grad_norm": 0.27476853132247925, "learning_rate": 1.576198769642075e-06, "loss": 0.306, "step": 37676 }, { "epoch": 3.830520536803579, "grad_norm": 0.266959547996521, "learning_rate": 1.5759401485722847e-06, "loss": 0.3039, "step": 37677 }, { "epoch": 3.830622204148028, "grad_norm": 0.27508050203323364, "learning_rate": 1.5756815447520928e-06, "loss": 0.314, "step": 37678 }, { "epoch": 3.8307238714924767, "grad_norm": 0.2557121515274048, "learning_rate": 1.5754229581828056e-06, "loss": 0.3199, "step": 37679 }, { "epoch": 3.8308255388369257, "grad_norm": 0.2744661867618561, "learning_rate": 1.5751643888657231e-06, "loss": 0.2916, "step": 37680 }, { "epoch": 3.8309272061813746, "grad_norm": 0.27376601099967957, "learning_rate": 1.5749058368021476e-06, "loss": 0.3372, "step": 37681 }, { "epoch": 3.8310288735258236, "grad_norm": 0.2768900692462921, "learning_rate": 1.5746473019933834e-06, "loss": 0.3004, "step": 37682 }, { "epoch": 3.8311305408702725, "grad_norm": 0.29862937331199646, "learning_rate": 1.5743887844407323e-06, "loss": 0.3195, "step": 37683 }, { "epoch": 3.8312322082147214, "grad_norm": 0.2755553424358368, "learning_rate": 1.5741302841454953e-06, "loss": 0.3031, "step": 37684 }, { "epoch": 3.8313338755591704, "grad_norm": 0.2811890244483948, "learning_rate": 1.5738718011089771e-06, "loss": 0.2646, "step": 37685 }, { "epoch": 3.8314355429036193, "grad_norm": 0.27785399556159973, "learning_rate": 1.5736133353324785e-06, "loss": 0.3096, "step": 37686 }, { "epoch": 3.8315372102480683, "grad_norm": 0.28125691413879395, "learning_rate": 1.5733548868173015e-06, "loss": 0.3128, "step": 37687 }, { "epoch": 3.831638877592517, "grad_norm": 0.2613111138343811, "learning_rate": 1.5730964555647471e-06, "loss": 0.3025, "step": 37688 }, { "epoch": 3.831740544936966, "grad_norm": 0.2632323205471039, "learning_rate": 1.5728380415761203e-06, "loss": 0.2634, "step": 37689 }, { "epoch": 3.831842212281415, "grad_norm": 0.2736540138721466, "learning_rate": 1.5725796448527204e-06, "loss": 0.3124, "step": 37690 }, { "epoch": 3.831943879625864, "grad_norm": 0.26971399784088135, "learning_rate": 1.572321265395848e-06, "loss": 0.3295, "step": 37691 }, { "epoch": 3.8320455469703134, "grad_norm": 0.280770868062973, "learning_rate": 1.5720629032068102e-06, "loss": 0.2959, "step": 37692 }, { "epoch": 3.8321472143147624, "grad_norm": 0.2604219913482666, "learning_rate": 1.5718045582869019e-06, "loss": 0.2849, "step": 37693 }, { "epoch": 3.8322488816592113, "grad_norm": 0.24419717490673065, "learning_rate": 1.571546230637427e-06, "loss": 0.3343, "step": 37694 }, { "epoch": 3.8323505490036602, "grad_norm": 0.25962376594543457, "learning_rate": 1.5712879202596892e-06, "loss": 0.3402, "step": 37695 }, { "epoch": 3.832452216348109, "grad_norm": 0.2742721736431122, "learning_rate": 1.5710296271549885e-06, "loss": 0.3107, "step": 37696 }, { "epoch": 3.832553883692558, "grad_norm": 0.28952622413635254, "learning_rate": 1.5707713513246254e-06, "loss": 0.3181, "step": 37697 }, { "epoch": 3.832655551037007, "grad_norm": 0.26260778307914734, "learning_rate": 1.5705130927698997e-06, "loss": 0.3062, "step": 37698 }, { "epoch": 3.832757218381456, "grad_norm": 0.2672014832496643, "learning_rate": 1.5702548514921156e-06, "loss": 0.3279, "step": 37699 }, { "epoch": 3.832858885725905, "grad_norm": 0.2814716696739197, "learning_rate": 1.5699966274925727e-06, "loss": 0.3466, "step": 37700 }, { "epoch": 3.832960553070354, "grad_norm": 0.25531601905822754, "learning_rate": 1.5697384207725697e-06, "loss": 0.3249, "step": 37701 }, { "epoch": 3.833062220414803, "grad_norm": 0.24884964525699615, "learning_rate": 1.5694802313334129e-06, "loss": 0.3023, "step": 37702 }, { "epoch": 3.8331638877592518, "grad_norm": 0.2984194755554199, "learning_rate": 1.5692220591763963e-06, "loss": 0.3369, "step": 37703 }, { "epoch": 3.8332655551037007, "grad_norm": 0.2520485520362854, "learning_rate": 1.5689639043028237e-06, "loss": 0.2973, "step": 37704 }, { "epoch": 3.8333672224481496, "grad_norm": 0.28918927907943726, "learning_rate": 1.5687057667139972e-06, "loss": 0.2967, "step": 37705 }, { "epoch": 3.8334688897925986, "grad_norm": 0.27934274077415466, "learning_rate": 1.5684476464112154e-06, "loss": 0.3091, "step": 37706 }, { "epoch": 3.8335705571370475, "grad_norm": 0.2552637755870819, "learning_rate": 1.5681895433957795e-06, "loss": 0.2953, "step": 37707 }, { "epoch": 3.8336722244814965, "grad_norm": 0.29854118824005127, "learning_rate": 1.567931457668987e-06, "loss": 0.3095, "step": 37708 }, { "epoch": 3.8337738918259454, "grad_norm": 0.26002082228660583, "learning_rate": 1.5676733892321416e-06, "loss": 0.3163, "step": 37709 }, { "epoch": 3.8338755591703944, "grad_norm": 0.28774571418762207, "learning_rate": 1.5674153380865426e-06, "loss": 0.3044, "step": 37710 }, { "epoch": 3.8339772265148433, "grad_norm": 0.25501272082328796, "learning_rate": 1.5671573042334875e-06, "loss": 0.3123, "step": 37711 }, { "epoch": 3.8340788938592922, "grad_norm": 0.2825106680393219, "learning_rate": 1.5668992876742817e-06, "loss": 0.3444, "step": 37712 }, { "epoch": 3.834180561203741, "grad_norm": 0.2803088426589966, "learning_rate": 1.5666412884102179e-06, "loss": 0.3194, "step": 37713 }, { "epoch": 3.83428222854819, "grad_norm": 0.26372697949409485, "learning_rate": 1.5663833064425992e-06, "loss": 0.3008, "step": 37714 }, { "epoch": 3.834383895892639, "grad_norm": 0.27834492921829224, "learning_rate": 1.566125341772729e-06, "loss": 0.3318, "step": 37715 }, { "epoch": 3.834485563237088, "grad_norm": 0.29025572538375854, "learning_rate": 1.5658673944019004e-06, "loss": 0.3392, "step": 37716 }, { "epoch": 3.834587230581537, "grad_norm": 0.2709799110889435, "learning_rate": 1.5656094643314168e-06, "loss": 0.2871, "step": 37717 }, { "epoch": 3.8346888979259863, "grad_norm": 0.26944923400878906, "learning_rate": 1.5653515515625756e-06, "loss": 0.2997, "step": 37718 }, { "epoch": 3.8347905652704353, "grad_norm": 0.2567528784275055, "learning_rate": 1.5650936560966784e-06, "loss": 0.3221, "step": 37719 }, { "epoch": 3.834892232614884, "grad_norm": 0.2899600565433502, "learning_rate": 1.5648357779350226e-06, "loss": 0.308, "step": 37720 }, { "epoch": 3.834993899959333, "grad_norm": 0.26582035422325134, "learning_rate": 1.5645779170789067e-06, "loss": 0.3038, "step": 37721 }, { "epoch": 3.835095567303782, "grad_norm": 0.25805696845054626, "learning_rate": 1.5643200735296343e-06, "loss": 0.2884, "step": 37722 }, { "epoch": 3.835197234648231, "grad_norm": 0.29245832562446594, "learning_rate": 1.5640622472884975e-06, "loss": 0.3017, "step": 37723 }, { "epoch": 3.83529890199268, "grad_norm": 0.2594309449195862, "learning_rate": 1.5638044383567986e-06, "loss": 0.3061, "step": 37724 }, { "epoch": 3.835400569337129, "grad_norm": 0.27181580662727356, "learning_rate": 1.5635466467358395e-06, "loss": 0.3409, "step": 37725 }, { "epoch": 3.835502236681578, "grad_norm": 0.263492614030838, "learning_rate": 1.5632888724269124e-06, "loss": 0.3235, "step": 37726 }, { "epoch": 3.835603904026027, "grad_norm": 0.26313769817352295, "learning_rate": 1.563031115431321e-06, "loss": 0.3277, "step": 37727 }, { "epoch": 3.8357055713704757, "grad_norm": 0.2600069046020508, "learning_rate": 1.5627733757503621e-06, "loss": 0.2974, "step": 37728 }, { "epoch": 3.8358072387149247, "grad_norm": 0.27099886536598206, "learning_rate": 1.5625156533853324e-06, "loss": 0.2914, "step": 37729 }, { "epoch": 3.8359089060593736, "grad_norm": 0.26185914874076843, "learning_rate": 1.562257948337534e-06, "loss": 0.334, "step": 37730 }, { "epoch": 3.8360105734038226, "grad_norm": 0.3005007803440094, "learning_rate": 1.5620002606082613e-06, "loss": 0.2952, "step": 37731 }, { "epoch": 3.8361122407482715, "grad_norm": 0.29636427760124207, "learning_rate": 1.5617425901988176e-06, "loss": 0.3253, "step": 37732 }, { "epoch": 3.836213908092721, "grad_norm": 0.2760879397392273, "learning_rate": 1.561484937110494e-06, "loss": 0.296, "step": 37733 }, { "epoch": 3.83631557543717, "grad_norm": 0.30077558755874634, "learning_rate": 1.5612273013445927e-06, "loss": 0.2738, "step": 37734 }, { "epoch": 3.8364172427816188, "grad_norm": 0.2744651436805725, "learning_rate": 1.560969682902414e-06, "loss": 0.3436, "step": 37735 }, { "epoch": 3.8365189101260677, "grad_norm": 0.30539265275001526, "learning_rate": 1.56071208178525e-06, "loss": 0.3133, "step": 37736 }, { "epoch": 3.8366205774705167, "grad_norm": 0.2696876525878906, "learning_rate": 1.5604544979944024e-06, "loss": 0.2816, "step": 37737 }, { "epoch": 3.8367222448149656, "grad_norm": 0.29302528500556946, "learning_rate": 1.5601969315311677e-06, "loss": 0.288, "step": 37738 }, { "epoch": 3.8368239121594145, "grad_norm": 0.288291335105896, "learning_rate": 1.5599393823968417e-06, "loss": 0.3644, "step": 37739 }, { "epoch": 3.8369255795038635, "grad_norm": 0.26718974113464355, "learning_rate": 1.5596818505927258e-06, "loss": 0.31, "step": 37740 }, { "epoch": 3.8370272468483124, "grad_norm": 0.25024664402008057, "learning_rate": 1.5594243361201133e-06, "loss": 0.2807, "step": 37741 }, { "epoch": 3.8371289141927614, "grad_norm": 0.2817453444004059, "learning_rate": 1.559166838980306e-06, "loss": 0.3026, "step": 37742 }, { "epoch": 3.8372305815372103, "grad_norm": 0.27643322944641113, "learning_rate": 1.5589093591745963e-06, "loss": 0.3124, "step": 37743 }, { "epoch": 3.8373322488816592, "grad_norm": 0.27241387963294983, "learning_rate": 1.558651896704283e-06, "loss": 0.3433, "step": 37744 }, { "epoch": 3.837433916226108, "grad_norm": 0.2666279375553131, "learning_rate": 1.5583944515706667e-06, "loss": 0.321, "step": 37745 }, { "epoch": 3.837535583570557, "grad_norm": 0.3038494884967804, "learning_rate": 1.5581370237750382e-06, "loss": 0.3235, "step": 37746 }, { "epoch": 3.837637250915006, "grad_norm": 0.24805045127868652, "learning_rate": 1.5578796133186991e-06, "loss": 0.3011, "step": 37747 }, { "epoch": 3.837738918259455, "grad_norm": 0.2671283185482025, "learning_rate": 1.5576222202029445e-06, "loss": 0.2802, "step": 37748 }, { "epoch": 3.837840585603904, "grad_norm": 0.26157939434051514, "learning_rate": 1.5573648444290701e-06, "loss": 0.3034, "step": 37749 }, { "epoch": 3.837942252948353, "grad_norm": 0.28218647837638855, "learning_rate": 1.5571074859983743e-06, "loss": 0.2964, "step": 37750 }, { "epoch": 3.838043920292802, "grad_norm": 0.26219162344932556, "learning_rate": 1.5568501449121531e-06, "loss": 0.2727, "step": 37751 }, { "epoch": 3.8381455876372508, "grad_norm": 0.2821159064769745, "learning_rate": 1.556592821171703e-06, "loss": 0.3145, "step": 37752 }, { "epoch": 3.8382472549816997, "grad_norm": 0.2629471719264984, "learning_rate": 1.5563355147783177e-06, "loss": 0.2867, "step": 37753 }, { "epoch": 3.8383489223261487, "grad_norm": 0.2831414043903351, "learning_rate": 1.5560782257332962e-06, "loss": 0.3045, "step": 37754 }, { "epoch": 3.8384505896705976, "grad_norm": 0.25958967208862305, "learning_rate": 1.5558209540379377e-06, "loss": 0.3195, "step": 37755 }, { "epoch": 3.8385522570150465, "grad_norm": 0.27658870816230774, "learning_rate": 1.5555636996935308e-06, "loss": 0.2965, "step": 37756 }, { "epoch": 3.8386539243594955, "grad_norm": 0.28641802072525024, "learning_rate": 1.5553064627013775e-06, "loss": 0.3121, "step": 37757 }, { "epoch": 3.8387555917039444, "grad_norm": 0.2903452515602112, "learning_rate": 1.5550492430627717e-06, "loss": 0.2564, "step": 37758 }, { "epoch": 3.838857259048394, "grad_norm": 0.2617778182029724, "learning_rate": 1.5547920407790074e-06, "loss": 0.3249, "step": 37759 }, { "epoch": 3.8389589263928428, "grad_norm": 0.2802443206310272, "learning_rate": 1.554534855851384e-06, "loss": 0.3497, "step": 37760 }, { "epoch": 3.8390605937372917, "grad_norm": 0.2719978094100952, "learning_rate": 1.5542776882811949e-06, "loss": 0.2808, "step": 37761 }, { "epoch": 3.8391622610817406, "grad_norm": 0.2642953395843506, "learning_rate": 1.554020538069736e-06, "loss": 0.3012, "step": 37762 }, { "epoch": 3.8392639284261896, "grad_norm": 0.265571653842926, "learning_rate": 1.5537634052183015e-06, "loss": 0.2945, "step": 37763 }, { "epoch": 3.8393655957706385, "grad_norm": 0.2644313871860504, "learning_rate": 1.5535062897281877e-06, "loss": 0.3052, "step": 37764 }, { "epoch": 3.8394672631150875, "grad_norm": 0.2724871039390564, "learning_rate": 1.5532491916006932e-06, "loss": 0.2786, "step": 37765 }, { "epoch": 3.8395689304595364, "grad_norm": 0.2792218029499054, "learning_rate": 1.5529921108371072e-06, "loss": 0.3199, "step": 37766 }, { "epoch": 3.8396705978039853, "grad_norm": 0.27047911286354065, "learning_rate": 1.5527350474387293e-06, "loss": 0.3215, "step": 37767 }, { "epoch": 3.8397722651484343, "grad_norm": 0.277575820684433, "learning_rate": 1.5524780014068535e-06, "loss": 0.3088, "step": 37768 }, { "epoch": 3.8398739324928832, "grad_norm": 0.2917148470878601, "learning_rate": 1.5522209727427723e-06, "loss": 0.325, "step": 37769 }, { "epoch": 3.839975599837332, "grad_norm": 0.29884108901023865, "learning_rate": 1.551963961447784e-06, "loss": 0.3206, "step": 37770 }, { "epoch": 3.840077267181781, "grad_norm": 0.25607946515083313, "learning_rate": 1.5517069675231823e-06, "loss": 0.3113, "step": 37771 }, { "epoch": 3.84017893452623, "grad_norm": 0.2766496539115906, "learning_rate": 1.5514499909702614e-06, "loss": 0.33, "step": 37772 }, { "epoch": 3.840280601870679, "grad_norm": 0.2620110809803009, "learning_rate": 1.551193031790314e-06, "loss": 0.3284, "step": 37773 }, { "epoch": 3.8403822692151284, "grad_norm": 0.2904369533061981, "learning_rate": 1.550936089984638e-06, "loss": 0.3168, "step": 37774 }, { "epoch": 3.8404839365595773, "grad_norm": 0.2680259048938751, "learning_rate": 1.5506791655545266e-06, "loss": 0.3419, "step": 37775 }, { "epoch": 3.8405856039040263, "grad_norm": 0.2823939025402069, "learning_rate": 1.5504222585012723e-06, "loss": 0.3496, "step": 37776 }, { "epoch": 3.840687271248475, "grad_norm": 0.3171432912349701, "learning_rate": 1.5501653688261726e-06, "loss": 0.3014, "step": 37777 }, { "epoch": 3.840788938592924, "grad_norm": 0.2588156461715698, "learning_rate": 1.54990849653052e-06, "loss": 0.3411, "step": 37778 }, { "epoch": 3.840890605937373, "grad_norm": 0.26671430468559265, "learning_rate": 1.5496516416156066e-06, "loss": 0.2944, "step": 37779 }, { "epoch": 3.840992273281822, "grad_norm": 0.2526693344116211, "learning_rate": 1.5493948040827305e-06, "loss": 0.2681, "step": 37780 }, { "epoch": 3.841093940626271, "grad_norm": 0.28133106231689453, "learning_rate": 1.5491379839331827e-06, "loss": 0.3114, "step": 37781 }, { "epoch": 3.84119560797072, "grad_norm": 0.30629289150238037, "learning_rate": 1.548881181168258e-06, "loss": 0.3271, "step": 37782 }, { "epoch": 3.841297275315169, "grad_norm": 0.27549442648887634, "learning_rate": 1.5486243957892483e-06, "loss": 0.3504, "step": 37783 }, { "epoch": 3.841398942659618, "grad_norm": 0.3012276589870453, "learning_rate": 1.5483676277974502e-06, "loss": 0.3032, "step": 37784 }, { "epoch": 3.8415006100040667, "grad_norm": 0.3022049069404602, "learning_rate": 1.548110877194156e-06, "loss": 0.2853, "step": 37785 }, { "epoch": 3.8416022773485157, "grad_norm": 0.2597425878047943, "learning_rate": 1.547854143980657e-06, "loss": 0.3148, "step": 37786 }, { "epoch": 3.8417039446929646, "grad_norm": 0.2578328251838684, "learning_rate": 1.5475974281582501e-06, "loss": 0.3176, "step": 37787 }, { "epoch": 3.8418056120374136, "grad_norm": 0.2783639430999756, "learning_rate": 1.5473407297282273e-06, "loss": 0.3512, "step": 37788 }, { "epoch": 3.8419072793818625, "grad_norm": 0.2818838655948639, "learning_rate": 1.5470840486918793e-06, "loss": 0.3109, "step": 37789 }, { "epoch": 3.8420089467263114, "grad_norm": 0.28993165493011475, "learning_rate": 1.5468273850505034e-06, "loss": 0.3227, "step": 37790 }, { "epoch": 3.8421106140707604, "grad_norm": 0.25473207235336304, "learning_rate": 1.5465707388053901e-06, "loss": 0.3501, "step": 37791 }, { "epoch": 3.8422122814152093, "grad_norm": 0.273279070854187, "learning_rate": 1.5463141099578326e-06, "loss": 0.3177, "step": 37792 }, { "epoch": 3.8423139487596583, "grad_norm": 0.2542344629764557, "learning_rate": 1.5460574985091226e-06, "loss": 0.3174, "step": 37793 }, { "epoch": 3.842415616104107, "grad_norm": 0.28317439556121826, "learning_rate": 1.545800904460556e-06, "loss": 0.3095, "step": 37794 }, { "epoch": 3.842517283448556, "grad_norm": 0.2780306339263916, "learning_rate": 1.5455443278134236e-06, "loss": 0.3266, "step": 37795 }, { "epoch": 3.842618950793005, "grad_norm": 0.26764601469039917, "learning_rate": 1.545287768569016e-06, "loss": 0.311, "step": 37796 }, { "epoch": 3.842720618137454, "grad_norm": 0.2640143930912018, "learning_rate": 1.5450312267286294e-06, "loss": 0.3126, "step": 37797 }, { "epoch": 3.842822285481903, "grad_norm": 0.25322964787483215, "learning_rate": 1.5447747022935545e-06, "loss": 0.3186, "step": 37798 }, { "epoch": 3.842923952826352, "grad_norm": 0.29278281331062317, "learning_rate": 1.5445181952650817e-06, "loss": 0.3396, "step": 37799 }, { "epoch": 3.8430256201708013, "grad_norm": 0.2686821520328522, "learning_rate": 1.544261705644507e-06, "loss": 0.3101, "step": 37800 }, { "epoch": 3.8431272875152502, "grad_norm": 0.25999149680137634, "learning_rate": 1.5440052334331208e-06, "loss": 0.2958, "step": 37801 }, { "epoch": 3.843228954859699, "grad_norm": 0.27753597497940063, "learning_rate": 1.5437487786322142e-06, "loss": 0.2885, "step": 37802 }, { "epoch": 3.843330622204148, "grad_norm": 0.28727132081985474, "learning_rate": 1.5434923412430792e-06, "loss": 0.2965, "step": 37803 }, { "epoch": 3.843432289548597, "grad_norm": 0.27393874526023865, "learning_rate": 1.5432359212670094e-06, "loss": 0.3216, "step": 37804 }, { "epoch": 3.843533956893046, "grad_norm": 0.264251172542572, "learning_rate": 1.5429795187052959e-06, "loss": 0.3029, "step": 37805 }, { "epoch": 3.843635624237495, "grad_norm": 0.26949024200439453, "learning_rate": 1.542723133559228e-06, "loss": 0.2909, "step": 37806 }, { "epoch": 3.843737291581944, "grad_norm": 0.3051743805408478, "learning_rate": 1.5424667658301013e-06, "loss": 0.3199, "step": 37807 }, { "epoch": 3.843838958926393, "grad_norm": 0.26516982913017273, "learning_rate": 1.5422104155192058e-06, "loss": 0.3069, "step": 37808 }, { "epoch": 3.8439406262708418, "grad_norm": 0.2546493411064148, "learning_rate": 1.54195408262783e-06, "loss": 0.3034, "step": 37809 }, { "epoch": 3.8440422936152907, "grad_norm": 0.25727835297584534, "learning_rate": 1.5416977671572703e-06, "loss": 0.3176, "step": 37810 }, { "epoch": 3.8441439609597396, "grad_norm": 0.296966016292572, "learning_rate": 1.5414414691088147e-06, "loss": 0.3241, "step": 37811 }, { "epoch": 3.8442456283041886, "grad_norm": 0.28922683000564575, "learning_rate": 1.5411851884837558e-06, "loss": 0.3177, "step": 37812 }, { "epoch": 3.8443472956486375, "grad_norm": 0.28942084312438965, "learning_rate": 1.540928925283382e-06, "loss": 0.2935, "step": 37813 }, { "epoch": 3.8444489629930865, "grad_norm": 0.27442678809165955, "learning_rate": 1.5406726795089876e-06, "loss": 0.3165, "step": 37814 }, { "epoch": 3.844550630337536, "grad_norm": 0.27133631706237793, "learning_rate": 1.5404164511618625e-06, "loss": 0.2981, "step": 37815 }, { "epoch": 3.844652297681985, "grad_norm": 0.28453609347343445, "learning_rate": 1.5401602402432959e-06, "loss": 0.3037, "step": 37816 }, { "epoch": 3.8447539650264337, "grad_norm": 0.2902771234512329, "learning_rate": 1.5399040467545811e-06, "loss": 0.326, "step": 37817 }, { "epoch": 3.8448556323708827, "grad_norm": 0.27551189064979553, "learning_rate": 1.5396478706970076e-06, "loss": 0.282, "step": 37818 }, { "epoch": 3.8449572997153316, "grad_norm": 0.27207836508750916, "learning_rate": 1.5393917120718638e-06, "loss": 0.2942, "step": 37819 }, { "epoch": 3.8450589670597806, "grad_norm": 0.2717430889606476, "learning_rate": 1.539135570880444e-06, "loss": 0.344, "step": 37820 }, { "epoch": 3.8451606344042295, "grad_norm": 0.28546321392059326, "learning_rate": 1.538879447124037e-06, "loss": 0.2935, "step": 37821 }, { "epoch": 3.8452623017486784, "grad_norm": 0.25640416145324707, "learning_rate": 1.5386233408039325e-06, "loss": 0.331, "step": 37822 }, { "epoch": 3.8453639690931274, "grad_norm": 0.27391043305397034, "learning_rate": 1.5383672519214199e-06, "loss": 0.3223, "step": 37823 }, { "epoch": 3.8454656364375763, "grad_norm": 0.2959291636943817, "learning_rate": 1.5381111804777914e-06, "loss": 0.2766, "step": 37824 }, { "epoch": 3.8455673037820253, "grad_norm": 0.2785239815711975, "learning_rate": 1.537855126474337e-06, "loss": 0.2995, "step": 37825 }, { "epoch": 3.845668971126474, "grad_norm": 0.286260187625885, "learning_rate": 1.5375990899123433e-06, "loss": 0.2735, "step": 37826 }, { "epoch": 3.845770638470923, "grad_norm": 0.2728423476219177, "learning_rate": 1.5373430707931047e-06, "loss": 0.2904, "step": 37827 }, { "epoch": 3.845872305815372, "grad_norm": 0.2777961492538452, "learning_rate": 1.537087069117909e-06, "loss": 0.314, "step": 37828 }, { "epoch": 3.845973973159821, "grad_norm": 0.27703070640563965, "learning_rate": 1.5368310848880435e-06, "loss": 0.3469, "step": 37829 }, { "epoch": 3.84607564050427, "grad_norm": 0.27273228764533997, "learning_rate": 1.536575118104802e-06, "loss": 0.2995, "step": 37830 }, { "epoch": 3.846177307848719, "grad_norm": 0.2705542743206024, "learning_rate": 1.5363191687694723e-06, "loss": 0.3299, "step": 37831 }, { "epoch": 3.846278975193168, "grad_norm": 0.29056090116500854, "learning_rate": 1.5360632368833428e-06, "loss": 0.2782, "step": 37832 }, { "epoch": 3.846380642537617, "grad_norm": 0.2672838866710663, "learning_rate": 1.5358073224477027e-06, "loss": 0.305, "step": 37833 }, { "epoch": 3.8464823098820657, "grad_norm": 0.2648765444755554, "learning_rate": 1.5355514254638431e-06, "loss": 0.3114, "step": 37834 }, { "epoch": 3.8465839772265147, "grad_norm": 0.273062527179718, "learning_rate": 1.5352955459330527e-06, "loss": 0.3089, "step": 37835 }, { "epoch": 3.8466856445709636, "grad_norm": 0.2665717899799347, "learning_rate": 1.5350396838566177e-06, "loss": 0.3116, "step": 37836 }, { "epoch": 3.8467873119154126, "grad_norm": 0.2683773636817932, "learning_rate": 1.5347838392358316e-06, "loss": 0.3053, "step": 37837 }, { "epoch": 3.8468889792598615, "grad_norm": 0.2747464179992676, "learning_rate": 1.5345280120719807e-06, "loss": 0.2989, "step": 37838 }, { "epoch": 3.8469906466043104, "grad_norm": 0.25854432582855225, "learning_rate": 1.5342722023663543e-06, "loss": 0.3296, "step": 37839 }, { "epoch": 3.8470923139487594, "grad_norm": 0.2750748097896576, "learning_rate": 1.5340164101202394e-06, "loss": 0.3342, "step": 37840 }, { "epoch": 3.8471939812932088, "grad_norm": 0.25282523036003113, "learning_rate": 1.5337606353349277e-06, "loss": 0.311, "step": 37841 }, { "epoch": 3.8472956486376577, "grad_norm": 0.2588954269886017, "learning_rate": 1.5335048780117061e-06, "loss": 0.3247, "step": 37842 }, { "epoch": 3.8473973159821067, "grad_norm": 0.27854016423225403, "learning_rate": 1.5332491381518621e-06, "loss": 0.3095, "step": 37843 }, { "epoch": 3.8474989833265556, "grad_norm": 0.26823046803474426, "learning_rate": 1.532993415756688e-06, "loss": 0.2914, "step": 37844 }, { "epoch": 3.8476006506710045, "grad_norm": 0.2851196527481079, "learning_rate": 1.5327377108274654e-06, "loss": 0.3449, "step": 37845 }, { "epoch": 3.8477023180154535, "grad_norm": 0.26866886019706726, "learning_rate": 1.532482023365487e-06, "loss": 0.3225, "step": 37846 }, { "epoch": 3.8478039853599024, "grad_norm": 0.27918004989624023, "learning_rate": 1.5322263533720415e-06, "loss": 0.3059, "step": 37847 }, { "epoch": 3.8479056527043514, "grad_norm": 0.28622177243232727, "learning_rate": 1.5319707008484159e-06, "loss": 0.293, "step": 37848 }, { "epoch": 3.8480073200488003, "grad_norm": 0.2490495890378952, "learning_rate": 1.5317150657958974e-06, "loss": 0.2797, "step": 37849 }, { "epoch": 3.8481089873932492, "grad_norm": 0.27557966113090515, "learning_rate": 1.5314594482157725e-06, "loss": 0.3195, "step": 37850 }, { "epoch": 3.848210654737698, "grad_norm": 0.2910222113132477, "learning_rate": 1.5312038481093323e-06, "loss": 0.311, "step": 37851 }, { "epoch": 3.848312322082147, "grad_norm": 0.2625686526298523, "learning_rate": 1.5309482654778628e-06, "loss": 0.3243, "step": 37852 }, { "epoch": 3.848413989426596, "grad_norm": 0.2867978811264038, "learning_rate": 1.53069270032265e-06, "loss": 0.2675, "step": 37853 }, { "epoch": 3.848515656771045, "grad_norm": 0.26106491684913635, "learning_rate": 1.5304371526449862e-06, "loss": 0.3082, "step": 37854 }, { "epoch": 3.848617324115494, "grad_norm": 0.25700515508651733, "learning_rate": 1.5301816224461524e-06, "loss": 0.3192, "step": 37855 }, { "epoch": 3.8487189914599433, "grad_norm": 0.2821118235588074, "learning_rate": 1.5299261097274392e-06, "loss": 0.2659, "step": 37856 }, { "epoch": 3.8488206588043923, "grad_norm": 0.2819961607456207, "learning_rate": 1.5296706144901353e-06, "loss": 0.3162, "step": 37857 }, { "epoch": 3.848922326148841, "grad_norm": 0.26901087164878845, "learning_rate": 1.529415136735526e-06, "loss": 0.316, "step": 37858 }, { "epoch": 3.84902399349329, "grad_norm": 0.2890653908252716, "learning_rate": 1.529159676464898e-06, "loss": 0.2877, "step": 37859 }, { "epoch": 3.849125660837739, "grad_norm": 0.281512975692749, "learning_rate": 1.5289042336795378e-06, "loss": 0.3005, "step": 37860 }, { "epoch": 3.849227328182188, "grad_norm": 0.28726112842559814, "learning_rate": 1.5286488083807344e-06, "loss": 0.3178, "step": 37861 }, { "epoch": 3.849328995526637, "grad_norm": 0.2691175639629364, "learning_rate": 1.5283934005697737e-06, "loss": 0.3505, "step": 37862 }, { "epoch": 3.849430662871086, "grad_norm": 0.2702559530735016, "learning_rate": 1.5281380102479404e-06, "loss": 0.3201, "step": 37863 }, { "epoch": 3.849532330215535, "grad_norm": 0.3025105595588684, "learning_rate": 1.5278826374165257e-06, "loss": 0.2919, "step": 37864 }, { "epoch": 3.849633997559984, "grad_norm": 0.2803938388824463, "learning_rate": 1.52762728207681e-06, "loss": 0.2802, "step": 37865 }, { "epoch": 3.8497356649044328, "grad_norm": 0.282971054315567, "learning_rate": 1.5273719442300832e-06, "loss": 0.3471, "step": 37866 }, { "epoch": 3.8498373322488817, "grad_norm": 0.2692686915397644, "learning_rate": 1.5271166238776341e-06, "loss": 0.3125, "step": 37867 }, { "epoch": 3.8499389995933306, "grad_norm": 0.293318510055542, "learning_rate": 1.5268613210207433e-06, "loss": 0.3158, "step": 37868 }, { "epoch": 3.8500406669377796, "grad_norm": 0.2774103581905365, "learning_rate": 1.5266060356607015e-06, "loss": 0.3034, "step": 37869 }, { "epoch": 3.8501423342822285, "grad_norm": 0.2572212517261505, "learning_rate": 1.5263507677987916e-06, "loss": 0.3011, "step": 37870 }, { "epoch": 3.8502440016266775, "grad_norm": 0.2589198052883148, "learning_rate": 1.5260955174363023e-06, "loss": 0.3326, "step": 37871 }, { "epoch": 3.8503456689711264, "grad_norm": 0.27203088998794556, "learning_rate": 1.525840284574518e-06, "loss": 0.2993, "step": 37872 }, { "epoch": 3.8504473363155753, "grad_norm": 0.25509899854660034, "learning_rate": 1.525585069214724e-06, "loss": 0.3127, "step": 37873 }, { "epoch": 3.8505490036600243, "grad_norm": 0.2961125075817108, "learning_rate": 1.5253298713582093e-06, "loss": 0.3086, "step": 37874 }, { "epoch": 3.8506506710044732, "grad_norm": 0.25545138120651245, "learning_rate": 1.5250746910062537e-06, "loss": 0.3097, "step": 37875 }, { "epoch": 3.850752338348922, "grad_norm": 0.26171332597732544, "learning_rate": 1.5248195281601463e-06, "loss": 0.3219, "step": 37876 }, { "epoch": 3.850854005693371, "grad_norm": 0.2721666097640991, "learning_rate": 1.524564382821176e-06, "loss": 0.3023, "step": 37877 }, { "epoch": 3.85095567303782, "grad_norm": 0.2586680054664612, "learning_rate": 1.5243092549906201e-06, "loss": 0.3327, "step": 37878 }, { "epoch": 3.851057340382269, "grad_norm": 0.268159419298172, "learning_rate": 1.524054144669771e-06, "loss": 0.3036, "step": 37879 }, { "epoch": 3.851159007726718, "grad_norm": 0.2946070432662964, "learning_rate": 1.5237990518599106e-06, "loss": 0.3145, "step": 37880 }, { "epoch": 3.851260675071167, "grad_norm": 0.28976237773895264, "learning_rate": 1.5235439765623227e-06, "loss": 0.3054, "step": 37881 }, { "epoch": 3.8513623424156163, "grad_norm": 0.2629430890083313, "learning_rate": 1.5232889187782961e-06, "loss": 0.3227, "step": 37882 }, { "epoch": 3.851464009760065, "grad_norm": 0.2686876058578491, "learning_rate": 1.523033878509112e-06, "loss": 0.3235, "step": 37883 }, { "epoch": 3.851565677104514, "grad_norm": 0.2638988494873047, "learning_rate": 1.5227788557560596e-06, "loss": 0.287, "step": 37884 }, { "epoch": 3.851667344448963, "grad_norm": 0.26461905241012573, "learning_rate": 1.5225238505204187e-06, "loss": 0.2939, "step": 37885 }, { "epoch": 3.851769011793412, "grad_norm": 0.2750512659549713, "learning_rate": 1.522268862803476e-06, "loss": 0.2851, "step": 37886 }, { "epoch": 3.851870679137861, "grad_norm": 0.26286715269088745, "learning_rate": 1.5220138926065192e-06, "loss": 0.3212, "step": 37887 }, { "epoch": 3.85197234648231, "grad_norm": 0.28626900911331177, "learning_rate": 1.521758939930827e-06, "loss": 0.269, "step": 37888 }, { "epoch": 3.852074013826759, "grad_norm": 0.26808962225914, "learning_rate": 1.521504004777688e-06, "loss": 0.3225, "step": 37889 }, { "epoch": 3.852175681171208, "grad_norm": 0.2787913382053375, "learning_rate": 1.5212490871483854e-06, "loss": 0.3021, "step": 37890 }, { "epoch": 3.8522773485156567, "grad_norm": 0.26376649737358093, "learning_rate": 1.520994187044202e-06, "loss": 0.2974, "step": 37891 }, { "epoch": 3.8523790158601057, "grad_norm": 0.27406877279281616, "learning_rate": 1.5207393044664242e-06, "loss": 0.2939, "step": 37892 }, { "epoch": 3.8524806832045546, "grad_norm": 0.26974886655807495, "learning_rate": 1.5204844394163337e-06, "loss": 0.3209, "step": 37893 }, { "epoch": 3.8525823505490036, "grad_norm": 0.28325769305229187, "learning_rate": 1.5202295918952187e-06, "loss": 0.2926, "step": 37894 }, { "epoch": 3.8526840178934525, "grad_norm": 0.2719743847846985, "learning_rate": 1.519974761904357e-06, "loss": 0.3056, "step": 37895 }, { "epoch": 3.8527856852379014, "grad_norm": 0.2779698967933655, "learning_rate": 1.5197199494450354e-06, "loss": 0.303, "step": 37896 }, { "epoch": 3.852887352582351, "grad_norm": 0.2992844581604004, "learning_rate": 1.5194651545185413e-06, "loss": 0.2843, "step": 37897 }, { "epoch": 3.8529890199267998, "grad_norm": 0.28728654980659485, "learning_rate": 1.5192103771261508e-06, "loss": 0.2908, "step": 37898 }, { "epoch": 3.8530906872712487, "grad_norm": 0.27521973848342896, "learning_rate": 1.5189556172691532e-06, "loss": 0.2735, "step": 37899 }, { "epoch": 3.8531923546156976, "grad_norm": 0.2975398302078247, "learning_rate": 1.5187008749488302e-06, "loss": 0.3369, "step": 37900 }, { "epoch": 3.8532940219601466, "grad_norm": 0.29030105471611023, "learning_rate": 1.5184461501664628e-06, "loss": 0.3078, "step": 37901 }, { "epoch": 3.8533956893045955, "grad_norm": 0.2581437826156616, "learning_rate": 1.5181914429233386e-06, "loss": 0.3086, "step": 37902 }, { "epoch": 3.8534973566490445, "grad_norm": 0.2654114067554474, "learning_rate": 1.5179367532207378e-06, "loss": 0.3091, "step": 37903 }, { "epoch": 3.8535990239934934, "grad_norm": 0.28148597478866577, "learning_rate": 1.5176820810599447e-06, "loss": 0.2732, "step": 37904 }, { "epoch": 3.8537006913379424, "grad_norm": 0.2901407480239868, "learning_rate": 1.5174274264422395e-06, "loss": 0.281, "step": 37905 }, { "epoch": 3.8538023586823913, "grad_norm": 0.2783122658729553, "learning_rate": 1.5171727893689082e-06, "loss": 0.3253, "step": 37906 }, { "epoch": 3.8539040260268402, "grad_norm": 0.29646021127700806, "learning_rate": 1.516918169841235e-06, "loss": 0.3213, "step": 37907 }, { "epoch": 3.854005693371289, "grad_norm": 0.2654056251049042, "learning_rate": 1.516663567860498e-06, "loss": 0.271, "step": 37908 }, { "epoch": 3.854107360715738, "grad_norm": 0.2633744180202484, "learning_rate": 1.5164089834279832e-06, "loss": 0.3197, "step": 37909 }, { "epoch": 3.854209028060187, "grad_norm": 0.27009865641593933, "learning_rate": 1.5161544165449725e-06, "loss": 0.3029, "step": 37910 }, { "epoch": 3.854310695404636, "grad_norm": 0.28850287199020386, "learning_rate": 1.515899867212746e-06, "loss": 0.3036, "step": 37911 }, { "epoch": 3.854412362749085, "grad_norm": 0.28882449865341187, "learning_rate": 1.51564533543259e-06, "loss": 0.3311, "step": 37912 }, { "epoch": 3.854514030093534, "grad_norm": 0.27522963285446167, "learning_rate": 1.5153908212057843e-06, "loss": 0.3358, "step": 37913 }, { "epoch": 3.854615697437983, "grad_norm": 0.27927759289741516, "learning_rate": 1.515136324533612e-06, "loss": 0.3401, "step": 37914 }, { "epoch": 3.8547173647824318, "grad_norm": 0.27790191769599915, "learning_rate": 1.514881845417353e-06, "loss": 0.3219, "step": 37915 }, { "epoch": 3.8548190321268807, "grad_norm": 0.2962733507156372, "learning_rate": 1.5146273838582915e-06, "loss": 0.2689, "step": 37916 }, { "epoch": 3.8549206994713296, "grad_norm": 0.27545323967933655, "learning_rate": 1.5143729398577118e-06, "loss": 0.3129, "step": 37917 }, { "epoch": 3.8550223668157786, "grad_norm": 0.27530214190483093, "learning_rate": 1.5141185134168895e-06, "loss": 0.2972, "step": 37918 }, { "epoch": 3.8551240341602275, "grad_norm": 0.27792391180992126, "learning_rate": 1.5138641045371117e-06, "loss": 0.3283, "step": 37919 }, { "epoch": 3.8552257015046765, "grad_norm": 0.2779720723628998, "learning_rate": 1.513609713219658e-06, "loss": 0.3164, "step": 37920 }, { "epoch": 3.8553273688491254, "grad_norm": 0.25770851969718933, "learning_rate": 1.5133553394658085e-06, "loss": 0.309, "step": 37921 }, { "epoch": 3.8554290361935744, "grad_norm": 0.29412245750427246, "learning_rate": 1.5131009832768479e-06, "loss": 0.2944, "step": 37922 }, { "epoch": 3.8555307035380237, "grad_norm": 0.2716296315193176, "learning_rate": 1.5128466446540557e-06, "loss": 0.3198, "step": 37923 }, { "epoch": 3.8556323708824727, "grad_norm": 0.27923640608787537, "learning_rate": 1.5125923235987133e-06, "loss": 0.3209, "step": 37924 }, { "epoch": 3.8557340382269216, "grad_norm": 0.2748831808567047, "learning_rate": 1.5123380201121008e-06, "loss": 0.2698, "step": 37925 }, { "epoch": 3.8558357055713706, "grad_norm": 0.2776985764503479, "learning_rate": 1.512083734195502e-06, "loss": 0.2934, "step": 37926 }, { "epoch": 3.8559373729158195, "grad_norm": 0.2746511697769165, "learning_rate": 1.5118294658501963e-06, "loss": 0.3112, "step": 37927 }, { "epoch": 3.8560390402602684, "grad_norm": 0.27483415603637695, "learning_rate": 1.5115752150774638e-06, "loss": 0.2936, "step": 37928 }, { "epoch": 3.8561407076047174, "grad_norm": 0.3041940927505493, "learning_rate": 1.511320981878588e-06, "loss": 0.3258, "step": 37929 }, { "epoch": 3.8562423749491663, "grad_norm": 0.2725526988506317, "learning_rate": 1.5110667662548472e-06, "loss": 0.2934, "step": 37930 }, { "epoch": 3.8563440422936153, "grad_norm": 0.29010891914367676, "learning_rate": 1.510812568207522e-06, "loss": 0.305, "step": 37931 }, { "epoch": 3.856445709638064, "grad_norm": 0.27332353591918945, "learning_rate": 1.5105583877378954e-06, "loss": 0.3592, "step": 37932 }, { "epoch": 3.856547376982513, "grad_norm": 0.2771863043308258, "learning_rate": 1.5103042248472467e-06, "loss": 0.3015, "step": 37933 }, { "epoch": 3.856649044326962, "grad_norm": 0.2822601795196533, "learning_rate": 1.5100500795368556e-06, "loss": 0.3285, "step": 37934 }, { "epoch": 3.856750711671411, "grad_norm": 0.26508787274360657, "learning_rate": 1.5097959518080014e-06, "loss": 0.3247, "step": 37935 }, { "epoch": 3.85685237901586, "grad_norm": 0.2689445912837982, "learning_rate": 1.5095418416619672e-06, "loss": 0.3269, "step": 37936 }, { "epoch": 3.856954046360309, "grad_norm": 0.26659852266311646, "learning_rate": 1.5092877491000325e-06, "loss": 0.3393, "step": 37937 }, { "epoch": 3.8570557137047583, "grad_norm": 0.2926173806190491, "learning_rate": 1.509033674123474e-06, "loss": 0.3134, "step": 37938 }, { "epoch": 3.8571573810492072, "grad_norm": 0.27467581629753113, "learning_rate": 1.5087796167335767e-06, "loss": 0.2914, "step": 37939 }, { "epoch": 3.857259048393656, "grad_norm": 0.27596303820610046, "learning_rate": 1.5085255769316175e-06, "loss": 0.3028, "step": 37940 }, { "epoch": 3.857360715738105, "grad_norm": 0.24862489104270935, "learning_rate": 1.5082715547188753e-06, "loss": 0.2983, "step": 37941 }, { "epoch": 3.857462383082554, "grad_norm": 0.2801726162433624, "learning_rate": 1.5080175500966326e-06, "loss": 0.324, "step": 37942 }, { "epoch": 3.857564050427003, "grad_norm": 0.26361915469169617, "learning_rate": 1.507763563066168e-06, "loss": 0.3507, "step": 37943 }, { "epoch": 3.857665717771452, "grad_norm": 0.2844668924808502, "learning_rate": 1.507509593628761e-06, "loss": 0.2909, "step": 37944 }, { "epoch": 3.857767385115901, "grad_norm": 0.2897976040840149, "learning_rate": 1.5072556417856881e-06, "loss": 0.3011, "step": 37945 }, { "epoch": 3.85786905246035, "grad_norm": 0.27193155884742737, "learning_rate": 1.5070017075382336e-06, "loss": 0.2852, "step": 37946 }, { "epoch": 3.8579707198047988, "grad_norm": 0.2618831992149353, "learning_rate": 1.5067477908876743e-06, "loss": 0.3046, "step": 37947 }, { "epoch": 3.8580723871492477, "grad_norm": 0.2650982439517975, "learning_rate": 1.5064938918352885e-06, "loss": 0.3252, "step": 37948 }, { "epoch": 3.8581740544936967, "grad_norm": 0.30461370944976807, "learning_rate": 1.5062400103823572e-06, "loss": 0.3065, "step": 37949 }, { "epoch": 3.8582757218381456, "grad_norm": 0.2857023775577545, "learning_rate": 1.5059861465301583e-06, "loss": 0.3014, "step": 37950 }, { "epoch": 3.8583773891825945, "grad_norm": 0.3080933392047882, "learning_rate": 1.5057323002799695e-06, "loss": 0.3562, "step": 37951 }, { "epoch": 3.8584790565270435, "grad_norm": 0.263187438249588, "learning_rate": 1.5054784716330728e-06, "loss": 0.3184, "step": 37952 }, { "epoch": 3.8585807238714924, "grad_norm": 0.261950820684433, "learning_rate": 1.505224660590745e-06, "loss": 0.3113, "step": 37953 }, { "epoch": 3.8586823912159414, "grad_norm": 0.26871973276138306, "learning_rate": 1.5049708671542651e-06, "loss": 0.3288, "step": 37954 }, { "epoch": 3.8587840585603903, "grad_norm": 0.2583218216896057, "learning_rate": 1.5047170913249098e-06, "loss": 0.2967, "step": 37955 }, { "epoch": 3.8588857259048392, "grad_norm": 0.29810401797294617, "learning_rate": 1.5044633331039604e-06, "loss": 0.3282, "step": 37956 }, { "epoch": 3.858987393249288, "grad_norm": 0.2678245007991791, "learning_rate": 1.504209592492694e-06, "loss": 0.3257, "step": 37957 }, { "epoch": 3.859089060593737, "grad_norm": 0.25980156660079956, "learning_rate": 1.503955869492388e-06, "loss": 0.3155, "step": 37958 }, { "epoch": 3.859190727938186, "grad_norm": 0.28957289457321167, "learning_rate": 1.5037021641043226e-06, "loss": 0.3374, "step": 37959 }, { "epoch": 3.859292395282635, "grad_norm": 0.29347413778305054, "learning_rate": 1.5034484763297753e-06, "loss": 0.3107, "step": 37960 }, { "epoch": 3.859394062627084, "grad_norm": 0.25729992985725403, "learning_rate": 1.5031948061700214e-06, "loss": 0.314, "step": 37961 }, { "epoch": 3.859495729971533, "grad_norm": 0.27315595746040344, "learning_rate": 1.5029411536263433e-06, "loss": 0.2941, "step": 37962 }, { "epoch": 3.859597397315982, "grad_norm": 0.2862161099910736, "learning_rate": 1.5026875187000163e-06, "loss": 0.3382, "step": 37963 }, { "epoch": 3.8596990646604312, "grad_norm": 0.2748064696788788, "learning_rate": 1.5024339013923188e-06, "loss": 0.315, "step": 37964 }, { "epoch": 3.85980073200488, "grad_norm": 0.27718454599380493, "learning_rate": 1.502180301704526e-06, "loss": 0.306, "step": 37965 }, { "epoch": 3.859902399349329, "grad_norm": 0.2518320381641388, "learning_rate": 1.50192671963792e-06, "loss": 0.3002, "step": 37966 }, { "epoch": 3.860004066693778, "grad_norm": 0.2816828787326813, "learning_rate": 1.5016731551937752e-06, "loss": 0.304, "step": 37967 }, { "epoch": 3.860105734038227, "grad_norm": 0.2723759710788727, "learning_rate": 1.5014196083733684e-06, "loss": 0.2996, "step": 37968 }, { "epoch": 3.860207401382676, "grad_norm": 0.27786335349082947, "learning_rate": 1.5011660791779803e-06, "loss": 0.3042, "step": 37969 }, { "epoch": 3.860309068727125, "grad_norm": 0.27165931463241577, "learning_rate": 1.5009125676088855e-06, "loss": 0.3454, "step": 37970 }, { "epoch": 3.860410736071574, "grad_norm": 0.25638100504875183, "learning_rate": 1.5006590736673599e-06, "loss": 0.3448, "step": 37971 }, { "epoch": 3.8605124034160228, "grad_norm": 0.27972835302352905, "learning_rate": 1.5004055973546844e-06, "loss": 0.2895, "step": 37972 }, { "epoch": 3.8606140707604717, "grad_norm": 0.28133222460746765, "learning_rate": 1.5001521386721334e-06, "loss": 0.3397, "step": 37973 }, { "epoch": 3.8607157381049206, "grad_norm": 0.279239296913147, "learning_rate": 1.4998986976209845e-06, "loss": 0.3073, "step": 37974 }, { "epoch": 3.8608174054493696, "grad_norm": 0.26777052879333496, "learning_rate": 1.4996452742025126e-06, "loss": 0.3322, "step": 37975 }, { "epoch": 3.8609190727938185, "grad_norm": 0.2841549813747406, "learning_rate": 1.4993918684179976e-06, "loss": 0.3202, "step": 37976 }, { "epoch": 3.8610207401382675, "grad_norm": 0.2734890282154083, "learning_rate": 1.4991384802687149e-06, "loss": 0.3174, "step": 37977 }, { "epoch": 3.8611224074827164, "grad_norm": 0.2619113028049469, "learning_rate": 1.4988851097559382e-06, "loss": 0.3113, "step": 37978 }, { "epoch": 3.861224074827166, "grad_norm": 0.27182137966156006, "learning_rate": 1.4986317568809483e-06, "loss": 0.3057, "step": 37979 }, { "epoch": 3.8613257421716147, "grad_norm": 0.2697829604148865, "learning_rate": 1.4983784216450197e-06, "loss": 0.305, "step": 37980 }, { "epoch": 3.8614274095160637, "grad_norm": 0.26901087164878845, "learning_rate": 1.4981251040494266e-06, "loss": 0.309, "step": 37981 }, { "epoch": 3.8615290768605126, "grad_norm": 0.27035024762153625, "learning_rate": 1.4978718040954488e-06, "loss": 0.3069, "step": 37982 }, { "epoch": 3.8616307442049616, "grad_norm": 0.2680829167366028, "learning_rate": 1.4976185217843608e-06, "loss": 0.3067, "step": 37983 }, { "epoch": 3.8617324115494105, "grad_norm": 0.26740947365760803, "learning_rate": 1.4973652571174385e-06, "loss": 0.3445, "step": 37984 }, { "epoch": 3.8618340788938594, "grad_norm": 0.26407942175865173, "learning_rate": 1.4971120100959557e-06, "loss": 0.3331, "step": 37985 }, { "epoch": 3.8619357462383084, "grad_norm": 0.270393431186676, "learning_rate": 1.4968587807211916e-06, "loss": 0.3131, "step": 37986 }, { "epoch": 3.8620374135827573, "grad_norm": 0.2802180051803589, "learning_rate": 1.496605568994421e-06, "loss": 0.3201, "step": 37987 }, { "epoch": 3.8621390809272063, "grad_norm": 0.2881535291671753, "learning_rate": 1.4963523749169174e-06, "loss": 0.2979, "step": 37988 }, { "epoch": 3.862240748271655, "grad_norm": 0.28616467118263245, "learning_rate": 1.4960991984899592e-06, "loss": 0.3335, "step": 37989 }, { "epoch": 3.862342415616104, "grad_norm": 0.2519882619380951, "learning_rate": 1.4958460397148206e-06, "loss": 0.3248, "step": 37990 }, { "epoch": 3.862444082960553, "grad_norm": 0.25991231203079224, "learning_rate": 1.4955928985927752e-06, "loss": 0.3592, "step": 37991 }, { "epoch": 3.862545750305002, "grad_norm": 0.25566574931144714, "learning_rate": 1.495339775125102e-06, "loss": 0.3269, "step": 37992 }, { "epoch": 3.862647417649451, "grad_norm": 0.26978248357772827, "learning_rate": 1.4950866693130739e-06, "loss": 0.3083, "step": 37993 }, { "epoch": 3.8627490849939, "grad_norm": 0.2600238025188446, "learning_rate": 1.4948335811579668e-06, "loss": 0.2718, "step": 37994 }, { "epoch": 3.862850752338349, "grad_norm": 0.26231658458709717, "learning_rate": 1.4945805106610528e-06, "loss": 0.3169, "step": 37995 }, { "epoch": 3.862952419682798, "grad_norm": 0.2772164046764374, "learning_rate": 1.494327457823611e-06, "loss": 0.308, "step": 37996 }, { "epoch": 3.8630540870272467, "grad_norm": 0.2966659963130951, "learning_rate": 1.494074422646915e-06, "loss": 0.3357, "step": 37997 }, { "epoch": 3.8631557543716957, "grad_norm": 0.27475133538246155, "learning_rate": 1.493821405132237e-06, "loss": 0.3317, "step": 37998 }, { "epoch": 3.8632574217161446, "grad_norm": 0.26560550928115845, "learning_rate": 1.4935684052808548e-06, "loss": 0.2795, "step": 37999 }, { "epoch": 3.8633590890605936, "grad_norm": 0.2657138705253601, "learning_rate": 1.4933154230940417e-06, "loss": 0.2789, "step": 38000 }, { "epoch": 3.8634607564050425, "grad_norm": 0.27261701226234436, "learning_rate": 1.4930624585730706e-06, "loss": 0.3105, "step": 38001 }, { "epoch": 3.8635624237494914, "grad_norm": 0.2679607570171356, "learning_rate": 1.49280951171922e-06, "loss": 0.321, "step": 38002 }, { "epoch": 3.8636640910939404, "grad_norm": 0.27211686968803406, "learning_rate": 1.4925565825337607e-06, "loss": 0.3097, "step": 38003 }, { "epoch": 3.8637657584383893, "grad_norm": 0.25260719656944275, "learning_rate": 1.4923036710179679e-06, "loss": 0.2928, "step": 38004 }, { "epoch": 3.8638674257828387, "grad_norm": 0.2634083926677704, "learning_rate": 1.492050777173114e-06, "loss": 0.3026, "step": 38005 }, { "epoch": 3.8639690931272876, "grad_norm": 0.2907371520996094, "learning_rate": 1.4917979010004768e-06, "loss": 0.2961, "step": 38006 }, { "epoch": 3.8640707604717366, "grad_norm": 0.3008204698562622, "learning_rate": 1.4915450425013278e-06, "loss": 0.3147, "step": 38007 }, { "epoch": 3.8641724278161855, "grad_norm": 0.25478261709213257, "learning_rate": 1.4912922016769394e-06, "loss": 0.3512, "step": 38008 }, { "epoch": 3.8642740951606345, "grad_norm": 0.27437645196914673, "learning_rate": 1.4910393785285887e-06, "loss": 0.311, "step": 38009 }, { "epoch": 3.8643757625050834, "grad_norm": 0.26392799615859985, "learning_rate": 1.4907865730575477e-06, "loss": 0.2995, "step": 38010 }, { "epoch": 3.8644774298495324, "grad_norm": 0.2921423614025116, "learning_rate": 1.4905337852650887e-06, "loss": 0.3407, "step": 38011 }, { "epoch": 3.8645790971939813, "grad_norm": 0.26417964696884155, "learning_rate": 1.4902810151524882e-06, "loss": 0.3047, "step": 38012 }, { "epoch": 3.8646807645384302, "grad_norm": 0.250008225440979, "learning_rate": 1.4900282627210177e-06, "loss": 0.3412, "step": 38013 }, { "epoch": 3.864782431882879, "grad_norm": 0.26520639657974243, "learning_rate": 1.4897755279719506e-06, "loss": 0.3058, "step": 38014 }, { "epoch": 3.864884099227328, "grad_norm": 0.29175320267677307, "learning_rate": 1.4895228109065584e-06, "loss": 0.3402, "step": 38015 }, { "epoch": 3.864985766571777, "grad_norm": 0.2736976146697998, "learning_rate": 1.4892701115261182e-06, "loss": 0.2811, "step": 38016 }, { "epoch": 3.865087433916226, "grad_norm": 0.2736203670501709, "learning_rate": 1.489017429831901e-06, "loss": 0.2813, "step": 38017 }, { "epoch": 3.865189101260675, "grad_norm": 0.2996115982532501, "learning_rate": 1.4887647658251774e-06, "loss": 0.3258, "step": 38018 }, { "epoch": 3.865290768605124, "grad_norm": 0.24911734461784363, "learning_rate": 1.488512119507226e-06, "loss": 0.3255, "step": 38019 }, { "epoch": 3.8653924359495733, "grad_norm": 0.2806050777435303, "learning_rate": 1.488259490879313e-06, "loss": 0.3133, "step": 38020 }, { "epoch": 3.865494103294022, "grad_norm": 0.2812740206718445, "learning_rate": 1.4880068799427162e-06, "loss": 0.2983, "step": 38021 }, { "epoch": 3.865595770638471, "grad_norm": 0.29842668771743774, "learning_rate": 1.4877542866987039e-06, "loss": 0.3298, "step": 38022 }, { "epoch": 3.86569743798292, "grad_norm": 0.2834635376930237, "learning_rate": 1.4875017111485523e-06, "loss": 0.3142, "step": 38023 }, { "epoch": 3.865799105327369, "grad_norm": 0.27879875898361206, "learning_rate": 1.487249153293533e-06, "loss": 0.3334, "step": 38024 }, { "epoch": 3.865900772671818, "grad_norm": 0.2764715552330017, "learning_rate": 1.4869966131349156e-06, "loss": 0.3124, "step": 38025 }, { "epoch": 3.866002440016267, "grad_norm": 0.2593161165714264, "learning_rate": 1.4867440906739783e-06, "loss": 0.2913, "step": 38026 }, { "epoch": 3.866104107360716, "grad_norm": 0.2544671297073364, "learning_rate": 1.4864915859119855e-06, "loss": 0.298, "step": 38027 }, { "epoch": 3.866205774705165, "grad_norm": 0.28247472643852234, "learning_rate": 1.4862390988502134e-06, "loss": 0.3262, "step": 38028 }, { "epoch": 3.8663074420496137, "grad_norm": 0.2762664258480072, "learning_rate": 1.4859866294899371e-06, "loss": 0.291, "step": 38029 }, { "epoch": 3.8664091093940627, "grad_norm": 0.28752297163009644, "learning_rate": 1.4857341778324219e-06, "loss": 0.3282, "step": 38030 }, { "epoch": 3.8665107767385116, "grad_norm": 0.26775923371315, "learning_rate": 1.4854817438789442e-06, "loss": 0.2935, "step": 38031 }, { "epoch": 3.8666124440829606, "grad_norm": 0.2749257981777191, "learning_rate": 1.4852293276307744e-06, "loss": 0.3065, "step": 38032 }, { "epoch": 3.8667141114274095, "grad_norm": 0.2869585156440735, "learning_rate": 1.4849769290891824e-06, "loss": 0.2914, "step": 38033 }, { "epoch": 3.8668157787718584, "grad_norm": 0.27345535159111023, "learning_rate": 1.484724548255443e-06, "loss": 0.2912, "step": 38034 }, { "epoch": 3.8669174461163074, "grad_norm": 0.2628450095653534, "learning_rate": 1.4844721851308242e-06, "loss": 0.3246, "step": 38035 }, { "epoch": 3.8670191134607563, "grad_norm": 0.29242610931396484, "learning_rate": 1.484219839716602e-06, "loss": 0.3104, "step": 38036 }, { "epoch": 3.8671207808052053, "grad_norm": 0.2990384101867676, "learning_rate": 1.4839675120140423e-06, "loss": 0.3215, "step": 38037 }, { "epoch": 3.867222448149654, "grad_norm": 0.3022162616252899, "learning_rate": 1.4837152020244182e-06, "loss": 0.302, "step": 38038 }, { "epoch": 3.867324115494103, "grad_norm": 0.29736053943634033, "learning_rate": 1.4834629097490044e-06, "loss": 0.305, "step": 38039 }, { "epoch": 3.867425782838552, "grad_norm": 0.2784964144229889, "learning_rate": 1.4832106351890662e-06, "loss": 0.2992, "step": 38040 }, { "epoch": 3.867527450183001, "grad_norm": 0.26489323377609253, "learning_rate": 1.4829583783458783e-06, "loss": 0.3258, "step": 38041 }, { "epoch": 3.86762911752745, "grad_norm": 0.2649386525154114, "learning_rate": 1.4827061392207105e-06, "loss": 0.3426, "step": 38042 }, { "epoch": 3.867730784871899, "grad_norm": 0.2686520218849182, "learning_rate": 1.4824539178148317e-06, "loss": 0.3487, "step": 38043 }, { "epoch": 3.867832452216348, "grad_norm": 0.2778252959251404, "learning_rate": 1.4822017141295158e-06, "loss": 0.2813, "step": 38044 }, { "epoch": 3.867934119560797, "grad_norm": 0.2730309069156647, "learning_rate": 1.4819495281660295e-06, "loss": 0.2794, "step": 38045 }, { "epoch": 3.868035786905246, "grad_norm": 0.2951587438583374, "learning_rate": 1.4816973599256497e-06, "loss": 0.3099, "step": 38046 }, { "epoch": 3.868137454249695, "grad_norm": 0.2732974588871002, "learning_rate": 1.4814452094096388e-06, "loss": 0.328, "step": 38047 }, { "epoch": 3.868239121594144, "grad_norm": 0.3106924891471863, "learning_rate": 1.4811930766192706e-06, "loss": 0.2866, "step": 38048 }, { "epoch": 3.868340788938593, "grad_norm": 0.27459481358528137, "learning_rate": 1.480940961555819e-06, "loss": 0.299, "step": 38049 }, { "epoch": 3.868442456283042, "grad_norm": 0.30067723989486694, "learning_rate": 1.480688864220547e-06, "loss": 0.3333, "step": 38050 }, { "epoch": 3.868544123627491, "grad_norm": 0.2682367265224457, "learning_rate": 1.48043678461473e-06, "loss": 0.3188, "step": 38051 }, { "epoch": 3.86864579097194, "grad_norm": 0.2706514894962311, "learning_rate": 1.4801847227396365e-06, "loss": 0.2881, "step": 38052 }, { "epoch": 3.8687474583163888, "grad_norm": 0.25932955741882324, "learning_rate": 1.479932678596534e-06, "loss": 0.3145, "step": 38053 }, { "epoch": 3.8688491256608377, "grad_norm": 0.2931051254272461, "learning_rate": 1.4796806521866957e-06, "loss": 0.3018, "step": 38054 }, { "epoch": 3.8689507930052867, "grad_norm": 0.2656402587890625, "learning_rate": 1.4794286435113898e-06, "loss": 0.3015, "step": 38055 }, { "epoch": 3.8690524603497356, "grad_norm": 0.2796020805835724, "learning_rate": 1.4791766525718853e-06, "loss": 0.2802, "step": 38056 }, { "epoch": 3.8691541276941845, "grad_norm": 0.25259265303611755, "learning_rate": 1.4789246793694512e-06, "loss": 0.295, "step": 38057 }, { "epoch": 3.8692557950386335, "grad_norm": 0.28178033232688904, "learning_rate": 1.4786727239053577e-06, "loss": 0.3197, "step": 38058 }, { "epoch": 3.8693574623830824, "grad_norm": 0.2680281698703766, "learning_rate": 1.4784207861808774e-06, "loss": 0.3525, "step": 38059 }, { "epoch": 3.8694591297275314, "grad_norm": 0.2837429344654083, "learning_rate": 1.4781688661972731e-06, "loss": 0.3236, "step": 38060 }, { "epoch": 3.8695607970719808, "grad_norm": 0.2639794647693634, "learning_rate": 1.4779169639558183e-06, "loss": 0.3101, "step": 38061 }, { "epoch": 3.8696624644164297, "grad_norm": 0.25879713892936707, "learning_rate": 1.4776650794577812e-06, "loss": 0.3123, "step": 38062 }, { "epoch": 3.8697641317608786, "grad_norm": 0.28335273265838623, "learning_rate": 1.4774132127044293e-06, "loss": 0.34, "step": 38063 }, { "epoch": 3.8698657991053276, "grad_norm": 0.2840798497200012, "learning_rate": 1.4771613636970333e-06, "loss": 0.314, "step": 38064 }, { "epoch": 3.8699674664497765, "grad_norm": 0.2825583815574646, "learning_rate": 1.4769095324368622e-06, "loss": 0.3305, "step": 38065 }, { "epoch": 3.8700691337942255, "grad_norm": 0.26503685116767883, "learning_rate": 1.4766577189251825e-06, "loss": 0.3081, "step": 38066 }, { "epoch": 3.8701708011386744, "grad_norm": 0.30597472190856934, "learning_rate": 1.4764059231632632e-06, "loss": 0.2876, "step": 38067 }, { "epoch": 3.8702724684831233, "grad_norm": 0.2683447599411011, "learning_rate": 1.4761541451523731e-06, "loss": 0.3023, "step": 38068 }, { "epoch": 3.8703741358275723, "grad_norm": 0.27077001333236694, "learning_rate": 1.4759023848937842e-06, "loss": 0.2943, "step": 38069 }, { "epoch": 3.8704758031720212, "grad_norm": 0.2867271602153778, "learning_rate": 1.4756506423887585e-06, "loss": 0.3155, "step": 38070 }, { "epoch": 3.87057747051647, "grad_norm": 0.2603847086429596, "learning_rate": 1.4753989176385691e-06, "loss": 0.3293, "step": 38071 }, { "epoch": 3.870679137860919, "grad_norm": 0.2633047103881836, "learning_rate": 1.4751472106444826e-06, "loss": 0.3104, "step": 38072 }, { "epoch": 3.870780805205368, "grad_norm": 0.2530008852481842, "learning_rate": 1.4748955214077648e-06, "loss": 0.314, "step": 38073 }, { "epoch": 3.870882472549817, "grad_norm": 0.2603445053100586, "learning_rate": 1.4746438499296872e-06, "loss": 0.3014, "step": 38074 }, { "epoch": 3.870984139894266, "grad_norm": 0.290787011384964, "learning_rate": 1.4743921962115165e-06, "loss": 0.3125, "step": 38075 }, { "epoch": 3.871085807238715, "grad_norm": 0.27715370059013367, "learning_rate": 1.47414056025452e-06, "loss": 0.266, "step": 38076 }, { "epoch": 3.871187474583164, "grad_norm": 0.2604116201400757, "learning_rate": 1.4738889420599633e-06, "loss": 0.3194, "step": 38077 }, { "epoch": 3.8712891419276128, "grad_norm": 0.3002321124076843, "learning_rate": 1.4736373416291183e-06, "loss": 0.2633, "step": 38078 }, { "epoch": 3.8713908092720617, "grad_norm": 0.2577935755252838, "learning_rate": 1.4733857589632505e-06, "loss": 0.2908, "step": 38079 }, { "epoch": 3.8714924766165106, "grad_norm": 0.2625662684440613, "learning_rate": 1.4731341940636256e-06, "loss": 0.3087, "step": 38080 }, { "epoch": 3.8715941439609596, "grad_norm": 0.2608453631401062, "learning_rate": 1.4728826469315137e-06, "loss": 0.362, "step": 38081 }, { "epoch": 3.8716958113054085, "grad_norm": 0.2680353820323944, "learning_rate": 1.4726311175681812e-06, "loss": 0.3218, "step": 38082 }, { "epoch": 3.8717974786498575, "grad_norm": 0.28084859251976013, "learning_rate": 1.4723796059748935e-06, "loss": 0.2927, "step": 38083 }, { "epoch": 3.8718991459943064, "grad_norm": 0.2723919451236725, "learning_rate": 1.4721281121529201e-06, "loss": 0.3495, "step": 38084 }, { "epoch": 3.8720008133387553, "grad_norm": 0.2843865752220154, "learning_rate": 1.4718766361035275e-06, "loss": 0.3065, "step": 38085 }, { "epoch": 3.8721024806832043, "grad_norm": 0.2782478630542755, "learning_rate": 1.4716251778279817e-06, "loss": 0.3308, "step": 38086 }, { "epoch": 3.8722041480276537, "grad_norm": 0.27719318866729736, "learning_rate": 1.4713737373275478e-06, "loss": 0.2876, "step": 38087 }, { "epoch": 3.8723058153721026, "grad_norm": 0.24921506643295288, "learning_rate": 1.4711223146034964e-06, "loss": 0.29, "step": 38088 }, { "epoch": 3.8724074827165516, "grad_norm": 0.2632785439491272, "learning_rate": 1.4708709096570922e-06, "loss": 0.2765, "step": 38089 }, { "epoch": 3.8725091500610005, "grad_norm": 0.28779491782188416, "learning_rate": 1.4706195224896003e-06, "loss": 0.334, "step": 38090 }, { "epoch": 3.8726108174054494, "grad_norm": 0.2730407416820526, "learning_rate": 1.4703681531022902e-06, "loss": 0.314, "step": 38091 }, { "epoch": 3.8727124847498984, "grad_norm": 0.2795063257217407, "learning_rate": 1.4701168014964263e-06, "loss": 0.2808, "step": 38092 }, { "epoch": 3.8728141520943473, "grad_norm": 0.26109519600868225, "learning_rate": 1.4698654676732738e-06, "loss": 0.3002, "step": 38093 }, { "epoch": 3.8729158194387963, "grad_norm": 0.2933402359485626, "learning_rate": 1.4696141516341012e-06, "loss": 0.3088, "step": 38094 }, { "epoch": 3.873017486783245, "grad_norm": 0.2570415139198303, "learning_rate": 1.4693628533801741e-06, "loss": 0.3123, "step": 38095 }, { "epoch": 3.873119154127694, "grad_norm": 0.2536718249320984, "learning_rate": 1.4691115729127575e-06, "loss": 0.2843, "step": 38096 }, { "epoch": 3.873220821472143, "grad_norm": 0.29421117901802063, "learning_rate": 1.4688603102331166e-06, "loss": 0.2921, "step": 38097 }, { "epoch": 3.873322488816592, "grad_norm": 0.2737387716770172, "learning_rate": 1.4686090653425195e-06, "loss": 0.312, "step": 38098 }, { "epoch": 3.873424156161041, "grad_norm": 0.2844940721988678, "learning_rate": 1.4683578382422315e-06, "loss": 0.3096, "step": 38099 }, { "epoch": 3.87352582350549, "grad_norm": 0.2599053978919983, "learning_rate": 1.4681066289335149e-06, "loss": 0.2865, "step": 38100 }, { "epoch": 3.873627490849939, "grad_norm": 0.28263768553733826, "learning_rate": 1.4678554374176402e-06, "loss": 0.3016, "step": 38101 }, { "epoch": 3.8737291581943882, "grad_norm": 0.2822515070438385, "learning_rate": 1.4676042636958698e-06, "loss": 0.2974, "step": 38102 }, { "epoch": 3.873830825538837, "grad_norm": 0.2611020505428314, "learning_rate": 1.4673531077694686e-06, "loss": 0.3025, "step": 38103 }, { "epoch": 3.873932492883286, "grad_norm": 0.27388542890548706, "learning_rate": 1.4671019696397042e-06, "loss": 0.3092, "step": 38104 }, { "epoch": 3.874034160227735, "grad_norm": 0.31302782893180847, "learning_rate": 1.4668508493078404e-06, "loss": 0.3, "step": 38105 }, { "epoch": 3.874135827572184, "grad_norm": 0.2785879671573639, "learning_rate": 1.4665997467751425e-06, "loss": 0.2829, "step": 38106 }, { "epoch": 3.874237494916633, "grad_norm": 0.2747075855731964, "learning_rate": 1.466348662042874e-06, "loss": 0.3102, "step": 38107 }, { "epoch": 3.874339162261082, "grad_norm": 0.255066454410553, "learning_rate": 1.466097595112303e-06, "loss": 0.3236, "step": 38108 }, { "epoch": 3.874440829605531, "grad_norm": 0.2532062530517578, "learning_rate": 1.465846545984692e-06, "loss": 0.3204, "step": 38109 }, { "epoch": 3.8745424969499798, "grad_norm": 0.274833619594574, "learning_rate": 1.4655955146613055e-06, "loss": 0.3005, "step": 38110 }, { "epoch": 3.8746441642944287, "grad_norm": 0.27628302574157715, "learning_rate": 1.4653445011434098e-06, "loss": 0.3464, "step": 38111 }, { "epoch": 3.8747458316388776, "grad_norm": 0.2743905186653137, "learning_rate": 1.4650935054322684e-06, "loss": 0.2968, "step": 38112 }, { "epoch": 3.8748474989833266, "grad_norm": 0.2852213680744171, "learning_rate": 1.464842527529145e-06, "loss": 0.2948, "step": 38113 }, { "epoch": 3.8749491663277755, "grad_norm": 0.2842405140399933, "learning_rate": 1.4645915674353063e-06, "loss": 0.3084, "step": 38114 }, { "epoch": 3.8750508336722245, "grad_norm": 0.27031180262565613, "learning_rate": 1.4643406251520149e-06, "loss": 0.3246, "step": 38115 }, { "epoch": 3.8751525010166734, "grad_norm": 0.2837202250957489, "learning_rate": 1.4640897006805349e-06, "loss": 0.3116, "step": 38116 }, { "epoch": 3.8752541683611224, "grad_norm": 0.2929777204990387, "learning_rate": 1.4638387940221293e-06, "loss": 0.3208, "step": 38117 }, { "epoch": 3.8753558357055713, "grad_norm": 0.2608056366443634, "learning_rate": 1.4635879051780655e-06, "loss": 0.3215, "step": 38118 }, { "epoch": 3.8754575030500202, "grad_norm": 0.273678183555603, "learning_rate": 1.463337034149605e-06, "loss": 0.352, "step": 38119 }, { "epoch": 3.875559170394469, "grad_norm": 0.3080504834651947, "learning_rate": 1.463086180938011e-06, "loss": 0.31, "step": 38120 }, { "epoch": 3.875660837738918, "grad_norm": 0.2874501347541809, "learning_rate": 1.4628353455445498e-06, "loss": 0.2998, "step": 38121 }, { "epoch": 3.875762505083367, "grad_norm": 0.2725176513195038, "learning_rate": 1.4625845279704832e-06, "loss": 0.357, "step": 38122 }, { "epoch": 3.875864172427816, "grad_norm": 0.2750151455402374, "learning_rate": 1.4623337282170741e-06, "loss": 0.3003, "step": 38123 }, { "epoch": 3.875965839772265, "grad_norm": 0.27238303422927856, "learning_rate": 1.4620829462855879e-06, "loss": 0.2951, "step": 38124 }, { "epoch": 3.876067507116714, "grad_norm": 0.29289114475250244, "learning_rate": 1.4618321821772874e-06, "loss": 0.2713, "step": 38125 }, { "epoch": 3.876169174461163, "grad_norm": 0.2692321836948395, "learning_rate": 1.4615814358934355e-06, "loss": 0.3113, "step": 38126 }, { "epoch": 3.8762708418056118, "grad_norm": 0.2748158276081085, "learning_rate": 1.4613307074352938e-06, "loss": 0.3055, "step": 38127 }, { "epoch": 3.876372509150061, "grad_norm": 0.2945616841316223, "learning_rate": 1.4610799968041294e-06, "loss": 0.3091, "step": 38128 }, { "epoch": 3.87647417649451, "grad_norm": 0.2855145335197449, "learning_rate": 1.4608293040012022e-06, "loss": 0.3299, "step": 38129 }, { "epoch": 3.876575843838959, "grad_norm": 0.25329163670539856, "learning_rate": 1.4605786290277746e-06, "loss": 0.297, "step": 38130 }, { "epoch": 3.876677511183408, "grad_norm": 0.2784711420536041, "learning_rate": 1.4603279718851127e-06, "loss": 0.3654, "step": 38131 }, { "epoch": 3.876779178527857, "grad_norm": 0.30683666467666626, "learning_rate": 1.4600773325744765e-06, "loss": 0.3068, "step": 38132 }, { "epoch": 3.876880845872306, "grad_norm": 0.2863367795944214, "learning_rate": 1.4598267110971286e-06, "loss": 0.3554, "step": 38133 }, { "epoch": 3.876982513216755, "grad_norm": 0.2643685042858124, "learning_rate": 1.4595761074543334e-06, "loss": 0.3059, "step": 38134 }, { "epoch": 3.8770841805612037, "grad_norm": 0.27293118834495544, "learning_rate": 1.4593255216473535e-06, "loss": 0.2961, "step": 38135 }, { "epoch": 3.8771858479056527, "grad_norm": 0.26759546995162964, "learning_rate": 1.459074953677449e-06, "loss": 0.3162, "step": 38136 }, { "epoch": 3.8772875152501016, "grad_norm": 0.27895718812942505, "learning_rate": 1.4588244035458827e-06, "loss": 0.2963, "step": 38137 }, { "epoch": 3.8773891825945506, "grad_norm": 0.2880961298942566, "learning_rate": 1.4585738712539187e-06, "loss": 0.3137, "step": 38138 }, { "epoch": 3.8774908499389995, "grad_norm": 0.27305299043655396, "learning_rate": 1.4583233568028181e-06, "loss": 0.3309, "step": 38139 }, { "epoch": 3.8775925172834484, "grad_norm": 0.2662043571472168, "learning_rate": 1.4580728601938415e-06, "loss": 0.3006, "step": 38140 }, { "epoch": 3.8776941846278974, "grad_norm": 0.24671123921871185, "learning_rate": 1.4578223814282533e-06, "loss": 0.2965, "step": 38141 }, { "epoch": 3.8777958519723463, "grad_norm": 0.27437910437583923, "learning_rate": 1.457571920507314e-06, "loss": 0.3054, "step": 38142 }, { "epoch": 3.8778975193167957, "grad_norm": 0.2544722855091095, "learning_rate": 1.4573214774322846e-06, "loss": 0.3039, "step": 38143 }, { "epoch": 3.8779991866612447, "grad_norm": 0.2987920045852661, "learning_rate": 1.457071052204429e-06, "loss": 0.2844, "step": 38144 }, { "epoch": 3.8781008540056936, "grad_norm": 0.2887004613876343, "learning_rate": 1.4568206448250077e-06, "loss": 0.3351, "step": 38145 }, { "epoch": 3.8782025213501425, "grad_norm": 0.3042911887168884, "learning_rate": 1.4565702552952816e-06, "loss": 0.294, "step": 38146 }, { "epoch": 3.8783041886945915, "grad_norm": 0.2700446844100952, "learning_rate": 1.456319883616511e-06, "loss": 0.3094, "step": 38147 }, { "epoch": 3.8784058560390404, "grad_norm": 0.2808496356010437, "learning_rate": 1.4560695297899607e-06, "loss": 0.3072, "step": 38148 }, { "epoch": 3.8785075233834894, "grad_norm": 0.2700118124485016, "learning_rate": 1.4558191938168898e-06, "loss": 0.2979, "step": 38149 }, { "epoch": 3.8786091907279383, "grad_norm": 0.28721004724502563, "learning_rate": 1.4555688756985575e-06, "loss": 0.2825, "step": 38150 }, { "epoch": 3.8787108580723872, "grad_norm": 0.26674795150756836, "learning_rate": 1.4553185754362292e-06, "loss": 0.3355, "step": 38151 }, { "epoch": 3.878812525416836, "grad_norm": 0.2868722379207611, "learning_rate": 1.4550682930311633e-06, "loss": 0.3027, "step": 38152 }, { "epoch": 3.878914192761285, "grad_norm": 0.2770949900150299, "learning_rate": 1.454818028484619e-06, "loss": 0.3087, "step": 38153 }, { "epoch": 3.879015860105734, "grad_norm": 0.27199336886405945, "learning_rate": 1.454567781797861e-06, "loss": 0.3349, "step": 38154 }, { "epoch": 3.879117527450183, "grad_norm": 0.27732211351394653, "learning_rate": 1.4543175529721481e-06, "loss": 0.2852, "step": 38155 }, { "epoch": 3.879219194794632, "grad_norm": 0.2669978737831116, "learning_rate": 1.4540673420087404e-06, "loss": 0.2845, "step": 38156 }, { "epoch": 3.879320862139081, "grad_norm": 0.2804534137248993, "learning_rate": 1.4538171489088976e-06, "loss": 0.3268, "step": 38157 }, { "epoch": 3.87942252948353, "grad_norm": 0.2652008831501007, "learning_rate": 1.4535669736738828e-06, "loss": 0.3186, "step": 38158 }, { "epoch": 3.8795241968279788, "grad_norm": 0.26507729291915894, "learning_rate": 1.4533168163049545e-06, "loss": 0.3308, "step": 38159 }, { "epoch": 3.8796258641724277, "grad_norm": 0.27611881494522095, "learning_rate": 1.453066676803372e-06, "loss": 0.2786, "step": 38160 }, { "epoch": 3.8797275315168767, "grad_norm": 0.25542375445365906, "learning_rate": 1.4528165551703994e-06, "loss": 0.3214, "step": 38161 }, { "epoch": 3.8798291988613256, "grad_norm": 0.27073803544044495, "learning_rate": 1.4525664514072912e-06, "loss": 0.3448, "step": 38162 }, { "epoch": 3.8799308662057745, "grad_norm": 0.2636967599391937, "learning_rate": 1.4523163655153105e-06, "loss": 0.308, "step": 38163 }, { "epoch": 3.8800325335502235, "grad_norm": 0.27199921011924744, "learning_rate": 1.4520662974957183e-06, "loss": 0.2803, "step": 38164 }, { "epoch": 3.8801342008946724, "grad_norm": 0.2806437015533447, "learning_rate": 1.4518162473497727e-06, "loss": 0.2713, "step": 38165 }, { "epoch": 3.8802358682391214, "grad_norm": 0.25400713086128235, "learning_rate": 1.451566215078734e-06, "loss": 0.2894, "step": 38166 }, { "epoch": 3.8803375355835703, "grad_norm": 0.2792104482650757, "learning_rate": 1.4513162006838593e-06, "loss": 0.2981, "step": 38167 }, { "epoch": 3.8804392029280192, "grad_norm": 0.2840850353240967, "learning_rate": 1.4510662041664125e-06, "loss": 0.3051, "step": 38168 }, { "epoch": 3.8805408702724686, "grad_norm": 0.26615047454833984, "learning_rate": 1.450816225527651e-06, "loss": 0.2983, "step": 38169 }, { "epoch": 3.8806425376169176, "grad_norm": 0.2869119346141815, "learning_rate": 1.4505662647688318e-06, "loss": 0.3319, "step": 38170 }, { "epoch": 3.8807442049613665, "grad_norm": 0.2542431652545929, "learning_rate": 1.4503163218912198e-06, "loss": 0.3106, "step": 38171 }, { "epoch": 3.8808458723058155, "grad_norm": 0.25699251890182495, "learning_rate": 1.450066396896067e-06, "loss": 0.3064, "step": 38172 }, { "epoch": 3.8809475396502644, "grad_norm": 0.26324915885925293, "learning_rate": 1.4498164897846368e-06, "loss": 0.2979, "step": 38173 }, { "epoch": 3.8810492069947133, "grad_norm": 0.26151973009109497, "learning_rate": 1.4495666005581882e-06, "loss": 0.3326, "step": 38174 }, { "epoch": 3.8811508743391623, "grad_norm": 0.2673822343349457, "learning_rate": 1.44931672921798e-06, "loss": 0.303, "step": 38175 }, { "epoch": 3.8812525416836112, "grad_norm": 0.2721285820007324, "learning_rate": 1.4490668757652697e-06, "loss": 0.3099, "step": 38176 }, { "epoch": 3.88135420902806, "grad_norm": 0.2920554280281067, "learning_rate": 1.4488170402013158e-06, "loss": 0.2881, "step": 38177 }, { "epoch": 3.881455876372509, "grad_norm": 0.2891928255558014, "learning_rate": 1.448567222527379e-06, "loss": 0.3136, "step": 38178 }, { "epoch": 3.881557543716958, "grad_norm": 0.273639053106308, "learning_rate": 1.4483174227447167e-06, "loss": 0.2978, "step": 38179 }, { "epoch": 3.881659211061407, "grad_norm": 0.24061831831932068, "learning_rate": 1.4480676408545858e-06, "loss": 0.2997, "step": 38180 }, { "epoch": 3.881760878405856, "grad_norm": 0.29968833923339844, "learning_rate": 1.4478178768582491e-06, "loss": 0.3594, "step": 38181 }, { "epoch": 3.881862545750305, "grad_norm": 0.272118479013443, "learning_rate": 1.4475681307569583e-06, "loss": 0.3067, "step": 38182 }, { "epoch": 3.881964213094754, "grad_norm": 0.2924111485481262, "learning_rate": 1.4473184025519755e-06, "loss": 0.3086, "step": 38183 }, { "epoch": 3.882065880439203, "grad_norm": 0.2853407859802246, "learning_rate": 1.4470686922445609e-06, "loss": 0.3012, "step": 38184 }, { "epoch": 3.882167547783652, "grad_norm": 0.2657034397125244, "learning_rate": 1.4468189998359672e-06, "loss": 0.3235, "step": 38185 }, { "epoch": 3.882269215128101, "grad_norm": 0.301131933927536, "learning_rate": 1.4465693253274564e-06, "loss": 0.3077, "step": 38186 }, { "epoch": 3.88237088247255, "grad_norm": 0.26482370495796204, "learning_rate": 1.446319668720284e-06, "loss": 0.3021, "step": 38187 }, { "epoch": 3.882472549816999, "grad_norm": 0.2526635229587555, "learning_rate": 1.4460700300157094e-06, "loss": 0.3095, "step": 38188 }, { "epoch": 3.882574217161448, "grad_norm": 0.28359007835388184, "learning_rate": 1.4458204092149897e-06, "loss": 0.2904, "step": 38189 }, { "epoch": 3.882675884505897, "grad_norm": 0.26999127864837646, "learning_rate": 1.4455708063193807e-06, "loss": 0.3233, "step": 38190 }, { "epoch": 3.882777551850346, "grad_norm": 0.2906803488731384, "learning_rate": 1.4453212213301444e-06, "loss": 0.2901, "step": 38191 }, { "epoch": 3.8828792191947947, "grad_norm": 0.26511916518211365, "learning_rate": 1.4450716542485322e-06, "loss": 0.3245, "step": 38192 }, { "epoch": 3.8829808865392437, "grad_norm": 0.24230632185935974, "learning_rate": 1.4448221050758037e-06, "loss": 0.3038, "step": 38193 }, { "epoch": 3.8830825538836926, "grad_norm": 0.28332042694091797, "learning_rate": 1.4445725738132199e-06, "loss": 0.3344, "step": 38194 }, { "epoch": 3.8831842212281416, "grad_norm": 0.2688445448875427, "learning_rate": 1.4443230604620318e-06, "loss": 0.3327, "step": 38195 }, { "epoch": 3.8832858885725905, "grad_norm": 0.26882198452949524, "learning_rate": 1.4440735650235005e-06, "loss": 0.3332, "step": 38196 }, { "epoch": 3.8833875559170394, "grad_norm": 0.29345178604125977, "learning_rate": 1.4438240874988802e-06, "loss": 0.3069, "step": 38197 }, { "epoch": 3.8834892232614884, "grad_norm": 0.28616830706596375, "learning_rate": 1.4435746278894303e-06, "loss": 0.3057, "step": 38198 }, { "epoch": 3.8835908906059373, "grad_norm": 0.2539973855018616, "learning_rate": 1.4433251861964059e-06, "loss": 0.306, "step": 38199 }, { "epoch": 3.8836925579503863, "grad_norm": 0.28000643849372864, "learning_rate": 1.4430757624210634e-06, "loss": 0.3301, "step": 38200 }, { "epoch": 3.883794225294835, "grad_norm": 0.28164735436439514, "learning_rate": 1.442826356564662e-06, "loss": 0.3112, "step": 38201 }, { "epoch": 3.883895892639284, "grad_norm": 0.2924126088619232, "learning_rate": 1.442576968628454e-06, "loss": 0.3266, "step": 38202 }, { "epoch": 3.883997559983733, "grad_norm": 0.28437161445617676, "learning_rate": 1.4423275986136975e-06, "loss": 0.3, "step": 38203 }, { "epoch": 3.884099227328182, "grad_norm": 0.2622731029987335, "learning_rate": 1.442078246521652e-06, "loss": 0.3164, "step": 38204 }, { "epoch": 3.884200894672631, "grad_norm": 0.28803443908691406, "learning_rate": 1.4418289123535678e-06, "loss": 0.3356, "step": 38205 }, { "epoch": 3.88430256201708, "grad_norm": 0.28880780935287476, "learning_rate": 1.441579596110706e-06, "loss": 0.3212, "step": 38206 }, { "epoch": 3.884404229361529, "grad_norm": 0.2772410809993744, "learning_rate": 1.44133029779432e-06, "loss": 0.2851, "step": 38207 }, { "epoch": 3.884505896705978, "grad_norm": 0.2757747173309326, "learning_rate": 1.441081017405665e-06, "loss": 0.3179, "step": 38208 }, { "epoch": 3.8846075640504267, "grad_norm": 0.24810832738876343, "learning_rate": 1.4408317549459995e-06, "loss": 0.3114, "step": 38209 }, { "epoch": 3.884709231394876, "grad_norm": 0.2612159550189972, "learning_rate": 1.4405825104165766e-06, "loss": 0.2941, "step": 38210 }, { "epoch": 3.884810898739325, "grad_norm": 0.2983472943305969, "learning_rate": 1.4403332838186556e-06, "loss": 0.3282, "step": 38211 }, { "epoch": 3.884912566083774, "grad_norm": 0.3027472198009491, "learning_rate": 1.4400840751534872e-06, "loss": 0.3363, "step": 38212 }, { "epoch": 3.885014233428223, "grad_norm": 0.2657891809940338, "learning_rate": 1.439834884422331e-06, "loss": 0.2965, "step": 38213 }, { "epoch": 3.885115900772672, "grad_norm": 0.30246633291244507, "learning_rate": 1.43958571162644e-06, "loss": 0.3198, "step": 38214 }, { "epoch": 3.885217568117121, "grad_norm": 0.2762354016304016, "learning_rate": 1.4393365567670686e-06, "loss": 0.3341, "step": 38215 }, { "epoch": 3.8853192354615698, "grad_norm": 0.27653756737709045, "learning_rate": 1.4390874198454752e-06, "loss": 0.297, "step": 38216 }, { "epoch": 3.8854209028060187, "grad_norm": 0.2732067108154297, "learning_rate": 1.438838300862913e-06, "loss": 0.2952, "step": 38217 }, { "epoch": 3.8855225701504676, "grad_norm": 0.29063767194747925, "learning_rate": 1.4385891998206375e-06, "loss": 0.2971, "step": 38218 }, { "epoch": 3.8856242374949166, "grad_norm": 0.2740425765514374, "learning_rate": 1.4383401167199012e-06, "loss": 0.3198, "step": 38219 }, { "epoch": 3.8857259048393655, "grad_norm": 0.2791363596916199, "learning_rate": 1.4380910515619635e-06, "loss": 0.2869, "step": 38220 }, { "epoch": 3.8858275721838145, "grad_norm": 0.2609602212905884, "learning_rate": 1.4378420043480756e-06, "loss": 0.299, "step": 38221 }, { "epoch": 3.8859292395282634, "grad_norm": 0.27348002791404724, "learning_rate": 1.4375929750794926e-06, "loss": 0.298, "step": 38222 }, { "epoch": 3.8860309068727124, "grad_norm": 0.2805507481098175, "learning_rate": 1.4373439637574703e-06, "loss": 0.3242, "step": 38223 }, { "epoch": 3.8861325742171613, "grad_norm": 0.27337446808815, "learning_rate": 1.437094970383263e-06, "loss": 0.2709, "step": 38224 }, { "epoch": 3.8862342415616107, "grad_norm": 0.261719286441803, "learning_rate": 1.4368459949581232e-06, "loss": 0.2965, "step": 38225 }, { "epoch": 3.8863359089060596, "grad_norm": 0.29897409677505493, "learning_rate": 1.4365970374833078e-06, "loss": 0.3218, "step": 38226 }, { "epoch": 3.8864375762505086, "grad_norm": 0.2658843994140625, "learning_rate": 1.436348097960069e-06, "loss": 0.2714, "step": 38227 }, { "epoch": 3.8865392435949575, "grad_norm": 0.26640012860298157, "learning_rate": 1.4360991763896626e-06, "loss": 0.311, "step": 38228 }, { "epoch": 3.8866409109394064, "grad_norm": 0.3281318247318268, "learning_rate": 1.4358502727733397e-06, "loss": 0.3085, "step": 38229 }, { "epoch": 3.8867425782838554, "grad_norm": 0.3037087619304657, "learning_rate": 1.435601387112358e-06, "loss": 0.3005, "step": 38230 }, { "epoch": 3.8868442456283043, "grad_norm": 0.2823411226272583, "learning_rate": 1.4353525194079693e-06, "loss": 0.2879, "step": 38231 }, { "epoch": 3.8869459129727533, "grad_norm": 0.26747772097587585, "learning_rate": 1.4351036696614263e-06, "loss": 0.3057, "step": 38232 }, { "epoch": 3.887047580317202, "grad_norm": 0.2715001404285431, "learning_rate": 1.434854837873985e-06, "loss": 0.3357, "step": 38233 }, { "epoch": 3.887149247661651, "grad_norm": 0.2954469919204712, "learning_rate": 1.434606024046898e-06, "loss": 0.3129, "step": 38234 }, { "epoch": 3.8872509150061, "grad_norm": 0.2630150020122528, "learning_rate": 1.4343572281814167e-06, "loss": 0.3012, "step": 38235 }, { "epoch": 3.887352582350549, "grad_norm": 0.263219952583313, "learning_rate": 1.4341084502787983e-06, "loss": 0.3175, "step": 38236 }, { "epoch": 3.887454249694998, "grad_norm": 0.2772327661514282, "learning_rate": 1.433859690340294e-06, "loss": 0.3207, "step": 38237 }, { "epoch": 3.887555917039447, "grad_norm": 0.26616695523262024, "learning_rate": 1.4336109483671573e-06, "loss": 0.3057, "step": 38238 }, { "epoch": 3.887657584383896, "grad_norm": 0.2694811224937439, "learning_rate": 1.4333622243606394e-06, "loss": 0.3051, "step": 38239 }, { "epoch": 3.887759251728345, "grad_norm": 0.26929351687431335, "learning_rate": 1.4331135183219964e-06, "loss": 0.3238, "step": 38240 }, { "epoch": 3.8878609190727937, "grad_norm": 0.2650563716888428, "learning_rate": 1.43286483025248e-06, "loss": 0.3126, "step": 38241 }, { "epoch": 3.8879625864172427, "grad_norm": 0.26702752709388733, "learning_rate": 1.4326161601533418e-06, "loss": 0.3119, "step": 38242 }, { "epoch": 3.8880642537616916, "grad_norm": 0.26570215821266174, "learning_rate": 1.4323675080258365e-06, "loss": 0.3098, "step": 38243 }, { "epoch": 3.8881659211061406, "grad_norm": 0.2586439847946167, "learning_rate": 1.4321188738712166e-06, "loss": 0.2875, "step": 38244 }, { "epoch": 3.8882675884505895, "grad_norm": 0.28745409846305847, "learning_rate": 1.4318702576907317e-06, "loss": 0.3357, "step": 38245 }, { "epoch": 3.8883692557950384, "grad_norm": 0.26212894916534424, "learning_rate": 1.4316216594856386e-06, "loss": 0.3284, "step": 38246 }, { "epoch": 3.8884709231394874, "grad_norm": 0.25703883171081543, "learning_rate": 1.4313730792571872e-06, "loss": 0.3228, "step": 38247 }, { "epoch": 3.8885725904839363, "grad_norm": 0.2642742395401001, "learning_rate": 1.4311245170066306e-06, "loss": 0.2895, "step": 38248 }, { "epoch": 3.8886742578283853, "grad_norm": 0.26878607273101807, "learning_rate": 1.4308759727352189e-06, "loss": 0.3096, "step": 38249 }, { "epoch": 3.888775925172834, "grad_norm": 0.2788377106189728, "learning_rate": 1.430627446444207e-06, "loss": 0.2977, "step": 38250 }, { "epoch": 3.8888775925172836, "grad_norm": 0.2805115282535553, "learning_rate": 1.4303789381348465e-06, "loss": 0.3241, "step": 38251 }, { "epoch": 3.8889792598617325, "grad_norm": 0.2593781352043152, "learning_rate": 1.4301304478083865e-06, "loss": 0.3203, "step": 38252 }, { "epoch": 3.8890809272061815, "grad_norm": 0.2841506004333496, "learning_rate": 1.429881975466083e-06, "loss": 0.3144, "step": 38253 }, { "epoch": 3.8891825945506304, "grad_norm": 0.29616039991378784, "learning_rate": 1.4296335211091855e-06, "loss": 0.313, "step": 38254 }, { "epoch": 3.8892842618950794, "grad_norm": 0.29689693450927734, "learning_rate": 1.4293850847389445e-06, "loss": 0.2822, "step": 38255 }, { "epoch": 3.8893859292395283, "grad_norm": 0.2823258936405182, "learning_rate": 1.4291366663566142e-06, "loss": 0.3113, "step": 38256 }, { "epoch": 3.8894875965839772, "grad_norm": 0.2794605791568756, "learning_rate": 1.428888265963445e-06, "loss": 0.3457, "step": 38257 }, { "epoch": 3.889589263928426, "grad_norm": 0.2681160271167755, "learning_rate": 1.4286398835606879e-06, "loss": 0.3282, "step": 38258 }, { "epoch": 3.889690931272875, "grad_norm": 0.2597728967666626, "learning_rate": 1.4283915191495934e-06, "loss": 0.3143, "step": 38259 }, { "epoch": 3.889792598617324, "grad_norm": 0.25816503167152405, "learning_rate": 1.428143172731415e-06, "loss": 0.3277, "step": 38260 }, { "epoch": 3.889894265961773, "grad_norm": 0.27757471799850464, "learning_rate": 1.427894844307402e-06, "loss": 0.3448, "step": 38261 }, { "epoch": 3.889995933306222, "grad_norm": 0.25433027744293213, "learning_rate": 1.4276465338788048e-06, "loss": 0.2813, "step": 38262 }, { "epoch": 3.890097600650671, "grad_norm": 0.28220948576927185, "learning_rate": 1.4273982414468768e-06, "loss": 0.3116, "step": 38263 }, { "epoch": 3.89019926799512, "grad_norm": 0.273008793592453, "learning_rate": 1.4271499670128674e-06, "loss": 0.2968, "step": 38264 }, { "epoch": 3.8903009353395688, "grad_norm": 0.26774823665618896, "learning_rate": 1.4269017105780258e-06, "loss": 0.2852, "step": 38265 }, { "epoch": 3.890402602684018, "grad_norm": 0.2824316620826721, "learning_rate": 1.426653472143606e-06, "loss": 0.3149, "step": 38266 }, { "epoch": 3.890504270028467, "grad_norm": 0.2561472952365875, "learning_rate": 1.4264052517108573e-06, "loss": 0.3259, "step": 38267 }, { "epoch": 3.890605937372916, "grad_norm": 0.29238295555114746, "learning_rate": 1.426157049281029e-06, "loss": 0.3083, "step": 38268 }, { "epoch": 3.890707604717365, "grad_norm": 0.29622408747673035, "learning_rate": 1.4259088648553709e-06, "loss": 0.3145, "step": 38269 }, { "epoch": 3.890809272061814, "grad_norm": 0.2694062292575836, "learning_rate": 1.4256606984351362e-06, "loss": 0.2912, "step": 38270 }, { "epoch": 3.890910939406263, "grad_norm": 0.25718581676483154, "learning_rate": 1.4254125500215737e-06, "loss": 0.3123, "step": 38271 }, { "epoch": 3.891012606750712, "grad_norm": 0.28553107380867004, "learning_rate": 1.4251644196159314e-06, "loss": 0.3112, "step": 38272 }, { "epoch": 3.8911142740951608, "grad_norm": 0.27645620703697205, "learning_rate": 1.4249163072194634e-06, "loss": 0.2958, "step": 38273 }, { "epoch": 3.8912159414396097, "grad_norm": 0.2839471399784088, "learning_rate": 1.424668212833417e-06, "loss": 0.3152, "step": 38274 }, { "epoch": 3.8913176087840586, "grad_norm": 0.272744357585907, "learning_rate": 1.4244201364590409e-06, "loss": 0.351, "step": 38275 }, { "epoch": 3.8914192761285076, "grad_norm": 0.2742539644241333, "learning_rate": 1.4241720780975881e-06, "loss": 0.3307, "step": 38276 }, { "epoch": 3.8915209434729565, "grad_norm": 0.2635567784309387, "learning_rate": 1.4239240377503065e-06, "loss": 0.3207, "step": 38277 }, { "epoch": 3.8916226108174055, "grad_norm": 0.2649701237678528, "learning_rate": 1.4236760154184465e-06, "loss": 0.2708, "step": 38278 }, { "epoch": 3.8917242781618544, "grad_norm": 0.2661469578742981, "learning_rate": 1.4234280111032544e-06, "loss": 0.3231, "step": 38279 }, { "epoch": 3.8918259455063033, "grad_norm": 0.2869683504104614, "learning_rate": 1.423180024805984e-06, "loss": 0.288, "step": 38280 }, { "epoch": 3.8919276128507523, "grad_norm": 0.27560025453567505, "learning_rate": 1.4229320565278826e-06, "loss": 0.3109, "step": 38281 }, { "epoch": 3.8920292801952012, "grad_norm": 0.28019818663597107, "learning_rate": 1.422684106270198e-06, "loss": 0.2966, "step": 38282 }, { "epoch": 3.89213094753965, "grad_norm": 0.31352537870407104, "learning_rate": 1.4224361740341825e-06, "loss": 0.2979, "step": 38283 }, { "epoch": 3.892232614884099, "grad_norm": 0.26962369680404663, "learning_rate": 1.4221882598210828e-06, "loss": 0.311, "step": 38284 }, { "epoch": 3.892334282228548, "grad_norm": 0.26542341709136963, "learning_rate": 1.4219403636321478e-06, "loss": 0.3093, "step": 38285 }, { "epoch": 3.892435949572997, "grad_norm": 0.27562472224235535, "learning_rate": 1.4216924854686276e-06, "loss": 0.3706, "step": 38286 }, { "epoch": 3.892537616917446, "grad_norm": 0.25873568654060364, "learning_rate": 1.4214446253317704e-06, "loss": 0.3032, "step": 38287 }, { "epoch": 3.892639284261895, "grad_norm": 0.2738457918167114, "learning_rate": 1.4211967832228253e-06, "loss": 0.3512, "step": 38288 }, { "epoch": 3.892740951606344, "grad_norm": 0.24754665791988373, "learning_rate": 1.4209489591430387e-06, "loss": 0.3166, "step": 38289 }, { "epoch": 3.8928426189507928, "grad_norm": 0.2704492211341858, "learning_rate": 1.420701153093662e-06, "loss": 0.3274, "step": 38290 }, { "epoch": 3.8929442862952417, "grad_norm": 0.259064257144928, "learning_rate": 1.4204533650759428e-06, "loss": 0.3146, "step": 38291 }, { "epoch": 3.893045953639691, "grad_norm": 0.2876635193824768, "learning_rate": 1.420205595091127e-06, "loss": 0.3168, "step": 38292 }, { "epoch": 3.89314762098414, "grad_norm": 0.2537427544593811, "learning_rate": 1.4199578431404658e-06, "loss": 0.3365, "step": 38293 }, { "epoch": 3.893249288328589, "grad_norm": 0.279837965965271, "learning_rate": 1.4197101092252069e-06, "loss": 0.2975, "step": 38294 }, { "epoch": 3.893350955673038, "grad_norm": 0.28429940342903137, "learning_rate": 1.4194623933465961e-06, "loss": 0.2896, "step": 38295 }, { "epoch": 3.893452623017487, "grad_norm": 0.29012688994407654, "learning_rate": 1.4192146955058844e-06, "loss": 0.3431, "step": 38296 }, { "epoch": 3.893554290361936, "grad_norm": 0.25730207562446594, "learning_rate": 1.4189670157043178e-06, "loss": 0.325, "step": 38297 }, { "epoch": 3.8936559577063847, "grad_norm": 0.2619606852531433, "learning_rate": 1.4187193539431448e-06, "loss": 0.3327, "step": 38298 }, { "epoch": 3.8937576250508337, "grad_norm": 0.28136298060417175, "learning_rate": 1.4184717102236107e-06, "loss": 0.3102, "step": 38299 }, { "epoch": 3.8938592923952826, "grad_norm": 0.2706436514854431, "learning_rate": 1.418224084546967e-06, "loss": 0.2779, "step": 38300 }, { "epoch": 3.8939609597397316, "grad_norm": 0.28350791335105896, "learning_rate": 1.417976476914459e-06, "loss": 0.2972, "step": 38301 }, { "epoch": 3.8940626270841805, "grad_norm": 0.289720356464386, "learning_rate": 1.4177288873273326e-06, "loss": 0.3279, "step": 38302 }, { "epoch": 3.8941642944286294, "grad_norm": 0.259686678647995, "learning_rate": 1.4174813157868388e-06, "loss": 0.2967, "step": 38303 }, { "epoch": 3.8942659617730784, "grad_norm": 0.2775039076805115, "learning_rate": 1.4172337622942222e-06, "loss": 0.3212, "step": 38304 }, { "epoch": 3.8943676291175273, "grad_norm": 0.2710917890071869, "learning_rate": 1.4169862268507296e-06, "loss": 0.3, "step": 38305 }, { "epoch": 3.8944692964619763, "grad_norm": 0.28139543533325195, "learning_rate": 1.4167387094576096e-06, "loss": 0.3041, "step": 38306 }, { "epoch": 3.8945709638064256, "grad_norm": 0.2944391071796417, "learning_rate": 1.4164912101161094e-06, "loss": 0.3041, "step": 38307 }, { "epoch": 3.8946726311508746, "grad_norm": 0.2671646773815155, "learning_rate": 1.4162437288274744e-06, "loss": 0.3421, "step": 38308 }, { "epoch": 3.8947742984953235, "grad_norm": 0.26620280742645264, "learning_rate": 1.4159962655929504e-06, "loss": 0.2738, "step": 38309 }, { "epoch": 3.8948759658397725, "grad_norm": 0.26914161443710327, "learning_rate": 1.415748820413787e-06, "loss": 0.3114, "step": 38310 }, { "epoch": 3.8949776331842214, "grad_norm": 0.28046342730522156, "learning_rate": 1.4155013932912293e-06, "loss": 0.3163, "step": 38311 }, { "epoch": 3.8950793005286704, "grad_norm": 0.2888168394565582, "learning_rate": 1.4152539842265222e-06, "loss": 0.33, "step": 38312 }, { "epoch": 3.8951809678731193, "grad_norm": 0.2587406039237976, "learning_rate": 1.4150065932209173e-06, "loss": 0.3553, "step": 38313 }, { "epoch": 3.8952826352175682, "grad_norm": 0.28450292348861694, "learning_rate": 1.4147592202756532e-06, "loss": 0.3185, "step": 38314 }, { "epoch": 3.895384302562017, "grad_norm": 0.2695695459842682, "learning_rate": 1.414511865391981e-06, "loss": 0.3277, "step": 38315 }, { "epoch": 3.895485969906466, "grad_norm": 0.28965628147125244, "learning_rate": 1.4142645285711471e-06, "loss": 0.2827, "step": 38316 }, { "epoch": 3.895587637250915, "grad_norm": 0.2992899417877197, "learning_rate": 1.4140172098143968e-06, "loss": 0.3299, "step": 38317 }, { "epoch": 3.895689304595364, "grad_norm": 0.2437274158000946, "learning_rate": 1.4137699091229751e-06, "loss": 0.3225, "step": 38318 }, { "epoch": 3.895790971939813, "grad_norm": 0.274136483669281, "learning_rate": 1.4135226264981272e-06, "loss": 0.3262, "step": 38319 }, { "epoch": 3.895892639284262, "grad_norm": 0.2868673801422119, "learning_rate": 1.4132753619411016e-06, "loss": 0.3041, "step": 38320 }, { "epoch": 3.895994306628711, "grad_norm": 0.24945515394210815, "learning_rate": 1.4130281154531422e-06, "loss": 0.3118, "step": 38321 }, { "epoch": 3.8960959739731598, "grad_norm": 0.3009586036205292, "learning_rate": 1.412780887035493e-06, "loss": 0.299, "step": 38322 }, { "epoch": 3.8961976413176087, "grad_norm": 0.28205373883247375, "learning_rate": 1.412533676689405e-06, "loss": 0.3162, "step": 38323 }, { "epoch": 3.8962993086620576, "grad_norm": 0.272519588470459, "learning_rate": 1.412286484416116e-06, "loss": 0.3094, "step": 38324 }, { "epoch": 3.8964009760065066, "grad_norm": 0.28290414810180664, "learning_rate": 1.4120393102168757e-06, "loss": 0.2869, "step": 38325 }, { "epoch": 3.8965026433509555, "grad_norm": 0.2542480528354645, "learning_rate": 1.4117921540929298e-06, "loss": 0.3163, "step": 38326 }, { "epoch": 3.8966043106954045, "grad_norm": 0.27156364917755127, "learning_rate": 1.4115450160455225e-06, "loss": 0.3145, "step": 38327 }, { "epoch": 3.8967059780398534, "grad_norm": 0.27167168259620667, "learning_rate": 1.4112978960758987e-06, "loss": 0.3356, "step": 38328 }, { "epoch": 3.8968076453843024, "grad_norm": 0.27093935012817383, "learning_rate": 1.411050794185302e-06, "loss": 0.3285, "step": 38329 }, { "epoch": 3.8969093127287513, "grad_norm": 0.26542848348617554, "learning_rate": 1.41080371037498e-06, "loss": 0.2961, "step": 38330 }, { "epoch": 3.8970109800732002, "grad_norm": 0.3002527356147766, "learning_rate": 1.4105566446461755e-06, "loss": 0.3033, "step": 38331 }, { "epoch": 3.897112647417649, "grad_norm": 0.2952987849712372, "learning_rate": 1.4103095970001324e-06, "loss": 0.3071, "step": 38332 }, { "epoch": 3.8972143147620986, "grad_norm": 0.27042609453201294, "learning_rate": 1.4100625674380997e-06, "loss": 0.2638, "step": 38333 }, { "epoch": 3.8973159821065475, "grad_norm": 0.34892737865448, "learning_rate": 1.4098155559613157e-06, "loss": 0.2816, "step": 38334 }, { "epoch": 3.8974176494509964, "grad_norm": 0.30698737502098083, "learning_rate": 1.4095685625710276e-06, "loss": 0.2778, "step": 38335 }, { "epoch": 3.8975193167954454, "grad_norm": 0.28631287813186646, "learning_rate": 1.4093215872684828e-06, "loss": 0.2875, "step": 38336 }, { "epoch": 3.8976209841398943, "grad_norm": 0.2685509920120239, "learning_rate": 1.4090746300549196e-06, "loss": 0.3046, "step": 38337 }, { "epoch": 3.8977226514843433, "grad_norm": 0.27731001377105713, "learning_rate": 1.4088276909315868e-06, "loss": 0.3143, "step": 38338 }, { "epoch": 3.897824318828792, "grad_norm": 0.2712652087211609, "learning_rate": 1.4085807698997251e-06, "loss": 0.3508, "step": 38339 }, { "epoch": 3.897925986173241, "grad_norm": 0.28621795773506165, "learning_rate": 1.4083338669605813e-06, "loss": 0.348, "step": 38340 }, { "epoch": 3.89802765351769, "grad_norm": 0.2774820923805237, "learning_rate": 1.408086982115398e-06, "loss": 0.3, "step": 38341 }, { "epoch": 3.898129320862139, "grad_norm": 0.2593544125556946, "learning_rate": 1.4078401153654175e-06, "loss": 0.3244, "step": 38342 }, { "epoch": 3.898230988206588, "grad_norm": 0.2727266848087311, "learning_rate": 1.407593266711888e-06, "loss": 0.3256, "step": 38343 }, { "epoch": 3.898332655551037, "grad_norm": 0.28069770336151123, "learning_rate": 1.407346436156047e-06, "loss": 0.3584, "step": 38344 }, { "epoch": 3.898434322895486, "grad_norm": 0.2715223729610443, "learning_rate": 1.4070996236991408e-06, "loss": 0.2991, "step": 38345 }, { "epoch": 3.898535990239935, "grad_norm": 0.265414834022522, "learning_rate": 1.4068528293424155e-06, "loss": 0.3132, "step": 38346 }, { "epoch": 3.8986376575843837, "grad_norm": 0.29232558608055115, "learning_rate": 1.4066060530871096e-06, "loss": 0.3079, "step": 38347 }, { "epoch": 3.898739324928833, "grad_norm": 0.2791205942630768, "learning_rate": 1.406359294934469e-06, "loss": 0.3095, "step": 38348 }, { "epoch": 3.898840992273282, "grad_norm": 0.2895229756832123, "learning_rate": 1.4061125548857351e-06, "loss": 0.2773, "step": 38349 }, { "epoch": 3.898942659617731, "grad_norm": 0.24712100625038147, "learning_rate": 1.4058658329421542e-06, "loss": 0.3028, "step": 38350 }, { "epoch": 3.89904432696218, "grad_norm": 0.2601868510246277, "learning_rate": 1.4056191291049664e-06, "loss": 0.3128, "step": 38351 }, { "epoch": 3.899145994306629, "grad_norm": 0.2706077992916107, "learning_rate": 1.4053724433754145e-06, "loss": 0.3042, "step": 38352 }, { "epoch": 3.899247661651078, "grad_norm": 0.2755470871925354, "learning_rate": 1.4051257757547448e-06, "loss": 0.2996, "step": 38353 }, { "epoch": 3.8993493289955268, "grad_norm": 0.2663097679615021, "learning_rate": 1.4048791262441936e-06, "loss": 0.3082, "step": 38354 }, { "epoch": 3.8994509963399757, "grad_norm": 0.2600419521331787, "learning_rate": 1.4046324948450075e-06, "loss": 0.3099, "step": 38355 }, { "epoch": 3.8995526636844247, "grad_norm": 0.28833454847335815, "learning_rate": 1.4043858815584316e-06, "loss": 0.3183, "step": 38356 }, { "epoch": 3.8996543310288736, "grad_norm": 0.2769124507904053, "learning_rate": 1.4041392863857018e-06, "loss": 0.3299, "step": 38357 }, { "epoch": 3.8997559983733225, "grad_norm": 0.27108582854270935, "learning_rate": 1.4038927093280657e-06, "loss": 0.3084, "step": 38358 }, { "epoch": 3.8998576657177715, "grad_norm": 0.2983691096305847, "learning_rate": 1.4036461503867632e-06, "loss": 0.302, "step": 38359 }, { "epoch": 3.8999593330622204, "grad_norm": 0.27601751685142517, "learning_rate": 1.4033996095630352e-06, "loss": 0.2944, "step": 38360 }, { "epoch": 3.9000610004066694, "grad_norm": 0.27703621983528137, "learning_rate": 1.4031530868581272e-06, "loss": 0.3152, "step": 38361 }, { "epoch": 3.9001626677511183, "grad_norm": 0.2698264718055725, "learning_rate": 1.4029065822732773e-06, "loss": 0.3265, "step": 38362 }, { "epoch": 3.9002643350955672, "grad_norm": 0.27399173378944397, "learning_rate": 1.4026600958097325e-06, "loss": 0.336, "step": 38363 }, { "epoch": 3.900366002440016, "grad_norm": 0.29807281494140625, "learning_rate": 1.4024136274687282e-06, "loss": 0.28, "step": 38364 }, { "epoch": 3.900467669784465, "grad_norm": 0.2541305720806122, "learning_rate": 1.4021671772515094e-06, "loss": 0.3493, "step": 38365 }, { "epoch": 3.900569337128914, "grad_norm": 0.268422394990921, "learning_rate": 1.4019207451593204e-06, "loss": 0.2788, "step": 38366 }, { "epoch": 3.900671004473363, "grad_norm": 0.2523629069328308, "learning_rate": 1.401674331193396e-06, "loss": 0.3274, "step": 38367 }, { "epoch": 3.900772671817812, "grad_norm": 0.3054085969924927, "learning_rate": 1.401427935354983e-06, "loss": 0.3057, "step": 38368 }, { "epoch": 3.900874339162261, "grad_norm": 0.26827824115753174, "learning_rate": 1.4011815576453213e-06, "loss": 0.2884, "step": 38369 }, { "epoch": 3.90097600650671, "grad_norm": 0.26581764221191406, "learning_rate": 1.40093519806565e-06, "loss": 0.2901, "step": 38370 }, { "epoch": 3.901077673851159, "grad_norm": 0.2549228370189667, "learning_rate": 1.4006888566172132e-06, "loss": 0.3318, "step": 38371 }, { "epoch": 3.9011793411956077, "grad_norm": 0.2674659788608551, "learning_rate": 1.4004425333012506e-06, "loss": 0.2989, "step": 38372 }, { "epoch": 3.9012810085400567, "grad_norm": 0.2864854037761688, "learning_rate": 1.4001962281190035e-06, "loss": 0.2767, "step": 38373 }, { "epoch": 3.901382675884506, "grad_norm": 0.2785755693912506, "learning_rate": 1.3999499410717104e-06, "loss": 0.307, "step": 38374 }, { "epoch": 3.901484343228955, "grad_norm": 0.28101691603660583, "learning_rate": 1.3997036721606145e-06, "loss": 0.2937, "step": 38375 }, { "epoch": 3.901586010573404, "grad_norm": 0.27057862281799316, "learning_rate": 1.3994574213869582e-06, "loss": 0.3213, "step": 38376 }, { "epoch": 3.901687677917853, "grad_norm": 0.26524996757507324, "learning_rate": 1.3992111887519777e-06, "loss": 0.3117, "step": 38377 }, { "epoch": 3.901789345262302, "grad_norm": 0.2852540910243988, "learning_rate": 1.3989649742569166e-06, "loss": 0.3284, "step": 38378 }, { "epoch": 3.9018910126067508, "grad_norm": 0.27075082063674927, "learning_rate": 1.3987187779030143e-06, "loss": 0.3254, "step": 38379 }, { "epoch": 3.9019926799511997, "grad_norm": 0.26356518268585205, "learning_rate": 1.3984725996915093e-06, "loss": 0.3191, "step": 38380 }, { "epoch": 3.9020943472956486, "grad_norm": 0.2774484157562256, "learning_rate": 1.398226439623645e-06, "loss": 0.2914, "step": 38381 }, { "epoch": 3.9021960146400976, "grad_norm": 0.28574487566947937, "learning_rate": 1.3979802977006601e-06, "loss": 0.3054, "step": 38382 }, { "epoch": 3.9022976819845465, "grad_norm": 0.27599287033081055, "learning_rate": 1.3977341739237942e-06, "loss": 0.3293, "step": 38383 }, { "epoch": 3.9023993493289955, "grad_norm": 0.27380967140197754, "learning_rate": 1.3974880682942864e-06, "loss": 0.2849, "step": 38384 }, { "epoch": 3.9025010166734444, "grad_norm": 0.27667543292045593, "learning_rate": 1.3972419808133775e-06, "loss": 0.3193, "step": 38385 }, { "epoch": 3.9026026840178933, "grad_norm": 0.29526016116142273, "learning_rate": 1.3969959114823105e-06, "loss": 0.2891, "step": 38386 }, { "epoch": 3.9027043513623423, "grad_norm": 0.28949806094169617, "learning_rate": 1.396749860302319e-06, "loss": 0.3099, "step": 38387 }, { "epoch": 3.9028060187067912, "grad_norm": 0.27725282311439514, "learning_rate": 1.3965038272746467e-06, "loss": 0.3047, "step": 38388 }, { "epoch": 3.9029076860512406, "grad_norm": 0.24746961891651154, "learning_rate": 1.3962578124005315e-06, "loss": 0.3007, "step": 38389 }, { "epoch": 3.9030093533956896, "grad_norm": 0.28139519691467285, "learning_rate": 1.3960118156812119e-06, "loss": 0.2976, "step": 38390 }, { "epoch": 3.9031110207401385, "grad_norm": 0.26340481638908386, "learning_rate": 1.39576583711793e-06, "loss": 0.2748, "step": 38391 }, { "epoch": 3.9032126880845874, "grad_norm": 0.2773897647857666, "learning_rate": 1.3955198767119233e-06, "loss": 0.3106, "step": 38392 }, { "epoch": 3.9033143554290364, "grad_norm": 0.2602495551109314, "learning_rate": 1.3952739344644306e-06, "loss": 0.3101, "step": 38393 }, { "epoch": 3.9034160227734853, "grad_norm": 0.29134687781333923, "learning_rate": 1.3950280103766895e-06, "loss": 0.3154, "step": 38394 }, { "epoch": 3.9035176901179343, "grad_norm": 0.274370402097702, "learning_rate": 1.3947821044499427e-06, "loss": 0.3121, "step": 38395 }, { "epoch": 3.903619357462383, "grad_norm": 0.29108503460884094, "learning_rate": 1.3945362166854265e-06, "loss": 0.2895, "step": 38396 }, { "epoch": 3.903721024806832, "grad_norm": 0.2825097143650055, "learning_rate": 1.3942903470843788e-06, "loss": 0.2992, "step": 38397 }, { "epoch": 3.903822692151281, "grad_norm": 0.2538210451602936, "learning_rate": 1.3940444956480408e-06, "loss": 0.3188, "step": 38398 }, { "epoch": 3.90392435949573, "grad_norm": 0.2911633253097534, "learning_rate": 1.3937986623776496e-06, "loss": 0.336, "step": 38399 }, { "epoch": 3.904026026840179, "grad_norm": 0.2698703706264496, "learning_rate": 1.3935528472744442e-06, "loss": 0.3194, "step": 38400 }, { "epoch": 3.904127694184628, "grad_norm": 0.28595706820487976, "learning_rate": 1.3933070503396607e-06, "loss": 0.3092, "step": 38401 }, { "epoch": 3.904229361529077, "grad_norm": 0.26130735874176025, "learning_rate": 1.3930612715745406e-06, "loss": 0.3129, "step": 38402 }, { "epoch": 3.904331028873526, "grad_norm": 0.28420427441596985, "learning_rate": 1.392815510980321e-06, "loss": 0.3381, "step": 38403 }, { "epoch": 3.9044326962179747, "grad_norm": 0.2729916274547577, "learning_rate": 1.392569768558238e-06, "loss": 0.3171, "step": 38404 }, { "epoch": 3.9045343635624237, "grad_norm": 0.25621163845062256, "learning_rate": 1.3923240443095331e-06, "loss": 0.3231, "step": 38405 }, { "epoch": 3.9046360309068726, "grad_norm": 0.2825027406215668, "learning_rate": 1.3920783382354419e-06, "loss": 0.3117, "step": 38406 }, { "epoch": 3.9047376982513216, "grad_norm": 0.27710142731666565, "learning_rate": 1.3918326503372015e-06, "loss": 0.2773, "step": 38407 }, { "epoch": 3.9048393655957705, "grad_norm": 0.2659918963909149, "learning_rate": 1.391586980616052e-06, "loss": 0.2968, "step": 38408 }, { "epoch": 3.9049410329402194, "grad_norm": 0.29124879837036133, "learning_rate": 1.3913413290732296e-06, "loss": 0.3306, "step": 38409 }, { "epoch": 3.9050427002846684, "grad_norm": 0.27922141551971436, "learning_rate": 1.3910956957099725e-06, "loss": 0.3033, "step": 38410 }, { "epoch": 3.9051443676291173, "grad_norm": 0.25760865211486816, "learning_rate": 1.390850080527516e-06, "loss": 0.3244, "step": 38411 }, { "epoch": 3.9052460349735663, "grad_norm": 0.27127188444137573, "learning_rate": 1.3906044835271005e-06, "loss": 0.3195, "step": 38412 }, { "epoch": 3.905347702318015, "grad_norm": 0.33207598328590393, "learning_rate": 1.390358904709962e-06, "loss": 0.3135, "step": 38413 }, { "epoch": 3.905449369662464, "grad_norm": 0.277057021856308, "learning_rate": 1.3901133440773362e-06, "loss": 0.321, "step": 38414 }, { "epoch": 3.9055510370069135, "grad_norm": 0.29284510016441345, "learning_rate": 1.389867801630463e-06, "loss": 0.3285, "step": 38415 }, { "epoch": 3.9056527043513625, "grad_norm": 0.27937132120132446, "learning_rate": 1.3896222773705776e-06, "loss": 0.2956, "step": 38416 }, { "epoch": 3.9057543716958114, "grad_norm": 0.2790640592575073, "learning_rate": 1.389376771298916e-06, "loss": 0.2936, "step": 38417 }, { "epoch": 3.9058560390402604, "grad_norm": 0.27379336953163147, "learning_rate": 1.3891312834167176e-06, "loss": 0.2901, "step": 38418 }, { "epoch": 3.9059577063847093, "grad_norm": 0.27486348152160645, "learning_rate": 1.3888858137252176e-06, "loss": 0.2952, "step": 38419 }, { "epoch": 3.9060593737291582, "grad_norm": 0.27570751309394836, "learning_rate": 1.3886403622256528e-06, "loss": 0.3198, "step": 38420 }, { "epoch": 3.906161041073607, "grad_norm": 0.2729749381542206, "learning_rate": 1.3883949289192583e-06, "loss": 0.2949, "step": 38421 }, { "epoch": 3.906262708418056, "grad_norm": 0.2678484618663788, "learning_rate": 1.3881495138072737e-06, "loss": 0.2885, "step": 38422 }, { "epoch": 3.906364375762505, "grad_norm": 0.25026941299438477, "learning_rate": 1.387904116890933e-06, "loss": 0.3022, "step": 38423 }, { "epoch": 3.906466043106954, "grad_norm": 0.26285022497177124, "learning_rate": 1.3876587381714718e-06, "loss": 0.3074, "step": 38424 }, { "epoch": 3.906567710451403, "grad_norm": 0.2514311373233795, "learning_rate": 1.3874133776501287e-06, "loss": 0.3043, "step": 38425 }, { "epoch": 3.906669377795852, "grad_norm": 0.26070088148117065, "learning_rate": 1.3871680353281387e-06, "loss": 0.3135, "step": 38426 }, { "epoch": 3.906771045140301, "grad_norm": 0.259496808052063, "learning_rate": 1.3869227112067357e-06, "loss": 0.2996, "step": 38427 }, { "epoch": 3.9068727124847498, "grad_norm": 0.2764819264411926, "learning_rate": 1.3866774052871595e-06, "loss": 0.3053, "step": 38428 }, { "epoch": 3.9069743798291987, "grad_norm": 0.27593907713890076, "learning_rate": 1.3864321175706435e-06, "loss": 0.3125, "step": 38429 }, { "epoch": 3.907076047173648, "grad_norm": 0.2591778635978699, "learning_rate": 1.3861868480584244e-06, "loss": 0.307, "step": 38430 }, { "epoch": 3.907177714518097, "grad_norm": 0.2783338725566864, "learning_rate": 1.3859415967517354e-06, "loss": 0.3367, "step": 38431 }, { "epoch": 3.907279381862546, "grad_norm": 0.2951809763908386, "learning_rate": 1.3856963636518151e-06, "loss": 0.3536, "step": 38432 }, { "epoch": 3.907381049206995, "grad_norm": 0.2784096300601959, "learning_rate": 1.3854511487598975e-06, "loss": 0.3187, "step": 38433 }, { "epoch": 3.907482716551444, "grad_norm": 0.2585095465183258, "learning_rate": 1.3852059520772165e-06, "loss": 0.3272, "step": 38434 }, { "epoch": 3.907584383895893, "grad_norm": 0.2806927263736725, "learning_rate": 1.3849607736050108e-06, "loss": 0.3288, "step": 38435 }, { "epoch": 3.9076860512403417, "grad_norm": 0.2581391930580139, "learning_rate": 1.384715613344514e-06, "loss": 0.3295, "step": 38436 }, { "epoch": 3.9077877185847907, "grad_norm": 0.2553862929344177, "learning_rate": 1.3844704712969587e-06, "loss": 0.3049, "step": 38437 }, { "epoch": 3.9078893859292396, "grad_norm": 0.27596962451934814, "learning_rate": 1.3842253474635836e-06, "loss": 0.309, "step": 38438 }, { "epoch": 3.9079910532736886, "grad_norm": 0.2724812924861908, "learning_rate": 1.3839802418456216e-06, "loss": 0.3074, "step": 38439 }, { "epoch": 3.9080927206181375, "grad_norm": 0.25011202692985535, "learning_rate": 1.3837351544443084e-06, "loss": 0.3472, "step": 38440 }, { "epoch": 3.9081943879625864, "grad_norm": 0.2721714675426483, "learning_rate": 1.3834900852608767e-06, "loss": 0.311, "step": 38441 }, { "epoch": 3.9082960553070354, "grad_norm": 0.27479150891304016, "learning_rate": 1.3832450342965636e-06, "loss": 0.3029, "step": 38442 }, { "epoch": 3.9083977226514843, "grad_norm": 0.2539616525173187, "learning_rate": 1.3830000015526029e-06, "loss": 0.2924, "step": 38443 }, { "epoch": 3.9084993899959333, "grad_norm": 0.2733156979084015, "learning_rate": 1.382754987030227e-06, "loss": 0.265, "step": 38444 }, { "epoch": 3.908601057340382, "grad_norm": 0.27121156454086304, "learning_rate": 1.3825099907306732e-06, "loss": 0.3281, "step": 38445 }, { "epoch": 3.908702724684831, "grad_norm": 0.2810269892215729, "learning_rate": 1.3822650126551745e-06, "loss": 0.3124, "step": 38446 }, { "epoch": 3.90880439202928, "grad_norm": 0.2669501006603241, "learning_rate": 1.3820200528049637e-06, "loss": 0.3001, "step": 38447 }, { "epoch": 3.908906059373729, "grad_norm": 0.2694932520389557, "learning_rate": 1.3817751111812776e-06, "loss": 0.3263, "step": 38448 }, { "epoch": 3.909007726718178, "grad_norm": 0.28318193554878235, "learning_rate": 1.3815301877853488e-06, "loss": 0.2905, "step": 38449 }, { "epoch": 3.909109394062627, "grad_norm": 0.26742416620254517, "learning_rate": 1.3812852826184103e-06, "loss": 0.3184, "step": 38450 }, { "epoch": 3.909211061407076, "grad_norm": 0.25937125086784363, "learning_rate": 1.3810403956816958e-06, "loss": 0.3343, "step": 38451 }, { "epoch": 3.909312728751525, "grad_norm": 0.28078174591064453, "learning_rate": 1.3807955269764412e-06, "loss": 0.3252, "step": 38452 }, { "epoch": 3.9094143960959737, "grad_norm": 0.2933906018733978, "learning_rate": 1.3805506765038785e-06, "loss": 0.2983, "step": 38453 }, { "epoch": 3.9095160634404227, "grad_norm": 0.271670937538147, "learning_rate": 1.38030584426524e-06, "loss": 0.3084, "step": 38454 }, { "epoch": 3.9096177307848716, "grad_norm": 0.2839682996273041, "learning_rate": 1.3800610302617618e-06, "loss": 0.3095, "step": 38455 }, { "epoch": 3.909719398129321, "grad_norm": 0.28483736515045166, "learning_rate": 1.3798162344946758e-06, "loss": 0.304, "step": 38456 }, { "epoch": 3.90982106547377, "grad_norm": 0.2578751742839813, "learning_rate": 1.379571456965214e-06, "loss": 0.2799, "step": 38457 }, { "epoch": 3.909922732818219, "grad_norm": 0.2831190526485443, "learning_rate": 1.3793266976746129e-06, "loss": 0.3152, "step": 38458 }, { "epoch": 3.910024400162668, "grad_norm": 0.2901703417301178, "learning_rate": 1.3790819566241026e-06, "loss": 0.3232, "step": 38459 }, { "epoch": 3.9101260675071168, "grad_norm": 0.27925363183021545, "learning_rate": 1.3788372338149176e-06, "loss": 0.2664, "step": 38460 }, { "epoch": 3.9102277348515657, "grad_norm": 0.2569635212421417, "learning_rate": 1.3785925292482881e-06, "loss": 0.2898, "step": 38461 }, { "epoch": 3.9103294021960147, "grad_norm": 0.2622271478176117, "learning_rate": 1.378347842925451e-06, "loss": 0.328, "step": 38462 }, { "epoch": 3.9104310695404636, "grad_norm": 0.25209951400756836, "learning_rate": 1.3781031748476365e-06, "loss": 0.3285, "step": 38463 }, { "epoch": 3.9105327368849125, "grad_norm": 0.26680898666381836, "learning_rate": 1.3778585250160758e-06, "loss": 0.3201, "step": 38464 }, { "epoch": 3.9106344042293615, "grad_norm": 0.26274657249450684, "learning_rate": 1.3776138934320061e-06, "loss": 0.3149, "step": 38465 }, { "epoch": 3.9107360715738104, "grad_norm": 0.26434147357940674, "learning_rate": 1.3773692800966538e-06, "loss": 0.3006, "step": 38466 }, { "epoch": 3.9108377389182594, "grad_norm": 0.2894270420074463, "learning_rate": 1.3771246850112547e-06, "loss": 0.3075, "step": 38467 }, { "epoch": 3.9109394062627083, "grad_norm": 0.29596078395843506, "learning_rate": 1.376880108177041e-06, "loss": 0.3063, "step": 38468 }, { "epoch": 3.9110410736071572, "grad_norm": 0.2866009473800659, "learning_rate": 1.3766355495952454e-06, "loss": 0.2815, "step": 38469 }, { "epoch": 3.911142740951606, "grad_norm": 0.2731352150440216, "learning_rate": 1.3763910092670985e-06, "loss": 0.3208, "step": 38470 }, { "epoch": 3.9112444082960556, "grad_norm": 0.27920326590538025, "learning_rate": 1.3761464871938308e-06, "loss": 0.3266, "step": 38471 }, { "epoch": 3.9113460756405045, "grad_norm": 0.2759118974208832, "learning_rate": 1.375901983376678e-06, "loss": 0.3033, "step": 38472 }, { "epoch": 3.9114477429849535, "grad_norm": 0.2932315766811371, "learning_rate": 1.3756574978168691e-06, "loss": 0.3198, "step": 38473 }, { "epoch": 3.9115494103294024, "grad_norm": 0.2709629535675049, "learning_rate": 1.3754130305156354e-06, "loss": 0.3009, "step": 38474 }, { "epoch": 3.9116510776738513, "grad_norm": 0.26994556188583374, "learning_rate": 1.375168581474212e-06, "loss": 0.3073, "step": 38475 }, { "epoch": 3.9117527450183003, "grad_norm": 0.27300164103507996, "learning_rate": 1.3749241506938255e-06, "loss": 0.3182, "step": 38476 }, { "epoch": 3.9118544123627492, "grad_norm": 0.2634179890155792, "learning_rate": 1.37467973817571e-06, "loss": 0.2934, "step": 38477 }, { "epoch": 3.911956079707198, "grad_norm": 0.26028868556022644, "learning_rate": 1.3744353439210972e-06, "loss": 0.3114, "step": 38478 }, { "epoch": 3.912057747051647, "grad_norm": 0.2582511305809021, "learning_rate": 1.3741909679312177e-06, "loss": 0.3061, "step": 38479 }, { "epoch": 3.912159414396096, "grad_norm": 0.280399352312088, "learning_rate": 1.3739466102073024e-06, "loss": 0.3049, "step": 38480 }, { "epoch": 3.912261081740545, "grad_norm": 0.27627936005592346, "learning_rate": 1.3737022707505816e-06, "loss": 0.3201, "step": 38481 }, { "epoch": 3.912362749084994, "grad_norm": 0.25536632537841797, "learning_rate": 1.3734579495622879e-06, "loss": 0.3032, "step": 38482 }, { "epoch": 3.912464416429443, "grad_norm": 0.2720048427581787, "learning_rate": 1.3732136466436512e-06, "loss": 0.3026, "step": 38483 }, { "epoch": 3.912566083773892, "grad_norm": 0.26087233424186707, "learning_rate": 1.372969361995901e-06, "loss": 0.3196, "step": 38484 }, { "epoch": 3.9126677511183408, "grad_norm": 0.29365333914756775, "learning_rate": 1.372725095620272e-06, "loss": 0.326, "step": 38485 }, { "epoch": 3.9127694184627897, "grad_norm": 0.27845627069473267, "learning_rate": 1.3724808475179895e-06, "loss": 0.3202, "step": 38486 }, { "epoch": 3.9128710858072386, "grad_norm": 0.2874682545661926, "learning_rate": 1.3722366176902858e-06, "loss": 0.3006, "step": 38487 }, { "epoch": 3.9129727531516876, "grad_norm": 0.28981566429138184, "learning_rate": 1.3719924061383954e-06, "loss": 0.3138, "step": 38488 }, { "epoch": 3.9130744204961365, "grad_norm": 0.275756299495697, "learning_rate": 1.3717482128635424e-06, "loss": 0.3136, "step": 38489 }, { "epoch": 3.9131760878405855, "grad_norm": 0.2744370996952057, "learning_rate": 1.3715040378669603e-06, "loss": 0.2895, "step": 38490 }, { "epoch": 3.9132777551850344, "grad_norm": 0.2664406895637512, "learning_rate": 1.3712598811498778e-06, "loss": 0.3166, "step": 38491 }, { "epoch": 3.9133794225294833, "grad_norm": 0.2736128866672516, "learning_rate": 1.3710157427135268e-06, "loss": 0.3195, "step": 38492 }, { "epoch": 3.9134810898739323, "grad_norm": 0.2500830292701721, "learning_rate": 1.3707716225591367e-06, "loss": 0.3099, "step": 38493 }, { "epoch": 3.9135827572183812, "grad_norm": 0.27750226855278015, "learning_rate": 1.3705275206879343e-06, "loss": 0.2974, "step": 38494 }, { "epoch": 3.91368442456283, "grad_norm": 0.28330981731414795, "learning_rate": 1.3702834371011552e-06, "loss": 0.317, "step": 38495 }, { "epoch": 3.913786091907279, "grad_norm": 0.2797791659832001, "learning_rate": 1.3700393718000233e-06, "loss": 0.306, "step": 38496 }, { "epoch": 3.9138877592517285, "grad_norm": 0.2650442123413086, "learning_rate": 1.3697953247857694e-06, "loss": 0.3115, "step": 38497 }, { "epoch": 3.9139894265961774, "grad_norm": 0.26252758502960205, "learning_rate": 1.3695512960596274e-06, "loss": 0.2946, "step": 38498 }, { "epoch": 3.9140910939406264, "grad_norm": 0.2743108570575714, "learning_rate": 1.3693072856228202e-06, "loss": 0.3029, "step": 38499 }, { "epoch": 3.9141927612850753, "grad_norm": 0.27682042121887207, "learning_rate": 1.369063293476582e-06, "loss": 0.3053, "step": 38500 }, { "epoch": 3.9142944286295243, "grad_norm": 0.27035653591156006, "learning_rate": 1.3688193196221377e-06, "loss": 0.3064, "step": 38501 }, { "epoch": 3.914396095973973, "grad_norm": 0.25730377435684204, "learning_rate": 1.3685753640607207e-06, "loss": 0.3049, "step": 38502 }, { "epoch": 3.914497763318422, "grad_norm": 0.2582786977291107, "learning_rate": 1.3683314267935582e-06, "loss": 0.3017, "step": 38503 }, { "epoch": 3.914599430662871, "grad_norm": 0.2843015789985657, "learning_rate": 1.3680875078218775e-06, "loss": 0.3084, "step": 38504 }, { "epoch": 3.91470109800732, "grad_norm": 0.2755441665649414, "learning_rate": 1.367843607146912e-06, "loss": 0.3209, "step": 38505 }, { "epoch": 3.914802765351769, "grad_norm": 0.2706913650035858, "learning_rate": 1.367599724769884e-06, "loss": 0.2979, "step": 38506 }, { "epoch": 3.914904432696218, "grad_norm": 0.2746697962284088, "learning_rate": 1.3673558606920257e-06, "loss": 0.3229, "step": 38507 }, { "epoch": 3.915006100040667, "grad_norm": 0.27896225452423096, "learning_rate": 1.3671120149145677e-06, "loss": 0.3191, "step": 38508 }, { "epoch": 3.915107767385116, "grad_norm": 0.260912150144577, "learning_rate": 1.3668681874387335e-06, "loss": 0.3388, "step": 38509 }, { "epoch": 3.9152094347295647, "grad_norm": 0.2925229072570801, "learning_rate": 1.3666243782657556e-06, "loss": 0.2964, "step": 38510 }, { "epoch": 3.9153111020740137, "grad_norm": 0.254064679145813, "learning_rate": 1.3663805873968605e-06, "loss": 0.2864, "step": 38511 }, { "epoch": 3.915412769418463, "grad_norm": 0.2663654088973999, "learning_rate": 1.3661368148332749e-06, "loss": 0.3201, "step": 38512 }, { "epoch": 3.915514436762912, "grad_norm": 0.26834994554519653, "learning_rate": 1.3658930605762299e-06, "loss": 0.3184, "step": 38513 }, { "epoch": 3.915616104107361, "grad_norm": 0.2628939151763916, "learning_rate": 1.3656493246269508e-06, "loss": 0.3049, "step": 38514 }, { "epoch": 3.91571777145181, "grad_norm": 0.2898615300655365, "learning_rate": 1.36540560698667e-06, "loss": 0.3027, "step": 38515 }, { "epoch": 3.915819438796259, "grad_norm": 0.27759674191474915, "learning_rate": 1.3651619076566086e-06, "loss": 0.3171, "step": 38516 }, { "epoch": 3.9159211061407078, "grad_norm": 0.26945897936820984, "learning_rate": 1.3649182266379978e-06, "loss": 0.2903, "step": 38517 }, { "epoch": 3.9160227734851567, "grad_norm": 0.2777886390686035, "learning_rate": 1.3646745639320685e-06, "loss": 0.2954, "step": 38518 }, { "epoch": 3.9161244408296056, "grad_norm": 0.2639291286468506, "learning_rate": 1.3644309195400411e-06, "loss": 0.3145, "step": 38519 }, { "epoch": 3.9162261081740546, "grad_norm": 0.27995359897613525, "learning_rate": 1.364187293463149e-06, "loss": 0.3285, "step": 38520 }, { "epoch": 3.9163277755185035, "grad_norm": 0.2656247615814209, "learning_rate": 1.3639436857026172e-06, "loss": 0.3202, "step": 38521 }, { "epoch": 3.9164294428629525, "grad_norm": 0.29296940565109253, "learning_rate": 1.3637000962596714e-06, "loss": 0.3019, "step": 38522 }, { "epoch": 3.9165311102074014, "grad_norm": 0.25543415546417236, "learning_rate": 1.363456525135542e-06, "loss": 0.272, "step": 38523 }, { "epoch": 3.9166327775518504, "grad_norm": 0.2803567945957184, "learning_rate": 1.3632129723314541e-06, "loss": 0.323, "step": 38524 }, { "epoch": 3.9167344448962993, "grad_norm": 0.26258528232574463, "learning_rate": 1.3629694378486352e-06, "loss": 0.2935, "step": 38525 }, { "epoch": 3.9168361122407482, "grad_norm": 0.25836265087127686, "learning_rate": 1.362725921688311e-06, "loss": 0.3265, "step": 38526 }, { "epoch": 3.916937779585197, "grad_norm": 0.2708645761013031, "learning_rate": 1.3624824238517087e-06, "loss": 0.3095, "step": 38527 }, { "epoch": 3.917039446929646, "grad_norm": 0.2853206396102905, "learning_rate": 1.3622389443400586e-06, "loss": 0.2712, "step": 38528 }, { "epoch": 3.917141114274095, "grad_norm": 0.25231456756591797, "learning_rate": 1.3619954831545813e-06, "loss": 0.3064, "step": 38529 }, { "epoch": 3.917242781618544, "grad_norm": 0.2746627926826477, "learning_rate": 1.3617520402965078e-06, "loss": 0.3229, "step": 38530 }, { "epoch": 3.917344448962993, "grad_norm": 0.2666260004043579, "learning_rate": 1.3615086157670631e-06, "loss": 0.327, "step": 38531 }, { "epoch": 3.917446116307442, "grad_norm": 0.2812841832637787, "learning_rate": 1.3612652095674717e-06, "loss": 0.2996, "step": 38532 }, { "epoch": 3.917547783651891, "grad_norm": 0.2817983329296112, "learning_rate": 1.361021821698963e-06, "loss": 0.3087, "step": 38533 }, { "epoch": 3.9176494509963398, "grad_norm": 0.2750638723373413, "learning_rate": 1.3607784521627615e-06, "loss": 0.341, "step": 38534 }, { "epoch": 3.9177511183407887, "grad_norm": 0.2635688781738281, "learning_rate": 1.3605351009600932e-06, "loss": 0.2957, "step": 38535 }, { "epoch": 3.9178527856852376, "grad_norm": 0.291422963142395, "learning_rate": 1.360291768092183e-06, "loss": 0.3581, "step": 38536 }, { "epoch": 3.9179544530296866, "grad_norm": 0.27154988050460815, "learning_rate": 1.3600484535602577e-06, "loss": 0.2971, "step": 38537 }, { "epoch": 3.918056120374136, "grad_norm": 0.2545452117919922, "learning_rate": 1.3598051573655462e-06, "loss": 0.2986, "step": 38538 }, { "epoch": 3.918157787718585, "grad_norm": 0.24444876611232758, "learning_rate": 1.3595618795092686e-06, "loss": 0.3312, "step": 38539 }, { "epoch": 3.918259455063034, "grad_norm": 0.2767065167427063, "learning_rate": 1.3593186199926544e-06, "loss": 0.3273, "step": 38540 }, { "epoch": 3.918361122407483, "grad_norm": 0.2731141746044159, "learning_rate": 1.359075378816928e-06, "loss": 0.291, "step": 38541 }, { "epoch": 3.9184627897519317, "grad_norm": 0.2796632945537567, "learning_rate": 1.3588321559833134e-06, "loss": 0.3029, "step": 38542 }, { "epoch": 3.9185644570963807, "grad_norm": 0.2792588472366333, "learning_rate": 1.3585889514930384e-06, "loss": 0.3241, "step": 38543 }, { "epoch": 3.9186661244408296, "grad_norm": 0.2911490201950073, "learning_rate": 1.3583457653473264e-06, "loss": 0.3071, "step": 38544 }, { "epoch": 3.9187677917852786, "grad_norm": 0.2665734887123108, "learning_rate": 1.358102597547404e-06, "loss": 0.3188, "step": 38545 }, { "epoch": 3.9188694591297275, "grad_norm": 0.2689935564994812, "learning_rate": 1.357859448094493e-06, "loss": 0.2691, "step": 38546 }, { "epoch": 3.9189711264741764, "grad_norm": 0.2884620130062103, "learning_rate": 1.3576163169898222e-06, "loss": 0.2792, "step": 38547 }, { "epoch": 3.9190727938186254, "grad_norm": 0.24992431700229645, "learning_rate": 1.357373204234615e-06, "loss": 0.3055, "step": 38548 }, { "epoch": 3.9191744611630743, "grad_norm": 0.29523417353630066, "learning_rate": 1.3571301098300948e-06, "loss": 0.3153, "step": 38549 }, { "epoch": 3.9192761285075233, "grad_norm": 0.2618032395839691, "learning_rate": 1.3568870337774887e-06, "loss": 0.2897, "step": 38550 }, { "epoch": 3.919377795851972, "grad_norm": 0.2951117157936096, "learning_rate": 1.3566439760780197e-06, "loss": 0.2818, "step": 38551 }, { "epoch": 3.919479463196421, "grad_norm": 0.2628406584262848, "learning_rate": 1.3564009367329112e-06, "loss": 0.2817, "step": 38552 }, { "epoch": 3.9195811305408705, "grad_norm": 0.2681325376033783, "learning_rate": 1.3561579157433902e-06, "loss": 0.3195, "step": 38553 }, { "epoch": 3.9196827978853195, "grad_norm": 0.28392475843429565, "learning_rate": 1.3559149131106803e-06, "loss": 0.2855, "step": 38554 }, { "epoch": 3.9197844652297684, "grad_norm": 0.28421568870544434, "learning_rate": 1.3556719288360048e-06, "loss": 0.3167, "step": 38555 }, { "epoch": 3.9198861325742174, "grad_norm": 0.2819341719150543, "learning_rate": 1.3554289629205863e-06, "loss": 0.3002, "step": 38556 }, { "epoch": 3.9199877999186663, "grad_norm": 0.255704402923584, "learning_rate": 1.3551860153656526e-06, "loss": 0.3232, "step": 38557 }, { "epoch": 3.9200894672631152, "grad_norm": 0.2687380909919739, "learning_rate": 1.3549430861724256e-06, "loss": 0.2808, "step": 38558 }, { "epoch": 3.920191134607564, "grad_norm": 0.26536089181900024, "learning_rate": 1.3547001753421268e-06, "loss": 0.2965, "step": 38559 }, { "epoch": 3.920292801952013, "grad_norm": 0.2570843994617462, "learning_rate": 1.3544572828759845e-06, "loss": 0.323, "step": 38560 }, { "epoch": 3.920394469296462, "grad_norm": 0.27672499418258667, "learning_rate": 1.35421440877522e-06, "loss": 0.2754, "step": 38561 }, { "epoch": 3.920496136640911, "grad_norm": 0.2852359414100647, "learning_rate": 1.3539715530410558e-06, "loss": 0.2718, "step": 38562 }, { "epoch": 3.92059780398536, "grad_norm": 0.2930036783218384, "learning_rate": 1.3537287156747176e-06, "loss": 0.3378, "step": 38563 }, { "epoch": 3.920699471329809, "grad_norm": 0.27704760432243347, "learning_rate": 1.3534858966774272e-06, "loss": 0.3171, "step": 38564 }, { "epoch": 3.920801138674258, "grad_norm": 0.27257856726646423, "learning_rate": 1.353243096050409e-06, "loss": 0.2894, "step": 38565 }, { "epoch": 3.9209028060187068, "grad_norm": 0.2556465268135071, "learning_rate": 1.3530003137948839e-06, "loss": 0.3025, "step": 38566 }, { "epoch": 3.9210044733631557, "grad_norm": 0.26897066831588745, "learning_rate": 1.3527575499120777e-06, "loss": 0.3145, "step": 38567 }, { "epoch": 3.9211061407076047, "grad_norm": 0.2862735390663147, "learning_rate": 1.3525148044032128e-06, "loss": 0.2969, "step": 38568 }, { "epoch": 3.9212078080520536, "grad_norm": 0.2695391774177551, "learning_rate": 1.3522720772695091e-06, "loss": 0.3244, "step": 38569 }, { "epoch": 3.9213094753965025, "grad_norm": 0.293684720993042, "learning_rate": 1.3520293685121944e-06, "loss": 0.277, "step": 38570 }, { "epoch": 3.9214111427409515, "grad_norm": 0.2867678105831146, "learning_rate": 1.3517866781324879e-06, "loss": 0.2986, "step": 38571 }, { "epoch": 3.9215128100854004, "grad_norm": 0.25561922788619995, "learning_rate": 1.351544006131612e-06, "loss": 0.3109, "step": 38572 }, { "epoch": 3.9216144774298494, "grad_norm": 0.26282912492752075, "learning_rate": 1.3513013525107921e-06, "loss": 0.3079, "step": 38573 }, { "epoch": 3.9217161447742983, "grad_norm": 0.2794264256954193, "learning_rate": 1.351058717271249e-06, "loss": 0.3147, "step": 38574 }, { "epoch": 3.9218178121187472, "grad_norm": 0.26990020275115967, "learning_rate": 1.350816100414205e-06, "loss": 0.3074, "step": 38575 }, { "epoch": 3.921919479463196, "grad_norm": 0.270687997341156, "learning_rate": 1.3505735019408805e-06, "loss": 0.3136, "step": 38576 }, { "epoch": 3.922021146807645, "grad_norm": 0.27298295497894287, "learning_rate": 1.3503309218525012e-06, "loss": 0.3203, "step": 38577 }, { "epoch": 3.922122814152094, "grad_norm": 0.24853186309337616, "learning_rate": 1.3500883601502873e-06, "loss": 0.2931, "step": 38578 }, { "epoch": 3.9222244814965435, "grad_norm": 0.28091326355934143, "learning_rate": 1.3498458168354595e-06, "loss": 0.3126, "step": 38579 }, { "epoch": 3.9223261488409924, "grad_norm": 0.2701755166053772, "learning_rate": 1.3496032919092422e-06, "loss": 0.3228, "step": 38580 }, { "epoch": 3.9224278161854413, "grad_norm": 0.27790066599845886, "learning_rate": 1.3493607853728564e-06, "loss": 0.3185, "step": 38581 }, { "epoch": 3.9225294835298903, "grad_norm": 0.26563477516174316, "learning_rate": 1.3491182972275219e-06, "loss": 0.3348, "step": 38582 }, { "epoch": 3.9226311508743392, "grad_norm": 0.2938099503517151, "learning_rate": 1.348875827474463e-06, "loss": 0.3479, "step": 38583 }, { "epoch": 3.922732818218788, "grad_norm": 0.266422301530838, "learning_rate": 1.3486333761149007e-06, "loss": 0.2913, "step": 38584 }, { "epoch": 3.922834485563237, "grad_norm": 0.2927013635635376, "learning_rate": 1.3483909431500552e-06, "loss": 0.2924, "step": 38585 }, { "epoch": 3.922936152907686, "grad_norm": 0.263164222240448, "learning_rate": 1.3481485285811469e-06, "loss": 0.2915, "step": 38586 }, { "epoch": 3.923037820252135, "grad_norm": 0.2786640226840973, "learning_rate": 1.3479061324094e-06, "loss": 0.2964, "step": 38587 }, { "epoch": 3.923139487596584, "grad_norm": 0.26897555589675903, "learning_rate": 1.3476637546360343e-06, "loss": 0.3127, "step": 38588 }, { "epoch": 3.923241154941033, "grad_norm": 0.2952204942703247, "learning_rate": 1.3474213952622694e-06, "loss": 0.307, "step": 38589 }, { "epoch": 3.923342822285482, "grad_norm": 0.27680543065071106, "learning_rate": 1.3471790542893287e-06, "loss": 0.3118, "step": 38590 }, { "epoch": 3.9234444896299308, "grad_norm": 0.2788900136947632, "learning_rate": 1.3469367317184317e-06, "loss": 0.2862, "step": 38591 }, { "epoch": 3.9235461569743797, "grad_norm": 0.28690531849861145, "learning_rate": 1.3466944275507999e-06, "loss": 0.3127, "step": 38592 }, { "epoch": 3.9236478243188286, "grad_norm": 0.29023200273513794, "learning_rate": 1.3464521417876514e-06, "loss": 0.2925, "step": 38593 }, { "epoch": 3.923749491663278, "grad_norm": 0.2672586441040039, "learning_rate": 1.3462098744302105e-06, "loss": 0.2931, "step": 38594 }, { "epoch": 3.923851159007727, "grad_norm": 0.26712173223495483, "learning_rate": 1.345967625479696e-06, "loss": 0.2878, "step": 38595 }, { "epoch": 3.923952826352176, "grad_norm": 0.24962840974330902, "learning_rate": 1.3457253949373268e-06, "loss": 0.3074, "step": 38596 }, { "epoch": 3.924054493696625, "grad_norm": 0.2961028218269348, "learning_rate": 1.3454831828043262e-06, "loss": 0.3045, "step": 38597 }, { "epoch": 3.924156161041074, "grad_norm": 0.29704469442367554, "learning_rate": 1.345240989081913e-06, "loss": 0.3054, "step": 38598 }, { "epoch": 3.9242578283855227, "grad_norm": 0.2562469244003296, "learning_rate": 1.3449988137713055e-06, "loss": 0.3266, "step": 38599 }, { "epoch": 3.9243594957299717, "grad_norm": 0.2827305793762207, "learning_rate": 1.344756656873727e-06, "loss": 0.3367, "step": 38600 }, { "epoch": 3.9244611630744206, "grad_norm": 0.26118192076683044, "learning_rate": 1.3445145183903957e-06, "loss": 0.2982, "step": 38601 }, { "epoch": 3.9245628304188696, "grad_norm": 0.2579793632030487, "learning_rate": 1.3442723983225313e-06, "loss": 0.3197, "step": 38602 }, { "epoch": 3.9246644977633185, "grad_norm": 0.27802690863609314, "learning_rate": 1.3440302966713532e-06, "loss": 0.3113, "step": 38603 }, { "epoch": 3.9247661651077674, "grad_norm": 0.2860853970050812, "learning_rate": 1.3437882134380825e-06, "loss": 0.3151, "step": 38604 }, { "epoch": 3.9248678324522164, "grad_norm": 0.2646459639072418, "learning_rate": 1.3435461486239376e-06, "loss": 0.3035, "step": 38605 }, { "epoch": 3.9249694997966653, "grad_norm": 0.2843360900878906, "learning_rate": 1.3433041022301373e-06, "loss": 0.3054, "step": 38606 }, { "epoch": 3.9250711671411143, "grad_norm": 0.25218626856803894, "learning_rate": 1.3430620742579037e-06, "loss": 0.3504, "step": 38607 }, { "epoch": 3.925172834485563, "grad_norm": 0.2701634168624878, "learning_rate": 1.342820064708454e-06, "loss": 0.3282, "step": 38608 }, { "epoch": 3.925274501830012, "grad_norm": 0.2682046592235565, "learning_rate": 1.3425780735830063e-06, "loss": 0.3127, "step": 38609 }, { "epoch": 3.925376169174461, "grad_norm": 0.281258225440979, "learning_rate": 1.342336100882783e-06, "loss": 0.3089, "step": 38610 }, { "epoch": 3.92547783651891, "grad_norm": 0.2703549861907959, "learning_rate": 1.3420941466090004e-06, "loss": 0.3219, "step": 38611 }, { "epoch": 3.925579503863359, "grad_norm": 0.2732149064540863, "learning_rate": 1.341852210762879e-06, "loss": 0.304, "step": 38612 }, { "epoch": 3.925681171207808, "grad_norm": 0.2793112099170685, "learning_rate": 1.341610293345635e-06, "loss": 0.3025, "step": 38613 }, { "epoch": 3.925782838552257, "grad_norm": 0.26133066415786743, "learning_rate": 1.3413683943584905e-06, "loss": 0.3233, "step": 38614 }, { "epoch": 3.925884505896706, "grad_norm": 0.26948967576026917, "learning_rate": 1.3411265138026625e-06, "loss": 0.3256, "step": 38615 }, { "epoch": 3.9259861732411547, "grad_norm": 0.2631078064441681, "learning_rate": 1.3408846516793683e-06, "loss": 0.3017, "step": 38616 }, { "epoch": 3.9260878405856037, "grad_norm": 0.277229905128479, "learning_rate": 1.340642807989831e-06, "loss": 0.2923, "step": 38617 }, { "epoch": 3.9261895079300526, "grad_norm": 0.2592056393623352, "learning_rate": 1.3404009827352622e-06, "loss": 0.3117, "step": 38618 }, { "epoch": 3.9262911752745016, "grad_norm": 0.26669779419898987, "learning_rate": 1.3401591759168837e-06, "loss": 0.3001, "step": 38619 }, { "epoch": 3.926392842618951, "grad_norm": 0.2600039541721344, "learning_rate": 1.3399173875359151e-06, "loss": 0.3047, "step": 38620 }, { "epoch": 3.9264945099634, "grad_norm": 0.27221259474754333, "learning_rate": 1.3396756175935728e-06, "loss": 0.2832, "step": 38621 }, { "epoch": 3.926596177307849, "grad_norm": 0.2683471739292145, "learning_rate": 1.339433866091075e-06, "loss": 0.3115, "step": 38622 }, { "epoch": 3.9266978446522978, "grad_norm": 0.27349328994750977, "learning_rate": 1.339192133029638e-06, "loss": 0.2848, "step": 38623 }, { "epoch": 3.9267995119967467, "grad_norm": 0.2918364107608795, "learning_rate": 1.3389504184104823e-06, "loss": 0.2791, "step": 38624 }, { "epoch": 3.9269011793411956, "grad_norm": 0.29315775632858276, "learning_rate": 1.338708722234825e-06, "loss": 0.3205, "step": 38625 }, { "epoch": 3.9270028466856446, "grad_norm": 0.2845284044742584, "learning_rate": 1.3384670445038811e-06, "loss": 0.2904, "step": 38626 }, { "epoch": 3.9271045140300935, "grad_norm": 0.2746775448322296, "learning_rate": 1.3382253852188731e-06, "loss": 0.3123, "step": 38627 }, { "epoch": 3.9272061813745425, "grad_norm": 0.2684420049190521, "learning_rate": 1.3379837443810128e-06, "loss": 0.3039, "step": 38628 }, { "epoch": 3.9273078487189914, "grad_norm": 0.26554882526397705, "learning_rate": 1.33774212199152e-06, "loss": 0.3174, "step": 38629 }, { "epoch": 3.9274095160634404, "grad_norm": 0.2953326106071472, "learning_rate": 1.3375005180516138e-06, "loss": 0.2786, "step": 38630 }, { "epoch": 3.9275111834078893, "grad_norm": 0.28914594650268555, "learning_rate": 1.3372589325625097e-06, "loss": 0.2861, "step": 38631 }, { "epoch": 3.9276128507523382, "grad_norm": 0.2652275562286377, "learning_rate": 1.3370173655254243e-06, "loss": 0.3076, "step": 38632 }, { "epoch": 3.927714518096787, "grad_norm": 0.24919261038303375, "learning_rate": 1.3367758169415735e-06, "loss": 0.2883, "step": 38633 }, { "epoch": 3.927816185441236, "grad_norm": 0.2582349181175232, "learning_rate": 1.3365342868121772e-06, "loss": 0.3717, "step": 38634 }, { "epoch": 3.9279178527856855, "grad_norm": 0.26429134607315063, "learning_rate": 1.3362927751384509e-06, "loss": 0.2962, "step": 38635 }, { "epoch": 3.9280195201301344, "grad_norm": 0.2724275290966034, "learning_rate": 1.3360512819216087e-06, "loss": 0.2884, "step": 38636 }, { "epoch": 3.9281211874745834, "grad_norm": 0.28312569856643677, "learning_rate": 1.3358098071628733e-06, "loss": 0.3097, "step": 38637 }, { "epoch": 3.9282228548190323, "grad_norm": 0.28083986043930054, "learning_rate": 1.3355683508634537e-06, "loss": 0.3272, "step": 38638 }, { "epoch": 3.9283245221634813, "grad_norm": 0.268113911151886, "learning_rate": 1.3353269130245706e-06, "loss": 0.2972, "step": 38639 }, { "epoch": 3.92842618950793, "grad_norm": 0.271411269903183, "learning_rate": 1.3350854936474423e-06, "loss": 0.3309, "step": 38640 }, { "epoch": 3.928527856852379, "grad_norm": 0.255172461271286, "learning_rate": 1.334844092733279e-06, "loss": 0.3228, "step": 38641 }, { "epoch": 3.928629524196828, "grad_norm": 0.27039635181427, "learning_rate": 1.334602710283302e-06, "loss": 0.2965, "step": 38642 }, { "epoch": 3.928731191541277, "grad_norm": 0.2740662693977356, "learning_rate": 1.3343613462987242e-06, "loss": 0.3332, "step": 38643 }, { "epoch": 3.928832858885726, "grad_norm": 0.2531837821006775, "learning_rate": 1.3341200007807643e-06, "loss": 0.3126, "step": 38644 }, { "epoch": 3.928934526230175, "grad_norm": 0.2803058326244354, "learning_rate": 1.3338786737306363e-06, "loss": 0.3229, "step": 38645 }, { "epoch": 3.929036193574624, "grad_norm": 0.26034995913505554, "learning_rate": 1.3336373651495555e-06, "loss": 0.3302, "step": 38646 }, { "epoch": 3.929137860919073, "grad_norm": 0.2599339187145233, "learning_rate": 1.3333960750387408e-06, "loss": 0.3393, "step": 38647 }, { "epoch": 3.9292395282635217, "grad_norm": 0.29786285758018494, "learning_rate": 1.3331548033994023e-06, "loss": 0.3141, "step": 38648 }, { "epoch": 3.9293411956079707, "grad_norm": 0.2746833562850952, "learning_rate": 1.332913550232759e-06, "loss": 0.2882, "step": 38649 }, { "epoch": 3.9294428629524196, "grad_norm": 0.2651565670967102, "learning_rate": 1.3326723155400284e-06, "loss": 0.3132, "step": 38650 }, { "epoch": 3.9295445302968686, "grad_norm": 0.2691251039505005, "learning_rate": 1.3324310993224203e-06, "loss": 0.3144, "step": 38651 }, { "epoch": 3.9296461976413175, "grad_norm": 0.29898709058761597, "learning_rate": 1.3321899015811546e-06, "loss": 0.3042, "step": 38652 }, { "epoch": 3.9297478649857664, "grad_norm": 0.2605125904083252, "learning_rate": 1.3319487223174426e-06, "loss": 0.3193, "step": 38653 }, { "epoch": 3.9298495323302154, "grad_norm": 0.2639789879322052, "learning_rate": 1.331707561532503e-06, "loss": 0.3479, "step": 38654 }, { "epoch": 3.9299511996746643, "grad_norm": 0.2875588834285736, "learning_rate": 1.3314664192275494e-06, "loss": 0.2962, "step": 38655 }, { "epoch": 3.9300528670191133, "grad_norm": 0.31004494428634644, "learning_rate": 1.3312252954037942e-06, "loss": 0.3178, "step": 38656 }, { "epoch": 3.930154534363562, "grad_norm": 0.280679315328598, "learning_rate": 1.3309841900624571e-06, "loss": 0.3003, "step": 38657 }, { "epoch": 3.930256201708011, "grad_norm": 0.3077956736087799, "learning_rate": 1.330743103204747e-06, "loss": 0.2964, "step": 38658 }, { "epoch": 3.93035786905246, "grad_norm": 0.27093878388404846, "learning_rate": 1.330502034831881e-06, "loss": 0.3051, "step": 38659 }, { "epoch": 3.930459536396909, "grad_norm": 0.28355443477630615, "learning_rate": 1.3302609849450771e-06, "loss": 0.2987, "step": 38660 }, { "epoch": 3.9305612037413584, "grad_norm": 0.3128819167613983, "learning_rate": 1.3300199535455426e-06, "loss": 0.3268, "step": 38661 }, { "epoch": 3.9306628710858074, "grad_norm": 0.28194430470466614, "learning_rate": 1.3297789406344974e-06, "loss": 0.3099, "step": 38662 }, { "epoch": 3.9307645384302563, "grad_norm": 0.27860555052757263, "learning_rate": 1.3295379462131537e-06, "loss": 0.3259, "step": 38663 }, { "epoch": 3.9308662057747052, "grad_norm": 0.27146124839782715, "learning_rate": 1.329296970282724e-06, "loss": 0.3366, "step": 38664 }, { "epoch": 3.930967873119154, "grad_norm": 0.28061604499816895, "learning_rate": 1.3290560128444258e-06, "loss": 0.3146, "step": 38665 }, { "epoch": 3.931069540463603, "grad_norm": 0.28591716289520264, "learning_rate": 1.3288150738994688e-06, "loss": 0.32, "step": 38666 }, { "epoch": 3.931171207808052, "grad_norm": 0.24658583104610443, "learning_rate": 1.3285741534490725e-06, "loss": 0.314, "step": 38667 }, { "epoch": 3.931272875152501, "grad_norm": 0.26430389285087585, "learning_rate": 1.3283332514944441e-06, "loss": 0.3018, "step": 38668 }, { "epoch": 3.93137454249695, "grad_norm": 0.28014397621154785, "learning_rate": 1.3280923680368003e-06, "loss": 0.3203, "step": 38669 }, { "epoch": 3.931476209841399, "grad_norm": 0.2927669584751129, "learning_rate": 1.3278515030773576e-06, "loss": 0.2797, "step": 38670 }, { "epoch": 3.931577877185848, "grad_norm": 0.28297391533851624, "learning_rate": 1.327610656617323e-06, "loss": 0.2868, "step": 38671 }, { "epoch": 3.9316795445302968, "grad_norm": 0.2990743815898895, "learning_rate": 1.327369828657915e-06, "loss": 0.2809, "step": 38672 }, { "epoch": 3.9317812118747457, "grad_norm": 0.2571130692958832, "learning_rate": 1.3271290192003456e-06, "loss": 0.2998, "step": 38673 }, { "epoch": 3.9318828792191947, "grad_norm": 0.28007593750953674, "learning_rate": 1.326888228245825e-06, "loss": 0.2808, "step": 38674 }, { "epoch": 3.9319845465636436, "grad_norm": 0.26713311672210693, "learning_rate": 1.3266474557955706e-06, "loss": 0.354, "step": 38675 }, { "epoch": 3.932086213908093, "grad_norm": 0.2775885760784149, "learning_rate": 1.3264067018507931e-06, "loss": 0.3003, "step": 38676 }, { "epoch": 3.932187881252542, "grad_norm": 0.2564402222633362, "learning_rate": 1.3261659664127063e-06, "loss": 0.3305, "step": 38677 }, { "epoch": 3.932289548596991, "grad_norm": 0.28590211272239685, "learning_rate": 1.3259252494825202e-06, "loss": 0.2922, "step": 38678 }, { "epoch": 3.93239121594144, "grad_norm": 0.2686252295970917, "learning_rate": 1.3256845510614497e-06, "loss": 0.2955, "step": 38679 }, { "epoch": 3.9324928832858888, "grad_norm": 0.2562797963619232, "learning_rate": 1.3254438711507105e-06, "loss": 0.3075, "step": 38680 }, { "epoch": 3.9325945506303377, "grad_norm": 0.2637714445590973, "learning_rate": 1.325203209751509e-06, "loss": 0.285, "step": 38681 }, { "epoch": 3.9326962179747866, "grad_norm": 0.27067697048187256, "learning_rate": 1.3249625668650612e-06, "loss": 0.305, "step": 38682 }, { "epoch": 3.9327978853192356, "grad_norm": 0.26087579131126404, "learning_rate": 1.3247219424925795e-06, "loss": 0.2999, "step": 38683 }, { "epoch": 3.9328995526636845, "grad_norm": 0.2885206341743469, "learning_rate": 1.3244813366352733e-06, "loss": 0.3333, "step": 38684 }, { "epoch": 3.9330012200081335, "grad_norm": 0.25264856219291687, "learning_rate": 1.3242407492943582e-06, "loss": 0.3238, "step": 38685 }, { "epoch": 3.9331028873525824, "grad_norm": 0.2589527666568756, "learning_rate": 1.3240001804710451e-06, "loss": 0.3346, "step": 38686 }, { "epoch": 3.9332045546970313, "grad_norm": 0.2813732922077179, "learning_rate": 1.3237596301665451e-06, "loss": 0.3148, "step": 38687 }, { "epoch": 3.9333062220414803, "grad_norm": 0.2801624834537506, "learning_rate": 1.3235190983820694e-06, "loss": 0.3138, "step": 38688 }, { "epoch": 3.9334078893859292, "grad_norm": 0.2856205403804779, "learning_rate": 1.3232785851188307e-06, "loss": 0.2818, "step": 38689 }, { "epoch": 3.933509556730378, "grad_norm": 0.2895423173904419, "learning_rate": 1.3230380903780433e-06, "loss": 0.3056, "step": 38690 }, { "epoch": 3.933611224074827, "grad_norm": 0.2882085144519806, "learning_rate": 1.3227976141609133e-06, "loss": 0.2936, "step": 38691 }, { "epoch": 3.933712891419276, "grad_norm": 0.2737153172492981, "learning_rate": 1.3225571564686573e-06, "loss": 0.3486, "step": 38692 }, { "epoch": 3.933814558763725, "grad_norm": 0.2854033410549164, "learning_rate": 1.3223167173024837e-06, "loss": 0.3448, "step": 38693 }, { "epoch": 3.933916226108174, "grad_norm": 0.26206862926483154, "learning_rate": 1.3220762966636035e-06, "loss": 0.3066, "step": 38694 }, { "epoch": 3.934017893452623, "grad_norm": 0.280154824256897, "learning_rate": 1.32183589455323e-06, "loss": 0.2958, "step": 38695 }, { "epoch": 3.934119560797072, "grad_norm": 0.2757290303707123, "learning_rate": 1.3215955109725736e-06, "loss": 0.284, "step": 38696 }, { "epoch": 3.9342212281415208, "grad_norm": 0.254837304353714, "learning_rate": 1.3213551459228447e-06, "loss": 0.2807, "step": 38697 }, { "epoch": 3.9343228954859697, "grad_norm": 0.28896793723106384, "learning_rate": 1.3211147994052531e-06, "loss": 0.2817, "step": 38698 }, { "epoch": 3.9344245628304186, "grad_norm": 0.2848544418811798, "learning_rate": 1.320874471421012e-06, "loss": 0.2938, "step": 38699 }, { "epoch": 3.9345262301748676, "grad_norm": 0.29821568727493286, "learning_rate": 1.3206341619713315e-06, "loss": 0.3356, "step": 38700 }, { "epoch": 3.9346278975193165, "grad_norm": 0.2897856533527374, "learning_rate": 1.32039387105742e-06, "loss": 0.3114, "step": 38701 }, { "epoch": 3.934729564863766, "grad_norm": 0.27365657687187195, "learning_rate": 1.320153598680491e-06, "loss": 0.2986, "step": 38702 }, { "epoch": 3.934831232208215, "grad_norm": 0.28126153349876404, "learning_rate": 1.319913344841754e-06, "loss": 0.3039, "step": 38703 }, { "epoch": 3.934932899552664, "grad_norm": 0.2617787718772888, "learning_rate": 1.3196731095424175e-06, "loss": 0.2917, "step": 38704 }, { "epoch": 3.9350345668971127, "grad_norm": 0.26972195506095886, "learning_rate": 1.3194328927836947e-06, "loss": 0.3333, "step": 38705 }, { "epoch": 3.9351362342415617, "grad_norm": 0.28769904375076294, "learning_rate": 1.3191926945667944e-06, "loss": 0.3092, "step": 38706 }, { "epoch": 3.9352379015860106, "grad_norm": 0.2667844295501709, "learning_rate": 1.3189525148929266e-06, "loss": 0.2982, "step": 38707 }, { "epoch": 3.9353395689304596, "grad_norm": 0.2837674915790558, "learning_rate": 1.3187123537633001e-06, "loss": 0.2961, "step": 38708 }, { "epoch": 3.9354412362749085, "grad_norm": 0.26663437485694885, "learning_rate": 1.318472211179127e-06, "loss": 0.2803, "step": 38709 }, { "epoch": 3.9355429036193574, "grad_norm": 0.27486979961395264, "learning_rate": 1.318232087141616e-06, "loss": 0.2705, "step": 38710 }, { "epoch": 3.9356445709638064, "grad_norm": 0.27716660499572754, "learning_rate": 1.3179919816519754e-06, "loss": 0.2979, "step": 38711 }, { "epoch": 3.9357462383082553, "grad_norm": 0.2906946539878845, "learning_rate": 1.3177518947114182e-06, "loss": 0.3186, "step": 38712 }, { "epoch": 3.9358479056527043, "grad_norm": 0.2916843891143799, "learning_rate": 1.3175118263211517e-06, "loss": 0.3156, "step": 38713 }, { "epoch": 3.935949572997153, "grad_norm": 0.2782077491283417, "learning_rate": 1.317271776482384e-06, "loss": 0.3189, "step": 38714 }, { "epoch": 3.936051240341602, "grad_norm": 0.2645677924156189, "learning_rate": 1.3170317451963271e-06, "loss": 0.3178, "step": 38715 }, { "epoch": 3.936152907686051, "grad_norm": 0.2737160623073578, "learning_rate": 1.3167917324641894e-06, "loss": 0.3053, "step": 38716 }, { "epoch": 3.9362545750305005, "grad_norm": 0.267746239900589, "learning_rate": 1.3165517382871796e-06, "loss": 0.2997, "step": 38717 }, { "epoch": 3.9363562423749494, "grad_norm": 0.27487659454345703, "learning_rate": 1.3163117626665051e-06, "loss": 0.2894, "step": 38718 }, { "epoch": 3.9364579097193984, "grad_norm": 0.2823357880115509, "learning_rate": 1.3160718056033783e-06, "loss": 0.2999, "step": 38719 }, { "epoch": 3.9365595770638473, "grad_norm": 0.279453307390213, "learning_rate": 1.3158318670990061e-06, "loss": 0.3178, "step": 38720 }, { "epoch": 3.9366612444082962, "grad_norm": 0.27434301376342773, "learning_rate": 1.3155919471545963e-06, "loss": 0.3257, "step": 38721 }, { "epoch": 3.936762911752745, "grad_norm": 0.2729372978210449, "learning_rate": 1.3153520457713598e-06, "loss": 0.2884, "step": 38722 }, { "epoch": 3.936864579097194, "grad_norm": 0.2607271671295166, "learning_rate": 1.3151121629505044e-06, "loss": 0.3021, "step": 38723 }, { "epoch": 3.936966246441643, "grad_norm": 0.2666507661342621, "learning_rate": 1.3148722986932365e-06, "loss": 0.3299, "step": 38724 }, { "epoch": 3.937067913786092, "grad_norm": 0.2557055652141571, "learning_rate": 1.3146324530007682e-06, "loss": 0.3539, "step": 38725 }, { "epoch": 3.937169581130541, "grad_norm": 0.27189213037490845, "learning_rate": 1.3143926258743056e-06, "loss": 0.3166, "step": 38726 }, { "epoch": 3.93727124847499, "grad_norm": 0.26699429750442505, "learning_rate": 1.314152817315057e-06, "loss": 0.291, "step": 38727 }, { "epoch": 3.937372915819439, "grad_norm": 0.282814621925354, "learning_rate": 1.313913027324229e-06, "loss": 0.3047, "step": 38728 }, { "epoch": 3.9374745831638878, "grad_norm": 0.2768119275569916, "learning_rate": 1.313673255903033e-06, "loss": 0.3266, "step": 38729 }, { "epoch": 3.9375762505083367, "grad_norm": 0.27518174052238464, "learning_rate": 1.3134335030526752e-06, "loss": 0.315, "step": 38730 }, { "epoch": 3.9376779178527856, "grad_norm": 0.2607538104057312, "learning_rate": 1.3131937687743618e-06, "loss": 0.332, "step": 38731 }, { "epoch": 3.9377795851972346, "grad_norm": 0.26676201820373535, "learning_rate": 1.3129540530693037e-06, "loss": 0.2985, "step": 38732 }, { "epoch": 3.9378812525416835, "grad_norm": 0.27216777205467224, "learning_rate": 1.3127143559387063e-06, "loss": 0.2933, "step": 38733 }, { "epoch": 3.9379829198861325, "grad_norm": 0.28414568305015564, "learning_rate": 1.3124746773837765e-06, "loss": 0.3305, "step": 38734 }, { "epoch": 3.9380845872305814, "grad_norm": 0.2717672884464264, "learning_rate": 1.3122350174057247e-06, "loss": 0.2757, "step": 38735 }, { "epoch": 3.9381862545750304, "grad_norm": 0.2777853310108185, "learning_rate": 1.3119953760057563e-06, "loss": 0.3017, "step": 38736 }, { "epoch": 3.9382879219194793, "grad_norm": 0.2775525450706482, "learning_rate": 1.3117557531850788e-06, "loss": 0.3126, "step": 38737 }, { "epoch": 3.9383895892639282, "grad_norm": 0.2971664369106293, "learning_rate": 1.311516148944898e-06, "loss": 0.3309, "step": 38738 }, { "epoch": 3.938491256608377, "grad_norm": 0.25789448618888855, "learning_rate": 1.3112765632864238e-06, "loss": 0.3184, "step": 38739 }, { "epoch": 3.938592923952826, "grad_norm": 0.29737940430641174, "learning_rate": 1.311036996210862e-06, "loss": 0.3445, "step": 38740 }, { "epoch": 3.938694591297275, "grad_norm": 0.2956666350364685, "learning_rate": 1.3107974477194174e-06, "loss": 0.3296, "step": 38741 }, { "epoch": 3.938796258641724, "grad_norm": 0.28409531712532043, "learning_rate": 1.3105579178133e-06, "loss": 0.3086, "step": 38742 }, { "epoch": 3.9388979259861734, "grad_norm": 0.2704341411590576, "learning_rate": 1.3103184064937152e-06, "loss": 0.3183, "step": 38743 }, { "epoch": 3.9389995933306223, "grad_norm": 0.28689873218536377, "learning_rate": 1.3100789137618674e-06, "loss": 0.3204, "step": 38744 }, { "epoch": 3.9391012606750713, "grad_norm": 0.2829728126525879, "learning_rate": 1.3098394396189674e-06, "loss": 0.3041, "step": 38745 }, { "epoch": 3.93920292801952, "grad_norm": 0.2825472950935364, "learning_rate": 1.3095999840662194e-06, "loss": 0.3184, "step": 38746 }, { "epoch": 3.939304595363969, "grad_norm": 0.25658857822418213, "learning_rate": 1.3093605471048293e-06, "loss": 0.2956, "step": 38747 }, { "epoch": 3.939406262708418, "grad_norm": 0.2830871343612671, "learning_rate": 1.3091211287360023e-06, "loss": 0.2954, "step": 38748 }, { "epoch": 3.939507930052867, "grad_norm": 0.2672814726829529, "learning_rate": 1.3088817289609473e-06, "loss": 0.3072, "step": 38749 }, { "epoch": 3.939609597397316, "grad_norm": 0.2670169174671173, "learning_rate": 1.3086423477808696e-06, "loss": 0.3017, "step": 38750 }, { "epoch": 3.939711264741765, "grad_norm": 0.26891982555389404, "learning_rate": 1.3084029851969726e-06, "loss": 0.3334, "step": 38751 }, { "epoch": 3.939812932086214, "grad_norm": 0.2608601450920105, "learning_rate": 1.3081636412104654e-06, "loss": 0.3003, "step": 38752 }, { "epoch": 3.939914599430663, "grad_norm": 0.2802850604057312, "learning_rate": 1.307924315822553e-06, "loss": 0.3293, "step": 38753 }, { "epoch": 3.9400162667751117, "grad_norm": 0.27384260296821594, "learning_rate": 1.307685009034439e-06, "loss": 0.2897, "step": 38754 }, { "epoch": 3.9401179341195607, "grad_norm": 0.2552603483200073, "learning_rate": 1.3074457208473313e-06, "loss": 0.3207, "step": 38755 }, { "epoch": 3.9402196014640096, "grad_norm": 0.27116096019744873, "learning_rate": 1.307206451262435e-06, "loss": 0.3374, "step": 38756 }, { "epoch": 3.9403212688084586, "grad_norm": 0.2933809161186218, "learning_rate": 1.3069672002809553e-06, "loss": 0.2928, "step": 38757 }, { "epoch": 3.940422936152908, "grad_norm": 0.28021344542503357, "learning_rate": 1.3067279679040957e-06, "loss": 0.3078, "step": 38758 }, { "epoch": 3.940524603497357, "grad_norm": 0.2632826268672943, "learning_rate": 1.306488754133064e-06, "loss": 0.3047, "step": 38759 }, { "epoch": 3.940626270841806, "grad_norm": 0.26327618956565857, "learning_rate": 1.3062495589690649e-06, "loss": 0.3157, "step": 38760 }, { "epoch": 3.9407279381862548, "grad_norm": 0.2878887951374054, "learning_rate": 1.3060103824133009e-06, "loss": 0.282, "step": 38761 }, { "epoch": 3.9408296055307037, "grad_norm": 0.30970197916030884, "learning_rate": 1.3057712244669801e-06, "loss": 0.3212, "step": 38762 }, { "epoch": 3.9409312728751527, "grad_norm": 0.2736125588417053, "learning_rate": 1.3055320851313058e-06, "loss": 0.2952, "step": 38763 }, { "epoch": 3.9410329402196016, "grad_norm": 0.2585301101207733, "learning_rate": 1.3052929644074818e-06, "loss": 0.3497, "step": 38764 }, { "epoch": 3.9411346075640505, "grad_norm": 0.27274975180625916, "learning_rate": 1.3050538622967158e-06, "loss": 0.2774, "step": 38765 }, { "epoch": 3.9412362749084995, "grad_norm": 0.255109041929245, "learning_rate": 1.3048147788002096e-06, "loss": 0.2804, "step": 38766 }, { "epoch": 3.9413379422529484, "grad_norm": 0.3017878532409668, "learning_rate": 1.3045757139191689e-06, "loss": 0.3091, "step": 38767 }, { "epoch": 3.9414396095973974, "grad_norm": 0.2872403860092163, "learning_rate": 1.304336667654796e-06, "loss": 0.3113, "step": 38768 }, { "epoch": 3.9415412769418463, "grad_norm": 0.2581213414669037, "learning_rate": 1.3040976400082978e-06, "loss": 0.2885, "step": 38769 }, { "epoch": 3.9416429442862952, "grad_norm": 0.2777791917324066, "learning_rate": 1.3038586309808777e-06, "loss": 0.368, "step": 38770 }, { "epoch": 3.941744611630744, "grad_norm": 0.256829172372818, "learning_rate": 1.3036196405737384e-06, "loss": 0.2968, "step": 38771 }, { "epoch": 3.941846278975193, "grad_norm": 0.28589898347854614, "learning_rate": 1.3033806687880857e-06, "loss": 0.292, "step": 38772 }, { "epoch": 3.941947946319642, "grad_norm": 0.26339662075042725, "learning_rate": 1.3031417156251235e-06, "loss": 0.3174, "step": 38773 }, { "epoch": 3.942049613664091, "grad_norm": 0.2629885673522949, "learning_rate": 1.3029027810860546e-06, "loss": 0.2939, "step": 38774 }, { "epoch": 3.94215128100854, "grad_norm": 0.2665175497531891, "learning_rate": 1.3026638651720808e-06, "loss": 0.3177, "step": 38775 }, { "epoch": 3.942252948352989, "grad_norm": 0.2807348668575287, "learning_rate": 1.3024249678844098e-06, "loss": 0.3011, "step": 38776 }, { "epoch": 3.942354615697438, "grad_norm": 0.2945597767829895, "learning_rate": 1.3021860892242433e-06, "loss": 0.3237, "step": 38777 }, { "epoch": 3.9424562830418868, "grad_norm": 0.26636484265327454, "learning_rate": 1.301947229192783e-06, "loss": 0.3062, "step": 38778 }, { "epoch": 3.9425579503863357, "grad_norm": 0.27716898918151855, "learning_rate": 1.3017083877912368e-06, "loss": 0.32, "step": 38779 }, { "epoch": 3.9426596177307847, "grad_norm": 0.283141165971756, "learning_rate": 1.3014695650208014e-06, "loss": 0.2903, "step": 38780 }, { "epoch": 3.9427612850752336, "grad_norm": 0.25841063261032104, "learning_rate": 1.301230760882684e-06, "loss": 0.3253, "step": 38781 }, { "epoch": 3.9428629524196825, "grad_norm": 0.2586304843425751, "learning_rate": 1.3009919753780881e-06, "loss": 0.3155, "step": 38782 }, { "epoch": 3.9429646197641315, "grad_norm": 0.2757472097873688, "learning_rate": 1.300753208508216e-06, "loss": 0.341, "step": 38783 }, { "epoch": 3.943066287108581, "grad_norm": 0.26799458265304565, "learning_rate": 1.3005144602742697e-06, "loss": 0.2827, "step": 38784 }, { "epoch": 3.94316795445303, "grad_norm": 0.26395899057388306, "learning_rate": 1.3002757306774516e-06, "loss": 0.3012, "step": 38785 }, { "epoch": 3.9432696217974788, "grad_norm": 0.2862342298030853, "learning_rate": 1.300037019718966e-06, "loss": 0.2941, "step": 38786 }, { "epoch": 3.9433712891419277, "grad_norm": 0.277321994304657, "learning_rate": 1.2997983274000153e-06, "loss": 0.2986, "step": 38787 }, { "epoch": 3.9434729564863766, "grad_norm": 0.26106324791908264, "learning_rate": 1.2995596537217992e-06, "loss": 0.2899, "step": 38788 }, { "epoch": 3.9435746238308256, "grad_norm": 0.27117472887039185, "learning_rate": 1.2993209986855255e-06, "loss": 0.3323, "step": 38789 }, { "epoch": 3.9436762911752745, "grad_norm": 0.30779483914375305, "learning_rate": 1.2990823622923904e-06, "loss": 0.3097, "step": 38790 }, { "epoch": 3.9437779585197235, "grad_norm": 0.2774082124233246, "learning_rate": 1.2988437445435987e-06, "loss": 0.2837, "step": 38791 }, { "epoch": 3.9438796258641724, "grad_norm": 0.28407883644104004, "learning_rate": 1.2986051454403554e-06, "loss": 0.3151, "step": 38792 }, { "epoch": 3.9439812932086213, "grad_norm": 0.28821104764938354, "learning_rate": 1.2983665649838573e-06, "loss": 0.3163, "step": 38793 }, { "epoch": 3.9440829605530703, "grad_norm": 0.26546669006347656, "learning_rate": 1.2981280031753097e-06, "loss": 0.3253, "step": 38794 }, { "epoch": 3.9441846278975192, "grad_norm": 0.26462432742118835, "learning_rate": 1.2978894600159126e-06, "loss": 0.2996, "step": 38795 }, { "epoch": 3.944286295241968, "grad_norm": 0.277981698513031, "learning_rate": 1.2976509355068701e-06, "loss": 0.3095, "step": 38796 }, { "epoch": 3.944387962586417, "grad_norm": 0.28218260407447815, "learning_rate": 1.2974124296493818e-06, "loss": 0.2933, "step": 38797 }, { "epoch": 3.944489629930866, "grad_norm": 0.26526716351509094, "learning_rate": 1.2971739424446489e-06, "loss": 0.2659, "step": 38798 }, { "epoch": 3.9445912972753154, "grad_norm": 0.2762325406074524, "learning_rate": 1.2969354738938767e-06, "loss": 0.2967, "step": 38799 }, { "epoch": 3.9446929646197644, "grad_norm": 0.253648042678833, "learning_rate": 1.2966970239982602e-06, "loss": 0.318, "step": 38800 }, { "epoch": 3.9447946319642133, "grad_norm": 0.2726244628429413, "learning_rate": 1.2964585927590045e-06, "loss": 0.3238, "step": 38801 }, { "epoch": 3.9448962993086623, "grad_norm": 0.285914808511734, "learning_rate": 1.2962201801773127e-06, "loss": 0.3084, "step": 38802 }, { "epoch": 3.944997966653111, "grad_norm": 0.28035661578178406, "learning_rate": 1.295981786254381e-06, "loss": 0.3216, "step": 38803 }, { "epoch": 3.94509963399756, "grad_norm": 0.2624940574169159, "learning_rate": 1.295743410991414e-06, "loss": 0.3275, "step": 38804 }, { "epoch": 3.945201301342009, "grad_norm": 0.2584335505962372, "learning_rate": 1.2955050543896102e-06, "loss": 0.3297, "step": 38805 }, { "epoch": 3.945302968686458, "grad_norm": 0.2697570323944092, "learning_rate": 1.2952667164501731e-06, "loss": 0.3257, "step": 38806 }, { "epoch": 3.945404636030907, "grad_norm": 0.2705230116844177, "learning_rate": 1.295028397174302e-06, "loss": 0.2758, "step": 38807 }, { "epoch": 3.945506303375356, "grad_norm": 0.2722477614879608, "learning_rate": 1.294790096563196e-06, "loss": 0.2967, "step": 38808 }, { "epoch": 3.945607970719805, "grad_norm": 0.28573596477508545, "learning_rate": 1.2945518146180596e-06, "loss": 0.3007, "step": 38809 }, { "epoch": 3.945709638064254, "grad_norm": 0.2829936742782593, "learning_rate": 1.294313551340088e-06, "loss": 0.2919, "step": 38810 }, { "epoch": 3.9458113054087027, "grad_norm": 0.2975938022136688, "learning_rate": 1.294075306730484e-06, "loss": 0.2639, "step": 38811 }, { "epoch": 3.9459129727531517, "grad_norm": 0.2784866690635681, "learning_rate": 1.2938370807904503e-06, "loss": 0.3285, "step": 38812 }, { "epoch": 3.9460146400976006, "grad_norm": 0.26120731234550476, "learning_rate": 1.2935988735211824e-06, "loss": 0.2751, "step": 38813 }, { "epoch": 3.9461163074420496, "grad_norm": 0.30265653133392334, "learning_rate": 1.293360684923884e-06, "loss": 0.3256, "step": 38814 }, { "epoch": 3.9462179747864985, "grad_norm": 0.28159862756729126, "learning_rate": 1.2931225149997533e-06, "loss": 0.2896, "step": 38815 }, { "epoch": 3.9463196421309474, "grad_norm": 0.25364693999290466, "learning_rate": 1.2928843637499893e-06, "loss": 0.2924, "step": 38816 }, { "epoch": 3.9464213094753964, "grad_norm": 0.23563052713871002, "learning_rate": 1.2926462311757947e-06, "loss": 0.2965, "step": 38817 }, { "epoch": 3.9465229768198453, "grad_norm": 0.2569514513015747, "learning_rate": 1.2924081172783653e-06, "loss": 0.2957, "step": 38818 }, { "epoch": 3.9466246441642943, "grad_norm": 0.27643418312072754, "learning_rate": 1.2921700220589063e-06, "loss": 0.3062, "step": 38819 }, { "epoch": 3.946726311508743, "grad_norm": 0.27213630080223083, "learning_rate": 1.2919319455186101e-06, "loss": 0.3036, "step": 38820 }, { "epoch": 3.946827978853192, "grad_norm": 0.28771892189979553, "learning_rate": 1.29169388765868e-06, "loss": 0.3196, "step": 38821 }, { "epoch": 3.946929646197641, "grad_norm": 0.2669379711151123, "learning_rate": 1.2914558484803174e-06, "loss": 0.3054, "step": 38822 }, { "epoch": 3.94703131354209, "grad_norm": 0.27072271704673767, "learning_rate": 1.2912178279847165e-06, "loss": 0.3131, "step": 38823 }, { "epoch": 3.947132980886539, "grad_norm": 0.2806304097175598, "learning_rate": 1.2909798261730798e-06, "loss": 0.3169, "step": 38824 }, { "epoch": 3.9472346482309884, "grad_norm": 0.275371253490448, "learning_rate": 1.2907418430466057e-06, "loss": 0.3063, "step": 38825 }, { "epoch": 3.9473363155754373, "grad_norm": 0.2853207588195801, "learning_rate": 1.2905038786064911e-06, "loss": 0.352, "step": 38826 }, { "epoch": 3.9474379829198862, "grad_norm": 0.28613466024398804, "learning_rate": 1.290265932853938e-06, "loss": 0.3259, "step": 38827 }, { "epoch": 3.947539650264335, "grad_norm": 0.2900901138782501, "learning_rate": 1.2900280057901432e-06, "loss": 0.3357, "step": 38828 }, { "epoch": 3.947641317608784, "grad_norm": 0.27050113677978516, "learning_rate": 1.2897900974163064e-06, "loss": 0.3519, "step": 38829 }, { "epoch": 3.947742984953233, "grad_norm": 0.2927699387073517, "learning_rate": 1.2895522077336231e-06, "loss": 0.3002, "step": 38830 }, { "epoch": 3.947844652297682, "grad_norm": 0.28645727038383484, "learning_rate": 1.2893143367432947e-06, "loss": 0.3012, "step": 38831 }, { "epoch": 3.947946319642131, "grad_norm": 0.28256717324256897, "learning_rate": 1.2890764844465214e-06, "loss": 0.2992, "step": 38832 }, { "epoch": 3.94804798698658, "grad_norm": 0.25767970085144043, "learning_rate": 1.2888386508444962e-06, "loss": 0.2906, "step": 38833 }, { "epoch": 3.948149654331029, "grad_norm": 0.3073694109916687, "learning_rate": 1.2886008359384217e-06, "loss": 0.3143, "step": 38834 }, { "epoch": 3.9482513216754778, "grad_norm": 0.2757149040699005, "learning_rate": 1.2883630397294939e-06, "loss": 0.3035, "step": 38835 }, { "epoch": 3.9483529890199267, "grad_norm": 0.2782030701637268, "learning_rate": 1.2881252622189095e-06, "loss": 0.3467, "step": 38836 }, { "epoch": 3.9484546563643756, "grad_norm": 0.3036269247531891, "learning_rate": 1.2878875034078697e-06, "loss": 0.278, "step": 38837 }, { "epoch": 3.9485563237088246, "grad_norm": 0.2970264256000519, "learning_rate": 1.2876497632975705e-06, "loss": 0.2932, "step": 38838 }, { "epoch": 3.9486579910532735, "grad_norm": 0.29384174942970276, "learning_rate": 1.2874120418892094e-06, "loss": 0.296, "step": 38839 }, { "epoch": 3.948759658397723, "grad_norm": 0.2612139880657196, "learning_rate": 1.287174339183983e-06, "loss": 0.3142, "step": 38840 }, { "epoch": 3.948861325742172, "grad_norm": 0.26916682720184326, "learning_rate": 1.2869366551830898e-06, "loss": 0.3102, "step": 38841 }, { "epoch": 3.948962993086621, "grad_norm": 0.27628931403160095, "learning_rate": 1.2866989898877303e-06, "loss": 0.3319, "step": 38842 }, { "epoch": 3.9490646604310697, "grad_norm": 0.26313838362693787, "learning_rate": 1.2864613432990957e-06, "loss": 0.3174, "step": 38843 }, { "epoch": 3.9491663277755187, "grad_norm": 0.2642996609210968, "learning_rate": 1.2862237154183881e-06, "loss": 0.3048, "step": 38844 }, { "epoch": 3.9492679951199676, "grad_norm": 0.2860090732574463, "learning_rate": 1.2859861062468032e-06, "loss": 0.3023, "step": 38845 }, { "epoch": 3.9493696624644166, "grad_norm": 0.28824904561042786, "learning_rate": 1.285748515785536e-06, "loss": 0.3025, "step": 38846 }, { "epoch": 3.9494713298088655, "grad_norm": 0.2827009856700897, "learning_rate": 1.285510944035786e-06, "loss": 0.3047, "step": 38847 }, { "epoch": 3.9495729971533144, "grad_norm": 0.27918970584869385, "learning_rate": 1.2852733909987497e-06, "loss": 0.305, "step": 38848 }, { "epoch": 3.9496746644977634, "grad_norm": 0.2598069906234741, "learning_rate": 1.2850358566756238e-06, "loss": 0.3317, "step": 38849 }, { "epoch": 3.9497763318422123, "grad_norm": 0.3026469945907593, "learning_rate": 1.2847983410676023e-06, "loss": 0.2985, "step": 38850 }, { "epoch": 3.9498779991866613, "grad_norm": 0.2739741802215576, "learning_rate": 1.2845608441758855e-06, "loss": 0.3079, "step": 38851 }, { "epoch": 3.94997966653111, "grad_norm": 0.2764677107334137, "learning_rate": 1.2843233660016684e-06, "loss": 0.3044, "step": 38852 }, { "epoch": 3.950081333875559, "grad_norm": 0.25571200251579285, "learning_rate": 1.2840859065461453e-06, "loss": 0.2955, "step": 38853 }, { "epoch": 3.950183001220008, "grad_norm": 0.3008383810520172, "learning_rate": 1.2838484658105161e-06, "loss": 0.3259, "step": 38854 }, { "epoch": 3.950284668564457, "grad_norm": 0.2779657244682312, "learning_rate": 1.2836110437959753e-06, "loss": 0.2908, "step": 38855 }, { "epoch": 3.950386335908906, "grad_norm": 0.29323750734329224, "learning_rate": 1.2833736405037177e-06, "loss": 0.2973, "step": 38856 }, { "epoch": 3.950488003253355, "grad_norm": 0.2686874568462372, "learning_rate": 1.2831362559349414e-06, "loss": 0.2991, "step": 38857 }, { "epoch": 3.950589670597804, "grad_norm": 0.2739035189151764, "learning_rate": 1.2828988900908418e-06, "loss": 0.3293, "step": 38858 }, { "epoch": 3.950691337942253, "grad_norm": 0.2589344084262848, "learning_rate": 1.2826615429726141e-06, "loss": 0.3445, "step": 38859 }, { "epoch": 3.9507930052867017, "grad_norm": 0.27387839555740356, "learning_rate": 1.2824242145814531e-06, "loss": 0.285, "step": 38860 }, { "epoch": 3.9508946726311507, "grad_norm": 0.2630128860473633, "learning_rate": 1.2821869049185565e-06, "loss": 0.3214, "step": 38861 }, { "epoch": 3.9509963399755996, "grad_norm": 0.2874547243118286, "learning_rate": 1.281949613985119e-06, "loss": 0.3173, "step": 38862 }, { "epoch": 3.9510980073200486, "grad_norm": 0.24203164875507355, "learning_rate": 1.281712341782334e-06, "loss": 0.3026, "step": 38863 }, { "epoch": 3.9511996746644975, "grad_norm": 0.27914008498191833, "learning_rate": 1.2814750883114002e-06, "loss": 0.3198, "step": 38864 }, { "epoch": 3.9513013420089464, "grad_norm": 0.27617594599723816, "learning_rate": 1.2812378535735115e-06, "loss": 0.2983, "step": 38865 }, { "epoch": 3.951403009353396, "grad_norm": 0.29121872782707214, "learning_rate": 1.2810006375698609e-06, "loss": 0.3236, "step": 38866 }, { "epoch": 3.9515046766978448, "grad_norm": 0.2690005898475647, "learning_rate": 1.2807634403016473e-06, "loss": 0.337, "step": 38867 }, { "epoch": 3.9516063440422937, "grad_norm": 0.28108060359954834, "learning_rate": 1.2805262617700632e-06, "loss": 0.3123, "step": 38868 }, { "epoch": 3.9517080113867427, "grad_norm": 0.255746454000473, "learning_rate": 1.2802891019763042e-06, "loss": 0.3357, "step": 38869 }, { "epoch": 3.9518096787311916, "grad_norm": 0.2699536085128784, "learning_rate": 1.2800519609215634e-06, "loss": 0.2972, "step": 38870 }, { "epoch": 3.9519113460756405, "grad_norm": 0.28514280915260315, "learning_rate": 1.279814838607038e-06, "loss": 0.3235, "step": 38871 }, { "epoch": 3.9520130134200895, "grad_norm": 0.2673146426677704, "learning_rate": 1.2795777350339211e-06, "loss": 0.3068, "step": 38872 }, { "epoch": 3.9521146807645384, "grad_norm": 0.2682802379131317, "learning_rate": 1.2793406502034068e-06, "loss": 0.3362, "step": 38873 }, { "epoch": 3.9522163481089874, "grad_norm": 0.26976028084754944, "learning_rate": 1.279103584116691e-06, "loss": 0.3133, "step": 38874 }, { "epoch": 3.9523180154534363, "grad_norm": 0.286908358335495, "learning_rate": 1.2788665367749675e-06, "loss": 0.3076, "step": 38875 }, { "epoch": 3.9524196827978852, "grad_norm": 0.288222074508667, "learning_rate": 1.278629508179428e-06, "loss": 0.3051, "step": 38876 }, { "epoch": 3.952521350142334, "grad_norm": 0.2736481726169586, "learning_rate": 1.2783924983312712e-06, "loss": 0.3425, "step": 38877 }, { "epoch": 3.952623017486783, "grad_norm": 0.27131548523902893, "learning_rate": 1.2781555072316876e-06, "loss": 0.278, "step": 38878 }, { "epoch": 3.952724684831232, "grad_norm": 0.27233830094337463, "learning_rate": 1.2779185348818729e-06, "loss": 0.3116, "step": 38879 }, { "epoch": 3.952826352175681, "grad_norm": 0.25919073820114136, "learning_rate": 1.277681581283018e-06, "loss": 0.2992, "step": 38880 }, { "epoch": 3.9529280195201304, "grad_norm": 0.2524220645427704, "learning_rate": 1.2774446464363204e-06, "loss": 0.316, "step": 38881 }, { "epoch": 3.9530296868645793, "grad_norm": 0.28650715947151184, "learning_rate": 1.2772077303429724e-06, "loss": 0.2814, "step": 38882 }, { "epoch": 3.9531313542090283, "grad_norm": 0.2804132103919983, "learning_rate": 1.2769708330041653e-06, "loss": 0.2758, "step": 38883 }, { "epoch": 3.9532330215534772, "grad_norm": 0.2533954083919525, "learning_rate": 1.276733954421096e-06, "loss": 0.2533, "step": 38884 }, { "epoch": 3.953334688897926, "grad_norm": 0.2703879177570343, "learning_rate": 1.2764970945949568e-06, "loss": 0.3352, "step": 38885 }, { "epoch": 3.953436356242375, "grad_norm": 0.28381291031837463, "learning_rate": 1.276260253526938e-06, "loss": 0.2612, "step": 38886 }, { "epoch": 3.953538023586824, "grad_norm": 0.27266255021095276, "learning_rate": 1.2760234312182373e-06, "loss": 0.297, "step": 38887 }, { "epoch": 3.953639690931273, "grad_norm": 0.2872928977012634, "learning_rate": 1.2757866276700454e-06, "loss": 0.3188, "step": 38888 }, { "epoch": 3.953741358275722, "grad_norm": 0.24536652863025665, "learning_rate": 1.2755498428835556e-06, "loss": 0.3081, "step": 38889 }, { "epoch": 3.953843025620171, "grad_norm": 0.2701655924320221, "learning_rate": 1.2753130768599586e-06, "loss": 0.3355, "step": 38890 }, { "epoch": 3.95394469296462, "grad_norm": 0.29728204011917114, "learning_rate": 1.275076329600451e-06, "loss": 0.3097, "step": 38891 }, { "epoch": 3.9540463603090688, "grad_norm": 0.2710821032524109, "learning_rate": 1.2748396011062237e-06, "loss": 0.2796, "step": 38892 }, { "epoch": 3.9541480276535177, "grad_norm": 0.3136669397354126, "learning_rate": 1.2746028913784681e-06, "loss": 0.2663, "step": 38893 }, { "epoch": 3.9542496949979666, "grad_norm": 0.2783161401748657, "learning_rate": 1.2743662004183788e-06, "loss": 0.3041, "step": 38894 }, { "epoch": 3.9543513623424156, "grad_norm": 0.26112493872642517, "learning_rate": 1.2741295282271477e-06, "loss": 0.34, "step": 38895 }, { "epoch": 3.9544530296868645, "grad_norm": 0.28001144528388977, "learning_rate": 1.2738928748059647e-06, "loss": 0.3261, "step": 38896 }, { "epoch": 3.9545546970313135, "grad_norm": 0.2803186774253845, "learning_rate": 1.2736562401560254e-06, "loss": 0.3012, "step": 38897 }, { "epoch": 3.9546563643757624, "grad_norm": 0.2886804938316345, "learning_rate": 1.2734196242785207e-06, "loss": 0.3178, "step": 38898 }, { "epoch": 3.9547580317202113, "grad_norm": 0.26859650015830994, "learning_rate": 1.2731830271746426e-06, "loss": 0.309, "step": 38899 }, { "epoch": 3.9548596990646603, "grad_norm": 0.26985734701156616, "learning_rate": 1.2729464488455812e-06, "loss": 0.3069, "step": 38900 }, { "epoch": 3.9549613664091092, "grad_norm": 0.2561492323875427, "learning_rate": 1.272709889292531e-06, "loss": 0.2963, "step": 38901 }, { "epoch": 3.955063033753558, "grad_norm": 0.26652711629867554, "learning_rate": 1.2724733485166829e-06, "loss": 0.3098, "step": 38902 }, { "epoch": 3.955164701098007, "grad_norm": 0.25055885314941406, "learning_rate": 1.2722368265192264e-06, "loss": 0.3115, "step": 38903 }, { "epoch": 3.955266368442456, "grad_norm": 0.25548362731933594, "learning_rate": 1.2720003233013566e-06, "loss": 0.345, "step": 38904 }, { "epoch": 3.955368035786905, "grad_norm": 0.2712860107421875, "learning_rate": 1.2717638388642639e-06, "loss": 0.3372, "step": 38905 }, { "epoch": 3.9554697031313544, "grad_norm": 0.26414579153060913, "learning_rate": 1.2715273732091372e-06, "loss": 0.304, "step": 38906 }, { "epoch": 3.9555713704758033, "grad_norm": 0.26992133259773254, "learning_rate": 1.2712909263371703e-06, "loss": 0.3301, "step": 38907 }, { "epoch": 3.9556730378202523, "grad_norm": 0.26854991912841797, "learning_rate": 1.2710544982495542e-06, "loss": 0.3441, "step": 38908 }, { "epoch": 3.955774705164701, "grad_norm": 0.269700288772583, "learning_rate": 1.2708180889474798e-06, "loss": 0.3377, "step": 38909 }, { "epoch": 3.95587637250915, "grad_norm": 0.2733928859233856, "learning_rate": 1.2705816984321357e-06, "loss": 0.2954, "step": 38910 }, { "epoch": 3.955978039853599, "grad_norm": 0.26673588156700134, "learning_rate": 1.2703453267047166e-06, "loss": 0.3295, "step": 38911 }, { "epoch": 3.956079707198048, "grad_norm": 0.2968216836452484, "learning_rate": 1.2701089737664113e-06, "loss": 0.2936, "step": 38912 }, { "epoch": 3.956181374542497, "grad_norm": 0.24864010512828827, "learning_rate": 1.2698726396184087e-06, "loss": 0.2968, "step": 38913 }, { "epoch": 3.956283041886946, "grad_norm": 0.26863449811935425, "learning_rate": 1.2696363242619032e-06, "loss": 0.2881, "step": 38914 }, { "epoch": 3.956384709231395, "grad_norm": 0.266710489988327, "learning_rate": 1.2694000276980833e-06, "loss": 0.3064, "step": 38915 }, { "epoch": 3.956486376575844, "grad_norm": 0.283225417137146, "learning_rate": 1.2691637499281384e-06, "loss": 0.3266, "step": 38916 }, { "epoch": 3.9565880439202927, "grad_norm": 0.27448225021362305, "learning_rate": 1.2689274909532613e-06, "loss": 0.3041, "step": 38917 }, { "epoch": 3.9566897112647417, "grad_norm": 0.28226494789123535, "learning_rate": 1.2686912507746407e-06, "loss": 0.2992, "step": 38918 }, { "epoch": 3.9567913786091906, "grad_norm": 0.25685030221939087, "learning_rate": 1.2684550293934667e-06, "loss": 0.2914, "step": 38919 }, { "epoch": 3.9568930459536396, "grad_norm": 0.27784445881843567, "learning_rate": 1.2682188268109285e-06, "loss": 0.2974, "step": 38920 }, { "epoch": 3.9569947132980885, "grad_norm": 0.27013811469078064, "learning_rate": 1.2679826430282183e-06, "loss": 0.3075, "step": 38921 }, { "epoch": 3.957096380642538, "grad_norm": 0.269439160823822, "learning_rate": 1.2677464780465243e-06, "loss": 0.3251, "step": 38922 }, { "epoch": 3.957198047986987, "grad_norm": 0.2791045308113098, "learning_rate": 1.2675103318670357e-06, "loss": 0.3172, "step": 38923 }, { "epoch": 3.9572997153314358, "grad_norm": 0.27223536372184753, "learning_rate": 1.2672742044909443e-06, "loss": 0.3137, "step": 38924 }, { "epoch": 3.9574013826758847, "grad_norm": 0.2759820222854614, "learning_rate": 1.2670380959194383e-06, "loss": 0.3384, "step": 38925 }, { "epoch": 3.9575030500203336, "grad_norm": 0.3108590841293335, "learning_rate": 1.2668020061537062e-06, "loss": 0.3008, "step": 38926 }, { "epoch": 3.9576047173647826, "grad_norm": 0.2504009008407593, "learning_rate": 1.2665659351949394e-06, "loss": 0.3097, "step": 38927 }, { "epoch": 3.9577063847092315, "grad_norm": 0.2638438940048218, "learning_rate": 1.2663298830443266e-06, "loss": 0.2712, "step": 38928 }, { "epoch": 3.9578080520536805, "grad_norm": 0.2646000385284424, "learning_rate": 1.2660938497030567e-06, "loss": 0.3233, "step": 38929 }, { "epoch": 3.9579097193981294, "grad_norm": 0.25593698024749756, "learning_rate": 1.2658578351723173e-06, "loss": 0.3312, "step": 38930 }, { "epoch": 3.9580113867425784, "grad_norm": 0.30640754103660583, "learning_rate": 1.2656218394532998e-06, "loss": 0.3005, "step": 38931 }, { "epoch": 3.9581130540870273, "grad_norm": 0.2794744670391083, "learning_rate": 1.2653858625471921e-06, "loss": 0.2994, "step": 38932 }, { "epoch": 3.9582147214314762, "grad_norm": 0.28399816155433655, "learning_rate": 1.2651499044551813e-06, "loss": 0.2929, "step": 38933 }, { "epoch": 3.958316388775925, "grad_norm": 0.2763104736804962, "learning_rate": 1.2649139651784598e-06, "loss": 0.3169, "step": 38934 }, { "epoch": 3.958418056120374, "grad_norm": 0.2802012860774994, "learning_rate": 1.264678044718214e-06, "loss": 0.3269, "step": 38935 }, { "epoch": 3.958519723464823, "grad_norm": 0.2737208306789398, "learning_rate": 1.264442143075631e-06, "loss": 0.2988, "step": 38936 }, { "epoch": 3.958621390809272, "grad_norm": 0.2771666944026947, "learning_rate": 1.2642062602519018e-06, "loss": 0.3114, "step": 38937 }, { "epoch": 3.958723058153721, "grad_norm": 0.27584192156791687, "learning_rate": 1.2639703962482142e-06, "loss": 0.3399, "step": 38938 }, { "epoch": 3.95882472549817, "grad_norm": 0.28727343678474426, "learning_rate": 1.2637345510657555e-06, "loss": 0.3207, "step": 38939 }, { "epoch": 3.958926392842619, "grad_norm": 0.29225480556488037, "learning_rate": 1.2634987247057134e-06, "loss": 0.2988, "step": 38940 }, { "epoch": 3.9590280601870678, "grad_norm": 0.2632179856300354, "learning_rate": 1.263262917169278e-06, "loss": 0.3269, "step": 38941 }, { "epoch": 3.9591297275315167, "grad_norm": 0.2979294955730438, "learning_rate": 1.2630271284576356e-06, "loss": 0.328, "step": 38942 }, { "epoch": 3.9592313948759656, "grad_norm": 0.27530333399772644, "learning_rate": 1.2627913585719736e-06, "loss": 0.3272, "step": 38943 }, { "epoch": 3.9593330622204146, "grad_norm": 0.25065767765045166, "learning_rate": 1.2625556075134837e-06, "loss": 0.3315, "step": 38944 }, { "epoch": 3.9594347295648635, "grad_norm": 0.2748534381389618, "learning_rate": 1.262319875283347e-06, "loss": 0.3176, "step": 38945 }, { "epoch": 3.9595363969093125, "grad_norm": 0.29075056314468384, "learning_rate": 1.262084161882755e-06, "loss": 0.3076, "step": 38946 }, { "epoch": 3.959638064253762, "grad_norm": 0.291148841381073, "learning_rate": 1.2618484673128962e-06, "loss": 0.3335, "step": 38947 }, { "epoch": 3.959739731598211, "grad_norm": 0.25477781891822815, "learning_rate": 1.261612791574956e-06, "loss": 0.3189, "step": 38948 }, { "epoch": 3.9598413989426597, "grad_norm": 0.2937694191932678, "learning_rate": 1.2613771346701227e-06, "loss": 0.2858, "step": 38949 }, { "epoch": 3.9599430662871087, "grad_norm": 0.2598876953125, "learning_rate": 1.261141496599581e-06, "loss": 0.318, "step": 38950 }, { "epoch": 3.9600447336315576, "grad_norm": 0.2976126968860626, "learning_rate": 1.2609058773645216e-06, "loss": 0.2931, "step": 38951 }, { "epoch": 3.9601464009760066, "grad_norm": 0.2906962037086487, "learning_rate": 1.26067027696613e-06, "loss": 0.2828, "step": 38952 }, { "epoch": 3.9602480683204555, "grad_norm": 0.2561465799808502, "learning_rate": 1.2604346954055914e-06, "loss": 0.2887, "step": 38953 }, { "epoch": 3.9603497356649044, "grad_norm": 0.26974964141845703, "learning_rate": 1.2601991326840973e-06, "loss": 0.324, "step": 38954 }, { "epoch": 3.9604514030093534, "grad_norm": 0.28655320405960083, "learning_rate": 1.2599635888028278e-06, "loss": 0.298, "step": 38955 }, { "epoch": 3.9605530703538023, "grad_norm": 0.26528221368789673, "learning_rate": 1.2597280637629732e-06, "loss": 0.3452, "step": 38956 }, { "epoch": 3.9606547376982513, "grad_norm": 0.29449746012687683, "learning_rate": 1.259492557565723e-06, "loss": 0.3164, "step": 38957 }, { "epoch": 3.9607564050427, "grad_norm": 0.28051647543907166, "learning_rate": 1.2592570702122574e-06, "loss": 0.323, "step": 38958 }, { "epoch": 3.960858072387149, "grad_norm": 0.29109427332878113, "learning_rate": 1.2590216017037671e-06, "loss": 0.2924, "step": 38959 }, { "epoch": 3.960959739731598, "grad_norm": 0.25972452759742737, "learning_rate": 1.2587861520414357e-06, "loss": 0.2985, "step": 38960 }, { "epoch": 3.961061407076047, "grad_norm": 0.26770538091659546, "learning_rate": 1.258550721226453e-06, "loss": 0.2932, "step": 38961 }, { "epoch": 3.961163074420496, "grad_norm": 0.28712624311447144, "learning_rate": 1.2583153092600004e-06, "loss": 0.3049, "step": 38962 }, { "epoch": 3.9612647417649454, "grad_norm": 0.26349055767059326, "learning_rate": 1.2580799161432655e-06, "loss": 0.2975, "step": 38963 }, { "epoch": 3.9613664091093943, "grad_norm": 0.2948080897331238, "learning_rate": 1.2578445418774382e-06, "loss": 0.305, "step": 38964 }, { "epoch": 3.9614680764538432, "grad_norm": 0.26447704434394836, "learning_rate": 1.2576091864636975e-06, "loss": 0.3191, "step": 38965 }, { "epoch": 3.961569743798292, "grad_norm": 0.2971772849559784, "learning_rate": 1.257373849903234e-06, "loss": 0.3323, "step": 38966 }, { "epoch": 3.961671411142741, "grad_norm": 0.26403599977493286, "learning_rate": 1.2571385321972318e-06, "loss": 0.3284, "step": 38967 }, { "epoch": 3.96177307848719, "grad_norm": 0.2666179835796356, "learning_rate": 1.256903233346875e-06, "loss": 0.3351, "step": 38968 }, { "epoch": 3.961874745831639, "grad_norm": 0.26995086669921875, "learning_rate": 1.256667953353351e-06, "loss": 0.3122, "step": 38969 }, { "epoch": 3.961976413176088, "grad_norm": 0.28497326374053955, "learning_rate": 1.2564326922178433e-06, "loss": 0.3079, "step": 38970 }, { "epoch": 3.962078080520537, "grad_norm": 0.2614697813987732, "learning_rate": 1.2561974499415408e-06, "loss": 0.2824, "step": 38971 }, { "epoch": 3.962179747864986, "grad_norm": 0.28529003262519836, "learning_rate": 1.2559622265256232e-06, "loss": 0.2861, "step": 38972 }, { "epoch": 3.9622814152094348, "grad_norm": 0.28463995456695557, "learning_rate": 1.255727021971278e-06, "loss": 0.2978, "step": 38973 }, { "epoch": 3.9623830825538837, "grad_norm": 0.2719092071056366, "learning_rate": 1.2554918362796932e-06, "loss": 0.317, "step": 38974 }, { "epoch": 3.9624847498983327, "grad_norm": 0.26067787408828735, "learning_rate": 1.2552566694520478e-06, "loss": 0.3261, "step": 38975 }, { "epoch": 3.9625864172427816, "grad_norm": 0.2834983468055725, "learning_rate": 1.2550215214895311e-06, "loss": 0.3076, "step": 38976 }, { "epoch": 3.9626880845872305, "grad_norm": 0.2704576849937439, "learning_rate": 1.2547863923933257e-06, "loss": 0.3051, "step": 38977 }, { "epoch": 3.9627897519316795, "grad_norm": 0.2524982690811157, "learning_rate": 1.254551282164615e-06, "loss": 0.2987, "step": 38978 }, { "epoch": 3.9628914192761284, "grad_norm": 0.2923487424850464, "learning_rate": 1.2543161908045865e-06, "loss": 0.3177, "step": 38979 }, { "epoch": 3.9629930866205774, "grad_norm": 0.2721048593521118, "learning_rate": 1.254081118314423e-06, "loss": 0.3276, "step": 38980 }, { "epoch": 3.9630947539650263, "grad_norm": 0.2875309884548187, "learning_rate": 1.2538460646953088e-06, "loss": 0.2929, "step": 38981 }, { "epoch": 3.9631964213094752, "grad_norm": 0.2631528675556183, "learning_rate": 1.253611029948426e-06, "loss": 0.3181, "step": 38982 }, { "epoch": 3.963298088653924, "grad_norm": 0.297031432390213, "learning_rate": 1.2533760140749607e-06, "loss": 0.3121, "step": 38983 }, { "epoch": 3.963399755998373, "grad_norm": 0.2814197242259979, "learning_rate": 1.2531410170761e-06, "loss": 0.3065, "step": 38984 }, { "epoch": 3.963501423342822, "grad_norm": 0.2727420926094055, "learning_rate": 1.2529060389530211e-06, "loss": 0.3299, "step": 38985 }, { "epoch": 3.963603090687271, "grad_norm": 0.2903593182563782, "learning_rate": 1.2526710797069124e-06, "loss": 0.3263, "step": 38986 }, { "epoch": 3.96370475803172, "grad_norm": 0.26007166504859924, "learning_rate": 1.2524361393389567e-06, "loss": 0.3057, "step": 38987 }, { "epoch": 3.9638064253761693, "grad_norm": 0.26685598492622375, "learning_rate": 1.2522012178503357e-06, "loss": 0.3058, "step": 38988 }, { "epoch": 3.9639080927206183, "grad_norm": 0.2921906113624573, "learning_rate": 1.2519663152422357e-06, "loss": 0.2994, "step": 38989 }, { "epoch": 3.9640097600650672, "grad_norm": 0.2707793116569519, "learning_rate": 1.251731431515839e-06, "loss": 0.299, "step": 38990 }, { "epoch": 3.964111427409516, "grad_norm": 0.2884078919887543, "learning_rate": 1.2514965666723284e-06, "loss": 0.3155, "step": 38991 }, { "epoch": 3.964213094753965, "grad_norm": 0.26255661249160767, "learning_rate": 1.2512617207128858e-06, "loss": 0.3244, "step": 38992 }, { "epoch": 3.964314762098414, "grad_norm": 0.2710062563419342, "learning_rate": 1.251026893638696e-06, "loss": 0.3112, "step": 38993 }, { "epoch": 3.964416429442863, "grad_norm": 0.27294737100601196, "learning_rate": 1.2507920854509442e-06, "loss": 0.2921, "step": 38994 }, { "epoch": 3.964518096787312, "grad_norm": 0.24052074551582336, "learning_rate": 1.2505572961508083e-06, "loss": 0.3092, "step": 38995 }, { "epoch": 3.964619764131761, "grad_norm": 0.2930799722671509, "learning_rate": 1.2503225257394758e-06, "loss": 0.3378, "step": 38996 }, { "epoch": 3.96472143147621, "grad_norm": 0.2788107693195343, "learning_rate": 1.2500877742181267e-06, "loss": 0.2985, "step": 38997 }, { "epoch": 3.9648230988206588, "grad_norm": 0.2789294123649597, "learning_rate": 1.2498530415879428e-06, "loss": 0.2987, "step": 38998 }, { "epoch": 3.9649247661651077, "grad_norm": 0.30163049697875977, "learning_rate": 1.24961832785011e-06, "loss": 0.3206, "step": 38999 }, { "epoch": 3.9650264335095566, "grad_norm": 0.3014533519744873, "learning_rate": 1.2493836330058084e-06, "loss": 0.3127, "step": 39000 }, { "epoch": 3.9651281008540056, "grad_norm": 0.2612999379634857, "learning_rate": 1.249148957056221e-06, "loss": 0.2849, "step": 39001 }, { "epoch": 3.9652297681984545, "grad_norm": 0.2826642692089081, "learning_rate": 1.2489143000025284e-06, "loss": 0.2825, "step": 39002 }, { "epoch": 3.9653314355429035, "grad_norm": 0.24565760791301727, "learning_rate": 1.248679661845915e-06, "loss": 0.3484, "step": 39003 }, { "epoch": 3.965433102887353, "grad_norm": 0.26442036032676697, "learning_rate": 1.2484450425875627e-06, "loss": 0.2998, "step": 39004 }, { "epoch": 3.965534770231802, "grad_norm": 0.28698599338531494, "learning_rate": 1.2482104422286512e-06, "loss": 0.3359, "step": 39005 }, { "epoch": 3.9656364375762507, "grad_norm": 0.3036520481109619, "learning_rate": 1.2479758607703657e-06, "loss": 0.296, "step": 39006 }, { "epoch": 3.9657381049206997, "grad_norm": 0.27237647771835327, "learning_rate": 1.2477412982138858e-06, "loss": 0.3105, "step": 39007 }, { "epoch": 3.9658397722651486, "grad_norm": 0.27792277932167053, "learning_rate": 1.247506754560392e-06, "loss": 0.3175, "step": 39008 }, { "epoch": 3.9659414396095976, "grad_norm": 0.2812197208404541, "learning_rate": 1.247272229811069e-06, "loss": 0.3061, "step": 39009 }, { "epoch": 3.9660431069540465, "grad_norm": 0.26446038484573364, "learning_rate": 1.2470377239670967e-06, "loss": 0.3341, "step": 39010 }, { "epoch": 3.9661447742984954, "grad_norm": 0.2527998387813568, "learning_rate": 1.2468032370296568e-06, "loss": 0.2946, "step": 39011 }, { "epoch": 3.9662464416429444, "grad_norm": 0.28475213050842285, "learning_rate": 1.2465687689999284e-06, "loss": 0.3309, "step": 39012 }, { "epoch": 3.9663481089873933, "grad_norm": 0.29363882541656494, "learning_rate": 1.2463343198790962e-06, "loss": 0.3035, "step": 39013 }, { "epoch": 3.9664497763318423, "grad_norm": 0.2815392017364502, "learning_rate": 1.2460998896683396e-06, "loss": 0.2963, "step": 39014 }, { "epoch": 3.966551443676291, "grad_norm": 0.27313172817230225, "learning_rate": 1.2458654783688385e-06, "loss": 0.3179, "step": 39015 }, { "epoch": 3.96665311102074, "grad_norm": 0.27789559960365295, "learning_rate": 1.2456310859817766e-06, "loss": 0.3089, "step": 39016 }, { "epoch": 3.966754778365189, "grad_norm": 0.2752518355846405, "learning_rate": 1.2453967125083327e-06, "loss": 0.2902, "step": 39017 }, { "epoch": 3.966856445709638, "grad_norm": 0.26586443185806274, "learning_rate": 1.2451623579496862e-06, "loss": 0.3122, "step": 39018 }, { "epoch": 3.966958113054087, "grad_norm": 0.2627725601196289, "learning_rate": 1.2449280223070214e-06, "loss": 0.3082, "step": 39019 }, { "epoch": 3.967059780398536, "grad_norm": 0.28299570083618164, "learning_rate": 1.2446937055815172e-06, "loss": 0.2751, "step": 39020 }, { "epoch": 3.967161447742985, "grad_norm": 0.26124510169029236, "learning_rate": 1.2444594077743532e-06, "loss": 0.3084, "step": 39021 }, { "epoch": 3.967263115087434, "grad_norm": 0.26897209882736206, "learning_rate": 1.244225128886709e-06, "loss": 0.3237, "step": 39022 }, { "epoch": 3.9673647824318827, "grad_norm": 0.2573670446872711, "learning_rate": 1.2439908689197676e-06, "loss": 0.3237, "step": 39023 }, { "epoch": 3.9674664497763317, "grad_norm": 0.2568231523036957, "learning_rate": 1.2437566278747072e-06, "loss": 0.2989, "step": 39024 }, { "epoch": 3.9675681171207806, "grad_norm": 0.28904566168785095, "learning_rate": 1.2435224057527072e-06, "loss": 0.2969, "step": 39025 }, { "epoch": 3.9676697844652296, "grad_norm": 0.2804715633392334, "learning_rate": 1.2432882025549504e-06, "loss": 0.3221, "step": 39026 }, { "epoch": 3.9677714518096785, "grad_norm": 0.28855061531066895, "learning_rate": 1.2430540182826145e-06, "loss": 0.2811, "step": 39027 }, { "epoch": 3.9678731191541274, "grad_norm": 0.2890011668205261, "learning_rate": 1.242819852936878e-06, "loss": 0.3195, "step": 39028 }, { "epoch": 3.967974786498577, "grad_norm": 0.2672549784183502, "learning_rate": 1.242585706518924e-06, "loss": 0.3041, "step": 39029 }, { "epoch": 3.9680764538430258, "grad_norm": 0.26205670833587646, "learning_rate": 1.24235157902993e-06, "loss": 0.3514, "step": 39030 }, { "epoch": 3.9681781211874747, "grad_norm": 0.2684786021709442, "learning_rate": 1.2421174704710765e-06, "loss": 0.3073, "step": 39031 }, { "epoch": 3.9682797885319236, "grad_norm": 0.2532380521297455, "learning_rate": 1.24188338084354e-06, "loss": 0.3326, "step": 39032 }, { "epoch": 3.9683814558763726, "grad_norm": 0.2671254575252533, "learning_rate": 1.241649310148504e-06, "loss": 0.3271, "step": 39033 }, { "epoch": 3.9684831232208215, "grad_norm": 0.2672681510448456, "learning_rate": 1.2414152583871453e-06, "loss": 0.3291, "step": 39034 }, { "epoch": 3.9685847905652705, "grad_norm": 0.26113221049308777, "learning_rate": 1.2411812255606414e-06, "loss": 0.3364, "step": 39035 }, { "epoch": 3.9686864579097194, "grad_norm": 0.27490532398223877, "learning_rate": 1.2409472116701753e-06, "loss": 0.3038, "step": 39036 }, { "epoch": 3.9687881252541684, "grad_norm": 0.2827710807323456, "learning_rate": 1.240713216716924e-06, "loss": 0.3253, "step": 39037 }, { "epoch": 3.9688897925986173, "grad_norm": 0.2732005715370178, "learning_rate": 1.2404792407020644e-06, "loss": 0.3027, "step": 39038 }, { "epoch": 3.9689914599430662, "grad_norm": 0.26832395792007446, "learning_rate": 1.2402452836267786e-06, "loss": 0.3044, "step": 39039 }, { "epoch": 3.969093127287515, "grad_norm": 0.2724781632423401, "learning_rate": 1.2400113454922436e-06, "loss": 0.3003, "step": 39040 }, { "epoch": 3.969194794631964, "grad_norm": 0.24779251217842102, "learning_rate": 1.2397774262996382e-06, "loss": 0.2767, "step": 39041 }, { "epoch": 3.969296461976413, "grad_norm": 0.2526983916759491, "learning_rate": 1.2395435260501387e-06, "loss": 0.3086, "step": 39042 }, { "epoch": 3.969398129320862, "grad_norm": 0.27781495451927185, "learning_rate": 1.2393096447449265e-06, "loss": 0.3259, "step": 39043 }, { "epoch": 3.969499796665311, "grad_norm": 0.28178733587265015, "learning_rate": 1.239075782385179e-06, "loss": 0.2991, "step": 39044 }, { "epoch": 3.9696014640097603, "grad_norm": 0.27916622161865234, "learning_rate": 1.238841938972073e-06, "loss": 0.3004, "step": 39045 }, { "epoch": 3.9697031313542093, "grad_norm": 0.25929582118988037, "learning_rate": 1.238608114506788e-06, "loss": 0.3245, "step": 39046 }, { "epoch": 3.969804798698658, "grad_norm": 0.28316691517829895, "learning_rate": 1.238374308990502e-06, "loss": 0.3132, "step": 39047 }, { "epoch": 3.969906466043107, "grad_norm": 0.26451751589775085, "learning_rate": 1.238140522424391e-06, "loss": 0.3021, "step": 39048 }, { "epoch": 3.970008133387556, "grad_norm": 0.2955574095249176, "learning_rate": 1.2379067548096351e-06, "loss": 0.3102, "step": 39049 }, { "epoch": 3.970109800732005, "grad_norm": 0.2668830454349518, "learning_rate": 1.2376730061474112e-06, "loss": 0.3177, "step": 39050 }, { "epoch": 3.970211468076454, "grad_norm": 0.26819708943367004, "learning_rate": 1.237439276438897e-06, "loss": 0.3033, "step": 39051 }, { "epoch": 3.970313135420903, "grad_norm": 0.2870599031448364, "learning_rate": 1.2372055656852678e-06, "loss": 0.2938, "step": 39052 }, { "epoch": 3.970414802765352, "grad_norm": 0.2615370452404022, "learning_rate": 1.236971873887704e-06, "loss": 0.3208, "step": 39053 }, { "epoch": 3.970516470109801, "grad_norm": 0.28444764018058777, "learning_rate": 1.236738201047382e-06, "loss": 0.2865, "step": 39054 }, { "epoch": 3.9706181374542497, "grad_norm": 0.2884766459465027, "learning_rate": 1.2365045471654774e-06, "loss": 0.3153, "step": 39055 }, { "epoch": 3.9707198047986987, "grad_norm": 0.26959121227264404, "learning_rate": 1.2362709122431693e-06, "loss": 0.3401, "step": 39056 }, { "epoch": 3.9708214721431476, "grad_norm": 0.283162921667099, "learning_rate": 1.2360372962816347e-06, "loss": 0.3025, "step": 39057 }, { "epoch": 3.9709231394875966, "grad_norm": 0.26511043310165405, "learning_rate": 1.2358036992820476e-06, "loss": 0.3398, "step": 39058 }, { "epoch": 3.9710248068320455, "grad_norm": 0.28870129585266113, "learning_rate": 1.235570121245589e-06, "loss": 0.2813, "step": 39059 }, { "epoch": 3.9711264741764944, "grad_norm": 0.26167264580726624, "learning_rate": 1.2353365621734337e-06, "loss": 0.286, "step": 39060 }, { "epoch": 3.9712281415209434, "grad_norm": 0.259812593460083, "learning_rate": 1.2351030220667576e-06, "loss": 0.3117, "step": 39061 }, { "epoch": 3.9713298088653923, "grad_norm": 0.27674150466918945, "learning_rate": 1.2348695009267363e-06, "loss": 0.2789, "step": 39062 }, { "epoch": 3.9714314762098413, "grad_norm": 0.28193238377571106, "learning_rate": 1.2346359987545498e-06, "loss": 0.2931, "step": 39063 }, { "epoch": 3.97153314355429, "grad_norm": 0.2724207639694214, "learning_rate": 1.2344025155513718e-06, "loss": 0.3074, "step": 39064 }, { "epoch": 3.971634810898739, "grad_norm": 0.26315951347351074, "learning_rate": 1.234169051318378e-06, "loss": 0.3157, "step": 39065 }, { "epoch": 3.971736478243188, "grad_norm": 0.27426737546920776, "learning_rate": 1.2339356060567465e-06, "loss": 0.3047, "step": 39066 }, { "epoch": 3.971838145587637, "grad_norm": 0.28783559799194336, "learning_rate": 1.233702179767653e-06, "loss": 0.3347, "step": 39067 }, { "epoch": 3.971939812932086, "grad_norm": 0.25755050778388977, "learning_rate": 1.2334687724522715e-06, "loss": 0.3121, "step": 39068 }, { "epoch": 3.972041480276535, "grad_norm": 0.2602449357509613, "learning_rate": 1.2332353841117805e-06, "loss": 0.2961, "step": 39069 }, { "epoch": 3.9721431476209843, "grad_norm": 0.265866219997406, "learning_rate": 1.2330020147473543e-06, "loss": 0.3079, "step": 39070 }, { "epoch": 3.9722448149654332, "grad_norm": 0.2698027789592743, "learning_rate": 1.232768664360169e-06, "loss": 0.3225, "step": 39071 }, { "epoch": 3.972346482309882, "grad_norm": 0.2717784345149994, "learning_rate": 1.2325353329513983e-06, "loss": 0.3062, "step": 39072 }, { "epoch": 3.972448149654331, "grad_norm": 0.29176267981529236, "learning_rate": 1.2323020205222214e-06, "loss": 0.2967, "step": 39073 }, { "epoch": 3.97254981699878, "grad_norm": 0.259475439786911, "learning_rate": 1.2320687270738109e-06, "loss": 0.3137, "step": 39074 }, { "epoch": 3.972651484343229, "grad_norm": 0.2724270224571228, "learning_rate": 1.2318354526073418e-06, "loss": 0.318, "step": 39075 }, { "epoch": 3.972753151687678, "grad_norm": 0.27544188499450684, "learning_rate": 1.231602197123991e-06, "loss": 0.3018, "step": 39076 }, { "epoch": 3.972854819032127, "grad_norm": 0.26959535479545593, "learning_rate": 1.2313689606249335e-06, "loss": 0.2845, "step": 39077 }, { "epoch": 3.972956486376576, "grad_norm": 0.2902984917163849, "learning_rate": 1.231135743111342e-06, "loss": 0.3175, "step": 39078 }, { "epoch": 3.9730581537210248, "grad_norm": 0.2536751329898834, "learning_rate": 1.2309025445843941e-06, "loss": 0.3185, "step": 39079 }, { "epoch": 3.9731598210654737, "grad_norm": 0.2843477725982666, "learning_rate": 1.2306693650452638e-06, "loss": 0.3028, "step": 39080 }, { "epoch": 3.9732614884099227, "grad_norm": 0.27742016315460205, "learning_rate": 1.2304362044951252e-06, "loss": 0.3003, "step": 39081 }, { "epoch": 3.9733631557543716, "grad_norm": 0.24713873863220215, "learning_rate": 1.2302030629351525e-06, "loss": 0.3295, "step": 39082 }, { "epoch": 3.9734648230988205, "grad_norm": 0.2734687626361847, "learning_rate": 1.2299699403665216e-06, "loss": 0.3048, "step": 39083 }, { "epoch": 3.9735664904432695, "grad_norm": 0.2685180902481079, "learning_rate": 1.2297368367904067e-06, "loss": 0.3422, "step": 39084 }, { "epoch": 3.9736681577877184, "grad_norm": 0.29966115951538086, "learning_rate": 1.22950375220798e-06, "loss": 0.3317, "step": 39085 }, { "epoch": 3.973769825132168, "grad_norm": 0.2898845374584198, "learning_rate": 1.229270686620419e-06, "loss": 0.3049, "step": 39086 }, { "epoch": 3.9738714924766168, "grad_norm": 0.2956137955188751, "learning_rate": 1.229037640028896e-06, "loss": 0.3147, "step": 39087 }, { "epoch": 3.9739731598210657, "grad_norm": 0.27563443779945374, "learning_rate": 1.2288046124345838e-06, "loss": 0.2938, "step": 39088 }, { "epoch": 3.9740748271655146, "grad_norm": 0.25091153383255005, "learning_rate": 1.2285716038386598e-06, "loss": 0.3228, "step": 39089 }, { "epoch": 3.9741764945099636, "grad_norm": 0.2863881587982178, "learning_rate": 1.2283386142422949e-06, "loss": 0.3278, "step": 39090 }, { "epoch": 3.9742781618544125, "grad_norm": 0.28300368785858154, "learning_rate": 1.2281056436466643e-06, "loss": 0.3022, "step": 39091 }, { "epoch": 3.9743798291988615, "grad_norm": 0.2943517863750458, "learning_rate": 1.22787269205294e-06, "loss": 0.2912, "step": 39092 }, { "epoch": 3.9744814965433104, "grad_norm": 0.2761857509613037, "learning_rate": 1.2276397594622975e-06, "loss": 0.2951, "step": 39093 }, { "epoch": 3.9745831638877593, "grad_norm": 0.27771466970443726, "learning_rate": 1.2274068458759098e-06, "loss": 0.2983, "step": 39094 }, { "epoch": 3.9746848312322083, "grad_norm": 0.2737615704536438, "learning_rate": 1.2271739512949482e-06, "loss": 0.3345, "step": 39095 }, { "epoch": 3.9747864985766572, "grad_norm": 0.28447458148002625, "learning_rate": 1.2269410757205907e-06, "loss": 0.3361, "step": 39096 }, { "epoch": 3.974888165921106, "grad_norm": 0.27344608306884766, "learning_rate": 1.2267082191540047e-06, "loss": 0.355, "step": 39097 }, { "epoch": 3.974989833265555, "grad_norm": 0.2846616208553314, "learning_rate": 1.226475381596366e-06, "loss": 0.3009, "step": 39098 }, { "epoch": 3.975091500610004, "grad_norm": 0.267851859331131, "learning_rate": 1.226242563048849e-06, "loss": 0.3387, "step": 39099 }, { "epoch": 3.975193167954453, "grad_norm": 0.28557661175727844, "learning_rate": 1.226009763512625e-06, "loss": 0.321, "step": 39100 }, { "epoch": 3.975294835298902, "grad_norm": 0.26975786685943604, "learning_rate": 1.2257769829888673e-06, "loss": 0.2902, "step": 39101 }, { "epoch": 3.975396502643351, "grad_norm": 0.28905460238456726, "learning_rate": 1.2255442214787466e-06, "loss": 0.305, "step": 39102 }, { "epoch": 3.9754981699878, "grad_norm": 0.28065764904022217, "learning_rate": 1.2253114789834386e-06, "loss": 0.3168, "step": 39103 }, { "epoch": 3.9755998373322488, "grad_norm": 0.25551164150238037, "learning_rate": 1.2250787555041145e-06, "loss": 0.3039, "step": 39104 }, { "epoch": 3.9757015046766977, "grad_norm": 0.26017963886260986, "learning_rate": 1.2248460510419453e-06, "loss": 0.2996, "step": 39105 }, { "epoch": 3.9758031720211466, "grad_norm": 0.29531341791152954, "learning_rate": 1.2246133655981069e-06, "loss": 0.3262, "step": 39106 }, { "epoch": 3.9759048393655956, "grad_norm": 0.2660280466079712, "learning_rate": 1.2243806991737667e-06, "loss": 0.2876, "step": 39107 }, { "epoch": 3.9760065067100445, "grad_norm": 0.2681392431259155, "learning_rate": 1.2241480517700993e-06, "loss": 0.328, "step": 39108 }, { "epoch": 3.9761081740544935, "grad_norm": 0.3036279082298279, "learning_rate": 1.2239154233882795e-06, "loss": 0.3143, "step": 39109 }, { "epoch": 3.9762098413989424, "grad_norm": 0.2659013569355011, "learning_rate": 1.2236828140294737e-06, "loss": 0.3107, "step": 39110 }, { "epoch": 3.976311508743392, "grad_norm": 0.282486230134964, "learning_rate": 1.2234502236948575e-06, "loss": 0.2988, "step": 39111 }, { "epoch": 3.9764131760878407, "grad_norm": 0.2791839838027954, "learning_rate": 1.2232176523856004e-06, "loss": 0.2954, "step": 39112 }, { "epoch": 3.9765148434322897, "grad_norm": 0.24262723326683044, "learning_rate": 1.222985100102877e-06, "loss": 0.3211, "step": 39113 }, { "epoch": 3.9766165107767386, "grad_norm": 0.2770617604255676, "learning_rate": 1.2227525668478574e-06, "loss": 0.3182, "step": 39114 }, { "epoch": 3.9767181781211876, "grad_norm": 0.28211838006973267, "learning_rate": 1.2225200526217108e-06, "loss": 0.3095, "step": 39115 }, { "epoch": 3.9768198454656365, "grad_norm": 0.2760693430900574, "learning_rate": 1.2222875574256133e-06, "loss": 0.2946, "step": 39116 }, { "epoch": 3.9769215128100854, "grad_norm": 0.26860281825065613, "learning_rate": 1.2220550812607306e-06, "loss": 0.2903, "step": 39117 }, { "epoch": 3.9770231801545344, "grad_norm": 0.28154459595680237, "learning_rate": 1.2218226241282366e-06, "loss": 0.2856, "step": 39118 }, { "epoch": 3.9771248474989833, "grad_norm": 0.28474143147468567, "learning_rate": 1.2215901860293055e-06, "loss": 0.3104, "step": 39119 }, { "epoch": 3.9772265148434323, "grad_norm": 0.2697654068470001, "learning_rate": 1.2213577669651023e-06, "loss": 0.3256, "step": 39120 }, { "epoch": 3.977328182187881, "grad_norm": 0.2613520920276642, "learning_rate": 1.2211253669368017e-06, "loss": 0.3472, "step": 39121 }, { "epoch": 3.97742984953233, "grad_norm": 0.28240180015563965, "learning_rate": 1.2208929859455726e-06, "loss": 0.319, "step": 39122 }, { "epoch": 3.977531516876779, "grad_norm": 0.2519858777523041, "learning_rate": 1.2206606239925877e-06, "loss": 0.3127, "step": 39123 }, { "epoch": 3.977633184221228, "grad_norm": 0.24429942667484283, "learning_rate": 1.2204282810790163e-06, "loss": 0.3004, "step": 39124 }, { "epoch": 3.977734851565677, "grad_norm": 0.26527494192123413, "learning_rate": 1.2201959572060279e-06, "loss": 0.299, "step": 39125 }, { "epoch": 3.977836518910126, "grad_norm": 0.27860212326049805, "learning_rate": 1.219963652374797e-06, "loss": 0.2954, "step": 39126 }, { "epoch": 3.9779381862545753, "grad_norm": 0.27966952323913574, "learning_rate": 1.219731366586488e-06, "loss": 0.3044, "step": 39127 }, { "epoch": 3.9780398535990242, "grad_norm": 0.27564871311187744, "learning_rate": 1.219499099842274e-06, "loss": 0.3041, "step": 39128 }, { "epoch": 3.978141520943473, "grad_norm": 0.26251381635665894, "learning_rate": 1.2192668521433277e-06, "loss": 0.3009, "step": 39129 }, { "epoch": 3.978243188287922, "grad_norm": 0.25326645374298096, "learning_rate": 1.2190346234908141e-06, "loss": 0.3096, "step": 39130 }, { "epoch": 3.978344855632371, "grad_norm": 0.28253594040870667, "learning_rate": 1.2188024138859067e-06, "loss": 0.2918, "step": 39131 }, { "epoch": 3.97844652297682, "grad_norm": 0.2901795208454132, "learning_rate": 1.2185702233297746e-06, "loss": 0.3145, "step": 39132 }, { "epoch": 3.978548190321269, "grad_norm": 0.26804670691490173, "learning_rate": 1.2183380518235855e-06, "loss": 0.3306, "step": 39133 }, { "epoch": 3.978649857665718, "grad_norm": 0.25745701789855957, "learning_rate": 1.2181058993685118e-06, "loss": 0.3151, "step": 39134 }, { "epoch": 3.978751525010167, "grad_norm": 0.2635875940322876, "learning_rate": 1.2178737659657207e-06, "loss": 0.2877, "step": 39135 }, { "epoch": 3.9788531923546158, "grad_norm": 0.27965104579925537, "learning_rate": 1.2176416516163859e-06, "loss": 0.2954, "step": 39136 }, { "epoch": 3.9789548596990647, "grad_norm": 0.28349563479423523, "learning_rate": 1.2174095563216704e-06, "loss": 0.2691, "step": 39137 }, { "epoch": 3.9790565270435136, "grad_norm": 0.28072839975357056, "learning_rate": 1.2171774800827469e-06, "loss": 0.2898, "step": 39138 }, { "epoch": 3.9791581943879626, "grad_norm": 0.2898227870464325, "learning_rate": 1.216945422900787e-06, "loss": 0.3127, "step": 39139 }, { "epoch": 3.9792598617324115, "grad_norm": 0.2563484013080597, "learning_rate": 1.216713384776954e-06, "loss": 0.3051, "step": 39140 }, { "epoch": 3.9793615290768605, "grad_norm": 0.26617181301116943, "learning_rate": 1.216481365712422e-06, "loss": 0.3447, "step": 39141 }, { "epoch": 3.9794631964213094, "grad_norm": 0.26544615626335144, "learning_rate": 1.2162493657083575e-06, "loss": 0.3242, "step": 39142 }, { "epoch": 3.9795648637657584, "grad_norm": 0.29532870650291443, "learning_rate": 1.2160173847659279e-06, "loss": 0.2953, "step": 39143 }, { "epoch": 3.9796665311102073, "grad_norm": 0.27031248807907104, "learning_rate": 1.2157854228863047e-06, "loss": 0.3028, "step": 39144 }, { "epoch": 3.9797681984546562, "grad_norm": 0.26847201585769653, "learning_rate": 1.2155534800706543e-06, "loss": 0.2897, "step": 39145 }, { "epoch": 3.979869865799105, "grad_norm": 0.2672121524810791, "learning_rate": 1.2153215563201487e-06, "loss": 0.2979, "step": 39146 }, { "epoch": 3.979971533143554, "grad_norm": 0.2526708245277405, "learning_rate": 1.2150896516359511e-06, "loss": 0.318, "step": 39147 }, { "epoch": 3.980073200488003, "grad_norm": 0.27806755900382996, "learning_rate": 1.214857766019234e-06, "loss": 0.3027, "step": 39148 }, { "epoch": 3.980174867832452, "grad_norm": 0.2847694158554077, "learning_rate": 1.2146258994711636e-06, "loss": 0.3046, "step": 39149 }, { "epoch": 3.980276535176901, "grad_norm": 0.27800145745277405, "learning_rate": 1.2143940519929071e-06, "loss": 0.316, "step": 39150 }, { "epoch": 3.98037820252135, "grad_norm": 0.2714351415634155, "learning_rate": 1.2141622235856354e-06, "loss": 0.2841, "step": 39151 }, { "epoch": 3.9804798698657993, "grad_norm": 0.2751864194869995, "learning_rate": 1.2139304142505144e-06, "loss": 0.3073, "step": 39152 }, { "epoch": 3.980581537210248, "grad_norm": 0.27849099040031433, "learning_rate": 1.2136986239887122e-06, "loss": 0.3147, "step": 39153 }, { "epoch": 3.980683204554697, "grad_norm": 0.28792476654052734, "learning_rate": 1.2134668528013954e-06, "loss": 0.3063, "step": 39154 }, { "epoch": 3.980784871899146, "grad_norm": 0.2868461608886719, "learning_rate": 1.2132351006897342e-06, "loss": 0.2906, "step": 39155 }, { "epoch": 3.980886539243595, "grad_norm": 0.2735234797000885, "learning_rate": 1.2130033676548946e-06, "loss": 0.2949, "step": 39156 }, { "epoch": 3.980988206588044, "grad_norm": 0.2829222083091736, "learning_rate": 1.2127716536980426e-06, "loss": 0.296, "step": 39157 }, { "epoch": 3.981089873932493, "grad_norm": 0.27383339405059814, "learning_rate": 1.212539958820349e-06, "loss": 0.2919, "step": 39158 }, { "epoch": 3.981191541276942, "grad_norm": 0.26760438084602356, "learning_rate": 1.2123082830229788e-06, "loss": 0.3129, "step": 39159 }, { "epoch": 3.981293208621391, "grad_norm": 0.2870909571647644, "learning_rate": 1.2120766263070976e-06, "loss": 0.2996, "step": 39160 }, { "epoch": 3.9813948759658397, "grad_norm": 0.2775237560272217, "learning_rate": 1.2118449886738758e-06, "loss": 0.2983, "step": 39161 }, { "epoch": 3.9814965433102887, "grad_norm": 0.26770374178886414, "learning_rate": 1.2116133701244787e-06, "loss": 0.3179, "step": 39162 }, { "epoch": 3.9815982106547376, "grad_norm": 0.25948816537857056, "learning_rate": 1.2113817706600734e-06, "loss": 0.299, "step": 39163 }, { "epoch": 3.9816998779991866, "grad_norm": 0.28908178210258484, "learning_rate": 1.2111501902818245e-06, "loss": 0.3521, "step": 39164 }, { "epoch": 3.9818015453436355, "grad_norm": 0.2734813690185547, "learning_rate": 1.2109186289909025e-06, "loss": 0.2833, "step": 39165 }, { "epoch": 3.9819032126880844, "grad_norm": 0.27416738867759705, "learning_rate": 1.2106870867884717e-06, "loss": 0.3028, "step": 39166 }, { "epoch": 3.9820048800325334, "grad_norm": 0.282026082277298, "learning_rate": 1.2104555636756976e-06, "loss": 0.2906, "step": 39167 }, { "epoch": 3.9821065473769828, "grad_norm": 0.25942322611808777, "learning_rate": 1.2102240596537496e-06, "loss": 0.3025, "step": 39168 }, { "epoch": 3.9822082147214317, "grad_norm": 0.26819688081741333, "learning_rate": 1.2099925747237918e-06, "loss": 0.2759, "step": 39169 }, { "epoch": 3.9823098820658807, "grad_norm": 0.2898913025856018, "learning_rate": 1.2097611088869892e-06, "loss": 0.3096, "step": 39170 }, { "epoch": 3.9824115494103296, "grad_norm": 0.29672372341156006, "learning_rate": 1.2095296621445113e-06, "loss": 0.2936, "step": 39171 }, { "epoch": 3.9825132167547785, "grad_norm": 0.28568369150161743, "learning_rate": 1.209298234497522e-06, "loss": 0.3114, "step": 39172 }, { "epoch": 3.9826148840992275, "grad_norm": 0.3035551905632019, "learning_rate": 1.2090668259471872e-06, "loss": 0.294, "step": 39173 }, { "epoch": 3.9827165514436764, "grad_norm": 0.25978606939315796, "learning_rate": 1.2088354364946719e-06, "loss": 0.3135, "step": 39174 }, { "epoch": 3.9828182187881254, "grad_norm": 0.26853230595588684, "learning_rate": 1.2086040661411435e-06, "loss": 0.3108, "step": 39175 }, { "epoch": 3.9829198861325743, "grad_norm": 0.2787644565105438, "learning_rate": 1.2083727148877677e-06, "loss": 0.2856, "step": 39176 }, { "epoch": 3.9830215534770232, "grad_norm": 0.2633591592311859, "learning_rate": 1.2081413827357069e-06, "loss": 0.3314, "step": 39177 }, { "epoch": 3.983123220821472, "grad_norm": 0.26335883140563965, "learning_rate": 1.2079100696861307e-06, "loss": 0.3341, "step": 39178 }, { "epoch": 3.983224888165921, "grad_norm": 0.26211652159690857, "learning_rate": 1.207678775740202e-06, "loss": 0.2974, "step": 39179 }, { "epoch": 3.98332655551037, "grad_norm": 0.26013481616973877, "learning_rate": 1.2074475008990855e-06, "loss": 0.3017, "step": 39180 }, { "epoch": 3.983428222854819, "grad_norm": 0.2861084043979645, "learning_rate": 1.2072162451639485e-06, "loss": 0.3219, "step": 39181 }, { "epoch": 3.983529890199268, "grad_norm": 0.2615566849708557, "learning_rate": 1.2069850085359547e-06, "loss": 0.3085, "step": 39182 }, { "epoch": 3.983631557543717, "grad_norm": 0.27810171246528625, "learning_rate": 1.2067537910162686e-06, "loss": 0.3011, "step": 39183 }, { "epoch": 3.983733224888166, "grad_norm": 0.25247132778167725, "learning_rate": 1.2065225926060548e-06, "loss": 0.3258, "step": 39184 }, { "epoch": 3.9838348922326148, "grad_norm": 0.27327588200569153, "learning_rate": 1.2062914133064797e-06, "loss": 0.3064, "step": 39185 }, { "epoch": 3.9839365595770637, "grad_norm": 0.29756805300712585, "learning_rate": 1.2060602531187071e-06, "loss": 0.3154, "step": 39186 }, { "epoch": 3.9840382269215127, "grad_norm": 0.26176851987838745, "learning_rate": 1.2058291120438997e-06, "loss": 0.3009, "step": 39187 }, { "epoch": 3.9841398942659616, "grad_norm": 0.26451534032821655, "learning_rate": 1.2055979900832255e-06, "loss": 0.3193, "step": 39188 }, { "epoch": 3.9842415616104105, "grad_norm": 0.2936590611934662, "learning_rate": 1.2053668872378466e-06, "loss": 0.3391, "step": 39189 }, { "epoch": 3.9843432289548595, "grad_norm": 0.2772940993309021, "learning_rate": 1.205135803508926e-06, "loss": 0.3232, "step": 39190 }, { "epoch": 3.9844448962993084, "grad_norm": 0.30160683393478394, "learning_rate": 1.2049047388976314e-06, "loss": 0.3157, "step": 39191 }, { "epoch": 3.9845465636437574, "grad_norm": 0.26138460636138916, "learning_rate": 1.2046736934051245e-06, "loss": 0.3069, "step": 39192 }, { "epoch": 3.9846482309882068, "grad_norm": 0.2718355655670166, "learning_rate": 1.2044426670325692e-06, "loss": 0.3373, "step": 39193 }, { "epoch": 3.9847498983326557, "grad_norm": 0.2904207408428192, "learning_rate": 1.2042116597811292e-06, "loss": 0.3138, "step": 39194 }, { "epoch": 3.9848515656771046, "grad_norm": 0.2652440071105957, "learning_rate": 1.2039806716519698e-06, "loss": 0.3167, "step": 39195 }, { "epoch": 3.9849532330215536, "grad_norm": 0.2567605674266815, "learning_rate": 1.2037497026462536e-06, "loss": 0.3205, "step": 39196 }, { "epoch": 3.9850549003660025, "grad_norm": 0.2750973701477051, "learning_rate": 1.2035187527651431e-06, "loss": 0.2941, "step": 39197 }, { "epoch": 3.9851565677104515, "grad_norm": 0.2537950277328491, "learning_rate": 1.2032878220098043e-06, "loss": 0.3232, "step": 39198 }, { "epoch": 3.9852582350549004, "grad_norm": 0.25626152753829956, "learning_rate": 1.2030569103813994e-06, "loss": 0.3125, "step": 39199 }, { "epoch": 3.9853599023993493, "grad_norm": 0.26267844438552856, "learning_rate": 1.2028260178810896e-06, "loss": 0.345, "step": 39200 }, { "epoch": 3.9854615697437983, "grad_norm": 0.2815629839897156, "learning_rate": 1.202595144510042e-06, "loss": 0.2886, "step": 39201 }, { "epoch": 3.9855632370882472, "grad_norm": 0.2471742033958435, "learning_rate": 1.2023642902694172e-06, "loss": 0.321, "step": 39202 }, { "epoch": 3.985664904432696, "grad_norm": 0.2954440116882324, "learning_rate": 1.2021334551603785e-06, "loss": 0.3311, "step": 39203 }, { "epoch": 3.985766571777145, "grad_norm": 0.2661879062652588, "learning_rate": 1.2019026391840882e-06, "loss": 0.3134, "step": 39204 }, { "epoch": 3.985868239121594, "grad_norm": 0.2623867392539978, "learning_rate": 1.2016718423417107e-06, "loss": 0.3209, "step": 39205 }, { "epoch": 3.985969906466043, "grad_norm": 0.26874205470085144, "learning_rate": 1.2014410646344083e-06, "loss": 0.3147, "step": 39206 }, { "epoch": 3.986071573810492, "grad_norm": 0.28049561381340027, "learning_rate": 1.2012103060633413e-06, "loss": 0.3229, "step": 39207 }, { "epoch": 3.986173241154941, "grad_norm": 0.2872898578643799, "learning_rate": 1.2009795666296752e-06, "loss": 0.3014, "step": 39208 }, { "epoch": 3.9862749084993903, "grad_norm": 0.27849969267845154, "learning_rate": 1.2007488463345718e-06, "loss": 0.2937, "step": 39209 }, { "epoch": 3.986376575843839, "grad_norm": 0.2676081657409668, "learning_rate": 1.2005181451791914e-06, "loss": 0.3085, "step": 39210 }, { "epoch": 3.986478243188288, "grad_norm": 0.25786465406417847, "learning_rate": 1.2002874631646987e-06, "loss": 0.2889, "step": 39211 }, { "epoch": 3.986579910532737, "grad_norm": 0.267169713973999, "learning_rate": 1.2000568002922553e-06, "loss": 0.3272, "step": 39212 }, { "epoch": 3.986681577877186, "grad_norm": 0.2960078716278076, "learning_rate": 1.1998261565630225e-06, "loss": 0.3033, "step": 39213 }, { "epoch": 3.986783245221635, "grad_norm": 0.28676044940948486, "learning_rate": 1.199595531978161e-06, "loss": 0.3179, "step": 39214 }, { "epoch": 3.986884912566084, "grad_norm": 0.2949807047843933, "learning_rate": 1.1993649265388357e-06, "loss": 0.3191, "step": 39215 }, { "epoch": 3.986986579910533, "grad_norm": 0.2619584798812866, "learning_rate": 1.199134340246207e-06, "loss": 0.3063, "step": 39216 }, { "epoch": 3.987088247254982, "grad_norm": 0.26049989461898804, "learning_rate": 1.1989037731014342e-06, "loss": 0.2922, "step": 39217 }, { "epoch": 3.9871899145994307, "grad_norm": 0.2618695795536041, "learning_rate": 1.1986732251056826e-06, "loss": 0.2972, "step": 39218 }, { "epoch": 3.9872915819438797, "grad_norm": 0.27321523427963257, "learning_rate": 1.198442696260112e-06, "loss": 0.2948, "step": 39219 }, { "epoch": 3.9873932492883286, "grad_norm": 0.27042749524116516, "learning_rate": 1.1982121865658824e-06, "loss": 0.3179, "step": 39220 }, { "epoch": 3.9874949166327776, "grad_norm": 0.2600301206111908, "learning_rate": 1.1979816960241575e-06, "loss": 0.2992, "step": 39221 }, { "epoch": 3.9875965839772265, "grad_norm": 0.28705844283103943, "learning_rate": 1.197751224636098e-06, "loss": 0.305, "step": 39222 }, { "epoch": 3.9876982513216754, "grad_norm": 0.3128730356693268, "learning_rate": 1.1975207724028632e-06, "loss": 0.3183, "step": 39223 }, { "epoch": 3.9877999186661244, "grad_norm": 0.2758573293685913, "learning_rate": 1.197290339325614e-06, "loss": 0.2963, "step": 39224 }, { "epoch": 3.9879015860105733, "grad_norm": 0.25830328464508057, "learning_rate": 1.1970599254055142e-06, "loss": 0.2922, "step": 39225 }, { "epoch": 3.9880032533550223, "grad_norm": 0.28388968110084534, "learning_rate": 1.1968295306437227e-06, "loss": 0.2987, "step": 39226 }, { "epoch": 3.988104920699471, "grad_norm": 0.2702447474002838, "learning_rate": 1.196599155041399e-06, "loss": 0.2756, "step": 39227 }, { "epoch": 3.98820658804392, "grad_norm": 0.2843282222747803, "learning_rate": 1.1963687985997063e-06, "loss": 0.3158, "step": 39228 }, { "epoch": 3.988308255388369, "grad_norm": 0.26252248883247375, "learning_rate": 1.1961384613198036e-06, "loss": 0.2825, "step": 39229 }, { "epoch": 3.988409922732818, "grad_norm": 0.27814963459968567, "learning_rate": 1.1959081432028496e-06, "loss": 0.346, "step": 39230 }, { "epoch": 3.988511590077267, "grad_norm": 0.26375967264175415, "learning_rate": 1.1956778442500083e-06, "loss": 0.2965, "step": 39231 }, { "epoch": 3.988613257421716, "grad_norm": 0.2627030909061432, "learning_rate": 1.195447564462438e-06, "loss": 0.3186, "step": 39232 }, { "epoch": 3.988714924766165, "grad_norm": 0.2692301571369171, "learning_rate": 1.1952173038412983e-06, "loss": 0.3374, "step": 39233 }, { "epoch": 3.9888165921106142, "grad_norm": 0.25705426931381226, "learning_rate": 1.1949870623877485e-06, "loss": 0.3037, "step": 39234 }, { "epoch": 3.988918259455063, "grad_norm": 0.285012423992157, "learning_rate": 1.1947568401029513e-06, "loss": 0.3086, "step": 39235 }, { "epoch": 3.989019926799512, "grad_norm": 0.27450481057167053, "learning_rate": 1.1945266369880648e-06, "loss": 0.3036, "step": 39236 }, { "epoch": 3.989121594143961, "grad_norm": 0.27086034417152405, "learning_rate": 1.194296453044247e-06, "loss": 0.3537, "step": 39237 }, { "epoch": 3.98922326148841, "grad_norm": 0.2829228639602661, "learning_rate": 1.1940662882726612e-06, "loss": 0.3122, "step": 39238 }, { "epoch": 3.989324928832859, "grad_norm": 0.26417988538742065, "learning_rate": 1.193836142674465e-06, "loss": 0.3078, "step": 39239 }, { "epoch": 3.989426596177308, "grad_norm": 0.28968125581741333, "learning_rate": 1.1936060162508157e-06, "loss": 0.2993, "step": 39240 }, { "epoch": 3.989528263521757, "grad_norm": 0.2924233078956604, "learning_rate": 1.1933759090028768e-06, "loss": 0.3142, "step": 39241 }, { "epoch": 3.9896299308662058, "grad_norm": 0.266940712928772, "learning_rate": 1.1931458209318053e-06, "loss": 0.2875, "step": 39242 }, { "epoch": 3.9897315982106547, "grad_norm": 0.27759748697280884, "learning_rate": 1.1929157520387607e-06, "loss": 0.3059, "step": 39243 }, { "epoch": 3.9898332655551036, "grad_norm": 0.26580533385276794, "learning_rate": 1.1926857023249e-06, "loss": 0.293, "step": 39244 }, { "epoch": 3.9899349328995526, "grad_norm": 0.29532065987586975, "learning_rate": 1.1924556717913859e-06, "loss": 0.2822, "step": 39245 }, { "epoch": 3.9900366002440015, "grad_norm": 0.28136104345321655, "learning_rate": 1.192225660439375e-06, "loss": 0.3294, "step": 39246 }, { "epoch": 3.9901382675884505, "grad_norm": 0.24968074262142181, "learning_rate": 1.1919956682700251e-06, "loss": 0.3287, "step": 39247 }, { "epoch": 3.9902399349328994, "grad_norm": 0.2955953776836395, "learning_rate": 1.1917656952844992e-06, "loss": 0.3323, "step": 39248 }, { "epoch": 3.9903416022773484, "grad_norm": 0.2735605239868164, "learning_rate": 1.191535741483949e-06, "loss": 0.3269, "step": 39249 }, { "epoch": 3.9904432696217977, "grad_norm": 0.26168468594551086, "learning_rate": 1.1913058068695376e-06, "loss": 0.3016, "step": 39250 }, { "epoch": 3.9905449369662467, "grad_norm": 0.28182289004325867, "learning_rate": 1.1910758914424235e-06, "loss": 0.3334, "step": 39251 }, { "epoch": 3.9906466043106956, "grad_norm": 0.28683969378471375, "learning_rate": 1.1908459952037637e-06, "loss": 0.2902, "step": 39252 }, { "epoch": 3.9907482716551446, "grad_norm": 0.2757183611392975, "learning_rate": 1.190616118154717e-06, "loss": 0.3243, "step": 39253 }, { "epoch": 3.9908499389995935, "grad_norm": 0.28836560249328613, "learning_rate": 1.1903862602964395e-06, "loss": 0.2831, "step": 39254 }, { "epoch": 3.9909516063440424, "grad_norm": 0.2597181499004364, "learning_rate": 1.190156421630092e-06, "loss": 0.307, "step": 39255 }, { "epoch": 3.9910532736884914, "grad_norm": 0.2753376364707947, "learning_rate": 1.1899266021568313e-06, "loss": 0.3227, "step": 39256 }, { "epoch": 3.9911549410329403, "grad_norm": 0.24872319400310516, "learning_rate": 1.1896968018778139e-06, "loss": 0.3154, "step": 39257 }, { "epoch": 3.9912566083773893, "grad_norm": 0.25856098532676697, "learning_rate": 1.1894670207942006e-06, "loss": 0.2819, "step": 39258 }, { "epoch": 3.991358275721838, "grad_norm": 0.25733134150505066, "learning_rate": 1.1892372589071444e-06, "loss": 0.3078, "step": 39259 }, { "epoch": 3.991459943066287, "grad_norm": 0.28546300530433655, "learning_rate": 1.1890075162178055e-06, "loss": 0.3291, "step": 39260 }, { "epoch": 3.991561610410736, "grad_norm": 0.290263831615448, "learning_rate": 1.1887777927273437e-06, "loss": 0.3035, "step": 39261 }, { "epoch": 3.991663277755185, "grad_norm": 0.28498193621635437, "learning_rate": 1.1885480884369111e-06, "loss": 0.3247, "step": 39262 }, { "epoch": 3.991764945099634, "grad_norm": 0.2734007239341736, "learning_rate": 1.1883184033476686e-06, "loss": 0.2942, "step": 39263 }, { "epoch": 3.991866612444083, "grad_norm": 0.27993243932724, "learning_rate": 1.188088737460771e-06, "loss": 0.3199, "step": 39264 }, { "epoch": 3.991968279788532, "grad_norm": 0.26878365874290466, "learning_rate": 1.187859090777378e-06, "loss": 0.2923, "step": 39265 }, { "epoch": 3.992069947132981, "grad_norm": 0.2775794267654419, "learning_rate": 1.1876294632986447e-06, "loss": 0.3282, "step": 39266 }, { "epoch": 3.9921716144774297, "grad_norm": 0.2805999517440796, "learning_rate": 1.1873998550257266e-06, "loss": 0.3133, "step": 39267 }, { "epoch": 3.9922732818218787, "grad_norm": 0.2653030753135681, "learning_rate": 1.1871702659597856e-06, "loss": 0.2991, "step": 39268 }, { "epoch": 3.9923749491663276, "grad_norm": 0.2725701332092285, "learning_rate": 1.1869406961019714e-06, "loss": 0.3211, "step": 39269 }, { "epoch": 3.9924766165107766, "grad_norm": 0.2772918939590454, "learning_rate": 1.1867111454534435e-06, "loss": 0.304, "step": 39270 }, { "epoch": 3.9925782838552255, "grad_norm": 0.2584649920463562, "learning_rate": 1.1864816140153624e-06, "loss": 0.3174, "step": 39271 }, { "epoch": 3.9926799511996744, "grad_norm": 0.28119614720344543, "learning_rate": 1.1862521017888773e-06, "loss": 0.341, "step": 39272 }, { "epoch": 3.9927816185441234, "grad_norm": 0.2452344000339508, "learning_rate": 1.186022608775149e-06, "loss": 0.3257, "step": 39273 }, { "epoch": 3.9928832858885723, "grad_norm": 0.2717142105102539, "learning_rate": 1.1857931349753316e-06, "loss": 0.3064, "step": 39274 }, { "epoch": 3.9929849532330217, "grad_norm": 0.2841775417327881, "learning_rate": 1.1855636803905829e-06, "loss": 0.2829, "step": 39275 }, { "epoch": 3.9930866205774707, "grad_norm": 0.2823418080806732, "learning_rate": 1.1853342450220584e-06, "loss": 0.2974, "step": 39276 }, { "epoch": 3.9931882879219196, "grad_norm": 0.3019826114177704, "learning_rate": 1.1851048288709121e-06, "loss": 0.3147, "step": 39277 }, { "epoch": 3.9932899552663685, "grad_norm": 0.25729215145111084, "learning_rate": 1.1848754319383037e-06, "loss": 0.3212, "step": 39278 }, { "epoch": 3.9933916226108175, "grad_norm": 0.2770310640335083, "learning_rate": 1.1846460542253834e-06, "loss": 0.2973, "step": 39279 }, { "epoch": 3.9934932899552664, "grad_norm": 0.2552091181278229, "learning_rate": 1.1844166957333098e-06, "loss": 0.3291, "step": 39280 }, { "epoch": 3.9935949572997154, "grad_norm": 0.25455421209335327, "learning_rate": 1.1841873564632405e-06, "loss": 0.3092, "step": 39281 }, { "epoch": 3.9936966246441643, "grad_norm": 0.2701491713523865, "learning_rate": 1.183958036416326e-06, "loss": 0.2839, "step": 39282 }, { "epoch": 3.9937982919886132, "grad_norm": 0.2532414197921753, "learning_rate": 1.1837287355937256e-06, "loss": 0.2929, "step": 39283 }, { "epoch": 3.993899959333062, "grad_norm": 0.2700231373310089, "learning_rate": 1.1834994539965927e-06, "loss": 0.307, "step": 39284 }, { "epoch": 3.994001626677511, "grad_norm": 0.2735290229320526, "learning_rate": 1.1832701916260814e-06, "loss": 0.3023, "step": 39285 }, { "epoch": 3.99410329402196, "grad_norm": 0.2802472412586212, "learning_rate": 1.1830409484833494e-06, "loss": 0.3034, "step": 39286 }, { "epoch": 3.994204961366409, "grad_norm": 0.2763075530529022, "learning_rate": 1.1828117245695487e-06, "loss": 0.2948, "step": 39287 }, { "epoch": 3.994306628710858, "grad_norm": 0.2835535705089569, "learning_rate": 1.1825825198858382e-06, "loss": 0.3093, "step": 39288 }, { "epoch": 3.994408296055307, "grad_norm": 0.29831835627555847, "learning_rate": 1.1823533344333664e-06, "loss": 0.2877, "step": 39289 }, { "epoch": 3.994509963399756, "grad_norm": 0.2628726661205292, "learning_rate": 1.1821241682132917e-06, "loss": 0.3091, "step": 39290 }, { "epoch": 3.994611630744205, "grad_norm": 0.29423782229423523, "learning_rate": 1.181895021226771e-06, "loss": 0.2838, "step": 39291 }, { "epoch": 3.994713298088654, "grad_norm": 0.25963467359542847, "learning_rate": 1.181665893474953e-06, "loss": 0.3193, "step": 39292 }, { "epoch": 3.994814965433103, "grad_norm": 0.2651149332523346, "learning_rate": 1.1814367849589964e-06, "loss": 0.2915, "step": 39293 }, { "epoch": 3.994916632777552, "grad_norm": 0.259477823972702, "learning_rate": 1.1812076956800538e-06, "loss": 0.3026, "step": 39294 }, { "epoch": 3.995018300122001, "grad_norm": 0.2821654975414276, "learning_rate": 1.180978625639278e-06, "loss": 0.3045, "step": 39295 }, { "epoch": 3.99511996746645, "grad_norm": 0.28634151816368103, "learning_rate": 1.1807495748378255e-06, "loss": 0.3217, "step": 39296 }, { "epoch": 3.995221634810899, "grad_norm": 0.26810142397880554, "learning_rate": 1.180520543276848e-06, "loss": 0.2958, "step": 39297 }, { "epoch": 3.995323302155348, "grad_norm": 0.2879868149757385, "learning_rate": 1.1802915309575031e-06, "loss": 0.3169, "step": 39298 }, { "epoch": 3.9954249694997968, "grad_norm": 0.2551030218601227, "learning_rate": 1.180062537880939e-06, "loss": 0.3136, "step": 39299 }, { "epoch": 3.9955266368442457, "grad_norm": 0.2776109576225281, "learning_rate": 1.1798335640483117e-06, "loss": 0.3068, "step": 39300 }, { "epoch": 3.9956283041886946, "grad_norm": 0.24688749015331268, "learning_rate": 1.1796046094607783e-06, "loss": 0.2931, "step": 39301 }, { "epoch": 3.9957299715331436, "grad_norm": 0.29357752203941345, "learning_rate": 1.1793756741194856e-06, "loss": 0.3198, "step": 39302 }, { "epoch": 3.9958316388775925, "grad_norm": 0.27035844326019287, "learning_rate": 1.1791467580255921e-06, "loss": 0.3038, "step": 39303 }, { "epoch": 3.9959333062220415, "grad_norm": 0.2733215093612671, "learning_rate": 1.1789178611802498e-06, "loss": 0.2956, "step": 39304 }, { "epoch": 3.9960349735664904, "grad_norm": 0.2859112322330475, "learning_rate": 1.1786889835846094e-06, "loss": 0.3132, "step": 39305 }, { "epoch": 3.9961366409109393, "grad_norm": 0.2944219410419464, "learning_rate": 1.178460125239827e-06, "loss": 0.2965, "step": 39306 }, { "epoch": 3.9962383082553883, "grad_norm": 0.26684433221817017, "learning_rate": 1.1782312861470546e-06, "loss": 0.3088, "step": 39307 }, { "epoch": 3.9963399755998372, "grad_norm": 0.2708403170108795, "learning_rate": 1.1780024663074446e-06, "loss": 0.2918, "step": 39308 }, { "epoch": 3.996441642944286, "grad_norm": 0.2697426676750183, "learning_rate": 1.177773665722149e-06, "loss": 0.3017, "step": 39309 }, { "epoch": 3.996543310288735, "grad_norm": 0.28815749287605286, "learning_rate": 1.177544884392321e-06, "loss": 0.3136, "step": 39310 }, { "epoch": 3.996644977633184, "grad_norm": 0.26795658469200134, "learning_rate": 1.1773161223191165e-06, "loss": 0.2894, "step": 39311 }, { "epoch": 3.996746644977633, "grad_norm": 0.27005428075790405, "learning_rate": 1.177087379503682e-06, "loss": 0.2894, "step": 39312 }, { "epoch": 3.996848312322082, "grad_norm": 0.26827749609947205, "learning_rate": 1.176858655947174e-06, "loss": 0.2875, "step": 39313 }, { "epoch": 3.996949979666531, "grad_norm": 0.27120357751846313, "learning_rate": 1.176629951650744e-06, "loss": 0.3014, "step": 39314 }, { "epoch": 3.99705164701098, "grad_norm": 0.2687714397907257, "learning_rate": 1.1764012666155422e-06, "loss": 0.3121, "step": 39315 }, { "epoch": 3.997153314355429, "grad_norm": 0.25552940368652344, "learning_rate": 1.1761726008427232e-06, "loss": 0.2818, "step": 39316 }, { "epoch": 3.997254981699878, "grad_norm": 0.2676094174385071, "learning_rate": 1.1759439543334384e-06, "loss": 0.2904, "step": 39317 }, { "epoch": 3.997356649044327, "grad_norm": 0.27963268756866455, "learning_rate": 1.1757153270888394e-06, "loss": 0.3025, "step": 39318 }, { "epoch": 3.997458316388776, "grad_norm": 0.27175450325012207, "learning_rate": 1.175486719110076e-06, "loss": 0.3065, "step": 39319 }, { "epoch": 3.997559983733225, "grad_norm": 0.2644467353820801, "learning_rate": 1.175258130398303e-06, "loss": 0.311, "step": 39320 }, { "epoch": 3.997661651077674, "grad_norm": 0.27256831526756287, "learning_rate": 1.1750295609546708e-06, "loss": 0.2925, "step": 39321 }, { "epoch": 3.997763318422123, "grad_norm": 0.25819599628448486, "learning_rate": 1.1748010107803293e-06, "loss": 0.3012, "step": 39322 }, { "epoch": 3.997864985766572, "grad_norm": 0.27146148681640625, "learning_rate": 1.174572479876433e-06, "loss": 0.2954, "step": 39323 }, { "epoch": 3.9979666531110207, "grad_norm": 0.2706364393234253, "learning_rate": 1.174343968244131e-06, "loss": 0.3309, "step": 39324 }, { "epoch": 3.9980683204554697, "grad_norm": 0.2696261703968048, "learning_rate": 1.1741154758845741e-06, "loss": 0.3379, "step": 39325 }, { "epoch": 3.9981699877999186, "grad_norm": 0.2710619866847992, "learning_rate": 1.1738870027989153e-06, "loss": 0.3278, "step": 39326 }, { "epoch": 3.9982716551443676, "grad_norm": 0.2728704512119293, "learning_rate": 1.1736585489883046e-06, "loss": 0.3199, "step": 39327 }, { "epoch": 3.9983733224888165, "grad_norm": 0.2731156349182129, "learning_rate": 1.1734301144538928e-06, "loss": 0.3187, "step": 39328 }, { "epoch": 3.9984749898332654, "grad_norm": 0.2696705758571625, "learning_rate": 1.1732016991968299e-06, "loss": 0.2983, "step": 39329 }, { "epoch": 3.9985766571777144, "grad_norm": 0.2658451199531555, "learning_rate": 1.1729733032182684e-06, "loss": 0.3168, "step": 39330 }, { "epoch": 3.9986783245221633, "grad_norm": 0.29046517610549927, "learning_rate": 1.1727449265193575e-06, "loss": 0.2965, "step": 39331 }, { "epoch": 3.9987799918666127, "grad_norm": 0.26728498935699463, "learning_rate": 1.1725165691012474e-06, "loss": 0.2987, "step": 39332 }, { "epoch": 3.9988816592110616, "grad_norm": 0.2462911307811737, "learning_rate": 1.1722882309650901e-06, "loss": 0.3009, "step": 39333 }, { "epoch": 3.9989833265555106, "grad_norm": 0.2764129340648651, "learning_rate": 1.1720599121120358e-06, "loss": 0.3446, "step": 39334 }, { "epoch": 3.9990849938999595, "grad_norm": 0.2808161675930023, "learning_rate": 1.1718316125432338e-06, "loss": 0.3077, "step": 39335 }, { "epoch": 3.9991866612444085, "grad_norm": 0.2656175494194031, "learning_rate": 1.1716033322598324e-06, "loss": 0.3361, "step": 39336 }, { "epoch": 3.9992883285888574, "grad_norm": 0.2771567404270172, "learning_rate": 1.1713750712629857e-06, "loss": 0.2671, "step": 39337 }, { "epoch": 3.9993899959333064, "grad_norm": 0.2747158110141754, "learning_rate": 1.1711468295538414e-06, "loss": 0.3093, "step": 39338 }, { "epoch": 3.9994916632777553, "grad_norm": 0.27795183658599854, "learning_rate": 1.1709186071335482e-06, "loss": 0.3215, "step": 39339 }, { "epoch": 3.9995933306222042, "grad_norm": 0.2954271733760834, "learning_rate": 1.1706904040032585e-06, "loss": 0.314, "step": 39340 }, { "epoch": 3.999694997966653, "grad_norm": 0.28085455298423767, "learning_rate": 1.1704622201641203e-06, "loss": 0.3424, "step": 39341 }, { "epoch": 3.999796665311102, "grad_norm": 0.2443121373653412, "learning_rate": 1.1702340556172825e-06, "loss": 0.305, "step": 39342 }, { "epoch": 3.999898332655551, "grad_norm": 0.298047810792923, "learning_rate": 1.1700059103638966e-06, "loss": 0.2813, "step": 39343 }, { "epoch": 4.0, "grad_norm": 0.2516866624355316, "learning_rate": 1.1697777844051105e-06, "loss": 0.3713, "step": 39344 }, { "epoch": 4.000101667344449, "grad_norm": 0.28843843936920166, "learning_rate": 1.169549677742074e-06, "loss": 0.304, "step": 39345 }, { "epoch": 4.000203334688898, "grad_norm": 0.3036442995071411, "learning_rate": 1.1693215903759348e-06, "loss": 0.3077, "step": 39346 }, { "epoch": 4.000305002033347, "grad_norm": 0.287723183631897, "learning_rate": 1.1690935223078437e-06, "loss": 0.2703, "step": 39347 }, { "epoch": 4.000406669377796, "grad_norm": 0.3037227392196655, "learning_rate": 1.1688654735389499e-06, "loss": 0.2778, "step": 39348 }, { "epoch": 4.000508336722245, "grad_norm": 0.2700815796852112, "learning_rate": 1.1686374440703997e-06, "loss": 0.2782, "step": 39349 }, { "epoch": 4.000610004066694, "grad_norm": 0.2765944302082062, "learning_rate": 1.1684094339033447e-06, "loss": 0.2942, "step": 39350 }, { "epoch": 4.000711671411143, "grad_norm": 0.2587612569332123, "learning_rate": 1.1681814430389332e-06, "loss": 0.2936, "step": 39351 }, { "epoch": 4.0008133387555915, "grad_norm": 0.3580032289028168, "learning_rate": 1.1679534714783108e-06, "loss": 0.2839, "step": 39352 }, { "epoch": 4.0009150061000405, "grad_norm": 0.26319751143455505, "learning_rate": 1.16772551922263e-06, "loss": 0.299, "step": 39353 }, { "epoch": 4.001016673444489, "grad_norm": 0.28610971570014954, "learning_rate": 1.1674975862730377e-06, "loss": 0.3115, "step": 39354 }, { "epoch": 4.001118340788938, "grad_norm": 0.2701397240161896, "learning_rate": 1.167269672630681e-06, "loss": 0.2897, "step": 39355 }, { "epoch": 4.001220008133387, "grad_norm": 0.29858750104904175, "learning_rate": 1.167041778296708e-06, "loss": 0.3063, "step": 39356 }, { "epoch": 4.001321675477836, "grad_norm": 0.2746501863002777, "learning_rate": 1.1668139032722687e-06, "loss": 0.2833, "step": 39357 }, { "epoch": 4.001423342822285, "grad_norm": 0.2636459171772003, "learning_rate": 1.1665860475585105e-06, "loss": 0.2617, "step": 39358 }, { "epoch": 4.001525010166734, "grad_norm": 0.2884208858013153, "learning_rate": 1.166358211156579e-06, "loss": 0.294, "step": 39359 }, { "epoch": 4.001626677511183, "grad_norm": 0.2654632031917572, "learning_rate": 1.1661303940676255e-06, "loss": 0.3004, "step": 39360 }, { "epoch": 4.001728344855632, "grad_norm": 0.2907639741897583, "learning_rate": 1.165902596292796e-06, "loss": 0.2967, "step": 39361 }, { "epoch": 4.001830012200081, "grad_norm": 0.28155410289764404, "learning_rate": 1.1656748178332366e-06, "loss": 0.3049, "step": 39362 }, { "epoch": 4.00193167954453, "grad_norm": 0.279582142829895, "learning_rate": 1.165447058690098e-06, "loss": 0.3201, "step": 39363 }, { "epoch": 4.002033346888979, "grad_norm": 0.30474546551704407, "learning_rate": 1.1652193188645255e-06, "loss": 0.2866, "step": 39364 }, { "epoch": 4.002135014233429, "grad_norm": 0.2837940752506256, "learning_rate": 1.1649915983576672e-06, "loss": 0.3152, "step": 39365 }, { "epoch": 4.002236681577878, "grad_norm": 0.2733393609523773, "learning_rate": 1.164763897170668e-06, "loss": 0.2831, "step": 39366 }, { "epoch": 4.0023383489223265, "grad_norm": 0.28935596346855164, "learning_rate": 1.1645362153046786e-06, "loss": 0.2833, "step": 39367 }, { "epoch": 4.0024400162667755, "grad_norm": 0.28305694460868835, "learning_rate": 1.1643085527608445e-06, "loss": 0.3363, "step": 39368 }, { "epoch": 4.002541683611224, "grad_norm": 0.27013862133026123, "learning_rate": 1.1640809095403106e-06, "loss": 0.3055, "step": 39369 }, { "epoch": 4.002643350955673, "grad_norm": 0.2709257900714874, "learning_rate": 1.163853285644227e-06, "loss": 0.2848, "step": 39370 }, { "epoch": 4.002745018300122, "grad_norm": 0.2739298939704895, "learning_rate": 1.163625681073739e-06, "loss": 0.3291, "step": 39371 }, { "epoch": 4.002846685644571, "grad_norm": 0.25776782631874084, "learning_rate": 1.1633980958299918e-06, "loss": 0.2869, "step": 39372 }, { "epoch": 4.00294835298902, "grad_norm": 0.27146056294441223, "learning_rate": 1.1631705299141343e-06, "loss": 0.2827, "step": 39373 }, { "epoch": 4.003050020333469, "grad_norm": 0.27399784326553345, "learning_rate": 1.1629429833273127e-06, "loss": 0.2766, "step": 39374 }, { "epoch": 4.003151687677918, "grad_norm": 0.28334882855415344, "learning_rate": 1.1627154560706716e-06, "loss": 0.2806, "step": 39375 }, { "epoch": 4.003253355022367, "grad_norm": 0.28297266364097595, "learning_rate": 1.1624879481453572e-06, "loss": 0.3087, "step": 39376 }, { "epoch": 4.003355022366816, "grad_norm": 0.28111669421195984, "learning_rate": 1.1622604595525182e-06, "loss": 0.3124, "step": 39377 }, { "epoch": 4.003456689711265, "grad_norm": 0.27979207038879395, "learning_rate": 1.1620329902932986e-06, "loss": 0.2983, "step": 39378 }, { "epoch": 4.003558357055714, "grad_norm": 0.26803234219551086, "learning_rate": 1.161805540368844e-06, "loss": 0.2899, "step": 39379 }, { "epoch": 4.003660024400163, "grad_norm": 0.2693963050842285, "learning_rate": 1.161578109780302e-06, "loss": 0.3124, "step": 39380 }, { "epoch": 4.003761691744612, "grad_norm": 0.2997187077999115, "learning_rate": 1.161350698528817e-06, "loss": 0.3238, "step": 39381 }, { "epoch": 4.003863359089061, "grad_norm": 0.276793509721756, "learning_rate": 1.1611233066155336e-06, "loss": 0.3, "step": 39382 }, { "epoch": 4.00396502643351, "grad_norm": 0.2658238410949707, "learning_rate": 1.1608959340416004e-06, "loss": 0.3218, "step": 39383 }, { "epoch": 4.0040666937779585, "grad_norm": 0.28687784075737, "learning_rate": 1.1606685808081615e-06, "loss": 0.302, "step": 39384 }, { "epoch": 4.0041683611224075, "grad_norm": 0.2875504195690155, "learning_rate": 1.1604412469163618e-06, "loss": 0.3115, "step": 39385 }, { "epoch": 4.004270028466856, "grad_norm": 0.29350924491882324, "learning_rate": 1.1602139323673445e-06, "loss": 0.3439, "step": 39386 }, { "epoch": 4.004371695811305, "grad_norm": 0.29060062766075134, "learning_rate": 1.1599866371622591e-06, "loss": 0.2843, "step": 39387 }, { "epoch": 4.004473363155754, "grad_norm": 0.28692981600761414, "learning_rate": 1.1597593613022484e-06, "loss": 0.301, "step": 39388 }, { "epoch": 4.004575030500203, "grad_norm": 0.26919281482696533, "learning_rate": 1.159532104788456e-06, "loss": 0.2683, "step": 39389 }, { "epoch": 4.004676697844652, "grad_norm": 0.28087615966796875, "learning_rate": 1.1593048676220293e-06, "loss": 0.2987, "step": 39390 }, { "epoch": 4.004778365189101, "grad_norm": 0.2667652666568756, "learning_rate": 1.1590776498041123e-06, "loss": 0.2653, "step": 39391 }, { "epoch": 4.00488003253355, "grad_norm": 0.28753888607025146, "learning_rate": 1.158850451335848e-06, "loss": 0.2946, "step": 39392 }, { "epoch": 4.004981699877999, "grad_norm": 0.273417204618454, "learning_rate": 1.1586232722183839e-06, "loss": 0.2942, "step": 39393 }, { "epoch": 4.005083367222448, "grad_norm": 0.2882843613624573, "learning_rate": 1.1583961124528626e-06, "loss": 0.2798, "step": 39394 }, { "epoch": 4.005185034566897, "grad_norm": 0.2791503965854645, "learning_rate": 1.1581689720404288e-06, "loss": 0.2917, "step": 39395 }, { "epoch": 4.005286701911346, "grad_norm": 0.2597333788871765, "learning_rate": 1.1579418509822255e-06, "loss": 0.3297, "step": 39396 }, { "epoch": 4.005388369255795, "grad_norm": 0.27200937271118164, "learning_rate": 1.1577147492793995e-06, "loss": 0.2817, "step": 39397 }, { "epoch": 4.005490036600244, "grad_norm": 0.263059139251709, "learning_rate": 1.157487666933093e-06, "loss": 0.3021, "step": 39398 }, { "epoch": 4.005591703944693, "grad_norm": 0.2641875743865967, "learning_rate": 1.1572606039444494e-06, "loss": 0.2962, "step": 39399 }, { "epoch": 4.005693371289142, "grad_norm": 0.27561721205711365, "learning_rate": 1.1570335603146171e-06, "loss": 0.2891, "step": 39400 }, { "epoch": 4.0057950386335905, "grad_norm": 0.2905130684375763, "learning_rate": 1.1568065360447334e-06, "loss": 0.3025, "step": 39401 }, { "epoch": 4.0058967059780395, "grad_norm": 0.3210172951221466, "learning_rate": 1.1565795311359446e-06, "loss": 0.2729, "step": 39402 }, { "epoch": 4.005998373322488, "grad_norm": 0.30034294724464417, "learning_rate": 1.1563525455893964e-06, "loss": 0.2764, "step": 39403 }, { "epoch": 4.006100040666937, "grad_norm": 0.2387028932571411, "learning_rate": 1.156125579406231e-06, "loss": 0.2991, "step": 39404 }, { "epoch": 4.006201708011386, "grad_norm": 0.2771419286727905, "learning_rate": 1.1558986325875916e-06, "loss": 0.2934, "step": 39405 }, { "epoch": 4.006303375355836, "grad_norm": 0.2639143466949463, "learning_rate": 1.1556717051346195e-06, "loss": 0.3011, "step": 39406 }, { "epoch": 4.006405042700285, "grad_norm": 0.2783766984939575, "learning_rate": 1.155444797048461e-06, "loss": 0.3162, "step": 39407 }, { "epoch": 4.006506710044734, "grad_norm": 0.2564915418624878, "learning_rate": 1.155217908330259e-06, "loss": 0.2982, "step": 39408 }, { "epoch": 4.006608377389183, "grad_norm": 0.2690250873565674, "learning_rate": 1.154991038981153e-06, "loss": 0.291, "step": 39409 }, { "epoch": 4.006710044733632, "grad_norm": 0.27212047576904297, "learning_rate": 1.154764189002292e-06, "loss": 0.3169, "step": 39410 }, { "epoch": 4.006811712078081, "grad_norm": 0.29109713435173035, "learning_rate": 1.1545373583948121e-06, "loss": 0.2957, "step": 39411 }, { "epoch": 4.00691337942253, "grad_norm": 0.2748744785785675, "learning_rate": 1.1543105471598592e-06, "loss": 0.3199, "step": 39412 }, { "epoch": 4.007015046766979, "grad_norm": 0.26979127526283264, "learning_rate": 1.154083755298579e-06, "loss": 0.3132, "step": 39413 }, { "epoch": 4.007116714111428, "grad_norm": 0.2930876314640045, "learning_rate": 1.1538569828121078e-06, "loss": 0.3085, "step": 39414 }, { "epoch": 4.007218381455877, "grad_norm": 0.27994680404663086, "learning_rate": 1.1536302297015929e-06, "loss": 0.277, "step": 39415 }, { "epoch": 4.0073200488003256, "grad_norm": 0.26157018542289734, "learning_rate": 1.1534034959681728e-06, "loss": 0.2947, "step": 39416 }, { "epoch": 4.0074217161447745, "grad_norm": 0.27181610465049744, "learning_rate": 1.1531767816129935e-06, "loss": 0.3045, "step": 39417 }, { "epoch": 4.007523383489223, "grad_norm": 0.2622714936733246, "learning_rate": 1.1529500866371956e-06, "loss": 0.3258, "step": 39418 }, { "epoch": 4.007625050833672, "grad_norm": 0.28360697627067566, "learning_rate": 1.1527234110419193e-06, "loss": 0.3041, "step": 39419 }, { "epoch": 4.007726718178121, "grad_norm": 0.28236082196235657, "learning_rate": 1.152496754828311e-06, "loss": 0.3005, "step": 39420 }, { "epoch": 4.00782838552257, "grad_norm": 0.27577969431877136, "learning_rate": 1.1522701179975076e-06, "loss": 0.3154, "step": 39421 }, { "epoch": 4.007930052867019, "grad_norm": 0.27260586619377136, "learning_rate": 1.1520435005506525e-06, "loss": 0.2936, "step": 39422 }, { "epoch": 4.008031720211468, "grad_norm": 0.2695300281047821, "learning_rate": 1.1518169024888903e-06, "loss": 0.3131, "step": 39423 }, { "epoch": 4.008133387555917, "grad_norm": 0.29960739612579346, "learning_rate": 1.1515903238133575e-06, "loss": 0.2728, "step": 39424 }, { "epoch": 4.008235054900366, "grad_norm": 0.26768043637275696, "learning_rate": 1.1513637645251996e-06, "loss": 0.2768, "step": 39425 }, { "epoch": 4.008336722244815, "grad_norm": 0.2862855792045593, "learning_rate": 1.1511372246255552e-06, "loss": 0.2918, "step": 39426 }, { "epoch": 4.008438389589264, "grad_norm": 0.25962311029434204, "learning_rate": 1.150910704115568e-06, "loss": 0.2916, "step": 39427 }, { "epoch": 4.008540056933713, "grad_norm": 0.30343109369277954, "learning_rate": 1.1506842029963783e-06, "loss": 0.3018, "step": 39428 }, { "epoch": 4.008641724278162, "grad_norm": 0.2714804410934448, "learning_rate": 1.1504577212691253e-06, "loss": 0.3175, "step": 39429 }, { "epoch": 4.008743391622611, "grad_norm": 0.2631879150867462, "learning_rate": 1.150231258934954e-06, "loss": 0.2865, "step": 39430 }, { "epoch": 4.00884505896706, "grad_norm": 0.2924775779247284, "learning_rate": 1.1500048159950005e-06, "loss": 0.3354, "step": 39431 }, { "epoch": 4.008946726311509, "grad_norm": 0.27206477522850037, "learning_rate": 1.1497783924504074e-06, "loss": 0.3103, "step": 39432 }, { "epoch": 4.0090483936559576, "grad_norm": 0.2689037621021271, "learning_rate": 1.1495519883023186e-06, "loss": 0.3145, "step": 39433 }, { "epoch": 4.0091500610004065, "grad_norm": 0.26936113834381104, "learning_rate": 1.1493256035518697e-06, "loss": 0.3032, "step": 39434 }, { "epoch": 4.009251728344855, "grad_norm": 0.27685925364494324, "learning_rate": 1.1490992382002042e-06, "loss": 0.2952, "step": 39435 }, { "epoch": 4.009353395689304, "grad_norm": 0.2504311501979828, "learning_rate": 1.1488728922484615e-06, "loss": 0.293, "step": 39436 }, { "epoch": 4.009455063033753, "grad_norm": 0.29816490411758423, "learning_rate": 1.1486465656977812e-06, "loss": 0.2991, "step": 39437 }, { "epoch": 4.009556730378202, "grad_norm": 0.27463823556900024, "learning_rate": 1.1484202585493054e-06, "loss": 0.2843, "step": 39438 }, { "epoch": 4.009658397722651, "grad_norm": 0.2761896252632141, "learning_rate": 1.1481939708041712e-06, "loss": 0.2886, "step": 39439 }, { "epoch": 4.0097600650671, "grad_norm": 0.28331199288368225, "learning_rate": 1.1479677024635237e-06, "loss": 0.3109, "step": 39440 }, { "epoch": 4.009861732411549, "grad_norm": 0.2807605564594269, "learning_rate": 1.147741453528497e-06, "loss": 0.2642, "step": 39441 }, { "epoch": 4.009963399755998, "grad_norm": 0.2860601544380188, "learning_rate": 1.147515224000233e-06, "loss": 0.3135, "step": 39442 }, { "epoch": 4.010065067100447, "grad_norm": 0.25535598397254944, "learning_rate": 1.1472890138798742e-06, "loss": 0.321, "step": 39443 }, { "epoch": 4.010166734444896, "grad_norm": 0.3009428083896637, "learning_rate": 1.147062823168556e-06, "loss": 0.2659, "step": 39444 }, { "epoch": 4.010268401789345, "grad_norm": 0.30183637142181396, "learning_rate": 1.14683665186742e-06, "loss": 0.2878, "step": 39445 }, { "epoch": 4.010370069133794, "grad_norm": 0.27640843391418457, "learning_rate": 1.1466104999776063e-06, "loss": 0.2738, "step": 39446 }, { "epoch": 4.010471736478244, "grad_norm": 0.2923734486103058, "learning_rate": 1.1463843675002512e-06, "loss": 0.281, "step": 39447 }, { "epoch": 4.010573403822693, "grad_norm": 0.28146597743034363, "learning_rate": 1.1461582544364974e-06, "loss": 0.2941, "step": 39448 }, { "epoch": 4.0106750711671415, "grad_norm": 0.2746709883213043, "learning_rate": 1.1459321607874813e-06, "loss": 0.3037, "step": 39449 }, { "epoch": 4.0107767385115904, "grad_norm": 0.29229313135147095, "learning_rate": 1.1457060865543457e-06, "loss": 0.291, "step": 39450 }, { "epoch": 4.010878405856039, "grad_norm": 0.2856614291667938, "learning_rate": 1.1454800317382241e-06, "loss": 0.315, "step": 39451 }, { "epoch": 4.010980073200488, "grad_norm": 0.26006558537483215, "learning_rate": 1.145253996340258e-06, "loss": 0.3182, "step": 39452 }, { "epoch": 4.011081740544937, "grad_norm": 0.30160823464393616, "learning_rate": 1.1450279803615894e-06, "loss": 0.2903, "step": 39453 }, { "epoch": 4.011183407889386, "grad_norm": 0.26422119140625, "learning_rate": 1.1448019838033507e-06, "loss": 0.2532, "step": 39454 }, { "epoch": 4.011285075233835, "grad_norm": 0.28986459970474243, "learning_rate": 1.1445760066666855e-06, "loss": 0.304, "step": 39455 }, { "epoch": 4.011386742578284, "grad_norm": 0.2911182940006256, "learning_rate": 1.1443500489527292e-06, "loss": 0.3084, "step": 39456 }, { "epoch": 4.011488409922733, "grad_norm": 0.2959640324115753, "learning_rate": 1.1441241106626205e-06, "loss": 0.2829, "step": 39457 }, { "epoch": 4.011590077267182, "grad_norm": 0.28771305084228516, "learning_rate": 1.1438981917974996e-06, "loss": 0.3421, "step": 39458 }, { "epoch": 4.011691744611631, "grad_norm": 0.27695947885513306, "learning_rate": 1.1436722923585025e-06, "loss": 0.2959, "step": 39459 }, { "epoch": 4.01179341195608, "grad_norm": 0.2823579013347626, "learning_rate": 1.1434464123467687e-06, "loss": 0.298, "step": 39460 }, { "epoch": 4.011895079300529, "grad_norm": 0.2749860882759094, "learning_rate": 1.1432205517634338e-06, "loss": 0.2947, "step": 39461 }, { "epoch": 4.011996746644978, "grad_norm": 0.27537837624549866, "learning_rate": 1.1429947106096373e-06, "loss": 0.3133, "step": 39462 }, { "epoch": 4.012098413989427, "grad_norm": 0.2538309693336487, "learning_rate": 1.1427688888865196e-06, "loss": 0.2825, "step": 39463 }, { "epoch": 4.012200081333876, "grad_norm": 0.28279873728752136, "learning_rate": 1.1425430865952125e-06, "loss": 0.3236, "step": 39464 }, { "epoch": 4.012301748678325, "grad_norm": 0.26024171710014343, "learning_rate": 1.1423173037368584e-06, "loss": 0.3068, "step": 39465 }, { "epoch": 4.0124034160227735, "grad_norm": 0.2694568634033203, "learning_rate": 1.1420915403125931e-06, "loss": 0.3269, "step": 39466 }, { "epoch": 4.0125050833672224, "grad_norm": 0.2731275260448456, "learning_rate": 1.1418657963235524e-06, "loss": 0.2823, "step": 39467 }, { "epoch": 4.012606750711671, "grad_norm": 0.2817373275756836, "learning_rate": 1.1416400717708764e-06, "loss": 0.2781, "step": 39468 }, { "epoch": 4.01270841805612, "grad_norm": 0.2806480824947357, "learning_rate": 1.1414143666557003e-06, "loss": 0.2868, "step": 39469 }, { "epoch": 4.012810085400569, "grad_norm": 0.28243929147720337, "learning_rate": 1.1411886809791622e-06, "loss": 0.2842, "step": 39470 }, { "epoch": 4.012911752745018, "grad_norm": 0.28262653946876526, "learning_rate": 1.1409630147423973e-06, "loss": 0.2994, "step": 39471 }, { "epoch": 4.013013420089467, "grad_norm": 0.27417755126953125, "learning_rate": 1.1407373679465444e-06, "loss": 0.3215, "step": 39472 }, { "epoch": 4.013115087433916, "grad_norm": 0.29207298159599304, "learning_rate": 1.1405117405927403e-06, "loss": 0.2934, "step": 39473 }, { "epoch": 4.013216754778365, "grad_norm": 0.26868268847465515, "learning_rate": 1.140286132682119e-06, "loss": 0.3078, "step": 39474 }, { "epoch": 4.013318422122814, "grad_norm": 0.2776549458503723, "learning_rate": 1.1400605442158207e-06, "loss": 0.3215, "step": 39475 }, { "epoch": 4.013420089467263, "grad_norm": 0.2905426323413849, "learning_rate": 1.13983497519498e-06, "loss": 0.304, "step": 39476 }, { "epoch": 4.013521756811712, "grad_norm": 0.2758186161518097, "learning_rate": 1.1396094256207318e-06, "loss": 0.3186, "step": 39477 }, { "epoch": 4.013623424156161, "grad_norm": 0.27349504828453064, "learning_rate": 1.1393838954942155e-06, "loss": 0.3018, "step": 39478 }, { "epoch": 4.01372509150061, "grad_norm": 0.2617487907409668, "learning_rate": 1.1391583848165654e-06, "loss": 0.33, "step": 39479 }, { "epoch": 4.013826758845059, "grad_norm": 0.27108055353164673, "learning_rate": 1.1389328935889183e-06, "loss": 0.3193, "step": 39480 }, { "epoch": 4.013928426189508, "grad_norm": 0.28990501165390015, "learning_rate": 1.1387074218124083e-06, "loss": 0.3045, "step": 39481 }, { "epoch": 4.014030093533957, "grad_norm": 0.27409592270851135, "learning_rate": 1.1384819694881738e-06, "loss": 0.2887, "step": 39482 }, { "epoch": 4.0141317608784055, "grad_norm": 0.2723475396633148, "learning_rate": 1.1382565366173499e-06, "loss": 0.3127, "step": 39483 }, { "epoch": 4.0142334282228544, "grad_norm": 0.2756073772907257, "learning_rate": 1.1380311232010705e-06, "loss": 0.2644, "step": 39484 }, { "epoch": 4.014335095567303, "grad_norm": 0.2886185944080353, "learning_rate": 1.1378057292404738e-06, "loss": 0.2851, "step": 39485 }, { "epoch": 4.014436762911752, "grad_norm": 0.2517487704753876, "learning_rate": 1.1375803547366938e-06, "loss": 0.2918, "step": 39486 }, { "epoch": 4.014538430256201, "grad_norm": 0.2787691652774811, "learning_rate": 1.137354999690865e-06, "loss": 0.2774, "step": 39487 }, { "epoch": 4.014640097600651, "grad_norm": 0.2696872353553772, "learning_rate": 1.1371296641041252e-06, "loss": 0.3486, "step": 39488 }, { "epoch": 4.0147417649451, "grad_norm": 0.2533174157142639, "learning_rate": 1.1369043479776076e-06, "loss": 0.2954, "step": 39489 }, { "epoch": 4.014843432289549, "grad_norm": 0.31161829829216003, "learning_rate": 1.1366790513124486e-06, "loss": 0.3081, "step": 39490 }, { "epoch": 4.014945099633998, "grad_norm": 0.2905943989753723, "learning_rate": 1.1364537741097808e-06, "loss": 0.301, "step": 39491 }, { "epoch": 4.015046766978447, "grad_norm": 0.27974486351013184, "learning_rate": 1.1362285163707426e-06, "loss": 0.2923, "step": 39492 }, { "epoch": 4.015148434322896, "grad_norm": 0.27650246024131775, "learning_rate": 1.1360032780964663e-06, "loss": 0.2893, "step": 39493 }, { "epoch": 4.015250101667345, "grad_norm": 0.25456586480140686, "learning_rate": 1.1357780592880862e-06, "loss": 0.3094, "step": 39494 }, { "epoch": 4.015351769011794, "grad_norm": 0.2763635814189911, "learning_rate": 1.135552859946739e-06, "loss": 0.2767, "step": 39495 }, { "epoch": 4.015453436356243, "grad_norm": 0.26734215021133423, "learning_rate": 1.135327680073559e-06, "loss": 0.2767, "step": 39496 }, { "epoch": 4.015555103700692, "grad_norm": 0.28208687901496887, "learning_rate": 1.1351025196696775e-06, "loss": 0.2998, "step": 39497 }, { "epoch": 4.0156567710451405, "grad_norm": 0.2696503698825836, "learning_rate": 1.1348773787362328e-06, "loss": 0.3093, "step": 39498 }, { "epoch": 4.0157584383895895, "grad_norm": 0.26872947812080383, "learning_rate": 1.1346522572743574e-06, "loss": 0.3056, "step": 39499 }, { "epoch": 4.015860105734038, "grad_norm": 0.26206478476524353, "learning_rate": 1.1344271552851854e-06, "loss": 0.3044, "step": 39500 }, { "epoch": 4.015961773078487, "grad_norm": 0.25649210810661316, "learning_rate": 1.1342020727698494e-06, "loss": 0.2925, "step": 39501 }, { "epoch": 4.016063440422936, "grad_norm": 0.2890598773956299, "learning_rate": 1.1339770097294862e-06, "loss": 0.3066, "step": 39502 }, { "epoch": 4.016165107767385, "grad_norm": 0.27636581659317017, "learning_rate": 1.133751966165228e-06, "loss": 0.2819, "step": 39503 }, { "epoch": 4.016266775111834, "grad_norm": 0.2794108986854553, "learning_rate": 1.133526942078207e-06, "loss": 0.2677, "step": 39504 }, { "epoch": 4.016368442456283, "grad_norm": 0.2517685294151306, "learning_rate": 1.1333019374695603e-06, "loss": 0.2993, "step": 39505 }, { "epoch": 4.016470109800732, "grad_norm": 0.2750454545021057, "learning_rate": 1.133076952340419e-06, "loss": 0.2877, "step": 39506 }, { "epoch": 4.016571777145181, "grad_norm": 0.2812809646129608, "learning_rate": 1.1328519866919163e-06, "loss": 0.2975, "step": 39507 }, { "epoch": 4.01667344448963, "grad_norm": 0.2611755132675171, "learning_rate": 1.1326270405251871e-06, "loss": 0.2939, "step": 39508 }, { "epoch": 4.016775111834079, "grad_norm": 0.27479252219200134, "learning_rate": 1.1324021138413643e-06, "loss": 0.3294, "step": 39509 }, { "epoch": 4.016876779178528, "grad_norm": 0.27955368161201477, "learning_rate": 1.1321772066415803e-06, "loss": 0.3039, "step": 39510 }, { "epoch": 4.016978446522977, "grad_norm": 0.2616884112358093, "learning_rate": 1.1319523189269672e-06, "loss": 0.2854, "step": 39511 }, { "epoch": 4.017080113867426, "grad_norm": 0.2777453064918518, "learning_rate": 1.1317274506986602e-06, "loss": 0.2835, "step": 39512 }, { "epoch": 4.017181781211875, "grad_norm": 0.2963569164276123, "learning_rate": 1.131502601957792e-06, "loss": 0.269, "step": 39513 }, { "epoch": 4.017283448556324, "grad_norm": 0.25990572571754456, "learning_rate": 1.131277772705492e-06, "loss": 0.2663, "step": 39514 }, { "epoch": 4.0173851159007725, "grad_norm": 0.26537150144577026, "learning_rate": 1.1310529629428969e-06, "loss": 0.288, "step": 39515 }, { "epoch": 4.0174867832452215, "grad_norm": 0.2887134253978729, "learning_rate": 1.1308281726711374e-06, "loss": 0.324, "step": 39516 }, { "epoch": 4.01758845058967, "grad_norm": 0.2622644603252411, "learning_rate": 1.1306034018913452e-06, "loss": 0.2884, "step": 39517 }, { "epoch": 4.017690117934119, "grad_norm": 0.28366419672966003, "learning_rate": 1.1303786506046543e-06, "loss": 0.2795, "step": 39518 }, { "epoch": 4.017791785278568, "grad_norm": 0.2723715901374817, "learning_rate": 1.1301539188121967e-06, "loss": 0.3056, "step": 39519 }, { "epoch": 4.017893452623017, "grad_norm": 0.2916002571582794, "learning_rate": 1.129929206515104e-06, "loss": 0.3053, "step": 39520 }, { "epoch": 4.017995119967466, "grad_norm": 0.30558112263679504, "learning_rate": 1.129704513714507e-06, "loss": 0.3029, "step": 39521 }, { "epoch": 4.018096787311915, "grad_norm": 0.2740575671195984, "learning_rate": 1.1294798404115397e-06, "loss": 0.2786, "step": 39522 }, { "epoch": 4.018198454656364, "grad_norm": 0.2927698791027069, "learning_rate": 1.1292551866073336e-06, "loss": 0.2849, "step": 39523 }, { "epoch": 4.018300122000813, "grad_norm": 0.28881511092185974, "learning_rate": 1.1290305523030188e-06, "loss": 0.3138, "step": 39524 }, { "epoch": 4.018401789345262, "grad_norm": 0.27416154742240906, "learning_rate": 1.1288059374997295e-06, "loss": 0.3085, "step": 39525 }, { "epoch": 4.018503456689711, "grad_norm": 0.27731379866600037, "learning_rate": 1.128581342198596e-06, "loss": 0.261, "step": 39526 }, { "epoch": 4.01860512403416, "grad_norm": 0.291291207075119, "learning_rate": 1.1283567664007494e-06, "loss": 0.2679, "step": 39527 }, { "epoch": 4.018706791378609, "grad_norm": 0.2937535047531128, "learning_rate": 1.1281322101073206e-06, "loss": 0.2864, "step": 39528 }, { "epoch": 4.018808458723059, "grad_norm": 0.30115729570388794, "learning_rate": 1.1279076733194422e-06, "loss": 0.2896, "step": 39529 }, { "epoch": 4.0189101260675075, "grad_norm": 0.29220297932624817, "learning_rate": 1.1276831560382451e-06, "loss": 0.27, "step": 39530 }, { "epoch": 4.0190117934119565, "grad_norm": 0.2890361547470093, "learning_rate": 1.127458658264859e-06, "loss": 0.2584, "step": 39531 }, { "epoch": 4.019113460756405, "grad_norm": 0.2841359078884125, "learning_rate": 1.1272341800004173e-06, "loss": 0.2947, "step": 39532 }, { "epoch": 4.019215128100854, "grad_norm": 0.29396530985832214, "learning_rate": 1.127009721246049e-06, "loss": 0.278, "step": 39533 }, { "epoch": 4.019316795445303, "grad_norm": 0.2720610499382019, "learning_rate": 1.126785282002884e-06, "loss": 0.2954, "step": 39534 }, { "epoch": 4.019418462789752, "grad_norm": 0.3049268424510956, "learning_rate": 1.1265608622720564e-06, "loss": 0.2836, "step": 39535 }, { "epoch": 4.019520130134201, "grad_norm": 0.2708372175693512, "learning_rate": 1.126336462054694e-06, "loss": 0.3298, "step": 39536 }, { "epoch": 4.01962179747865, "grad_norm": 0.2870011627674103, "learning_rate": 1.1261120813519282e-06, "loss": 0.2943, "step": 39537 }, { "epoch": 4.019723464823099, "grad_norm": 0.2963393032550812, "learning_rate": 1.1258877201648877e-06, "loss": 0.3022, "step": 39538 }, { "epoch": 4.019825132167548, "grad_norm": 0.2768692970275879, "learning_rate": 1.1256633784947058e-06, "loss": 0.2875, "step": 39539 }, { "epoch": 4.019926799511997, "grad_norm": 0.30627232789993286, "learning_rate": 1.125439056342511e-06, "loss": 0.2968, "step": 39540 }, { "epoch": 4.020028466856446, "grad_norm": 0.2832462787628174, "learning_rate": 1.1252147537094321e-06, "loss": 0.2604, "step": 39541 }, { "epoch": 4.020130134200895, "grad_norm": 0.2977999746799469, "learning_rate": 1.124990470596602e-06, "loss": 0.2767, "step": 39542 }, { "epoch": 4.020231801545344, "grad_norm": 0.2881426215171814, "learning_rate": 1.124766207005149e-06, "loss": 0.309, "step": 39543 }, { "epoch": 4.020333468889793, "grad_norm": 0.27842146158218384, "learning_rate": 1.124541962936202e-06, "loss": 0.2882, "step": 39544 }, { "epoch": 4.020435136234242, "grad_norm": 0.2901998460292816, "learning_rate": 1.124317738390892e-06, "loss": 0.3061, "step": 39545 }, { "epoch": 4.020536803578691, "grad_norm": 0.2751842737197876, "learning_rate": 1.124093533370349e-06, "loss": 0.301, "step": 39546 }, { "epoch": 4.0206384709231395, "grad_norm": 0.2698412537574768, "learning_rate": 1.1238693478757019e-06, "loss": 0.2967, "step": 39547 }, { "epoch": 4.0207401382675885, "grad_norm": 0.29083436727523804, "learning_rate": 1.123645181908078e-06, "loss": 0.2796, "step": 39548 }, { "epoch": 4.020841805612037, "grad_norm": 0.27222439646720886, "learning_rate": 1.1234210354686098e-06, "loss": 0.2872, "step": 39549 }, { "epoch": 4.020943472956486, "grad_norm": 0.26463645696640015, "learning_rate": 1.1231969085584254e-06, "loss": 0.2899, "step": 39550 }, { "epoch": 4.021045140300935, "grad_norm": 0.272085577249527, "learning_rate": 1.1229728011786528e-06, "loss": 0.2726, "step": 39551 }, { "epoch": 4.021146807645384, "grad_norm": 0.27896174788475037, "learning_rate": 1.1227487133304244e-06, "loss": 0.316, "step": 39552 }, { "epoch": 4.021248474989833, "grad_norm": 0.2668722867965698, "learning_rate": 1.1225246450148635e-06, "loss": 0.2863, "step": 39553 }, { "epoch": 4.021350142334282, "grad_norm": 0.2766744792461395, "learning_rate": 1.1223005962331019e-06, "loss": 0.317, "step": 39554 }, { "epoch": 4.021451809678731, "grad_norm": 0.30956581234931946, "learning_rate": 1.1220765669862705e-06, "loss": 0.2805, "step": 39555 }, { "epoch": 4.02155347702318, "grad_norm": 0.30774056911468506, "learning_rate": 1.121852557275495e-06, "loss": 0.3202, "step": 39556 }, { "epoch": 4.021655144367629, "grad_norm": 0.27431657910346985, "learning_rate": 1.1216285671019056e-06, "loss": 0.3056, "step": 39557 }, { "epoch": 4.021756811712078, "grad_norm": 0.2729108929634094, "learning_rate": 1.1214045964666276e-06, "loss": 0.3032, "step": 39558 }, { "epoch": 4.021858479056527, "grad_norm": 0.2843756675720215, "learning_rate": 1.1211806453707935e-06, "loss": 0.2742, "step": 39559 }, { "epoch": 4.021960146400976, "grad_norm": 0.2745427191257477, "learning_rate": 1.1209567138155292e-06, "loss": 0.3145, "step": 39560 }, { "epoch": 4.022061813745425, "grad_norm": 0.28578388690948486, "learning_rate": 1.120732801801962e-06, "loss": 0.2801, "step": 39561 }, { "epoch": 4.022163481089874, "grad_norm": 0.2758895456790924, "learning_rate": 1.120508909331224e-06, "loss": 0.3368, "step": 39562 }, { "epoch": 4.022265148434323, "grad_norm": 0.2624945044517517, "learning_rate": 1.120285036404437e-06, "loss": 0.295, "step": 39563 }, { "epoch": 4.0223668157787715, "grad_norm": 0.30852481722831726, "learning_rate": 1.1200611830227327e-06, "loss": 0.2855, "step": 39564 }, { "epoch": 4.0224684831232205, "grad_norm": 0.260540246963501, "learning_rate": 1.1198373491872406e-06, "loss": 0.3071, "step": 39565 }, { "epoch": 4.022570150467669, "grad_norm": 0.2562175393104553, "learning_rate": 1.1196135348990827e-06, "loss": 0.3152, "step": 39566 }, { "epoch": 4.022671817812118, "grad_norm": 0.28978773951530457, "learning_rate": 1.1193897401593918e-06, "loss": 0.3025, "step": 39567 }, { "epoch": 4.022773485156567, "grad_norm": 0.27939048409461975, "learning_rate": 1.119165964969291e-06, "loss": 0.2752, "step": 39568 }, { "epoch": 4.022875152501016, "grad_norm": 0.27325472235679626, "learning_rate": 1.1189422093299118e-06, "loss": 0.2964, "step": 39569 }, { "epoch": 4.022976819845466, "grad_norm": 0.28723374009132385, "learning_rate": 1.1187184732423795e-06, "loss": 0.2983, "step": 39570 }, { "epoch": 4.023078487189915, "grad_norm": 0.29433712363243103, "learning_rate": 1.1184947567078197e-06, "loss": 0.2771, "step": 39571 }, { "epoch": 4.023180154534364, "grad_norm": 0.27386465668678284, "learning_rate": 1.1182710597273632e-06, "loss": 0.3064, "step": 39572 }, { "epoch": 4.023281821878813, "grad_norm": 0.24929606914520264, "learning_rate": 1.1180473823021327e-06, "loss": 0.2841, "step": 39573 }, { "epoch": 4.023383489223262, "grad_norm": 0.2733490467071533, "learning_rate": 1.1178237244332563e-06, "loss": 0.2797, "step": 39574 }, { "epoch": 4.023485156567711, "grad_norm": 0.27796971797943115, "learning_rate": 1.1176000861218645e-06, "loss": 0.3035, "step": 39575 }, { "epoch": 4.02358682391216, "grad_norm": 0.2716352343559265, "learning_rate": 1.1173764673690785e-06, "loss": 0.2917, "step": 39576 }, { "epoch": 4.023688491256609, "grad_norm": 0.28128015995025635, "learning_rate": 1.117152868176028e-06, "loss": 0.2929, "step": 39577 }, { "epoch": 4.023790158601058, "grad_norm": 0.2630608379840851, "learning_rate": 1.116929288543837e-06, "loss": 0.2892, "step": 39578 }, { "epoch": 4.0238918259455065, "grad_norm": 0.2660537660121918, "learning_rate": 1.1167057284736355e-06, "loss": 0.3546, "step": 39579 }, { "epoch": 4.0239934932899555, "grad_norm": 0.26910415291786194, "learning_rate": 1.1164821879665477e-06, "loss": 0.2976, "step": 39580 }, { "epoch": 4.024095160634404, "grad_norm": 0.26737654209136963, "learning_rate": 1.116258667023698e-06, "loss": 0.2684, "step": 39581 }, { "epoch": 4.024196827978853, "grad_norm": 0.2841056287288666, "learning_rate": 1.1160351656462177e-06, "loss": 0.3023, "step": 39582 }, { "epoch": 4.024298495323302, "grad_norm": 0.2970885634422302, "learning_rate": 1.1158116838352256e-06, "loss": 0.3254, "step": 39583 }, { "epoch": 4.024400162667751, "grad_norm": 0.29252511262893677, "learning_rate": 1.1155882215918517e-06, "loss": 0.2635, "step": 39584 }, { "epoch": 4.0245018300122, "grad_norm": 0.29681727290153503, "learning_rate": 1.1153647789172239e-06, "loss": 0.2913, "step": 39585 }, { "epoch": 4.024603497356649, "grad_norm": 0.2707211971282959, "learning_rate": 1.1151413558124625e-06, "loss": 0.2921, "step": 39586 }, { "epoch": 4.024705164701098, "grad_norm": 0.2589132785797119, "learning_rate": 1.1149179522786974e-06, "loss": 0.3064, "step": 39587 }, { "epoch": 4.024806832045547, "grad_norm": 0.26321783661842346, "learning_rate": 1.1146945683170524e-06, "loss": 0.3396, "step": 39588 }, { "epoch": 4.024908499389996, "grad_norm": 0.29183870553970337, "learning_rate": 1.1144712039286508e-06, "loss": 0.2709, "step": 39589 }, { "epoch": 4.025010166734445, "grad_norm": 0.26000604033470154, "learning_rate": 1.1142478591146221e-06, "loss": 0.2786, "step": 39590 }, { "epoch": 4.025111834078894, "grad_norm": 0.28548604249954224, "learning_rate": 1.1140245338760874e-06, "loss": 0.2934, "step": 39591 }, { "epoch": 4.025213501423343, "grad_norm": 0.27395039796829224, "learning_rate": 1.1138012282141763e-06, "loss": 0.3277, "step": 39592 }, { "epoch": 4.025315168767792, "grad_norm": 0.2814508378505707, "learning_rate": 1.1135779421300092e-06, "loss": 0.2908, "step": 39593 }, { "epoch": 4.025416836112241, "grad_norm": 0.2768040895462036, "learning_rate": 1.113354675624712e-06, "loss": 0.3102, "step": 39594 }, { "epoch": 4.02551850345669, "grad_norm": 0.2607981264591217, "learning_rate": 1.1131314286994132e-06, "loss": 0.3302, "step": 39595 }, { "epoch": 4.0256201708011385, "grad_norm": 0.2723328173160553, "learning_rate": 1.1129082013552322e-06, "loss": 0.2912, "step": 39596 }, { "epoch": 4.0257218381455875, "grad_norm": 0.276784211397171, "learning_rate": 1.112684993593297e-06, "loss": 0.2922, "step": 39597 }, { "epoch": 4.025823505490036, "grad_norm": 0.26949477195739746, "learning_rate": 1.1124618054147319e-06, "loss": 0.3133, "step": 39598 }, { "epoch": 4.025925172834485, "grad_norm": 0.2518152594566345, "learning_rate": 1.1122386368206577e-06, "loss": 0.3108, "step": 39599 }, { "epoch": 4.026026840178934, "grad_norm": 0.2741268575191498, "learning_rate": 1.1120154878122037e-06, "loss": 0.2881, "step": 39600 }, { "epoch": 4.026128507523383, "grad_norm": 0.27396562695503235, "learning_rate": 1.11179235839049e-06, "loss": 0.3149, "step": 39601 }, { "epoch": 4.026230174867832, "grad_norm": 0.27707263827323914, "learning_rate": 1.1115692485566454e-06, "loss": 0.3176, "step": 39602 }, { "epoch": 4.026331842212281, "grad_norm": 0.30200085043907166, "learning_rate": 1.111346158311788e-06, "loss": 0.2765, "step": 39603 }, { "epoch": 4.02643350955673, "grad_norm": 0.2820066511631012, "learning_rate": 1.1111230876570446e-06, "loss": 0.3027, "step": 39604 }, { "epoch": 4.026535176901179, "grad_norm": 0.2900747060775757, "learning_rate": 1.1109000365935413e-06, "loss": 0.2949, "step": 39605 }, { "epoch": 4.026636844245628, "grad_norm": 0.2663250267505646, "learning_rate": 1.1106770051223975e-06, "loss": 0.2904, "step": 39606 }, { "epoch": 4.026738511590077, "grad_norm": 0.3146231472492218, "learning_rate": 1.1104539932447394e-06, "loss": 0.2734, "step": 39607 }, { "epoch": 4.026840178934526, "grad_norm": 0.27083706855773926, "learning_rate": 1.11023100096169e-06, "loss": 0.2921, "step": 39608 }, { "epoch": 4.026941846278975, "grad_norm": 0.28580591082572937, "learning_rate": 1.1100080282743708e-06, "loss": 0.3017, "step": 39609 }, { "epoch": 4.027043513623424, "grad_norm": 0.26970717310905457, "learning_rate": 1.1097850751839083e-06, "loss": 0.2756, "step": 39610 }, { "epoch": 4.0271451809678736, "grad_norm": 0.2628212869167328, "learning_rate": 1.1095621416914249e-06, "loss": 0.2652, "step": 39611 }, { "epoch": 4.0272468483123225, "grad_norm": 0.2691853642463684, "learning_rate": 1.1093392277980418e-06, "loss": 0.2812, "step": 39612 }, { "epoch": 4.027348515656771, "grad_norm": 0.2857804298400879, "learning_rate": 1.1091163335048822e-06, "loss": 0.3253, "step": 39613 }, { "epoch": 4.02745018300122, "grad_norm": 0.285436749458313, "learning_rate": 1.1088934588130702e-06, "loss": 0.293, "step": 39614 }, { "epoch": 4.027551850345669, "grad_norm": 0.2739144265651703, "learning_rate": 1.1086706037237304e-06, "loss": 0.3054, "step": 39615 }, { "epoch": 4.027653517690118, "grad_norm": 0.28736111521720886, "learning_rate": 1.1084477682379812e-06, "loss": 0.2781, "step": 39616 }, { "epoch": 4.027755185034567, "grad_norm": 0.2632482945919037, "learning_rate": 1.1082249523569483e-06, "loss": 0.3029, "step": 39617 }, { "epoch": 4.027856852379016, "grad_norm": 0.2849824130535126, "learning_rate": 1.1080021560817533e-06, "loss": 0.3141, "step": 39618 }, { "epoch": 4.027958519723465, "grad_norm": 0.2774367034435272, "learning_rate": 1.1077793794135177e-06, "loss": 0.2782, "step": 39619 }, { "epoch": 4.028060187067914, "grad_norm": 0.27765271067619324, "learning_rate": 1.1075566223533656e-06, "loss": 0.3016, "step": 39620 }, { "epoch": 4.028161854412363, "grad_norm": 0.2610187828540802, "learning_rate": 1.1073338849024184e-06, "loss": 0.2994, "step": 39621 }, { "epoch": 4.028263521756812, "grad_norm": 0.2766155004501343, "learning_rate": 1.107111167061798e-06, "loss": 0.319, "step": 39622 }, { "epoch": 4.028365189101261, "grad_norm": 0.2540788948535919, "learning_rate": 1.1068884688326247e-06, "loss": 0.2813, "step": 39623 }, { "epoch": 4.02846685644571, "grad_norm": 0.2745083272457123, "learning_rate": 1.1066657902160238e-06, "loss": 0.3262, "step": 39624 }, { "epoch": 4.028568523790159, "grad_norm": 0.27337902784347534, "learning_rate": 1.1064431312131158e-06, "loss": 0.281, "step": 39625 }, { "epoch": 4.028670191134608, "grad_norm": 0.28306517004966736, "learning_rate": 1.1062204918250203e-06, "loss": 0.2794, "step": 39626 }, { "epoch": 4.028771858479057, "grad_norm": 0.3000076115131378, "learning_rate": 1.1059978720528618e-06, "loss": 0.3389, "step": 39627 }, { "epoch": 4.0288735258235056, "grad_norm": 0.2692834436893463, "learning_rate": 1.1057752718977616e-06, "loss": 0.2923, "step": 39628 }, { "epoch": 4.0289751931679545, "grad_norm": 0.2878580391407013, "learning_rate": 1.1055526913608378e-06, "loss": 0.3131, "step": 39629 }, { "epoch": 4.029076860512403, "grad_norm": 0.2733762264251709, "learning_rate": 1.1053301304432158e-06, "loss": 0.2597, "step": 39630 }, { "epoch": 4.029178527856852, "grad_norm": 0.275715172290802, "learning_rate": 1.105107589146015e-06, "loss": 0.2923, "step": 39631 }, { "epoch": 4.029280195201301, "grad_norm": 0.26419195532798767, "learning_rate": 1.1048850674703571e-06, "loss": 0.3053, "step": 39632 }, { "epoch": 4.02938186254575, "grad_norm": 0.28838714957237244, "learning_rate": 1.104662565417361e-06, "loss": 0.3006, "step": 39633 }, { "epoch": 4.029483529890199, "grad_norm": 0.2795570194721222, "learning_rate": 1.1044400829881502e-06, "loss": 0.309, "step": 39634 }, { "epoch": 4.029585197234648, "grad_norm": 0.310674250125885, "learning_rate": 1.1042176201838444e-06, "loss": 0.2903, "step": 39635 }, { "epoch": 4.029686864579097, "grad_norm": 0.2955787777900696, "learning_rate": 1.103995177005564e-06, "loss": 0.2784, "step": 39636 }, { "epoch": 4.029788531923546, "grad_norm": 0.270052433013916, "learning_rate": 1.1037727534544307e-06, "loss": 0.2989, "step": 39637 }, { "epoch": 4.029890199267995, "grad_norm": 0.28008267283439636, "learning_rate": 1.1035503495315647e-06, "loss": 0.3079, "step": 39638 }, { "epoch": 4.029991866612444, "grad_norm": 0.2871025502681732, "learning_rate": 1.1033279652380847e-06, "loss": 0.2826, "step": 39639 }, { "epoch": 4.030093533956893, "grad_norm": 0.2764328420162201, "learning_rate": 1.1031056005751134e-06, "loss": 0.2994, "step": 39640 }, { "epoch": 4.030195201301342, "grad_norm": 0.27494218945503235, "learning_rate": 1.1028832555437702e-06, "loss": 0.3137, "step": 39641 }, { "epoch": 4.030296868645791, "grad_norm": 0.2741275131702423, "learning_rate": 1.102660930145175e-06, "loss": 0.2793, "step": 39642 }, { "epoch": 4.03039853599024, "grad_norm": 0.279069721698761, "learning_rate": 1.102438624380447e-06, "loss": 0.2856, "step": 39643 }, { "epoch": 4.030500203334689, "grad_norm": 0.2827463746070862, "learning_rate": 1.1022163382507084e-06, "loss": 0.3042, "step": 39644 }, { "epoch": 4.0306018706791376, "grad_norm": 0.26831334829330444, "learning_rate": 1.101994071757077e-06, "loss": 0.2832, "step": 39645 }, { "epoch": 4.0307035380235865, "grad_norm": 0.29235532879829407, "learning_rate": 1.101771824900672e-06, "loss": 0.297, "step": 39646 }, { "epoch": 4.030805205368035, "grad_norm": 0.2923334538936615, "learning_rate": 1.1015495976826158e-06, "loss": 0.2858, "step": 39647 }, { "epoch": 4.030906872712484, "grad_norm": 0.2687035799026489, "learning_rate": 1.1013273901040262e-06, "loss": 0.3183, "step": 39648 }, { "epoch": 4.031008540056933, "grad_norm": 0.2723826766014099, "learning_rate": 1.1011052021660212e-06, "loss": 0.2741, "step": 39649 }, { "epoch": 4.031110207401382, "grad_norm": 0.29898732900619507, "learning_rate": 1.1008830338697236e-06, "loss": 0.2914, "step": 39650 }, { "epoch": 4.031211874745831, "grad_norm": 0.280956894159317, "learning_rate": 1.1006608852162505e-06, "loss": 0.2777, "step": 39651 }, { "epoch": 4.031313542090281, "grad_norm": 0.2969217300415039, "learning_rate": 1.1004387562067204e-06, "loss": 0.2831, "step": 39652 }, { "epoch": 4.03141520943473, "grad_norm": 0.2975991368293762, "learning_rate": 1.1002166468422526e-06, "loss": 0.2868, "step": 39653 }, { "epoch": 4.031516876779179, "grad_norm": 0.2765580117702484, "learning_rate": 1.0999945571239678e-06, "loss": 0.2589, "step": 39654 }, { "epoch": 4.031618544123628, "grad_norm": 0.2745186388492584, "learning_rate": 1.0997724870529835e-06, "loss": 0.3006, "step": 39655 }, { "epoch": 4.031720211468077, "grad_norm": 0.2824530601501465, "learning_rate": 1.0995504366304176e-06, "loss": 0.2517, "step": 39656 }, { "epoch": 4.031821878812526, "grad_norm": 0.28686949610710144, "learning_rate": 1.0993284058573905e-06, "loss": 0.302, "step": 39657 }, { "epoch": 4.031923546156975, "grad_norm": 0.2774696946144104, "learning_rate": 1.0991063947350206e-06, "loss": 0.2793, "step": 39658 }, { "epoch": 4.032025213501424, "grad_norm": 0.29369255900382996, "learning_rate": 1.098884403264424e-06, "loss": 0.2841, "step": 39659 }, { "epoch": 4.032126880845873, "grad_norm": 0.28826552629470825, "learning_rate": 1.098662431446722e-06, "loss": 0.3118, "step": 39660 }, { "epoch": 4.0322285481903215, "grad_norm": 0.2628030478954315, "learning_rate": 1.0984404792830317e-06, "loss": 0.3155, "step": 39661 }, { "epoch": 4.0323302155347704, "grad_norm": 0.28131362795829773, "learning_rate": 1.098218546774471e-06, "loss": 0.2946, "step": 39662 }, { "epoch": 4.032431882879219, "grad_norm": 0.2736762464046478, "learning_rate": 1.097996633922157e-06, "loss": 0.2654, "step": 39663 }, { "epoch": 4.032533550223668, "grad_norm": 0.31111177802085876, "learning_rate": 1.0977747407272093e-06, "loss": 0.2504, "step": 39664 }, { "epoch": 4.032635217568117, "grad_norm": 0.28719285130500793, "learning_rate": 1.097552867190746e-06, "loss": 0.2785, "step": 39665 }, { "epoch": 4.032736884912566, "grad_norm": 0.2699090838432312, "learning_rate": 1.097331013313882e-06, "loss": 0.3015, "step": 39666 }, { "epoch": 4.032838552257015, "grad_norm": 0.3144531846046448, "learning_rate": 1.097109179097739e-06, "loss": 0.3171, "step": 39667 }, { "epoch": 4.032940219601464, "grad_norm": 0.2855943739414215, "learning_rate": 1.096887364543432e-06, "loss": 0.2995, "step": 39668 }, { "epoch": 4.033041886945913, "grad_norm": 0.28548920154571533, "learning_rate": 1.0966655696520779e-06, "loss": 0.2922, "step": 39669 }, { "epoch": 4.033143554290362, "grad_norm": 0.27516481280326843, "learning_rate": 1.0964437944247957e-06, "loss": 0.2924, "step": 39670 }, { "epoch": 4.033245221634811, "grad_norm": 0.2951424717903137, "learning_rate": 1.0962220388627027e-06, "loss": 0.2906, "step": 39671 }, { "epoch": 4.03334688897926, "grad_norm": 0.3119240701198578, "learning_rate": 1.0960003029669152e-06, "loss": 0.2964, "step": 39672 }, { "epoch": 4.033448556323709, "grad_norm": 0.2559446394443512, "learning_rate": 1.095778586738549e-06, "loss": 0.3486, "step": 39673 }, { "epoch": 4.033550223668158, "grad_norm": 0.26768961548805237, "learning_rate": 1.095556890178724e-06, "loss": 0.2822, "step": 39674 }, { "epoch": 4.033651891012607, "grad_norm": 0.28776445984840393, "learning_rate": 1.0953352132885558e-06, "loss": 0.2762, "step": 39675 }, { "epoch": 4.033753558357056, "grad_norm": 0.2855885922908783, "learning_rate": 1.095113556069159e-06, "loss": 0.2806, "step": 39676 }, { "epoch": 4.033855225701505, "grad_norm": 0.2652535140514374, "learning_rate": 1.094891918521654e-06, "loss": 0.3064, "step": 39677 }, { "epoch": 4.0339568930459535, "grad_norm": 0.25406983494758606, "learning_rate": 1.0946703006471553e-06, "loss": 0.278, "step": 39678 }, { "epoch": 4.0340585603904024, "grad_norm": 0.269073486328125, "learning_rate": 1.0944487024467782e-06, "loss": 0.2617, "step": 39679 }, { "epoch": 4.034160227734851, "grad_norm": 0.30268386006355286, "learning_rate": 1.0942271239216418e-06, "loss": 0.2903, "step": 39680 }, { "epoch": 4.0342618950793, "grad_norm": 0.29372501373291016, "learning_rate": 1.094005565072861e-06, "loss": 0.3031, "step": 39681 }, { "epoch": 4.034363562423749, "grad_norm": 0.2769598960876465, "learning_rate": 1.093784025901552e-06, "loss": 0.2975, "step": 39682 }, { "epoch": 4.034465229768198, "grad_norm": 0.27068397402763367, "learning_rate": 1.0935625064088295e-06, "loss": 0.276, "step": 39683 }, { "epoch": 4.034566897112647, "grad_norm": 0.26899293065071106, "learning_rate": 1.0933410065958117e-06, "loss": 0.2771, "step": 39684 }, { "epoch": 4.034668564457096, "grad_norm": 0.2696976959705353, "learning_rate": 1.093119526463614e-06, "loss": 0.2883, "step": 39685 }, { "epoch": 4.034770231801545, "grad_norm": 0.2651655972003937, "learning_rate": 1.0928980660133504e-06, "loss": 0.2934, "step": 39686 }, { "epoch": 4.034871899145994, "grad_norm": 0.2801772952079773, "learning_rate": 1.092676625246139e-06, "loss": 0.3121, "step": 39687 }, { "epoch": 4.034973566490443, "grad_norm": 0.2777229845523834, "learning_rate": 1.0924552041630943e-06, "loss": 0.2868, "step": 39688 }, { "epoch": 4.035075233834892, "grad_norm": 0.2914676070213318, "learning_rate": 1.0922338027653306e-06, "loss": 0.3068, "step": 39689 }, { "epoch": 4.035176901179341, "grad_norm": 0.2693735361099243, "learning_rate": 1.092012421053965e-06, "loss": 0.2863, "step": 39690 }, { "epoch": 4.03527856852379, "grad_norm": 0.25802701711654663, "learning_rate": 1.0917910590301128e-06, "loss": 0.282, "step": 39691 }, { "epoch": 4.035380235868239, "grad_norm": 0.2912968397140503, "learning_rate": 1.0915697166948886e-06, "loss": 0.2945, "step": 39692 }, { "epoch": 4.0354819032126885, "grad_norm": 0.27658557891845703, "learning_rate": 1.0913483940494058e-06, "loss": 0.2672, "step": 39693 }, { "epoch": 4.0355835705571375, "grad_norm": 0.26487359404563904, "learning_rate": 1.0911270910947825e-06, "loss": 0.2975, "step": 39694 }, { "epoch": 4.035685237901586, "grad_norm": 0.3009175658226013, "learning_rate": 1.0909058078321315e-06, "loss": 0.2864, "step": 39695 }, { "epoch": 4.035786905246035, "grad_norm": 0.2672156095504761, "learning_rate": 1.090684544262567e-06, "loss": 0.3196, "step": 39696 }, { "epoch": 4.035888572590484, "grad_norm": 0.26176315546035767, "learning_rate": 1.0904633003872061e-06, "loss": 0.3079, "step": 39697 }, { "epoch": 4.035990239934933, "grad_norm": 0.27117040753364563, "learning_rate": 1.0902420762071626e-06, "loss": 0.2671, "step": 39698 }, { "epoch": 4.036091907279382, "grad_norm": 0.2513878047466278, "learning_rate": 1.0900208717235484e-06, "loss": 0.283, "step": 39699 }, { "epoch": 4.036193574623831, "grad_norm": 0.27979475259780884, "learning_rate": 1.089799686937481e-06, "loss": 0.2817, "step": 39700 }, { "epoch": 4.03629524196828, "grad_norm": 0.2999032437801361, "learning_rate": 1.0895785218500742e-06, "loss": 0.3118, "step": 39701 }, { "epoch": 4.036396909312729, "grad_norm": 0.2780911326408386, "learning_rate": 1.0893573764624415e-06, "loss": 0.2909, "step": 39702 }, { "epoch": 4.036498576657178, "grad_norm": 0.27293291687965393, "learning_rate": 1.0891362507756953e-06, "loss": 0.297, "step": 39703 }, { "epoch": 4.036600244001627, "grad_norm": 0.28031599521636963, "learning_rate": 1.088915144790953e-06, "loss": 0.2587, "step": 39704 }, { "epoch": 4.036701911346076, "grad_norm": 0.2808375060558319, "learning_rate": 1.088694058509327e-06, "loss": 0.3005, "step": 39705 }, { "epoch": 4.036803578690525, "grad_norm": 0.24946282804012299, "learning_rate": 1.0884729919319287e-06, "loss": 0.3282, "step": 39706 }, { "epoch": 4.036905246034974, "grad_norm": 0.26587849855422974, "learning_rate": 1.088251945059876e-06, "loss": 0.3143, "step": 39707 }, { "epoch": 4.037006913379423, "grad_norm": 0.2753294110298157, "learning_rate": 1.0880309178942805e-06, "loss": 0.2984, "step": 39708 }, { "epoch": 4.037108580723872, "grad_norm": 0.26966559886932373, "learning_rate": 1.087809910436255e-06, "loss": 0.2924, "step": 39709 }, { "epoch": 4.0372102480683205, "grad_norm": 0.28646141290664673, "learning_rate": 1.0875889226869124e-06, "loss": 0.3251, "step": 39710 }, { "epoch": 4.0373119154127695, "grad_norm": 0.26702389121055603, "learning_rate": 1.0873679546473686e-06, "loss": 0.334, "step": 39711 }, { "epoch": 4.037413582757218, "grad_norm": 0.2777080833911896, "learning_rate": 1.087147006318735e-06, "loss": 0.307, "step": 39712 }, { "epoch": 4.037515250101667, "grad_norm": 0.2895309329032898, "learning_rate": 1.086926077702124e-06, "loss": 0.2618, "step": 39713 }, { "epoch": 4.037616917446116, "grad_norm": 0.3015710711479187, "learning_rate": 1.086705168798652e-06, "loss": 0.3002, "step": 39714 }, { "epoch": 4.037718584790565, "grad_norm": 0.2867431640625, "learning_rate": 1.0864842796094266e-06, "loss": 0.2967, "step": 39715 }, { "epoch": 4.037820252135014, "grad_norm": 0.2732026278972626, "learning_rate": 1.0862634101355635e-06, "loss": 0.3041, "step": 39716 }, { "epoch": 4.037921919479463, "grad_norm": 0.2995317876338959, "learning_rate": 1.0860425603781783e-06, "loss": 0.2875, "step": 39717 }, { "epoch": 4.038023586823912, "grad_norm": 0.28328821063041687, "learning_rate": 1.0858217303383773e-06, "loss": 0.3048, "step": 39718 }, { "epoch": 4.038125254168361, "grad_norm": 0.27762550115585327, "learning_rate": 1.085600920017278e-06, "loss": 0.2767, "step": 39719 }, { "epoch": 4.03822692151281, "grad_norm": 0.28212806582450867, "learning_rate": 1.0853801294159894e-06, "loss": 0.2828, "step": 39720 }, { "epoch": 4.038328588857259, "grad_norm": 0.2757195830345154, "learning_rate": 1.0851593585356267e-06, "loss": 0.286, "step": 39721 }, { "epoch": 4.038430256201708, "grad_norm": 0.3015812337398529, "learning_rate": 1.0849386073773017e-06, "loss": 0.318, "step": 39722 }, { "epoch": 4.038531923546157, "grad_norm": 0.27202171087265015, "learning_rate": 1.0847178759421233e-06, "loss": 0.2996, "step": 39723 }, { "epoch": 4.038633590890606, "grad_norm": 0.2656066417694092, "learning_rate": 1.0844971642312091e-06, "loss": 0.2882, "step": 39724 }, { "epoch": 4.038735258235055, "grad_norm": 0.2851238250732422, "learning_rate": 1.084276472245665e-06, "loss": 0.3113, "step": 39725 }, { "epoch": 4.038836925579504, "grad_norm": 0.2497331202030182, "learning_rate": 1.0840557999866053e-06, "loss": 0.2752, "step": 39726 }, { "epoch": 4.0389385929239525, "grad_norm": 0.2909294068813324, "learning_rate": 1.0838351474551445e-06, "loss": 0.3, "step": 39727 }, { "epoch": 4.0390402602684015, "grad_norm": 0.2730120122432709, "learning_rate": 1.083614514652389e-06, "loss": 0.3044, "step": 39728 }, { "epoch": 4.03914192761285, "grad_norm": 0.2909708023071289, "learning_rate": 1.0833939015794547e-06, "loss": 0.2823, "step": 39729 }, { "epoch": 4.039243594957299, "grad_norm": 0.28342118859291077, "learning_rate": 1.0831733082374496e-06, "loss": 0.2682, "step": 39730 }, { "epoch": 4.039345262301748, "grad_norm": 0.2812001407146454, "learning_rate": 1.0829527346274876e-06, "loss": 0.3156, "step": 39731 }, { "epoch": 4.039446929646197, "grad_norm": 0.298806756734848, "learning_rate": 1.082732180750679e-06, "loss": 0.3043, "step": 39732 }, { "epoch": 4.039548596990646, "grad_norm": 0.2920572757720947, "learning_rate": 1.0825116466081338e-06, "loss": 0.2703, "step": 39733 }, { "epoch": 4.039650264335096, "grad_norm": 0.2923663556575775, "learning_rate": 1.0822911322009666e-06, "loss": 0.2889, "step": 39734 }, { "epoch": 4.039751931679545, "grad_norm": 0.2867911756038666, "learning_rate": 1.0820706375302825e-06, "loss": 0.3197, "step": 39735 }, { "epoch": 4.039853599023994, "grad_norm": 0.32004866003990173, "learning_rate": 1.0818501625971962e-06, "loss": 0.2774, "step": 39736 }, { "epoch": 4.039955266368443, "grad_norm": 0.28399959206581116, "learning_rate": 1.0816297074028199e-06, "loss": 0.2771, "step": 39737 }, { "epoch": 4.040056933712892, "grad_norm": 0.2821289300918579, "learning_rate": 1.0814092719482593e-06, "loss": 0.2825, "step": 39738 }, { "epoch": 4.040158601057341, "grad_norm": 0.266121506690979, "learning_rate": 1.081188856234629e-06, "loss": 0.3111, "step": 39739 }, { "epoch": 4.04026026840179, "grad_norm": 0.2735390067100525, "learning_rate": 1.0809684602630383e-06, "loss": 0.2854, "step": 39740 }, { "epoch": 4.040361935746239, "grad_norm": 0.28520670533180237, "learning_rate": 1.0807480840345952e-06, "loss": 0.3072, "step": 39741 }, { "epoch": 4.0404636030906875, "grad_norm": 0.28992196917533875, "learning_rate": 1.0805277275504138e-06, "loss": 0.2966, "step": 39742 }, { "epoch": 4.0405652704351365, "grad_norm": 0.2751980721950531, "learning_rate": 1.0803073908116007e-06, "loss": 0.2586, "step": 39743 }, { "epoch": 4.040666937779585, "grad_norm": 0.29991963505744934, "learning_rate": 1.0800870738192704e-06, "loss": 0.2626, "step": 39744 }, { "epoch": 4.040768605124034, "grad_norm": 0.2807213068008423, "learning_rate": 1.0798667765745268e-06, "loss": 0.285, "step": 39745 }, { "epoch": 4.040870272468483, "grad_norm": 0.29394418001174927, "learning_rate": 1.0796464990784833e-06, "loss": 0.2967, "step": 39746 }, { "epoch": 4.040971939812932, "grad_norm": 0.28203049302101135, "learning_rate": 1.0794262413322514e-06, "loss": 0.3401, "step": 39747 }, { "epoch": 4.041073607157381, "grad_norm": 0.28768157958984375, "learning_rate": 1.0792060033369362e-06, "loss": 0.2619, "step": 39748 }, { "epoch": 4.04117527450183, "grad_norm": 0.305040568113327, "learning_rate": 1.0789857850936503e-06, "loss": 0.27, "step": 39749 }, { "epoch": 4.041276941846279, "grad_norm": 0.2668064832687378, "learning_rate": 1.0787655866035023e-06, "loss": 0.3083, "step": 39750 }, { "epoch": 4.041378609190728, "grad_norm": 0.25718486309051514, "learning_rate": 1.0785454078676006e-06, "loss": 0.2809, "step": 39751 }, { "epoch": 4.041480276535177, "grad_norm": 0.28844988346099854, "learning_rate": 1.0783252488870561e-06, "loss": 0.2911, "step": 39752 }, { "epoch": 4.041581943879626, "grad_norm": 0.2678295075893402, "learning_rate": 1.0781051096629775e-06, "loss": 0.3054, "step": 39753 }, { "epoch": 4.041683611224075, "grad_norm": 0.2778951823711395, "learning_rate": 1.0778849901964727e-06, "loss": 0.3093, "step": 39754 }, { "epoch": 4.041785278568524, "grad_norm": 0.2672460377216339, "learning_rate": 1.0776648904886505e-06, "loss": 0.3064, "step": 39755 }, { "epoch": 4.041886945912973, "grad_norm": 0.28122663497924805, "learning_rate": 1.0774448105406198e-06, "loss": 0.2871, "step": 39756 }, { "epoch": 4.041988613257422, "grad_norm": 0.27590370178222656, "learning_rate": 1.0772247503534938e-06, "loss": 0.2757, "step": 39757 }, { "epoch": 4.042090280601871, "grad_norm": 0.2695065438747406, "learning_rate": 1.0770047099283738e-06, "loss": 0.2851, "step": 39758 }, { "epoch": 4.0421919479463195, "grad_norm": 0.26535359025001526, "learning_rate": 1.0767846892663735e-06, "loss": 0.2931, "step": 39759 }, { "epoch": 4.0422936152907685, "grad_norm": 0.2940661609172821, "learning_rate": 1.0765646883685993e-06, "loss": 0.2849, "step": 39760 }, { "epoch": 4.042395282635217, "grad_norm": 0.25977587699890137, "learning_rate": 1.0763447072361588e-06, "loss": 0.3111, "step": 39761 }, { "epoch": 4.042496949979666, "grad_norm": 0.28915491700172424, "learning_rate": 1.0761247458701623e-06, "loss": 0.2919, "step": 39762 }, { "epoch": 4.042598617324115, "grad_norm": 0.26019832491874695, "learning_rate": 1.0759048042717174e-06, "loss": 0.2962, "step": 39763 }, { "epoch": 4.042700284668564, "grad_norm": 0.27576810121536255, "learning_rate": 1.0756848824419313e-06, "loss": 0.3266, "step": 39764 }, { "epoch": 4.042801952013013, "grad_norm": 0.27455490827560425, "learning_rate": 1.0754649803819106e-06, "loss": 0.286, "step": 39765 }, { "epoch": 4.042903619357462, "grad_norm": 0.2667903006076813, "learning_rate": 1.0752450980927654e-06, "loss": 0.3004, "step": 39766 }, { "epoch": 4.043005286701911, "grad_norm": 0.2821528911590576, "learning_rate": 1.0750252355756047e-06, "loss": 0.3161, "step": 39767 }, { "epoch": 4.04310695404636, "grad_norm": 0.2941545844078064, "learning_rate": 1.0748053928315315e-06, "loss": 0.3053, "step": 39768 }, { "epoch": 4.043208621390809, "grad_norm": 0.27539756894111633, "learning_rate": 1.0745855698616581e-06, "loss": 0.2967, "step": 39769 }, { "epoch": 4.043310288735258, "grad_norm": 0.2889636158943176, "learning_rate": 1.0743657666670892e-06, "loss": 0.3017, "step": 39770 }, { "epoch": 4.043411956079707, "grad_norm": 0.27696627378463745, "learning_rate": 1.0741459832489314e-06, "loss": 0.2575, "step": 39771 }, { "epoch": 4.043513623424156, "grad_norm": 0.27716246247291565, "learning_rate": 1.0739262196082945e-06, "loss": 0.2902, "step": 39772 }, { "epoch": 4.043615290768605, "grad_norm": 0.29547664523124695, "learning_rate": 1.0737064757462846e-06, "loss": 0.296, "step": 39773 }, { "epoch": 4.043716958113054, "grad_norm": 0.29530248045921326, "learning_rate": 1.073486751664008e-06, "loss": 0.2955, "step": 39774 }, { "epoch": 4.0438186254575035, "grad_norm": 0.28168952465057373, "learning_rate": 1.0732670473625705e-06, "loss": 0.2878, "step": 39775 }, { "epoch": 4.043920292801952, "grad_norm": 0.2765531539916992, "learning_rate": 1.0730473628430826e-06, "loss": 0.3323, "step": 39776 }, { "epoch": 4.044021960146401, "grad_norm": 0.28106215596199036, "learning_rate": 1.0728276981066489e-06, "loss": 0.2637, "step": 39777 }, { "epoch": 4.04412362749085, "grad_norm": 0.29288795590400696, "learning_rate": 1.0726080531543737e-06, "loss": 0.3231, "step": 39778 }, { "epoch": 4.044225294835299, "grad_norm": 0.2823050618171692, "learning_rate": 1.072388427987368e-06, "loss": 0.3155, "step": 39779 }, { "epoch": 4.044326962179748, "grad_norm": 0.29770389199256897, "learning_rate": 1.0721688226067361e-06, "loss": 0.2943, "step": 39780 }, { "epoch": 4.044428629524197, "grad_norm": 0.2946813702583313, "learning_rate": 1.0719492370135826e-06, "loss": 0.3008, "step": 39781 }, { "epoch": 4.044530296868646, "grad_norm": 0.2938610315322876, "learning_rate": 1.071729671209017e-06, "loss": 0.2827, "step": 39782 }, { "epoch": 4.044631964213095, "grad_norm": 0.2736428380012512, "learning_rate": 1.0715101251941434e-06, "loss": 0.2914, "step": 39783 }, { "epoch": 4.044733631557544, "grad_norm": 0.29471540451049805, "learning_rate": 1.0712905989700683e-06, "loss": 0.2939, "step": 39784 }, { "epoch": 4.044835298901993, "grad_norm": 0.28624334931373596, "learning_rate": 1.0710710925378964e-06, "loss": 0.2758, "step": 39785 }, { "epoch": 4.044936966246442, "grad_norm": 0.2762000858783722, "learning_rate": 1.0708516058987361e-06, "loss": 0.2766, "step": 39786 }, { "epoch": 4.045038633590891, "grad_norm": 0.29125502705574036, "learning_rate": 1.0706321390536917e-06, "loss": 0.282, "step": 39787 }, { "epoch": 4.04514030093534, "grad_norm": 0.2803501486778259, "learning_rate": 1.0704126920038676e-06, "loss": 0.2949, "step": 39788 }, { "epoch": 4.045241968279789, "grad_norm": 0.26264330744743347, "learning_rate": 1.0701932647503715e-06, "loss": 0.3249, "step": 39789 }, { "epoch": 4.045343635624238, "grad_norm": 0.2931275963783264, "learning_rate": 1.069973857294308e-06, "loss": 0.2828, "step": 39790 }, { "epoch": 4.0454453029686865, "grad_norm": 0.3024100959300995, "learning_rate": 1.0697544696367812e-06, "loss": 0.317, "step": 39791 }, { "epoch": 4.0455469703131355, "grad_norm": 0.30568835139274597, "learning_rate": 1.0695351017788985e-06, "loss": 0.3231, "step": 39792 }, { "epoch": 4.045648637657584, "grad_norm": 0.25964972376823425, "learning_rate": 1.0693157537217641e-06, "loss": 0.285, "step": 39793 }, { "epoch": 4.045750305002033, "grad_norm": 0.30789634585380554, "learning_rate": 1.0690964254664826e-06, "loss": 0.2635, "step": 39794 }, { "epoch": 4.045851972346482, "grad_norm": 0.28322330117225647, "learning_rate": 1.068877117014158e-06, "loss": 0.2749, "step": 39795 }, { "epoch": 4.045953639690931, "grad_norm": 0.2552069127559662, "learning_rate": 1.0686578283658976e-06, "loss": 0.3047, "step": 39796 }, { "epoch": 4.04605530703538, "grad_norm": 0.2662833333015442, "learning_rate": 1.0684385595228046e-06, "loss": 0.3063, "step": 39797 }, { "epoch": 4.046156974379829, "grad_norm": 0.2820882201194763, "learning_rate": 1.0682193104859828e-06, "loss": 0.266, "step": 39798 }, { "epoch": 4.046258641724278, "grad_norm": 0.26890408992767334, "learning_rate": 1.0680000812565393e-06, "loss": 0.2889, "step": 39799 }, { "epoch": 4.046360309068727, "grad_norm": 0.2740161716938019, "learning_rate": 1.067780871835577e-06, "loss": 0.3112, "step": 39800 }, { "epoch": 4.046461976413176, "grad_norm": 0.273078590631485, "learning_rate": 1.0675616822241985e-06, "loss": 0.2779, "step": 39801 }, { "epoch": 4.046563643757625, "grad_norm": 0.2835516631603241, "learning_rate": 1.0673425124235116e-06, "loss": 0.2912, "step": 39802 }, { "epoch": 4.046665311102074, "grad_norm": 0.2877299189567566, "learning_rate": 1.0671233624346179e-06, "loss": 0.2925, "step": 39803 }, { "epoch": 4.046766978446523, "grad_norm": 0.2877176105976105, "learning_rate": 1.0669042322586227e-06, "loss": 0.343, "step": 39804 }, { "epoch": 4.046868645790972, "grad_norm": 0.2866920828819275, "learning_rate": 1.0666851218966278e-06, "loss": 0.3057, "step": 39805 }, { "epoch": 4.046970313135421, "grad_norm": 0.27593937516212463, "learning_rate": 1.06646603134974e-06, "loss": 0.3037, "step": 39806 }, { "epoch": 4.04707198047987, "grad_norm": 0.27659061551094055, "learning_rate": 1.0662469606190612e-06, "loss": 0.2927, "step": 39807 }, { "epoch": 4.0471736478243185, "grad_norm": 0.2551906704902649, "learning_rate": 1.0660279097056942e-06, "loss": 0.3193, "step": 39808 }, { "epoch": 4.0472753151687675, "grad_norm": 0.2798866927623749, "learning_rate": 1.065808878610745e-06, "loss": 0.2951, "step": 39809 }, { "epoch": 4.047376982513216, "grad_norm": 0.2821102440357208, "learning_rate": 1.065589867335316e-06, "loss": 0.3216, "step": 39810 }, { "epoch": 4.047478649857665, "grad_norm": 0.261430025100708, "learning_rate": 1.0653708758805093e-06, "loss": 0.3519, "step": 39811 }, { "epoch": 4.047580317202114, "grad_norm": 0.30148059129714966, "learning_rate": 1.0651519042474302e-06, "loss": 0.2739, "step": 39812 }, { "epoch": 4.047681984546563, "grad_norm": 0.27645257115364075, "learning_rate": 1.0649329524371804e-06, "loss": 0.3102, "step": 39813 }, { "epoch": 4.047783651891012, "grad_norm": 0.27066364884376526, "learning_rate": 1.0647140204508633e-06, "loss": 0.2816, "step": 39814 }, { "epoch": 4.047885319235461, "grad_norm": 0.27047815918922424, "learning_rate": 1.0644951082895804e-06, "loss": 0.2629, "step": 39815 }, { "epoch": 4.047986986579911, "grad_norm": 0.28084596991539, "learning_rate": 1.064276215954438e-06, "loss": 0.3054, "step": 39816 }, { "epoch": 4.04808865392436, "grad_norm": 0.2637982666492462, "learning_rate": 1.0640573434465361e-06, "loss": 0.2795, "step": 39817 }, { "epoch": 4.048190321268809, "grad_norm": 0.28761786222457886, "learning_rate": 1.0638384907669768e-06, "loss": 0.2917, "step": 39818 }, { "epoch": 4.048291988613258, "grad_norm": 0.2947273552417755, "learning_rate": 1.063619657916865e-06, "loss": 0.304, "step": 39819 }, { "epoch": 4.048393655957707, "grad_norm": 0.2661297619342804, "learning_rate": 1.0634008448973022e-06, "loss": 0.278, "step": 39820 }, { "epoch": 4.048495323302156, "grad_norm": 0.26222145557403564, "learning_rate": 1.0631820517093889e-06, "loss": 0.2879, "step": 39821 }, { "epoch": 4.048596990646605, "grad_norm": 0.26734355092048645, "learning_rate": 1.0629632783542305e-06, "loss": 0.3226, "step": 39822 }, { "epoch": 4.0486986579910536, "grad_norm": 0.27105236053466797, "learning_rate": 1.062744524832927e-06, "loss": 0.3108, "step": 39823 }, { "epoch": 4.0488003253355025, "grad_norm": 0.27628815174102783, "learning_rate": 1.0625257911465814e-06, "loss": 0.2913, "step": 39824 }, { "epoch": 4.048901992679951, "grad_norm": 0.30348309874534607, "learning_rate": 1.0623070772962935e-06, "loss": 0.2969, "step": 39825 }, { "epoch": 4.0490036600244, "grad_norm": 0.28928276896476746, "learning_rate": 1.0620883832831685e-06, "loss": 0.2791, "step": 39826 }, { "epoch": 4.049105327368849, "grad_norm": 0.28657692670822144, "learning_rate": 1.0618697091083064e-06, "loss": 0.2887, "step": 39827 }, { "epoch": 4.049206994713298, "grad_norm": 0.27393364906311035, "learning_rate": 1.0616510547728077e-06, "loss": 0.303, "step": 39828 }, { "epoch": 4.049308662057747, "grad_norm": 0.283905565738678, "learning_rate": 1.061432420277776e-06, "loss": 0.3123, "step": 39829 }, { "epoch": 4.049410329402196, "grad_norm": 0.30722686648368835, "learning_rate": 1.061213805624312e-06, "loss": 0.2871, "step": 39830 }, { "epoch": 4.049511996746645, "grad_norm": 0.25855937600135803, "learning_rate": 1.0609952108135153e-06, "loss": 0.2784, "step": 39831 }, { "epoch": 4.049613664091094, "grad_norm": 0.2813783288002014, "learning_rate": 1.0607766358464906e-06, "loss": 0.31, "step": 39832 }, { "epoch": 4.049715331435543, "grad_norm": 0.2844272255897522, "learning_rate": 1.0605580807243365e-06, "loss": 0.2777, "step": 39833 }, { "epoch": 4.049816998779992, "grad_norm": 0.27867206931114197, "learning_rate": 1.0603395454481546e-06, "loss": 0.3023, "step": 39834 }, { "epoch": 4.049918666124441, "grad_norm": 0.25982746481895447, "learning_rate": 1.0601210300190452e-06, "loss": 0.291, "step": 39835 }, { "epoch": 4.05002033346889, "grad_norm": 0.2790766954421997, "learning_rate": 1.0599025344381103e-06, "loss": 0.2777, "step": 39836 }, { "epoch": 4.050122000813339, "grad_norm": 0.2754034101963043, "learning_rate": 1.0596840587064504e-06, "loss": 0.3231, "step": 39837 }, { "epoch": 4.050223668157788, "grad_norm": 0.2762453854084015, "learning_rate": 1.0594656028251648e-06, "loss": 0.3008, "step": 39838 }, { "epoch": 4.050325335502237, "grad_norm": 0.29780253767967224, "learning_rate": 1.0592471667953563e-06, "loss": 0.2653, "step": 39839 }, { "epoch": 4.0504270028466856, "grad_norm": 0.29997795820236206, "learning_rate": 1.059028750618124e-06, "loss": 0.2952, "step": 39840 }, { "epoch": 4.0505286701911345, "grad_norm": 0.2815362215042114, "learning_rate": 1.0588103542945671e-06, "loss": 0.3007, "step": 39841 }, { "epoch": 4.050630337535583, "grad_norm": 0.29007741808891296, "learning_rate": 1.058591977825788e-06, "loss": 0.2968, "step": 39842 }, { "epoch": 4.050732004880032, "grad_norm": 0.2750391364097595, "learning_rate": 1.0583736212128865e-06, "loss": 0.3036, "step": 39843 }, { "epoch": 4.050833672224481, "grad_norm": 0.2824925184249878, "learning_rate": 1.058155284456962e-06, "loss": 0.2828, "step": 39844 }, { "epoch": 4.05093533956893, "grad_norm": 0.27830860018730164, "learning_rate": 1.0579369675591127e-06, "loss": 0.2838, "step": 39845 }, { "epoch": 4.051037006913379, "grad_norm": 0.27510228753089905, "learning_rate": 1.0577186705204418e-06, "loss": 0.293, "step": 39846 }, { "epoch": 4.051138674257828, "grad_norm": 0.27344948053359985, "learning_rate": 1.0575003933420468e-06, "loss": 0.2801, "step": 39847 }, { "epoch": 4.051240341602277, "grad_norm": 0.27784425020217896, "learning_rate": 1.0572821360250273e-06, "loss": 0.2755, "step": 39848 }, { "epoch": 4.051342008946726, "grad_norm": 0.29710274934768677, "learning_rate": 1.0570638985704844e-06, "loss": 0.3225, "step": 39849 }, { "epoch": 4.051443676291175, "grad_norm": 0.26905083656311035, "learning_rate": 1.0568456809795168e-06, "loss": 0.3144, "step": 39850 }, { "epoch": 4.051545343635624, "grad_norm": 0.2664101719856262, "learning_rate": 1.0566274832532224e-06, "loss": 0.3326, "step": 39851 }, { "epoch": 4.051647010980073, "grad_norm": 0.28470394015312195, "learning_rate": 1.0564093053927033e-06, "loss": 0.313, "step": 39852 }, { "epoch": 4.051748678324522, "grad_norm": 0.26747071743011475, "learning_rate": 1.0561911473990566e-06, "loss": 0.3228, "step": 39853 }, { "epoch": 4.051850345668971, "grad_norm": 0.2583578824996948, "learning_rate": 1.0559730092733817e-06, "loss": 0.3223, "step": 39854 }, { "epoch": 4.05195201301342, "grad_norm": 0.2857028543949127, "learning_rate": 1.0557548910167758e-06, "loss": 0.2923, "step": 39855 }, { "epoch": 4.052053680357869, "grad_norm": 0.26840242743492126, "learning_rate": 1.0555367926303411e-06, "loss": 0.303, "step": 39856 }, { "epoch": 4.0521553477023184, "grad_norm": 0.2994177043437958, "learning_rate": 1.0553187141151749e-06, "loss": 0.3016, "step": 39857 }, { "epoch": 4.052257015046767, "grad_norm": 0.28143927454948425, "learning_rate": 1.0551006554723742e-06, "loss": 0.2707, "step": 39858 }, { "epoch": 4.052358682391216, "grad_norm": 0.2696428894996643, "learning_rate": 1.0548826167030402e-06, "loss": 0.3266, "step": 39859 }, { "epoch": 4.052460349735665, "grad_norm": 0.28999829292297363, "learning_rate": 1.0546645978082698e-06, "loss": 0.2965, "step": 39860 }, { "epoch": 4.052562017080114, "grad_norm": 0.2812197506427765, "learning_rate": 1.0544465987891605e-06, "loss": 0.2802, "step": 39861 }, { "epoch": 4.052663684424563, "grad_norm": 0.3111051917076111, "learning_rate": 1.0542286196468132e-06, "loss": 0.3354, "step": 39862 }, { "epoch": 4.052765351769012, "grad_norm": 0.2708936631679535, "learning_rate": 1.0540106603823241e-06, "loss": 0.3116, "step": 39863 }, { "epoch": 4.052867019113461, "grad_norm": 0.25556355714797974, "learning_rate": 1.0537927209967918e-06, "loss": 0.2698, "step": 39864 }, { "epoch": 4.05296868645791, "grad_norm": 0.29078251123428345, "learning_rate": 1.0535748014913127e-06, "loss": 0.3079, "step": 39865 }, { "epoch": 4.053070353802359, "grad_norm": 0.2830110192298889, "learning_rate": 1.0533569018669871e-06, "loss": 0.27, "step": 39866 }, { "epoch": 4.053172021146808, "grad_norm": 0.27792444825172424, "learning_rate": 1.0531390221249115e-06, "loss": 0.3104, "step": 39867 }, { "epoch": 4.053273688491257, "grad_norm": 0.27182698249816895, "learning_rate": 1.0529211622661829e-06, "loss": 0.2885, "step": 39868 }, { "epoch": 4.053375355835706, "grad_norm": 0.2768408954143524, "learning_rate": 1.0527033222919014e-06, "loss": 0.2652, "step": 39869 }, { "epoch": 4.053477023180155, "grad_norm": 0.27394160628318787, "learning_rate": 1.0524855022031604e-06, "loss": 0.2943, "step": 39870 }, { "epoch": 4.053578690524604, "grad_norm": 0.27176544070243835, "learning_rate": 1.052267702001059e-06, "loss": 0.2894, "step": 39871 }, { "epoch": 4.053680357869053, "grad_norm": 0.28113028407096863, "learning_rate": 1.0520499216866964e-06, "loss": 0.2891, "step": 39872 }, { "epoch": 4.0537820252135015, "grad_norm": 0.28669601678848267, "learning_rate": 1.0518321612611677e-06, "loss": 0.3338, "step": 39873 }, { "epoch": 4.0538836925579504, "grad_norm": 0.30524420738220215, "learning_rate": 1.0516144207255708e-06, "loss": 0.2852, "step": 39874 }, { "epoch": 4.053985359902399, "grad_norm": 0.2947120666503906, "learning_rate": 1.0513967000810004e-06, "loss": 0.2841, "step": 39875 }, { "epoch": 4.054087027246848, "grad_norm": 0.28167033195495605, "learning_rate": 1.051178999328557e-06, "loss": 0.3071, "step": 39876 }, { "epoch": 4.054188694591297, "grad_norm": 0.2833753824234009, "learning_rate": 1.050961318469335e-06, "loss": 0.274, "step": 39877 }, { "epoch": 4.054290361935746, "grad_norm": 0.2839789390563965, "learning_rate": 1.05074365750443e-06, "loss": 0.3048, "step": 39878 }, { "epoch": 4.054392029280195, "grad_norm": 0.265435129404068, "learning_rate": 1.0505260164349428e-06, "loss": 0.2823, "step": 39879 }, { "epoch": 4.054493696624644, "grad_norm": 0.29791945219039917, "learning_rate": 1.0503083952619646e-06, "loss": 0.2961, "step": 39880 }, { "epoch": 4.054595363969093, "grad_norm": 0.27056801319122314, "learning_rate": 1.050090793986594e-06, "loss": 0.3188, "step": 39881 }, { "epoch": 4.054697031313542, "grad_norm": 0.2898885905742645, "learning_rate": 1.0498732126099282e-06, "loss": 0.2751, "step": 39882 }, { "epoch": 4.054798698657991, "grad_norm": 0.2873091399669647, "learning_rate": 1.049655651133063e-06, "loss": 0.2855, "step": 39883 }, { "epoch": 4.05490036600244, "grad_norm": 0.26738688349723816, "learning_rate": 1.0494381095570937e-06, "loss": 0.316, "step": 39884 }, { "epoch": 4.055002033346889, "grad_norm": 0.2831800878047943, "learning_rate": 1.0492205878831147e-06, "loss": 0.3031, "step": 39885 }, { "epoch": 4.055103700691338, "grad_norm": 0.29432061314582825, "learning_rate": 1.0490030861122253e-06, "loss": 0.2772, "step": 39886 }, { "epoch": 4.055205368035787, "grad_norm": 0.2744937539100647, "learning_rate": 1.0487856042455196e-06, "loss": 0.3164, "step": 39887 }, { "epoch": 4.055307035380236, "grad_norm": 0.2830682694911957, "learning_rate": 1.0485681422840915e-06, "loss": 0.305, "step": 39888 }, { "epoch": 4.055408702724685, "grad_norm": 0.29040417075157166, "learning_rate": 1.0483507002290406e-06, "loss": 0.2909, "step": 39889 }, { "epoch": 4.0555103700691335, "grad_norm": 0.28662368655204773, "learning_rate": 1.0481332780814574e-06, "loss": 0.3304, "step": 39890 }, { "epoch": 4.0556120374135824, "grad_norm": 0.2712498903274536, "learning_rate": 1.0479158758424395e-06, "loss": 0.291, "step": 39891 }, { "epoch": 4.055713704758031, "grad_norm": 0.25712552666664124, "learning_rate": 1.0476984935130852e-06, "loss": 0.2947, "step": 39892 }, { "epoch": 4.05581537210248, "grad_norm": 0.30116981267929077, "learning_rate": 1.0474811310944838e-06, "loss": 0.2524, "step": 39893 }, { "epoch": 4.055917039446929, "grad_norm": 0.29070594906806946, "learning_rate": 1.0472637885877346e-06, "loss": 0.2649, "step": 39894 }, { "epoch": 4.056018706791378, "grad_norm": 0.272061824798584, "learning_rate": 1.0470464659939294e-06, "loss": 0.2911, "step": 39895 }, { "epoch": 4.056120374135827, "grad_norm": 0.2868649363517761, "learning_rate": 1.0468291633141663e-06, "loss": 0.3179, "step": 39896 }, { "epoch": 4.056222041480276, "grad_norm": 0.28139999508857727, "learning_rate": 1.0466118805495384e-06, "loss": 0.2955, "step": 39897 }, { "epoch": 4.056323708824726, "grad_norm": 0.2787071466445923, "learning_rate": 1.0463946177011397e-06, "loss": 0.2849, "step": 39898 }, { "epoch": 4.056425376169175, "grad_norm": 0.27870020270347595, "learning_rate": 1.046177374770067e-06, "loss": 0.2832, "step": 39899 }, { "epoch": 4.056527043513624, "grad_norm": 0.28567901253700256, "learning_rate": 1.0459601517574114e-06, "loss": 0.3043, "step": 39900 }, { "epoch": 4.056628710858073, "grad_norm": 0.3055081367492676, "learning_rate": 1.0457429486642694e-06, "loss": 0.2643, "step": 39901 }, { "epoch": 4.056730378202522, "grad_norm": 0.28512701392173767, "learning_rate": 1.0455257654917356e-06, "loss": 0.2983, "step": 39902 }, { "epoch": 4.056832045546971, "grad_norm": 0.2885115146636963, "learning_rate": 1.0453086022409015e-06, "loss": 0.2769, "step": 39903 }, { "epoch": 4.05693371289142, "grad_norm": 0.2705758512020111, "learning_rate": 1.045091458912864e-06, "loss": 0.3238, "step": 39904 }, { "epoch": 4.0570353802358685, "grad_norm": 0.28912025690078735, "learning_rate": 1.0448743355087165e-06, "loss": 0.2846, "step": 39905 }, { "epoch": 4.0571370475803175, "grad_norm": 0.2715318202972412, "learning_rate": 1.0446572320295517e-06, "loss": 0.266, "step": 39906 }, { "epoch": 4.057238714924766, "grad_norm": 0.28829464316368103, "learning_rate": 1.0444401484764627e-06, "loss": 0.282, "step": 39907 }, { "epoch": 4.057340382269215, "grad_norm": 0.28026899695396423, "learning_rate": 1.044223084850544e-06, "loss": 0.3132, "step": 39908 }, { "epoch": 4.057442049613664, "grad_norm": 0.26473698019981384, "learning_rate": 1.0440060411528925e-06, "loss": 0.3086, "step": 39909 }, { "epoch": 4.057543716958113, "grad_norm": 0.31683725118637085, "learning_rate": 1.0437890173845955e-06, "loss": 0.2786, "step": 39910 }, { "epoch": 4.057645384302562, "grad_norm": 0.2681431472301483, "learning_rate": 1.043572013546751e-06, "loss": 0.2819, "step": 39911 }, { "epoch": 4.057747051647011, "grad_norm": 0.2802736163139343, "learning_rate": 1.04335502964045e-06, "loss": 0.2758, "step": 39912 }, { "epoch": 4.05784871899146, "grad_norm": 0.2887003421783447, "learning_rate": 1.043138065666785e-06, "loss": 0.3166, "step": 39913 }, { "epoch": 4.057950386335909, "grad_norm": 0.2885763347148895, "learning_rate": 1.0429211216268515e-06, "loss": 0.2873, "step": 39914 }, { "epoch": 4.058052053680358, "grad_norm": 0.29969075322151184, "learning_rate": 1.0427041975217406e-06, "loss": 0.2857, "step": 39915 }, { "epoch": 4.058153721024807, "grad_norm": 0.27089130878448486, "learning_rate": 1.0424872933525458e-06, "loss": 0.2956, "step": 39916 }, { "epoch": 4.058255388369256, "grad_norm": 0.28122133016586304, "learning_rate": 1.042270409120359e-06, "loss": 0.277, "step": 39917 }, { "epoch": 4.058357055713705, "grad_norm": 0.28524619340896606, "learning_rate": 1.0420535448262726e-06, "loss": 0.3026, "step": 39918 }, { "epoch": 4.058458723058154, "grad_norm": 0.27627936005592346, "learning_rate": 1.0418367004713826e-06, "loss": 0.2925, "step": 39919 }, { "epoch": 4.058560390402603, "grad_norm": 0.29686224460601807, "learning_rate": 1.0416198760567758e-06, "loss": 0.2802, "step": 39920 }, { "epoch": 4.058662057747052, "grad_norm": 0.2800413966178894, "learning_rate": 1.0414030715835493e-06, "loss": 0.3303, "step": 39921 }, { "epoch": 4.0587637250915005, "grad_norm": 0.28134676814079285, "learning_rate": 1.0411862870527934e-06, "loss": 0.2988, "step": 39922 }, { "epoch": 4.0588653924359495, "grad_norm": 0.2801326811313629, "learning_rate": 1.0409695224655986e-06, "loss": 0.2452, "step": 39923 }, { "epoch": 4.058967059780398, "grad_norm": 0.2940775752067566, "learning_rate": 1.0407527778230598e-06, "loss": 0.3183, "step": 39924 }, { "epoch": 4.059068727124847, "grad_norm": 0.30284228920936584, "learning_rate": 1.0405360531262677e-06, "loss": 0.3012, "step": 39925 }, { "epoch": 4.059170394469296, "grad_norm": 0.28459274768829346, "learning_rate": 1.0403193483763142e-06, "loss": 0.2861, "step": 39926 }, { "epoch": 4.059272061813745, "grad_norm": 0.28794893622398376, "learning_rate": 1.0401026635742894e-06, "loss": 0.3021, "step": 39927 }, { "epoch": 4.059373729158194, "grad_norm": 0.2462320774793625, "learning_rate": 1.0398859987212877e-06, "loss": 0.2556, "step": 39928 }, { "epoch": 4.059475396502643, "grad_norm": 0.2768247127532959, "learning_rate": 1.0396693538183995e-06, "loss": 0.3027, "step": 39929 }, { "epoch": 4.059577063847092, "grad_norm": 0.2693859338760376, "learning_rate": 1.0394527288667144e-06, "loss": 0.2916, "step": 39930 }, { "epoch": 4.059678731191541, "grad_norm": 0.27946993708610535, "learning_rate": 1.0392361238673266e-06, "loss": 0.2759, "step": 39931 }, { "epoch": 4.05978039853599, "grad_norm": 0.2763388454914093, "learning_rate": 1.0390195388213264e-06, "loss": 0.3019, "step": 39932 }, { "epoch": 4.059882065880439, "grad_norm": 0.2782929539680481, "learning_rate": 1.0388029737298027e-06, "loss": 0.2766, "step": 39933 }, { "epoch": 4.059983733224888, "grad_norm": 0.2648871839046478, "learning_rate": 1.0385864285938497e-06, "loss": 0.2943, "step": 39934 }, { "epoch": 4.060085400569337, "grad_norm": 0.2619757056236267, "learning_rate": 1.0383699034145573e-06, "loss": 0.3168, "step": 39935 }, { "epoch": 4.060187067913786, "grad_norm": 0.2941977083683014, "learning_rate": 1.0381533981930153e-06, "loss": 0.2986, "step": 39936 }, { "epoch": 4.060288735258235, "grad_norm": 0.2868233323097229, "learning_rate": 1.037936912930314e-06, "loss": 0.2806, "step": 39937 }, { "epoch": 4.060390402602684, "grad_norm": 0.2809096574783325, "learning_rate": 1.037720447627546e-06, "loss": 0.3386, "step": 39938 }, { "epoch": 4.060492069947133, "grad_norm": 0.2853536009788513, "learning_rate": 1.0375040022858013e-06, "loss": 0.282, "step": 39939 }, { "epoch": 4.060593737291582, "grad_norm": 0.27340662479400635, "learning_rate": 1.037287576906168e-06, "loss": 0.349, "step": 39940 }, { "epoch": 4.060695404636031, "grad_norm": 0.2785567045211792, "learning_rate": 1.0370711714897398e-06, "loss": 0.288, "step": 39941 }, { "epoch": 4.06079707198048, "grad_norm": 0.3011070191860199, "learning_rate": 1.0368547860376054e-06, "loss": 0.2817, "step": 39942 }, { "epoch": 4.060898739324929, "grad_norm": 0.29613256454467773, "learning_rate": 1.0366384205508535e-06, "loss": 0.3036, "step": 39943 }, { "epoch": 4.061000406669378, "grad_norm": 0.40264397859573364, "learning_rate": 1.0364220750305765e-06, "loss": 0.2826, "step": 39944 }, { "epoch": 4.061102074013827, "grad_norm": 0.27082473039627075, "learning_rate": 1.0362057494778633e-06, "loss": 0.2902, "step": 39945 }, { "epoch": 4.061203741358276, "grad_norm": 0.287702351808548, "learning_rate": 1.0359894438938034e-06, "loss": 0.3001, "step": 39946 }, { "epoch": 4.061305408702725, "grad_norm": 0.28318801522254944, "learning_rate": 1.0357731582794857e-06, "loss": 0.2922, "step": 39947 }, { "epoch": 4.061407076047174, "grad_norm": 0.2614520192146301, "learning_rate": 1.0355568926360017e-06, "loss": 0.2839, "step": 39948 }, { "epoch": 4.061508743391623, "grad_norm": 0.25841212272644043, "learning_rate": 1.03534064696444e-06, "loss": 0.3129, "step": 39949 }, { "epoch": 4.061610410736072, "grad_norm": 0.29857760667800903, "learning_rate": 1.035124421265889e-06, "loss": 0.2823, "step": 39950 }, { "epoch": 4.061712078080521, "grad_norm": 0.2986137568950653, "learning_rate": 1.0349082155414403e-06, "loss": 0.2873, "step": 39951 }, { "epoch": 4.06181374542497, "grad_norm": 0.2681593596935272, "learning_rate": 1.0346920297921814e-06, "loss": 0.2735, "step": 39952 }, { "epoch": 4.061915412769419, "grad_norm": 0.2598749101161957, "learning_rate": 1.0344758640192005e-06, "loss": 0.289, "step": 39953 }, { "epoch": 4.0620170801138675, "grad_norm": 0.3032483160495758, "learning_rate": 1.0342597182235898e-06, "loss": 0.2812, "step": 39954 }, { "epoch": 4.0621187474583165, "grad_norm": 0.31258806586265564, "learning_rate": 1.0340435924064351e-06, "loss": 0.2854, "step": 39955 }, { "epoch": 4.062220414802765, "grad_norm": 0.28089800477027893, "learning_rate": 1.0338274865688274e-06, "loss": 0.2869, "step": 39956 }, { "epoch": 4.062322082147214, "grad_norm": 0.30138731002807617, "learning_rate": 1.0336114007118524e-06, "loss": 0.3066, "step": 39957 }, { "epoch": 4.062423749491663, "grad_norm": 0.2778632640838623, "learning_rate": 1.033395334836602e-06, "loss": 0.2902, "step": 39958 }, { "epoch": 4.062525416836112, "grad_norm": 0.30574682354927063, "learning_rate": 1.0331792889441632e-06, "loss": 0.2508, "step": 39959 }, { "epoch": 4.062627084180561, "grad_norm": 0.27224215865135193, "learning_rate": 1.0329632630356234e-06, "loss": 0.3035, "step": 39960 }, { "epoch": 4.06272875152501, "grad_norm": 0.27409079670906067, "learning_rate": 1.0327472571120733e-06, "loss": 0.3254, "step": 39961 }, { "epoch": 4.062830418869459, "grad_norm": 0.2836087942123413, "learning_rate": 1.0325312711745999e-06, "loss": 0.2737, "step": 39962 }, { "epoch": 4.062932086213908, "grad_norm": 0.27712735533714294, "learning_rate": 1.0323153052242897e-06, "loss": 0.3201, "step": 39963 }, { "epoch": 4.063033753558357, "grad_norm": 0.3028501868247986, "learning_rate": 1.032099359262233e-06, "loss": 0.3054, "step": 39964 }, { "epoch": 4.063135420902806, "grad_norm": 0.26275375485420227, "learning_rate": 1.0318834332895173e-06, "loss": 0.3129, "step": 39965 }, { "epoch": 4.063237088247255, "grad_norm": 0.30106595158576965, "learning_rate": 1.0316675273072297e-06, "loss": 0.309, "step": 39966 }, { "epoch": 4.063338755591704, "grad_norm": 0.2809959053993225, "learning_rate": 1.0314516413164566e-06, "loss": 0.3032, "step": 39967 }, { "epoch": 4.063440422936153, "grad_norm": 0.2748733162879944, "learning_rate": 1.031235775318289e-06, "loss": 0.3249, "step": 39968 }, { "epoch": 4.063542090280602, "grad_norm": 0.282380074262619, "learning_rate": 1.0310199293138119e-06, "loss": 0.2593, "step": 39969 }, { "epoch": 4.063643757625051, "grad_norm": 0.25495296716690063, "learning_rate": 1.0308041033041116e-06, "loss": 0.3121, "step": 39970 }, { "epoch": 4.0637454249694995, "grad_norm": 0.2680279016494751, "learning_rate": 1.0305882972902792e-06, "loss": 0.2922, "step": 39971 }, { "epoch": 4.0638470923139485, "grad_norm": 0.2835122346878052, "learning_rate": 1.0303725112733992e-06, "loss": 0.294, "step": 39972 }, { "epoch": 4.063948759658397, "grad_norm": 0.36321502923965454, "learning_rate": 1.0301567452545575e-06, "loss": 0.2887, "step": 39973 }, { "epoch": 4.064050427002846, "grad_norm": 0.2706884443759918, "learning_rate": 1.0299409992348446e-06, "loss": 0.2946, "step": 39974 }, { "epoch": 4.064152094347295, "grad_norm": 0.29444852471351624, "learning_rate": 1.0297252732153452e-06, "loss": 0.323, "step": 39975 }, { "epoch": 4.064253761691744, "grad_norm": 0.26650750637054443, "learning_rate": 1.0295095671971466e-06, "loss": 0.3141, "step": 39976 }, { "epoch": 4.064355429036193, "grad_norm": 0.304726779460907, "learning_rate": 1.029293881181334e-06, "loss": 0.2723, "step": 39977 }, { "epoch": 4.064457096380642, "grad_norm": 0.26736319065093994, "learning_rate": 1.0290782151689965e-06, "loss": 0.2895, "step": 39978 }, { "epoch": 4.064558763725092, "grad_norm": 0.29173046350479126, "learning_rate": 1.0288625691612192e-06, "loss": 0.3084, "step": 39979 }, { "epoch": 4.064660431069541, "grad_norm": 0.2740989327430725, "learning_rate": 1.0286469431590873e-06, "loss": 0.2979, "step": 39980 }, { "epoch": 4.06476209841399, "grad_norm": 0.290499746799469, "learning_rate": 1.0284313371636896e-06, "loss": 0.2911, "step": 39981 }, { "epoch": 4.064863765758439, "grad_norm": 0.2786736786365509, "learning_rate": 1.0282157511761109e-06, "loss": 0.2996, "step": 39982 }, { "epoch": 4.064965433102888, "grad_norm": 0.2857653796672821, "learning_rate": 1.0280001851974365e-06, "loss": 0.3137, "step": 39983 }, { "epoch": 4.065067100447337, "grad_norm": 0.27834218740463257, "learning_rate": 1.0277846392287544e-06, "loss": 0.3086, "step": 39984 }, { "epoch": 4.065168767791786, "grad_norm": 0.3263479769229889, "learning_rate": 1.0275691132711496e-06, "loss": 0.2898, "step": 39985 }, { "epoch": 4.0652704351362345, "grad_norm": 0.2835688889026642, "learning_rate": 1.0273536073257073e-06, "loss": 0.2678, "step": 39986 }, { "epoch": 4.0653721024806835, "grad_norm": 0.27561429142951965, "learning_rate": 1.0271381213935122e-06, "loss": 0.2909, "step": 39987 }, { "epoch": 4.065473769825132, "grad_norm": 0.27846264839172363, "learning_rate": 1.0269226554756522e-06, "loss": 0.2944, "step": 39988 }, { "epoch": 4.065575437169581, "grad_norm": 0.2739279270172119, "learning_rate": 1.0267072095732122e-06, "loss": 0.3136, "step": 39989 }, { "epoch": 4.06567710451403, "grad_norm": 0.279078871011734, "learning_rate": 1.0264917836872757e-06, "loss": 0.3356, "step": 39990 }, { "epoch": 4.065778771858479, "grad_norm": 0.29719278216362, "learning_rate": 1.0262763778189305e-06, "loss": 0.3067, "step": 39991 }, { "epoch": 4.065880439202928, "grad_norm": 0.25485318899154663, "learning_rate": 1.0260609919692605e-06, "loss": 0.2957, "step": 39992 }, { "epoch": 4.065982106547377, "grad_norm": 0.28614094853401184, "learning_rate": 1.0258456261393495e-06, "loss": 0.3025, "step": 39993 }, { "epoch": 4.066083773891826, "grad_norm": 0.2759406268596649, "learning_rate": 1.0256302803302858e-06, "loss": 0.3076, "step": 39994 }, { "epoch": 4.066185441236275, "grad_norm": 0.2916293740272522, "learning_rate": 1.0254149545431518e-06, "loss": 0.2726, "step": 39995 }, { "epoch": 4.066287108580724, "grad_norm": 0.28611811995506287, "learning_rate": 1.0251996487790329e-06, "loss": 0.2778, "step": 39996 }, { "epoch": 4.066388775925173, "grad_norm": 0.27346718311309814, "learning_rate": 1.024984363039012e-06, "loss": 0.2923, "step": 39997 }, { "epoch": 4.066490443269622, "grad_norm": 0.2762550413608551, "learning_rate": 1.024769097324177e-06, "loss": 0.2674, "step": 39998 }, { "epoch": 4.066592110614071, "grad_norm": 0.274111270904541, "learning_rate": 1.0245538516356102e-06, "loss": 0.2974, "step": 39999 }, { "epoch": 4.06669377795852, "grad_norm": 0.2819819152355194, "learning_rate": 1.0243386259743953e-06, "loss": 0.2922, "step": 40000 }, { "epoch": 4.066795445302969, "grad_norm": 0.2667030394077301, "learning_rate": 1.0241234203416189e-06, "loss": 0.3024, "step": 40001 }, { "epoch": 4.066897112647418, "grad_norm": 0.2668111026287079, "learning_rate": 1.0239082347383638e-06, "loss": 0.3051, "step": 40002 }, { "epoch": 4.0669987799918665, "grad_norm": 0.3051236569881439, "learning_rate": 1.0236930691657133e-06, "loss": 0.2986, "step": 40003 }, { "epoch": 4.0671004473363155, "grad_norm": 0.2976450026035309, "learning_rate": 1.0234779236247527e-06, "loss": 0.3126, "step": 40004 }, { "epoch": 4.067202114680764, "grad_norm": 0.2830226719379425, "learning_rate": 1.023262798116566e-06, "loss": 0.278, "step": 40005 }, { "epoch": 4.067303782025213, "grad_norm": 0.27720487117767334, "learning_rate": 1.023047692642236e-06, "loss": 0.3214, "step": 40006 }, { "epoch": 4.067405449369662, "grad_norm": 0.25731581449508667, "learning_rate": 1.0228326072028455e-06, "loss": 0.3017, "step": 40007 }, { "epoch": 4.067507116714111, "grad_norm": 0.27965208888053894, "learning_rate": 1.0226175417994805e-06, "loss": 0.2774, "step": 40008 }, { "epoch": 4.06760878405856, "grad_norm": 0.2833119034767151, "learning_rate": 1.022402496433223e-06, "loss": 0.2928, "step": 40009 }, { "epoch": 4.067710451403009, "grad_norm": 0.2934032082557678, "learning_rate": 1.0221874711051554e-06, "loss": 0.3233, "step": 40010 }, { "epoch": 4.067812118747458, "grad_norm": 0.26826608180999756, "learning_rate": 1.0219724658163632e-06, "loss": 0.2862, "step": 40011 }, { "epoch": 4.067913786091907, "grad_norm": 0.2820212244987488, "learning_rate": 1.0217574805679286e-06, "loss": 0.3325, "step": 40012 }, { "epoch": 4.068015453436356, "grad_norm": 0.2688945531845093, "learning_rate": 1.0215425153609333e-06, "loss": 0.3012, "step": 40013 }, { "epoch": 4.068117120780805, "grad_norm": 0.28828513622283936, "learning_rate": 1.021327570196462e-06, "loss": 0.301, "step": 40014 }, { "epoch": 4.068218788125254, "grad_norm": 0.26847901940345764, "learning_rate": 1.0211126450755976e-06, "loss": 0.2851, "step": 40015 }, { "epoch": 4.068320455469703, "grad_norm": 0.2707313001155853, "learning_rate": 1.0208977399994218e-06, "loss": 0.2899, "step": 40016 }, { "epoch": 4.068422122814152, "grad_norm": 0.2757517993450165, "learning_rate": 1.0206828549690162e-06, "loss": 0.266, "step": 40017 }, { "epoch": 4.068523790158601, "grad_norm": 0.2992912828922272, "learning_rate": 1.0204679899854663e-06, "loss": 0.2892, "step": 40018 }, { "epoch": 4.06862545750305, "grad_norm": 0.2745789587497711, "learning_rate": 1.0202531450498532e-06, "loss": 0.3045, "step": 40019 }, { "epoch": 4.0687271248474985, "grad_norm": 0.2653890550136566, "learning_rate": 1.0200383201632574e-06, "loss": 0.3013, "step": 40020 }, { "epoch": 4.068828792191948, "grad_norm": 0.29768019914627075, "learning_rate": 1.0198235153267654e-06, "loss": 0.2876, "step": 40021 }, { "epoch": 4.068930459536397, "grad_norm": 0.29809075593948364, "learning_rate": 1.0196087305414538e-06, "loss": 0.2932, "step": 40022 }, { "epoch": 4.069032126880846, "grad_norm": 0.27248913049697876, "learning_rate": 1.0193939658084078e-06, "loss": 0.2981, "step": 40023 }, { "epoch": 4.069133794225295, "grad_norm": 0.2826279401779175, "learning_rate": 1.0191792211287105e-06, "loss": 0.3223, "step": 40024 }, { "epoch": 4.069235461569744, "grad_norm": 0.2868294417858124, "learning_rate": 1.0189644965034424e-06, "loss": 0.3149, "step": 40025 }, { "epoch": 4.069337128914193, "grad_norm": 0.269715279340744, "learning_rate": 1.0187497919336848e-06, "loss": 0.2838, "step": 40026 }, { "epoch": 4.069438796258642, "grad_norm": 0.2712537348270416, "learning_rate": 1.0185351074205185e-06, "loss": 0.3158, "step": 40027 }, { "epoch": 4.069540463603091, "grad_norm": 0.25103500485420227, "learning_rate": 1.0183204429650273e-06, "loss": 0.3256, "step": 40028 }, { "epoch": 4.06964213094754, "grad_norm": 0.2828768789768219, "learning_rate": 1.0181057985682919e-06, "loss": 0.2994, "step": 40029 }, { "epoch": 4.069743798291989, "grad_norm": 0.27237483859062195, "learning_rate": 1.0178911742313912e-06, "loss": 0.2638, "step": 40030 }, { "epoch": 4.069845465636438, "grad_norm": 0.2922429144382477, "learning_rate": 1.017676569955412e-06, "loss": 0.2989, "step": 40031 }, { "epoch": 4.069947132980887, "grad_norm": 0.2695980966091156, "learning_rate": 1.0174619857414285e-06, "loss": 0.2857, "step": 40032 }, { "epoch": 4.070048800325336, "grad_norm": 0.29066139459609985, "learning_rate": 1.017247421590526e-06, "loss": 0.301, "step": 40033 }, { "epoch": 4.070150467669785, "grad_norm": 0.2746647596359253, "learning_rate": 1.0170328775037852e-06, "loss": 0.2977, "step": 40034 }, { "epoch": 4.0702521350142336, "grad_norm": 0.2705981433391571, "learning_rate": 1.0168183534822863e-06, "loss": 0.2972, "step": 40035 }, { "epoch": 4.0703538023586825, "grad_norm": 0.2646860182285309, "learning_rate": 1.0166038495271103e-06, "loss": 0.2769, "step": 40036 }, { "epoch": 4.070455469703131, "grad_norm": 0.2764946222305298, "learning_rate": 1.0163893656393358e-06, "loss": 0.2866, "step": 40037 }, { "epoch": 4.07055713704758, "grad_norm": 0.2790185511112213, "learning_rate": 1.0161749018200467e-06, "loss": 0.3164, "step": 40038 }, { "epoch": 4.070658804392029, "grad_norm": 0.263276070356369, "learning_rate": 1.0159604580703213e-06, "loss": 0.312, "step": 40039 }, { "epoch": 4.070760471736478, "grad_norm": 0.2983773648738861, "learning_rate": 1.0157460343912396e-06, "loss": 0.2586, "step": 40040 }, { "epoch": 4.070862139080927, "grad_norm": 0.2805072069168091, "learning_rate": 1.0155316307838848e-06, "loss": 0.3202, "step": 40041 }, { "epoch": 4.070963806425376, "grad_norm": 0.3139965832233429, "learning_rate": 1.0153172472493322e-06, "loss": 0.3125, "step": 40042 }, { "epoch": 4.071065473769825, "grad_norm": 0.28239813446998596, "learning_rate": 1.0151028837886645e-06, "loss": 0.3193, "step": 40043 }, { "epoch": 4.071167141114274, "grad_norm": 0.2954712510108948, "learning_rate": 1.014888540402964e-06, "loss": 0.2621, "step": 40044 }, { "epoch": 4.071268808458723, "grad_norm": 0.27685242891311646, "learning_rate": 1.0146742170933055e-06, "loss": 0.2799, "step": 40045 }, { "epoch": 4.071370475803172, "grad_norm": 0.27654990553855896, "learning_rate": 1.0144599138607724e-06, "loss": 0.317, "step": 40046 }, { "epoch": 4.071472143147621, "grad_norm": 0.2717988193035126, "learning_rate": 1.0142456307064425e-06, "loss": 0.2958, "step": 40047 }, { "epoch": 4.07157381049207, "grad_norm": 0.2639717161655426, "learning_rate": 1.0140313676313967e-06, "loss": 0.2849, "step": 40048 }, { "epoch": 4.071675477836519, "grad_norm": 0.26311346888542175, "learning_rate": 1.0138171246367134e-06, "loss": 0.3051, "step": 40049 }, { "epoch": 4.071777145180968, "grad_norm": 0.29208505153656006, "learning_rate": 1.0136029017234716e-06, "loss": 0.2871, "step": 40050 }, { "epoch": 4.071878812525417, "grad_norm": 0.3039107024669647, "learning_rate": 1.0133886988927532e-06, "loss": 0.2763, "step": 40051 }, { "epoch": 4.0719804798698656, "grad_norm": 0.28928059339523315, "learning_rate": 1.0131745161456325e-06, "loss": 0.3085, "step": 40052 }, { "epoch": 4.0720821472143145, "grad_norm": 0.26450610160827637, "learning_rate": 1.0129603534831912e-06, "loss": 0.3156, "step": 40053 }, { "epoch": 4.072183814558763, "grad_norm": 0.27596306800842285, "learning_rate": 1.0127462109065112e-06, "loss": 0.2717, "step": 40054 }, { "epoch": 4.072285481903212, "grad_norm": 0.29842495918273926, "learning_rate": 1.012532088416665e-06, "loss": 0.2634, "step": 40055 }, { "epoch": 4.072387149247661, "grad_norm": 0.28056254982948303, "learning_rate": 1.0123179860147363e-06, "loss": 0.3082, "step": 40056 }, { "epoch": 4.07248881659211, "grad_norm": 0.2742289900779724, "learning_rate": 1.012103903701802e-06, "loss": 0.3011, "step": 40057 }, { "epoch": 4.072590483936559, "grad_norm": 0.2857321500778198, "learning_rate": 1.011889841478939e-06, "loss": 0.2862, "step": 40058 }, { "epoch": 4.072692151281008, "grad_norm": 0.28040915727615356, "learning_rate": 1.0116757993472286e-06, "loss": 0.2913, "step": 40059 }, { "epoch": 4.072793818625457, "grad_norm": 0.2648649215698242, "learning_rate": 1.0114617773077467e-06, "loss": 0.309, "step": 40060 }, { "epoch": 4.072895485969907, "grad_norm": 0.2758573591709137, "learning_rate": 1.0112477753615752e-06, "loss": 0.2675, "step": 40061 }, { "epoch": 4.072997153314356, "grad_norm": 0.28846198320388794, "learning_rate": 1.0110337935097864e-06, "loss": 0.282, "step": 40062 }, { "epoch": 4.073098820658805, "grad_norm": 0.3027898371219635, "learning_rate": 1.0108198317534618e-06, "loss": 0.2967, "step": 40063 }, { "epoch": 4.073200488003254, "grad_norm": 0.27582523226737976, "learning_rate": 1.0106058900936815e-06, "loss": 0.2839, "step": 40064 }, { "epoch": 4.073302155347703, "grad_norm": 0.2872450649738312, "learning_rate": 1.0103919685315178e-06, "loss": 0.2931, "step": 40065 }, { "epoch": 4.073403822692152, "grad_norm": 0.28268176317214966, "learning_rate": 1.0101780670680534e-06, "loss": 0.2714, "step": 40066 }, { "epoch": 4.073505490036601, "grad_norm": 0.3020945191383362, "learning_rate": 1.009964185704363e-06, "loss": 0.2962, "step": 40067 }, { "epoch": 4.0736071573810495, "grad_norm": 0.2971512973308563, "learning_rate": 1.0097503244415236e-06, "loss": 0.3092, "step": 40068 }, { "epoch": 4.0737088247254984, "grad_norm": 0.26453113555908203, "learning_rate": 1.0095364832806154e-06, "loss": 0.2673, "step": 40069 }, { "epoch": 4.073810492069947, "grad_norm": 0.2764708399772644, "learning_rate": 1.0093226622227126e-06, "loss": 0.2829, "step": 40070 }, { "epoch": 4.073912159414396, "grad_norm": 0.2904902994632721, "learning_rate": 1.0091088612688966e-06, "loss": 0.2801, "step": 40071 }, { "epoch": 4.074013826758845, "grad_norm": 0.2730455696582794, "learning_rate": 1.008895080420239e-06, "loss": 0.3006, "step": 40072 }, { "epoch": 4.074115494103294, "grad_norm": 0.27921241521835327, "learning_rate": 1.00868131967782e-06, "loss": 0.2976, "step": 40073 }, { "epoch": 4.074217161447743, "grad_norm": 0.2993546724319458, "learning_rate": 1.0084675790427184e-06, "loss": 0.2773, "step": 40074 }, { "epoch": 4.074318828792192, "grad_norm": 0.2796405553817749, "learning_rate": 1.0082538585160062e-06, "loss": 0.3114, "step": 40075 }, { "epoch": 4.074420496136641, "grad_norm": 0.2610234022140503, "learning_rate": 1.0080401580987637e-06, "loss": 0.286, "step": 40076 }, { "epoch": 4.07452216348109, "grad_norm": 0.281859427690506, "learning_rate": 1.007826477792066e-06, "loss": 0.2832, "step": 40077 }, { "epoch": 4.074623830825539, "grad_norm": 0.2519349157810211, "learning_rate": 1.0076128175969885e-06, "loss": 0.2873, "step": 40078 }, { "epoch": 4.074725498169988, "grad_norm": 0.283176064491272, "learning_rate": 1.0073991775146102e-06, "loss": 0.2786, "step": 40079 }, { "epoch": 4.074827165514437, "grad_norm": 0.27414679527282715, "learning_rate": 1.007185557546006e-06, "loss": 0.2973, "step": 40080 }, { "epoch": 4.074928832858886, "grad_norm": 0.28489184379577637, "learning_rate": 1.0069719576922526e-06, "loss": 0.2727, "step": 40081 }, { "epoch": 4.075030500203335, "grad_norm": 0.2641572058200836, "learning_rate": 1.0067583779544237e-06, "loss": 0.27, "step": 40082 }, { "epoch": 4.075132167547784, "grad_norm": 0.2714673578739166, "learning_rate": 1.0065448183335973e-06, "loss": 0.2763, "step": 40083 }, { "epoch": 4.075233834892233, "grad_norm": 0.28044331073760986, "learning_rate": 1.006331278830852e-06, "loss": 0.3067, "step": 40084 }, { "epoch": 4.0753355022366815, "grad_norm": 0.3098292052745819, "learning_rate": 1.006117759447258e-06, "loss": 0.3177, "step": 40085 }, { "epoch": 4.0754371695811304, "grad_norm": 0.2818317711353302, "learning_rate": 1.0059042601838948e-06, "loss": 0.3045, "step": 40086 }, { "epoch": 4.075538836925579, "grad_norm": 0.26248979568481445, "learning_rate": 1.005690781041837e-06, "loss": 0.2844, "step": 40087 }, { "epoch": 4.075640504270028, "grad_norm": 0.2647577226161957, "learning_rate": 1.0054773220221597e-06, "loss": 0.2895, "step": 40088 }, { "epoch": 4.075742171614477, "grad_norm": 0.2968926727771759, "learning_rate": 1.005263883125937e-06, "loss": 0.2956, "step": 40089 }, { "epoch": 4.075843838958926, "grad_norm": 0.2912557125091553, "learning_rate": 1.005050464354247e-06, "loss": 0.3114, "step": 40090 }, { "epoch": 4.075945506303375, "grad_norm": 0.27834123373031616, "learning_rate": 1.0048370657081636e-06, "loss": 0.3008, "step": 40091 }, { "epoch": 4.076047173647824, "grad_norm": 0.28537827730178833, "learning_rate": 1.00462368718876e-06, "loss": 0.2782, "step": 40092 }, { "epoch": 4.076148840992273, "grad_norm": 0.27020302414894104, "learning_rate": 1.0044103287971146e-06, "loss": 0.3218, "step": 40093 }, { "epoch": 4.076250508336722, "grad_norm": 0.2612414062023163, "learning_rate": 1.0041969905342996e-06, "loss": 0.3037, "step": 40094 }, { "epoch": 4.076352175681171, "grad_norm": 0.28841927647590637, "learning_rate": 1.0039836724013902e-06, "loss": 0.3112, "step": 40095 }, { "epoch": 4.07645384302562, "grad_norm": 0.308727890253067, "learning_rate": 1.0037703743994626e-06, "loss": 0.3151, "step": 40096 }, { "epoch": 4.076555510370069, "grad_norm": 0.2830149233341217, "learning_rate": 1.0035570965295898e-06, "loss": 0.3098, "step": 40097 }, { "epoch": 4.076657177714518, "grad_norm": 0.26830554008483887, "learning_rate": 1.0033438387928467e-06, "loss": 0.2857, "step": 40098 }, { "epoch": 4.076758845058967, "grad_norm": 0.2579296827316284, "learning_rate": 1.0031306011903064e-06, "loss": 0.3132, "step": 40099 }, { "epoch": 4.076860512403416, "grad_norm": 0.25695526599884033, "learning_rate": 1.002917383723046e-06, "loss": 0.3097, "step": 40100 }, { "epoch": 4.076962179747865, "grad_norm": 0.27019062638282776, "learning_rate": 1.0027041863921378e-06, "loss": 0.3141, "step": 40101 }, { "epoch": 4.0770638470923135, "grad_norm": 0.26305827498435974, "learning_rate": 1.0024910091986545e-06, "loss": 0.3137, "step": 40102 }, { "epoch": 4.077165514436763, "grad_norm": 0.3061305284500122, "learning_rate": 1.0022778521436727e-06, "loss": 0.2792, "step": 40103 }, { "epoch": 4.077267181781212, "grad_norm": 0.27979758381843567, "learning_rate": 1.0020647152282653e-06, "loss": 0.298, "step": 40104 }, { "epoch": 4.077368849125661, "grad_norm": 0.28541073203086853, "learning_rate": 1.0018515984535043e-06, "loss": 0.2993, "step": 40105 }, { "epoch": 4.07747051647011, "grad_norm": 0.2734243869781494, "learning_rate": 1.0016385018204667e-06, "loss": 0.3417, "step": 40106 }, { "epoch": 4.077572183814559, "grad_norm": 0.2877194881439209, "learning_rate": 1.0014254253302235e-06, "loss": 0.2962, "step": 40107 }, { "epoch": 4.077673851159008, "grad_norm": 0.26910194754600525, "learning_rate": 1.0012123689838493e-06, "loss": 0.2666, "step": 40108 }, { "epoch": 4.077775518503457, "grad_norm": 0.2802135944366455, "learning_rate": 1.0009993327824153e-06, "loss": 0.3009, "step": 40109 }, { "epoch": 4.077877185847906, "grad_norm": 0.31689903140068054, "learning_rate": 1.0007863167269977e-06, "loss": 0.2973, "step": 40110 }, { "epoch": 4.077978853192355, "grad_norm": 0.2764396071434021, "learning_rate": 1.0005733208186686e-06, "loss": 0.3141, "step": 40111 }, { "epoch": 4.078080520536804, "grad_norm": 0.27107685804367065, "learning_rate": 1.0003603450584987e-06, "loss": 0.3028, "step": 40112 }, { "epoch": 4.078182187881253, "grad_norm": 0.2702987492084503, "learning_rate": 1.000147389447565e-06, "loss": 0.2917, "step": 40113 }, { "epoch": 4.078283855225702, "grad_norm": 0.278534471988678, "learning_rate": 9.999344539869377e-07, "loss": 0.3317, "step": 40114 }, { "epoch": 4.078385522570151, "grad_norm": 0.30400121212005615, "learning_rate": 9.997215386776894e-07, "loss": 0.2901, "step": 40115 }, { "epoch": 4.0784871899146, "grad_norm": 0.29748696088790894, "learning_rate": 9.99508643520894e-07, "loss": 0.3053, "step": 40116 }, { "epoch": 4.0785888572590485, "grad_norm": 0.281757652759552, "learning_rate": 9.992957685176235e-07, "loss": 0.3053, "step": 40117 }, { "epoch": 4.0786905246034975, "grad_norm": 0.2805193364620209, "learning_rate": 9.990829136689507e-07, "loss": 0.3079, "step": 40118 }, { "epoch": 4.078792191947946, "grad_norm": 0.27664715051651, "learning_rate": 9.988700789759454e-07, "loss": 0.2993, "step": 40119 }, { "epoch": 4.078893859292395, "grad_norm": 0.27232834696769714, "learning_rate": 9.986572644396836e-07, "loss": 0.2659, "step": 40120 }, { "epoch": 4.078995526636844, "grad_norm": 0.2842988669872284, "learning_rate": 9.984444700612356e-07, "loss": 0.3095, "step": 40121 }, { "epoch": 4.079097193981293, "grad_norm": 0.2651505768299103, "learning_rate": 9.982316958416722e-07, "loss": 0.3335, "step": 40122 }, { "epoch": 4.079198861325742, "grad_norm": 0.2852228581905365, "learning_rate": 9.980189417820674e-07, "loss": 0.2719, "step": 40123 }, { "epoch": 4.079300528670191, "grad_norm": 0.27118977904319763, "learning_rate": 9.978062078834926e-07, "loss": 0.3057, "step": 40124 }, { "epoch": 4.07940219601464, "grad_norm": 0.2899613082408905, "learning_rate": 9.975934941470177e-07, "loss": 0.312, "step": 40125 }, { "epoch": 4.079503863359089, "grad_norm": 0.2548280954360962, "learning_rate": 9.973808005737168e-07, "loss": 0.3205, "step": 40126 }, { "epoch": 4.079605530703538, "grad_norm": 0.27591225504875183, "learning_rate": 9.971681271646605e-07, "loss": 0.3297, "step": 40127 }, { "epoch": 4.079707198047987, "grad_norm": 0.2855781614780426, "learning_rate": 9.969554739209197e-07, "loss": 0.2843, "step": 40128 }, { "epoch": 4.079808865392436, "grad_norm": 0.28090107440948486, "learning_rate": 9.967428408435647e-07, "loss": 0.2764, "step": 40129 }, { "epoch": 4.079910532736885, "grad_norm": 0.30429720878601074, "learning_rate": 9.965302279336691e-07, "loss": 0.3034, "step": 40130 }, { "epoch": 4.080012200081334, "grad_norm": 0.2768023610115051, "learning_rate": 9.963176351923032e-07, "loss": 0.3084, "step": 40131 }, { "epoch": 4.080113867425783, "grad_norm": 0.2775043547153473, "learning_rate": 9.96105062620536e-07, "loss": 0.3153, "step": 40132 }, { "epoch": 4.080215534770232, "grad_norm": 0.276511013507843, "learning_rate": 9.958925102194417e-07, "loss": 0.3151, "step": 40133 }, { "epoch": 4.0803172021146805, "grad_norm": 0.28007078170776367, "learning_rate": 9.956799779900893e-07, "loss": 0.319, "step": 40134 }, { "epoch": 4.0804188694591295, "grad_norm": 0.26952144503593445, "learning_rate": 9.954674659335483e-07, "loss": 0.3104, "step": 40135 }, { "epoch": 4.080520536803578, "grad_norm": 0.2504788637161255, "learning_rate": 9.952549740508916e-07, "loss": 0.258, "step": 40136 }, { "epoch": 4.080622204148027, "grad_norm": 0.27493083477020264, "learning_rate": 9.950425023431888e-07, "loss": 0.2801, "step": 40137 }, { "epoch": 4.080723871492476, "grad_norm": 0.27516984939575195, "learning_rate": 9.948300508115104e-07, "loss": 0.3422, "step": 40138 }, { "epoch": 4.080825538836925, "grad_norm": 0.3038153350353241, "learning_rate": 9.946176194569246e-07, "loss": 0.2953, "step": 40139 }, { "epoch": 4.080927206181374, "grad_norm": 0.29776531457901, "learning_rate": 9.94405208280505e-07, "loss": 0.2779, "step": 40140 }, { "epoch": 4.081028873525823, "grad_norm": 0.26531660556793213, "learning_rate": 9.941928172833205e-07, "loss": 0.294, "step": 40141 }, { "epoch": 4.081130540870272, "grad_norm": 0.2939653992652893, "learning_rate": 9.939804464664387e-07, "loss": 0.3113, "step": 40142 }, { "epoch": 4.081232208214722, "grad_norm": 0.28030070662498474, "learning_rate": 9.93768095830933e-07, "loss": 0.3187, "step": 40143 }, { "epoch": 4.081333875559171, "grad_norm": 0.2836279273033142, "learning_rate": 9.93555765377872e-07, "loss": 0.3045, "step": 40144 }, { "epoch": 4.08143554290362, "grad_norm": 0.27081218361854553, "learning_rate": 9.933434551083233e-07, "loss": 0.3091, "step": 40145 }, { "epoch": 4.081537210248069, "grad_norm": 0.28053927421569824, "learning_rate": 9.931311650233594e-07, "loss": 0.2874, "step": 40146 }, { "epoch": 4.081638877592518, "grad_norm": 0.26770392060279846, "learning_rate": 9.929188951240487e-07, "loss": 0.3166, "step": 40147 }, { "epoch": 4.081740544936967, "grad_norm": 0.2662920653820038, "learning_rate": 9.927066454114603e-07, "loss": 0.2905, "step": 40148 }, { "epoch": 4.0818422122814155, "grad_norm": 0.29967936873435974, "learning_rate": 9.924944158866618e-07, "loss": 0.2743, "step": 40149 }, { "epoch": 4.0819438796258645, "grad_norm": 0.2721855640411377, "learning_rate": 9.922822065507258e-07, "loss": 0.2981, "step": 40150 }, { "epoch": 4.082045546970313, "grad_norm": 0.2789684534072876, "learning_rate": 9.9207001740472e-07, "loss": 0.2782, "step": 40151 }, { "epoch": 4.082147214314762, "grad_norm": 0.28531238436698914, "learning_rate": 9.91857848449711e-07, "loss": 0.2991, "step": 40152 }, { "epoch": 4.082248881659211, "grad_norm": 0.26822102069854736, "learning_rate": 9.916456996867712e-07, "loss": 0.3042, "step": 40153 }, { "epoch": 4.08235054900366, "grad_norm": 0.2869763672351837, "learning_rate": 9.914335711169681e-07, "loss": 0.324, "step": 40154 }, { "epoch": 4.082452216348109, "grad_norm": 0.28887325525283813, "learning_rate": 9.91221462741369e-07, "loss": 0.3146, "step": 40155 }, { "epoch": 4.082553883692558, "grad_norm": 0.294888436794281, "learning_rate": 9.910093745610444e-07, "loss": 0.2786, "step": 40156 }, { "epoch": 4.082655551037007, "grad_norm": 0.25931504368782043, "learning_rate": 9.90797306577062e-07, "loss": 0.2796, "step": 40157 }, { "epoch": 4.082757218381456, "grad_norm": 0.28198519349098206, "learning_rate": 9.9058525879049e-07, "loss": 0.258, "step": 40158 }, { "epoch": 4.082858885725905, "grad_norm": 0.2731848657131195, "learning_rate": 9.903732312023956e-07, "loss": 0.2842, "step": 40159 }, { "epoch": 4.082960553070354, "grad_norm": 0.28335270285606384, "learning_rate": 9.90161223813849e-07, "loss": 0.2845, "step": 40160 }, { "epoch": 4.083062220414803, "grad_norm": 0.2526963949203491, "learning_rate": 9.899492366259172e-07, "loss": 0.3219, "step": 40161 }, { "epoch": 4.083163887759252, "grad_norm": 0.27325496077537537, "learning_rate": 9.897372696396672e-07, "loss": 0.3023, "step": 40162 }, { "epoch": 4.083265555103701, "grad_norm": 0.26624467968940735, "learning_rate": 9.89525322856169e-07, "loss": 0.3036, "step": 40163 }, { "epoch": 4.08336722244815, "grad_norm": 0.2650458514690399, "learning_rate": 9.893133962764894e-07, "loss": 0.3062, "step": 40164 }, { "epoch": 4.083468889792599, "grad_norm": 0.299309104681015, "learning_rate": 9.891014899016943e-07, "loss": 0.3098, "step": 40165 }, { "epoch": 4.0835705571370475, "grad_norm": 0.2938026785850525, "learning_rate": 9.888896037328544e-07, "loss": 0.2946, "step": 40166 }, { "epoch": 4.0836722244814965, "grad_norm": 0.2889057993888855, "learning_rate": 9.886777377710354e-07, "loss": 0.3009, "step": 40167 }, { "epoch": 4.083773891825945, "grad_norm": 0.2847854197025299, "learning_rate": 9.884658920173045e-07, "loss": 0.2995, "step": 40168 }, { "epoch": 4.083875559170394, "grad_norm": 0.2775367796421051, "learning_rate": 9.88254066472728e-07, "loss": 0.3472, "step": 40169 }, { "epoch": 4.083977226514843, "grad_norm": 0.28648433089256287, "learning_rate": 9.88042261138375e-07, "loss": 0.2944, "step": 40170 }, { "epoch": 4.084078893859292, "grad_norm": 0.26842889189720154, "learning_rate": 9.878304760153123e-07, "loss": 0.3466, "step": 40171 }, { "epoch": 4.084180561203741, "grad_norm": 0.2843092679977417, "learning_rate": 9.876187111046048e-07, "loss": 0.2905, "step": 40172 }, { "epoch": 4.08428222854819, "grad_norm": 0.25452113151550293, "learning_rate": 9.874069664073232e-07, "loss": 0.2707, "step": 40173 }, { "epoch": 4.084383895892639, "grad_norm": 0.2763492166996002, "learning_rate": 9.871952419245289e-07, "loss": 0.3164, "step": 40174 }, { "epoch": 4.084485563237088, "grad_norm": 0.2964461147785187, "learning_rate": 9.869835376572918e-07, "loss": 0.2739, "step": 40175 }, { "epoch": 4.084587230581537, "grad_norm": 0.30152955651283264, "learning_rate": 9.867718536066794e-07, "loss": 0.286, "step": 40176 }, { "epoch": 4.084688897925986, "grad_norm": 0.28424614667892456, "learning_rate": 9.865601897737565e-07, "loss": 0.3194, "step": 40177 }, { "epoch": 4.084790565270435, "grad_norm": 0.29474255442619324, "learning_rate": 9.863485461595896e-07, "loss": 0.2826, "step": 40178 }, { "epoch": 4.084892232614884, "grad_norm": 0.26721489429473877, "learning_rate": 9.86136922765244e-07, "loss": 0.3187, "step": 40179 }, { "epoch": 4.084993899959333, "grad_norm": 0.2915484607219696, "learning_rate": 9.859253195917872e-07, "loss": 0.3236, "step": 40180 }, { "epoch": 4.085095567303782, "grad_norm": 0.29394009709358215, "learning_rate": 9.857137366402853e-07, "loss": 0.2917, "step": 40181 }, { "epoch": 4.085197234648231, "grad_norm": 0.2951553761959076, "learning_rate": 9.855021739118027e-07, "loss": 0.3205, "step": 40182 }, { "epoch": 4.0852989019926795, "grad_norm": 0.2764905095100403, "learning_rate": 9.852906314074084e-07, "loss": 0.2997, "step": 40183 }, { "epoch": 4.0854005693371285, "grad_norm": 0.29678019881248474, "learning_rate": 9.85079109128163e-07, "loss": 0.2635, "step": 40184 }, { "epoch": 4.085502236681578, "grad_norm": 0.2950637638568878, "learning_rate": 9.84867607075135e-07, "loss": 0.3225, "step": 40185 }, { "epoch": 4.085603904026027, "grad_norm": 0.284368634223938, "learning_rate": 9.846561252493913e-07, "loss": 0.2998, "step": 40186 }, { "epoch": 4.085705571370476, "grad_norm": 0.28606483340263367, "learning_rate": 9.844446636519962e-07, "loss": 0.2869, "step": 40187 }, { "epoch": 4.085807238714925, "grad_norm": 0.27196070551872253, "learning_rate": 9.842332222840144e-07, "loss": 0.2885, "step": 40188 }, { "epoch": 4.085908906059374, "grad_norm": 0.27090829610824585, "learning_rate": 9.840218011465096e-07, "loss": 0.2918, "step": 40189 }, { "epoch": 4.086010573403823, "grad_norm": 0.28414079546928406, "learning_rate": 9.838104002405508e-07, "loss": 0.2663, "step": 40190 }, { "epoch": 4.086112240748272, "grad_norm": 0.2603486180305481, "learning_rate": 9.835990195672e-07, "loss": 0.2952, "step": 40191 }, { "epoch": 4.086213908092721, "grad_norm": 0.2744181752204895, "learning_rate": 9.83387659127522e-07, "loss": 0.3218, "step": 40192 }, { "epoch": 4.08631557543717, "grad_norm": 0.2646232843399048, "learning_rate": 9.831763189225851e-07, "loss": 0.2692, "step": 40193 }, { "epoch": 4.086417242781619, "grad_norm": 0.2876320779323578, "learning_rate": 9.829649989534485e-07, "loss": 0.2804, "step": 40194 }, { "epoch": 4.086518910126068, "grad_norm": 0.27720004320144653, "learning_rate": 9.827536992211794e-07, "loss": 0.2963, "step": 40195 }, { "epoch": 4.086620577470517, "grad_norm": 0.2862872779369354, "learning_rate": 9.825424197268457e-07, "loss": 0.3167, "step": 40196 }, { "epoch": 4.086722244814966, "grad_norm": 0.26846373081207275, "learning_rate": 9.823311604715058e-07, "loss": 0.2567, "step": 40197 }, { "epoch": 4.0868239121594145, "grad_norm": 0.27206358313560486, "learning_rate": 9.821199214562283e-07, "loss": 0.3027, "step": 40198 }, { "epoch": 4.0869255795038635, "grad_norm": 0.26135313510894775, "learning_rate": 9.81908702682074e-07, "loss": 0.2692, "step": 40199 }, { "epoch": 4.087027246848312, "grad_norm": 0.275696337223053, "learning_rate": 9.816975041501102e-07, "loss": 0.3297, "step": 40200 }, { "epoch": 4.087128914192761, "grad_norm": 0.29172369837760925, "learning_rate": 9.814863258614e-07, "loss": 0.2969, "step": 40201 }, { "epoch": 4.08723058153721, "grad_norm": 0.27560263872146606, "learning_rate": 9.812751678170053e-07, "loss": 0.3518, "step": 40202 }, { "epoch": 4.087332248881659, "grad_norm": 0.24992994964122772, "learning_rate": 9.810640300179935e-07, "loss": 0.278, "step": 40203 }, { "epoch": 4.087433916226108, "grad_norm": 0.26081761717796326, "learning_rate": 9.808529124654237e-07, "loss": 0.3128, "step": 40204 }, { "epoch": 4.087535583570557, "grad_norm": 0.2796625792980194, "learning_rate": 9.80641815160362e-07, "loss": 0.3119, "step": 40205 }, { "epoch": 4.087637250915006, "grad_norm": 0.2753079831600189, "learning_rate": 9.80430738103874e-07, "loss": 0.2975, "step": 40206 }, { "epoch": 4.087738918259455, "grad_norm": 0.26710274815559387, "learning_rate": 9.802196812970183e-07, "loss": 0.3166, "step": 40207 }, { "epoch": 4.087840585603904, "grad_norm": 0.25955817103385925, "learning_rate": 9.80008644740862e-07, "loss": 0.2989, "step": 40208 }, { "epoch": 4.087942252948353, "grad_norm": 0.3091171383857727, "learning_rate": 9.797976284364668e-07, "loss": 0.2746, "step": 40209 }, { "epoch": 4.088043920292802, "grad_norm": 0.27699050307273865, "learning_rate": 9.795866323848946e-07, "loss": 0.325, "step": 40210 }, { "epoch": 4.088145587637251, "grad_norm": 0.26259297132492065, "learning_rate": 9.793756565872103e-07, "loss": 0.2868, "step": 40211 }, { "epoch": 4.0882472549817, "grad_norm": 0.2617497146129608, "learning_rate": 9.791647010444754e-07, "loss": 0.3174, "step": 40212 }, { "epoch": 4.088348922326149, "grad_norm": 0.27304747700691223, "learning_rate": 9.789537657577552e-07, "loss": 0.2648, "step": 40213 }, { "epoch": 4.088450589670598, "grad_norm": 0.2603435814380646, "learning_rate": 9.787428507281083e-07, "loss": 0.3169, "step": 40214 }, { "epoch": 4.0885522570150465, "grad_norm": 0.27972549200057983, "learning_rate": 9.785319559565992e-07, "loss": 0.2945, "step": 40215 }, { "epoch": 4.0886539243594955, "grad_norm": 0.29798683524131775, "learning_rate": 9.783210814442929e-07, "loss": 0.283, "step": 40216 }, { "epoch": 4.088755591703944, "grad_norm": 0.2887590527534485, "learning_rate": 9.781102271922466e-07, "loss": 0.3099, "step": 40217 }, { "epoch": 4.088857259048393, "grad_norm": 0.2679204046726227, "learning_rate": 9.778993932015267e-07, "loss": 0.2952, "step": 40218 }, { "epoch": 4.088958926392842, "grad_norm": 0.2651809751987457, "learning_rate": 9.776885794731938e-07, "loss": 0.2999, "step": 40219 }, { "epoch": 4.089060593737291, "grad_norm": 0.3037179112434387, "learning_rate": 9.77477786008309e-07, "loss": 0.2921, "step": 40220 }, { "epoch": 4.08916226108174, "grad_norm": 0.2873966693878174, "learning_rate": 9.772670128079364e-07, "loss": 0.2619, "step": 40221 }, { "epoch": 4.089263928426189, "grad_norm": 0.27885550260543823, "learning_rate": 9.770562598731349e-07, "loss": 0.2897, "step": 40222 }, { "epoch": 4.089365595770638, "grad_norm": 0.261705607175827, "learning_rate": 9.76845527204971e-07, "loss": 0.2987, "step": 40223 }, { "epoch": 4.089467263115087, "grad_norm": 0.2635916471481323, "learning_rate": 9.766348148045006e-07, "loss": 0.315, "step": 40224 }, { "epoch": 4.089568930459537, "grad_norm": 0.29772910475730896, "learning_rate": 9.764241226727878e-07, "loss": 0.2999, "step": 40225 }, { "epoch": 4.089670597803986, "grad_norm": 0.28583618998527527, "learning_rate": 9.762134508108968e-07, "loss": 0.295, "step": 40226 }, { "epoch": 4.089772265148435, "grad_norm": 0.2620798349380493, "learning_rate": 9.760027992198833e-07, "loss": 0.298, "step": 40227 }, { "epoch": 4.089873932492884, "grad_norm": 0.30619028210639954, "learning_rate": 9.75792167900813e-07, "loss": 0.2807, "step": 40228 }, { "epoch": 4.089975599837333, "grad_norm": 0.28344836831092834, "learning_rate": 9.75581556854746e-07, "loss": 0.3129, "step": 40229 }, { "epoch": 4.0900772671817816, "grad_norm": 0.26438388228416443, "learning_rate": 9.753709660827409e-07, "loss": 0.2881, "step": 40230 }, { "epoch": 4.0901789345262305, "grad_norm": 0.2788963317871094, "learning_rate": 9.751603955858618e-07, "loss": 0.2828, "step": 40231 }, { "epoch": 4.090280601870679, "grad_norm": 0.2756213843822479, "learning_rate": 9.749498453651684e-07, "loss": 0.3381, "step": 40232 }, { "epoch": 4.090382269215128, "grad_norm": 0.27981334924697876, "learning_rate": 9.747393154217211e-07, "loss": 0.2856, "step": 40233 }, { "epoch": 4.090483936559577, "grad_norm": 0.32132428884506226, "learning_rate": 9.745288057565789e-07, "loss": 0.2897, "step": 40234 }, { "epoch": 4.090585603904026, "grad_norm": 0.27054286003112793, "learning_rate": 9.743183163708042e-07, "loss": 0.2951, "step": 40235 }, { "epoch": 4.090687271248475, "grad_norm": 0.2714291512966156, "learning_rate": 9.741078472654602e-07, "loss": 0.343, "step": 40236 }, { "epoch": 4.090788938592924, "grad_norm": 0.28543537855148315, "learning_rate": 9.73897398441601e-07, "loss": 0.31, "step": 40237 }, { "epoch": 4.090890605937373, "grad_norm": 0.27062350511550903, "learning_rate": 9.736869699002916e-07, "loss": 0.3372, "step": 40238 }, { "epoch": 4.090992273281822, "grad_norm": 0.31207576394081116, "learning_rate": 9.7347656164259e-07, "loss": 0.2965, "step": 40239 }, { "epoch": 4.091093940626271, "grad_norm": 0.2713165581226349, "learning_rate": 9.73266173669556e-07, "loss": 0.2747, "step": 40240 }, { "epoch": 4.09119560797072, "grad_norm": 0.3042798340320587, "learning_rate": 9.730558059822509e-07, "loss": 0.3092, "step": 40241 }, { "epoch": 4.091297275315169, "grad_norm": 0.2692211866378784, "learning_rate": 9.728454585817337e-07, "loss": 0.2985, "step": 40242 }, { "epoch": 4.091398942659618, "grad_norm": 0.2800125479698181, "learning_rate": 9.726351314690647e-07, "loss": 0.293, "step": 40243 }, { "epoch": 4.091500610004067, "grad_norm": 0.27431520819664, "learning_rate": 9.72424824645301e-07, "loss": 0.2991, "step": 40244 }, { "epoch": 4.091602277348516, "grad_norm": 0.2655903398990631, "learning_rate": 9.722145381115038e-07, "loss": 0.2685, "step": 40245 }, { "epoch": 4.091703944692965, "grad_norm": 0.2791074514389038, "learning_rate": 9.720042718687355e-07, "loss": 0.2718, "step": 40246 }, { "epoch": 4.0918056120374136, "grad_norm": 0.258779376745224, "learning_rate": 9.717940259180497e-07, "loss": 0.309, "step": 40247 }, { "epoch": 4.0919072793818625, "grad_norm": 0.2923979163169861, "learning_rate": 9.7158380026051e-07, "loss": 0.3131, "step": 40248 }, { "epoch": 4.092008946726311, "grad_norm": 0.29612997174263, "learning_rate": 9.713735948971735e-07, "loss": 0.2958, "step": 40249 }, { "epoch": 4.09211061407076, "grad_norm": 0.26425716280937195, "learning_rate": 9.711634098290983e-07, "loss": 0.3001, "step": 40250 }, { "epoch": 4.092212281415209, "grad_norm": 0.2834082543849945, "learning_rate": 9.70953245057346e-07, "loss": 0.2894, "step": 40251 }, { "epoch": 4.092313948759658, "grad_norm": 0.2661752700805664, "learning_rate": 9.707431005829731e-07, "loss": 0.308, "step": 40252 }, { "epoch": 4.092415616104107, "grad_norm": 0.27518513798713684, "learning_rate": 9.705329764070398e-07, "loss": 0.2931, "step": 40253 }, { "epoch": 4.092517283448556, "grad_norm": 0.27719029784202576, "learning_rate": 9.703228725306018e-07, "loss": 0.3139, "step": 40254 }, { "epoch": 4.092618950793005, "grad_norm": 0.2735072374343872, "learning_rate": 9.701127889547214e-07, "loss": 0.3074, "step": 40255 }, { "epoch": 4.092720618137454, "grad_norm": 0.2717437744140625, "learning_rate": 9.699027256804549e-07, "loss": 0.2834, "step": 40256 }, { "epoch": 4.092822285481903, "grad_norm": 0.2983575761318207, "learning_rate": 9.696926827088594e-07, "loss": 0.2976, "step": 40257 }, { "epoch": 4.092923952826352, "grad_norm": 0.27150630950927734, "learning_rate": 9.694826600409957e-07, "loss": 0.2836, "step": 40258 }, { "epoch": 4.093025620170801, "grad_norm": 0.2993800640106201, "learning_rate": 9.69272657677921e-07, "loss": 0.297, "step": 40259 }, { "epoch": 4.09312728751525, "grad_norm": 0.29537123441696167, "learning_rate": 9.690626756206911e-07, "loss": 0.3194, "step": 40260 }, { "epoch": 4.093228954859699, "grad_norm": 0.2802846431732178, "learning_rate": 9.688527138703668e-07, "loss": 0.3015, "step": 40261 }, { "epoch": 4.093330622204148, "grad_norm": 0.29737716913223267, "learning_rate": 9.686427724280046e-07, "loss": 0.3016, "step": 40262 }, { "epoch": 4.093432289548597, "grad_norm": 0.2882574200630188, "learning_rate": 9.684328512946622e-07, "loss": 0.3172, "step": 40263 }, { "epoch": 4.0935339568930456, "grad_norm": 0.25536468625068665, "learning_rate": 9.682229504713958e-07, "loss": 0.325, "step": 40264 }, { "epoch": 4.0936356242374945, "grad_norm": 0.2836877107620239, "learning_rate": 9.68013069959265e-07, "loss": 0.3028, "step": 40265 }, { "epoch": 4.093737291581943, "grad_norm": 0.2755679190158844, "learning_rate": 9.678032097593265e-07, "loss": 0.2802, "step": 40266 }, { "epoch": 4.093838958926393, "grad_norm": 0.29223257303237915, "learning_rate": 9.675933698726365e-07, "loss": 0.3234, "step": 40267 }, { "epoch": 4.093940626270842, "grad_norm": 0.2663109004497528, "learning_rate": 9.673835503002532e-07, "loss": 0.3213, "step": 40268 }, { "epoch": 4.094042293615291, "grad_norm": 0.26992589235305786, "learning_rate": 9.671737510432338e-07, "loss": 0.2999, "step": 40269 }, { "epoch": 4.09414396095974, "grad_norm": 0.2923157811164856, "learning_rate": 9.66963972102633e-07, "loss": 0.2987, "step": 40270 }, { "epoch": 4.094245628304189, "grad_norm": 0.2763897478580475, "learning_rate": 9.66754213479511e-07, "loss": 0.2667, "step": 40271 }, { "epoch": 4.094347295648638, "grad_norm": 0.29893550276756287, "learning_rate": 9.665444751749225e-07, "loss": 0.3068, "step": 40272 }, { "epoch": 4.094448962993087, "grad_norm": 0.27682292461395264, "learning_rate": 9.66334757189925e-07, "loss": 0.2971, "step": 40273 }, { "epoch": 4.094550630337536, "grad_norm": 0.28276655077934265, "learning_rate": 9.661250595255728e-07, "loss": 0.2858, "step": 40274 }, { "epoch": 4.094652297681985, "grad_norm": 0.297875314950943, "learning_rate": 9.659153821829253e-07, "loss": 0.2846, "step": 40275 }, { "epoch": 4.094753965026434, "grad_norm": 0.28559789061546326, "learning_rate": 9.657057251630376e-07, "loss": 0.331, "step": 40276 }, { "epoch": 4.094855632370883, "grad_norm": 0.2787604033946991, "learning_rate": 9.654960884669639e-07, "loss": 0.2665, "step": 40277 }, { "epoch": 4.094957299715332, "grad_norm": 0.28321850299835205, "learning_rate": 9.652864720957638e-07, "loss": 0.2921, "step": 40278 }, { "epoch": 4.095058967059781, "grad_norm": 0.270203560590744, "learning_rate": 9.65076876050492e-07, "loss": 0.3312, "step": 40279 }, { "epoch": 4.0951606344042295, "grad_norm": 0.3024251461029053, "learning_rate": 9.648673003322034e-07, "loss": 0.2796, "step": 40280 }, { "epoch": 4.0952623017486784, "grad_norm": 0.27877652645111084, "learning_rate": 9.646577449419537e-07, "loss": 0.2704, "step": 40281 }, { "epoch": 4.095363969093127, "grad_norm": 0.2852129340171814, "learning_rate": 9.644482098808007e-07, "loss": 0.3529, "step": 40282 }, { "epoch": 4.095465636437576, "grad_norm": 0.28686416149139404, "learning_rate": 9.642386951497979e-07, "loss": 0.2782, "step": 40283 }, { "epoch": 4.095567303782025, "grad_norm": 0.2643040120601654, "learning_rate": 9.640292007500007e-07, "loss": 0.3065, "step": 40284 }, { "epoch": 4.095668971126474, "grad_norm": 0.26927056908607483, "learning_rate": 9.638197266824667e-07, "loss": 0.3013, "step": 40285 }, { "epoch": 4.095770638470923, "grad_norm": 0.2704276144504547, "learning_rate": 9.636102729482493e-07, "loss": 0.2922, "step": 40286 }, { "epoch": 4.095872305815372, "grad_norm": 0.319134920835495, "learning_rate": 9.634008395484034e-07, "loss": 0.3199, "step": 40287 }, { "epoch": 4.095973973159821, "grad_norm": 0.29462766647338867, "learning_rate": 9.63191426483986e-07, "loss": 0.2894, "step": 40288 }, { "epoch": 4.09607564050427, "grad_norm": 0.2654893398284912, "learning_rate": 9.629820337560509e-07, "loss": 0.3071, "step": 40289 }, { "epoch": 4.096177307848719, "grad_norm": 0.30465516448020935, "learning_rate": 9.627726613656528e-07, "loss": 0.3068, "step": 40290 }, { "epoch": 4.096278975193168, "grad_norm": 0.2926101088523865, "learning_rate": 9.625633093138453e-07, "loss": 0.2936, "step": 40291 }, { "epoch": 4.096380642537617, "grad_norm": 0.2630941867828369, "learning_rate": 9.62353977601686e-07, "loss": 0.2925, "step": 40292 }, { "epoch": 4.096482309882066, "grad_norm": 0.2724382281303406, "learning_rate": 9.621446662302275e-07, "loss": 0.3177, "step": 40293 }, { "epoch": 4.096583977226515, "grad_norm": 0.27899742126464844, "learning_rate": 9.619353752005234e-07, "loss": 0.332, "step": 40294 }, { "epoch": 4.096685644570964, "grad_norm": 0.25974878668785095, "learning_rate": 9.617261045136311e-07, "loss": 0.3085, "step": 40295 }, { "epoch": 4.096787311915413, "grad_norm": 0.28179866075515747, "learning_rate": 9.615168541706028e-07, "loss": 0.2727, "step": 40296 }, { "epoch": 4.0968889792598615, "grad_norm": 0.26931363344192505, "learning_rate": 9.613076241724912e-07, "loss": 0.2722, "step": 40297 }, { "epoch": 4.0969906466043104, "grad_norm": 0.2595825791358948, "learning_rate": 9.610984145203533e-07, "loss": 0.2829, "step": 40298 }, { "epoch": 4.097092313948759, "grad_norm": 0.2688763737678528, "learning_rate": 9.608892252152425e-07, "loss": 0.2667, "step": 40299 }, { "epoch": 4.097193981293208, "grad_norm": 0.28807133436203003, "learning_rate": 9.606800562582115e-07, "loss": 0.3287, "step": 40300 }, { "epoch": 4.097295648637657, "grad_norm": 0.2787977457046509, "learning_rate": 9.604709076503133e-07, "loss": 0.2694, "step": 40301 }, { "epoch": 4.097397315982106, "grad_norm": 0.2594098150730133, "learning_rate": 9.602617793926039e-07, "loss": 0.305, "step": 40302 }, { "epoch": 4.097498983326555, "grad_norm": 0.29346176981925964, "learning_rate": 9.600526714861358e-07, "loss": 0.329, "step": 40303 }, { "epoch": 4.097600650671004, "grad_norm": 0.28853660821914673, "learning_rate": 9.598435839319608e-07, "loss": 0.3148, "step": 40304 }, { "epoch": 4.097702318015453, "grad_norm": 0.2873685359954834, "learning_rate": 9.596345167311349e-07, "loss": 0.3104, "step": 40305 }, { "epoch": 4.097803985359902, "grad_norm": 0.27901381254196167, "learning_rate": 9.594254698847105e-07, "loss": 0.2871, "step": 40306 }, { "epoch": 4.097905652704352, "grad_norm": 0.26308882236480713, "learning_rate": 9.59216443393739e-07, "loss": 0.3063, "step": 40307 }, { "epoch": 4.098007320048801, "grad_norm": 0.28440606594085693, "learning_rate": 9.590074372592762e-07, "loss": 0.305, "step": 40308 }, { "epoch": 4.09810898739325, "grad_norm": 0.28528866171836853, "learning_rate": 9.587984514823734e-07, "loss": 0.2879, "step": 40309 }, { "epoch": 4.098210654737699, "grad_norm": 0.2915954291820526, "learning_rate": 9.585894860640844e-07, "loss": 0.2854, "step": 40310 }, { "epoch": 4.098312322082148, "grad_norm": 0.2588663697242737, "learning_rate": 9.583805410054592e-07, "loss": 0.3064, "step": 40311 }, { "epoch": 4.0984139894265965, "grad_norm": 0.2738896608352661, "learning_rate": 9.581716163075538e-07, "loss": 0.2736, "step": 40312 }, { "epoch": 4.0985156567710455, "grad_norm": 0.2958765923976898, "learning_rate": 9.57962711971419e-07, "loss": 0.2798, "step": 40313 }, { "epoch": 4.098617324115494, "grad_norm": 0.3092423677444458, "learning_rate": 9.577538279981069e-07, "loss": 0.2854, "step": 40314 }, { "epoch": 4.098718991459943, "grad_norm": 0.2900974452495575, "learning_rate": 9.575449643886714e-07, "loss": 0.3123, "step": 40315 }, { "epoch": 4.098820658804392, "grad_norm": 0.29593056440353394, "learning_rate": 9.573361211441635e-07, "loss": 0.2954, "step": 40316 }, { "epoch": 4.098922326148841, "grad_norm": 0.2632399797439575, "learning_rate": 9.571272982656342e-07, "loss": 0.3253, "step": 40317 }, { "epoch": 4.09902399349329, "grad_norm": 0.33171346783638, "learning_rate": 9.569184957541388e-07, "loss": 0.3009, "step": 40318 }, { "epoch": 4.099125660837739, "grad_norm": 0.2810443043708801, "learning_rate": 9.567097136107267e-07, "loss": 0.3107, "step": 40319 }, { "epoch": 4.099227328182188, "grad_norm": 0.27923017740249634, "learning_rate": 9.565009518364504e-07, "loss": 0.2712, "step": 40320 }, { "epoch": 4.099328995526637, "grad_norm": 0.27680084109306335, "learning_rate": 9.5629221043236e-07, "loss": 0.3171, "step": 40321 }, { "epoch": 4.099430662871086, "grad_norm": 0.3080117702484131, "learning_rate": 9.560834893995096e-07, "loss": 0.2742, "step": 40322 }, { "epoch": 4.099532330215535, "grad_norm": 0.28323978185653687, "learning_rate": 9.558747887389503e-07, "loss": 0.307, "step": 40323 }, { "epoch": 4.099633997559984, "grad_norm": 0.27963584661483765, "learning_rate": 9.55666108451731e-07, "loss": 0.2732, "step": 40324 }, { "epoch": 4.099735664904433, "grad_norm": 0.2918391227722168, "learning_rate": 9.554574485389074e-07, "loss": 0.309, "step": 40325 }, { "epoch": 4.099837332248882, "grad_norm": 0.2877520024776459, "learning_rate": 9.552488090015254e-07, "loss": 0.3071, "step": 40326 }, { "epoch": 4.099938999593331, "grad_norm": 0.29645800590515137, "learning_rate": 9.55040189840639e-07, "loss": 0.2818, "step": 40327 }, { "epoch": 4.10004066693778, "grad_norm": 0.27175918221473694, "learning_rate": 9.548315910572997e-07, "loss": 0.3222, "step": 40328 }, { "epoch": 4.1001423342822285, "grad_norm": 0.27222493290901184, "learning_rate": 9.54623012652558e-07, "loss": 0.3266, "step": 40329 }, { "epoch": 4.1002440016266775, "grad_norm": 0.2702983021736145, "learning_rate": 9.544144546274643e-07, "loss": 0.31, "step": 40330 }, { "epoch": 4.100345668971126, "grad_norm": 0.2832641899585724, "learning_rate": 9.542059169830675e-07, "loss": 0.3212, "step": 40331 }, { "epoch": 4.100447336315575, "grad_norm": 0.2818608582019806, "learning_rate": 9.539973997204215e-07, "loss": 0.2898, "step": 40332 }, { "epoch": 4.100549003660024, "grad_norm": 0.28166675567626953, "learning_rate": 9.53788902840575e-07, "loss": 0.2817, "step": 40333 }, { "epoch": 4.100650671004473, "grad_norm": 0.2870791256427765, "learning_rate": 9.535804263445769e-07, "loss": 0.268, "step": 40334 }, { "epoch": 4.100752338348922, "grad_norm": 0.29533231258392334, "learning_rate": 9.53371970233482e-07, "loss": 0.2747, "step": 40335 }, { "epoch": 4.100854005693371, "grad_norm": 0.2694634795188904, "learning_rate": 9.531635345083351e-07, "loss": 0.2903, "step": 40336 }, { "epoch": 4.10095567303782, "grad_norm": 0.26800596714019775, "learning_rate": 9.52955119170188e-07, "loss": 0.3381, "step": 40337 }, { "epoch": 4.101057340382269, "grad_norm": 0.2786995768547058, "learning_rate": 9.527467242200927e-07, "loss": 0.3041, "step": 40338 }, { "epoch": 4.101159007726718, "grad_norm": 0.27364110946655273, "learning_rate": 9.525383496590979e-07, "loss": 0.3203, "step": 40339 }, { "epoch": 4.101260675071167, "grad_norm": 0.27435413002967834, "learning_rate": 9.523299954882531e-07, "loss": 0.3004, "step": 40340 }, { "epoch": 4.101362342415616, "grad_norm": 0.2629163861274719, "learning_rate": 9.521216617086066e-07, "loss": 0.3048, "step": 40341 }, { "epoch": 4.101464009760065, "grad_norm": 0.273490846157074, "learning_rate": 9.519133483212106e-07, "loss": 0.2866, "step": 40342 }, { "epoch": 4.101565677104514, "grad_norm": 0.2791546881198883, "learning_rate": 9.517050553271128e-07, "loss": 0.2864, "step": 40343 }, { "epoch": 4.101667344448963, "grad_norm": 0.2869822084903717, "learning_rate": 9.514967827273619e-07, "loss": 0.3078, "step": 40344 }, { "epoch": 4.101769011793412, "grad_norm": 0.27874431014060974, "learning_rate": 9.512885305230108e-07, "loss": 0.2831, "step": 40345 }, { "epoch": 4.1018706791378605, "grad_norm": 0.28242355585098267, "learning_rate": 9.51080298715103e-07, "loss": 0.2829, "step": 40346 }, { "epoch": 4.1019723464823095, "grad_norm": 0.2860078811645508, "learning_rate": 9.508720873046906e-07, "loss": 0.2951, "step": 40347 }, { "epoch": 4.102074013826758, "grad_norm": 0.2890658378601074, "learning_rate": 9.506638962928255e-07, "loss": 0.318, "step": 40348 }, { "epoch": 4.102175681171208, "grad_norm": 0.2795361578464508, "learning_rate": 9.504557256805503e-07, "loss": 0.3195, "step": 40349 }, { "epoch": 4.102277348515657, "grad_norm": 0.2864740192890167, "learning_rate": 9.502475754689183e-07, "loss": 0.2739, "step": 40350 }, { "epoch": 4.102379015860106, "grad_norm": 0.2924560010433197, "learning_rate": 9.500394456589751e-07, "loss": 0.2672, "step": 40351 }, { "epoch": 4.102480683204555, "grad_norm": 0.28295618295669556, "learning_rate": 9.498313362517724e-07, "loss": 0.3056, "step": 40352 }, { "epoch": 4.102582350549004, "grad_norm": 0.2802540361881256, "learning_rate": 9.496232472483569e-07, "loss": 0.2946, "step": 40353 }, { "epoch": 4.102684017893453, "grad_norm": 0.26583394408226013, "learning_rate": 9.494151786497758e-07, "loss": 0.2812, "step": 40354 }, { "epoch": 4.102785685237902, "grad_norm": 0.26864922046661377, "learning_rate": 9.49207130457081e-07, "loss": 0.3109, "step": 40355 }, { "epoch": 4.102887352582351, "grad_norm": 0.2644868493080139, "learning_rate": 9.489991026713152e-07, "loss": 0.3105, "step": 40356 }, { "epoch": 4.1029890199268, "grad_norm": 0.2984621226787567, "learning_rate": 9.487910952935292e-07, "loss": 0.2927, "step": 40357 }, { "epoch": 4.103090687271249, "grad_norm": 0.2811269760131836, "learning_rate": 9.485831083247738e-07, "loss": 0.2862, "step": 40358 }, { "epoch": 4.103192354615698, "grad_norm": 0.2786339521408081, "learning_rate": 9.483751417660914e-07, "loss": 0.2726, "step": 40359 }, { "epoch": 4.103294021960147, "grad_norm": 0.28839027881622314, "learning_rate": 9.481671956185334e-07, "loss": 0.3164, "step": 40360 }, { "epoch": 4.1033956893045955, "grad_norm": 0.252191960811615, "learning_rate": 9.479592698831458e-07, "loss": 0.2867, "step": 40361 }, { "epoch": 4.1034973566490445, "grad_norm": 0.29314082860946655, "learning_rate": 9.47751364560976e-07, "loss": 0.295, "step": 40362 }, { "epoch": 4.103599023993493, "grad_norm": 0.26855891942977905, "learning_rate": 9.475434796530725e-07, "loss": 0.3166, "step": 40363 }, { "epoch": 4.103700691337942, "grad_norm": 0.30487728118896484, "learning_rate": 9.473356151604807e-07, "loss": 0.3004, "step": 40364 }, { "epoch": 4.103802358682391, "grad_norm": 0.28044772148132324, "learning_rate": 9.471277710842519e-07, "loss": 0.3269, "step": 40365 }, { "epoch": 4.10390402602684, "grad_norm": 0.2900041341781616, "learning_rate": 9.469199474254276e-07, "loss": 0.2691, "step": 40366 }, { "epoch": 4.104005693371289, "grad_norm": 0.27769893407821655, "learning_rate": 9.467121441850574e-07, "loss": 0.2878, "step": 40367 }, { "epoch": 4.104107360715738, "grad_norm": 0.27995359897613525, "learning_rate": 9.465043613641911e-07, "loss": 0.2694, "step": 40368 }, { "epoch": 4.104209028060187, "grad_norm": 0.2721423804759979, "learning_rate": 9.462965989638695e-07, "loss": 0.288, "step": 40369 }, { "epoch": 4.104310695404636, "grad_norm": 0.28165850043296814, "learning_rate": 9.46088856985144e-07, "loss": 0.3118, "step": 40370 }, { "epoch": 4.104412362749085, "grad_norm": 0.2711541950702667, "learning_rate": 9.458811354290592e-07, "loss": 0.3021, "step": 40371 }, { "epoch": 4.104514030093534, "grad_norm": 0.26778194308280945, "learning_rate": 9.456734342966606e-07, "loss": 0.3208, "step": 40372 }, { "epoch": 4.104615697437983, "grad_norm": 0.29773151874542236, "learning_rate": 9.454657535889971e-07, "loss": 0.299, "step": 40373 }, { "epoch": 4.104717364782432, "grad_norm": 0.2608937919139862, "learning_rate": 9.45258093307112e-07, "loss": 0.3086, "step": 40374 }, { "epoch": 4.104819032126881, "grad_norm": 0.2806656062602997, "learning_rate": 9.45050453452056e-07, "loss": 0.2606, "step": 40375 }, { "epoch": 4.10492069947133, "grad_norm": 0.2862858474254608, "learning_rate": 9.448428340248699e-07, "loss": 0.2759, "step": 40376 }, { "epoch": 4.105022366815779, "grad_norm": 0.27698788046836853, "learning_rate": 9.44635235026602e-07, "loss": 0.2981, "step": 40377 }, { "epoch": 4.1051240341602275, "grad_norm": 0.28345751762390137, "learning_rate": 9.444276564583e-07, "loss": 0.2897, "step": 40378 }, { "epoch": 4.1052257015046765, "grad_norm": 0.280073344707489, "learning_rate": 9.442200983210053e-07, "loss": 0.2892, "step": 40379 }, { "epoch": 4.105327368849125, "grad_norm": 0.276056170463562, "learning_rate": 9.440125606157675e-07, "loss": 0.3047, "step": 40380 }, { "epoch": 4.105429036193574, "grad_norm": 0.2749028205871582, "learning_rate": 9.438050433436307e-07, "loss": 0.2966, "step": 40381 }, { "epoch": 4.105530703538023, "grad_norm": 0.26922494173049927, "learning_rate": 9.435975465056385e-07, "loss": 0.3084, "step": 40382 }, { "epoch": 4.105632370882472, "grad_norm": 0.30720722675323486, "learning_rate": 9.433900701028392e-07, "loss": 0.2566, "step": 40383 }, { "epoch": 4.105734038226921, "grad_norm": 0.2838838994503021, "learning_rate": 9.431826141362771e-07, "loss": 0.2983, "step": 40384 }, { "epoch": 4.10583570557137, "grad_norm": 0.26978394389152527, "learning_rate": 9.429751786069968e-07, "loss": 0.2945, "step": 40385 }, { "epoch": 4.105937372915819, "grad_norm": 0.2674615979194641, "learning_rate": 9.427677635160421e-07, "loss": 0.3186, "step": 40386 }, { "epoch": 4.106039040260268, "grad_norm": 0.2898636758327484, "learning_rate": 9.425603688644592e-07, "loss": 0.3032, "step": 40387 }, { "epoch": 4.106140707604717, "grad_norm": 0.2903441786766052, "learning_rate": 9.423529946532956e-07, "loss": 0.287, "step": 40388 }, { "epoch": 4.106242374949167, "grad_norm": 0.29103896021842957, "learning_rate": 9.42145640883591e-07, "loss": 0.2917, "step": 40389 }, { "epoch": 4.106344042293616, "grad_norm": 0.26190829277038574, "learning_rate": 9.419383075563937e-07, "loss": 0.3046, "step": 40390 }, { "epoch": 4.106445709638065, "grad_norm": 0.2878551483154297, "learning_rate": 9.417309946727465e-07, "loss": 0.2937, "step": 40391 }, { "epoch": 4.106547376982514, "grad_norm": 0.2543336749076843, "learning_rate": 9.415237022336926e-07, "loss": 0.3281, "step": 40392 }, { "epoch": 4.1066490443269625, "grad_norm": 0.28133055567741394, "learning_rate": 9.413164302402799e-07, "loss": 0.2719, "step": 40393 }, { "epoch": 4.1067507116714115, "grad_norm": 0.2855818271636963, "learning_rate": 9.4110917869355e-07, "loss": 0.2926, "step": 40394 }, { "epoch": 4.10685237901586, "grad_norm": 0.31520453095436096, "learning_rate": 9.40901947594548e-07, "loss": 0.3283, "step": 40395 }, { "epoch": 4.106954046360309, "grad_norm": 0.2796631455421448, "learning_rate": 9.406947369443153e-07, "loss": 0.2875, "step": 40396 }, { "epoch": 4.107055713704758, "grad_norm": 0.29648086428642273, "learning_rate": 9.404875467439001e-07, "loss": 0.3137, "step": 40397 }, { "epoch": 4.107157381049207, "grad_norm": 0.2960616946220398, "learning_rate": 9.402803769943431e-07, "loss": 0.2995, "step": 40398 }, { "epoch": 4.107259048393656, "grad_norm": 0.2908502519130707, "learning_rate": 9.400732276966884e-07, "loss": 0.2569, "step": 40399 }, { "epoch": 4.107360715738105, "grad_norm": 0.3257843554019928, "learning_rate": 9.398660988519803e-07, "loss": 0.2989, "step": 40400 }, { "epoch": 4.107462383082554, "grad_norm": 0.28870120644569397, "learning_rate": 9.39658990461263e-07, "loss": 0.2993, "step": 40401 }, { "epoch": 4.107564050427003, "grad_norm": 0.25587546825408936, "learning_rate": 9.394519025255766e-07, "loss": 0.302, "step": 40402 }, { "epoch": 4.107665717771452, "grad_norm": 0.28854480385780334, "learning_rate": 9.392448350459682e-07, "loss": 0.2903, "step": 40403 }, { "epoch": 4.107767385115901, "grad_norm": 0.2646273076534271, "learning_rate": 9.390377880234796e-07, "loss": 0.3176, "step": 40404 }, { "epoch": 4.10786905246035, "grad_norm": 0.25556039810180664, "learning_rate": 9.388307614591535e-07, "loss": 0.3405, "step": 40405 }, { "epoch": 4.107970719804799, "grad_norm": 0.27254822850227356, "learning_rate": 9.386237553540317e-07, "loss": 0.3103, "step": 40406 }, { "epoch": 4.108072387149248, "grad_norm": 0.27317139506340027, "learning_rate": 9.384167697091595e-07, "loss": 0.2697, "step": 40407 }, { "epoch": 4.108174054493697, "grad_norm": 0.273725688457489, "learning_rate": 9.382098045255783e-07, "loss": 0.3109, "step": 40408 }, { "epoch": 4.108275721838146, "grad_norm": 0.295844703912735, "learning_rate": 9.380028598043295e-07, "loss": 0.289, "step": 40409 }, { "epoch": 4.1083773891825945, "grad_norm": 0.28496792912483215, "learning_rate": 9.377959355464583e-07, "loss": 0.2725, "step": 40410 }, { "epoch": 4.1084790565270435, "grad_norm": 0.2852252721786499, "learning_rate": 9.375890317530062e-07, "loss": 0.3069, "step": 40411 }, { "epoch": 4.108580723871492, "grad_norm": 0.29010209441185, "learning_rate": 9.37382148425014e-07, "loss": 0.2877, "step": 40412 }, { "epoch": 4.108682391215941, "grad_norm": 0.27305981516838074, "learning_rate": 9.371752855635258e-07, "loss": 0.3381, "step": 40413 }, { "epoch": 4.10878405856039, "grad_norm": 0.3114009499549866, "learning_rate": 9.369684431695836e-07, "loss": 0.2834, "step": 40414 }, { "epoch": 4.108885725904839, "grad_norm": 0.27634936571121216, "learning_rate": 9.367616212442282e-07, "loss": 0.3105, "step": 40415 }, { "epoch": 4.108987393249288, "grad_norm": 0.26291507482528687, "learning_rate": 9.365548197885016e-07, "loss": 0.2984, "step": 40416 }, { "epoch": 4.109089060593737, "grad_norm": 0.2853420674800873, "learning_rate": 9.363480388034469e-07, "loss": 0.3117, "step": 40417 }, { "epoch": 4.109190727938186, "grad_norm": 0.2899334132671356, "learning_rate": 9.361412782901052e-07, "loss": 0.3348, "step": 40418 }, { "epoch": 4.109292395282635, "grad_norm": 0.2818373143672943, "learning_rate": 9.359345382495172e-07, "loss": 0.2795, "step": 40419 }, { "epoch": 4.109394062627084, "grad_norm": 0.26931774616241455, "learning_rate": 9.357278186827262e-07, "loss": 0.3117, "step": 40420 }, { "epoch": 4.109495729971533, "grad_norm": 0.27420639991760254, "learning_rate": 9.355211195907726e-07, "loss": 0.2933, "step": 40421 }, { "epoch": 4.109597397315982, "grad_norm": 0.28271183371543884, "learning_rate": 9.353144409746967e-07, "loss": 0.2921, "step": 40422 }, { "epoch": 4.109699064660431, "grad_norm": 0.29505655169487, "learning_rate": 9.351077828355415e-07, "loss": 0.2654, "step": 40423 }, { "epoch": 4.10980073200488, "grad_norm": 0.266311377286911, "learning_rate": 9.349011451743478e-07, "loss": 0.3197, "step": 40424 }, { "epoch": 4.109902399349329, "grad_norm": 0.28464511036872864, "learning_rate": 9.34694527992156e-07, "loss": 0.2795, "step": 40425 }, { "epoch": 4.110004066693778, "grad_norm": 0.28942373394966125, "learning_rate": 9.344879312900057e-07, "loss": 0.3244, "step": 40426 }, { "epoch": 4.1101057340382265, "grad_norm": 0.28287410736083984, "learning_rate": 9.342813550689406e-07, "loss": 0.3162, "step": 40427 }, { "epoch": 4.1102074013826755, "grad_norm": 0.26686471700668335, "learning_rate": 9.340747993299997e-07, "loss": 0.2806, "step": 40428 }, { "epoch": 4.110309068727124, "grad_norm": 0.2812918424606323, "learning_rate": 9.338682640742225e-07, "loss": 0.2706, "step": 40429 }, { "epoch": 4.110410736071573, "grad_norm": 0.30505451560020447, "learning_rate": 9.336617493026518e-07, "loss": 0.2866, "step": 40430 }, { "epoch": 4.110512403416023, "grad_norm": 0.307338148355484, "learning_rate": 9.334552550163272e-07, "loss": 0.294, "step": 40431 }, { "epoch": 4.110614070760472, "grad_norm": 0.2827070355415344, "learning_rate": 9.332487812162872e-07, "loss": 0.298, "step": 40432 }, { "epoch": 4.110715738104921, "grad_norm": 0.28424280881881714, "learning_rate": 9.33042327903575e-07, "loss": 0.2914, "step": 40433 }, { "epoch": 4.11081740544937, "grad_norm": 0.2645907998085022, "learning_rate": 9.328358950792288e-07, "loss": 0.3136, "step": 40434 }, { "epoch": 4.110919072793819, "grad_norm": 0.2819405794143677, "learning_rate": 9.326294827442889e-07, "loss": 0.2773, "step": 40435 }, { "epoch": 4.111020740138268, "grad_norm": 0.27069923281669617, "learning_rate": 9.324230908997939e-07, "loss": 0.3196, "step": 40436 }, { "epoch": 4.111122407482717, "grad_norm": 0.2921120524406433, "learning_rate": 9.322167195467857e-07, "loss": 0.2672, "step": 40437 }, { "epoch": 4.111224074827166, "grad_norm": 0.2624169886112213, "learning_rate": 9.320103686863035e-07, "loss": 0.3224, "step": 40438 }, { "epoch": 4.111325742171615, "grad_norm": 0.2738550901412964, "learning_rate": 9.318040383193849e-07, "loss": 0.3069, "step": 40439 }, { "epoch": 4.111427409516064, "grad_norm": 0.2802641987800598, "learning_rate": 9.315977284470723e-07, "loss": 0.2809, "step": 40440 }, { "epoch": 4.111529076860513, "grad_norm": 0.26874807476997375, "learning_rate": 9.313914390704032e-07, "loss": 0.2695, "step": 40441 }, { "epoch": 4.1116307442049616, "grad_norm": 0.26567354798316956, "learning_rate": 9.311851701904156e-07, "loss": 0.3224, "step": 40442 }, { "epoch": 4.1117324115494105, "grad_norm": 0.29018884897232056, "learning_rate": 9.309789218081522e-07, "loss": 0.3038, "step": 40443 }, { "epoch": 4.111834078893859, "grad_norm": 0.2857760787010193, "learning_rate": 9.307726939246497e-07, "loss": 0.3256, "step": 40444 }, { "epoch": 4.111935746238308, "grad_norm": 0.2752620279788971, "learning_rate": 9.305664865409475e-07, "loss": 0.3049, "step": 40445 }, { "epoch": 4.112037413582757, "grad_norm": 0.2851582467556, "learning_rate": 9.30360299658083e-07, "loss": 0.3177, "step": 40446 }, { "epoch": 4.112139080927206, "grad_norm": 0.3114895820617676, "learning_rate": 9.301541332770975e-07, "loss": 0.2865, "step": 40447 }, { "epoch": 4.112240748271655, "grad_norm": 0.294365257024765, "learning_rate": 9.29947987399028e-07, "loss": 0.3328, "step": 40448 }, { "epoch": 4.112342415616104, "grad_norm": 0.30138471722602844, "learning_rate": 9.297418620249127e-07, "loss": 0.2676, "step": 40449 }, { "epoch": 4.112444082960553, "grad_norm": 0.2711222767829895, "learning_rate": 9.295357571557911e-07, "loss": 0.3036, "step": 40450 }, { "epoch": 4.112545750305002, "grad_norm": 0.2707258462905884, "learning_rate": 9.29329672792702e-07, "loss": 0.2937, "step": 40451 }, { "epoch": 4.112647417649451, "grad_norm": 0.2662478983402252, "learning_rate": 9.291236089366806e-07, "loss": 0.2986, "step": 40452 }, { "epoch": 4.1127490849939, "grad_norm": 0.28116273880004883, "learning_rate": 9.289175655887689e-07, "loss": 0.2849, "step": 40453 }, { "epoch": 4.112850752338349, "grad_norm": 0.28597378730773926, "learning_rate": 9.287115427500026e-07, "loss": 0.2855, "step": 40454 }, { "epoch": 4.112952419682798, "grad_norm": 0.27992066740989685, "learning_rate": 9.285055404214205e-07, "loss": 0.3166, "step": 40455 }, { "epoch": 4.113054087027247, "grad_norm": 0.26901188492774963, "learning_rate": 9.282995586040583e-07, "loss": 0.2926, "step": 40456 }, { "epoch": 4.113155754371696, "grad_norm": 0.29436397552490234, "learning_rate": 9.280935972989569e-07, "loss": 0.2891, "step": 40457 }, { "epoch": 4.113257421716145, "grad_norm": 0.28057998418807983, "learning_rate": 9.27887656507152e-07, "loss": 0.2938, "step": 40458 }, { "epoch": 4.1133590890605936, "grad_norm": 0.27287712693214417, "learning_rate": 9.276817362296809e-07, "loss": 0.2739, "step": 40459 }, { "epoch": 4.1134607564050425, "grad_norm": 0.2787744700908661, "learning_rate": 9.274758364675818e-07, "loss": 0.2815, "step": 40460 }, { "epoch": 4.113562423749491, "grad_norm": 0.3048960864543915, "learning_rate": 9.272699572218924e-07, "loss": 0.278, "step": 40461 }, { "epoch": 4.11366409109394, "grad_norm": 0.2577981948852539, "learning_rate": 9.270640984936486e-07, "loss": 0.2989, "step": 40462 }, { "epoch": 4.113765758438389, "grad_norm": 0.292427659034729, "learning_rate": 9.268582602838866e-07, "loss": 0.267, "step": 40463 }, { "epoch": 4.113867425782838, "grad_norm": 0.29585352540016174, "learning_rate": 9.266524425936469e-07, "loss": 0.3024, "step": 40464 }, { "epoch": 4.113969093127287, "grad_norm": 0.2957221269607544, "learning_rate": 9.264466454239634e-07, "loss": 0.314, "step": 40465 }, { "epoch": 4.114070760471736, "grad_norm": 0.27765509486198425, "learning_rate": 9.262408687758728e-07, "loss": 0.2934, "step": 40466 }, { "epoch": 4.114172427816185, "grad_norm": 0.27212461829185486, "learning_rate": 9.260351126504135e-07, "loss": 0.2741, "step": 40467 }, { "epoch": 4.114274095160634, "grad_norm": 0.28028616309165955, "learning_rate": 9.258293770486215e-07, "loss": 0.2704, "step": 40468 }, { "epoch": 4.114375762505083, "grad_norm": 0.30003049969673157, "learning_rate": 9.256236619715314e-07, "loss": 0.3383, "step": 40469 }, { "epoch": 4.114477429849532, "grad_norm": 0.2928001880645752, "learning_rate": 9.254179674201825e-07, "loss": 0.3175, "step": 40470 }, { "epoch": 4.114579097193982, "grad_norm": 0.2918621003627777, "learning_rate": 9.252122933956098e-07, "loss": 0.2851, "step": 40471 }, { "epoch": 4.114680764538431, "grad_norm": 0.29622119665145874, "learning_rate": 9.250066398988489e-07, "loss": 0.313, "step": 40472 }, { "epoch": 4.11478243188288, "grad_norm": 0.2625042498111725, "learning_rate": 9.248010069309354e-07, "loss": 0.2758, "step": 40473 }, { "epoch": 4.114884099227329, "grad_norm": 0.30652230978012085, "learning_rate": 9.24595394492907e-07, "loss": 0.2998, "step": 40474 }, { "epoch": 4.1149857665717775, "grad_norm": 0.27041539549827576, "learning_rate": 9.243898025857984e-07, "loss": 0.3077, "step": 40475 }, { "epoch": 4.1150874339162264, "grad_norm": 0.2755463421344757, "learning_rate": 9.241842312106441e-07, "loss": 0.3081, "step": 40476 }, { "epoch": 4.115189101260675, "grad_norm": 0.28385573625564575, "learning_rate": 9.239786803684841e-07, "loss": 0.2882, "step": 40477 }, { "epoch": 4.115290768605124, "grad_norm": 0.29425057768821716, "learning_rate": 9.237731500603481e-07, "loss": 0.3213, "step": 40478 }, { "epoch": 4.115392435949573, "grad_norm": 0.30100753903388977, "learning_rate": 9.235676402872745e-07, "loss": 0.3036, "step": 40479 }, { "epoch": 4.115494103294022, "grad_norm": 0.26950663328170776, "learning_rate": 9.233621510502994e-07, "loss": 0.299, "step": 40480 }, { "epoch": 4.115595770638471, "grad_norm": 0.27752071619033813, "learning_rate": 9.231566823504567e-07, "loss": 0.2957, "step": 40481 }, { "epoch": 4.11569743798292, "grad_norm": 0.29031500220298767, "learning_rate": 9.229512341887825e-07, "loss": 0.2576, "step": 40482 }, { "epoch": 4.115799105327369, "grad_norm": 0.2827373147010803, "learning_rate": 9.227458065663097e-07, "loss": 0.2856, "step": 40483 }, { "epoch": 4.115900772671818, "grad_norm": 0.2723405659198761, "learning_rate": 9.225403994840759e-07, "loss": 0.2958, "step": 40484 }, { "epoch": 4.116002440016267, "grad_norm": 0.2929549515247345, "learning_rate": 9.223350129431141e-07, "loss": 0.2942, "step": 40485 }, { "epoch": 4.116104107360716, "grad_norm": 0.2523339092731476, "learning_rate": 9.221296469444591e-07, "loss": 0.2977, "step": 40486 }, { "epoch": 4.116205774705165, "grad_norm": 0.2890176773071289, "learning_rate": 9.219243014891477e-07, "loss": 0.3211, "step": 40487 }, { "epoch": 4.116307442049614, "grad_norm": 0.2854458689689636, "learning_rate": 9.217189765782103e-07, "loss": 0.2916, "step": 40488 }, { "epoch": 4.116409109394063, "grad_norm": 0.30362623929977417, "learning_rate": 9.215136722126833e-07, "loss": 0.2883, "step": 40489 }, { "epoch": 4.116510776738512, "grad_norm": 0.26539862155914307, "learning_rate": 9.213083883936042e-07, "loss": 0.3048, "step": 40490 }, { "epoch": 4.116612444082961, "grad_norm": 0.26473110914230347, "learning_rate": 9.211031251220015e-07, "loss": 0.2872, "step": 40491 }, { "epoch": 4.1167141114274095, "grad_norm": 0.29138466715812683, "learning_rate": 9.208978823989134e-07, "loss": 0.267, "step": 40492 }, { "epoch": 4.1168157787718584, "grad_norm": 0.285432904958725, "learning_rate": 9.206926602253708e-07, "loss": 0.3572, "step": 40493 }, { "epoch": 4.116917446116307, "grad_norm": 0.244349867105484, "learning_rate": 9.204874586024104e-07, "loss": 0.293, "step": 40494 }, { "epoch": 4.117019113460756, "grad_norm": 0.2625875174999237, "learning_rate": 9.20282277531065e-07, "loss": 0.3016, "step": 40495 }, { "epoch": 4.117120780805205, "grad_norm": 0.2684653401374817, "learning_rate": 9.200771170123668e-07, "loss": 0.2949, "step": 40496 }, { "epoch": 4.117222448149654, "grad_norm": 0.2949866056442261, "learning_rate": 9.19871977047353e-07, "loss": 0.3116, "step": 40497 }, { "epoch": 4.117324115494103, "grad_norm": 0.2770385444164276, "learning_rate": 9.196668576370521e-07, "loss": 0.3206, "step": 40498 }, { "epoch": 4.117425782838552, "grad_norm": 0.27908551692962646, "learning_rate": 9.194617587824994e-07, "loss": 0.316, "step": 40499 }, { "epoch": 4.117527450183001, "grad_norm": 0.28434544801712036, "learning_rate": 9.19256680484732e-07, "loss": 0.3082, "step": 40500 }, { "epoch": 4.11762911752745, "grad_norm": 0.28201261162757874, "learning_rate": 9.190516227447766e-07, "loss": 0.2875, "step": 40501 }, { "epoch": 4.117730784871899, "grad_norm": 0.3070107102394104, "learning_rate": 9.188465855636708e-07, "loss": 0.2935, "step": 40502 }, { "epoch": 4.117832452216348, "grad_norm": 0.270910382270813, "learning_rate": 9.18641568942445e-07, "loss": 0.3042, "step": 40503 }, { "epoch": 4.117934119560797, "grad_norm": 0.28893113136291504, "learning_rate": 9.184365728821343e-07, "loss": 0.2774, "step": 40504 }, { "epoch": 4.118035786905246, "grad_norm": 0.27092429995536804, "learning_rate": 9.182315973837702e-07, "loss": 0.3155, "step": 40505 }, { "epoch": 4.118137454249695, "grad_norm": 0.2829529345035553, "learning_rate": 9.180266424483836e-07, "loss": 0.2772, "step": 40506 }, { "epoch": 4.118239121594144, "grad_norm": 0.2698547840118408, "learning_rate": 9.178217080770119e-07, "loss": 0.27, "step": 40507 }, { "epoch": 4.118340788938593, "grad_norm": 0.27092018723487854, "learning_rate": 9.176167942706815e-07, "loss": 0.2747, "step": 40508 }, { "epoch": 4.1184424562830415, "grad_norm": 0.2725943624973297, "learning_rate": 9.174119010304277e-07, "loss": 0.2815, "step": 40509 }, { "epoch": 4.1185441236274904, "grad_norm": 0.26592734456062317, "learning_rate": 9.172070283572853e-07, "loss": 0.3192, "step": 40510 }, { "epoch": 4.118645790971939, "grad_norm": 0.26108914613723755, "learning_rate": 9.170021762522807e-07, "loss": 0.3231, "step": 40511 }, { "epoch": 4.118747458316388, "grad_norm": 0.27736759185791016, "learning_rate": 9.167973447164502e-07, "loss": 0.306, "step": 40512 }, { "epoch": 4.118849125660838, "grad_norm": 0.271158367395401, "learning_rate": 9.165925337508247e-07, "loss": 0.3079, "step": 40513 }, { "epoch": 4.118950793005287, "grad_norm": 0.28234222531318665, "learning_rate": 9.163877433564345e-07, "loss": 0.2876, "step": 40514 }, { "epoch": 4.119052460349736, "grad_norm": 0.273058146238327, "learning_rate": 9.161829735343131e-07, "loss": 0.2966, "step": 40515 }, { "epoch": 4.119154127694185, "grad_norm": 0.2716229557991028, "learning_rate": 9.159782242854903e-07, "loss": 0.2875, "step": 40516 }, { "epoch": 4.119255795038634, "grad_norm": 0.28863754868507385, "learning_rate": 9.157734956110015e-07, "loss": 0.3088, "step": 40517 }, { "epoch": 4.119357462383083, "grad_norm": 0.30055898427963257, "learning_rate": 9.155687875118724e-07, "loss": 0.307, "step": 40518 }, { "epoch": 4.119459129727532, "grad_norm": 0.2806985676288605, "learning_rate": 9.153640999891372e-07, "loss": 0.3039, "step": 40519 }, { "epoch": 4.119560797071981, "grad_norm": 0.2876383364200592, "learning_rate": 9.151594330438301e-07, "loss": 0.2949, "step": 40520 }, { "epoch": 4.11966246441643, "grad_norm": 0.26754865050315857, "learning_rate": 9.149547866769759e-07, "loss": 0.2729, "step": 40521 }, { "epoch": 4.119764131760879, "grad_norm": 0.2924136519432068, "learning_rate": 9.147501608896103e-07, "loss": 0.2655, "step": 40522 }, { "epoch": 4.119865799105328, "grad_norm": 0.2966078519821167, "learning_rate": 9.145455556827626e-07, "loss": 0.3249, "step": 40523 }, { "epoch": 4.1199674664497765, "grad_norm": 0.2780643105506897, "learning_rate": 9.143409710574619e-07, "loss": 0.3139, "step": 40524 }, { "epoch": 4.1200691337942255, "grad_norm": 0.279026597738266, "learning_rate": 9.141364070147419e-07, "loss": 0.2789, "step": 40525 }, { "epoch": 4.120170801138674, "grad_norm": 0.2935112416744232, "learning_rate": 9.13931863555631e-07, "loss": 0.305, "step": 40526 }, { "epoch": 4.120272468483123, "grad_norm": 0.27833235263824463, "learning_rate": 9.137273406811625e-07, "loss": 0.2433, "step": 40527 }, { "epoch": 4.120374135827572, "grad_norm": 0.2656975984573364, "learning_rate": 9.135228383923617e-07, "loss": 0.331, "step": 40528 }, { "epoch": 4.120475803172021, "grad_norm": 0.3034077286720276, "learning_rate": 9.133183566902621e-07, "loss": 0.3034, "step": 40529 }, { "epoch": 4.12057747051647, "grad_norm": 0.27200672030448914, "learning_rate": 9.131138955758961e-07, "loss": 0.2752, "step": 40530 }, { "epoch": 4.120679137860919, "grad_norm": 0.2694196403026581, "learning_rate": 9.129094550502887e-07, "loss": 0.3335, "step": 40531 }, { "epoch": 4.120780805205368, "grad_norm": 0.3099040687084198, "learning_rate": 9.127050351144728e-07, "loss": 0.2758, "step": 40532 }, { "epoch": 4.120882472549817, "grad_norm": 0.2518065869808197, "learning_rate": 9.125006357694782e-07, "loss": 0.2912, "step": 40533 }, { "epoch": 4.120984139894266, "grad_norm": 0.3027762174606323, "learning_rate": 9.122962570163329e-07, "loss": 0.3309, "step": 40534 }, { "epoch": 4.121085807238715, "grad_norm": 0.2765398323535919, "learning_rate": 9.12091898856069e-07, "loss": 0.294, "step": 40535 }, { "epoch": 4.121187474583164, "grad_norm": 0.27535951137542725, "learning_rate": 9.118875612897144e-07, "loss": 0.2964, "step": 40536 }, { "epoch": 4.121289141927613, "grad_norm": 0.29540354013442993, "learning_rate": 9.116832443182982e-07, "loss": 0.2997, "step": 40537 }, { "epoch": 4.121390809272062, "grad_norm": 0.28681159019470215, "learning_rate": 9.114789479428499e-07, "loss": 0.3083, "step": 40538 }, { "epoch": 4.121492476616511, "grad_norm": 0.27906960248947144, "learning_rate": 9.112746721643989e-07, "loss": 0.3105, "step": 40539 }, { "epoch": 4.12159414396096, "grad_norm": 0.277084082365036, "learning_rate": 9.110704169839768e-07, "loss": 0.3167, "step": 40540 }, { "epoch": 4.1216958113054085, "grad_norm": 0.26925432682037354, "learning_rate": 9.108661824026078e-07, "loss": 0.3078, "step": 40541 }, { "epoch": 4.1217974786498575, "grad_norm": 0.27476611733436584, "learning_rate": 9.106619684213247e-07, "loss": 0.2823, "step": 40542 }, { "epoch": 4.121899145994306, "grad_norm": 0.28150445222854614, "learning_rate": 9.104577750411547e-07, "loss": 0.3076, "step": 40543 }, { "epoch": 4.122000813338755, "grad_norm": 0.279228538274765, "learning_rate": 9.102536022631253e-07, "loss": 0.3, "step": 40544 }, { "epoch": 4.122102480683204, "grad_norm": 0.27177390456199646, "learning_rate": 9.100494500882673e-07, "loss": 0.3322, "step": 40545 }, { "epoch": 4.122204148027653, "grad_norm": 0.284012109041214, "learning_rate": 9.098453185176087e-07, "loss": 0.3002, "step": 40546 }, { "epoch": 4.122305815372102, "grad_norm": 0.2870514392852783, "learning_rate": 9.096412075521771e-07, "loss": 0.2962, "step": 40547 }, { "epoch": 4.122407482716551, "grad_norm": 0.27462461590766907, "learning_rate": 9.094371171930006e-07, "loss": 0.2947, "step": 40548 }, { "epoch": 4.122509150061, "grad_norm": 0.29082101583480835, "learning_rate": 9.092330474411082e-07, "loss": 0.287, "step": 40549 }, { "epoch": 4.122610817405449, "grad_norm": 0.27713167667388916, "learning_rate": 9.090289982975281e-07, "loss": 0.303, "step": 40550 }, { "epoch": 4.122712484749898, "grad_norm": 0.28449368476867676, "learning_rate": 9.088249697632862e-07, "loss": 0.3025, "step": 40551 }, { "epoch": 4.122814152094347, "grad_norm": 0.2781250774860382, "learning_rate": 9.086209618394131e-07, "loss": 0.292, "step": 40552 }, { "epoch": 4.122915819438797, "grad_norm": 0.27614638209342957, "learning_rate": 9.084169745269356e-07, "loss": 0.3072, "step": 40553 }, { "epoch": 4.123017486783246, "grad_norm": 0.2829015552997589, "learning_rate": 9.082130078268797e-07, "loss": 0.3174, "step": 40554 }, { "epoch": 4.123119154127695, "grad_norm": 0.2698148488998413, "learning_rate": 9.080090617402753e-07, "loss": 0.2962, "step": 40555 }, { "epoch": 4.1232208214721435, "grad_norm": 0.2850903272628784, "learning_rate": 9.078051362681495e-07, "loss": 0.3175, "step": 40556 }, { "epoch": 4.1233224888165925, "grad_norm": 0.2757236361503601, "learning_rate": 9.076012314115284e-07, "loss": 0.2798, "step": 40557 }, { "epoch": 4.123424156161041, "grad_norm": 0.2737560570240021, "learning_rate": 9.073973471714387e-07, "loss": 0.2934, "step": 40558 }, { "epoch": 4.12352582350549, "grad_norm": 0.2957924008369446, "learning_rate": 9.071934835489099e-07, "loss": 0.2958, "step": 40559 }, { "epoch": 4.123627490849939, "grad_norm": 0.2702799141407013, "learning_rate": 9.069896405449674e-07, "loss": 0.3214, "step": 40560 }, { "epoch": 4.123729158194388, "grad_norm": 0.28613248467445374, "learning_rate": 9.067858181606376e-07, "loss": 0.2756, "step": 40561 }, { "epoch": 4.123830825538837, "grad_norm": 0.3028981387615204, "learning_rate": 9.065820163969496e-07, "loss": 0.2695, "step": 40562 }, { "epoch": 4.123932492883286, "grad_norm": 0.26851674914360046, "learning_rate": 9.063782352549283e-07, "loss": 0.2933, "step": 40563 }, { "epoch": 4.124034160227735, "grad_norm": 0.2849597632884979, "learning_rate": 9.061744747355994e-07, "loss": 0.3185, "step": 40564 }, { "epoch": 4.124135827572184, "grad_norm": 0.2758421301841736, "learning_rate": 9.059707348399921e-07, "loss": 0.295, "step": 40565 }, { "epoch": 4.124237494916633, "grad_norm": 0.27988043427467346, "learning_rate": 9.057670155691311e-07, "loss": 0.2906, "step": 40566 }, { "epoch": 4.124339162261082, "grad_norm": 0.29898494482040405, "learning_rate": 9.055633169240429e-07, "loss": 0.3075, "step": 40567 }, { "epoch": 4.124440829605531, "grad_norm": 0.2689249813556671, "learning_rate": 9.053596389057528e-07, "loss": 0.2807, "step": 40568 }, { "epoch": 4.12454249694998, "grad_norm": 0.30379825830459595, "learning_rate": 9.051559815152888e-07, "loss": 0.2831, "step": 40569 }, { "epoch": 4.124644164294429, "grad_norm": 0.3082839548587799, "learning_rate": 9.049523447536757e-07, "loss": 0.3285, "step": 40570 }, { "epoch": 4.124745831638878, "grad_norm": 0.26049110293388367, "learning_rate": 9.047487286219386e-07, "loss": 0.3033, "step": 40571 }, { "epoch": 4.124847498983327, "grad_norm": 0.2636610269546509, "learning_rate": 9.045451331211053e-07, "loss": 0.3112, "step": 40572 }, { "epoch": 4.1249491663277755, "grad_norm": 0.2531742453575134, "learning_rate": 9.043415582522008e-07, "loss": 0.319, "step": 40573 }, { "epoch": 4.1250508336722245, "grad_norm": 0.279721736907959, "learning_rate": 9.041380040162484e-07, "loss": 0.3139, "step": 40574 }, { "epoch": 4.125152501016673, "grad_norm": 0.2757752239704132, "learning_rate": 9.039344704142772e-07, "loss": 0.2721, "step": 40575 }, { "epoch": 4.125254168361122, "grad_norm": 0.29223138093948364, "learning_rate": 9.037309574473108e-07, "loss": 0.2874, "step": 40576 }, { "epoch": 4.125355835705571, "grad_norm": 0.2578738331794739, "learning_rate": 9.035274651163739e-07, "loss": 0.3029, "step": 40577 }, { "epoch": 4.12545750305002, "grad_norm": 0.27351051568984985, "learning_rate": 9.033239934224913e-07, "loss": 0.2805, "step": 40578 }, { "epoch": 4.125559170394469, "grad_norm": 0.30145949125289917, "learning_rate": 9.0312054236669e-07, "loss": 0.3033, "step": 40579 }, { "epoch": 4.125660837738918, "grad_norm": 0.3024626076221466, "learning_rate": 9.029171119499935e-07, "loss": 0.3138, "step": 40580 }, { "epoch": 4.125762505083367, "grad_norm": 0.2832395136356354, "learning_rate": 9.027137021734262e-07, "loss": 0.2874, "step": 40581 }, { "epoch": 4.125864172427816, "grad_norm": 0.2731568515300751, "learning_rate": 9.025103130380147e-07, "loss": 0.2596, "step": 40582 }, { "epoch": 4.125965839772265, "grad_norm": 0.2910117208957672, "learning_rate": 9.023069445447829e-07, "loss": 0.299, "step": 40583 }, { "epoch": 4.126067507116714, "grad_norm": 0.2607235014438629, "learning_rate": 9.021035966947533e-07, "loss": 0.2945, "step": 40584 }, { "epoch": 4.126169174461163, "grad_norm": 0.27517247200012207, "learning_rate": 9.019002694889534e-07, "loss": 0.3097, "step": 40585 }, { "epoch": 4.126270841805612, "grad_norm": 0.2787594199180603, "learning_rate": 9.016969629284056e-07, "loss": 0.3018, "step": 40586 }, { "epoch": 4.126372509150061, "grad_norm": 0.26472389698028564, "learning_rate": 9.014936770141353e-07, "loss": 0.334, "step": 40587 }, { "epoch": 4.12647417649451, "grad_norm": 0.26903682947158813, "learning_rate": 9.012904117471644e-07, "loss": 0.3089, "step": 40588 }, { "epoch": 4.126575843838959, "grad_norm": 0.28445640206336975, "learning_rate": 9.010871671285193e-07, "loss": 0.3072, "step": 40589 }, { "epoch": 4.1266775111834075, "grad_norm": 0.2893855571746826, "learning_rate": 9.008839431592231e-07, "loss": 0.2913, "step": 40590 }, { "epoch": 4.1267791785278565, "grad_norm": 0.29675695300102234, "learning_rate": 9.006807398402983e-07, "loss": 0.2906, "step": 40591 }, { "epoch": 4.126880845872305, "grad_norm": 0.24369047582149506, "learning_rate": 9.004775571727714e-07, "loss": 0.3022, "step": 40592 }, { "epoch": 4.126982513216754, "grad_norm": 0.2809104919433594, "learning_rate": 9.002743951576642e-07, "loss": 0.2488, "step": 40593 }, { "epoch": 4.127084180561203, "grad_norm": 0.2918129861354828, "learning_rate": 9.000712537959988e-07, "loss": 0.2836, "step": 40594 }, { "epoch": 4.127185847905653, "grad_norm": 0.2836443781852722, "learning_rate": 8.998681330888015e-07, "loss": 0.3099, "step": 40595 }, { "epoch": 4.127287515250102, "grad_norm": 0.2728545665740967, "learning_rate": 8.996650330370943e-07, "loss": 0.3082, "step": 40596 }, { "epoch": 4.127389182594551, "grad_norm": 0.27380821108818054, "learning_rate": 8.994619536419003e-07, "loss": 0.3039, "step": 40597 }, { "epoch": 4.127490849939, "grad_norm": 0.30152422189712524, "learning_rate": 8.992588949042413e-07, "loss": 0.3115, "step": 40598 }, { "epoch": 4.127592517283449, "grad_norm": 0.284900963306427, "learning_rate": 8.990558568251428e-07, "loss": 0.2903, "step": 40599 }, { "epoch": 4.127694184627898, "grad_norm": 0.2754330039024353, "learning_rate": 8.988528394056262e-07, "loss": 0.2897, "step": 40600 }, { "epoch": 4.127795851972347, "grad_norm": 0.27994897961616516, "learning_rate": 8.986498426467133e-07, "loss": 0.3248, "step": 40601 }, { "epoch": 4.127897519316796, "grad_norm": 0.26352640986442566, "learning_rate": 8.984468665494289e-07, "loss": 0.284, "step": 40602 }, { "epoch": 4.127999186661245, "grad_norm": 0.2713938057422638, "learning_rate": 8.982439111147951e-07, "loss": 0.302, "step": 40603 }, { "epoch": 4.128100854005694, "grad_norm": 0.2881978452205658, "learning_rate": 8.98040976343832e-07, "loss": 0.3321, "step": 40604 }, { "epoch": 4.1282025213501425, "grad_norm": 0.2758069634437561, "learning_rate": 8.978380622375649e-07, "loss": 0.3046, "step": 40605 }, { "epoch": 4.1283041886945915, "grad_norm": 0.2629290819168091, "learning_rate": 8.976351687970148e-07, "loss": 0.2756, "step": 40606 }, { "epoch": 4.12840585603904, "grad_norm": 0.2748985290527344, "learning_rate": 8.974322960232041e-07, "loss": 0.2766, "step": 40607 }, { "epoch": 4.128507523383489, "grad_norm": 0.27184784412384033, "learning_rate": 8.97229443917153e-07, "loss": 0.278, "step": 40608 }, { "epoch": 4.128609190727938, "grad_norm": 0.27657637000083923, "learning_rate": 8.970266124798865e-07, "loss": 0.2858, "step": 40609 }, { "epoch": 4.128710858072387, "grad_norm": 0.292359858751297, "learning_rate": 8.968238017124248e-07, "loss": 0.3103, "step": 40610 }, { "epoch": 4.128812525416836, "grad_norm": 0.28879183530807495, "learning_rate": 8.966210116157886e-07, "loss": 0.2888, "step": 40611 }, { "epoch": 4.128914192761285, "grad_norm": 0.27390003204345703, "learning_rate": 8.964182421910017e-07, "loss": 0.307, "step": 40612 }, { "epoch": 4.129015860105734, "grad_norm": 0.2744635343551636, "learning_rate": 8.962154934390849e-07, "loss": 0.2734, "step": 40613 }, { "epoch": 4.129117527450183, "grad_norm": 0.27039816975593567, "learning_rate": 8.960127653610573e-07, "loss": 0.2619, "step": 40614 }, { "epoch": 4.129219194794632, "grad_norm": 0.2989063560962677, "learning_rate": 8.958100579579443e-07, "loss": 0.3229, "step": 40615 }, { "epoch": 4.129320862139081, "grad_norm": 0.27885106205940247, "learning_rate": 8.956073712307645e-07, "loss": 0.2647, "step": 40616 }, { "epoch": 4.12942252948353, "grad_norm": 0.25466054677963257, "learning_rate": 8.954047051805392e-07, "loss": 0.3219, "step": 40617 }, { "epoch": 4.129524196827979, "grad_norm": 0.283389151096344, "learning_rate": 8.952020598082884e-07, "loss": 0.2486, "step": 40618 }, { "epoch": 4.129625864172428, "grad_norm": 0.2788793742656708, "learning_rate": 8.949994351150354e-07, "loss": 0.2973, "step": 40619 }, { "epoch": 4.129727531516877, "grad_norm": 0.2915291488170624, "learning_rate": 8.947968311017996e-07, "loss": 0.2829, "step": 40620 }, { "epoch": 4.129829198861326, "grad_norm": 0.27427104115486145, "learning_rate": 8.945942477696013e-07, "loss": 0.2922, "step": 40621 }, { "epoch": 4.1299308662057745, "grad_norm": 0.2637065351009369, "learning_rate": 8.943916851194623e-07, "loss": 0.3204, "step": 40622 }, { "epoch": 4.1300325335502235, "grad_norm": 0.2840784788131714, "learning_rate": 8.941891431524025e-07, "loss": 0.2941, "step": 40623 }, { "epoch": 4.130134200894672, "grad_norm": 0.31117650866508484, "learning_rate": 8.939866218694404e-07, "loss": 0.2981, "step": 40624 }, { "epoch": 4.130235868239121, "grad_norm": 0.2891162037849426, "learning_rate": 8.937841212715998e-07, "loss": 0.2987, "step": 40625 }, { "epoch": 4.13033753558357, "grad_norm": 0.27112218737602234, "learning_rate": 8.935816413598986e-07, "loss": 0.2781, "step": 40626 }, { "epoch": 4.130439202928019, "grad_norm": 0.26354625821113586, "learning_rate": 8.933791821353576e-07, "loss": 0.3053, "step": 40627 }, { "epoch": 4.130540870272468, "grad_norm": 0.25987476110458374, "learning_rate": 8.931767435989946e-07, "loss": 0.3193, "step": 40628 }, { "epoch": 4.130642537616917, "grad_norm": 0.2951264977455139, "learning_rate": 8.929743257518331e-07, "loss": 0.2887, "step": 40629 }, { "epoch": 4.130744204961366, "grad_norm": 0.2888757586479187, "learning_rate": 8.927719285948905e-07, "loss": 0.2888, "step": 40630 }, { "epoch": 4.130845872305815, "grad_norm": 0.27158331871032715, "learning_rate": 8.925695521291861e-07, "loss": 0.3056, "step": 40631 }, { "epoch": 4.130947539650264, "grad_norm": 0.2656654119491577, "learning_rate": 8.923671963557412e-07, "loss": 0.2945, "step": 40632 }, { "epoch": 4.131049206994713, "grad_norm": 0.277038037776947, "learning_rate": 8.92164861275574e-07, "loss": 0.3324, "step": 40633 }, { "epoch": 4.131150874339162, "grad_norm": 0.3216026723384857, "learning_rate": 8.91962546889703e-07, "loss": 0.3064, "step": 40634 }, { "epoch": 4.131252541683612, "grad_norm": 0.27085134387016296, "learning_rate": 8.917602531991504e-07, "loss": 0.2856, "step": 40635 }, { "epoch": 4.131354209028061, "grad_norm": 0.277154803276062, "learning_rate": 8.915579802049323e-07, "loss": 0.2986, "step": 40636 }, { "epoch": 4.1314558763725096, "grad_norm": 0.2782547175884247, "learning_rate": 8.913557279080698e-07, "loss": 0.3257, "step": 40637 }, { "epoch": 4.1315575437169585, "grad_norm": 0.28514784574508667, "learning_rate": 8.911534963095786e-07, "loss": 0.3169, "step": 40638 }, { "epoch": 4.131659211061407, "grad_norm": 0.29291731119155884, "learning_rate": 8.909512854104813e-07, "loss": 0.2888, "step": 40639 }, { "epoch": 4.131760878405856, "grad_norm": 0.2627125680446625, "learning_rate": 8.907490952117953e-07, "loss": 0.3001, "step": 40640 }, { "epoch": 4.131862545750305, "grad_norm": 0.28340858221054077, "learning_rate": 8.905469257145371e-07, "loss": 0.2853, "step": 40641 }, { "epoch": 4.131964213094754, "grad_norm": 0.2667028605937958, "learning_rate": 8.903447769197298e-07, "loss": 0.3391, "step": 40642 }, { "epoch": 4.132065880439203, "grad_norm": 0.2632130980491638, "learning_rate": 8.90142648828386e-07, "loss": 0.3116, "step": 40643 }, { "epoch": 4.132167547783652, "grad_norm": 0.2787255644798279, "learning_rate": 8.899405414415274e-07, "loss": 0.3016, "step": 40644 }, { "epoch": 4.132269215128101, "grad_norm": 0.2973751127719879, "learning_rate": 8.897384547601722e-07, "loss": 0.2783, "step": 40645 }, { "epoch": 4.13237088247255, "grad_norm": 0.28886905312538147, "learning_rate": 8.895363887853387e-07, "loss": 0.3066, "step": 40646 }, { "epoch": 4.132472549816999, "grad_norm": 0.2731863558292389, "learning_rate": 8.893343435180435e-07, "loss": 0.2867, "step": 40647 }, { "epoch": 4.132574217161448, "grad_norm": 0.27246567606925964, "learning_rate": 8.891323189593038e-07, "loss": 0.2989, "step": 40648 }, { "epoch": 4.132675884505897, "grad_norm": 0.29053056240081787, "learning_rate": 8.889303151101414e-07, "loss": 0.2739, "step": 40649 }, { "epoch": 4.132777551850346, "grad_norm": 0.26004695892333984, "learning_rate": 8.887283319715684e-07, "loss": 0.2609, "step": 40650 }, { "epoch": 4.132879219194795, "grad_norm": 0.2849423587322235, "learning_rate": 8.885263695446055e-07, "loss": 0.2605, "step": 40651 }, { "epoch": 4.132980886539244, "grad_norm": 0.26151835918426514, "learning_rate": 8.883244278302717e-07, "loss": 0.2815, "step": 40652 }, { "epoch": 4.133082553883693, "grad_norm": 0.2949753701686859, "learning_rate": 8.881225068295801e-07, "loss": 0.2932, "step": 40653 }, { "epoch": 4.1331842212281416, "grad_norm": 0.286266028881073, "learning_rate": 8.879206065435514e-07, "loss": 0.3037, "step": 40654 }, { "epoch": 4.1332858885725905, "grad_norm": 0.27878454327583313, "learning_rate": 8.877187269732001e-07, "loss": 0.2629, "step": 40655 }, { "epoch": 4.133387555917039, "grad_norm": 0.30725252628326416, "learning_rate": 8.875168681195462e-07, "loss": 0.3216, "step": 40656 }, { "epoch": 4.133489223261488, "grad_norm": 0.2797819674015045, "learning_rate": 8.873150299836048e-07, "loss": 0.2946, "step": 40657 }, { "epoch": 4.133590890605937, "grad_norm": 0.2825773060321808, "learning_rate": 8.871132125663917e-07, "loss": 0.2862, "step": 40658 }, { "epoch": 4.133692557950386, "grad_norm": 0.3052663803100586, "learning_rate": 8.869114158689274e-07, "loss": 0.2899, "step": 40659 }, { "epoch": 4.133794225294835, "grad_norm": 0.3313000202178955, "learning_rate": 8.867096398922237e-07, "loss": 0.2884, "step": 40660 }, { "epoch": 4.133895892639284, "grad_norm": 0.28340262174606323, "learning_rate": 8.865078846372987e-07, "loss": 0.2709, "step": 40661 }, { "epoch": 4.133997559983733, "grad_norm": 0.27399536967277527, "learning_rate": 8.863061501051729e-07, "loss": 0.3051, "step": 40662 }, { "epoch": 4.134099227328182, "grad_norm": 0.2609730064868927, "learning_rate": 8.861044362968557e-07, "loss": 0.2815, "step": 40663 }, { "epoch": 4.134200894672631, "grad_norm": 0.25810760259628296, "learning_rate": 8.859027432133688e-07, "loss": 0.2876, "step": 40664 }, { "epoch": 4.13430256201708, "grad_norm": 0.2771822512149811, "learning_rate": 8.857010708557256e-07, "loss": 0.2711, "step": 40665 }, { "epoch": 4.134404229361529, "grad_norm": 0.29184797406196594, "learning_rate": 8.854994192249416e-07, "loss": 0.2903, "step": 40666 }, { "epoch": 4.134505896705978, "grad_norm": 0.27360567450523376, "learning_rate": 8.852977883220354e-07, "loss": 0.3275, "step": 40667 }, { "epoch": 4.134607564050427, "grad_norm": 0.28949955105781555, "learning_rate": 8.850961781480194e-07, "loss": 0.3021, "step": 40668 }, { "epoch": 4.134709231394876, "grad_norm": 0.2909931242465973, "learning_rate": 8.84894588703914e-07, "loss": 0.2792, "step": 40669 }, { "epoch": 4.134810898739325, "grad_norm": 0.3013698160648346, "learning_rate": 8.846930199907289e-07, "loss": 0.289, "step": 40670 }, { "epoch": 4.1349125660837736, "grad_norm": 0.27060797810554504, "learning_rate": 8.844914720094827e-07, "loss": 0.2963, "step": 40671 }, { "epoch": 4.1350142334282225, "grad_norm": 0.2780599892139435, "learning_rate": 8.842899447611925e-07, "loss": 0.2997, "step": 40672 }, { "epoch": 4.135115900772671, "grad_norm": 0.28317028284072876, "learning_rate": 8.840884382468695e-07, "loss": 0.2876, "step": 40673 }, { "epoch": 4.13521756811712, "grad_norm": 0.28153130412101746, "learning_rate": 8.83886952467532e-07, "loss": 0.2904, "step": 40674 }, { "epoch": 4.135319235461569, "grad_norm": 0.30517080426216125, "learning_rate": 8.836854874241934e-07, "loss": 0.332, "step": 40675 }, { "epoch": 4.135420902806018, "grad_norm": 0.2818901240825653, "learning_rate": 8.834840431178687e-07, "loss": 0.3061, "step": 40676 }, { "epoch": 4.135522570150468, "grad_norm": 0.27435585856437683, "learning_rate": 8.832826195495736e-07, "loss": 0.2782, "step": 40677 }, { "epoch": 4.135624237494917, "grad_norm": 0.2813827693462372, "learning_rate": 8.830812167203218e-07, "loss": 0.2716, "step": 40678 }, { "epoch": 4.135725904839366, "grad_norm": 0.2685890197753906, "learning_rate": 8.828798346311307e-07, "loss": 0.3166, "step": 40679 }, { "epoch": 4.135827572183815, "grad_norm": 0.2617034614086151, "learning_rate": 8.826784732830101e-07, "loss": 0.2992, "step": 40680 }, { "epoch": 4.135929239528264, "grad_norm": 0.3063860535621643, "learning_rate": 8.824771326769771e-07, "loss": 0.2609, "step": 40681 }, { "epoch": 4.136030906872713, "grad_norm": 0.29885008931159973, "learning_rate": 8.822758128140479e-07, "loss": 0.2675, "step": 40682 }, { "epoch": 4.136132574217162, "grad_norm": 0.2687792479991913, "learning_rate": 8.820745136952324e-07, "loss": 0.2976, "step": 40683 }, { "epoch": 4.136234241561611, "grad_norm": 0.2788873314857483, "learning_rate": 8.818732353215481e-07, "loss": 0.3106, "step": 40684 }, { "epoch": 4.13633590890606, "grad_norm": 0.2812722325325012, "learning_rate": 8.81671977694008e-07, "loss": 0.3354, "step": 40685 }, { "epoch": 4.136437576250509, "grad_norm": 0.28194278478622437, "learning_rate": 8.814707408136247e-07, "loss": 0.2942, "step": 40686 }, { "epoch": 4.1365392435949575, "grad_norm": 0.2626844644546509, "learning_rate": 8.81269524681414e-07, "loss": 0.2771, "step": 40687 }, { "epoch": 4.1366409109394064, "grad_norm": 0.276374876499176, "learning_rate": 8.810683292983891e-07, "loss": 0.2949, "step": 40688 }, { "epoch": 4.136742578283855, "grad_norm": 0.28569599986076355, "learning_rate": 8.808671546655623e-07, "loss": 0.2839, "step": 40689 }, { "epoch": 4.136844245628304, "grad_norm": 0.2927784025669098, "learning_rate": 8.806660007839474e-07, "loss": 0.2989, "step": 40690 }, { "epoch": 4.136945912972753, "grad_norm": 0.2808423340320587, "learning_rate": 8.80464867654558e-07, "loss": 0.3055, "step": 40691 }, { "epoch": 4.137047580317202, "grad_norm": 0.27684032917022705, "learning_rate": 8.802637552784105e-07, "loss": 0.2942, "step": 40692 }, { "epoch": 4.137149247661651, "grad_norm": 0.3176914155483246, "learning_rate": 8.800626636565124e-07, "loss": 0.2964, "step": 40693 }, { "epoch": 4.1372509150061, "grad_norm": 0.28705111145973206, "learning_rate": 8.798615927898807e-07, "loss": 0.2778, "step": 40694 }, { "epoch": 4.137352582350549, "grad_norm": 0.2866802513599396, "learning_rate": 8.796605426795273e-07, "loss": 0.2662, "step": 40695 }, { "epoch": 4.137454249694998, "grad_norm": 0.28878045082092285, "learning_rate": 8.794595133264639e-07, "loss": 0.2972, "step": 40696 }, { "epoch": 4.137555917039447, "grad_norm": 0.2556582987308502, "learning_rate": 8.792585047317054e-07, "loss": 0.2975, "step": 40697 }, { "epoch": 4.137657584383896, "grad_norm": 0.30558517575263977, "learning_rate": 8.790575168962634e-07, "loss": 0.2781, "step": 40698 }, { "epoch": 4.137759251728345, "grad_norm": 0.2908330261707306, "learning_rate": 8.788565498211498e-07, "loss": 0.2761, "step": 40699 }, { "epoch": 4.137860919072794, "grad_norm": 0.28568923473358154, "learning_rate": 8.786556035073768e-07, "loss": 0.2734, "step": 40700 }, { "epoch": 4.137962586417243, "grad_norm": 0.2975482642650604, "learning_rate": 8.784546779559588e-07, "loss": 0.2904, "step": 40701 }, { "epoch": 4.138064253761692, "grad_norm": 0.2794841229915619, "learning_rate": 8.78253773167907e-07, "loss": 0.2973, "step": 40702 }, { "epoch": 4.138165921106141, "grad_norm": 0.2833999693393707, "learning_rate": 8.780528891442314e-07, "loss": 0.2932, "step": 40703 }, { "epoch": 4.1382675884505895, "grad_norm": 0.27878445386886597, "learning_rate": 8.778520258859468e-07, "loss": 0.2913, "step": 40704 }, { "epoch": 4.1383692557950384, "grad_norm": 0.2704647183418274, "learning_rate": 8.776511833940649e-07, "loss": 0.3078, "step": 40705 }, { "epoch": 4.138470923139487, "grad_norm": 0.2810978889465332, "learning_rate": 8.774503616695951e-07, "loss": 0.2809, "step": 40706 }, { "epoch": 4.138572590483936, "grad_norm": 0.2646142244338989, "learning_rate": 8.772495607135517e-07, "loss": 0.3001, "step": 40707 }, { "epoch": 4.138674257828385, "grad_norm": 0.2807871103286743, "learning_rate": 8.770487805269456e-07, "loss": 0.2745, "step": 40708 }, { "epoch": 4.138775925172834, "grad_norm": 0.3035459518432617, "learning_rate": 8.768480211107882e-07, "loss": 0.3185, "step": 40709 }, { "epoch": 4.138877592517283, "grad_norm": 0.320040762424469, "learning_rate": 8.766472824660893e-07, "loss": 0.3131, "step": 40710 }, { "epoch": 4.138979259861732, "grad_norm": 0.2787456810474396, "learning_rate": 8.764465645938624e-07, "loss": 0.302, "step": 40711 }, { "epoch": 4.139080927206181, "grad_norm": 0.29463866353034973, "learning_rate": 8.76245867495118e-07, "loss": 0.283, "step": 40712 }, { "epoch": 4.13918259455063, "grad_norm": 0.3078913390636444, "learning_rate": 8.760451911708656e-07, "loss": 0.2979, "step": 40713 }, { "epoch": 4.139284261895079, "grad_norm": 0.28308364748954773, "learning_rate": 8.75844535622119e-07, "loss": 0.3231, "step": 40714 }, { "epoch": 4.139385929239528, "grad_norm": 0.2889751195907593, "learning_rate": 8.756439008498874e-07, "loss": 0.2883, "step": 40715 }, { "epoch": 4.139487596583977, "grad_norm": 0.27468204498291016, "learning_rate": 8.75443286855181e-07, "loss": 0.2837, "step": 40716 }, { "epoch": 4.139589263928427, "grad_norm": 0.2805456221103668, "learning_rate": 8.752426936390118e-07, "loss": 0.3055, "step": 40717 }, { "epoch": 4.139690931272876, "grad_norm": 0.3125292658805847, "learning_rate": 8.750421212023902e-07, "loss": 0.292, "step": 40718 }, { "epoch": 4.1397925986173245, "grad_norm": 0.2690962553024292, "learning_rate": 8.748415695463259e-07, "loss": 0.2857, "step": 40719 }, { "epoch": 4.1398942659617735, "grad_norm": 0.2860963046550751, "learning_rate": 8.74641038671828e-07, "loss": 0.253, "step": 40720 }, { "epoch": 4.139995933306222, "grad_norm": 0.2665720283985138, "learning_rate": 8.744405285799096e-07, "loss": 0.314, "step": 40721 }, { "epoch": 4.140097600650671, "grad_norm": 0.3009990155696869, "learning_rate": 8.742400392715794e-07, "loss": 0.2729, "step": 40722 }, { "epoch": 4.14019926799512, "grad_norm": 0.28434526920318604, "learning_rate": 8.740395707478466e-07, "loss": 0.306, "step": 40723 }, { "epoch": 4.140300935339569, "grad_norm": 0.2812889516353607, "learning_rate": 8.738391230097226e-07, "loss": 0.2646, "step": 40724 }, { "epoch": 4.140402602684018, "grad_norm": 0.29309189319610596, "learning_rate": 8.736386960582172e-07, "loss": 0.3103, "step": 40725 }, { "epoch": 4.140504270028467, "grad_norm": 0.26017871499061584, "learning_rate": 8.734382898943378e-07, "loss": 0.2739, "step": 40726 }, { "epoch": 4.140605937372916, "grad_norm": 0.2722684442996979, "learning_rate": 8.73237904519097e-07, "loss": 0.3175, "step": 40727 }, { "epoch": 4.140707604717365, "grad_norm": 0.29575735330581665, "learning_rate": 8.730375399335034e-07, "loss": 0.3149, "step": 40728 }, { "epoch": 4.140809272061814, "grad_norm": 0.28939133882522583, "learning_rate": 8.728371961385651e-07, "loss": 0.283, "step": 40729 }, { "epoch": 4.140910939406263, "grad_norm": 0.2756749093532562, "learning_rate": 8.726368731352919e-07, "loss": 0.2974, "step": 40730 }, { "epoch": 4.141012606750712, "grad_norm": 0.2707441747188568, "learning_rate": 8.72436570924694e-07, "loss": 0.3215, "step": 40731 }, { "epoch": 4.141114274095161, "grad_norm": 0.28666815161705017, "learning_rate": 8.722362895077796e-07, "loss": 0.2908, "step": 40732 }, { "epoch": 4.14121594143961, "grad_norm": 0.24995191395282745, "learning_rate": 8.720360288855573e-07, "loss": 0.2854, "step": 40733 }, { "epoch": 4.141317608784059, "grad_norm": 0.2820393741130829, "learning_rate": 8.718357890590373e-07, "loss": 0.3093, "step": 40734 }, { "epoch": 4.141419276128508, "grad_norm": 0.28846076130867004, "learning_rate": 8.716355700292278e-07, "loss": 0.2672, "step": 40735 }, { "epoch": 4.1415209434729565, "grad_norm": 0.2746134102344513, "learning_rate": 8.714353717971357e-07, "loss": 0.288, "step": 40736 }, { "epoch": 4.1416226108174055, "grad_norm": 0.3176404535770416, "learning_rate": 8.712351943637726e-07, "loss": 0.2865, "step": 40737 }, { "epoch": 4.141724278161854, "grad_norm": 0.2951158881187439, "learning_rate": 8.710350377301457e-07, "loss": 0.3035, "step": 40738 }, { "epoch": 4.141825945506303, "grad_norm": 0.2786189317703247, "learning_rate": 8.708349018972628e-07, "loss": 0.3309, "step": 40739 }, { "epoch": 4.141927612850752, "grad_norm": 0.27130186557769775, "learning_rate": 8.706347868661307e-07, "loss": 0.2677, "step": 40740 }, { "epoch": 4.142029280195201, "grad_norm": 0.31805986166000366, "learning_rate": 8.70434692637761e-07, "loss": 0.2554, "step": 40741 }, { "epoch": 4.14213094753965, "grad_norm": 0.26550066471099854, "learning_rate": 8.7023461921316e-07, "loss": 0.3005, "step": 40742 }, { "epoch": 4.142232614884099, "grad_norm": 0.2539430856704712, "learning_rate": 8.700345665933347e-07, "loss": 0.3037, "step": 40743 }, { "epoch": 4.142334282228548, "grad_norm": 0.2814285457134247, "learning_rate": 8.698345347792947e-07, "loss": 0.3419, "step": 40744 }, { "epoch": 4.142435949572997, "grad_norm": 0.27605894207954407, "learning_rate": 8.696345237720466e-07, "loss": 0.3273, "step": 40745 }, { "epoch": 4.142537616917446, "grad_norm": 0.31916284561157227, "learning_rate": 8.694345335725973e-07, "loss": 0.2843, "step": 40746 }, { "epoch": 4.142639284261895, "grad_norm": 0.2956085205078125, "learning_rate": 8.692345641819566e-07, "loss": 0.297, "step": 40747 }, { "epoch": 4.142740951606344, "grad_norm": 0.28865787386894226, "learning_rate": 8.690346156011303e-07, "loss": 0.2751, "step": 40748 }, { "epoch": 4.142842618950793, "grad_norm": 0.2845204472541809, "learning_rate": 8.68834687831126e-07, "loss": 0.2669, "step": 40749 }, { "epoch": 4.142944286295242, "grad_norm": 0.27942022681236267, "learning_rate": 8.686347808729495e-07, "loss": 0.2946, "step": 40750 }, { "epoch": 4.143045953639691, "grad_norm": 0.2858440577983856, "learning_rate": 8.684348947276106e-07, "loss": 0.27, "step": 40751 }, { "epoch": 4.14314762098414, "grad_norm": 0.28629153966903687, "learning_rate": 8.682350293961144e-07, "loss": 0.301, "step": 40752 }, { "epoch": 4.1432492883285885, "grad_norm": 0.31596407294273376, "learning_rate": 8.680351848794677e-07, "loss": 0.3171, "step": 40753 }, { "epoch": 4.1433509556730375, "grad_norm": 0.30968010425567627, "learning_rate": 8.678353611786789e-07, "loss": 0.2697, "step": 40754 }, { "epoch": 4.143452623017486, "grad_norm": 0.287044882774353, "learning_rate": 8.676355582947538e-07, "loss": 0.2994, "step": 40755 }, { "epoch": 4.143554290361935, "grad_norm": 0.2812996506690979, "learning_rate": 8.674357762286967e-07, "loss": 0.3349, "step": 40756 }, { "epoch": 4.143655957706384, "grad_norm": 0.2833806574344635, "learning_rate": 8.672360149815179e-07, "loss": 0.2707, "step": 40757 }, { "epoch": 4.143757625050833, "grad_norm": 0.27704209089279175, "learning_rate": 8.670362745542221e-07, "loss": 0.3144, "step": 40758 }, { "epoch": 4.143859292395283, "grad_norm": 0.26985543966293335, "learning_rate": 8.668365549478153e-07, "loss": 0.2943, "step": 40759 }, { "epoch": 4.143960959739732, "grad_norm": 0.31652969121932983, "learning_rate": 8.666368561633026e-07, "loss": 0.278, "step": 40760 }, { "epoch": 4.144062627084181, "grad_norm": 0.2712719440460205, "learning_rate": 8.664371782016923e-07, "loss": 0.3152, "step": 40761 }, { "epoch": 4.14416429442863, "grad_norm": 0.27439451217651367, "learning_rate": 8.662375210639889e-07, "loss": 0.2913, "step": 40762 }, { "epoch": 4.144265961773079, "grad_norm": 0.28953665494918823, "learning_rate": 8.66037884751198e-07, "loss": 0.2852, "step": 40763 }, { "epoch": 4.144367629117528, "grad_norm": 0.2953420877456665, "learning_rate": 8.658382692643263e-07, "loss": 0.2844, "step": 40764 }, { "epoch": 4.144469296461977, "grad_norm": 0.28776055574417114, "learning_rate": 8.656386746043793e-07, "loss": 0.31, "step": 40765 }, { "epoch": 4.144570963806426, "grad_norm": 0.2827032804489136, "learning_rate": 8.654391007723611e-07, "loss": 0.2869, "step": 40766 }, { "epoch": 4.144672631150875, "grad_norm": 0.27657654881477356, "learning_rate": 8.652395477692793e-07, "loss": 0.2655, "step": 40767 }, { "epoch": 4.1447742984953235, "grad_norm": 0.2785215973854065, "learning_rate": 8.650400155961381e-07, "loss": 0.323, "step": 40768 }, { "epoch": 4.1448759658397725, "grad_norm": 0.2858300805091858, "learning_rate": 8.648405042539426e-07, "loss": 0.287, "step": 40769 }, { "epoch": 4.144977633184221, "grad_norm": 0.28866466879844666, "learning_rate": 8.646410137436972e-07, "loss": 0.2942, "step": 40770 }, { "epoch": 4.14507930052867, "grad_norm": 0.2751697897911072, "learning_rate": 8.644415440664083e-07, "loss": 0.2941, "step": 40771 }, { "epoch": 4.145180967873119, "grad_norm": 0.26361343264579773, "learning_rate": 8.642420952230807e-07, "loss": 0.3374, "step": 40772 }, { "epoch": 4.145282635217568, "grad_norm": 0.2988716661930084, "learning_rate": 8.640426672147167e-07, "loss": 0.2677, "step": 40773 }, { "epoch": 4.145384302562017, "grad_norm": 0.27377840876579285, "learning_rate": 8.638432600423247e-07, "loss": 0.2952, "step": 40774 }, { "epoch": 4.145485969906466, "grad_norm": 0.29278597235679626, "learning_rate": 8.636438737069069e-07, "loss": 0.2662, "step": 40775 }, { "epoch": 4.145587637250915, "grad_norm": 0.2841789722442627, "learning_rate": 8.634445082094678e-07, "loss": 0.297, "step": 40776 }, { "epoch": 4.145689304595364, "grad_norm": 0.2791873514652252, "learning_rate": 8.63245163551013e-07, "loss": 0.2875, "step": 40777 }, { "epoch": 4.145790971939813, "grad_norm": 0.26864877343177795, "learning_rate": 8.630458397325458e-07, "loss": 0.2849, "step": 40778 }, { "epoch": 4.145892639284262, "grad_norm": 0.2906396985054016, "learning_rate": 8.628465367550715e-07, "loss": 0.2841, "step": 40779 }, { "epoch": 4.145994306628711, "grad_norm": 0.2680162489414215, "learning_rate": 8.626472546195913e-07, "loss": 0.3076, "step": 40780 }, { "epoch": 4.14609597397316, "grad_norm": 0.2912765145301819, "learning_rate": 8.624479933271124e-07, "loss": 0.317, "step": 40781 }, { "epoch": 4.146197641317609, "grad_norm": 0.26367875933647156, "learning_rate": 8.622487528786372e-07, "loss": 0.3201, "step": 40782 }, { "epoch": 4.146299308662058, "grad_norm": 0.2611759305000305, "learning_rate": 8.620495332751683e-07, "loss": 0.2918, "step": 40783 }, { "epoch": 4.146400976006507, "grad_norm": 0.2733341455459595, "learning_rate": 8.618503345177115e-07, "loss": 0.2865, "step": 40784 }, { "epoch": 4.1465026433509555, "grad_norm": 0.2845572233200073, "learning_rate": 8.616511566072699e-07, "loss": 0.3042, "step": 40785 }, { "epoch": 4.1466043106954045, "grad_norm": 0.2909899353981018, "learning_rate": 8.614519995448445e-07, "loss": 0.278, "step": 40786 }, { "epoch": 4.146705978039853, "grad_norm": 0.2945054769515991, "learning_rate": 8.612528633314421e-07, "loss": 0.2627, "step": 40787 }, { "epoch": 4.146807645384302, "grad_norm": 0.25936946272850037, "learning_rate": 8.610537479680641e-07, "loss": 0.2651, "step": 40788 }, { "epoch": 4.146909312728751, "grad_norm": 0.30607110261917114, "learning_rate": 8.608546534557138e-07, "loss": 0.2668, "step": 40789 }, { "epoch": 4.1470109800732, "grad_norm": 0.28403034806251526, "learning_rate": 8.60655579795393e-07, "loss": 0.3406, "step": 40790 }, { "epoch": 4.147112647417649, "grad_norm": 0.2848590910434723, "learning_rate": 8.604565269881071e-07, "loss": 0.3036, "step": 40791 }, { "epoch": 4.147214314762098, "grad_norm": 0.2857746183872223, "learning_rate": 8.602574950348569e-07, "loss": 0.3312, "step": 40792 }, { "epoch": 4.147315982106547, "grad_norm": 0.2863541841506958, "learning_rate": 8.600584839366449e-07, "loss": 0.3179, "step": 40793 }, { "epoch": 4.147417649450996, "grad_norm": 0.2881607711315155, "learning_rate": 8.598594936944771e-07, "loss": 0.3044, "step": 40794 }, { "epoch": 4.147519316795445, "grad_norm": 0.2987281084060669, "learning_rate": 8.596605243093509e-07, "loss": 0.3038, "step": 40795 }, { "epoch": 4.147620984139894, "grad_norm": 0.28024622797966003, "learning_rate": 8.594615757822705e-07, "loss": 0.2776, "step": 40796 }, { "epoch": 4.147722651484343, "grad_norm": 0.3085269629955292, "learning_rate": 8.592626481142402e-07, "loss": 0.2844, "step": 40797 }, { "epoch": 4.147824318828792, "grad_norm": 0.2978026866912842, "learning_rate": 8.590637413062608e-07, "loss": 0.2811, "step": 40798 }, { "epoch": 4.147925986173242, "grad_norm": 0.2862057387828827, "learning_rate": 8.588648553593343e-07, "loss": 0.2897, "step": 40799 }, { "epoch": 4.1480276535176905, "grad_norm": 0.2732764482498169, "learning_rate": 8.586659902744616e-07, "loss": 0.3318, "step": 40800 }, { "epoch": 4.1481293208621395, "grad_norm": 0.280626505613327, "learning_rate": 8.584671460526461e-07, "loss": 0.3005, "step": 40801 }, { "epoch": 4.148230988206588, "grad_norm": 0.2794664204120636, "learning_rate": 8.582683226948891e-07, "loss": 0.3078, "step": 40802 }, { "epoch": 4.148332655551037, "grad_norm": 0.3070996403694153, "learning_rate": 8.580695202021911e-07, "loss": 0.2985, "step": 40803 }, { "epoch": 4.148434322895486, "grad_norm": 0.25412848591804504, "learning_rate": 8.578707385755569e-07, "loss": 0.2929, "step": 40804 }, { "epoch": 4.148535990239935, "grad_norm": 0.25168535113334656, "learning_rate": 8.576719778159831e-07, "loss": 0.3043, "step": 40805 }, { "epoch": 4.148637657584384, "grad_norm": 0.29612496495246887, "learning_rate": 8.574732379244738e-07, "loss": 0.2936, "step": 40806 }, { "epoch": 4.148739324928833, "grad_norm": 0.28395506739616394, "learning_rate": 8.572745189020304e-07, "loss": 0.2649, "step": 40807 }, { "epoch": 4.148840992273282, "grad_norm": 0.28885066509246826, "learning_rate": 8.570758207496543e-07, "loss": 0.3083, "step": 40808 }, { "epoch": 4.148942659617731, "grad_norm": 0.27365827560424805, "learning_rate": 8.568771434683454e-07, "loss": 0.3086, "step": 40809 }, { "epoch": 4.14904432696218, "grad_norm": 0.288985937833786, "learning_rate": 8.566784870591038e-07, "loss": 0.2924, "step": 40810 }, { "epoch": 4.149145994306629, "grad_norm": 0.27572786808013916, "learning_rate": 8.564798515229322e-07, "loss": 0.3258, "step": 40811 }, { "epoch": 4.149247661651078, "grad_norm": 0.2582486867904663, "learning_rate": 8.562812368608309e-07, "loss": 0.2989, "step": 40812 }, { "epoch": 4.149349328995527, "grad_norm": 0.25726816058158875, "learning_rate": 8.560826430737984e-07, "loss": 0.2823, "step": 40813 }, { "epoch": 4.149450996339976, "grad_norm": 0.28024402260780334, "learning_rate": 8.558840701628396e-07, "loss": 0.29, "step": 40814 }, { "epoch": 4.149552663684425, "grad_norm": 0.270380437374115, "learning_rate": 8.556855181289498e-07, "loss": 0.2835, "step": 40815 }, { "epoch": 4.149654331028874, "grad_norm": 0.2793935537338257, "learning_rate": 8.554869869731309e-07, "loss": 0.2808, "step": 40816 }, { "epoch": 4.1497559983733225, "grad_norm": 0.28986120223999023, "learning_rate": 8.552884766963865e-07, "loss": 0.2846, "step": 40817 }, { "epoch": 4.1498576657177715, "grad_norm": 0.26655805110931396, "learning_rate": 8.550899872997115e-07, "loss": 0.2572, "step": 40818 }, { "epoch": 4.14995933306222, "grad_norm": 0.28193193674087524, "learning_rate": 8.548915187841089e-07, "loss": 0.2903, "step": 40819 }, { "epoch": 4.150061000406669, "grad_norm": 0.2937757968902588, "learning_rate": 8.546930711505768e-07, "loss": 0.3151, "step": 40820 }, { "epoch": 4.150162667751118, "grad_norm": 0.2758824825286865, "learning_rate": 8.544946444001173e-07, "loss": 0.2844, "step": 40821 }, { "epoch": 4.150264335095567, "grad_norm": 0.26192474365234375, "learning_rate": 8.542962385337289e-07, "loss": 0.302, "step": 40822 }, { "epoch": 4.150366002440016, "grad_norm": 0.27770400047302246, "learning_rate": 8.540978535524091e-07, "loss": 0.2833, "step": 40823 }, { "epoch": 4.150467669784465, "grad_norm": 0.29890087246894836, "learning_rate": 8.538994894571617e-07, "loss": 0.2697, "step": 40824 }, { "epoch": 4.150569337128914, "grad_norm": 0.26477980613708496, "learning_rate": 8.537011462489814e-07, "loss": 0.3238, "step": 40825 }, { "epoch": 4.150671004473363, "grad_norm": 0.28689444065093994, "learning_rate": 8.53502823928869e-07, "loss": 0.2797, "step": 40826 }, { "epoch": 4.150772671817812, "grad_norm": 0.3117036819458008, "learning_rate": 8.533045224978265e-07, "loss": 0.3011, "step": 40827 }, { "epoch": 4.150874339162261, "grad_norm": 0.2863931357860565, "learning_rate": 8.531062419568481e-07, "loss": 0.289, "step": 40828 }, { "epoch": 4.15097600650671, "grad_norm": 0.27871260046958923, "learning_rate": 8.529079823069358e-07, "loss": 0.3373, "step": 40829 }, { "epoch": 4.151077673851159, "grad_norm": 0.2902821898460388, "learning_rate": 8.527097435490867e-07, "loss": 0.2965, "step": 40830 }, { "epoch": 4.151179341195608, "grad_norm": 0.28214019536972046, "learning_rate": 8.525115256843019e-07, "loss": 0.3063, "step": 40831 }, { "epoch": 4.151281008540057, "grad_norm": 0.31314021348953247, "learning_rate": 8.523133287135782e-07, "loss": 0.2985, "step": 40832 }, { "epoch": 4.151382675884506, "grad_norm": 0.28725674748420715, "learning_rate": 8.521151526379129e-07, "loss": 0.2986, "step": 40833 }, { "epoch": 4.1514843432289545, "grad_norm": 0.2854534983634949, "learning_rate": 8.51916997458308e-07, "loss": 0.2673, "step": 40834 }, { "epoch": 4.1515860105734035, "grad_norm": 0.2840467095375061, "learning_rate": 8.517188631757573e-07, "loss": 0.2875, "step": 40835 }, { "epoch": 4.151687677917852, "grad_norm": 0.2736334204673767, "learning_rate": 8.515207497912625e-07, "loss": 0.2852, "step": 40836 }, { "epoch": 4.151789345262301, "grad_norm": 0.2714129388332367, "learning_rate": 8.513226573058203e-07, "loss": 0.2986, "step": 40837 }, { "epoch": 4.15189101260675, "grad_norm": 0.27337026596069336, "learning_rate": 8.511245857204276e-07, "loss": 0.2768, "step": 40838 }, { "epoch": 4.151992679951199, "grad_norm": 0.3036828637123108, "learning_rate": 8.509265350360845e-07, "loss": 0.3002, "step": 40839 }, { "epoch": 4.152094347295648, "grad_norm": 0.277900755405426, "learning_rate": 8.507285052537873e-07, "loss": 0.2867, "step": 40840 }, { "epoch": 4.152196014640098, "grad_norm": 0.2737917900085449, "learning_rate": 8.505304963745342e-07, "loss": 0.3097, "step": 40841 }, { "epoch": 4.152297681984547, "grad_norm": 0.265718013048172, "learning_rate": 8.503325083993208e-07, "loss": 0.2854, "step": 40842 }, { "epoch": 4.152399349328996, "grad_norm": 0.28049904108047485, "learning_rate": 8.501345413291468e-07, "loss": 0.2992, "step": 40843 }, { "epoch": 4.152501016673445, "grad_norm": 0.27052122354507446, "learning_rate": 8.499365951650107e-07, "loss": 0.2988, "step": 40844 }, { "epoch": 4.152602684017894, "grad_norm": 0.2846953272819519, "learning_rate": 8.497386699079053e-07, "loss": 0.2722, "step": 40845 }, { "epoch": 4.152704351362343, "grad_norm": 0.2943102717399597, "learning_rate": 8.495407655588317e-07, "loss": 0.2979, "step": 40846 }, { "epoch": 4.152806018706792, "grad_norm": 0.25989532470703125, "learning_rate": 8.493428821187854e-07, "loss": 0.2846, "step": 40847 }, { "epoch": 4.152907686051241, "grad_norm": 0.2852669358253479, "learning_rate": 8.491450195887618e-07, "loss": 0.2807, "step": 40848 }, { "epoch": 4.1530093533956896, "grad_norm": 0.27608880400657654, "learning_rate": 8.489471779697606e-07, "loss": 0.2868, "step": 40849 }, { "epoch": 4.1531110207401385, "grad_norm": 0.29935047030448914, "learning_rate": 8.48749357262777e-07, "loss": 0.3138, "step": 40850 }, { "epoch": 4.153212688084587, "grad_norm": 0.2745510935783386, "learning_rate": 8.48551557468808e-07, "loss": 0.3161, "step": 40851 }, { "epoch": 4.153314355429036, "grad_norm": 0.2631039321422577, "learning_rate": 8.483537785888485e-07, "loss": 0.3247, "step": 40852 }, { "epoch": 4.153416022773485, "grad_norm": 0.2828056216239929, "learning_rate": 8.481560206238971e-07, "loss": 0.3114, "step": 40853 }, { "epoch": 4.153517690117934, "grad_norm": 0.3063243329524994, "learning_rate": 8.47958283574949e-07, "loss": 0.2847, "step": 40854 }, { "epoch": 4.153619357462383, "grad_norm": 0.2791047990322113, "learning_rate": 8.477605674429989e-07, "loss": 0.2921, "step": 40855 }, { "epoch": 4.153721024806832, "grad_norm": 0.28960248827934265, "learning_rate": 8.475628722290458e-07, "loss": 0.3062, "step": 40856 }, { "epoch": 4.153822692151281, "grad_norm": 0.2597496509552002, "learning_rate": 8.473651979340841e-07, "loss": 0.2793, "step": 40857 }, { "epoch": 4.15392435949573, "grad_norm": 0.2717783749103546, "learning_rate": 8.471675445591088e-07, "loss": 0.3244, "step": 40858 }, { "epoch": 4.154026026840179, "grad_norm": 0.28431442379951477, "learning_rate": 8.469699121051172e-07, "loss": 0.2697, "step": 40859 }, { "epoch": 4.154127694184628, "grad_norm": 0.28751710057258606, "learning_rate": 8.467723005731049e-07, "loss": 0.2869, "step": 40860 }, { "epoch": 4.154229361529077, "grad_norm": 0.27772822976112366, "learning_rate": 8.465747099640665e-07, "loss": 0.3007, "step": 40861 }, { "epoch": 4.154331028873526, "grad_norm": 0.27172765135765076, "learning_rate": 8.463771402789966e-07, "loss": 0.3003, "step": 40862 }, { "epoch": 4.154432696217975, "grad_norm": 0.287247896194458, "learning_rate": 8.461795915188925e-07, "loss": 0.2691, "step": 40863 }, { "epoch": 4.154534363562424, "grad_norm": 0.2767197787761688, "learning_rate": 8.459820636847488e-07, "loss": 0.3239, "step": 40864 }, { "epoch": 4.154636030906873, "grad_norm": 0.2656913101673126, "learning_rate": 8.457845567775591e-07, "loss": 0.305, "step": 40865 }, { "epoch": 4.1547376982513216, "grad_norm": 0.26888537406921387, "learning_rate": 8.45587070798321e-07, "loss": 0.294, "step": 40866 }, { "epoch": 4.1548393655957705, "grad_norm": 0.26829370856285095, "learning_rate": 8.453896057480277e-07, "loss": 0.3381, "step": 40867 }, { "epoch": 4.154941032940219, "grad_norm": 0.26129981875419617, "learning_rate": 8.451921616276731e-07, "loss": 0.2873, "step": 40868 }, { "epoch": 4.155042700284668, "grad_norm": 0.26948267221450806, "learning_rate": 8.449947384382546e-07, "loss": 0.2859, "step": 40869 }, { "epoch": 4.155144367629117, "grad_norm": 0.286121666431427, "learning_rate": 8.447973361807649e-07, "loss": 0.3077, "step": 40870 }, { "epoch": 4.155246034973566, "grad_norm": 0.28499799966812134, "learning_rate": 8.44599954856199e-07, "loss": 0.3083, "step": 40871 }, { "epoch": 4.155347702318015, "grad_norm": 0.26390042901039124, "learning_rate": 8.444025944655493e-07, "loss": 0.301, "step": 40872 }, { "epoch": 4.155449369662464, "grad_norm": 0.2754092514514923, "learning_rate": 8.442052550098134e-07, "loss": 0.2838, "step": 40873 }, { "epoch": 4.155551037006913, "grad_norm": 0.27264121174812317, "learning_rate": 8.440079364899839e-07, "loss": 0.3071, "step": 40874 }, { "epoch": 4.155652704351362, "grad_norm": 0.2909345030784607, "learning_rate": 8.438106389070538e-07, "loss": 0.2847, "step": 40875 }, { "epoch": 4.155754371695811, "grad_norm": 0.2763267457485199, "learning_rate": 8.436133622620191e-07, "loss": 0.2678, "step": 40876 }, { "epoch": 4.15585603904026, "grad_norm": 0.2886928915977478, "learning_rate": 8.434161065558722e-07, "loss": 0.2979, "step": 40877 }, { "epoch": 4.155957706384709, "grad_norm": 0.282531201839447, "learning_rate": 8.432188717896061e-07, "loss": 0.3285, "step": 40878 }, { "epoch": 4.156059373729158, "grad_norm": 0.26288866996765137, "learning_rate": 8.43021657964217e-07, "loss": 0.3162, "step": 40879 }, { "epoch": 4.156161041073607, "grad_norm": 0.26240795850753784, "learning_rate": 8.428244650806972e-07, "loss": 0.329, "step": 40880 }, { "epoch": 4.156262708418057, "grad_norm": 0.2935798764228821, "learning_rate": 8.426272931400392e-07, "loss": 0.2636, "step": 40881 }, { "epoch": 4.1563643757625055, "grad_norm": 0.28280171751976013, "learning_rate": 8.424301421432363e-07, "loss": 0.321, "step": 40882 }, { "epoch": 4.1564660431069544, "grad_norm": 0.2914373576641083, "learning_rate": 8.42233012091283e-07, "loss": 0.2852, "step": 40883 }, { "epoch": 4.156567710451403, "grad_norm": 0.2765762507915497, "learning_rate": 8.420359029851721e-07, "loss": 0.3086, "step": 40884 }, { "epoch": 4.156669377795852, "grad_norm": 0.28753578662872314, "learning_rate": 8.418388148258949e-07, "loss": 0.2958, "step": 40885 }, { "epoch": 4.156771045140301, "grad_norm": 0.2992556691169739, "learning_rate": 8.416417476144473e-07, "loss": 0.2978, "step": 40886 }, { "epoch": 4.15687271248475, "grad_norm": 0.2714543640613556, "learning_rate": 8.414447013518195e-07, "loss": 0.2945, "step": 40887 }, { "epoch": 4.156974379829199, "grad_norm": 0.29592013359069824, "learning_rate": 8.412476760390048e-07, "loss": 0.3103, "step": 40888 }, { "epoch": 4.157076047173648, "grad_norm": 0.2741101384162903, "learning_rate": 8.410506716769967e-07, "loss": 0.3026, "step": 40889 }, { "epoch": 4.157177714518097, "grad_norm": 0.28826022148132324, "learning_rate": 8.408536882667867e-07, "loss": 0.2974, "step": 40890 }, { "epoch": 4.157279381862546, "grad_norm": 0.2927928566932678, "learning_rate": 8.406567258093678e-07, "loss": 0.2831, "step": 40891 }, { "epoch": 4.157381049206995, "grad_norm": 0.2758835256099701, "learning_rate": 8.404597843057311e-07, "loss": 0.2787, "step": 40892 }, { "epoch": 4.157482716551444, "grad_norm": 0.2371681034564972, "learning_rate": 8.402628637568705e-07, "loss": 0.2824, "step": 40893 }, { "epoch": 4.157584383895893, "grad_norm": 0.29426276683807373, "learning_rate": 8.400659641637771e-07, "loss": 0.2916, "step": 40894 }, { "epoch": 4.157686051240342, "grad_norm": 0.28060492873191833, "learning_rate": 8.39869085527441e-07, "loss": 0.2917, "step": 40895 }, { "epoch": 4.157787718584791, "grad_norm": 0.27340734004974365, "learning_rate": 8.396722278488578e-07, "loss": 0.3541, "step": 40896 }, { "epoch": 4.15788938592924, "grad_norm": 0.266119122505188, "learning_rate": 8.394753911290172e-07, "loss": 0.3011, "step": 40897 }, { "epoch": 4.157991053273689, "grad_norm": 0.2936877906322479, "learning_rate": 8.392785753689093e-07, "loss": 0.2927, "step": 40898 }, { "epoch": 4.1580927206181375, "grad_norm": 0.2819478213787079, "learning_rate": 8.390817805695283e-07, "loss": 0.2971, "step": 40899 }, { "epoch": 4.1581943879625864, "grad_norm": 0.2866937220096588, "learning_rate": 8.388850067318649e-07, "loss": 0.2828, "step": 40900 }, { "epoch": 4.158296055307035, "grad_norm": 0.27366992831230164, "learning_rate": 8.386882538569103e-07, "loss": 0.2837, "step": 40901 }, { "epoch": 4.158397722651484, "grad_norm": 0.2700871229171753, "learning_rate": 8.384915219456536e-07, "loss": 0.3093, "step": 40902 }, { "epoch": 4.158499389995933, "grad_norm": 0.3054838478565216, "learning_rate": 8.38294810999089e-07, "loss": 0.2623, "step": 40903 }, { "epoch": 4.158601057340382, "grad_norm": 0.25892654061317444, "learning_rate": 8.380981210182066e-07, "loss": 0.2966, "step": 40904 }, { "epoch": 4.158702724684831, "grad_norm": 0.2799513339996338, "learning_rate": 8.379014520039952e-07, "loss": 0.2812, "step": 40905 }, { "epoch": 4.15880439202928, "grad_norm": 0.268193781375885, "learning_rate": 8.377048039574487e-07, "loss": 0.3036, "step": 40906 }, { "epoch": 4.158906059373729, "grad_norm": 0.30270618200302124, "learning_rate": 8.375081768795562e-07, "loss": 0.3157, "step": 40907 }, { "epoch": 4.159007726718178, "grad_norm": 0.268565833568573, "learning_rate": 8.37311570771307e-07, "loss": 0.265, "step": 40908 }, { "epoch": 4.159109394062627, "grad_norm": 0.28888246417045593, "learning_rate": 8.371149856336941e-07, "loss": 0.3081, "step": 40909 }, { "epoch": 4.159211061407076, "grad_norm": 0.2951526939868927, "learning_rate": 8.369184214677067e-07, "loss": 0.3048, "step": 40910 }, { "epoch": 4.159312728751525, "grad_norm": 0.29993966221809387, "learning_rate": 8.367218782743347e-07, "loss": 0.2973, "step": 40911 }, { "epoch": 4.159414396095974, "grad_norm": 0.27692553400993347, "learning_rate": 8.365253560545678e-07, "loss": 0.2956, "step": 40912 }, { "epoch": 4.159516063440423, "grad_norm": 0.28641122579574585, "learning_rate": 8.363288548093979e-07, "loss": 0.2981, "step": 40913 }, { "epoch": 4.159617730784872, "grad_norm": 0.28722670674324036, "learning_rate": 8.361323745398136e-07, "loss": 0.2934, "step": 40914 }, { "epoch": 4.159719398129321, "grad_norm": 0.2863910496234894, "learning_rate": 8.359359152468039e-07, "loss": 0.2596, "step": 40915 }, { "epoch": 4.1598210654737695, "grad_norm": 0.2939785122871399, "learning_rate": 8.357394769313604e-07, "loss": 0.2972, "step": 40916 }, { "epoch": 4.1599227328182184, "grad_norm": 0.28647810220718384, "learning_rate": 8.355430595944725e-07, "loss": 0.3184, "step": 40917 }, { "epoch": 4.160024400162667, "grad_norm": 0.30384907126426697, "learning_rate": 8.353466632371271e-07, "loss": 0.2952, "step": 40918 }, { "epoch": 4.160126067507116, "grad_norm": 0.3099253177642822, "learning_rate": 8.351502878603168e-07, "loss": 0.3216, "step": 40919 }, { "epoch": 4.160227734851565, "grad_norm": 0.2918427288532257, "learning_rate": 8.349539334650303e-07, "loss": 0.3272, "step": 40920 }, { "epoch": 4.160329402196014, "grad_norm": 0.3032326102256775, "learning_rate": 8.347576000522555e-07, "loss": 0.3027, "step": 40921 }, { "epoch": 4.160431069540463, "grad_norm": 0.3120880126953125, "learning_rate": 8.345612876229809e-07, "loss": 0.2654, "step": 40922 }, { "epoch": 4.160532736884913, "grad_norm": 0.263955682516098, "learning_rate": 8.34364996178198e-07, "loss": 0.3222, "step": 40923 }, { "epoch": 4.160634404229362, "grad_norm": 0.28321579098701477, "learning_rate": 8.341687257188946e-07, "loss": 0.3124, "step": 40924 }, { "epoch": 4.160736071573811, "grad_norm": 0.2942577600479126, "learning_rate": 8.339724762460578e-07, "loss": 0.3036, "step": 40925 }, { "epoch": 4.16083773891826, "grad_norm": 0.3056783080101013, "learning_rate": 8.337762477606787e-07, "loss": 0.2851, "step": 40926 }, { "epoch": 4.160939406262709, "grad_norm": 0.3190445601940155, "learning_rate": 8.335800402637451e-07, "loss": 0.3047, "step": 40927 }, { "epoch": 4.161041073607158, "grad_norm": 0.26965537667274475, "learning_rate": 8.333838537562438e-07, "loss": 0.2819, "step": 40928 }, { "epoch": 4.161142740951607, "grad_norm": 0.2610730528831482, "learning_rate": 8.331876882391659e-07, "loss": 0.2793, "step": 40929 }, { "epoch": 4.161244408296056, "grad_norm": 0.27367010712623596, "learning_rate": 8.329915437134978e-07, "loss": 0.3276, "step": 40930 }, { "epoch": 4.1613460756405045, "grad_norm": 0.27250468730926514, "learning_rate": 8.327954201802285e-07, "loss": 0.3029, "step": 40931 }, { "epoch": 4.1614477429849535, "grad_norm": 0.28590986132621765, "learning_rate": 8.32599317640344e-07, "loss": 0.3185, "step": 40932 }, { "epoch": 4.161549410329402, "grad_norm": 0.28854531049728394, "learning_rate": 8.324032360948353e-07, "loss": 0.2709, "step": 40933 }, { "epoch": 4.161651077673851, "grad_norm": 0.2819054424762726, "learning_rate": 8.322071755446892e-07, "loss": 0.2966, "step": 40934 }, { "epoch": 4.1617527450183, "grad_norm": 0.2583390474319458, "learning_rate": 8.320111359908911e-07, "loss": 0.3085, "step": 40935 }, { "epoch": 4.161854412362749, "grad_norm": 0.2793945074081421, "learning_rate": 8.318151174344319e-07, "loss": 0.2896, "step": 40936 }, { "epoch": 4.161956079707198, "grad_norm": 0.2693043053150177, "learning_rate": 8.316191198762974e-07, "loss": 0.2944, "step": 40937 }, { "epoch": 4.162057747051647, "grad_norm": 0.303084135055542, "learning_rate": 8.314231433174741e-07, "loss": 0.3134, "step": 40938 }, { "epoch": 4.162159414396096, "grad_norm": 0.2917856276035309, "learning_rate": 8.312271877589512e-07, "loss": 0.3132, "step": 40939 }, { "epoch": 4.162261081740545, "grad_norm": 0.2614326775074005, "learning_rate": 8.310312532017156e-07, "loss": 0.2964, "step": 40940 }, { "epoch": 4.162362749084994, "grad_norm": 0.29402628540992737, "learning_rate": 8.308353396467539e-07, "loss": 0.3034, "step": 40941 }, { "epoch": 4.162464416429443, "grad_norm": 0.2700138986110687, "learning_rate": 8.306394470950513e-07, "loss": 0.2708, "step": 40942 }, { "epoch": 4.162566083773892, "grad_norm": 0.2803085148334503, "learning_rate": 8.304435755475981e-07, "loss": 0.3002, "step": 40943 }, { "epoch": 4.162667751118341, "grad_norm": 0.2669733762741089, "learning_rate": 8.302477250053792e-07, "loss": 0.2894, "step": 40944 }, { "epoch": 4.16276941846279, "grad_norm": 0.27186253666877747, "learning_rate": 8.300518954693798e-07, "loss": 0.3036, "step": 40945 }, { "epoch": 4.162871085807239, "grad_norm": 0.28041723370552063, "learning_rate": 8.298560869405908e-07, "loss": 0.2859, "step": 40946 }, { "epoch": 4.162972753151688, "grad_norm": 0.28883272409439087, "learning_rate": 8.296602994199937e-07, "loss": 0.3145, "step": 40947 }, { "epoch": 4.1630744204961365, "grad_norm": 0.2764281928539276, "learning_rate": 8.294645329085765e-07, "loss": 0.2963, "step": 40948 }, { "epoch": 4.1631760878405855, "grad_norm": 0.2816508710384369, "learning_rate": 8.292687874073275e-07, "loss": 0.3135, "step": 40949 }, { "epoch": 4.163277755185034, "grad_norm": 0.27587223052978516, "learning_rate": 8.290730629172305e-07, "loss": 0.2959, "step": 40950 }, { "epoch": 4.163379422529483, "grad_norm": 0.27976077795028687, "learning_rate": 8.288773594392729e-07, "loss": 0.2811, "step": 40951 }, { "epoch": 4.163481089873932, "grad_norm": 0.3084582984447479, "learning_rate": 8.286816769744388e-07, "loss": 0.2808, "step": 40952 }, { "epoch": 4.163582757218381, "grad_norm": 0.2827356159687042, "learning_rate": 8.284860155237162e-07, "loss": 0.2846, "step": 40953 }, { "epoch": 4.16368442456283, "grad_norm": 0.27638956904411316, "learning_rate": 8.282903750880894e-07, "loss": 0.3277, "step": 40954 }, { "epoch": 4.163786091907279, "grad_norm": 0.281978577375412, "learning_rate": 8.280947556685437e-07, "loss": 0.2973, "step": 40955 }, { "epoch": 4.163887759251728, "grad_norm": 0.28799164295196533, "learning_rate": 8.278991572660672e-07, "loss": 0.3096, "step": 40956 }, { "epoch": 4.163989426596177, "grad_norm": 0.3193015158176422, "learning_rate": 8.277035798816413e-07, "loss": 0.3022, "step": 40957 }, { "epoch": 4.164091093940626, "grad_norm": 0.2538377344608307, "learning_rate": 8.27508023516253e-07, "loss": 0.2907, "step": 40958 }, { "epoch": 4.164192761285075, "grad_norm": 0.284606397151947, "learning_rate": 8.273124881708888e-07, "loss": 0.2707, "step": 40959 }, { "epoch": 4.164294428629524, "grad_norm": 0.28872501850128174, "learning_rate": 8.271169738465329e-07, "loss": 0.2936, "step": 40960 }, { "epoch": 4.164396095973973, "grad_norm": 0.2688051164150238, "learning_rate": 8.2692148054417e-07, "loss": 0.3082, "step": 40961 }, { "epoch": 4.164497763318422, "grad_norm": 0.2681809961795807, "learning_rate": 8.267260082647837e-07, "loss": 0.3364, "step": 40962 }, { "epoch": 4.1645994306628715, "grad_norm": 0.28880444169044495, "learning_rate": 8.265305570093618e-07, "loss": 0.3016, "step": 40963 }, { "epoch": 4.1647010980073205, "grad_norm": 0.268650621175766, "learning_rate": 8.263351267788866e-07, "loss": 0.3179, "step": 40964 }, { "epoch": 4.164802765351769, "grad_norm": 0.2702581584453583, "learning_rate": 8.261397175743419e-07, "loss": 0.2978, "step": 40965 }, { "epoch": 4.164904432696218, "grad_norm": 0.27691012620925903, "learning_rate": 8.259443293967162e-07, "loss": 0.3051, "step": 40966 }, { "epoch": 4.165006100040667, "grad_norm": 0.30860015749931335, "learning_rate": 8.257489622469888e-07, "loss": 0.2735, "step": 40967 }, { "epoch": 4.165107767385116, "grad_norm": 0.2892334461212158, "learning_rate": 8.255536161261457e-07, "loss": 0.2799, "step": 40968 }, { "epoch": 4.165209434729565, "grad_norm": 0.2801148593425751, "learning_rate": 8.253582910351743e-07, "loss": 0.2662, "step": 40969 }, { "epoch": 4.165311102074014, "grad_norm": 0.2799278795719147, "learning_rate": 8.251629869750532e-07, "loss": 0.2955, "step": 40970 }, { "epoch": 4.165412769418463, "grad_norm": 0.28115054965019226, "learning_rate": 8.249677039467702e-07, "loss": 0.3035, "step": 40971 }, { "epoch": 4.165514436762912, "grad_norm": 0.28555503487586975, "learning_rate": 8.247724419513064e-07, "loss": 0.2653, "step": 40972 }, { "epoch": 4.165616104107361, "grad_norm": 0.2770242989063263, "learning_rate": 8.24577200989648e-07, "loss": 0.3126, "step": 40973 }, { "epoch": 4.16571777145181, "grad_norm": 0.28124046325683594, "learning_rate": 8.243819810627779e-07, "loss": 0.2824, "step": 40974 }, { "epoch": 4.165819438796259, "grad_norm": 0.2987661063671112, "learning_rate": 8.241867821716776e-07, "loss": 0.2889, "step": 40975 }, { "epoch": 4.165921106140708, "grad_norm": 0.29064518213272095, "learning_rate": 8.23991604317334e-07, "loss": 0.2767, "step": 40976 }, { "epoch": 4.166022773485157, "grad_norm": 0.2897428870201111, "learning_rate": 8.237964475007265e-07, "loss": 0.3259, "step": 40977 }, { "epoch": 4.166124440829606, "grad_norm": 0.26679545640945435, "learning_rate": 8.236013117228397e-07, "loss": 0.2867, "step": 40978 }, { "epoch": 4.166226108174055, "grad_norm": 0.26115739345550537, "learning_rate": 8.234061969846597e-07, "loss": 0.3087, "step": 40979 }, { "epoch": 4.1663277755185035, "grad_norm": 0.2930524945259094, "learning_rate": 8.232111032871642e-07, "loss": 0.2668, "step": 40980 }, { "epoch": 4.1664294428629525, "grad_norm": 0.2988798916339874, "learning_rate": 8.230160306313395e-07, "loss": 0.2986, "step": 40981 }, { "epoch": 4.166531110207401, "grad_norm": 0.273168683052063, "learning_rate": 8.228209790181668e-07, "loss": 0.3225, "step": 40982 }, { "epoch": 4.16663277755185, "grad_norm": 0.2903899848461151, "learning_rate": 8.226259484486304e-07, "loss": 0.2919, "step": 40983 }, { "epoch": 4.166734444896299, "grad_norm": 0.2783699631690979, "learning_rate": 8.224309389237117e-07, "loss": 0.3028, "step": 40984 }, { "epoch": 4.166836112240748, "grad_norm": 0.2893920838832855, "learning_rate": 8.222359504443916e-07, "loss": 0.3048, "step": 40985 }, { "epoch": 4.166937779585197, "grad_norm": 0.2884375751018524, "learning_rate": 8.220409830116571e-07, "loss": 0.2801, "step": 40986 }, { "epoch": 4.167039446929646, "grad_norm": 0.2748049199581146, "learning_rate": 8.218460366264847e-07, "loss": 0.3008, "step": 40987 }, { "epoch": 4.167141114274095, "grad_norm": 0.2850918173789978, "learning_rate": 8.216511112898589e-07, "loss": 0.2731, "step": 40988 }, { "epoch": 4.167242781618544, "grad_norm": 0.26361319422721863, "learning_rate": 8.214562070027642e-07, "loss": 0.3148, "step": 40989 }, { "epoch": 4.167344448962993, "grad_norm": 0.2726210951805115, "learning_rate": 8.212613237661782e-07, "loss": 0.2801, "step": 40990 }, { "epoch": 4.167446116307442, "grad_norm": 0.29638755321502686, "learning_rate": 8.210664615810854e-07, "loss": 0.2949, "step": 40991 }, { "epoch": 4.167547783651891, "grad_norm": 0.29493439197540283, "learning_rate": 8.208716204484668e-07, "loss": 0.2811, "step": 40992 }, { "epoch": 4.16764945099634, "grad_norm": 0.2903883457183838, "learning_rate": 8.206768003693027e-07, "loss": 0.3087, "step": 40993 }, { "epoch": 4.167751118340789, "grad_norm": 0.27719148993492126, "learning_rate": 8.204820013445769e-07, "loss": 0.302, "step": 40994 }, { "epoch": 4.167852785685238, "grad_norm": 0.28453975915908813, "learning_rate": 8.202872233752679e-07, "loss": 0.3056, "step": 40995 }, { "epoch": 4.167954453029687, "grad_norm": 0.3116324543952942, "learning_rate": 8.200924664623616e-07, "loss": 0.2704, "step": 40996 }, { "epoch": 4.1680561203741355, "grad_norm": 0.280865877866745, "learning_rate": 8.19897730606834e-07, "loss": 0.2954, "step": 40997 }, { "epoch": 4.1681577877185845, "grad_norm": 0.283257395029068, "learning_rate": 8.197030158096675e-07, "loss": 0.304, "step": 40998 }, { "epoch": 4.168259455063033, "grad_norm": 0.27606135606765747, "learning_rate": 8.19508322071847e-07, "loss": 0.2828, "step": 40999 }, { "epoch": 4.168361122407482, "grad_norm": 0.2941493093967438, "learning_rate": 8.193136493943471e-07, "loss": 0.3316, "step": 41000 }, { "epoch": 4.168462789751931, "grad_norm": 0.2698715031147003, "learning_rate": 8.191189977781533e-07, "loss": 0.2939, "step": 41001 }, { "epoch": 4.16856445709638, "grad_norm": 0.25696516036987305, "learning_rate": 8.189243672242441e-07, "loss": 0.2963, "step": 41002 }, { "epoch": 4.168666124440829, "grad_norm": 0.29118239879608154, "learning_rate": 8.18729757733599e-07, "loss": 0.3024, "step": 41003 }, { "epoch": 4.168767791785278, "grad_norm": 0.2766607701778412, "learning_rate": 8.185351693072013e-07, "loss": 0.3137, "step": 41004 }, { "epoch": 4.168869459129728, "grad_norm": 0.27735042572021484, "learning_rate": 8.183406019460299e-07, "loss": 0.3125, "step": 41005 }, { "epoch": 4.168971126474177, "grad_norm": 0.2819662094116211, "learning_rate": 8.181460556510645e-07, "loss": 0.2682, "step": 41006 }, { "epoch": 4.169072793818626, "grad_norm": 0.3216322958469391, "learning_rate": 8.179515304232844e-07, "loss": 0.3102, "step": 41007 }, { "epoch": 4.169174461163075, "grad_norm": 0.27403342723846436, "learning_rate": 8.177570262636708e-07, "loss": 0.3064, "step": 41008 }, { "epoch": 4.169276128507524, "grad_norm": 0.28309333324432373, "learning_rate": 8.175625431732054e-07, "loss": 0.2867, "step": 41009 }, { "epoch": 4.169377795851973, "grad_norm": 0.2855662405490875, "learning_rate": 8.17368081152864e-07, "loss": 0.2862, "step": 41010 }, { "epoch": 4.169479463196422, "grad_norm": 0.2658156156539917, "learning_rate": 8.171736402036296e-07, "loss": 0.298, "step": 41011 }, { "epoch": 4.1695811305408705, "grad_norm": 0.30403488874435425, "learning_rate": 8.169792203264798e-07, "loss": 0.2821, "step": 41012 }, { "epoch": 4.1696827978853195, "grad_norm": 0.29597681760787964, "learning_rate": 8.167848215223939e-07, "loss": 0.2954, "step": 41013 }, { "epoch": 4.169784465229768, "grad_norm": 0.29031696915626526, "learning_rate": 8.165904437923533e-07, "loss": 0.2803, "step": 41014 }, { "epoch": 4.169886132574217, "grad_norm": 0.26957887411117554, "learning_rate": 8.16396087137336e-07, "loss": 0.2687, "step": 41015 }, { "epoch": 4.169987799918666, "grad_norm": 0.29652461409568787, "learning_rate": 8.162017515583204e-07, "loss": 0.3092, "step": 41016 }, { "epoch": 4.170089467263115, "grad_norm": 0.2848624885082245, "learning_rate": 8.160074370562854e-07, "loss": 0.2732, "step": 41017 }, { "epoch": 4.170191134607564, "grad_norm": 0.2643507122993469, "learning_rate": 8.158131436322109e-07, "loss": 0.3007, "step": 41018 }, { "epoch": 4.170292801952013, "grad_norm": 0.29728272557258606, "learning_rate": 8.156188712870772e-07, "loss": 0.2775, "step": 41019 }, { "epoch": 4.170394469296462, "grad_norm": 0.2914554178714752, "learning_rate": 8.154246200218596e-07, "loss": 0.311, "step": 41020 }, { "epoch": 4.170496136640911, "grad_norm": 0.302546888589859, "learning_rate": 8.152303898375391e-07, "loss": 0.298, "step": 41021 }, { "epoch": 4.17059780398536, "grad_norm": 0.2799132764339447, "learning_rate": 8.150361807350937e-07, "loss": 0.2935, "step": 41022 }, { "epoch": 4.170699471329809, "grad_norm": 0.284299373626709, "learning_rate": 8.14841992715501e-07, "loss": 0.2802, "step": 41023 }, { "epoch": 4.170801138674258, "grad_norm": 0.2679506838321686, "learning_rate": 8.146478257797391e-07, "loss": 0.3552, "step": 41024 }, { "epoch": 4.170902806018707, "grad_norm": 0.2712048292160034, "learning_rate": 8.144536799287877e-07, "loss": 0.3152, "step": 41025 }, { "epoch": 4.171004473363156, "grad_norm": 0.28362029790878296, "learning_rate": 8.142595551636245e-07, "loss": 0.2769, "step": 41026 }, { "epoch": 4.171106140707605, "grad_norm": 0.2703773081302643, "learning_rate": 8.140654514852254e-07, "loss": 0.3037, "step": 41027 }, { "epoch": 4.171207808052054, "grad_norm": 0.2835482954978943, "learning_rate": 8.138713688945709e-07, "loss": 0.3223, "step": 41028 }, { "epoch": 4.1713094753965025, "grad_norm": 0.247294083237648, "learning_rate": 8.13677307392638e-07, "loss": 0.3182, "step": 41029 }, { "epoch": 4.1714111427409515, "grad_norm": 0.2954784035682678, "learning_rate": 8.134832669804027e-07, "loss": 0.2864, "step": 41030 }, { "epoch": 4.1715128100854, "grad_norm": 0.30282291769981384, "learning_rate": 8.13289247658845e-07, "loss": 0.2929, "step": 41031 }, { "epoch": 4.171614477429849, "grad_norm": 0.26236847043037415, "learning_rate": 8.130952494289412e-07, "loss": 0.3116, "step": 41032 }, { "epoch": 4.171716144774298, "grad_norm": 0.27218106389045715, "learning_rate": 8.129012722916685e-07, "loss": 0.2842, "step": 41033 }, { "epoch": 4.171817812118747, "grad_norm": 0.27049165964126587, "learning_rate": 8.127073162480031e-07, "loss": 0.296, "step": 41034 }, { "epoch": 4.171919479463196, "grad_norm": 0.2801940143108368, "learning_rate": 8.125133812989244e-07, "loss": 0.2687, "step": 41035 }, { "epoch": 4.172021146807645, "grad_norm": 0.2657647728919983, "learning_rate": 8.123194674454083e-07, "loss": 0.3226, "step": 41036 }, { "epoch": 4.172122814152094, "grad_norm": 0.26706716418266296, "learning_rate": 8.121255746884299e-07, "loss": 0.3173, "step": 41037 }, { "epoch": 4.172224481496543, "grad_norm": 0.2910691797733307, "learning_rate": 8.119317030289692e-07, "loss": 0.3065, "step": 41038 }, { "epoch": 4.172326148840992, "grad_norm": 0.28175637125968933, "learning_rate": 8.117378524680015e-07, "loss": 0.3265, "step": 41039 }, { "epoch": 4.172427816185441, "grad_norm": 0.2861771285533905, "learning_rate": 8.115440230065019e-07, "loss": 0.3118, "step": 41040 }, { "epoch": 4.17252948352989, "grad_norm": 0.28588709235191345, "learning_rate": 8.113502146454494e-07, "loss": 0.2882, "step": 41041 }, { "epoch": 4.172631150874339, "grad_norm": 0.2674363851547241, "learning_rate": 8.111564273858186e-07, "loss": 0.2755, "step": 41042 }, { "epoch": 4.172732818218788, "grad_norm": 0.26177895069122314, "learning_rate": 8.10962661228587e-07, "loss": 0.2978, "step": 41043 }, { "epoch": 4.172834485563237, "grad_norm": 0.3093760013580322, "learning_rate": 8.107689161747284e-07, "loss": 0.288, "step": 41044 }, { "epoch": 4.1729361529076865, "grad_norm": 0.2687714993953705, "learning_rate": 8.10575192225222e-07, "loss": 0.309, "step": 41045 }, { "epoch": 4.173037820252135, "grad_norm": 0.26403793692588806, "learning_rate": 8.103814893810419e-07, "loss": 0.2983, "step": 41046 }, { "epoch": 4.173139487596584, "grad_norm": 0.27180615067481995, "learning_rate": 8.101878076431636e-07, "loss": 0.3373, "step": 41047 }, { "epoch": 4.173241154941033, "grad_norm": 0.2757657468318939, "learning_rate": 8.099941470125644e-07, "loss": 0.2987, "step": 41048 }, { "epoch": 4.173342822285482, "grad_norm": 0.2860485911369324, "learning_rate": 8.098005074902187e-07, "loss": 0.3151, "step": 41049 }, { "epoch": 4.173444489629931, "grad_norm": 0.28523939847946167, "learning_rate": 8.096068890771014e-07, "loss": 0.3193, "step": 41050 }, { "epoch": 4.17354615697438, "grad_norm": 0.2813591957092285, "learning_rate": 8.094132917741898e-07, "loss": 0.301, "step": 41051 }, { "epoch": 4.173647824318829, "grad_norm": 0.28202134370803833, "learning_rate": 8.092197155824583e-07, "loss": 0.2828, "step": 41052 }, { "epoch": 4.173749491663278, "grad_norm": 0.29858946800231934, "learning_rate": 8.090261605028821e-07, "loss": 0.2917, "step": 41053 }, { "epoch": 4.173851159007727, "grad_norm": 0.26253488659858704, "learning_rate": 8.088326265364349e-07, "loss": 0.3109, "step": 41054 }, { "epoch": 4.173952826352176, "grad_norm": 0.28293004631996155, "learning_rate": 8.086391136840938e-07, "loss": 0.279, "step": 41055 }, { "epoch": 4.174054493696625, "grad_norm": 0.28053441643714905, "learning_rate": 8.084456219468334e-07, "loss": 0.2997, "step": 41056 }, { "epoch": 4.174156161041074, "grad_norm": 0.2907857298851013, "learning_rate": 8.082521513256264e-07, "loss": 0.2828, "step": 41057 }, { "epoch": 4.174257828385523, "grad_norm": 0.2833271622657776, "learning_rate": 8.080587018214503e-07, "loss": 0.293, "step": 41058 }, { "epoch": 4.174359495729972, "grad_norm": 0.2834948003292084, "learning_rate": 8.078652734352782e-07, "loss": 0.3195, "step": 41059 }, { "epoch": 4.174461163074421, "grad_norm": 0.2645092010498047, "learning_rate": 8.076718661680832e-07, "loss": 0.2983, "step": 41060 }, { "epoch": 4.1745628304188696, "grad_norm": 0.30192261934280396, "learning_rate": 8.074784800208424e-07, "loss": 0.2865, "step": 41061 }, { "epoch": 4.1746644977633185, "grad_norm": 0.265829473733902, "learning_rate": 8.072851149945288e-07, "loss": 0.3235, "step": 41062 }, { "epoch": 4.174766165107767, "grad_norm": 0.27565717697143555, "learning_rate": 8.070917710901167e-07, "loss": 0.2958, "step": 41063 }, { "epoch": 4.174867832452216, "grad_norm": 0.29050537943840027, "learning_rate": 8.068984483085785e-07, "loss": 0.2981, "step": 41064 }, { "epoch": 4.174969499796665, "grad_norm": 0.2899288535118103, "learning_rate": 8.067051466508907e-07, "loss": 0.3187, "step": 41065 }, { "epoch": 4.175071167141114, "grad_norm": 0.27558520436286926, "learning_rate": 8.065118661180254e-07, "loss": 0.2697, "step": 41066 }, { "epoch": 4.175172834485563, "grad_norm": 0.2688668668270111, "learning_rate": 8.063186067109563e-07, "loss": 0.2974, "step": 41067 }, { "epoch": 4.175274501830012, "grad_norm": 0.27656224370002747, "learning_rate": 8.061253684306586e-07, "loss": 0.2661, "step": 41068 }, { "epoch": 4.175376169174461, "grad_norm": 0.26146864891052246, "learning_rate": 8.059321512781044e-07, "loss": 0.2722, "step": 41069 }, { "epoch": 4.17547783651891, "grad_norm": 0.2834815979003906, "learning_rate": 8.057389552542661e-07, "loss": 0.2975, "step": 41070 }, { "epoch": 4.175579503863359, "grad_norm": 0.2912295460700989, "learning_rate": 8.055457803601197e-07, "loss": 0.3144, "step": 41071 }, { "epoch": 4.175681171207808, "grad_norm": 0.2772296369075775, "learning_rate": 8.053526265966366e-07, "loss": 0.2953, "step": 41072 }, { "epoch": 4.175782838552257, "grad_norm": 0.2658833861351013, "learning_rate": 8.051594939647905e-07, "loss": 0.2986, "step": 41073 }, { "epoch": 4.175884505896706, "grad_norm": 0.3030858039855957, "learning_rate": 8.04966382465553e-07, "loss": 0.3171, "step": 41074 }, { "epoch": 4.175986173241155, "grad_norm": 0.25633904337882996, "learning_rate": 8.047732920998985e-07, "loss": 0.3359, "step": 41075 }, { "epoch": 4.176087840585604, "grad_norm": 0.30549758672714233, "learning_rate": 8.045802228687993e-07, "loss": 0.3564, "step": 41076 }, { "epoch": 4.176189507930053, "grad_norm": 0.30172476172447205, "learning_rate": 8.043871747732274e-07, "loss": 0.278, "step": 41077 }, { "epoch": 4.1762911752745016, "grad_norm": 0.27883219718933105, "learning_rate": 8.041941478141569e-07, "loss": 0.2992, "step": 41078 }, { "epoch": 4.1763928426189505, "grad_norm": 0.2782047986984253, "learning_rate": 8.040011419925587e-07, "loss": 0.2969, "step": 41079 }, { "epoch": 4.176494509963399, "grad_norm": 0.27613121271133423, "learning_rate": 8.038081573094047e-07, "loss": 0.3168, "step": 41080 }, { "epoch": 4.176596177307848, "grad_norm": 0.2818780541419983, "learning_rate": 8.036151937656695e-07, "loss": 0.3142, "step": 41081 }, { "epoch": 4.176697844652297, "grad_norm": 0.2594200670719147, "learning_rate": 8.034222513623229e-07, "loss": 0.3011, "step": 41082 }, { "epoch": 4.176799511996746, "grad_norm": 0.2822246849536896, "learning_rate": 8.032293301003385e-07, "loss": 0.29, "step": 41083 }, { "epoch": 4.176901179341195, "grad_norm": 0.2758895754814148, "learning_rate": 8.030364299806865e-07, "loss": 0.2963, "step": 41084 }, { "epoch": 4.177002846685644, "grad_norm": 0.28880760073661804, "learning_rate": 8.028435510043403e-07, "loss": 0.3028, "step": 41085 }, { "epoch": 4.177104514030093, "grad_norm": 0.28183066844940186, "learning_rate": 8.026506931722711e-07, "loss": 0.2874, "step": 41086 }, { "epoch": 4.177206181374543, "grad_norm": 0.28107011318206787, "learning_rate": 8.024578564854485e-07, "loss": 0.2865, "step": 41087 }, { "epoch": 4.177307848718992, "grad_norm": 0.29823005199432373, "learning_rate": 8.022650409448473e-07, "loss": 0.264, "step": 41088 }, { "epoch": 4.177409516063441, "grad_norm": 0.2848256826400757, "learning_rate": 8.020722465514374e-07, "loss": 0.3164, "step": 41089 }, { "epoch": 4.17751118340789, "grad_norm": 0.29189974069595337, "learning_rate": 8.01879473306189e-07, "loss": 0.2906, "step": 41090 }, { "epoch": 4.177612850752339, "grad_norm": 0.2751114070415497, "learning_rate": 8.016867212100749e-07, "loss": 0.2866, "step": 41091 }, { "epoch": 4.177714518096788, "grad_norm": 0.2889304757118225, "learning_rate": 8.014939902640656e-07, "loss": 0.2946, "step": 41092 }, { "epoch": 4.177816185441237, "grad_norm": 0.2796681225299835, "learning_rate": 8.013012804691322e-07, "loss": 0.2918, "step": 41093 }, { "epoch": 4.1779178527856855, "grad_norm": 0.25716161727905273, "learning_rate": 8.011085918262435e-07, "loss": 0.3134, "step": 41094 }, { "epoch": 4.1780195201301344, "grad_norm": 0.28171995282173157, "learning_rate": 8.009159243363734e-07, "loss": 0.2861, "step": 41095 }, { "epoch": 4.178121187474583, "grad_norm": 0.2601141631603241, "learning_rate": 8.007232780004909e-07, "loss": 0.3031, "step": 41096 }, { "epoch": 4.178222854819032, "grad_norm": 0.29154089093208313, "learning_rate": 8.00530652819565e-07, "loss": 0.261, "step": 41097 }, { "epoch": 4.178324522163481, "grad_norm": 0.3178964853286743, "learning_rate": 8.003380487945705e-07, "loss": 0.2572, "step": 41098 }, { "epoch": 4.17842618950793, "grad_norm": 0.2846493124961853, "learning_rate": 8.001454659264729e-07, "loss": 0.287, "step": 41099 }, { "epoch": 4.178527856852379, "grad_norm": 0.25584033131599426, "learning_rate": 7.99952904216244e-07, "loss": 0.3105, "step": 41100 }, { "epoch": 4.178629524196828, "grad_norm": 0.28951790928840637, "learning_rate": 7.997603636648554e-07, "loss": 0.2737, "step": 41101 }, { "epoch": 4.178731191541277, "grad_norm": 0.2767373025417328, "learning_rate": 7.995678442732763e-07, "loss": 0.3068, "step": 41102 }, { "epoch": 4.178832858885726, "grad_norm": 0.24921785295009613, "learning_rate": 7.993753460424758e-07, "loss": 0.2773, "step": 41103 }, { "epoch": 4.178934526230175, "grad_norm": 0.27760112285614014, "learning_rate": 7.991828689734232e-07, "loss": 0.3208, "step": 41104 }, { "epoch": 4.179036193574624, "grad_norm": 0.2760174572467804, "learning_rate": 7.9899041306709e-07, "loss": 0.286, "step": 41105 }, { "epoch": 4.179137860919073, "grad_norm": 0.2693520784378052, "learning_rate": 7.987979783244448e-07, "loss": 0.2867, "step": 41106 }, { "epoch": 4.179239528263522, "grad_norm": 0.2878882586956024, "learning_rate": 7.986055647464564e-07, "loss": 0.2971, "step": 41107 }, { "epoch": 4.179341195607971, "grad_norm": 0.26501980423927307, "learning_rate": 7.984131723340966e-07, "loss": 0.3091, "step": 41108 }, { "epoch": 4.17944286295242, "grad_norm": 0.28059378266334534, "learning_rate": 7.982208010883302e-07, "loss": 0.3003, "step": 41109 }, { "epoch": 4.179544530296869, "grad_norm": 0.2894745171070099, "learning_rate": 7.980284510101293e-07, "loss": 0.2698, "step": 41110 }, { "epoch": 4.1796461976413175, "grad_norm": 0.2892300486564636, "learning_rate": 7.978361221004638e-07, "loss": 0.2775, "step": 41111 }, { "epoch": 4.1797478649857664, "grad_norm": 0.28282028436660767, "learning_rate": 7.97643814360301e-07, "loss": 0.3093, "step": 41112 }, { "epoch": 4.179849532330215, "grad_norm": 0.27965715527534485, "learning_rate": 7.974515277906103e-07, "loss": 0.2979, "step": 41113 }, { "epoch": 4.179951199674664, "grad_norm": 0.2848227918148041, "learning_rate": 7.972592623923586e-07, "loss": 0.3187, "step": 41114 }, { "epoch": 4.180052867019113, "grad_norm": 0.2994794547557831, "learning_rate": 7.970670181665174e-07, "loss": 0.278, "step": 41115 }, { "epoch": 4.180154534363562, "grad_norm": 0.29287388920783997, "learning_rate": 7.968747951140537e-07, "loss": 0.3219, "step": 41116 }, { "epoch": 4.180256201708011, "grad_norm": 0.26310282945632935, "learning_rate": 7.966825932359351e-07, "loss": 0.3072, "step": 41117 }, { "epoch": 4.18035786905246, "grad_norm": 0.27052417397499084, "learning_rate": 7.96490412533133e-07, "loss": 0.2937, "step": 41118 }, { "epoch": 4.180459536396909, "grad_norm": 0.2901709973812103, "learning_rate": 7.96298253006611e-07, "loss": 0.3217, "step": 41119 }, { "epoch": 4.180561203741358, "grad_norm": 0.2961123287677765, "learning_rate": 7.96106114657339e-07, "loss": 0.2978, "step": 41120 }, { "epoch": 4.180662871085807, "grad_norm": 0.29921138286590576, "learning_rate": 7.959139974862878e-07, "loss": 0.3182, "step": 41121 }, { "epoch": 4.180764538430256, "grad_norm": 0.2775299549102783, "learning_rate": 7.957219014944212e-07, "loss": 0.2846, "step": 41122 }, { "epoch": 4.180866205774705, "grad_norm": 0.26486513018608093, "learning_rate": 7.955298266827088e-07, "loss": 0.3107, "step": 41123 }, { "epoch": 4.180967873119154, "grad_norm": 0.2979057729244232, "learning_rate": 7.953377730521172e-07, "loss": 0.2983, "step": 41124 }, { "epoch": 4.181069540463603, "grad_norm": 0.27099373936653137, "learning_rate": 7.951457406036161e-07, "loss": 0.2939, "step": 41125 }, { "epoch": 4.181171207808052, "grad_norm": 0.27820470929145813, "learning_rate": 7.949537293381715e-07, "loss": 0.2991, "step": 41126 }, { "epoch": 4.1812728751525015, "grad_norm": 0.29176124930381775, "learning_rate": 7.947617392567497e-07, "loss": 0.32, "step": 41127 }, { "epoch": 4.18137454249695, "grad_norm": 0.27073103189468384, "learning_rate": 7.945697703603212e-07, "loss": 0.2693, "step": 41128 }, { "epoch": 4.181476209841399, "grad_norm": 0.27476072311401367, "learning_rate": 7.943778226498478e-07, "loss": 0.2863, "step": 41129 }, { "epoch": 4.181577877185848, "grad_norm": 0.26295608282089233, "learning_rate": 7.941858961263005e-07, "loss": 0.281, "step": 41130 }, { "epoch": 4.181679544530297, "grad_norm": 0.289551317691803, "learning_rate": 7.939939907906469e-07, "loss": 0.2964, "step": 41131 }, { "epoch": 4.181781211874746, "grad_norm": 0.2827586233615875, "learning_rate": 7.9380210664385e-07, "loss": 0.3165, "step": 41132 }, { "epoch": 4.181882879219195, "grad_norm": 0.2674455940723419, "learning_rate": 7.936102436868798e-07, "loss": 0.308, "step": 41133 }, { "epoch": 4.181984546563644, "grad_norm": 0.28424322605133057, "learning_rate": 7.934184019206998e-07, "loss": 0.2834, "step": 41134 }, { "epoch": 4.182086213908093, "grad_norm": 0.30164942145347595, "learning_rate": 7.932265813462803e-07, "loss": 0.2852, "step": 41135 }, { "epoch": 4.182187881252542, "grad_norm": 0.274685263633728, "learning_rate": 7.930347819645851e-07, "loss": 0.2814, "step": 41136 }, { "epoch": 4.182289548596991, "grad_norm": 0.30739831924438477, "learning_rate": 7.9284300377658e-07, "loss": 0.2974, "step": 41137 }, { "epoch": 4.18239121594144, "grad_norm": 0.28889200091362, "learning_rate": 7.926512467832342e-07, "loss": 0.2918, "step": 41138 }, { "epoch": 4.182492883285889, "grad_norm": 0.2924325466156006, "learning_rate": 7.924595109855093e-07, "loss": 0.2928, "step": 41139 }, { "epoch": 4.182594550630338, "grad_norm": 0.28551387786865234, "learning_rate": 7.922677963843734e-07, "loss": 0.3243, "step": 41140 }, { "epoch": 4.182696217974787, "grad_norm": 0.27219465374946594, "learning_rate": 7.920761029807949e-07, "loss": 0.3026, "step": 41141 }, { "epoch": 4.182797885319236, "grad_norm": 0.28116440773010254, "learning_rate": 7.918844307757351e-07, "loss": 0.3018, "step": 41142 }, { "epoch": 4.1828995526636845, "grad_norm": 0.2924412488937378, "learning_rate": 7.916927797701618e-07, "loss": 0.2985, "step": 41143 }, { "epoch": 4.1830012200081335, "grad_norm": 0.26238349080085754, "learning_rate": 7.91501149965041e-07, "loss": 0.2878, "step": 41144 }, { "epoch": 4.183102887352582, "grad_norm": 0.2620376646518707, "learning_rate": 7.913095413613358e-07, "loss": 0.2752, "step": 41145 }, { "epoch": 4.183204554697031, "grad_norm": 0.3068601191043854, "learning_rate": 7.911179539600145e-07, "loss": 0.336, "step": 41146 }, { "epoch": 4.18330622204148, "grad_norm": 0.27227237820625305, "learning_rate": 7.909263877620394e-07, "loss": 0.2917, "step": 41147 }, { "epoch": 4.183407889385929, "grad_norm": 0.2752310633659363, "learning_rate": 7.907348427683792e-07, "loss": 0.2982, "step": 41148 }, { "epoch": 4.183509556730378, "grad_norm": 0.2814349830150604, "learning_rate": 7.905433189799944e-07, "loss": 0.3019, "step": 41149 }, { "epoch": 4.183611224074827, "grad_norm": 0.29030629992485046, "learning_rate": 7.903518163978519e-07, "loss": 0.325, "step": 41150 }, { "epoch": 4.183712891419276, "grad_norm": 0.28449341654777527, "learning_rate": 7.901603350229187e-07, "loss": 0.2871, "step": 41151 }, { "epoch": 4.183814558763725, "grad_norm": 0.25827130675315857, "learning_rate": 7.899688748561552e-07, "loss": 0.3046, "step": 41152 }, { "epoch": 4.183916226108174, "grad_norm": 0.28883641958236694, "learning_rate": 7.897774358985294e-07, "loss": 0.3145, "step": 41153 }, { "epoch": 4.184017893452623, "grad_norm": 0.25502312183380127, "learning_rate": 7.89586018151004e-07, "loss": 0.2877, "step": 41154 }, { "epoch": 4.184119560797072, "grad_norm": 0.26596975326538086, "learning_rate": 7.893946216145426e-07, "loss": 0.2983, "step": 41155 }, { "epoch": 4.184221228141521, "grad_norm": 0.29407331347465515, "learning_rate": 7.892032462901122e-07, "loss": 0.2969, "step": 41156 }, { "epoch": 4.18432289548597, "grad_norm": 0.28316187858581543, "learning_rate": 7.89011892178675e-07, "loss": 0.3227, "step": 41157 }, { "epoch": 4.184424562830419, "grad_norm": 0.29247575998306274, "learning_rate": 7.888205592811948e-07, "loss": 0.3123, "step": 41158 }, { "epoch": 4.184526230174868, "grad_norm": 0.27759796380996704, "learning_rate": 7.886292475986346e-07, "loss": 0.3263, "step": 41159 }, { "epoch": 4.1846278975193165, "grad_norm": 0.2783249020576477, "learning_rate": 7.8843795713196e-07, "loss": 0.3099, "step": 41160 }, { "epoch": 4.1847295648637655, "grad_norm": 0.2584855854511261, "learning_rate": 7.882466878821365e-07, "loss": 0.2787, "step": 41161 }, { "epoch": 4.184831232208214, "grad_norm": 0.2959093153476715, "learning_rate": 7.880554398501228e-07, "loss": 0.3118, "step": 41162 }, { "epoch": 4.184932899552663, "grad_norm": 0.275712251663208, "learning_rate": 7.878642130368857e-07, "loss": 0.2887, "step": 41163 }, { "epoch": 4.185034566897112, "grad_norm": 0.2933994233608246, "learning_rate": 7.87673007443388e-07, "loss": 0.3133, "step": 41164 }, { "epoch": 4.185136234241561, "grad_norm": 0.28101369738578796, "learning_rate": 7.874818230705916e-07, "loss": 0.2834, "step": 41165 }, { "epoch": 4.18523790158601, "grad_norm": 0.28667163848876953, "learning_rate": 7.872906599194618e-07, "loss": 0.3127, "step": 41166 }, { "epoch": 4.185339568930459, "grad_norm": 0.26289892196655273, "learning_rate": 7.870995179909608e-07, "loss": 0.2751, "step": 41167 }, { "epoch": 4.185441236274908, "grad_norm": 0.29677194356918335, "learning_rate": 7.869083972860514e-07, "loss": 0.2888, "step": 41168 }, { "epoch": 4.185542903619358, "grad_norm": 0.26451680064201355, "learning_rate": 7.867172978056948e-07, "loss": 0.2775, "step": 41169 }, { "epoch": 4.185644570963807, "grad_norm": 0.2638755440711975, "learning_rate": 7.865262195508555e-07, "loss": 0.3031, "step": 41170 }, { "epoch": 4.185746238308256, "grad_norm": 0.2747116982936859, "learning_rate": 7.86335162522498e-07, "loss": 0.3261, "step": 41171 }, { "epoch": 4.185847905652705, "grad_norm": 0.29431337118148804, "learning_rate": 7.861441267215802e-07, "loss": 0.2867, "step": 41172 }, { "epoch": 4.185949572997154, "grad_norm": 0.2858527600765228, "learning_rate": 7.859531121490682e-07, "loss": 0.2973, "step": 41173 }, { "epoch": 4.186051240341603, "grad_norm": 0.28160688281059265, "learning_rate": 7.857621188059228e-07, "loss": 0.3146, "step": 41174 }, { "epoch": 4.1861529076860515, "grad_norm": 0.26720067858695984, "learning_rate": 7.855711466931054e-07, "loss": 0.2875, "step": 41175 }, { "epoch": 4.1862545750305005, "grad_norm": 0.2721543610095978, "learning_rate": 7.853801958115804e-07, "loss": 0.2766, "step": 41176 }, { "epoch": 4.186356242374949, "grad_norm": 0.26497912406921387, "learning_rate": 7.851892661623079e-07, "loss": 0.2905, "step": 41177 }, { "epoch": 4.186457909719398, "grad_norm": 0.3052774965763092, "learning_rate": 7.849983577462505e-07, "loss": 0.2982, "step": 41178 }, { "epoch": 4.186559577063847, "grad_norm": 0.2743648290634155, "learning_rate": 7.848074705643688e-07, "loss": 0.29, "step": 41179 }, { "epoch": 4.186661244408296, "grad_norm": 0.2754468023777008, "learning_rate": 7.846166046176267e-07, "loss": 0.2896, "step": 41180 }, { "epoch": 4.186762911752745, "grad_norm": 0.2939987778663635, "learning_rate": 7.844257599069844e-07, "loss": 0.2988, "step": 41181 }, { "epoch": 4.186864579097194, "grad_norm": 0.27410030364990234, "learning_rate": 7.842349364334018e-07, "loss": 0.2762, "step": 41182 }, { "epoch": 4.186966246441643, "grad_norm": 0.27485331892967224, "learning_rate": 7.840441341978428e-07, "loss": 0.3045, "step": 41183 }, { "epoch": 4.187067913786092, "grad_norm": 0.2877022624015808, "learning_rate": 7.838533532012682e-07, "loss": 0.311, "step": 41184 }, { "epoch": 4.187169581130541, "grad_norm": 0.275947242975235, "learning_rate": 7.836625934446368e-07, "loss": 0.2909, "step": 41185 }, { "epoch": 4.18727124847499, "grad_norm": 0.2724877595901489, "learning_rate": 7.834718549289127e-07, "loss": 0.3051, "step": 41186 }, { "epoch": 4.187372915819439, "grad_norm": 0.2733912169933319, "learning_rate": 7.832811376550553e-07, "loss": 0.2784, "step": 41187 }, { "epoch": 4.187474583163888, "grad_norm": 0.26323163509368896, "learning_rate": 7.830904416240254e-07, "loss": 0.2639, "step": 41188 }, { "epoch": 4.187576250508337, "grad_norm": 0.28505468368530273, "learning_rate": 7.828997668367827e-07, "loss": 0.2985, "step": 41189 }, { "epoch": 4.187677917852786, "grad_norm": 0.2785646915435791, "learning_rate": 7.827091132942904e-07, "loss": 0.2988, "step": 41190 }, { "epoch": 4.187779585197235, "grad_norm": 0.27943629026412964, "learning_rate": 7.825184809975067e-07, "loss": 0.3068, "step": 41191 }, { "epoch": 4.1878812525416835, "grad_norm": 0.302375465631485, "learning_rate": 7.823278699473918e-07, "loss": 0.3064, "step": 41192 }, { "epoch": 4.1879829198861325, "grad_norm": 0.26822033524513245, "learning_rate": 7.821372801449084e-07, "loss": 0.3041, "step": 41193 }, { "epoch": 4.188084587230581, "grad_norm": 0.255813866853714, "learning_rate": 7.819467115910145e-07, "loss": 0.311, "step": 41194 }, { "epoch": 4.18818625457503, "grad_norm": 0.28699856996536255, "learning_rate": 7.817561642866695e-07, "loss": 0.2983, "step": 41195 }, { "epoch": 4.188287921919479, "grad_norm": 0.28378260135650635, "learning_rate": 7.815656382328363e-07, "loss": 0.3198, "step": 41196 }, { "epoch": 4.188389589263928, "grad_norm": 0.25991734862327576, "learning_rate": 7.813751334304726e-07, "loss": 0.2898, "step": 41197 }, { "epoch": 4.188491256608377, "grad_norm": 0.2839849293231964, "learning_rate": 7.811846498805381e-07, "loss": 0.3151, "step": 41198 }, { "epoch": 4.188592923952826, "grad_norm": 0.2785542905330658, "learning_rate": 7.809941875839922e-07, "loss": 0.3014, "step": 41199 }, { "epoch": 4.188694591297275, "grad_norm": 0.2834308445453644, "learning_rate": 7.808037465417961e-07, "loss": 0.2998, "step": 41200 }, { "epoch": 4.188796258641724, "grad_norm": 0.2880937159061432, "learning_rate": 7.806133267549077e-07, "loss": 0.2945, "step": 41201 }, { "epoch": 4.188897925986173, "grad_norm": 0.26813018321990967, "learning_rate": 7.804229282242859e-07, "loss": 0.2661, "step": 41202 }, { "epoch": 4.188999593330622, "grad_norm": 0.28156405687332153, "learning_rate": 7.802325509508918e-07, "loss": 0.2922, "step": 41203 }, { "epoch": 4.189101260675071, "grad_norm": 0.29150328040122986, "learning_rate": 7.800421949356829e-07, "loss": 0.2783, "step": 41204 }, { "epoch": 4.18920292801952, "grad_norm": 0.2654925584793091, "learning_rate": 7.798518601796179e-07, "loss": 0.3062, "step": 41205 }, { "epoch": 4.189304595363969, "grad_norm": 0.2997477352619171, "learning_rate": 7.796615466836576e-07, "loss": 0.2695, "step": 41206 }, { "epoch": 4.189406262708418, "grad_norm": 0.29815226793289185, "learning_rate": 7.794712544487592e-07, "loss": 0.3035, "step": 41207 }, { "epoch": 4.189507930052867, "grad_norm": 0.2923503816127777, "learning_rate": 7.792809834758819e-07, "loss": 0.2988, "step": 41208 }, { "epoch": 4.189609597397316, "grad_norm": 0.31178969144821167, "learning_rate": 7.790907337659825e-07, "loss": 0.2603, "step": 41209 }, { "epoch": 4.189711264741765, "grad_norm": 0.2851541340351105, "learning_rate": 7.789005053200222e-07, "loss": 0.296, "step": 41210 }, { "epoch": 4.189812932086214, "grad_norm": 0.25777626037597656, "learning_rate": 7.787102981389582e-07, "loss": 0.2775, "step": 41211 }, { "epoch": 4.189914599430663, "grad_norm": 0.2829276919364929, "learning_rate": 7.785201122237474e-07, "loss": 0.2995, "step": 41212 }, { "epoch": 4.190016266775112, "grad_norm": 0.29728633165359497, "learning_rate": 7.783299475753503e-07, "loss": 0.3062, "step": 41213 }, { "epoch": 4.190117934119561, "grad_norm": 0.27317941188812256, "learning_rate": 7.781398041947242e-07, "loss": 0.3076, "step": 41214 }, { "epoch": 4.19021960146401, "grad_norm": 0.2582422196865082, "learning_rate": 7.779496820828259e-07, "loss": 0.2958, "step": 41215 }, { "epoch": 4.190321268808459, "grad_norm": 0.28215131163597107, "learning_rate": 7.777595812406124e-07, "loss": 0.3228, "step": 41216 }, { "epoch": 4.190422936152908, "grad_norm": 0.2938629984855652, "learning_rate": 7.775695016690443e-07, "loss": 0.3166, "step": 41217 }, { "epoch": 4.190524603497357, "grad_norm": 0.2808973491191864, "learning_rate": 7.773794433690779e-07, "loss": 0.2894, "step": 41218 }, { "epoch": 4.190626270841806, "grad_norm": 0.2942155599594116, "learning_rate": 7.771894063416685e-07, "loss": 0.3067, "step": 41219 }, { "epoch": 4.190727938186255, "grad_norm": 0.2575933337211609, "learning_rate": 7.769993905877771e-07, "loss": 0.2884, "step": 41220 }, { "epoch": 4.190829605530704, "grad_norm": 0.27758994698524475, "learning_rate": 7.768093961083589e-07, "loss": 0.2754, "step": 41221 }, { "epoch": 4.190931272875153, "grad_norm": 0.2752624750137329, "learning_rate": 7.766194229043706e-07, "loss": 0.2778, "step": 41222 }, { "epoch": 4.191032940219602, "grad_norm": 0.2711862623691559, "learning_rate": 7.764294709767711e-07, "loss": 0.2957, "step": 41223 }, { "epoch": 4.1911346075640505, "grad_norm": 0.272150456905365, "learning_rate": 7.76239540326516e-07, "loss": 0.2809, "step": 41224 }, { "epoch": 4.1912362749084995, "grad_norm": 0.2866298258304596, "learning_rate": 7.760496309545628e-07, "loss": 0.3093, "step": 41225 }, { "epoch": 4.191337942252948, "grad_norm": 0.2870267331600189, "learning_rate": 7.758597428618664e-07, "loss": 0.3026, "step": 41226 }, { "epoch": 4.191439609597397, "grad_norm": 0.2618504762649536, "learning_rate": 7.756698760493859e-07, "loss": 0.2966, "step": 41227 }, { "epoch": 4.191541276941846, "grad_norm": 0.2733563780784607, "learning_rate": 7.754800305180765e-07, "loss": 0.3145, "step": 41228 }, { "epoch": 4.191642944286295, "grad_norm": 0.27390170097351074, "learning_rate": 7.752902062688938e-07, "loss": 0.2893, "step": 41229 }, { "epoch": 4.191744611630744, "grad_norm": 0.2848389446735382, "learning_rate": 7.751004033027965e-07, "loss": 0.2974, "step": 41230 }, { "epoch": 4.191846278975193, "grad_norm": 0.2878042459487915, "learning_rate": 7.749106216207392e-07, "loss": 0.2696, "step": 41231 }, { "epoch": 4.191947946319642, "grad_norm": 0.2838292717933655, "learning_rate": 7.747208612236773e-07, "loss": 0.2998, "step": 41232 }, { "epoch": 4.192049613664091, "grad_norm": 0.3002603054046631, "learning_rate": 7.745311221125684e-07, "loss": 0.2968, "step": 41233 }, { "epoch": 4.19215128100854, "grad_norm": 0.2870176136493683, "learning_rate": 7.743414042883674e-07, "loss": 0.2763, "step": 41234 }, { "epoch": 4.192252948352989, "grad_norm": 0.26841649413108826, "learning_rate": 7.741517077520306e-07, "loss": 0.2907, "step": 41235 }, { "epoch": 4.192354615697438, "grad_norm": 0.26212504506111145, "learning_rate": 7.739620325045121e-07, "loss": 0.3091, "step": 41236 }, { "epoch": 4.192456283041887, "grad_norm": 0.2697732448577881, "learning_rate": 7.737723785467699e-07, "loss": 0.3172, "step": 41237 }, { "epoch": 4.192557950386336, "grad_norm": 0.2768726646900177, "learning_rate": 7.735827458797579e-07, "loss": 0.2824, "step": 41238 }, { "epoch": 4.192659617730785, "grad_norm": 0.28705281019210815, "learning_rate": 7.733931345044304e-07, "loss": 0.3267, "step": 41239 }, { "epoch": 4.192761285075234, "grad_norm": 0.27580785751342773, "learning_rate": 7.73203544421745e-07, "loss": 0.2643, "step": 41240 }, { "epoch": 4.1928629524196825, "grad_norm": 0.2884886562824249, "learning_rate": 7.73013975632656e-07, "loss": 0.2906, "step": 41241 }, { "epoch": 4.1929646197641315, "grad_norm": 0.2899055778980255, "learning_rate": 7.72824428138117e-07, "loss": 0.3049, "step": 41242 }, { "epoch": 4.19306628710858, "grad_norm": 0.27887067198753357, "learning_rate": 7.72634901939085e-07, "loss": 0.312, "step": 41243 }, { "epoch": 4.193167954453029, "grad_norm": 0.2851336598396301, "learning_rate": 7.724453970365143e-07, "loss": 0.299, "step": 41244 }, { "epoch": 4.193269621797478, "grad_norm": 0.28242555260658264, "learning_rate": 7.722559134313585e-07, "loss": 0.3175, "step": 41245 }, { "epoch": 4.193371289141927, "grad_norm": 0.256268709897995, "learning_rate": 7.720664511245713e-07, "loss": 0.2909, "step": 41246 }, { "epoch": 4.193472956486376, "grad_norm": 0.27269917726516724, "learning_rate": 7.718770101171103e-07, "loss": 0.2789, "step": 41247 }, { "epoch": 4.193574623830825, "grad_norm": 0.26119527220726013, "learning_rate": 7.71687590409928e-07, "loss": 0.3251, "step": 41248 }, { "epoch": 4.193676291175274, "grad_norm": 0.27711236476898193, "learning_rate": 7.714981920039777e-07, "loss": 0.2949, "step": 41249 }, { "epoch": 4.193777958519723, "grad_norm": 0.26173678040504456, "learning_rate": 7.713088149002168e-07, "loss": 0.2836, "step": 41250 }, { "epoch": 4.193879625864173, "grad_norm": 0.2775661051273346, "learning_rate": 7.711194590995952e-07, "loss": 0.3135, "step": 41251 }, { "epoch": 4.193981293208622, "grad_norm": 0.29702597856521606, "learning_rate": 7.709301246030687e-07, "loss": 0.294, "step": 41252 }, { "epoch": 4.194082960553071, "grad_norm": 0.27265384793281555, "learning_rate": 7.707408114115922e-07, "loss": 0.3036, "step": 41253 }, { "epoch": 4.19418462789752, "grad_norm": 0.26886507868766785, "learning_rate": 7.705515195261187e-07, "loss": 0.2799, "step": 41254 }, { "epoch": 4.194286295241969, "grad_norm": 0.2741590738296509, "learning_rate": 7.703622489476015e-07, "loss": 0.3089, "step": 41255 }, { "epoch": 4.1943879625864176, "grad_norm": 0.2638343870639801, "learning_rate": 7.70172999676993e-07, "loss": 0.3043, "step": 41256 }, { "epoch": 4.1944896299308665, "grad_norm": 0.2818054258823395, "learning_rate": 7.699837717152492e-07, "loss": 0.3042, "step": 41257 }, { "epoch": 4.194591297275315, "grad_norm": 0.2787076234817505, "learning_rate": 7.697945650633215e-07, "loss": 0.333, "step": 41258 }, { "epoch": 4.194692964619764, "grad_norm": 0.28494930267333984, "learning_rate": 7.696053797221625e-07, "loss": 0.3218, "step": 41259 }, { "epoch": 4.194794631964213, "grad_norm": 0.29178255796432495, "learning_rate": 7.694162156927287e-07, "loss": 0.2975, "step": 41260 }, { "epoch": 4.194896299308662, "grad_norm": 0.28091058135032654, "learning_rate": 7.692270729759682e-07, "loss": 0.3007, "step": 41261 }, { "epoch": 4.194997966653111, "grad_norm": 0.276504248380661, "learning_rate": 7.690379515728369e-07, "loss": 0.3001, "step": 41262 }, { "epoch": 4.19509963399756, "grad_norm": 0.29701632261276245, "learning_rate": 7.688488514842873e-07, "loss": 0.2843, "step": 41263 }, { "epoch": 4.195201301342009, "grad_norm": 0.27612438797950745, "learning_rate": 7.686597727112721e-07, "loss": 0.2782, "step": 41264 }, { "epoch": 4.195302968686458, "grad_norm": 0.26745477318763733, "learning_rate": 7.684707152547438e-07, "loss": 0.324, "step": 41265 }, { "epoch": 4.195404636030907, "grad_norm": 0.27317628264427185, "learning_rate": 7.682816791156528e-07, "loss": 0.2843, "step": 41266 }, { "epoch": 4.195506303375356, "grad_norm": 0.27796196937561035, "learning_rate": 7.680926642949543e-07, "loss": 0.2997, "step": 41267 }, { "epoch": 4.195607970719805, "grad_norm": 0.26829972863197327, "learning_rate": 7.679036707935994e-07, "loss": 0.3111, "step": 41268 }, { "epoch": 4.195709638064254, "grad_norm": 0.2877000570297241, "learning_rate": 7.677146986125389e-07, "loss": 0.2962, "step": 41269 }, { "epoch": 4.195811305408703, "grad_norm": 0.27559027075767517, "learning_rate": 7.675257477527287e-07, "loss": 0.2851, "step": 41270 }, { "epoch": 4.195912972753152, "grad_norm": 0.27573591470718384, "learning_rate": 7.673368182151153e-07, "loss": 0.2766, "step": 41271 }, { "epoch": 4.196014640097601, "grad_norm": 0.281288743019104, "learning_rate": 7.671479100006529e-07, "loss": 0.297, "step": 41272 }, { "epoch": 4.1961163074420496, "grad_norm": 0.2831714153289795, "learning_rate": 7.669590231102963e-07, "loss": 0.3106, "step": 41273 }, { "epoch": 4.1962179747864985, "grad_norm": 0.29263240098953247, "learning_rate": 7.667701575449915e-07, "loss": 0.2692, "step": 41274 }, { "epoch": 4.196319642130947, "grad_norm": 0.2859199643135071, "learning_rate": 7.665813133056943e-07, "loss": 0.306, "step": 41275 }, { "epoch": 4.196421309475396, "grad_norm": 0.27414315938949585, "learning_rate": 7.663924903933528e-07, "loss": 0.2915, "step": 41276 }, { "epoch": 4.196522976819845, "grad_norm": 0.26436981558799744, "learning_rate": 7.662036888089213e-07, "loss": 0.317, "step": 41277 }, { "epoch": 4.196624644164294, "grad_norm": 0.28229227662086487, "learning_rate": 7.660149085533492e-07, "loss": 0.2773, "step": 41278 }, { "epoch": 4.196726311508743, "grad_norm": 0.2953341007232666, "learning_rate": 7.658261496275871e-07, "loss": 0.2838, "step": 41279 }, { "epoch": 4.196827978853192, "grad_norm": 0.2788265347480774, "learning_rate": 7.656374120325887e-07, "loss": 0.2739, "step": 41280 }, { "epoch": 4.196929646197641, "grad_norm": 0.2621728479862213, "learning_rate": 7.654486957693008e-07, "loss": 0.2854, "step": 41281 }, { "epoch": 4.19703131354209, "grad_norm": 0.2693893611431122, "learning_rate": 7.652600008386757e-07, "loss": 0.2918, "step": 41282 }, { "epoch": 4.197132980886539, "grad_norm": 0.2641001045703888, "learning_rate": 7.650713272416665e-07, "loss": 0.3321, "step": 41283 }, { "epoch": 4.197234648230988, "grad_norm": 0.28483134508132935, "learning_rate": 7.648826749792199e-07, "loss": 0.2866, "step": 41284 }, { "epoch": 4.197336315575437, "grad_norm": 0.30138492584228516, "learning_rate": 7.646940440522888e-07, "loss": 0.2902, "step": 41285 }, { "epoch": 4.197437982919886, "grad_norm": 0.2785404324531555, "learning_rate": 7.645054344618224e-07, "loss": 0.3258, "step": 41286 }, { "epoch": 4.197539650264335, "grad_norm": 0.26789146661758423, "learning_rate": 7.643168462087708e-07, "loss": 0.272, "step": 41287 }, { "epoch": 4.197641317608784, "grad_norm": 0.30655550956726074, "learning_rate": 7.641282792940847e-07, "loss": 0.3162, "step": 41288 }, { "epoch": 4.197742984953233, "grad_norm": 0.29785868525505066, "learning_rate": 7.639397337187127e-07, "loss": 0.3007, "step": 41289 }, { "epoch": 4.1978446522976816, "grad_norm": 0.28691044449806213, "learning_rate": 7.63751209483608e-07, "loss": 0.3092, "step": 41290 }, { "epoch": 4.197946319642131, "grad_norm": 0.3042145073413849, "learning_rate": 7.635627065897161e-07, "loss": 0.2902, "step": 41291 }, { "epoch": 4.19804798698658, "grad_norm": 0.2906663119792938, "learning_rate": 7.633742250379883e-07, "loss": 0.2906, "step": 41292 }, { "epoch": 4.198149654331029, "grad_norm": 0.2774359881877899, "learning_rate": 7.631857648293766e-07, "loss": 0.2878, "step": 41293 }, { "epoch": 4.198251321675478, "grad_norm": 0.28407716751098633, "learning_rate": 7.62997325964826e-07, "loss": 0.2939, "step": 41294 }, { "epoch": 4.198352989019927, "grad_norm": 0.27155205607414246, "learning_rate": 7.628089084452894e-07, "loss": 0.2968, "step": 41295 }, { "epoch": 4.198454656364376, "grad_norm": 0.2845996916294098, "learning_rate": 7.62620512271714e-07, "loss": 0.3031, "step": 41296 }, { "epoch": 4.198556323708825, "grad_norm": 0.2503441572189331, "learning_rate": 7.624321374450489e-07, "loss": 0.2828, "step": 41297 }, { "epoch": 4.198657991053274, "grad_norm": 0.29498565196990967, "learning_rate": 7.622437839662445e-07, "loss": 0.3226, "step": 41298 }, { "epoch": 4.198759658397723, "grad_norm": 0.28257694840431213, "learning_rate": 7.620554518362477e-07, "loss": 0.2912, "step": 41299 }, { "epoch": 4.198861325742172, "grad_norm": 0.2699134945869446, "learning_rate": 7.618671410560108e-07, "loss": 0.2897, "step": 41300 }, { "epoch": 4.198962993086621, "grad_norm": 0.2777175009250641, "learning_rate": 7.616788516264784e-07, "loss": 0.3176, "step": 41301 }, { "epoch": 4.19906466043107, "grad_norm": 0.27160969376564026, "learning_rate": 7.614905835485997e-07, "loss": 0.2929, "step": 41302 }, { "epoch": 4.199166327775519, "grad_norm": 0.2743164002895355, "learning_rate": 7.613023368233275e-07, "loss": 0.2825, "step": 41303 }, { "epoch": 4.199267995119968, "grad_norm": 0.26891881227493286, "learning_rate": 7.61114111451604e-07, "loss": 0.3101, "step": 41304 }, { "epoch": 4.199369662464417, "grad_norm": 0.2925295829772949, "learning_rate": 7.609259074343816e-07, "loss": 0.3063, "step": 41305 }, { "epoch": 4.1994713298088655, "grad_norm": 0.30513104796409607, "learning_rate": 7.607377247726073e-07, "loss": 0.2964, "step": 41306 }, { "epoch": 4.1995729971533144, "grad_norm": 0.30313605070114136, "learning_rate": 7.605495634672272e-07, "loss": 0.3363, "step": 41307 }, { "epoch": 4.199674664497763, "grad_norm": 0.2801671624183655, "learning_rate": 7.603614235191925e-07, "loss": 0.2938, "step": 41308 }, { "epoch": 4.199776331842212, "grad_norm": 0.28186658024787903, "learning_rate": 7.601733049294497e-07, "loss": 0.2914, "step": 41309 }, { "epoch": 4.199877999186661, "grad_norm": 0.29326698184013367, "learning_rate": 7.599852076989461e-07, "loss": 0.2894, "step": 41310 }, { "epoch": 4.19997966653111, "grad_norm": 0.26651331782341003, "learning_rate": 7.597971318286279e-07, "loss": 0.2993, "step": 41311 }, { "epoch": 4.200081333875559, "grad_norm": 0.26846635341644287, "learning_rate": 7.596090773194442e-07, "loss": 0.309, "step": 41312 }, { "epoch": 4.200183001220008, "grad_norm": 0.2698551416397095, "learning_rate": 7.594210441723448e-07, "loss": 0.2979, "step": 41313 }, { "epoch": 4.200284668564457, "grad_norm": 0.28338122367858887, "learning_rate": 7.592330323882724e-07, "loss": 0.3198, "step": 41314 }, { "epoch": 4.200386335908906, "grad_norm": 0.29697415232658386, "learning_rate": 7.590450419681772e-07, "loss": 0.2905, "step": 41315 }, { "epoch": 4.200488003253355, "grad_norm": 0.28542661666870117, "learning_rate": 7.588570729130051e-07, "loss": 0.2807, "step": 41316 }, { "epoch": 4.200589670597804, "grad_norm": 0.2795066833496094, "learning_rate": 7.586691252237022e-07, "loss": 0.277, "step": 41317 }, { "epoch": 4.200691337942253, "grad_norm": 0.28591403365135193, "learning_rate": 7.584811989012175e-07, "loss": 0.3102, "step": 41318 }, { "epoch": 4.200793005286702, "grad_norm": 0.289777547121048, "learning_rate": 7.582932939464966e-07, "loss": 0.3042, "step": 41319 }, { "epoch": 4.200894672631151, "grad_norm": 0.2723160684108734, "learning_rate": 7.581054103604862e-07, "loss": 0.2967, "step": 41320 }, { "epoch": 4.2009963399756, "grad_norm": 0.2773086726665497, "learning_rate": 7.579175481441314e-07, "loss": 0.2852, "step": 41321 }, { "epoch": 4.201098007320049, "grad_norm": 0.28576117753982544, "learning_rate": 7.577297072983797e-07, "loss": 0.3104, "step": 41322 }, { "epoch": 4.2011996746644975, "grad_norm": 0.2847934067249298, "learning_rate": 7.575418878241803e-07, "loss": 0.2985, "step": 41323 }, { "epoch": 4.2013013420089464, "grad_norm": 0.2761445641517639, "learning_rate": 7.573540897224746e-07, "loss": 0.2807, "step": 41324 }, { "epoch": 4.201403009353395, "grad_norm": 0.27560052275657654, "learning_rate": 7.571663129942114e-07, "loss": 0.3019, "step": 41325 }, { "epoch": 4.201504676697844, "grad_norm": 0.2725594639778137, "learning_rate": 7.56978557640336e-07, "loss": 0.2793, "step": 41326 }, { "epoch": 4.201606344042293, "grad_norm": 0.28830286860466003, "learning_rate": 7.567908236617938e-07, "loss": 0.2967, "step": 41327 }, { "epoch": 4.201708011386742, "grad_norm": 0.29598018527030945, "learning_rate": 7.566031110595317e-07, "loss": 0.304, "step": 41328 }, { "epoch": 4.201809678731191, "grad_norm": 0.2905646860599518, "learning_rate": 7.564154198344947e-07, "loss": 0.278, "step": 41329 }, { "epoch": 4.20191134607564, "grad_norm": 0.28048694133758545, "learning_rate": 7.562277499876291e-07, "loss": 0.3041, "step": 41330 }, { "epoch": 4.202013013420089, "grad_norm": 0.2810322940349579, "learning_rate": 7.560401015198776e-07, "loss": 0.3062, "step": 41331 }, { "epoch": 4.202114680764538, "grad_norm": 0.28272590041160583, "learning_rate": 7.558524744321894e-07, "loss": 0.2819, "step": 41332 }, { "epoch": 4.202216348108988, "grad_norm": 0.2650904953479767, "learning_rate": 7.556648687255075e-07, "loss": 0.3445, "step": 41333 }, { "epoch": 4.202318015453437, "grad_norm": 0.2689002454280853, "learning_rate": 7.554772844007763e-07, "loss": 0.3043, "step": 41334 }, { "epoch": 4.202419682797886, "grad_norm": 0.2703688144683838, "learning_rate": 7.552897214589428e-07, "loss": 0.2777, "step": 41335 }, { "epoch": 4.202521350142335, "grad_norm": 0.2639022767543793, "learning_rate": 7.551021799009511e-07, "loss": 0.3098, "step": 41336 }, { "epoch": 4.202623017486784, "grad_norm": 0.2845386266708374, "learning_rate": 7.549146597277452e-07, "loss": 0.3051, "step": 41337 }, { "epoch": 4.2027246848312325, "grad_norm": 0.26901155710220337, "learning_rate": 7.547271609402712e-07, "loss": 0.2906, "step": 41338 }, { "epoch": 4.2028263521756815, "grad_norm": 0.2748100161552429, "learning_rate": 7.545396835394731e-07, "loss": 0.2725, "step": 41339 }, { "epoch": 4.20292801952013, "grad_norm": 0.29463744163513184, "learning_rate": 7.543522275262949e-07, "loss": 0.2984, "step": 41340 }, { "epoch": 4.203029686864579, "grad_norm": 0.3250587582588196, "learning_rate": 7.541647929016804e-07, "loss": 0.3124, "step": 41341 }, { "epoch": 4.203131354209028, "grad_norm": 0.26325124502182007, "learning_rate": 7.539773796665755e-07, "loss": 0.3159, "step": 41342 }, { "epoch": 4.203233021553477, "grad_norm": 0.2897886037826538, "learning_rate": 7.53789987821924e-07, "loss": 0.2904, "step": 41343 }, { "epoch": 4.203334688897926, "grad_norm": 0.28370922803878784, "learning_rate": 7.536026173686678e-07, "loss": 0.2917, "step": 41344 }, { "epoch": 4.203436356242375, "grad_norm": 0.2908829152584076, "learning_rate": 7.534152683077539e-07, "loss": 0.2944, "step": 41345 }, { "epoch": 4.203538023586824, "grad_norm": 0.28365400433540344, "learning_rate": 7.532279406401249e-07, "loss": 0.2598, "step": 41346 }, { "epoch": 4.203639690931273, "grad_norm": 0.26766490936279297, "learning_rate": 7.530406343667235e-07, "loss": 0.2758, "step": 41347 }, { "epoch": 4.203741358275722, "grad_norm": 0.29087623953819275, "learning_rate": 7.528533494884949e-07, "loss": 0.2782, "step": 41348 }, { "epoch": 4.203843025620171, "grad_norm": 0.26490166783332825, "learning_rate": 7.526660860063817e-07, "loss": 0.2915, "step": 41349 }, { "epoch": 4.20394469296462, "grad_norm": 0.3036748468875885, "learning_rate": 7.524788439213276e-07, "loss": 0.3284, "step": 41350 }, { "epoch": 4.204046360309069, "grad_norm": 0.2613389492034912, "learning_rate": 7.522916232342748e-07, "loss": 0.2667, "step": 41351 }, { "epoch": 4.204148027653518, "grad_norm": 0.27231496572494507, "learning_rate": 7.521044239461683e-07, "loss": 0.3148, "step": 41352 }, { "epoch": 4.204249694997967, "grad_norm": 0.30106106400489807, "learning_rate": 7.519172460579499e-07, "loss": 0.2912, "step": 41353 }, { "epoch": 4.204351362342416, "grad_norm": 0.26865941286087036, "learning_rate": 7.517300895705621e-07, "loss": 0.2736, "step": 41354 }, { "epoch": 4.2044530296868645, "grad_norm": 0.2658802270889282, "learning_rate": 7.515429544849501e-07, "loss": 0.3066, "step": 41355 }, { "epoch": 4.2045546970313135, "grad_norm": 0.271637499332428, "learning_rate": 7.513558408020544e-07, "loss": 0.2848, "step": 41356 }, { "epoch": 4.204656364375762, "grad_norm": 0.2624512016773224, "learning_rate": 7.511687485228176e-07, "loss": 0.2869, "step": 41357 }, { "epoch": 4.204758031720211, "grad_norm": 0.293559193611145, "learning_rate": 7.509816776481844e-07, "loss": 0.2785, "step": 41358 }, { "epoch": 4.20485969906466, "grad_norm": 0.2868165075778961, "learning_rate": 7.507946281790951e-07, "loss": 0.2961, "step": 41359 }, { "epoch": 4.204961366409109, "grad_norm": 0.301380455493927, "learning_rate": 7.506076001164931e-07, "loss": 0.2893, "step": 41360 }, { "epoch": 4.205063033753558, "grad_norm": 0.2664675712585449, "learning_rate": 7.50420593461319e-07, "loss": 0.2702, "step": 41361 }, { "epoch": 4.205164701098007, "grad_norm": 0.28014013171195984, "learning_rate": 7.502336082145173e-07, "loss": 0.2908, "step": 41362 }, { "epoch": 4.205266368442456, "grad_norm": 0.30187252163887024, "learning_rate": 7.500466443770288e-07, "loss": 0.2942, "step": 41363 }, { "epoch": 4.205368035786905, "grad_norm": 0.26903992891311646, "learning_rate": 7.498597019497944e-07, "loss": 0.3236, "step": 41364 }, { "epoch": 4.205469703131354, "grad_norm": 0.30216261744499207, "learning_rate": 7.496727809337573e-07, "loss": 0.3152, "step": 41365 }, { "epoch": 4.205571370475803, "grad_norm": 0.2823222279548645, "learning_rate": 7.494858813298594e-07, "loss": 0.28, "step": 41366 }, { "epoch": 4.205673037820252, "grad_norm": 0.2903963327407837, "learning_rate": 7.492990031390401e-07, "loss": 0.3089, "step": 41367 }, { "epoch": 4.205774705164701, "grad_norm": 0.27217355370521545, "learning_rate": 7.491121463622436e-07, "loss": 0.2766, "step": 41368 }, { "epoch": 4.20587637250915, "grad_norm": 0.3023262917995453, "learning_rate": 7.489253110004097e-07, "loss": 0.2761, "step": 41369 }, { "epoch": 4.205978039853599, "grad_norm": 0.26602715253829956, "learning_rate": 7.487384970544798e-07, "loss": 0.2946, "step": 41370 }, { "epoch": 4.206079707198048, "grad_norm": 0.30105072259902954, "learning_rate": 7.485517045253943e-07, "loss": 0.3019, "step": 41371 }, { "epoch": 4.2061813745424965, "grad_norm": 0.2790069580078125, "learning_rate": 7.483649334140957e-07, "loss": 0.3044, "step": 41372 }, { "epoch": 4.206283041886946, "grad_norm": 0.2914310097694397, "learning_rate": 7.481781837215246e-07, "loss": 0.2959, "step": 41373 }, { "epoch": 4.206384709231395, "grad_norm": 0.2864406704902649, "learning_rate": 7.479914554486201e-07, "loss": 0.3091, "step": 41374 }, { "epoch": 4.206486376575844, "grad_norm": 0.2709081768989563, "learning_rate": 7.478047485963258e-07, "loss": 0.2832, "step": 41375 }, { "epoch": 4.206588043920293, "grad_norm": 0.27051782608032227, "learning_rate": 7.476180631655805e-07, "loss": 0.2819, "step": 41376 }, { "epoch": 4.206689711264742, "grad_norm": 0.280122846364975, "learning_rate": 7.474313991573235e-07, "loss": 0.3401, "step": 41377 }, { "epoch": 4.206791378609191, "grad_norm": 0.2733072340488434, "learning_rate": 7.472447565724978e-07, "loss": 0.3075, "step": 41378 }, { "epoch": 4.20689304595364, "grad_norm": 0.26690778136253357, "learning_rate": 7.470581354120421e-07, "loss": 0.3093, "step": 41379 }, { "epoch": 4.206994713298089, "grad_norm": 0.28151601552963257, "learning_rate": 7.468715356768975e-07, "loss": 0.2927, "step": 41380 }, { "epoch": 4.207096380642538, "grad_norm": 0.2806611657142639, "learning_rate": 7.466849573680019e-07, "loss": 0.2763, "step": 41381 }, { "epoch": 4.207198047986987, "grad_norm": 0.2722247540950775, "learning_rate": 7.464984004862974e-07, "loss": 0.2788, "step": 41382 }, { "epoch": 4.207299715331436, "grad_norm": 0.27567726373672485, "learning_rate": 7.463118650327239e-07, "loss": 0.3192, "step": 41383 }, { "epoch": 4.207401382675885, "grad_norm": 0.2824193835258484, "learning_rate": 7.46125351008219e-07, "loss": 0.2736, "step": 41384 }, { "epoch": 4.207503050020334, "grad_norm": 0.30587127804756165, "learning_rate": 7.45938858413725e-07, "loss": 0.3102, "step": 41385 }, { "epoch": 4.207604717364783, "grad_norm": 0.2822941839694977, "learning_rate": 7.457523872501799e-07, "loss": 0.3013, "step": 41386 }, { "epoch": 4.2077063847092315, "grad_norm": 0.2631770968437195, "learning_rate": 7.45565937518522e-07, "loss": 0.2856, "step": 41387 }, { "epoch": 4.2078080520536805, "grad_norm": 0.28845441341400146, "learning_rate": 7.453795092196935e-07, "loss": 0.2833, "step": 41388 }, { "epoch": 4.207909719398129, "grad_norm": 0.27902185916900635, "learning_rate": 7.451931023546316e-07, "loss": 0.2984, "step": 41389 }, { "epoch": 4.208011386742578, "grad_norm": 0.27422451972961426, "learning_rate": 7.450067169242759e-07, "loss": 0.3033, "step": 41390 }, { "epoch": 4.208113054087027, "grad_norm": 0.29339051246643066, "learning_rate": 7.448203529295639e-07, "loss": 0.2853, "step": 41391 }, { "epoch": 4.208214721431476, "grad_norm": 0.2575397491455078, "learning_rate": 7.446340103714372e-07, "loss": 0.3082, "step": 41392 }, { "epoch": 4.208316388775925, "grad_norm": 0.28603583574295044, "learning_rate": 7.444476892508335e-07, "loss": 0.2875, "step": 41393 }, { "epoch": 4.208418056120374, "grad_norm": 0.26449087262153625, "learning_rate": 7.44261389568689e-07, "loss": 0.3201, "step": 41394 }, { "epoch": 4.208519723464823, "grad_norm": 0.2865704298019409, "learning_rate": 7.440751113259459e-07, "loss": 0.278, "step": 41395 }, { "epoch": 4.208621390809272, "grad_norm": 0.2672383487224579, "learning_rate": 7.438888545235412e-07, "loss": 0.2836, "step": 41396 }, { "epoch": 4.208723058153721, "grad_norm": 0.28969353437423706, "learning_rate": 7.43702619162413e-07, "loss": 0.3032, "step": 41397 }, { "epoch": 4.20882472549817, "grad_norm": 0.2762686014175415, "learning_rate": 7.435164052434979e-07, "loss": 0.2994, "step": 41398 }, { "epoch": 4.208926392842619, "grad_norm": 0.27621763944625854, "learning_rate": 7.433302127677372e-07, "loss": 0.2909, "step": 41399 }, { "epoch": 4.209028060187068, "grad_norm": 0.2740752696990967, "learning_rate": 7.431440417360675e-07, "loss": 0.3046, "step": 41400 }, { "epoch": 4.209129727531517, "grad_norm": 0.2908850610256195, "learning_rate": 7.429578921494251e-07, "loss": 0.3316, "step": 41401 }, { "epoch": 4.209231394875966, "grad_norm": 0.2558346688747406, "learning_rate": 7.427717640087517e-07, "loss": 0.2823, "step": 41402 }, { "epoch": 4.209333062220415, "grad_norm": 0.2772798538208008, "learning_rate": 7.425856573149804e-07, "loss": 0.2921, "step": 41403 }, { "epoch": 4.2094347295648635, "grad_norm": 0.308908075094223, "learning_rate": 7.423995720690502e-07, "loss": 0.3202, "step": 41404 }, { "epoch": 4.2095363969093125, "grad_norm": 0.26900795102119446, "learning_rate": 7.422135082719007e-07, "loss": 0.2764, "step": 41405 }, { "epoch": 4.209638064253761, "grad_norm": 0.325262188911438, "learning_rate": 7.420274659244675e-07, "loss": 0.2812, "step": 41406 }, { "epoch": 4.20973973159821, "grad_norm": 0.26414504647254944, "learning_rate": 7.418414450276884e-07, "loss": 0.2817, "step": 41407 }, { "epoch": 4.209841398942659, "grad_norm": 0.270664781332016, "learning_rate": 7.416554455824992e-07, "loss": 0.2963, "step": 41408 }, { "epoch": 4.209943066287108, "grad_norm": 0.27693846821784973, "learning_rate": 7.414694675898387e-07, "loss": 0.2969, "step": 41409 }, { "epoch": 4.210044733631557, "grad_norm": 0.30161359906196594, "learning_rate": 7.412835110506434e-07, "loss": 0.2716, "step": 41410 }, { "epoch": 4.210146400976006, "grad_norm": 0.27044329047203064, "learning_rate": 7.410975759658485e-07, "loss": 0.3002, "step": 41411 }, { "epoch": 4.210248068320455, "grad_norm": 0.28926244378089905, "learning_rate": 7.409116623363938e-07, "loss": 0.2808, "step": 41412 }, { "epoch": 4.210349735664904, "grad_norm": 0.2894393801689148, "learning_rate": 7.40725770163212e-07, "loss": 0.2521, "step": 41413 }, { "epoch": 4.210451403009353, "grad_norm": 0.2725755274295807, "learning_rate": 7.405398994472418e-07, "loss": 0.2902, "step": 41414 }, { "epoch": 4.210553070353803, "grad_norm": 0.29266929626464844, "learning_rate": 7.403540501894202e-07, "loss": 0.2828, "step": 41415 }, { "epoch": 4.210654737698252, "grad_norm": 0.2663806676864624, "learning_rate": 7.401682223906831e-07, "loss": 0.2733, "step": 41416 }, { "epoch": 4.210756405042701, "grad_norm": 0.30074208974838257, "learning_rate": 7.399824160519659e-07, "loss": 0.2889, "step": 41417 }, { "epoch": 4.21085807238715, "grad_norm": 0.2828781306743622, "learning_rate": 7.397966311742038e-07, "loss": 0.2733, "step": 41418 }, { "epoch": 4.2109597397315985, "grad_norm": 0.29480189085006714, "learning_rate": 7.396108677583346e-07, "loss": 0.3338, "step": 41419 }, { "epoch": 4.2110614070760475, "grad_norm": 0.2758151590824127, "learning_rate": 7.394251258052943e-07, "loss": 0.2858, "step": 41420 }, { "epoch": 4.211163074420496, "grad_norm": 0.28568726778030396, "learning_rate": 7.392394053160162e-07, "loss": 0.3113, "step": 41421 }, { "epoch": 4.211264741764945, "grad_norm": 0.2987610399723053, "learning_rate": 7.390537062914399e-07, "loss": 0.2672, "step": 41422 }, { "epoch": 4.211366409109394, "grad_norm": 0.29350924491882324, "learning_rate": 7.388680287324957e-07, "loss": 0.2869, "step": 41423 }, { "epoch": 4.211468076453843, "grad_norm": 0.2745003402233124, "learning_rate": 7.386823726401221e-07, "loss": 0.312, "step": 41424 }, { "epoch": 4.211569743798292, "grad_norm": 0.2865561246871948, "learning_rate": 7.384967380152564e-07, "loss": 0.3011, "step": 41425 }, { "epoch": 4.211671411142741, "grad_norm": 0.2712344229221344, "learning_rate": 7.383111248588293e-07, "loss": 0.2741, "step": 41426 }, { "epoch": 4.21177307848719, "grad_norm": 0.27678540349006653, "learning_rate": 7.381255331717791e-07, "loss": 0.3025, "step": 41427 }, { "epoch": 4.211874745831639, "grad_norm": 0.2625177204608917, "learning_rate": 7.379399629550382e-07, "loss": 0.3103, "step": 41428 }, { "epoch": 4.211976413176088, "grad_norm": 0.2792612314224243, "learning_rate": 7.377544142095444e-07, "loss": 0.2956, "step": 41429 }, { "epoch": 4.212078080520537, "grad_norm": 0.2840110957622528, "learning_rate": 7.375688869362307e-07, "loss": 0.2844, "step": 41430 }, { "epoch": 4.212179747864986, "grad_norm": 0.27683931589126587, "learning_rate": 7.373833811360315e-07, "loss": 0.2741, "step": 41431 }, { "epoch": 4.212281415209435, "grad_norm": 0.30998969078063965, "learning_rate": 7.371978968098837e-07, "loss": 0.2865, "step": 41432 }, { "epoch": 4.212383082553884, "grad_norm": 0.28665801882743835, "learning_rate": 7.370124339587176e-07, "loss": 0.296, "step": 41433 }, { "epoch": 4.212484749898333, "grad_norm": 0.26716917753219604, "learning_rate": 7.368269925834698e-07, "loss": 0.2817, "step": 41434 }, { "epoch": 4.212586417242782, "grad_norm": 0.29498738050460815, "learning_rate": 7.366415726850767e-07, "loss": 0.2956, "step": 41435 }, { "epoch": 4.2126880845872305, "grad_norm": 0.29497453570365906, "learning_rate": 7.364561742644683e-07, "loss": 0.2992, "step": 41436 }, { "epoch": 4.2127897519316795, "grad_norm": 0.2767708897590637, "learning_rate": 7.362707973225814e-07, "loss": 0.323, "step": 41437 }, { "epoch": 4.212891419276128, "grad_norm": 0.2566106617450714, "learning_rate": 7.360854418603491e-07, "loss": 0.3184, "step": 41438 }, { "epoch": 4.212993086620577, "grad_norm": 0.28249260783195496, "learning_rate": 7.359001078787042e-07, "loss": 0.3175, "step": 41439 }, { "epoch": 4.213094753965026, "grad_norm": 0.30333051085472107, "learning_rate": 7.35714795378582e-07, "loss": 0.2854, "step": 41440 }, { "epoch": 4.213196421309475, "grad_norm": 0.2763960361480713, "learning_rate": 7.355295043609146e-07, "loss": 0.2759, "step": 41441 }, { "epoch": 4.213298088653924, "grad_norm": 0.28912660479545593, "learning_rate": 7.353442348266382e-07, "loss": 0.2861, "step": 41442 }, { "epoch": 4.213399755998373, "grad_norm": 0.2644858658313751, "learning_rate": 7.351589867766817e-07, "loss": 0.3034, "step": 41443 }, { "epoch": 4.213501423342822, "grad_norm": 0.2613602876663208, "learning_rate": 7.349737602119805e-07, "loss": 0.3031, "step": 41444 }, { "epoch": 4.213603090687271, "grad_norm": 0.2718227803707123, "learning_rate": 7.34788555133471e-07, "loss": 0.2902, "step": 41445 }, { "epoch": 4.21370475803172, "grad_norm": 0.2917982339859009, "learning_rate": 7.346033715420803e-07, "loss": 0.2846, "step": 41446 }, { "epoch": 4.213806425376169, "grad_norm": 0.2917805314064026, "learning_rate": 7.344182094387453e-07, "loss": 0.3055, "step": 41447 }, { "epoch": 4.213908092720618, "grad_norm": 0.2726457118988037, "learning_rate": 7.342330688243982e-07, "loss": 0.2677, "step": 41448 }, { "epoch": 4.214009760065067, "grad_norm": 0.27181583642959595, "learning_rate": 7.340479496999698e-07, "loss": 0.2835, "step": 41449 }, { "epoch": 4.214111427409516, "grad_norm": 0.25855425000190735, "learning_rate": 7.338628520663954e-07, "loss": 0.3311, "step": 41450 }, { "epoch": 4.214213094753965, "grad_norm": 0.29403623938560486, "learning_rate": 7.336777759246045e-07, "loss": 0.2781, "step": 41451 }, { "epoch": 4.214314762098414, "grad_norm": 0.2582940459251404, "learning_rate": 7.334927212755338e-07, "loss": 0.3122, "step": 41452 }, { "epoch": 4.2144164294428625, "grad_norm": 0.2808414697647095, "learning_rate": 7.333076881201106e-07, "loss": 0.293, "step": 41453 }, { "epoch": 4.2145180967873115, "grad_norm": 0.2892497777938843, "learning_rate": 7.331226764592691e-07, "loss": 0.2968, "step": 41454 }, { "epoch": 4.214619764131761, "grad_norm": 0.27268388867378235, "learning_rate": 7.329376862939442e-07, "loss": 0.3007, "step": 41455 }, { "epoch": 4.21472143147621, "grad_norm": 0.28534525632858276, "learning_rate": 7.327527176250626e-07, "loss": 0.2925, "step": 41456 }, { "epoch": 4.214823098820659, "grad_norm": 0.2931366264820099, "learning_rate": 7.325677704535606e-07, "loss": 0.2858, "step": 41457 }, { "epoch": 4.214924766165108, "grad_norm": 0.28775280714035034, "learning_rate": 7.323828447803671e-07, "loss": 0.3578, "step": 41458 }, { "epoch": 4.215026433509557, "grad_norm": 0.2680470049381256, "learning_rate": 7.321979406064145e-07, "loss": 0.3096, "step": 41459 }, { "epoch": 4.215128100854006, "grad_norm": 0.26099979877471924, "learning_rate": 7.320130579326351e-07, "loss": 0.285, "step": 41460 }, { "epoch": 4.215229768198455, "grad_norm": 0.2639915645122528, "learning_rate": 7.318281967599595e-07, "loss": 0.2941, "step": 41461 }, { "epoch": 4.215331435542904, "grad_norm": 0.2861151397228241, "learning_rate": 7.316433570893195e-07, "loss": 0.2863, "step": 41462 }, { "epoch": 4.215433102887353, "grad_norm": 0.2703825533390045, "learning_rate": 7.31458538921645e-07, "loss": 0.297, "step": 41463 }, { "epoch": 4.215534770231802, "grad_norm": 0.27970603108406067, "learning_rate": 7.312737422578676e-07, "loss": 0.2943, "step": 41464 }, { "epoch": 4.215636437576251, "grad_norm": 0.2765800356864929, "learning_rate": 7.310889670989212e-07, "loss": 0.2871, "step": 41465 }, { "epoch": 4.2157381049207, "grad_norm": 0.25728490948677063, "learning_rate": 7.309042134457317e-07, "loss": 0.292, "step": 41466 }, { "epoch": 4.215839772265149, "grad_norm": 0.2770494222640991, "learning_rate": 7.307194812992335e-07, "loss": 0.2875, "step": 41467 }, { "epoch": 4.2159414396095976, "grad_norm": 0.3004278540611267, "learning_rate": 7.305347706603561e-07, "loss": 0.3107, "step": 41468 }, { "epoch": 4.2160431069540465, "grad_norm": 0.27251601219177246, "learning_rate": 7.303500815300285e-07, "loss": 0.3068, "step": 41469 }, { "epoch": 4.216144774298495, "grad_norm": 0.2663904130458832, "learning_rate": 7.30165413909184e-07, "loss": 0.2743, "step": 41470 }, { "epoch": 4.216246441642944, "grad_norm": 0.3439434766769409, "learning_rate": 7.299807677987513e-07, "loss": 0.2846, "step": 41471 }, { "epoch": 4.216348108987393, "grad_norm": 0.2777515649795532, "learning_rate": 7.297961431996609e-07, "loss": 0.2886, "step": 41472 }, { "epoch": 4.216449776331842, "grad_norm": 0.27409037947654724, "learning_rate": 7.296115401128417e-07, "loss": 0.289, "step": 41473 }, { "epoch": 4.216551443676291, "grad_norm": 0.2558140456676483, "learning_rate": 7.294269585392244e-07, "loss": 0.2908, "step": 41474 }, { "epoch": 4.21665311102074, "grad_norm": 0.27138152718544006, "learning_rate": 7.29242398479742e-07, "loss": 0.2756, "step": 41475 }, { "epoch": 4.216754778365189, "grad_norm": 0.28238645195961, "learning_rate": 7.290578599353187e-07, "loss": 0.3049, "step": 41476 }, { "epoch": 4.216856445709638, "grad_norm": 0.2962540090084076, "learning_rate": 7.288733429068889e-07, "loss": 0.3031, "step": 41477 }, { "epoch": 4.216958113054087, "grad_norm": 0.27381715178489685, "learning_rate": 7.286888473953796e-07, "loss": 0.282, "step": 41478 }, { "epoch": 4.217059780398536, "grad_norm": 0.27334389090538025, "learning_rate": 7.2850437340172e-07, "loss": 0.2816, "step": 41479 }, { "epoch": 4.217161447742985, "grad_norm": 0.2621130645275116, "learning_rate": 7.283199209268411e-07, "loss": 0.3233, "step": 41480 }, { "epoch": 4.217263115087434, "grad_norm": 0.2748362720012665, "learning_rate": 7.281354899716714e-07, "loss": 0.3051, "step": 41481 }, { "epoch": 4.217364782431883, "grad_norm": 0.28267422318458557, "learning_rate": 7.279510805371404e-07, "loss": 0.3093, "step": 41482 }, { "epoch": 4.217466449776332, "grad_norm": 0.30856847763061523, "learning_rate": 7.27766692624175e-07, "loss": 0.2737, "step": 41483 }, { "epoch": 4.217568117120781, "grad_norm": 0.2864505350589752, "learning_rate": 7.275823262337073e-07, "loss": 0.3183, "step": 41484 }, { "epoch": 4.2176697844652296, "grad_norm": 0.2971999943256378, "learning_rate": 7.273979813666643e-07, "loss": 0.2952, "step": 41485 }, { "epoch": 4.2177714518096785, "grad_norm": 0.2732390761375427, "learning_rate": 7.272136580239741e-07, "loss": 0.2896, "step": 41486 }, { "epoch": 4.217873119154127, "grad_norm": 0.26188671588897705, "learning_rate": 7.270293562065666e-07, "loss": 0.3305, "step": 41487 }, { "epoch": 4.217974786498576, "grad_norm": 0.2700643539428711, "learning_rate": 7.268450759153706e-07, "loss": 0.3193, "step": 41488 }, { "epoch": 4.218076453843025, "grad_norm": 0.2837705910205841, "learning_rate": 7.266608171513123e-07, "loss": 0.3279, "step": 41489 }, { "epoch": 4.218178121187474, "grad_norm": 0.2749078869819641, "learning_rate": 7.264765799153222e-07, "loss": 0.3003, "step": 41490 }, { "epoch": 4.218279788531923, "grad_norm": 0.26861563324928284, "learning_rate": 7.262923642083275e-07, "loss": 0.2813, "step": 41491 }, { "epoch": 4.218381455876372, "grad_norm": 0.2942344546318054, "learning_rate": 7.261081700312561e-07, "loss": 0.2984, "step": 41492 }, { "epoch": 4.218483123220821, "grad_norm": 0.27479490637779236, "learning_rate": 7.259239973850357e-07, "loss": 0.3003, "step": 41493 }, { "epoch": 4.21858479056527, "grad_norm": 0.2755790650844574, "learning_rate": 7.257398462705951e-07, "loss": 0.3087, "step": 41494 }, { "epoch": 4.218686457909719, "grad_norm": 0.28374847769737244, "learning_rate": 7.255557166888616e-07, "loss": 0.3045, "step": 41495 }, { "epoch": 4.218788125254168, "grad_norm": 0.29054489731788635, "learning_rate": 7.253716086407614e-07, "loss": 0.2973, "step": 41496 }, { "epoch": 4.218889792598618, "grad_norm": 0.28818655014038086, "learning_rate": 7.251875221272248e-07, "loss": 0.2836, "step": 41497 }, { "epoch": 4.218991459943067, "grad_norm": 0.27915939688682556, "learning_rate": 7.250034571491771e-07, "loss": 0.2848, "step": 41498 }, { "epoch": 4.219093127287516, "grad_norm": 0.2925948202610016, "learning_rate": 7.248194137075449e-07, "loss": 0.2903, "step": 41499 }, { "epoch": 4.219194794631965, "grad_norm": 0.25901103019714355, "learning_rate": 7.246353918032578e-07, "loss": 0.36, "step": 41500 }, { "epoch": 4.2192964619764135, "grad_norm": 0.2691725194454193, "learning_rate": 7.24451391437242e-07, "loss": 0.284, "step": 41501 }, { "epoch": 4.2193981293208624, "grad_norm": 0.2734541893005371, "learning_rate": 7.242674126104238e-07, "loss": 0.3031, "step": 41502 }, { "epoch": 4.219499796665311, "grad_norm": 0.27266520261764526, "learning_rate": 7.240834553237291e-07, "loss": 0.2814, "step": 41503 }, { "epoch": 4.21960146400976, "grad_norm": 0.298550546169281, "learning_rate": 7.238995195780868e-07, "loss": 0.2993, "step": 41504 }, { "epoch": 4.219703131354209, "grad_norm": 0.27654650807380676, "learning_rate": 7.237156053744227e-07, "loss": 0.3043, "step": 41505 }, { "epoch": 4.219804798698658, "grad_norm": 0.2767599821090698, "learning_rate": 7.235317127136621e-07, "loss": 0.2737, "step": 41506 }, { "epoch": 4.219906466043107, "grad_norm": 0.2618545591831207, "learning_rate": 7.233478415967338e-07, "loss": 0.2981, "step": 41507 }, { "epoch": 4.220008133387556, "grad_norm": 0.2901986837387085, "learning_rate": 7.231639920245625e-07, "loss": 0.3001, "step": 41508 }, { "epoch": 4.220109800732005, "grad_norm": 0.284912645816803, "learning_rate": 7.229801639980738e-07, "loss": 0.2924, "step": 41509 }, { "epoch": 4.220211468076454, "grad_norm": 0.27961698174476624, "learning_rate": 7.227963575181957e-07, "loss": 0.3129, "step": 41510 }, { "epoch": 4.220313135420903, "grad_norm": 0.2948995530605316, "learning_rate": 7.226125725858529e-07, "loss": 0.3054, "step": 41511 }, { "epoch": 4.220414802765352, "grad_norm": 0.2831733226776123, "learning_rate": 7.224288092019721e-07, "loss": 0.2987, "step": 41512 }, { "epoch": 4.220516470109801, "grad_norm": 0.2738620638847351, "learning_rate": 7.222450673674769e-07, "loss": 0.2926, "step": 41513 }, { "epoch": 4.22061813745425, "grad_norm": 0.27831244468688965, "learning_rate": 7.220613470832954e-07, "loss": 0.3182, "step": 41514 }, { "epoch": 4.220719804798699, "grad_norm": 0.2848134934902191, "learning_rate": 7.218776483503526e-07, "loss": 0.2872, "step": 41515 }, { "epoch": 4.220821472143148, "grad_norm": 0.2579957842826843, "learning_rate": 7.216939711695719e-07, "loss": 0.293, "step": 41516 }, { "epoch": 4.220923139487597, "grad_norm": 0.3000503480434418, "learning_rate": 7.215103155418824e-07, "loss": 0.293, "step": 41517 }, { "epoch": 4.2210248068320455, "grad_norm": 0.2652466893196106, "learning_rate": 7.213266814682062e-07, "loss": 0.2973, "step": 41518 }, { "epoch": 4.2211264741764944, "grad_norm": 0.3051557242870331, "learning_rate": 7.211430689494692e-07, "loss": 0.2778, "step": 41519 }, { "epoch": 4.221228141520943, "grad_norm": 0.29858922958374023, "learning_rate": 7.209594779865969e-07, "loss": 0.31, "step": 41520 }, { "epoch": 4.221329808865392, "grad_norm": 0.29612717032432556, "learning_rate": 7.207759085805144e-07, "loss": 0.3003, "step": 41521 }, { "epoch": 4.221431476209841, "grad_norm": 0.2675844132900238, "learning_rate": 7.205923607321458e-07, "loss": 0.3028, "step": 41522 }, { "epoch": 4.22153314355429, "grad_norm": 0.27066466212272644, "learning_rate": 7.204088344424148e-07, "loss": 0.2984, "step": 41523 }, { "epoch": 4.221634810898739, "grad_norm": 0.26436084508895874, "learning_rate": 7.20225329712248e-07, "loss": 0.2884, "step": 41524 }, { "epoch": 4.221736478243188, "grad_norm": 0.3018583655357361, "learning_rate": 7.200418465425696e-07, "loss": 0.2955, "step": 41525 }, { "epoch": 4.221838145587637, "grad_norm": 0.3072911202907562, "learning_rate": 7.198583849343016e-07, "loss": 0.2745, "step": 41526 }, { "epoch": 4.221939812932086, "grad_norm": 0.27885901927948, "learning_rate": 7.196749448883717e-07, "loss": 0.3057, "step": 41527 }, { "epoch": 4.222041480276535, "grad_norm": 0.27466610074043274, "learning_rate": 7.194915264057017e-07, "loss": 0.3078, "step": 41528 }, { "epoch": 4.222143147620984, "grad_norm": 0.27638939023017883, "learning_rate": 7.19308129487215e-07, "loss": 0.306, "step": 41529 }, { "epoch": 4.222244814965433, "grad_norm": 0.270883172750473, "learning_rate": 7.191247541338381e-07, "loss": 0.2889, "step": 41530 }, { "epoch": 4.222346482309882, "grad_norm": 0.29324251413345337, "learning_rate": 7.189414003464934e-07, "loss": 0.2773, "step": 41531 }, { "epoch": 4.222448149654331, "grad_norm": 0.2801826298236847, "learning_rate": 7.187580681261042e-07, "loss": 0.2858, "step": 41532 }, { "epoch": 4.22254981699878, "grad_norm": 0.2658115327358246, "learning_rate": 7.18574757473594e-07, "loss": 0.2758, "step": 41533 }, { "epoch": 4.222651484343229, "grad_norm": 0.2783134877681732, "learning_rate": 7.183914683898874e-07, "loss": 0.3033, "step": 41534 }, { "epoch": 4.2227531516876775, "grad_norm": 0.2704131305217743, "learning_rate": 7.182082008759073e-07, "loss": 0.2951, "step": 41535 }, { "epoch": 4.222854819032127, "grad_norm": 0.27127736806869507, "learning_rate": 7.180249549325757e-07, "loss": 0.2859, "step": 41536 }, { "epoch": 4.222956486376576, "grad_norm": 0.2934364974498749, "learning_rate": 7.178417305608177e-07, "loss": 0.2894, "step": 41537 }, { "epoch": 4.223058153721025, "grad_norm": 0.28067126870155334, "learning_rate": 7.176585277615555e-07, "loss": 0.298, "step": 41538 }, { "epoch": 4.223159821065474, "grad_norm": 0.26380598545074463, "learning_rate": 7.174753465357109e-07, "loss": 0.317, "step": 41539 }, { "epoch": 4.223261488409923, "grad_norm": 0.2840246856212616, "learning_rate": 7.172921868842093e-07, "loss": 0.2979, "step": 41540 }, { "epoch": 4.223363155754372, "grad_norm": 0.2790893614292145, "learning_rate": 7.171090488079713e-07, "loss": 0.295, "step": 41541 }, { "epoch": 4.223464823098821, "grad_norm": 0.27239322662353516, "learning_rate": 7.169259323079208e-07, "loss": 0.2976, "step": 41542 }, { "epoch": 4.22356649044327, "grad_norm": 0.29176250100135803, "learning_rate": 7.167428373849783e-07, "loss": 0.2922, "step": 41543 }, { "epoch": 4.223668157787719, "grad_norm": 0.2810869514942169, "learning_rate": 7.165597640400685e-07, "loss": 0.2865, "step": 41544 }, { "epoch": 4.223769825132168, "grad_norm": 0.2795372009277344, "learning_rate": 7.163767122741127e-07, "loss": 0.2962, "step": 41545 }, { "epoch": 4.223871492476617, "grad_norm": 0.280269980430603, "learning_rate": 7.161936820880322e-07, "loss": 0.3008, "step": 41546 }, { "epoch": 4.223973159821066, "grad_norm": 0.279328316450119, "learning_rate": 7.160106734827504e-07, "loss": 0.2859, "step": 41547 }, { "epoch": 4.224074827165515, "grad_norm": 0.283192902803421, "learning_rate": 7.158276864591895e-07, "loss": 0.2944, "step": 41548 }, { "epoch": 4.224176494509964, "grad_norm": 0.27604836225509644, "learning_rate": 7.156447210182693e-07, "loss": 0.2816, "step": 41549 }, { "epoch": 4.2242781618544125, "grad_norm": 0.3011185824871063, "learning_rate": 7.15461777160914e-07, "loss": 0.3216, "step": 41550 }, { "epoch": 4.2243798291988615, "grad_norm": 0.30280816555023193, "learning_rate": 7.152788548880441e-07, "loss": 0.2913, "step": 41551 }, { "epoch": 4.22448149654331, "grad_norm": 0.3136144280433655, "learning_rate": 7.150959542005814e-07, "loss": 0.3176, "step": 41552 }, { "epoch": 4.224583163887759, "grad_norm": 0.27841678261756897, "learning_rate": 7.149130750994454e-07, "loss": 0.3065, "step": 41553 }, { "epoch": 4.224684831232208, "grad_norm": 0.33004096150398254, "learning_rate": 7.147302175855603e-07, "loss": 0.2964, "step": 41554 }, { "epoch": 4.224786498576657, "grad_norm": 0.27969902753829956, "learning_rate": 7.145473816598464e-07, "loss": 0.3139, "step": 41555 }, { "epoch": 4.224888165921106, "grad_norm": 0.2782318890094757, "learning_rate": 7.14364567323223e-07, "loss": 0.3227, "step": 41556 }, { "epoch": 4.224989833265555, "grad_norm": 0.27805575728416443, "learning_rate": 7.141817745766133e-07, "loss": 0.3213, "step": 41557 }, { "epoch": 4.225091500610004, "grad_norm": 0.28188392519950867, "learning_rate": 7.139990034209376e-07, "loss": 0.2888, "step": 41558 }, { "epoch": 4.225193167954453, "grad_norm": 0.29285940527915955, "learning_rate": 7.138162538571159e-07, "loss": 0.2812, "step": 41559 }, { "epoch": 4.225294835298902, "grad_norm": 0.28566527366638184, "learning_rate": 7.136335258860694e-07, "loss": 0.2898, "step": 41560 }, { "epoch": 4.225396502643351, "grad_norm": 0.2962227761745453, "learning_rate": 7.134508195087192e-07, "loss": 0.3017, "step": 41561 }, { "epoch": 4.2254981699878, "grad_norm": 0.28641942143440247, "learning_rate": 7.132681347259851e-07, "loss": 0.2899, "step": 41562 }, { "epoch": 4.225599837332249, "grad_norm": 0.29595887660980225, "learning_rate": 7.130854715387864e-07, "loss": 0.2798, "step": 41563 }, { "epoch": 4.225701504676698, "grad_norm": 0.27011290192604065, "learning_rate": 7.129028299480456e-07, "loss": 0.3034, "step": 41564 }, { "epoch": 4.225803172021147, "grad_norm": 0.2718433439731598, "learning_rate": 7.127202099546809e-07, "loss": 0.2986, "step": 41565 }, { "epoch": 4.225904839365596, "grad_norm": 0.2979680895805359, "learning_rate": 7.125376115596128e-07, "loss": 0.259, "step": 41566 }, { "epoch": 4.2260065067100445, "grad_norm": 0.25980862975120544, "learning_rate": 7.12355034763762e-07, "loss": 0.3241, "step": 41567 }, { "epoch": 4.2261081740544935, "grad_norm": 0.2724752426147461, "learning_rate": 7.121724795680473e-07, "loss": 0.293, "step": 41568 }, { "epoch": 4.226209841398942, "grad_norm": 0.28219088912010193, "learning_rate": 7.119899459733881e-07, "loss": 0.2811, "step": 41569 }, { "epoch": 4.226311508743391, "grad_norm": 0.2966901659965515, "learning_rate": 7.118074339807052e-07, "loss": 0.3577, "step": 41570 }, { "epoch": 4.22641317608784, "grad_norm": 0.28209561109542847, "learning_rate": 7.116249435909179e-07, "loss": 0.2817, "step": 41571 }, { "epoch": 4.226514843432289, "grad_norm": 0.28300124406814575, "learning_rate": 7.114424748049447e-07, "loss": 0.2981, "step": 41572 }, { "epoch": 4.226616510776738, "grad_norm": 0.29042932391166687, "learning_rate": 7.112600276237042e-07, "loss": 0.3098, "step": 41573 }, { "epoch": 4.226718178121187, "grad_norm": 0.28894203901290894, "learning_rate": 7.110776020481181e-07, "loss": 0.3114, "step": 41574 }, { "epoch": 4.226819845465636, "grad_norm": 0.28158509731292725, "learning_rate": 7.108951980791034e-07, "loss": 0.3131, "step": 41575 }, { "epoch": 4.226921512810085, "grad_norm": 0.27395498752593994, "learning_rate": 7.10712815717578e-07, "loss": 0.3266, "step": 41576 }, { "epoch": 4.227023180154534, "grad_norm": 0.2832995653152466, "learning_rate": 7.105304549644648e-07, "loss": 0.2752, "step": 41577 }, { "epoch": 4.227124847498983, "grad_norm": 0.28942859172821045, "learning_rate": 7.103481158206777e-07, "loss": 0.2747, "step": 41578 }, { "epoch": 4.227226514843433, "grad_norm": 0.290208101272583, "learning_rate": 7.101657982871374e-07, "loss": 0.2978, "step": 41579 }, { "epoch": 4.227328182187882, "grad_norm": 0.27793845534324646, "learning_rate": 7.099835023647634e-07, "loss": 0.3032, "step": 41580 }, { "epoch": 4.227429849532331, "grad_norm": 0.25050926208496094, "learning_rate": 7.098012280544724e-07, "loss": 0.2637, "step": 41581 }, { "epoch": 4.2275315168767795, "grad_norm": 0.28930920362472534, "learning_rate": 7.096189753571841e-07, "loss": 0.2931, "step": 41582 }, { "epoch": 4.2276331842212285, "grad_norm": 0.2675873041152954, "learning_rate": 7.094367442738149e-07, "loss": 0.3181, "step": 41583 }, { "epoch": 4.227734851565677, "grad_norm": 0.29190003871917725, "learning_rate": 7.092545348052842e-07, "loss": 0.2619, "step": 41584 }, { "epoch": 4.227836518910126, "grad_norm": 0.26474258303642273, "learning_rate": 7.090723469525096e-07, "loss": 0.2867, "step": 41585 }, { "epoch": 4.227938186254575, "grad_norm": 0.2601260244846344, "learning_rate": 7.088901807164084e-07, "loss": 0.2901, "step": 41586 }, { "epoch": 4.228039853599024, "grad_norm": 0.2607395350933075, "learning_rate": 7.087080360979004e-07, "loss": 0.2699, "step": 41587 }, { "epoch": 4.228141520943473, "grad_norm": 0.2767679691314697, "learning_rate": 7.085259130978994e-07, "loss": 0.2752, "step": 41588 }, { "epoch": 4.228243188287922, "grad_norm": 0.2719308137893677, "learning_rate": 7.083438117173263e-07, "loss": 0.2792, "step": 41589 }, { "epoch": 4.228344855632371, "grad_norm": 0.2881300449371338, "learning_rate": 7.081617319570972e-07, "loss": 0.2749, "step": 41590 }, { "epoch": 4.22844652297682, "grad_norm": 0.2845803499221802, "learning_rate": 7.079796738181283e-07, "loss": 0.2941, "step": 41591 }, { "epoch": 4.228548190321269, "grad_norm": 0.28477829694747925, "learning_rate": 7.077976373013385e-07, "loss": 0.282, "step": 41592 }, { "epoch": 4.228649857665718, "grad_norm": 0.28353869915008545, "learning_rate": 7.076156224076436e-07, "loss": 0.3195, "step": 41593 }, { "epoch": 4.228751525010167, "grad_norm": 0.30121055245399475, "learning_rate": 7.074336291379636e-07, "loss": 0.3352, "step": 41594 }, { "epoch": 4.228853192354616, "grad_norm": 0.28814905881881714, "learning_rate": 7.072516574932103e-07, "loss": 0.2961, "step": 41595 }, { "epoch": 4.228954859699065, "grad_norm": 0.2880614101886749, "learning_rate": 7.070697074743027e-07, "loss": 0.3449, "step": 41596 }, { "epoch": 4.229056527043514, "grad_norm": 0.27895388007164, "learning_rate": 7.068877790821605e-07, "loss": 0.292, "step": 41597 }, { "epoch": 4.229158194387963, "grad_norm": 0.29320046305656433, "learning_rate": 7.06705872317695e-07, "loss": 0.3021, "step": 41598 }, { "epoch": 4.2292598617324115, "grad_norm": 0.2861771583557129, "learning_rate": 7.065239871818263e-07, "loss": 0.2893, "step": 41599 }, { "epoch": 4.2293615290768605, "grad_norm": 0.2807168662548065, "learning_rate": 7.063421236754697e-07, "loss": 0.2881, "step": 41600 }, { "epoch": 4.229463196421309, "grad_norm": 0.2794988453388214, "learning_rate": 7.0616028179954e-07, "loss": 0.3277, "step": 41601 }, { "epoch": 4.229564863765758, "grad_norm": 0.26967430114746094, "learning_rate": 7.059784615549553e-07, "loss": 0.295, "step": 41602 }, { "epoch": 4.229666531110207, "grad_norm": 0.2833588719367981, "learning_rate": 7.057966629426293e-07, "loss": 0.2851, "step": 41603 }, { "epoch": 4.229768198454656, "grad_norm": 0.28351014852523804, "learning_rate": 7.05614885963482e-07, "loss": 0.2875, "step": 41604 }, { "epoch": 4.229869865799105, "grad_norm": 0.27337950468063354, "learning_rate": 7.054331306184242e-07, "loss": 0.2969, "step": 41605 }, { "epoch": 4.229971533143554, "grad_norm": 0.256337434053421, "learning_rate": 7.052513969083735e-07, "loss": 0.3366, "step": 41606 }, { "epoch": 4.230073200488003, "grad_norm": 0.2907644510269165, "learning_rate": 7.050696848342481e-07, "loss": 0.297, "step": 41607 }, { "epoch": 4.230174867832452, "grad_norm": 0.29898110032081604, "learning_rate": 7.048879943969583e-07, "loss": 0.3146, "step": 41608 }, { "epoch": 4.230276535176901, "grad_norm": 0.27491456270217896, "learning_rate": 7.047063255974234e-07, "loss": 0.2971, "step": 41609 }, { "epoch": 4.23037820252135, "grad_norm": 0.27982786297798157, "learning_rate": 7.045246784365579e-07, "loss": 0.2536, "step": 41610 }, { "epoch": 4.230479869865799, "grad_norm": 0.273739755153656, "learning_rate": 7.043430529152744e-07, "loss": 0.2941, "step": 41611 }, { "epoch": 4.230581537210248, "grad_norm": 0.2713625431060791, "learning_rate": 7.041614490344917e-07, "loss": 0.3006, "step": 41612 }, { "epoch": 4.230683204554697, "grad_norm": 0.28972327709198, "learning_rate": 7.03979866795122e-07, "loss": 0.3138, "step": 41613 }, { "epoch": 4.230784871899146, "grad_norm": 0.26617157459259033, "learning_rate": 7.037983061980813e-07, "loss": 0.2874, "step": 41614 }, { "epoch": 4.230886539243595, "grad_norm": 0.29710203409194946, "learning_rate": 7.036167672442823e-07, "loss": 0.2888, "step": 41615 }, { "epoch": 4.2309882065880435, "grad_norm": 0.2795006334781647, "learning_rate": 7.034352499346414e-07, "loss": 0.2852, "step": 41616 }, { "epoch": 4.2310898739324925, "grad_norm": 0.2924426794052124, "learning_rate": 7.032537542700751e-07, "loss": 0.3192, "step": 41617 }, { "epoch": 4.231191541276942, "grad_norm": 0.26890337467193604, "learning_rate": 7.030722802514928e-07, "loss": 0.3025, "step": 41618 }, { "epoch": 4.231293208621391, "grad_norm": 0.28516316413879395, "learning_rate": 7.028908278798119e-07, "loss": 0.2762, "step": 41619 }, { "epoch": 4.23139487596584, "grad_norm": 0.30808666348457336, "learning_rate": 7.027093971559468e-07, "loss": 0.2942, "step": 41620 }, { "epoch": 4.231496543310289, "grad_norm": 0.27408885955810547, "learning_rate": 7.025279880808089e-07, "loss": 0.3338, "step": 41621 }, { "epoch": 4.231598210654738, "grad_norm": 0.27584609389305115, "learning_rate": 7.023466006553148e-07, "loss": 0.2799, "step": 41622 }, { "epoch": 4.231699877999187, "grad_norm": 0.28375205397605896, "learning_rate": 7.021652348803776e-07, "loss": 0.2768, "step": 41623 }, { "epoch": 4.231801545343636, "grad_norm": 0.269318550825119, "learning_rate": 7.019838907569105e-07, "loss": 0.2838, "step": 41624 }, { "epoch": 4.231903212688085, "grad_norm": 0.2971896231174469, "learning_rate": 7.01802568285826e-07, "loss": 0.3115, "step": 41625 }, { "epoch": 4.232004880032534, "grad_norm": 0.2605013847351074, "learning_rate": 7.016212674680384e-07, "loss": 0.294, "step": 41626 }, { "epoch": 4.232106547376983, "grad_norm": 0.2895142734050751, "learning_rate": 7.014399883044643e-07, "loss": 0.3085, "step": 41627 }, { "epoch": 4.232208214721432, "grad_norm": 0.2800586521625519, "learning_rate": 7.012587307960111e-07, "loss": 0.2759, "step": 41628 }, { "epoch": 4.232309882065881, "grad_norm": 0.2762060761451721, "learning_rate": 7.010774949435967e-07, "loss": 0.2878, "step": 41629 }, { "epoch": 4.23241154941033, "grad_norm": 0.27006036043167114, "learning_rate": 7.008962807481318e-07, "loss": 0.3023, "step": 41630 }, { "epoch": 4.2325132167547785, "grad_norm": 0.2818777561187744, "learning_rate": 7.00715088210529e-07, "loss": 0.2903, "step": 41631 }, { "epoch": 4.2326148840992275, "grad_norm": 0.26204872131347656, "learning_rate": 7.005339173317028e-07, "loss": 0.3075, "step": 41632 }, { "epoch": 4.232716551443676, "grad_norm": 0.2834915816783905, "learning_rate": 7.003527681125649e-07, "loss": 0.2738, "step": 41633 }, { "epoch": 4.232818218788125, "grad_norm": 0.27403298020362854, "learning_rate": 7.001716405540287e-07, "loss": 0.3362, "step": 41634 }, { "epoch": 4.232919886132574, "grad_norm": 0.3028731942176819, "learning_rate": 6.999905346570041e-07, "loss": 0.3217, "step": 41635 }, { "epoch": 4.233021553477023, "grad_norm": 0.28408533334732056, "learning_rate": 6.998094504224068e-07, "loss": 0.2901, "step": 41636 }, { "epoch": 4.233123220821472, "grad_norm": 0.2942325472831726, "learning_rate": 6.996283878511478e-07, "loss": 0.2961, "step": 41637 }, { "epoch": 4.233224888165921, "grad_norm": 0.28500446677207947, "learning_rate": 6.994473469441376e-07, "loss": 0.2674, "step": 41638 }, { "epoch": 4.23332655551037, "grad_norm": 0.3131667673587799, "learning_rate": 6.99266327702291e-07, "loss": 0.2959, "step": 41639 }, { "epoch": 4.233428222854819, "grad_norm": 0.295434832572937, "learning_rate": 6.990853301265183e-07, "loss": 0.3289, "step": 41640 }, { "epoch": 4.233529890199268, "grad_norm": 0.2663617730140686, "learning_rate": 6.98904354217731e-07, "loss": 0.2938, "step": 41641 }, { "epoch": 4.233631557543717, "grad_norm": 0.2881407141685486, "learning_rate": 6.987233999768423e-07, "loss": 0.2742, "step": 41642 }, { "epoch": 4.233733224888166, "grad_norm": 0.2893880307674408, "learning_rate": 6.98542467404763e-07, "loss": 0.2909, "step": 41643 }, { "epoch": 4.233834892232615, "grad_norm": 0.265272319316864, "learning_rate": 6.983615565024049e-07, "loss": 0.3158, "step": 41644 }, { "epoch": 4.233936559577064, "grad_norm": 0.28147372603416443, "learning_rate": 6.981806672706775e-07, "loss": 0.3008, "step": 41645 }, { "epoch": 4.234038226921513, "grad_norm": 0.2793722450733185, "learning_rate": 6.979997997104948e-07, "loss": 0.3065, "step": 41646 }, { "epoch": 4.234139894265962, "grad_norm": 0.30463531613349915, "learning_rate": 6.97818953822767e-07, "loss": 0.2789, "step": 41647 }, { "epoch": 4.2342415616104105, "grad_norm": 0.279121458530426, "learning_rate": 6.976381296084034e-07, "loss": 0.2575, "step": 41648 }, { "epoch": 4.2343432289548595, "grad_norm": 0.29145270586013794, "learning_rate": 6.974573270683183e-07, "loss": 0.3231, "step": 41649 }, { "epoch": 4.234444896299308, "grad_norm": 0.28476646542549133, "learning_rate": 6.972765462034204e-07, "loss": 0.3217, "step": 41650 }, { "epoch": 4.234546563643757, "grad_norm": 0.2771858870983124, "learning_rate": 6.970957870146194e-07, "loss": 0.2919, "step": 41651 }, { "epoch": 4.234648230988206, "grad_norm": 0.27905794978141785, "learning_rate": 6.969150495028287e-07, "loss": 0.3019, "step": 41652 }, { "epoch": 4.234749898332655, "grad_norm": 0.2888944745063782, "learning_rate": 6.967343336689575e-07, "loss": 0.3228, "step": 41653 }, { "epoch": 4.234851565677104, "grad_norm": 0.28887149691581726, "learning_rate": 6.965536395139161e-07, "loss": 0.2943, "step": 41654 }, { "epoch": 4.234953233021553, "grad_norm": 0.28570541739463806, "learning_rate": 6.963729670386133e-07, "loss": 0.3023, "step": 41655 }, { "epoch": 4.235054900366002, "grad_norm": 0.26549258828163147, "learning_rate": 6.961923162439621e-07, "loss": 0.2739, "step": 41656 }, { "epoch": 4.235156567710451, "grad_norm": 0.2709222435951233, "learning_rate": 6.960116871308714e-07, "loss": 0.3251, "step": 41657 }, { "epoch": 4.2352582350549, "grad_norm": 0.2998274564743042, "learning_rate": 6.958310797002499e-07, "loss": 0.3101, "step": 41658 }, { "epoch": 4.235359902399349, "grad_norm": 0.27741187810897827, "learning_rate": 6.956504939530095e-07, "loss": 0.2842, "step": 41659 }, { "epoch": 4.235461569743798, "grad_norm": 0.31475967168807983, "learning_rate": 6.954699298900597e-07, "loss": 0.302, "step": 41660 }, { "epoch": 4.235563237088248, "grad_norm": 0.27277982234954834, "learning_rate": 6.95289387512308e-07, "loss": 0.3197, "step": 41661 }, { "epoch": 4.235664904432697, "grad_norm": 0.27395322918891907, "learning_rate": 6.951088668206663e-07, "loss": 0.2992, "step": 41662 }, { "epoch": 4.2357665717771456, "grad_norm": 0.2779708802700043, "learning_rate": 6.949283678160429e-07, "loss": 0.3048, "step": 41663 }, { "epoch": 4.2358682391215945, "grad_norm": 0.26822102069854736, "learning_rate": 6.947478904993477e-07, "loss": 0.2993, "step": 41664 }, { "epoch": 4.235969906466043, "grad_norm": 0.30880334973335266, "learning_rate": 6.945674348714887e-07, "loss": 0.2993, "step": 41665 }, { "epoch": 4.236071573810492, "grad_norm": 0.2913769483566284, "learning_rate": 6.943870009333765e-07, "loss": 0.2852, "step": 41666 }, { "epoch": 4.236173241154941, "grad_norm": 0.3069344162940979, "learning_rate": 6.942065886859195e-07, "loss": 0.297, "step": 41667 }, { "epoch": 4.23627490849939, "grad_norm": 0.2732561230659485, "learning_rate": 6.940261981300256e-07, "loss": 0.2937, "step": 41668 }, { "epoch": 4.236376575843839, "grad_norm": 0.27269643545150757, "learning_rate": 6.938458292666056e-07, "loss": 0.3303, "step": 41669 }, { "epoch": 4.236478243188288, "grad_norm": 0.2713351845741272, "learning_rate": 6.936654820965666e-07, "loss": 0.3197, "step": 41670 }, { "epoch": 4.236579910532737, "grad_norm": 0.30992183089256287, "learning_rate": 6.934851566208168e-07, "loss": 0.3082, "step": 41671 }, { "epoch": 4.236681577877186, "grad_norm": 0.26051661372184753, "learning_rate": 6.933048528402663e-07, "loss": 0.3093, "step": 41672 }, { "epoch": 4.236783245221635, "grad_norm": 0.3011193573474884, "learning_rate": 6.931245707558226e-07, "loss": 0.3081, "step": 41673 }, { "epoch": 4.236884912566084, "grad_norm": 0.28540998697280884, "learning_rate": 6.929443103683942e-07, "loss": 0.2984, "step": 41674 }, { "epoch": 4.236986579910533, "grad_norm": 0.2987014651298523, "learning_rate": 6.927640716788869e-07, "loss": 0.3018, "step": 41675 }, { "epoch": 4.237088247254982, "grad_norm": 0.28704556822776794, "learning_rate": 6.925838546882124e-07, "loss": 0.3199, "step": 41676 }, { "epoch": 4.237189914599431, "grad_norm": 0.2861112356185913, "learning_rate": 6.924036593972766e-07, "loss": 0.3056, "step": 41677 }, { "epoch": 4.23729158194388, "grad_norm": 0.2734535038471222, "learning_rate": 6.922234858069865e-07, "loss": 0.2986, "step": 41678 }, { "epoch": 4.237393249288329, "grad_norm": 0.2833627462387085, "learning_rate": 6.92043333918252e-07, "loss": 0.2938, "step": 41679 }, { "epoch": 4.2374949166327776, "grad_norm": 0.28793594241142273, "learning_rate": 6.918632037319794e-07, "loss": 0.2842, "step": 41680 }, { "epoch": 4.2375965839772265, "grad_norm": 0.2722614109516144, "learning_rate": 6.916830952490755e-07, "loss": 0.2948, "step": 41681 }, { "epoch": 4.237698251321675, "grad_norm": 0.27027085423469543, "learning_rate": 6.915030084704488e-07, "loss": 0.2983, "step": 41682 }, { "epoch": 4.237799918666124, "grad_norm": 0.27368006110191345, "learning_rate": 6.913229433970064e-07, "loss": 0.2725, "step": 41683 }, { "epoch": 4.237901586010573, "grad_norm": 0.2957993745803833, "learning_rate": 6.911429000296549e-07, "loss": 0.3151, "step": 41684 }, { "epoch": 4.238003253355022, "grad_norm": 0.2648056745529175, "learning_rate": 6.909628783693007e-07, "loss": 0.287, "step": 41685 }, { "epoch": 4.238104920699471, "grad_norm": 0.265641987323761, "learning_rate": 6.907828784168524e-07, "loss": 0.2922, "step": 41686 }, { "epoch": 4.23820658804392, "grad_norm": 0.2733708918094635, "learning_rate": 6.906029001732162e-07, "loss": 0.2759, "step": 41687 }, { "epoch": 4.238308255388369, "grad_norm": 0.27485570311546326, "learning_rate": 6.904229436392973e-07, "loss": 0.2766, "step": 41688 }, { "epoch": 4.238409922732818, "grad_norm": 0.28590264916419983, "learning_rate": 6.902430088160045e-07, "loss": 0.3052, "step": 41689 }, { "epoch": 4.238511590077267, "grad_norm": 0.30047547817230225, "learning_rate": 6.900630957042431e-07, "loss": 0.293, "step": 41690 }, { "epoch": 4.238613257421716, "grad_norm": 0.28976646065711975, "learning_rate": 6.898832043049186e-07, "loss": 0.2872, "step": 41691 }, { "epoch": 4.238714924766165, "grad_norm": 0.27740880846977234, "learning_rate": 6.897033346189396e-07, "loss": 0.323, "step": 41692 }, { "epoch": 4.238816592110614, "grad_norm": 0.2840094268321991, "learning_rate": 6.895234866472111e-07, "loss": 0.291, "step": 41693 }, { "epoch": 4.238918259455063, "grad_norm": 0.2901371717453003, "learning_rate": 6.893436603906384e-07, "loss": 0.2939, "step": 41694 }, { "epoch": 4.239019926799512, "grad_norm": 0.2712232172489166, "learning_rate": 6.891638558501273e-07, "loss": 0.2985, "step": 41695 }, { "epoch": 4.239121594143961, "grad_norm": 0.3024905025959015, "learning_rate": 6.889840730265851e-07, "loss": 0.3114, "step": 41696 }, { "epoch": 4.2392232614884096, "grad_norm": 0.260647177696228, "learning_rate": 6.88804311920917e-07, "loss": 0.2884, "step": 41697 }, { "epoch": 4.2393249288328585, "grad_norm": 0.2995554208755493, "learning_rate": 6.886245725340268e-07, "loss": 0.2937, "step": 41698 }, { "epoch": 4.239426596177307, "grad_norm": 0.26190701127052307, "learning_rate": 6.884448548668226e-07, "loss": 0.3141, "step": 41699 }, { "epoch": 4.239528263521757, "grad_norm": 0.2820497453212738, "learning_rate": 6.882651589202088e-07, "loss": 0.2893, "step": 41700 }, { "epoch": 4.239629930866206, "grad_norm": 0.2781974971294403, "learning_rate": 6.880854846950896e-07, "loss": 0.2874, "step": 41701 }, { "epoch": 4.239731598210655, "grad_norm": 0.28053322434425354, "learning_rate": 6.879058321923721e-07, "loss": 0.2941, "step": 41702 }, { "epoch": 4.239833265555104, "grad_norm": 0.27477139234542847, "learning_rate": 6.877262014129599e-07, "loss": 0.2813, "step": 41703 }, { "epoch": 4.239934932899553, "grad_norm": 0.28152260184288025, "learning_rate": 6.87546592357759e-07, "loss": 0.2746, "step": 41704 }, { "epoch": 4.240036600244002, "grad_norm": 0.29653364419937134, "learning_rate": 6.873670050276721e-07, "loss": 0.2701, "step": 41705 }, { "epoch": 4.240138267588451, "grad_norm": 0.2903878092765808, "learning_rate": 6.871874394236061e-07, "loss": 0.3097, "step": 41706 }, { "epoch": 4.2402399349329, "grad_norm": 0.27563467621803284, "learning_rate": 6.870078955464654e-07, "loss": 0.3071, "step": 41707 }, { "epoch": 4.240341602277349, "grad_norm": 0.28995364904403687, "learning_rate": 6.868283733971526e-07, "loss": 0.3196, "step": 41708 }, { "epoch": 4.240443269621798, "grad_norm": 0.28275832533836365, "learning_rate": 6.866488729765753e-07, "loss": 0.297, "step": 41709 }, { "epoch": 4.240544936966247, "grad_norm": 0.2779318392276764, "learning_rate": 6.864693942856348e-07, "loss": 0.2787, "step": 41710 }, { "epoch": 4.240646604310696, "grad_norm": 0.3002875745296478, "learning_rate": 6.862899373252363e-07, "loss": 0.3079, "step": 41711 }, { "epoch": 4.240748271655145, "grad_norm": 0.2776101529598236, "learning_rate": 6.86110502096285e-07, "loss": 0.2961, "step": 41712 }, { "epoch": 4.2408499389995935, "grad_norm": 0.2776244282722473, "learning_rate": 6.859310885996834e-07, "loss": 0.2955, "step": 41713 }, { "epoch": 4.2409516063440424, "grad_norm": 0.2652820646762848, "learning_rate": 6.85751696836336e-07, "loss": 0.3045, "step": 41714 }, { "epoch": 4.241053273688491, "grad_norm": 0.28393077850341797, "learning_rate": 6.855723268071451e-07, "loss": 0.2503, "step": 41715 }, { "epoch": 4.24115494103294, "grad_norm": 0.2651228606700897, "learning_rate": 6.85392978513017e-07, "loss": 0.26, "step": 41716 }, { "epoch": 4.241256608377389, "grad_norm": 0.305031418800354, "learning_rate": 6.852136519548536e-07, "loss": 0.2897, "step": 41717 }, { "epoch": 4.241358275721838, "grad_norm": 0.2977379858493805, "learning_rate": 6.850343471335574e-07, "loss": 0.3092, "step": 41718 }, { "epoch": 4.241459943066287, "grad_norm": 0.2770276367664337, "learning_rate": 6.848550640500334e-07, "loss": 0.2915, "step": 41719 }, { "epoch": 4.241561610410736, "grad_norm": 0.2815532386302948, "learning_rate": 6.846758027051848e-07, "loss": 0.2839, "step": 41720 }, { "epoch": 4.241663277755185, "grad_norm": 0.2763630747795105, "learning_rate": 6.844965630999123e-07, "loss": 0.2896, "step": 41721 }, { "epoch": 4.241764945099634, "grad_norm": 0.26562216877937317, "learning_rate": 6.843173452351226e-07, "loss": 0.3017, "step": 41722 }, { "epoch": 4.241866612444083, "grad_norm": 0.28230440616607666, "learning_rate": 6.841381491117161e-07, "loss": 0.3032, "step": 41723 }, { "epoch": 4.241968279788532, "grad_norm": 0.2676805555820465, "learning_rate": 6.839589747305959e-07, "loss": 0.3027, "step": 41724 }, { "epoch": 4.242069947132981, "grad_norm": 0.2656441926956177, "learning_rate": 6.837798220926634e-07, "loss": 0.3202, "step": 41725 }, { "epoch": 4.24217161447743, "grad_norm": 0.2663254737854004, "learning_rate": 6.836006911988236e-07, "loss": 0.2723, "step": 41726 }, { "epoch": 4.242273281821879, "grad_norm": 0.2861010432243347, "learning_rate": 6.834215820499785e-07, "loss": 0.2996, "step": 41727 }, { "epoch": 4.242374949166328, "grad_norm": 0.27554187178611755, "learning_rate": 6.832424946470279e-07, "loss": 0.2953, "step": 41728 }, { "epoch": 4.242476616510777, "grad_norm": 0.2773776948451996, "learning_rate": 6.830634289908783e-07, "loss": 0.2963, "step": 41729 }, { "epoch": 4.2425782838552255, "grad_norm": 0.29794228076934814, "learning_rate": 6.828843850824268e-07, "loss": 0.2723, "step": 41730 }, { "epoch": 4.2426799511996744, "grad_norm": 0.28254884481430054, "learning_rate": 6.827053629225777e-07, "loss": 0.264, "step": 41731 }, { "epoch": 4.242781618544123, "grad_norm": 0.2764771282672882, "learning_rate": 6.825263625122346e-07, "loss": 0.29, "step": 41732 }, { "epoch": 4.242883285888572, "grad_norm": 0.2652878761291504, "learning_rate": 6.823473838522976e-07, "loss": 0.3257, "step": 41733 }, { "epoch": 4.242984953233021, "grad_norm": 0.26555153727531433, "learning_rate": 6.821684269436684e-07, "loss": 0.2794, "step": 41734 }, { "epoch": 4.24308662057747, "grad_norm": 0.27832746505737305, "learning_rate": 6.819894917872478e-07, "loss": 0.3418, "step": 41735 }, { "epoch": 4.243188287921919, "grad_norm": 0.28893351554870605, "learning_rate": 6.818105783839385e-07, "loss": 0.2988, "step": 41736 }, { "epoch": 4.243289955266368, "grad_norm": 0.2953009307384491, "learning_rate": 6.816316867346418e-07, "loss": 0.3042, "step": 41737 }, { "epoch": 4.243391622610817, "grad_norm": 0.2723018229007721, "learning_rate": 6.814528168402573e-07, "loss": 0.3138, "step": 41738 }, { "epoch": 4.243493289955266, "grad_norm": 0.2682569921016693, "learning_rate": 6.812739687016895e-07, "loss": 0.2812, "step": 41739 }, { "epoch": 4.243594957299715, "grad_norm": 0.2907172441482544, "learning_rate": 6.810951423198348e-07, "loss": 0.3466, "step": 41740 }, { "epoch": 4.243696624644164, "grad_norm": 0.2597523331642151, "learning_rate": 6.809163376955963e-07, "loss": 0.3386, "step": 41741 }, { "epoch": 4.243798291988613, "grad_norm": 0.29021430015563965, "learning_rate": 6.807375548298772e-07, "loss": 0.307, "step": 41742 }, { "epoch": 4.243899959333063, "grad_norm": 0.28965914249420166, "learning_rate": 6.805587937235741e-07, "loss": 0.3185, "step": 41743 }, { "epoch": 4.244001626677512, "grad_norm": 0.2942107319831848, "learning_rate": 6.803800543775902e-07, "loss": 0.295, "step": 41744 }, { "epoch": 4.2441032940219605, "grad_norm": 0.26244696974754333, "learning_rate": 6.802013367928239e-07, "loss": 0.2781, "step": 41745 }, { "epoch": 4.2442049613664095, "grad_norm": 0.26486721634864807, "learning_rate": 6.800226409701777e-07, "loss": 0.3045, "step": 41746 }, { "epoch": 4.244306628710858, "grad_norm": 0.2874619960784912, "learning_rate": 6.798439669105511e-07, "loss": 0.2853, "step": 41747 }, { "epoch": 4.244408296055307, "grad_norm": 0.31279173493385315, "learning_rate": 6.79665314614843e-07, "loss": 0.3005, "step": 41748 }, { "epoch": 4.244509963399756, "grad_norm": 0.30012524127960205, "learning_rate": 6.794866840839559e-07, "loss": 0.309, "step": 41749 }, { "epoch": 4.244611630744205, "grad_norm": 0.28594884276390076, "learning_rate": 6.793080753187864e-07, "loss": 0.2969, "step": 41750 }, { "epoch": 4.244713298088654, "grad_norm": 0.2754780352115631, "learning_rate": 6.791294883202365e-07, "loss": 0.3456, "step": 41751 }, { "epoch": 4.244814965433103, "grad_norm": 0.2690283954143524, "learning_rate": 6.789509230892072e-07, "loss": 0.2778, "step": 41752 }, { "epoch": 4.244916632777552, "grad_norm": 0.2719465494155884, "learning_rate": 6.787723796265938e-07, "loss": 0.3046, "step": 41753 }, { "epoch": 4.245018300122001, "grad_norm": 0.29043954610824585, "learning_rate": 6.785938579333001e-07, "loss": 0.2797, "step": 41754 }, { "epoch": 4.24511996746645, "grad_norm": 0.2790345251560211, "learning_rate": 6.78415358010222e-07, "loss": 0.275, "step": 41755 }, { "epoch": 4.245221634810899, "grad_norm": 0.26996514201164246, "learning_rate": 6.782368798582617e-07, "loss": 0.325, "step": 41756 }, { "epoch": 4.245323302155348, "grad_norm": 0.27101263403892517, "learning_rate": 6.780584234783166e-07, "loss": 0.295, "step": 41757 }, { "epoch": 4.245424969499797, "grad_norm": 0.2877499759197235, "learning_rate": 6.778799888712856e-07, "loss": 0.2808, "step": 41758 }, { "epoch": 4.245526636844246, "grad_norm": 0.2755436301231384, "learning_rate": 6.777015760380701e-07, "loss": 0.3143, "step": 41759 }, { "epoch": 4.245628304188695, "grad_norm": 0.29553329944610596, "learning_rate": 6.775231849795644e-07, "loss": 0.278, "step": 41760 }, { "epoch": 4.245729971533144, "grad_norm": 0.2994373142719269, "learning_rate": 6.7734481569667e-07, "loss": 0.2956, "step": 41761 }, { "epoch": 4.2458316388775925, "grad_norm": 0.28866153955459595, "learning_rate": 6.771664681902873e-07, "loss": 0.2912, "step": 41762 }, { "epoch": 4.2459333062220415, "grad_norm": 0.27230024337768555, "learning_rate": 6.769881424613106e-07, "loss": 0.3056, "step": 41763 }, { "epoch": 4.24603497356649, "grad_norm": 0.2734374403953552, "learning_rate": 6.768098385106415e-07, "loss": 0.2607, "step": 41764 }, { "epoch": 4.246136640910939, "grad_norm": 0.2849764823913574, "learning_rate": 6.766315563391768e-07, "loss": 0.2917, "step": 41765 }, { "epoch": 4.246238308255388, "grad_norm": 0.2851739525794983, "learning_rate": 6.764532959478138e-07, "loss": 0.308, "step": 41766 }, { "epoch": 4.246339975599837, "grad_norm": 0.2773844599723816, "learning_rate": 6.762750573374527e-07, "loss": 0.2935, "step": 41767 }, { "epoch": 4.246441642944286, "grad_norm": 0.3214508891105652, "learning_rate": 6.760968405089896e-07, "loss": 0.2881, "step": 41768 }, { "epoch": 4.246543310288735, "grad_norm": 0.28194907307624817, "learning_rate": 6.759186454633254e-07, "loss": 0.2739, "step": 41769 }, { "epoch": 4.246644977633184, "grad_norm": 0.2722240090370178, "learning_rate": 6.757404722013533e-07, "loss": 0.2904, "step": 41770 }, { "epoch": 4.246746644977633, "grad_norm": 0.28325238823890686, "learning_rate": 6.755623207239726e-07, "loss": 0.2852, "step": 41771 }, { "epoch": 4.246848312322082, "grad_norm": 0.2788279950618744, "learning_rate": 6.753841910320841e-07, "loss": 0.2821, "step": 41772 }, { "epoch": 4.246949979666531, "grad_norm": 0.26695114374160767, "learning_rate": 6.752060831265794e-07, "loss": 0.2764, "step": 41773 }, { "epoch": 4.24705164701098, "grad_norm": 0.2819032073020935, "learning_rate": 6.750279970083607e-07, "loss": 0.278, "step": 41774 }, { "epoch": 4.247153314355429, "grad_norm": 0.27715957164764404, "learning_rate": 6.748499326783226e-07, "loss": 0.3271, "step": 41775 }, { "epoch": 4.247254981699878, "grad_norm": 0.2745709717273712, "learning_rate": 6.746718901373628e-07, "loss": 0.2706, "step": 41776 }, { "epoch": 4.247356649044327, "grad_norm": 0.2940754294395447, "learning_rate": 6.744938693863773e-07, "loss": 0.324, "step": 41777 }, { "epoch": 4.247458316388776, "grad_norm": 0.2759496569633484, "learning_rate": 6.743158704262636e-07, "loss": 0.2831, "step": 41778 }, { "epoch": 4.2475599837332245, "grad_norm": 0.26194143295288086, "learning_rate": 6.741378932579206e-07, "loss": 0.2944, "step": 41779 }, { "epoch": 4.2476616510776735, "grad_norm": 0.28967875242233276, "learning_rate": 6.739599378822414e-07, "loss": 0.2643, "step": 41780 }, { "epoch": 4.247763318422122, "grad_norm": 0.26401999592781067, "learning_rate": 6.737820043001248e-07, "loss": 0.2778, "step": 41781 }, { "epoch": 4.247864985766572, "grad_norm": 0.2798800468444824, "learning_rate": 6.73604092512466e-07, "loss": 0.3024, "step": 41782 }, { "epoch": 4.247966653111021, "grad_norm": 0.25602996349334717, "learning_rate": 6.73426202520161e-07, "loss": 0.2853, "step": 41783 }, { "epoch": 4.24806832045547, "grad_norm": 0.2758488357067108, "learning_rate": 6.732483343241075e-07, "loss": 0.3008, "step": 41784 }, { "epoch": 4.248169987799919, "grad_norm": 0.27470773458480835, "learning_rate": 6.730704879252009e-07, "loss": 0.3199, "step": 41785 }, { "epoch": 4.248271655144368, "grad_norm": 0.2879675328731537, "learning_rate": 6.728926633243365e-07, "loss": 0.2676, "step": 41786 }, { "epoch": 4.248373322488817, "grad_norm": 0.26695990562438965, "learning_rate": 6.727148605224098e-07, "loss": 0.2909, "step": 41787 }, { "epoch": 4.248474989833266, "grad_norm": 0.28462356328964233, "learning_rate": 6.725370795203184e-07, "loss": 0.2762, "step": 41788 }, { "epoch": 4.248576657177715, "grad_norm": 0.2678562104701996, "learning_rate": 6.723593203189571e-07, "loss": 0.3248, "step": 41789 }, { "epoch": 4.248678324522164, "grad_norm": 0.25445422530174255, "learning_rate": 6.721815829192196e-07, "loss": 0.3148, "step": 41790 }, { "epoch": 4.248779991866613, "grad_norm": 0.30049148201942444, "learning_rate": 6.720038673220042e-07, "loss": 0.3196, "step": 41791 }, { "epoch": 4.248881659211062, "grad_norm": 0.29915058612823486, "learning_rate": 6.718261735282045e-07, "loss": 0.3181, "step": 41792 }, { "epoch": 4.248983326555511, "grad_norm": 0.28790315985679626, "learning_rate": 6.716485015387153e-07, "loss": 0.287, "step": 41793 }, { "epoch": 4.2490849938999595, "grad_norm": 0.27644771337509155, "learning_rate": 6.714708513544332e-07, "loss": 0.2862, "step": 41794 }, { "epoch": 4.2491866612444085, "grad_norm": 0.3163924515247345, "learning_rate": 6.712932229762525e-07, "loss": 0.2545, "step": 41795 }, { "epoch": 4.249288328588857, "grad_norm": 0.2879452109336853, "learning_rate": 6.71115616405068e-07, "loss": 0.3122, "step": 41796 }, { "epoch": 4.249389995933306, "grad_norm": 0.27971866726875305, "learning_rate": 6.709380316417729e-07, "loss": 0.2676, "step": 41797 }, { "epoch": 4.249491663277755, "grad_norm": 0.29998090863227844, "learning_rate": 6.707604686872648e-07, "loss": 0.3005, "step": 41798 }, { "epoch": 4.249593330622204, "grad_norm": 0.26611924171447754, "learning_rate": 6.705829275424364e-07, "loss": 0.3089, "step": 41799 }, { "epoch": 4.249694997966653, "grad_norm": 0.270805299282074, "learning_rate": 6.704054082081812e-07, "loss": 0.3004, "step": 41800 }, { "epoch": 4.249796665311102, "grad_norm": 0.29165321588516235, "learning_rate": 6.70227910685396e-07, "loss": 0.3347, "step": 41801 }, { "epoch": 4.249898332655551, "grad_norm": 0.3045492470264435, "learning_rate": 6.700504349749732e-07, "loss": 0.3315, "step": 41802 }, { "epoch": 4.25, "grad_norm": 0.29827484488487244, "learning_rate": 6.698729810778065e-07, "loss": 0.3068, "step": 41803 }, { "epoch": 4.250101667344449, "grad_norm": 0.26711320877075195, "learning_rate": 6.696955489947921e-07, "loss": 0.3035, "step": 41804 }, { "epoch": 4.250203334688898, "grad_norm": 0.2627474367618561, "learning_rate": 6.695181387268224e-07, "loss": 0.3075, "step": 41805 }, { "epoch": 4.250305002033347, "grad_norm": 0.2619345188140869, "learning_rate": 6.693407502747912e-07, "loss": 0.2952, "step": 41806 }, { "epoch": 4.250406669377796, "grad_norm": 0.2853834927082062, "learning_rate": 6.691633836395911e-07, "loss": 0.2941, "step": 41807 }, { "epoch": 4.250508336722245, "grad_norm": 0.28378063440322876, "learning_rate": 6.689860388221175e-07, "loss": 0.2889, "step": 41808 }, { "epoch": 4.250610004066694, "grad_norm": 0.26033467054367065, "learning_rate": 6.688087158232631e-07, "loss": 0.3196, "step": 41809 }, { "epoch": 4.250711671411143, "grad_norm": 0.2802436947822571, "learning_rate": 6.686314146439204e-07, "loss": 0.2764, "step": 41810 }, { "epoch": 4.2508133387555915, "grad_norm": 0.28580012917518616, "learning_rate": 6.684541352849843e-07, "loss": 0.329, "step": 41811 }, { "epoch": 4.2509150061000405, "grad_norm": 0.2716962695121765, "learning_rate": 6.682768777473469e-07, "loss": 0.2956, "step": 41812 }, { "epoch": 4.251016673444489, "grad_norm": 0.2685284912586212, "learning_rate": 6.680996420318997e-07, "loss": 0.294, "step": 41813 }, { "epoch": 4.251118340788938, "grad_norm": 0.29372861981391907, "learning_rate": 6.679224281395385e-07, "loss": 0.3061, "step": 41814 }, { "epoch": 4.251220008133387, "grad_norm": 0.30238616466522217, "learning_rate": 6.677452360711551e-07, "loss": 0.2809, "step": 41815 }, { "epoch": 4.251321675477836, "grad_norm": 0.2788555920124054, "learning_rate": 6.675680658276412e-07, "loss": 0.302, "step": 41816 }, { "epoch": 4.251423342822285, "grad_norm": 0.2898631989955902, "learning_rate": 6.673909174098891e-07, "loss": 0.3089, "step": 41817 }, { "epoch": 4.251525010166734, "grad_norm": 0.2900763154029846, "learning_rate": 6.672137908187925e-07, "loss": 0.2905, "step": 41818 }, { "epoch": 4.251626677511183, "grad_norm": 0.3027103543281555, "learning_rate": 6.670366860552435e-07, "loss": 0.2733, "step": 41819 }, { "epoch": 4.251728344855632, "grad_norm": 0.28938549757003784, "learning_rate": 6.66859603120133e-07, "loss": 0.2892, "step": 41820 }, { "epoch": 4.251830012200081, "grad_norm": 0.2638365924358368, "learning_rate": 6.666825420143552e-07, "loss": 0.3099, "step": 41821 }, { "epoch": 4.25193167954453, "grad_norm": 0.2884344458580017, "learning_rate": 6.665055027388012e-07, "loss": 0.3019, "step": 41822 }, { "epoch": 4.252033346888979, "grad_norm": 0.27730438113212585, "learning_rate": 6.663284852943608e-07, "loss": 0.3285, "step": 41823 }, { "epoch": 4.252135014233428, "grad_norm": 0.2742918133735657, "learning_rate": 6.661514896819294e-07, "loss": 0.3024, "step": 41824 }, { "epoch": 4.252236681577878, "grad_norm": 0.2761891782283783, "learning_rate": 6.659745159023967e-07, "loss": 0.3121, "step": 41825 }, { "epoch": 4.2523383489223265, "grad_norm": 0.26734644174575806, "learning_rate": 6.65797563956655e-07, "loss": 0.3023, "step": 41826 }, { "epoch": 4.2524400162667755, "grad_norm": 0.26691827178001404, "learning_rate": 6.656206338455934e-07, "loss": 0.334, "step": 41827 }, { "epoch": 4.252541683611224, "grad_norm": 0.29086536169052124, "learning_rate": 6.654437255701062e-07, "loss": 0.2897, "step": 41828 }, { "epoch": 4.252643350955673, "grad_norm": 0.27472227811813354, "learning_rate": 6.652668391310841e-07, "loss": 0.2826, "step": 41829 }, { "epoch": 4.252745018300122, "grad_norm": 0.2759804129600525, "learning_rate": 6.65089974529416e-07, "loss": 0.319, "step": 41830 }, { "epoch": 4.252846685644571, "grad_norm": 0.2762538492679596, "learning_rate": 6.649131317659952e-07, "loss": 0.3289, "step": 41831 }, { "epoch": 4.25294835298902, "grad_norm": 0.2933962941169739, "learning_rate": 6.647363108417126e-07, "loss": 0.3171, "step": 41832 }, { "epoch": 4.253050020333469, "grad_norm": 0.29338178038597107, "learning_rate": 6.645595117574566e-07, "loss": 0.3001, "step": 41833 }, { "epoch": 4.253151687677918, "grad_norm": 0.2877689599990845, "learning_rate": 6.643827345141207e-07, "loss": 0.3108, "step": 41834 }, { "epoch": 4.253253355022367, "grad_norm": 0.28000736236572266, "learning_rate": 6.642059791125948e-07, "loss": 0.3036, "step": 41835 }, { "epoch": 4.253355022366816, "grad_norm": 0.2769663333892822, "learning_rate": 6.640292455537683e-07, "loss": 0.3058, "step": 41836 }, { "epoch": 4.253456689711265, "grad_norm": 0.2656303942203522, "learning_rate": 6.638525338385309e-07, "loss": 0.3019, "step": 41837 }, { "epoch": 4.253558357055714, "grad_norm": 0.26784446835517883, "learning_rate": 6.636758439677754e-07, "loss": 0.3282, "step": 41838 }, { "epoch": 4.253660024400163, "grad_norm": 0.271284282207489, "learning_rate": 6.634991759423909e-07, "loss": 0.3056, "step": 41839 }, { "epoch": 4.253761691744612, "grad_norm": 0.2743825614452362, "learning_rate": 6.63322529763265e-07, "loss": 0.3028, "step": 41840 }, { "epoch": 4.253863359089061, "grad_norm": 0.2877194881439209, "learning_rate": 6.631459054312916e-07, "loss": 0.2997, "step": 41841 }, { "epoch": 4.25396502643351, "grad_norm": 0.287649542093277, "learning_rate": 6.629693029473583e-07, "loss": 0.2926, "step": 41842 }, { "epoch": 4.2540666937779585, "grad_norm": 0.26735296845436096, "learning_rate": 6.627927223123537e-07, "loss": 0.2939, "step": 41843 }, { "epoch": 4.2541683611224075, "grad_norm": 0.2971633970737457, "learning_rate": 6.6261616352717e-07, "loss": 0.2739, "step": 41844 }, { "epoch": 4.254270028466856, "grad_norm": 0.26936420798301697, "learning_rate": 6.624396265926953e-07, "loss": 0.2741, "step": 41845 }, { "epoch": 4.254371695811305, "grad_norm": 0.25939854979515076, "learning_rate": 6.622631115098188e-07, "loss": 0.3101, "step": 41846 }, { "epoch": 4.254473363155754, "grad_norm": 0.30138033628463745, "learning_rate": 6.620866182794289e-07, "loss": 0.2847, "step": 41847 }, { "epoch": 4.254575030500203, "grad_norm": 0.26433730125427246, "learning_rate": 6.61910146902417e-07, "loss": 0.3156, "step": 41848 }, { "epoch": 4.254676697844652, "grad_norm": 0.2994454801082611, "learning_rate": 6.617336973796707e-07, "loss": 0.3112, "step": 41849 }, { "epoch": 4.254778365189101, "grad_norm": 0.290819376707077, "learning_rate": 6.615572697120781e-07, "loss": 0.2919, "step": 41850 }, { "epoch": 4.25488003253355, "grad_norm": 0.32889440655708313, "learning_rate": 6.613808639005303e-07, "loss": 0.3276, "step": 41851 }, { "epoch": 4.254981699877999, "grad_norm": 0.2819819748401642, "learning_rate": 6.612044799459144e-07, "loss": 0.3282, "step": 41852 }, { "epoch": 4.255083367222448, "grad_norm": 0.2805086076259613, "learning_rate": 6.610281178491185e-07, "loss": 0.268, "step": 41853 }, { "epoch": 4.255185034566897, "grad_norm": 0.2589694559574127, "learning_rate": 6.608517776110324e-07, "loss": 0.2876, "step": 41854 }, { "epoch": 4.255286701911346, "grad_norm": 0.28097718954086304, "learning_rate": 6.606754592325443e-07, "loss": 0.2853, "step": 41855 }, { "epoch": 4.255388369255795, "grad_norm": 0.29546064138412476, "learning_rate": 6.604991627145424e-07, "loss": 0.2983, "step": 41856 }, { "epoch": 4.255490036600244, "grad_norm": 0.29999858140945435, "learning_rate": 6.603228880579127e-07, "loss": 0.34, "step": 41857 }, { "epoch": 4.255591703944693, "grad_norm": 0.2716997563838959, "learning_rate": 6.601466352635466e-07, "loss": 0.3127, "step": 41858 }, { "epoch": 4.255693371289142, "grad_norm": 0.29443660378456116, "learning_rate": 6.599704043323301e-07, "loss": 0.3156, "step": 41859 }, { "epoch": 4.2557950386335905, "grad_norm": 0.2723855674266815, "learning_rate": 6.597941952651505e-07, "loss": 0.2902, "step": 41860 }, { "epoch": 4.2558967059780395, "grad_norm": 0.273314505815506, "learning_rate": 6.596180080628972e-07, "loss": 0.3242, "step": 41861 }, { "epoch": 4.255998373322488, "grad_norm": 0.29856666922569275, "learning_rate": 6.594418427264576e-07, "loss": 0.3054, "step": 41862 }, { "epoch": 4.256100040666937, "grad_norm": 0.2677396237850189, "learning_rate": 6.592656992567165e-07, "loss": 0.2986, "step": 41863 }, { "epoch": 4.256201708011387, "grad_norm": 0.28750839829444885, "learning_rate": 6.590895776545653e-07, "loss": 0.3207, "step": 41864 }, { "epoch": 4.256303375355836, "grad_norm": 0.28184959292411804, "learning_rate": 6.589134779208883e-07, "loss": 0.2684, "step": 41865 }, { "epoch": 4.256405042700285, "grad_norm": 0.285449355840683, "learning_rate": 6.587374000565744e-07, "loss": 0.2972, "step": 41866 }, { "epoch": 4.256506710044734, "grad_norm": 0.27778226137161255, "learning_rate": 6.585613440625083e-07, "loss": 0.3187, "step": 41867 }, { "epoch": 4.256608377389183, "grad_norm": 0.28183284401893616, "learning_rate": 6.583853099395799e-07, "loss": 0.3223, "step": 41868 }, { "epoch": 4.256710044733632, "grad_norm": 0.2645115852355957, "learning_rate": 6.58209297688674e-07, "loss": 0.2786, "step": 41869 }, { "epoch": 4.256811712078081, "grad_norm": 0.2898024618625641, "learning_rate": 6.580333073106771e-07, "loss": 0.3085, "step": 41870 }, { "epoch": 4.25691337942253, "grad_norm": 0.28070542216300964, "learning_rate": 6.57857338806478e-07, "loss": 0.3255, "step": 41871 }, { "epoch": 4.257015046766979, "grad_norm": 0.2572745978832245, "learning_rate": 6.576813921769615e-07, "loss": 0.2963, "step": 41872 }, { "epoch": 4.257116714111428, "grad_norm": 0.26309123635292053, "learning_rate": 6.57505467423013e-07, "loss": 0.3247, "step": 41873 }, { "epoch": 4.257218381455877, "grad_norm": 0.317454993724823, "learning_rate": 6.573295645455213e-07, "loss": 0.2932, "step": 41874 }, { "epoch": 4.2573200488003256, "grad_norm": 0.2755453288555145, "learning_rate": 6.571536835453712e-07, "loss": 0.2879, "step": 41875 }, { "epoch": 4.2574217161447745, "grad_norm": 0.2954508662223816, "learning_rate": 6.569778244234493e-07, "loss": 0.3304, "step": 41876 }, { "epoch": 4.257523383489223, "grad_norm": 0.2690412104129791, "learning_rate": 6.568019871806391e-07, "loss": 0.297, "step": 41877 }, { "epoch": 4.257625050833672, "grad_norm": 0.30544841289520264, "learning_rate": 6.566261718178302e-07, "loss": 0.3243, "step": 41878 }, { "epoch": 4.257726718178121, "grad_norm": 0.2825010418891907, "learning_rate": 6.56450378335906e-07, "loss": 0.2929, "step": 41879 }, { "epoch": 4.25782838552257, "grad_norm": 0.3067675232887268, "learning_rate": 6.562746067357517e-07, "loss": 0.2971, "step": 41880 }, { "epoch": 4.257930052867019, "grad_norm": 0.29637789726257324, "learning_rate": 6.560988570182564e-07, "loss": 0.3008, "step": 41881 }, { "epoch": 4.258031720211468, "grad_norm": 0.26428288221359253, "learning_rate": 6.559231291843e-07, "loss": 0.2921, "step": 41882 }, { "epoch": 4.258133387555917, "grad_norm": 0.28859519958496094, "learning_rate": 6.557474232347704e-07, "loss": 0.3119, "step": 41883 }, { "epoch": 4.258235054900366, "grad_norm": 0.2899990975856781, "learning_rate": 6.555717391705546e-07, "loss": 0.2741, "step": 41884 }, { "epoch": 4.258336722244815, "grad_norm": 0.2614865005016327, "learning_rate": 6.553960769925355e-07, "loss": 0.2948, "step": 41885 }, { "epoch": 4.258438389589264, "grad_norm": 0.2939147651195526, "learning_rate": 6.552204367015991e-07, "loss": 0.3017, "step": 41886 }, { "epoch": 4.258540056933713, "grad_norm": 0.2727549374103546, "learning_rate": 6.55044818298628e-07, "loss": 0.2832, "step": 41887 }, { "epoch": 4.258641724278162, "grad_norm": 0.30608004331588745, "learning_rate": 6.548692217845099e-07, "loss": 0.277, "step": 41888 }, { "epoch": 4.258743391622611, "grad_norm": 0.29298946261405945, "learning_rate": 6.546936471601284e-07, "loss": 0.3204, "step": 41889 }, { "epoch": 4.25884505896706, "grad_norm": 0.3033565282821655, "learning_rate": 6.545180944263662e-07, "loss": 0.2665, "step": 41890 }, { "epoch": 4.258946726311509, "grad_norm": 0.2703975439071655, "learning_rate": 6.543425635841111e-07, "loss": 0.2615, "step": 41891 }, { "epoch": 4.2590483936559576, "grad_norm": 0.26958176493644714, "learning_rate": 6.541670546342438e-07, "loss": 0.3024, "step": 41892 }, { "epoch": 4.2591500610004065, "grad_norm": 0.2785632610321045, "learning_rate": 6.539915675776503e-07, "loss": 0.3243, "step": 41893 }, { "epoch": 4.259251728344855, "grad_norm": 0.28595399856567383, "learning_rate": 6.538161024152162e-07, "loss": 0.2983, "step": 41894 }, { "epoch": 4.259353395689304, "grad_norm": 0.2970272898674011, "learning_rate": 6.536406591478217e-07, "loss": 0.3062, "step": 41895 }, { "epoch": 4.259455063033753, "grad_norm": 0.2749948501586914, "learning_rate": 6.534652377763539e-07, "loss": 0.2728, "step": 41896 }, { "epoch": 4.259556730378202, "grad_norm": 0.28965094685554504, "learning_rate": 6.532898383016944e-07, "loss": 0.3258, "step": 41897 }, { "epoch": 4.259658397722651, "grad_norm": 0.27105259895324707, "learning_rate": 6.531144607247286e-07, "loss": 0.2692, "step": 41898 }, { "epoch": 4.2597600650671, "grad_norm": 0.2715206444263458, "learning_rate": 6.529391050463391e-07, "loss": 0.2967, "step": 41899 }, { "epoch": 4.259861732411549, "grad_norm": 0.27778252959251404, "learning_rate": 6.527637712674084e-07, "loss": 0.2886, "step": 41900 }, { "epoch": 4.259963399755998, "grad_norm": 0.29085204005241394, "learning_rate": 6.525884593888232e-07, "loss": 0.2683, "step": 41901 }, { "epoch": 4.260065067100447, "grad_norm": 0.29018834233283997, "learning_rate": 6.524131694114616e-07, "loss": 0.2823, "step": 41902 }, { "epoch": 4.260166734444896, "grad_norm": 0.2706895172595978, "learning_rate": 6.522379013362095e-07, "loss": 0.2965, "step": 41903 }, { "epoch": 4.260268401789345, "grad_norm": 0.2893710732460022, "learning_rate": 6.520626551639519e-07, "loss": 0.3203, "step": 41904 }, { "epoch": 4.260370069133794, "grad_norm": 0.2784956097602844, "learning_rate": 6.518874308955675e-07, "loss": 0.2999, "step": 41905 }, { "epoch": 4.260471736478243, "grad_norm": 0.2524997293949127, "learning_rate": 6.517122285319416e-07, "loss": 0.3067, "step": 41906 }, { "epoch": 4.260573403822693, "grad_norm": 0.26824963092803955, "learning_rate": 6.515370480739552e-07, "loss": 0.2872, "step": 41907 }, { "epoch": 4.2606750711671415, "grad_norm": 0.26988282799720764, "learning_rate": 6.513618895224933e-07, "loss": 0.279, "step": 41908 }, { "epoch": 4.2607767385115904, "grad_norm": 0.26745936274528503, "learning_rate": 6.511867528784366e-07, "loss": 0.302, "step": 41909 }, { "epoch": 4.260878405856039, "grad_norm": 0.28680819272994995, "learning_rate": 6.510116381426663e-07, "loss": 0.2795, "step": 41910 }, { "epoch": 4.260980073200488, "grad_norm": 0.29151880741119385, "learning_rate": 6.508365453160681e-07, "loss": 0.2709, "step": 41911 }, { "epoch": 4.261081740544937, "grad_norm": 0.2636845111846924, "learning_rate": 6.506614743995194e-07, "loss": 0.2886, "step": 41912 }, { "epoch": 4.261183407889386, "grad_norm": 0.27005186676979065, "learning_rate": 6.504864253939053e-07, "loss": 0.2839, "step": 41913 }, { "epoch": 4.261285075233835, "grad_norm": 0.27946987748146057, "learning_rate": 6.503113983001086e-07, "loss": 0.328, "step": 41914 }, { "epoch": 4.261386742578284, "grad_norm": 0.27057233452796936, "learning_rate": 6.501363931190074e-07, "loss": 0.278, "step": 41915 }, { "epoch": 4.261488409922733, "grad_norm": 0.29667821526527405, "learning_rate": 6.499614098514867e-07, "loss": 0.2869, "step": 41916 }, { "epoch": 4.261590077267182, "grad_norm": 0.27408379316329956, "learning_rate": 6.497864484984262e-07, "loss": 0.2806, "step": 41917 }, { "epoch": 4.261691744611631, "grad_norm": 0.2535285949707031, "learning_rate": 6.496115090607069e-07, "loss": 0.2793, "step": 41918 }, { "epoch": 4.26179341195608, "grad_norm": 0.27364763617515564, "learning_rate": 6.494365915392125e-07, "loss": 0.2952, "step": 41919 }, { "epoch": 4.261895079300529, "grad_norm": 0.29024505615234375, "learning_rate": 6.492616959348214e-07, "loss": 0.284, "step": 41920 }, { "epoch": 4.261996746644978, "grad_norm": 0.2839790880680084, "learning_rate": 6.490868222484182e-07, "loss": 0.3123, "step": 41921 }, { "epoch": 4.262098413989427, "grad_norm": 0.28218963742256165, "learning_rate": 6.489119704808794e-07, "loss": 0.3174, "step": 41922 }, { "epoch": 4.262200081333876, "grad_norm": 0.296377569437027, "learning_rate": 6.487371406330877e-07, "loss": 0.2851, "step": 41923 }, { "epoch": 4.262301748678325, "grad_norm": 0.279525488615036, "learning_rate": 6.485623327059271e-07, "loss": 0.2894, "step": 41924 }, { "epoch": 4.2624034160227735, "grad_norm": 0.2931874394416809, "learning_rate": 6.483875467002726e-07, "loss": 0.2807, "step": 41925 }, { "epoch": 4.2625050833672224, "grad_norm": 0.26737985014915466, "learning_rate": 6.482127826170093e-07, "loss": 0.2615, "step": 41926 }, { "epoch": 4.262606750711671, "grad_norm": 0.2564407289028168, "learning_rate": 6.480380404570153e-07, "loss": 0.2669, "step": 41927 }, { "epoch": 4.26270841805612, "grad_norm": 0.27203142642974854, "learning_rate": 6.478633202211709e-07, "loss": 0.2885, "step": 41928 }, { "epoch": 4.262810085400569, "grad_norm": 0.27826792001724243, "learning_rate": 6.476886219103573e-07, "loss": 0.2821, "step": 41929 }, { "epoch": 4.262911752745018, "grad_norm": 0.2757904529571533, "learning_rate": 6.475139455254548e-07, "loss": 0.3291, "step": 41930 }, { "epoch": 4.263013420089467, "grad_norm": 0.2972455620765686, "learning_rate": 6.473392910673427e-07, "loss": 0.3174, "step": 41931 }, { "epoch": 4.263115087433916, "grad_norm": 0.29295065999031067, "learning_rate": 6.471646585368991e-07, "loss": 0.2955, "step": 41932 }, { "epoch": 4.263216754778365, "grad_norm": 0.26763346791267395, "learning_rate": 6.469900479350061e-07, "loss": 0.3544, "step": 41933 }, { "epoch": 4.263318422122814, "grad_norm": 0.28071731328964233, "learning_rate": 6.468154592625447e-07, "loss": 0.2925, "step": 41934 }, { "epoch": 4.263420089467263, "grad_norm": 0.2739049196243286, "learning_rate": 6.46640892520391e-07, "loss": 0.3113, "step": 41935 }, { "epoch": 4.263521756811712, "grad_norm": 0.25492313504219055, "learning_rate": 6.464663477094263e-07, "loss": 0.2851, "step": 41936 }, { "epoch": 4.263623424156161, "grad_norm": 0.282832533121109, "learning_rate": 6.462918248305299e-07, "loss": 0.2878, "step": 41937 }, { "epoch": 4.26372509150061, "grad_norm": 0.3029814064502716, "learning_rate": 6.461173238845797e-07, "loss": 0.2976, "step": 41938 }, { "epoch": 4.263826758845059, "grad_norm": 0.30068257451057434, "learning_rate": 6.45942844872457e-07, "loss": 0.2693, "step": 41939 }, { "epoch": 4.263928426189508, "grad_norm": 0.280922532081604, "learning_rate": 6.457683877950393e-07, "loss": 0.2663, "step": 41940 }, { "epoch": 4.264030093533957, "grad_norm": 0.30704572796821594, "learning_rate": 6.455939526532056e-07, "loss": 0.297, "step": 41941 }, { "epoch": 4.2641317608784055, "grad_norm": 0.26646852493286133, "learning_rate": 6.454195394478341e-07, "loss": 0.2811, "step": 41942 }, { "epoch": 4.2642334282228544, "grad_norm": 0.2556372284889221, "learning_rate": 6.452451481798039e-07, "loss": 0.3116, "step": 41943 }, { "epoch": 4.264335095567303, "grad_norm": 0.29549935460090637, "learning_rate": 6.45070778849996e-07, "loss": 0.269, "step": 41944 }, { "epoch": 4.264436762911752, "grad_norm": 0.28455984592437744, "learning_rate": 6.448964314592843e-07, "loss": 0.3184, "step": 41945 }, { "epoch": 4.264538430256202, "grad_norm": 0.2860985994338989, "learning_rate": 6.447221060085507e-07, "loss": 0.2776, "step": 41946 }, { "epoch": 4.264640097600651, "grad_norm": 0.25471386313438416, "learning_rate": 6.445478024986723e-07, "loss": 0.2946, "step": 41947 }, { "epoch": 4.2647417649451, "grad_norm": 0.25510603189468384, "learning_rate": 6.443735209305257e-07, "loss": 0.2842, "step": 41948 }, { "epoch": 4.264843432289549, "grad_norm": 0.29152175784111023, "learning_rate": 6.441992613049913e-07, "loss": 0.2704, "step": 41949 }, { "epoch": 4.264945099633998, "grad_norm": 0.26872533559799194, "learning_rate": 6.440250236229462e-07, "loss": 0.2939, "step": 41950 }, { "epoch": 4.265046766978447, "grad_norm": 0.2890594005584717, "learning_rate": 6.438508078852679e-07, "loss": 0.2735, "step": 41951 }, { "epoch": 4.265148434322896, "grad_norm": 0.26732614636421204, "learning_rate": 6.43676614092833e-07, "loss": 0.2774, "step": 41952 }, { "epoch": 4.265250101667345, "grad_norm": 0.2850467264652252, "learning_rate": 6.435024422465208e-07, "loss": 0.3011, "step": 41953 }, { "epoch": 4.265351769011794, "grad_norm": 0.2770729660987854, "learning_rate": 6.433282923472084e-07, "loss": 0.3214, "step": 41954 }, { "epoch": 4.265453436356243, "grad_norm": 0.280313104391098, "learning_rate": 6.431541643957712e-07, "loss": 0.3056, "step": 41955 }, { "epoch": 4.265555103700692, "grad_norm": 0.2878872752189636, "learning_rate": 6.429800583930896e-07, "loss": 0.304, "step": 41956 }, { "epoch": 4.2656567710451405, "grad_norm": 0.2712033987045288, "learning_rate": 6.428059743400389e-07, "loss": 0.2851, "step": 41957 }, { "epoch": 4.2657584383895895, "grad_norm": 0.2696189880371094, "learning_rate": 6.426319122374953e-07, "loss": 0.3405, "step": 41958 }, { "epoch": 4.265860105734038, "grad_norm": 0.29077771306037903, "learning_rate": 6.424578720863373e-07, "loss": 0.2849, "step": 41959 }, { "epoch": 4.265961773078487, "grad_norm": 0.2859923541545868, "learning_rate": 6.422838538874416e-07, "loss": 0.2999, "step": 41960 }, { "epoch": 4.266063440422936, "grad_norm": 0.26760709285736084, "learning_rate": 6.42109857641684e-07, "loss": 0.3007, "step": 41961 }, { "epoch": 4.266165107767385, "grad_norm": 0.26523908972740173, "learning_rate": 6.419358833499401e-07, "loss": 0.2852, "step": 41962 }, { "epoch": 4.266266775111834, "grad_norm": 0.26529085636138916, "learning_rate": 6.417619310130885e-07, "loss": 0.2988, "step": 41963 }, { "epoch": 4.266368442456283, "grad_norm": 0.29788923263549805, "learning_rate": 6.415880006320052e-07, "loss": 0.256, "step": 41964 }, { "epoch": 4.266470109800732, "grad_norm": 0.27688831090927124, "learning_rate": 6.414140922075646e-07, "loss": 0.3113, "step": 41965 }, { "epoch": 4.266571777145181, "grad_norm": 0.2702282667160034, "learning_rate": 6.412402057406447e-07, "loss": 0.3194, "step": 41966 }, { "epoch": 4.26667344448963, "grad_norm": 0.28210386633872986, "learning_rate": 6.410663412321216e-07, "loss": 0.2765, "step": 41967 }, { "epoch": 4.266775111834079, "grad_norm": 0.2750661373138428, "learning_rate": 6.408924986828697e-07, "loss": 0.3111, "step": 41968 }, { "epoch": 4.266876779178528, "grad_norm": 0.2898170053958893, "learning_rate": 6.407186780937646e-07, "loss": 0.3194, "step": 41969 }, { "epoch": 4.266978446522977, "grad_norm": 0.2667620778083801, "learning_rate": 6.405448794656843e-07, "loss": 0.3047, "step": 41970 }, { "epoch": 4.267080113867426, "grad_norm": 0.2753984034061432, "learning_rate": 6.40371102799503e-07, "loss": 0.2964, "step": 41971 }, { "epoch": 4.267181781211875, "grad_norm": 0.25797656178474426, "learning_rate": 6.401973480960943e-07, "loss": 0.2841, "step": 41972 }, { "epoch": 4.267283448556324, "grad_norm": 0.2928412854671478, "learning_rate": 6.400236153563371e-07, "loss": 0.2899, "step": 41973 }, { "epoch": 4.2673851159007725, "grad_norm": 0.27431127429008484, "learning_rate": 6.398499045811046e-07, "loss": 0.3105, "step": 41974 }, { "epoch": 4.2674867832452215, "grad_norm": 0.29092565178871155, "learning_rate": 6.39676215771271e-07, "loss": 0.3005, "step": 41975 }, { "epoch": 4.26758845058967, "grad_norm": 0.295940637588501, "learning_rate": 6.395025489277135e-07, "loss": 0.3074, "step": 41976 }, { "epoch": 4.267690117934119, "grad_norm": 0.2813595235347748, "learning_rate": 6.393289040513063e-07, "loss": 0.2907, "step": 41977 }, { "epoch": 4.267791785278568, "grad_norm": 0.27034294605255127, "learning_rate": 6.39155281142923e-07, "loss": 0.2726, "step": 41978 }, { "epoch": 4.267893452623017, "grad_norm": 0.2612884044647217, "learning_rate": 6.389816802034388e-07, "loss": 0.2956, "step": 41979 }, { "epoch": 4.267995119967466, "grad_norm": 0.26564455032348633, "learning_rate": 6.388081012337294e-07, "loss": 0.2735, "step": 41980 }, { "epoch": 4.268096787311915, "grad_norm": 0.2808730900287628, "learning_rate": 6.386345442346681e-07, "loss": 0.2795, "step": 41981 }, { "epoch": 4.268198454656364, "grad_norm": 0.29133760929107666, "learning_rate": 6.384610092071291e-07, "loss": 0.2758, "step": 41982 }, { "epoch": 4.268300122000813, "grad_norm": 0.27094942331314087, "learning_rate": 6.382874961519875e-07, "loss": 0.3166, "step": 41983 }, { "epoch": 4.268401789345262, "grad_norm": 0.2771013677120209, "learning_rate": 6.381140050701173e-07, "loss": 0.3125, "step": 41984 }, { "epoch": 4.268503456689711, "grad_norm": 0.27492696046829224, "learning_rate": 6.379405359623914e-07, "loss": 0.3131, "step": 41985 }, { "epoch": 4.26860512403416, "grad_norm": 0.28066280484199524, "learning_rate": 6.37767088829685e-07, "loss": 0.3049, "step": 41986 }, { "epoch": 4.268706791378609, "grad_norm": 0.280446857213974, "learning_rate": 6.375936636728719e-07, "loss": 0.2889, "step": 41987 }, { "epoch": 4.268808458723058, "grad_norm": 0.30222567915916443, "learning_rate": 6.374202604928248e-07, "loss": 0.2819, "step": 41988 }, { "epoch": 4.2689101260675075, "grad_norm": 0.28081610798835754, "learning_rate": 6.372468792904168e-07, "loss": 0.3319, "step": 41989 }, { "epoch": 4.2690117934119565, "grad_norm": 0.27594178915023804, "learning_rate": 6.370735200665234e-07, "loss": 0.29, "step": 41990 }, { "epoch": 4.269113460756405, "grad_norm": 0.2940561771392822, "learning_rate": 6.369001828220172e-07, "loss": 0.2802, "step": 41991 }, { "epoch": 4.269215128100854, "grad_norm": 0.25790995359420776, "learning_rate": 6.367268675577692e-07, "loss": 0.2965, "step": 41992 }, { "epoch": 4.269316795445303, "grad_norm": 0.2945609986782074, "learning_rate": 6.36553574274656e-07, "loss": 0.2927, "step": 41993 }, { "epoch": 4.269418462789752, "grad_norm": 0.28734090924263, "learning_rate": 6.363803029735488e-07, "loss": 0.319, "step": 41994 }, { "epoch": 4.269520130134201, "grad_norm": 0.27600768208503723, "learning_rate": 6.3620705365532e-07, "loss": 0.2957, "step": 41995 }, { "epoch": 4.26962179747865, "grad_norm": 0.29502689838409424, "learning_rate": 6.360338263208437e-07, "loss": 0.3025, "step": 41996 }, { "epoch": 4.269723464823099, "grad_norm": 0.2869569957256317, "learning_rate": 6.358606209709917e-07, "loss": 0.2854, "step": 41997 }, { "epoch": 4.269825132167548, "grad_norm": 0.27114346623420715, "learning_rate": 6.356874376066374e-07, "loss": 0.3225, "step": 41998 }, { "epoch": 4.269926799511997, "grad_norm": 0.2980513870716095, "learning_rate": 6.355142762286515e-07, "loss": 0.302, "step": 41999 }, { "epoch": 4.270028466856446, "grad_norm": 0.2870606482028961, "learning_rate": 6.353411368379081e-07, "loss": 0.2877, "step": 42000 }, { "epoch": 4.270130134200895, "grad_norm": 0.28588032722473145, "learning_rate": 6.351680194352794e-07, "loss": 0.2856, "step": 42001 }, { "epoch": 4.270231801545344, "grad_norm": 0.287560373544693, "learning_rate": 6.349949240216357e-07, "loss": 0.3282, "step": 42002 }, { "epoch": 4.270333468889793, "grad_norm": 0.28197768330574036, "learning_rate": 6.34821850597851e-07, "loss": 0.2746, "step": 42003 }, { "epoch": 4.270435136234242, "grad_norm": 0.2797890603542328, "learning_rate": 6.346487991647971e-07, "loss": 0.2626, "step": 42004 }, { "epoch": 4.270536803578691, "grad_norm": 0.2880057394504547, "learning_rate": 6.344757697233439e-07, "loss": 0.2898, "step": 42005 }, { "epoch": 4.2706384709231395, "grad_norm": 0.28566774725914, "learning_rate": 6.343027622743653e-07, "loss": 0.3048, "step": 42006 }, { "epoch": 4.2707401382675885, "grad_norm": 0.27973607182502747, "learning_rate": 6.341297768187321e-07, "loss": 0.2987, "step": 42007 }, { "epoch": 4.270841805612037, "grad_norm": 0.26656755805015564, "learning_rate": 6.339568133573154e-07, "loss": 0.3021, "step": 42008 }, { "epoch": 4.270943472956486, "grad_norm": 0.2737005949020386, "learning_rate": 6.337838718909856e-07, "loss": 0.3085, "step": 42009 }, { "epoch": 4.271045140300935, "grad_norm": 0.2772141098976135, "learning_rate": 6.336109524206163e-07, "loss": 0.3156, "step": 42010 }, { "epoch": 4.271146807645384, "grad_norm": 0.2754340171813965, "learning_rate": 6.334380549470775e-07, "loss": 0.2875, "step": 42011 }, { "epoch": 4.271248474989833, "grad_norm": 0.305668443441391, "learning_rate": 6.332651794712391e-07, "loss": 0.2979, "step": 42012 }, { "epoch": 4.271350142334282, "grad_norm": 0.2747483253479004, "learning_rate": 6.330923259939736e-07, "loss": 0.2838, "step": 42013 }, { "epoch": 4.271451809678731, "grad_norm": 0.29046595096588135, "learning_rate": 6.329194945161521e-07, "loss": 0.2907, "step": 42014 }, { "epoch": 4.27155347702318, "grad_norm": 0.2764815092086792, "learning_rate": 6.327466850386426e-07, "loss": 0.3114, "step": 42015 }, { "epoch": 4.271655144367629, "grad_norm": 0.2611132860183716, "learning_rate": 6.325738975623191e-07, "loss": 0.293, "step": 42016 }, { "epoch": 4.271756811712078, "grad_norm": 0.30444908142089844, "learning_rate": 6.324011320880497e-07, "loss": 0.3121, "step": 42017 }, { "epoch": 4.271858479056527, "grad_norm": 0.27584823966026306, "learning_rate": 6.322283886167063e-07, "loss": 0.2983, "step": 42018 }, { "epoch": 4.271960146400976, "grad_norm": 0.29904288053512573, "learning_rate": 6.320556671491567e-07, "loss": 0.2936, "step": 42019 }, { "epoch": 4.272061813745425, "grad_norm": 0.28610724210739136, "learning_rate": 6.318829676862742e-07, "loss": 0.2987, "step": 42020 }, { "epoch": 4.272163481089874, "grad_norm": 0.29490596055984497, "learning_rate": 6.317102902289268e-07, "loss": 0.2531, "step": 42021 }, { "epoch": 4.272265148434323, "grad_norm": 0.31149038672447205, "learning_rate": 6.315376347779839e-07, "loss": 0.297, "step": 42022 }, { "epoch": 4.2723668157787715, "grad_norm": 0.26796939969062805, "learning_rate": 6.313650013343187e-07, "loss": 0.2562, "step": 42023 }, { "epoch": 4.2724684831232205, "grad_norm": 0.297039657831192, "learning_rate": 6.311923898987965e-07, "loss": 0.3341, "step": 42024 }, { "epoch": 4.272570150467669, "grad_norm": 0.3110256493091583, "learning_rate": 6.310198004722884e-07, "loss": 0.2769, "step": 42025 }, { "epoch": 4.272671817812118, "grad_norm": 0.2669438123703003, "learning_rate": 6.308472330556653e-07, "loss": 0.3124, "step": 42026 }, { "epoch": 4.272773485156567, "grad_norm": 0.26406151056289673, "learning_rate": 6.30674687649796e-07, "loss": 0.2665, "step": 42027 }, { "epoch": 4.272875152501017, "grad_norm": 0.261423259973526, "learning_rate": 6.305021642555493e-07, "loss": 0.3009, "step": 42028 }, { "epoch": 4.272976819845466, "grad_norm": 0.28175222873687744, "learning_rate": 6.303296628737932e-07, "loss": 0.2886, "step": 42029 }, { "epoch": 4.273078487189915, "grad_norm": 0.2780255377292633, "learning_rate": 6.301571835053987e-07, "loss": 0.3114, "step": 42030 }, { "epoch": 4.273180154534364, "grad_norm": 0.25331050157546997, "learning_rate": 6.299847261512338e-07, "loss": 0.3062, "step": 42031 }, { "epoch": 4.273281821878813, "grad_norm": 0.28215324878692627, "learning_rate": 6.298122908121667e-07, "loss": 0.3155, "step": 42032 }, { "epoch": 4.273383489223262, "grad_norm": 0.2988971769809723, "learning_rate": 6.296398774890689e-07, "loss": 0.2907, "step": 42033 }, { "epoch": 4.273485156567711, "grad_norm": 0.27742278575897217, "learning_rate": 6.294674861828043e-07, "loss": 0.2913, "step": 42034 }, { "epoch": 4.27358682391216, "grad_norm": 0.28395718336105347, "learning_rate": 6.29295116894244e-07, "loss": 0.289, "step": 42035 }, { "epoch": 4.273688491256609, "grad_norm": 0.2616064250469208, "learning_rate": 6.291227696242574e-07, "loss": 0.2824, "step": 42036 }, { "epoch": 4.273790158601058, "grad_norm": 0.25945645570755005, "learning_rate": 6.289504443737122e-07, "loss": 0.2729, "step": 42037 }, { "epoch": 4.2738918259455065, "grad_norm": 0.27016812562942505, "learning_rate": 6.287781411434751e-07, "loss": 0.2861, "step": 42038 }, { "epoch": 4.2739934932899555, "grad_norm": 0.28096240758895874, "learning_rate": 6.286058599344141e-07, "loss": 0.2996, "step": 42039 }, { "epoch": 4.274095160634404, "grad_norm": 0.2833724021911621, "learning_rate": 6.28433600747399e-07, "loss": 0.317, "step": 42040 }, { "epoch": 4.274196827978853, "grad_norm": 0.25719210505485535, "learning_rate": 6.282613635832969e-07, "loss": 0.2863, "step": 42041 }, { "epoch": 4.274298495323302, "grad_norm": 0.3001563549041748, "learning_rate": 6.280891484429741e-07, "loss": 0.2878, "step": 42042 }, { "epoch": 4.274400162667751, "grad_norm": 0.2928920090198517, "learning_rate": 6.279169553273012e-07, "loss": 0.2836, "step": 42043 }, { "epoch": 4.2745018300122, "grad_norm": 0.26862791180610657, "learning_rate": 6.277447842371415e-07, "loss": 0.3026, "step": 42044 }, { "epoch": 4.274603497356649, "grad_norm": 0.30685415863990784, "learning_rate": 6.275726351733646e-07, "loss": 0.2712, "step": 42045 }, { "epoch": 4.274705164701098, "grad_norm": 0.2782283425331116, "learning_rate": 6.2740050813684e-07, "loss": 0.2847, "step": 42046 }, { "epoch": 4.274806832045547, "grad_norm": 0.2813917100429535, "learning_rate": 6.272284031284303e-07, "loss": 0.2823, "step": 42047 }, { "epoch": 4.274908499389996, "grad_norm": 0.2908356785774231, "learning_rate": 6.27056320149006e-07, "loss": 0.2707, "step": 42048 }, { "epoch": 4.275010166734445, "grad_norm": 0.278110146522522, "learning_rate": 6.268842591994312e-07, "loss": 0.2995, "step": 42049 }, { "epoch": 4.275111834078894, "grad_norm": 0.28251564502716064, "learning_rate": 6.267122202805759e-07, "loss": 0.2885, "step": 42050 }, { "epoch": 4.275213501423343, "grad_norm": 0.26793622970581055, "learning_rate": 6.265402033933049e-07, "loss": 0.2985, "step": 42051 }, { "epoch": 4.275315168767792, "grad_norm": 0.26360976696014404, "learning_rate": 6.263682085384843e-07, "loss": 0.2787, "step": 42052 }, { "epoch": 4.275416836112241, "grad_norm": 0.27594560384750366, "learning_rate": 6.261962357169831e-07, "loss": 0.3049, "step": 42053 }, { "epoch": 4.27551850345669, "grad_norm": 0.2815980315208435, "learning_rate": 6.260242849296644e-07, "loss": 0.2918, "step": 42054 }, { "epoch": 4.2756201708011385, "grad_norm": 0.3104568421840668, "learning_rate": 6.258523561773949e-07, "loss": 0.2661, "step": 42055 }, { "epoch": 4.2757218381455875, "grad_norm": 0.28368887305259705, "learning_rate": 6.256804494610447e-07, "loss": 0.2954, "step": 42056 }, { "epoch": 4.275823505490036, "grad_norm": 0.2760868966579437, "learning_rate": 6.255085647814746e-07, "loss": 0.2929, "step": 42057 }, { "epoch": 4.275925172834485, "grad_norm": 0.28138062357902527, "learning_rate": 6.25336702139554e-07, "loss": 0.3109, "step": 42058 }, { "epoch": 4.276026840178934, "grad_norm": 0.28554779291152954, "learning_rate": 6.251648615361461e-07, "loss": 0.3138, "step": 42059 }, { "epoch": 4.276128507523383, "grad_norm": 0.29361745715141296, "learning_rate": 6.249930429721191e-07, "loss": 0.303, "step": 42060 }, { "epoch": 4.276230174867832, "grad_norm": 0.2815042734146118, "learning_rate": 6.248212464483377e-07, "loss": 0.2956, "step": 42061 }, { "epoch": 4.276331842212281, "grad_norm": 0.29603201150894165, "learning_rate": 6.24649471965666e-07, "loss": 0.2811, "step": 42062 }, { "epoch": 4.27643350955673, "grad_norm": 0.2652631998062134, "learning_rate": 6.244777195249729e-07, "loss": 0.3413, "step": 42063 }, { "epoch": 4.276535176901179, "grad_norm": 0.2842674255371094, "learning_rate": 6.243059891271186e-07, "loss": 0.3385, "step": 42064 }, { "epoch": 4.276636844245628, "grad_norm": 0.2981836199760437, "learning_rate": 6.241342807729711e-07, "loss": 0.2714, "step": 42065 }, { "epoch": 4.276738511590077, "grad_norm": 0.2530376613140106, "learning_rate": 6.239625944633976e-07, "loss": 0.3139, "step": 42066 }, { "epoch": 4.276840178934526, "grad_norm": 0.2579309642314911, "learning_rate": 6.237909301992585e-07, "loss": 0.3123, "step": 42067 }, { "epoch": 4.276941846278975, "grad_norm": 0.3045214116573334, "learning_rate": 6.236192879814218e-07, "loss": 0.2817, "step": 42068 }, { "epoch": 4.277043513623424, "grad_norm": 0.28380075097084045, "learning_rate": 6.23447667810751e-07, "loss": 0.3073, "step": 42069 }, { "epoch": 4.277145180967873, "grad_norm": 0.25810638070106506, "learning_rate": 6.232760696881096e-07, "loss": 0.3386, "step": 42070 }, { "epoch": 4.2772468483123225, "grad_norm": 0.2693324387073517, "learning_rate": 6.231044936143649e-07, "loss": 0.3122, "step": 42071 }, { "epoch": 4.277348515656771, "grad_norm": 0.23856225609779358, "learning_rate": 6.229329395903783e-07, "loss": 0.3196, "step": 42072 }, { "epoch": 4.27745018300122, "grad_norm": 0.26798582077026367, "learning_rate": 6.227614076170174e-07, "loss": 0.2702, "step": 42073 }, { "epoch": 4.277551850345669, "grad_norm": 0.2763720750808716, "learning_rate": 6.225898976951422e-07, "loss": 0.2916, "step": 42074 }, { "epoch": 4.277653517690118, "grad_norm": 0.2792516052722931, "learning_rate": 6.224184098256186e-07, "loss": 0.2978, "step": 42075 }, { "epoch": 4.277755185034567, "grad_norm": 0.28482842445373535, "learning_rate": 6.222469440093132e-07, "loss": 0.2871, "step": 42076 }, { "epoch": 4.277856852379016, "grad_norm": 0.2721537947654724, "learning_rate": 6.220755002470858e-07, "loss": 0.3596, "step": 42077 }, { "epoch": 4.277958519723465, "grad_norm": 0.26199549436569214, "learning_rate": 6.219040785398017e-07, "loss": 0.2965, "step": 42078 }, { "epoch": 4.278060187067914, "grad_norm": 0.2780158221721649, "learning_rate": 6.217326788883255e-07, "loss": 0.2743, "step": 42079 }, { "epoch": 4.278161854412363, "grad_norm": 0.2568899393081665, "learning_rate": 6.215613012935179e-07, "loss": 0.2934, "step": 42080 }, { "epoch": 4.278263521756812, "grad_norm": 0.2744578719139099, "learning_rate": 6.21389945756245e-07, "loss": 0.2738, "step": 42081 }, { "epoch": 4.278365189101261, "grad_norm": 0.3025099039077759, "learning_rate": 6.212186122773695e-07, "loss": 0.2855, "step": 42082 }, { "epoch": 4.27846685644571, "grad_norm": 0.2744056284427643, "learning_rate": 6.210473008577533e-07, "loss": 0.2908, "step": 42083 }, { "epoch": 4.278568523790159, "grad_norm": 0.2628503143787384, "learning_rate": 6.208760114982598e-07, "loss": 0.3186, "step": 42084 }, { "epoch": 4.278670191134608, "grad_norm": 0.2628365457057953, "learning_rate": 6.207047441997522e-07, "loss": 0.262, "step": 42085 }, { "epoch": 4.278771858479057, "grad_norm": 0.2674822509288788, "learning_rate": 6.205334989630956e-07, "loss": 0.2926, "step": 42086 }, { "epoch": 4.2788735258235056, "grad_norm": 0.28207114338874817, "learning_rate": 6.203622757891481e-07, "loss": 0.2685, "step": 42087 }, { "epoch": 4.2789751931679545, "grad_norm": 0.2960716784000397, "learning_rate": 6.201910746787759e-07, "loss": 0.3408, "step": 42088 }, { "epoch": 4.279076860512403, "grad_norm": 0.3062112629413605, "learning_rate": 6.200198956328407e-07, "loss": 0.3339, "step": 42089 }, { "epoch": 4.279178527856852, "grad_norm": 0.2691487967967987, "learning_rate": 6.198487386522028e-07, "loss": 0.3162, "step": 42090 }, { "epoch": 4.279280195201301, "grad_norm": 0.2749072313308716, "learning_rate": 6.196776037377272e-07, "loss": 0.2778, "step": 42091 }, { "epoch": 4.27938186254575, "grad_norm": 0.26265761256217957, "learning_rate": 6.195064908902753e-07, "loss": 0.3017, "step": 42092 }, { "epoch": 4.279483529890199, "grad_norm": 0.28662994503974915, "learning_rate": 6.193354001107083e-07, "loss": 0.3262, "step": 42093 }, { "epoch": 4.279585197234648, "grad_norm": 0.25840845704078674, "learning_rate": 6.191643313998874e-07, "loss": 0.3133, "step": 42094 }, { "epoch": 4.279686864579097, "grad_norm": 0.26977238059043884, "learning_rate": 6.18993284758676e-07, "loss": 0.3179, "step": 42095 }, { "epoch": 4.279788531923546, "grad_norm": 0.2755469083786011, "learning_rate": 6.188222601879373e-07, "loss": 0.2735, "step": 42096 }, { "epoch": 4.279890199267995, "grad_norm": 0.2906951904296875, "learning_rate": 6.186512576885284e-07, "loss": 0.2939, "step": 42097 }, { "epoch": 4.279991866612444, "grad_norm": 0.28307047486305237, "learning_rate": 6.184802772613152e-07, "loss": 0.3121, "step": 42098 }, { "epoch": 4.280093533956893, "grad_norm": 0.2800818383693695, "learning_rate": 6.183093189071565e-07, "loss": 0.3143, "step": 42099 }, { "epoch": 4.280195201301342, "grad_norm": 0.26237738132476807, "learning_rate": 6.181383826269133e-07, "loss": 0.3096, "step": 42100 }, { "epoch": 4.280296868645791, "grad_norm": 0.28750255703926086, "learning_rate": 6.179674684214488e-07, "loss": 0.292, "step": 42101 }, { "epoch": 4.28039853599024, "grad_norm": 0.2666618824005127, "learning_rate": 6.177965762916228e-07, "loss": 0.2827, "step": 42102 }, { "epoch": 4.280500203334689, "grad_norm": 0.2968226969242096, "learning_rate": 6.176257062382962e-07, "loss": 0.2852, "step": 42103 }, { "epoch": 4.2806018706791376, "grad_norm": 0.2661043703556061, "learning_rate": 6.174548582623285e-07, "loss": 0.2982, "step": 42104 }, { "epoch": 4.2807035380235865, "grad_norm": 0.2976817488670349, "learning_rate": 6.172840323645834e-07, "loss": 0.2895, "step": 42105 }, { "epoch": 4.280805205368035, "grad_norm": 0.28770092129707336, "learning_rate": 6.171132285459197e-07, "loss": 0.2785, "step": 42106 }, { "epoch": 4.280906872712484, "grad_norm": 0.29918691515922546, "learning_rate": 6.169424468071967e-07, "loss": 0.2757, "step": 42107 }, { "epoch": 4.281008540056933, "grad_norm": 0.29707270860671997, "learning_rate": 6.167716871492773e-07, "loss": 0.3092, "step": 42108 }, { "epoch": 4.281110207401382, "grad_norm": 0.2809613347053528, "learning_rate": 6.166009495730208e-07, "loss": 0.2992, "step": 42109 }, { "epoch": 4.281211874745832, "grad_norm": 0.2822897732257843, "learning_rate": 6.164302340792855e-07, "loss": 0.293, "step": 42110 }, { "epoch": 4.281313542090281, "grad_norm": 0.287585973739624, "learning_rate": 6.162595406689348e-07, "loss": 0.3146, "step": 42111 }, { "epoch": 4.28141520943473, "grad_norm": 0.28718361258506775, "learning_rate": 6.160888693428263e-07, "loss": 0.3184, "step": 42112 }, { "epoch": 4.281516876779179, "grad_norm": 0.25702473521232605, "learning_rate": 6.159182201018198e-07, "loss": 0.3146, "step": 42113 }, { "epoch": 4.281618544123628, "grad_norm": 0.26992273330688477, "learning_rate": 6.157475929467755e-07, "loss": 0.3204, "step": 42114 }, { "epoch": 4.281720211468077, "grad_norm": 0.273248553276062, "learning_rate": 6.155769878785533e-07, "loss": 0.2979, "step": 42115 }, { "epoch": 4.281821878812526, "grad_norm": 0.2827472686767578, "learning_rate": 6.154064048980124e-07, "loss": 0.2925, "step": 42116 }, { "epoch": 4.281923546156975, "grad_norm": 0.26737719774246216, "learning_rate": 6.152358440060113e-07, "loss": 0.2704, "step": 42117 }, { "epoch": 4.282025213501424, "grad_norm": 0.27402910590171814, "learning_rate": 6.150653052034106e-07, "loss": 0.3005, "step": 42118 }, { "epoch": 4.282126880845873, "grad_norm": 0.2707058787345886, "learning_rate": 6.148947884910694e-07, "loss": 0.3163, "step": 42119 }, { "epoch": 4.2822285481903215, "grad_norm": 0.2959180176258087, "learning_rate": 6.14724293869845e-07, "loss": 0.2897, "step": 42120 }, { "epoch": 4.2823302155347704, "grad_norm": 0.2730916738510132, "learning_rate": 6.145538213405983e-07, "loss": 0.3015, "step": 42121 }, { "epoch": 4.282431882879219, "grad_norm": 0.2828940749168396, "learning_rate": 6.143833709041875e-07, "loss": 0.2904, "step": 42122 }, { "epoch": 4.282533550223668, "grad_norm": 0.2739546000957489, "learning_rate": 6.142129425614713e-07, "loss": 0.3048, "step": 42123 }, { "epoch": 4.282635217568117, "grad_norm": 0.29415708780288696, "learning_rate": 6.140425363133068e-07, "loss": 0.2761, "step": 42124 }, { "epoch": 4.282736884912566, "grad_norm": 0.30640438199043274, "learning_rate": 6.138721521605545e-07, "loss": 0.2603, "step": 42125 }, { "epoch": 4.282838552257015, "grad_norm": 0.26522135734558105, "learning_rate": 6.137017901040726e-07, "loss": 0.318, "step": 42126 }, { "epoch": 4.282940219601464, "grad_norm": 0.27091148495674133, "learning_rate": 6.135314501447176e-07, "loss": 0.2887, "step": 42127 }, { "epoch": 4.283041886945913, "grad_norm": 0.2818309962749481, "learning_rate": 6.133611322833494e-07, "loss": 0.3194, "step": 42128 }, { "epoch": 4.283143554290362, "grad_norm": 0.27910321950912476, "learning_rate": 6.131908365208255e-07, "loss": 0.2806, "step": 42129 }, { "epoch": 4.283245221634811, "grad_norm": 0.27922701835632324, "learning_rate": 6.130205628580027e-07, "loss": 0.302, "step": 42130 }, { "epoch": 4.28334688897926, "grad_norm": 0.2739226818084717, "learning_rate": 6.128503112957413e-07, "loss": 0.3369, "step": 42131 }, { "epoch": 4.283448556323709, "grad_norm": 0.2828023433685303, "learning_rate": 6.126800818348966e-07, "loss": 0.3263, "step": 42132 }, { "epoch": 4.283550223668158, "grad_norm": 0.2861344814300537, "learning_rate": 6.125098744763275e-07, "loss": 0.2806, "step": 42133 }, { "epoch": 4.283651891012607, "grad_norm": 0.274391770362854, "learning_rate": 6.123396892208904e-07, "loss": 0.3024, "step": 42134 }, { "epoch": 4.283753558357056, "grad_norm": 0.2822759747505188, "learning_rate": 6.121695260694438e-07, "loss": 0.281, "step": 42135 }, { "epoch": 4.283855225701505, "grad_norm": 0.2728103995323181, "learning_rate": 6.119993850228445e-07, "loss": 0.2966, "step": 42136 }, { "epoch": 4.2839568930459535, "grad_norm": 0.2743377983570099, "learning_rate": 6.118292660819486e-07, "loss": 0.3018, "step": 42137 }, { "epoch": 4.2840585603904024, "grad_norm": 0.28828051686286926, "learning_rate": 6.116591692476154e-07, "loss": 0.3014, "step": 42138 }, { "epoch": 4.284160227734851, "grad_norm": 0.27977678179740906, "learning_rate": 6.114890945206997e-07, "loss": 0.3021, "step": 42139 }, { "epoch": 4.2842618950793, "grad_norm": 0.27774545550346375, "learning_rate": 6.113190419020587e-07, "loss": 0.2781, "step": 42140 }, { "epoch": 4.284363562423749, "grad_norm": 0.2952748239040375, "learning_rate": 6.1114901139255e-07, "loss": 0.2741, "step": 42141 }, { "epoch": 4.284465229768198, "grad_norm": 0.2709207832813263, "learning_rate": 6.109790029930301e-07, "loss": 0.3096, "step": 42142 }, { "epoch": 4.284566897112647, "grad_norm": 0.268233060836792, "learning_rate": 6.108090167043551e-07, "loss": 0.2598, "step": 42143 }, { "epoch": 4.284668564457096, "grad_norm": 0.28253278136253357, "learning_rate": 6.106390525273798e-07, "loss": 0.2959, "step": 42144 }, { "epoch": 4.284770231801545, "grad_norm": 0.28739771246910095, "learning_rate": 6.104691104629629e-07, "loss": 0.2954, "step": 42145 }, { "epoch": 4.284871899145994, "grad_norm": 0.2830352485179901, "learning_rate": 6.102991905119599e-07, "loss": 0.2735, "step": 42146 }, { "epoch": 4.284973566490443, "grad_norm": 0.2778080105781555, "learning_rate": 6.101292926752251e-07, "loss": 0.2955, "step": 42147 }, { "epoch": 4.285075233834892, "grad_norm": 0.2865327000617981, "learning_rate": 6.099594169536166e-07, "loss": 0.2789, "step": 42148 }, { "epoch": 4.285176901179341, "grad_norm": 0.2473330795764923, "learning_rate": 6.097895633479894e-07, "loss": 0.2678, "step": 42149 }, { "epoch": 4.28527856852379, "grad_norm": 0.29453349113464355, "learning_rate": 6.096197318591996e-07, "loss": 0.2974, "step": 42150 }, { "epoch": 4.285380235868239, "grad_norm": 0.28322944045066833, "learning_rate": 6.094499224881006e-07, "loss": 0.2836, "step": 42151 }, { "epoch": 4.285481903212688, "grad_norm": 0.270849347114563, "learning_rate": 6.092801352355509e-07, "loss": 0.2789, "step": 42152 }, { "epoch": 4.2855835705571375, "grad_norm": 0.30048295855522156, "learning_rate": 6.091103701024042e-07, "loss": 0.3057, "step": 42153 }, { "epoch": 4.285685237901586, "grad_norm": 0.30152198672294617, "learning_rate": 6.089406270895154e-07, "loss": 0.3071, "step": 42154 }, { "epoch": 4.285786905246035, "grad_norm": 0.2798599898815155, "learning_rate": 6.087709061977409e-07, "loss": 0.2789, "step": 42155 }, { "epoch": 4.285888572590484, "grad_norm": 0.28360384702682495, "learning_rate": 6.086012074279351e-07, "loss": 0.2595, "step": 42156 }, { "epoch": 4.285990239934933, "grad_norm": 0.2580054998397827, "learning_rate": 6.084315307809524e-07, "loss": 0.2719, "step": 42157 }, { "epoch": 4.286091907279382, "grad_norm": 0.28419700264930725, "learning_rate": 6.082618762576486e-07, "loss": 0.2807, "step": 42158 }, { "epoch": 4.286193574623831, "grad_norm": 0.287838876247406, "learning_rate": 6.080922438588776e-07, "loss": 0.2954, "step": 42159 }, { "epoch": 4.28629524196828, "grad_norm": 0.2773784101009369, "learning_rate": 6.079226335854949e-07, "loss": 0.3063, "step": 42160 }, { "epoch": 4.286396909312729, "grad_norm": 0.2857811450958252, "learning_rate": 6.077530454383523e-07, "loss": 0.2759, "step": 42161 }, { "epoch": 4.286498576657178, "grad_norm": 0.28545859456062317, "learning_rate": 6.075834794183078e-07, "loss": 0.3125, "step": 42162 }, { "epoch": 4.286600244001627, "grad_norm": 0.2909509837627411, "learning_rate": 6.074139355262138e-07, "loss": 0.2704, "step": 42163 }, { "epoch": 4.286701911346076, "grad_norm": 0.25574690103530884, "learning_rate": 6.072444137629236e-07, "loss": 0.2948, "step": 42164 }, { "epoch": 4.286803578690525, "grad_norm": 0.27978888154029846, "learning_rate": 6.07074914129293e-07, "loss": 0.2835, "step": 42165 }, { "epoch": 4.286905246034974, "grad_norm": 0.2585202753543854, "learning_rate": 6.06905436626175e-07, "loss": 0.3076, "step": 42166 }, { "epoch": 4.287006913379423, "grad_norm": 0.2584591805934906, "learning_rate": 6.067359812544227e-07, "loss": 0.2972, "step": 42167 }, { "epoch": 4.287108580723872, "grad_norm": 0.27237147092819214, "learning_rate": 6.065665480148913e-07, "loss": 0.2984, "step": 42168 }, { "epoch": 4.2872102480683205, "grad_norm": 0.2939086854457855, "learning_rate": 6.063971369084337e-07, "loss": 0.2821, "step": 42169 }, { "epoch": 4.2873119154127695, "grad_norm": 0.26587024331092834, "learning_rate": 6.062277479359035e-07, "loss": 0.3058, "step": 42170 }, { "epoch": 4.287413582757218, "grad_norm": 0.29398539662361145, "learning_rate": 6.060583810981524e-07, "loss": 0.2835, "step": 42171 }, { "epoch": 4.287515250101667, "grad_norm": 0.2833544611930847, "learning_rate": 6.058890363960362e-07, "loss": 0.3072, "step": 42172 }, { "epoch": 4.287616917446116, "grad_norm": 0.2956233620643616, "learning_rate": 6.057197138304067e-07, "loss": 0.2868, "step": 42173 }, { "epoch": 4.287718584790565, "grad_norm": 0.28032055497169495, "learning_rate": 6.055504134021162e-07, "loss": 0.3071, "step": 42174 }, { "epoch": 4.287820252135014, "grad_norm": 0.2802336812019348, "learning_rate": 6.053811351120203e-07, "loss": 0.2934, "step": 42175 }, { "epoch": 4.287921919479463, "grad_norm": 0.27739548683166504, "learning_rate": 6.052118789609679e-07, "loss": 0.3001, "step": 42176 }, { "epoch": 4.288023586823912, "grad_norm": 0.2767427861690521, "learning_rate": 6.050426449498137e-07, "loss": 0.3229, "step": 42177 }, { "epoch": 4.288125254168361, "grad_norm": 0.2768033444881439, "learning_rate": 6.048734330794115e-07, "loss": 0.2997, "step": 42178 }, { "epoch": 4.28822692151281, "grad_norm": 0.26847121119499207, "learning_rate": 6.047042433506123e-07, "loss": 0.2895, "step": 42179 }, { "epoch": 4.288328588857259, "grad_norm": 0.28202763199806213, "learning_rate": 6.045350757642687e-07, "loss": 0.264, "step": 42180 }, { "epoch": 4.288430256201708, "grad_norm": 0.2934150993824005, "learning_rate": 6.043659303212318e-07, "loss": 0.2977, "step": 42181 }, { "epoch": 4.288531923546157, "grad_norm": 0.32200467586517334, "learning_rate": 6.041968070223559e-07, "loss": 0.3123, "step": 42182 }, { "epoch": 4.288633590890606, "grad_norm": 0.266867995262146, "learning_rate": 6.040277058684918e-07, "loss": 0.2759, "step": 42183 }, { "epoch": 4.288735258235055, "grad_norm": 0.29217973351478577, "learning_rate": 6.038586268604901e-07, "loss": 0.31, "step": 42184 }, { "epoch": 4.288836925579504, "grad_norm": 0.27532026171684265, "learning_rate": 6.036895699992068e-07, "loss": 0.3065, "step": 42185 }, { "epoch": 4.2889385929239525, "grad_norm": 0.27407413721084595, "learning_rate": 6.035205352854878e-07, "loss": 0.2884, "step": 42186 }, { "epoch": 4.2890402602684015, "grad_norm": 0.2784854471683502, "learning_rate": 6.03351522720188e-07, "loss": 0.2962, "step": 42187 }, { "epoch": 4.28914192761285, "grad_norm": 0.2680499255657196, "learning_rate": 6.031825323041595e-07, "loss": 0.2813, "step": 42188 }, { "epoch": 4.289243594957299, "grad_norm": 0.26468154788017273, "learning_rate": 6.030135640382528e-07, "loss": 0.2786, "step": 42189 }, { "epoch": 4.289345262301748, "grad_norm": 0.266483336687088, "learning_rate": 6.028446179233183e-07, "loss": 0.2706, "step": 42190 }, { "epoch": 4.289446929646197, "grad_norm": 0.2735045850276947, "learning_rate": 6.026756939602069e-07, "loss": 0.2894, "step": 42191 }, { "epoch": 4.289548596990647, "grad_norm": 0.25661540031433105, "learning_rate": 6.025067921497719e-07, "loss": 0.3167, "step": 42192 }, { "epoch": 4.289650264335096, "grad_norm": 0.27595093846321106, "learning_rate": 6.023379124928619e-07, "loss": 0.2574, "step": 42193 }, { "epoch": 4.289751931679545, "grad_norm": 0.282236248254776, "learning_rate": 6.021690549903281e-07, "loss": 0.2894, "step": 42194 }, { "epoch": 4.289853599023994, "grad_norm": 0.26744335889816284, "learning_rate": 6.02000219643023e-07, "loss": 0.3048, "step": 42195 }, { "epoch": 4.289955266368443, "grad_norm": 0.30626237392425537, "learning_rate": 6.018314064517938e-07, "loss": 0.3061, "step": 42196 }, { "epoch": 4.290056933712892, "grad_norm": 0.30802056193351746, "learning_rate": 6.016626154174926e-07, "loss": 0.3094, "step": 42197 }, { "epoch": 4.290158601057341, "grad_norm": 0.26486459374427795, "learning_rate": 6.014938465409725e-07, "loss": 0.3092, "step": 42198 }, { "epoch": 4.29026026840179, "grad_norm": 0.28378552198410034, "learning_rate": 6.01325099823079e-07, "loss": 0.3095, "step": 42199 }, { "epoch": 4.290361935746239, "grad_norm": 0.2972622215747833, "learning_rate": 6.011563752646654e-07, "loss": 0.293, "step": 42200 }, { "epoch": 4.2904636030906875, "grad_norm": 0.274422824382782, "learning_rate": 6.009876728665792e-07, "loss": 0.2925, "step": 42201 }, { "epoch": 4.2905652704351365, "grad_norm": 0.27587175369262695, "learning_rate": 6.008189926296732e-07, "loss": 0.2972, "step": 42202 }, { "epoch": 4.290666937779585, "grad_norm": 0.3007098436355591, "learning_rate": 6.006503345547954e-07, "loss": 0.2976, "step": 42203 }, { "epoch": 4.290768605124034, "grad_norm": 0.27438727021217346, "learning_rate": 6.004816986427947e-07, "loss": 0.3008, "step": 42204 }, { "epoch": 4.290870272468483, "grad_norm": 0.30687201023101807, "learning_rate": 6.003130848945243e-07, "loss": 0.319, "step": 42205 }, { "epoch": 4.290971939812932, "grad_norm": 0.2672482430934906, "learning_rate": 6.001444933108286e-07, "loss": 0.2564, "step": 42206 }, { "epoch": 4.291073607157381, "grad_norm": 0.29129502177238464, "learning_rate": 5.999759238925596e-07, "loss": 0.3074, "step": 42207 }, { "epoch": 4.29117527450183, "grad_norm": 0.2996887266635895, "learning_rate": 5.998073766405676e-07, "loss": 0.2963, "step": 42208 }, { "epoch": 4.291276941846279, "grad_norm": 0.2633228003978729, "learning_rate": 5.996388515556994e-07, "loss": 0.3181, "step": 42209 }, { "epoch": 4.291378609190728, "grad_norm": 0.2840065658092499, "learning_rate": 5.994703486388054e-07, "loss": 0.2972, "step": 42210 }, { "epoch": 4.291480276535177, "grad_norm": 0.26634618639945984, "learning_rate": 5.993018678907331e-07, "loss": 0.2563, "step": 42211 }, { "epoch": 4.291581943879626, "grad_norm": 0.3001117408275604, "learning_rate": 5.991334093123335e-07, "loss": 0.3155, "step": 42212 }, { "epoch": 4.291683611224075, "grad_norm": 0.288457453250885, "learning_rate": 5.989649729044533e-07, "loss": 0.339, "step": 42213 }, { "epoch": 4.291785278568524, "grad_norm": 0.3045506179332733, "learning_rate": 5.987965586679412e-07, "loss": 0.3081, "step": 42214 }, { "epoch": 4.291886945912973, "grad_norm": 0.27021318674087524, "learning_rate": 5.986281666036486e-07, "loss": 0.2816, "step": 42215 }, { "epoch": 4.291988613257422, "grad_norm": 0.29439258575439453, "learning_rate": 5.984597967124189e-07, "loss": 0.2657, "step": 42216 }, { "epoch": 4.292090280601871, "grad_norm": 0.26753589510917664, "learning_rate": 5.982914489951025e-07, "loss": 0.2956, "step": 42217 }, { "epoch": 4.2921919479463195, "grad_norm": 0.26513591408729553, "learning_rate": 5.981231234525503e-07, "loss": 0.3113, "step": 42218 }, { "epoch": 4.2922936152907685, "grad_norm": 0.28909990191459656, "learning_rate": 5.979548200856055e-07, "loss": 0.299, "step": 42219 }, { "epoch": 4.292395282635217, "grad_norm": 0.2938978970050812, "learning_rate": 5.977865388951193e-07, "loss": 0.2829, "step": 42220 }, { "epoch": 4.292496949979666, "grad_norm": 0.2825821042060852, "learning_rate": 5.97618279881938e-07, "loss": 0.3281, "step": 42221 }, { "epoch": 4.292598617324115, "grad_norm": 0.2880255877971649, "learning_rate": 5.974500430469094e-07, "loss": 0.2973, "step": 42222 }, { "epoch": 4.292700284668564, "grad_norm": 0.28611454367637634, "learning_rate": 5.972818283908816e-07, "loss": 0.2955, "step": 42223 }, { "epoch": 4.292801952013013, "grad_norm": 0.27159419655799866, "learning_rate": 5.971136359147006e-07, "loss": 0.3085, "step": 42224 }, { "epoch": 4.292903619357462, "grad_norm": 0.3059121370315552, "learning_rate": 5.969454656192175e-07, "loss": 0.2949, "step": 42225 }, { "epoch": 4.293005286701911, "grad_norm": 0.26708975434303284, "learning_rate": 5.967773175052744e-07, "loss": 0.3128, "step": 42226 }, { "epoch": 4.29310695404636, "grad_norm": 0.29480603337287903, "learning_rate": 5.96609191573721e-07, "loss": 0.2832, "step": 42227 }, { "epoch": 4.293208621390809, "grad_norm": 0.2790861129760742, "learning_rate": 5.964410878254062e-07, "loss": 0.3088, "step": 42228 }, { "epoch": 4.293310288735258, "grad_norm": 0.2678130865097046, "learning_rate": 5.962730062611722e-07, "loss": 0.2759, "step": 42229 }, { "epoch": 4.293411956079707, "grad_norm": 0.28498274087905884, "learning_rate": 5.961049468818697e-07, "loss": 0.2465, "step": 42230 }, { "epoch": 4.293513623424156, "grad_norm": 0.2720852792263031, "learning_rate": 5.959369096883438e-07, "loss": 0.2867, "step": 42231 }, { "epoch": 4.293615290768605, "grad_norm": 0.2971152663230896, "learning_rate": 5.957688946814406e-07, "loss": 0.2993, "step": 42232 }, { "epoch": 4.293716958113054, "grad_norm": 0.26611998677253723, "learning_rate": 5.956009018620074e-07, "loss": 0.3096, "step": 42233 }, { "epoch": 4.293818625457503, "grad_norm": 0.2712695896625519, "learning_rate": 5.95432931230891e-07, "loss": 0.2649, "step": 42234 }, { "epoch": 4.293920292801952, "grad_norm": 0.2826097905635834, "learning_rate": 5.952649827889362e-07, "loss": 0.2869, "step": 42235 }, { "epoch": 4.294021960146401, "grad_norm": 0.2895245850086212, "learning_rate": 5.95097056536989e-07, "loss": 0.2987, "step": 42236 }, { "epoch": 4.29412362749085, "grad_norm": 0.26809462904930115, "learning_rate": 5.94929152475896e-07, "loss": 0.2615, "step": 42237 }, { "epoch": 4.294225294835299, "grad_norm": 0.2688038647174835, "learning_rate": 5.947612706065048e-07, "loss": 0.3, "step": 42238 }, { "epoch": 4.294326962179748, "grad_norm": 0.27653515338897705, "learning_rate": 5.945934109296581e-07, "loss": 0.2747, "step": 42239 }, { "epoch": 4.294428629524197, "grad_norm": 0.2874694764614105, "learning_rate": 5.944255734462034e-07, "loss": 0.312, "step": 42240 }, { "epoch": 4.294530296868646, "grad_norm": 0.2693837285041809, "learning_rate": 5.942577581569859e-07, "loss": 0.3106, "step": 42241 }, { "epoch": 4.294631964213095, "grad_norm": 0.28243178129196167, "learning_rate": 5.940899650628495e-07, "loss": 0.298, "step": 42242 }, { "epoch": 4.294733631557544, "grad_norm": 0.30548998713493347, "learning_rate": 5.939221941646422e-07, "loss": 0.2897, "step": 42243 }, { "epoch": 4.294835298901993, "grad_norm": 0.24043212831020355, "learning_rate": 5.937544454632083e-07, "loss": 0.3227, "step": 42244 }, { "epoch": 4.294936966246442, "grad_norm": 0.29754382371902466, "learning_rate": 5.935867189593919e-07, "loss": 0.3106, "step": 42245 }, { "epoch": 4.295038633590891, "grad_norm": 0.264636367559433, "learning_rate": 5.934190146540375e-07, "loss": 0.3054, "step": 42246 }, { "epoch": 4.29514030093534, "grad_norm": 0.273329496383667, "learning_rate": 5.932513325479911e-07, "loss": 0.3151, "step": 42247 }, { "epoch": 4.295241968279789, "grad_norm": 0.24564428627490997, "learning_rate": 5.930836726420991e-07, "loss": 0.2728, "step": 42248 }, { "epoch": 4.295343635624238, "grad_norm": 0.26778316497802734, "learning_rate": 5.92916034937202e-07, "loss": 0.2892, "step": 42249 }, { "epoch": 4.2954453029686865, "grad_norm": 0.2604992687702179, "learning_rate": 5.927484194341482e-07, "loss": 0.2916, "step": 42250 }, { "epoch": 4.2955469703131355, "grad_norm": 0.2938005030155182, "learning_rate": 5.925808261337807e-07, "loss": 0.2811, "step": 42251 }, { "epoch": 4.295648637657584, "grad_norm": 0.27317705750465393, "learning_rate": 5.924132550369421e-07, "loss": 0.3048, "step": 42252 }, { "epoch": 4.295750305002033, "grad_norm": 0.2779264450073242, "learning_rate": 5.922457061444792e-07, "loss": 0.3137, "step": 42253 }, { "epoch": 4.295851972346482, "grad_norm": 0.27433544397354126, "learning_rate": 5.92078179457235e-07, "loss": 0.2954, "step": 42254 }, { "epoch": 4.295953639690931, "grad_norm": 0.29501447081565857, "learning_rate": 5.919106749760534e-07, "loss": 0.3111, "step": 42255 }, { "epoch": 4.29605530703538, "grad_norm": 0.2884579300880432, "learning_rate": 5.917431927017775e-07, "loss": 0.2752, "step": 42256 }, { "epoch": 4.296156974379829, "grad_norm": 0.29330819845199585, "learning_rate": 5.915757326352528e-07, "loss": 0.2817, "step": 42257 }, { "epoch": 4.296258641724278, "grad_norm": 0.2768648564815521, "learning_rate": 5.91408294777322e-07, "loss": 0.2637, "step": 42258 }, { "epoch": 4.296360309068727, "grad_norm": 0.29169613122940063, "learning_rate": 5.912408791288277e-07, "loss": 0.2659, "step": 42259 }, { "epoch": 4.296461976413176, "grad_norm": 0.27921685576438904, "learning_rate": 5.910734856906148e-07, "loss": 0.2832, "step": 42260 }, { "epoch": 4.296563643757625, "grad_norm": 0.267665833234787, "learning_rate": 5.909061144635258e-07, "loss": 0.2913, "step": 42261 }, { "epoch": 4.296665311102074, "grad_norm": 0.26015257835388184, "learning_rate": 5.907387654484037e-07, "loss": 0.3172, "step": 42262 }, { "epoch": 4.296766978446523, "grad_norm": 0.27191054821014404, "learning_rate": 5.905714386460925e-07, "loss": 0.2934, "step": 42263 }, { "epoch": 4.296868645790972, "grad_norm": 0.2836533188819885, "learning_rate": 5.904041340574346e-07, "loss": 0.3032, "step": 42264 }, { "epoch": 4.296970313135421, "grad_norm": 0.2857930362224579, "learning_rate": 5.902368516832729e-07, "loss": 0.2963, "step": 42265 }, { "epoch": 4.29707198047987, "grad_norm": 0.2677464783191681, "learning_rate": 5.900695915244487e-07, "loss": 0.316, "step": 42266 }, { "epoch": 4.2971736478243185, "grad_norm": 0.27272090315818787, "learning_rate": 5.899023535818072e-07, "loss": 0.3113, "step": 42267 }, { "epoch": 4.2972753151687675, "grad_norm": 0.2744676172733307, "learning_rate": 5.897351378561894e-07, "loss": 0.2683, "step": 42268 }, { "epoch": 4.297376982513216, "grad_norm": 0.28693243861198425, "learning_rate": 5.895679443484376e-07, "loss": 0.2923, "step": 42269 }, { "epoch": 4.297478649857665, "grad_norm": 0.25558581948280334, "learning_rate": 5.894007730593948e-07, "loss": 0.2974, "step": 42270 }, { "epoch": 4.297580317202114, "grad_norm": 0.2601166367530823, "learning_rate": 5.892336239899027e-07, "loss": 0.2977, "step": 42271 }, { "epoch": 4.297681984546563, "grad_norm": 0.2785646319389343, "learning_rate": 5.890664971408027e-07, "loss": 0.34, "step": 42272 }, { "epoch": 4.297783651891012, "grad_norm": 0.2977999150753021, "learning_rate": 5.888993925129388e-07, "loss": 0.3023, "step": 42273 }, { "epoch": 4.297885319235462, "grad_norm": 0.2740813195705414, "learning_rate": 5.887323101071507e-07, "loss": 0.2657, "step": 42274 }, { "epoch": 4.297986986579911, "grad_norm": 0.28032562136650085, "learning_rate": 5.885652499242816e-07, "loss": 0.3132, "step": 42275 }, { "epoch": 4.29808865392436, "grad_norm": 0.29101336002349854, "learning_rate": 5.883982119651715e-07, "loss": 0.2926, "step": 42276 }, { "epoch": 4.298190321268809, "grad_norm": 0.2952110767364502, "learning_rate": 5.882311962306636e-07, "loss": 0.3008, "step": 42277 }, { "epoch": 4.298291988613258, "grad_norm": 0.2838195860385895, "learning_rate": 5.880642027215983e-07, "loss": 0.2914, "step": 42278 }, { "epoch": 4.298393655957707, "grad_norm": 0.29320278763771057, "learning_rate": 5.878972314388165e-07, "loss": 0.2906, "step": 42279 }, { "epoch": 4.298495323302156, "grad_norm": 0.2556929588317871, "learning_rate": 5.87730282383161e-07, "loss": 0.2857, "step": 42280 }, { "epoch": 4.298596990646605, "grad_norm": 0.2708452343940735, "learning_rate": 5.875633555554717e-07, "loss": 0.3003, "step": 42281 }, { "epoch": 4.2986986579910536, "grad_norm": 0.26390618085861206, "learning_rate": 5.873964509565882e-07, "loss": 0.3073, "step": 42282 }, { "epoch": 4.2988003253355025, "grad_norm": 0.2987779378890991, "learning_rate": 5.872295685873547e-07, "loss": 0.2868, "step": 42283 }, { "epoch": 4.298901992679951, "grad_norm": 0.26461657881736755, "learning_rate": 5.870627084486092e-07, "loss": 0.3343, "step": 42284 }, { "epoch": 4.2990036600244, "grad_norm": 0.3164925277233124, "learning_rate": 5.868958705411937e-07, "loss": 0.3097, "step": 42285 }, { "epoch": 4.299105327368849, "grad_norm": 0.30680474638938904, "learning_rate": 5.867290548659465e-07, "loss": 0.2949, "step": 42286 }, { "epoch": 4.299206994713298, "grad_norm": 0.2870272099971771, "learning_rate": 5.86562261423711e-07, "loss": 0.3034, "step": 42287 }, { "epoch": 4.299308662057747, "grad_norm": 0.2842450737953186, "learning_rate": 5.863954902153257e-07, "loss": 0.294, "step": 42288 }, { "epoch": 4.299410329402196, "grad_norm": 0.30761292576789856, "learning_rate": 5.862287412416306e-07, "loss": 0.2778, "step": 42289 }, { "epoch": 4.299511996746645, "grad_norm": 0.2841273546218872, "learning_rate": 5.860620145034668e-07, "loss": 0.2691, "step": 42290 }, { "epoch": 4.299613664091094, "grad_norm": 0.2809441089630127, "learning_rate": 5.85895310001674e-07, "loss": 0.291, "step": 42291 }, { "epoch": 4.299715331435543, "grad_norm": 0.26337161660194397, "learning_rate": 5.857286277370899e-07, "loss": 0.313, "step": 42292 }, { "epoch": 4.299816998779992, "grad_norm": 0.2843615710735321, "learning_rate": 5.855619677105579e-07, "loss": 0.3201, "step": 42293 }, { "epoch": 4.299918666124441, "grad_norm": 0.2721180021762848, "learning_rate": 5.853953299229154e-07, "loss": 0.2872, "step": 42294 }, { "epoch": 4.30002033346889, "grad_norm": 0.29121649265289307, "learning_rate": 5.852287143750018e-07, "loss": 0.2831, "step": 42295 }, { "epoch": 4.300122000813339, "grad_norm": 0.29414981603622437, "learning_rate": 5.850621210676561e-07, "loss": 0.2775, "step": 42296 }, { "epoch": 4.300223668157788, "grad_norm": 0.3000209331512451, "learning_rate": 5.848955500017195e-07, "loss": 0.2864, "step": 42297 }, { "epoch": 4.300325335502237, "grad_norm": 0.2779853343963623, "learning_rate": 5.847290011780293e-07, "loss": 0.2822, "step": 42298 }, { "epoch": 4.3004270028466856, "grad_norm": 0.3116200566291809, "learning_rate": 5.845624745974248e-07, "loss": 0.3166, "step": 42299 }, { "epoch": 4.3005286701911345, "grad_norm": 0.28752246499061584, "learning_rate": 5.84395970260746e-07, "loss": 0.2706, "step": 42300 }, { "epoch": 4.300630337535583, "grad_norm": 0.31884244084358215, "learning_rate": 5.84229488168831e-07, "loss": 0.2721, "step": 42301 }, { "epoch": 4.300732004880032, "grad_norm": 0.32216203212738037, "learning_rate": 5.840630283225174e-07, "loss": 0.3349, "step": 42302 }, { "epoch": 4.300833672224481, "grad_norm": 0.29547640681266785, "learning_rate": 5.838965907226462e-07, "loss": 0.3236, "step": 42303 }, { "epoch": 4.30093533956893, "grad_norm": 0.3089989423751831, "learning_rate": 5.837301753700542e-07, "loss": 0.2804, "step": 42304 }, { "epoch": 4.301037006913379, "grad_norm": 0.3003568947315216, "learning_rate": 5.835637822655804e-07, "loss": 0.2742, "step": 42305 }, { "epoch": 4.301138674257828, "grad_norm": 0.28388217091560364, "learning_rate": 5.833974114100616e-07, "loss": 0.2772, "step": 42306 }, { "epoch": 4.301240341602277, "grad_norm": 0.2778517007827759, "learning_rate": 5.83231062804338e-07, "loss": 0.2865, "step": 42307 }, { "epoch": 4.301342008946726, "grad_norm": 0.25891974568367004, "learning_rate": 5.83064736449247e-07, "loss": 0.2964, "step": 42308 }, { "epoch": 4.301443676291175, "grad_norm": 0.29570549726486206, "learning_rate": 5.828984323456249e-07, "loss": 0.2866, "step": 42309 }, { "epoch": 4.301545343635624, "grad_norm": 0.2835918068885803, "learning_rate": 5.827321504943123e-07, "loss": 0.2806, "step": 42310 }, { "epoch": 4.301647010980073, "grad_norm": 0.268064945936203, "learning_rate": 5.825658908961452e-07, "loss": 0.282, "step": 42311 }, { "epoch": 4.301748678324522, "grad_norm": 0.2656624913215637, "learning_rate": 5.8239965355196e-07, "loss": 0.3022, "step": 42312 }, { "epoch": 4.301850345668971, "grad_norm": 0.28332340717315674, "learning_rate": 5.822334384625972e-07, "loss": 0.3026, "step": 42313 }, { "epoch": 4.30195201301342, "grad_norm": 0.2789660096168518, "learning_rate": 5.820672456288923e-07, "loss": 0.3127, "step": 42314 }, { "epoch": 4.302053680357869, "grad_norm": 0.29662397503852844, "learning_rate": 5.819010750516823e-07, "loss": 0.2915, "step": 42315 }, { "epoch": 4.302155347702318, "grad_norm": 0.266752690076828, "learning_rate": 5.81734926731804e-07, "loss": 0.2933, "step": 42316 }, { "epoch": 4.302257015046767, "grad_norm": 0.29211562871932983, "learning_rate": 5.815688006700964e-07, "loss": 0.3147, "step": 42317 }, { "epoch": 4.302358682391216, "grad_norm": 0.25531354546546936, "learning_rate": 5.814026968673953e-07, "loss": 0.3152, "step": 42318 }, { "epoch": 4.302460349735665, "grad_norm": 0.30010178685188293, "learning_rate": 5.812366153245363e-07, "loss": 0.2945, "step": 42319 }, { "epoch": 4.302562017080114, "grad_norm": 0.2980806231498718, "learning_rate": 5.81070556042358e-07, "loss": 0.2931, "step": 42320 }, { "epoch": 4.302663684424563, "grad_norm": 0.26855525374412537, "learning_rate": 5.809045190216961e-07, "loss": 0.3007, "step": 42321 }, { "epoch": 4.302765351769012, "grad_norm": 0.2833124101161957, "learning_rate": 5.807385042633862e-07, "loss": 0.2716, "step": 42322 }, { "epoch": 4.302867019113461, "grad_norm": 0.2876952290534973, "learning_rate": 5.805725117682671e-07, "loss": 0.2706, "step": 42323 }, { "epoch": 4.30296868645791, "grad_norm": 0.27842268347740173, "learning_rate": 5.804065415371729e-07, "loss": 0.3342, "step": 42324 }, { "epoch": 4.303070353802359, "grad_norm": 0.2832452952861786, "learning_rate": 5.802405935709399e-07, "loss": 0.2629, "step": 42325 }, { "epoch": 4.303172021146808, "grad_norm": 0.2792676091194153, "learning_rate": 5.800746678704039e-07, "loss": 0.275, "step": 42326 }, { "epoch": 4.303273688491257, "grad_norm": 0.2868100106716156, "learning_rate": 5.799087644364027e-07, "loss": 0.2796, "step": 42327 }, { "epoch": 4.303375355835706, "grad_norm": 0.2930477261543274, "learning_rate": 5.797428832697704e-07, "loss": 0.325, "step": 42328 }, { "epoch": 4.303477023180155, "grad_norm": 0.29076433181762695, "learning_rate": 5.795770243713422e-07, "loss": 0.2689, "step": 42329 }, { "epoch": 4.303578690524604, "grad_norm": 0.2629048228263855, "learning_rate": 5.794111877419556e-07, "loss": 0.2905, "step": 42330 }, { "epoch": 4.303680357869053, "grad_norm": 0.27453553676605225, "learning_rate": 5.792453733824443e-07, "loss": 0.2669, "step": 42331 }, { "epoch": 4.3037820252135015, "grad_norm": 0.2746616303920746, "learning_rate": 5.790795812936439e-07, "loss": 0.2871, "step": 42332 }, { "epoch": 4.3038836925579504, "grad_norm": 0.2824348509311676, "learning_rate": 5.789138114763909e-07, "loss": 0.2877, "step": 42333 }, { "epoch": 4.303985359902399, "grad_norm": 0.28519997000694275, "learning_rate": 5.787480639315196e-07, "loss": 0.293, "step": 42334 }, { "epoch": 4.304087027246848, "grad_norm": 0.2769521474838257, "learning_rate": 5.785823386598643e-07, "loss": 0.2957, "step": 42335 }, { "epoch": 4.304188694591297, "grad_norm": 0.2638648748397827, "learning_rate": 5.7841663566226e-07, "loss": 0.3166, "step": 42336 }, { "epoch": 4.304290361935746, "grad_norm": 0.2990916073322296, "learning_rate": 5.782509549395443e-07, "loss": 0.3052, "step": 42337 }, { "epoch": 4.304392029280195, "grad_norm": 0.2741745412349701, "learning_rate": 5.780852964925471e-07, "loss": 0.2643, "step": 42338 }, { "epoch": 4.304493696624644, "grad_norm": 0.2683415710926056, "learning_rate": 5.779196603221054e-07, "loss": 0.3068, "step": 42339 }, { "epoch": 4.304595363969093, "grad_norm": 0.28268319368362427, "learning_rate": 5.777540464290543e-07, "loss": 0.3024, "step": 42340 }, { "epoch": 4.304697031313542, "grad_norm": 0.2865723967552185, "learning_rate": 5.77588454814228e-07, "loss": 0.2786, "step": 42341 }, { "epoch": 4.304798698657991, "grad_norm": 0.26899299025535583, "learning_rate": 5.774228854784597e-07, "loss": 0.3107, "step": 42342 }, { "epoch": 4.30490036600244, "grad_norm": 0.2794324457645416, "learning_rate": 5.772573384225833e-07, "loss": 0.286, "step": 42343 }, { "epoch": 4.305002033346889, "grad_norm": 0.2539641261100769, "learning_rate": 5.770918136474341e-07, "loss": 0.2851, "step": 42344 }, { "epoch": 4.305103700691338, "grad_norm": 0.28583937883377075, "learning_rate": 5.769263111538453e-07, "loss": 0.3227, "step": 42345 }, { "epoch": 4.305205368035787, "grad_norm": 0.28383395075798035, "learning_rate": 5.767608309426498e-07, "loss": 0.3093, "step": 42346 }, { "epoch": 4.305307035380236, "grad_norm": 0.30090081691741943, "learning_rate": 5.765953730146839e-07, "loss": 0.2835, "step": 42347 }, { "epoch": 4.305408702724685, "grad_norm": 0.27955162525177, "learning_rate": 5.764299373707777e-07, "loss": 0.3275, "step": 42348 }, { "epoch": 4.3055103700691335, "grad_norm": 0.2758994698524475, "learning_rate": 5.762645240117659e-07, "loss": 0.2742, "step": 42349 }, { "epoch": 4.3056120374135824, "grad_norm": 0.255176842212677, "learning_rate": 5.760991329384847e-07, "loss": 0.2871, "step": 42350 }, { "epoch": 4.305713704758031, "grad_norm": 0.295237272977829, "learning_rate": 5.759337641517626e-07, "loss": 0.2987, "step": 42351 }, { "epoch": 4.30581537210248, "grad_norm": 0.2694588005542755, "learning_rate": 5.757684176524353e-07, "loss": 0.2984, "step": 42352 }, { "epoch": 4.305917039446929, "grad_norm": 0.28354188799858093, "learning_rate": 5.756030934413348e-07, "loss": 0.3044, "step": 42353 }, { "epoch": 4.306018706791378, "grad_norm": 0.2895101308822632, "learning_rate": 5.754377915192955e-07, "loss": 0.2962, "step": 42354 }, { "epoch": 4.306120374135828, "grad_norm": 0.27442800998687744, "learning_rate": 5.752725118871494e-07, "loss": 0.3037, "step": 42355 }, { "epoch": 4.306222041480277, "grad_norm": 0.28932997584342957, "learning_rate": 5.751072545457276e-07, "loss": 0.3618, "step": 42356 }, { "epoch": 4.306323708824726, "grad_norm": 0.2773036062717438, "learning_rate": 5.749420194958661e-07, "loss": 0.2983, "step": 42357 }, { "epoch": 4.306425376169175, "grad_norm": 0.2890850305557251, "learning_rate": 5.74776806738393e-07, "loss": 0.2909, "step": 42358 }, { "epoch": 4.306527043513624, "grad_norm": 0.2661284804344177, "learning_rate": 5.746116162741427e-07, "loss": 0.2848, "step": 42359 }, { "epoch": 4.306628710858073, "grad_norm": 0.2894309163093567, "learning_rate": 5.744464481039497e-07, "loss": 0.2965, "step": 42360 }, { "epoch": 4.306730378202522, "grad_norm": 0.2833697199821472, "learning_rate": 5.74281302228642e-07, "loss": 0.3098, "step": 42361 }, { "epoch": 4.306832045546971, "grad_norm": 0.26870638132095337, "learning_rate": 5.74116178649054e-07, "loss": 0.3113, "step": 42362 }, { "epoch": 4.30693371289142, "grad_norm": 0.27171120047569275, "learning_rate": 5.739510773660162e-07, "loss": 0.2917, "step": 42363 }, { "epoch": 4.3070353802358685, "grad_norm": 0.2872210741043091, "learning_rate": 5.737859983803618e-07, "loss": 0.2856, "step": 42364 }, { "epoch": 4.3071370475803175, "grad_norm": 0.2695080637931824, "learning_rate": 5.736209416929217e-07, "loss": 0.3033, "step": 42365 }, { "epoch": 4.307238714924766, "grad_norm": 0.27871009707450867, "learning_rate": 5.734559073045265e-07, "loss": 0.2935, "step": 42366 }, { "epoch": 4.307340382269215, "grad_norm": 0.2962493598461151, "learning_rate": 5.732908952160104e-07, "loss": 0.3002, "step": 42367 }, { "epoch": 4.307442049613664, "grad_norm": 0.301532119512558, "learning_rate": 5.731259054282012e-07, "loss": 0.2952, "step": 42368 }, { "epoch": 4.307543716958113, "grad_norm": 0.27933409810066223, "learning_rate": 5.729609379419315e-07, "loss": 0.3045, "step": 42369 }, { "epoch": 4.307645384302562, "grad_norm": 0.30595794320106506, "learning_rate": 5.727959927580346e-07, "loss": 0.3251, "step": 42370 }, { "epoch": 4.307747051647011, "grad_norm": 0.2738299071788788, "learning_rate": 5.726310698773374e-07, "loss": 0.3003, "step": 42371 }, { "epoch": 4.30784871899146, "grad_norm": 0.29673364758491516, "learning_rate": 5.724661693006738e-07, "loss": 0.2946, "step": 42372 }, { "epoch": 4.307950386335909, "grad_norm": 0.2796414792537689, "learning_rate": 5.723012910288734e-07, "loss": 0.3079, "step": 42373 }, { "epoch": 4.308052053680358, "grad_norm": 0.261417955160141, "learning_rate": 5.721364350627661e-07, "loss": 0.2924, "step": 42374 }, { "epoch": 4.308153721024807, "grad_norm": 0.26733845472335815, "learning_rate": 5.71971601403184e-07, "loss": 0.3184, "step": 42375 }, { "epoch": 4.308255388369256, "grad_norm": 0.30737388134002686, "learning_rate": 5.718067900509561e-07, "loss": 0.2777, "step": 42376 }, { "epoch": 4.308357055713705, "grad_norm": 0.30232733488082886, "learning_rate": 5.716420010069146e-07, "loss": 0.2703, "step": 42377 }, { "epoch": 4.308458723058154, "grad_norm": 0.3216552734375, "learning_rate": 5.714772342718866e-07, "loss": 0.3134, "step": 42378 }, { "epoch": 4.308560390402603, "grad_norm": 0.27369144558906555, "learning_rate": 5.713124898467043e-07, "loss": 0.2884, "step": 42379 }, { "epoch": 4.308662057747052, "grad_norm": 0.25852251052856445, "learning_rate": 5.711477677321992e-07, "loss": 0.291, "step": 42380 }, { "epoch": 4.3087637250915005, "grad_norm": 0.27944156527519226, "learning_rate": 5.709830679291966e-07, "loss": 0.3145, "step": 42381 }, { "epoch": 4.3088653924359495, "grad_norm": 0.29469361901283264, "learning_rate": 5.708183904385306e-07, "loss": 0.3117, "step": 42382 }, { "epoch": 4.308967059780398, "grad_norm": 0.26331689953804016, "learning_rate": 5.706537352610286e-07, "loss": 0.3121, "step": 42383 }, { "epoch": 4.309068727124847, "grad_norm": 0.3016897439956665, "learning_rate": 5.704891023975195e-07, "loss": 0.2893, "step": 42384 }, { "epoch": 4.309170394469296, "grad_norm": 0.281191349029541, "learning_rate": 5.703244918488349e-07, "loss": 0.2989, "step": 42385 }, { "epoch": 4.309272061813745, "grad_norm": 0.28445759415626526, "learning_rate": 5.701599036158024e-07, "loss": 0.2795, "step": 42386 }, { "epoch": 4.309373729158194, "grad_norm": 0.29314303398132324, "learning_rate": 5.699953376992517e-07, "loss": 0.2836, "step": 42387 }, { "epoch": 4.309475396502643, "grad_norm": 0.2696104645729065, "learning_rate": 5.698307941000114e-07, "loss": 0.2663, "step": 42388 }, { "epoch": 4.309577063847092, "grad_norm": 0.2685089111328125, "learning_rate": 5.6966627281891e-07, "loss": 0.2731, "step": 42389 }, { "epoch": 4.309678731191541, "grad_norm": 0.2895529270172119, "learning_rate": 5.695017738567798e-07, "loss": 0.333, "step": 42390 }, { "epoch": 4.30978039853599, "grad_norm": 0.2667788863182068, "learning_rate": 5.693372972144446e-07, "loss": 0.2975, "step": 42391 }, { "epoch": 4.309882065880439, "grad_norm": 0.2835570275783539, "learning_rate": 5.691728428927357e-07, "loss": 0.283, "step": 42392 }, { "epoch": 4.309983733224888, "grad_norm": 0.29171890020370483, "learning_rate": 5.690084108924814e-07, "loss": 0.3036, "step": 42393 }, { "epoch": 4.310085400569337, "grad_norm": 0.2845209836959839, "learning_rate": 5.688440012145091e-07, "loss": 0.2999, "step": 42394 }, { "epoch": 4.310187067913786, "grad_norm": 0.2763756513595581, "learning_rate": 5.686796138596485e-07, "loss": 0.2803, "step": 42395 }, { "epoch": 4.310288735258235, "grad_norm": 0.2953503131866455, "learning_rate": 5.685152488287265e-07, "loss": 0.3016, "step": 42396 }, { "epoch": 4.310390402602684, "grad_norm": 0.27422022819519043, "learning_rate": 5.683509061225723e-07, "loss": 0.2672, "step": 42397 }, { "epoch": 4.3104920699471325, "grad_norm": 0.2910768389701843, "learning_rate": 5.681865857420116e-07, "loss": 0.3124, "step": 42398 }, { "epoch": 4.310593737291582, "grad_norm": 0.3079380691051483, "learning_rate": 5.680222876878744e-07, "loss": 0.2795, "step": 42399 }, { "epoch": 4.310695404636031, "grad_norm": 0.29004719853401184, "learning_rate": 5.678580119609889e-07, "loss": 0.285, "step": 42400 }, { "epoch": 4.31079707198048, "grad_norm": 0.26829832792282104, "learning_rate": 5.6769375856218e-07, "loss": 0.308, "step": 42401 }, { "epoch": 4.310898739324929, "grad_norm": 0.2731330394744873, "learning_rate": 5.675295274922776e-07, "loss": 0.2802, "step": 42402 }, { "epoch": 4.311000406669378, "grad_norm": 0.27828842401504517, "learning_rate": 5.673653187521078e-07, "loss": 0.2916, "step": 42403 }, { "epoch": 4.311102074013827, "grad_norm": 0.30213573575019836, "learning_rate": 5.672011323424975e-07, "loss": 0.278, "step": 42404 }, { "epoch": 4.311203741358276, "grad_norm": 0.26025617122650146, "learning_rate": 5.670369682642756e-07, "loss": 0.2601, "step": 42405 }, { "epoch": 4.311305408702725, "grad_norm": 0.2774936854839325, "learning_rate": 5.668728265182677e-07, "loss": 0.3228, "step": 42406 }, { "epoch": 4.311407076047174, "grad_norm": 0.2818257212638855, "learning_rate": 5.667087071053013e-07, "loss": 0.2783, "step": 42407 }, { "epoch": 4.311508743391623, "grad_norm": 0.2903227210044861, "learning_rate": 5.665446100262017e-07, "loss": 0.2871, "step": 42408 }, { "epoch": 4.311610410736072, "grad_norm": 0.26632893085479736, "learning_rate": 5.66380535281798e-07, "loss": 0.2899, "step": 42409 }, { "epoch": 4.311712078080521, "grad_norm": 0.27574917674064636, "learning_rate": 5.662164828729156e-07, "loss": 0.3011, "step": 42410 }, { "epoch": 4.31181374542497, "grad_norm": 0.27727264165878296, "learning_rate": 5.660524528003797e-07, "loss": 0.2786, "step": 42411 }, { "epoch": 4.311915412769419, "grad_norm": 0.28560537099838257, "learning_rate": 5.658884450650187e-07, "loss": 0.3294, "step": 42412 }, { "epoch": 4.3120170801138675, "grad_norm": 0.30004915595054626, "learning_rate": 5.657244596676581e-07, "loss": 0.2831, "step": 42413 }, { "epoch": 4.3121187474583165, "grad_norm": 0.2770797908306122, "learning_rate": 5.655604966091233e-07, "loss": 0.308, "step": 42414 }, { "epoch": 4.312220414802765, "grad_norm": 0.28003445267677307, "learning_rate": 5.653965558902419e-07, "loss": 0.3111, "step": 42415 }, { "epoch": 4.312322082147214, "grad_norm": 0.2911970317363739, "learning_rate": 5.652326375118383e-07, "loss": 0.2961, "step": 42416 }, { "epoch": 4.312423749491663, "grad_norm": 0.2573898434638977, "learning_rate": 5.650687414747396e-07, "loss": 0.2896, "step": 42417 }, { "epoch": 4.312525416836112, "grad_norm": 0.29727011919021606, "learning_rate": 5.649048677797692e-07, "loss": 0.2723, "step": 42418 }, { "epoch": 4.312627084180561, "grad_norm": 0.30011439323425293, "learning_rate": 5.64741016427755e-07, "loss": 0.2922, "step": 42419 }, { "epoch": 4.31272875152501, "grad_norm": 0.2899087369441986, "learning_rate": 5.645771874195217e-07, "loss": 0.3022, "step": 42420 }, { "epoch": 4.312830418869459, "grad_norm": 0.29929256439208984, "learning_rate": 5.644133807558938e-07, "loss": 0.2788, "step": 42421 }, { "epoch": 4.312932086213908, "grad_norm": 0.28120148181915283, "learning_rate": 5.642495964376977e-07, "loss": 0.3134, "step": 42422 }, { "epoch": 4.313033753558357, "grad_norm": 0.28772833943367004, "learning_rate": 5.640858344657585e-07, "loss": 0.2867, "step": 42423 }, { "epoch": 4.313135420902806, "grad_norm": 0.28871363401412964, "learning_rate": 5.639220948408997e-07, "loss": 0.3176, "step": 42424 }, { "epoch": 4.313237088247255, "grad_norm": 0.2944110035896301, "learning_rate": 5.637583775639477e-07, "loss": 0.2775, "step": 42425 }, { "epoch": 4.313338755591704, "grad_norm": 0.2693805694580078, "learning_rate": 5.635946826357275e-07, "loss": 0.3182, "step": 42426 }, { "epoch": 4.313440422936153, "grad_norm": 0.2655230164527893, "learning_rate": 5.634310100570628e-07, "loss": 0.3015, "step": 42427 }, { "epoch": 4.313542090280602, "grad_norm": 0.26016852259635925, "learning_rate": 5.632673598287769e-07, "loss": 0.2969, "step": 42428 }, { "epoch": 4.313643757625051, "grad_norm": 0.26680871844291687, "learning_rate": 5.631037319516975e-07, "loss": 0.2829, "step": 42429 }, { "epoch": 4.3137454249694995, "grad_norm": 0.25315365195274353, "learning_rate": 5.629401264266465e-07, "loss": 0.2914, "step": 42430 }, { "epoch": 4.3138470923139485, "grad_norm": 0.307628870010376, "learning_rate": 5.627765432544479e-07, "loss": 0.3021, "step": 42431 }, { "epoch": 4.313948759658397, "grad_norm": 0.2843858301639557, "learning_rate": 5.626129824359277e-07, "loss": 0.2861, "step": 42432 }, { "epoch": 4.314050427002846, "grad_norm": 0.2940233647823334, "learning_rate": 5.624494439719091e-07, "loss": 0.3252, "step": 42433 }, { "epoch": 4.314152094347295, "grad_norm": 0.30704766511917114, "learning_rate": 5.622859278632142e-07, "loss": 0.2956, "step": 42434 }, { "epoch": 4.314253761691744, "grad_norm": 0.3009239435195923, "learning_rate": 5.621224341106696e-07, "loss": 0.2901, "step": 42435 }, { "epoch": 4.314355429036193, "grad_norm": 0.2655782103538513, "learning_rate": 5.619589627150973e-07, "loss": 0.34, "step": 42436 }, { "epoch": 4.314457096380643, "grad_norm": 0.2957571744918823, "learning_rate": 5.617955136773212e-07, "loss": 0.3157, "step": 42437 }, { "epoch": 4.314558763725092, "grad_norm": 0.27448609471321106, "learning_rate": 5.616320869981634e-07, "loss": 0.2734, "step": 42438 }, { "epoch": 4.314660431069541, "grad_norm": 0.27565035223960876, "learning_rate": 5.614686826784499e-07, "loss": 0.3109, "step": 42439 }, { "epoch": 4.31476209841399, "grad_norm": 0.27813878655433655, "learning_rate": 5.613053007190022e-07, "loss": 0.3022, "step": 42440 }, { "epoch": 4.314863765758439, "grad_norm": 0.27492210268974304, "learning_rate": 5.611419411206426e-07, "loss": 0.2868, "step": 42441 }, { "epoch": 4.314965433102888, "grad_norm": 0.2996823787689209, "learning_rate": 5.609786038841958e-07, "loss": 0.2839, "step": 42442 }, { "epoch": 4.315067100447337, "grad_norm": 0.2818402349948883, "learning_rate": 5.60815289010484e-07, "loss": 0.2956, "step": 42443 }, { "epoch": 4.315168767791786, "grad_norm": 0.2798202931880951, "learning_rate": 5.606519965003294e-07, "loss": 0.292, "step": 42444 }, { "epoch": 4.3152704351362345, "grad_norm": 0.2869719862937927, "learning_rate": 5.604887263545555e-07, "loss": 0.2848, "step": 42445 }, { "epoch": 4.3153721024806835, "grad_norm": 0.29482337832450867, "learning_rate": 5.603254785739842e-07, "loss": 0.2912, "step": 42446 }, { "epoch": 4.315473769825132, "grad_norm": 0.30475127696990967, "learning_rate": 5.601622531594386e-07, "loss": 0.2714, "step": 42447 }, { "epoch": 4.315575437169581, "grad_norm": 0.2642449736595154, "learning_rate": 5.599990501117397e-07, "loss": 0.3058, "step": 42448 }, { "epoch": 4.31567710451403, "grad_norm": 0.27985987067222595, "learning_rate": 5.598358694317113e-07, "loss": 0.2908, "step": 42449 }, { "epoch": 4.315778771858479, "grad_norm": 0.2942661643028259, "learning_rate": 5.596727111201744e-07, "loss": 0.3337, "step": 42450 }, { "epoch": 4.315880439202928, "grad_norm": 0.3145114779472351, "learning_rate": 5.5950957517795e-07, "loss": 0.3, "step": 42451 }, { "epoch": 4.315982106547377, "grad_norm": 0.29332008957862854, "learning_rate": 5.59346461605863e-07, "loss": 0.2865, "step": 42452 }, { "epoch": 4.316083773891826, "grad_norm": 0.26365360617637634, "learning_rate": 5.591833704047323e-07, "loss": 0.2995, "step": 42453 }, { "epoch": 4.316185441236275, "grad_norm": 0.26198869943618774, "learning_rate": 5.590203015753804e-07, "loss": 0.2698, "step": 42454 }, { "epoch": 4.316287108580724, "grad_norm": 0.2933571934700012, "learning_rate": 5.588572551186289e-07, "loss": 0.2465, "step": 42455 }, { "epoch": 4.316388775925173, "grad_norm": 0.28149479627609253, "learning_rate": 5.586942310353e-07, "loss": 0.304, "step": 42456 }, { "epoch": 4.316490443269622, "grad_norm": 0.27325108647346497, "learning_rate": 5.585312293262135e-07, "loss": 0.3055, "step": 42457 }, { "epoch": 4.316592110614071, "grad_norm": 0.26379838585853577, "learning_rate": 5.583682499921905e-07, "loss": 0.2941, "step": 42458 }, { "epoch": 4.31669377795852, "grad_norm": 0.2796669602394104, "learning_rate": 5.582052930340543e-07, "loss": 0.2898, "step": 42459 }, { "epoch": 4.316795445302969, "grad_norm": 0.28495466709136963, "learning_rate": 5.580423584526234e-07, "loss": 0.3261, "step": 42460 }, { "epoch": 4.316897112647418, "grad_norm": 0.2769976258277893, "learning_rate": 5.578794462487191e-07, "loss": 0.3295, "step": 42461 }, { "epoch": 4.3169987799918665, "grad_norm": 0.27141037583351135, "learning_rate": 5.577165564231635e-07, "loss": 0.3312, "step": 42462 }, { "epoch": 4.3171004473363155, "grad_norm": 0.2654186487197876, "learning_rate": 5.575536889767757e-07, "loss": 0.3038, "step": 42463 }, { "epoch": 4.317202114680764, "grad_norm": 0.2823043167591095, "learning_rate": 5.573908439103765e-07, "loss": 0.2973, "step": 42464 }, { "epoch": 4.317303782025213, "grad_norm": 0.27911460399627686, "learning_rate": 5.572280212247871e-07, "loss": 0.3169, "step": 42465 }, { "epoch": 4.317405449369662, "grad_norm": 0.2654004991054535, "learning_rate": 5.570652209208272e-07, "loss": 0.3125, "step": 42466 }, { "epoch": 4.317507116714111, "grad_norm": 0.28201916813850403, "learning_rate": 5.56902442999317e-07, "loss": 0.3175, "step": 42467 }, { "epoch": 4.31760878405856, "grad_norm": 0.2841544449329376, "learning_rate": 5.567396874610754e-07, "loss": 0.2916, "step": 42468 }, { "epoch": 4.317710451403009, "grad_norm": 0.29487690329551697, "learning_rate": 5.565769543069238e-07, "loss": 0.2915, "step": 42469 }, { "epoch": 4.317812118747458, "grad_norm": 0.29914841055870056, "learning_rate": 5.564142435376824e-07, "loss": 0.3094, "step": 42470 }, { "epoch": 4.317913786091907, "grad_norm": 0.28658056259155273, "learning_rate": 5.562515551541686e-07, "loss": 0.3042, "step": 42471 }, { "epoch": 4.318015453436356, "grad_norm": 0.30868563055992126, "learning_rate": 5.560888891572048e-07, "loss": 0.2903, "step": 42472 }, { "epoch": 4.318117120780805, "grad_norm": 0.28802552819252014, "learning_rate": 5.559262455476089e-07, "loss": 0.3221, "step": 42473 }, { "epoch": 4.318218788125254, "grad_norm": 0.2820641100406647, "learning_rate": 5.557636243261999e-07, "loss": 0.2728, "step": 42474 }, { "epoch": 4.318320455469703, "grad_norm": 0.27305617928504944, "learning_rate": 5.556010254937982e-07, "loss": 0.3192, "step": 42475 }, { "epoch": 4.318422122814152, "grad_norm": 0.2740086019039154, "learning_rate": 5.554384490512227e-07, "loss": 0.2904, "step": 42476 }, { "epoch": 4.318523790158601, "grad_norm": 0.27981942892074585, "learning_rate": 5.55275894999292e-07, "loss": 0.2645, "step": 42477 }, { "epoch": 4.31862545750305, "grad_norm": 0.292889803647995, "learning_rate": 5.55113363338824e-07, "loss": 0.2916, "step": 42478 }, { "epoch": 4.3187271248474985, "grad_norm": 0.27729320526123047, "learning_rate": 5.549508540706394e-07, "loss": 0.2509, "step": 42479 }, { "epoch": 4.3188287921919475, "grad_norm": 0.2923678755760193, "learning_rate": 5.547883671955567e-07, "loss": 0.2696, "step": 42480 }, { "epoch": 4.318930459536397, "grad_norm": 0.2707235515117645, "learning_rate": 5.546259027143925e-07, "loss": 0.3118, "step": 42481 }, { "epoch": 4.319032126880846, "grad_norm": 0.26483768224716187, "learning_rate": 5.544634606279675e-07, "loss": 0.3126, "step": 42482 }, { "epoch": 4.319133794225295, "grad_norm": 0.27970218658447266, "learning_rate": 5.543010409370991e-07, "loss": 0.2967, "step": 42483 }, { "epoch": 4.319235461569744, "grad_norm": 0.2693829834461212, "learning_rate": 5.541386436426049e-07, "loss": 0.2946, "step": 42484 }, { "epoch": 4.319337128914193, "grad_norm": 0.29653412103652954, "learning_rate": 5.539762687453048e-07, "loss": 0.2959, "step": 42485 }, { "epoch": 4.319438796258642, "grad_norm": 0.2785630226135254, "learning_rate": 5.538139162460154e-07, "loss": 0.3324, "step": 42486 }, { "epoch": 4.319540463603091, "grad_norm": 0.3036017119884491, "learning_rate": 5.536515861455549e-07, "loss": 0.3336, "step": 42487 }, { "epoch": 4.31964213094754, "grad_norm": 0.28398096561431885, "learning_rate": 5.534892784447404e-07, "loss": 0.2932, "step": 42488 }, { "epoch": 4.319743798291989, "grad_norm": 0.2932848334312439, "learning_rate": 5.533269931443913e-07, "loss": 0.2674, "step": 42489 }, { "epoch": 4.319845465636438, "grad_norm": 0.28465765714645386, "learning_rate": 5.531647302453236e-07, "loss": 0.2972, "step": 42490 }, { "epoch": 4.319947132980887, "grad_norm": 0.2858439087867737, "learning_rate": 5.53002489748355e-07, "loss": 0.2486, "step": 42491 }, { "epoch": 4.320048800325336, "grad_norm": 0.27038446068763733, "learning_rate": 5.528402716543035e-07, "loss": 0.3014, "step": 42492 }, { "epoch": 4.320150467669785, "grad_norm": 0.2863677442073822, "learning_rate": 5.526780759639861e-07, "loss": 0.2926, "step": 42493 }, { "epoch": 4.3202521350142336, "grad_norm": 0.3028882145881653, "learning_rate": 5.525159026782184e-07, "loss": 0.3123, "step": 42494 }, { "epoch": 4.3203538023586825, "grad_norm": 0.2802406847476959, "learning_rate": 5.523537517978201e-07, "loss": 0.309, "step": 42495 }, { "epoch": 4.320455469703131, "grad_norm": 0.2770795524120331, "learning_rate": 5.521916233236063e-07, "loss": 0.3111, "step": 42496 }, { "epoch": 4.32055713704758, "grad_norm": 0.26865583658218384, "learning_rate": 5.520295172563944e-07, "loss": 0.2973, "step": 42497 }, { "epoch": 4.320658804392029, "grad_norm": 0.2866598069667816, "learning_rate": 5.518674335969998e-07, "loss": 0.2729, "step": 42498 }, { "epoch": 4.320760471736478, "grad_norm": 0.26980167627334595, "learning_rate": 5.517053723462412e-07, "loss": 0.2777, "step": 42499 }, { "epoch": 4.320862139080927, "grad_norm": 0.2624319791793823, "learning_rate": 5.515433335049336e-07, "loss": 0.3075, "step": 42500 }, { "epoch": 4.320963806425376, "grad_norm": 0.26477351784706116, "learning_rate": 5.513813170738924e-07, "loss": 0.2898, "step": 42501 }, { "epoch": 4.321065473769825, "grad_norm": 0.30227699875831604, "learning_rate": 5.512193230539376e-07, "loss": 0.2884, "step": 42502 }, { "epoch": 4.321167141114274, "grad_norm": 0.2958420515060425, "learning_rate": 5.5105735144588e-07, "loss": 0.268, "step": 42503 }, { "epoch": 4.321268808458723, "grad_norm": 0.2746918797492981, "learning_rate": 5.508954022505386e-07, "loss": 0.2952, "step": 42504 }, { "epoch": 4.321370475803172, "grad_norm": 0.28256452083587646, "learning_rate": 5.507334754687299e-07, "loss": 0.3091, "step": 42505 }, { "epoch": 4.321472143147621, "grad_norm": 0.27967801690101624, "learning_rate": 5.505715711012683e-07, "loss": 0.2796, "step": 42506 }, { "epoch": 4.32157381049207, "grad_norm": 0.29231780767440796, "learning_rate": 5.504096891489702e-07, "loss": 0.2942, "step": 42507 }, { "epoch": 4.321675477836519, "grad_norm": 0.28863751888275146, "learning_rate": 5.502478296126495e-07, "loss": 0.3023, "step": 42508 }, { "epoch": 4.321777145180968, "grad_norm": 0.2778019309043884, "learning_rate": 5.50085992493124e-07, "loss": 0.2759, "step": 42509 }, { "epoch": 4.321878812525417, "grad_norm": 0.264196515083313, "learning_rate": 5.499241777912073e-07, "loss": 0.2774, "step": 42510 }, { "epoch": 4.3219804798698656, "grad_norm": 0.281729519367218, "learning_rate": 5.497623855077145e-07, "loss": 0.3099, "step": 42511 }, { "epoch": 4.3220821472143145, "grad_norm": 0.33939117193222046, "learning_rate": 5.49600615643463e-07, "loss": 0.3051, "step": 42512 }, { "epoch": 4.322183814558763, "grad_norm": 0.2717302143573761, "learning_rate": 5.494388681992647e-07, "loss": 0.3093, "step": 42513 }, { "epoch": 4.322285481903212, "grad_norm": 0.303672730922699, "learning_rate": 5.492771431759353e-07, "loss": 0.313, "step": 42514 }, { "epoch": 4.322387149247661, "grad_norm": 0.27665942907333374, "learning_rate": 5.491154405742904e-07, "loss": 0.3268, "step": 42515 }, { "epoch": 4.32248881659211, "grad_norm": 0.29638203978538513, "learning_rate": 5.48953760395145e-07, "loss": 0.2773, "step": 42516 }, { "epoch": 4.322590483936559, "grad_norm": 0.29453301429748535, "learning_rate": 5.487921026393128e-07, "loss": 0.3181, "step": 42517 }, { "epoch": 4.322692151281008, "grad_norm": 0.2749963104724884, "learning_rate": 5.48630467307607e-07, "loss": 0.3098, "step": 42518 }, { "epoch": 4.322793818625458, "grad_norm": 0.27861806750297546, "learning_rate": 5.484688544008443e-07, "loss": 0.2715, "step": 42519 }, { "epoch": 4.322895485969907, "grad_norm": 0.2902684807777405, "learning_rate": 5.483072639198372e-07, "loss": 0.3159, "step": 42520 }, { "epoch": 4.322997153314356, "grad_norm": 0.3011741042137146, "learning_rate": 5.481456958653996e-07, "loss": 0.2764, "step": 42521 }, { "epoch": 4.323098820658805, "grad_norm": 0.27123939990997314, "learning_rate": 5.479841502383482e-07, "loss": 0.2786, "step": 42522 }, { "epoch": 4.323200488003254, "grad_norm": 0.25904199481010437, "learning_rate": 5.478226270394921e-07, "loss": 0.3257, "step": 42523 }, { "epoch": 4.323302155347703, "grad_norm": 0.257159024477005, "learning_rate": 5.476611262696491e-07, "loss": 0.2631, "step": 42524 }, { "epoch": 4.323403822692152, "grad_norm": 0.28381890058517456, "learning_rate": 5.474996479296313e-07, "loss": 0.3105, "step": 42525 }, { "epoch": 4.323505490036601, "grad_norm": 0.2986575663089752, "learning_rate": 5.473381920202508e-07, "loss": 0.3017, "step": 42526 }, { "epoch": 4.3236071573810495, "grad_norm": 0.2792971134185791, "learning_rate": 5.471767585423237e-07, "loss": 0.2902, "step": 42527 }, { "epoch": 4.3237088247254984, "grad_norm": 0.26285457611083984, "learning_rate": 5.47015347496661e-07, "loss": 0.2843, "step": 42528 }, { "epoch": 4.323810492069947, "grad_norm": 0.2709956765174866, "learning_rate": 5.468539588840787e-07, "loss": 0.2906, "step": 42529 }, { "epoch": 4.323912159414396, "grad_norm": 0.3031226694583893, "learning_rate": 5.466925927053857e-07, "loss": 0.2878, "step": 42530 }, { "epoch": 4.324013826758845, "grad_norm": 0.2865599989891052, "learning_rate": 5.465312489613978e-07, "loss": 0.3028, "step": 42531 }, { "epoch": 4.324115494103294, "grad_norm": 0.2772546708583832, "learning_rate": 5.463699276529283e-07, "loss": 0.2748, "step": 42532 }, { "epoch": 4.324217161447743, "grad_norm": 0.2739567756652832, "learning_rate": 5.462086287807872e-07, "loss": 0.327, "step": 42533 }, { "epoch": 4.324318828792192, "grad_norm": 0.26264920830726624, "learning_rate": 5.460473523457898e-07, "loss": 0.3144, "step": 42534 }, { "epoch": 4.324420496136641, "grad_norm": 0.2849172055721283, "learning_rate": 5.458860983487474e-07, "loss": 0.2748, "step": 42535 }, { "epoch": 4.32452216348109, "grad_norm": 0.2818048894405365, "learning_rate": 5.457248667904707e-07, "loss": 0.2992, "step": 42536 }, { "epoch": 4.324623830825539, "grad_norm": 0.28928086161613464, "learning_rate": 5.455636576717755e-07, "loss": 0.296, "step": 42537 }, { "epoch": 4.324725498169988, "grad_norm": 0.26235124468803406, "learning_rate": 5.454024709934714e-07, "loss": 0.3091, "step": 42538 }, { "epoch": 4.324827165514437, "grad_norm": 0.2979668378829956, "learning_rate": 5.452413067563716e-07, "loss": 0.3132, "step": 42539 }, { "epoch": 4.324928832858886, "grad_norm": 0.28673145174980164, "learning_rate": 5.450801649612864e-07, "loss": 0.2824, "step": 42540 }, { "epoch": 4.325030500203335, "grad_norm": 0.24688637256622314, "learning_rate": 5.44919045609028e-07, "loss": 0.2905, "step": 42541 }, { "epoch": 4.325132167547784, "grad_norm": 0.2533572316169739, "learning_rate": 5.447579487004117e-07, "loss": 0.3135, "step": 42542 }, { "epoch": 4.325233834892233, "grad_norm": 0.2727336883544922, "learning_rate": 5.445968742362435e-07, "loss": 0.2841, "step": 42543 }, { "epoch": 4.3253355022366815, "grad_norm": 0.28668877482414246, "learning_rate": 5.444358222173391e-07, "loss": 0.2903, "step": 42544 }, { "epoch": 4.3254371695811304, "grad_norm": 0.260549396276474, "learning_rate": 5.442747926445074e-07, "loss": 0.2605, "step": 42545 }, { "epoch": 4.325538836925579, "grad_norm": 0.275438517332077, "learning_rate": 5.441137855185602e-07, "loss": 0.274, "step": 42546 }, { "epoch": 4.325640504270028, "grad_norm": 0.27620047330856323, "learning_rate": 5.439528008403094e-07, "loss": 0.3, "step": 42547 }, { "epoch": 4.325742171614477, "grad_norm": 0.2712602913379669, "learning_rate": 5.437918386105656e-07, "loss": 0.2975, "step": 42548 }, { "epoch": 4.325843838958926, "grad_norm": 0.2755330502986908, "learning_rate": 5.436308988301392e-07, "loss": 0.2886, "step": 42549 }, { "epoch": 4.325945506303375, "grad_norm": 0.27815955877304077, "learning_rate": 5.434699814998413e-07, "loss": 0.2965, "step": 42550 }, { "epoch": 4.326047173647824, "grad_norm": 0.27056819200515747, "learning_rate": 5.433090866204816e-07, "loss": 0.3054, "step": 42551 }, { "epoch": 4.326148840992273, "grad_norm": 0.26397979259490967, "learning_rate": 5.43148214192874e-07, "loss": 0.3107, "step": 42552 }, { "epoch": 4.326250508336722, "grad_norm": 0.2727099657058716, "learning_rate": 5.429873642178246e-07, "loss": 0.2853, "step": 42553 }, { "epoch": 4.326352175681171, "grad_norm": 0.27444756031036377, "learning_rate": 5.428265366961466e-07, "loss": 0.2984, "step": 42554 }, { "epoch": 4.32645384302562, "grad_norm": 0.27328863739967346, "learning_rate": 5.426657316286499e-07, "loss": 0.2918, "step": 42555 }, { "epoch": 4.326555510370069, "grad_norm": 0.29864275455474854, "learning_rate": 5.425049490161422e-07, "loss": 0.2843, "step": 42556 }, { "epoch": 4.326657177714518, "grad_norm": 0.2706998586654663, "learning_rate": 5.423441888594367e-07, "loss": 0.2802, "step": 42557 }, { "epoch": 4.326758845058967, "grad_norm": 0.2827545702457428, "learning_rate": 5.421834511593421e-07, "loss": 0.3297, "step": 42558 }, { "epoch": 4.326860512403416, "grad_norm": 0.2818904519081116, "learning_rate": 5.420227359166675e-07, "loss": 0.3178, "step": 42559 }, { "epoch": 4.326962179747865, "grad_norm": 0.2715904712677002, "learning_rate": 5.418620431322225e-07, "loss": 0.3089, "step": 42560 }, { "epoch": 4.3270638470923135, "grad_norm": 0.28760018944740295, "learning_rate": 5.417013728068177e-07, "loss": 0.2955, "step": 42561 }, { "epoch": 4.3271655144367625, "grad_norm": 0.27597370743751526, "learning_rate": 5.415407249412618e-07, "loss": 0.2837, "step": 42562 }, { "epoch": 4.327267181781212, "grad_norm": 0.2795311510562897, "learning_rate": 5.413800995363633e-07, "loss": 0.2988, "step": 42563 }, { "epoch": 4.327368849125661, "grad_norm": 0.2767429053783417, "learning_rate": 5.412194965929335e-07, "loss": 0.31, "step": 42564 }, { "epoch": 4.32747051647011, "grad_norm": 0.2817622125148773, "learning_rate": 5.410589161117808e-07, "loss": 0.3041, "step": 42565 }, { "epoch": 4.327572183814559, "grad_norm": 0.2933749556541443, "learning_rate": 5.408983580937117e-07, "loss": 0.2986, "step": 42566 }, { "epoch": 4.327673851159008, "grad_norm": 0.27108216285705566, "learning_rate": 5.407378225395388e-07, "loss": 0.2862, "step": 42567 }, { "epoch": 4.327775518503457, "grad_norm": 0.29499730467796326, "learning_rate": 5.405773094500689e-07, "loss": 0.3151, "step": 42568 }, { "epoch": 4.327877185847906, "grad_norm": 0.2888624668121338, "learning_rate": 5.404168188261106e-07, "loss": 0.3005, "step": 42569 }, { "epoch": 4.327978853192355, "grad_norm": 0.300733745098114, "learning_rate": 5.402563506684716e-07, "loss": 0.2867, "step": 42570 }, { "epoch": 4.328080520536804, "grad_norm": 0.2610314190387726, "learning_rate": 5.400959049779619e-07, "loss": 0.3032, "step": 42571 }, { "epoch": 4.328182187881253, "grad_norm": 0.3126121461391449, "learning_rate": 5.399354817553898e-07, "loss": 0.3122, "step": 42572 }, { "epoch": 4.328283855225702, "grad_norm": 0.28107765316963196, "learning_rate": 5.397750810015617e-07, "loss": 0.291, "step": 42573 }, { "epoch": 4.328385522570151, "grad_norm": 0.2593265175819397, "learning_rate": 5.396147027172877e-07, "loss": 0.2816, "step": 42574 }, { "epoch": 4.3284871899146, "grad_norm": 0.27191755175590515, "learning_rate": 5.394543469033753e-07, "loss": 0.3178, "step": 42575 }, { "epoch": 4.3285888572590485, "grad_norm": 0.30792099237442017, "learning_rate": 5.392940135606306e-07, "loss": 0.302, "step": 42576 }, { "epoch": 4.3286905246034975, "grad_norm": 0.2880125045776367, "learning_rate": 5.391337026898636e-07, "loss": 0.2903, "step": 42577 }, { "epoch": 4.328792191947946, "grad_norm": 0.31142765283584595, "learning_rate": 5.389734142918813e-07, "loss": 0.2583, "step": 42578 }, { "epoch": 4.328893859292395, "grad_norm": 0.2613410949707031, "learning_rate": 5.388131483674902e-07, "loss": 0.2798, "step": 42579 }, { "epoch": 4.328995526636844, "grad_norm": 0.2712879478931427, "learning_rate": 5.386529049174977e-07, "loss": 0.3012, "step": 42580 }, { "epoch": 4.329097193981293, "grad_norm": 0.2702794373035431, "learning_rate": 5.384926839427129e-07, "loss": 0.3152, "step": 42581 }, { "epoch": 4.329198861325742, "grad_norm": 0.32713550329208374, "learning_rate": 5.383324854439415e-07, "loss": 0.3011, "step": 42582 }, { "epoch": 4.329300528670191, "grad_norm": 0.26510217785835266, "learning_rate": 5.381723094219898e-07, "loss": 0.2903, "step": 42583 }, { "epoch": 4.32940219601464, "grad_norm": 0.293541818857193, "learning_rate": 5.380121558776663e-07, "loss": 0.2818, "step": 42584 }, { "epoch": 4.329503863359089, "grad_norm": 0.27312174439430237, "learning_rate": 5.378520248117781e-07, "loss": 0.3066, "step": 42585 }, { "epoch": 4.329605530703538, "grad_norm": 0.2906550467014313, "learning_rate": 5.376919162251292e-07, "loss": 0.3061, "step": 42586 }, { "epoch": 4.329707198047987, "grad_norm": 0.2654818892478943, "learning_rate": 5.375318301185295e-07, "loss": 0.2767, "step": 42587 }, { "epoch": 4.329808865392436, "grad_norm": 0.28696170449256897, "learning_rate": 5.373717664927835e-07, "loss": 0.2865, "step": 42588 }, { "epoch": 4.329910532736885, "grad_norm": 0.27314579486846924, "learning_rate": 5.372117253486986e-07, "loss": 0.2972, "step": 42589 }, { "epoch": 4.330012200081334, "grad_norm": 0.29202187061309814, "learning_rate": 5.370517066870795e-07, "loss": 0.2926, "step": 42590 }, { "epoch": 4.330113867425783, "grad_norm": 0.2912704348564148, "learning_rate": 5.36891710508734e-07, "loss": 0.2766, "step": 42591 }, { "epoch": 4.330215534770232, "grad_norm": 0.2644587457180023, "learning_rate": 5.367317368144675e-07, "loss": 0.2828, "step": 42592 }, { "epoch": 4.3303172021146805, "grad_norm": 0.29325544834136963, "learning_rate": 5.365717856050851e-07, "loss": 0.2537, "step": 42593 }, { "epoch": 4.3304188694591295, "grad_norm": 0.29220035672187805, "learning_rate": 5.364118568813942e-07, "loss": 0.3105, "step": 42594 }, { "epoch": 4.330520536803578, "grad_norm": 0.27685776352882385, "learning_rate": 5.362519506441999e-07, "loss": 0.3294, "step": 42595 }, { "epoch": 4.330622204148027, "grad_norm": 0.2648620903491974, "learning_rate": 5.360920668943064e-07, "loss": 0.2795, "step": 42596 }, { "epoch": 4.330723871492476, "grad_norm": 0.2850934863090515, "learning_rate": 5.359322056325212e-07, "loss": 0.3568, "step": 42597 }, { "epoch": 4.330825538836925, "grad_norm": 0.28006890416145325, "learning_rate": 5.357723668596487e-07, "loss": 0.2895, "step": 42598 }, { "epoch": 4.330927206181374, "grad_norm": 0.2895353138446808, "learning_rate": 5.356125505764942e-07, "loss": 0.2939, "step": 42599 }, { "epoch": 4.331028873525823, "grad_norm": 0.2878684997558594, "learning_rate": 5.354527567838619e-07, "loss": 0.2861, "step": 42600 }, { "epoch": 4.331130540870273, "grad_norm": 0.2768092453479767, "learning_rate": 5.352929854825583e-07, "loss": 0.2777, "step": 42601 }, { "epoch": 4.331232208214722, "grad_norm": 0.2816371023654938, "learning_rate": 5.351332366733881e-07, "loss": 0.3021, "step": 42602 }, { "epoch": 4.331333875559171, "grad_norm": 0.3086695671081543, "learning_rate": 5.349735103571546e-07, "loss": 0.3186, "step": 42603 }, { "epoch": 4.33143554290362, "grad_norm": 0.2880963981151581, "learning_rate": 5.348138065346642e-07, "loss": 0.2898, "step": 42604 }, { "epoch": 4.331537210248069, "grad_norm": 0.2654562294483185, "learning_rate": 5.346541252067206e-07, "loss": 0.2603, "step": 42605 }, { "epoch": 4.331638877592518, "grad_norm": 0.2860048711299896, "learning_rate": 5.344944663741281e-07, "loss": 0.3183, "step": 42606 }, { "epoch": 4.331740544936967, "grad_norm": 0.27365803718566895, "learning_rate": 5.343348300376916e-07, "loss": 0.3175, "step": 42607 }, { "epoch": 4.3318422122814155, "grad_norm": 0.28757569193840027, "learning_rate": 5.341752161982155e-07, "loss": 0.3233, "step": 42608 }, { "epoch": 4.3319438796258645, "grad_norm": 0.273363322019577, "learning_rate": 5.340156248565031e-07, "loss": 0.2791, "step": 42609 }, { "epoch": 4.332045546970313, "grad_norm": 0.2768442928791046, "learning_rate": 5.338560560133576e-07, "loss": 0.2989, "step": 42610 }, { "epoch": 4.332147214314762, "grad_norm": 0.2898751199245453, "learning_rate": 5.336965096695856e-07, "loss": 0.3063, "step": 42611 }, { "epoch": 4.332248881659211, "grad_norm": 0.2519710958003998, "learning_rate": 5.335369858259887e-07, "loss": 0.305, "step": 42612 }, { "epoch": 4.33235054900366, "grad_norm": 0.2963184416294098, "learning_rate": 5.3337748448337e-07, "loss": 0.3181, "step": 42613 }, { "epoch": 4.332452216348109, "grad_norm": 0.274442195892334, "learning_rate": 5.332180056425357e-07, "loss": 0.2856, "step": 42614 }, { "epoch": 4.332553883692558, "grad_norm": 0.27912625670433044, "learning_rate": 5.330585493042873e-07, "loss": 0.3072, "step": 42615 }, { "epoch": 4.332655551037007, "grad_norm": 0.2843332588672638, "learning_rate": 5.328991154694268e-07, "loss": 0.2609, "step": 42616 }, { "epoch": 4.332757218381456, "grad_norm": 0.29288455843925476, "learning_rate": 5.327397041387611e-07, "loss": 0.2982, "step": 42617 }, { "epoch": 4.332858885725905, "grad_norm": 0.2810671329498291, "learning_rate": 5.325803153130904e-07, "loss": 0.3167, "step": 42618 }, { "epoch": 4.332960553070354, "grad_norm": 0.27130255103111267, "learning_rate": 5.324209489932186e-07, "loss": 0.3038, "step": 42619 }, { "epoch": 4.333062220414803, "grad_norm": 0.2616901397705078, "learning_rate": 5.322616051799478e-07, "loss": 0.2944, "step": 42620 }, { "epoch": 4.333163887759252, "grad_norm": 0.2695035934448242, "learning_rate": 5.321022838740819e-07, "loss": 0.3062, "step": 42621 }, { "epoch": 4.333265555103701, "grad_norm": 0.28623828291893005, "learning_rate": 5.319429850764229e-07, "loss": 0.2961, "step": 42622 }, { "epoch": 4.33336722244815, "grad_norm": 0.27431032061576843, "learning_rate": 5.317837087877725e-07, "loss": 0.285, "step": 42623 }, { "epoch": 4.333468889792599, "grad_norm": 0.27560746669769287, "learning_rate": 5.31624455008935e-07, "loss": 0.2854, "step": 42624 }, { "epoch": 4.3335705571370475, "grad_norm": 0.26990628242492676, "learning_rate": 5.31465223740712e-07, "loss": 0.2917, "step": 42625 }, { "epoch": 4.3336722244814965, "grad_norm": 0.28163468837738037, "learning_rate": 5.313060149839038e-07, "loss": 0.2864, "step": 42626 }, { "epoch": 4.333773891825945, "grad_norm": 0.2516801059246063, "learning_rate": 5.311468287393152e-07, "loss": 0.3024, "step": 42627 }, { "epoch": 4.333875559170394, "grad_norm": 0.27709802985191345, "learning_rate": 5.309876650077467e-07, "loss": 0.3152, "step": 42628 }, { "epoch": 4.333977226514843, "grad_norm": 0.278300940990448, "learning_rate": 5.308285237900007e-07, "loss": 0.2886, "step": 42629 }, { "epoch": 4.334078893859292, "grad_norm": 0.27564769983291626, "learning_rate": 5.306694050868771e-07, "loss": 0.2688, "step": 42630 }, { "epoch": 4.334180561203741, "grad_norm": 0.26566359400749207, "learning_rate": 5.305103088991803e-07, "loss": 0.3053, "step": 42631 }, { "epoch": 4.33428222854819, "grad_norm": 0.2600061595439911, "learning_rate": 5.303512352277107e-07, "loss": 0.3158, "step": 42632 }, { "epoch": 4.334383895892639, "grad_norm": 0.2741413712501526, "learning_rate": 5.301921840732677e-07, "loss": 0.298, "step": 42633 }, { "epoch": 4.334485563237088, "grad_norm": 0.28279024362564087, "learning_rate": 5.300331554366561e-07, "loss": 0.3033, "step": 42634 }, { "epoch": 4.334587230581537, "grad_norm": 0.29306313395500183, "learning_rate": 5.298741493186749e-07, "loss": 0.2653, "step": 42635 }, { "epoch": 4.334688897925986, "grad_norm": 0.2952111065387726, "learning_rate": 5.297151657201244e-07, "loss": 0.2927, "step": 42636 }, { "epoch": 4.334790565270435, "grad_norm": 0.2420334815979004, "learning_rate": 5.295562046418074e-07, "loss": 0.2828, "step": 42637 }, { "epoch": 4.334892232614884, "grad_norm": 0.27585095167160034, "learning_rate": 5.293972660845243e-07, "loss": 0.3051, "step": 42638 }, { "epoch": 4.334993899959333, "grad_norm": 0.27901190519332886, "learning_rate": 5.29238350049075e-07, "loss": 0.2898, "step": 42639 }, { "epoch": 4.335095567303782, "grad_norm": 0.30240383744239807, "learning_rate": 5.290794565362595e-07, "loss": 0.3008, "step": 42640 }, { "epoch": 4.335197234648231, "grad_norm": 0.2761670649051666, "learning_rate": 5.289205855468804e-07, "loss": 0.2948, "step": 42641 }, { "epoch": 4.3352989019926795, "grad_norm": 0.2759207785129547, "learning_rate": 5.287617370817371e-07, "loss": 0.2914, "step": 42642 }, { "epoch": 4.3354005693371285, "grad_norm": 0.29764872789382935, "learning_rate": 5.286029111416285e-07, "loss": 0.2512, "step": 42643 }, { "epoch": 4.335502236681577, "grad_norm": 0.25542551279067993, "learning_rate": 5.284441077273566e-07, "loss": 0.3253, "step": 42644 }, { "epoch": 4.335603904026027, "grad_norm": 0.2889483869075775, "learning_rate": 5.282853268397208e-07, "loss": 0.3196, "step": 42645 }, { "epoch": 4.335705571370476, "grad_norm": 0.2784278392791748, "learning_rate": 5.281265684795201e-07, "loss": 0.2893, "step": 42646 }, { "epoch": 4.335807238714925, "grad_norm": 0.273517906665802, "learning_rate": 5.279678326475557e-07, "loss": 0.3279, "step": 42647 }, { "epoch": 4.335908906059374, "grad_norm": 0.2778087258338928, "learning_rate": 5.278091193446272e-07, "loss": 0.2801, "step": 42648 }, { "epoch": 4.336010573403823, "grad_norm": 0.2731958031654358, "learning_rate": 5.276504285715334e-07, "loss": 0.2978, "step": 42649 }, { "epoch": 4.336112240748272, "grad_norm": 0.279227614402771, "learning_rate": 5.274917603290724e-07, "loss": 0.2816, "step": 42650 }, { "epoch": 4.336213908092721, "grad_norm": 0.28861284255981445, "learning_rate": 5.273331146180466e-07, "loss": 0.2901, "step": 42651 }, { "epoch": 4.33631557543717, "grad_norm": 0.2778014838695526, "learning_rate": 5.271744914392535e-07, "loss": 0.2823, "step": 42652 }, { "epoch": 4.336417242781619, "grad_norm": 0.2776893675327301, "learning_rate": 5.270158907934919e-07, "loss": 0.322, "step": 42653 }, { "epoch": 4.336518910126068, "grad_norm": 0.28993529081344604, "learning_rate": 5.268573126815629e-07, "loss": 0.3, "step": 42654 }, { "epoch": 4.336620577470517, "grad_norm": 0.26948028802871704, "learning_rate": 5.266987571042614e-07, "loss": 0.3092, "step": 42655 }, { "epoch": 4.336722244814966, "grad_norm": 0.30567824840545654, "learning_rate": 5.26540224062389e-07, "loss": 0.2909, "step": 42656 }, { "epoch": 4.3368239121594145, "grad_norm": 0.27910053730010986, "learning_rate": 5.26381713556745e-07, "loss": 0.2766, "step": 42657 }, { "epoch": 4.3369255795038635, "grad_norm": 0.2768615484237671, "learning_rate": 5.262232255881266e-07, "loss": 0.2766, "step": 42658 }, { "epoch": 4.337027246848312, "grad_norm": 0.28768786787986755, "learning_rate": 5.260647601573327e-07, "loss": 0.2925, "step": 42659 }, { "epoch": 4.337128914192761, "grad_norm": 0.2990855574607849, "learning_rate": 5.259063172651601e-07, "loss": 0.2786, "step": 42660 }, { "epoch": 4.33723058153721, "grad_norm": 0.2734663486480713, "learning_rate": 5.257478969124092e-07, "loss": 0.2843, "step": 42661 }, { "epoch": 4.337332248881659, "grad_norm": 0.24870829284191132, "learning_rate": 5.255894990998772e-07, "loss": 0.3163, "step": 42662 }, { "epoch": 4.337433916226108, "grad_norm": 0.26299262046813965, "learning_rate": 5.254311238283611e-07, "loss": 0.307, "step": 42663 }, { "epoch": 4.337535583570557, "grad_norm": 0.2788896858692169, "learning_rate": 5.252727710986616e-07, "loss": 0.3016, "step": 42664 }, { "epoch": 4.337637250915006, "grad_norm": 0.2736177146434784, "learning_rate": 5.251144409115727e-07, "loss": 0.2632, "step": 42665 }, { "epoch": 4.337738918259455, "grad_norm": 0.30229824781417847, "learning_rate": 5.249561332678937e-07, "loss": 0.3088, "step": 42666 }, { "epoch": 4.337840585603904, "grad_norm": 0.2752076983451843, "learning_rate": 5.247978481684235e-07, "loss": 0.3001, "step": 42667 }, { "epoch": 4.337942252948353, "grad_norm": 0.33017224073410034, "learning_rate": 5.24639585613958e-07, "loss": 0.3377, "step": 42668 }, { "epoch": 4.338043920292802, "grad_norm": 0.2811978757381439, "learning_rate": 5.244813456052949e-07, "loss": 0.3023, "step": 42669 }, { "epoch": 4.338145587637251, "grad_norm": 0.26699262857437134, "learning_rate": 5.243231281432304e-07, "loss": 0.2964, "step": 42670 }, { "epoch": 4.3382472549817, "grad_norm": 0.3088126480579376, "learning_rate": 5.241649332285631e-07, "loss": 0.2766, "step": 42671 }, { "epoch": 4.338348922326149, "grad_norm": 0.26510727405548096, "learning_rate": 5.240067608620891e-07, "loss": 0.3126, "step": 42672 }, { "epoch": 4.338450589670598, "grad_norm": 0.282414972782135, "learning_rate": 5.238486110446051e-07, "loss": 0.2929, "step": 42673 }, { "epoch": 4.3385522570150465, "grad_norm": 0.2718428671360016, "learning_rate": 5.236904837769092e-07, "loss": 0.2939, "step": 42674 }, { "epoch": 4.3386539243594955, "grad_norm": 0.26858559250831604, "learning_rate": 5.235323790597951e-07, "loss": 0.3084, "step": 42675 }, { "epoch": 4.338755591703944, "grad_norm": 0.28686976432800293, "learning_rate": 5.233742968940614e-07, "loss": 0.3002, "step": 42676 }, { "epoch": 4.338857259048393, "grad_norm": 0.2966618835926056, "learning_rate": 5.23216237280506e-07, "loss": 0.2641, "step": 42677 }, { "epoch": 4.338958926392842, "grad_norm": 0.27109262347221375, "learning_rate": 5.23058200219922e-07, "loss": 0.2801, "step": 42678 }, { "epoch": 4.339060593737291, "grad_norm": 0.2713966369628906, "learning_rate": 5.229001857131072e-07, "loss": 0.3187, "step": 42679 }, { "epoch": 4.33916226108174, "grad_norm": 0.2823010981082916, "learning_rate": 5.227421937608562e-07, "loss": 0.284, "step": 42680 }, { "epoch": 4.339263928426189, "grad_norm": 0.2679266929626465, "learning_rate": 5.225842243639679e-07, "loss": 0.2895, "step": 42681 }, { "epoch": 4.339365595770638, "grad_norm": 0.26008179783821106, "learning_rate": 5.224262775232352e-07, "loss": 0.2866, "step": 42682 }, { "epoch": 4.339467263115088, "grad_norm": 0.28437596559524536, "learning_rate": 5.222683532394546e-07, "loss": 0.2619, "step": 42683 }, { "epoch": 4.339568930459537, "grad_norm": 0.28542986512184143, "learning_rate": 5.22110451513424e-07, "loss": 0.2912, "step": 42684 }, { "epoch": 4.339670597803986, "grad_norm": 0.27505776286125183, "learning_rate": 5.219525723459346e-07, "loss": 0.2925, "step": 42685 }, { "epoch": 4.339772265148435, "grad_norm": 0.2640886604785919, "learning_rate": 5.217947157377839e-07, "loss": 0.304, "step": 42686 }, { "epoch": 4.339873932492884, "grad_norm": 0.2816985249519348, "learning_rate": 5.216368816897694e-07, "loss": 0.2545, "step": 42687 }, { "epoch": 4.339975599837333, "grad_norm": 0.2918197214603424, "learning_rate": 5.214790702026823e-07, "loss": 0.2949, "step": 42688 }, { "epoch": 4.3400772671817816, "grad_norm": 0.299934446811676, "learning_rate": 5.2132128127732e-07, "loss": 0.2999, "step": 42689 }, { "epoch": 4.3401789345262305, "grad_norm": 0.2827945351600647, "learning_rate": 5.211635149144767e-07, "loss": 0.2849, "step": 42690 }, { "epoch": 4.340280601870679, "grad_norm": 0.3059059977531433, "learning_rate": 5.210057711149463e-07, "loss": 0.2888, "step": 42691 }, { "epoch": 4.340382269215128, "grad_norm": 0.28487759828567505, "learning_rate": 5.20848049879526e-07, "loss": 0.3014, "step": 42692 }, { "epoch": 4.340483936559577, "grad_norm": 0.25755220651626587, "learning_rate": 5.206903512090073e-07, "loss": 0.2967, "step": 42693 }, { "epoch": 4.340585603904026, "grad_norm": 0.29169267416000366, "learning_rate": 5.205326751041884e-07, "loss": 0.2865, "step": 42694 }, { "epoch": 4.340687271248475, "grad_norm": 0.2717791497707367, "learning_rate": 5.203750215658592e-07, "loss": 0.3221, "step": 42695 }, { "epoch": 4.340788938592924, "grad_norm": 0.27041196823120117, "learning_rate": 5.202173905948161e-07, "loss": 0.3616, "step": 42696 }, { "epoch": 4.340890605937373, "grad_norm": 0.26677027344703674, "learning_rate": 5.200597821918552e-07, "loss": 0.294, "step": 42697 }, { "epoch": 4.340992273281822, "grad_norm": 0.28762975335121155, "learning_rate": 5.199021963577666e-07, "loss": 0.2853, "step": 42698 }, { "epoch": 4.341093940626271, "grad_norm": 0.2667836844921112, "learning_rate": 5.197446330933476e-07, "loss": 0.3248, "step": 42699 }, { "epoch": 4.34119560797072, "grad_norm": 0.2700735926628113, "learning_rate": 5.195870923993901e-07, "loss": 0.3108, "step": 42700 }, { "epoch": 4.341297275315169, "grad_norm": 0.27137479186058044, "learning_rate": 5.19429574276687e-07, "loss": 0.2754, "step": 42701 }, { "epoch": 4.341398942659618, "grad_norm": 0.29111504554748535, "learning_rate": 5.192720787260335e-07, "loss": 0.2871, "step": 42702 }, { "epoch": 4.341500610004067, "grad_norm": 0.2776009440422058, "learning_rate": 5.191146057482222e-07, "loss": 0.3165, "step": 42703 }, { "epoch": 4.341602277348516, "grad_norm": 0.27398860454559326, "learning_rate": 5.189571553440486e-07, "loss": 0.3004, "step": 42704 }, { "epoch": 4.341703944692965, "grad_norm": 0.2786826491355896, "learning_rate": 5.18799727514302e-07, "loss": 0.3043, "step": 42705 }, { "epoch": 4.3418056120374136, "grad_norm": 0.2756367325782776, "learning_rate": 5.186423222597775e-07, "loss": 0.3185, "step": 42706 }, { "epoch": 4.3419072793818625, "grad_norm": 0.2704184055328369, "learning_rate": 5.184849395812697e-07, "loss": 0.3429, "step": 42707 }, { "epoch": 4.342008946726311, "grad_norm": 0.26391297578811646, "learning_rate": 5.183275794795678e-07, "loss": 0.3031, "step": 42708 }, { "epoch": 4.34211061407076, "grad_norm": 0.2788713276386261, "learning_rate": 5.181702419554679e-07, "loss": 0.3284, "step": 42709 }, { "epoch": 4.342212281415209, "grad_norm": 0.29740726947784424, "learning_rate": 5.180129270097611e-07, "loss": 0.2685, "step": 42710 }, { "epoch": 4.342313948759658, "grad_norm": 0.2744218707084656, "learning_rate": 5.1785563464324e-07, "loss": 0.2578, "step": 42711 }, { "epoch": 4.342415616104107, "grad_norm": 0.2858400344848633, "learning_rate": 5.176983648566964e-07, "loss": 0.3272, "step": 42712 }, { "epoch": 4.342517283448556, "grad_norm": 0.2802458703517914, "learning_rate": 5.175411176509243e-07, "loss": 0.296, "step": 42713 }, { "epoch": 4.342618950793005, "grad_norm": 0.28649047017097473, "learning_rate": 5.173838930267144e-07, "loss": 0.3116, "step": 42714 }, { "epoch": 4.342720618137454, "grad_norm": 0.2790587246417999, "learning_rate": 5.172266909848584e-07, "loss": 0.2976, "step": 42715 }, { "epoch": 4.342822285481903, "grad_norm": 0.2943381369113922, "learning_rate": 5.170695115261503e-07, "loss": 0.2834, "step": 42716 }, { "epoch": 4.342923952826352, "grad_norm": 0.291959285736084, "learning_rate": 5.169123546513804e-07, "loss": 0.2745, "step": 42717 }, { "epoch": 4.343025620170801, "grad_norm": 0.2805699408054352, "learning_rate": 5.167552203613397e-07, "loss": 0.302, "step": 42718 }, { "epoch": 4.34312728751525, "grad_norm": 0.2939128279685974, "learning_rate": 5.165981086568222e-07, "loss": 0.277, "step": 42719 }, { "epoch": 4.343228954859699, "grad_norm": 0.30094462633132935, "learning_rate": 5.164410195386177e-07, "loss": 0.3067, "step": 42720 }, { "epoch": 4.343330622204148, "grad_norm": 0.2631445825099945, "learning_rate": 5.162839530075176e-07, "loss": 0.3066, "step": 42721 }, { "epoch": 4.343432289548597, "grad_norm": 0.28541889786720276, "learning_rate": 5.161269090643128e-07, "loss": 0.3013, "step": 42722 }, { "epoch": 4.3435339568930456, "grad_norm": 0.28107237815856934, "learning_rate": 5.159698877097963e-07, "loss": 0.3598, "step": 42723 }, { "epoch": 4.3436356242374945, "grad_norm": 0.2864745855331421, "learning_rate": 5.158128889447577e-07, "loss": 0.325, "step": 42724 }, { "epoch": 4.343737291581943, "grad_norm": 0.31384706497192383, "learning_rate": 5.156559127699873e-07, "loss": 0.3041, "step": 42725 }, { "epoch": 4.343838958926392, "grad_norm": 0.28778406977653503, "learning_rate": 5.154989591862775e-07, "loss": 0.2822, "step": 42726 }, { "epoch": 4.343940626270842, "grad_norm": 0.2816457748413086, "learning_rate": 5.153420281944183e-07, "loss": 0.2948, "step": 42727 }, { "epoch": 4.344042293615291, "grad_norm": 0.31451576948165894, "learning_rate": 5.151851197951996e-07, "loss": 0.3006, "step": 42728 }, { "epoch": 4.34414396095974, "grad_norm": 0.2906424403190613, "learning_rate": 5.15028233989413e-07, "loss": 0.2766, "step": 42729 }, { "epoch": 4.344245628304189, "grad_norm": 0.27716803550720215, "learning_rate": 5.148713707778485e-07, "loss": 0.3116, "step": 42730 }, { "epoch": 4.344347295648638, "grad_norm": 0.2676253914833069, "learning_rate": 5.147145301612966e-07, "loss": 0.293, "step": 42731 }, { "epoch": 4.344448962993087, "grad_norm": 0.30311521887779236, "learning_rate": 5.145577121405454e-07, "loss": 0.285, "step": 42732 }, { "epoch": 4.344550630337536, "grad_norm": 0.2723177969455719, "learning_rate": 5.144009167163882e-07, "loss": 0.3107, "step": 42733 }, { "epoch": 4.344652297681985, "grad_norm": 0.2749318480491638, "learning_rate": 5.142441438896129e-07, "loss": 0.3022, "step": 42734 }, { "epoch": 4.344753965026434, "grad_norm": 0.3039846420288086, "learning_rate": 5.140873936610086e-07, "loss": 0.3229, "step": 42735 }, { "epoch": 4.344855632370883, "grad_norm": 0.27431538701057434, "learning_rate": 5.13930666031367e-07, "loss": 0.2857, "step": 42736 }, { "epoch": 4.344957299715332, "grad_norm": 0.2543281018733978, "learning_rate": 5.13773961001477e-07, "loss": 0.3098, "step": 42737 }, { "epoch": 4.345058967059781, "grad_norm": 0.28677472472190857, "learning_rate": 5.136172785721261e-07, "loss": 0.3086, "step": 42738 }, { "epoch": 4.3451606344042295, "grad_norm": 0.27815762162208557, "learning_rate": 5.134606187441066e-07, "loss": 0.2909, "step": 42739 }, { "epoch": 4.3452623017486784, "grad_norm": 0.2623247504234314, "learning_rate": 5.133039815182061e-07, "loss": 0.3305, "step": 42740 }, { "epoch": 4.345363969093127, "grad_norm": 0.26799094676971436, "learning_rate": 5.131473668952142e-07, "loss": 0.2984, "step": 42741 }, { "epoch": 4.345465636437576, "grad_norm": 0.29202163219451904, "learning_rate": 5.129907748759189e-07, "loss": 0.3076, "step": 42742 }, { "epoch": 4.345567303782025, "grad_norm": 0.2816329300403595, "learning_rate": 5.128342054611102e-07, "loss": 0.2796, "step": 42743 }, { "epoch": 4.345668971126474, "grad_norm": 0.2952679693698883, "learning_rate": 5.126776586515763e-07, "loss": 0.3046, "step": 42744 }, { "epoch": 4.345770638470923, "grad_norm": 0.2576681077480316, "learning_rate": 5.125211344481057e-07, "loss": 0.3288, "step": 42745 }, { "epoch": 4.345872305815372, "grad_norm": 0.2738153338432312, "learning_rate": 5.123646328514881e-07, "loss": 0.3223, "step": 42746 }, { "epoch": 4.345973973159821, "grad_norm": 0.31415069103240967, "learning_rate": 5.122081538625107e-07, "loss": 0.2731, "step": 42747 }, { "epoch": 4.34607564050427, "grad_norm": 0.2684464752674103, "learning_rate": 5.120516974819612e-07, "loss": 0.2956, "step": 42748 }, { "epoch": 4.346177307848719, "grad_norm": 0.29750314354896545, "learning_rate": 5.1189526371063e-07, "loss": 0.3017, "step": 42749 }, { "epoch": 4.346278975193168, "grad_norm": 0.3030509054660797, "learning_rate": 5.117388525493039e-07, "loss": 0.3087, "step": 42750 }, { "epoch": 4.346380642537617, "grad_norm": 0.27584484219551086, "learning_rate": 5.115824639987704e-07, "loss": 0.2773, "step": 42751 }, { "epoch": 4.346482309882066, "grad_norm": 0.2730298340320587, "learning_rate": 5.114260980598174e-07, "loss": 0.3236, "step": 42752 }, { "epoch": 4.346583977226515, "grad_norm": 0.2677151560783386, "learning_rate": 5.11269754733234e-07, "loss": 0.288, "step": 42753 }, { "epoch": 4.346685644570964, "grad_norm": 0.2657032608985901, "learning_rate": 5.111134340198065e-07, "loss": 0.2728, "step": 42754 }, { "epoch": 4.346787311915413, "grad_norm": 0.29005172848701477, "learning_rate": 5.109571359203225e-07, "loss": 0.2795, "step": 42755 }, { "epoch": 4.3468889792598615, "grad_norm": 0.27103686332702637, "learning_rate": 5.108008604355702e-07, "loss": 0.2975, "step": 42756 }, { "epoch": 4.3469906466043104, "grad_norm": 0.26402488350868225, "learning_rate": 5.106446075663362e-07, "loss": 0.3268, "step": 42757 }, { "epoch": 4.347092313948759, "grad_norm": 0.2906247675418854, "learning_rate": 5.104883773134073e-07, "loss": 0.298, "step": 42758 }, { "epoch": 4.347193981293208, "grad_norm": 0.29034242033958435, "learning_rate": 5.103321696775721e-07, "loss": 0.289, "step": 42759 }, { "epoch": 4.347295648637657, "grad_norm": 0.2791135609149933, "learning_rate": 5.10175984659616e-07, "loss": 0.3177, "step": 42760 }, { "epoch": 4.347397315982106, "grad_norm": 0.2801075577735901, "learning_rate": 5.100198222603265e-07, "loss": 0.307, "step": 42761 }, { "epoch": 4.347498983326555, "grad_norm": 0.26914647221565247, "learning_rate": 5.098636824804892e-07, "loss": 0.2972, "step": 42762 }, { "epoch": 4.347600650671004, "grad_norm": 0.2879379689693451, "learning_rate": 5.097075653208927e-07, "loss": 0.2889, "step": 42763 }, { "epoch": 4.347702318015453, "grad_norm": 0.3005962371826172, "learning_rate": 5.095514707823224e-07, "loss": 0.3022, "step": 42764 }, { "epoch": 4.347803985359903, "grad_norm": 0.29627466201782227, "learning_rate": 5.093953988655642e-07, "loss": 0.3234, "step": 42765 }, { "epoch": 4.347905652704352, "grad_norm": 0.27306830883026123, "learning_rate": 5.092393495714049e-07, "loss": 0.3265, "step": 42766 }, { "epoch": 4.348007320048801, "grad_norm": 0.2896910607814789, "learning_rate": 5.090833229006315e-07, "loss": 0.2701, "step": 42767 }, { "epoch": 4.34810898739325, "grad_norm": 0.28256121277809143, "learning_rate": 5.089273188540272e-07, "loss": 0.3078, "step": 42768 }, { "epoch": 4.348210654737699, "grad_norm": 0.30084675550460815, "learning_rate": 5.087713374323816e-07, "loss": 0.2866, "step": 42769 }, { "epoch": 4.348312322082148, "grad_norm": 0.2823255956172943, "learning_rate": 5.086153786364783e-07, "loss": 0.2984, "step": 42770 }, { "epoch": 4.3484139894265965, "grad_norm": 0.272366464138031, "learning_rate": 5.084594424671035e-07, "loss": 0.2763, "step": 42771 }, { "epoch": 4.3485156567710455, "grad_norm": 0.2715190351009369, "learning_rate": 5.083035289250421e-07, "loss": 0.2913, "step": 42772 }, { "epoch": 4.348617324115494, "grad_norm": 0.2817942202091217, "learning_rate": 5.081476380110805e-07, "loss": 0.3039, "step": 42773 }, { "epoch": 4.348718991459943, "grad_norm": 0.2597092092037201, "learning_rate": 5.07991769726004e-07, "loss": 0.3035, "step": 42774 }, { "epoch": 4.348820658804392, "grad_norm": 0.28810185194015503, "learning_rate": 5.078359240705966e-07, "loss": 0.3034, "step": 42775 }, { "epoch": 4.348922326148841, "grad_norm": 0.27608487010002136, "learning_rate": 5.076801010456451e-07, "loss": 0.277, "step": 42776 }, { "epoch": 4.34902399349329, "grad_norm": 0.3005636930465698, "learning_rate": 5.075243006519337e-07, "loss": 0.2901, "step": 42777 }, { "epoch": 4.349125660837739, "grad_norm": 0.29670602083206177, "learning_rate": 5.073685228902464e-07, "loss": 0.2898, "step": 42778 }, { "epoch": 4.349227328182188, "grad_norm": 0.2666976749897003, "learning_rate": 5.072127677613698e-07, "loss": 0.3368, "step": 42779 }, { "epoch": 4.349328995526637, "grad_norm": 0.27915385365486145, "learning_rate": 5.070570352660875e-07, "loss": 0.2755, "step": 42780 }, { "epoch": 4.349430662871086, "grad_norm": 0.2762937545776367, "learning_rate": 5.069013254051841e-07, "loss": 0.2615, "step": 42781 }, { "epoch": 4.349532330215535, "grad_norm": 0.278615802526474, "learning_rate": 5.067456381794433e-07, "loss": 0.2965, "step": 42782 }, { "epoch": 4.349633997559984, "grad_norm": 0.2782840430736542, "learning_rate": 5.065899735896512e-07, "loss": 0.2812, "step": 42783 }, { "epoch": 4.349735664904433, "grad_norm": 0.28193482756614685, "learning_rate": 5.064343316365911e-07, "loss": 0.2755, "step": 42784 }, { "epoch": 4.349837332248882, "grad_norm": 0.288177490234375, "learning_rate": 5.062787123210461e-07, "loss": 0.3274, "step": 42785 }, { "epoch": 4.349938999593331, "grad_norm": 0.27541205286979675, "learning_rate": 5.061231156438018e-07, "loss": 0.2909, "step": 42786 }, { "epoch": 4.35004066693778, "grad_norm": 0.3000405430793762, "learning_rate": 5.059675416056415e-07, "loss": 0.2929, "step": 42787 }, { "epoch": 4.3501423342822285, "grad_norm": 0.2722429931163788, "learning_rate": 5.058119902073477e-07, "loss": 0.2732, "step": 42788 }, { "epoch": 4.3502440016266775, "grad_norm": 0.29660603404045105, "learning_rate": 5.056564614497061e-07, "loss": 0.2869, "step": 42789 }, { "epoch": 4.350345668971126, "grad_norm": 0.2703559100627899, "learning_rate": 5.055009553334994e-07, "loss": 0.2945, "step": 42790 }, { "epoch": 4.350447336315575, "grad_norm": 0.29300248622894287, "learning_rate": 5.053454718595107e-07, "loss": 0.2964, "step": 42791 }, { "epoch": 4.350549003660024, "grad_norm": 0.2802635133266449, "learning_rate": 5.051900110285224e-07, "loss": 0.3, "step": 42792 }, { "epoch": 4.350650671004473, "grad_norm": 0.28355252742767334, "learning_rate": 5.050345728413197e-07, "loss": 0.2721, "step": 42793 }, { "epoch": 4.350752338348922, "grad_norm": 0.2507241368293762, "learning_rate": 5.048791572986844e-07, "loss": 0.2932, "step": 42794 }, { "epoch": 4.350854005693371, "grad_norm": 0.2798919975757599, "learning_rate": 5.047237644013991e-07, "loss": 0.2938, "step": 42795 }, { "epoch": 4.35095567303782, "grad_norm": 0.28635072708129883, "learning_rate": 5.045683941502483e-07, "loss": 0.2795, "step": 42796 }, { "epoch": 4.351057340382269, "grad_norm": 0.2898612320423126, "learning_rate": 5.044130465460134e-07, "loss": 0.3077, "step": 42797 }, { "epoch": 4.351159007726718, "grad_norm": 0.26975515484809875, "learning_rate": 5.042577215894762e-07, "loss": 0.2962, "step": 42798 }, { "epoch": 4.351260675071167, "grad_norm": 0.27302953600883484, "learning_rate": 5.041024192814215e-07, "loss": 0.3013, "step": 42799 }, { "epoch": 4.351362342415616, "grad_norm": 0.28550246357917786, "learning_rate": 5.039471396226297e-07, "loss": 0.2996, "step": 42800 }, { "epoch": 4.351464009760065, "grad_norm": 0.30219796299934387, "learning_rate": 5.037918826138843e-07, "loss": 0.3174, "step": 42801 }, { "epoch": 4.351565677104514, "grad_norm": 0.27563464641571045, "learning_rate": 5.036366482559663e-07, "loss": 0.2901, "step": 42802 }, { "epoch": 4.351667344448963, "grad_norm": 0.2607934772968292, "learning_rate": 5.034814365496588e-07, "loss": 0.3054, "step": 42803 }, { "epoch": 4.351769011793412, "grad_norm": 0.2701238691806793, "learning_rate": 5.03326247495744e-07, "loss": 0.3233, "step": 42804 }, { "epoch": 4.3518706791378605, "grad_norm": 0.29422906041145325, "learning_rate": 5.031710810950013e-07, "loss": 0.3076, "step": 42805 }, { "epoch": 4.3519723464823095, "grad_norm": 0.27038225531578064, "learning_rate": 5.030159373482163e-07, "loss": 0.3189, "step": 42806 }, { "epoch": 4.352074013826758, "grad_norm": 0.29762351512908936, "learning_rate": 5.028608162561665e-07, "loss": 0.2863, "step": 42807 }, { "epoch": 4.352175681171207, "grad_norm": 0.3087218403816223, "learning_rate": 5.027057178196349e-07, "loss": 0.292, "step": 42808 }, { "epoch": 4.352277348515657, "grad_norm": 0.2954160273075104, "learning_rate": 5.025506420394044e-07, "loss": 0.3175, "step": 42809 }, { "epoch": 4.352379015860106, "grad_norm": 0.279802531003952, "learning_rate": 5.023955889162552e-07, "loss": 0.2889, "step": 42810 }, { "epoch": 4.352480683204555, "grad_norm": 0.29314008355140686, "learning_rate": 5.022405584509676e-07, "loss": 0.2648, "step": 42811 }, { "epoch": 4.352582350549004, "grad_norm": 0.29658937454223633, "learning_rate": 5.020855506443228e-07, "loss": 0.3151, "step": 42812 }, { "epoch": 4.352684017893453, "grad_norm": 0.28959569334983826, "learning_rate": 5.019305654971029e-07, "loss": 0.3123, "step": 42813 }, { "epoch": 4.352785685237902, "grad_norm": 0.2746022343635559, "learning_rate": 5.017756030100879e-07, "loss": 0.3351, "step": 42814 }, { "epoch": 4.352887352582351, "grad_norm": 0.2860971987247467, "learning_rate": 5.016206631840576e-07, "loss": 0.2952, "step": 42815 }, { "epoch": 4.3529890199268, "grad_norm": 0.2594475746154785, "learning_rate": 5.014657460197953e-07, "loss": 0.2863, "step": 42816 }, { "epoch": 4.353090687271249, "grad_norm": 0.29508528113365173, "learning_rate": 5.013108515180776e-07, "loss": 0.2837, "step": 42817 }, { "epoch": 4.353192354615698, "grad_norm": 0.2670176327228546, "learning_rate": 5.011559796796867e-07, "loss": 0.2649, "step": 42818 }, { "epoch": 4.353294021960147, "grad_norm": 0.2818083167076111, "learning_rate": 5.010011305054052e-07, "loss": 0.3055, "step": 42819 }, { "epoch": 4.3533956893045955, "grad_norm": 0.2965122163295746, "learning_rate": 5.008463039960088e-07, "loss": 0.2969, "step": 42820 }, { "epoch": 4.3534973566490445, "grad_norm": 0.28381267189979553, "learning_rate": 5.0069150015228e-07, "loss": 0.2918, "step": 42821 }, { "epoch": 4.353599023993493, "grad_norm": 0.2653132677078247, "learning_rate": 5.005367189749977e-07, "loss": 0.3156, "step": 42822 }, { "epoch": 4.353700691337942, "grad_norm": 0.2650834918022156, "learning_rate": 5.003819604649435e-07, "loss": 0.2977, "step": 42823 }, { "epoch": 4.353802358682391, "grad_norm": 0.25746312737464905, "learning_rate": 5.002272246228951e-07, "loss": 0.2984, "step": 42824 }, { "epoch": 4.35390402602684, "grad_norm": 0.2759878635406494, "learning_rate": 5.000725114496319e-07, "loss": 0.2795, "step": 42825 }, { "epoch": 4.354005693371289, "grad_norm": 0.28933829069137573, "learning_rate": 4.99917820945936e-07, "loss": 0.2706, "step": 42826 }, { "epoch": 4.354107360715738, "grad_norm": 0.28947508335113525, "learning_rate": 4.997631531125824e-07, "loss": 0.2777, "step": 42827 }, { "epoch": 4.354209028060187, "grad_norm": 0.2902066707611084, "learning_rate": 4.996085079503532e-07, "loss": 0.302, "step": 42828 }, { "epoch": 4.354310695404636, "grad_norm": 0.29828912019729614, "learning_rate": 4.994538854600284e-07, "loss": 0.2762, "step": 42829 }, { "epoch": 4.354412362749085, "grad_norm": 0.2807953655719757, "learning_rate": 4.992992856423839e-07, "loss": 0.2936, "step": 42830 }, { "epoch": 4.354514030093534, "grad_norm": 0.2747003436088562, "learning_rate": 4.991447084982004e-07, "loss": 0.2931, "step": 42831 }, { "epoch": 4.354615697437983, "grad_norm": 0.2626899182796478, "learning_rate": 4.98990154028256e-07, "loss": 0.3159, "step": 42832 }, { "epoch": 4.354717364782432, "grad_norm": 0.30706557631492615, "learning_rate": 4.988356222333302e-07, "loss": 0.2716, "step": 42833 }, { "epoch": 4.354819032126881, "grad_norm": 0.2783760726451874, "learning_rate": 4.986811131142011e-07, "loss": 0.3216, "step": 42834 }, { "epoch": 4.35492069947133, "grad_norm": 0.28808334469795227, "learning_rate": 4.98526626671646e-07, "loss": 0.3129, "step": 42835 }, { "epoch": 4.355022366815779, "grad_norm": 0.29338711500167847, "learning_rate": 4.983721629064453e-07, "loss": 0.2781, "step": 42836 }, { "epoch": 4.3551240341602275, "grad_norm": 0.2661798596382141, "learning_rate": 4.982177218193745e-07, "loss": 0.2963, "step": 42837 }, { "epoch": 4.3552257015046765, "grad_norm": 0.2620159685611725, "learning_rate": 4.980633034112131e-07, "loss": 0.272, "step": 42838 }, { "epoch": 4.355327368849125, "grad_norm": 0.27505043148994446, "learning_rate": 4.979089076827409e-07, "loss": 0.3097, "step": 42839 }, { "epoch": 4.355429036193574, "grad_norm": 0.269123375415802, "learning_rate": 4.977545346347318e-07, "loss": 0.2836, "step": 42840 }, { "epoch": 4.355530703538023, "grad_norm": 0.2857297956943512, "learning_rate": 4.976001842679662e-07, "loss": 0.2788, "step": 42841 }, { "epoch": 4.355632370882472, "grad_norm": 0.2773151993751526, "learning_rate": 4.974458565832207e-07, "loss": 0.3215, "step": 42842 }, { "epoch": 4.355734038226921, "grad_norm": 0.2884732484817505, "learning_rate": 4.972915515812726e-07, "loss": 0.2993, "step": 42843 }, { "epoch": 4.35583570557137, "grad_norm": 0.3047146201133728, "learning_rate": 4.971372692629e-07, "loss": 0.2789, "step": 42844 }, { "epoch": 4.355937372915819, "grad_norm": 0.28373709321022034, "learning_rate": 4.969830096288797e-07, "loss": 0.3062, "step": 42845 }, { "epoch": 4.356039040260268, "grad_norm": 0.2818439304828644, "learning_rate": 4.968287726799903e-07, "loss": 0.2988, "step": 42846 }, { "epoch": 4.356140707604718, "grad_norm": 0.2933461368083954, "learning_rate": 4.966745584170057e-07, "loss": 0.2973, "step": 42847 }, { "epoch": 4.356242374949167, "grad_norm": 0.28546297550201416, "learning_rate": 4.965203668407042e-07, "loss": 0.2953, "step": 42848 }, { "epoch": 4.356344042293616, "grad_norm": 0.29632383584976196, "learning_rate": 4.963661979518647e-07, "loss": 0.322, "step": 42849 }, { "epoch": 4.356445709638065, "grad_norm": 0.27304205298423767, "learning_rate": 4.962120517512608e-07, "loss": 0.2904, "step": 42850 }, { "epoch": 4.356547376982514, "grad_norm": 0.2714203894138336, "learning_rate": 4.96057928239671e-07, "loss": 0.2912, "step": 42851 }, { "epoch": 4.3566490443269625, "grad_norm": 0.29175683856010437, "learning_rate": 4.959038274178707e-07, "loss": 0.2863, "step": 42852 }, { "epoch": 4.3567507116714115, "grad_norm": 0.2724783718585968, "learning_rate": 4.957497492866359e-07, "loss": 0.2513, "step": 42853 }, { "epoch": 4.35685237901586, "grad_norm": 0.30260002613067627, "learning_rate": 4.955956938467443e-07, "loss": 0.2748, "step": 42854 }, { "epoch": 4.356954046360309, "grad_norm": 0.2809407413005829, "learning_rate": 4.954416610989704e-07, "loss": 0.3269, "step": 42855 }, { "epoch": 4.357055713704758, "grad_norm": 0.27888697385787964, "learning_rate": 4.95287651044093e-07, "loss": 0.2981, "step": 42856 }, { "epoch": 4.357157381049207, "grad_norm": 0.2581183910369873, "learning_rate": 4.951336636828841e-07, "loss": 0.3007, "step": 42857 }, { "epoch": 4.357259048393656, "grad_norm": 0.27200570702552795, "learning_rate": 4.94979699016121e-07, "loss": 0.2836, "step": 42858 }, { "epoch": 4.357360715738105, "grad_norm": 0.29648423194885254, "learning_rate": 4.948257570445813e-07, "loss": 0.258, "step": 42859 }, { "epoch": 4.357462383082554, "grad_norm": 0.2817307710647583, "learning_rate": 4.946718377690374e-07, "loss": 0.298, "step": 42860 }, { "epoch": 4.357564050427003, "grad_norm": 0.2848965525627136, "learning_rate": 4.945179411902667e-07, "loss": 0.303, "step": 42861 }, { "epoch": 4.357665717771452, "grad_norm": 0.27127042412757874, "learning_rate": 4.943640673090444e-07, "loss": 0.2823, "step": 42862 }, { "epoch": 4.357767385115901, "grad_norm": 0.2886623740196228, "learning_rate": 4.942102161261441e-07, "loss": 0.3477, "step": 42863 }, { "epoch": 4.35786905246035, "grad_norm": 0.294035404920578, "learning_rate": 4.940563876423426e-07, "loss": 0.2916, "step": 42864 }, { "epoch": 4.357970719804799, "grad_norm": 0.2634471654891968, "learning_rate": 4.939025818584147e-07, "loss": 0.2678, "step": 42865 }, { "epoch": 4.358072387149248, "grad_norm": 0.2660760283470154, "learning_rate": 4.937487987751349e-07, "loss": 0.3111, "step": 42866 }, { "epoch": 4.358174054493697, "grad_norm": 0.27426037192344666, "learning_rate": 4.935950383932764e-07, "loss": 0.2593, "step": 42867 }, { "epoch": 4.358275721838146, "grad_norm": 0.2856598198413849, "learning_rate": 4.934413007136152e-07, "loss": 0.2472, "step": 42868 }, { "epoch": 4.3583773891825945, "grad_norm": 0.27871909737586975, "learning_rate": 4.93287585736928e-07, "loss": 0.3241, "step": 42869 }, { "epoch": 4.3584790565270435, "grad_norm": 0.2590879201889038, "learning_rate": 4.931338934639851e-07, "loss": 0.279, "step": 42870 }, { "epoch": 4.358580723871492, "grad_norm": 0.2569518983364105, "learning_rate": 4.929802238955633e-07, "loss": 0.314, "step": 42871 }, { "epoch": 4.358682391215941, "grad_norm": 0.2723678648471832, "learning_rate": 4.928265770324364e-07, "loss": 0.2844, "step": 42872 }, { "epoch": 4.35878405856039, "grad_norm": 0.268146812915802, "learning_rate": 4.926729528753771e-07, "loss": 0.2952, "step": 42873 }, { "epoch": 4.358885725904839, "grad_norm": 0.2773364186286926, "learning_rate": 4.925193514251608e-07, "loss": 0.2961, "step": 42874 }, { "epoch": 4.358987393249288, "grad_norm": 0.2787374258041382, "learning_rate": 4.923657726825615e-07, "loss": 0.2754, "step": 42875 }, { "epoch": 4.359089060593737, "grad_norm": 0.27182236313819885, "learning_rate": 4.922122166483522e-07, "loss": 0.277, "step": 42876 }, { "epoch": 4.359190727938186, "grad_norm": 0.26568877696990967, "learning_rate": 4.920586833233054e-07, "loss": 0.2932, "step": 42877 }, { "epoch": 4.359292395282635, "grad_norm": 0.26741600036621094, "learning_rate": 4.91905172708197e-07, "loss": 0.2953, "step": 42878 }, { "epoch": 4.359394062627084, "grad_norm": 0.2658529281616211, "learning_rate": 4.917516848037984e-07, "loss": 0.2756, "step": 42879 }, { "epoch": 4.359495729971533, "grad_norm": 0.2912624180316925, "learning_rate": 4.915982196108832e-07, "loss": 0.2776, "step": 42880 }, { "epoch": 4.359597397315982, "grad_norm": 0.2723490297794342, "learning_rate": 4.914447771302249e-07, "loss": 0.28, "step": 42881 }, { "epoch": 4.359699064660431, "grad_norm": 0.2753726840019226, "learning_rate": 4.912913573625972e-07, "loss": 0.2819, "step": 42882 }, { "epoch": 4.35980073200488, "grad_norm": 0.28684282302856445, "learning_rate": 4.911379603087713e-07, "loss": 0.2995, "step": 42883 }, { "epoch": 4.359902399349329, "grad_norm": 0.2711579501628876, "learning_rate": 4.909845859695211e-07, "loss": 0.2691, "step": 42884 }, { "epoch": 4.360004066693778, "grad_norm": 0.2783123850822449, "learning_rate": 4.908312343456201e-07, "loss": 0.3244, "step": 42885 }, { "epoch": 4.3601057340382265, "grad_norm": 0.27607494592666626, "learning_rate": 4.906779054378391e-07, "loss": 0.3136, "step": 42886 }, { "epoch": 4.3602074013826755, "grad_norm": 0.2638660967350006, "learning_rate": 4.905245992469504e-07, "loss": 0.318, "step": 42887 }, { "epoch": 4.360309068727124, "grad_norm": 0.29512059688568115, "learning_rate": 4.903713157737278e-07, "loss": 0.2882, "step": 42888 }, { "epoch": 4.360410736071573, "grad_norm": 0.26755568385124207, "learning_rate": 4.902180550189434e-07, "loss": 0.2718, "step": 42889 }, { "epoch": 4.360512403416022, "grad_norm": 0.2913511097431183, "learning_rate": 4.900648169833677e-07, "loss": 0.2691, "step": 42890 }, { "epoch": 4.360614070760472, "grad_norm": 0.2779591977596283, "learning_rate": 4.899116016677741e-07, "loss": 0.2721, "step": 42891 }, { "epoch": 4.360715738104921, "grad_norm": 0.26637980341911316, "learning_rate": 4.897584090729346e-07, "loss": 0.2776, "step": 42892 }, { "epoch": 4.36081740544937, "grad_norm": 0.2653888761997223, "learning_rate": 4.896052391996198e-07, "loss": 0.2736, "step": 42893 }, { "epoch": 4.360919072793819, "grad_norm": 0.30425071716308594, "learning_rate": 4.894520920486023e-07, "loss": 0.2705, "step": 42894 }, { "epoch": 4.361020740138268, "grad_norm": 0.3002108335494995, "learning_rate": 4.892989676206533e-07, "loss": 0.2805, "step": 42895 }, { "epoch": 4.361122407482717, "grad_norm": 0.28825366497039795, "learning_rate": 4.891458659165449e-07, "loss": 0.2815, "step": 42896 }, { "epoch": 4.361224074827166, "grad_norm": 0.26253676414489746, "learning_rate": 4.889927869370458e-07, "loss": 0.2832, "step": 42897 }, { "epoch": 4.361325742171615, "grad_norm": 0.2724214792251587, "learning_rate": 4.888397306829307e-07, "loss": 0.309, "step": 42898 }, { "epoch": 4.361427409516064, "grad_norm": 0.2870422601699829, "learning_rate": 4.886866971549686e-07, "loss": 0.3349, "step": 42899 }, { "epoch": 4.361529076860513, "grad_norm": 0.2917724847793579, "learning_rate": 4.885336863539303e-07, "loss": 0.2893, "step": 42900 }, { "epoch": 4.3616307442049616, "grad_norm": 0.2704911231994629, "learning_rate": 4.883806982805877e-07, "loss": 0.2888, "step": 42901 }, { "epoch": 4.3617324115494105, "grad_norm": 0.29847803711891174, "learning_rate": 4.882277329357115e-07, "loss": 0.2592, "step": 42902 }, { "epoch": 4.361834078893859, "grad_norm": 0.29473406076431274, "learning_rate": 4.880747903200717e-07, "loss": 0.304, "step": 42903 }, { "epoch": 4.361935746238308, "grad_norm": 0.28036293387413025, "learning_rate": 4.87921870434438e-07, "loss": 0.2899, "step": 42904 }, { "epoch": 4.362037413582757, "grad_norm": 0.28353261947631836, "learning_rate": 4.877689732795821e-07, "loss": 0.2857, "step": 42905 }, { "epoch": 4.362139080927206, "grad_norm": 0.29069676995277405, "learning_rate": 4.876160988562744e-07, "loss": 0.2892, "step": 42906 }, { "epoch": 4.362240748271655, "grad_norm": 0.25863856077194214, "learning_rate": 4.874632471652835e-07, "loss": 0.2778, "step": 42907 }, { "epoch": 4.362342415616104, "grad_norm": 0.2824741303920746, "learning_rate": 4.873104182073806e-07, "loss": 0.283, "step": 42908 }, { "epoch": 4.362444082960553, "grad_norm": 0.27904418110847473, "learning_rate": 4.871576119833366e-07, "loss": 0.2783, "step": 42909 }, { "epoch": 4.362545750305002, "grad_norm": 0.31384822726249695, "learning_rate": 4.870048284939182e-07, "loss": 0.2847, "step": 42910 }, { "epoch": 4.362647417649451, "grad_norm": 0.29754889011383057, "learning_rate": 4.868520677398986e-07, "loss": 0.2808, "step": 42911 }, { "epoch": 4.3627490849939, "grad_norm": 0.2859657406806946, "learning_rate": 4.866993297220457e-07, "loss": 0.2963, "step": 42912 }, { "epoch": 4.362850752338349, "grad_norm": 0.27816009521484375, "learning_rate": 4.865466144411291e-07, "loss": 0.3021, "step": 42913 }, { "epoch": 4.362952419682798, "grad_norm": 0.2811945080757141, "learning_rate": 4.863939218979168e-07, "loss": 0.3027, "step": 42914 }, { "epoch": 4.363054087027247, "grad_norm": 0.26914915442466736, "learning_rate": 4.862412520931803e-07, "loss": 0.3064, "step": 42915 }, { "epoch": 4.363155754371696, "grad_norm": 0.3027014136314392, "learning_rate": 4.860886050276881e-07, "loss": 0.3048, "step": 42916 }, { "epoch": 4.363257421716145, "grad_norm": 0.2834305763244629, "learning_rate": 4.859359807022079e-07, "loss": 0.3093, "step": 42917 }, { "epoch": 4.3633590890605936, "grad_norm": 0.2854263186454773, "learning_rate": 4.857833791175099e-07, "loss": 0.2799, "step": 42918 }, { "epoch": 4.3634607564050425, "grad_norm": 0.3079811930656433, "learning_rate": 4.856308002743632e-07, "loss": 0.2447, "step": 42919 }, { "epoch": 4.363562423749491, "grad_norm": 0.28731992840766907, "learning_rate": 4.854782441735345e-07, "loss": 0.3035, "step": 42920 }, { "epoch": 4.36366409109394, "grad_norm": 0.27002179622650146, "learning_rate": 4.853257108157944e-07, "loss": 0.3102, "step": 42921 }, { "epoch": 4.363765758438389, "grad_norm": 0.28469106554985046, "learning_rate": 4.851732002019105e-07, "loss": 0.2896, "step": 42922 }, { "epoch": 4.363867425782838, "grad_norm": 0.28118154406547546, "learning_rate": 4.850207123326511e-07, "loss": 0.2897, "step": 42923 }, { "epoch": 4.363969093127287, "grad_norm": 0.3042384684085846, "learning_rate": 4.84868247208784e-07, "loss": 0.276, "step": 42924 }, { "epoch": 4.364070760471736, "grad_norm": 0.2592112720012665, "learning_rate": 4.84715804831078e-07, "loss": 0.2696, "step": 42925 }, { "epoch": 4.364172427816185, "grad_norm": 0.2704911231994629, "learning_rate": 4.845633852003012e-07, "loss": 0.2879, "step": 42926 }, { "epoch": 4.364274095160634, "grad_norm": 0.27165690064430237, "learning_rate": 4.844109883172199e-07, "loss": 0.2736, "step": 42927 }, { "epoch": 4.364375762505083, "grad_norm": 0.26914945244789124, "learning_rate": 4.842586141826039e-07, "loss": 0.3001, "step": 42928 }, { "epoch": 4.364477429849533, "grad_norm": 0.2834987938404083, "learning_rate": 4.841062627972204e-07, "loss": 0.2993, "step": 42929 }, { "epoch": 4.364579097193982, "grad_norm": 0.2910556495189667, "learning_rate": 4.839539341618349e-07, "loss": 0.3193, "step": 42930 }, { "epoch": 4.364680764538431, "grad_norm": 0.29723191261291504, "learning_rate": 4.838016282772174e-07, "loss": 0.2846, "step": 42931 }, { "epoch": 4.36478243188288, "grad_norm": 0.28848445415496826, "learning_rate": 4.836493451441337e-07, "loss": 0.2899, "step": 42932 }, { "epoch": 4.364884099227329, "grad_norm": 0.2893695831298828, "learning_rate": 4.834970847633519e-07, "loss": 0.3067, "step": 42933 }, { "epoch": 4.3649857665717775, "grad_norm": 0.27098172903060913, "learning_rate": 4.833448471356372e-07, "loss": 0.304, "step": 42934 }, { "epoch": 4.3650874339162264, "grad_norm": 0.28148937225341797, "learning_rate": 4.831926322617592e-07, "loss": 0.3222, "step": 42935 }, { "epoch": 4.365189101260675, "grad_norm": 0.2805595397949219, "learning_rate": 4.830404401424831e-07, "loss": 0.2932, "step": 42936 }, { "epoch": 4.365290768605124, "grad_norm": 0.25536048412323, "learning_rate": 4.828882707785748e-07, "loss": 0.3136, "step": 42937 }, { "epoch": 4.365392435949573, "grad_norm": 0.2705421447753906, "learning_rate": 4.827361241708034e-07, "loss": 0.3179, "step": 42938 }, { "epoch": 4.365494103294022, "grad_norm": 0.3009936213493347, "learning_rate": 4.825840003199334e-07, "loss": 0.3018, "step": 42939 }, { "epoch": 4.365595770638471, "grad_norm": 0.28632551431655884, "learning_rate": 4.824318992267307e-07, "loss": 0.2717, "step": 42940 }, { "epoch": 4.36569743798292, "grad_norm": 0.27556928992271423, "learning_rate": 4.822798208919638e-07, "loss": 0.3071, "step": 42941 }, { "epoch": 4.365799105327369, "grad_norm": 0.2663190960884094, "learning_rate": 4.821277653163974e-07, "loss": 0.2881, "step": 42942 }, { "epoch": 4.365900772671818, "grad_norm": 0.28612020611763, "learning_rate": 4.819757325007979e-07, "loss": 0.2981, "step": 42943 }, { "epoch": 4.366002440016267, "grad_norm": 0.2890884280204773, "learning_rate": 4.818237224459299e-07, "loss": 0.3067, "step": 42944 }, { "epoch": 4.366104107360716, "grad_norm": 0.2845425307750702, "learning_rate": 4.816717351525618e-07, "loss": 0.2841, "step": 42945 }, { "epoch": 4.366205774705165, "grad_norm": 0.26875853538513184, "learning_rate": 4.81519770621457e-07, "loss": 0.2766, "step": 42946 }, { "epoch": 4.366307442049614, "grad_norm": 0.259609192609787, "learning_rate": 4.813678288533813e-07, "loss": 0.3003, "step": 42947 }, { "epoch": 4.366409109394063, "grad_norm": 0.2721792161464691, "learning_rate": 4.812159098491015e-07, "loss": 0.3056, "step": 42948 }, { "epoch": 4.366510776738512, "grad_norm": 0.27948662638664246, "learning_rate": 4.810640136093824e-07, "loss": 0.3341, "step": 42949 }, { "epoch": 4.366612444082961, "grad_norm": 0.27651792764663696, "learning_rate": 4.809121401349875e-07, "loss": 0.3311, "step": 42950 }, { "epoch": 4.3667141114274095, "grad_norm": 0.2860111594200134, "learning_rate": 4.80760289426685e-07, "loss": 0.2852, "step": 42951 }, { "epoch": 4.3668157787718584, "grad_norm": 0.27588409185409546, "learning_rate": 4.806084614852374e-07, "loss": 0.2775, "step": 42952 }, { "epoch": 4.366917446116307, "grad_norm": 0.28209763765335083, "learning_rate": 4.80456656311411e-07, "loss": 0.3011, "step": 42953 }, { "epoch": 4.367019113460756, "grad_norm": 0.29677191376686096, "learning_rate": 4.803048739059685e-07, "loss": 0.3227, "step": 42954 }, { "epoch": 4.367120780805205, "grad_norm": 0.27103081345558167, "learning_rate": 4.801531142696774e-07, "loss": 0.3266, "step": 42955 }, { "epoch": 4.367222448149654, "grad_norm": 0.27232638001441956, "learning_rate": 4.800013774033007e-07, "loss": 0.3065, "step": 42956 }, { "epoch": 4.367324115494103, "grad_norm": 0.28599074482917786, "learning_rate": 4.798496633076016e-07, "loss": 0.3063, "step": 42957 }, { "epoch": 4.367425782838552, "grad_norm": 0.25827690958976746, "learning_rate": 4.796979719833484e-07, "loss": 0.2816, "step": 42958 }, { "epoch": 4.367527450183001, "grad_norm": 0.2968864142894745, "learning_rate": 4.795463034313002e-07, "loss": 0.3001, "step": 42959 }, { "epoch": 4.36762911752745, "grad_norm": 0.29233649373054504, "learning_rate": 4.793946576522235e-07, "loss": 0.3389, "step": 42960 }, { "epoch": 4.367730784871899, "grad_norm": 0.28171271085739136, "learning_rate": 4.792430346468834e-07, "loss": 0.3522, "step": 42961 }, { "epoch": 4.367832452216348, "grad_norm": 0.27607157826423645, "learning_rate": 4.790914344160424e-07, "loss": 0.3146, "step": 42962 }, { "epoch": 4.367934119560797, "grad_norm": 0.2787458002567291, "learning_rate": 4.789398569604648e-07, "loss": 0.2758, "step": 42963 }, { "epoch": 4.368035786905246, "grad_norm": 0.26939788460731506, "learning_rate": 4.787883022809125e-07, "loss": 0.287, "step": 42964 }, { "epoch": 4.368137454249695, "grad_norm": 0.2625114917755127, "learning_rate": 4.786367703781513e-07, "loss": 0.3019, "step": 42965 }, { "epoch": 4.368239121594144, "grad_norm": 0.28215086460113525, "learning_rate": 4.784852612529439e-07, "loss": 0.2664, "step": 42966 }, { "epoch": 4.368340788938593, "grad_norm": 0.3056643307209015, "learning_rate": 4.783337749060518e-07, "loss": 0.262, "step": 42967 }, { "epoch": 4.3684424562830415, "grad_norm": 0.2531771659851074, "learning_rate": 4.781823113382422e-07, "loss": 0.312, "step": 42968 }, { "epoch": 4.3685441236274904, "grad_norm": 0.2981230318546295, "learning_rate": 4.780308705502735e-07, "loss": 0.2855, "step": 42969 }, { "epoch": 4.368645790971939, "grad_norm": 0.29852646589279175, "learning_rate": 4.778794525429103e-07, "loss": 0.3071, "step": 42970 }, { "epoch": 4.368747458316388, "grad_norm": 0.28241661190986633, "learning_rate": 4.777280573169179e-07, "loss": 0.3033, "step": 42971 }, { "epoch": 4.368849125660837, "grad_norm": 0.2878444790840149, "learning_rate": 4.775766848730551e-07, "loss": 0.2931, "step": 42972 }, { "epoch": 4.368950793005287, "grad_norm": 0.2842995226383209, "learning_rate": 4.774253352120872e-07, "loss": 0.2648, "step": 42973 }, { "epoch": 4.369052460349736, "grad_norm": 0.28426626324653625, "learning_rate": 4.772740083347743e-07, "loss": 0.2829, "step": 42974 }, { "epoch": 4.369154127694185, "grad_norm": 0.2812940776348114, "learning_rate": 4.771227042418819e-07, "loss": 0.3156, "step": 42975 }, { "epoch": 4.369255795038634, "grad_norm": 0.2703404724597931, "learning_rate": 4.769714229341699e-07, "loss": 0.3099, "step": 42976 }, { "epoch": 4.369357462383083, "grad_norm": 0.2616594433784485, "learning_rate": 4.768201644124004e-07, "loss": 0.2875, "step": 42977 }, { "epoch": 4.369459129727532, "grad_norm": 0.28565874695777893, "learning_rate": 4.7666892867733796e-07, "loss": 0.2828, "step": 42978 }, { "epoch": 4.369560797071981, "grad_norm": 0.28696975111961365, "learning_rate": 4.765177157297407e-07, "loss": 0.3137, "step": 42979 }, { "epoch": 4.36966246441643, "grad_norm": 0.2976797819137573, "learning_rate": 4.76366525570372e-07, "loss": 0.2883, "step": 42980 }, { "epoch": 4.369764131760879, "grad_norm": 0.2653844654560089, "learning_rate": 4.7621535819999566e-07, "loss": 0.3081, "step": 42981 }, { "epoch": 4.369865799105328, "grad_norm": 0.29410144686698914, "learning_rate": 4.7606421361936936e-07, "loss": 0.3163, "step": 42982 }, { "epoch": 4.3699674664497765, "grad_norm": 0.2542138695716858, "learning_rate": 4.7591309182925814e-07, "loss": 0.2949, "step": 42983 }, { "epoch": 4.3700691337942255, "grad_norm": 0.2649993896484375, "learning_rate": 4.7576199283042024e-07, "loss": 0.2977, "step": 42984 }, { "epoch": 4.370170801138674, "grad_norm": 0.2886767089366913, "learning_rate": 4.756109166236189e-07, "loss": 0.2872, "step": 42985 }, { "epoch": 4.370272468483123, "grad_norm": 0.3082249164581299, "learning_rate": 4.754598632096152e-07, "loss": 0.2927, "step": 42986 }, { "epoch": 4.370374135827572, "grad_norm": 0.2867373526096344, "learning_rate": 4.7530883258916804e-07, "loss": 0.2831, "step": 42987 }, { "epoch": 4.370475803172021, "grad_norm": 0.26946377754211426, "learning_rate": 4.751578247630417e-07, "loss": 0.2971, "step": 42988 }, { "epoch": 4.37057747051647, "grad_norm": 0.31022053956985474, "learning_rate": 4.7500683973199347e-07, "loss": 0.3032, "step": 42989 }, { "epoch": 4.370679137860919, "grad_norm": 0.25942033529281616, "learning_rate": 4.7485587749678544e-07, "loss": 0.3121, "step": 42990 }, { "epoch": 4.370780805205368, "grad_norm": 0.27239006757736206, "learning_rate": 4.7470493805817973e-07, "loss": 0.287, "step": 42991 }, { "epoch": 4.370882472549817, "grad_norm": 0.27065297961235046, "learning_rate": 4.7455402141693365e-07, "loss": 0.2908, "step": 42992 }, { "epoch": 4.370984139894266, "grad_norm": 0.28277289867401123, "learning_rate": 4.744031275738098e-07, "loss": 0.2663, "step": 42993 }, { "epoch": 4.371085807238715, "grad_norm": 0.27383437752723694, "learning_rate": 4.742522565295671e-07, "loss": 0.3006, "step": 42994 }, { "epoch": 4.371187474583164, "grad_norm": 0.28761574625968933, "learning_rate": 4.741014082849654e-07, "loss": 0.2923, "step": 42995 }, { "epoch": 4.371289141927613, "grad_norm": 0.2686692774295807, "learning_rate": 4.7395058284076646e-07, "loss": 0.29, "step": 42996 }, { "epoch": 4.371390809272062, "grad_norm": 0.2761074900627136, "learning_rate": 4.737997801977279e-07, "loss": 0.3279, "step": 42997 }, { "epoch": 4.371492476616511, "grad_norm": 0.29709574580192566, "learning_rate": 4.7364900035661245e-07, "loss": 0.2945, "step": 42998 }, { "epoch": 4.37159414396096, "grad_norm": 0.27704209089279175, "learning_rate": 4.734982433181756e-07, "loss": 0.2962, "step": 42999 }, { "epoch": 4.3716958113054085, "grad_norm": 0.251335084438324, "learning_rate": 4.7334750908317906e-07, "loss": 0.279, "step": 43000 }, { "epoch": 4.3717974786498575, "grad_norm": 0.28042230010032654, "learning_rate": 4.731967976523838e-07, "loss": 0.2575, "step": 43001 }, { "epoch": 4.371899145994306, "grad_norm": 0.27529993653297424, "learning_rate": 4.7304610902654536e-07, "loss": 0.2731, "step": 43002 }, { "epoch": 4.372000813338755, "grad_norm": 0.2855452597141266, "learning_rate": 4.728954432064259e-07, "loss": 0.2699, "step": 43003 }, { "epoch": 4.372102480683204, "grad_norm": 0.2908581793308258, "learning_rate": 4.7274480019278367e-07, "loss": 0.2737, "step": 43004 }, { "epoch": 4.372204148027653, "grad_norm": 0.2950972318649292, "learning_rate": 4.725941799863765e-07, "loss": 0.306, "step": 43005 }, { "epoch": 4.372305815372102, "grad_norm": 0.2683178782463074, "learning_rate": 4.724435825879642e-07, "loss": 0.2966, "step": 43006 }, { "epoch": 4.372407482716551, "grad_norm": 0.2950837016105652, "learning_rate": 4.7229300799830457e-07, "loss": 0.3011, "step": 43007 }, { "epoch": 4.372509150061, "grad_norm": 0.2912951111793518, "learning_rate": 4.721424562181587e-07, "loss": 0.3026, "step": 43008 }, { "epoch": 4.372610817405449, "grad_norm": 0.27398377656936646, "learning_rate": 4.719919272482815e-07, "loss": 0.3236, "step": 43009 }, { "epoch": 4.372712484749898, "grad_norm": 0.28225767612457275, "learning_rate": 4.7184142108943285e-07, "loss": 0.2846, "step": 43010 }, { "epoch": 4.372814152094348, "grad_norm": 0.26949411630630493, "learning_rate": 4.7169093774237284e-07, "loss": 0.2878, "step": 43011 }, { "epoch": 4.372915819438797, "grad_norm": 0.2782157361507416, "learning_rate": 4.7154047720785636e-07, "loss": 0.3028, "step": 43012 }, { "epoch": 4.373017486783246, "grad_norm": 0.29863232374191284, "learning_rate": 4.713900394866433e-07, "loss": 0.266, "step": 43013 }, { "epoch": 4.373119154127695, "grad_norm": 0.300262987613678, "learning_rate": 4.712396245794909e-07, "loss": 0.2919, "step": 43014 }, { "epoch": 4.3732208214721435, "grad_norm": 0.27452775835990906, "learning_rate": 4.710892324871563e-07, "loss": 0.3056, "step": 43015 }, { "epoch": 4.3733224888165925, "grad_norm": 0.27634650468826294, "learning_rate": 4.709388632103989e-07, "loss": 0.2878, "step": 43016 }, { "epoch": 4.373424156161041, "grad_norm": 0.26923972368240356, "learning_rate": 4.7078851674997537e-07, "loss": 0.2763, "step": 43017 }, { "epoch": 4.37352582350549, "grad_norm": 0.2788388431072235, "learning_rate": 4.706381931066428e-07, "loss": 0.2876, "step": 43018 }, { "epoch": 4.373627490849939, "grad_norm": 0.2741512358188629, "learning_rate": 4.7048789228115787e-07, "loss": 0.3183, "step": 43019 }, { "epoch": 4.373729158194388, "grad_norm": 0.29372715950012207, "learning_rate": 4.7033761427427826e-07, "loss": 0.2673, "step": 43020 }, { "epoch": 4.373830825538837, "grad_norm": 0.2866983115673065, "learning_rate": 4.701873590867639e-07, "loss": 0.2727, "step": 43021 }, { "epoch": 4.373932492883286, "grad_norm": 0.30194932222366333, "learning_rate": 4.700371267193665e-07, "loss": 0.3168, "step": 43022 }, { "epoch": 4.374034160227735, "grad_norm": 0.29167160391807556, "learning_rate": 4.6988691717284705e-07, "loss": 0.2802, "step": 43023 }, { "epoch": 4.374135827572184, "grad_norm": 0.3024417757987976, "learning_rate": 4.6973673044796054e-07, "loss": 0.2831, "step": 43024 }, { "epoch": 4.374237494916633, "grad_norm": 0.2959505021572113, "learning_rate": 4.6958656654546296e-07, "loss": 0.3118, "step": 43025 }, { "epoch": 4.374339162261082, "grad_norm": 0.2529774010181427, "learning_rate": 4.694364254661127e-07, "loss": 0.2911, "step": 43026 }, { "epoch": 4.374440829605531, "grad_norm": 0.2907043695449829, "learning_rate": 4.6928630721066513e-07, "loss": 0.3055, "step": 43027 }, { "epoch": 4.37454249694998, "grad_norm": 0.2958321273326874, "learning_rate": 4.691362117798759e-07, "loss": 0.3113, "step": 43028 }, { "epoch": 4.374644164294429, "grad_norm": 0.27440136671066284, "learning_rate": 4.6898613917450154e-07, "loss": 0.2995, "step": 43029 }, { "epoch": 4.374745831638878, "grad_norm": 0.28578639030456543, "learning_rate": 4.688360893952987e-07, "loss": 0.2849, "step": 43030 }, { "epoch": 4.374847498983327, "grad_norm": 0.28732770681381226, "learning_rate": 4.686860624430234e-07, "loss": 0.2799, "step": 43031 }, { "epoch": 4.3749491663277755, "grad_norm": 0.28533831238746643, "learning_rate": 4.685360583184295e-07, "loss": 0.2958, "step": 43032 }, { "epoch": 4.3750508336722245, "grad_norm": 0.28919893503189087, "learning_rate": 4.6838607702227536e-07, "loss": 0.2806, "step": 43033 }, { "epoch": 4.375152501016673, "grad_norm": 0.2718283236026764, "learning_rate": 4.6823611855531524e-07, "loss": 0.3191, "step": 43034 }, { "epoch": 4.375254168361122, "grad_norm": 0.27305594086647034, "learning_rate": 4.680861829183031e-07, "loss": 0.309, "step": 43035 }, { "epoch": 4.375355835705571, "grad_norm": 0.27531859278678894, "learning_rate": 4.6793627011199773e-07, "loss": 0.3043, "step": 43036 }, { "epoch": 4.37545750305002, "grad_norm": 0.28531426191329956, "learning_rate": 4.677863801371518e-07, "loss": 0.3334, "step": 43037 }, { "epoch": 4.375559170394469, "grad_norm": 0.2736605107784271, "learning_rate": 4.6763651299452094e-07, "loss": 0.3086, "step": 43038 }, { "epoch": 4.375660837738918, "grad_norm": 0.2934141159057617, "learning_rate": 4.6748666868486e-07, "loss": 0.2672, "step": 43039 }, { "epoch": 4.375762505083367, "grad_norm": 0.2904035747051239, "learning_rate": 4.673368472089246e-07, "loss": 0.3131, "step": 43040 }, { "epoch": 4.375864172427816, "grad_norm": 0.29089534282684326, "learning_rate": 4.67187048567469e-07, "loss": 0.2781, "step": 43041 }, { "epoch": 4.375965839772265, "grad_norm": 0.27994340658187866, "learning_rate": 4.6703727276124713e-07, "loss": 0.2539, "step": 43042 }, { "epoch": 4.376067507116714, "grad_norm": 0.3037230372428894, "learning_rate": 4.6688751979101497e-07, "loss": 0.32, "step": 43043 }, { "epoch": 4.376169174461163, "grad_norm": 0.2853336036205292, "learning_rate": 4.6673778965752646e-07, "loss": 0.3072, "step": 43044 }, { "epoch": 4.376270841805612, "grad_norm": 0.26974356174468994, "learning_rate": 4.6658808236153487e-07, "loss": 0.3307, "step": 43045 }, { "epoch": 4.376372509150061, "grad_norm": 0.2852981686592102, "learning_rate": 4.6643839790379617e-07, "loss": 0.308, "step": 43046 }, { "epoch": 4.37647417649451, "grad_norm": 0.28089290857315063, "learning_rate": 4.662887362850632e-07, "loss": 0.3105, "step": 43047 }, { "epoch": 4.376575843838959, "grad_norm": 0.28347790241241455, "learning_rate": 4.661390975060903e-07, "loss": 0.2977, "step": 43048 }, { "epoch": 4.3766775111834075, "grad_norm": 0.26381251215934753, "learning_rate": 4.659894815676308e-07, "loss": 0.2812, "step": 43049 }, { "epoch": 4.3767791785278565, "grad_norm": 0.2947556674480438, "learning_rate": 4.6583988847043904e-07, "loss": 0.2676, "step": 43050 }, { "epoch": 4.376880845872305, "grad_norm": 0.2825044095516205, "learning_rate": 4.656903182152689e-07, "loss": 0.3069, "step": 43051 }, { "epoch": 4.376982513216754, "grad_norm": 0.2644241154193878, "learning_rate": 4.6554077080287194e-07, "loss": 0.3195, "step": 43052 }, { "epoch": 4.377084180561203, "grad_norm": 0.26304954290390015, "learning_rate": 4.653912462340043e-07, "loss": 0.3108, "step": 43053 }, { "epoch": 4.377185847905652, "grad_norm": 0.2734878361225128, "learning_rate": 4.652417445094182e-07, "loss": 0.2807, "step": 43054 }, { "epoch": 4.377287515250102, "grad_norm": 0.2857818007469177, "learning_rate": 4.6509226562986567e-07, "loss": 0.3033, "step": 43055 }, { "epoch": 4.377389182594551, "grad_norm": 0.28783029317855835, "learning_rate": 4.649428095961012e-07, "loss": 0.2956, "step": 43056 }, { "epoch": 4.377490849939, "grad_norm": 0.26545631885528564, "learning_rate": 4.6479337640887745e-07, "loss": 0.2763, "step": 43057 }, { "epoch": 4.377592517283449, "grad_norm": 0.26230189204216003, "learning_rate": 4.646439660689467e-07, "loss": 0.2634, "step": 43058 }, { "epoch": 4.377694184627898, "grad_norm": 0.2803536355495453, "learning_rate": 4.6449457857706103e-07, "loss": 0.2813, "step": 43059 }, { "epoch": 4.377795851972347, "grad_norm": 0.2736068367958069, "learning_rate": 4.643452139339749e-07, "loss": 0.3342, "step": 43060 }, { "epoch": 4.377897519316796, "grad_norm": 0.27864232659339905, "learning_rate": 4.641958721404394e-07, "loss": 0.2833, "step": 43061 }, { "epoch": 4.377999186661245, "grad_norm": 0.2678375840187073, "learning_rate": 4.640465531972066e-07, "loss": 0.2987, "step": 43062 }, { "epoch": 4.378100854005694, "grad_norm": 0.28125178813934326, "learning_rate": 4.638972571050304e-07, "loss": 0.3259, "step": 43063 }, { "epoch": 4.3782025213501425, "grad_norm": 0.30537110567092896, "learning_rate": 4.6374798386466134e-07, "loss": 0.2675, "step": 43064 }, { "epoch": 4.3783041886945915, "grad_norm": 0.30347833037376404, "learning_rate": 4.635987334768516e-07, "loss": 0.3221, "step": 43065 }, { "epoch": 4.37840585603904, "grad_norm": 0.27749770879745483, "learning_rate": 4.6344950594235437e-07, "loss": 0.2766, "step": 43066 }, { "epoch": 4.378507523383489, "grad_norm": 0.2685014605522156, "learning_rate": 4.633003012619203e-07, "loss": 0.3362, "step": 43067 }, { "epoch": 4.378609190727938, "grad_norm": 0.28168293833732605, "learning_rate": 4.6315111943630085e-07, "loss": 0.2685, "step": 43068 }, { "epoch": 4.378710858072387, "grad_norm": 0.28899243474006653, "learning_rate": 4.6300196046624724e-07, "loss": 0.2931, "step": 43069 }, { "epoch": 4.378812525416836, "grad_norm": 0.2755078971385956, "learning_rate": 4.628528243525127e-07, "loss": 0.2762, "step": 43070 }, { "epoch": 4.378914192761285, "grad_norm": 0.27745321393013, "learning_rate": 4.6270371109584765e-07, "loss": 0.3238, "step": 43071 }, { "epoch": 4.379015860105734, "grad_norm": 0.2667054235935211, "learning_rate": 4.6255462069700165e-07, "loss": 0.2931, "step": 43072 }, { "epoch": 4.379117527450183, "grad_norm": 0.2871150076389313, "learning_rate": 4.624055531567284e-07, "loss": 0.3213, "step": 43073 }, { "epoch": 4.379219194794632, "grad_norm": 0.2630029022693634, "learning_rate": 4.62256508475778e-07, "loss": 0.2861, "step": 43074 }, { "epoch": 4.379320862139081, "grad_norm": 0.2752740979194641, "learning_rate": 4.621074866548997e-07, "loss": 0.2991, "step": 43075 }, { "epoch": 4.37942252948353, "grad_norm": 0.26985377073287964, "learning_rate": 4.619584876948463e-07, "loss": 0.3115, "step": 43076 }, { "epoch": 4.379524196827979, "grad_norm": 0.28207141160964966, "learning_rate": 4.618095115963678e-07, "loss": 0.2859, "step": 43077 }, { "epoch": 4.379625864172428, "grad_norm": 0.2879893183708191, "learning_rate": 4.616605583602141e-07, "loss": 0.2823, "step": 43078 }, { "epoch": 4.379727531516877, "grad_norm": 0.2829543948173523, "learning_rate": 4.6151162798713567e-07, "loss": 0.2972, "step": 43079 }, { "epoch": 4.379829198861326, "grad_norm": 0.2679949104785919, "learning_rate": 4.613627204778842e-07, "loss": 0.2959, "step": 43080 }, { "epoch": 4.3799308662057745, "grad_norm": 0.27868202328681946, "learning_rate": 4.61213835833208e-07, "loss": 0.3045, "step": 43081 }, { "epoch": 4.3800325335502235, "grad_norm": 0.27674201130867004, "learning_rate": 4.6106497405385755e-07, "loss": 0.2895, "step": 43082 }, { "epoch": 4.380134200894672, "grad_norm": 0.26814451813697815, "learning_rate": 4.609161351405844e-07, "loss": 0.3088, "step": 43083 }, { "epoch": 4.380235868239121, "grad_norm": 0.276071161031723, "learning_rate": 4.607673190941364e-07, "loss": 0.2655, "step": 43084 }, { "epoch": 4.38033753558357, "grad_norm": 0.28933462500572205, "learning_rate": 4.606185259152646e-07, "loss": 0.2927, "step": 43085 }, { "epoch": 4.380439202928019, "grad_norm": 0.3045348823070526, "learning_rate": 4.604697556047166e-07, "loss": 0.3063, "step": 43086 }, { "epoch": 4.380540870272468, "grad_norm": 0.25976797938346863, "learning_rate": 4.603210081632442e-07, "loss": 0.3501, "step": 43087 }, { "epoch": 4.380642537616917, "grad_norm": 0.3092544674873352, "learning_rate": 4.601722835915956e-07, "loss": 0.3033, "step": 43088 }, { "epoch": 4.380744204961366, "grad_norm": 0.2607897222042084, "learning_rate": 4.6002358189051965e-07, "loss": 0.3046, "step": 43089 }, { "epoch": 4.380845872305815, "grad_norm": 0.2839703857898712, "learning_rate": 4.598749030607669e-07, "loss": 0.2832, "step": 43090 }, { "epoch": 4.380947539650264, "grad_norm": 0.291260302066803, "learning_rate": 4.5972624710308554e-07, "loss": 0.3006, "step": 43091 }, { "epoch": 4.381049206994713, "grad_norm": 0.2773703634738922, "learning_rate": 4.5957761401822287e-07, "loss": 0.2965, "step": 43092 }, { "epoch": 4.381150874339163, "grad_norm": 0.27376794815063477, "learning_rate": 4.59429003806931e-07, "loss": 0.3051, "step": 43093 }, { "epoch": 4.381252541683612, "grad_norm": 0.2916826605796814, "learning_rate": 4.59280416469956e-07, "loss": 0.2891, "step": 43094 }, { "epoch": 4.381354209028061, "grad_norm": 0.2849583029747009, "learning_rate": 4.591318520080479e-07, "loss": 0.262, "step": 43095 }, { "epoch": 4.3814558763725096, "grad_norm": 0.2857908308506012, "learning_rate": 4.589833104219532e-07, "loss": 0.3195, "step": 43096 }, { "epoch": 4.3815575437169585, "grad_norm": 0.2703724801540375, "learning_rate": 4.588347917124225e-07, "loss": 0.2682, "step": 43097 }, { "epoch": 4.381659211061407, "grad_norm": 0.28128111362457275, "learning_rate": 4.5868629588020296e-07, "loss": 0.2919, "step": 43098 }, { "epoch": 4.381760878405856, "grad_norm": 0.2753068506717682, "learning_rate": 4.5853782292604176e-07, "loss": 0.3157, "step": 43099 }, { "epoch": 4.381862545750305, "grad_norm": 0.2691606879234314, "learning_rate": 4.583893728506883e-07, "loss": 0.2688, "step": 43100 }, { "epoch": 4.381964213094754, "grad_norm": 0.2895422577857971, "learning_rate": 4.582409456548903e-07, "loss": 0.2911, "step": 43101 }, { "epoch": 4.382065880439203, "grad_norm": 0.29744768142700195, "learning_rate": 4.580925413393944e-07, "loss": 0.2634, "step": 43102 }, { "epoch": 4.382167547783652, "grad_norm": 0.2516692280769348, "learning_rate": 4.5794415990494944e-07, "loss": 0.299, "step": 43103 }, { "epoch": 4.382269215128101, "grad_norm": 0.2988807260990143, "learning_rate": 4.577958013523032e-07, "loss": 0.3013, "step": 43104 }, { "epoch": 4.38237088247255, "grad_norm": 0.28801238536834717, "learning_rate": 4.5764746568220165e-07, "loss": 0.2981, "step": 43105 }, { "epoch": 4.382472549816999, "grad_norm": 0.28682810068130493, "learning_rate": 4.57499152895392e-07, "loss": 0.3061, "step": 43106 }, { "epoch": 4.382574217161448, "grad_norm": 0.27274712920188904, "learning_rate": 4.5735086299262267e-07, "loss": 0.3136, "step": 43107 }, { "epoch": 4.382675884505897, "grad_norm": 0.2857688367366791, "learning_rate": 4.5720259597464067e-07, "loss": 0.2848, "step": 43108 }, { "epoch": 4.382777551850346, "grad_norm": 0.2583145499229431, "learning_rate": 4.570543518421916e-07, "loss": 0.2817, "step": 43109 }, { "epoch": 4.382879219194795, "grad_norm": 0.28066954016685486, "learning_rate": 4.5690613059602487e-07, "loss": 0.2859, "step": 43110 }, { "epoch": 4.382980886539244, "grad_norm": 0.29381775856018066, "learning_rate": 4.567579322368837e-07, "loss": 0.3302, "step": 43111 }, { "epoch": 4.383082553883693, "grad_norm": 0.2992827296257019, "learning_rate": 4.566097567655159e-07, "loss": 0.2765, "step": 43112 }, { "epoch": 4.3831842212281416, "grad_norm": 0.27872776985168457, "learning_rate": 4.564616041826697e-07, "loss": 0.3152, "step": 43113 }, { "epoch": 4.3832858885725905, "grad_norm": 0.28065747022628784, "learning_rate": 4.563134744890907e-07, "loss": 0.2873, "step": 43114 }, { "epoch": 4.383387555917039, "grad_norm": 0.28358912467956543, "learning_rate": 4.561653676855243e-07, "loss": 0.2682, "step": 43115 }, { "epoch": 4.383489223261488, "grad_norm": 0.30858707427978516, "learning_rate": 4.5601728377271557e-07, "loss": 0.2662, "step": 43116 }, { "epoch": 4.383590890605937, "grad_norm": 0.29148954153060913, "learning_rate": 4.558692227514133e-07, "loss": 0.2796, "step": 43117 }, { "epoch": 4.383692557950386, "grad_norm": 0.27563175559043884, "learning_rate": 4.557211846223625e-07, "loss": 0.325, "step": 43118 }, { "epoch": 4.383794225294835, "grad_norm": 0.2899158298969269, "learning_rate": 4.555731693863069e-07, "loss": 0.2841, "step": 43119 }, { "epoch": 4.383895892639284, "grad_norm": 0.2696012854576111, "learning_rate": 4.55425177043996e-07, "loss": 0.2877, "step": 43120 }, { "epoch": 4.383997559983733, "grad_norm": 0.2820799946784973, "learning_rate": 4.5527720759617087e-07, "loss": 0.3008, "step": 43121 }, { "epoch": 4.384099227328182, "grad_norm": 0.26810571551322937, "learning_rate": 4.551292610435798e-07, "loss": 0.3176, "step": 43122 }, { "epoch": 4.384200894672631, "grad_norm": 0.28576841950416565, "learning_rate": 4.549813373869688e-07, "loss": 0.2816, "step": 43123 }, { "epoch": 4.38430256201708, "grad_norm": 0.3028475344181061, "learning_rate": 4.548334366270801e-07, "loss": 0.2961, "step": 43124 }, { "epoch": 4.384404229361529, "grad_norm": 0.2547040283679962, "learning_rate": 4.5468555876466094e-07, "loss": 0.3144, "step": 43125 }, { "epoch": 4.384505896705978, "grad_norm": 0.28589531779289246, "learning_rate": 4.545377038004556e-07, "loss": 0.2836, "step": 43126 }, { "epoch": 4.384607564050427, "grad_norm": 0.27882328629493713, "learning_rate": 4.5438987173520966e-07, "loss": 0.2855, "step": 43127 }, { "epoch": 4.384709231394876, "grad_norm": 0.2855421304702759, "learning_rate": 4.542420625696675e-07, "loss": 0.3181, "step": 43128 }, { "epoch": 4.384810898739325, "grad_norm": 0.26845306158065796, "learning_rate": 4.5409427630457247e-07, "loss": 0.3004, "step": 43129 }, { "epoch": 4.3849125660837736, "grad_norm": 0.3010426461696625, "learning_rate": 4.539465129406723e-07, "loss": 0.3107, "step": 43130 }, { "epoch": 4.3850142334282225, "grad_norm": 0.27517759799957275, "learning_rate": 4.5379877247870686e-07, "loss": 0.2891, "step": 43131 }, { "epoch": 4.385115900772671, "grad_norm": 0.3018590211868286, "learning_rate": 4.536510549194234e-07, "loss": 0.292, "step": 43132 }, { "epoch": 4.38521756811712, "grad_norm": 0.2898029088973999, "learning_rate": 4.535033602635675e-07, "loss": 0.293, "step": 43133 }, { "epoch": 4.385319235461569, "grad_norm": 0.2760939598083496, "learning_rate": 4.5335568851187896e-07, "loss": 0.3107, "step": 43134 }, { "epoch": 4.385420902806018, "grad_norm": 0.28013432025909424, "learning_rate": 4.532080396651051e-07, "loss": 0.3079, "step": 43135 }, { "epoch": 4.385522570150467, "grad_norm": 0.28368884325027466, "learning_rate": 4.53060413723988e-07, "loss": 0.257, "step": 43136 }, { "epoch": 4.385624237494917, "grad_norm": 0.30655524134635925, "learning_rate": 4.529128106892727e-07, "loss": 0.303, "step": 43137 }, { "epoch": 4.385725904839366, "grad_norm": 0.2824510931968689, "learning_rate": 4.5276523056170196e-07, "loss": 0.2697, "step": 43138 }, { "epoch": 4.385827572183815, "grad_norm": 0.2732195258140564, "learning_rate": 4.52617673342019e-07, "loss": 0.2863, "step": 43139 }, { "epoch": 4.385929239528264, "grad_norm": 0.308741956949234, "learning_rate": 4.524701390309688e-07, "loss": 0.2992, "step": 43140 }, { "epoch": 4.386030906872713, "grad_norm": 0.2651682496070862, "learning_rate": 4.523226276292919e-07, "loss": 0.2757, "step": 43141 }, { "epoch": 4.386132574217162, "grad_norm": 0.27074500918388367, "learning_rate": 4.521751391377327e-07, "loss": 0.3111, "step": 43142 }, { "epoch": 4.386234241561611, "grad_norm": 0.27084848284721375, "learning_rate": 4.520276735570367e-07, "loss": 0.2944, "step": 43143 }, { "epoch": 4.38633590890606, "grad_norm": 0.29316624999046326, "learning_rate": 4.518802308879422e-07, "loss": 0.2386, "step": 43144 }, { "epoch": 4.386437576250509, "grad_norm": 0.26103994250297546, "learning_rate": 4.5173281113119525e-07, "loss": 0.2951, "step": 43145 }, { "epoch": 4.3865392435949575, "grad_norm": 0.2861209511756897, "learning_rate": 4.5158541428753753e-07, "loss": 0.2987, "step": 43146 }, { "epoch": 4.3866409109394064, "grad_norm": 0.2878991365432739, "learning_rate": 4.5143804035771123e-07, "loss": 0.2718, "step": 43147 }, { "epoch": 4.386742578283855, "grad_norm": 0.27765876054763794, "learning_rate": 4.512906893424596e-07, "loss": 0.3187, "step": 43148 }, { "epoch": 4.386844245628304, "grad_norm": 0.2857378125190735, "learning_rate": 4.5114336124252366e-07, "loss": 0.2956, "step": 43149 }, { "epoch": 4.386945912972753, "grad_norm": 0.26409780979156494, "learning_rate": 4.509960560586485e-07, "loss": 0.294, "step": 43150 }, { "epoch": 4.387047580317202, "grad_norm": 0.2684037983417511, "learning_rate": 4.5084877379157234e-07, "loss": 0.303, "step": 43151 }, { "epoch": 4.387149247661651, "grad_norm": 0.25652799010276794, "learning_rate": 4.5070151444203904e-07, "loss": 0.3263, "step": 43152 }, { "epoch": 4.3872509150061, "grad_norm": 0.27776238322257996, "learning_rate": 4.5055427801079186e-07, "loss": 0.2949, "step": 43153 }, { "epoch": 4.387352582350549, "grad_norm": 0.28353747725486755, "learning_rate": 4.5040706449856975e-07, "loss": 0.2985, "step": 43154 }, { "epoch": 4.387454249694998, "grad_norm": 0.29245856404304504, "learning_rate": 4.502598739061165e-07, "loss": 0.3157, "step": 43155 }, { "epoch": 4.387555917039447, "grad_norm": 0.2711804211139679, "learning_rate": 4.5011270623417313e-07, "loss": 0.2859, "step": 43156 }, { "epoch": 4.387657584383896, "grad_norm": 0.27683112025260925, "learning_rate": 4.4996556148347914e-07, "loss": 0.3063, "step": 43157 }, { "epoch": 4.387759251728345, "grad_norm": 0.3012571930885315, "learning_rate": 4.498184396547789e-07, "loss": 0.294, "step": 43158 }, { "epoch": 4.387860919072794, "grad_norm": 0.2712385058403015, "learning_rate": 4.496713407488107e-07, "loss": 0.2986, "step": 43159 }, { "epoch": 4.387962586417243, "grad_norm": 0.2981335520744324, "learning_rate": 4.4952426476631894e-07, "loss": 0.272, "step": 43160 }, { "epoch": 4.388064253761692, "grad_norm": 0.26569804549217224, "learning_rate": 4.4937721170804083e-07, "loss": 0.2829, "step": 43161 }, { "epoch": 4.388165921106141, "grad_norm": 0.29720965027809143, "learning_rate": 4.492301815747191e-07, "loss": 0.2817, "step": 43162 }, { "epoch": 4.3882675884505895, "grad_norm": 0.2775338292121887, "learning_rate": 4.490831743670959e-07, "loss": 0.2756, "step": 43163 }, { "epoch": 4.3883692557950384, "grad_norm": 0.2993682324886322, "learning_rate": 4.489361900859085e-07, "loss": 0.315, "step": 43164 }, { "epoch": 4.388470923139487, "grad_norm": 0.2940625250339508, "learning_rate": 4.4878922873189956e-07, "loss": 0.2666, "step": 43165 }, { "epoch": 4.388572590483936, "grad_norm": 0.26542016863822937, "learning_rate": 4.486422903058091e-07, "loss": 0.3114, "step": 43166 }, { "epoch": 4.388674257828385, "grad_norm": 0.2831789553165436, "learning_rate": 4.4849537480837646e-07, "loss": 0.2993, "step": 43167 }, { "epoch": 4.388775925172834, "grad_norm": 0.29262566566467285, "learning_rate": 4.4834848224034387e-07, "loss": 0.3166, "step": 43168 }, { "epoch": 4.388877592517283, "grad_norm": 0.2844330966472626, "learning_rate": 4.4820161260244967e-07, "loss": 0.2986, "step": 43169 }, { "epoch": 4.388979259861732, "grad_norm": 0.2966994345188141, "learning_rate": 4.4805476589543375e-07, "loss": 0.2808, "step": 43170 }, { "epoch": 4.389080927206181, "grad_norm": 0.28678807616233826, "learning_rate": 4.479079421200355e-07, "loss": 0.3197, "step": 43171 }, { "epoch": 4.38918259455063, "grad_norm": 0.27804499864578247, "learning_rate": 4.47761141276995e-07, "loss": 0.2696, "step": 43172 }, { "epoch": 4.389284261895079, "grad_norm": 0.27784088253974915, "learning_rate": 4.4761436336705434e-07, "loss": 0.294, "step": 43173 }, { "epoch": 4.389385929239528, "grad_norm": 0.27533450722694397, "learning_rate": 4.4746760839094904e-07, "loss": 0.2945, "step": 43174 }, { "epoch": 4.389487596583978, "grad_norm": 0.27624738216400146, "learning_rate": 4.473208763494208e-07, "loss": 0.2964, "step": 43175 }, { "epoch": 4.389589263928427, "grad_norm": 0.26641300320625305, "learning_rate": 4.4717416724320786e-07, "loss": 0.2862, "step": 43176 }, { "epoch": 4.389690931272876, "grad_norm": 0.2794640362262726, "learning_rate": 4.470274810730485e-07, "loss": 0.2866, "step": 43177 }, { "epoch": 4.3897925986173245, "grad_norm": 0.2615966796875, "learning_rate": 4.468808178396844e-07, "loss": 0.3137, "step": 43178 }, { "epoch": 4.3898942659617735, "grad_norm": 0.2899658977985382, "learning_rate": 4.4673417754385216e-07, "loss": 0.2635, "step": 43179 }, { "epoch": 4.389995933306222, "grad_norm": 0.3049139380455017, "learning_rate": 4.4658756018629126e-07, "loss": 0.2911, "step": 43180 }, { "epoch": 4.390097600650671, "grad_norm": 0.25537899136543274, "learning_rate": 4.464409657677393e-07, "loss": 0.2788, "step": 43181 }, { "epoch": 4.39019926799512, "grad_norm": 0.2648372948169708, "learning_rate": 4.462943942889364e-07, "loss": 0.2739, "step": 43182 }, { "epoch": 4.390300935339569, "grad_norm": 0.2995377779006958, "learning_rate": 4.461478457506202e-07, "loss": 0.2803, "step": 43183 }, { "epoch": 4.390402602684018, "grad_norm": 0.2587420344352722, "learning_rate": 4.460013201535285e-07, "loss": 0.317, "step": 43184 }, { "epoch": 4.390504270028467, "grad_norm": 0.29984259605407715, "learning_rate": 4.4585481749840063e-07, "loss": 0.2974, "step": 43185 }, { "epoch": 4.390605937372916, "grad_norm": 0.28742048144340515, "learning_rate": 4.457083377859739e-07, "loss": 0.2731, "step": 43186 }, { "epoch": 4.390707604717365, "grad_norm": 0.2951623499393463, "learning_rate": 4.4556188101698536e-07, "loss": 0.2578, "step": 43187 }, { "epoch": 4.390809272061814, "grad_norm": 0.28197726607322693, "learning_rate": 4.4541544719217455e-07, "loss": 0.2782, "step": 43188 }, { "epoch": 4.390910939406263, "grad_norm": 0.2934859097003937, "learning_rate": 4.4526903631227856e-07, "loss": 0.2962, "step": 43189 }, { "epoch": 4.391012606750712, "grad_norm": 0.2644500434398651, "learning_rate": 4.4512264837803464e-07, "loss": 0.274, "step": 43190 }, { "epoch": 4.391114274095161, "grad_norm": 0.28693175315856934, "learning_rate": 4.4497628339018e-07, "loss": 0.2981, "step": 43191 }, { "epoch": 4.39121594143961, "grad_norm": 0.2575404942035675, "learning_rate": 4.448299413494528e-07, "loss": 0.3132, "step": 43192 }, { "epoch": 4.391317608784059, "grad_norm": 0.327784925699234, "learning_rate": 4.446836222565898e-07, "loss": 0.2895, "step": 43193 }, { "epoch": 4.391419276128508, "grad_norm": 0.28803175687789917, "learning_rate": 4.445373261123276e-07, "loss": 0.2785, "step": 43194 }, { "epoch": 4.3915209434729565, "grad_norm": 0.27933168411254883, "learning_rate": 4.4439105291740506e-07, "loss": 0.2947, "step": 43195 }, { "epoch": 4.3916226108174055, "grad_norm": 0.2837398946285248, "learning_rate": 4.442448026725571e-07, "loss": 0.2735, "step": 43196 }, { "epoch": 4.391724278161854, "grad_norm": 0.27865105867385864, "learning_rate": 4.440985753785204e-07, "loss": 0.2566, "step": 43197 }, { "epoch": 4.391825945506303, "grad_norm": 0.29644250869750977, "learning_rate": 4.439523710360338e-07, "loss": 0.3215, "step": 43198 }, { "epoch": 4.391927612850752, "grad_norm": 0.3047477900981903, "learning_rate": 4.4380618964583234e-07, "loss": 0.2819, "step": 43199 }, { "epoch": 4.392029280195201, "grad_norm": 0.2738385796546936, "learning_rate": 4.4366003120865253e-07, "loss": 0.2761, "step": 43200 }, { "epoch": 4.39213094753965, "grad_norm": 0.27399173378944397, "learning_rate": 4.435138957252294e-07, "loss": 0.3181, "step": 43201 }, { "epoch": 4.392232614884099, "grad_norm": 0.29717305302619934, "learning_rate": 4.4336778319630183e-07, "loss": 0.3033, "step": 43202 }, { "epoch": 4.392334282228548, "grad_norm": 0.30051979422569275, "learning_rate": 4.432216936226047e-07, "loss": 0.3288, "step": 43203 }, { "epoch": 4.392435949572997, "grad_norm": 0.27726203203201294, "learning_rate": 4.430756270048725e-07, "loss": 0.3017, "step": 43204 }, { "epoch": 4.392537616917446, "grad_norm": 0.2729615569114685, "learning_rate": 4.4292958334384353e-07, "loss": 0.2672, "step": 43205 }, { "epoch": 4.392639284261895, "grad_norm": 0.2890578508377075, "learning_rate": 4.4278356264025214e-07, "loss": 0.2961, "step": 43206 }, { "epoch": 4.392740951606344, "grad_norm": 0.29689157009124756, "learning_rate": 4.4263756489483335e-07, "loss": 0.3163, "step": 43207 }, { "epoch": 4.392842618950793, "grad_norm": 0.2838507294654846, "learning_rate": 4.4249159010832486e-07, "loss": 0.307, "step": 43208 }, { "epoch": 4.392944286295242, "grad_norm": 0.2866742014884949, "learning_rate": 4.4234563828146004e-07, "loss": 0.2971, "step": 43209 }, { "epoch": 4.393045953639691, "grad_norm": 0.2660622298717499, "learning_rate": 4.4219970941497545e-07, "loss": 0.3301, "step": 43210 }, { "epoch": 4.39314762098414, "grad_norm": 0.27891814708709717, "learning_rate": 4.4205380350960445e-07, "loss": 0.3191, "step": 43211 }, { "epoch": 4.3932492883285885, "grad_norm": 0.30908361077308655, "learning_rate": 4.4190792056608423e-07, "loss": 0.3187, "step": 43212 }, { "epoch": 4.3933509556730375, "grad_norm": 0.27644938230514526, "learning_rate": 4.417620605851486e-07, "loss": 0.2986, "step": 43213 }, { "epoch": 4.393452623017486, "grad_norm": 0.2717839777469635, "learning_rate": 4.416162235675314e-07, "loss": 0.2891, "step": 43214 }, { "epoch": 4.393554290361935, "grad_norm": 0.2831310033798218, "learning_rate": 4.414704095139699e-07, "loss": 0.2844, "step": 43215 }, { "epoch": 4.393655957706384, "grad_norm": 0.28237074613571167, "learning_rate": 4.413246184251968e-07, "loss": 0.3084, "step": 43216 }, { "epoch": 4.393757625050833, "grad_norm": 0.26998281478881836, "learning_rate": 4.4117885030194596e-07, "loss": 0.3002, "step": 43217 }, { "epoch": 4.393859292395282, "grad_norm": 0.28973349928855896, "learning_rate": 4.4103310514495403e-07, "loss": 0.3448, "step": 43218 }, { "epoch": 4.393960959739732, "grad_norm": 0.2995598018169403, "learning_rate": 4.408873829549537e-07, "loss": 0.3203, "step": 43219 }, { "epoch": 4.394062627084181, "grad_norm": 0.27937814593315125, "learning_rate": 4.4074168373267945e-07, "loss": 0.2943, "step": 43220 }, { "epoch": 4.39416429442863, "grad_norm": 0.271788090467453, "learning_rate": 4.4059600747886397e-07, "loss": 0.2835, "step": 43221 }, { "epoch": 4.394265961773079, "grad_norm": 0.26583606004714966, "learning_rate": 4.404503541942434e-07, "loss": 0.3036, "step": 43222 }, { "epoch": 4.394367629117528, "grad_norm": 0.2857562303543091, "learning_rate": 4.4030472387955046e-07, "loss": 0.2977, "step": 43223 }, { "epoch": 4.394469296461977, "grad_norm": 0.2663916051387787, "learning_rate": 4.401591165355179e-07, "loss": 0.2788, "step": 43224 }, { "epoch": 4.394570963806426, "grad_norm": 0.2764955163002014, "learning_rate": 4.4001353216288124e-07, "loss": 0.3216, "step": 43225 }, { "epoch": 4.394672631150875, "grad_norm": 0.30889827013015747, "learning_rate": 4.3986797076237267e-07, "loss": 0.2807, "step": 43226 }, { "epoch": 4.3947742984953235, "grad_norm": 0.2909563481807709, "learning_rate": 4.3972243233472434e-07, "loss": 0.2948, "step": 43227 }, { "epoch": 4.3948759658397725, "grad_norm": 0.27116885781288147, "learning_rate": 4.395769168806718e-07, "loss": 0.2943, "step": 43228 }, { "epoch": 4.394977633184221, "grad_norm": 0.309779554605484, "learning_rate": 4.394314244009473e-07, "loss": 0.3121, "step": 43229 }, { "epoch": 4.39507930052867, "grad_norm": 0.2826765179634094, "learning_rate": 4.39285954896283e-07, "loss": 0.3145, "step": 43230 }, { "epoch": 4.395180967873119, "grad_norm": 0.27175068855285645, "learning_rate": 4.391405083674122e-07, "loss": 0.3107, "step": 43231 }, { "epoch": 4.395282635217568, "grad_norm": 0.28025519847869873, "learning_rate": 4.3899508481506757e-07, "loss": 0.2808, "step": 43232 }, { "epoch": 4.395384302562017, "grad_norm": 0.26779037714004517, "learning_rate": 4.3884968423998255e-07, "loss": 0.3004, "step": 43233 }, { "epoch": 4.395485969906466, "grad_norm": 0.2706892490386963, "learning_rate": 4.3870430664288756e-07, "loss": 0.2831, "step": 43234 }, { "epoch": 4.395587637250915, "grad_norm": 0.2794080972671509, "learning_rate": 4.3855895202451757e-07, "loss": 0.2876, "step": 43235 }, { "epoch": 4.395689304595364, "grad_norm": 0.2848297357559204, "learning_rate": 4.384136203856038e-07, "loss": 0.2883, "step": 43236 }, { "epoch": 4.395790971939813, "grad_norm": 0.28040191531181335, "learning_rate": 4.382683117268771e-07, "loss": 0.3457, "step": 43237 }, { "epoch": 4.395892639284262, "grad_norm": 0.27743133902549744, "learning_rate": 4.3812302604907156e-07, "loss": 0.2925, "step": 43238 }, { "epoch": 4.395994306628711, "grad_norm": 0.27798548340797424, "learning_rate": 4.3797776335291807e-07, "loss": 0.2776, "step": 43239 }, { "epoch": 4.39609597397316, "grad_norm": 0.2899564504623413, "learning_rate": 4.378325236391484e-07, "loss": 0.3341, "step": 43240 }, { "epoch": 4.396197641317609, "grad_norm": 0.28070446848869324, "learning_rate": 4.3768730690849357e-07, "loss": 0.3011, "step": 43241 }, { "epoch": 4.396299308662058, "grad_norm": 0.2659229636192322, "learning_rate": 4.375421131616864e-07, "loss": 0.2825, "step": 43242 }, { "epoch": 4.396400976006507, "grad_norm": 0.28107741475105286, "learning_rate": 4.373969423994584e-07, "loss": 0.316, "step": 43243 }, { "epoch": 4.3965026433509555, "grad_norm": 0.272162526845932, "learning_rate": 4.3725179462253906e-07, "loss": 0.2783, "step": 43244 }, { "epoch": 4.3966043106954045, "grad_norm": 0.27748534083366394, "learning_rate": 4.371066698316623e-07, "loss": 0.2819, "step": 43245 }, { "epoch": 4.396705978039853, "grad_norm": 0.27484092116355896, "learning_rate": 4.369615680275574e-07, "loss": 0.2788, "step": 43246 }, { "epoch": 4.396807645384302, "grad_norm": 0.27967968583106995, "learning_rate": 4.3681648921095497e-07, "loss": 0.2798, "step": 43247 }, { "epoch": 4.396909312728751, "grad_norm": 0.280499666929245, "learning_rate": 4.3667143338258775e-07, "loss": 0.2912, "step": 43248 }, { "epoch": 4.3970109800732, "grad_norm": 0.27398914098739624, "learning_rate": 4.365264005431852e-07, "loss": 0.2818, "step": 43249 }, { "epoch": 4.397112647417649, "grad_norm": 0.3156808018684387, "learning_rate": 4.3638139069347827e-07, "loss": 0.3112, "step": 43250 }, { "epoch": 4.397214314762098, "grad_norm": 0.271077424287796, "learning_rate": 4.362364038341971e-07, "loss": 0.2762, "step": 43251 }, { "epoch": 4.397315982106547, "grad_norm": 0.2733314335346222, "learning_rate": 4.360914399660726e-07, "loss": 0.296, "step": 43252 }, { "epoch": 4.397417649450996, "grad_norm": 0.29674193263053894, "learning_rate": 4.3594649908983487e-07, "loss": 0.2994, "step": 43253 }, { "epoch": 4.397519316795445, "grad_norm": 0.3033292293548584, "learning_rate": 4.358015812062133e-07, "loss": 0.2697, "step": 43254 }, { "epoch": 4.397620984139894, "grad_norm": 0.2885034382343292, "learning_rate": 4.3565668631594003e-07, "loss": 0.2956, "step": 43255 }, { "epoch": 4.397722651484343, "grad_norm": 0.2532120943069458, "learning_rate": 4.3551181441974345e-07, "loss": 0.296, "step": 43256 }, { "epoch": 4.397824318828793, "grad_norm": 0.2693350613117218, "learning_rate": 4.353669655183523e-07, "loss": 0.2667, "step": 43257 }, { "epoch": 4.397925986173242, "grad_norm": 0.2973574101924896, "learning_rate": 4.352221396124995e-07, "loss": 0.2928, "step": 43258 }, { "epoch": 4.3980276535176905, "grad_norm": 0.28879186511039734, "learning_rate": 4.3507733670291206e-07, "loss": 0.2661, "step": 43259 }, { "epoch": 4.3981293208621395, "grad_norm": 0.27798449993133545, "learning_rate": 4.3493255679032065e-07, "loss": 0.3574, "step": 43260 }, { "epoch": 4.398230988206588, "grad_norm": 0.27234312891960144, "learning_rate": 4.3478779987545297e-07, "loss": 0.3071, "step": 43261 }, { "epoch": 4.398332655551037, "grad_norm": 0.26558709144592285, "learning_rate": 4.346430659590406e-07, "loss": 0.2953, "step": 43262 }, { "epoch": 4.398434322895486, "grad_norm": 0.2795998454093933, "learning_rate": 4.344983550418114e-07, "loss": 0.2743, "step": 43263 }, { "epoch": 4.398535990239935, "grad_norm": 0.2778688073158264, "learning_rate": 4.3435366712449355e-07, "loss": 0.2916, "step": 43264 }, { "epoch": 4.398637657584384, "grad_norm": 0.27038127183914185, "learning_rate": 4.3420900220781826e-07, "loss": 0.2918, "step": 43265 }, { "epoch": 4.398739324928833, "grad_norm": 0.3163837790489197, "learning_rate": 4.3406436029251266e-07, "loss": 0.2906, "step": 43266 }, { "epoch": 4.398840992273282, "grad_norm": 0.27293989062309265, "learning_rate": 4.3391974137930503e-07, "loss": 0.2867, "step": 43267 }, { "epoch": 4.398942659617731, "grad_norm": 0.30362480878829956, "learning_rate": 4.337751454689254e-07, "loss": 0.2831, "step": 43268 }, { "epoch": 4.39904432696218, "grad_norm": 0.29331153631210327, "learning_rate": 4.336305725621015e-07, "loss": 0.3242, "step": 43269 }, { "epoch": 4.399145994306629, "grad_norm": 0.28529536724090576, "learning_rate": 4.3348602265956165e-07, "loss": 0.3193, "step": 43270 }, { "epoch": 4.399247661651078, "grad_norm": 0.27322834730148315, "learning_rate": 4.3334149576203357e-07, "loss": 0.3035, "step": 43271 }, { "epoch": 4.399349328995527, "grad_norm": 0.26731109619140625, "learning_rate": 4.3319699187024666e-07, "loss": 0.2895, "step": 43272 }, { "epoch": 4.399450996339976, "grad_norm": 0.30049535632133484, "learning_rate": 4.330525109849276e-07, "loss": 0.2866, "step": 43273 }, { "epoch": 4.399552663684425, "grad_norm": 0.27610331773757935, "learning_rate": 4.329080531068042e-07, "loss": 0.2857, "step": 43274 }, { "epoch": 4.399654331028874, "grad_norm": 0.27383318543434143, "learning_rate": 4.327636182366063e-07, "loss": 0.3066, "step": 43275 }, { "epoch": 4.3997559983733225, "grad_norm": 0.32500478625297546, "learning_rate": 4.326192063750584e-07, "loss": 0.2792, "step": 43276 }, { "epoch": 4.3998576657177715, "grad_norm": 0.2815989553928375, "learning_rate": 4.324748175228899e-07, "loss": 0.2968, "step": 43277 }, { "epoch": 4.39995933306222, "grad_norm": 0.2898143231868744, "learning_rate": 4.3233045168082746e-07, "loss": 0.3128, "step": 43278 }, { "epoch": 4.400061000406669, "grad_norm": 0.2746759057044983, "learning_rate": 4.321861088495993e-07, "loss": 0.2774, "step": 43279 }, { "epoch": 4.400162667751118, "grad_norm": 0.27687621116638184, "learning_rate": 4.3204178902993274e-07, "loss": 0.2963, "step": 43280 }, { "epoch": 4.400264335095567, "grad_norm": 0.3037763833999634, "learning_rate": 4.318974922225527e-07, "loss": 0.2979, "step": 43281 }, { "epoch": 4.400366002440016, "grad_norm": 0.2906070351600647, "learning_rate": 4.317532184281892e-07, "loss": 0.2807, "step": 43282 }, { "epoch": 4.400467669784465, "grad_norm": 0.25863540172576904, "learning_rate": 4.31608967647566e-07, "loss": 0.3055, "step": 43283 }, { "epoch": 4.400569337128914, "grad_norm": 0.25965628027915955, "learning_rate": 4.314647398814109e-07, "loss": 0.2575, "step": 43284 }, { "epoch": 4.400671004473363, "grad_norm": 0.2895359396934509, "learning_rate": 4.313205351304528e-07, "loss": 0.2876, "step": 43285 }, { "epoch": 4.400772671817812, "grad_norm": 0.2819131314754486, "learning_rate": 4.3117635339541444e-07, "loss": 0.3162, "step": 43286 }, { "epoch": 4.400874339162261, "grad_norm": 0.2813679873943329, "learning_rate": 4.3103219467702463e-07, "loss": 0.2886, "step": 43287 }, { "epoch": 4.40097600650671, "grad_norm": 0.30857396125793457, "learning_rate": 4.308880589760078e-07, "loss": 0.2922, "step": 43288 }, { "epoch": 4.401077673851159, "grad_norm": 0.29629334807395935, "learning_rate": 4.307439462930924e-07, "loss": 0.2881, "step": 43289 }, { "epoch": 4.401179341195608, "grad_norm": 0.3107694387435913, "learning_rate": 4.305998566290026e-07, "loss": 0.3045, "step": 43290 }, { "epoch": 4.401281008540057, "grad_norm": 0.2845003306865692, "learning_rate": 4.304557899844647e-07, "loss": 0.3148, "step": 43291 }, { "epoch": 4.401382675884506, "grad_norm": 0.3029121458530426, "learning_rate": 4.3031174636020576e-07, "loss": 0.2997, "step": 43292 }, { "epoch": 4.4014843432289545, "grad_norm": 0.2965519428253174, "learning_rate": 4.301677257569492e-07, "loss": 0.3009, "step": 43293 }, { "epoch": 4.4015860105734035, "grad_norm": 0.28068092465400696, "learning_rate": 4.3002372817542106e-07, "loss": 0.2574, "step": 43294 }, { "epoch": 4.401687677917852, "grad_norm": 0.25493407249450684, "learning_rate": 4.298797536163496e-07, "loss": 0.3483, "step": 43295 }, { "epoch": 4.401789345262301, "grad_norm": 0.274810254573822, "learning_rate": 4.2973580208045595e-07, "loss": 0.3201, "step": 43296 }, { "epoch": 4.40189101260675, "grad_norm": 0.26752743124961853, "learning_rate": 4.295918735684684e-07, "loss": 0.3171, "step": 43297 }, { "epoch": 4.401992679951199, "grad_norm": 0.2987115681171417, "learning_rate": 4.2944796808111035e-07, "loss": 0.2829, "step": 43298 }, { "epoch": 4.402094347295648, "grad_norm": 0.2725537419319153, "learning_rate": 4.293040856191072e-07, "loss": 0.2838, "step": 43299 }, { "epoch": 4.402196014640097, "grad_norm": 0.2841232419013977, "learning_rate": 4.29160226183184e-07, "loss": 0.3175, "step": 43300 }, { "epoch": 4.402297681984547, "grad_norm": 0.2997666299343109, "learning_rate": 4.290163897740651e-07, "loss": 0.2817, "step": 43301 }, { "epoch": 4.402399349328996, "grad_norm": 0.2883864939212799, "learning_rate": 4.288725763924767e-07, "loss": 0.3011, "step": 43302 }, { "epoch": 4.402501016673445, "grad_norm": 0.3027419149875641, "learning_rate": 4.287287860391404e-07, "loss": 0.2824, "step": 43303 }, { "epoch": 4.402602684017894, "grad_norm": 0.29310622811317444, "learning_rate": 4.285850187147822e-07, "loss": 0.2837, "step": 43304 }, { "epoch": 4.402704351362343, "grad_norm": 0.2659071385860443, "learning_rate": 4.284412744201283e-07, "loss": 0.2849, "step": 43305 }, { "epoch": 4.402806018706792, "grad_norm": 0.2764832377433777, "learning_rate": 4.282975531558986e-07, "loss": 0.2936, "step": 43306 }, { "epoch": 4.402907686051241, "grad_norm": 0.2763379216194153, "learning_rate": 4.281538549228209e-07, "loss": 0.2986, "step": 43307 }, { "epoch": 4.4030093533956896, "grad_norm": 0.2726663649082184, "learning_rate": 4.280101797216174e-07, "loss": 0.273, "step": 43308 }, { "epoch": 4.4031110207401385, "grad_norm": 0.2656719386577606, "learning_rate": 4.278665275530114e-07, "loss": 0.3028, "step": 43309 }, { "epoch": 4.403212688084587, "grad_norm": 0.2700621485710144, "learning_rate": 4.2772289841772785e-07, "loss": 0.3016, "step": 43310 }, { "epoch": 4.403314355429036, "grad_norm": 0.2696751356124878, "learning_rate": 4.27579292316489e-07, "loss": 0.294, "step": 43311 }, { "epoch": 4.403416022773485, "grad_norm": 0.3014659285545349, "learning_rate": 4.2743570925002143e-07, "loss": 0.2914, "step": 43312 }, { "epoch": 4.403517690117934, "grad_norm": 0.31258270144462585, "learning_rate": 4.272921492190435e-07, "loss": 0.301, "step": 43313 }, { "epoch": 4.403619357462383, "grad_norm": 0.2753792107105255, "learning_rate": 4.2714861222428183e-07, "loss": 0.3072, "step": 43314 }, { "epoch": 4.403721024806832, "grad_norm": 0.2744520604610443, "learning_rate": 4.2700509826646033e-07, "loss": 0.2819, "step": 43315 }, { "epoch": 4.403822692151281, "grad_norm": 0.2581039369106293, "learning_rate": 4.2686160734629836e-07, "loss": 0.2911, "step": 43316 }, { "epoch": 4.40392435949573, "grad_norm": 0.2694735825061798, "learning_rate": 4.26718139464522e-07, "loss": 0.3005, "step": 43317 }, { "epoch": 4.404026026840179, "grad_norm": 0.2803501784801483, "learning_rate": 4.2657469462185297e-07, "loss": 0.283, "step": 43318 }, { "epoch": 4.404127694184628, "grad_norm": 0.281393438577652, "learning_rate": 4.264312728190129e-07, "loss": 0.2736, "step": 43319 }, { "epoch": 4.404229361529077, "grad_norm": 0.2902558147907257, "learning_rate": 4.2628787405672554e-07, "loss": 0.3064, "step": 43320 }, { "epoch": 4.404331028873526, "grad_norm": 0.26753878593444824, "learning_rate": 4.261444983357138e-07, "loss": 0.3238, "step": 43321 }, { "epoch": 4.404432696217975, "grad_norm": 0.260606050491333, "learning_rate": 4.260011456566987e-07, "loss": 0.2849, "step": 43322 }, { "epoch": 4.404534363562424, "grad_norm": 0.26755282282829285, "learning_rate": 4.258578160204019e-07, "loss": 0.2777, "step": 43323 }, { "epoch": 4.404636030906873, "grad_norm": 0.2888990044593811, "learning_rate": 4.2571450942754667e-07, "loss": 0.3016, "step": 43324 }, { "epoch": 4.4047376982513216, "grad_norm": 0.2731022238731384, "learning_rate": 4.255712258788569e-07, "loss": 0.3121, "step": 43325 }, { "epoch": 4.4048393655957705, "grad_norm": 0.2792820334434509, "learning_rate": 4.254279653750498e-07, "loss": 0.2841, "step": 43326 }, { "epoch": 4.404941032940219, "grad_norm": 0.25307148694992065, "learning_rate": 4.2528472791685096e-07, "loss": 0.3019, "step": 43327 }, { "epoch": 4.405042700284668, "grad_norm": 0.2826627194881439, "learning_rate": 4.2514151350498023e-07, "loss": 0.26, "step": 43328 }, { "epoch": 4.405144367629117, "grad_norm": 0.2811329662799835, "learning_rate": 4.2499832214015823e-07, "loss": 0.2766, "step": 43329 }, { "epoch": 4.405246034973566, "grad_norm": 0.27229759097099304, "learning_rate": 4.248551538231088e-07, "loss": 0.2554, "step": 43330 }, { "epoch": 4.405347702318015, "grad_norm": 0.294979065656662, "learning_rate": 4.2471200855455197e-07, "loss": 0.2963, "step": 43331 }, { "epoch": 4.405449369662464, "grad_norm": 0.2751217782497406, "learning_rate": 4.245688863352082e-07, "loss": 0.312, "step": 43332 }, { "epoch": 4.405551037006913, "grad_norm": 0.28604382276535034, "learning_rate": 4.244257871657986e-07, "loss": 0.2879, "step": 43333 }, { "epoch": 4.405652704351362, "grad_norm": 0.28183847665786743, "learning_rate": 4.2428271104704545e-07, "loss": 0.297, "step": 43334 }, { "epoch": 4.405754371695811, "grad_norm": 0.299037367105484, "learning_rate": 4.2413965797966863e-07, "loss": 0.3107, "step": 43335 }, { "epoch": 4.40585603904026, "grad_norm": 0.28274717926979065, "learning_rate": 4.239966279643881e-07, "loss": 0.3076, "step": 43336 }, { "epoch": 4.405957706384709, "grad_norm": 0.2626042068004608, "learning_rate": 4.238536210019256e-07, "loss": 0.2838, "step": 43337 }, { "epoch": 4.406059373729158, "grad_norm": 0.2603636384010315, "learning_rate": 4.2371063709300163e-07, "loss": 0.3091, "step": 43338 }, { "epoch": 4.406161041073608, "grad_norm": 0.2743147313594818, "learning_rate": 4.235676762383345e-07, "loss": 0.2694, "step": 43339 }, { "epoch": 4.406262708418057, "grad_norm": 0.2836763560771942, "learning_rate": 4.234247384386475e-07, "loss": 0.3048, "step": 43340 }, { "epoch": 4.4063643757625055, "grad_norm": 0.3015974164009094, "learning_rate": 4.232818236946584e-07, "loss": 0.303, "step": 43341 }, { "epoch": 4.4064660431069544, "grad_norm": 0.264335960149765, "learning_rate": 4.2313893200708835e-07, "loss": 0.2987, "step": 43342 }, { "epoch": 4.406567710451403, "grad_norm": 0.27515730261802673, "learning_rate": 4.2299606337665557e-07, "loss": 0.2749, "step": 43343 }, { "epoch": 4.406669377795852, "grad_norm": 0.2722875773906708, "learning_rate": 4.2285321780408173e-07, "loss": 0.2767, "step": 43344 }, { "epoch": 4.406771045140301, "grad_norm": 0.29431381821632385, "learning_rate": 4.227103952900863e-07, "loss": 0.3047, "step": 43345 }, { "epoch": 4.40687271248475, "grad_norm": 0.2856341302394867, "learning_rate": 4.22567595835387e-07, "loss": 0.2933, "step": 43346 }, { "epoch": 4.406974379829199, "grad_norm": 0.31649529933929443, "learning_rate": 4.224248194407049e-07, "loss": 0.334, "step": 43347 }, { "epoch": 4.407076047173648, "grad_norm": 0.2900576889514923, "learning_rate": 4.2228206610675894e-07, "loss": 0.3391, "step": 43348 }, { "epoch": 4.407177714518097, "grad_norm": 0.28993141651153564, "learning_rate": 4.221393358342674e-07, "loss": 0.3062, "step": 43349 }, { "epoch": 4.407279381862546, "grad_norm": 0.2595635652542114, "learning_rate": 4.219966286239507e-07, "loss": 0.2819, "step": 43350 }, { "epoch": 4.407381049206995, "grad_norm": 0.2793816030025482, "learning_rate": 4.218539444765274e-07, "loss": 0.2692, "step": 43351 }, { "epoch": 4.407482716551444, "grad_norm": 0.29405340552330017, "learning_rate": 4.2171128339271563e-07, "loss": 0.2873, "step": 43352 }, { "epoch": 4.407584383895893, "grad_norm": 0.2753072679042816, "learning_rate": 4.2156864537323373e-07, "loss": 0.3178, "step": 43353 }, { "epoch": 4.407686051240342, "grad_norm": 0.2992664873600006, "learning_rate": 4.214260304188017e-07, "loss": 0.3025, "step": 43354 }, { "epoch": 4.407787718584791, "grad_norm": 0.266603946685791, "learning_rate": 4.2128343853013733e-07, "loss": 0.2553, "step": 43355 }, { "epoch": 4.40788938592924, "grad_norm": 0.2740519940853119, "learning_rate": 4.211408697079583e-07, "loss": 0.3141, "step": 43356 }, { "epoch": 4.407991053273689, "grad_norm": 0.292054682970047, "learning_rate": 4.2099832395298415e-07, "loss": 0.282, "step": 43357 }, { "epoch": 4.4080927206181375, "grad_norm": 0.2743263244628906, "learning_rate": 4.208558012659325e-07, "loss": 0.2916, "step": 43358 }, { "epoch": 4.4081943879625864, "grad_norm": 0.28147637844085693, "learning_rate": 4.2071330164752013e-07, "loss": 0.3312, "step": 43359 }, { "epoch": 4.408296055307035, "grad_norm": 0.29553836584091187, "learning_rate": 4.2057082509846693e-07, "loss": 0.2717, "step": 43360 }, { "epoch": 4.408397722651484, "grad_norm": 0.29232272505760193, "learning_rate": 4.204283716194896e-07, "loss": 0.3343, "step": 43361 }, { "epoch": 4.408499389995933, "grad_norm": 0.2733989655971527, "learning_rate": 4.202859412113053e-07, "loss": 0.3073, "step": 43362 }, { "epoch": 4.408601057340382, "grad_norm": 0.29029691219329834, "learning_rate": 4.2014353387463183e-07, "loss": 0.2882, "step": 43363 }, { "epoch": 4.408702724684831, "grad_norm": 0.28305408358573914, "learning_rate": 4.20001149610188e-07, "loss": 0.2865, "step": 43364 }, { "epoch": 4.40880439202928, "grad_norm": 0.2821360230445862, "learning_rate": 4.1985878841868944e-07, "loss": 0.2684, "step": 43365 }, { "epoch": 4.408906059373729, "grad_norm": 0.2739277482032776, "learning_rate": 4.197164503008533e-07, "loss": 0.316, "step": 43366 }, { "epoch": 4.409007726718178, "grad_norm": 0.27311187982559204, "learning_rate": 4.1957413525739845e-07, "loss": 0.3117, "step": 43367 }, { "epoch": 4.409109394062627, "grad_norm": 0.29062458872795105, "learning_rate": 4.1943184328903984e-07, "loss": 0.3114, "step": 43368 }, { "epoch": 4.409211061407076, "grad_norm": 0.2783363461494446, "learning_rate": 4.192895743964948e-07, "loss": 0.3064, "step": 43369 }, { "epoch": 4.409312728751525, "grad_norm": 0.2812676727771759, "learning_rate": 4.1914732858048093e-07, "loss": 0.2864, "step": 43370 }, { "epoch": 4.409414396095974, "grad_norm": 0.293381929397583, "learning_rate": 4.190051058417144e-07, "loss": 0.2742, "step": 43371 }, { "epoch": 4.409516063440423, "grad_norm": 0.2913722097873688, "learning_rate": 4.188629061809119e-07, "loss": 0.294, "step": 43372 }, { "epoch": 4.409617730784872, "grad_norm": 0.2805035710334778, "learning_rate": 4.1872072959878774e-07, "loss": 0.287, "step": 43373 }, { "epoch": 4.409719398129321, "grad_norm": 0.2828518748283386, "learning_rate": 4.1857857609606143e-07, "loss": 0.2956, "step": 43374 }, { "epoch": 4.4098210654737695, "grad_norm": 0.2867080867290497, "learning_rate": 4.184364456734474e-07, "loss": 0.2859, "step": 43375 }, { "epoch": 4.4099227328182184, "grad_norm": 0.27168476581573486, "learning_rate": 4.1829433833166066e-07, "loss": 0.2943, "step": 43376 }, { "epoch": 4.410024400162667, "grad_norm": 0.28819626569747925, "learning_rate": 4.181522540714195e-07, "loss": 0.3209, "step": 43377 }, { "epoch": 4.410126067507116, "grad_norm": 0.29345574975013733, "learning_rate": 4.1801019289343826e-07, "loss": 0.2604, "step": 43378 }, { "epoch": 4.410227734851565, "grad_norm": 0.28243935108184814, "learning_rate": 4.17868154798432e-07, "loss": 0.2816, "step": 43379 }, { "epoch": 4.410329402196014, "grad_norm": 0.26636675000190735, "learning_rate": 4.1772613978711796e-07, "loss": 0.3054, "step": 43380 }, { "epoch": 4.410431069540463, "grad_norm": 0.26624706387519836, "learning_rate": 4.175841478602105e-07, "loss": 0.2972, "step": 43381 }, { "epoch": 4.410532736884912, "grad_norm": 0.2757464349269867, "learning_rate": 4.174421790184252e-07, "loss": 0.2896, "step": 43382 }, { "epoch": 4.410634404229362, "grad_norm": 0.265327513217926, "learning_rate": 4.1730023326247646e-07, "loss": 0.3183, "step": 43383 }, { "epoch": 4.410736071573811, "grad_norm": 0.2628030478954315, "learning_rate": 4.17158310593081e-07, "loss": 0.2921, "step": 43384 }, { "epoch": 4.41083773891826, "grad_norm": 0.27378225326538086, "learning_rate": 4.1701641101095316e-07, "loss": 0.3042, "step": 43385 }, { "epoch": 4.410939406262709, "grad_norm": 0.2656317353248596, "learning_rate": 4.168745345168063e-07, "loss": 0.2982, "step": 43386 }, { "epoch": 4.411041073607158, "grad_norm": 0.27922263741493225, "learning_rate": 4.1673268111135755e-07, "loss": 0.297, "step": 43387 }, { "epoch": 4.411142740951607, "grad_norm": 0.2779251039028168, "learning_rate": 4.165908507953204e-07, "loss": 0.3122, "step": 43388 }, { "epoch": 4.411244408296056, "grad_norm": 0.2966005206108093, "learning_rate": 4.164490435694085e-07, "loss": 0.2932, "step": 43389 }, { "epoch": 4.4113460756405045, "grad_norm": 0.26394709944725037, "learning_rate": 4.163072594343376e-07, "loss": 0.2991, "step": 43390 }, { "epoch": 4.4114477429849535, "grad_norm": 0.27343839406967163, "learning_rate": 4.16165498390822e-07, "loss": 0.3144, "step": 43391 }, { "epoch": 4.411549410329402, "grad_norm": 0.27431178092956543, "learning_rate": 4.16023760439575e-07, "loss": 0.3213, "step": 43392 }, { "epoch": 4.411651077673851, "grad_norm": 0.2819156050682068, "learning_rate": 4.158820455813101e-07, "loss": 0.3005, "step": 43393 }, { "epoch": 4.4117527450183, "grad_norm": 0.26754096150398254, "learning_rate": 4.1574035381674315e-07, "loss": 0.2903, "step": 43394 }, { "epoch": 4.411854412362749, "grad_norm": 0.2804591655731201, "learning_rate": 4.155986851465865e-07, "loss": 0.2783, "step": 43395 }, { "epoch": 4.411956079707198, "grad_norm": 0.30506372451782227, "learning_rate": 4.1545703957155406e-07, "loss": 0.276, "step": 43396 }, { "epoch": 4.412057747051647, "grad_norm": 0.28757691383361816, "learning_rate": 4.1531541709236014e-07, "loss": 0.33, "step": 43397 }, { "epoch": 4.412159414396096, "grad_norm": 0.2923045754432678, "learning_rate": 4.1517381770971753e-07, "loss": 0.2802, "step": 43398 }, { "epoch": 4.412261081740545, "grad_norm": 0.29806002974510193, "learning_rate": 4.1503224142433853e-07, "loss": 0.2793, "step": 43399 }, { "epoch": 4.412362749084994, "grad_norm": 0.288576602935791, "learning_rate": 4.1489068823693914e-07, "loss": 0.2836, "step": 43400 }, { "epoch": 4.412464416429443, "grad_norm": 0.288717657327652, "learning_rate": 4.1474915814823044e-07, "loss": 0.2844, "step": 43401 }, { "epoch": 4.412566083773892, "grad_norm": 0.2642967700958252, "learning_rate": 4.1460765115892586e-07, "loss": 0.3102, "step": 43402 }, { "epoch": 4.412667751118341, "grad_norm": 0.27932924032211304, "learning_rate": 4.1446616726973753e-07, "loss": 0.2923, "step": 43403 }, { "epoch": 4.41276941846279, "grad_norm": 0.27691298723220825, "learning_rate": 4.143247064813799e-07, "loss": 0.3156, "step": 43404 }, { "epoch": 4.412871085807239, "grad_norm": 0.2847394049167633, "learning_rate": 4.141832687945646e-07, "loss": 0.3019, "step": 43405 }, { "epoch": 4.412972753151688, "grad_norm": 0.2871226668357849, "learning_rate": 4.140418542100033e-07, "loss": 0.295, "step": 43406 }, { "epoch": 4.4130744204961365, "grad_norm": 0.28079262375831604, "learning_rate": 4.1390046272841043e-07, "loss": 0.3172, "step": 43407 }, { "epoch": 4.4131760878405855, "grad_norm": 0.2779541313648224, "learning_rate": 4.137590943504971e-07, "loss": 0.2594, "step": 43408 }, { "epoch": 4.413277755185034, "grad_norm": 0.2930516302585602, "learning_rate": 4.136177490769744e-07, "loss": 0.293, "step": 43409 }, { "epoch": 4.413379422529483, "grad_norm": 0.2787433862686157, "learning_rate": 4.1347642690855674e-07, "loss": 0.3081, "step": 43410 }, { "epoch": 4.413481089873932, "grad_norm": 0.2669537663459778, "learning_rate": 4.1333512784595465e-07, "loss": 0.2894, "step": 43411 }, { "epoch": 4.413582757218381, "grad_norm": 0.29176247119903564, "learning_rate": 4.131938518898809e-07, "loss": 0.2964, "step": 43412 }, { "epoch": 4.41368442456283, "grad_norm": 0.2865583896636963, "learning_rate": 4.13052599041045e-07, "loss": 0.294, "step": 43413 }, { "epoch": 4.413786091907279, "grad_norm": 0.2685430347919464, "learning_rate": 4.1291136930016126e-07, "loss": 0.2869, "step": 43414 }, { "epoch": 4.413887759251728, "grad_norm": 0.27659285068511963, "learning_rate": 4.127701626679398e-07, "loss": 0.2868, "step": 43415 }, { "epoch": 4.413989426596177, "grad_norm": 0.2706749737262726, "learning_rate": 4.1262897914509157e-07, "loss": 0.3235, "step": 43416 }, { "epoch": 4.414091093940626, "grad_norm": 0.2650296986103058, "learning_rate": 4.124878187323289e-07, "loss": 0.3368, "step": 43417 }, { "epoch": 4.414192761285075, "grad_norm": 0.2901856303215027, "learning_rate": 4.123466814303628e-07, "loss": 0.285, "step": 43418 }, { "epoch": 4.414294428629524, "grad_norm": 0.2660883665084839, "learning_rate": 4.122055672399028e-07, "loss": 0.2919, "step": 43419 }, { "epoch": 4.414396095973973, "grad_norm": 0.2844765782356262, "learning_rate": 4.120644761616621e-07, "loss": 0.3066, "step": 43420 }, { "epoch": 4.414497763318423, "grad_norm": 0.2929087281227112, "learning_rate": 4.1192340819635026e-07, "loss": 0.3028, "step": 43421 }, { "epoch": 4.4145994306628715, "grad_norm": 0.275590717792511, "learning_rate": 4.117823633446777e-07, "loss": 0.2664, "step": 43422 }, { "epoch": 4.4147010980073205, "grad_norm": 0.26206234097480774, "learning_rate": 4.116413416073545e-07, "loss": 0.2808, "step": 43423 }, { "epoch": 4.414802765351769, "grad_norm": 0.26504045724868774, "learning_rate": 4.115003429850928e-07, "loss": 0.3444, "step": 43424 }, { "epoch": 4.414904432696218, "grad_norm": 0.2701617181301117, "learning_rate": 4.113593674786021e-07, "loss": 0.3011, "step": 43425 }, { "epoch": 4.415006100040667, "grad_norm": 0.2676035463809967, "learning_rate": 4.112184150885912e-07, "loss": 0.2898, "step": 43426 }, { "epoch": 4.415107767385116, "grad_norm": 0.26096320152282715, "learning_rate": 4.1107748581577344e-07, "loss": 0.3188, "step": 43427 }, { "epoch": 4.415209434729565, "grad_norm": 0.3100515305995941, "learning_rate": 4.10936579660855e-07, "loss": 0.3066, "step": 43428 }, { "epoch": 4.415311102074014, "grad_norm": 0.2763682007789612, "learning_rate": 4.10795696624548e-07, "loss": 0.3025, "step": 43429 }, { "epoch": 4.415412769418463, "grad_norm": 0.2755056619644165, "learning_rate": 4.106548367075619e-07, "loss": 0.3153, "step": 43430 }, { "epoch": 4.415514436762912, "grad_norm": 0.2551173269748688, "learning_rate": 4.105139999106067e-07, "loss": 0.3007, "step": 43431 }, { "epoch": 4.415616104107361, "grad_norm": 0.2804979085922241, "learning_rate": 4.103731862343907e-07, "loss": 0.2878, "step": 43432 }, { "epoch": 4.41571777145181, "grad_norm": 0.2965780794620514, "learning_rate": 4.102323956796239e-07, "loss": 0.2811, "step": 43433 }, { "epoch": 4.415819438796259, "grad_norm": 0.2929583489894867, "learning_rate": 4.1009162824701574e-07, "loss": 0.2703, "step": 43434 }, { "epoch": 4.415921106140708, "grad_norm": 0.2649582326412201, "learning_rate": 4.099508839372757e-07, "loss": 0.2904, "step": 43435 }, { "epoch": 4.416022773485157, "grad_norm": 0.25062960386276245, "learning_rate": 4.0981016275111085e-07, "loss": 0.297, "step": 43436 }, { "epoch": 4.416124440829606, "grad_norm": 0.27063095569610596, "learning_rate": 4.096694646892341e-07, "loss": 0.2941, "step": 43437 }, { "epoch": 4.416226108174055, "grad_norm": 0.2852226793766022, "learning_rate": 4.095287897523498e-07, "loss": 0.3192, "step": 43438 }, { "epoch": 4.4163277755185035, "grad_norm": 0.2800830006599426, "learning_rate": 4.093881379411685e-07, "loss": 0.2659, "step": 43439 }, { "epoch": 4.4164294428629525, "grad_norm": 0.29470786452293396, "learning_rate": 4.092475092563997e-07, "loss": 0.2586, "step": 43440 }, { "epoch": 4.416531110207401, "grad_norm": 0.30217793583869934, "learning_rate": 4.0910690369875107e-07, "loss": 0.3298, "step": 43441 }, { "epoch": 4.41663277755185, "grad_norm": 0.3024323880672455, "learning_rate": 4.0896632126893046e-07, "loss": 0.311, "step": 43442 }, { "epoch": 4.416734444896299, "grad_norm": 0.2900102138519287, "learning_rate": 4.088257619676461e-07, "loss": 0.3072, "step": 43443 }, { "epoch": 4.416836112240748, "grad_norm": 0.28503990173339844, "learning_rate": 4.0868522579560754e-07, "loss": 0.3156, "step": 43444 }, { "epoch": 4.416937779585197, "grad_norm": 0.2887042760848999, "learning_rate": 4.085447127535214e-07, "loss": 0.3236, "step": 43445 }, { "epoch": 4.417039446929646, "grad_norm": 0.28806981444358826, "learning_rate": 4.084042228420954e-07, "loss": 0.3043, "step": 43446 }, { "epoch": 4.417141114274095, "grad_norm": 0.29995083808898926, "learning_rate": 4.082637560620395e-07, "loss": 0.3105, "step": 43447 }, { "epoch": 4.417242781618544, "grad_norm": 0.27287378907203674, "learning_rate": 4.081233124140577e-07, "loss": 0.2671, "step": 43448 }, { "epoch": 4.417344448962993, "grad_norm": 0.29226937890052795, "learning_rate": 4.0798289189885933e-07, "loss": 0.2979, "step": 43449 }, { "epoch": 4.417446116307442, "grad_norm": 0.2936488687992096, "learning_rate": 4.078424945171539e-07, "loss": 0.2964, "step": 43450 }, { "epoch": 4.417547783651891, "grad_norm": 0.2725446820259094, "learning_rate": 4.077021202696446e-07, "loss": 0.2919, "step": 43451 }, { "epoch": 4.41764945099634, "grad_norm": 0.2872124910354614, "learning_rate": 4.075617691570422e-07, "loss": 0.2631, "step": 43452 }, { "epoch": 4.417751118340789, "grad_norm": 0.26117852330207825, "learning_rate": 4.074214411800509e-07, "loss": 0.2618, "step": 43453 }, { "epoch": 4.417852785685238, "grad_norm": 0.2732681930065155, "learning_rate": 4.072811363393797e-07, "loss": 0.306, "step": 43454 }, { "epoch": 4.417954453029687, "grad_norm": 0.2827276289463043, "learning_rate": 4.071408546357353e-07, "loss": 0.3067, "step": 43455 }, { "epoch": 4.4180561203741355, "grad_norm": 0.30794358253479004, "learning_rate": 4.0700059606982254e-07, "loss": 0.2785, "step": 43456 }, { "epoch": 4.4181577877185845, "grad_norm": 0.283670037984848, "learning_rate": 4.068603606423516e-07, "loss": 0.2977, "step": 43457 }, { "epoch": 4.418259455063033, "grad_norm": 0.27675411105155945, "learning_rate": 4.067201483540245e-07, "loss": 0.2679, "step": 43458 }, { "epoch": 4.418361122407482, "grad_norm": 0.28746145963668823, "learning_rate": 4.0657995920554974e-07, "loss": 0.2837, "step": 43459 }, { "epoch": 4.418462789751931, "grad_norm": 0.2904198467731476, "learning_rate": 4.0643979319763547e-07, "loss": 0.2936, "step": 43460 }, { "epoch": 4.41856445709638, "grad_norm": 0.2757191061973572, "learning_rate": 4.062996503309841e-07, "loss": 0.3341, "step": 43461 }, { "epoch": 4.418666124440829, "grad_norm": 0.28805479407310486, "learning_rate": 4.061595306063049e-07, "loss": 0.2883, "step": 43462 }, { "epoch": 4.418767791785278, "grad_norm": 0.270474374294281, "learning_rate": 4.060194340243018e-07, "loss": 0.2969, "step": 43463 }, { "epoch": 4.418869459129727, "grad_norm": 0.2710186243057251, "learning_rate": 4.058793605856809e-07, "loss": 0.2861, "step": 43464 }, { "epoch": 4.418971126474177, "grad_norm": 0.28702419996261597, "learning_rate": 4.057393102911472e-07, "loss": 0.2785, "step": 43465 }, { "epoch": 4.419072793818626, "grad_norm": 0.28877559304237366, "learning_rate": 4.055992831414074e-07, "loss": 0.2878, "step": 43466 }, { "epoch": 4.419174461163075, "grad_norm": 0.30189990997314453, "learning_rate": 4.054592791371681e-07, "loss": 0.2929, "step": 43467 }, { "epoch": 4.419276128507524, "grad_norm": 0.2770196199417114, "learning_rate": 4.0531929827913153e-07, "loss": 0.3043, "step": 43468 }, { "epoch": 4.419377795851973, "grad_norm": 0.2729690670967102, "learning_rate": 4.0517934056800544e-07, "loss": 0.2792, "step": 43469 }, { "epoch": 4.419479463196422, "grad_norm": 0.2867656946182251, "learning_rate": 4.0503940600449377e-07, "loss": 0.2823, "step": 43470 }, { "epoch": 4.4195811305408705, "grad_norm": 0.27789533138275146, "learning_rate": 4.048994945893009e-07, "loss": 0.2797, "step": 43471 }, { "epoch": 4.4196827978853195, "grad_norm": 0.28681784868240356, "learning_rate": 4.0475960632313294e-07, "loss": 0.3019, "step": 43472 }, { "epoch": 4.419784465229768, "grad_norm": 0.2613033652305603, "learning_rate": 4.0461974120669435e-07, "loss": 0.2823, "step": 43473 }, { "epoch": 4.419886132574217, "grad_norm": 0.28549453616142273, "learning_rate": 4.0447989924068953e-07, "loss": 0.3021, "step": 43474 }, { "epoch": 4.419987799918666, "grad_norm": 0.26827389001846313, "learning_rate": 4.043400804258224e-07, "loss": 0.2985, "step": 43475 }, { "epoch": 4.420089467263115, "grad_norm": 0.2775264382362366, "learning_rate": 4.042002847627974e-07, "loss": 0.3236, "step": 43476 }, { "epoch": 4.420191134607564, "grad_norm": 0.29885610938072205, "learning_rate": 4.0406051225232056e-07, "loss": 0.2863, "step": 43477 }, { "epoch": 4.420292801952013, "grad_norm": 0.27218523621559143, "learning_rate": 4.0392076289509364e-07, "loss": 0.3323, "step": 43478 }, { "epoch": 4.420394469296462, "grad_norm": 0.26950299739837646, "learning_rate": 4.0378103669182213e-07, "loss": 0.3169, "step": 43479 }, { "epoch": 4.420496136640911, "grad_norm": 0.26434919238090515, "learning_rate": 4.0364133364320987e-07, "loss": 0.2767, "step": 43480 }, { "epoch": 4.42059780398536, "grad_norm": 0.27232781052589417, "learning_rate": 4.035016537499592e-07, "loss": 0.2882, "step": 43481 }, { "epoch": 4.420699471329809, "grad_norm": 0.28375542163848877, "learning_rate": 4.0336199701277614e-07, "loss": 0.3079, "step": 43482 }, { "epoch": 4.420801138674258, "grad_norm": 0.2763204276561737, "learning_rate": 4.0322236343236286e-07, "loss": 0.3052, "step": 43483 }, { "epoch": 4.420902806018707, "grad_norm": 0.28333669900894165, "learning_rate": 4.030827530094228e-07, "loss": 0.2889, "step": 43484 }, { "epoch": 4.421004473363156, "grad_norm": 0.3305627405643463, "learning_rate": 4.029431657446586e-07, "loss": 0.3209, "step": 43485 }, { "epoch": 4.421106140707605, "grad_norm": 0.27388614416122437, "learning_rate": 4.028036016387754e-07, "loss": 0.2678, "step": 43486 }, { "epoch": 4.421207808052054, "grad_norm": 0.3045024573802948, "learning_rate": 4.026640606924748e-07, "loss": 0.3112, "step": 43487 }, { "epoch": 4.4213094753965025, "grad_norm": 0.28319209814071655, "learning_rate": 4.025245429064595e-07, "loss": 0.2752, "step": 43488 }, { "epoch": 4.4214111427409515, "grad_norm": 0.30649879574775696, "learning_rate": 4.02385048281434e-07, "loss": 0.2675, "step": 43489 }, { "epoch": 4.4215128100854, "grad_norm": 0.2813902795314789, "learning_rate": 4.0224557681809993e-07, "loss": 0.3091, "step": 43490 }, { "epoch": 4.421614477429849, "grad_norm": 0.27428629994392395, "learning_rate": 4.02106128517159e-07, "loss": 0.2957, "step": 43491 }, { "epoch": 4.421716144774298, "grad_norm": 0.29323676228523254, "learning_rate": 4.0196670337931623e-07, "loss": 0.2814, "step": 43492 }, { "epoch": 4.421817812118747, "grad_norm": 0.27634555101394653, "learning_rate": 4.01827301405272e-07, "loss": 0.2942, "step": 43493 }, { "epoch": 4.421919479463196, "grad_norm": 0.26863789558410645, "learning_rate": 4.016879225957293e-07, "loss": 0.3108, "step": 43494 }, { "epoch": 4.422021146807645, "grad_norm": 0.3159143626689911, "learning_rate": 4.0154856695138913e-07, "loss": 0.2759, "step": 43495 }, { "epoch": 4.422122814152094, "grad_norm": 0.2886979877948761, "learning_rate": 4.014092344729553e-07, "loss": 0.2725, "step": 43496 }, { "epoch": 4.422224481496543, "grad_norm": 0.2771075367927551, "learning_rate": 4.0126992516112905e-07, "loss": 0.2986, "step": 43497 }, { "epoch": 4.422326148840992, "grad_norm": 0.2893716096878052, "learning_rate": 4.011306390166114e-07, "loss": 0.2767, "step": 43498 }, { "epoch": 4.422427816185441, "grad_norm": 0.2736951410770416, "learning_rate": 4.0099137604010575e-07, "loss": 0.3081, "step": 43499 }, { "epoch": 4.42252948352989, "grad_norm": 0.2741275429725647, "learning_rate": 4.0085213623231256e-07, "loss": 0.3265, "step": 43500 }, { "epoch": 4.422631150874339, "grad_norm": 0.2786853611469269, "learning_rate": 4.0071291959393246e-07, "loss": 0.2808, "step": 43501 }, { "epoch": 4.422732818218788, "grad_norm": 0.270812451839447, "learning_rate": 4.0057372612566815e-07, "loss": 0.2903, "step": 43502 }, { "epoch": 4.4228344855632375, "grad_norm": 0.2682061493396759, "learning_rate": 4.0043455582822077e-07, "loss": 0.3487, "step": 43503 }, { "epoch": 4.4229361529076865, "grad_norm": 0.2797890603542328, "learning_rate": 4.0029540870229087e-07, "loss": 0.2833, "step": 43504 }, { "epoch": 4.423037820252135, "grad_norm": 0.2725813388824463, "learning_rate": 4.0015628474857905e-07, "loss": 0.3015, "step": 43505 }, { "epoch": 4.423139487596584, "grad_norm": 0.2663843631744385, "learning_rate": 4.00017183967788e-07, "loss": 0.313, "step": 43506 }, { "epoch": 4.423241154941033, "grad_norm": 0.2718402147293091, "learning_rate": 3.998781063606166e-07, "loss": 0.3131, "step": 43507 }, { "epoch": 4.423342822285482, "grad_norm": 0.32726407051086426, "learning_rate": 3.997390519277655e-07, "loss": 0.3013, "step": 43508 }, { "epoch": 4.423444489629931, "grad_norm": 0.2875029146671295, "learning_rate": 3.9960002066993686e-07, "loss": 0.2767, "step": 43509 }, { "epoch": 4.42354615697438, "grad_norm": 0.26638057827949524, "learning_rate": 3.9946101258782956e-07, "loss": 0.2798, "step": 43510 }, { "epoch": 4.423647824318829, "grad_norm": 0.2624654471874237, "learning_rate": 3.993220276821441e-07, "loss": 0.3163, "step": 43511 }, { "epoch": 4.423749491663278, "grad_norm": 0.28322067856788635, "learning_rate": 3.991830659535817e-07, "loss": 0.2875, "step": 43512 }, { "epoch": 4.423851159007727, "grad_norm": 0.28272882103919983, "learning_rate": 3.990441274028417e-07, "loss": 0.283, "step": 43513 }, { "epoch": 4.423952826352176, "grad_norm": 0.27015388011932373, "learning_rate": 3.9890521203062417e-07, "loss": 0.2637, "step": 43514 }, { "epoch": 4.424054493696625, "grad_norm": 0.2595120668411255, "learning_rate": 3.987663198376279e-07, "loss": 0.2977, "step": 43515 }, { "epoch": 4.424156161041074, "grad_norm": 0.2627177834510803, "learning_rate": 3.986274508245541e-07, "loss": 0.3305, "step": 43516 }, { "epoch": 4.424257828385523, "grad_norm": 0.28102514147758484, "learning_rate": 3.984886049921016e-07, "loss": 0.3309, "step": 43517 }, { "epoch": 4.424359495729972, "grad_norm": 0.2762342095375061, "learning_rate": 3.983497823409699e-07, "loss": 0.2964, "step": 43518 }, { "epoch": 4.424461163074421, "grad_norm": 0.2782406806945801, "learning_rate": 3.982109828718589e-07, "loss": 0.2786, "step": 43519 }, { "epoch": 4.4245628304188696, "grad_norm": 0.2651172876358032, "learning_rate": 3.98072206585467e-07, "loss": 0.2766, "step": 43520 }, { "epoch": 4.4246644977633185, "grad_norm": 0.2882150709629059, "learning_rate": 3.979334534824936e-07, "loss": 0.3051, "step": 43521 }, { "epoch": 4.424766165107767, "grad_norm": 0.2824694812297821, "learning_rate": 3.977947235636381e-07, "loss": 0.3017, "step": 43522 }, { "epoch": 4.424867832452216, "grad_norm": 0.26605531573295593, "learning_rate": 3.9765601682959956e-07, "loss": 0.3184, "step": 43523 }, { "epoch": 4.424969499796665, "grad_norm": 0.2818821668624878, "learning_rate": 3.975173332810761e-07, "loss": 0.3062, "step": 43524 }, { "epoch": 4.425071167141114, "grad_norm": 0.28561004996299744, "learning_rate": 3.9737867291876566e-07, "loss": 0.2806, "step": 43525 }, { "epoch": 4.425172834485563, "grad_norm": 0.2871408760547638, "learning_rate": 3.9724003574336924e-07, "loss": 0.3036, "step": 43526 }, { "epoch": 4.425274501830012, "grad_norm": 0.2740313708782196, "learning_rate": 3.97101421755583e-07, "loss": 0.3068, "step": 43527 }, { "epoch": 4.425376169174461, "grad_norm": 0.2972138524055481, "learning_rate": 3.969628309561052e-07, "loss": 0.3176, "step": 43528 }, { "epoch": 4.42547783651891, "grad_norm": 0.2881323993206024, "learning_rate": 3.9682426334563596e-07, "loss": 0.2869, "step": 43529 }, { "epoch": 4.425579503863359, "grad_norm": 0.3055404722690582, "learning_rate": 3.9668571892487185e-07, "loss": 0.3058, "step": 43530 }, { "epoch": 4.425681171207808, "grad_norm": 0.26728928089141846, "learning_rate": 3.9654719769451067e-07, "loss": 0.3016, "step": 43531 }, { "epoch": 4.425782838552257, "grad_norm": 0.2982049584388733, "learning_rate": 3.9640869965525133e-07, "loss": 0.2989, "step": 43532 }, { "epoch": 4.425884505896706, "grad_norm": 0.30076202750205994, "learning_rate": 3.96270224807791e-07, "loss": 0.2969, "step": 43533 }, { "epoch": 4.425986173241155, "grad_norm": 0.2780096232891083, "learning_rate": 3.9613177315282747e-07, "loss": 0.2617, "step": 43534 }, { "epoch": 4.426087840585604, "grad_norm": 0.2772674858570099, "learning_rate": 3.9599334469105685e-07, "loss": 0.2795, "step": 43535 }, { "epoch": 4.426189507930053, "grad_norm": 0.26565226912498474, "learning_rate": 3.958549394231792e-07, "loss": 0.2637, "step": 43536 }, { "epoch": 4.4262911752745016, "grad_norm": 0.2851698100566864, "learning_rate": 3.9571655734988946e-07, "loss": 0.2738, "step": 43537 }, { "epoch": 4.4263928426189505, "grad_norm": 0.2972528040409088, "learning_rate": 3.9557819847188536e-07, "loss": 0.2605, "step": 43538 }, { "epoch": 4.426494509963399, "grad_norm": 0.32625648379325867, "learning_rate": 3.954398627898648e-07, "loss": 0.2741, "step": 43539 }, { "epoch": 4.426596177307848, "grad_norm": 0.2748209834098816, "learning_rate": 3.9530155030452376e-07, "loss": 0.291, "step": 43540 }, { "epoch": 4.426697844652297, "grad_norm": 0.29343393445014954, "learning_rate": 3.9516326101655846e-07, "loss": 0.309, "step": 43541 }, { "epoch": 4.426799511996746, "grad_norm": 0.29167142510414124, "learning_rate": 3.9502499492666713e-07, "loss": 0.2752, "step": 43542 }, { "epoch": 4.426901179341195, "grad_norm": 0.2599923312664032, "learning_rate": 3.9488675203554593e-07, "loss": 0.308, "step": 43543 }, { "epoch": 4.427002846685644, "grad_norm": 0.28529390692710876, "learning_rate": 3.9474853234389043e-07, "loss": 0.315, "step": 43544 }, { "epoch": 4.427104514030093, "grad_norm": 0.28118079900741577, "learning_rate": 3.9461033585239674e-07, "loss": 0.2693, "step": 43545 }, { "epoch": 4.427206181374542, "grad_norm": 0.28705185651779175, "learning_rate": 3.944721625617631e-07, "loss": 0.2962, "step": 43546 }, { "epoch": 4.427307848718992, "grad_norm": 0.26192331314086914, "learning_rate": 3.9433401247268357e-07, "loss": 0.2828, "step": 43547 }, { "epoch": 4.427409516063441, "grad_norm": 0.26881179213523865, "learning_rate": 3.9419588558585465e-07, "loss": 0.2827, "step": 43548 }, { "epoch": 4.42751118340789, "grad_norm": 0.2971413731575012, "learning_rate": 3.9405778190197253e-07, "loss": 0.3058, "step": 43549 }, { "epoch": 4.427612850752339, "grad_norm": 0.28887465596199036, "learning_rate": 3.939197014217333e-07, "loss": 0.2919, "step": 43550 }, { "epoch": 4.427714518096788, "grad_norm": 0.31212112307548523, "learning_rate": 3.9378164414583086e-07, "loss": 0.2983, "step": 43551 }, { "epoch": 4.427816185441237, "grad_norm": 0.2944367229938507, "learning_rate": 3.93643610074963e-07, "loss": 0.2911, "step": 43552 }, { "epoch": 4.4279178527856855, "grad_norm": 0.29124006628990173, "learning_rate": 3.935055992098241e-07, "loss": 0.3082, "step": 43553 }, { "epoch": 4.4280195201301344, "grad_norm": 0.28480082750320435, "learning_rate": 3.933676115511087e-07, "loss": 0.3026, "step": 43554 }, { "epoch": 4.428121187474583, "grad_norm": 0.280401349067688, "learning_rate": 3.9322964709951226e-07, "loss": 0.2769, "step": 43555 }, { "epoch": 4.428222854819032, "grad_norm": 0.26700612902641296, "learning_rate": 3.93091705855731e-07, "loss": 0.3189, "step": 43556 }, { "epoch": 4.428324522163481, "grad_norm": 0.27376773953437805, "learning_rate": 3.9295378782045867e-07, "loss": 0.3086, "step": 43557 }, { "epoch": 4.42842618950793, "grad_norm": 0.29096683859825134, "learning_rate": 3.928158929943893e-07, "loss": 0.3005, "step": 43558 }, { "epoch": 4.428527856852379, "grad_norm": 0.2964833080768585, "learning_rate": 3.9267802137821997e-07, "loss": 0.2861, "step": 43559 }, { "epoch": 4.428629524196828, "grad_norm": 0.2939949333667755, "learning_rate": 3.925401729726436e-07, "loss": 0.3038, "step": 43560 }, { "epoch": 4.428731191541277, "grad_norm": 0.28306475281715393, "learning_rate": 3.924023477783545e-07, "loss": 0.3128, "step": 43561 }, { "epoch": 4.428832858885726, "grad_norm": 0.2690158188343048, "learning_rate": 3.922645457960478e-07, "loss": 0.2835, "step": 43562 }, { "epoch": 4.428934526230175, "grad_norm": 0.2769935429096222, "learning_rate": 3.921267670264178e-07, "loss": 0.3101, "step": 43563 }, { "epoch": 4.429036193574624, "grad_norm": 0.29584014415740967, "learning_rate": 3.9198901147015746e-07, "loss": 0.3118, "step": 43564 }, { "epoch": 4.429137860919073, "grad_norm": 0.2727339565753937, "learning_rate": 3.9185127912796104e-07, "loss": 0.3143, "step": 43565 }, { "epoch": 4.429239528263522, "grad_norm": 0.2634449005126953, "learning_rate": 3.9171357000052366e-07, "loss": 0.2881, "step": 43566 }, { "epoch": 4.429341195607971, "grad_norm": 0.268568754196167, "learning_rate": 3.915758840885386e-07, "loss": 0.2875, "step": 43567 }, { "epoch": 4.42944286295242, "grad_norm": 0.26356443762779236, "learning_rate": 3.914382213926976e-07, "loss": 0.2798, "step": 43568 }, { "epoch": 4.429544530296869, "grad_norm": 0.254869282245636, "learning_rate": 3.9130058191369667e-07, "loss": 0.3186, "step": 43569 }, { "epoch": 4.4296461976413175, "grad_norm": 0.2935723066329956, "learning_rate": 3.911629656522287e-07, "loss": 0.2837, "step": 43570 }, { "epoch": 4.4297478649857664, "grad_norm": 0.2710823714733124, "learning_rate": 3.910253726089852e-07, "loss": 0.3061, "step": 43571 }, { "epoch": 4.429849532330215, "grad_norm": 0.28016605973243713, "learning_rate": 3.908878027846613e-07, "loss": 0.3048, "step": 43572 }, { "epoch": 4.429951199674664, "grad_norm": 0.26542583107948303, "learning_rate": 3.9075025617994977e-07, "loss": 0.2796, "step": 43573 }, { "epoch": 4.430052867019113, "grad_norm": 0.27320432662963867, "learning_rate": 3.906127327955428e-07, "loss": 0.2977, "step": 43574 }, { "epoch": 4.430154534363562, "grad_norm": 0.26472222805023193, "learning_rate": 3.904752326321326e-07, "loss": 0.3164, "step": 43575 }, { "epoch": 4.430256201708011, "grad_norm": 0.2869566082954407, "learning_rate": 3.903377556904142e-07, "loss": 0.2795, "step": 43576 }, { "epoch": 4.43035786905246, "grad_norm": 0.31937524676322937, "learning_rate": 3.902003019710782e-07, "loss": 0.2921, "step": 43577 }, { "epoch": 4.430459536396909, "grad_norm": 0.2874987721443176, "learning_rate": 3.9006287147481726e-07, "loss": 0.3051, "step": 43578 }, { "epoch": 4.430561203741358, "grad_norm": 0.27292943000793457, "learning_rate": 3.8992546420232537e-07, "loss": 0.3385, "step": 43579 }, { "epoch": 4.430662871085807, "grad_norm": 0.2871059775352478, "learning_rate": 3.8978808015429194e-07, "loss": 0.3201, "step": 43580 }, { "epoch": 4.430764538430256, "grad_norm": 0.2869202792644501, "learning_rate": 3.896507193314103e-07, "loss": 0.2771, "step": 43581 }, { "epoch": 4.430866205774705, "grad_norm": 0.2660163640975952, "learning_rate": 3.8951338173437437e-07, "loss": 0.309, "step": 43582 }, { "epoch": 4.430967873119154, "grad_norm": 0.2897098958492279, "learning_rate": 3.893760673638736e-07, "loss": 0.3144, "step": 43583 }, { "epoch": 4.431069540463603, "grad_norm": 0.25967785716056824, "learning_rate": 3.892387762206007e-07, "loss": 0.3118, "step": 43584 }, { "epoch": 4.4311712078080525, "grad_norm": 0.27107709646224976, "learning_rate": 3.891015083052468e-07, "loss": 0.3322, "step": 43585 }, { "epoch": 4.4312728751525015, "grad_norm": 0.2904879152774811, "learning_rate": 3.8896426361850427e-07, "loss": 0.3, "step": 43586 }, { "epoch": 4.43137454249695, "grad_norm": 0.2699088752269745, "learning_rate": 3.8882704216106404e-07, "loss": 0.2995, "step": 43587 }, { "epoch": 4.431476209841399, "grad_norm": 0.282577782869339, "learning_rate": 3.886898439336162e-07, "loss": 0.3098, "step": 43588 }, { "epoch": 4.431577877185848, "grad_norm": 0.2850295603275299, "learning_rate": 3.885526689368552e-07, "loss": 0.2823, "step": 43589 }, { "epoch": 4.431679544530297, "grad_norm": 0.2705543041229248, "learning_rate": 3.8841551717146877e-07, "loss": 0.2786, "step": 43590 }, { "epoch": 4.431781211874746, "grad_norm": 0.2571703791618347, "learning_rate": 3.8827838863814807e-07, "loss": 0.2812, "step": 43591 }, { "epoch": 4.431882879219195, "grad_norm": 0.2557845115661621, "learning_rate": 3.881412833375864e-07, "loss": 0.2857, "step": 43592 }, { "epoch": 4.431984546563644, "grad_norm": 0.2836833596229553, "learning_rate": 3.880042012704732e-07, "loss": 0.2802, "step": 43593 }, { "epoch": 4.432086213908093, "grad_norm": 0.28191670775413513, "learning_rate": 3.8786714243749855e-07, "loss": 0.3133, "step": 43594 }, { "epoch": 4.432187881252542, "grad_norm": 0.27718910574913025, "learning_rate": 3.8773010683935243e-07, "loss": 0.3107, "step": 43595 }, { "epoch": 4.432289548596991, "grad_norm": 0.30763232707977295, "learning_rate": 3.8759309447672644e-07, "loss": 0.3076, "step": 43596 }, { "epoch": 4.43239121594144, "grad_norm": 0.2673041522502899, "learning_rate": 3.8745610535031065e-07, "loss": 0.2804, "step": 43597 }, { "epoch": 4.432492883285889, "grad_norm": 0.2729290723800659, "learning_rate": 3.8731913946079445e-07, "loss": 0.3028, "step": 43598 }, { "epoch": 4.432594550630338, "grad_norm": 0.2723689675331116, "learning_rate": 3.871821968088696e-07, "loss": 0.3114, "step": 43599 }, { "epoch": 4.432696217974787, "grad_norm": 0.2829199433326721, "learning_rate": 3.870452773952227e-07, "loss": 0.3094, "step": 43600 }, { "epoch": 4.432797885319236, "grad_norm": 0.2595074474811554, "learning_rate": 3.8690838122054596e-07, "loss": 0.3123, "step": 43601 }, { "epoch": 4.4328995526636845, "grad_norm": 0.2710329294204712, "learning_rate": 3.8677150828553e-07, "loss": 0.323, "step": 43602 }, { "epoch": 4.4330012200081335, "grad_norm": 0.2804757058620453, "learning_rate": 3.8663465859086146e-07, "loss": 0.2803, "step": 43603 }, { "epoch": 4.433102887352582, "grad_norm": 0.2935013175010681, "learning_rate": 3.8649783213723204e-07, "loss": 0.2753, "step": 43604 }, { "epoch": 4.433204554697031, "grad_norm": 0.28623467683792114, "learning_rate": 3.863610289253289e-07, "loss": 0.3086, "step": 43605 }, { "epoch": 4.43330622204148, "grad_norm": 0.2717513144016266, "learning_rate": 3.862242489558438e-07, "loss": 0.3085, "step": 43606 }, { "epoch": 4.433407889385929, "grad_norm": 0.26556405425071716, "learning_rate": 3.860874922294644e-07, "loss": 0.3238, "step": 43607 }, { "epoch": 4.433509556730378, "grad_norm": 0.26842251420021057, "learning_rate": 3.8595075874687917e-07, "loss": 0.2859, "step": 43608 }, { "epoch": 4.433611224074827, "grad_norm": 0.2727535367012024, "learning_rate": 3.8581404850877913e-07, "loss": 0.297, "step": 43609 }, { "epoch": 4.433712891419276, "grad_norm": 0.2685181498527527, "learning_rate": 3.856773615158493e-07, "loss": 0.3085, "step": 43610 }, { "epoch": 4.433814558763725, "grad_norm": 0.28671902418136597, "learning_rate": 3.855406977687809e-07, "loss": 0.3115, "step": 43611 }, { "epoch": 4.433916226108174, "grad_norm": 0.2682356834411621, "learning_rate": 3.854040572682632e-07, "loss": 0.3115, "step": 43612 }, { "epoch": 4.434017893452623, "grad_norm": 0.2662633955478668, "learning_rate": 3.8526744001498193e-07, "loss": 0.2873, "step": 43613 }, { "epoch": 4.434119560797072, "grad_norm": 0.28137487173080444, "learning_rate": 3.85130846009627e-07, "loss": 0.3314, "step": 43614 }, { "epoch": 4.434221228141521, "grad_norm": 0.28731948137283325, "learning_rate": 3.8499427525288677e-07, "loss": 0.2648, "step": 43615 }, { "epoch": 4.43432289548597, "grad_norm": 0.28971996903419495, "learning_rate": 3.8485772774544795e-07, "loss": 0.2973, "step": 43616 }, { "epoch": 4.434424562830419, "grad_norm": 0.2739780843257904, "learning_rate": 3.8472120348799933e-07, "loss": 0.3057, "step": 43617 }, { "epoch": 4.434526230174868, "grad_norm": 0.28633803129196167, "learning_rate": 3.8458470248122825e-07, "loss": 0.2859, "step": 43618 }, { "epoch": 4.4346278975193165, "grad_norm": 0.2684631049633026, "learning_rate": 3.844482247258241e-07, "loss": 0.2817, "step": 43619 }, { "epoch": 4.4347295648637655, "grad_norm": 0.28750547766685486, "learning_rate": 3.8431177022247134e-07, "loss": 0.2853, "step": 43620 }, { "epoch": 4.434831232208214, "grad_norm": 0.28797751665115356, "learning_rate": 3.841753389718589e-07, "loss": 0.3183, "step": 43621 }, { "epoch": 4.434932899552663, "grad_norm": 0.2902331054210663, "learning_rate": 3.8403893097467613e-07, "loss": 0.3319, "step": 43622 }, { "epoch": 4.435034566897112, "grad_norm": 0.2702639102935791, "learning_rate": 3.839025462316065e-07, "loss": 0.2767, "step": 43623 }, { "epoch": 4.435136234241561, "grad_norm": 0.2699931859970093, "learning_rate": 3.8376618474333994e-07, "loss": 0.2913, "step": 43624 }, { "epoch": 4.43523790158601, "grad_norm": 0.28692692518234253, "learning_rate": 3.83629846510562e-07, "loss": 0.3445, "step": 43625 }, { "epoch": 4.435339568930459, "grad_norm": 0.28380972146987915, "learning_rate": 3.834935315339589e-07, "loss": 0.3045, "step": 43626 }, { "epoch": 4.435441236274908, "grad_norm": 0.2794207036495209, "learning_rate": 3.833572398142199e-07, "loss": 0.3504, "step": 43627 }, { "epoch": 4.435542903619357, "grad_norm": 0.29087087512016296, "learning_rate": 3.832209713520285e-07, "loss": 0.301, "step": 43628 }, { "epoch": 4.435644570963807, "grad_norm": 0.2781599164009094, "learning_rate": 3.830847261480747e-07, "loss": 0.3088, "step": 43629 }, { "epoch": 4.435746238308256, "grad_norm": 0.30458182096481323, "learning_rate": 3.829485042030412e-07, "loss": 0.3109, "step": 43630 }, { "epoch": 4.435847905652705, "grad_norm": 0.27907177805900574, "learning_rate": 3.828123055176158e-07, "loss": 0.3175, "step": 43631 }, { "epoch": 4.435949572997154, "grad_norm": 0.30357640981674194, "learning_rate": 3.826761300924869e-07, "loss": 0.2929, "step": 43632 }, { "epoch": 4.436051240341603, "grad_norm": 0.2933499217033386, "learning_rate": 3.825399779283362e-07, "loss": 0.2813, "step": 43633 }, { "epoch": 4.4361529076860515, "grad_norm": 0.2591985762119293, "learning_rate": 3.8240384902585306e-07, "loss": 0.2974, "step": 43634 }, { "epoch": 4.4362545750305005, "grad_norm": 0.29182547330856323, "learning_rate": 3.8226774338572193e-07, "loss": 0.2939, "step": 43635 }, { "epoch": 4.436356242374949, "grad_norm": 0.3223707973957062, "learning_rate": 3.8213166100862733e-07, "loss": 0.331, "step": 43636 }, { "epoch": 4.436457909719398, "grad_norm": 0.2533993124961853, "learning_rate": 3.8199560189525753e-07, "loss": 0.2952, "step": 43637 }, { "epoch": 4.436559577063847, "grad_norm": 0.2808370590209961, "learning_rate": 3.8185956604629593e-07, "loss": 0.3178, "step": 43638 }, { "epoch": 4.436661244408296, "grad_norm": 0.274245947599411, "learning_rate": 3.817235534624286e-07, "loss": 0.2762, "step": 43639 }, { "epoch": 4.436762911752745, "grad_norm": 0.3039498031139374, "learning_rate": 3.8158756414434005e-07, "loss": 0.311, "step": 43640 }, { "epoch": 4.436864579097194, "grad_norm": 0.2992216646671295, "learning_rate": 3.8145159809271526e-07, "loss": 0.2777, "step": 43641 }, { "epoch": 4.436966246441643, "grad_norm": 0.2828708291053772, "learning_rate": 3.81315655308242e-07, "loss": 0.2812, "step": 43642 }, { "epoch": 4.437067913786092, "grad_norm": 0.27691128849983215, "learning_rate": 3.8117973579160086e-07, "loss": 0.3043, "step": 43643 }, { "epoch": 4.437169581130541, "grad_norm": 0.274837851524353, "learning_rate": 3.8104383954347956e-07, "loss": 0.2962, "step": 43644 }, { "epoch": 4.43727124847499, "grad_norm": 0.2797258198261261, "learning_rate": 3.809079665645615e-07, "loss": 0.3122, "step": 43645 }, { "epoch": 4.437372915819439, "grad_norm": 0.27370354533195496, "learning_rate": 3.8077211685553116e-07, "loss": 0.3453, "step": 43646 }, { "epoch": 4.437474583163888, "grad_norm": 0.3077804446220398, "learning_rate": 3.806362904170735e-07, "loss": 0.3081, "step": 43647 }, { "epoch": 4.437576250508337, "grad_norm": 0.27254676818847656, "learning_rate": 3.80500487249873e-07, "loss": 0.2889, "step": 43648 }, { "epoch": 4.437677917852786, "grad_norm": 0.2826247215270996, "learning_rate": 3.8036470735461296e-07, "loss": 0.3076, "step": 43649 }, { "epoch": 4.437779585197235, "grad_norm": 0.2736092805862427, "learning_rate": 3.802289507319767e-07, "loss": 0.3108, "step": 43650 }, { "epoch": 4.4378812525416835, "grad_norm": 0.29774561524391174, "learning_rate": 3.800932173826505e-07, "loss": 0.2888, "step": 43651 }, { "epoch": 4.4379829198861325, "grad_norm": 0.2869921922683716, "learning_rate": 3.799575073073164e-07, "loss": 0.2991, "step": 43652 }, { "epoch": 4.438084587230581, "grad_norm": 0.2852988541126251, "learning_rate": 3.7982182050665785e-07, "loss": 0.2934, "step": 43653 }, { "epoch": 4.43818625457503, "grad_norm": 0.29471254348754883, "learning_rate": 3.796861569813598e-07, "loss": 0.2539, "step": 43654 }, { "epoch": 4.438287921919479, "grad_norm": 0.27335304021835327, "learning_rate": 3.795505167321051e-07, "loss": 0.2837, "step": 43655 }, { "epoch": 4.438389589263928, "grad_norm": 0.2733808755874634, "learning_rate": 3.7941489975957644e-07, "loss": 0.3058, "step": 43656 }, { "epoch": 4.438491256608377, "grad_norm": 0.261994868516922, "learning_rate": 3.7927930606445726e-07, "loss": 0.3114, "step": 43657 }, { "epoch": 4.438592923952826, "grad_norm": 0.28336283564567566, "learning_rate": 3.791437356474315e-07, "loss": 0.2823, "step": 43658 }, { "epoch": 4.438694591297275, "grad_norm": 0.2812448740005493, "learning_rate": 3.790081885091812e-07, "loss": 0.3053, "step": 43659 }, { "epoch": 4.438796258641724, "grad_norm": 0.26706448197364807, "learning_rate": 3.788726646503893e-07, "loss": 0.3098, "step": 43660 }, { "epoch": 4.438897925986173, "grad_norm": 0.3194999396800995, "learning_rate": 3.787371640717391e-07, "loss": 0.2753, "step": 43661 }, { "epoch": 4.438999593330622, "grad_norm": 0.2823927700519562, "learning_rate": 3.7860168677391287e-07, "loss": 0.3042, "step": 43662 }, { "epoch": 4.439101260675071, "grad_norm": 0.2698923945426941, "learning_rate": 3.7846623275759274e-07, "loss": 0.2867, "step": 43663 }, { "epoch": 4.43920292801952, "grad_norm": 0.29758989810943604, "learning_rate": 3.783308020234616e-07, "loss": 0.2848, "step": 43664 }, { "epoch": 4.439304595363969, "grad_norm": 0.30181169509887695, "learning_rate": 3.781953945722022e-07, "loss": 0.2766, "step": 43665 }, { "epoch": 4.439406262708418, "grad_norm": 0.2881355583667755, "learning_rate": 3.7806001040449626e-07, "loss": 0.284, "step": 43666 }, { "epoch": 4.4395079300528675, "grad_norm": 0.3211669325828552, "learning_rate": 3.779246495210243e-07, "loss": 0.3097, "step": 43667 }, { "epoch": 4.439609597397316, "grad_norm": 0.28565713763237, "learning_rate": 3.777893119224707e-07, "loss": 0.3158, "step": 43668 }, { "epoch": 4.439711264741765, "grad_norm": 0.275095671415329, "learning_rate": 3.7765399760951616e-07, "loss": 0.2808, "step": 43669 }, { "epoch": 4.439812932086214, "grad_norm": 0.291945219039917, "learning_rate": 3.7751870658284117e-07, "loss": 0.3038, "step": 43670 }, { "epoch": 4.439914599430663, "grad_norm": 0.2678239643573761, "learning_rate": 3.7738343884312965e-07, "loss": 0.2785, "step": 43671 }, { "epoch": 4.440016266775112, "grad_norm": 0.27851274609565735, "learning_rate": 3.7724819439106165e-07, "loss": 0.2655, "step": 43672 }, { "epoch": 4.440117934119561, "grad_norm": 0.2763504981994629, "learning_rate": 3.7711297322731824e-07, "loss": 0.2982, "step": 43673 }, { "epoch": 4.44021960146401, "grad_norm": 0.2575916051864624, "learning_rate": 3.769777753525816e-07, "loss": 0.2914, "step": 43674 }, { "epoch": 4.440321268808459, "grad_norm": 0.26606181263923645, "learning_rate": 3.7684260076753243e-07, "loss": 0.3123, "step": 43675 }, { "epoch": 4.440422936152908, "grad_norm": 0.28447359800338745, "learning_rate": 3.767074494728512e-07, "loss": 0.3073, "step": 43676 }, { "epoch": 4.440524603497357, "grad_norm": 0.27814194560050964, "learning_rate": 3.765723214692191e-07, "loss": 0.2749, "step": 43677 }, { "epoch": 4.440626270841806, "grad_norm": 0.2640889883041382, "learning_rate": 3.7643721675731714e-07, "loss": 0.2839, "step": 43678 }, { "epoch": 4.440727938186255, "grad_norm": 0.27154162526130676, "learning_rate": 3.76302135337826e-07, "loss": 0.2749, "step": 43679 }, { "epoch": 4.440829605530704, "grad_norm": 0.2927549183368683, "learning_rate": 3.761670772114251e-07, "loss": 0.2931, "step": 43680 }, { "epoch": 4.440931272875153, "grad_norm": 0.26721829175949097, "learning_rate": 3.7603204237879664e-07, "loss": 0.3096, "step": 43681 }, { "epoch": 4.441032940219602, "grad_norm": 0.2680131793022156, "learning_rate": 3.758970308406196e-07, "loss": 0.2794, "step": 43682 }, { "epoch": 4.4411346075640505, "grad_norm": 0.2880568206310272, "learning_rate": 3.757620425975739e-07, "loss": 0.3119, "step": 43683 }, { "epoch": 4.4412362749084995, "grad_norm": 0.26707613468170166, "learning_rate": 3.756270776503412e-07, "loss": 0.2857, "step": 43684 }, { "epoch": 4.441337942252948, "grad_norm": 0.28625497221946716, "learning_rate": 3.754921359996e-07, "loss": 0.2806, "step": 43685 }, { "epoch": 4.441439609597397, "grad_norm": 0.26821058988571167, "learning_rate": 3.753572176460307e-07, "loss": 0.2794, "step": 43686 }, { "epoch": 4.441541276941846, "grad_norm": 0.2970195412635803, "learning_rate": 3.7522232259031177e-07, "loss": 0.2806, "step": 43687 }, { "epoch": 4.441642944286295, "grad_norm": 0.2869182229042053, "learning_rate": 3.7508745083312426e-07, "loss": 0.2917, "step": 43688 }, { "epoch": 4.441744611630744, "grad_norm": 0.2549954056739807, "learning_rate": 3.7495260237514763e-07, "loss": 0.2643, "step": 43689 }, { "epoch": 4.441846278975193, "grad_norm": 0.29059430956840515, "learning_rate": 3.748177772170597e-07, "loss": 0.2939, "step": 43690 }, { "epoch": 4.441947946319642, "grad_norm": 0.2941807508468628, "learning_rate": 3.7468297535954157e-07, "loss": 0.3023, "step": 43691 }, { "epoch": 4.442049613664091, "grad_norm": 0.2677953839302063, "learning_rate": 3.7454819680327157e-07, "loss": 0.2853, "step": 43692 }, { "epoch": 4.44215128100854, "grad_norm": 0.272239089012146, "learning_rate": 3.7441344154892755e-07, "loss": 0.3201, "step": 43693 }, { "epoch": 4.442252948352989, "grad_norm": 0.26062440872192383, "learning_rate": 3.7427870959719006e-07, "loss": 0.3177, "step": 43694 }, { "epoch": 4.442354615697438, "grad_norm": 0.3014305531978607, "learning_rate": 3.7414400094873735e-07, "loss": 0.3205, "step": 43695 }, { "epoch": 4.442456283041887, "grad_norm": 0.27428898215293884, "learning_rate": 3.740093156042479e-07, "loss": 0.2645, "step": 43696 }, { "epoch": 4.442557950386336, "grad_norm": 0.27343225479125977, "learning_rate": 3.7387465356439943e-07, "loss": 0.2855, "step": 43697 }, { "epoch": 4.442659617730785, "grad_norm": 0.2643747925758362, "learning_rate": 3.73740014829872e-07, "loss": 0.2954, "step": 43698 }, { "epoch": 4.442761285075234, "grad_norm": 0.3028354346752167, "learning_rate": 3.7360539940134333e-07, "loss": 0.3064, "step": 43699 }, { "epoch": 4.4428629524196825, "grad_norm": 0.2762925326824188, "learning_rate": 3.7347080727949015e-07, "loss": 0.2699, "step": 43700 }, { "epoch": 4.4429646197641315, "grad_norm": 0.27060091495513916, "learning_rate": 3.733362384649919e-07, "loss": 0.2976, "step": 43701 }, { "epoch": 4.44306628710858, "grad_norm": 0.2833830714225769, "learning_rate": 3.732016929585269e-07, "loss": 0.3, "step": 43702 }, { "epoch": 4.443167954453029, "grad_norm": 0.28103798627853394, "learning_rate": 3.7306717076077137e-07, "loss": 0.3097, "step": 43703 }, { "epoch": 4.443269621797478, "grad_norm": 0.28995591402053833, "learning_rate": 3.7293267187240465e-07, "loss": 0.3594, "step": 43704 }, { "epoch": 4.443371289141927, "grad_norm": 0.2965056598186493, "learning_rate": 3.727981962941035e-07, "loss": 0.3122, "step": 43705 }, { "epoch": 4.443472956486376, "grad_norm": 0.28765392303466797, "learning_rate": 3.7266374402654514e-07, "loss": 0.3262, "step": 43706 }, { "epoch": 4.443574623830825, "grad_norm": 0.28459620475769043, "learning_rate": 3.7252931507040677e-07, "loss": 0.2868, "step": 43707 }, { "epoch": 4.443676291175274, "grad_norm": 0.2977517545223236, "learning_rate": 3.7239490942636683e-07, "loss": 0.2857, "step": 43708 }, { "epoch": 4.443777958519723, "grad_norm": 0.27406826615333557, "learning_rate": 3.722605270951019e-07, "loss": 0.3347, "step": 43709 }, { "epoch": 4.443879625864172, "grad_norm": 0.3011758625507355, "learning_rate": 3.7212616807728753e-07, "loss": 0.2975, "step": 43710 }, { "epoch": 4.443981293208622, "grad_norm": 0.27579352259635925, "learning_rate": 3.719918323736027e-07, "loss": 0.3304, "step": 43711 }, { "epoch": 4.444082960553071, "grad_norm": 0.26875564455986023, "learning_rate": 3.7185751998472297e-07, "loss": 0.286, "step": 43712 }, { "epoch": 4.44418462789752, "grad_norm": 0.2971283793449402, "learning_rate": 3.71723230911325e-07, "loss": 0.2764, "step": 43713 }, { "epoch": 4.444286295241969, "grad_norm": 0.26260414719581604, "learning_rate": 3.7158896515408605e-07, "loss": 0.321, "step": 43714 }, { "epoch": 4.4443879625864176, "grad_norm": 0.27349504828453064, "learning_rate": 3.714547227136817e-07, "loss": 0.3035, "step": 43715 }, { "epoch": 4.4444896299308665, "grad_norm": 0.2799736261367798, "learning_rate": 3.7132050359078906e-07, "loss": 0.268, "step": 43716 }, { "epoch": 4.444591297275315, "grad_norm": 0.2679486870765686, "learning_rate": 3.7118630778608276e-07, "loss": 0.317, "step": 43717 }, { "epoch": 4.444692964619764, "grad_norm": 0.28717344999313354, "learning_rate": 3.710521353002405e-07, "loss": 0.3074, "step": 43718 }, { "epoch": 4.444794631964213, "grad_norm": 0.2699754238128662, "learning_rate": 3.709179861339379e-07, "loss": 0.3274, "step": 43719 }, { "epoch": 4.444896299308662, "grad_norm": 0.2981160283088684, "learning_rate": 3.707838602878494e-07, "loss": 0.2657, "step": 43720 }, { "epoch": 4.444997966653111, "grad_norm": 0.2771354913711548, "learning_rate": 3.7064975776265276e-07, "loss": 0.2839, "step": 43721 }, { "epoch": 4.44509963399756, "grad_norm": 0.2669347822666168, "learning_rate": 3.705156785590225e-07, "loss": 0.3064, "step": 43722 }, { "epoch": 4.445201301342009, "grad_norm": 0.2698156237602234, "learning_rate": 3.70381622677633e-07, "loss": 0.3254, "step": 43723 }, { "epoch": 4.445302968686458, "grad_norm": 0.2925940454006195, "learning_rate": 3.7024759011916156e-07, "loss": 0.2904, "step": 43724 }, { "epoch": 4.445404636030907, "grad_norm": 0.28624364733695984, "learning_rate": 3.7011358088428264e-07, "loss": 0.2942, "step": 43725 }, { "epoch": 4.445506303375356, "grad_norm": 0.27556440234184265, "learning_rate": 3.699795949736718e-07, "loss": 0.3187, "step": 43726 }, { "epoch": 4.445607970719805, "grad_norm": 0.31649088859558105, "learning_rate": 3.698456323880023e-07, "loss": 0.3181, "step": 43727 }, { "epoch": 4.445709638064254, "grad_norm": 0.2961975336074829, "learning_rate": 3.69711693127951e-07, "loss": 0.3087, "step": 43728 }, { "epoch": 4.445811305408703, "grad_norm": 0.2817286550998688, "learning_rate": 3.695777771941916e-07, "loss": 0.2747, "step": 43729 }, { "epoch": 4.445912972753152, "grad_norm": 0.2432960867881775, "learning_rate": 3.694438845873982e-07, "loss": 0.3081, "step": 43730 }, { "epoch": 4.446014640097601, "grad_norm": 0.27676838636398315, "learning_rate": 3.693100153082485e-07, "loss": 0.3152, "step": 43731 }, { "epoch": 4.4461163074420496, "grad_norm": 0.29780998826026917, "learning_rate": 3.6917616935741196e-07, "loss": 0.3209, "step": 43732 }, { "epoch": 4.4462179747864985, "grad_norm": 0.2820523679256439, "learning_rate": 3.690423467355658e-07, "loss": 0.2992, "step": 43733 }, { "epoch": 4.446319642130947, "grad_norm": 0.289579302072525, "learning_rate": 3.689085474433851e-07, "loss": 0.2894, "step": 43734 }, { "epoch": 4.446421309475396, "grad_norm": 0.28513863682746887, "learning_rate": 3.687747714815421e-07, "loss": 0.2659, "step": 43735 }, { "epoch": 4.446522976819845, "grad_norm": 0.2744240164756775, "learning_rate": 3.686410188507111e-07, "loss": 0.2815, "step": 43736 }, { "epoch": 4.446624644164294, "grad_norm": 0.2638453245162964, "learning_rate": 3.685072895515657e-07, "loss": 0.2893, "step": 43737 }, { "epoch": 4.446726311508743, "grad_norm": 0.28931018710136414, "learning_rate": 3.683735835847807e-07, "loss": 0.3016, "step": 43738 }, { "epoch": 4.446827978853192, "grad_norm": 0.28960153460502625, "learning_rate": 3.6823990095102845e-07, "loss": 0.3026, "step": 43739 }, { "epoch": 4.446929646197641, "grad_norm": 0.2684539556503296, "learning_rate": 3.6810624165098283e-07, "loss": 0.3158, "step": 43740 }, { "epoch": 4.44703131354209, "grad_norm": 0.2735181748867035, "learning_rate": 3.679726056853183e-07, "loss": 0.2662, "step": 43741 }, { "epoch": 4.447132980886539, "grad_norm": 0.27441927790641785, "learning_rate": 3.678389930547055e-07, "loss": 0.2727, "step": 43742 }, { "epoch": 4.447234648230988, "grad_norm": 0.26452207565307617, "learning_rate": 3.677054037598193e-07, "loss": 0.3066, "step": 43743 }, { "epoch": 4.447336315575437, "grad_norm": 0.2704675495624542, "learning_rate": 3.675718378013332e-07, "loss": 0.2925, "step": 43744 }, { "epoch": 4.447437982919886, "grad_norm": 0.27881723642349243, "learning_rate": 3.6743829517991935e-07, "loss": 0.2921, "step": 43745 }, { "epoch": 4.447539650264335, "grad_norm": 0.27598461508750916, "learning_rate": 3.6730477589625056e-07, "loss": 0.2763, "step": 43746 }, { "epoch": 4.447641317608784, "grad_norm": 0.3005300760269165, "learning_rate": 3.6717127995099857e-07, "loss": 0.3083, "step": 43747 }, { "epoch": 4.447742984953233, "grad_norm": 0.27235275506973267, "learning_rate": 3.670378073448377e-07, "loss": 0.3026, "step": 43748 }, { "epoch": 4.447844652297682, "grad_norm": 0.2600492835044861, "learning_rate": 3.669043580784393e-07, "loss": 0.3286, "step": 43749 }, { "epoch": 4.447946319642131, "grad_norm": 0.28239354491233826, "learning_rate": 3.667709321524748e-07, "loss": 0.2939, "step": 43750 }, { "epoch": 4.44804798698658, "grad_norm": 0.29080578684806824, "learning_rate": 3.666375295676189e-07, "loss": 0.3055, "step": 43751 }, { "epoch": 4.448149654331029, "grad_norm": 0.30734983086586, "learning_rate": 3.6650415032454033e-07, "loss": 0.2987, "step": 43752 }, { "epoch": 4.448251321675478, "grad_norm": 0.3020770847797394, "learning_rate": 3.663707944239131e-07, "loss": 0.2972, "step": 43753 }, { "epoch": 4.448352989019927, "grad_norm": 0.2819528877735138, "learning_rate": 3.6623746186640997e-07, "loss": 0.2948, "step": 43754 }, { "epoch": 4.448454656364376, "grad_norm": 0.26451200246810913, "learning_rate": 3.661041526527004e-07, "loss": 0.2924, "step": 43755 }, { "epoch": 4.448556323708825, "grad_norm": 0.26682427525520325, "learning_rate": 3.6597086678345715e-07, "loss": 0.2725, "step": 43756 }, { "epoch": 4.448657991053274, "grad_norm": 0.26158350706100464, "learning_rate": 3.658376042593509e-07, "loss": 0.2831, "step": 43757 }, { "epoch": 4.448759658397723, "grad_norm": 0.2779156267642975, "learning_rate": 3.6570436508105433e-07, "loss": 0.2859, "step": 43758 }, { "epoch": 4.448861325742172, "grad_norm": 0.2794746160507202, "learning_rate": 3.6557114924923754e-07, "loss": 0.3054, "step": 43759 }, { "epoch": 4.448962993086621, "grad_norm": 0.26355722546577454, "learning_rate": 3.654379567645716e-07, "loss": 0.2858, "step": 43760 }, { "epoch": 4.44906466043107, "grad_norm": 0.2551170289516449, "learning_rate": 3.6530478762772936e-07, "loss": 0.3226, "step": 43761 }, { "epoch": 4.449166327775519, "grad_norm": 0.27448129653930664, "learning_rate": 3.651716418393786e-07, "loss": 0.2909, "step": 43762 }, { "epoch": 4.449267995119968, "grad_norm": 0.27623817324638367, "learning_rate": 3.6503851940019153e-07, "loss": 0.2905, "step": 43763 }, { "epoch": 4.449369662464417, "grad_norm": 0.2878393828868866, "learning_rate": 3.64905420310841e-07, "loss": 0.3186, "step": 43764 }, { "epoch": 4.4494713298088655, "grad_norm": 0.2836313247680664, "learning_rate": 3.647723445719936e-07, "loss": 0.2608, "step": 43765 }, { "epoch": 4.4495729971533144, "grad_norm": 0.3052428960800171, "learning_rate": 3.6463929218432224e-07, "loss": 0.2866, "step": 43766 }, { "epoch": 4.449674664497763, "grad_norm": 0.27174651622772217, "learning_rate": 3.645062631484969e-07, "loss": 0.3089, "step": 43767 }, { "epoch": 4.449776331842212, "grad_norm": 0.2944454252719879, "learning_rate": 3.643732574651865e-07, "loss": 0.2892, "step": 43768 }, { "epoch": 4.449877999186661, "grad_norm": 0.28362131118774414, "learning_rate": 3.642402751350632e-07, "loss": 0.274, "step": 43769 }, { "epoch": 4.44997966653111, "grad_norm": 0.28593263030052185, "learning_rate": 3.641073161587949e-07, "loss": 0.2657, "step": 43770 }, { "epoch": 4.450081333875559, "grad_norm": 0.28284892439842224, "learning_rate": 3.6397438053705325e-07, "loss": 0.3121, "step": 43771 }, { "epoch": 4.450183001220008, "grad_norm": 0.29246407747268677, "learning_rate": 3.6384146827050605e-07, "loss": 0.3025, "step": 43772 }, { "epoch": 4.450284668564457, "grad_norm": 0.26041579246520996, "learning_rate": 3.637085793598233e-07, "loss": 0.3242, "step": 43773 }, { "epoch": 4.450386335908906, "grad_norm": 0.2976025342941284, "learning_rate": 3.6357571380567724e-07, "loss": 0.2557, "step": 43774 }, { "epoch": 4.450488003253355, "grad_norm": 0.2686058282852173, "learning_rate": 3.6344287160873295e-07, "loss": 0.2909, "step": 43775 }, { "epoch": 4.450589670597804, "grad_norm": 0.2763140797615051, "learning_rate": 3.63310052769662e-07, "loss": 0.3572, "step": 43776 }, { "epoch": 4.450691337942253, "grad_norm": 0.262066125869751, "learning_rate": 3.63177257289134e-07, "loss": 0.2594, "step": 43777 }, { "epoch": 4.450793005286702, "grad_norm": 0.28342851996421814, "learning_rate": 3.6304448516781556e-07, "loss": 0.2993, "step": 43778 }, { "epoch": 4.450894672631151, "grad_norm": 0.2810635268688202, "learning_rate": 3.6291173640637836e-07, "loss": 0.2952, "step": 43779 }, { "epoch": 4.4509963399756, "grad_norm": 0.3056911528110504, "learning_rate": 3.627790110054891e-07, "loss": 0.3082, "step": 43780 }, { "epoch": 4.451098007320049, "grad_norm": 0.2724560797214508, "learning_rate": 3.6264630896581887e-07, "loss": 0.2848, "step": 43781 }, { "epoch": 4.4511996746644975, "grad_norm": 0.3021131455898285, "learning_rate": 3.625136302880328e-07, "loss": 0.29, "step": 43782 }, { "epoch": 4.4513013420089464, "grad_norm": 0.2708667814731598, "learning_rate": 3.623809749728008e-07, "loss": 0.2755, "step": 43783 }, { "epoch": 4.451403009353395, "grad_norm": 0.2690366506576538, "learning_rate": 3.6224834302079294e-07, "loss": 0.2886, "step": 43784 }, { "epoch": 4.451504676697844, "grad_norm": 0.28539299964904785, "learning_rate": 3.621157344326748e-07, "loss": 0.2827, "step": 43785 }, { "epoch": 4.451606344042293, "grad_norm": 0.26409614086151123, "learning_rate": 3.619831492091158e-07, "loss": 0.2939, "step": 43786 }, { "epoch": 4.451708011386742, "grad_norm": 0.28200119733810425, "learning_rate": 3.618505873507838e-07, "loss": 0.3163, "step": 43787 }, { "epoch": 4.451809678731191, "grad_norm": 0.28648924827575684, "learning_rate": 3.6171804885834497e-07, "loss": 0.3087, "step": 43788 }, { "epoch": 4.45191134607564, "grad_norm": 0.2848648726940155, "learning_rate": 3.615855337324697e-07, "loss": 0.2839, "step": 43789 }, { "epoch": 4.452013013420089, "grad_norm": 0.2798647880554199, "learning_rate": 3.6145304197382437e-07, "loss": 0.275, "step": 43790 }, { "epoch": 4.452114680764538, "grad_norm": 0.23553679883480072, "learning_rate": 3.6132057358307606e-07, "loss": 0.2923, "step": 43791 }, { "epoch": 4.452216348108988, "grad_norm": 0.2974105179309845, "learning_rate": 3.6118812856089146e-07, "loss": 0.2928, "step": 43792 }, { "epoch": 4.452318015453437, "grad_norm": 0.27579525113105774, "learning_rate": 3.6105570690793903e-07, "loss": 0.2757, "step": 43793 }, { "epoch": 4.452419682797886, "grad_norm": 0.2529062032699585, "learning_rate": 3.60923308624887e-07, "loss": 0.2938, "step": 43794 }, { "epoch": 4.452521350142335, "grad_norm": 0.2770478427410126, "learning_rate": 3.607909337123994e-07, "loss": 0.2875, "step": 43795 }, { "epoch": 4.452623017486784, "grad_norm": 0.2864423394203186, "learning_rate": 3.606585821711456e-07, "loss": 0.3193, "step": 43796 }, { "epoch": 4.4527246848312325, "grad_norm": 0.3104557991027832, "learning_rate": 3.6052625400179066e-07, "loss": 0.2907, "step": 43797 }, { "epoch": 4.4528263521756815, "grad_norm": 0.27915626764297485, "learning_rate": 3.603939492050018e-07, "loss": 0.2989, "step": 43798 }, { "epoch": 4.45292801952013, "grad_norm": 0.31539517641067505, "learning_rate": 3.602616677814463e-07, "loss": 0.3229, "step": 43799 }, { "epoch": 4.453029686864579, "grad_norm": 0.28186047077178955, "learning_rate": 3.601294097317903e-07, "loss": 0.2866, "step": 43800 }, { "epoch": 4.453131354209028, "grad_norm": 0.2704935669898987, "learning_rate": 3.599971750566994e-07, "loss": 0.2809, "step": 43801 }, { "epoch": 4.453233021553477, "grad_norm": 0.28066113591194153, "learning_rate": 3.598649637568391e-07, "loss": 0.3197, "step": 43802 }, { "epoch": 4.453334688897926, "grad_norm": 0.26642361283302307, "learning_rate": 3.597327758328767e-07, "loss": 0.2816, "step": 43803 }, { "epoch": 4.453436356242375, "grad_norm": 0.26984289288520813, "learning_rate": 3.596006112854794e-07, "loss": 0.2702, "step": 43804 }, { "epoch": 4.453538023586824, "grad_norm": 0.2868967354297638, "learning_rate": 3.5946847011531007e-07, "loss": 0.3151, "step": 43805 }, { "epoch": 4.453639690931273, "grad_norm": 0.2679119408130646, "learning_rate": 3.5933635232303644e-07, "loss": 0.2999, "step": 43806 }, { "epoch": 4.453741358275722, "grad_norm": 0.27654513716697693, "learning_rate": 3.5920425790932356e-07, "loss": 0.2814, "step": 43807 }, { "epoch": 4.453843025620171, "grad_norm": 0.31120848655700684, "learning_rate": 3.590721868748359e-07, "loss": 0.282, "step": 43808 }, { "epoch": 4.45394469296462, "grad_norm": 0.2852117717266083, "learning_rate": 3.5894013922024073e-07, "loss": 0.2912, "step": 43809 }, { "epoch": 4.454046360309069, "grad_norm": 0.27790454030036926, "learning_rate": 3.5880811494620193e-07, "loss": 0.3289, "step": 43810 }, { "epoch": 4.454148027653518, "grad_norm": 0.2603105902671814, "learning_rate": 3.586761140533851e-07, "loss": 0.3256, "step": 43811 }, { "epoch": 4.454249694997967, "grad_norm": 0.2833024859428406, "learning_rate": 3.5854413654245414e-07, "loss": 0.3048, "step": 43812 }, { "epoch": 4.454351362342416, "grad_norm": 0.2783854901790619, "learning_rate": 3.5841218241407573e-07, "loss": 0.2663, "step": 43813 }, { "epoch": 4.4544530296868645, "grad_norm": 0.2643752098083496, "learning_rate": 3.582802516689138e-07, "loss": 0.3095, "step": 43814 }, { "epoch": 4.4545546970313135, "grad_norm": 0.29360026121139526, "learning_rate": 3.5814834430763224e-07, "loss": 0.2894, "step": 43815 }, { "epoch": 4.454656364375762, "grad_norm": 0.27285757660865784, "learning_rate": 3.5801646033089665e-07, "loss": 0.2966, "step": 43816 }, { "epoch": 4.454758031720211, "grad_norm": 0.29163962602615356, "learning_rate": 3.578845997393715e-07, "loss": 0.3009, "step": 43817 }, { "epoch": 4.45485969906466, "grad_norm": 0.2790151834487915, "learning_rate": 3.577527625337196e-07, "loss": 0.2967, "step": 43818 }, { "epoch": 4.454961366409109, "grad_norm": 0.2801859676837921, "learning_rate": 3.5762094871460707e-07, "loss": 0.2814, "step": 43819 }, { "epoch": 4.455063033753558, "grad_norm": 0.2611079812049866, "learning_rate": 3.574891582826967e-07, "loss": 0.3173, "step": 43820 }, { "epoch": 4.455164701098007, "grad_norm": 0.26836374402046204, "learning_rate": 3.57357391238653e-07, "loss": 0.297, "step": 43821 }, { "epoch": 4.455266368442456, "grad_norm": 0.2985289394855499, "learning_rate": 3.5722564758313815e-07, "loss": 0.2866, "step": 43822 }, { "epoch": 4.455368035786905, "grad_norm": 0.2839251160621643, "learning_rate": 3.5709392731681836e-07, "loss": 0.283, "step": 43823 }, { "epoch": 4.455469703131354, "grad_norm": 0.2757275700569153, "learning_rate": 3.5696223044035637e-07, "loss": 0.2785, "step": 43824 }, { "epoch": 4.455571370475803, "grad_norm": 0.29044076800346375, "learning_rate": 3.568305569544139e-07, "loss": 0.266, "step": 43825 }, { "epoch": 4.455673037820252, "grad_norm": 0.26864343881607056, "learning_rate": 3.5669890685965714e-07, "loss": 0.298, "step": 43826 }, { "epoch": 4.455774705164701, "grad_norm": 0.2924180328845978, "learning_rate": 3.5656728015674716e-07, "loss": 0.2761, "step": 43827 }, { "epoch": 4.45587637250915, "grad_norm": 0.2615363895893097, "learning_rate": 3.564356768463473e-07, "loss": 0.3283, "step": 43828 }, { "epoch": 4.455978039853599, "grad_norm": 0.26321932673454285, "learning_rate": 3.5630409692912205e-07, "loss": 0.2903, "step": 43829 }, { "epoch": 4.456079707198048, "grad_norm": 0.276670902967453, "learning_rate": 3.561725404057331e-07, "loss": 0.31, "step": 43830 }, { "epoch": 4.456181374542497, "grad_norm": 0.26495420932769775, "learning_rate": 3.560410072768433e-07, "loss": 0.2993, "step": 43831 }, { "epoch": 4.456283041886946, "grad_norm": 0.27755823731422424, "learning_rate": 3.5590949754311434e-07, "loss": 0.3039, "step": 43832 }, { "epoch": 4.456384709231395, "grad_norm": 0.2843264043331146, "learning_rate": 3.5577801120521116e-07, "loss": 0.328, "step": 43833 }, { "epoch": 4.456486376575844, "grad_norm": 0.2565056383609772, "learning_rate": 3.5564654826379386e-07, "loss": 0.2933, "step": 43834 }, { "epoch": 4.456588043920293, "grad_norm": 0.2646790146827698, "learning_rate": 3.555151087195252e-07, "loss": 0.2851, "step": 43835 }, { "epoch": 4.456689711264742, "grad_norm": 0.26671653985977173, "learning_rate": 3.553836925730686e-07, "loss": 0.3187, "step": 43836 }, { "epoch": 4.456791378609191, "grad_norm": 0.25414955615997314, "learning_rate": 3.5525229982508514e-07, "loss": 0.2706, "step": 43837 }, { "epoch": 4.45689304595364, "grad_norm": 0.31436824798583984, "learning_rate": 3.551209304762371e-07, "loss": 0.2923, "step": 43838 }, { "epoch": 4.456994713298089, "grad_norm": 0.2671351432800293, "learning_rate": 3.54989584527185e-07, "loss": 0.31, "step": 43839 }, { "epoch": 4.457096380642538, "grad_norm": 0.2800733745098114, "learning_rate": 3.548582619785923e-07, "loss": 0.29, "step": 43840 }, { "epoch": 4.457198047986987, "grad_norm": 0.3218294084072113, "learning_rate": 3.5472696283112005e-07, "loss": 0.284, "step": 43841 }, { "epoch": 4.457299715331436, "grad_norm": 0.2777149975299835, "learning_rate": 3.5459568708542834e-07, "loss": 0.2981, "step": 43842 }, { "epoch": 4.457401382675885, "grad_norm": 0.2662346661090851, "learning_rate": 3.544644347421805e-07, "loss": 0.3266, "step": 43843 }, { "epoch": 4.457503050020334, "grad_norm": 0.2745022475719452, "learning_rate": 3.5433320580203713e-07, "loss": 0.2875, "step": 43844 }, { "epoch": 4.457604717364783, "grad_norm": 0.29633671045303345, "learning_rate": 3.5420200026565763e-07, "loss": 0.2817, "step": 43845 }, { "epoch": 4.4577063847092315, "grad_norm": 0.2700895667076111, "learning_rate": 3.5407081813370605e-07, "loss": 0.3081, "step": 43846 }, { "epoch": 4.4578080520536805, "grad_norm": 0.26320162415504456, "learning_rate": 3.539396594068412e-07, "loss": 0.2876, "step": 43847 }, { "epoch": 4.457909719398129, "grad_norm": 0.31273677945137024, "learning_rate": 3.5380852408572477e-07, "loss": 0.2811, "step": 43848 }, { "epoch": 4.458011386742578, "grad_norm": 0.2898240089416504, "learning_rate": 3.536774121710157e-07, "loss": 0.2832, "step": 43849 }, { "epoch": 4.458113054087027, "grad_norm": 0.27026617527008057, "learning_rate": 3.535463236633763e-07, "loss": 0.3523, "step": 43850 }, { "epoch": 4.458214721431476, "grad_norm": 0.2653004825115204, "learning_rate": 3.534152585634665e-07, "loss": 0.2927, "step": 43851 }, { "epoch": 4.458316388775925, "grad_norm": 0.2713333070278168, "learning_rate": 3.532842168719458e-07, "loss": 0.2699, "step": 43852 }, { "epoch": 4.458418056120374, "grad_norm": 0.29846999049186707, "learning_rate": 3.531531985894754e-07, "loss": 0.299, "step": 43853 }, { "epoch": 4.458519723464823, "grad_norm": 0.2697294354438782, "learning_rate": 3.5302220371671525e-07, "loss": 0.3564, "step": 43854 }, { "epoch": 4.458621390809272, "grad_norm": 0.2693939507007599, "learning_rate": 3.528912322543243e-07, "loss": 0.3532, "step": 43855 }, { "epoch": 4.458723058153721, "grad_norm": 0.26688650250434875, "learning_rate": 3.527602842029637e-07, "loss": 0.2972, "step": 43856 }, { "epoch": 4.45882472549817, "grad_norm": 0.28115490078926086, "learning_rate": 3.526293595632924e-07, "loss": 0.3046, "step": 43857 }, { "epoch": 4.458926392842619, "grad_norm": 0.28368690609931946, "learning_rate": 3.524984583359697e-07, "loss": 0.3046, "step": 43858 }, { "epoch": 4.459028060187068, "grad_norm": 0.2668154239654541, "learning_rate": 3.5236758052165475e-07, "loss": 0.2993, "step": 43859 }, { "epoch": 4.459129727531517, "grad_norm": 0.27988356351852417, "learning_rate": 3.522367261210086e-07, "loss": 0.2956, "step": 43860 }, { "epoch": 4.459231394875966, "grad_norm": 0.2486642450094223, "learning_rate": 3.5210589513468896e-07, "loss": 0.2956, "step": 43861 }, { "epoch": 4.459333062220415, "grad_norm": 0.2795058786869049, "learning_rate": 3.519750875633543e-07, "loss": 0.2912, "step": 43862 }, { "epoch": 4.4594347295648635, "grad_norm": 0.27879488468170166, "learning_rate": 3.5184430340766573e-07, "loss": 0.3167, "step": 43863 }, { "epoch": 4.4595363969093125, "grad_norm": 0.2509419322013855, "learning_rate": 3.5171354266828104e-07, "loss": 0.3098, "step": 43864 }, { "epoch": 4.459638064253761, "grad_norm": 0.3123820126056671, "learning_rate": 3.515828053458581e-07, "loss": 0.2888, "step": 43865 }, { "epoch": 4.45973973159821, "grad_norm": 0.28748613595962524, "learning_rate": 3.514520914410574e-07, "loss": 0.2855, "step": 43866 }, { "epoch": 4.459841398942659, "grad_norm": 0.2936484217643738, "learning_rate": 3.513214009545357e-07, "loss": 0.3037, "step": 43867 }, { "epoch": 4.459943066287108, "grad_norm": 0.2852303385734558, "learning_rate": 3.5119073388695247e-07, "loss": 0.2999, "step": 43868 }, { "epoch": 4.460044733631557, "grad_norm": 0.2884095311164856, "learning_rate": 3.510600902389649e-07, "loss": 0.2863, "step": 43869 }, { "epoch": 4.460146400976006, "grad_norm": 0.2904020845890045, "learning_rate": 3.5092947001123257e-07, "loss": 0.3155, "step": 43870 }, { "epoch": 4.460248068320455, "grad_norm": 0.2733688950538635, "learning_rate": 3.5079887320441317e-07, "loss": 0.2746, "step": 43871 }, { "epoch": 4.460349735664904, "grad_norm": 0.28478747606277466, "learning_rate": 3.5066829981916294e-07, "loss": 0.2539, "step": 43872 }, { "epoch": 4.460451403009353, "grad_norm": 0.26354220509529114, "learning_rate": 3.505377498561419e-07, "loss": 0.2858, "step": 43873 }, { "epoch": 4.460553070353803, "grad_norm": 0.25992101430892944, "learning_rate": 3.504072233160066e-07, "loss": 0.297, "step": 43874 }, { "epoch": 4.460654737698252, "grad_norm": 0.2613461911678314, "learning_rate": 3.5027672019941395e-07, "loss": 0.3075, "step": 43875 }, { "epoch": 4.460756405042701, "grad_norm": 0.25507333874702454, "learning_rate": 3.501462405070233e-07, "loss": 0.2802, "step": 43876 }, { "epoch": 4.46085807238715, "grad_norm": 0.27078691124916077, "learning_rate": 3.500157842394908e-07, "loss": 0.3142, "step": 43877 }, { "epoch": 4.4609597397315985, "grad_norm": 0.27237433195114136, "learning_rate": 3.498853513974743e-07, "loss": 0.2945, "step": 43878 }, { "epoch": 4.4610614070760475, "grad_norm": 0.26983824372291565, "learning_rate": 3.4975494198162875e-07, "loss": 0.2963, "step": 43879 }, { "epoch": 4.461163074420496, "grad_norm": 0.26142069697380066, "learning_rate": 3.496245559926137e-07, "loss": 0.3332, "step": 43880 }, { "epoch": 4.461264741764945, "grad_norm": 0.267392635345459, "learning_rate": 3.4949419343108525e-07, "loss": 0.2875, "step": 43881 }, { "epoch": 4.461366409109394, "grad_norm": 0.2665853202342987, "learning_rate": 3.4936385429769905e-07, "loss": 0.3227, "step": 43882 }, { "epoch": 4.461468076453843, "grad_norm": 0.2829269766807556, "learning_rate": 3.4923353859311447e-07, "loss": 0.3117, "step": 43883 }, { "epoch": 4.461569743798292, "grad_norm": 0.2577987313270569, "learning_rate": 3.491032463179844e-07, "loss": 0.2966, "step": 43884 }, { "epoch": 4.461671411142741, "grad_norm": 0.2640427052974701, "learning_rate": 3.4897297747296666e-07, "loss": 0.3057, "step": 43885 }, { "epoch": 4.46177307848719, "grad_norm": 0.2958028018474579, "learning_rate": 3.48842732058719e-07, "loss": 0.292, "step": 43886 }, { "epoch": 4.461874745831639, "grad_norm": 0.2916724681854248, "learning_rate": 3.487125100758959e-07, "loss": 0.275, "step": 43887 }, { "epoch": 4.461976413176088, "grad_norm": 0.2743496894836426, "learning_rate": 3.485823115251541e-07, "loss": 0.291, "step": 43888 }, { "epoch": 4.462078080520537, "grad_norm": 0.2671167254447937, "learning_rate": 3.484521364071486e-07, "loss": 0.3139, "step": 43889 }, { "epoch": 4.462179747864986, "grad_norm": 0.2990242838859558, "learning_rate": 3.4832198472253666e-07, "loss": 0.2827, "step": 43890 }, { "epoch": 4.462281415209435, "grad_norm": 0.2929791510105133, "learning_rate": 3.4819185647197275e-07, "loss": 0.2984, "step": 43891 }, { "epoch": 4.462383082553884, "grad_norm": 0.2611134648323059, "learning_rate": 3.4806175165611247e-07, "loss": 0.2937, "step": 43892 }, { "epoch": 4.462484749898333, "grad_norm": 0.2810294032096863, "learning_rate": 3.47931670275613e-07, "loss": 0.2889, "step": 43893 }, { "epoch": 4.462586417242782, "grad_norm": 0.2742934823036194, "learning_rate": 3.4780161233112673e-07, "loss": 0.3101, "step": 43894 }, { "epoch": 4.4626880845872305, "grad_norm": 0.29703766107559204, "learning_rate": 3.476715778233103e-07, "loss": 0.2684, "step": 43895 }, { "epoch": 4.4627897519316795, "grad_norm": 0.2777467966079712, "learning_rate": 3.475415667528198e-07, "loss": 0.2629, "step": 43896 }, { "epoch": 4.462891419276128, "grad_norm": 0.274502158164978, "learning_rate": 3.474115791203092e-07, "loss": 0.2976, "step": 43897 }, { "epoch": 4.462993086620577, "grad_norm": 0.27933716773986816, "learning_rate": 3.4728161492643355e-07, "loss": 0.2918, "step": 43898 }, { "epoch": 4.463094753965026, "grad_norm": 0.2783427834510803, "learning_rate": 3.471516741718467e-07, "loss": 0.3288, "step": 43899 }, { "epoch": 4.463196421309475, "grad_norm": 0.27126723527908325, "learning_rate": 3.4702175685720487e-07, "loss": 0.2985, "step": 43900 }, { "epoch": 4.463298088653924, "grad_norm": 0.2902025282382965, "learning_rate": 3.468918629831619e-07, "loss": 0.2835, "step": 43901 }, { "epoch": 4.463399755998373, "grad_norm": 0.2594006061553955, "learning_rate": 3.467619925503707e-07, "loss": 0.2929, "step": 43902 }, { "epoch": 4.463501423342822, "grad_norm": 0.28808122873306274, "learning_rate": 3.466321455594884e-07, "loss": 0.2658, "step": 43903 }, { "epoch": 4.463603090687271, "grad_norm": 0.27579909563064575, "learning_rate": 3.4650232201116627e-07, "loss": 0.2725, "step": 43904 }, { "epoch": 4.46370475803172, "grad_norm": 0.2791210412979126, "learning_rate": 3.463725219060593e-07, "loss": 0.2805, "step": 43905 }, { "epoch": 4.463806425376169, "grad_norm": 0.2607349455356598, "learning_rate": 3.462427452448236e-07, "loss": 0.2578, "step": 43906 }, { "epoch": 4.463908092720618, "grad_norm": 0.2924937307834625, "learning_rate": 3.4611299202810923e-07, "loss": 0.3127, "step": 43907 }, { "epoch": 4.464009760065067, "grad_norm": 0.2812218964099884, "learning_rate": 3.4598326225657285e-07, "loss": 0.2756, "step": 43908 }, { "epoch": 4.464111427409516, "grad_norm": 0.25553345680236816, "learning_rate": 3.4585355593086567e-07, "loss": 0.2962, "step": 43909 }, { "epoch": 4.464213094753965, "grad_norm": 0.27793899178504944, "learning_rate": 3.4572387305164325e-07, "loss": 0.3176, "step": 43910 }, { "epoch": 4.464314762098414, "grad_norm": 0.2726227045059204, "learning_rate": 3.455942136195578e-07, "loss": 0.2756, "step": 43911 }, { "epoch": 4.4644164294428625, "grad_norm": 0.26443761587142944, "learning_rate": 3.4546457763526164e-07, "loss": 0.3332, "step": 43912 }, { "epoch": 4.464518096787312, "grad_norm": 0.2770005166530609, "learning_rate": 3.453349650994109e-07, "loss": 0.3153, "step": 43913 }, { "epoch": 4.464619764131761, "grad_norm": 0.2571493685245514, "learning_rate": 3.452053760126545e-07, "loss": 0.3103, "step": 43914 }, { "epoch": 4.46472143147621, "grad_norm": 0.28691866993904114, "learning_rate": 3.450758103756474e-07, "loss": 0.3262, "step": 43915 }, { "epoch": 4.464823098820659, "grad_norm": 0.3026532828807831, "learning_rate": 3.449462681890442e-07, "loss": 0.32, "step": 43916 }, { "epoch": 4.464924766165108, "grad_norm": 0.2672213315963745, "learning_rate": 3.448167494534932e-07, "loss": 0.2945, "step": 43917 }, { "epoch": 4.465026433509557, "grad_norm": 0.28483229875564575, "learning_rate": 3.446872541696505e-07, "loss": 0.2739, "step": 43918 }, { "epoch": 4.465128100854006, "grad_norm": 0.26980558037757874, "learning_rate": 3.4455778233816685e-07, "loss": 0.2875, "step": 43919 }, { "epoch": 4.465229768198455, "grad_norm": 0.26621320843696594, "learning_rate": 3.444283339596938e-07, "loss": 0.3337, "step": 43920 }, { "epoch": 4.465331435542904, "grad_norm": 0.2561592161655426, "learning_rate": 3.4429890903488584e-07, "loss": 0.2836, "step": 43921 }, { "epoch": 4.465433102887353, "grad_norm": 0.28548187017440796, "learning_rate": 3.441695075643925e-07, "loss": 0.275, "step": 43922 }, { "epoch": 4.465534770231802, "grad_norm": 0.28326287865638733, "learning_rate": 3.4404012954886824e-07, "loss": 0.2829, "step": 43923 }, { "epoch": 4.465636437576251, "grad_norm": 0.26958489418029785, "learning_rate": 3.4391077498896196e-07, "loss": 0.3035, "step": 43924 }, { "epoch": 4.4657381049207, "grad_norm": 0.27072376012802124, "learning_rate": 3.4378144388532653e-07, "loss": 0.3206, "step": 43925 }, { "epoch": 4.465839772265149, "grad_norm": 0.2613290548324585, "learning_rate": 3.436521362386158e-07, "loss": 0.3258, "step": 43926 }, { "epoch": 4.4659414396095976, "grad_norm": 0.27177873253822327, "learning_rate": 3.435228520494771e-07, "loss": 0.2956, "step": 43927 }, { "epoch": 4.4660431069540465, "grad_norm": 0.2748984098434448, "learning_rate": 3.4339359131856434e-07, "loss": 0.305, "step": 43928 }, { "epoch": 4.466144774298495, "grad_norm": 0.2766355276107788, "learning_rate": 3.4326435404652856e-07, "loss": 0.2644, "step": 43929 }, { "epoch": 4.466246441642944, "grad_norm": 0.28071627020835876, "learning_rate": 3.431351402340194e-07, "loss": 0.3151, "step": 43930 }, { "epoch": 4.466348108987393, "grad_norm": 0.27555641531944275, "learning_rate": 3.4300594988168953e-07, "loss": 0.2828, "step": 43931 }, { "epoch": 4.466449776331842, "grad_norm": 0.2779316306114197, "learning_rate": 3.42876782990188e-07, "loss": 0.3075, "step": 43932 }, { "epoch": 4.466551443676291, "grad_norm": 0.28722405433654785, "learning_rate": 3.4274763956016865e-07, "loss": 0.2779, "step": 43933 }, { "epoch": 4.46665311102074, "grad_norm": 0.2635864019393921, "learning_rate": 3.4261851959227763e-07, "loss": 0.3189, "step": 43934 }, { "epoch": 4.466754778365189, "grad_norm": 0.2680981755256653, "learning_rate": 3.424894230871684e-07, "loss": 0.2757, "step": 43935 }, { "epoch": 4.466856445709638, "grad_norm": 0.2912140488624573, "learning_rate": 3.4236035004549197e-07, "loss": 0.2906, "step": 43936 }, { "epoch": 4.466958113054087, "grad_norm": 0.28321608901023865, "learning_rate": 3.4223130046789567e-07, "loss": 0.2858, "step": 43937 }, { "epoch": 4.467059780398536, "grad_norm": 0.31972628831863403, "learning_rate": 3.421022743550323e-07, "loss": 0.2595, "step": 43938 }, { "epoch": 4.467161447742985, "grad_norm": 0.2843289077281952, "learning_rate": 3.419732717075502e-07, "loss": 0.3431, "step": 43939 }, { "epoch": 4.467263115087434, "grad_norm": 0.2833569645881653, "learning_rate": 3.418442925260995e-07, "loss": 0.2883, "step": 43940 }, { "epoch": 4.467364782431883, "grad_norm": 0.2685447633266449, "learning_rate": 3.417153368113307e-07, "loss": 0.2892, "step": 43941 }, { "epoch": 4.467466449776332, "grad_norm": 0.2721173167228699, "learning_rate": 3.4158640456389335e-07, "loss": 0.2581, "step": 43942 }, { "epoch": 4.467568117120781, "grad_norm": 0.2978048324584961, "learning_rate": 3.414574957844369e-07, "loss": 0.3038, "step": 43943 }, { "epoch": 4.4676697844652296, "grad_norm": 0.2886076867580414, "learning_rate": 3.4132861047360967e-07, "loss": 0.2613, "step": 43944 }, { "epoch": 4.4677714518096785, "grad_norm": 0.27570241689682007, "learning_rate": 3.4119974863206175e-07, "loss": 0.2758, "step": 43945 }, { "epoch": 4.467873119154127, "grad_norm": 0.28901615738868713, "learning_rate": 3.410709102604437e-07, "loss": 0.3166, "step": 43946 }, { "epoch": 4.467974786498576, "grad_norm": 0.2734728455543518, "learning_rate": 3.409420953594017e-07, "loss": 0.2883, "step": 43947 }, { "epoch": 4.468076453843025, "grad_norm": 0.2717643082141876, "learning_rate": 3.4081330392958744e-07, "loss": 0.2831, "step": 43948 }, { "epoch": 4.468178121187474, "grad_norm": 0.28050899505615234, "learning_rate": 3.4068453597164874e-07, "loss": 0.2894, "step": 43949 }, { "epoch": 4.468279788531923, "grad_norm": 0.3041016757488251, "learning_rate": 3.4055579148623286e-07, "loss": 0.3003, "step": 43950 }, { "epoch": 4.468381455876372, "grad_norm": 0.2941953241825104, "learning_rate": 3.4042707047399037e-07, "loss": 0.2842, "step": 43951 }, { "epoch": 4.468483123220821, "grad_norm": 0.25996509194374084, "learning_rate": 3.402983729355691e-07, "loss": 0.2921, "step": 43952 }, { "epoch": 4.46858479056527, "grad_norm": 0.2751220464706421, "learning_rate": 3.401696988716174e-07, "loss": 0.2874, "step": 43953 }, { "epoch": 4.468686457909719, "grad_norm": 0.27442535758018494, "learning_rate": 3.400410482827826e-07, "loss": 0.2883, "step": 43954 }, { "epoch": 4.468788125254168, "grad_norm": 0.3172253370285034, "learning_rate": 3.3991242116971356e-07, "loss": 0.2965, "step": 43955 }, { "epoch": 4.468889792598618, "grad_norm": 0.2938395142555237, "learning_rate": 3.3978381753306035e-07, "loss": 0.2937, "step": 43956 }, { "epoch": 4.468991459943067, "grad_norm": 0.26469382643699646, "learning_rate": 3.396552373734668e-07, "loss": 0.2902, "step": 43957 }, { "epoch": 4.469093127287516, "grad_norm": 0.28196436166763306, "learning_rate": 3.3952668069158366e-07, "loss": 0.298, "step": 43958 }, { "epoch": 4.469194794631965, "grad_norm": 0.2862424850463867, "learning_rate": 3.3939814748805756e-07, "loss": 0.2849, "step": 43959 }, { "epoch": 4.4692964619764135, "grad_norm": 0.29359596967697144, "learning_rate": 3.3926963776353514e-07, "loss": 0.3029, "step": 43960 }, { "epoch": 4.4693981293208624, "grad_norm": 0.31065458059310913, "learning_rate": 3.39141151518666e-07, "loss": 0.343, "step": 43961 }, { "epoch": 4.469499796665311, "grad_norm": 0.2581532597541809, "learning_rate": 3.3901268875409567e-07, "loss": 0.3045, "step": 43962 }, { "epoch": 4.46960146400976, "grad_norm": 0.2697846293449402, "learning_rate": 3.3888424947047194e-07, "loss": 0.2632, "step": 43963 }, { "epoch": 4.469703131354209, "grad_norm": 0.3089766502380371, "learning_rate": 3.3875583366844047e-07, "loss": 0.2855, "step": 43964 }, { "epoch": 4.469804798698658, "grad_norm": 0.31383591890335083, "learning_rate": 3.3862744134865066e-07, "loss": 0.3098, "step": 43965 }, { "epoch": 4.469906466043107, "grad_norm": 0.2637934982776642, "learning_rate": 3.384990725117482e-07, "loss": 0.3163, "step": 43966 }, { "epoch": 4.470008133387556, "grad_norm": 0.29426562786102295, "learning_rate": 3.383707271583786e-07, "loss": 0.2806, "step": 43967 }, { "epoch": 4.470109800732005, "grad_norm": 0.2771572470664978, "learning_rate": 3.3824240528919084e-07, "loss": 0.286, "step": 43968 }, { "epoch": 4.470211468076454, "grad_norm": 0.28084126114845276, "learning_rate": 3.3811410690482936e-07, "loss": 0.2902, "step": 43969 }, { "epoch": 4.470313135420903, "grad_norm": 0.2792803943157196, "learning_rate": 3.379858320059409e-07, "loss": 0.2808, "step": 43970 }, { "epoch": 4.470414802765352, "grad_norm": 0.2905155420303345, "learning_rate": 3.378575805931722e-07, "loss": 0.2799, "step": 43971 }, { "epoch": 4.470516470109801, "grad_norm": 0.2569577395915985, "learning_rate": 3.3772935266716934e-07, "loss": 0.3377, "step": 43972 }, { "epoch": 4.47061813745425, "grad_norm": 0.27992209792137146, "learning_rate": 3.376011482285785e-07, "loss": 0.3169, "step": 43973 }, { "epoch": 4.470719804798699, "grad_norm": 0.28569483757019043, "learning_rate": 3.374729672780436e-07, "loss": 0.2952, "step": 43974 }, { "epoch": 4.470821472143148, "grad_norm": 0.3056207597255707, "learning_rate": 3.3734480981621355e-07, "loss": 0.3132, "step": 43975 }, { "epoch": 4.470923139487597, "grad_norm": 0.28851038217544556, "learning_rate": 3.3721667584373175e-07, "loss": 0.296, "step": 43976 }, { "epoch": 4.4710248068320455, "grad_norm": 0.29426309466362, "learning_rate": 3.3708856536124324e-07, "loss": 0.2883, "step": 43977 }, { "epoch": 4.4711264741764944, "grad_norm": 0.29191040992736816, "learning_rate": 3.3696047836939584e-07, "loss": 0.3104, "step": 43978 }, { "epoch": 4.471228141520943, "grad_norm": 0.2930215299129486, "learning_rate": 3.3683241486883346e-07, "loss": 0.3, "step": 43979 }, { "epoch": 4.471329808865392, "grad_norm": 0.2757726311683655, "learning_rate": 3.367043748602e-07, "loss": 0.3085, "step": 43980 }, { "epoch": 4.471431476209841, "grad_norm": 0.27302926778793335, "learning_rate": 3.365763583441428e-07, "loss": 0.2899, "step": 43981 }, { "epoch": 4.47153314355429, "grad_norm": 0.2804427742958069, "learning_rate": 3.364483653213052e-07, "loss": 0.3276, "step": 43982 }, { "epoch": 4.471634810898739, "grad_norm": 0.3103151321411133, "learning_rate": 3.3632039579233335e-07, "loss": 0.3083, "step": 43983 }, { "epoch": 4.471736478243188, "grad_norm": 0.2696283161640167, "learning_rate": 3.361924497578695e-07, "loss": 0.2768, "step": 43984 }, { "epoch": 4.471838145587637, "grad_norm": 0.2743109166622162, "learning_rate": 3.360645272185614e-07, "loss": 0.2902, "step": 43985 }, { "epoch": 4.471939812932086, "grad_norm": 0.26551371812820435, "learning_rate": 3.3593662817505145e-07, "loss": 0.3097, "step": 43986 }, { "epoch": 4.472041480276535, "grad_norm": 0.29019784927368164, "learning_rate": 3.358087526279835e-07, "loss": 0.3114, "step": 43987 }, { "epoch": 4.472143147620984, "grad_norm": 0.25235211849212646, "learning_rate": 3.356809005780037e-07, "loss": 0.3205, "step": 43988 }, { "epoch": 4.472244814965433, "grad_norm": 0.30382072925567627, "learning_rate": 3.355530720257549e-07, "loss": 0.2822, "step": 43989 }, { "epoch": 4.472346482309882, "grad_norm": 0.2627975344657898, "learning_rate": 3.354252669718805e-07, "loss": 0.2863, "step": 43990 }, { "epoch": 4.472448149654331, "grad_norm": 0.2845703959465027, "learning_rate": 3.35297485417026e-07, "loss": 0.2888, "step": 43991 }, { "epoch": 4.47254981699878, "grad_norm": 0.28227052092552185, "learning_rate": 3.351697273618343e-07, "loss": 0.2938, "step": 43992 }, { "epoch": 4.472651484343229, "grad_norm": 0.2764289081096649, "learning_rate": 3.350419928069487e-07, "loss": 0.2808, "step": 43993 }, { "epoch": 4.4727531516876775, "grad_norm": 0.3008420467376709, "learning_rate": 3.3491428175301265e-07, "loss": 0.2932, "step": 43994 }, { "epoch": 4.472854819032127, "grad_norm": 0.3698718845844269, "learning_rate": 3.347865942006706e-07, "loss": 0.3181, "step": 43995 }, { "epoch": 4.472956486376576, "grad_norm": 0.26841285824775696, "learning_rate": 3.346589301505648e-07, "loss": 0.3054, "step": 43996 }, { "epoch": 4.473058153721025, "grad_norm": 0.291298508644104, "learning_rate": 3.3453128960333816e-07, "loss": 0.2607, "step": 43997 }, { "epoch": 4.473159821065474, "grad_norm": 0.2744942605495453, "learning_rate": 3.3440367255963447e-07, "loss": 0.307, "step": 43998 }, { "epoch": 4.473261488409923, "grad_norm": 0.2652318775653839, "learning_rate": 3.342760790200966e-07, "loss": 0.318, "step": 43999 }, { "epoch": 4.473363155754372, "grad_norm": 0.27691125869750977, "learning_rate": 3.3414850898536687e-07, "loss": 0.2882, "step": 44000 }, { "epoch": 4.473464823098821, "grad_norm": 0.3162829875946045, "learning_rate": 3.340209624560881e-07, "loss": 0.3041, "step": 44001 }, { "epoch": 4.47356649044327, "grad_norm": 0.2929913103580475, "learning_rate": 3.338934394329035e-07, "loss": 0.3108, "step": 44002 }, { "epoch": 4.473668157787719, "grad_norm": 0.26743143796920776, "learning_rate": 3.3376593991645443e-07, "loss": 0.2678, "step": 44003 }, { "epoch": 4.473769825132168, "grad_norm": 0.2873455286026001, "learning_rate": 3.336384639073836e-07, "loss": 0.2743, "step": 44004 }, { "epoch": 4.473871492476617, "grad_norm": 0.2674354910850525, "learning_rate": 3.3351101140633334e-07, "loss": 0.2887, "step": 44005 }, { "epoch": 4.473973159821066, "grad_norm": 0.31572484970092773, "learning_rate": 3.3338358241394644e-07, "loss": 0.2684, "step": 44006 }, { "epoch": 4.474074827165515, "grad_norm": 0.28489723801612854, "learning_rate": 3.332561769308623e-07, "loss": 0.2986, "step": 44007 }, { "epoch": 4.474176494509964, "grad_norm": 0.2999833822250366, "learning_rate": 3.331287949577261e-07, "loss": 0.2812, "step": 44008 }, { "epoch": 4.4742781618544125, "grad_norm": 0.2791388928890228, "learning_rate": 3.330014364951778e-07, "loss": 0.3123, "step": 44009 }, { "epoch": 4.4743798291988615, "grad_norm": 0.3009507656097412, "learning_rate": 3.3287410154385856e-07, "loss": 0.3385, "step": 44010 }, { "epoch": 4.47448149654331, "grad_norm": 0.279420405626297, "learning_rate": 3.327467901044112e-07, "loss": 0.3273, "step": 44011 }, { "epoch": 4.474583163887759, "grad_norm": 0.27353858947753906, "learning_rate": 3.326195021774764e-07, "loss": 0.2905, "step": 44012 }, { "epoch": 4.474684831232208, "grad_norm": 0.27908745408058167, "learning_rate": 3.324922377636952e-07, "loss": 0.2873, "step": 44013 }, { "epoch": 4.474786498576657, "grad_norm": 0.26872092485427856, "learning_rate": 3.323649968637083e-07, "loss": 0.3151, "step": 44014 }, { "epoch": 4.474888165921106, "grad_norm": 0.2688465118408203, "learning_rate": 3.3223777947815836e-07, "loss": 0.3219, "step": 44015 }, { "epoch": 4.474989833265555, "grad_norm": 0.26334482431411743, "learning_rate": 3.321105856076851e-07, "loss": 0.314, "step": 44016 }, { "epoch": 4.475091500610004, "grad_norm": 0.2809251546859741, "learning_rate": 3.319834152529289e-07, "loss": 0.2576, "step": 44017 }, { "epoch": 4.475193167954453, "grad_norm": 0.2626802325248718, "learning_rate": 3.31856268414531e-07, "loss": 0.2938, "step": 44018 }, { "epoch": 4.475294835298902, "grad_norm": 0.2825596332550049, "learning_rate": 3.317291450931326e-07, "loss": 0.2939, "step": 44019 }, { "epoch": 4.475396502643351, "grad_norm": 0.2644781470298767, "learning_rate": 3.3160204528937254e-07, "loss": 0.2691, "step": 44020 }, { "epoch": 4.4754981699878, "grad_norm": 0.28128504753112793, "learning_rate": 3.3147496900389266e-07, "loss": 0.2969, "step": 44021 }, { "epoch": 4.475599837332249, "grad_norm": 0.2933787703514099, "learning_rate": 3.313479162373323e-07, "loss": 0.3141, "step": 44022 }, { "epoch": 4.475701504676698, "grad_norm": 0.28914615511894226, "learning_rate": 3.312208869903322e-07, "loss": 0.2846, "step": 44023 }, { "epoch": 4.475803172021147, "grad_norm": 0.26691934466362, "learning_rate": 3.310938812635306e-07, "loss": 0.3152, "step": 44024 }, { "epoch": 4.475904839365596, "grad_norm": 0.2887192964553833, "learning_rate": 3.309668990575693e-07, "loss": 0.2914, "step": 44025 }, { "epoch": 4.4760065067100445, "grad_norm": 0.2968252897262573, "learning_rate": 3.308399403730872e-07, "loss": 0.2985, "step": 44026 }, { "epoch": 4.4761081740544935, "grad_norm": 0.27047616243362427, "learning_rate": 3.3071300521072324e-07, "loss": 0.2535, "step": 44027 }, { "epoch": 4.476209841398942, "grad_norm": 0.29466113448143005, "learning_rate": 3.305860935711186e-07, "loss": 0.3127, "step": 44028 }, { "epoch": 4.476311508743391, "grad_norm": 0.2888208329677582, "learning_rate": 3.3045920545491105e-07, "loss": 0.2621, "step": 44029 }, { "epoch": 4.47641317608784, "grad_norm": 0.27881383895874023, "learning_rate": 3.303323408627407e-07, "loss": 0.3197, "step": 44030 }, { "epoch": 4.476514843432289, "grad_norm": 0.3088880479335785, "learning_rate": 3.3020549979524585e-07, "loss": 0.3118, "step": 44031 }, { "epoch": 4.476616510776738, "grad_norm": 0.2708769142627716, "learning_rate": 3.3007868225306606e-07, "loss": 0.3183, "step": 44032 }, { "epoch": 4.476718178121187, "grad_norm": 0.26855021715164185, "learning_rate": 3.2995188823684077e-07, "loss": 0.2785, "step": 44033 }, { "epoch": 4.476819845465636, "grad_norm": 0.28047341108322144, "learning_rate": 3.2982511774720673e-07, "loss": 0.2833, "step": 44034 }, { "epoch": 4.476921512810085, "grad_norm": 0.27934497594833374, "learning_rate": 3.2969837078480616e-07, "loss": 0.2886, "step": 44035 }, { "epoch": 4.477023180154534, "grad_norm": 0.28263673186302185, "learning_rate": 3.295716473502736e-07, "loss": 0.3052, "step": 44036 }, { "epoch": 4.477124847498983, "grad_norm": 0.2626695930957794, "learning_rate": 3.2944494744424905e-07, "loss": 0.3325, "step": 44037 }, { "epoch": 4.477226514843433, "grad_norm": 0.2792534828186035, "learning_rate": 3.293182710673715e-07, "loss": 0.3386, "step": 44038 }, { "epoch": 4.477328182187882, "grad_norm": 0.2792555093765259, "learning_rate": 3.291916182202787e-07, "loss": 0.2947, "step": 44039 }, { "epoch": 4.477429849532331, "grad_norm": 0.2971961498260498, "learning_rate": 3.2906498890360903e-07, "loss": 0.2691, "step": 44040 }, { "epoch": 4.4775315168767795, "grad_norm": 0.2907072901725769, "learning_rate": 3.289383831179988e-07, "loss": 0.2851, "step": 44041 }, { "epoch": 4.4776331842212285, "grad_norm": 0.27194663882255554, "learning_rate": 3.288118008640878e-07, "loss": 0.2904, "step": 44042 }, { "epoch": 4.477734851565677, "grad_norm": 0.25213637948036194, "learning_rate": 3.2868524214251294e-07, "loss": 0.2637, "step": 44043 }, { "epoch": 4.477836518910126, "grad_norm": 0.2655944526195526, "learning_rate": 3.285587069539109e-07, "loss": 0.3274, "step": 44044 }, { "epoch": 4.477938186254575, "grad_norm": 0.26729241013526917, "learning_rate": 3.284321952989217e-07, "loss": 0.2823, "step": 44045 }, { "epoch": 4.478039853599024, "grad_norm": 0.2728465795516968, "learning_rate": 3.283057071781792e-07, "loss": 0.2826, "step": 44046 }, { "epoch": 4.478141520943473, "grad_norm": 0.2801600396633148, "learning_rate": 3.281792425923225e-07, "loss": 0.3005, "step": 44047 }, { "epoch": 4.478243188287922, "grad_norm": 0.2623114585876465, "learning_rate": 3.2805280154198925e-07, "loss": 0.2755, "step": 44048 }, { "epoch": 4.478344855632371, "grad_norm": 0.26954227685928345, "learning_rate": 3.279263840278163e-07, "loss": 0.2957, "step": 44049 }, { "epoch": 4.47844652297682, "grad_norm": 0.28828442096710205, "learning_rate": 3.2779999005043914e-07, "loss": 0.2849, "step": 44050 }, { "epoch": 4.478548190321269, "grad_norm": 0.29011645913124084, "learning_rate": 3.2767361961049515e-07, "loss": 0.2753, "step": 44051 }, { "epoch": 4.478649857665718, "grad_norm": 0.2788703739643097, "learning_rate": 3.275472727086215e-07, "loss": 0.2736, "step": 44052 }, { "epoch": 4.478751525010167, "grad_norm": 0.2856252193450928, "learning_rate": 3.27420949345455e-07, "loss": 0.2711, "step": 44053 }, { "epoch": 4.478853192354616, "grad_norm": 0.28085657954216003, "learning_rate": 3.2729464952163004e-07, "loss": 0.2755, "step": 44054 }, { "epoch": 4.478954859699065, "grad_norm": 0.26571211218833923, "learning_rate": 3.2716837323778614e-07, "loss": 0.321, "step": 44055 }, { "epoch": 4.479056527043514, "grad_norm": 0.2977936565876007, "learning_rate": 3.2704212049455565e-07, "loss": 0.2987, "step": 44056 }, { "epoch": 4.479158194387963, "grad_norm": 0.28980547189712524, "learning_rate": 3.269158912925768e-07, "loss": 0.3137, "step": 44057 }, { "epoch": 4.4792598617324115, "grad_norm": 0.2697836756706238, "learning_rate": 3.267896856324865e-07, "loss": 0.295, "step": 44058 }, { "epoch": 4.4793615290768605, "grad_norm": 0.27305781841278076, "learning_rate": 3.2666350351491794e-07, "loss": 0.3135, "step": 44059 }, { "epoch": 4.479463196421309, "grad_norm": 0.2758161425590515, "learning_rate": 3.265373449405085e-07, "loss": 0.2725, "step": 44060 }, { "epoch": 4.479564863765758, "grad_norm": 0.2840932011604309, "learning_rate": 3.264112099098926e-07, "loss": 0.2824, "step": 44061 }, { "epoch": 4.479666531110207, "grad_norm": 0.28529125452041626, "learning_rate": 3.262850984237076e-07, "loss": 0.3134, "step": 44062 }, { "epoch": 4.479768198454656, "grad_norm": 0.2880033552646637, "learning_rate": 3.2615901048258735e-07, "loss": 0.2838, "step": 44063 }, { "epoch": 4.479869865799105, "grad_norm": 0.29217734932899475, "learning_rate": 3.260329460871664e-07, "loss": 0.3201, "step": 44064 }, { "epoch": 4.479971533143554, "grad_norm": 0.28629884123802185, "learning_rate": 3.2590690523808257e-07, "loss": 0.2805, "step": 44065 }, { "epoch": 4.480073200488003, "grad_norm": 0.28475886583328247, "learning_rate": 3.2578088793596753e-07, "loss": 0.3221, "step": 44066 }, { "epoch": 4.480174867832452, "grad_norm": 0.27321508526802063, "learning_rate": 3.2565489418145746e-07, "loss": 0.306, "step": 44067 }, { "epoch": 4.480276535176901, "grad_norm": 0.258302241563797, "learning_rate": 3.2552892397518854e-07, "loss": 0.3004, "step": 44068 }, { "epoch": 4.48037820252135, "grad_norm": 0.2973581552505493, "learning_rate": 3.2540297731779304e-07, "loss": 0.2942, "step": 44069 }, { "epoch": 4.480479869865799, "grad_norm": 0.2851295471191406, "learning_rate": 3.2527705420990707e-07, "loss": 0.2917, "step": 44070 }, { "epoch": 4.480581537210248, "grad_norm": 0.26916950941085815, "learning_rate": 3.2515115465216404e-07, "loss": 0.302, "step": 44071 }, { "epoch": 4.480683204554697, "grad_norm": 0.2807486951351166, "learning_rate": 3.2502527864519793e-07, "loss": 0.3023, "step": 44072 }, { "epoch": 4.480784871899146, "grad_norm": 0.26861095428466797, "learning_rate": 3.248994261896449e-07, "loss": 0.2945, "step": 44073 }, { "epoch": 4.480886539243595, "grad_norm": 0.28215494751930237, "learning_rate": 3.24773597286136e-07, "loss": 0.2923, "step": 44074 }, { "epoch": 4.4809882065880435, "grad_norm": 0.2939738631248474, "learning_rate": 3.246477919353086e-07, "loss": 0.3074, "step": 44075 }, { "epoch": 4.4810898739324925, "grad_norm": 0.26703059673309326, "learning_rate": 3.245220101377933e-07, "loss": 0.2964, "step": 44076 }, { "epoch": 4.481191541276942, "grad_norm": 0.2739327847957611, "learning_rate": 3.243962518942251e-07, "loss": 0.2758, "step": 44077 }, { "epoch": 4.481293208621391, "grad_norm": 0.2655387818813324, "learning_rate": 3.242705172052385e-07, "loss": 0.3029, "step": 44078 }, { "epoch": 4.48139487596584, "grad_norm": 0.2787170112133026, "learning_rate": 3.241448060714647e-07, "loss": 0.2926, "step": 44079 }, { "epoch": 4.481496543310289, "grad_norm": 0.2772638201713562, "learning_rate": 3.240191184935387e-07, "loss": 0.2913, "step": 44080 }, { "epoch": 4.481598210654738, "grad_norm": 0.27004334330558777, "learning_rate": 3.238934544720934e-07, "loss": 0.3262, "step": 44081 }, { "epoch": 4.481699877999187, "grad_norm": 0.2756730020046234, "learning_rate": 3.2376781400776105e-07, "loss": 0.2801, "step": 44082 }, { "epoch": 4.481801545343636, "grad_norm": 0.28506746888160706, "learning_rate": 3.2364219710117605e-07, "loss": 0.2807, "step": 44083 }, { "epoch": 4.481903212688085, "grad_norm": 0.2805476486682892, "learning_rate": 3.2351660375296913e-07, "loss": 0.2506, "step": 44084 }, { "epoch": 4.482004880032534, "grad_norm": 0.28890103101730347, "learning_rate": 3.2339103396377635e-07, "loss": 0.2979, "step": 44085 }, { "epoch": 4.482106547376983, "grad_norm": 0.27782291173934937, "learning_rate": 3.2326548773422615e-07, "loss": 0.2624, "step": 44086 }, { "epoch": 4.482208214721432, "grad_norm": 0.2819434404373169, "learning_rate": 3.231399650649536e-07, "loss": 0.2873, "step": 44087 }, { "epoch": 4.482309882065881, "grad_norm": 0.2878014147281647, "learning_rate": 3.2301446595659145e-07, "loss": 0.281, "step": 44088 }, { "epoch": 4.48241154941033, "grad_norm": 0.2701816260814667, "learning_rate": 3.2288899040976986e-07, "loss": 0.3058, "step": 44089 }, { "epoch": 4.4825132167547785, "grad_norm": 0.27116507291793823, "learning_rate": 3.2276353842512264e-07, "loss": 0.2944, "step": 44090 }, { "epoch": 4.4826148840992275, "grad_norm": 0.2572581171989441, "learning_rate": 3.226381100032816e-07, "loss": 0.3115, "step": 44091 }, { "epoch": 4.482716551443676, "grad_norm": 0.28885918855667114, "learning_rate": 3.225127051448773e-07, "loss": 0.2686, "step": 44092 }, { "epoch": 4.482818218788125, "grad_norm": 0.3020053505897522, "learning_rate": 3.2238732385054315e-07, "loss": 0.273, "step": 44093 }, { "epoch": 4.482919886132574, "grad_norm": 0.2711006999015808, "learning_rate": 3.222619661209098e-07, "loss": 0.2848, "step": 44094 }, { "epoch": 4.483021553477023, "grad_norm": 0.2614974081516266, "learning_rate": 3.2213663195660937e-07, "loss": 0.2803, "step": 44095 }, { "epoch": 4.483123220821472, "grad_norm": 0.2640663683414459, "learning_rate": 3.2201132135827205e-07, "loss": 0.2825, "step": 44096 }, { "epoch": 4.483224888165921, "grad_norm": 0.2598091959953308, "learning_rate": 3.218860343265301e-07, "loss": 0.3183, "step": 44097 }, { "epoch": 4.48332655551037, "grad_norm": 0.30560922622680664, "learning_rate": 3.217607708620163e-07, "loss": 0.2645, "step": 44098 }, { "epoch": 4.483428222854819, "grad_norm": 0.3082951307296753, "learning_rate": 3.2163553096535847e-07, "loss": 0.2902, "step": 44099 }, { "epoch": 4.483529890199268, "grad_norm": 0.26822909712791443, "learning_rate": 3.2151031463719006e-07, "loss": 0.3028, "step": 44100 }, { "epoch": 4.483631557543717, "grad_norm": 0.26985064148902893, "learning_rate": 3.2138512187814053e-07, "loss": 0.3055, "step": 44101 }, { "epoch": 4.483733224888166, "grad_norm": 0.2925635874271393, "learning_rate": 3.212599526888399e-07, "loss": 0.3158, "step": 44102 }, { "epoch": 4.483834892232615, "grad_norm": 0.2645070552825928, "learning_rate": 3.2113480706992107e-07, "loss": 0.2748, "step": 44103 }, { "epoch": 4.483936559577064, "grad_norm": 0.28395044803619385, "learning_rate": 3.210096850220129e-07, "loss": 0.3048, "step": 44104 }, { "epoch": 4.484038226921513, "grad_norm": 0.2664591670036316, "learning_rate": 3.208845865457466e-07, "loss": 0.2785, "step": 44105 }, { "epoch": 4.484139894265962, "grad_norm": 0.2785004675388336, "learning_rate": 3.207595116417506e-07, "loss": 0.3065, "step": 44106 }, { "epoch": 4.4842415616104105, "grad_norm": 0.2814306914806366, "learning_rate": 3.206344603106565e-07, "loss": 0.2714, "step": 44107 }, { "epoch": 4.4843432289548595, "grad_norm": 0.3053683936595917, "learning_rate": 3.2050943255309495e-07, "loss": 0.3132, "step": 44108 }, { "epoch": 4.484444896299308, "grad_norm": 0.2909119725227356, "learning_rate": 3.2038442836969387e-07, "loss": 0.2836, "step": 44109 }, { "epoch": 4.484546563643757, "grad_norm": 0.2880142629146576, "learning_rate": 3.202594477610843e-07, "loss": 0.2996, "step": 44110 }, { "epoch": 4.484648230988206, "grad_norm": 0.2743770182132721, "learning_rate": 3.2013449072789583e-07, "loss": 0.2965, "step": 44111 }, { "epoch": 4.484749898332655, "grad_norm": 0.2723143994808197, "learning_rate": 3.200095572707573e-07, "loss": 0.2887, "step": 44112 }, { "epoch": 4.484851565677104, "grad_norm": 0.2680955231189728, "learning_rate": 3.198846473902989e-07, "loss": 0.2781, "step": 44113 }, { "epoch": 4.484953233021553, "grad_norm": 0.30435022711753845, "learning_rate": 3.1975976108714945e-07, "loss": 0.2869, "step": 44114 }, { "epoch": 4.485054900366002, "grad_norm": 0.27931904792785645, "learning_rate": 3.1963489836193796e-07, "loss": 0.3159, "step": 44115 }, { "epoch": 4.485156567710451, "grad_norm": 0.28641343116760254, "learning_rate": 3.195100592152933e-07, "loss": 0.3049, "step": 44116 }, { "epoch": 4.4852582350549, "grad_norm": 0.28615251183509827, "learning_rate": 3.1938524364784506e-07, "loss": 0.2942, "step": 44117 }, { "epoch": 4.485359902399349, "grad_norm": 0.2728762626647949, "learning_rate": 3.192604516602216e-07, "loss": 0.2969, "step": 44118 }, { "epoch": 4.485461569743798, "grad_norm": 0.2627005875110626, "learning_rate": 3.1913568325305125e-07, "loss": 0.2781, "step": 44119 }, { "epoch": 4.485563237088248, "grad_norm": 0.2791234254837036, "learning_rate": 3.1901093842696354e-07, "loss": 0.3069, "step": 44120 }, { "epoch": 4.485664904432697, "grad_norm": 0.3055398464202881, "learning_rate": 3.1888621718258685e-07, "loss": 0.3362, "step": 44121 }, { "epoch": 4.4857665717771456, "grad_norm": 0.29871439933776855, "learning_rate": 3.1876151952054736e-07, "loss": 0.2892, "step": 44122 }, { "epoch": 4.4858682391215945, "grad_norm": 0.2781313955783844, "learning_rate": 3.186368454414762e-07, "loss": 0.2569, "step": 44123 }, { "epoch": 4.485969906466043, "grad_norm": 0.29268360137939453, "learning_rate": 3.1851219494600015e-07, "loss": 0.2807, "step": 44124 }, { "epoch": 4.486071573810492, "grad_norm": 0.28885143995285034, "learning_rate": 3.1838756803474704e-07, "loss": 0.2676, "step": 44125 }, { "epoch": 4.486173241154941, "grad_norm": 0.26997673511505127, "learning_rate": 3.1826296470834406e-07, "loss": 0.2891, "step": 44126 }, { "epoch": 4.48627490849939, "grad_norm": 0.26476672291755676, "learning_rate": 3.1813838496742076e-07, "loss": 0.2936, "step": 44127 }, { "epoch": 4.486376575843839, "grad_norm": 0.30251210927963257, "learning_rate": 3.180138288126033e-07, "loss": 0.3155, "step": 44128 }, { "epoch": 4.486478243188288, "grad_norm": 0.28864404559135437, "learning_rate": 3.17889296244519e-07, "loss": 0.2962, "step": 44129 }, { "epoch": 4.486579910532737, "grad_norm": 0.26420632004737854, "learning_rate": 3.1776478726379677e-07, "loss": 0.2865, "step": 44130 }, { "epoch": 4.486681577877186, "grad_norm": 0.2797195613384247, "learning_rate": 3.1764030187106275e-07, "loss": 0.2742, "step": 44131 }, { "epoch": 4.486783245221635, "grad_norm": 0.2749253511428833, "learning_rate": 3.1751584006694313e-07, "loss": 0.305, "step": 44132 }, { "epoch": 4.486884912566084, "grad_norm": 0.25838738679885864, "learning_rate": 3.173914018520669e-07, "loss": 0.2805, "step": 44133 }, { "epoch": 4.486986579910533, "grad_norm": 0.2753675580024719, "learning_rate": 3.1726698722706006e-07, "loss": 0.2797, "step": 44134 }, { "epoch": 4.487088247254982, "grad_norm": 0.295993447303772, "learning_rate": 3.171425961925495e-07, "loss": 0.3121, "step": 44135 }, { "epoch": 4.487189914599431, "grad_norm": 0.2844531834125519, "learning_rate": 3.1701822874916077e-07, "loss": 0.267, "step": 44136 }, { "epoch": 4.48729158194388, "grad_norm": 0.27731481194496155, "learning_rate": 3.168938848975217e-07, "loss": 0.3123, "step": 44137 }, { "epoch": 4.487393249288329, "grad_norm": 0.27641215920448303, "learning_rate": 3.16769564638259e-07, "loss": 0.3111, "step": 44138 }, { "epoch": 4.4874949166327776, "grad_norm": 0.28841546177864075, "learning_rate": 3.1664526797199726e-07, "loss": 0.311, "step": 44139 }, { "epoch": 4.4875965839772265, "grad_norm": 0.2838059663772583, "learning_rate": 3.1652099489936474e-07, "loss": 0.27, "step": 44140 }, { "epoch": 4.487698251321675, "grad_norm": 0.266092449426651, "learning_rate": 3.1639674542098654e-07, "loss": 0.3172, "step": 44141 }, { "epoch": 4.487799918666124, "grad_norm": 0.2776350677013397, "learning_rate": 3.162725195374872e-07, "loss": 0.2845, "step": 44142 }, { "epoch": 4.487901586010573, "grad_norm": 0.2638890743255615, "learning_rate": 3.16148317249495e-07, "loss": 0.2984, "step": 44143 }, { "epoch": 4.488003253355022, "grad_norm": 0.2517983019351959, "learning_rate": 3.160241385576346e-07, "loss": 0.2797, "step": 44144 }, { "epoch": 4.488104920699471, "grad_norm": 0.30202245712280273, "learning_rate": 3.1589998346253145e-07, "loss": 0.2792, "step": 44145 }, { "epoch": 4.48820658804392, "grad_norm": 0.28145307302474976, "learning_rate": 3.157758519648102e-07, "loss": 0.2972, "step": 44146 }, { "epoch": 4.488308255388369, "grad_norm": 0.2936515510082245, "learning_rate": 3.15651744065098e-07, "loss": 0.3021, "step": 44147 }, { "epoch": 4.488409922732818, "grad_norm": 0.26308852434158325, "learning_rate": 3.1552765976401943e-07, "loss": 0.2846, "step": 44148 }, { "epoch": 4.488511590077267, "grad_norm": 0.2655688524246216, "learning_rate": 3.1540359906219785e-07, "loss": 0.2761, "step": 44149 }, { "epoch": 4.488613257421716, "grad_norm": 0.2726658880710602, "learning_rate": 3.152795619602611e-07, "loss": 0.3066, "step": 44150 }, { "epoch": 4.488714924766165, "grad_norm": 0.2926284372806549, "learning_rate": 3.151555484588326e-07, "loss": 0.267, "step": 44151 }, { "epoch": 4.488816592110614, "grad_norm": 0.27845481038093567, "learning_rate": 3.150315585585356e-07, "loss": 0.2959, "step": 44152 }, { "epoch": 4.488918259455063, "grad_norm": 0.30193692445755005, "learning_rate": 3.149075922599981e-07, "loss": 0.2911, "step": 44153 }, { "epoch": 4.489019926799512, "grad_norm": 0.28302332758903503, "learning_rate": 3.147836495638423e-07, "loss": 0.2876, "step": 44154 }, { "epoch": 4.489121594143961, "grad_norm": 0.29751506447792053, "learning_rate": 3.146597304706928e-07, "loss": 0.2869, "step": 44155 }, { "epoch": 4.4892232614884096, "grad_norm": 0.311430424451828, "learning_rate": 3.145358349811739e-07, "loss": 0.2796, "step": 44156 }, { "epoch": 4.4893249288328585, "grad_norm": 0.2738429009914398, "learning_rate": 3.144119630959103e-07, "loss": 0.2711, "step": 44157 }, { "epoch": 4.489426596177307, "grad_norm": 0.29202574491500854, "learning_rate": 3.1428811481552637e-07, "loss": 0.2967, "step": 44158 }, { "epoch": 4.489528263521757, "grad_norm": 0.2749161720275879, "learning_rate": 3.141642901406444e-07, "loss": 0.2869, "step": 44159 }, { "epoch": 4.489629930866206, "grad_norm": 0.3195508122444153, "learning_rate": 3.1404048907188954e-07, "loss": 0.2749, "step": 44160 }, { "epoch": 4.489731598210655, "grad_norm": 0.28431224822998047, "learning_rate": 3.1391671160988567e-07, "loss": 0.2985, "step": 44161 }, { "epoch": 4.489833265555104, "grad_norm": 0.29220879077911377, "learning_rate": 3.137929577552551e-07, "loss": 0.31, "step": 44162 }, { "epoch": 4.489934932899553, "grad_norm": 0.26896050572395325, "learning_rate": 3.136692275086223e-07, "loss": 0.2874, "step": 44163 }, { "epoch": 4.490036600244002, "grad_norm": 0.27703118324279785, "learning_rate": 3.1354552087061064e-07, "loss": 0.299, "step": 44164 }, { "epoch": 4.490138267588451, "grad_norm": 0.292548805475235, "learning_rate": 3.1342183784184297e-07, "loss": 0.2804, "step": 44165 }, { "epoch": 4.4902399349329, "grad_norm": 0.26191601157188416, "learning_rate": 3.1329817842294164e-07, "loss": 0.2839, "step": 44166 }, { "epoch": 4.490341602277349, "grad_norm": 0.266478568315506, "learning_rate": 3.131745426145311e-07, "loss": 0.2938, "step": 44167 }, { "epoch": 4.490443269621798, "grad_norm": 0.24716538190841675, "learning_rate": 3.1305093041723355e-07, "loss": 0.2915, "step": 44168 }, { "epoch": 4.490544936966247, "grad_norm": 0.28482291102409363, "learning_rate": 3.129273418316703e-07, "loss": 0.3127, "step": 44169 }, { "epoch": 4.490646604310696, "grad_norm": 0.29628607630729675, "learning_rate": 3.1280377685846687e-07, "loss": 0.2893, "step": 44170 }, { "epoch": 4.490748271655145, "grad_norm": 0.27820464968681335, "learning_rate": 3.1268023549824333e-07, "loss": 0.2927, "step": 44171 }, { "epoch": 4.4908499389995935, "grad_norm": 0.28218352794647217, "learning_rate": 3.1255671775162256e-07, "loss": 0.2741, "step": 44172 }, { "epoch": 4.4909516063440424, "grad_norm": 0.28433287143707275, "learning_rate": 3.124332236192279e-07, "loss": 0.2968, "step": 44173 }, { "epoch": 4.491053273688491, "grad_norm": 0.27878090739250183, "learning_rate": 3.1230975310168056e-07, "loss": 0.2668, "step": 44174 }, { "epoch": 4.49115494103294, "grad_norm": 0.28472670912742615, "learning_rate": 3.1218630619960335e-07, "loss": 0.3018, "step": 44175 }, { "epoch": 4.491256608377389, "grad_norm": 0.2749183177947998, "learning_rate": 3.1206288291361575e-07, "loss": 0.309, "step": 44176 }, { "epoch": 4.491358275721838, "grad_norm": 0.2820315361022949, "learning_rate": 3.1193948324434286e-07, "loss": 0.303, "step": 44177 }, { "epoch": 4.491459943066287, "grad_norm": 0.2877088487148285, "learning_rate": 3.1181610719240476e-07, "loss": 0.2933, "step": 44178 }, { "epoch": 4.491561610410736, "grad_norm": 0.2971637547016144, "learning_rate": 3.1169275475842196e-07, "loss": 0.2799, "step": 44179 }, { "epoch": 4.491663277755185, "grad_norm": 0.2962484061717987, "learning_rate": 3.115694259430174e-07, "loss": 0.3105, "step": 44180 }, { "epoch": 4.491764945099634, "grad_norm": 0.281953364610672, "learning_rate": 3.114461207468128e-07, "loss": 0.2854, "step": 44181 }, { "epoch": 4.491866612444083, "grad_norm": 0.2852436602115631, "learning_rate": 3.1132283917042704e-07, "loss": 0.3059, "step": 44182 }, { "epoch": 4.491968279788532, "grad_norm": 0.2799444794654846, "learning_rate": 3.1119958121448356e-07, "loss": 0.3083, "step": 44183 }, { "epoch": 4.492069947132981, "grad_norm": 0.27597829699516296, "learning_rate": 3.110763468796024e-07, "loss": 0.3026, "step": 44184 }, { "epoch": 4.49217161447743, "grad_norm": 0.3232289254665375, "learning_rate": 3.1095313616640424e-07, "loss": 0.2834, "step": 44185 }, { "epoch": 4.492273281821879, "grad_norm": 0.26373419165611267, "learning_rate": 3.1082994907550903e-07, "loss": 0.3115, "step": 44186 }, { "epoch": 4.492374949166328, "grad_norm": 0.282487154006958, "learning_rate": 3.107067856075391e-07, "loss": 0.2797, "step": 44187 }, { "epoch": 4.492476616510777, "grad_norm": 0.2722519636154175, "learning_rate": 3.105836457631145e-07, "loss": 0.2644, "step": 44188 }, { "epoch": 4.4925782838552255, "grad_norm": 0.28908318281173706, "learning_rate": 3.104605295428537e-07, "loss": 0.2826, "step": 44189 }, { "epoch": 4.4926799511996744, "grad_norm": 0.30494385957717896, "learning_rate": 3.103374369473794e-07, "loss": 0.2861, "step": 44190 }, { "epoch": 4.492781618544123, "grad_norm": 0.26964497566223145, "learning_rate": 3.102143679773106e-07, "loss": 0.339, "step": 44191 }, { "epoch": 4.492883285888572, "grad_norm": 0.2876216471195221, "learning_rate": 3.1009132263326637e-07, "loss": 0.2713, "step": 44192 }, { "epoch": 4.492984953233021, "grad_norm": 0.2747703790664673, "learning_rate": 3.099683009158688e-07, "loss": 0.283, "step": 44193 }, { "epoch": 4.49308662057747, "grad_norm": 0.26793745160102844, "learning_rate": 3.0984530282573646e-07, "loss": 0.3478, "step": 44194 }, { "epoch": 4.493188287921919, "grad_norm": 0.2948819696903229, "learning_rate": 3.097223283634881e-07, "loss": 0.3161, "step": 44195 }, { "epoch": 4.493289955266368, "grad_norm": 0.2733805179595947, "learning_rate": 3.09599377529744e-07, "loss": 0.2714, "step": 44196 }, { "epoch": 4.493391622610817, "grad_norm": 0.29482224583625793, "learning_rate": 3.094764503251241e-07, "loss": 0.2874, "step": 44197 }, { "epoch": 4.493493289955266, "grad_norm": 0.2581416368484497, "learning_rate": 3.093535467502473e-07, "loss": 0.299, "step": 44198 }, { "epoch": 4.493594957299715, "grad_norm": 0.2872178256511688, "learning_rate": 3.092306668057321e-07, "loss": 0.3039, "step": 44199 }, { "epoch": 4.493696624644164, "grad_norm": 0.29318928718566895, "learning_rate": 3.091078104921991e-07, "loss": 0.3389, "step": 44200 }, { "epoch": 4.493798291988613, "grad_norm": 0.2943058907985687, "learning_rate": 3.089849778102655e-07, "loss": 0.3147, "step": 44201 }, { "epoch": 4.493899959333063, "grad_norm": 0.29560551047325134, "learning_rate": 3.088621687605503e-07, "loss": 0.3008, "step": 44202 }, { "epoch": 4.494001626677512, "grad_norm": 0.31750380992889404, "learning_rate": 3.087393833436736e-07, "loss": 0.2851, "step": 44203 }, { "epoch": 4.4941032940219605, "grad_norm": 0.2948501408100128, "learning_rate": 3.086166215602532e-07, "loss": 0.2913, "step": 44204 }, { "epoch": 4.4942049613664095, "grad_norm": 0.28982219099998474, "learning_rate": 3.084938834109069e-07, "loss": 0.2929, "step": 44205 }, { "epoch": 4.494306628710858, "grad_norm": 0.297193706035614, "learning_rate": 3.083711688962532e-07, "loss": 0.2777, "step": 44206 }, { "epoch": 4.494408296055307, "grad_norm": 0.26904016733169556, "learning_rate": 3.082484780169115e-07, "loss": 0.2751, "step": 44207 }, { "epoch": 4.494509963399756, "grad_norm": 0.30790290236473083, "learning_rate": 3.0812581077349915e-07, "loss": 0.266, "step": 44208 }, { "epoch": 4.494611630744205, "grad_norm": 0.28099071979522705, "learning_rate": 3.0800316716663283e-07, "loss": 0.2827, "step": 44209 }, { "epoch": 4.494713298088654, "grad_norm": 0.279451847076416, "learning_rate": 3.0788054719693315e-07, "loss": 0.3119, "step": 44210 }, { "epoch": 4.494814965433103, "grad_norm": 0.2932796776294708, "learning_rate": 3.077579508650147e-07, "loss": 0.3122, "step": 44211 }, { "epoch": 4.494916632777552, "grad_norm": 0.2735286056995392, "learning_rate": 3.0763537817149747e-07, "loss": 0.355, "step": 44212 }, { "epoch": 4.495018300122001, "grad_norm": 0.3015331029891968, "learning_rate": 3.0751282911699767e-07, "loss": 0.2763, "step": 44213 }, { "epoch": 4.49511996746645, "grad_norm": 0.2740907073020935, "learning_rate": 3.073903037021336e-07, "loss": 0.2975, "step": 44214 }, { "epoch": 4.495221634810899, "grad_norm": 0.2726247012615204, "learning_rate": 3.0726780192752214e-07, "loss": 0.3092, "step": 44215 }, { "epoch": 4.495323302155348, "grad_norm": 0.297006219625473, "learning_rate": 3.0714532379377937e-07, "loss": 0.2806, "step": 44216 }, { "epoch": 4.495424969499797, "grad_norm": 0.26818037033081055, "learning_rate": 3.070228693015248e-07, "loss": 0.3207, "step": 44217 }, { "epoch": 4.495526636844246, "grad_norm": 0.27826717495918274, "learning_rate": 3.069004384513724e-07, "loss": 0.2917, "step": 44218 }, { "epoch": 4.495628304188695, "grad_norm": 0.2943692207336426, "learning_rate": 3.0677803124394057e-07, "loss": 0.2769, "step": 44219 }, { "epoch": 4.495729971533144, "grad_norm": 0.2641785442829132, "learning_rate": 3.0665564767984654e-07, "loss": 0.2716, "step": 44220 }, { "epoch": 4.4958316388775925, "grad_norm": 0.27670761942863464, "learning_rate": 3.065332877597055e-07, "loss": 0.3332, "step": 44221 }, { "epoch": 4.4959333062220415, "grad_norm": 0.2847483456134796, "learning_rate": 3.0641095148413456e-07, "loss": 0.2861, "step": 44222 }, { "epoch": 4.49603497356649, "grad_norm": 0.28078436851501465, "learning_rate": 3.0628863885375003e-07, "loss": 0.3273, "step": 44223 }, { "epoch": 4.496136640910939, "grad_norm": 0.2791116535663605, "learning_rate": 3.061663498691669e-07, "loss": 0.301, "step": 44224 }, { "epoch": 4.496238308255388, "grad_norm": 0.2802022397518158, "learning_rate": 3.0604408453100363e-07, "loss": 0.2996, "step": 44225 }, { "epoch": 4.496339975599837, "grad_norm": 0.28191590309143066, "learning_rate": 3.059218428398736e-07, "loss": 0.3058, "step": 44226 }, { "epoch": 4.496441642944286, "grad_norm": 0.2757508158683777, "learning_rate": 3.0579962479639516e-07, "loss": 0.303, "step": 44227 }, { "epoch": 4.496543310288735, "grad_norm": 0.28528448939323425, "learning_rate": 3.056774304011817e-07, "loss": 0.3015, "step": 44228 }, { "epoch": 4.496644977633184, "grad_norm": 0.2804322838783264, "learning_rate": 3.0555525965484943e-07, "loss": 0.287, "step": 44229 }, { "epoch": 4.496746644977633, "grad_norm": 0.27989134192466736, "learning_rate": 3.054331125580162e-07, "loss": 0.294, "step": 44230 }, { "epoch": 4.496848312322082, "grad_norm": 0.2948489189147949, "learning_rate": 3.053109891112932e-07, "loss": 0.2809, "step": 44231 }, { "epoch": 4.496949979666531, "grad_norm": 0.2795666754245758, "learning_rate": 3.051888893152988e-07, "loss": 0.2867, "step": 44232 }, { "epoch": 4.49705164701098, "grad_norm": 0.26932674646377563, "learning_rate": 3.0506681317064744e-07, "loss": 0.2853, "step": 44233 }, { "epoch": 4.497153314355429, "grad_norm": 0.2753257155418396, "learning_rate": 3.0494476067795266e-07, "loss": 0.2797, "step": 44234 }, { "epoch": 4.497254981699878, "grad_norm": 0.2838943600654602, "learning_rate": 3.0482273183783105e-07, "loss": 0.2931, "step": 44235 }, { "epoch": 4.497356649044327, "grad_norm": 0.27160799503326416, "learning_rate": 3.0470072665089665e-07, "loss": 0.3196, "step": 44236 }, { "epoch": 4.497458316388776, "grad_norm": 0.2814415991306305, "learning_rate": 3.045787451177651e-07, "loss": 0.268, "step": 44237 }, { "epoch": 4.4975599837332245, "grad_norm": 0.26552364230155945, "learning_rate": 3.0445678723904917e-07, "loss": 0.2959, "step": 44238 }, { "epoch": 4.4976616510776735, "grad_norm": 0.2658768892288208, "learning_rate": 3.043348530153634e-07, "loss": 0.3292, "step": 44239 }, { "epoch": 4.497763318422122, "grad_norm": 0.2797260582447052, "learning_rate": 3.042129424473245e-07, "loss": 0.3102, "step": 44240 }, { "epoch": 4.497864985766572, "grad_norm": 0.2637847363948822, "learning_rate": 3.0409105553554366e-07, "loss": 0.2904, "step": 44241 }, { "epoch": 4.497966653111021, "grad_norm": 0.29163065552711487, "learning_rate": 3.03969192280637e-07, "loss": 0.286, "step": 44242 }, { "epoch": 4.49806832045547, "grad_norm": 0.2746945321559906, "learning_rate": 3.03847352683217e-07, "loss": 0.3155, "step": 44243 }, { "epoch": 4.498169987799919, "grad_norm": 0.28787070512771606, "learning_rate": 3.0372553674389794e-07, "loss": 0.2854, "step": 44244 }, { "epoch": 4.498271655144368, "grad_norm": 0.275764524936676, "learning_rate": 3.036037444632939e-07, "loss": 0.3076, "step": 44245 }, { "epoch": 4.498373322488817, "grad_norm": 0.2962893545627594, "learning_rate": 3.0348197584201823e-07, "loss": 0.2878, "step": 44246 }, { "epoch": 4.498474989833266, "grad_norm": 0.29757481813430786, "learning_rate": 3.033602308806849e-07, "loss": 0.3072, "step": 44247 }, { "epoch": 4.498576657177715, "grad_norm": 0.2624870240688324, "learning_rate": 3.0323850957990507e-07, "loss": 0.2808, "step": 44248 }, { "epoch": 4.498678324522164, "grad_norm": 0.28141656517982483, "learning_rate": 3.0311681194029387e-07, "loss": 0.3134, "step": 44249 }, { "epoch": 4.498779991866613, "grad_norm": 0.2896134853363037, "learning_rate": 3.0299513796246574e-07, "loss": 0.2922, "step": 44250 }, { "epoch": 4.498881659211062, "grad_norm": 0.2903764545917511, "learning_rate": 3.0287348764703016e-07, "loss": 0.2589, "step": 44251 }, { "epoch": 4.498983326555511, "grad_norm": 0.26320862770080566, "learning_rate": 3.027518609946023e-07, "loss": 0.2629, "step": 44252 }, { "epoch": 4.4990849938999595, "grad_norm": 0.2579134404659271, "learning_rate": 3.026302580057949e-07, "loss": 0.3313, "step": 44253 }, { "epoch": 4.4991866612444085, "grad_norm": 0.25114306807518005, "learning_rate": 3.0250867868121867e-07, "loss": 0.3087, "step": 44254 }, { "epoch": 4.499288328588857, "grad_norm": 0.26855120062828064, "learning_rate": 3.0238712302148863e-07, "loss": 0.2871, "step": 44255 }, { "epoch": 4.499389995933306, "grad_norm": 0.27298057079315186, "learning_rate": 3.0226559102721545e-07, "loss": 0.2756, "step": 44256 }, { "epoch": 4.499491663277755, "grad_norm": 0.3113895058631897, "learning_rate": 3.021440826990124e-07, "loss": 0.3106, "step": 44257 }, { "epoch": 4.499593330622204, "grad_norm": 0.27840450406074524, "learning_rate": 3.020225980374897e-07, "loss": 0.3313, "step": 44258 }, { "epoch": 4.499694997966653, "grad_norm": 0.27000752091407776, "learning_rate": 3.0190113704326175e-07, "loss": 0.2922, "step": 44259 }, { "epoch": 4.499796665311102, "grad_norm": 0.28541386127471924, "learning_rate": 3.0177969971693976e-07, "loss": 0.3319, "step": 44260 }, { "epoch": 4.499898332655551, "grad_norm": 0.29667386412620544, "learning_rate": 3.016582860591344e-07, "loss": 0.2738, "step": 44261 }, { "epoch": 4.5, "grad_norm": 0.2667245864868164, "learning_rate": 3.015368960704584e-07, "loss": 0.2812, "step": 44262 }, { "epoch": 4.500101667344449, "grad_norm": 0.32468125224113464, "learning_rate": 3.014155297515231e-07, "loss": 0.2831, "step": 44263 }, { "epoch": 4.500203334688898, "grad_norm": 0.27538859844207764, "learning_rate": 3.0129418710293956e-07, "loss": 0.3202, "step": 44264 }, { "epoch": 4.500305002033347, "grad_norm": 0.3035098910331726, "learning_rate": 3.0117286812531955e-07, "loss": 0.2516, "step": 44265 }, { "epoch": 4.500406669377796, "grad_norm": 0.2627762258052826, "learning_rate": 3.010515728192742e-07, "loss": 0.2719, "step": 44266 }, { "epoch": 4.500508336722245, "grad_norm": 0.26233184337615967, "learning_rate": 3.009303011854142e-07, "loss": 0.3246, "step": 44267 }, { "epoch": 4.500610004066694, "grad_norm": 0.2752624750137329, "learning_rate": 3.008090532243502e-07, "loss": 0.3013, "step": 44268 }, { "epoch": 4.500711671411143, "grad_norm": 0.26572149991989136, "learning_rate": 3.006878289366938e-07, "loss": 0.2981, "step": 44269 }, { "epoch": 4.5008133387555915, "grad_norm": 0.28746461868286133, "learning_rate": 3.005666283230557e-07, "loss": 0.3129, "step": 44270 }, { "epoch": 4.5009150061000405, "grad_norm": 0.2652265727519989, "learning_rate": 3.0044545138404546e-07, "loss": 0.2886, "step": 44271 }, { "epoch": 4.501016673444489, "grad_norm": 0.2581535577774048, "learning_rate": 3.003242981202753e-07, "loss": 0.3112, "step": 44272 }, { "epoch": 4.501118340788938, "grad_norm": 0.2783917486667633, "learning_rate": 3.0020316853235423e-07, "loss": 0.3206, "step": 44273 }, { "epoch": 4.501220008133387, "grad_norm": 0.3148552179336548, "learning_rate": 3.0008206262089165e-07, "loss": 0.2673, "step": 44274 }, { "epoch": 4.501321675477836, "grad_norm": 0.27332383394241333, "learning_rate": 2.999609803865e-07, "loss": 0.2886, "step": 44275 }, { "epoch": 4.501423342822285, "grad_norm": 0.3061423897743225, "learning_rate": 2.998399218297876e-07, "loss": 0.3117, "step": 44276 }, { "epoch": 4.501525010166734, "grad_norm": 0.26905015110969543, "learning_rate": 2.997188869513651e-07, "loss": 0.3303, "step": 44277 }, { "epoch": 4.501626677511183, "grad_norm": 0.26043352484703064, "learning_rate": 2.995978757518409e-07, "loss": 0.3147, "step": 44278 }, { "epoch": 4.501728344855632, "grad_norm": 0.31797751784324646, "learning_rate": 2.9947688823182664e-07, "loss": 0.2663, "step": 44279 }, { "epoch": 4.501830012200081, "grad_norm": 0.2822737693786621, "learning_rate": 2.9935592439193087e-07, "loss": 0.2721, "step": 44280 }, { "epoch": 4.50193167954453, "grad_norm": 0.2786712050437927, "learning_rate": 2.992349842327619e-07, "loss": 0.3152, "step": 44281 }, { "epoch": 4.502033346888979, "grad_norm": 0.2658999264240265, "learning_rate": 2.9911406775493036e-07, "loss": 0.3307, "step": 44282 }, { "epoch": 4.502135014233428, "grad_norm": 0.27326729893684387, "learning_rate": 2.989931749590458e-07, "loss": 0.3416, "step": 44283 }, { "epoch": 4.502236681577877, "grad_norm": 0.2791406810283661, "learning_rate": 2.9887230584571547e-07, "loss": 0.2873, "step": 44284 }, { "epoch": 4.502338348922326, "grad_norm": 0.28109103441238403, "learning_rate": 2.9875146041555e-07, "loss": 0.2812, "step": 44285 }, { "epoch": 4.5024400162667755, "grad_norm": 0.26583942770957947, "learning_rate": 2.986306386691573e-07, "loss": 0.2999, "step": 44286 }, { "epoch": 4.502541683611224, "grad_norm": 0.2785050570964813, "learning_rate": 2.985098406071463e-07, "loss": 0.3115, "step": 44287 }, { "epoch": 4.502643350955673, "grad_norm": 0.28518351912498474, "learning_rate": 2.983890662301248e-07, "loss": 0.2963, "step": 44288 }, { "epoch": 4.502745018300122, "grad_norm": 0.25798022747039795, "learning_rate": 2.9826831553870286e-07, "loss": 0.2764, "step": 44289 }, { "epoch": 4.502846685644571, "grad_norm": 0.27914461493492126, "learning_rate": 2.981475885334878e-07, "loss": 0.2811, "step": 44290 }, { "epoch": 4.50294835298902, "grad_norm": 0.28635019063949585, "learning_rate": 2.980268852150869e-07, "loss": 0.3229, "step": 44291 }, { "epoch": 4.503050020333469, "grad_norm": 0.25318217277526855, "learning_rate": 2.979062055841103e-07, "loss": 0.311, "step": 44292 }, { "epoch": 4.503151687677918, "grad_norm": 0.28212451934814453, "learning_rate": 2.9778554964116415e-07, "loss": 0.2987, "step": 44293 }, { "epoch": 4.503253355022367, "grad_norm": 0.2861516773700714, "learning_rate": 2.9766491738685676e-07, "loss": 0.2922, "step": 44294 }, { "epoch": 4.503355022366816, "grad_norm": 0.29029878973960876, "learning_rate": 2.975443088217966e-07, "loss": 0.304, "step": 44295 }, { "epoch": 4.503456689711265, "grad_norm": 0.29319897294044495, "learning_rate": 2.97423723946591e-07, "loss": 0.2948, "step": 44296 }, { "epoch": 4.503558357055714, "grad_norm": 0.2791794240474701, "learning_rate": 2.9730316276184724e-07, "loss": 0.3018, "step": 44297 }, { "epoch": 4.503660024400163, "grad_norm": 0.2984755039215088, "learning_rate": 2.9718262526817144e-07, "loss": 0.3251, "step": 44298 }, { "epoch": 4.503761691744612, "grad_norm": 0.266436368227005, "learning_rate": 2.9706211146617313e-07, "loss": 0.3174, "step": 44299 }, { "epoch": 4.503863359089061, "grad_norm": 0.26115158200263977, "learning_rate": 2.96941621356458e-07, "loss": 0.3129, "step": 44300 }, { "epoch": 4.50396502643351, "grad_norm": 0.285081148147583, "learning_rate": 2.968211549396327e-07, "loss": 0.303, "step": 44301 }, { "epoch": 4.5040666937779585, "grad_norm": 0.2812589704990387, "learning_rate": 2.967007122163057e-07, "loss": 0.3141, "step": 44302 }, { "epoch": 4.5041683611224075, "grad_norm": 0.2816146910190582, "learning_rate": 2.9658029318708314e-07, "loss": 0.2792, "step": 44303 }, { "epoch": 4.504270028466856, "grad_norm": 0.29473820328712463, "learning_rate": 2.964598978525701e-07, "loss": 0.3116, "step": 44304 }, { "epoch": 4.504371695811305, "grad_norm": 0.2810267210006714, "learning_rate": 2.96339526213375e-07, "loss": 0.2821, "step": 44305 }, { "epoch": 4.504473363155754, "grad_norm": 0.292508989572525, "learning_rate": 2.9621917827010406e-07, "loss": 0.2902, "step": 44306 }, { "epoch": 4.504575030500203, "grad_norm": 0.2684137225151062, "learning_rate": 2.960988540233628e-07, "loss": 0.293, "step": 44307 }, { "epoch": 4.504676697844652, "grad_norm": 0.2802521288394928, "learning_rate": 2.9597855347375694e-07, "loss": 0.2914, "step": 44308 }, { "epoch": 4.504778365189101, "grad_norm": 0.2709781527519226, "learning_rate": 2.9585827662189423e-07, "loss": 0.2596, "step": 44309 }, { "epoch": 4.50488003253355, "grad_norm": 0.2653203010559082, "learning_rate": 2.957380234683793e-07, "loss": 0.3178, "step": 44310 }, { "epoch": 4.504981699877999, "grad_norm": 0.28215306997299194, "learning_rate": 2.9561779401381774e-07, "loss": 0.2911, "step": 44311 }, { "epoch": 4.505083367222448, "grad_norm": 0.2876589596271515, "learning_rate": 2.954975882588168e-07, "loss": 0.3096, "step": 44312 }, { "epoch": 4.505185034566897, "grad_norm": 0.2871142327785492, "learning_rate": 2.9537740620398105e-07, "loss": 0.3219, "step": 44313 }, { "epoch": 4.505286701911346, "grad_norm": 0.26096391677856445, "learning_rate": 2.9525724784991495e-07, "loss": 0.2852, "step": 44314 }, { "epoch": 4.505388369255795, "grad_norm": 0.271526575088501, "learning_rate": 2.9513711319722526e-07, "loss": 0.2989, "step": 44315 }, { "epoch": 4.505490036600244, "grad_norm": 0.2735435664653778, "learning_rate": 2.9501700224651706e-07, "loss": 0.3263, "step": 44316 }, { "epoch": 4.505591703944693, "grad_norm": 0.27036231756210327, "learning_rate": 2.9489691499839546e-07, "loss": 0.2847, "step": 44317 }, { "epoch": 4.505693371289142, "grad_norm": 0.2833428382873535, "learning_rate": 2.947768514534638e-07, "loss": 0.2621, "step": 44318 }, { "epoch": 4.5057950386335905, "grad_norm": 0.27072271704673767, "learning_rate": 2.946568116123294e-07, "loss": 0.2873, "step": 44319 }, { "epoch": 4.5058967059780395, "grad_norm": 0.2941463887691498, "learning_rate": 2.9453679547559567e-07, "loss": 0.2758, "step": 44320 }, { "epoch": 4.505998373322489, "grad_norm": 0.2739861309528351, "learning_rate": 2.944168030438666e-07, "loss": 0.2985, "step": 44321 }, { "epoch": 4.506100040666938, "grad_norm": 0.26586437225341797, "learning_rate": 2.9429683431774825e-07, "loss": 0.2932, "step": 44322 }, { "epoch": 4.506201708011387, "grad_norm": 0.2798629105091095, "learning_rate": 2.9417688929784415e-07, "loss": 0.3224, "step": 44323 }, { "epoch": 4.506303375355836, "grad_norm": 0.2616485357284546, "learning_rate": 2.940569679847577e-07, "loss": 0.3206, "step": 44324 }, { "epoch": 4.506405042700285, "grad_norm": 0.2881181836128235, "learning_rate": 2.93937070379095e-07, "loss": 0.2904, "step": 44325 }, { "epoch": 4.506506710044734, "grad_norm": 0.2812501788139343, "learning_rate": 2.9381719648145846e-07, "loss": 0.3145, "step": 44326 }, { "epoch": 4.506608377389183, "grad_norm": 0.2899647653102875, "learning_rate": 2.9369734629245305e-07, "loss": 0.288, "step": 44327 }, { "epoch": 4.506710044733632, "grad_norm": 0.2956046760082245, "learning_rate": 2.935775198126811e-07, "loss": 0.3016, "step": 44328 }, { "epoch": 4.506811712078081, "grad_norm": 0.2792094945907593, "learning_rate": 2.934577170427483e-07, "loss": 0.3184, "step": 44329 }, { "epoch": 4.50691337942253, "grad_norm": 0.2682172358036041, "learning_rate": 2.9333793798325685e-07, "loss": 0.2883, "step": 44330 }, { "epoch": 4.507015046766979, "grad_norm": 0.2899288833141327, "learning_rate": 2.9321818263480905e-07, "loss": 0.2935, "step": 44331 }, { "epoch": 4.507116714111428, "grad_norm": 0.2803163528442383, "learning_rate": 2.9309845099801116e-07, "loss": 0.3105, "step": 44332 }, { "epoch": 4.507218381455877, "grad_norm": 0.2835853099822998, "learning_rate": 2.929787430734643e-07, "loss": 0.2726, "step": 44333 }, { "epoch": 4.5073200488003256, "grad_norm": 0.2482018917798996, "learning_rate": 2.9285905886177133e-07, "loss": 0.3268, "step": 44334 }, { "epoch": 4.5074217161447745, "grad_norm": 0.2902107238769531, "learning_rate": 2.9273939836353627e-07, "loss": 0.3089, "step": 44335 }, { "epoch": 4.507523383489223, "grad_norm": 0.2926720678806305, "learning_rate": 2.9261976157936187e-07, "loss": 0.2678, "step": 44336 }, { "epoch": 4.507625050833672, "grad_norm": 0.30809634923934937, "learning_rate": 2.9250014850985053e-07, "loss": 0.272, "step": 44337 }, { "epoch": 4.507726718178121, "grad_norm": 0.2567507326602936, "learning_rate": 2.923805591556039e-07, "loss": 0.2985, "step": 44338 }, { "epoch": 4.50782838552257, "grad_norm": 0.3037054240703583, "learning_rate": 2.9226099351722606e-07, "loss": 0.2945, "step": 44339 }, { "epoch": 4.507930052867019, "grad_norm": 0.30191799998283386, "learning_rate": 2.9214145159531857e-07, "loss": 0.2934, "step": 44340 }, { "epoch": 4.508031720211468, "grad_norm": 0.28485068678855896, "learning_rate": 2.920219333904828e-07, "loss": 0.2867, "step": 44341 }, { "epoch": 4.508133387555917, "grad_norm": 0.2923639714717865, "learning_rate": 2.919024389033226e-07, "loss": 0.3049, "step": 44342 }, { "epoch": 4.508235054900366, "grad_norm": 0.2809261083602905, "learning_rate": 2.9178296813443917e-07, "loss": 0.2903, "step": 44343 }, { "epoch": 4.508336722244815, "grad_norm": 0.27859556674957275, "learning_rate": 2.9166352108443375e-07, "loss": 0.2974, "step": 44344 }, { "epoch": 4.508438389589264, "grad_norm": 0.299434095621109, "learning_rate": 2.9154409775390915e-07, "loss": 0.2888, "step": 44345 }, { "epoch": 4.508540056933713, "grad_norm": 0.2650010287761688, "learning_rate": 2.914246981434665e-07, "loss": 0.2724, "step": 44346 }, { "epoch": 4.508641724278162, "grad_norm": 0.2739608883857727, "learning_rate": 2.913053222537071e-07, "loss": 0.2797, "step": 44347 }, { "epoch": 4.508743391622611, "grad_norm": 0.2895875871181488, "learning_rate": 2.911859700852321e-07, "loss": 0.283, "step": 44348 }, { "epoch": 4.50884505896706, "grad_norm": 0.2733018398284912, "learning_rate": 2.9106664163864375e-07, "loss": 0.3084, "step": 44349 }, { "epoch": 4.508946726311509, "grad_norm": 0.29532769322395325, "learning_rate": 2.909473369145427e-07, "loss": 0.2733, "step": 44350 }, { "epoch": 4.5090483936559576, "grad_norm": 0.2610410153865814, "learning_rate": 2.90828055913529e-07, "loss": 0.2935, "step": 44351 }, { "epoch": 4.5091500610004065, "grad_norm": 0.2858433723449707, "learning_rate": 2.9070879863620615e-07, "loss": 0.318, "step": 44352 }, { "epoch": 4.509251728344855, "grad_norm": 0.3065016567707062, "learning_rate": 2.9058956508317193e-07, "loss": 0.2903, "step": 44353 }, { "epoch": 4.509353395689304, "grad_norm": 0.2860676050186157, "learning_rate": 2.9047035525502806e-07, "loss": 0.2703, "step": 44354 }, { "epoch": 4.509455063033753, "grad_norm": 0.2765948474407196, "learning_rate": 2.9035116915237636e-07, "loss": 0.2975, "step": 44355 }, { "epoch": 4.509556730378202, "grad_norm": 0.2638236880302429, "learning_rate": 2.902320067758163e-07, "loss": 0.3033, "step": 44356 }, { "epoch": 4.509658397722651, "grad_norm": 0.2927236258983612, "learning_rate": 2.9011286812594795e-07, "loss": 0.2923, "step": 44357 }, { "epoch": 4.5097600650671, "grad_norm": 0.3004588782787323, "learning_rate": 2.899937532033709e-07, "loss": 0.3033, "step": 44358 }, { "epoch": 4.509861732411549, "grad_norm": 0.29663947224617004, "learning_rate": 2.898746620086873e-07, "loss": 0.2706, "step": 44359 }, { "epoch": 4.509963399755998, "grad_norm": 0.27973663806915283, "learning_rate": 2.8975559454249514e-07, "loss": 0.3042, "step": 44360 }, { "epoch": 4.510065067100447, "grad_norm": 0.28638216853141785, "learning_rate": 2.89636550805395e-07, "loss": 0.2531, "step": 44361 }, { "epoch": 4.510166734444896, "grad_norm": 0.2884501516819, "learning_rate": 2.895175307979875e-07, "loss": 0.2718, "step": 44362 }, { "epoch": 4.510268401789345, "grad_norm": 0.27795469760894775, "learning_rate": 2.8939853452086995e-07, "loss": 0.2535, "step": 44363 }, { "epoch": 4.510370069133794, "grad_norm": 0.27552559971809387, "learning_rate": 2.892795619746436e-07, "loss": 0.304, "step": 44364 }, { "epoch": 4.510471736478243, "grad_norm": 0.2897492051124573, "learning_rate": 2.891606131599073e-07, "loss": 0.2893, "step": 44365 }, { "epoch": 4.510573403822692, "grad_norm": 0.27900221943855286, "learning_rate": 2.890416880772612e-07, "loss": 0.2813, "step": 44366 }, { "epoch": 4.510675071167141, "grad_norm": 0.2719011902809143, "learning_rate": 2.889227867273031e-07, "loss": 0.2698, "step": 44367 }, { "epoch": 4.5107767385115904, "grad_norm": 0.2917725443840027, "learning_rate": 2.888039091106315e-07, "loss": 0.2757, "step": 44368 }, { "epoch": 4.510878405856039, "grad_norm": 0.27556246519088745, "learning_rate": 2.886850552278475e-07, "loss": 0.2862, "step": 44369 }, { "epoch": 4.510980073200488, "grad_norm": 0.2757965326309204, "learning_rate": 2.885662250795485e-07, "loss": 0.2938, "step": 44370 }, { "epoch": 4.511081740544937, "grad_norm": 0.26025134325027466, "learning_rate": 2.884474186663322e-07, "loss": 0.291, "step": 44371 }, { "epoch": 4.511183407889386, "grad_norm": 0.27288126945495605, "learning_rate": 2.883286359887999e-07, "loss": 0.2963, "step": 44372 }, { "epoch": 4.511285075233835, "grad_norm": 0.28738996386528015, "learning_rate": 2.8820987704754723e-07, "loss": 0.3043, "step": 44373 }, { "epoch": 4.511386742578284, "grad_norm": 0.3039238452911377, "learning_rate": 2.880911418431731e-07, "loss": 0.2902, "step": 44374 }, { "epoch": 4.511488409922733, "grad_norm": 0.3018433153629303, "learning_rate": 2.879724303762771e-07, "loss": 0.2794, "step": 44375 }, { "epoch": 4.511590077267182, "grad_norm": 0.26457762718200684, "learning_rate": 2.8785374264745536e-07, "loss": 0.2872, "step": 44376 }, { "epoch": 4.511691744611631, "grad_norm": 0.28027060627937317, "learning_rate": 2.877350786573074e-07, "loss": 0.2921, "step": 44377 }, { "epoch": 4.51179341195608, "grad_norm": 0.275861531496048, "learning_rate": 2.876164384064295e-07, "loss": 0.2658, "step": 44378 }, { "epoch": 4.511895079300529, "grad_norm": 0.27318915724754333, "learning_rate": 2.8749782189542054e-07, "loss": 0.303, "step": 44379 }, { "epoch": 4.511996746644978, "grad_norm": 0.30616724491119385, "learning_rate": 2.8737922912487783e-07, "loss": 0.3078, "step": 44380 }, { "epoch": 4.512098413989427, "grad_norm": 0.26924294233322144, "learning_rate": 2.872606600953981e-07, "loss": 0.2954, "step": 44381 }, { "epoch": 4.512200081333876, "grad_norm": 0.27229928970336914, "learning_rate": 2.871421148075809e-07, "loss": 0.293, "step": 44382 }, { "epoch": 4.512301748678325, "grad_norm": 0.2817774713039398, "learning_rate": 2.870235932620197e-07, "loss": 0.2694, "step": 44383 }, { "epoch": 4.5124034160227735, "grad_norm": 0.2593672573566437, "learning_rate": 2.8690509545931443e-07, "loss": 0.3187, "step": 44384 }, { "epoch": 4.5125050833672224, "grad_norm": 0.2905151844024658, "learning_rate": 2.86786621400062e-07, "loss": 0.2773, "step": 44385 }, { "epoch": 4.512606750711671, "grad_norm": 0.29811182618141174, "learning_rate": 2.8666817108485787e-07, "loss": 0.3094, "step": 44386 }, { "epoch": 4.51270841805612, "grad_norm": 0.2697964310646057, "learning_rate": 2.865497445142995e-07, "loss": 0.2903, "step": 44387 }, { "epoch": 4.512810085400569, "grad_norm": 0.25020503997802734, "learning_rate": 2.86431341688983e-07, "loss": 0.3001, "step": 44388 }, { "epoch": 4.512911752745018, "grad_norm": 0.2782328128814697, "learning_rate": 2.863129626095057e-07, "loss": 0.3135, "step": 44389 }, { "epoch": 4.513013420089467, "grad_norm": 0.28225523233413696, "learning_rate": 2.8619460727646375e-07, "loss": 0.3031, "step": 44390 }, { "epoch": 4.513115087433916, "grad_norm": 0.2989107370376587, "learning_rate": 2.8607627569045284e-07, "loss": 0.2749, "step": 44391 }, { "epoch": 4.513216754778365, "grad_norm": 0.29033181071281433, "learning_rate": 2.8595796785207077e-07, "loss": 0.3071, "step": 44392 }, { "epoch": 4.513318422122814, "grad_norm": 0.28201112151145935, "learning_rate": 2.858396837619104e-07, "loss": 0.3037, "step": 44393 }, { "epoch": 4.513420089467263, "grad_norm": 0.2792181372642517, "learning_rate": 2.857214234205696e-07, "loss": 0.3148, "step": 44394 }, { "epoch": 4.513521756811712, "grad_norm": 0.2612074613571167, "learning_rate": 2.8560318682864517e-07, "loss": 0.2926, "step": 44395 }, { "epoch": 4.513623424156161, "grad_norm": 0.29284152388572693, "learning_rate": 2.8548497398673046e-07, "loss": 0.342, "step": 44396 }, { "epoch": 4.51372509150061, "grad_norm": 0.2939837872982025, "learning_rate": 2.853667848954228e-07, "loss": 0.298, "step": 44397 }, { "epoch": 4.513826758845059, "grad_norm": 0.291544109582901, "learning_rate": 2.8524861955531725e-07, "loss": 0.2578, "step": 44398 }, { "epoch": 4.513928426189508, "grad_norm": 0.29582807421684265, "learning_rate": 2.8513047796700833e-07, "loss": 0.3093, "step": 44399 }, { "epoch": 4.514030093533957, "grad_norm": 0.26307913661003113, "learning_rate": 2.8501236013109113e-07, "loss": 0.3073, "step": 44400 }, { "epoch": 4.5141317608784055, "grad_norm": 0.25019416213035583, "learning_rate": 2.8489426604816073e-07, "loss": 0.3126, "step": 44401 }, { "epoch": 4.5142334282228544, "grad_norm": 0.27200761437416077, "learning_rate": 2.8477619571881445e-07, "loss": 0.2696, "step": 44402 }, { "epoch": 4.514335095567304, "grad_norm": 0.2927429974079132, "learning_rate": 2.846581491436434e-07, "loss": 0.2981, "step": 44403 }, { "epoch": 4.514436762911753, "grad_norm": 0.27601444721221924, "learning_rate": 2.84540126323245e-07, "loss": 0.3039, "step": 44404 }, { "epoch": 4.514538430256202, "grad_norm": 0.29757311940193176, "learning_rate": 2.8442212725821316e-07, "loss": 0.2793, "step": 44405 }, { "epoch": 4.514640097600651, "grad_norm": 0.2644926607608795, "learning_rate": 2.8430415194914075e-07, "loss": 0.2727, "step": 44406 }, { "epoch": 4.5147417649451, "grad_norm": 0.2903963327407837, "learning_rate": 2.841862003966245e-07, "loss": 0.2642, "step": 44407 }, { "epoch": 4.514843432289549, "grad_norm": 0.2577657997608185, "learning_rate": 2.840682726012578e-07, "loss": 0.2867, "step": 44408 }, { "epoch": 4.514945099633998, "grad_norm": 0.29134419560432434, "learning_rate": 2.839503685636341e-07, "loss": 0.2715, "step": 44409 }, { "epoch": 4.515046766978447, "grad_norm": 0.2929593026638031, "learning_rate": 2.8383248828434684e-07, "loss": 0.2981, "step": 44410 }, { "epoch": 4.515148434322896, "grad_norm": 0.2670951783657074, "learning_rate": 2.837146317639916e-07, "loss": 0.3109, "step": 44411 }, { "epoch": 4.515250101667345, "grad_norm": 0.2792415916919708, "learning_rate": 2.835967990031613e-07, "loss": 0.2609, "step": 44412 }, { "epoch": 4.515351769011794, "grad_norm": 0.2646169662475586, "learning_rate": 2.834789900024493e-07, "loss": 0.2935, "step": 44413 }, { "epoch": 4.515453436356243, "grad_norm": 0.2557654082775116, "learning_rate": 2.8336120476244955e-07, "loss": 0.34, "step": 44414 }, { "epoch": 4.515555103700692, "grad_norm": 0.28275343775749207, "learning_rate": 2.8324344328375496e-07, "loss": 0.3095, "step": 44415 }, { "epoch": 4.5156567710451405, "grad_norm": 0.3141026496887207, "learning_rate": 2.8312570556695897e-07, "loss": 0.2733, "step": 44416 }, { "epoch": 4.5157584383895895, "grad_norm": 0.2920762300491333, "learning_rate": 2.830079916126549e-07, "loss": 0.2767, "step": 44417 }, { "epoch": 4.515860105734038, "grad_norm": 0.2736679017543793, "learning_rate": 2.828903014214357e-07, "loss": 0.301, "step": 44418 }, { "epoch": 4.515961773078487, "grad_norm": 0.26509708166122437, "learning_rate": 2.8277263499389476e-07, "loss": 0.2879, "step": 44419 }, { "epoch": 4.516063440422936, "grad_norm": 0.26186999678611755, "learning_rate": 2.8265499233062323e-07, "loss": 0.2711, "step": 44420 }, { "epoch": 4.516165107767385, "grad_norm": 0.2746143043041229, "learning_rate": 2.825373734322151e-07, "loss": 0.2997, "step": 44421 }, { "epoch": 4.516266775111834, "grad_norm": 0.2897387444972992, "learning_rate": 2.8241977829926325e-07, "loss": 0.3055, "step": 44422 }, { "epoch": 4.516368442456283, "grad_norm": 0.2820929288864136, "learning_rate": 2.823022069323589e-07, "loss": 0.3046, "step": 44423 }, { "epoch": 4.516470109800732, "grad_norm": 0.2853517234325409, "learning_rate": 2.821846593320954e-07, "loss": 0.307, "step": 44424 }, { "epoch": 4.516571777145181, "grad_norm": 0.2746865153312683, "learning_rate": 2.8206713549906394e-07, "loss": 0.2921, "step": 44425 }, { "epoch": 4.51667344448963, "grad_norm": 0.30849841237068176, "learning_rate": 2.819496354338569e-07, "loss": 0.2916, "step": 44426 }, { "epoch": 4.516775111834079, "grad_norm": 0.28764137625694275, "learning_rate": 2.8183215913706706e-07, "loss": 0.3038, "step": 44427 }, { "epoch": 4.516876779178528, "grad_norm": 0.2721639573574066, "learning_rate": 2.817147066092857e-07, "loss": 0.2792, "step": 44428 }, { "epoch": 4.516978446522977, "grad_norm": 0.29213351011276245, "learning_rate": 2.815972778511045e-07, "loss": 0.3195, "step": 44429 }, { "epoch": 4.517080113867426, "grad_norm": 0.2877957820892334, "learning_rate": 2.8147987286311404e-07, "loss": 0.2979, "step": 44430 }, { "epoch": 4.517181781211875, "grad_norm": 0.2790367603302002, "learning_rate": 2.8136249164590737e-07, "loss": 0.2928, "step": 44431 }, { "epoch": 4.517283448556324, "grad_norm": 0.29879888892173767, "learning_rate": 2.8124513420007547e-07, "loss": 0.2759, "step": 44432 }, { "epoch": 4.5173851159007725, "grad_norm": 0.2732221782207489, "learning_rate": 2.8112780052620804e-07, "loss": 0.2842, "step": 44433 }, { "epoch": 4.5174867832452215, "grad_norm": 0.2590135335922241, "learning_rate": 2.810104906248984e-07, "loss": 0.2706, "step": 44434 }, { "epoch": 4.51758845058967, "grad_norm": 0.29074355959892273, "learning_rate": 2.8089320449673606e-07, "loss": 0.3206, "step": 44435 }, { "epoch": 4.517690117934119, "grad_norm": 0.27703315019607544, "learning_rate": 2.807759421423123e-07, "loss": 0.298, "step": 44436 }, { "epoch": 4.517791785278568, "grad_norm": 0.28189587593078613, "learning_rate": 2.8065870356221826e-07, "loss": 0.327, "step": 44437 }, { "epoch": 4.517893452623017, "grad_norm": 0.2965887486934662, "learning_rate": 2.80541488757044e-07, "loss": 0.2924, "step": 44438 }, { "epoch": 4.517995119967466, "grad_norm": 0.26994383335113525, "learning_rate": 2.804242977273802e-07, "loss": 0.288, "step": 44439 }, { "epoch": 4.518096787311915, "grad_norm": 0.2709432542324066, "learning_rate": 2.8030713047381696e-07, "loss": 0.2786, "step": 44440 }, { "epoch": 4.518198454656364, "grad_norm": 0.27984383702278137, "learning_rate": 2.801899869969454e-07, "loss": 0.2969, "step": 44441 }, { "epoch": 4.518300122000813, "grad_norm": 0.26603925228118896, "learning_rate": 2.8007286729735517e-07, "loss": 0.2754, "step": 44442 }, { "epoch": 4.518401789345262, "grad_norm": 0.2755586504936218, "learning_rate": 2.799557713756351e-07, "loss": 0.3032, "step": 44443 }, { "epoch": 4.518503456689711, "grad_norm": 0.2810913622379303, "learning_rate": 2.7983869923237707e-07, "loss": 0.2819, "step": 44444 }, { "epoch": 4.51860512403416, "grad_norm": 0.27032172679901123, "learning_rate": 2.7972165086816994e-07, "loss": 0.3111, "step": 44445 }, { "epoch": 4.518706791378609, "grad_norm": 0.28442031145095825, "learning_rate": 2.7960462628360277e-07, "loss": 0.2703, "step": 44446 }, { "epoch": 4.518808458723058, "grad_norm": 0.26913517713546753, "learning_rate": 2.794876254792667e-07, "loss": 0.2907, "step": 44447 }, { "epoch": 4.518910126067507, "grad_norm": 0.2938646972179413, "learning_rate": 2.793706484557501e-07, "loss": 0.2955, "step": 44448 }, { "epoch": 4.5190117934119565, "grad_norm": 0.27581584453582764, "learning_rate": 2.792536952136421e-07, "loss": 0.343, "step": 44449 }, { "epoch": 4.519113460756405, "grad_norm": 0.28270983695983887, "learning_rate": 2.7913676575353154e-07, "loss": 0.311, "step": 44450 }, { "epoch": 4.519215128100854, "grad_norm": 0.2699142396450043, "learning_rate": 2.790198600760091e-07, "loss": 0.3235, "step": 44451 }, { "epoch": 4.519316795445303, "grad_norm": 0.25151315331459045, "learning_rate": 2.7890297818166267e-07, "loss": 0.3287, "step": 44452 }, { "epoch": 4.519418462789752, "grad_norm": 0.27584609389305115, "learning_rate": 2.7878612007108007e-07, "loss": 0.2689, "step": 44453 }, { "epoch": 4.519520130134201, "grad_norm": 0.26357772946357727, "learning_rate": 2.7866928574485197e-07, "loss": 0.2845, "step": 44454 }, { "epoch": 4.51962179747865, "grad_norm": 0.2717551589012146, "learning_rate": 2.785524752035662e-07, "loss": 0.3238, "step": 44455 }, { "epoch": 4.519723464823099, "grad_norm": 0.28071990609169006, "learning_rate": 2.784356884478101e-07, "loss": 0.3279, "step": 44456 }, { "epoch": 4.519825132167548, "grad_norm": 0.28053712844848633, "learning_rate": 2.7831892547817375e-07, "loss": 0.299, "step": 44457 }, { "epoch": 4.519926799511997, "grad_norm": 0.30285531282424927, "learning_rate": 2.7820218629524496e-07, "loss": 0.2909, "step": 44458 }, { "epoch": 4.520028466856446, "grad_norm": 0.28408387303352356, "learning_rate": 2.7808547089961114e-07, "loss": 0.3069, "step": 44459 }, { "epoch": 4.520130134200895, "grad_norm": 0.3072876036167145, "learning_rate": 2.7796877929186004e-07, "loss": 0.2882, "step": 44460 }, { "epoch": 4.520231801545344, "grad_norm": 0.27519962191581726, "learning_rate": 2.7785211147258075e-07, "loss": 0.2792, "step": 44461 }, { "epoch": 4.520333468889793, "grad_norm": 0.2773798704147339, "learning_rate": 2.777354674423605e-07, "loss": 0.3157, "step": 44462 }, { "epoch": 4.520435136234242, "grad_norm": 0.27957507967948914, "learning_rate": 2.77618847201786e-07, "loss": 0.3098, "step": 44463 }, { "epoch": 4.520536803578691, "grad_norm": 0.26260390877723694, "learning_rate": 2.7750225075144575e-07, "loss": 0.326, "step": 44464 }, { "epoch": 4.5206384709231395, "grad_norm": 0.29060158133506775, "learning_rate": 2.773856780919276e-07, "loss": 0.2748, "step": 44465 }, { "epoch": 4.5207401382675885, "grad_norm": 0.2721037268638611, "learning_rate": 2.772691292238172e-07, "loss": 0.294, "step": 44466 }, { "epoch": 4.520841805612037, "grad_norm": 0.27967238426208496, "learning_rate": 2.7715260414770353e-07, "loss": 0.3139, "step": 44467 }, { "epoch": 4.520943472956486, "grad_norm": 0.2788325548171997, "learning_rate": 2.770361028641722e-07, "loss": 0.266, "step": 44468 }, { "epoch": 4.521045140300935, "grad_norm": 0.28160780668258667, "learning_rate": 2.7691962537381103e-07, "loss": 0.3003, "step": 44469 }, { "epoch": 4.521146807645384, "grad_norm": 0.2906648516654968, "learning_rate": 2.768031716772057e-07, "loss": 0.2805, "step": 44470 }, { "epoch": 4.521248474989833, "grad_norm": 0.28990745544433594, "learning_rate": 2.7668674177494415e-07, "loss": 0.3127, "step": 44471 }, { "epoch": 4.521350142334282, "grad_norm": 0.2529431879520416, "learning_rate": 2.765703356676125e-07, "loss": 0.29, "step": 44472 }, { "epoch": 4.521451809678731, "grad_norm": 0.26733434200286865, "learning_rate": 2.764539533557964e-07, "loss": 0.3327, "step": 44473 }, { "epoch": 4.52155347702318, "grad_norm": 0.28346899151802063, "learning_rate": 2.7633759484008315e-07, "loss": 0.2693, "step": 44474 }, { "epoch": 4.521655144367629, "grad_norm": 0.2690410017967224, "learning_rate": 2.76221260121059e-07, "loss": 0.329, "step": 44475 }, { "epoch": 4.521756811712078, "grad_norm": 0.2747338116168976, "learning_rate": 2.7610494919930844e-07, "loss": 0.291, "step": 44476 }, { "epoch": 4.521858479056527, "grad_norm": 0.2988603711128235, "learning_rate": 2.7598866207541986e-07, "loss": 0.2362, "step": 44477 }, { "epoch": 4.521960146400976, "grad_norm": 0.2707351744174957, "learning_rate": 2.758723987499773e-07, "loss": 0.282, "step": 44478 }, { "epoch": 4.522061813745425, "grad_norm": 0.2630758583545685, "learning_rate": 2.757561592235675e-07, "loss": 0.278, "step": 44479 }, { "epoch": 4.522163481089874, "grad_norm": 0.2911747395992279, "learning_rate": 2.7563994349677435e-07, "loss": 0.2864, "step": 44480 }, { "epoch": 4.522265148434323, "grad_norm": 0.27933168411254883, "learning_rate": 2.755237515701853e-07, "loss": 0.2777, "step": 44481 }, { "epoch": 4.5223668157787715, "grad_norm": 0.26258185505867004, "learning_rate": 2.754075834443848e-07, "loss": 0.2907, "step": 44482 }, { "epoch": 4.5224684831232205, "grad_norm": 0.27274587750434875, "learning_rate": 2.752914391199574e-07, "loss": 0.2718, "step": 44483 }, { "epoch": 4.522570150467669, "grad_norm": 0.2661385238170624, "learning_rate": 2.751753185974898e-07, "loss": 0.2993, "step": 44484 }, { "epoch": 4.522671817812119, "grad_norm": 0.2918631136417389, "learning_rate": 2.7505922187756606e-07, "loss": 0.2909, "step": 44485 }, { "epoch": 4.522773485156568, "grad_norm": 0.2622685730457306, "learning_rate": 2.7494314896077066e-07, "loss": 0.3208, "step": 44486 }, { "epoch": 4.522875152501017, "grad_norm": 0.2911258339881897, "learning_rate": 2.7482709984768987e-07, "loss": 0.2927, "step": 44487 }, { "epoch": 4.522976819845466, "grad_norm": 0.27802714705467224, "learning_rate": 2.7471107453890643e-07, "loss": 0.2805, "step": 44488 }, { "epoch": 4.523078487189915, "grad_norm": 0.29903414845466614, "learning_rate": 2.745950730350061e-07, "loss": 0.3105, "step": 44489 }, { "epoch": 4.523180154534364, "grad_norm": 0.28310590982437134, "learning_rate": 2.7447909533657225e-07, "loss": 0.2825, "step": 44490 }, { "epoch": 4.523281821878813, "grad_norm": 0.25292301177978516, "learning_rate": 2.743631414441905e-07, "loss": 0.329, "step": 44491 }, { "epoch": 4.523383489223262, "grad_norm": 0.2845308482646942, "learning_rate": 2.742472113584438e-07, "loss": 0.2739, "step": 44492 }, { "epoch": 4.523485156567711, "grad_norm": 0.28870508074760437, "learning_rate": 2.741313050799166e-07, "loss": 0.2982, "step": 44493 }, { "epoch": 4.52358682391216, "grad_norm": 0.28123652935028076, "learning_rate": 2.740154226091929e-07, "loss": 0.3155, "step": 44494 }, { "epoch": 4.523688491256609, "grad_norm": 0.28110629320144653, "learning_rate": 2.7389956394685725e-07, "loss": 0.2722, "step": 44495 }, { "epoch": 4.523790158601058, "grad_norm": 0.28691887855529785, "learning_rate": 2.737837290934908e-07, "loss": 0.2954, "step": 44496 }, { "epoch": 4.5238918259455065, "grad_norm": 0.2633965313434601, "learning_rate": 2.736679180496804e-07, "loss": 0.3075, "step": 44497 }, { "epoch": 4.5239934932899555, "grad_norm": 0.2582392394542694, "learning_rate": 2.735521308160072e-07, "loss": 0.2733, "step": 44498 }, { "epoch": 4.524095160634404, "grad_norm": 0.28351935744285583, "learning_rate": 2.734363673930557e-07, "loss": 0.2732, "step": 44499 }, { "epoch": 4.524196827978853, "grad_norm": 0.2761680781841278, "learning_rate": 2.7332062778140765e-07, "loss": 0.3369, "step": 44500 }, { "epoch": 4.524298495323302, "grad_norm": 0.28161171078681946, "learning_rate": 2.7320491198164767e-07, "loss": 0.3063, "step": 44501 }, { "epoch": 4.524400162667751, "grad_norm": 0.26038673520088196, "learning_rate": 2.73089219994358e-07, "loss": 0.2794, "step": 44502 }, { "epoch": 4.5245018300122, "grad_norm": 0.2687034606933594, "learning_rate": 2.7297355182012097e-07, "loss": 0.2789, "step": 44503 }, { "epoch": 4.524603497356649, "grad_norm": 0.2766919434070587, "learning_rate": 2.728579074595217e-07, "loss": 0.2825, "step": 44504 }, { "epoch": 4.524705164701098, "grad_norm": 0.2613452672958374, "learning_rate": 2.727422869131391e-07, "loss": 0.292, "step": 44505 }, { "epoch": 4.524806832045547, "grad_norm": 0.2732692062854767, "learning_rate": 2.726266901815572e-07, "loss": 0.3132, "step": 44506 }, { "epoch": 4.524908499389996, "grad_norm": 0.28491511940956116, "learning_rate": 2.7251111726535994e-07, "loss": 0.2992, "step": 44507 }, { "epoch": 4.525010166734445, "grad_norm": 0.3017917275428772, "learning_rate": 2.72395568165128e-07, "loss": 0.3243, "step": 44508 }, { "epoch": 4.525111834078894, "grad_norm": 0.2716335356235504, "learning_rate": 2.722800428814437e-07, "loss": 0.2973, "step": 44509 }, { "epoch": 4.525213501423343, "grad_norm": 0.2835010886192322, "learning_rate": 2.7216454141488826e-07, "loss": 0.299, "step": 44510 }, { "epoch": 4.525315168767792, "grad_norm": 0.293587327003479, "learning_rate": 2.720490637660456e-07, "loss": 0.2951, "step": 44511 }, { "epoch": 4.525416836112241, "grad_norm": 0.2673529386520386, "learning_rate": 2.719336099354958e-07, "loss": 0.2663, "step": 44512 }, { "epoch": 4.52551850345669, "grad_norm": 0.2923087179660797, "learning_rate": 2.718181799238201e-07, "loss": 0.3163, "step": 44513 }, { "epoch": 4.5256201708011385, "grad_norm": 0.2874971628189087, "learning_rate": 2.7170277373160245e-07, "loss": 0.2822, "step": 44514 }, { "epoch": 4.5257218381455875, "grad_norm": 0.27008330821990967, "learning_rate": 2.715873913594208e-07, "loss": 0.2718, "step": 44515 }, { "epoch": 4.525823505490036, "grad_norm": 0.2799304723739624, "learning_rate": 2.714720328078585e-07, "loss": 0.2859, "step": 44516 }, { "epoch": 4.525925172834485, "grad_norm": 0.2668786942958832, "learning_rate": 2.713566980774973e-07, "loss": 0.2758, "step": 44517 }, { "epoch": 4.526026840178934, "grad_norm": 0.26912808418273926, "learning_rate": 2.7124138716891736e-07, "loss": 0.2634, "step": 44518 }, { "epoch": 4.526128507523383, "grad_norm": 0.26543253660202026, "learning_rate": 2.711261000826992e-07, "loss": 0.2605, "step": 44519 }, { "epoch": 4.526230174867832, "grad_norm": 0.29323047399520874, "learning_rate": 2.7101083681942365e-07, "loss": 0.2941, "step": 44520 }, { "epoch": 4.526331842212281, "grad_norm": 0.27235883474349976, "learning_rate": 2.708955973796723e-07, "loss": 0.3094, "step": 44521 }, { "epoch": 4.52643350955673, "grad_norm": 0.30282798409461975, "learning_rate": 2.707803817640253e-07, "loss": 0.3172, "step": 44522 }, { "epoch": 4.526535176901179, "grad_norm": 0.28497952222824097, "learning_rate": 2.706651899730617e-07, "loss": 0.2697, "step": 44523 }, { "epoch": 4.526636844245628, "grad_norm": 0.29470157623291016, "learning_rate": 2.705500220073648e-07, "loss": 0.2981, "step": 44524 }, { "epoch": 4.526738511590077, "grad_norm": 0.2898086905479431, "learning_rate": 2.70434877867512e-07, "loss": 0.2849, "step": 44525 }, { "epoch": 4.526840178934526, "grad_norm": 0.28865334391593933, "learning_rate": 2.703197575540839e-07, "loss": 0.2873, "step": 44526 }, { "epoch": 4.526941846278975, "grad_norm": 0.277310311794281, "learning_rate": 2.702046610676623e-07, "loss": 0.3266, "step": 44527 }, { "epoch": 4.527043513623424, "grad_norm": 0.29416897892951965, "learning_rate": 2.7008958840882394e-07, "loss": 0.31, "step": 44528 }, { "epoch": 4.527145180967873, "grad_norm": 0.2837236821651459, "learning_rate": 2.699745395781517e-07, "loss": 0.3049, "step": 44529 }, { "epoch": 4.527246848312322, "grad_norm": 0.2780688405036926, "learning_rate": 2.6985951457622227e-07, "loss": 0.3245, "step": 44530 }, { "epoch": 4.527348515656771, "grad_norm": 0.2769095003604889, "learning_rate": 2.697445134036175e-07, "loss": 0.3418, "step": 44531 }, { "epoch": 4.52745018300122, "grad_norm": 0.2785269320011139, "learning_rate": 2.696295360609158e-07, "loss": 0.3014, "step": 44532 }, { "epoch": 4.527551850345669, "grad_norm": 0.2768259048461914, "learning_rate": 2.695145825486961e-07, "loss": 0.3149, "step": 44533 }, { "epoch": 4.527653517690118, "grad_norm": 0.28612861037254333, "learning_rate": 2.6939965286753853e-07, "loss": 0.2689, "step": 44534 }, { "epoch": 4.527755185034567, "grad_norm": 0.2772803008556366, "learning_rate": 2.692847470180204e-07, "loss": 0.3017, "step": 44535 }, { "epoch": 4.527856852379016, "grad_norm": 0.29044637084007263, "learning_rate": 2.691698650007213e-07, "loss": 0.3005, "step": 44536 }, { "epoch": 4.527958519723465, "grad_norm": 0.2878352999687195, "learning_rate": 2.6905500681622174e-07, "loss": 0.2945, "step": 44537 }, { "epoch": 4.528060187067914, "grad_norm": 0.3009844124317169, "learning_rate": 2.689401724650975e-07, "loss": 0.2906, "step": 44538 }, { "epoch": 4.528161854412363, "grad_norm": 0.28106755018234253, "learning_rate": 2.688253619479286e-07, "loss": 0.2919, "step": 44539 }, { "epoch": 4.528263521756812, "grad_norm": 0.26173949241638184, "learning_rate": 2.68710575265293e-07, "loss": 0.3135, "step": 44540 }, { "epoch": 4.528365189101261, "grad_norm": 0.26593339443206787, "learning_rate": 2.685958124177701e-07, "loss": 0.3103, "step": 44541 }, { "epoch": 4.52846685644571, "grad_norm": 0.2705959975719452, "learning_rate": 2.684810734059368e-07, "loss": 0.2987, "step": 44542 }, { "epoch": 4.528568523790159, "grad_norm": 0.28434035181999207, "learning_rate": 2.683663582303703e-07, "loss": 0.3022, "step": 44543 }, { "epoch": 4.528670191134608, "grad_norm": 0.2795378565788269, "learning_rate": 2.6825166689165183e-07, "loss": 0.2954, "step": 44544 }, { "epoch": 4.528771858479057, "grad_norm": 0.27866047620773315, "learning_rate": 2.6813699939035544e-07, "loss": 0.3214, "step": 44545 }, { "epoch": 4.5288735258235056, "grad_norm": 0.2953052818775177, "learning_rate": 2.6802235572706005e-07, "loss": 0.2804, "step": 44546 }, { "epoch": 4.5289751931679545, "grad_norm": 0.27683159708976746, "learning_rate": 2.679077359023452e-07, "loss": 0.3383, "step": 44547 }, { "epoch": 4.529076860512403, "grad_norm": 0.30828768014907837, "learning_rate": 2.6779313991678547e-07, "loss": 0.3092, "step": 44548 }, { "epoch": 4.529178527856852, "grad_norm": 0.29342469573020935, "learning_rate": 2.676785677709598e-07, "loss": 0.3099, "step": 44549 }, { "epoch": 4.529280195201301, "grad_norm": 0.2742895483970642, "learning_rate": 2.67564019465445e-07, "loss": 0.3171, "step": 44550 }, { "epoch": 4.52938186254575, "grad_norm": 0.26130980253219604, "learning_rate": 2.6744949500081717e-07, "loss": 0.2933, "step": 44551 }, { "epoch": 4.529483529890199, "grad_norm": 0.29693177342414856, "learning_rate": 2.673349943776549e-07, "loss": 0.3113, "step": 44552 }, { "epoch": 4.529585197234648, "grad_norm": 0.266828328371048, "learning_rate": 2.6722051759653365e-07, "loss": 0.2778, "step": 44553 }, { "epoch": 4.529686864579097, "grad_norm": 0.26444435119628906, "learning_rate": 2.6710606465803256e-07, "loss": 0.2706, "step": 44554 }, { "epoch": 4.529788531923546, "grad_norm": 0.2748463451862335, "learning_rate": 2.6699163556272447e-07, "loss": 0.2773, "step": 44555 }, { "epoch": 4.529890199267995, "grad_norm": 0.28753411769866943, "learning_rate": 2.668772303111883e-07, "loss": 0.2957, "step": 44556 }, { "epoch": 4.529991866612444, "grad_norm": 0.2823947072029114, "learning_rate": 2.6676284890400083e-07, "loss": 0.2914, "step": 44557 }, { "epoch": 4.530093533956893, "grad_norm": 0.274533748626709, "learning_rate": 2.666484913417361e-07, "loss": 0.2953, "step": 44558 }, { "epoch": 4.530195201301342, "grad_norm": 0.2726277709007263, "learning_rate": 2.665341576249719e-07, "loss": 0.2862, "step": 44559 }, { "epoch": 4.530296868645791, "grad_norm": 0.29074764251708984, "learning_rate": 2.6641984775428395e-07, "loss": 0.2791, "step": 44560 }, { "epoch": 4.53039853599024, "grad_norm": 0.2724508047103882, "learning_rate": 2.663055617302474e-07, "loss": 0.2831, "step": 44561 }, { "epoch": 4.530500203334689, "grad_norm": 0.2932540774345398, "learning_rate": 2.661912995534388e-07, "loss": 0.3006, "step": 44562 }, { "epoch": 4.5306018706791376, "grad_norm": 0.2995694875717163, "learning_rate": 2.66077061224434e-07, "loss": 0.2914, "step": 44563 }, { "epoch": 4.5307035380235865, "grad_norm": 0.2654430568218231, "learning_rate": 2.65962846743808e-07, "loss": 0.3105, "step": 44564 }, { "epoch": 4.530805205368035, "grad_norm": 0.26465311646461487, "learning_rate": 2.658486561121354e-07, "loss": 0.263, "step": 44565 }, { "epoch": 4.530906872712484, "grad_norm": 0.29534873366355896, "learning_rate": 2.6573448932999177e-07, "loss": 0.2933, "step": 44566 }, { "epoch": 4.531008540056934, "grad_norm": 0.2886357605457306, "learning_rate": 2.6562034639795456e-07, "loss": 0.2535, "step": 44567 }, { "epoch": 4.531110207401383, "grad_norm": 0.29119542241096497, "learning_rate": 2.655062273165959e-07, "loss": 0.2789, "step": 44568 }, { "epoch": 4.531211874745832, "grad_norm": 0.30874329805374146, "learning_rate": 2.6539213208649215e-07, "loss": 0.2771, "step": 44569 }, { "epoch": 4.531313542090281, "grad_norm": 0.26923930644989014, "learning_rate": 2.652780607082178e-07, "loss": 0.3056, "step": 44570 }, { "epoch": 4.53141520943473, "grad_norm": 0.29317334294319153, "learning_rate": 2.651640131823463e-07, "loss": 0.2838, "step": 44571 }, { "epoch": 4.531516876779179, "grad_norm": 0.2792234420776367, "learning_rate": 2.650499895094544e-07, "loss": 0.3073, "step": 44572 }, { "epoch": 4.531618544123628, "grad_norm": 0.279789537191391, "learning_rate": 2.649359896901155e-07, "loss": 0.2752, "step": 44573 }, { "epoch": 4.531720211468077, "grad_norm": 0.2847372591495514, "learning_rate": 2.6482201372490415e-07, "loss": 0.2631, "step": 44574 }, { "epoch": 4.531821878812526, "grad_norm": 0.2767566442489624, "learning_rate": 2.6470806161439323e-07, "loss": 0.2991, "step": 44575 }, { "epoch": 4.531923546156975, "grad_norm": 0.28529486060142517, "learning_rate": 2.645941333591573e-07, "loss": 0.3081, "step": 44576 }, { "epoch": 4.532025213501424, "grad_norm": 0.26830849051475525, "learning_rate": 2.644802289597731e-07, "loss": 0.3062, "step": 44577 }, { "epoch": 4.532126880845873, "grad_norm": 0.3059498965740204, "learning_rate": 2.643663484168102e-07, "loss": 0.2967, "step": 44578 }, { "epoch": 4.5322285481903215, "grad_norm": 0.2782011330127716, "learning_rate": 2.642524917308453e-07, "loss": 0.2989, "step": 44579 }, { "epoch": 4.5323302155347704, "grad_norm": 0.2652840316295624, "learning_rate": 2.641386589024508e-07, "loss": 0.3063, "step": 44580 }, { "epoch": 4.532431882879219, "grad_norm": 0.2675994634628296, "learning_rate": 2.640248499321996e-07, "loss": 0.2993, "step": 44581 }, { "epoch": 4.532533550223668, "grad_norm": 0.27045130729675293, "learning_rate": 2.6391106482066665e-07, "loss": 0.3091, "step": 44582 }, { "epoch": 4.532635217568117, "grad_norm": 0.268963485956192, "learning_rate": 2.6379730356842383e-07, "loss": 0.2688, "step": 44583 }, { "epoch": 4.532736884912566, "grad_norm": 0.26855096220970154, "learning_rate": 2.6368356617604506e-07, "loss": 0.2999, "step": 44584 }, { "epoch": 4.532838552257015, "grad_norm": 0.2804924249649048, "learning_rate": 2.635698526441022e-07, "loss": 0.3091, "step": 44585 }, { "epoch": 4.532940219601464, "grad_norm": 0.2731979787349701, "learning_rate": 2.6345616297316966e-07, "loss": 0.2801, "step": 44586 }, { "epoch": 4.533041886945913, "grad_norm": 0.2718392014503479, "learning_rate": 2.6334249716381933e-07, "loss": 0.28, "step": 44587 }, { "epoch": 4.533143554290362, "grad_norm": 0.26695361733436584, "learning_rate": 2.6322885521662347e-07, "loss": 0.2634, "step": 44588 }, { "epoch": 4.533245221634811, "grad_norm": 0.2735106647014618, "learning_rate": 2.631152371321555e-07, "loss": 0.3332, "step": 44589 }, { "epoch": 4.53334688897926, "grad_norm": 0.27393120527267456, "learning_rate": 2.6300164291098715e-07, "loss": 0.3073, "step": 44590 }, { "epoch": 4.533448556323709, "grad_norm": 0.2733212411403656, "learning_rate": 2.6288807255369087e-07, "loss": 0.2916, "step": 44591 }, { "epoch": 4.533550223668158, "grad_norm": 0.2753832936286926, "learning_rate": 2.627745260608383e-07, "loss": 0.2823, "step": 44592 }, { "epoch": 4.533651891012607, "grad_norm": 0.2922345995903015, "learning_rate": 2.626610034330024e-07, "loss": 0.2722, "step": 44593 }, { "epoch": 4.533753558357056, "grad_norm": 0.2999662756919861, "learning_rate": 2.625475046707543e-07, "loss": 0.2989, "step": 44594 }, { "epoch": 4.533855225701505, "grad_norm": 0.2922099232673645, "learning_rate": 2.624340297746658e-07, "loss": 0.2608, "step": 44595 }, { "epoch": 4.5339568930459535, "grad_norm": 0.28557470440864563, "learning_rate": 2.623205787453098e-07, "loss": 0.2896, "step": 44596 }, { "epoch": 4.5340585603904024, "grad_norm": 0.28269466757774353, "learning_rate": 2.6220715158325636e-07, "loss": 0.3025, "step": 44597 }, { "epoch": 4.534160227734851, "grad_norm": 0.2907501757144928, "learning_rate": 2.6209374828907676e-07, "loss": 0.2744, "step": 44598 }, { "epoch": 4.5342618950793, "grad_norm": 0.2713652551174164, "learning_rate": 2.6198036886334435e-07, "loss": 0.3027, "step": 44599 }, { "epoch": 4.534363562423749, "grad_norm": 0.28271397948265076, "learning_rate": 2.618670133066281e-07, "loss": 0.2995, "step": 44600 }, { "epoch": 4.534465229768198, "grad_norm": 0.2777874171733856, "learning_rate": 2.6175368161950045e-07, "loss": 0.2749, "step": 44601 }, { "epoch": 4.534566897112647, "grad_norm": 0.2703293561935425, "learning_rate": 2.6164037380253024e-07, "loss": 0.2759, "step": 44602 }, { "epoch": 4.534668564457096, "grad_norm": 0.29765254259109497, "learning_rate": 2.615270898562916e-07, "loss": 0.2905, "step": 44603 }, { "epoch": 4.534770231801545, "grad_norm": 0.2629893720149994, "learning_rate": 2.614138297813529e-07, "loss": 0.2797, "step": 44604 }, { "epoch": 4.534871899145994, "grad_norm": 0.29850319027900696, "learning_rate": 2.613005935782842e-07, "loss": 0.3109, "step": 44605 }, { "epoch": 4.534973566490443, "grad_norm": 0.27047425508499146, "learning_rate": 2.611873812476584e-07, "loss": 0.294, "step": 44606 }, { "epoch": 4.535075233834892, "grad_norm": 0.2784278392791748, "learning_rate": 2.61074192790044e-07, "loss": 0.2734, "step": 44607 }, { "epoch": 4.535176901179341, "grad_norm": 0.29055270552635193, "learning_rate": 2.60961028206011e-07, "loss": 0.2662, "step": 44608 }, { "epoch": 4.53527856852379, "grad_norm": 0.29534900188446045, "learning_rate": 2.608478874961312e-07, "loss": 0.3016, "step": 44609 }, { "epoch": 4.535380235868239, "grad_norm": 0.26411712169647217, "learning_rate": 2.607347706609731e-07, "loss": 0.3241, "step": 44610 }, { "epoch": 4.535481903212688, "grad_norm": 0.2773022949695587, "learning_rate": 2.6062167770110723e-07, "loss": 0.2996, "step": 44611 }, { "epoch": 4.535583570557137, "grad_norm": 0.30192920565605164, "learning_rate": 2.605086086171027e-07, "loss": 0.2579, "step": 44612 }, { "epoch": 4.535685237901586, "grad_norm": 0.2815212607383728, "learning_rate": 2.603955634095301e-07, "loss": 0.2601, "step": 44613 }, { "epoch": 4.535786905246035, "grad_norm": 0.2896420657634735, "learning_rate": 2.602825420789579e-07, "loss": 0.287, "step": 44614 }, { "epoch": 4.535888572590484, "grad_norm": 0.297121524810791, "learning_rate": 2.6016954462595557e-07, "loss": 0.3013, "step": 44615 }, { "epoch": 4.535990239934933, "grad_norm": 0.3051699697971344, "learning_rate": 2.6005657105109326e-07, "loss": 0.2925, "step": 44616 }, { "epoch": 4.536091907279382, "grad_norm": 0.2835882604122162, "learning_rate": 2.5994362135494e-07, "loss": 0.2954, "step": 44617 }, { "epoch": 4.536193574623831, "grad_norm": 0.2786763906478882, "learning_rate": 2.5983069553806307e-07, "loss": 0.3363, "step": 44618 }, { "epoch": 4.53629524196828, "grad_norm": 0.30462637543678284, "learning_rate": 2.5971779360103366e-07, "loss": 0.267, "step": 44619 }, { "epoch": 4.536396909312729, "grad_norm": 0.30360478162765503, "learning_rate": 2.596049155444197e-07, "loss": 0.3091, "step": 44620 }, { "epoch": 4.536498576657178, "grad_norm": 0.2708733081817627, "learning_rate": 2.594920613687896e-07, "loss": 0.3168, "step": 44621 }, { "epoch": 4.536600244001627, "grad_norm": 0.29041266441345215, "learning_rate": 2.5937923107471116e-07, "loss": 0.2845, "step": 44622 }, { "epoch": 4.536701911346076, "grad_norm": 0.2675139009952545, "learning_rate": 2.592664246627541e-07, "loss": 0.3285, "step": 44623 }, { "epoch": 4.536803578690525, "grad_norm": 0.26690545678138733, "learning_rate": 2.591536421334867e-07, "loss": 0.2962, "step": 44624 }, { "epoch": 4.536905246034974, "grad_norm": 0.26032713055610657, "learning_rate": 2.5904088348747524e-07, "loss": 0.3446, "step": 44625 }, { "epoch": 4.537006913379423, "grad_norm": 0.2780899405479431, "learning_rate": 2.589281487252904e-07, "loss": 0.3159, "step": 44626 }, { "epoch": 4.537108580723872, "grad_norm": 0.2857972979545593, "learning_rate": 2.5881543784749887e-07, "loss": 0.3362, "step": 44627 }, { "epoch": 4.5372102480683205, "grad_norm": 0.2909029424190521, "learning_rate": 2.58702750854668e-07, "loss": 0.3116, "step": 44628 }, { "epoch": 4.5373119154127695, "grad_norm": 0.30088984966278076, "learning_rate": 2.5859008774736627e-07, "loss": 0.2639, "step": 44629 }, { "epoch": 4.537413582757218, "grad_norm": 0.2659466564655304, "learning_rate": 2.5847744852616096e-07, "loss": 0.3179, "step": 44630 }, { "epoch": 4.537515250101667, "grad_norm": 0.2415352612733841, "learning_rate": 2.5836483319162e-07, "loss": 0.294, "step": 44631 }, { "epoch": 4.537616917446116, "grad_norm": 0.2895667254924774, "learning_rate": 2.58252241744309e-07, "loss": 0.2906, "step": 44632 }, { "epoch": 4.537718584790565, "grad_norm": 0.29560187458992004, "learning_rate": 2.581396741847969e-07, "loss": 0.2633, "step": 44633 }, { "epoch": 4.537820252135014, "grad_norm": 0.27311983704566956, "learning_rate": 2.5802713051365116e-07, "loss": 0.327, "step": 44634 }, { "epoch": 4.537921919479463, "grad_norm": 0.27582496404647827, "learning_rate": 2.579146107314362e-07, "loss": 0.2917, "step": 44635 }, { "epoch": 4.538023586823912, "grad_norm": 0.2989014685153961, "learning_rate": 2.5780211483872163e-07, "loss": 0.3081, "step": 44636 }, { "epoch": 4.538125254168361, "grad_norm": 0.2958161234855652, "learning_rate": 2.5768964283607313e-07, "loss": 0.2987, "step": 44637 }, { "epoch": 4.53822692151281, "grad_norm": 0.2755722403526306, "learning_rate": 2.5757719472405683e-07, "loss": 0.3387, "step": 44638 }, { "epoch": 4.538328588857259, "grad_norm": 0.25164592266082764, "learning_rate": 2.5746477050324014e-07, "loss": 0.3271, "step": 44639 }, { "epoch": 4.538430256201708, "grad_norm": 0.2856650650501251, "learning_rate": 2.573523701741887e-07, "loss": 0.3043, "step": 44640 }, { "epoch": 4.538531923546157, "grad_norm": 0.28587818145751953, "learning_rate": 2.572399937374692e-07, "loss": 0.2537, "step": 44641 }, { "epoch": 4.538633590890606, "grad_norm": 0.28233981132507324, "learning_rate": 2.5712764119364687e-07, "loss": 0.2767, "step": 44642 }, { "epoch": 4.538735258235055, "grad_norm": 0.266364187002182, "learning_rate": 2.5701531254328894e-07, "loss": 0.3041, "step": 44643 }, { "epoch": 4.538836925579504, "grad_norm": 0.27672049403190613, "learning_rate": 2.569030077869611e-07, "loss": 0.2941, "step": 44644 }, { "epoch": 4.5389385929239525, "grad_norm": 0.29127439856529236, "learning_rate": 2.567907269252279e-07, "loss": 0.2892, "step": 44645 }, { "epoch": 4.5390402602684015, "grad_norm": 0.26101475954055786, "learning_rate": 2.5667846995865664e-07, "loss": 0.3187, "step": 44646 }, { "epoch": 4.53914192761285, "grad_norm": 0.2994513213634491, "learning_rate": 2.565662368878119e-07, "loss": 0.2924, "step": 44647 }, { "epoch": 4.539243594957299, "grad_norm": 0.2847822606563568, "learning_rate": 2.564540277132588e-07, "loss": 0.3266, "step": 44648 }, { "epoch": 4.539345262301749, "grad_norm": 0.2715073525905609, "learning_rate": 2.563418424355635e-07, "loss": 0.2713, "step": 44649 }, { "epoch": 4.539446929646198, "grad_norm": 0.2782636284828186, "learning_rate": 2.5622968105529057e-07, "loss": 0.2643, "step": 44650 }, { "epoch": 4.539548596990647, "grad_norm": 0.27745968103408813, "learning_rate": 2.561175435730057e-07, "loss": 0.3026, "step": 44651 }, { "epoch": 4.539650264335096, "grad_norm": 0.2758822739124298, "learning_rate": 2.5600542998927224e-07, "loss": 0.3144, "step": 44652 }, { "epoch": 4.539751931679545, "grad_norm": 0.2797732949256897, "learning_rate": 2.558933403046565e-07, "loss": 0.2887, "step": 44653 }, { "epoch": 4.539853599023994, "grad_norm": 0.2918427288532257, "learning_rate": 2.557812745197236e-07, "loss": 0.2887, "step": 44654 }, { "epoch": 4.539955266368443, "grad_norm": 0.2745610475540161, "learning_rate": 2.556692326350357e-07, "loss": 0.2768, "step": 44655 }, { "epoch": 4.540056933712892, "grad_norm": 0.25624024868011475, "learning_rate": 2.555572146511609e-07, "loss": 0.292, "step": 44656 }, { "epoch": 4.540158601057341, "grad_norm": 0.2813558280467987, "learning_rate": 2.554452205686597e-07, "loss": 0.3092, "step": 44657 }, { "epoch": 4.54026026840179, "grad_norm": 0.2744315564632416, "learning_rate": 2.553332503880979e-07, "loss": 0.3109, "step": 44658 }, { "epoch": 4.540361935746239, "grad_norm": 0.29332756996154785, "learning_rate": 2.5522130411004043e-07, "loss": 0.321, "step": 44659 }, { "epoch": 4.5404636030906875, "grad_norm": 0.3027520179748535, "learning_rate": 2.551093817350503e-07, "loss": 0.2632, "step": 44660 }, { "epoch": 4.5405652704351365, "grad_norm": 0.29123786091804504, "learning_rate": 2.54997483263692e-07, "loss": 0.2904, "step": 44661 }, { "epoch": 4.540666937779585, "grad_norm": 0.3152146637439728, "learning_rate": 2.548856086965279e-07, "loss": 0.285, "step": 44662 }, { "epoch": 4.540768605124034, "grad_norm": 0.2973146438598633, "learning_rate": 2.5477375803412306e-07, "loss": 0.2632, "step": 44663 }, { "epoch": 4.540870272468483, "grad_norm": 0.2899914085865021, "learning_rate": 2.5466193127704044e-07, "loss": 0.2854, "step": 44664 }, { "epoch": 4.540971939812932, "grad_norm": 0.2739824950695038, "learning_rate": 2.545501284258428e-07, "loss": 0.3116, "step": 44665 }, { "epoch": 4.541073607157381, "grad_norm": 0.25960779190063477, "learning_rate": 2.544383494810948e-07, "loss": 0.2906, "step": 44666 }, { "epoch": 4.54117527450183, "grad_norm": 0.34277525544166565, "learning_rate": 2.543265944433576e-07, "loss": 0.3083, "step": 44667 }, { "epoch": 4.541276941846279, "grad_norm": 0.2781631052494049, "learning_rate": 2.542148633131958e-07, "loss": 0.2847, "step": 44668 }, { "epoch": 4.541378609190728, "grad_norm": 0.25887900590896606, "learning_rate": 2.5410315609117165e-07, "loss": 0.3251, "step": 44669 }, { "epoch": 4.541480276535177, "grad_norm": 0.2707335650920868, "learning_rate": 2.539914727778486e-07, "loss": 0.3027, "step": 44670 }, { "epoch": 4.541581943879626, "grad_norm": 0.26231512427330017, "learning_rate": 2.5387981337378797e-07, "loss": 0.2971, "step": 44671 }, { "epoch": 4.541683611224075, "grad_norm": 0.28495416045188904, "learning_rate": 2.537681778795531e-07, "loss": 0.2844, "step": 44672 }, { "epoch": 4.541785278568524, "grad_norm": 0.26925504207611084, "learning_rate": 2.5365656629570634e-07, "loss": 0.2921, "step": 44673 }, { "epoch": 4.541886945912973, "grad_norm": 0.26761218905448914, "learning_rate": 2.5354497862281055e-07, "loss": 0.274, "step": 44674 }, { "epoch": 4.541988613257422, "grad_norm": 0.24614883959293365, "learning_rate": 2.534334148614259e-07, "loss": 0.2874, "step": 44675 }, { "epoch": 4.542090280601871, "grad_norm": 0.27625492215156555, "learning_rate": 2.5332187501211746e-07, "loss": 0.2899, "step": 44676 }, { "epoch": 4.5421919479463195, "grad_norm": 0.2784021198749542, "learning_rate": 2.5321035907544364e-07, "loss": 0.2992, "step": 44677 }, { "epoch": 4.5422936152907685, "grad_norm": 0.27183130383491516, "learning_rate": 2.530988670519685e-07, "loss": 0.2752, "step": 44678 }, { "epoch": 4.542395282635217, "grad_norm": 0.28584644198417664, "learning_rate": 2.529873989422543e-07, "loss": 0.3296, "step": 44679 }, { "epoch": 4.542496949979666, "grad_norm": 0.2694264054298401, "learning_rate": 2.5287595474686067e-07, "loss": 0.291, "step": 44680 }, { "epoch": 4.542598617324115, "grad_norm": 0.2777998745441437, "learning_rate": 2.5276453446634985e-07, "loss": 0.3072, "step": 44681 }, { "epoch": 4.542700284668564, "grad_norm": 0.2995753884315491, "learning_rate": 2.526531381012831e-07, "loss": 0.2754, "step": 44682 }, { "epoch": 4.542801952013013, "grad_norm": 0.275545209646225, "learning_rate": 2.525417656522222e-07, "loss": 0.2696, "step": 44683 }, { "epoch": 4.542903619357462, "grad_norm": 0.2824799418449402, "learning_rate": 2.524304171197278e-07, "loss": 0.3156, "step": 44684 }, { "epoch": 4.543005286701911, "grad_norm": 0.29670190811157227, "learning_rate": 2.523190925043595e-07, "loss": 0.2971, "step": 44685 }, { "epoch": 4.54310695404636, "grad_norm": 0.28415319323539734, "learning_rate": 2.522077918066812e-07, "loss": 0.2901, "step": 44686 }, { "epoch": 4.543208621390809, "grad_norm": 0.29588553309440613, "learning_rate": 2.5209651502725085e-07, "loss": 0.2968, "step": 44687 }, { "epoch": 4.543310288735258, "grad_norm": 0.27302777767181396, "learning_rate": 2.519852621666291e-07, "loss": 0.2796, "step": 44688 }, { "epoch": 4.543411956079707, "grad_norm": 0.2857396900653839, "learning_rate": 2.518740332253794e-07, "loss": 0.2877, "step": 44689 }, { "epoch": 4.543513623424156, "grad_norm": 0.2572711706161499, "learning_rate": 2.517628282040585e-07, "loss": 0.3138, "step": 44690 }, { "epoch": 4.543615290768605, "grad_norm": 0.2789636254310608, "learning_rate": 2.5165164710322875e-07, "loss": 0.3031, "step": 44691 }, { "epoch": 4.543716958113054, "grad_norm": 0.2874200940132141, "learning_rate": 2.5154048992344857e-07, "loss": 0.301, "step": 44692 }, { "epoch": 4.543818625457503, "grad_norm": 0.2775205075740814, "learning_rate": 2.514293566652798e-07, "loss": 0.2828, "step": 44693 }, { "epoch": 4.5439202928019515, "grad_norm": 0.258333683013916, "learning_rate": 2.513182473292819e-07, "loss": 0.2932, "step": 44694 }, { "epoch": 4.544021960146401, "grad_norm": 0.26285532116889954, "learning_rate": 2.5120716191601336e-07, "loss": 0.2953, "step": 44695 }, { "epoch": 4.54412362749085, "grad_norm": 0.2721068561077118, "learning_rate": 2.510961004260359e-07, "loss": 0.3084, "step": 44696 }, { "epoch": 4.544225294835299, "grad_norm": 0.2661597430706024, "learning_rate": 2.509850628599064e-07, "loss": 0.3318, "step": 44697 }, { "epoch": 4.544326962179748, "grad_norm": 0.27653127908706665, "learning_rate": 2.5087404921818546e-07, "loss": 0.2921, "step": 44698 }, { "epoch": 4.544428629524197, "grad_norm": 0.2720634341239929, "learning_rate": 2.5076305950143433e-07, "loss": 0.3183, "step": 44699 }, { "epoch": 4.544530296868646, "grad_norm": 0.27098819613456726, "learning_rate": 2.506520937102086e-07, "loss": 0.2969, "step": 44700 }, { "epoch": 4.544631964213095, "grad_norm": 0.2779742181301117, "learning_rate": 2.5054115184506954e-07, "loss": 0.3019, "step": 44701 }, { "epoch": 4.544733631557544, "grad_norm": 0.2814854383468628, "learning_rate": 2.504302339065756e-07, "loss": 0.2952, "step": 44702 }, { "epoch": 4.544835298901993, "grad_norm": 0.278293251991272, "learning_rate": 2.503193398952852e-07, "loss": 0.3279, "step": 44703 }, { "epoch": 4.544936966246442, "grad_norm": 0.276813268661499, "learning_rate": 2.502084698117574e-07, "loss": 0.3147, "step": 44704 }, { "epoch": 4.545038633590891, "grad_norm": 0.2899412214756012, "learning_rate": 2.5009762365655054e-07, "loss": 0.282, "step": 44705 }, { "epoch": 4.54514030093534, "grad_norm": 0.2792489528656006, "learning_rate": 2.4998680143022367e-07, "loss": 0.2834, "step": 44706 }, { "epoch": 4.545241968279789, "grad_norm": 0.2809337377548218, "learning_rate": 2.498760031333336e-07, "loss": 0.2855, "step": 44707 }, { "epoch": 4.545343635624238, "grad_norm": 0.272872656583786, "learning_rate": 2.497652287664398e-07, "loss": 0.3312, "step": 44708 }, { "epoch": 4.5454453029686865, "grad_norm": 0.2662515640258789, "learning_rate": 2.496544783301008e-07, "loss": 0.2678, "step": 44709 }, { "epoch": 4.5455469703131355, "grad_norm": 0.2762616276741028, "learning_rate": 2.4954375182487223e-07, "loss": 0.3005, "step": 44710 }, { "epoch": 4.545648637657584, "grad_norm": 0.2785162925720215, "learning_rate": 2.4943304925131417e-07, "loss": 0.3225, "step": 44711 }, { "epoch": 4.545750305002033, "grad_norm": 0.31398603320121765, "learning_rate": 2.49322370609984e-07, "loss": 0.2872, "step": 44712 }, { "epoch": 4.545851972346482, "grad_norm": 0.2713715732097626, "learning_rate": 2.492117159014373e-07, "loss": 0.273, "step": 44713 }, { "epoch": 4.545953639690931, "grad_norm": 0.2732536494731903, "learning_rate": 2.491010851262343e-07, "loss": 0.2839, "step": 44714 }, { "epoch": 4.54605530703538, "grad_norm": 0.2728447914123535, "learning_rate": 2.489904782849306e-07, "loss": 0.3124, "step": 44715 }, { "epoch": 4.546156974379829, "grad_norm": 0.2744089663028717, "learning_rate": 2.488798953780847e-07, "loss": 0.283, "step": 44716 }, { "epoch": 4.546258641724278, "grad_norm": 0.3085152506828308, "learning_rate": 2.4876933640625103e-07, "loss": 0.3017, "step": 44717 }, { "epoch": 4.546360309068727, "grad_norm": 0.2769569158554077, "learning_rate": 2.4865880136998923e-07, "loss": 0.2715, "step": 44718 }, { "epoch": 4.546461976413176, "grad_norm": 0.2754979729652405, "learning_rate": 2.4854829026985604e-07, "loss": 0.2972, "step": 44719 }, { "epoch": 4.546563643757625, "grad_norm": 0.27307507395744324, "learning_rate": 2.4843780310640665e-07, "loss": 0.3022, "step": 44720 }, { "epoch": 4.546665311102074, "grad_norm": 0.2707323133945465, "learning_rate": 2.483273398801989e-07, "loss": 0.3041, "step": 44721 }, { "epoch": 4.546766978446523, "grad_norm": 0.2868892550468445, "learning_rate": 2.4821690059178895e-07, "loss": 0.2981, "step": 44722 }, { "epoch": 4.546868645790972, "grad_norm": 0.2843151390552521, "learning_rate": 2.4810648524173195e-07, "loss": 0.2929, "step": 44723 }, { "epoch": 4.546970313135421, "grad_norm": 0.308862566947937, "learning_rate": 2.4799609383058644e-07, "loss": 0.2826, "step": 44724 }, { "epoch": 4.54707198047987, "grad_norm": 0.2690301835536957, "learning_rate": 2.4788572635890685e-07, "loss": 0.2871, "step": 44725 }, { "epoch": 4.5471736478243185, "grad_norm": 0.2622494399547577, "learning_rate": 2.4777538282725e-07, "loss": 0.2966, "step": 44726 }, { "epoch": 4.5472753151687675, "grad_norm": 0.27930355072021484, "learning_rate": 2.4766506323617103e-07, "loss": 0.3052, "step": 44727 }, { "epoch": 4.547376982513216, "grad_norm": 0.26194092631340027, "learning_rate": 2.475547675862255e-07, "loss": 0.2861, "step": 44728 }, { "epoch": 4.547478649857665, "grad_norm": 0.28459739685058594, "learning_rate": 2.4744449587797146e-07, "loss": 0.2929, "step": 44729 }, { "epoch": 4.547580317202114, "grad_norm": 0.2600157856941223, "learning_rate": 2.473342481119617e-07, "loss": 0.2998, "step": 44730 }, { "epoch": 4.547681984546564, "grad_norm": 0.29133540391921997, "learning_rate": 2.4722402428875305e-07, "loss": 0.2805, "step": 44731 }, { "epoch": 4.547783651891013, "grad_norm": 0.27019721269607544, "learning_rate": 2.4711382440889996e-07, "loss": 0.3279, "step": 44732 }, { "epoch": 4.547885319235462, "grad_norm": 0.2958122193813324, "learning_rate": 2.4700364847295765e-07, "loss": 0.2458, "step": 44733 }, { "epoch": 4.547986986579911, "grad_norm": 0.27381157875061035, "learning_rate": 2.4689349648148173e-07, "loss": 0.306, "step": 44734 }, { "epoch": 4.54808865392436, "grad_norm": 0.2766050696372986, "learning_rate": 2.4678336843502737e-07, "loss": 0.2648, "step": 44735 }, { "epoch": 4.548190321268809, "grad_norm": 0.2808291018009186, "learning_rate": 2.466732643341485e-07, "loss": 0.3172, "step": 44736 }, { "epoch": 4.548291988613258, "grad_norm": 0.2959883511066437, "learning_rate": 2.4656318417939916e-07, "loss": 0.3035, "step": 44737 }, { "epoch": 4.548393655957707, "grad_norm": 0.2749600410461426, "learning_rate": 2.4645312797133614e-07, "loss": 0.2951, "step": 44738 }, { "epoch": 4.548495323302156, "grad_norm": 0.29109978675842285, "learning_rate": 2.4634309571051286e-07, "loss": 0.2765, "step": 44739 }, { "epoch": 4.548596990646605, "grad_norm": 0.2857304513454437, "learning_rate": 2.462330873974822e-07, "loss": 0.3062, "step": 44740 }, { "epoch": 4.5486986579910536, "grad_norm": 0.26616793870925903, "learning_rate": 2.461231030328004e-07, "loss": 0.2789, "step": 44741 }, { "epoch": 4.5488003253355025, "grad_norm": 0.2706531286239624, "learning_rate": 2.460131426170209e-07, "loss": 0.3099, "step": 44742 }, { "epoch": 4.548901992679951, "grad_norm": 0.2807246148586273, "learning_rate": 2.459032061506966e-07, "loss": 0.3151, "step": 44743 }, { "epoch": 4.5490036600244, "grad_norm": 0.31695348024368286, "learning_rate": 2.457932936343832e-07, "loss": 0.2763, "step": 44744 }, { "epoch": 4.549105327368849, "grad_norm": 0.29572242498397827, "learning_rate": 2.45683405068633e-07, "loss": 0.2845, "step": 44745 }, { "epoch": 4.549206994713298, "grad_norm": 0.27360665798187256, "learning_rate": 2.4557354045399994e-07, "loss": 0.3045, "step": 44746 }, { "epoch": 4.549308662057747, "grad_norm": 0.2846059203147888, "learning_rate": 2.4546369979103703e-07, "loss": 0.2761, "step": 44747 }, { "epoch": 4.549410329402196, "grad_norm": 0.28179123997688293, "learning_rate": 2.4535388308029927e-07, "loss": 0.3224, "step": 44748 }, { "epoch": 4.549511996746645, "grad_norm": 0.3114417493343353, "learning_rate": 2.4524409032233797e-07, "loss": 0.3052, "step": 44749 }, { "epoch": 4.549613664091094, "grad_norm": 0.30241426825523376, "learning_rate": 2.451343215177071e-07, "loss": 0.2934, "step": 44750 }, { "epoch": 4.549715331435543, "grad_norm": 0.26772385835647583, "learning_rate": 2.4502457666696013e-07, "loss": 0.2881, "step": 44751 }, { "epoch": 4.549816998779992, "grad_norm": 0.2687259614467621, "learning_rate": 2.4491485577064876e-07, "loss": 0.3264, "step": 44752 }, { "epoch": 4.549918666124441, "grad_norm": 0.27344822883605957, "learning_rate": 2.4480515882932656e-07, "loss": 0.3026, "step": 44753 }, { "epoch": 4.55002033346889, "grad_norm": 0.3099937438964844, "learning_rate": 2.4469548584354576e-07, "loss": 0.2633, "step": 44754 }, { "epoch": 4.550122000813339, "grad_norm": 0.2817653715610504, "learning_rate": 2.4458583681385983e-07, "loss": 0.2885, "step": 44755 }, { "epoch": 4.550223668157788, "grad_norm": 0.2835463881492615, "learning_rate": 2.4447621174082004e-07, "loss": 0.2746, "step": 44756 }, { "epoch": 4.550325335502237, "grad_norm": 0.2789538502693176, "learning_rate": 2.443666106249781e-07, "loss": 0.328, "step": 44757 }, { "epoch": 4.5504270028466856, "grad_norm": 0.27540045976638794, "learning_rate": 2.4425703346688814e-07, "loss": 0.3025, "step": 44758 }, { "epoch": 4.5505286701911345, "grad_norm": 0.2857937216758728, "learning_rate": 2.4414748026710124e-07, "loss": 0.3335, "step": 44759 }, { "epoch": 4.550630337535583, "grad_norm": 0.29049941897392273, "learning_rate": 2.440379510261681e-07, "loss": 0.2865, "step": 44760 }, { "epoch": 4.550732004880032, "grad_norm": 0.2933270335197449, "learning_rate": 2.4392844574464224e-07, "loss": 0.2615, "step": 44761 }, { "epoch": 4.550833672224481, "grad_norm": 0.26465630531311035, "learning_rate": 2.438189644230748e-07, "loss": 0.2872, "step": 44762 }, { "epoch": 4.55093533956893, "grad_norm": 0.27121564745903015, "learning_rate": 2.437095070620166e-07, "loss": 0.2858, "step": 44763 }, { "epoch": 4.551037006913379, "grad_norm": 0.26489129662513733, "learning_rate": 2.4360007366202034e-07, "loss": 0.293, "step": 44764 }, { "epoch": 4.551138674257828, "grad_norm": 0.3008899986743927, "learning_rate": 2.434906642236362e-07, "loss": 0.288, "step": 44765 }, { "epoch": 4.551240341602277, "grad_norm": 0.2941538989543915, "learning_rate": 2.4338127874741656e-07, "loss": 0.2888, "step": 44766 }, { "epoch": 4.551342008946726, "grad_norm": 0.2734515964984894, "learning_rate": 2.432719172339104e-07, "loss": 0.2823, "step": 44767 }, { "epoch": 4.551443676291175, "grad_norm": 0.2766127288341522, "learning_rate": 2.431625796836706e-07, "loss": 0.2834, "step": 44768 }, { "epoch": 4.551545343635624, "grad_norm": 0.27561745047569275, "learning_rate": 2.430532660972473e-07, "loss": 0.3187, "step": 44769 }, { "epoch": 4.551647010980073, "grad_norm": 0.28613942861557007, "learning_rate": 2.429439764751906e-07, "loss": 0.2875, "step": 44770 }, { "epoch": 4.551748678324522, "grad_norm": 0.28763923048973083, "learning_rate": 2.428347108180529e-07, "loss": 0.302, "step": 44771 }, { "epoch": 4.551850345668971, "grad_norm": 0.28672587871551514, "learning_rate": 2.4272546912638306e-07, "loss": 0.3054, "step": 44772 }, { "epoch": 4.55195201301342, "grad_norm": 0.2794814705848694, "learning_rate": 2.4261625140073186e-07, "loss": 0.2892, "step": 44773 }, { "epoch": 4.552053680357869, "grad_norm": 0.2831849753856659, "learning_rate": 2.425070576416483e-07, "loss": 0.2899, "step": 44774 }, { "epoch": 4.552155347702318, "grad_norm": 0.27078843116760254, "learning_rate": 2.4239788784968465e-07, "loss": 0.3351, "step": 44775 }, { "epoch": 4.5522570150467665, "grad_norm": 0.261861652135849, "learning_rate": 2.4228874202539e-07, "loss": 0.3138, "step": 44776 }, { "epoch": 4.552358682391216, "grad_norm": 0.2745434641838074, "learning_rate": 2.4217962016931327e-07, "loss": 0.2925, "step": 44777 }, { "epoch": 4.552460349735665, "grad_norm": 0.27665597200393677, "learning_rate": 2.420705222820058e-07, "loss": 0.3252, "step": 44778 }, { "epoch": 4.552562017080114, "grad_norm": 0.2808288037776947, "learning_rate": 2.419614483640165e-07, "loss": 0.2883, "step": 44779 }, { "epoch": 4.552663684424563, "grad_norm": 0.2974923551082611, "learning_rate": 2.418523984158938e-07, "loss": 0.2828, "step": 44780 }, { "epoch": 4.552765351769012, "grad_norm": 0.2929648458957672, "learning_rate": 2.4174337243818846e-07, "loss": 0.2723, "step": 44781 }, { "epoch": 4.552867019113461, "grad_norm": 0.2750646770000458, "learning_rate": 2.4163437043144946e-07, "loss": 0.26, "step": 44782 }, { "epoch": 4.55296868645791, "grad_norm": 0.26249614357948303, "learning_rate": 2.415253923962263e-07, "loss": 0.2759, "step": 44783 }, { "epoch": 4.553070353802359, "grad_norm": 0.281990110874176, "learning_rate": 2.4141643833306585e-07, "loss": 0.3103, "step": 44784 }, { "epoch": 4.553172021146808, "grad_norm": 0.26285719871520996, "learning_rate": 2.413075082425198e-07, "loss": 0.2907, "step": 44785 }, { "epoch": 4.553273688491257, "grad_norm": 0.27596044540405273, "learning_rate": 2.411986021251356e-07, "loss": 0.3007, "step": 44786 }, { "epoch": 4.553375355835706, "grad_norm": 0.2901955842971802, "learning_rate": 2.4108971998146103e-07, "loss": 0.2948, "step": 44787 }, { "epoch": 4.553477023180155, "grad_norm": 0.2633592188358307, "learning_rate": 2.409808618120468e-07, "loss": 0.2992, "step": 44788 }, { "epoch": 4.553578690524604, "grad_norm": 0.3084185719490051, "learning_rate": 2.4087202761743975e-07, "loss": 0.2983, "step": 44789 }, { "epoch": 4.553680357869053, "grad_norm": 0.27165576815605164, "learning_rate": 2.4076321739818777e-07, "loss": 0.2803, "step": 44790 }, { "epoch": 4.5537820252135015, "grad_norm": 0.2755151093006134, "learning_rate": 2.4065443115484033e-07, "loss": 0.294, "step": 44791 }, { "epoch": 4.5538836925579504, "grad_norm": 0.27490922808647156, "learning_rate": 2.405456688879448e-07, "loss": 0.3058, "step": 44792 }, { "epoch": 4.553985359902399, "grad_norm": 0.3161395788192749, "learning_rate": 2.404369305980492e-07, "loss": 0.3231, "step": 44793 }, { "epoch": 4.554087027246848, "grad_norm": 0.2927166223526001, "learning_rate": 2.403282162857012e-07, "loss": 0.2995, "step": 44794 }, { "epoch": 4.554188694591297, "grad_norm": 0.28998586535453796, "learning_rate": 2.402195259514484e-07, "loss": 0.2841, "step": 44795 }, { "epoch": 4.554290361935746, "grad_norm": 0.27288419008255005, "learning_rate": 2.40110859595839e-07, "loss": 0.2826, "step": 44796 }, { "epoch": 4.554392029280195, "grad_norm": 0.30750200152397156, "learning_rate": 2.400022172194194e-07, "loss": 0.2995, "step": 44797 }, { "epoch": 4.554493696624644, "grad_norm": 0.29442447423934937, "learning_rate": 2.39893598822738e-07, "loss": 0.3142, "step": 44798 }, { "epoch": 4.554595363969093, "grad_norm": 0.2622528672218323, "learning_rate": 2.3978500440634154e-07, "loss": 0.2882, "step": 44799 }, { "epoch": 4.554697031313542, "grad_norm": 0.29242831468582153, "learning_rate": 2.396764339707763e-07, "loss": 0.2935, "step": 44800 }, { "epoch": 4.554798698657991, "grad_norm": 0.2967871129512787, "learning_rate": 2.3956788751659023e-07, "loss": 0.3017, "step": 44801 }, { "epoch": 4.55490036600244, "grad_norm": 0.2946813702583313, "learning_rate": 2.3945936504433056e-07, "loss": 0.2782, "step": 44802 }, { "epoch": 4.555002033346889, "grad_norm": 0.26908794045448303, "learning_rate": 2.39350866554543e-07, "loss": 0.321, "step": 44803 }, { "epoch": 4.555103700691338, "grad_norm": 0.2567942440509796, "learning_rate": 2.392423920477738e-07, "loss": 0.2795, "step": 44804 }, { "epoch": 4.555205368035787, "grad_norm": 0.300101101398468, "learning_rate": 2.391339415245708e-07, "loss": 0.2983, "step": 44805 }, { "epoch": 4.555307035380236, "grad_norm": 0.28003978729248047, "learning_rate": 2.3902551498547976e-07, "loss": 0.2968, "step": 44806 }, { "epoch": 4.555408702724685, "grad_norm": 0.28399360179901123, "learning_rate": 2.389171124310458e-07, "loss": 0.2969, "step": 44807 }, { "epoch": 4.5555103700691335, "grad_norm": 0.2751299738883972, "learning_rate": 2.388087338618178e-07, "loss": 0.3007, "step": 44808 }, { "epoch": 4.5556120374135824, "grad_norm": 0.28311872482299805, "learning_rate": 2.3870037927833824e-07, "loss": 0.2841, "step": 44809 }, { "epoch": 4.555713704758031, "grad_norm": 0.2774575352668762, "learning_rate": 2.385920486811549e-07, "loss": 0.2929, "step": 44810 }, { "epoch": 4.55581537210248, "grad_norm": 0.2941662669181824, "learning_rate": 2.3848374207081415e-07, "loss": 0.2671, "step": 44811 }, { "epoch": 4.555917039446929, "grad_norm": 0.2758415937423706, "learning_rate": 2.3837545944786045e-07, "loss": 0.2851, "step": 44812 }, { "epoch": 4.556018706791379, "grad_norm": 0.2999378442764282, "learning_rate": 2.382672008128395e-07, "loss": 0.2874, "step": 44813 }, { "epoch": 4.556120374135828, "grad_norm": 0.27996233105659485, "learning_rate": 2.3815896616629696e-07, "loss": 0.3312, "step": 44814 }, { "epoch": 4.556222041480277, "grad_norm": 0.2744022011756897, "learning_rate": 2.38050755508778e-07, "loss": 0.2864, "step": 44815 }, { "epoch": 4.556323708824726, "grad_norm": 0.3009198307991028, "learning_rate": 2.3794256884082766e-07, "loss": 0.2827, "step": 44816 }, { "epoch": 4.556425376169175, "grad_norm": 0.33243119716644287, "learning_rate": 2.3783440616299058e-07, "loss": 0.3028, "step": 44817 }, { "epoch": 4.556527043513624, "grad_norm": 0.2607347369194031, "learning_rate": 2.3772626747581352e-07, "loss": 0.3352, "step": 44818 }, { "epoch": 4.556628710858073, "grad_norm": 0.2837448716163635, "learning_rate": 2.3761815277983825e-07, "loss": 0.274, "step": 44819 }, { "epoch": 4.556730378202522, "grad_norm": 0.27979132533073425, "learning_rate": 2.3751006207561156e-07, "loss": 0.2812, "step": 44820 }, { "epoch": 4.556832045546971, "grad_norm": 0.26145660877227783, "learning_rate": 2.3740199536367747e-07, "loss": 0.323, "step": 44821 }, { "epoch": 4.55693371289142, "grad_norm": 0.28555381298065186, "learning_rate": 2.3729395264458055e-07, "loss": 0.3163, "step": 44822 }, { "epoch": 4.5570353802358685, "grad_norm": 0.28695759177207947, "learning_rate": 2.371859339188648e-07, "loss": 0.2873, "step": 44823 }, { "epoch": 4.5571370475803175, "grad_norm": 0.28662657737731934, "learning_rate": 2.3707793918707423e-07, "loss": 0.2796, "step": 44824 }, { "epoch": 4.557238714924766, "grad_norm": 0.2781774699687958, "learning_rate": 2.369699684497534e-07, "loss": 0.3097, "step": 44825 }, { "epoch": 4.557340382269215, "grad_norm": 0.26041385531425476, "learning_rate": 2.3686202170744633e-07, "loss": 0.2892, "step": 44826 }, { "epoch": 4.557442049613664, "grad_norm": 0.2679232358932495, "learning_rate": 2.3675409896069533e-07, "loss": 0.2682, "step": 44827 }, { "epoch": 4.557543716958113, "grad_norm": 0.27187153697013855, "learning_rate": 2.3664620021004725e-07, "loss": 0.299, "step": 44828 }, { "epoch": 4.557645384302562, "grad_norm": 0.2766769528388977, "learning_rate": 2.3653832545604215e-07, "loss": 0.3045, "step": 44829 }, { "epoch": 4.557747051647011, "grad_norm": 0.2767340838909149, "learning_rate": 2.3643047469922464e-07, "loss": 0.2813, "step": 44830 }, { "epoch": 4.55784871899146, "grad_norm": 0.27830398082733154, "learning_rate": 2.3632264794013982e-07, "loss": 0.3175, "step": 44831 }, { "epoch": 4.557950386335909, "grad_norm": 0.28817543387413025, "learning_rate": 2.3621484517932836e-07, "loss": 0.2878, "step": 44832 }, { "epoch": 4.558052053680358, "grad_norm": 0.2821093797683716, "learning_rate": 2.361070664173354e-07, "loss": 0.2895, "step": 44833 }, { "epoch": 4.558153721024807, "grad_norm": 0.2705446779727936, "learning_rate": 2.3599931165470158e-07, "loss": 0.3015, "step": 44834 }, { "epoch": 4.558255388369256, "grad_norm": 0.289421945810318, "learning_rate": 2.3589158089197206e-07, "loss": 0.294, "step": 44835 }, { "epoch": 4.558357055713705, "grad_norm": 0.2686416804790497, "learning_rate": 2.3578387412968861e-07, "loss": 0.3038, "step": 44836 }, { "epoch": 4.558458723058154, "grad_norm": 0.290823370218277, "learning_rate": 2.3567619136839358e-07, "loss": 0.2972, "step": 44837 }, { "epoch": 4.558560390402603, "grad_norm": 0.2757573127746582, "learning_rate": 2.35568532608631e-07, "loss": 0.291, "step": 44838 }, { "epoch": 4.558662057747052, "grad_norm": 0.28006887435913086, "learning_rate": 2.354608978509404e-07, "loss": 0.2702, "step": 44839 }, { "epoch": 4.5587637250915005, "grad_norm": 0.28727829456329346, "learning_rate": 2.3535328709586524e-07, "loss": 0.3078, "step": 44840 }, { "epoch": 4.5588653924359495, "grad_norm": 0.25222572684288025, "learning_rate": 2.3524570034394954e-07, "loss": 0.327, "step": 44841 }, { "epoch": 4.558967059780398, "grad_norm": 0.30119872093200684, "learning_rate": 2.3513813759573235e-07, "loss": 0.2729, "step": 44842 }, { "epoch": 4.559068727124847, "grad_norm": 0.26959288120269775, "learning_rate": 2.3503059885175815e-07, "loss": 0.2745, "step": 44843 }, { "epoch": 4.559170394469296, "grad_norm": 0.26152703166007996, "learning_rate": 2.3492308411256603e-07, "loss": 0.2868, "step": 44844 }, { "epoch": 4.559272061813745, "grad_norm": 0.29409611225128174, "learning_rate": 2.3481559337869997e-07, "loss": 0.2979, "step": 44845 }, { "epoch": 4.559373729158194, "grad_norm": 0.28668123483657837, "learning_rate": 2.3470812665070063e-07, "loss": 0.3052, "step": 44846 }, { "epoch": 4.559475396502643, "grad_norm": 0.29148271679878235, "learning_rate": 2.3460068392910873e-07, "loss": 0.2915, "step": 44847 }, { "epoch": 4.559577063847092, "grad_norm": 0.2764245867729187, "learning_rate": 2.344932652144677e-07, "loss": 0.2877, "step": 44848 }, { "epoch": 4.559678731191541, "grad_norm": 0.29906708002090454, "learning_rate": 2.3438587050731543e-07, "loss": 0.2899, "step": 44849 }, { "epoch": 4.55978039853599, "grad_norm": 0.2993607521057129, "learning_rate": 2.3427849980819483e-07, "loss": 0.3103, "step": 44850 }, { "epoch": 4.559882065880439, "grad_norm": 0.28910696506500244, "learning_rate": 2.341711531176477e-07, "loss": 0.2832, "step": 44851 }, { "epoch": 4.559983733224888, "grad_norm": 0.2656315565109253, "learning_rate": 2.3406383043621306e-07, "loss": 0.3477, "step": 44852 }, { "epoch": 4.560085400569337, "grad_norm": 0.28410059213638306, "learning_rate": 2.3395653176443212e-07, "loss": 0.2988, "step": 44853 }, { "epoch": 4.560187067913786, "grad_norm": 0.2708081603050232, "learning_rate": 2.3384925710284613e-07, "loss": 0.3269, "step": 44854 }, { "epoch": 4.560288735258235, "grad_norm": 0.2997090816497803, "learning_rate": 2.3374200645199406e-07, "loss": 0.3514, "step": 44855 }, { "epoch": 4.560390402602684, "grad_norm": 0.27997106313705444, "learning_rate": 2.3363477981241834e-07, "loss": 0.2874, "step": 44856 }, { "epoch": 4.5604920699471325, "grad_norm": 0.2792738378047943, "learning_rate": 2.335275771846568e-07, "loss": 0.2746, "step": 44857 }, { "epoch": 4.5605937372915815, "grad_norm": 0.25247421860694885, "learning_rate": 2.334203985692518e-07, "loss": 0.2991, "step": 44858 }, { "epoch": 4.560695404636031, "grad_norm": 0.3001136779785156, "learning_rate": 2.333132439667407e-07, "loss": 0.2919, "step": 44859 }, { "epoch": 4.56079707198048, "grad_norm": 0.23700617253780365, "learning_rate": 2.332061133776653e-07, "loss": 0.2942, "step": 44860 }, { "epoch": 4.560898739324929, "grad_norm": 0.28872150182724, "learning_rate": 2.3309900680256569e-07, "loss": 0.2999, "step": 44861 }, { "epoch": 4.561000406669378, "grad_norm": 0.28736019134521484, "learning_rate": 2.3299192424197925e-07, "loss": 0.2791, "step": 44862 }, { "epoch": 4.561102074013827, "grad_norm": 0.30384206771850586, "learning_rate": 2.3288486569644774e-07, "loss": 0.3034, "step": 44863 }, { "epoch": 4.561203741358276, "grad_norm": 0.3007117807865143, "learning_rate": 2.327778311665091e-07, "loss": 0.2733, "step": 44864 }, { "epoch": 4.561305408702725, "grad_norm": 0.2767592966556549, "learning_rate": 2.3267082065270174e-07, "loss": 0.2987, "step": 44865 }, { "epoch": 4.561407076047174, "grad_norm": 0.2622130513191223, "learning_rate": 2.3256383415556694e-07, "loss": 0.3022, "step": 44866 }, { "epoch": 4.561508743391623, "grad_norm": 0.2781201899051666, "learning_rate": 2.3245687167564312e-07, "loss": 0.2983, "step": 44867 }, { "epoch": 4.561610410736072, "grad_norm": 0.2878284156322479, "learning_rate": 2.3234993321346822e-07, "loss": 0.2936, "step": 44868 }, { "epoch": 4.561712078080521, "grad_norm": 0.2926126718521118, "learning_rate": 2.3224301876958066e-07, "loss": 0.3042, "step": 44869 }, { "epoch": 4.56181374542497, "grad_norm": 0.27153950929641724, "learning_rate": 2.3213612834451948e-07, "loss": 0.2983, "step": 44870 }, { "epoch": 4.561915412769419, "grad_norm": 0.265371710062027, "learning_rate": 2.3202926193882536e-07, "loss": 0.2851, "step": 44871 }, { "epoch": 4.5620170801138675, "grad_norm": 0.28568035364151, "learning_rate": 2.319224195530334e-07, "loss": 0.2857, "step": 44872 }, { "epoch": 4.5621187474583165, "grad_norm": 0.2681216597557068, "learning_rate": 2.3181560118768376e-07, "loss": 0.2914, "step": 44873 }, { "epoch": 4.562220414802765, "grad_norm": 0.2754265367984772, "learning_rate": 2.3170880684331377e-07, "loss": 0.2972, "step": 44874 }, { "epoch": 4.562322082147214, "grad_norm": 0.26958274841308594, "learning_rate": 2.316020365204613e-07, "loss": 0.2661, "step": 44875 }, { "epoch": 4.562423749491663, "grad_norm": 0.27331632375717163, "learning_rate": 2.3149529021966544e-07, "loss": 0.2665, "step": 44876 }, { "epoch": 4.562525416836112, "grad_norm": 0.2805633842945099, "learning_rate": 2.313885679414629e-07, "loss": 0.2918, "step": 44877 }, { "epoch": 4.562627084180561, "grad_norm": 0.29650357365608215, "learning_rate": 2.312818696863911e-07, "loss": 0.3164, "step": 44878 }, { "epoch": 4.56272875152501, "grad_norm": 0.30923253297805786, "learning_rate": 2.311751954549879e-07, "loss": 0.3066, "step": 44879 }, { "epoch": 4.562830418869459, "grad_norm": 0.26959460973739624, "learning_rate": 2.3106854524779065e-07, "loss": 0.3023, "step": 44880 }, { "epoch": 4.562932086213908, "grad_norm": 0.281294584274292, "learning_rate": 2.3096191906533837e-07, "loss": 0.3291, "step": 44881 }, { "epoch": 4.563033753558357, "grad_norm": 0.29960814118385315, "learning_rate": 2.3085531690816508e-07, "loss": 0.2902, "step": 44882 }, { "epoch": 4.563135420902806, "grad_norm": 0.27085378766059875, "learning_rate": 2.3074873877681037e-07, "loss": 0.302, "step": 44883 }, { "epoch": 4.563237088247255, "grad_norm": 0.2884155213832855, "learning_rate": 2.3064218467180988e-07, "loss": 0.2699, "step": 44884 }, { "epoch": 4.563338755591704, "grad_norm": 0.2694438397884369, "learning_rate": 2.305356545937004e-07, "loss": 0.2895, "step": 44885 }, { "epoch": 4.563440422936153, "grad_norm": 0.25929537415504456, "learning_rate": 2.3042914854301878e-07, "loss": 0.313, "step": 44886 }, { "epoch": 4.563542090280602, "grad_norm": 0.32058271765708923, "learning_rate": 2.3032266652030232e-07, "loss": 0.2916, "step": 44887 }, { "epoch": 4.563643757625051, "grad_norm": 0.28641846776008606, "learning_rate": 2.3021620852608673e-07, "loss": 0.317, "step": 44888 }, { "epoch": 4.5637454249694995, "grad_norm": 0.2878949046134949, "learning_rate": 2.3010977456090767e-07, "loss": 0.3187, "step": 44889 }, { "epoch": 4.5638470923139485, "grad_norm": 0.28492385149002075, "learning_rate": 2.3000336462530248e-07, "loss": 0.2957, "step": 44890 }, { "epoch": 4.563948759658397, "grad_norm": 0.2705138325691223, "learning_rate": 2.2989697871980742e-07, "loss": 0.3253, "step": 44891 }, { "epoch": 4.564050427002846, "grad_norm": 0.238550066947937, "learning_rate": 2.297906168449565e-07, "loss": 0.2979, "step": 44892 }, { "epoch": 4.564152094347295, "grad_norm": 0.297792911529541, "learning_rate": 2.296842790012882e-07, "loss": 0.3021, "step": 44893 }, { "epoch": 4.564253761691744, "grad_norm": 0.26795655488967896, "learning_rate": 2.295779651893365e-07, "loss": 0.295, "step": 44894 }, { "epoch": 4.564355429036194, "grad_norm": 0.30647966265678406, "learning_rate": 2.2947167540963656e-07, "loss": 0.3127, "step": 44895 }, { "epoch": 4.564457096380643, "grad_norm": 0.30027297139167786, "learning_rate": 2.293654096627257e-07, "loss": 0.3133, "step": 44896 }, { "epoch": 4.564558763725092, "grad_norm": 0.2629692256450653, "learning_rate": 2.2925916794913795e-07, "loss": 0.2821, "step": 44897 }, { "epoch": 4.564660431069541, "grad_norm": 0.2982242703437805, "learning_rate": 2.2915295026940898e-07, "loss": 0.2593, "step": 44898 }, { "epoch": 4.56476209841399, "grad_norm": 0.2663644552230835, "learning_rate": 2.2904675662407282e-07, "loss": 0.2901, "step": 44899 }, { "epoch": 4.564863765758439, "grad_norm": 0.2851555049419403, "learning_rate": 2.289405870136663e-07, "loss": 0.2962, "step": 44900 }, { "epoch": 4.564965433102888, "grad_norm": 0.3003155589103699, "learning_rate": 2.2883444143872334e-07, "loss": 0.2848, "step": 44901 }, { "epoch": 4.565067100447337, "grad_norm": 0.27686718106269836, "learning_rate": 2.287283198997775e-07, "loss": 0.2842, "step": 44902 }, { "epoch": 4.565168767791786, "grad_norm": 0.25163334608078003, "learning_rate": 2.2862222239736553e-07, "loss": 0.3107, "step": 44903 }, { "epoch": 4.5652704351362345, "grad_norm": 0.2744530737400055, "learning_rate": 2.2851614893202144e-07, "loss": 0.2751, "step": 44904 }, { "epoch": 4.5653721024806835, "grad_norm": 0.2935188412666321, "learning_rate": 2.284100995042776e-07, "loss": 0.2913, "step": 44905 }, { "epoch": 4.565473769825132, "grad_norm": 0.2689168155193329, "learning_rate": 2.2830407411467138e-07, "loss": 0.3263, "step": 44906 }, { "epoch": 4.565575437169581, "grad_norm": 0.294759601354599, "learning_rate": 2.2819807276373452e-07, "loss": 0.2944, "step": 44907 }, { "epoch": 4.56567710451403, "grad_norm": 0.2825189232826233, "learning_rate": 2.2809209545200217e-07, "loss": 0.2792, "step": 44908 }, { "epoch": 4.565778771858479, "grad_norm": 0.2704578936100006, "learning_rate": 2.2798614218000726e-07, "loss": 0.3271, "step": 44909 }, { "epoch": 4.565880439202928, "grad_norm": 0.26365166902542114, "learning_rate": 2.2788021294828433e-07, "loss": 0.2898, "step": 44910 }, { "epoch": 4.565982106547377, "grad_norm": 0.2732788026332855, "learning_rate": 2.2777430775736743e-07, "loss": 0.2939, "step": 44911 }, { "epoch": 4.566083773891826, "grad_norm": 0.2664628028869629, "learning_rate": 2.2766842660778831e-07, "loss": 0.2568, "step": 44912 }, { "epoch": 4.566185441236275, "grad_norm": 0.2717067003250122, "learning_rate": 2.275625695000827e-07, "loss": 0.2891, "step": 44913 }, { "epoch": 4.566287108580724, "grad_norm": 0.2879645526409149, "learning_rate": 2.274567364347824e-07, "loss": 0.3262, "step": 44914 }, { "epoch": 4.566388775925173, "grad_norm": 0.29680201411247253, "learning_rate": 2.2735092741242083e-07, "loss": 0.2858, "step": 44915 }, { "epoch": 4.566490443269622, "grad_norm": 0.26678329706192017, "learning_rate": 2.2724514243353145e-07, "loss": 0.2877, "step": 44916 }, { "epoch": 4.566592110614071, "grad_norm": 0.2700335383415222, "learning_rate": 2.2713938149864667e-07, "loss": 0.2984, "step": 44917 }, { "epoch": 4.56669377795852, "grad_norm": 0.26354438066482544, "learning_rate": 2.270336446082999e-07, "loss": 0.2969, "step": 44918 }, { "epoch": 4.566795445302969, "grad_norm": 0.2663273513317108, "learning_rate": 2.269279317630224e-07, "loss": 0.2696, "step": 44919 }, { "epoch": 4.566897112647418, "grad_norm": 0.2735626697540283, "learning_rate": 2.268222429633482e-07, "loss": 0.2968, "step": 44920 }, { "epoch": 4.5669987799918665, "grad_norm": 0.283325731754303, "learning_rate": 2.2671657820980965e-07, "loss": 0.3117, "step": 44921 }, { "epoch": 4.5671004473363155, "grad_norm": 0.2710832953453064, "learning_rate": 2.2661093750293794e-07, "loss": 0.2854, "step": 44922 }, { "epoch": 4.567202114680764, "grad_norm": 0.3045000433921814, "learning_rate": 2.265053208432666e-07, "loss": 0.297, "step": 44923 }, { "epoch": 4.567303782025213, "grad_norm": 0.28020960092544556, "learning_rate": 2.2639972823132738e-07, "loss": 0.2814, "step": 44924 }, { "epoch": 4.567405449369662, "grad_norm": 0.2913373112678528, "learning_rate": 2.26294159667651e-07, "loss": 0.3034, "step": 44925 }, { "epoch": 4.567507116714111, "grad_norm": 0.2774537205696106, "learning_rate": 2.261886151527709e-07, "loss": 0.3021, "step": 44926 }, { "epoch": 4.56760878405856, "grad_norm": 0.2778235673904419, "learning_rate": 2.2608309468721779e-07, "loss": 0.2938, "step": 44927 }, { "epoch": 4.567710451403009, "grad_norm": 0.28499463200569153, "learning_rate": 2.25977598271524e-07, "loss": 0.2947, "step": 44928 }, { "epoch": 4.567812118747458, "grad_norm": 0.28435495495796204, "learning_rate": 2.258721259062202e-07, "loss": 0.305, "step": 44929 }, { "epoch": 4.567913786091907, "grad_norm": 0.28163859248161316, "learning_rate": 2.2576667759183823e-07, "loss": 0.2742, "step": 44930 }, { "epoch": 4.568015453436356, "grad_norm": 0.2732548415660858, "learning_rate": 2.2566125332890932e-07, "loss": 0.3258, "step": 44931 }, { "epoch": 4.568117120780805, "grad_norm": 0.29002845287323, "learning_rate": 2.2555585311796414e-07, "loss": 0.2973, "step": 44932 }, { "epoch": 4.568218788125254, "grad_norm": 0.2908256947994232, "learning_rate": 2.254504769595345e-07, "loss": 0.3026, "step": 44933 }, { "epoch": 4.568320455469703, "grad_norm": 0.25695493817329407, "learning_rate": 2.253451248541505e-07, "loss": 0.3001, "step": 44934 }, { "epoch": 4.568422122814152, "grad_norm": 0.2603917419910431, "learning_rate": 2.2523979680234288e-07, "loss": 0.2859, "step": 44935 }, { "epoch": 4.568523790158601, "grad_norm": 0.2727770209312439, "learning_rate": 2.2513449280464283e-07, "loss": 0.3065, "step": 44936 }, { "epoch": 4.56862545750305, "grad_norm": 0.2899416387081146, "learning_rate": 2.2502921286158052e-07, "loss": 0.3008, "step": 44937 }, { "epoch": 4.5687271248474985, "grad_norm": 0.28289279341697693, "learning_rate": 2.2492395697368664e-07, "loss": 0.3364, "step": 44938 }, { "epoch": 4.5688287921919475, "grad_norm": 0.308043897151947, "learning_rate": 2.2481872514149016e-07, "loss": 0.3094, "step": 44939 }, { "epoch": 4.568930459536396, "grad_norm": 0.2743840515613556, "learning_rate": 2.2471351736552294e-07, "loss": 0.2526, "step": 44940 }, { "epoch": 4.569032126880846, "grad_norm": 0.27520567178726196, "learning_rate": 2.2460833364631397e-07, "loss": 0.3242, "step": 44941 }, { "epoch": 4.569133794225295, "grad_norm": 0.2645875811576843, "learning_rate": 2.2450317398439337e-07, "loss": 0.2853, "step": 44942 }, { "epoch": 4.569235461569744, "grad_norm": 0.26951703429222107, "learning_rate": 2.2439803838029127e-07, "loss": 0.2735, "step": 44943 }, { "epoch": 4.569337128914193, "grad_norm": 0.2774500250816345, "learning_rate": 2.2429292683453673e-07, "loss": 0.294, "step": 44944 }, { "epoch": 4.569438796258642, "grad_norm": 0.279452383518219, "learning_rate": 2.2418783934765932e-07, "loss": 0.2874, "step": 44945 }, { "epoch": 4.569540463603091, "grad_norm": 0.26360079646110535, "learning_rate": 2.2408277592018913e-07, "loss": 0.2842, "step": 44946 }, { "epoch": 4.56964213094754, "grad_norm": 0.2874598801136017, "learning_rate": 2.2397773655265466e-07, "loss": 0.2827, "step": 44947 }, { "epoch": 4.569743798291989, "grad_norm": 0.2679480016231537, "learning_rate": 2.23872721245586e-07, "loss": 0.2984, "step": 44948 }, { "epoch": 4.569845465636438, "grad_norm": 0.29202863574028015, "learning_rate": 2.2376772999951057e-07, "loss": 0.2989, "step": 44949 }, { "epoch": 4.569947132980887, "grad_norm": 0.296954870223999, "learning_rate": 2.2366276281495903e-07, "loss": 0.329, "step": 44950 }, { "epoch": 4.570048800325336, "grad_norm": 0.2837477922439575, "learning_rate": 2.235578196924598e-07, "loss": 0.2844, "step": 44951 }, { "epoch": 4.570150467669785, "grad_norm": 0.2775487005710602, "learning_rate": 2.2345290063254032e-07, "loss": 0.2765, "step": 44952 }, { "epoch": 4.5702521350142336, "grad_norm": 0.267056405544281, "learning_rate": 2.233480056357301e-07, "loss": 0.2929, "step": 44953 }, { "epoch": 4.5703538023586825, "grad_norm": 0.264542818069458, "learning_rate": 2.232431347025582e-07, "loss": 0.2852, "step": 44954 }, { "epoch": 4.570455469703131, "grad_norm": 0.28857770562171936, "learning_rate": 2.231382878335514e-07, "loss": 0.2975, "step": 44955 }, { "epoch": 4.57055713704758, "grad_norm": 0.3030684292316437, "learning_rate": 2.2303346502923984e-07, "loss": 0.2996, "step": 44956 }, { "epoch": 4.570658804392029, "grad_norm": 0.3006329834461212, "learning_rate": 2.2292866629014976e-07, "loss": 0.2947, "step": 44957 }, { "epoch": 4.570760471736478, "grad_norm": 0.2711810767650604, "learning_rate": 2.2282389161681017e-07, "loss": 0.2843, "step": 44958 }, { "epoch": 4.570862139080927, "grad_norm": 0.2744877338409424, "learning_rate": 2.227191410097479e-07, "loss": 0.2871, "step": 44959 }, { "epoch": 4.570963806425376, "grad_norm": 0.2712715268135071, "learning_rate": 2.226144144694925e-07, "loss": 0.3137, "step": 44960 }, { "epoch": 4.571065473769825, "grad_norm": 0.2791879177093506, "learning_rate": 2.225097119965697e-07, "loss": 0.2974, "step": 44961 }, { "epoch": 4.571167141114274, "grad_norm": 0.29497218132019043, "learning_rate": 2.2240503359150733e-07, "loss": 0.3272, "step": 44962 }, { "epoch": 4.571268808458723, "grad_norm": 0.28827527165412903, "learning_rate": 2.2230037925483394e-07, "loss": 0.2875, "step": 44963 }, { "epoch": 4.571370475803172, "grad_norm": 0.28687259554862976, "learning_rate": 2.2219574898707518e-07, "loss": 0.2819, "step": 44964 }, { "epoch": 4.571472143147621, "grad_norm": 0.29536330699920654, "learning_rate": 2.2209114278875953e-07, "loss": 0.3173, "step": 44965 }, { "epoch": 4.57157381049207, "grad_norm": 0.2658783197402954, "learning_rate": 2.2198656066041214e-07, "loss": 0.2714, "step": 44966 }, { "epoch": 4.571675477836519, "grad_norm": 0.27538982033729553, "learning_rate": 2.21882002602562e-07, "loss": 0.2877, "step": 44967 }, { "epoch": 4.571777145180968, "grad_norm": 0.27827832102775574, "learning_rate": 2.217774686157348e-07, "loss": 0.2863, "step": 44968 }, { "epoch": 4.571878812525417, "grad_norm": 0.27316609025001526, "learning_rate": 2.216729587004568e-07, "loss": 0.2862, "step": 44969 }, { "epoch": 4.5719804798698656, "grad_norm": 0.2738897502422333, "learning_rate": 2.215684728572559e-07, "loss": 0.2768, "step": 44970 }, { "epoch": 4.5720821472143145, "grad_norm": 0.26958319544792175, "learning_rate": 2.2146401108665617e-07, "loss": 0.2995, "step": 44971 }, { "epoch": 4.572183814558763, "grad_norm": 0.2907538414001465, "learning_rate": 2.2135957338918547e-07, "loss": 0.3056, "step": 44972 }, { "epoch": 4.572285481903212, "grad_norm": 0.27453675866127014, "learning_rate": 2.212551597653706e-07, "loss": 0.2994, "step": 44973 }, { "epoch": 4.572387149247661, "grad_norm": 0.2996523082256317, "learning_rate": 2.2115077021573617e-07, "loss": 0.316, "step": 44974 }, { "epoch": 4.57248881659211, "grad_norm": 0.2772614657878876, "learning_rate": 2.2104640474080841e-07, "loss": 0.3195, "step": 44975 }, { "epoch": 4.572590483936559, "grad_norm": 0.30672356486320496, "learning_rate": 2.2094206334111246e-07, "loss": 0.3067, "step": 44976 }, { "epoch": 4.572692151281009, "grad_norm": 0.308420866727829, "learning_rate": 2.2083774601717567e-07, "loss": 0.2905, "step": 44977 }, { "epoch": 4.572793818625458, "grad_norm": 0.304647296667099, "learning_rate": 2.2073345276952262e-07, "loss": 0.2961, "step": 44978 }, { "epoch": 4.572895485969907, "grad_norm": 0.29847943782806396, "learning_rate": 2.2062918359867845e-07, "loss": 0.269, "step": 44979 }, { "epoch": 4.572997153314356, "grad_norm": 0.25926366448402405, "learning_rate": 2.2052493850516943e-07, "loss": 0.3084, "step": 44980 }, { "epoch": 4.573098820658805, "grad_norm": 0.2896473705768585, "learning_rate": 2.20420717489519e-07, "loss": 0.3361, "step": 44981 }, { "epoch": 4.573200488003254, "grad_norm": 0.2658107280731201, "learning_rate": 2.2031652055225283e-07, "loss": 0.343, "step": 44982 }, { "epoch": 4.573302155347703, "grad_norm": 0.29243364930152893, "learning_rate": 2.2021234769389776e-07, "loss": 0.3095, "step": 44983 }, { "epoch": 4.573403822692152, "grad_norm": 0.284991979598999, "learning_rate": 2.2010819891497558e-07, "loss": 0.2849, "step": 44984 }, { "epoch": 4.573505490036601, "grad_norm": 0.28172558546066284, "learning_rate": 2.2000407421601311e-07, "loss": 0.2815, "step": 44985 }, { "epoch": 4.5736071573810495, "grad_norm": 0.2805946469306946, "learning_rate": 2.198999735975338e-07, "loss": 0.2948, "step": 44986 }, { "epoch": 4.5737088247254984, "grad_norm": 0.26693108677864075, "learning_rate": 2.1979589706006278e-07, "loss": 0.2977, "step": 44987 }, { "epoch": 4.573810492069947, "grad_norm": 0.27322882413864136, "learning_rate": 2.1969184460412406e-07, "loss": 0.2825, "step": 44988 }, { "epoch": 4.573912159414396, "grad_norm": 0.2628447711467743, "learning_rate": 2.1958781623024173e-07, "loss": 0.3066, "step": 44989 }, { "epoch": 4.574013826758845, "grad_norm": 0.2969762086868286, "learning_rate": 2.1948381193894085e-07, "loss": 0.2667, "step": 44990 }, { "epoch": 4.574115494103294, "grad_norm": 0.2724356949329376, "learning_rate": 2.1937983173074328e-07, "loss": 0.3071, "step": 44991 }, { "epoch": 4.574217161447743, "grad_norm": 0.29156893491744995, "learning_rate": 2.1927587560617413e-07, "loss": 0.3123, "step": 44992 }, { "epoch": 4.574318828792192, "grad_norm": 0.2725634276866913, "learning_rate": 2.1917194356575856e-07, "loss": 0.3017, "step": 44993 }, { "epoch": 4.574420496136641, "grad_norm": 0.3003752827644348, "learning_rate": 2.1906803561001667e-07, "loss": 0.321, "step": 44994 }, { "epoch": 4.57452216348109, "grad_norm": 0.30287426710128784, "learning_rate": 2.1896415173947527e-07, "loss": 0.2894, "step": 44995 }, { "epoch": 4.574623830825539, "grad_norm": 0.2767673134803772, "learning_rate": 2.1886029195465564e-07, "loss": 0.2658, "step": 44996 }, { "epoch": 4.574725498169988, "grad_norm": 0.2758044898509979, "learning_rate": 2.1875645625608177e-07, "loss": 0.3017, "step": 44997 }, { "epoch": 4.574827165514437, "grad_norm": 0.30411243438720703, "learning_rate": 2.186526446442766e-07, "loss": 0.2749, "step": 44998 }, { "epoch": 4.574928832858886, "grad_norm": 0.2937678098678589, "learning_rate": 2.1854885711976304e-07, "loss": 0.3094, "step": 44999 }, { "epoch": 4.575030500203335, "grad_norm": 0.28757819533348083, "learning_rate": 2.184450936830651e-07, "loss": 0.2871, "step": 45000 }, { "epoch": 4.575132167547784, "grad_norm": 0.29190096259117126, "learning_rate": 2.183413543347035e-07, "loss": 0.3092, "step": 45001 }, { "epoch": 4.575233834892233, "grad_norm": 0.28235843777656555, "learning_rate": 2.182376390752017e-07, "loss": 0.3164, "step": 45002 }, { "epoch": 4.5753355022366815, "grad_norm": 0.2753603458404541, "learning_rate": 2.181339479050837e-07, "loss": 0.2752, "step": 45003 }, { "epoch": 4.5754371695811304, "grad_norm": 0.27912795543670654, "learning_rate": 2.180302808248691e-07, "loss": 0.2629, "step": 45004 }, { "epoch": 4.575538836925579, "grad_norm": 0.26764383912086487, "learning_rate": 2.179266378350825e-07, "loss": 0.287, "step": 45005 }, { "epoch": 4.575640504270028, "grad_norm": 0.26754230260849, "learning_rate": 2.178230189362446e-07, "loss": 0.2855, "step": 45006 }, { "epoch": 4.575742171614477, "grad_norm": 0.2676054537296295, "learning_rate": 2.177194241288777e-07, "loss": 0.2911, "step": 45007 }, { "epoch": 4.575843838958926, "grad_norm": 0.2809711992740631, "learning_rate": 2.1761585341350479e-07, "loss": 0.266, "step": 45008 }, { "epoch": 4.575945506303375, "grad_norm": 0.26885873079299927, "learning_rate": 2.1751230679064538e-07, "loss": 0.2469, "step": 45009 }, { "epoch": 4.576047173647824, "grad_norm": 0.28060540556907654, "learning_rate": 2.1740878426082412e-07, "loss": 0.2637, "step": 45010 }, { "epoch": 4.576148840992273, "grad_norm": 0.267208456993103, "learning_rate": 2.1730528582455944e-07, "loss": 0.3005, "step": 45011 }, { "epoch": 4.576250508336722, "grad_norm": 0.3048707842826843, "learning_rate": 2.1720181148237485e-07, "loss": 0.2949, "step": 45012 }, { "epoch": 4.576352175681171, "grad_norm": 0.2803093194961548, "learning_rate": 2.1709836123479157e-07, "loss": 0.3135, "step": 45013 }, { "epoch": 4.57645384302562, "grad_norm": 0.28223884105682373, "learning_rate": 2.1699493508232916e-07, "loss": 0.3062, "step": 45014 }, { "epoch": 4.576555510370069, "grad_norm": 0.2662176489830017, "learning_rate": 2.1689153302551002e-07, "loss": 0.2675, "step": 45015 }, { "epoch": 4.576657177714518, "grad_norm": 0.28489577770233154, "learning_rate": 2.1678815506485485e-07, "loss": 0.3184, "step": 45016 }, { "epoch": 4.576758845058967, "grad_norm": 0.2851276099681854, "learning_rate": 2.1668480120088377e-07, "loss": 0.3218, "step": 45017 }, { "epoch": 4.576860512403416, "grad_norm": 0.2945162355899811, "learning_rate": 2.1658147143411856e-07, "loss": 0.2866, "step": 45018 }, { "epoch": 4.576962179747865, "grad_norm": 0.28734198212623596, "learning_rate": 2.1647816576507886e-07, "loss": 0.2767, "step": 45019 }, { "epoch": 4.5770638470923135, "grad_norm": 0.2645193934440613, "learning_rate": 2.1637488419428586e-07, "loss": 0.3147, "step": 45020 }, { "epoch": 4.5771655144367625, "grad_norm": 0.30468398332595825, "learning_rate": 2.1627162672225865e-07, "loss": 0.3005, "step": 45021 }, { "epoch": 4.577267181781211, "grad_norm": 0.27492865920066833, "learning_rate": 2.1616839334951788e-07, "loss": 0.3205, "step": 45022 }, { "epoch": 4.577368849125661, "grad_norm": 0.27556082606315613, "learning_rate": 2.1606518407658539e-07, "loss": 0.2765, "step": 45023 }, { "epoch": 4.57747051647011, "grad_norm": 0.2641027271747589, "learning_rate": 2.159619989039785e-07, "loss": 0.2642, "step": 45024 }, { "epoch": 4.577572183814559, "grad_norm": 0.25211936235427856, "learning_rate": 2.1585883783221905e-07, "loss": 0.2922, "step": 45025 }, { "epoch": 4.577673851159008, "grad_norm": 0.27611133456230164, "learning_rate": 2.1575570086182552e-07, "loss": 0.2786, "step": 45026 }, { "epoch": 4.577775518503457, "grad_norm": 0.27646639943122864, "learning_rate": 2.1565258799331746e-07, "loss": 0.3019, "step": 45027 }, { "epoch": 4.577877185847906, "grad_norm": 0.26908406615257263, "learning_rate": 2.1554949922721503e-07, "loss": 0.3247, "step": 45028 }, { "epoch": 4.577978853192355, "grad_norm": 0.27953794598579407, "learning_rate": 2.154464345640378e-07, "loss": 0.3083, "step": 45029 }, { "epoch": 4.578080520536804, "grad_norm": 0.25923457741737366, "learning_rate": 2.1534339400430372e-07, "loss": 0.2838, "step": 45030 }, { "epoch": 4.578182187881253, "grad_norm": 0.2850012481212616, "learning_rate": 2.1524037754853232e-07, "loss": 0.3181, "step": 45031 }, { "epoch": 4.578283855225702, "grad_norm": 0.2758944034576416, "learning_rate": 2.1513738519724326e-07, "loss": 0.2976, "step": 45032 }, { "epoch": 4.578385522570151, "grad_norm": 0.3308301270008087, "learning_rate": 2.1503441695095605e-07, "loss": 0.2962, "step": 45033 }, { "epoch": 4.5784871899146, "grad_norm": 0.2824176847934723, "learning_rate": 2.14931472810187e-07, "loss": 0.3354, "step": 45034 }, { "epoch": 4.5785888572590485, "grad_norm": 0.2969481647014618, "learning_rate": 2.1482855277545677e-07, "loss": 0.2594, "step": 45035 }, { "epoch": 4.5786905246034975, "grad_norm": 0.2842392325401306, "learning_rate": 2.1472565684728275e-07, "loss": 0.2711, "step": 45036 }, { "epoch": 4.578792191947946, "grad_norm": 0.2689334750175476, "learning_rate": 2.146227850261834e-07, "loss": 0.2866, "step": 45037 }, { "epoch": 4.578893859292395, "grad_norm": 0.2602107524871826, "learning_rate": 2.145199373126783e-07, "loss": 0.3227, "step": 45038 }, { "epoch": 4.578995526636844, "grad_norm": 0.2672370374202728, "learning_rate": 2.1441711370728367e-07, "loss": 0.2642, "step": 45039 }, { "epoch": 4.579097193981293, "grad_norm": 0.2794048488140106, "learning_rate": 2.1431431421051918e-07, "loss": 0.2894, "step": 45040 }, { "epoch": 4.579198861325742, "grad_norm": 0.28488603234291077, "learning_rate": 2.14211538822901e-07, "loss": 0.3002, "step": 45041 }, { "epoch": 4.579300528670191, "grad_norm": 0.2940811812877655, "learning_rate": 2.1410878754494823e-07, "loss": 0.2836, "step": 45042 }, { "epoch": 4.57940219601464, "grad_norm": 0.2830996811389923, "learning_rate": 2.1400606037717874e-07, "loss": 0.3099, "step": 45043 }, { "epoch": 4.579503863359089, "grad_norm": 0.2725156545639038, "learning_rate": 2.1390335732010826e-07, "loss": 0.2846, "step": 45044 }, { "epoch": 4.579605530703538, "grad_norm": 0.27599284052848816, "learning_rate": 2.1380067837425578e-07, "loss": 0.2873, "step": 45045 }, { "epoch": 4.579707198047987, "grad_norm": 0.30078864097595215, "learning_rate": 2.1369802354013869e-07, "loss": 0.2876, "step": 45046 }, { "epoch": 4.579808865392436, "grad_norm": 0.3060147166252136, "learning_rate": 2.1359539281827212e-07, "loss": 0.2947, "step": 45047 }, { "epoch": 4.579910532736885, "grad_norm": 0.2834894359111786, "learning_rate": 2.134927862091757e-07, "loss": 0.2978, "step": 45048 }, { "epoch": 4.580012200081334, "grad_norm": 0.272066593170166, "learning_rate": 2.1339020371336506e-07, "loss": 0.3484, "step": 45049 }, { "epoch": 4.580113867425783, "grad_norm": 0.3060777187347412, "learning_rate": 2.132876453313576e-07, "loss": 0.2859, "step": 45050 }, { "epoch": 4.580215534770232, "grad_norm": 0.2918471693992615, "learning_rate": 2.1318511106366847e-07, "loss": 0.2683, "step": 45051 }, { "epoch": 4.5803172021146805, "grad_norm": 0.27096596360206604, "learning_rate": 2.1308260091081557e-07, "loss": 0.2837, "step": 45052 }, { "epoch": 4.5804188694591295, "grad_norm": 0.27243462204933167, "learning_rate": 2.1298011487331517e-07, "loss": 0.3006, "step": 45053 }, { "epoch": 4.580520536803578, "grad_norm": 0.2688453495502472, "learning_rate": 2.1287765295168293e-07, "loss": 0.2977, "step": 45054 }, { "epoch": 4.580622204148027, "grad_norm": 0.3027033507823944, "learning_rate": 2.1277521514643573e-07, "loss": 0.2859, "step": 45055 }, { "epoch": 4.580723871492476, "grad_norm": 0.28074881434440613, "learning_rate": 2.1267280145808977e-07, "loss": 0.2986, "step": 45056 }, { "epoch": 4.580825538836925, "grad_norm": 0.2807285189628601, "learning_rate": 2.1257041188715966e-07, "loss": 0.3014, "step": 45057 }, { "epoch": 4.580927206181374, "grad_norm": 0.283153772354126, "learning_rate": 2.1246804643416275e-07, "loss": 0.2963, "step": 45058 }, { "epoch": 4.581028873525824, "grad_norm": 0.27210724353790283, "learning_rate": 2.1236570509961473e-07, "loss": 0.3089, "step": 45059 }, { "epoch": 4.581130540870273, "grad_norm": 0.2768929600715637, "learning_rate": 2.1226338788402968e-07, "loss": 0.3046, "step": 45060 }, { "epoch": 4.581232208214722, "grad_norm": 0.27907806634902954, "learning_rate": 2.121610947879238e-07, "loss": 0.3013, "step": 45061 }, { "epoch": 4.581333875559171, "grad_norm": 0.27817302942276, "learning_rate": 2.1205882581181335e-07, "loss": 0.2785, "step": 45062 }, { "epoch": 4.58143554290362, "grad_norm": 0.27162739634513855, "learning_rate": 2.119565809562124e-07, "loss": 0.2914, "step": 45063 }, { "epoch": 4.581537210248069, "grad_norm": 0.28930822014808655, "learning_rate": 2.118543602216361e-07, "loss": 0.3329, "step": 45064 }, { "epoch": 4.581638877592518, "grad_norm": 0.27865034341812134, "learning_rate": 2.1175216360860007e-07, "loss": 0.2809, "step": 45065 }, { "epoch": 4.581740544936967, "grad_norm": 0.2835235297679901, "learning_rate": 2.1164999111761898e-07, "loss": 0.2843, "step": 45066 }, { "epoch": 4.5818422122814155, "grad_norm": 0.2765059173107147, "learning_rate": 2.1154784274920625e-07, "loss": 0.3238, "step": 45067 }, { "epoch": 4.5819438796258645, "grad_norm": 0.30228307843208313, "learning_rate": 2.1144571850387873e-07, "loss": 0.2788, "step": 45068 }, { "epoch": 4.582045546970313, "grad_norm": 0.25592321157455444, "learning_rate": 2.1134361838214935e-07, "loss": 0.3126, "step": 45069 }, { "epoch": 4.582147214314762, "grad_norm": 0.3025347888469696, "learning_rate": 2.112415423845332e-07, "loss": 0.276, "step": 45070 }, { "epoch": 4.582248881659211, "grad_norm": 0.29277777671813965, "learning_rate": 2.1113949051154326e-07, "loss": 0.2989, "step": 45071 }, { "epoch": 4.58235054900366, "grad_norm": 0.2617664635181427, "learning_rate": 2.1103746276369575e-07, "loss": 0.3111, "step": 45072 }, { "epoch": 4.582452216348109, "grad_norm": 0.29589998722076416, "learning_rate": 2.1093545914150305e-07, "loss": 0.2875, "step": 45073 }, { "epoch": 4.582553883692558, "grad_norm": 0.26599669456481934, "learning_rate": 2.1083347964547863e-07, "loss": 0.2867, "step": 45074 }, { "epoch": 4.582655551037007, "grad_norm": 0.2699335217475891, "learning_rate": 2.1073152427613764e-07, "loss": 0.2886, "step": 45075 }, { "epoch": 4.582757218381456, "grad_norm": 0.25777068734169006, "learning_rate": 2.1062959303399356e-07, "loss": 0.3258, "step": 45076 }, { "epoch": 4.582858885725905, "grad_norm": 0.2637844979763031, "learning_rate": 2.1052768591955875e-07, "loss": 0.3182, "step": 45077 }, { "epoch": 4.582960553070354, "grad_norm": 0.2877371907234192, "learning_rate": 2.104258029333478e-07, "loss": 0.3121, "step": 45078 }, { "epoch": 4.583062220414803, "grad_norm": 0.2806415855884552, "learning_rate": 2.1032394407587364e-07, "loss": 0.2811, "step": 45079 }, { "epoch": 4.583163887759252, "grad_norm": 0.26853036880493164, "learning_rate": 2.1022210934764865e-07, "loss": 0.2947, "step": 45080 }, { "epoch": 4.583265555103701, "grad_norm": 0.27235743403434753, "learning_rate": 2.1012029874918627e-07, "loss": 0.312, "step": 45081 }, { "epoch": 4.58336722244815, "grad_norm": 0.27847814559936523, "learning_rate": 2.1001851228100056e-07, "loss": 0.2945, "step": 45082 }, { "epoch": 4.583468889792599, "grad_norm": 0.2844371497631073, "learning_rate": 2.0991674994360278e-07, "loss": 0.2843, "step": 45083 }, { "epoch": 4.5835705571370475, "grad_norm": 0.2729761004447937, "learning_rate": 2.0981501173750586e-07, "loss": 0.2586, "step": 45084 }, { "epoch": 4.5836722244814965, "grad_norm": 0.28267359733581543, "learning_rate": 2.0971329766322268e-07, "loss": 0.3119, "step": 45085 }, { "epoch": 4.583773891825945, "grad_norm": 0.27665314078330994, "learning_rate": 2.096116077212662e-07, "loss": 0.3043, "step": 45086 }, { "epoch": 4.583875559170394, "grad_norm": 0.27573952078819275, "learning_rate": 2.0950994191214712e-07, "loss": 0.3104, "step": 45087 }, { "epoch": 4.583977226514843, "grad_norm": 0.2903483211994171, "learning_rate": 2.0940830023637894e-07, "loss": 0.2878, "step": 45088 }, { "epoch": 4.584078893859292, "grad_norm": 0.288812518119812, "learning_rate": 2.0930668269447397e-07, "loss": 0.3156, "step": 45089 }, { "epoch": 4.584180561203741, "grad_norm": 0.2818536162376404, "learning_rate": 2.0920508928694295e-07, "loss": 0.2937, "step": 45090 }, { "epoch": 4.58428222854819, "grad_norm": 0.2913459241390228, "learning_rate": 2.0910352001429767e-07, "loss": 0.3284, "step": 45091 }, { "epoch": 4.584383895892639, "grad_norm": 0.2831660807132721, "learning_rate": 2.090019748770511e-07, "loss": 0.2773, "step": 45092 }, { "epoch": 4.584485563237088, "grad_norm": 0.28226834535598755, "learning_rate": 2.089004538757139e-07, "loss": 0.2957, "step": 45093 }, { "epoch": 4.584587230581537, "grad_norm": 0.2787499725818634, "learning_rate": 2.0879895701079733e-07, "loss": 0.3027, "step": 45094 }, { "epoch": 4.584688897925986, "grad_norm": 0.29548537731170654, "learning_rate": 2.0869748428281378e-07, "loss": 0.2962, "step": 45095 }, { "epoch": 4.584790565270435, "grad_norm": 0.2968641519546509, "learning_rate": 2.0859603569227338e-07, "loss": 0.2761, "step": 45096 }, { "epoch": 4.584892232614884, "grad_norm": 0.2740151584148407, "learning_rate": 2.0849461123968685e-07, "loss": 0.2958, "step": 45097 }, { "epoch": 4.584993899959333, "grad_norm": 0.3118892312049866, "learning_rate": 2.0839321092556653e-07, "loss": 0.2931, "step": 45098 }, { "epoch": 4.585095567303782, "grad_norm": 0.28470170497894287, "learning_rate": 2.082918347504226e-07, "loss": 0.2622, "step": 45099 }, { "epoch": 4.585197234648231, "grad_norm": 0.27510014176368713, "learning_rate": 2.0819048271476572e-07, "loss": 0.2876, "step": 45100 }, { "epoch": 4.5852989019926795, "grad_norm": 0.28594520688056946, "learning_rate": 2.0808915481910607e-07, "loss": 0.3083, "step": 45101 }, { "epoch": 4.5854005693371285, "grad_norm": 0.27387598156929016, "learning_rate": 2.0798785106395492e-07, "loss": 0.2974, "step": 45102 }, { "epoch": 4.585502236681577, "grad_norm": 0.3068792521953583, "learning_rate": 2.0788657144982237e-07, "loss": 0.2642, "step": 45103 }, { "epoch": 4.585603904026026, "grad_norm": 0.2886558473110199, "learning_rate": 2.0778531597721753e-07, "loss": 0.3006, "step": 45104 }, { "epoch": 4.585705571370476, "grad_norm": 0.2726125717163086, "learning_rate": 2.0768408464665267e-07, "loss": 0.3188, "step": 45105 }, { "epoch": 4.585807238714925, "grad_norm": 0.2756419777870178, "learning_rate": 2.0758287745863636e-07, "loss": 0.3046, "step": 45106 }, { "epoch": 4.585908906059374, "grad_norm": 0.2749435603618622, "learning_rate": 2.074816944136776e-07, "loss": 0.2808, "step": 45107 }, { "epoch": 4.586010573403823, "grad_norm": 0.2899503707885742, "learning_rate": 2.073805355122882e-07, "loss": 0.3126, "step": 45108 }, { "epoch": 4.586112240748272, "grad_norm": 0.27719539403915405, "learning_rate": 2.0727940075497665e-07, "loss": 0.2713, "step": 45109 }, { "epoch": 4.586213908092721, "grad_norm": 0.29354503750801086, "learning_rate": 2.0717829014225254e-07, "loss": 0.2986, "step": 45110 }, { "epoch": 4.58631557543717, "grad_norm": 0.2906191945075989, "learning_rate": 2.0707720367462491e-07, "loss": 0.2928, "step": 45111 }, { "epoch": 4.586417242781619, "grad_norm": 0.27770331501960754, "learning_rate": 2.0697614135260335e-07, "loss": 0.3032, "step": 45112 }, { "epoch": 4.586518910126068, "grad_norm": 0.2799294888973236, "learning_rate": 2.0687510317669746e-07, "loss": 0.2773, "step": 45113 }, { "epoch": 4.586620577470517, "grad_norm": 0.28377020359039307, "learning_rate": 2.0677408914741515e-07, "loss": 0.3077, "step": 45114 }, { "epoch": 4.586722244814966, "grad_norm": 0.3095903992652893, "learning_rate": 2.0667309926526658e-07, "loss": 0.2801, "step": 45115 }, { "epoch": 4.5868239121594145, "grad_norm": 0.25353026390075684, "learning_rate": 2.0657213353075967e-07, "loss": 0.291, "step": 45116 }, { "epoch": 4.5869255795038635, "grad_norm": 0.291148841381073, "learning_rate": 2.064711919444029e-07, "loss": 0.3137, "step": 45117 }, { "epoch": 4.587027246848312, "grad_norm": 0.30841735005378723, "learning_rate": 2.0637027450670533e-07, "loss": 0.292, "step": 45118 }, { "epoch": 4.587128914192761, "grad_norm": 0.2849575877189636, "learning_rate": 2.0626938121817542e-07, "loss": 0.2814, "step": 45119 }, { "epoch": 4.58723058153721, "grad_norm": 0.3170660436153412, "learning_rate": 2.061685120793211e-07, "loss": 0.2903, "step": 45120 }, { "epoch": 4.587332248881659, "grad_norm": 0.27783480286598206, "learning_rate": 2.0606766709065029e-07, "loss": 0.3196, "step": 45121 }, { "epoch": 4.587433916226108, "grad_norm": 0.28690168261528015, "learning_rate": 2.0596684625267205e-07, "loss": 0.289, "step": 45122 }, { "epoch": 4.587535583570557, "grad_norm": 0.2821495532989502, "learning_rate": 2.0586604956589317e-07, "loss": 0.2903, "step": 45123 }, { "epoch": 4.587637250915006, "grad_norm": 0.2581930160522461, "learning_rate": 2.057652770308216e-07, "loss": 0.305, "step": 45124 }, { "epoch": 4.587738918259455, "grad_norm": 0.29325026273727417, "learning_rate": 2.0566452864796582e-07, "loss": 0.2868, "step": 45125 }, { "epoch": 4.587840585603904, "grad_norm": 0.284397691488266, "learning_rate": 2.0556380441783265e-07, "loss": 0.2893, "step": 45126 }, { "epoch": 4.587942252948353, "grad_norm": 0.27978914976119995, "learning_rate": 2.0546310434092885e-07, "loss": 0.2569, "step": 45127 }, { "epoch": 4.588043920292802, "grad_norm": 0.26516857743263245, "learning_rate": 2.0536242841776356e-07, "loss": 0.2533, "step": 45128 }, { "epoch": 4.588145587637251, "grad_norm": 0.2694140374660492, "learning_rate": 2.0526177664884294e-07, "loss": 0.308, "step": 45129 }, { "epoch": 4.5882472549817, "grad_norm": 0.3036268353462219, "learning_rate": 2.0516114903467444e-07, "loss": 0.267, "step": 45130 }, { "epoch": 4.588348922326149, "grad_norm": 0.281345933675766, "learning_rate": 2.0506054557576371e-07, "loss": 0.3037, "step": 45131 }, { "epoch": 4.588450589670598, "grad_norm": 0.2746066749095917, "learning_rate": 2.049599662726187e-07, "loss": 0.2743, "step": 45132 }, { "epoch": 4.5885522570150465, "grad_norm": 0.2801539897918701, "learning_rate": 2.048594111257468e-07, "loss": 0.3037, "step": 45133 }, { "epoch": 4.5886539243594955, "grad_norm": 0.2955171465873718, "learning_rate": 2.0475888013565204e-07, "loss": 0.3246, "step": 45134 }, { "epoch": 4.588755591703944, "grad_norm": 0.2789500057697296, "learning_rate": 2.0465837330284454e-07, "loss": 0.2848, "step": 45135 }, { "epoch": 4.588857259048393, "grad_norm": 0.29632872343063354, "learning_rate": 2.0455789062782727e-07, "loss": 0.2932, "step": 45136 }, { "epoch": 4.588958926392842, "grad_norm": 0.2823081910610199, "learning_rate": 2.0445743211110758e-07, "loss": 0.2756, "step": 45137 }, { "epoch": 4.589060593737291, "grad_norm": 0.27361658215522766, "learning_rate": 2.0435699775319228e-07, "loss": 0.3078, "step": 45138 }, { "epoch": 4.58916226108174, "grad_norm": 0.3042280972003937, "learning_rate": 2.042565875545871e-07, "loss": 0.3218, "step": 45139 }, { "epoch": 4.589263928426189, "grad_norm": 0.2891062796115875, "learning_rate": 2.0415620151579717e-07, "loss": 0.2731, "step": 45140 }, { "epoch": 4.589365595770639, "grad_norm": 0.2872859239578247, "learning_rate": 2.0405583963732822e-07, "loss": 0.3013, "step": 45141 }, { "epoch": 4.589467263115088, "grad_norm": 0.25830191373825073, "learning_rate": 2.0395550191968706e-07, "loss": 0.266, "step": 45142 }, { "epoch": 4.589568930459537, "grad_norm": 0.26967287063598633, "learning_rate": 2.0385518836337826e-07, "loss": 0.2897, "step": 45143 }, { "epoch": 4.589670597803986, "grad_norm": 0.2619025707244873, "learning_rate": 2.0375489896890644e-07, "loss": 0.266, "step": 45144 }, { "epoch": 4.589772265148435, "grad_norm": 0.28886160254478455, "learning_rate": 2.0365463373677895e-07, "loss": 0.3225, "step": 45145 }, { "epoch": 4.589873932492884, "grad_norm": 0.28516915440559387, "learning_rate": 2.0355439266749877e-07, "loss": 0.307, "step": 45146 }, { "epoch": 4.589975599837333, "grad_norm": 0.2812560200691223, "learning_rate": 2.0345417576157155e-07, "loss": 0.2945, "step": 45147 }, { "epoch": 4.5900772671817816, "grad_norm": 0.28796514868736267, "learning_rate": 2.033539830195036e-07, "loss": 0.3192, "step": 45148 }, { "epoch": 4.5901789345262305, "grad_norm": 0.27644479274749756, "learning_rate": 2.0325381444179726e-07, "loss": 0.3191, "step": 45149 }, { "epoch": 4.590280601870679, "grad_norm": 0.29946210980415344, "learning_rate": 2.031536700289588e-07, "loss": 0.2773, "step": 45150 }, { "epoch": 4.590382269215128, "grad_norm": 0.294681191444397, "learning_rate": 2.0305354978149173e-07, "loss": 0.2709, "step": 45151 }, { "epoch": 4.590483936559577, "grad_norm": 0.27967432141304016, "learning_rate": 2.0295345369990172e-07, "loss": 0.2692, "step": 45152 }, { "epoch": 4.590585603904026, "grad_norm": 0.27333134412765503, "learning_rate": 2.0285338178469172e-07, "loss": 0.2768, "step": 45153 }, { "epoch": 4.590687271248475, "grad_norm": 0.27465295791625977, "learning_rate": 2.0275333403636575e-07, "loss": 0.3186, "step": 45154 }, { "epoch": 4.590788938592924, "grad_norm": 0.26621028780937195, "learning_rate": 2.0265331045543013e-07, "loss": 0.2991, "step": 45155 }, { "epoch": 4.590890605937373, "grad_norm": 0.26144179701805115, "learning_rate": 2.0255331104238608e-07, "loss": 0.3108, "step": 45156 }, { "epoch": 4.590992273281822, "grad_norm": 0.27948829531669617, "learning_rate": 2.0245333579773874e-07, "loss": 0.2851, "step": 45157 }, { "epoch": 4.591093940626271, "grad_norm": 0.29702886939048767, "learning_rate": 2.0235338472199106e-07, "loss": 0.2738, "step": 45158 }, { "epoch": 4.59119560797072, "grad_norm": 0.25907862186431885, "learning_rate": 2.022534578156471e-07, "loss": 0.3152, "step": 45159 }, { "epoch": 4.591297275315169, "grad_norm": 0.28358784317970276, "learning_rate": 2.021535550792103e-07, "loss": 0.3524, "step": 45160 }, { "epoch": 4.591398942659618, "grad_norm": 0.32326140999794006, "learning_rate": 2.0205367651318307e-07, "loss": 0.2768, "step": 45161 }, { "epoch": 4.591500610004067, "grad_norm": 0.2837275266647339, "learning_rate": 2.0195382211807057e-07, "loss": 0.2752, "step": 45162 }, { "epoch": 4.591602277348516, "grad_norm": 0.27697575092315674, "learning_rate": 2.0185399189437293e-07, "loss": 0.2773, "step": 45163 }, { "epoch": 4.591703944692965, "grad_norm": 0.271749347448349, "learning_rate": 2.0175418584259532e-07, "loss": 0.3182, "step": 45164 }, { "epoch": 4.5918056120374136, "grad_norm": 0.2635483145713806, "learning_rate": 2.0165440396324065e-07, "loss": 0.2988, "step": 45165 }, { "epoch": 4.5919072793818625, "grad_norm": 0.28504064679145813, "learning_rate": 2.0155464625680964e-07, "loss": 0.2959, "step": 45166 }, { "epoch": 4.592008946726311, "grad_norm": 0.2948456108570099, "learning_rate": 2.014549127238069e-07, "loss": 0.3171, "step": 45167 }, { "epoch": 4.59211061407076, "grad_norm": 0.30065086483955383, "learning_rate": 2.013552033647337e-07, "loss": 0.2589, "step": 45168 }, { "epoch": 4.592212281415209, "grad_norm": 0.30924829840660095, "learning_rate": 2.0125551818009237e-07, "loss": 0.3235, "step": 45169 }, { "epoch": 4.592313948759658, "grad_norm": 0.26906076073646545, "learning_rate": 2.0115585717038588e-07, "loss": 0.3034, "step": 45170 }, { "epoch": 4.592415616104107, "grad_norm": 0.27769577503204346, "learning_rate": 2.0105622033611549e-07, "loss": 0.2871, "step": 45171 }, { "epoch": 4.592517283448556, "grad_norm": 0.2600850760936737, "learning_rate": 2.0095660767778358e-07, "loss": 0.3197, "step": 45172 }, { "epoch": 4.592618950793005, "grad_norm": 0.28455033898353577, "learning_rate": 2.008570191958914e-07, "loss": 0.279, "step": 45173 }, { "epoch": 4.592720618137454, "grad_norm": 0.28126245737075806, "learning_rate": 2.0075745489094134e-07, "loss": 0.3052, "step": 45174 }, { "epoch": 4.592822285481903, "grad_norm": 0.28233081102371216, "learning_rate": 2.0065791476343577e-07, "loss": 0.2959, "step": 45175 }, { "epoch": 4.592923952826352, "grad_norm": 0.28325554728507996, "learning_rate": 2.0055839881387373e-07, "loss": 0.2912, "step": 45176 }, { "epoch": 4.593025620170801, "grad_norm": 0.2686794400215149, "learning_rate": 2.0045890704275871e-07, "loss": 0.2851, "step": 45177 }, { "epoch": 4.59312728751525, "grad_norm": 0.3035508096218109, "learning_rate": 2.0035943945059088e-07, "loss": 0.3078, "step": 45178 }, { "epoch": 4.593228954859699, "grad_norm": 0.2853931188583374, "learning_rate": 2.0025999603787148e-07, "loss": 0.3203, "step": 45179 }, { "epoch": 4.593330622204148, "grad_norm": 0.28706806898117065, "learning_rate": 2.001605768051018e-07, "loss": 0.2634, "step": 45180 }, { "epoch": 4.593432289548597, "grad_norm": 0.27439647912979126, "learning_rate": 2.0006118175278256e-07, "loss": 0.2916, "step": 45181 }, { "epoch": 4.5935339568930456, "grad_norm": 0.2869109809398651, "learning_rate": 1.9996181088141497e-07, "loss": 0.3128, "step": 45182 }, { "epoch": 4.5936356242374945, "grad_norm": 0.27641358971595764, "learning_rate": 1.9986246419149812e-07, "loss": 0.2917, "step": 45183 }, { "epoch": 4.593737291581943, "grad_norm": 0.2606762945652008, "learning_rate": 1.9976314168353327e-07, "loss": 0.2944, "step": 45184 }, { "epoch": 4.593838958926392, "grad_norm": 0.2812544107437134, "learning_rate": 1.996638433580228e-07, "loss": 0.2682, "step": 45185 }, { "epoch": 4.593940626270841, "grad_norm": 0.2678332030773163, "learning_rate": 1.9956456921546353e-07, "loss": 0.302, "step": 45186 }, { "epoch": 4.594042293615291, "grad_norm": 0.2662154734134674, "learning_rate": 1.9946531925635782e-07, "loss": 0.3023, "step": 45187 }, { "epoch": 4.59414396095974, "grad_norm": 0.27203503251075745, "learning_rate": 1.9936609348120527e-07, "loss": 0.3104, "step": 45188 }, { "epoch": 4.594245628304189, "grad_norm": 0.26519501209259033, "learning_rate": 1.992668918905044e-07, "loss": 0.2791, "step": 45189 }, { "epoch": 4.594347295648638, "grad_norm": 0.275194376707077, "learning_rate": 1.9916771448475703e-07, "loss": 0.2782, "step": 45190 }, { "epoch": 4.594448962993087, "grad_norm": 0.28271424770355225, "learning_rate": 1.9906856126446218e-07, "loss": 0.3037, "step": 45191 }, { "epoch": 4.594550630337536, "grad_norm": 0.26235491037368774, "learning_rate": 1.9896943223011889e-07, "loss": 0.3023, "step": 45192 }, { "epoch": 4.594652297681985, "grad_norm": 0.31075307726860046, "learning_rate": 1.9887032738222566e-07, "loss": 0.2887, "step": 45193 }, { "epoch": 4.594753965026434, "grad_norm": 0.28720909357070923, "learning_rate": 1.9877124672128378e-07, "loss": 0.3024, "step": 45194 }, { "epoch": 4.594855632370883, "grad_norm": 0.30100128054618835, "learning_rate": 1.9867219024779173e-07, "loss": 0.3082, "step": 45195 }, { "epoch": 4.594957299715332, "grad_norm": 0.2826917767524719, "learning_rate": 1.9857315796224685e-07, "loss": 0.2658, "step": 45196 }, { "epoch": 4.595058967059781, "grad_norm": 0.26725754141807556, "learning_rate": 1.9847414986515044e-07, "loss": 0.3141, "step": 45197 }, { "epoch": 4.5951606344042295, "grad_norm": 0.2864651083946228, "learning_rate": 1.9837516595700045e-07, "loss": 0.3076, "step": 45198 }, { "epoch": 4.5952623017486784, "grad_norm": 0.2863862216472626, "learning_rate": 1.9827620623829424e-07, "loss": 0.3111, "step": 45199 }, { "epoch": 4.595363969093127, "grad_norm": 0.2714143395423889, "learning_rate": 1.9817727070953252e-07, "loss": 0.2992, "step": 45200 }, { "epoch": 4.595465636437576, "grad_norm": 0.28169751167297363, "learning_rate": 1.9807835937121212e-07, "loss": 0.3091, "step": 45201 }, { "epoch": 4.595567303782025, "grad_norm": 0.27485549449920654, "learning_rate": 1.9797947222383208e-07, "loss": 0.3086, "step": 45202 }, { "epoch": 4.595668971126474, "grad_norm": 0.274850994348526, "learning_rate": 1.9788060926788977e-07, "loss": 0.2958, "step": 45203 }, { "epoch": 4.595770638470923, "grad_norm": 0.2585258483886719, "learning_rate": 1.9778177050388426e-07, "loss": 0.2961, "step": 45204 }, { "epoch": 4.595872305815372, "grad_norm": 0.26408252120018005, "learning_rate": 1.9768295593231345e-07, "loss": 0.3008, "step": 45205 }, { "epoch": 4.595973973159821, "grad_norm": 0.26865658164024353, "learning_rate": 1.9758416555367367e-07, "loss": 0.3157, "step": 45206 }, { "epoch": 4.59607564050427, "grad_norm": 0.2624809741973877, "learning_rate": 1.974853993684639e-07, "loss": 0.2685, "step": 45207 }, { "epoch": 4.596177307848719, "grad_norm": 0.3117418587207794, "learning_rate": 1.973866573771821e-07, "loss": 0.313, "step": 45208 }, { "epoch": 4.596278975193168, "grad_norm": 0.28591179847717285, "learning_rate": 1.97287939580324e-07, "loss": 0.2811, "step": 45209 }, { "epoch": 4.596380642537617, "grad_norm": 0.2762202024459839, "learning_rate": 1.9718924597838917e-07, "loss": 0.2784, "step": 45210 }, { "epoch": 4.596482309882066, "grad_norm": 0.2729281485080719, "learning_rate": 1.970905765718728e-07, "loss": 0.2975, "step": 45211 }, { "epoch": 4.596583977226515, "grad_norm": 0.2876313328742981, "learning_rate": 1.9699193136127338e-07, "loss": 0.2997, "step": 45212 }, { "epoch": 4.596685644570964, "grad_norm": 0.2671463191509247, "learning_rate": 1.9689331034708657e-07, "loss": 0.2893, "step": 45213 }, { "epoch": 4.596787311915413, "grad_norm": 0.2717866003513336, "learning_rate": 1.9679471352980984e-07, "loss": 0.277, "step": 45214 }, { "epoch": 4.5968889792598615, "grad_norm": 0.27289560437202454, "learning_rate": 1.966961409099405e-07, "loss": 0.3055, "step": 45215 }, { "epoch": 4.5969906466043104, "grad_norm": 0.2983090579509735, "learning_rate": 1.9659759248797372e-07, "loss": 0.314, "step": 45216 }, { "epoch": 4.597092313948759, "grad_norm": 0.264114111661911, "learning_rate": 1.96499068264408e-07, "loss": 0.3025, "step": 45217 }, { "epoch": 4.597193981293208, "grad_norm": 0.2979283034801483, "learning_rate": 1.9640056823973797e-07, "loss": 0.2851, "step": 45218 }, { "epoch": 4.597295648637657, "grad_norm": 0.28564581274986267, "learning_rate": 1.9630209241445986e-07, "loss": 0.2805, "step": 45219 }, { "epoch": 4.597397315982106, "grad_norm": 0.30286678671836853, "learning_rate": 1.962036407890705e-07, "loss": 0.3239, "step": 45220 }, { "epoch": 4.597498983326555, "grad_norm": 0.3044230341911316, "learning_rate": 1.961052133640662e-07, "loss": 0.313, "step": 45221 }, { "epoch": 4.597600650671004, "grad_norm": 0.27621763944625854, "learning_rate": 1.9600681013994206e-07, "loss": 0.2951, "step": 45222 }, { "epoch": 4.597702318015454, "grad_norm": 0.2886779010295868, "learning_rate": 1.9590843111719327e-07, "loss": 0.2474, "step": 45223 }, { "epoch": 4.597803985359903, "grad_norm": 0.27245548367500305, "learning_rate": 1.9581007629631665e-07, "loss": 0.2944, "step": 45224 }, { "epoch": 4.597905652704352, "grad_norm": 0.28329992294311523, "learning_rate": 1.9571174567780794e-07, "loss": 0.3231, "step": 45225 }, { "epoch": 4.598007320048801, "grad_norm": 0.28173860907554626, "learning_rate": 1.9561343926216059e-07, "loss": 0.2849, "step": 45226 }, { "epoch": 4.59810898739325, "grad_norm": 0.2820248603820801, "learning_rate": 1.9551515704987145e-07, "loss": 0.325, "step": 45227 }, { "epoch": 4.598210654737699, "grad_norm": 0.2856544852256775, "learning_rate": 1.9541689904143567e-07, "loss": 0.319, "step": 45228 }, { "epoch": 4.598312322082148, "grad_norm": 0.27486395835876465, "learning_rate": 1.9531866523734676e-07, "loss": 0.2808, "step": 45229 }, { "epoch": 4.5984139894265965, "grad_norm": 0.2915160655975342, "learning_rate": 1.9522045563810156e-07, "loss": 0.293, "step": 45230 }, { "epoch": 4.5985156567710455, "grad_norm": 0.2730504870414734, "learning_rate": 1.9512227024419406e-07, "loss": 0.3004, "step": 45231 }, { "epoch": 4.598617324115494, "grad_norm": 0.2761355936527252, "learning_rate": 1.950241090561189e-07, "loss": 0.2958, "step": 45232 }, { "epoch": 4.598718991459943, "grad_norm": 0.28469938039779663, "learning_rate": 1.9492597207436903e-07, "loss": 0.304, "step": 45233 }, { "epoch": 4.598820658804392, "grad_norm": 0.2697809636592865, "learning_rate": 1.948278592994418e-07, "loss": 0.2983, "step": 45234 }, { "epoch": 4.598922326148841, "grad_norm": 0.2740142345428467, "learning_rate": 1.947297707318291e-07, "loss": 0.2982, "step": 45235 }, { "epoch": 4.59902399349329, "grad_norm": 0.2659316658973694, "learning_rate": 1.94631706372026e-07, "loss": 0.29, "step": 45236 }, { "epoch": 4.599125660837739, "grad_norm": 0.26894617080688477, "learning_rate": 1.945336662205266e-07, "loss": 0.309, "step": 45237 }, { "epoch": 4.599227328182188, "grad_norm": 0.2819752097129822, "learning_rate": 1.944356502778244e-07, "loss": 0.2682, "step": 45238 }, { "epoch": 4.599328995526637, "grad_norm": 0.2861745059490204, "learning_rate": 1.9433765854441344e-07, "loss": 0.2902, "step": 45239 }, { "epoch": 4.599430662871086, "grad_norm": 0.27116551995277405, "learning_rate": 1.9423969102078777e-07, "loss": 0.3042, "step": 45240 }, { "epoch": 4.599532330215535, "grad_norm": 0.26778697967529297, "learning_rate": 1.9414174770744033e-07, "loss": 0.309, "step": 45241 }, { "epoch": 4.599633997559984, "grad_norm": 0.3131784200668335, "learning_rate": 1.9404382860486458e-07, "loss": 0.3019, "step": 45242 }, { "epoch": 4.599735664904433, "grad_norm": 0.2816411554813385, "learning_rate": 1.9394593371355353e-07, "loss": 0.2633, "step": 45243 }, { "epoch": 4.599837332248882, "grad_norm": 0.29700466990470886, "learning_rate": 1.9384806303400172e-07, "loss": 0.3136, "step": 45244 }, { "epoch": 4.599938999593331, "grad_norm": 0.269444078207016, "learning_rate": 1.93750216566701e-07, "loss": 0.291, "step": 45245 }, { "epoch": 4.60004066693778, "grad_norm": 0.28396469354629517, "learning_rate": 1.9365239431214378e-07, "loss": 0.2814, "step": 45246 }, { "epoch": 4.6001423342822285, "grad_norm": 0.2688385844230652, "learning_rate": 1.9355459627082463e-07, "loss": 0.317, "step": 45247 }, { "epoch": 4.6002440016266775, "grad_norm": 0.27856358885765076, "learning_rate": 1.9345682244323482e-07, "loss": 0.2941, "step": 45248 }, { "epoch": 4.600345668971126, "grad_norm": 0.2749783396720886, "learning_rate": 1.9335907282986677e-07, "loss": 0.2727, "step": 45249 }, { "epoch": 4.600447336315575, "grad_norm": 0.2736986577510834, "learning_rate": 1.9326134743121395e-07, "loss": 0.2867, "step": 45250 }, { "epoch": 4.600549003660024, "grad_norm": 0.28099149465560913, "learning_rate": 1.9316364624776874e-07, "loss": 0.2966, "step": 45251 }, { "epoch": 4.600650671004473, "grad_norm": 0.2668451964855194, "learning_rate": 1.930659692800224e-07, "loss": 0.309, "step": 45252 }, { "epoch": 4.600752338348922, "grad_norm": 0.278595894575119, "learning_rate": 1.929683165284668e-07, "loss": 0.2949, "step": 45253 }, { "epoch": 4.600854005693371, "grad_norm": 0.26951107382774353, "learning_rate": 1.9287068799359544e-07, "loss": 0.2846, "step": 45254 }, { "epoch": 4.60095567303782, "grad_norm": 0.2890341579914093, "learning_rate": 1.9277308367589898e-07, "loss": 0.2891, "step": 45255 }, { "epoch": 4.601057340382269, "grad_norm": 0.25093621015548706, "learning_rate": 1.9267550357586874e-07, "loss": 0.3107, "step": 45256 }, { "epoch": 4.601159007726718, "grad_norm": 0.2723068296909332, "learning_rate": 1.9257794769399708e-07, "loss": 0.2751, "step": 45257 }, { "epoch": 4.601260675071167, "grad_norm": 0.2875980734825134, "learning_rate": 1.9248041603077582e-07, "loss": 0.3088, "step": 45258 }, { "epoch": 4.601362342415616, "grad_norm": 0.2825709879398346, "learning_rate": 1.9238290858669517e-07, "loss": 0.259, "step": 45259 }, { "epoch": 4.601464009760065, "grad_norm": 0.27443334460258484, "learning_rate": 1.9228542536224747e-07, "loss": 0.3124, "step": 45260 }, { "epoch": 4.601565677104514, "grad_norm": 0.2980582118034363, "learning_rate": 1.9218796635792347e-07, "loss": 0.2889, "step": 45261 }, { "epoch": 4.601667344448963, "grad_norm": 0.2761041224002838, "learning_rate": 1.9209053157421387e-07, "loss": 0.3276, "step": 45262 }, { "epoch": 4.601769011793412, "grad_norm": 0.27245032787323, "learning_rate": 1.9199312101160883e-07, "loss": 0.3004, "step": 45263 }, { "epoch": 4.6018706791378605, "grad_norm": 0.27811479568481445, "learning_rate": 1.9189573467060075e-07, "loss": 0.3175, "step": 45264 }, { "epoch": 4.6019723464823095, "grad_norm": 0.2892451286315918, "learning_rate": 1.917983725516792e-07, "loss": 0.2864, "step": 45265 }, { "epoch": 4.602074013826758, "grad_norm": 0.292169988155365, "learning_rate": 1.917010346553344e-07, "loss": 0.2925, "step": 45266 }, { "epoch": 4.602175681171207, "grad_norm": 0.2803310453891754, "learning_rate": 1.9160372098205759e-07, "loss": 0.2977, "step": 45267 }, { "epoch": 4.602277348515656, "grad_norm": 0.27519720792770386, "learning_rate": 1.9150643153233895e-07, "loss": 0.3075, "step": 45268 }, { "epoch": 4.602379015860106, "grad_norm": 0.2743278741836548, "learning_rate": 1.9140916630666694e-07, "loss": 0.2947, "step": 45269 }, { "epoch": 4.602480683204555, "grad_norm": 0.2827301025390625, "learning_rate": 1.91311925305534e-07, "loss": 0.282, "step": 45270 }, { "epoch": 4.602582350549004, "grad_norm": 0.286587119102478, "learning_rate": 1.9121470852942914e-07, "loss": 0.2917, "step": 45271 }, { "epoch": 4.602684017893453, "grad_norm": 0.26758310198783875, "learning_rate": 1.9111751597884143e-07, "loss": 0.2939, "step": 45272 }, { "epoch": 4.602785685237902, "grad_norm": 0.27369144558906555, "learning_rate": 1.910203476542599e-07, "loss": 0.3182, "step": 45273 }, { "epoch": 4.602887352582351, "grad_norm": 0.28381919860839844, "learning_rate": 1.9092320355617643e-07, "loss": 0.2776, "step": 45274 }, { "epoch": 4.6029890199268, "grad_norm": 0.26439979672431946, "learning_rate": 1.9082608368507837e-07, "loss": 0.3063, "step": 45275 }, { "epoch": 4.603090687271249, "grad_norm": 0.30824366211891174, "learning_rate": 1.9072898804145535e-07, "loss": 0.2877, "step": 45276 }, { "epoch": 4.603192354615698, "grad_norm": 0.27994608879089355, "learning_rate": 1.906319166257975e-07, "loss": 0.2831, "step": 45277 }, { "epoch": 4.603294021960147, "grad_norm": 0.265614777803421, "learning_rate": 1.9053486943859335e-07, "loss": 0.3413, "step": 45278 }, { "epoch": 4.6033956893045955, "grad_norm": 0.28865912556648254, "learning_rate": 1.904378464803308e-07, "loss": 0.3001, "step": 45279 }, { "epoch": 4.6034973566490445, "grad_norm": 0.28368079662323, "learning_rate": 1.9034084775150007e-07, "loss": 0.3033, "step": 45280 }, { "epoch": 4.603599023993493, "grad_norm": 0.2861359119415283, "learning_rate": 1.9024387325258907e-07, "loss": 0.312, "step": 45281 }, { "epoch": 4.603700691337942, "grad_norm": 0.26698029041290283, "learning_rate": 1.9014692298408687e-07, "loss": 0.277, "step": 45282 }, { "epoch": 4.603802358682391, "grad_norm": 0.27100569009780884, "learning_rate": 1.900499969464803e-07, "loss": 0.278, "step": 45283 }, { "epoch": 4.60390402602684, "grad_norm": 0.28808891773223877, "learning_rate": 1.8995309514026006e-07, "loss": 0.3177, "step": 45284 }, { "epoch": 4.604005693371289, "grad_norm": 0.2754862904548645, "learning_rate": 1.8985621756591245e-07, "loss": 0.3003, "step": 45285 }, { "epoch": 4.604107360715738, "grad_norm": 0.2874782383441925, "learning_rate": 1.8975936422392594e-07, "loss": 0.3063, "step": 45286 }, { "epoch": 4.604209028060187, "grad_norm": 0.27321603894233704, "learning_rate": 1.896625351147896e-07, "loss": 0.3014, "step": 45287 }, { "epoch": 4.604310695404636, "grad_norm": 0.26711180806159973, "learning_rate": 1.8956573023898973e-07, "loss": 0.2953, "step": 45288 }, { "epoch": 4.604412362749085, "grad_norm": 0.28971394896507263, "learning_rate": 1.8946894959701366e-07, "loss": 0.2879, "step": 45289 }, { "epoch": 4.604514030093534, "grad_norm": 0.29565539956092834, "learning_rate": 1.8937219318935106e-07, "loss": 0.2922, "step": 45290 }, { "epoch": 4.604615697437983, "grad_norm": 0.27992376685142517, "learning_rate": 1.892754610164882e-07, "loss": 0.2824, "step": 45291 }, { "epoch": 4.604717364782432, "grad_norm": 0.296032577753067, "learning_rate": 1.8917875307891187e-07, "loss": 0.3077, "step": 45292 }, { "epoch": 4.604819032126881, "grad_norm": 0.2776632308959961, "learning_rate": 1.8908206937710948e-07, "loss": 0.307, "step": 45293 }, { "epoch": 4.60492069947133, "grad_norm": 0.2792290449142456, "learning_rate": 1.88985409911569e-07, "loss": 0.2976, "step": 45294 }, { "epoch": 4.605022366815779, "grad_norm": 0.2917589843273163, "learning_rate": 1.888887746827772e-07, "loss": 0.2784, "step": 45295 }, { "epoch": 4.6051240341602275, "grad_norm": 0.2713044285774231, "learning_rate": 1.8879216369121933e-07, "loss": 0.273, "step": 45296 }, { "epoch": 4.6052257015046765, "grad_norm": 0.27202874422073364, "learning_rate": 1.8869557693738437e-07, "loss": 0.3007, "step": 45297 }, { "epoch": 4.605327368849125, "grad_norm": 0.2785024344921112, "learning_rate": 1.8859901442175642e-07, "loss": 0.3084, "step": 45298 }, { "epoch": 4.605429036193574, "grad_norm": 0.27658703923225403, "learning_rate": 1.8850247614482343e-07, "loss": 0.2657, "step": 45299 }, { "epoch": 4.605530703538023, "grad_norm": 0.28413838148117065, "learning_rate": 1.884059621070733e-07, "loss": 0.3195, "step": 45300 }, { "epoch": 4.605632370882472, "grad_norm": 0.2601594626903534, "learning_rate": 1.8830947230898845e-07, "loss": 0.2664, "step": 45301 }, { "epoch": 4.605734038226921, "grad_norm": 0.2709875702857971, "learning_rate": 1.8821300675105846e-07, "loss": 0.2816, "step": 45302 }, { "epoch": 4.60583570557137, "grad_norm": 0.28849977254867554, "learning_rate": 1.881165654337669e-07, "loss": 0.3364, "step": 45303 }, { "epoch": 4.605937372915819, "grad_norm": 0.29240962862968445, "learning_rate": 1.8802014835760108e-07, "loss": 0.2816, "step": 45304 }, { "epoch": 4.606039040260269, "grad_norm": 0.2915077209472656, "learning_rate": 1.879237555230462e-07, "loss": 0.3338, "step": 45305 }, { "epoch": 4.606140707604718, "grad_norm": 0.27893948554992676, "learning_rate": 1.8782738693058743e-07, "loss": 0.3019, "step": 45306 }, { "epoch": 4.606242374949167, "grad_norm": 0.2971620559692383, "learning_rate": 1.8773104258071218e-07, "loss": 0.256, "step": 45307 }, { "epoch": 4.606344042293616, "grad_norm": 0.28255629539489746, "learning_rate": 1.876347224739028e-07, "loss": 0.3106, "step": 45308 }, { "epoch": 4.606445709638065, "grad_norm": 0.27372944355010986, "learning_rate": 1.875384266106467e-07, "loss": 0.2967, "step": 45309 }, { "epoch": 4.606547376982514, "grad_norm": 0.2951083481311798, "learning_rate": 1.8744215499142903e-07, "loss": 0.3212, "step": 45310 }, { "epoch": 4.6066490443269625, "grad_norm": 0.2758658528327942, "learning_rate": 1.8734590761673276e-07, "loss": 0.315, "step": 45311 }, { "epoch": 4.6067507116714115, "grad_norm": 0.2692798972129822, "learning_rate": 1.8724968448704527e-07, "loss": 0.2934, "step": 45312 }, { "epoch": 4.60685237901586, "grad_norm": 0.2796890437602997, "learning_rate": 1.8715348560284952e-07, "loss": 0.2784, "step": 45313 }, { "epoch": 4.606954046360309, "grad_norm": 0.30728116631507874, "learning_rate": 1.870573109646312e-07, "loss": 0.3136, "step": 45314 }, { "epoch": 4.607055713704758, "grad_norm": 0.2700992226600647, "learning_rate": 1.869611605728744e-07, "loss": 0.2948, "step": 45315 }, { "epoch": 4.607157381049207, "grad_norm": 0.27994245290756226, "learning_rate": 1.8686503442806315e-07, "loss": 0.3122, "step": 45316 }, { "epoch": 4.607259048393656, "grad_norm": 0.28337520360946655, "learning_rate": 1.8676893253068318e-07, "loss": 0.2932, "step": 45317 }, { "epoch": 4.607360715738105, "grad_norm": 0.28101634979248047, "learning_rate": 1.8667285488121633e-07, "loss": 0.2766, "step": 45318 }, { "epoch": 4.607462383082554, "grad_norm": 0.28584638237953186, "learning_rate": 1.8657680148014778e-07, "loss": 0.2856, "step": 45319 }, { "epoch": 4.607564050427003, "grad_norm": 0.2686324715614319, "learning_rate": 1.864807723279627e-07, "loss": 0.3045, "step": 45320 }, { "epoch": 4.607665717771452, "grad_norm": 0.28038349747657776, "learning_rate": 1.8638476742514234e-07, "loss": 0.2906, "step": 45321 }, { "epoch": 4.607767385115901, "grad_norm": 0.2733452022075653, "learning_rate": 1.8628878677217188e-07, "loss": 0.3158, "step": 45322 }, { "epoch": 4.60786905246035, "grad_norm": 0.2725309431552887, "learning_rate": 1.861928303695354e-07, "loss": 0.2903, "step": 45323 }, { "epoch": 4.607970719804799, "grad_norm": 0.26899757981300354, "learning_rate": 1.8609689821771415e-07, "loss": 0.3005, "step": 45324 }, { "epoch": 4.608072387149248, "grad_norm": 0.2978329062461853, "learning_rate": 1.8600099031719387e-07, "loss": 0.2856, "step": 45325 }, { "epoch": 4.608174054493697, "grad_norm": 0.3102790117263794, "learning_rate": 1.8590510666845586e-07, "loss": 0.2698, "step": 45326 }, { "epoch": 4.608275721838146, "grad_norm": 0.2671719193458557, "learning_rate": 1.8580924727198523e-07, "loss": 0.2883, "step": 45327 }, { "epoch": 4.6083773891825945, "grad_norm": 0.25780466198921204, "learning_rate": 1.857134121282622e-07, "loss": 0.2742, "step": 45328 }, { "epoch": 4.6084790565270435, "grad_norm": 0.2685716152191162, "learning_rate": 1.8561760123777083e-07, "loss": 0.281, "step": 45329 }, { "epoch": 4.608580723871492, "grad_norm": 0.2686273753643036, "learning_rate": 1.8552181460099515e-07, "loss": 0.2998, "step": 45330 }, { "epoch": 4.608682391215941, "grad_norm": 0.29059603810310364, "learning_rate": 1.8542605221841537e-07, "loss": 0.288, "step": 45331 }, { "epoch": 4.60878405856039, "grad_norm": 0.3014816641807556, "learning_rate": 1.8533031409051604e-07, "loss": 0.3067, "step": 45332 }, { "epoch": 4.608885725904839, "grad_norm": 0.3031691610813141, "learning_rate": 1.8523460021777795e-07, "loss": 0.2806, "step": 45333 }, { "epoch": 4.608987393249288, "grad_norm": 0.2664737403392792, "learning_rate": 1.8513891060068347e-07, "loss": 0.2668, "step": 45334 }, { "epoch": 4.609089060593737, "grad_norm": 0.26889166235923767, "learning_rate": 1.8504324523971494e-07, "loss": 0.2747, "step": 45335 }, { "epoch": 4.609190727938186, "grad_norm": 0.2598941922187805, "learning_rate": 1.849476041353543e-07, "loss": 0.2866, "step": 45336 }, { "epoch": 4.609292395282635, "grad_norm": 0.2912229895591736, "learning_rate": 1.848519872880844e-07, "loss": 0.2879, "step": 45337 }, { "epoch": 4.609394062627084, "grad_norm": 0.281133770942688, "learning_rate": 1.847563946983849e-07, "loss": 0.3445, "step": 45338 }, { "epoch": 4.609495729971533, "grad_norm": 0.3126038610935211, "learning_rate": 1.8466082636673876e-07, "loss": 0.3053, "step": 45339 }, { "epoch": 4.609597397315982, "grad_norm": 0.2968163788318634, "learning_rate": 1.8456528229362726e-07, "loss": 0.3171, "step": 45340 }, { "epoch": 4.609699064660431, "grad_norm": 0.27531906962394714, "learning_rate": 1.8446976247953107e-07, "loss": 0.2705, "step": 45341 }, { "epoch": 4.60980073200488, "grad_norm": 0.26337072253227234, "learning_rate": 1.8437426692493265e-07, "loss": 0.2818, "step": 45342 }, { "epoch": 4.609902399349329, "grad_norm": 0.26644831895828247, "learning_rate": 1.842787956303116e-07, "loss": 0.3295, "step": 45343 }, { "epoch": 4.610004066693778, "grad_norm": 0.3071496784687042, "learning_rate": 1.8418334859615028e-07, "loss": 0.2769, "step": 45344 }, { "epoch": 4.6101057340382265, "grad_norm": 0.29826804995536804, "learning_rate": 1.8408792582292779e-07, "loss": 0.2821, "step": 45345 }, { "epoch": 4.6102074013826755, "grad_norm": 0.25452688336372375, "learning_rate": 1.8399252731112648e-07, "loss": 0.2802, "step": 45346 }, { "epoch": 4.610309068727124, "grad_norm": 0.28150662779808044, "learning_rate": 1.8389715306122658e-07, "loss": 0.2701, "step": 45347 }, { "epoch": 4.610410736071573, "grad_norm": 0.2647643983364105, "learning_rate": 1.8380180307370766e-07, "loss": 0.3236, "step": 45348 }, { "epoch": 4.610512403416022, "grad_norm": 0.2837161719799042, "learning_rate": 1.8370647734905156e-07, "loss": 0.3221, "step": 45349 }, { "epoch": 4.610614070760471, "grad_norm": 0.2830277681350708, "learning_rate": 1.8361117588773737e-07, "loss": 0.3228, "step": 45350 }, { "epoch": 4.610715738104921, "grad_norm": 0.27887994050979614, "learning_rate": 1.8351589869024523e-07, "loss": 0.281, "step": 45351 }, { "epoch": 4.61081740544937, "grad_norm": 0.26815301179885864, "learning_rate": 1.8342064575705586e-07, "loss": 0.2841, "step": 45352 }, { "epoch": 4.610919072793819, "grad_norm": 0.2731037139892578, "learning_rate": 1.8332541708864836e-07, "loss": 0.2886, "step": 45353 }, { "epoch": 4.611020740138268, "grad_norm": 0.2812941372394562, "learning_rate": 1.8323021268550345e-07, "loss": 0.3235, "step": 45354 }, { "epoch": 4.611122407482717, "grad_norm": 0.2770971655845642, "learning_rate": 1.8313503254809905e-07, "loss": 0.2808, "step": 45355 }, { "epoch": 4.611224074827166, "grad_norm": 0.2776496708393097, "learning_rate": 1.8303987667691647e-07, "loss": 0.2829, "step": 45356 }, { "epoch": 4.611325742171615, "grad_norm": 0.30488237738609314, "learning_rate": 1.829447450724342e-07, "loss": 0.3084, "step": 45357 }, { "epoch": 4.611427409516064, "grad_norm": 0.27520719170570374, "learning_rate": 1.8284963773513077e-07, "loss": 0.2936, "step": 45358 }, { "epoch": 4.611529076860513, "grad_norm": 0.27225854992866516, "learning_rate": 1.8275455466548686e-07, "loss": 0.2919, "step": 45359 }, { "epoch": 4.6116307442049616, "grad_norm": 0.2807474434375763, "learning_rate": 1.82659495863981e-07, "loss": 0.3353, "step": 45360 }, { "epoch": 4.6117324115494105, "grad_norm": 0.29942160844802856, "learning_rate": 1.8256446133109117e-07, "loss": 0.2778, "step": 45361 }, { "epoch": 4.611834078893859, "grad_norm": 0.28246521949768066, "learning_rate": 1.8246945106729752e-07, "loss": 0.2713, "step": 45362 }, { "epoch": 4.611935746238308, "grad_norm": 0.2884421646595001, "learning_rate": 1.8237446507307743e-07, "loss": 0.2662, "step": 45363 }, { "epoch": 4.612037413582757, "grad_norm": 0.27848178148269653, "learning_rate": 1.8227950334891054e-07, "loss": 0.2884, "step": 45364 }, { "epoch": 4.612139080927206, "grad_norm": 0.27819955348968506, "learning_rate": 1.8218456589527367e-07, "loss": 0.2962, "step": 45365 }, { "epoch": 4.612240748271655, "grad_norm": 0.27735382318496704, "learning_rate": 1.8208965271264645e-07, "loss": 0.3077, "step": 45366 }, { "epoch": 4.612342415616104, "grad_norm": 0.26257559657096863, "learning_rate": 1.8199476380150683e-07, "loss": 0.2732, "step": 45367 }, { "epoch": 4.612444082960553, "grad_norm": 0.29897260665893555, "learning_rate": 1.8189989916233218e-07, "loss": 0.3226, "step": 45368 }, { "epoch": 4.612545750305002, "grad_norm": 0.3007897734642029, "learning_rate": 1.8180505879560161e-07, "loss": 0.2965, "step": 45369 }, { "epoch": 4.612647417649451, "grad_norm": 0.2868667542934418, "learning_rate": 1.8171024270179195e-07, "loss": 0.2795, "step": 45370 }, { "epoch": 4.6127490849939, "grad_norm": 0.27570074796676636, "learning_rate": 1.816154508813811e-07, "loss": 0.2924, "step": 45371 }, { "epoch": 4.612850752338349, "grad_norm": 0.28312820196151733, "learning_rate": 1.815206833348465e-07, "loss": 0.3413, "step": 45372 }, { "epoch": 4.612952419682798, "grad_norm": 0.2720000445842743, "learning_rate": 1.8142594006266556e-07, "loss": 0.2799, "step": 45373 }, { "epoch": 4.613054087027247, "grad_norm": 0.26118165254592896, "learning_rate": 1.813312210653162e-07, "loss": 0.2826, "step": 45374 }, { "epoch": 4.613155754371696, "grad_norm": 0.2729019522666931, "learning_rate": 1.8123652634327416e-07, "loss": 0.2871, "step": 45375 }, { "epoch": 4.613257421716145, "grad_norm": 0.26512935757637024, "learning_rate": 1.8114185589701795e-07, "loss": 0.3053, "step": 45376 }, { "epoch": 4.6133590890605936, "grad_norm": 0.29379117488861084, "learning_rate": 1.8104720972702382e-07, "loss": 0.2881, "step": 45377 }, { "epoch": 4.6134607564050425, "grad_norm": 0.2842940390110016, "learning_rate": 1.8095258783376812e-07, "loss": 0.2844, "step": 45378 }, { "epoch": 4.613562423749491, "grad_norm": 0.29079023003578186, "learning_rate": 1.8085799021772932e-07, "loss": 0.2885, "step": 45379 }, { "epoch": 4.61366409109394, "grad_norm": 0.2575214207172394, "learning_rate": 1.8076341687938202e-07, "loss": 0.293, "step": 45380 }, { "epoch": 4.613765758438389, "grad_norm": 0.26209914684295654, "learning_rate": 1.8066886781920313e-07, "loss": 0.2587, "step": 45381 }, { "epoch": 4.613867425782838, "grad_norm": 0.26976069808006287, "learning_rate": 1.8057434303766995e-07, "loss": 0.2626, "step": 45382 }, { "epoch": 4.613969093127287, "grad_norm": 0.2675081491470337, "learning_rate": 1.8047984253525774e-07, "loss": 0.297, "step": 45383 }, { "epoch": 4.614070760471736, "grad_norm": 0.29826292395591736, "learning_rate": 1.8038536631244275e-07, "loss": 0.3136, "step": 45384 }, { "epoch": 4.614172427816185, "grad_norm": 0.2745639979839325, "learning_rate": 1.8029091436970014e-07, "loss": 0.25, "step": 45385 }, { "epoch": 4.614274095160634, "grad_norm": 0.2845449447631836, "learning_rate": 1.8019648670750788e-07, "loss": 0.2925, "step": 45386 }, { "epoch": 4.614375762505084, "grad_norm": 0.2707740068435669, "learning_rate": 1.801020833263395e-07, "loss": 0.2635, "step": 45387 }, { "epoch": 4.614477429849533, "grad_norm": 0.2870871126651764, "learning_rate": 1.8000770422667125e-07, "loss": 0.3078, "step": 45388 }, { "epoch": 4.614579097193982, "grad_norm": 0.28409987688064575, "learning_rate": 1.7991334940897943e-07, "loss": 0.2823, "step": 45389 }, { "epoch": 4.614680764538431, "grad_norm": 0.28822198510169983, "learning_rate": 1.798190188737392e-07, "loss": 0.2793, "step": 45390 }, { "epoch": 4.61478243188288, "grad_norm": 0.27955541014671326, "learning_rate": 1.797247126214241e-07, "loss": 0.3361, "step": 45391 }, { "epoch": 4.614884099227329, "grad_norm": 0.28794005513191223, "learning_rate": 1.7963043065251096e-07, "loss": 0.2901, "step": 45392 }, { "epoch": 4.6149857665717775, "grad_norm": 0.29130640625953674, "learning_rate": 1.7953617296747438e-07, "loss": 0.3194, "step": 45393 }, { "epoch": 4.6150874339162264, "grad_norm": 0.27222102880477905, "learning_rate": 1.7944193956678902e-07, "loss": 0.3115, "step": 45394 }, { "epoch": 4.615189101260675, "grad_norm": 0.25224825739860535, "learning_rate": 1.793477304509289e-07, "loss": 0.3266, "step": 45395 }, { "epoch": 4.615290768605124, "grad_norm": 0.269398033618927, "learning_rate": 1.7925354562037033e-07, "loss": 0.3018, "step": 45396 }, { "epoch": 4.615392435949573, "grad_norm": 0.25763922929763794, "learning_rate": 1.7915938507558683e-07, "loss": 0.269, "step": 45397 }, { "epoch": 4.615494103294022, "grad_norm": 0.26759955286979675, "learning_rate": 1.790652488170519e-07, "loss": 0.3189, "step": 45398 }, { "epoch": 4.615595770638471, "grad_norm": 0.2783369719982147, "learning_rate": 1.7897113684524125e-07, "loss": 0.3198, "step": 45399 }, { "epoch": 4.61569743798292, "grad_norm": 0.2591189444065094, "learning_rate": 1.7887704916062787e-07, "loss": 0.2951, "step": 45400 }, { "epoch": 4.615799105327369, "grad_norm": 0.29920294880867004, "learning_rate": 1.7878298576368635e-07, "loss": 0.2846, "step": 45401 }, { "epoch": 4.615900772671818, "grad_norm": 0.29020535945892334, "learning_rate": 1.7868894665489078e-07, "loss": 0.2641, "step": 45402 }, { "epoch": 4.616002440016267, "grad_norm": 0.2694553732872009, "learning_rate": 1.7859493183471466e-07, "loss": 0.3039, "step": 45403 }, { "epoch": 4.616104107360716, "grad_norm": 0.2710845172405243, "learning_rate": 1.785009413036315e-07, "loss": 0.3298, "step": 45404 }, { "epoch": 4.616205774705165, "grad_norm": 0.3064870536327362, "learning_rate": 1.7840697506211423e-07, "loss": 0.2914, "step": 45405 }, { "epoch": 4.616307442049614, "grad_norm": 0.2959226965904236, "learning_rate": 1.7831303311063752e-07, "loss": 0.2821, "step": 45406 }, { "epoch": 4.616409109394063, "grad_norm": 0.289358526468277, "learning_rate": 1.7821911544967373e-07, "loss": 0.2957, "step": 45407 }, { "epoch": 4.616510776738512, "grad_norm": 0.27234482765197754, "learning_rate": 1.7812522207969585e-07, "loss": 0.27, "step": 45408 }, { "epoch": 4.616612444082961, "grad_norm": 0.2846349775791168, "learning_rate": 1.7803135300117735e-07, "loss": 0.2833, "step": 45409 }, { "epoch": 4.6167141114274095, "grad_norm": 0.2828558683395386, "learning_rate": 1.7793750821459122e-07, "loss": 0.2897, "step": 45410 }, { "epoch": 4.6168157787718584, "grad_norm": 0.2871151268482208, "learning_rate": 1.7784368772040982e-07, "loss": 0.2752, "step": 45411 }, { "epoch": 4.616917446116307, "grad_norm": 0.28214651346206665, "learning_rate": 1.7774989151910614e-07, "loss": 0.2867, "step": 45412 }, { "epoch": 4.617019113460756, "grad_norm": 0.296311616897583, "learning_rate": 1.7765611961115314e-07, "loss": 0.3263, "step": 45413 }, { "epoch": 4.617120780805205, "grad_norm": 0.2866506278514862, "learning_rate": 1.7756237199702208e-07, "loss": 0.2945, "step": 45414 }, { "epoch": 4.617222448149654, "grad_norm": 0.3081873953342438, "learning_rate": 1.7746864867718538e-07, "loss": 0.2972, "step": 45415 }, { "epoch": 4.617324115494103, "grad_norm": 0.29096853733062744, "learning_rate": 1.7737494965211654e-07, "loss": 0.3046, "step": 45416 }, { "epoch": 4.617425782838552, "grad_norm": 0.302828311920166, "learning_rate": 1.7728127492228632e-07, "loss": 0.3037, "step": 45417 }, { "epoch": 4.617527450183001, "grad_norm": 0.28273695707321167, "learning_rate": 1.771876244881665e-07, "loss": 0.3171, "step": 45418 }, { "epoch": 4.61762911752745, "grad_norm": 0.2857554852962494, "learning_rate": 1.7709399835022955e-07, "loss": 0.2809, "step": 45419 }, { "epoch": 4.617730784871899, "grad_norm": 0.2936869263648987, "learning_rate": 1.7700039650894728e-07, "loss": 0.2728, "step": 45420 }, { "epoch": 4.617832452216348, "grad_norm": 0.276419073343277, "learning_rate": 1.7690681896479044e-07, "loss": 0.2668, "step": 45421 }, { "epoch": 4.617934119560797, "grad_norm": 0.2838054299354553, "learning_rate": 1.7681326571823144e-07, "loss": 0.2956, "step": 45422 }, { "epoch": 4.618035786905246, "grad_norm": 0.30074918270111084, "learning_rate": 1.7671973676974096e-07, "loss": 0.305, "step": 45423 }, { "epoch": 4.618137454249695, "grad_norm": 0.27424347400665283, "learning_rate": 1.7662623211979034e-07, "loss": 0.3251, "step": 45424 }, { "epoch": 4.618239121594144, "grad_norm": 0.26340070366859436, "learning_rate": 1.7653275176884977e-07, "loss": 0.334, "step": 45425 }, { "epoch": 4.618340788938593, "grad_norm": 0.27014821767807007, "learning_rate": 1.764392957173916e-07, "loss": 0.3169, "step": 45426 }, { "epoch": 4.6184424562830415, "grad_norm": 0.26557326316833496, "learning_rate": 1.7634586396588603e-07, "loss": 0.3162, "step": 45427 }, { "epoch": 4.6185441236274904, "grad_norm": 0.2929675579071045, "learning_rate": 1.7625245651480327e-07, "loss": 0.276, "step": 45428 }, { "epoch": 4.618645790971939, "grad_norm": 0.2872638702392578, "learning_rate": 1.7615907336461458e-07, "loss": 0.3024, "step": 45429 }, { "epoch": 4.618747458316388, "grad_norm": 0.298464298248291, "learning_rate": 1.7606571451579069e-07, "loss": 0.3098, "step": 45430 }, { "epoch": 4.618849125660837, "grad_norm": 0.2723498046398163, "learning_rate": 1.7597237996880013e-07, "loss": 0.2799, "step": 45431 }, { "epoch": 4.618950793005286, "grad_norm": 0.2739085555076599, "learning_rate": 1.7587906972411584e-07, "loss": 0.2902, "step": 45432 }, { "epoch": 4.619052460349736, "grad_norm": 0.271619588136673, "learning_rate": 1.757857837822058e-07, "loss": 0.3109, "step": 45433 }, { "epoch": 4.619154127694185, "grad_norm": 0.2684103846549988, "learning_rate": 1.7569252214354071e-07, "loss": 0.2991, "step": 45434 }, { "epoch": 4.619255795038634, "grad_norm": 0.27690768241882324, "learning_rate": 1.7559928480858968e-07, "loss": 0.2948, "step": 45435 }, { "epoch": 4.619357462383083, "grad_norm": 0.2917063236236572, "learning_rate": 1.7550607177782342e-07, "loss": 0.335, "step": 45436 }, { "epoch": 4.619459129727532, "grad_norm": 0.2768004536628723, "learning_rate": 1.75412883051711e-07, "loss": 0.2778, "step": 45437 }, { "epoch": 4.619560797071981, "grad_norm": 0.2753048837184906, "learning_rate": 1.753197186307215e-07, "loss": 0.2997, "step": 45438 }, { "epoch": 4.61966246441643, "grad_norm": 0.27896052598953247, "learning_rate": 1.752265785153262e-07, "loss": 0.2977, "step": 45439 }, { "epoch": 4.619764131760879, "grad_norm": 0.26406753063201904, "learning_rate": 1.7513346270599142e-07, "loss": 0.2993, "step": 45440 }, { "epoch": 4.619865799105328, "grad_norm": 0.26642927527427673, "learning_rate": 1.7504037120318783e-07, "loss": 0.2824, "step": 45441 }, { "epoch": 4.6199674664497765, "grad_norm": 0.2892298698425293, "learning_rate": 1.7494730400738457e-07, "loss": 0.2833, "step": 45442 }, { "epoch": 4.6200691337942255, "grad_norm": 0.26276299357414246, "learning_rate": 1.7485426111905067e-07, "loss": 0.3349, "step": 45443 }, { "epoch": 4.620170801138674, "grad_norm": 0.28716856241226196, "learning_rate": 1.7476124253865413e-07, "loss": 0.3088, "step": 45444 }, { "epoch": 4.620272468483123, "grad_norm": 0.2832084000110626, "learning_rate": 1.7466824826666285e-07, "loss": 0.3001, "step": 45445 }, { "epoch": 4.620374135827572, "grad_norm": 0.27537447214126587, "learning_rate": 1.7457527830354759e-07, "loss": 0.2934, "step": 45446 }, { "epoch": 4.620475803172021, "grad_norm": 0.2830525040626526, "learning_rate": 1.7448233264977464e-07, "loss": 0.2959, "step": 45447 }, { "epoch": 4.62057747051647, "grad_norm": 0.28164365887641907, "learning_rate": 1.7438941130581256e-07, "loss": 0.3235, "step": 45448 }, { "epoch": 4.620679137860919, "grad_norm": 0.2597541809082031, "learning_rate": 1.7429651427213146e-07, "loss": 0.2922, "step": 45449 }, { "epoch": 4.620780805205368, "grad_norm": 0.290619432926178, "learning_rate": 1.7420364154919656e-07, "loss": 0.2892, "step": 45450 }, { "epoch": 4.620882472549817, "grad_norm": 0.2600882053375244, "learning_rate": 1.7411079313747637e-07, "loss": 0.3139, "step": 45451 }, { "epoch": 4.620984139894266, "grad_norm": 0.26715248823165894, "learning_rate": 1.7401796903744106e-07, "loss": 0.2825, "step": 45452 }, { "epoch": 4.621085807238715, "grad_norm": 0.2730278968811035, "learning_rate": 1.7392516924955472e-07, "loss": 0.307, "step": 45453 }, { "epoch": 4.621187474583164, "grad_norm": 0.28004026412963867, "learning_rate": 1.7383239377428697e-07, "loss": 0.2955, "step": 45454 }, { "epoch": 4.621289141927613, "grad_norm": 0.2846045196056366, "learning_rate": 1.7373964261210462e-07, "loss": 0.2659, "step": 45455 }, { "epoch": 4.621390809272062, "grad_norm": 0.27106165885925293, "learning_rate": 1.736469157634757e-07, "loss": 0.3268, "step": 45456 }, { "epoch": 4.621492476616511, "grad_norm": 0.2899301052093506, "learning_rate": 1.7355421322886645e-07, "loss": 0.3171, "step": 45457 }, { "epoch": 4.62159414396096, "grad_norm": 0.285126268863678, "learning_rate": 1.7346153500874318e-07, "loss": 0.302, "step": 45458 }, { "epoch": 4.6216958113054085, "grad_norm": 0.2724846601486206, "learning_rate": 1.7336888110357552e-07, "loss": 0.3053, "step": 45459 }, { "epoch": 4.6217974786498575, "grad_norm": 0.2950771152973175, "learning_rate": 1.73276251513827e-07, "loss": 0.2959, "step": 45460 }, { "epoch": 4.621899145994306, "grad_norm": 0.2864823043346405, "learning_rate": 1.7318364623996609e-07, "loss": 0.2884, "step": 45461 }, { "epoch": 4.622000813338755, "grad_norm": 0.28264161944389343, "learning_rate": 1.7309106528245968e-07, "loss": 0.2922, "step": 45462 }, { "epoch": 4.622102480683204, "grad_norm": 0.2813866138458252, "learning_rate": 1.7299850864177237e-07, "loss": 0.2985, "step": 45463 }, { "epoch": 4.622204148027653, "grad_norm": 0.2895699441432953, "learning_rate": 1.7290597631837215e-07, "loss": 0.2936, "step": 45464 }, { "epoch": 4.622305815372102, "grad_norm": 0.29485276341438293, "learning_rate": 1.7281346831272362e-07, "loss": 0.2674, "step": 45465 }, { "epoch": 4.622407482716551, "grad_norm": 0.27753132581710815, "learning_rate": 1.7272098462529475e-07, "loss": 0.3126, "step": 45466 }, { "epoch": 4.622509150061, "grad_norm": 0.28009113669395447, "learning_rate": 1.7262852525655016e-07, "loss": 0.3012, "step": 45467 }, { "epoch": 4.622610817405449, "grad_norm": 0.2765336036682129, "learning_rate": 1.725360902069556e-07, "loss": 0.2911, "step": 45468 }, { "epoch": 4.622712484749899, "grad_norm": 0.28861376643180847, "learning_rate": 1.7244367947697794e-07, "loss": 0.2772, "step": 45469 }, { "epoch": 4.622814152094348, "grad_norm": 0.2727947533130646, "learning_rate": 1.7235129306708065e-07, "loss": 0.3066, "step": 45470 }, { "epoch": 4.622915819438797, "grad_norm": 0.2835652828216553, "learning_rate": 1.722589309777306e-07, "loss": 0.3274, "step": 45471 }, { "epoch": 4.623017486783246, "grad_norm": 0.285898357629776, "learning_rate": 1.7216659320939355e-07, "loss": 0.3089, "step": 45472 }, { "epoch": 4.623119154127695, "grad_norm": 0.28753167390823364, "learning_rate": 1.7207427976253298e-07, "loss": 0.3271, "step": 45473 }, { "epoch": 4.6232208214721435, "grad_norm": 0.2795025706291199, "learning_rate": 1.7198199063761522e-07, "loss": 0.2796, "step": 45474 }, { "epoch": 4.6233224888165925, "grad_norm": 0.28425055742263794, "learning_rate": 1.7188972583510545e-07, "loss": 0.305, "step": 45475 }, { "epoch": 4.623424156161041, "grad_norm": 0.3105050325393677, "learning_rate": 1.7179748535546714e-07, "loss": 0.2776, "step": 45476 }, { "epoch": 4.62352582350549, "grad_norm": 0.30456623435020447, "learning_rate": 1.7170526919916607e-07, "loss": 0.2779, "step": 45477 }, { "epoch": 4.623627490849939, "grad_norm": 0.2885938584804535, "learning_rate": 1.7161307736666577e-07, "loss": 0.2922, "step": 45478 }, { "epoch": 4.623729158194388, "grad_norm": 0.3015989661216736, "learning_rate": 1.7152090985843306e-07, "loss": 0.3099, "step": 45479 }, { "epoch": 4.623830825538837, "grad_norm": 0.2975985109806061, "learning_rate": 1.7142876667492924e-07, "loss": 0.2931, "step": 45480 }, { "epoch": 4.623932492883286, "grad_norm": 0.2898489534854889, "learning_rate": 1.7133664781661953e-07, "loss": 0.2813, "step": 45481 }, { "epoch": 4.624034160227735, "grad_norm": 0.26681220531463623, "learning_rate": 1.712445532839696e-07, "loss": 0.296, "step": 45482 }, { "epoch": 4.624135827572184, "grad_norm": 0.29058903455734253, "learning_rate": 1.7115248307744136e-07, "loss": 0.3249, "step": 45483 }, { "epoch": 4.624237494916633, "grad_norm": 0.2918570637702942, "learning_rate": 1.7106043719749944e-07, "loss": 0.2643, "step": 45484 }, { "epoch": 4.624339162261082, "grad_norm": 0.27058857679367065, "learning_rate": 1.7096841564460787e-07, "loss": 0.286, "step": 45485 }, { "epoch": 4.624440829605531, "grad_norm": 0.2684609889984131, "learning_rate": 1.708764184192291e-07, "loss": 0.2977, "step": 45486 }, { "epoch": 4.62454249694998, "grad_norm": 0.2611458897590637, "learning_rate": 1.7078444552182828e-07, "loss": 0.2928, "step": 45487 }, { "epoch": 4.624644164294429, "grad_norm": 0.3045862317085266, "learning_rate": 1.7069249695286673e-07, "loss": 0.2856, "step": 45488 }, { "epoch": 4.624745831638878, "grad_norm": 0.2818112373352051, "learning_rate": 1.7060057271281016e-07, "loss": 0.2867, "step": 45489 }, { "epoch": 4.624847498983327, "grad_norm": 0.2614188492298126, "learning_rate": 1.7050867280211935e-07, "loss": 0.3392, "step": 45490 }, { "epoch": 4.6249491663277755, "grad_norm": 0.27632734179496765, "learning_rate": 1.704167972212578e-07, "loss": 0.2806, "step": 45491 }, { "epoch": 4.6250508336722245, "grad_norm": 0.2763387858867645, "learning_rate": 1.7032494597069015e-07, "loss": 0.3264, "step": 45492 }, { "epoch": 4.625152501016673, "grad_norm": 0.28483763337135315, "learning_rate": 1.7023311905087658e-07, "loss": 0.2912, "step": 45493 }, { "epoch": 4.625254168361122, "grad_norm": 0.27511635422706604, "learning_rate": 1.7014131646228117e-07, "loss": 0.2942, "step": 45494 }, { "epoch": 4.625355835705571, "grad_norm": 0.27060821652412415, "learning_rate": 1.700495382053663e-07, "loss": 0.3069, "step": 45495 }, { "epoch": 4.62545750305002, "grad_norm": 0.26898908615112305, "learning_rate": 1.699577842805933e-07, "loss": 0.3346, "step": 45496 }, { "epoch": 4.625559170394469, "grad_norm": 0.2816559374332428, "learning_rate": 1.6986605468842566e-07, "loss": 0.2934, "step": 45497 }, { "epoch": 4.625660837738918, "grad_norm": 0.27848905324935913, "learning_rate": 1.6977434942932526e-07, "loss": 0.313, "step": 45498 }, { "epoch": 4.625762505083367, "grad_norm": 0.2563268542289734, "learning_rate": 1.6968266850375393e-07, "loss": 0.2937, "step": 45499 }, { "epoch": 4.625864172427816, "grad_norm": 0.27025240659713745, "learning_rate": 1.6959101191217297e-07, "loss": 0.3093, "step": 45500 }, { "epoch": 4.625965839772265, "grad_norm": 0.2984201908111572, "learning_rate": 1.6949937965504425e-07, "loss": 0.29, "step": 45501 }, { "epoch": 4.626067507116714, "grad_norm": 0.2880016267299652, "learning_rate": 1.694077717328313e-07, "loss": 0.2879, "step": 45502 }, { "epoch": 4.626169174461163, "grad_norm": 0.27518129348754883, "learning_rate": 1.6931618814599259e-07, "loss": 0.2897, "step": 45503 }, { "epoch": 4.626270841805612, "grad_norm": 0.2873857915401459, "learning_rate": 1.692246288949917e-07, "loss": 0.3062, "step": 45504 }, { "epoch": 4.626372509150061, "grad_norm": 0.2702200710773468, "learning_rate": 1.6913309398028876e-07, "loss": 0.2858, "step": 45505 }, { "epoch": 4.62647417649451, "grad_norm": 0.2652680277824402, "learning_rate": 1.690415834023451e-07, "loss": 0.2756, "step": 45506 }, { "epoch": 4.626575843838959, "grad_norm": 0.2820195257663727, "learning_rate": 1.689500971616226e-07, "loss": 0.2713, "step": 45507 }, { "epoch": 4.6266775111834075, "grad_norm": 0.275988906621933, "learning_rate": 1.6885863525858136e-07, "loss": 0.2877, "step": 45508 }, { "epoch": 4.6267791785278565, "grad_norm": 0.2805946171283722, "learning_rate": 1.6876719769368223e-07, "loss": 0.3258, "step": 45509 }, { "epoch": 4.626880845872305, "grad_norm": 0.27180975675582886, "learning_rate": 1.686757844673853e-07, "loss": 0.2975, "step": 45510 }, { "epoch": 4.626982513216754, "grad_norm": 0.26350316405296326, "learning_rate": 1.6858439558015195e-07, "loss": 0.2714, "step": 45511 }, { "epoch": 4.627084180561203, "grad_norm": 0.28187960386276245, "learning_rate": 1.6849303103244285e-07, "loss": 0.27, "step": 45512 }, { "epoch": 4.627185847905652, "grad_norm": 0.2743123769760132, "learning_rate": 1.6840169082471658e-07, "loss": 0.3084, "step": 45513 }, { "epoch": 4.627287515250101, "grad_norm": 0.3041372001171112, "learning_rate": 1.6831037495743552e-07, "loss": 0.2976, "step": 45514 }, { "epoch": 4.627389182594551, "grad_norm": 0.27431967854499817, "learning_rate": 1.682190834310582e-07, "loss": 0.2984, "step": 45515 }, { "epoch": 4.627490849939, "grad_norm": 0.29866623878479004, "learning_rate": 1.6812781624604478e-07, "loss": 0.3053, "step": 45516 }, { "epoch": 4.627592517283449, "grad_norm": 0.2863025963306427, "learning_rate": 1.680365734028555e-07, "loss": 0.3081, "step": 45517 }, { "epoch": 4.627694184627898, "grad_norm": 0.2655933201313019, "learning_rate": 1.6794535490194996e-07, "loss": 0.2799, "step": 45518 }, { "epoch": 4.627795851972347, "grad_norm": 0.2699821889400482, "learning_rate": 1.6785416074378724e-07, "loss": 0.2946, "step": 45519 }, { "epoch": 4.627897519316796, "grad_norm": 0.2803739607334137, "learning_rate": 1.6776299092882643e-07, "loss": 0.2938, "step": 45520 }, { "epoch": 4.627999186661245, "grad_norm": 0.2566160559654236, "learning_rate": 1.676718454575277e-07, "loss": 0.2854, "step": 45521 }, { "epoch": 4.628100854005694, "grad_norm": 0.30916962027549744, "learning_rate": 1.675807243303501e-07, "loss": 0.2624, "step": 45522 }, { "epoch": 4.6282025213501425, "grad_norm": 0.30279698967933655, "learning_rate": 1.6748962754775167e-07, "loss": 0.2718, "step": 45523 }, { "epoch": 4.6283041886945915, "grad_norm": 0.27936241030693054, "learning_rate": 1.6739855511019309e-07, "loss": 0.2798, "step": 45524 }, { "epoch": 4.62840585603904, "grad_norm": 0.27437251806259155, "learning_rate": 1.6730750701813182e-07, "loss": 0.2963, "step": 45525 }, { "epoch": 4.628507523383489, "grad_norm": 0.30116522312164307, "learning_rate": 1.672164832720269e-07, "loss": 0.3116, "step": 45526 }, { "epoch": 4.628609190727938, "grad_norm": 0.2799440324306488, "learning_rate": 1.6712548387233685e-07, "loss": 0.2906, "step": 45527 }, { "epoch": 4.628710858072387, "grad_norm": 0.2750040292739868, "learning_rate": 1.670345088195202e-07, "loss": 0.3075, "step": 45528 }, { "epoch": 4.628812525416836, "grad_norm": 0.2832573354244232, "learning_rate": 1.6694355811403552e-07, "loss": 0.2797, "step": 45529 }, { "epoch": 4.628914192761285, "grad_norm": 0.291530042886734, "learning_rate": 1.668526317563396e-07, "loss": 0.2954, "step": 45530 }, { "epoch": 4.629015860105734, "grad_norm": 0.2917070686817169, "learning_rate": 1.6676172974689265e-07, "loss": 0.2725, "step": 45531 }, { "epoch": 4.629117527450183, "grad_norm": 0.280957967042923, "learning_rate": 1.6667085208615153e-07, "loss": 0.3027, "step": 45532 }, { "epoch": 4.629219194794632, "grad_norm": 0.2836853265762329, "learning_rate": 1.6657999877457308e-07, "loss": 0.2994, "step": 45533 }, { "epoch": 4.629320862139081, "grad_norm": 0.29233530163764954, "learning_rate": 1.6648916981261697e-07, "loss": 0.3052, "step": 45534 }, { "epoch": 4.62942252948353, "grad_norm": 0.28431621193885803, "learning_rate": 1.6639836520073948e-07, "loss": 0.291, "step": 45535 }, { "epoch": 4.629524196827979, "grad_norm": 0.25202518701553345, "learning_rate": 1.6630758493939915e-07, "loss": 0.2783, "step": 45536 }, { "epoch": 4.629625864172428, "grad_norm": 0.3070005178451538, "learning_rate": 1.662168290290511e-07, "loss": 0.2934, "step": 45537 }, { "epoch": 4.629727531516877, "grad_norm": 0.32261332869529724, "learning_rate": 1.6612609747015506e-07, "loss": 0.2788, "step": 45538 }, { "epoch": 4.629829198861326, "grad_norm": 0.286598801612854, "learning_rate": 1.6603539026316673e-07, "loss": 0.3211, "step": 45539 }, { "epoch": 4.6299308662057745, "grad_norm": 0.2778884172439575, "learning_rate": 1.6594470740854296e-07, "loss": 0.2845, "step": 45540 }, { "epoch": 4.6300325335502235, "grad_norm": 0.27658456563949585, "learning_rate": 1.6585404890674172e-07, "loss": 0.282, "step": 45541 }, { "epoch": 4.630134200894672, "grad_norm": 0.25159910321235657, "learning_rate": 1.6576341475821878e-07, "loss": 0.2814, "step": 45542 }, { "epoch": 4.630235868239121, "grad_norm": 0.2666507363319397, "learning_rate": 1.6567280496343042e-07, "loss": 0.3177, "step": 45543 }, { "epoch": 4.63033753558357, "grad_norm": 0.28009286522865295, "learning_rate": 1.6558221952283403e-07, "loss": 0.2801, "step": 45544 }, { "epoch": 4.630439202928019, "grad_norm": 0.2840318977832794, "learning_rate": 1.6549165843688598e-07, "loss": 0.3136, "step": 45545 }, { "epoch": 4.630540870272468, "grad_norm": 0.27806100249290466, "learning_rate": 1.6540112170604193e-07, "loss": 0.2903, "step": 45546 }, { "epoch": 4.630642537616917, "grad_norm": 0.27801546454429626, "learning_rate": 1.653106093307577e-07, "loss": 0.3003, "step": 45547 }, { "epoch": 4.630744204961366, "grad_norm": 0.29238319396972656, "learning_rate": 1.6522012131149013e-07, "loss": 0.3023, "step": 45548 }, { "epoch": 4.630845872305815, "grad_norm": 0.29022738337516785, "learning_rate": 1.6512965764869438e-07, "loss": 0.2902, "step": 45549 }, { "epoch": 4.630947539650264, "grad_norm": 0.2895592749118805, "learning_rate": 1.6503921834282676e-07, "loss": 0.295, "step": 45550 }, { "epoch": 4.631049206994714, "grad_norm": 0.28010228276252747, "learning_rate": 1.649488033943425e-07, "loss": 0.3206, "step": 45551 }, { "epoch": 4.631150874339163, "grad_norm": 0.2693347632884979, "learning_rate": 1.648584128036973e-07, "loss": 0.3023, "step": 45552 }, { "epoch": 4.631252541683612, "grad_norm": 0.29668089747428894, "learning_rate": 1.6476804657134582e-07, "loss": 0.2951, "step": 45553 }, { "epoch": 4.631354209028061, "grad_norm": 0.28638339042663574, "learning_rate": 1.6467770469774437e-07, "loss": 0.3286, "step": 45554 }, { "epoch": 4.6314558763725096, "grad_norm": 0.3517504036426544, "learning_rate": 1.645873871833481e-07, "loss": 0.2906, "step": 45555 }, { "epoch": 4.6315575437169585, "grad_norm": 0.2829315662384033, "learning_rate": 1.6449709402861115e-07, "loss": 0.286, "step": 45556 }, { "epoch": 4.631659211061407, "grad_norm": 0.26429131627082825, "learning_rate": 1.644068252339881e-07, "loss": 0.3024, "step": 45557 }, { "epoch": 4.631760878405856, "grad_norm": 0.2887374758720398, "learning_rate": 1.6431658079993473e-07, "loss": 0.3245, "step": 45558 }, { "epoch": 4.631862545750305, "grad_norm": 0.2656221389770508, "learning_rate": 1.642263607269057e-07, "loss": 0.2974, "step": 45559 }, { "epoch": 4.631964213094754, "grad_norm": 0.28815358877182007, "learning_rate": 1.6413616501535446e-07, "loss": 0.2816, "step": 45560 }, { "epoch": 4.632065880439203, "grad_norm": 0.2977364659309387, "learning_rate": 1.6404599366573625e-07, "loss": 0.3039, "step": 45561 }, { "epoch": 4.632167547783652, "grad_norm": 0.28096890449523926, "learning_rate": 1.6395584667850572e-07, "loss": 0.315, "step": 45562 }, { "epoch": 4.632269215128101, "grad_norm": 0.2585240304470062, "learning_rate": 1.6386572405411527e-07, "loss": 0.3147, "step": 45563 }, { "epoch": 4.63237088247255, "grad_norm": 0.2813195586204529, "learning_rate": 1.637756257930212e-07, "loss": 0.2989, "step": 45564 }, { "epoch": 4.632472549816999, "grad_norm": 0.2544702887535095, "learning_rate": 1.6368555189567592e-07, "loss": 0.2913, "step": 45565 }, { "epoch": 4.632574217161448, "grad_norm": 0.2652909755706787, "learning_rate": 1.6359550236253351e-07, "loss": 0.2783, "step": 45566 }, { "epoch": 4.632675884505897, "grad_norm": 0.2828070819377899, "learning_rate": 1.6350547719404698e-07, "loss": 0.3054, "step": 45567 }, { "epoch": 4.632777551850346, "grad_norm": 0.270735502243042, "learning_rate": 1.6341547639067146e-07, "loss": 0.325, "step": 45568 }, { "epoch": 4.632879219194795, "grad_norm": 0.2621263563632965, "learning_rate": 1.633254999528594e-07, "loss": 0.3114, "step": 45569 }, { "epoch": 4.632980886539244, "grad_norm": 0.2693409025669098, "learning_rate": 1.6323554788106323e-07, "loss": 0.3079, "step": 45570 }, { "epoch": 4.633082553883693, "grad_norm": 0.28613901138305664, "learning_rate": 1.6314562017573755e-07, "loss": 0.2744, "step": 45571 }, { "epoch": 4.6331842212281416, "grad_norm": 0.29186832904815674, "learning_rate": 1.6305571683733535e-07, "loss": 0.2901, "step": 45572 }, { "epoch": 4.6332858885725905, "grad_norm": 0.2757833003997803, "learning_rate": 1.6296583786630792e-07, "loss": 0.3174, "step": 45573 }, { "epoch": 4.633387555917039, "grad_norm": 0.2640048563480377, "learning_rate": 1.6287598326310993e-07, "loss": 0.2944, "step": 45574 }, { "epoch": 4.633489223261488, "grad_norm": 0.2934424579143524, "learning_rate": 1.6278615302819324e-07, "loss": 0.3074, "step": 45575 }, { "epoch": 4.633590890605937, "grad_norm": 0.27965402603149414, "learning_rate": 1.6269634716201022e-07, "loss": 0.2711, "step": 45576 }, { "epoch": 4.633692557950386, "grad_norm": 0.26992467045783997, "learning_rate": 1.6260656566501277e-07, "loss": 0.3175, "step": 45577 }, { "epoch": 4.633794225294835, "grad_norm": 0.3057369887828827, "learning_rate": 1.6251680853765495e-07, "loss": 0.2951, "step": 45578 }, { "epoch": 4.633895892639284, "grad_norm": 0.2841869294643402, "learning_rate": 1.6242707578038753e-07, "loss": 0.2887, "step": 45579 }, { "epoch": 4.633997559983733, "grad_norm": 0.2895476818084717, "learning_rate": 1.6233736739366236e-07, "loss": 0.296, "step": 45580 }, { "epoch": 4.634099227328182, "grad_norm": 0.2720983028411865, "learning_rate": 1.6224768337793295e-07, "loss": 0.2644, "step": 45581 }, { "epoch": 4.634200894672631, "grad_norm": 0.27046072483062744, "learning_rate": 1.6215802373364953e-07, "loss": 0.2983, "step": 45582 }, { "epoch": 4.63430256201708, "grad_norm": 0.27306458353996277, "learning_rate": 1.620683884612645e-07, "loss": 0.3003, "step": 45583 }, { "epoch": 4.634404229361529, "grad_norm": 0.28080272674560547, "learning_rate": 1.6197877756122914e-07, "loss": 0.2799, "step": 45584 }, { "epoch": 4.634505896705978, "grad_norm": 0.25658199191093445, "learning_rate": 1.6188919103399536e-07, "loss": 0.2654, "step": 45585 }, { "epoch": 4.634607564050427, "grad_norm": 0.26235252618789673, "learning_rate": 1.6179962888001388e-07, "loss": 0.294, "step": 45586 }, { "epoch": 4.634709231394876, "grad_norm": 0.2827245593070984, "learning_rate": 1.6171009109973545e-07, "loss": 0.2919, "step": 45587 }, { "epoch": 4.634810898739325, "grad_norm": 0.27774468064308167, "learning_rate": 1.616205776936125e-07, "loss": 0.2827, "step": 45588 }, { "epoch": 4.6349125660837736, "grad_norm": 0.2833065092563629, "learning_rate": 1.6153108866209576e-07, "loss": 0.3115, "step": 45589 }, { "epoch": 4.6350142334282225, "grad_norm": 0.26984477043151855, "learning_rate": 1.6144162400563434e-07, "loss": 0.2942, "step": 45590 }, { "epoch": 4.635115900772671, "grad_norm": 0.31564968824386597, "learning_rate": 1.6135218372468177e-07, "loss": 0.261, "step": 45591 }, { "epoch": 4.63521756811712, "grad_norm": 0.27257269620895386, "learning_rate": 1.6126276781968597e-07, "loss": 0.2945, "step": 45592 }, { "epoch": 4.635319235461569, "grad_norm": 0.29089081287384033, "learning_rate": 1.611733762910983e-07, "loss": 0.3143, "step": 45593 }, { "epoch": 4.635420902806018, "grad_norm": 0.28263595700263977, "learning_rate": 1.6108400913937005e-07, "loss": 0.3162, "step": 45594 }, { "epoch": 4.635522570150467, "grad_norm": 0.27790915966033936, "learning_rate": 1.609946663649503e-07, "loss": 0.3018, "step": 45595 }, { "epoch": 4.635624237494916, "grad_norm": 0.27989140152931213, "learning_rate": 1.6090534796828983e-07, "loss": 0.2862, "step": 45596 }, { "epoch": 4.635725904839366, "grad_norm": 0.28272995352745056, "learning_rate": 1.608160539498377e-07, "loss": 0.258, "step": 45597 }, { "epoch": 4.635827572183815, "grad_norm": 0.270065575838089, "learning_rate": 1.6072678431004462e-07, "loss": 0.2859, "step": 45598 }, { "epoch": 4.635929239528264, "grad_norm": 0.2706458270549774, "learning_rate": 1.606375390493603e-07, "loss": 0.3096, "step": 45599 }, { "epoch": 4.636030906872713, "grad_norm": 0.278637558221817, "learning_rate": 1.6054831816823324e-07, "loss": 0.2763, "step": 45600 }, { "epoch": 4.636132574217162, "grad_norm": 0.27049803733825684, "learning_rate": 1.6045912166711474e-07, "loss": 0.314, "step": 45601 }, { "epoch": 4.636234241561611, "grad_norm": 0.31852275133132935, "learning_rate": 1.6036994954645224e-07, "loss": 0.2785, "step": 45602 }, { "epoch": 4.63633590890606, "grad_norm": 0.27597591280937195, "learning_rate": 1.602808018066959e-07, "loss": 0.2882, "step": 45603 }, { "epoch": 4.636437576250509, "grad_norm": 0.26792216300964355, "learning_rate": 1.6019167844829598e-07, "loss": 0.2801, "step": 45604 }, { "epoch": 4.6365392435949575, "grad_norm": 0.2800045907497406, "learning_rate": 1.6010257947169872e-07, "loss": 0.2992, "step": 45605 }, { "epoch": 4.6366409109394064, "grad_norm": 0.2831188142299652, "learning_rate": 1.6001350487735545e-07, "loss": 0.3039, "step": 45606 }, { "epoch": 4.636742578283855, "grad_norm": 0.27917104959487915, "learning_rate": 1.599244546657136e-07, "loss": 0.2727, "step": 45607 }, { "epoch": 4.636844245628304, "grad_norm": 0.27566513419151306, "learning_rate": 1.5983542883722226e-07, "loss": 0.2746, "step": 45608 }, { "epoch": 4.636945912972753, "grad_norm": 0.2905350923538208, "learning_rate": 1.5974642739232993e-07, "loss": 0.288, "step": 45609 }, { "epoch": 4.637047580317202, "grad_norm": 0.26336631178855896, "learning_rate": 1.596574503314846e-07, "loss": 0.3065, "step": 45610 }, { "epoch": 4.637149247661651, "grad_norm": 0.2884712517261505, "learning_rate": 1.5956849765513593e-07, "loss": 0.2986, "step": 45611 }, { "epoch": 4.6372509150061, "grad_norm": 0.26861056685447693, "learning_rate": 1.5947956936372965e-07, "loss": 0.2854, "step": 45612 }, { "epoch": 4.637352582350549, "grad_norm": 0.2968722879886627, "learning_rate": 1.5939066545771488e-07, "loss": 0.2835, "step": 45613 }, { "epoch": 4.637454249694998, "grad_norm": 0.2988792061805725, "learning_rate": 1.5930178593754063e-07, "loss": 0.2863, "step": 45614 }, { "epoch": 4.637555917039447, "grad_norm": 0.27753013372421265, "learning_rate": 1.5921293080365273e-07, "loss": 0.3147, "step": 45615 }, { "epoch": 4.637657584383896, "grad_norm": 0.28085559606552124, "learning_rate": 1.591241000565008e-07, "loss": 0.3069, "step": 45616 }, { "epoch": 4.637759251728345, "grad_norm": 0.2612423002719879, "learning_rate": 1.5903529369653004e-07, "loss": 0.3229, "step": 45617 }, { "epoch": 4.637860919072794, "grad_norm": 0.27727609872817993, "learning_rate": 1.5894651172419006e-07, "loss": 0.338, "step": 45618 }, { "epoch": 4.637962586417243, "grad_norm": 0.3059120774269104, "learning_rate": 1.5885775413992665e-07, "loss": 0.2959, "step": 45619 }, { "epoch": 4.638064253761692, "grad_norm": 0.25779908895492554, "learning_rate": 1.5876902094418723e-07, "loss": 0.2974, "step": 45620 }, { "epoch": 4.638165921106141, "grad_norm": 0.2969697117805481, "learning_rate": 1.586803121374203e-07, "loss": 0.2808, "step": 45621 }, { "epoch": 4.6382675884505895, "grad_norm": 0.303493469953537, "learning_rate": 1.5859162772006998e-07, "loss": 0.3216, "step": 45622 }, { "epoch": 4.6383692557950384, "grad_norm": 0.28068217635154724, "learning_rate": 1.585029676925848e-07, "loss": 0.2947, "step": 45623 }, { "epoch": 4.638470923139487, "grad_norm": 0.31106820702552795, "learning_rate": 1.5841433205541212e-07, "loss": 0.293, "step": 45624 }, { "epoch": 4.638572590483936, "grad_norm": 0.28814631700515747, "learning_rate": 1.5832572080899611e-07, "loss": 0.3148, "step": 45625 }, { "epoch": 4.638674257828385, "grad_norm": 0.267801433801651, "learning_rate": 1.582371339537858e-07, "loss": 0.2749, "step": 45626 }, { "epoch": 4.638775925172834, "grad_norm": 0.24857094883918762, "learning_rate": 1.5814857149022588e-07, "loss": 0.2947, "step": 45627 }, { "epoch": 4.638877592517283, "grad_norm": 0.2950323820114136, "learning_rate": 1.580600334187621e-07, "loss": 0.2743, "step": 45628 }, { "epoch": 4.638979259861732, "grad_norm": 0.32435664534568787, "learning_rate": 1.579715197398418e-07, "loss": 0.2968, "step": 45629 }, { "epoch": 4.639080927206181, "grad_norm": 0.28920644521713257, "learning_rate": 1.5788303045391028e-07, "loss": 0.2674, "step": 45630 }, { "epoch": 4.63918259455063, "grad_norm": 0.28068557381629944, "learning_rate": 1.5779456556141437e-07, "loss": 0.3132, "step": 45631 }, { "epoch": 4.639284261895079, "grad_norm": 0.2750784456729889, "learning_rate": 1.577061250627976e-07, "loss": 0.285, "step": 45632 }, { "epoch": 4.639385929239529, "grad_norm": 0.29649287462234497, "learning_rate": 1.5761770895850624e-07, "loss": 0.291, "step": 45633 }, { "epoch": 4.639487596583978, "grad_norm": 0.2739805579185486, "learning_rate": 1.575293172489878e-07, "loss": 0.2822, "step": 45634 }, { "epoch": 4.639589263928427, "grad_norm": 0.2633104622364044, "learning_rate": 1.5744094993468462e-07, "loss": 0.3057, "step": 45635 }, { "epoch": 4.639690931272876, "grad_norm": 0.26682212948799133, "learning_rate": 1.573526070160436e-07, "loss": 0.3082, "step": 45636 }, { "epoch": 4.6397925986173245, "grad_norm": 0.2836671769618988, "learning_rate": 1.5726428849350938e-07, "loss": 0.3194, "step": 45637 }, { "epoch": 4.6398942659617735, "grad_norm": 0.2867209017276764, "learning_rate": 1.5717599436752607e-07, "loss": 0.2742, "step": 45638 }, { "epoch": 4.639995933306222, "grad_norm": 0.28278541564941406, "learning_rate": 1.5708772463853993e-07, "loss": 0.3088, "step": 45639 }, { "epoch": 4.640097600650671, "grad_norm": 0.3173348009586334, "learning_rate": 1.569994793069951e-07, "loss": 0.3007, "step": 45640 }, { "epoch": 4.64019926799512, "grad_norm": 0.2854958772659302, "learning_rate": 1.569112583733362e-07, "loss": 0.3068, "step": 45641 }, { "epoch": 4.640300935339569, "grad_norm": 0.2835858464241028, "learning_rate": 1.5682306183800676e-07, "loss": 0.2793, "step": 45642 }, { "epoch": 4.640402602684018, "grad_norm": 0.2679920792579651, "learning_rate": 1.5673488970145202e-07, "loss": 0.3028, "step": 45643 }, { "epoch": 4.640504270028467, "grad_norm": 0.24822638928890228, "learning_rate": 1.5664674196411712e-07, "loss": 0.327, "step": 45644 }, { "epoch": 4.640605937372916, "grad_norm": 0.2767084538936615, "learning_rate": 1.565586186264434e-07, "loss": 0.3435, "step": 45645 }, { "epoch": 4.640707604717365, "grad_norm": 0.2917112112045288, "learning_rate": 1.5647051968887715e-07, "loss": 0.2924, "step": 45646 }, { "epoch": 4.640809272061814, "grad_norm": 0.2606782019138336, "learning_rate": 1.5638244515186142e-07, "loss": 0.2809, "step": 45647 }, { "epoch": 4.640910939406263, "grad_norm": 0.26322364807128906, "learning_rate": 1.5629439501583966e-07, "loss": 0.3397, "step": 45648 }, { "epoch": 4.641012606750712, "grad_norm": 0.27631089091300964, "learning_rate": 1.56206369281256e-07, "loss": 0.2892, "step": 45649 }, { "epoch": 4.641114274095161, "grad_norm": 0.2725397050380707, "learning_rate": 1.561183679485534e-07, "loss": 0.295, "step": 45650 }, { "epoch": 4.64121594143961, "grad_norm": 0.29086264967918396, "learning_rate": 1.5603039101817597e-07, "loss": 0.2839, "step": 45651 }, { "epoch": 4.641317608784059, "grad_norm": 0.2794816195964813, "learning_rate": 1.5594243849056502e-07, "loss": 0.2817, "step": 45652 }, { "epoch": 4.641419276128508, "grad_norm": 0.28306031227111816, "learning_rate": 1.5585451036616518e-07, "loss": 0.2981, "step": 45653 }, { "epoch": 4.6415209434729565, "grad_norm": 0.28536057472229004, "learning_rate": 1.5576660664542055e-07, "loss": 0.292, "step": 45654 }, { "epoch": 4.6416226108174055, "grad_norm": 0.2614242136478424, "learning_rate": 1.5567872732877133e-07, "loss": 0.3036, "step": 45655 }, { "epoch": 4.641724278161854, "grad_norm": 0.2796539068222046, "learning_rate": 1.5559087241666214e-07, "loss": 0.2927, "step": 45656 }, { "epoch": 4.641825945506303, "grad_norm": 0.29521042108535767, "learning_rate": 1.555030419095349e-07, "loss": 0.2985, "step": 45657 }, { "epoch": 4.641927612850752, "grad_norm": 0.27591046690940857, "learning_rate": 1.5541523580783147e-07, "loss": 0.2985, "step": 45658 }, { "epoch": 4.642029280195201, "grad_norm": 0.2859545648097992, "learning_rate": 1.5532745411199534e-07, "loss": 0.2695, "step": 45659 }, { "epoch": 4.64213094753965, "grad_norm": 0.2743940055370331, "learning_rate": 1.5523969682246842e-07, "loss": 0.3179, "step": 45660 }, { "epoch": 4.642232614884099, "grad_norm": 0.2684003710746765, "learning_rate": 1.55151963939692e-07, "loss": 0.3051, "step": 45661 }, { "epoch": 4.642334282228548, "grad_norm": 0.28621163964271545, "learning_rate": 1.550642554641091e-07, "loss": 0.3069, "step": 45662 }, { "epoch": 4.642435949572997, "grad_norm": 0.2805527448654175, "learning_rate": 1.5497657139616097e-07, "loss": 0.2802, "step": 45663 }, { "epoch": 4.642537616917446, "grad_norm": 0.2872653305530548, "learning_rate": 1.548889117362895e-07, "loss": 0.2986, "step": 45664 }, { "epoch": 4.642639284261895, "grad_norm": 0.2882554531097412, "learning_rate": 1.5480127648493603e-07, "loss": 0.2808, "step": 45665 }, { "epoch": 4.642740951606344, "grad_norm": 0.2624145448207855, "learning_rate": 1.54713665642543e-07, "loss": 0.3147, "step": 45666 }, { "epoch": 4.642842618950793, "grad_norm": 0.27775079011917114, "learning_rate": 1.5462607920955107e-07, "loss": 0.3058, "step": 45667 }, { "epoch": 4.642944286295242, "grad_norm": 0.2903948426246643, "learning_rate": 1.5453851718640056e-07, "loss": 0.2908, "step": 45668 }, { "epoch": 4.643045953639691, "grad_norm": 0.2874833345413208, "learning_rate": 1.544509795735344e-07, "loss": 0.3322, "step": 45669 }, { "epoch": 4.64314762098414, "grad_norm": 0.2966739535331726, "learning_rate": 1.5436346637139278e-07, "loss": 0.2729, "step": 45670 }, { "epoch": 4.6432492883285885, "grad_norm": 0.25621622800827026, "learning_rate": 1.5427597758041646e-07, "loss": 0.3233, "step": 45671 }, { "epoch": 4.6433509556730375, "grad_norm": 0.26252028346061707, "learning_rate": 1.5418851320104566e-07, "loss": 0.3141, "step": 45672 }, { "epoch": 4.643452623017486, "grad_norm": 0.30230408906936646, "learning_rate": 1.5410107323372226e-07, "loss": 0.3179, "step": 45673 }, { "epoch": 4.643554290361935, "grad_norm": 0.2796892821788788, "learning_rate": 1.540136576788859e-07, "loss": 0.2772, "step": 45674 }, { "epoch": 4.643655957706384, "grad_norm": 0.3011647164821625, "learning_rate": 1.5392626653697673e-07, "loss": 0.2846, "step": 45675 }, { "epoch": 4.643757625050833, "grad_norm": 0.3001614809036255, "learning_rate": 1.5383889980843613e-07, "loss": 0.2843, "step": 45676 }, { "epoch": 4.643859292395282, "grad_norm": 0.2830839157104492, "learning_rate": 1.5375155749370318e-07, "loss": 0.2987, "step": 45677 }, { "epoch": 4.643960959739731, "grad_norm": 0.27409085631370544, "learning_rate": 1.5366423959321808e-07, "loss": 0.2996, "step": 45678 }, { "epoch": 4.644062627084181, "grad_norm": 0.2693268656730652, "learning_rate": 1.5357694610742102e-07, "loss": 0.2826, "step": 45679 }, { "epoch": 4.64416429442863, "grad_norm": 0.27161335945129395, "learning_rate": 1.5348967703675167e-07, "loss": 0.2933, "step": 45680 }, { "epoch": 4.644265961773079, "grad_norm": 0.2851133346557617, "learning_rate": 1.534024323816502e-07, "loss": 0.283, "step": 45681 }, { "epoch": 4.644367629117528, "grad_norm": 0.27496233582496643, "learning_rate": 1.5331521214255408e-07, "loss": 0.2907, "step": 45682 }, { "epoch": 4.644469296461977, "grad_norm": 0.2897031903266907, "learning_rate": 1.5322801631990513e-07, "loss": 0.276, "step": 45683 }, { "epoch": 4.644570963806426, "grad_norm": 0.276623010635376, "learning_rate": 1.5314084491414194e-07, "loss": 0.2693, "step": 45684 }, { "epoch": 4.644672631150875, "grad_norm": 0.28757184743881226, "learning_rate": 1.5305369792570246e-07, "loss": 0.2843, "step": 45685 }, { "epoch": 4.6447742984953235, "grad_norm": 0.2855261266231537, "learning_rate": 1.5296657535502747e-07, "loss": 0.3371, "step": 45686 }, { "epoch": 4.6448759658397725, "grad_norm": 0.288714736700058, "learning_rate": 1.5287947720255492e-07, "loss": 0.2965, "step": 45687 }, { "epoch": 4.644977633184221, "grad_norm": 0.2857188582420349, "learning_rate": 1.5279240346872338e-07, "loss": 0.2712, "step": 45688 }, { "epoch": 4.64507930052867, "grad_norm": 0.2702508270740509, "learning_rate": 1.527053541539719e-07, "loss": 0.2951, "step": 45689 }, { "epoch": 4.645180967873119, "grad_norm": 0.28658223152160645, "learning_rate": 1.526183292587391e-07, "loss": 0.2731, "step": 45690 }, { "epoch": 4.645282635217568, "grad_norm": 0.3024221956729889, "learning_rate": 1.5253132878346345e-07, "loss": 0.2732, "step": 45691 }, { "epoch": 4.645384302562017, "grad_norm": 0.26195576786994934, "learning_rate": 1.5244435272858237e-07, "loss": 0.2699, "step": 45692 }, { "epoch": 4.645485969906466, "grad_norm": 0.25215184688568115, "learning_rate": 1.5235740109453556e-07, "loss": 0.28, "step": 45693 }, { "epoch": 4.645587637250915, "grad_norm": 0.2761988341808319, "learning_rate": 1.5227047388175986e-07, "loss": 0.2647, "step": 45694 }, { "epoch": 4.645689304595364, "grad_norm": 0.25964298844337463, "learning_rate": 1.521835710906927e-07, "loss": 0.2837, "step": 45695 }, { "epoch": 4.645790971939813, "grad_norm": 0.2579883933067322, "learning_rate": 1.5209669272177375e-07, "loss": 0.3302, "step": 45696 }, { "epoch": 4.645892639284262, "grad_norm": 0.28041020035743713, "learning_rate": 1.5200983877543928e-07, "loss": 0.2722, "step": 45697 }, { "epoch": 4.645994306628711, "grad_norm": 0.26892808079719543, "learning_rate": 1.5192300925212678e-07, "loss": 0.2958, "step": 45698 }, { "epoch": 4.64609597397316, "grad_norm": 0.2872925400733948, "learning_rate": 1.5183620415227475e-07, "loss": 0.2911, "step": 45699 }, { "epoch": 4.646197641317609, "grad_norm": 0.2954595685005188, "learning_rate": 1.517494234763195e-07, "loss": 0.2637, "step": 45700 }, { "epoch": 4.646299308662058, "grad_norm": 0.2832857370376587, "learning_rate": 1.5166266722469847e-07, "loss": 0.3225, "step": 45701 }, { "epoch": 4.646400976006507, "grad_norm": 0.27237725257873535, "learning_rate": 1.5157593539784855e-07, "loss": 0.2893, "step": 45702 }, { "epoch": 4.6465026433509555, "grad_norm": 0.27578306198120117, "learning_rate": 1.5148922799620714e-07, "loss": 0.3242, "step": 45703 }, { "epoch": 4.6466043106954045, "grad_norm": 0.26584699749946594, "learning_rate": 1.5140254502021112e-07, "loss": 0.2704, "step": 45704 }, { "epoch": 4.646705978039853, "grad_norm": 0.2965019941329956, "learning_rate": 1.5131588647029626e-07, "loss": 0.2626, "step": 45705 }, { "epoch": 4.646807645384302, "grad_norm": 0.2854524254798889, "learning_rate": 1.5122925234690057e-07, "loss": 0.2859, "step": 45706 }, { "epoch": 4.646909312728751, "grad_norm": 0.2777479887008667, "learning_rate": 1.511426426504592e-07, "loss": 0.264, "step": 45707 }, { "epoch": 4.6470109800732, "grad_norm": 0.3125589191913605, "learning_rate": 1.510560573814085e-07, "loss": 0.2853, "step": 45708 }, { "epoch": 4.647112647417649, "grad_norm": 0.2981269061565399, "learning_rate": 1.509694965401859e-07, "loss": 0.294, "step": 45709 }, { "epoch": 4.647214314762098, "grad_norm": 0.30201441049575806, "learning_rate": 1.5088296012722657e-07, "loss": 0.2924, "step": 45710 }, { "epoch": 4.647315982106547, "grad_norm": 0.2807910740375519, "learning_rate": 1.5079644814296636e-07, "loss": 0.2958, "step": 45711 }, { "epoch": 4.647417649450996, "grad_norm": 0.28376203775405884, "learning_rate": 1.5070996058784094e-07, "loss": 0.2541, "step": 45712 }, { "epoch": 4.647519316795445, "grad_norm": 0.29983749985694885, "learning_rate": 1.5062349746228667e-07, "loss": 0.3268, "step": 45713 }, { "epoch": 4.647620984139894, "grad_norm": 0.2854712903499603, "learning_rate": 1.5053705876673874e-07, "loss": 0.3017, "step": 45714 }, { "epoch": 4.647722651484344, "grad_norm": 0.29811030626296997, "learning_rate": 1.5045064450163237e-07, "loss": 0.259, "step": 45715 }, { "epoch": 4.647824318828793, "grad_norm": 0.3047785758972168, "learning_rate": 1.503642546674039e-07, "loss": 0.2945, "step": 45716 }, { "epoch": 4.647925986173242, "grad_norm": 0.2832120954990387, "learning_rate": 1.5027788926448793e-07, "loss": 0.2917, "step": 45717 }, { "epoch": 4.6480276535176905, "grad_norm": 0.28906336426734924, "learning_rate": 1.5019154829331917e-07, "loss": 0.2834, "step": 45718 }, { "epoch": 4.6481293208621395, "grad_norm": 0.2800290286540985, "learning_rate": 1.5010523175433223e-07, "loss": 0.2782, "step": 45719 }, { "epoch": 4.648230988206588, "grad_norm": 0.2950184643268585, "learning_rate": 1.5001893964796343e-07, "loss": 0.2882, "step": 45720 }, { "epoch": 4.648332655551037, "grad_norm": 0.2699127793312073, "learning_rate": 1.499326719746469e-07, "loss": 0.3236, "step": 45721 }, { "epoch": 4.648434322895486, "grad_norm": 0.29004618525505066, "learning_rate": 1.4984642873481614e-07, "loss": 0.2923, "step": 45722 }, { "epoch": 4.648535990239935, "grad_norm": 0.2913942039012909, "learning_rate": 1.497602099289075e-07, "loss": 0.2778, "step": 45723 }, { "epoch": 4.648637657584384, "grad_norm": 0.2804519534111023, "learning_rate": 1.4967401555735395e-07, "loss": 0.3008, "step": 45724 }, { "epoch": 4.648739324928833, "grad_norm": 0.28109246492385864, "learning_rate": 1.495878456205896e-07, "loss": 0.3178, "step": 45725 }, { "epoch": 4.648840992273282, "grad_norm": 0.2828892171382904, "learning_rate": 1.4950170011904964e-07, "loss": 0.3169, "step": 45726 }, { "epoch": 4.648942659617731, "grad_norm": 0.2790159285068512, "learning_rate": 1.4941557905316707e-07, "loss": 0.2972, "step": 45727 }, { "epoch": 4.64904432696218, "grad_norm": 0.2765568494796753, "learning_rate": 1.4932948242337653e-07, "loss": 0.2916, "step": 45728 }, { "epoch": 4.649145994306629, "grad_norm": 0.27072301506996155, "learning_rate": 1.4924341023011046e-07, "loss": 0.2955, "step": 45729 }, { "epoch": 4.649247661651078, "grad_norm": 0.29160431027412415, "learning_rate": 1.4915736247380464e-07, "loss": 0.2857, "step": 45730 }, { "epoch": 4.649349328995527, "grad_norm": 0.24761316180229187, "learning_rate": 1.4907133915489092e-07, "loss": 0.2934, "step": 45731 }, { "epoch": 4.649450996339976, "grad_norm": 0.26222658157348633, "learning_rate": 1.4898534027380228e-07, "loss": 0.303, "step": 45732 }, { "epoch": 4.649552663684425, "grad_norm": 0.27351081371307373, "learning_rate": 1.4889936583097343e-07, "loss": 0.3153, "step": 45733 }, { "epoch": 4.649654331028874, "grad_norm": 0.2881046533584595, "learning_rate": 1.4881341582683674e-07, "loss": 0.3099, "step": 45734 }, { "epoch": 4.6497559983733225, "grad_norm": 0.3070394992828369, "learning_rate": 1.4872749026182464e-07, "loss": 0.3084, "step": 45735 }, { "epoch": 4.6498576657177715, "grad_norm": 0.2671053111553192, "learning_rate": 1.4864158913637128e-07, "loss": 0.3131, "step": 45736 }, { "epoch": 4.64995933306222, "grad_norm": 0.2898617684841156, "learning_rate": 1.485557124509085e-07, "loss": 0.2612, "step": 45737 }, { "epoch": 4.650061000406669, "grad_norm": 0.2648717164993286, "learning_rate": 1.484698602058693e-07, "loss": 0.3087, "step": 45738 }, { "epoch": 4.650162667751118, "grad_norm": 0.29195770621299744, "learning_rate": 1.4838403240168552e-07, "loss": 0.3116, "step": 45739 }, { "epoch": 4.650264335095567, "grad_norm": 0.29400384426116943, "learning_rate": 1.4829822903879075e-07, "loss": 0.3003, "step": 45740 }, { "epoch": 4.650366002440016, "grad_norm": 0.2821938395500183, "learning_rate": 1.4821245011761632e-07, "loss": 0.3424, "step": 45741 }, { "epoch": 4.650467669784465, "grad_norm": 0.2555709779262543, "learning_rate": 1.4812669563859407e-07, "loss": 0.3001, "step": 45742 }, { "epoch": 4.650569337128914, "grad_norm": 0.291221559047699, "learning_rate": 1.4804096560215808e-07, "loss": 0.3061, "step": 45743 }, { "epoch": 4.650671004473363, "grad_norm": 0.28845301270484924, "learning_rate": 1.4795526000873695e-07, "loss": 0.3036, "step": 45744 }, { "epoch": 4.650772671817812, "grad_norm": 0.28859642148017883, "learning_rate": 1.4786957885876474e-07, "loss": 0.3093, "step": 45745 }, { "epoch": 4.650874339162261, "grad_norm": 0.28232890367507935, "learning_rate": 1.477839221526728e-07, "loss": 0.2706, "step": 45746 }, { "epoch": 4.65097600650671, "grad_norm": 0.30084121227264404, "learning_rate": 1.476982898908924e-07, "loss": 0.2909, "step": 45747 }, { "epoch": 4.651077673851159, "grad_norm": 0.2777067720890045, "learning_rate": 1.4761268207385492e-07, "loss": 0.3166, "step": 45748 }, { "epoch": 4.651179341195608, "grad_norm": 0.3053097128868103, "learning_rate": 1.475270987019911e-07, "loss": 0.2993, "step": 45749 }, { "epoch": 4.651281008540057, "grad_norm": 0.282014936208725, "learning_rate": 1.474415397757334e-07, "loss": 0.2984, "step": 45750 }, { "epoch": 4.651382675884506, "grad_norm": 0.2648667097091675, "learning_rate": 1.4735600529551199e-07, "loss": 0.3165, "step": 45751 }, { "epoch": 4.6514843432289545, "grad_norm": 0.2707311809062958, "learning_rate": 1.4727049526175707e-07, "loss": 0.3051, "step": 45752 }, { "epoch": 4.6515860105734035, "grad_norm": 0.2511306405067444, "learning_rate": 1.4718500967490167e-07, "loss": 0.3164, "step": 45753 }, { "epoch": 4.651687677917852, "grad_norm": 0.26138219237327576, "learning_rate": 1.4709954853537378e-07, "loss": 0.3005, "step": 45754 }, { "epoch": 4.651789345262301, "grad_norm": 0.2781272530555725, "learning_rate": 1.4701411184360524e-07, "loss": 0.2975, "step": 45755 }, { "epoch": 4.65189101260675, "grad_norm": 0.28814244270324707, "learning_rate": 1.4692869960002686e-07, "loss": 0.2956, "step": 45756 }, { "epoch": 4.651992679951199, "grad_norm": 0.2792288362979889, "learning_rate": 1.4684331180506774e-07, "loss": 0.2615, "step": 45757 }, { "epoch": 4.652094347295648, "grad_norm": 0.2916991114616394, "learning_rate": 1.467579484591597e-07, "loss": 0.2943, "step": 45758 }, { "epoch": 4.652196014640097, "grad_norm": 0.2997930347919464, "learning_rate": 1.4667260956273078e-07, "loss": 0.2723, "step": 45759 }, { "epoch": 4.652297681984546, "grad_norm": 0.2811644673347473, "learning_rate": 1.4658729511621228e-07, "loss": 0.3032, "step": 45760 }, { "epoch": 4.652399349328996, "grad_norm": 0.27235785126686096, "learning_rate": 1.4650200512003387e-07, "loss": 0.2878, "step": 45761 }, { "epoch": 4.652501016673445, "grad_norm": 0.29365772008895874, "learning_rate": 1.4641673957462465e-07, "loss": 0.313, "step": 45762 }, { "epoch": 4.652602684017894, "grad_norm": 0.27292460203170776, "learning_rate": 1.4633149848041538e-07, "loss": 0.2876, "step": 45763 }, { "epoch": 4.652704351362343, "grad_norm": 0.2979830503463745, "learning_rate": 1.4624628183783352e-07, "loss": 0.2994, "step": 45764 }, { "epoch": 4.652806018706792, "grad_norm": 0.26071467995643616, "learning_rate": 1.461610896473098e-07, "loss": 0.3159, "step": 45765 }, { "epoch": 4.652907686051241, "grad_norm": 0.2978595793247223, "learning_rate": 1.4607592190927388e-07, "loss": 0.2901, "step": 45766 }, { "epoch": 4.6530093533956896, "grad_norm": 0.27651774883270264, "learning_rate": 1.4599077862415268e-07, "loss": 0.2841, "step": 45767 }, { "epoch": 4.6531110207401385, "grad_norm": 0.26975637674331665, "learning_rate": 1.4590565979237692e-07, "loss": 0.3223, "step": 45768 }, { "epoch": 4.653212688084587, "grad_norm": 0.2695072293281555, "learning_rate": 1.4582056541437516e-07, "loss": 0.3039, "step": 45769 }, { "epoch": 4.653314355429036, "grad_norm": 0.2635016143321991, "learning_rate": 1.457354954905754e-07, "loss": 0.3184, "step": 45770 }, { "epoch": 4.653416022773485, "grad_norm": 0.29433107376098633, "learning_rate": 1.4565045002140733e-07, "loss": 0.2991, "step": 45771 }, { "epoch": 4.653517690117934, "grad_norm": 0.29178372025489807, "learning_rate": 1.4556542900729832e-07, "loss": 0.3103, "step": 45772 }, { "epoch": 4.653619357462383, "grad_norm": 0.263883113861084, "learning_rate": 1.454804324486775e-07, "loss": 0.2948, "step": 45773 }, { "epoch": 4.653721024806832, "grad_norm": 0.27611997723579407, "learning_rate": 1.453954603459723e-07, "loss": 0.3056, "step": 45774 }, { "epoch": 4.653822692151281, "grad_norm": 0.32084307074546814, "learning_rate": 1.4531051269961073e-07, "loss": 0.2753, "step": 45775 }, { "epoch": 4.65392435949573, "grad_norm": 0.2772723436355591, "learning_rate": 1.4522558951002186e-07, "loss": 0.3312, "step": 45776 }, { "epoch": 4.654026026840179, "grad_norm": 0.28284016251564026, "learning_rate": 1.451406907776326e-07, "loss": 0.2996, "step": 45777 }, { "epoch": 4.654127694184628, "grad_norm": 0.2791733145713806, "learning_rate": 1.4505581650287093e-07, "loss": 0.2877, "step": 45778 }, { "epoch": 4.654229361529077, "grad_norm": 0.26837778091430664, "learning_rate": 1.4497096668616428e-07, "loss": 0.2899, "step": 45779 }, { "epoch": 4.654331028873526, "grad_norm": 0.2641506791114807, "learning_rate": 1.4488614132793955e-07, "loss": 0.2929, "step": 45780 }, { "epoch": 4.654432696217975, "grad_norm": 0.2829543352127075, "learning_rate": 1.4480134042862526e-07, "loss": 0.3082, "step": 45781 }, { "epoch": 4.654534363562424, "grad_norm": 0.29654836654663086, "learning_rate": 1.4471656398864774e-07, "loss": 0.3014, "step": 45782 }, { "epoch": 4.654636030906873, "grad_norm": 0.2821787893772125, "learning_rate": 1.4463181200843558e-07, "loss": 0.3002, "step": 45783 }, { "epoch": 4.6547376982513216, "grad_norm": 0.25849682092666626, "learning_rate": 1.4454708448841282e-07, "loss": 0.3062, "step": 45784 }, { "epoch": 4.6548393655957705, "grad_norm": 0.29551035165786743, "learning_rate": 1.444623814290086e-07, "loss": 0.2864, "step": 45785 }, { "epoch": 4.654941032940219, "grad_norm": 0.267500638961792, "learning_rate": 1.4437770283064977e-07, "loss": 0.2993, "step": 45786 }, { "epoch": 4.655042700284668, "grad_norm": 0.27775663137435913, "learning_rate": 1.442930486937616e-07, "loss": 0.3293, "step": 45787 }, { "epoch": 4.655144367629117, "grad_norm": 0.28999951481819153, "learning_rate": 1.442084190187709e-07, "loss": 0.278, "step": 45788 }, { "epoch": 4.655246034973566, "grad_norm": 0.3001992404460907, "learning_rate": 1.4412381380610517e-07, "loss": 0.2989, "step": 45789 }, { "epoch": 4.655347702318015, "grad_norm": 0.28729620575904846, "learning_rate": 1.440392330561885e-07, "loss": 0.295, "step": 45790 }, { "epoch": 4.655449369662464, "grad_norm": 0.2394435554742813, "learning_rate": 1.4395467676944884e-07, "loss": 0.3244, "step": 45791 }, { "epoch": 4.655551037006913, "grad_norm": 0.2805261015892029, "learning_rate": 1.43870144946312e-07, "loss": 0.2981, "step": 45792 }, { "epoch": 4.655652704351362, "grad_norm": 0.26782578229904175, "learning_rate": 1.437856375872032e-07, "loss": 0.302, "step": 45793 }, { "epoch": 4.655754371695811, "grad_norm": 0.2805846035480499, "learning_rate": 1.4370115469254764e-07, "loss": 0.2852, "step": 45794 }, { "epoch": 4.65585603904026, "grad_norm": 0.2917613685131073, "learning_rate": 1.4361669626277163e-07, "loss": 0.2746, "step": 45795 }, { "epoch": 4.655957706384709, "grad_norm": 0.27988043427467346, "learning_rate": 1.4353226229830207e-07, "loss": 0.2879, "step": 45796 }, { "epoch": 4.656059373729159, "grad_norm": 0.27189263701438904, "learning_rate": 1.434478527995614e-07, "loss": 0.2856, "step": 45797 }, { "epoch": 4.656161041073608, "grad_norm": 0.26239094138145447, "learning_rate": 1.4336346776697707e-07, "loss": 0.2991, "step": 45798 }, { "epoch": 4.656262708418057, "grad_norm": 0.3107035160064697, "learning_rate": 1.4327910720097316e-07, "loss": 0.2949, "step": 45799 }, { "epoch": 4.6563643757625055, "grad_norm": 0.2664186358451843, "learning_rate": 1.4319477110197487e-07, "loss": 0.3023, "step": 45800 }, { "epoch": 4.6564660431069544, "grad_norm": 0.25150713324546814, "learning_rate": 1.4311045947040746e-07, "loss": 0.3086, "step": 45801 }, { "epoch": 4.656567710451403, "grad_norm": 0.27387645840644836, "learning_rate": 1.4302617230669558e-07, "loss": 0.2469, "step": 45802 }, { "epoch": 4.656669377795852, "grad_norm": 0.2873938977718353, "learning_rate": 1.4294190961126387e-07, "loss": 0.303, "step": 45803 }, { "epoch": 4.656771045140301, "grad_norm": 0.28374868631362915, "learning_rate": 1.4285767138453532e-07, "loss": 0.2942, "step": 45804 }, { "epoch": 4.65687271248475, "grad_norm": 0.265137642621994, "learning_rate": 1.4277345762693627e-07, "loss": 0.3106, "step": 45805 }, { "epoch": 4.656974379829199, "grad_norm": 0.2736181318759918, "learning_rate": 1.4268926833889085e-07, "loss": 0.2632, "step": 45806 }, { "epoch": 4.657076047173648, "grad_norm": 0.28034907579421997, "learning_rate": 1.4260510352082203e-07, "loss": 0.3021, "step": 45807 }, { "epoch": 4.657177714518097, "grad_norm": 0.3049858808517456, "learning_rate": 1.42520963173155e-07, "loss": 0.2815, "step": 45808 }, { "epoch": 4.657279381862546, "grad_norm": 0.30040279030799866, "learning_rate": 1.4243684729631224e-07, "loss": 0.2898, "step": 45809 }, { "epoch": 4.657381049206995, "grad_norm": 0.29066002368927, "learning_rate": 1.423527558907184e-07, "loss": 0.2684, "step": 45810 }, { "epoch": 4.657482716551444, "grad_norm": 0.2791213095188141, "learning_rate": 1.4226868895679759e-07, "loss": 0.295, "step": 45811 }, { "epoch": 4.657584383895893, "grad_norm": 0.24902620911598206, "learning_rate": 1.4218464649497277e-07, "loss": 0.2802, "step": 45812 }, { "epoch": 4.657686051240342, "grad_norm": 0.3151063621044159, "learning_rate": 1.4210062850566698e-07, "loss": 0.2879, "step": 45813 }, { "epoch": 4.657787718584791, "grad_norm": 0.27241599559783936, "learning_rate": 1.4201663498930374e-07, "loss": 0.2825, "step": 45814 }, { "epoch": 4.65788938592924, "grad_norm": 0.2792072594165802, "learning_rate": 1.4193266594630606e-07, "loss": 0.281, "step": 45815 }, { "epoch": 4.657991053273689, "grad_norm": 0.28684014081954956, "learning_rate": 1.4184872137709804e-07, "loss": 0.2959, "step": 45816 }, { "epoch": 4.6580927206181375, "grad_norm": 0.29555997252464294, "learning_rate": 1.4176480128210045e-07, "loss": 0.2814, "step": 45817 }, { "epoch": 4.6581943879625864, "grad_norm": 0.28319475054740906, "learning_rate": 1.41680905661738e-07, "loss": 0.292, "step": 45818 }, { "epoch": 4.658296055307035, "grad_norm": 0.268353134393692, "learning_rate": 1.4159703451643303e-07, "loss": 0.2789, "step": 45819 }, { "epoch": 4.658397722651484, "grad_norm": 0.28385308384895325, "learning_rate": 1.4151318784660638e-07, "loss": 0.3153, "step": 45820 }, { "epoch": 4.658499389995933, "grad_norm": 0.2854456603527069, "learning_rate": 1.4142936565268274e-07, "loss": 0.2956, "step": 45821 }, { "epoch": 4.658601057340382, "grad_norm": 0.2866485118865967, "learning_rate": 1.4134556793508336e-07, "loss": 0.3124, "step": 45822 }, { "epoch": 4.658702724684831, "grad_norm": 0.278405100107193, "learning_rate": 1.4126179469423072e-07, "loss": 0.3077, "step": 45823 }, { "epoch": 4.65880439202928, "grad_norm": 0.2814471125602722, "learning_rate": 1.4117804593054563e-07, "loss": 0.2838, "step": 45824 }, { "epoch": 4.658906059373729, "grad_norm": 0.2665896415710449, "learning_rate": 1.4109432164445158e-07, "loss": 0.2647, "step": 45825 }, { "epoch": 4.659007726718178, "grad_norm": 0.3122450113296509, "learning_rate": 1.4101062183636938e-07, "loss": 0.2964, "step": 45826 }, { "epoch": 4.659109394062627, "grad_norm": 0.3052695691585541, "learning_rate": 1.409269465067209e-07, "loss": 0.3394, "step": 45827 }, { "epoch": 4.659211061407076, "grad_norm": 0.26727494597435, "learning_rate": 1.4084329565592802e-07, "loss": 0.2983, "step": 45828 }, { "epoch": 4.659312728751525, "grad_norm": 0.31109538674354553, "learning_rate": 1.407596692844121e-07, "loss": 0.2972, "step": 45829 }, { "epoch": 4.659414396095974, "grad_norm": 0.2744685113430023, "learning_rate": 1.4067606739259333e-07, "loss": 0.3224, "step": 45830 }, { "epoch": 4.659516063440423, "grad_norm": 0.2643844187259674, "learning_rate": 1.405924899808947e-07, "loss": 0.2694, "step": 45831 }, { "epoch": 4.659617730784872, "grad_norm": 0.3080193102359772, "learning_rate": 1.4050893704973644e-07, "loss": 0.3281, "step": 45832 }, { "epoch": 4.659719398129321, "grad_norm": 0.3105722963809967, "learning_rate": 1.404254085995388e-07, "loss": 0.2693, "step": 45833 }, { "epoch": 4.6598210654737695, "grad_norm": 0.30412834882736206, "learning_rate": 1.4034190463072307e-07, "loss": 0.3128, "step": 45834 }, { "epoch": 4.6599227328182184, "grad_norm": 0.2845267951488495, "learning_rate": 1.4025842514371057e-07, "loss": 0.3358, "step": 45835 }, { "epoch": 4.660024400162667, "grad_norm": 0.27970489859580994, "learning_rate": 1.4017497013892102e-07, "loss": 0.3013, "step": 45836 }, { "epoch": 4.660126067507116, "grad_norm": 0.2883913218975067, "learning_rate": 1.4009153961677456e-07, "loss": 0.3163, "step": 45837 }, { "epoch": 4.660227734851565, "grad_norm": 0.26080018281936646, "learning_rate": 1.4000813357769261e-07, "loss": 0.3068, "step": 45838 }, { "epoch": 4.660329402196014, "grad_norm": 0.26739662885665894, "learning_rate": 1.3992475202209478e-07, "loss": 0.2904, "step": 45839 }, { "epoch": 4.660431069540463, "grad_norm": 0.2743140757083893, "learning_rate": 1.3984139495040073e-07, "loss": 0.3069, "step": 45840 }, { "epoch": 4.660532736884912, "grad_norm": 0.27553269267082214, "learning_rate": 1.3975806236303124e-07, "loss": 0.2835, "step": 45841 }, { "epoch": 4.660634404229361, "grad_norm": 0.2862066626548767, "learning_rate": 1.3967475426040545e-07, "loss": 0.253, "step": 45842 }, { "epoch": 4.660736071573811, "grad_norm": 0.27882111072540283, "learning_rate": 1.3959147064294354e-07, "loss": 0.2938, "step": 45843 }, { "epoch": 4.66083773891826, "grad_norm": 0.286434531211853, "learning_rate": 1.3950821151106408e-07, "loss": 0.3032, "step": 45844 }, { "epoch": 4.660939406262709, "grad_norm": 0.2904771566390991, "learning_rate": 1.3942497686518787e-07, "loss": 0.2913, "step": 45845 }, { "epoch": 4.661041073607158, "grad_norm": 0.2929718792438507, "learning_rate": 1.393417667057334e-07, "loss": 0.2701, "step": 45846 }, { "epoch": 4.661142740951607, "grad_norm": 0.2708483040332794, "learning_rate": 1.3925858103311984e-07, "loss": 0.2823, "step": 45847 }, { "epoch": 4.661244408296056, "grad_norm": 0.30465641617774963, "learning_rate": 1.3917541984776628e-07, "loss": 0.3121, "step": 45848 }, { "epoch": 4.6613460756405045, "grad_norm": 0.28301092982292175, "learning_rate": 1.3909228315009237e-07, "loss": 0.327, "step": 45849 }, { "epoch": 4.6614477429849535, "grad_norm": 0.2773086428642273, "learning_rate": 1.3900917094051614e-07, "loss": 0.2803, "step": 45850 }, { "epoch": 4.661549410329402, "grad_norm": 0.28784966468811035, "learning_rate": 1.3892608321945667e-07, "loss": 0.2654, "step": 45851 }, { "epoch": 4.661651077673851, "grad_norm": 0.2815435528755188, "learning_rate": 1.388430199873325e-07, "loss": 0.2869, "step": 45852 }, { "epoch": 4.6617527450183, "grad_norm": 0.2966806888580322, "learning_rate": 1.3875998124456225e-07, "loss": 0.2903, "step": 45853 }, { "epoch": 4.661854412362749, "grad_norm": 0.2919810116291046, "learning_rate": 1.386769669915633e-07, "loss": 0.2764, "step": 45854 }, { "epoch": 4.661956079707198, "grad_norm": 0.2811645567417145, "learning_rate": 1.3859397722875477e-07, "loss": 0.2898, "step": 45855 }, { "epoch": 4.662057747051647, "grad_norm": 0.30916735529899597, "learning_rate": 1.3851101195655526e-07, "loss": 0.3037, "step": 45856 }, { "epoch": 4.662159414396096, "grad_norm": 0.2837161719799042, "learning_rate": 1.3842807117538105e-07, "loss": 0.288, "step": 45857 }, { "epoch": 4.662261081740545, "grad_norm": 0.2837185263633728, "learning_rate": 1.3834515488565126e-07, "loss": 0.309, "step": 45858 }, { "epoch": 4.662362749084994, "grad_norm": 0.28671160340309143, "learning_rate": 1.3826226308778334e-07, "loss": 0.3214, "step": 45859 }, { "epoch": 4.662464416429443, "grad_norm": 0.30640700459480286, "learning_rate": 1.3817939578219476e-07, "loss": 0.338, "step": 45860 }, { "epoch": 4.662566083773892, "grad_norm": 0.273863822221756, "learning_rate": 1.380965529693029e-07, "loss": 0.2793, "step": 45861 }, { "epoch": 4.662667751118341, "grad_norm": 0.25943896174430847, "learning_rate": 1.3801373464952582e-07, "loss": 0.2883, "step": 45862 }, { "epoch": 4.66276941846279, "grad_norm": 0.26130059361457825, "learning_rate": 1.3793094082327985e-07, "loss": 0.3108, "step": 45863 }, { "epoch": 4.662871085807239, "grad_norm": 0.29383477568626404, "learning_rate": 1.3784817149098184e-07, "loss": 0.27, "step": 45864 }, { "epoch": 4.662972753151688, "grad_norm": 0.28772875666618347, "learning_rate": 1.3776542665304983e-07, "loss": 0.3141, "step": 45865 }, { "epoch": 4.6630744204961365, "grad_norm": 0.2800491452217102, "learning_rate": 1.3768270630990066e-07, "loss": 0.2638, "step": 45866 }, { "epoch": 4.6631760878405855, "grad_norm": 0.284393310546875, "learning_rate": 1.3760001046194959e-07, "loss": 0.2827, "step": 45867 }, { "epoch": 4.663277755185034, "grad_norm": 0.29725342988967896, "learning_rate": 1.3751733910961463e-07, "loss": 0.32, "step": 45868 }, { "epoch": 4.663379422529483, "grad_norm": 0.28617608547210693, "learning_rate": 1.3743469225331207e-07, "loss": 0.2739, "step": 45869 }, { "epoch": 4.663481089873932, "grad_norm": 0.3167177438735962, "learning_rate": 1.3735206989345717e-07, "loss": 0.2654, "step": 45870 }, { "epoch": 4.663582757218381, "grad_norm": 0.28221365809440613, "learning_rate": 1.3726947203046735e-07, "loss": 0.2863, "step": 45871 }, { "epoch": 4.66368442456283, "grad_norm": 0.2740338146686554, "learning_rate": 1.3718689866475897e-07, "loss": 0.2807, "step": 45872 }, { "epoch": 4.663786091907279, "grad_norm": 0.2713209390640259, "learning_rate": 1.3710434979674725e-07, "loss": 0.2979, "step": 45873 }, { "epoch": 4.663887759251728, "grad_norm": 0.2948239743709564, "learning_rate": 1.370218254268474e-07, "loss": 0.277, "step": 45874 }, { "epoch": 4.663989426596177, "grad_norm": 0.26689162850379944, "learning_rate": 1.3693932555547684e-07, "loss": 0.315, "step": 45875 }, { "epoch": 4.664091093940626, "grad_norm": 0.27372029423713684, "learning_rate": 1.3685685018305028e-07, "loss": 0.3016, "step": 45876 }, { "epoch": 4.664192761285075, "grad_norm": 0.2777900993824005, "learning_rate": 1.3677439930998237e-07, "loss": 0.285, "step": 45877 }, { "epoch": 4.664294428629524, "grad_norm": 0.2688867449760437, "learning_rate": 1.3669197293669e-07, "loss": 0.281, "step": 45878 }, { "epoch": 4.664396095973974, "grad_norm": 0.2729906737804413, "learning_rate": 1.3660957106358786e-07, "loss": 0.3088, "step": 45879 }, { "epoch": 4.664497763318423, "grad_norm": 0.278251051902771, "learning_rate": 1.3652719369109058e-07, "loss": 0.2737, "step": 45880 }, { "epoch": 4.6645994306628715, "grad_norm": 0.28730422258377075, "learning_rate": 1.3644484081961397e-07, "loss": 0.2964, "step": 45881 }, { "epoch": 4.6647010980073205, "grad_norm": 0.2820900082588196, "learning_rate": 1.3636251244957265e-07, "loss": 0.3398, "step": 45882 }, { "epoch": 4.664802765351769, "grad_norm": 0.2778725028038025, "learning_rate": 1.3628020858138135e-07, "loss": 0.2748, "step": 45883 }, { "epoch": 4.664904432696218, "grad_norm": 0.2867256999015808, "learning_rate": 1.361979292154536e-07, "loss": 0.3001, "step": 45884 }, { "epoch": 4.665006100040667, "grad_norm": 0.27860409021377563, "learning_rate": 1.3611567435220573e-07, "loss": 0.2756, "step": 45885 }, { "epoch": 4.665107767385116, "grad_norm": 0.2967446744441986, "learning_rate": 1.3603344399205076e-07, "loss": 0.2869, "step": 45886 }, { "epoch": 4.665209434729565, "grad_norm": 0.28546378016471863, "learning_rate": 1.3595123813540334e-07, "loss": 0.2735, "step": 45887 }, { "epoch": 4.665311102074014, "grad_norm": 0.26938527822494507, "learning_rate": 1.3586905678267814e-07, "loss": 0.3252, "step": 45888 }, { "epoch": 4.665412769418463, "grad_norm": 0.277904748916626, "learning_rate": 1.3578689993428872e-07, "loss": 0.3138, "step": 45889 }, { "epoch": 4.665514436762912, "grad_norm": 0.2664419710636139, "learning_rate": 1.3570476759064866e-07, "loss": 0.303, "step": 45890 }, { "epoch": 4.665616104107361, "grad_norm": 0.27302953600883484, "learning_rate": 1.3562265975217204e-07, "loss": 0.2939, "step": 45891 }, { "epoch": 4.66571777145181, "grad_norm": 0.2795795202255249, "learning_rate": 1.3554057641927298e-07, "loss": 0.2844, "step": 45892 }, { "epoch": 4.665819438796259, "grad_norm": 0.268115758895874, "learning_rate": 1.3545851759236451e-07, "loss": 0.2483, "step": 45893 }, { "epoch": 4.665921106140708, "grad_norm": 0.2785969078540802, "learning_rate": 1.3537648327185905e-07, "loss": 0.2542, "step": 45894 }, { "epoch": 4.666022773485157, "grad_norm": 0.28522220253944397, "learning_rate": 1.352944734581718e-07, "loss": 0.3047, "step": 45895 }, { "epoch": 4.666124440829606, "grad_norm": 0.27965831756591797, "learning_rate": 1.3521248815171528e-07, "loss": 0.3385, "step": 45896 }, { "epoch": 4.666226108174055, "grad_norm": 0.2929469048976898, "learning_rate": 1.3513052735290077e-07, "loss": 0.2795, "step": 45897 }, { "epoch": 4.6663277755185035, "grad_norm": 0.29088136553764343, "learning_rate": 1.3504859106214409e-07, "loss": 0.3019, "step": 45898 }, { "epoch": 4.6664294428629525, "grad_norm": 0.29021117091178894, "learning_rate": 1.3496667927985597e-07, "loss": 0.3013, "step": 45899 }, { "epoch": 4.666531110207401, "grad_norm": 0.2820453643798828, "learning_rate": 1.3488479200644943e-07, "loss": 0.2809, "step": 45900 }, { "epoch": 4.66663277755185, "grad_norm": 0.26259759068489075, "learning_rate": 1.3480292924233695e-07, "loss": 0.2853, "step": 45901 }, { "epoch": 4.666734444896299, "grad_norm": 0.27717116475105286, "learning_rate": 1.3472109098793152e-07, "loss": 0.2737, "step": 45902 }, { "epoch": 4.666836112240748, "grad_norm": 0.2619929909706116, "learning_rate": 1.3463927724364555e-07, "loss": 0.2908, "step": 45903 }, { "epoch": 4.666937779585197, "grad_norm": 0.2595551013946533, "learning_rate": 1.3455748800988988e-07, "loss": 0.3032, "step": 45904 }, { "epoch": 4.667039446929646, "grad_norm": 0.28157126903533936, "learning_rate": 1.3447572328707804e-07, "loss": 0.2632, "step": 45905 }, { "epoch": 4.667141114274095, "grad_norm": 0.2844122052192688, "learning_rate": 1.343939830756208e-07, "loss": 0.2862, "step": 45906 }, { "epoch": 4.667242781618544, "grad_norm": 0.28977257013320923, "learning_rate": 1.3431226737593006e-07, "loss": 0.286, "step": 45907 }, { "epoch": 4.667344448962993, "grad_norm": 0.2707582414150238, "learning_rate": 1.3423057618841883e-07, "loss": 0.278, "step": 45908 }, { "epoch": 4.667446116307442, "grad_norm": 0.26784616708755493, "learning_rate": 1.3414890951349735e-07, "loss": 0.2656, "step": 45909 }, { "epoch": 4.667547783651891, "grad_norm": 0.2745267152786255, "learning_rate": 1.3406726735157693e-07, "loss": 0.2981, "step": 45910 }, { "epoch": 4.66764945099634, "grad_norm": 0.26695960760116577, "learning_rate": 1.339856497030695e-07, "loss": 0.2843, "step": 45911 }, { "epoch": 4.667751118340789, "grad_norm": 0.2720070779323578, "learning_rate": 1.3390405656838634e-07, "loss": 0.3248, "step": 45912 }, { "epoch": 4.667852785685238, "grad_norm": 0.29335641860961914, "learning_rate": 1.3382248794793772e-07, "loss": 0.3121, "step": 45913 }, { "epoch": 4.667954453029687, "grad_norm": 0.28541985154151917, "learning_rate": 1.33740943842135e-07, "loss": 0.3143, "step": 45914 }, { "epoch": 4.6680561203741355, "grad_norm": 0.28986456990242004, "learning_rate": 1.3365942425139e-07, "loss": 0.293, "step": 45915 }, { "epoch": 4.6681577877185845, "grad_norm": 0.2692302465438843, "learning_rate": 1.3357792917611135e-07, "loss": 0.3063, "step": 45916 }, { "epoch": 4.668259455063033, "grad_norm": 0.27244967222213745, "learning_rate": 1.3349645861671036e-07, "loss": 0.2865, "step": 45917 }, { "epoch": 4.668361122407482, "grad_norm": 0.2831132113933563, "learning_rate": 1.3341501257359946e-07, "loss": 0.2868, "step": 45918 }, { "epoch": 4.668462789751931, "grad_norm": 0.26773005723953247, "learning_rate": 1.3333359104718558e-07, "loss": 0.2877, "step": 45919 }, { "epoch": 4.66856445709638, "grad_norm": 0.29153138399124146, "learning_rate": 1.3325219403788114e-07, "loss": 0.3011, "step": 45920 }, { "epoch": 4.668666124440829, "grad_norm": 0.271968275308609, "learning_rate": 1.3317082154609528e-07, "loss": 0.2847, "step": 45921 }, { "epoch": 4.668767791785278, "grad_norm": 0.2925833463668823, "learning_rate": 1.3308947357223879e-07, "loss": 0.2784, "step": 45922 }, { "epoch": 4.668869459129727, "grad_norm": 0.29053983092308044, "learning_rate": 1.3300815011672075e-07, "loss": 0.296, "step": 45923 }, { "epoch": 4.668971126474176, "grad_norm": 0.30380165576934814, "learning_rate": 1.3292685117995085e-07, "loss": 0.292, "step": 45924 }, { "epoch": 4.669072793818626, "grad_norm": 0.3043282926082611, "learning_rate": 1.328455767623399e-07, "loss": 0.2692, "step": 45925 }, { "epoch": 4.669174461163075, "grad_norm": 0.2749924957752228, "learning_rate": 1.327643268642953e-07, "loss": 0.297, "step": 45926 }, { "epoch": 4.669276128507524, "grad_norm": 0.28110378980636597, "learning_rate": 1.3268310148622732e-07, "loss": 0.2897, "step": 45927 }, { "epoch": 4.669377795851973, "grad_norm": 0.27630069851875305, "learning_rate": 1.3260190062854616e-07, "loss": 0.3612, "step": 45928 }, { "epoch": 4.669479463196422, "grad_norm": 0.32213643193244934, "learning_rate": 1.325207242916593e-07, "loss": 0.2906, "step": 45929 }, { "epoch": 4.6695811305408705, "grad_norm": 0.268291175365448, "learning_rate": 1.324395724759764e-07, "loss": 0.3016, "step": 45930 }, { "epoch": 4.6696827978853195, "grad_norm": 0.2674790620803833, "learning_rate": 1.3235844518190655e-07, "loss": 0.2607, "step": 45931 }, { "epoch": 4.669784465229768, "grad_norm": 0.28876110911369324, "learning_rate": 1.3227734240985723e-07, "loss": 0.2638, "step": 45932 }, { "epoch": 4.669886132574217, "grad_norm": 0.27893391251564026, "learning_rate": 1.3219626416023868e-07, "loss": 0.2853, "step": 45933 }, { "epoch": 4.669987799918666, "grad_norm": 0.2887493669986725, "learning_rate": 1.3211521043345776e-07, "loss": 0.2628, "step": 45934 }, { "epoch": 4.670089467263115, "grad_norm": 0.2934446632862091, "learning_rate": 1.320341812299253e-07, "loss": 0.2933, "step": 45935 }, { "epoch": 4.670191134607564, "grad_norm": 0.2789871096611023, "learning_rate": 1.3195317655004646e-07, "loss": 0.282, "step": 45936 }, { "epoch": 4.670292801952013, "grad_norm": 0.2714766561985016, "learning_rate": 1.3187219639423044e-07, "loss": 0.3235, "step": 45937 }, { "epoch": 4.670394469296462, "grad_norm": 0.26166170835494995, "learning_rate": 1.3179124076288686e-07, "loss": 0.2953, "step": 45938 }, { "epoch": 4.670496136640911, "grad_norm": 0.29303768277168274, "learning_rate": 1.3171030965642096e-07, "loss": 0.2979, "step": 45939 }, { "epoch": 4.67059780398536, "grad_norm": 0.2850957214832306, "learning_rate": 1.3162940307524186e-07, "loss": 0.2892, "step": 45940 }, { "epoch": 4.670699471329809, "grad_norm": 0.2673194408416748, "learning_rate": 1.3154852101975756e-07, "loss": 0.3511, "step": 45941 }, { "epoch": 4.670801138674258, "grad_norm": 0.2715081572532654, "learning_rate": 1.314676634903739e-07, "loss": 0.2962, "step": 45942 }, { "epoch": 4.670902806018707, "grad_norm": 0.2794862389564514, "learning_rate": 1.313868304874999e-07, "loss": 0.2664, "step": 45943 }, { "epoch": 4.671004473363156, "grad_norm": 0.2765014171600342, "learning_rate": 1.3130602201154196e-07, "loss": 0.3008, "step": 45944 }, { "epoch": 4.671106140707605, "grad_norm": 0.27403363585472107, "learning_rate": 1.31225238062907e-07, "loss": 0.2998, "step": 45945 }, { "epoch": 4.671207808052054, "grad_norm": 0.2633226811885834, "learning_rate": 1.3114447864200241e-07, "loss": 0.2633, "step": 45946 }, { "epoch": 4.6713094753965025, "grad_norm": 0.30412837862968445, "learning_rate": 1.3106374374923459e-07, "loss": 0.2996, "step": 45947 }, { "epoch": 4.6714111427409515, "grad_norm": 0.27060869336128235, "learning_rate": 1.309830333850115e-07, "loss": 0.2855, "step": 45948 }, { "epoch": 4.6715128100854, "grad_norm": 0.2932576835155487, "learning_rate": 1.3090234754973842e-07, "loss": 0.2909, "step": 45949 }, { "epoch": 4.671614477429849, "grad_norm": 0.25330790877342224, "learning_rate": 1.308216862438222e-07, "loss": 0.3132, "step": 45950 }, { "epoch": 4.671716144774298, "grad_norm": 0.2831656038761139, "learning_rate": 1.307410494676692e-07, "loss": 0.2863, "step": 45951 }, { "epoch": 4.671817812118747, "grad_norm": 0.2914099395275116, "learning_rate": 1.306604372216852e-07, "loss": 0.2934, "step": 45952 }, { "epoch": 4.671919479463196, "grad_norm": 0.26688727736473083, "learning_rate": 1.3057984950627766e-07, "loss": 0.2556, "step": 45953 }, { "epoch": 4.672021146807645, "grad_norm": 0.26871946454048157, "learning_rate": 1.3049928632185128e-07, "loss": 0.2985, "step": 45954 }, { "epoch": 4.672122814152094, "grad_norm": 0.27100542187690735, "learning_rate": 1.3041874766881235e-07, "loss": 0.2883, "step": 45955 }, { "epoch": 4.672224481496543, "grad_norm": 0.3213444948196411, "learning_rate": 1.3033823354756668e-07, "loss": 0.3199, "step": 45956 }, { "epoch": 4.672326148840992, "grad_norm": 0.29925480484962463, "learning_rate": 1.3025774395851897e-07, "loss": 0.3078, "step": 45957 }, { "epoch": 4.672427816185441, "grad_norm": 0.2758561968803406, "learning_rate": 1.3017727890207722e-07, "loss": 0.2861, "step": 45958 }, { "epoch": 4.67252948352989, "grad_norm": 0.26744383573532104, "learning_rate": 1.300968383786433e-07, "loss": 0.2987, "step": 45959 }, { "epoch": 4.672631150874339, "grad_norm": 0.2947172224521637, "learning_rate": 1.3001642238862521e-07, "loss": 0.3015, "step": 45960 }, { "epoch": 4.672732818218789, "grad_norm": 0.26558271050453186, "learning_rate": 1.2993603093242714e-07, "loss": 0.2831, "step": 45961 }, { "epoch": 4.6728344855632375, "grad_norm": 0.27776578068733215, "learning_rate": 1.2985566401045368e-07, "loss": 0.2939, "step": 45962 }, { "epoch": 4.6729361529076865, "grad_norm": 0.27064022421836853, "learning_rate": 1.2977532162311013e-07, "loss": 0.3039, "step": 45963 }, { "epoch": 4.673037820252135, "grad_norm": 0.2710820138454437, "learning_rate": 1.2969500377080168e-07, "loss": 0.3132, "step": 45964 }, { "epoch": 4.673139487596584, "grad_norm": 0.2579721212387085, "learning_rate": 1.2961471045393194e-07, "loss": 0.2915, "step": 45965 }, { "epoch": 4.673241154941033, "grad_norm": 0.2986788749694824, "learning_rate": 1.295344416729055e-07, "loss": 0.2685, "step": 45966 }, { "epoch": 4.673342822285482, "grad_norm": 0.2646064758300781, "learning_rate": 1.294541974281277e-07, "loss": 0.2799, "step": 45967 }, { "epoch": 4.673444489629931, "grad_norm": 0.25626689195632935, "learning_rate": 1.293739777200026e-07, "loss": 0.3419, "step": 45968 }, { "epoch": 4.67354615697438, "grad_norm": 0.2872217297554016, "learning_rate": 1.2929378254893322e-07, "loss": 0.2775, "step": 45969 }, { "epoch": 4.673647824318829, "grad_norm": 0.2795732319355011, "learning_rate": 1.292136119153242e-07, "loss": 0.2735, "step": 45970 }, { "epoch": 4.673749491663278, "grad_norm": 0.2556457817554474, "learning_rate": 1.291334658195803e-07, "loss": 0.3169, "step": 45971 }, { "epoch": 4.673851159007727, "grad_norm": 0.26346883177757263, "learning_rate": 1.2905334426210393e-07, "loss": 0.3118, "step": 45972 }, { "epoch": 4.673952826352176, "grad_norm": 0.3101150691509247, "learning_rate": 1.2897324724329973e-07, "loss": 0.3003, "step": 45973 }, { "epoch": 4.674054493696625, "grad_norm": 0.2679888606071472, "learning_rate": 1.2889317476357022e-07, "loss": 0.289, "step": 45974 }, { "epoch": 4.674156161041074, "grad_norm": 0.3031962215900421, "learning_rate": 1.2881312682332003e-07, "loss": 0.2761, "step": 45975 }, { "epoch": 4.674257828385523, "grad_norm": 0.2855459749698639, "learning_rate": 1.287331034229511e-07, "loss": 0.291, "step": 45976 }, { "epoch": 4.674359495729972, "grad_norm": 0.2759024500846863, "learning_rate": 1.2865310456286752e-07, "loss": 0.2798, "step": 45977 }, { "epoch": 4.674461163074421, "grad_norm": 0.2853398025035858, "learning_rate": 1.2857313024347228e-07, "loss": 0.2947, "step": 45978 }, { "epoch": 4.6745628304188696, "grad_norm": 0.26310452818870544, "learning_rate": 1.2849318046516678e-07, "loss": 0.285, "step": 45979 }, { "epoch": 4.6746644977633185, "grad_norm": 0.27384066581726074, "learning_rate": 1.2841325522835624e-07, "loss": 0.2717, "step": 45980 }, { "epoch": 4.674766165107767, "grad_norm": 0.26123929023742676, "learning_rate": 1.2833335453344198e-07, "loss": 0.2722, "step": 45981 }, { "epoch": 4.674867832452216, "grad_norm": 0.2880249321460724, "learning_rate": 1.2825347838082592e-07, "loss": 0.2883, "step": 45982 }, { "epoch": 4.674969499796665, "grad_norm": 0.28542381525039673, "learning_rate": 1.2817362677091162e-07, "loss": 0.2972, "step": 45983 }, { "epoch": 4.675071167141114, "grad_norm": 0.29451367259025574, "learning_rate": 1.2809379970410042e-07, "loss": 0.3187, "step": 45984 }, { "epoch": 4.675172834485563, "grad_norm": 0.28889381885528564, "learning_rate": 1.2801399718079533e-07, "loss": 0.2952, "step": 45985 }, { "epoch": 4.675274501830012, "grad_norm": 0.2924947738647461, "learning_rate": 1.2793421920139771e-07, "loss": 0.3089, "step": 45986 }, { "epoch": 4.675376169174461, "grad_norm": 0.27998900413513184, "learning_rate": 1.2785446576630999e-07, "loss": 0.2963, "step": 45987 }, { "epoch": 4.67547783651891, "grad_norm": 0.2854643762111664, "learning_rate": 1.2777473687593357e-07, "loss": 0.2926, "step": 45988 }, { "epoch": 4.675579503863359, "grad_norm": 0.27616584300994873, "learning_rate": 1.2769503253066973e-07, "loss": 0.2907, "step": 45989 }, { "epoch": 4.675681171207808, "grad_norm": 0.2753365635871887, "learning_rate": 1.2761535273092151e-07, "loss": 0.3011, "step": 45990 }, { "epoch": 4.675782838552257, "grad_norm": 0.269497275352478, "learning_rate": 1.275356974770886e-07, "loss": 0.2836, "step": 45991 }, { "epoch": 4.675884505896706, "grad_norm": 0.2842079997062683, "learning_rate": 1.274560667695729e-07, "loss": 0.2986, "step": 45992 }, { "epoch": 4.675986173241155, "grad_norm": 0.2603534758090973, "learning_rate": 1.2737646060877572e-07, "loss": 0.3188, "step": 45993 }, { "epoch": 4.676087840585604, "grad_norm": 0.26887187361717224, "learning_rate": 1.2729687899509845e-07, "loss": 0.2873, "step": 45994 }, { "epoch": 4.676189507930053, "grad_norm": 0.27159079909324646, "learning_rate": 1.2721732192894132e-07, "loss": 0.3044, "step": 45995 }, { "epoch": 4.6762911752745016, "grad_norm": 0.33860379457473755, "learning_rate": 1.2713778941070509e-07, "loss": 0.2831, "step": 45996 }, { "epoch": 4.6763928426189505, "grad_norm": 0.2651852071285248, "learning_rate": 1.2705828144079058e-07, "loss": 0.3151, "step": 45997 }, { "epoch": 4.676494509963399, "grad_norm": 0.2842922508716583, "learning_rate": 1.2697879801959912e-07, "loss": 0.3183, "step": 45998 }, { "epoch": 4.676596177307848, "grad_norm": 0.2921628952026367, "learning_rate": 1.2689933914752984e-07, "loss": 0.3036, "step": 45999 }, { "epoch": 4.676697844652297, "grad_norm": 0.29202407598495483, "learning_rate": 1.2681990482498409e-07, "loss": 0.316, "step": 46000 }, { "epoch": 4.676799511996746, "grad_norm": 0.27681994438171387, "learning_rate": 1.2674049505236152e-07, "loss": 0.2819, "step": 46001 }, { "epoch": 4.676901179341195, "grad_norm": 0.27627941966056824, "learning_rate": 1.2666110983006184e-07, "loss": 0.2917, "step": 46002 }, { "epoch": 4.677002846685644, "grad_norm": 0.2627885043621063, "learning_rate": 1.2658174915848588e-07, "loss": 0.3061, "step": 46003 }, { "epoch": 4.677104514030093, "grad_norm": 0.29952019453048706, "learning_rate": 1.2650241303803323e-07, "loss": 0.2643, "step": 46004 }, { "epoch": 4.677206181374542, "grad_norm": 0.27138644456863403, "learning_rate": 1.264231014691031e-07, "loss": 0.2975, "step": 46005 }, { "epoch": 4.677307848718991, "grad_norm": 0.27916866540908813, "learning_rate": 1.2634381445209454e-07, "loss": 0.3047, "step": 46006 }, { "epoch": 4.677409516063441, "grad_norm": 0.27630048990249634, "learning_rate": 1.262645519874084e-07, "loss": 0.2969, "step": 46007 }, { "epoch": 4.67751118340789, "grad_norm": 0.28382426500320435, "learning_rate": 1.261853140754432e-07, "loss": 0.2822, "step": 46008 }, { "epoch": 4.677612850752339, "grad_norm": 0.2672230005264282, "learning_rate": 1.2610610071659757e-07, "loss": 0.2698, "step": 46009 }, { "epoch": 4.677714518096788, "grad_norm": 0.30534547567367554, "learning_rate": 1.2602691191127226e-07, "loss": 0.2965, "step": 46010 }, { "epoch": 4.677816185441237, "grad_norm": 0.2969840168952942, "learning_rate": 1.2594774765986473e-07, "loss": 0.3053, "step": 46011 }, { "epoch": 4.6779178527856855, "grad_norm": 0.28943875432014465, "learning_rate": 1.2586860796277357e-07, "loss": 0.2841, "step": 46012 }, { "epoch": 4.6780195201301344, "grad_norm": 0.28241410851478577, "learning_rate": 1.2578949282039842e-07, "loss": 0.2837, "step": 46013 }, { "epoch": 4.678121187474583, "grad_norm": 0.28124913573265076, "learning_rate": 1.257104022331379e-07, "loss": 0.2804, "step": 46014 }, { "epoch": 4.678222854819032, "grad_norm": 0.2810436189174652, "learning_rate": 1.2563133620139002e-07, "loss": 0.2836, "step": 46015 }, { "epoch": 4.678324522163481, "grad_norm": 0.2726471722126007, "learning_rate": 1.2555229472555275e-07, "loss": 0.2993, "step": 46016 }, { "epoch": 4.67842618950793, "grad_norm": 0.2725360095500946, "learning_rate": 1.2547327780602525e-07, "loss": 0.3125, "step": 46017 }, { "epoch": 4.678527856852379, "grad_norm": 0.2986404299736023, "learning_rate": 1.2539428544320442e-07, "loss": 0.2775, "step": 46018 }, { "epoch": 4.678629524196828, "grad_norm": 0.28993529081344604, "learning_rate": 1.2531531763748882e-07, "loss": 0.2751, "step": 46019 }, { "epoch": 4.678731191541277, "grad_norm": 0.2600128650665283, "learning_rate": 1.252363743892765e-07, "loss": 0.2745, "step": 46020 }, { "epoch": 4.678832858885726, "grad_norm": 0.26763802766799927, "learning_rate": 1.2515745569896486e-07, "loss": 0.2896, "step": 46021 }, { "epoch": 4.678934526230175, "grad_norm": 0.2889913320541382, "learning_rate": 1.2507856156695142e-07, "loss": 0.2761, "step": 46022 }, { "epoch": 4.679036193574624, "grad_norm": 0.29281899333000183, "learning_rate": 1.249996919936336e-07, "loss": 0.2811, "step": 46023 }, { "epoch": 4.679137860919073, "grad_norm": 0.2619641423225403, "learning_rate": 1.2492084697940943e-07, "loss": 0.3148, "step": 46024 }, { "epoch": 4.679239528263522, "grad_norm": 0.2839204668998718, "learning_rate": 1.2484202652467525e-07, "loss": 0.2816, "step": 46025 }, { "epoch": 4.679341195607971, "grad_norm": 0.29032087326049805, "learning_rate": 1.2476323062982798e-07, "loss": 0.317, "step": 46026 }, { "epoch": 4.67944286295242, "grad_norm": 0.25792396068573, "learning_rate": 1.2468445929526563e-07, "loss": 0.3059, "step": 46027 }, { "epoch": 4.679544530296869, "grad_norm": 0.28330445289611816, "learning_rate": 1.2460571252138452e-07, "loss": 0.253, "step": 46028 }, { "epoch": 4.6796461976413175, "grad_norm": 0.2883061170578003, "learning_rate": 1.245269903085805e-07, "loss": 0.3056, "step": 46029 }, { "epoch": 4.6797478649857664, "grad_norm": 0.2818204164505005, "learning_rate": 1.2444829265725157e-07, "loss": 0.2864, "step": 46030 }, { "epoch": 4.679849532330215, "grad_norm": 0.2882620692253113, "learning_rate": 1.243696195677929e-07, "loss": 0.2732, "step": 46031 }, { "epoch": 4.679951199674664, "grad_norm": 0.2567828595638275, "learning_rate": 1.242909710406015e-07, "loss": 0.3022, "step": 46032 }, { "epoch": 4.680052867019113, "grad_norm": 0.27391311526298523, "learning_rate": 1.2421234707607421e-07, "loss": 0.3116, "step": 46033 }, { "epoch": 4.680154534363562, "grad_norm": 0.2853926718235016, "learning_rate": 1.241337476746063e-07, "loss": 0.2852, "step": 46034 }, { "epoch": 4.680256201708011, "grad_norm": 0.2833018898963928, "learning_rate": 1.2405517283659408e-07, "loss": 0.3049, "step": 46035 }, { "epoch": 4.68035786905246, "grad_norm": 0.2673168182373047, "learning_rate": 1.2397662256243226e-07, "loss": 0.302, "step": 46036 }, { "epoch": 4.680459536396909, "grad_norm": 0.28107836842536926, "learning_rate": 1.238980968525183e-07, "loss": 0.3075, "step": 46037 }, { "epoch": 4.680561203741358, "grad_norm": 0.2686578035354614, "learning_rate": 1.2381959570724688e-07, "loss": 0.2922, "step": 46038 }, { "epoch": 4.680662871085807, "grad_norm": 0.3017241954803467, "learning_rate": 1.2374111912701326e-07, "loss": 0.2837, "step": 46039 }, { "epoch": 4.680764538430256, "grad_norm": 0.26626765727996826, "learning_rate": 1.236626671122132e-07, "loss": 0.3149, "step": 46040 }, { "epoch": 4.680866205774705, "grad_norm": 0.2666725218296051, "learning_rate": 1.2358423966324196e-07, "loss": 0.3136, "step": 46041 }, { "epoch": 4.680967873119154, "grad_norm": 0.29842275381088257, "learning_rate": 1.235058367804942e-07, "loss": 0.2798, "step": 46042 }, { "epoch": 4.681069540463604, "grad_norm": 0.30519387125968933, "learning_rate": 1.2342745846436578e-07, "loss": 0.315, "step": 46043 }, { "epoch": 4.6811712078080525, "grad_norm": 0.2642251253128052, "learning_rate": 1.2334910471525075e-07, "loss": 0.2624, "step": 46044 }, { "epoch": 4.6812728751525015, "grad_norm": 0.304293692111969, "learning_rate": 1.2327077553354383e-07, "loss": 0.3101, "step": 46045 }, { "epoch": 4.68137454249695, "grad_norm": 0.2849246859550476, "learning_rate": 1.231924709196397e-07, "loss": 0.2976, "step": 46046 }, { "epoch": 4.681476209841399, "grad_norm": 0.2648705244064331, "learning_rate": 1.231141908739336e-07, "loss": 0.2982, "step": 46047 }, { "epoch": 4.681577877185848, "grad_norm": 0.2672678828239441, "learning_rate": 1.2303593539681913e-07, "loss": 0.319, "step": 46048 }, { "epoch": 4.681679544530297, "grad_norm": 0.2932867705821991, "learning_rate": 1.2295770448869038e-07, "loss": 0.2779, "step": 46049 }, { "epoch": 4.681781211874746, "grad_norm": 0.28301048278808594, "learning_rate": 1.2287949814994204e-07, "loss": 0.3142, "step": 46050 }, { "epoch": 4.681882879219195, "grad_norm": 0.2782473862171173, "learning_rate": 1.228013163809677e-07, "loss": 0.2915, "step": 46051 }, { "epoch": 4.681984546563644, "grad_norm": 0.26758143305778503, "learning_rate": 1.2272315918216092e-07, "loss": 0.2689, "step": 46052 }, { "epoch": 4.682086213908093, "grad_norm": 0.273949533700943, "learning_rate": 1.2264502655391641e-07, "loss": 0.3019, "step": 46053 }, { "epoch": 4.682187881252542, "grad_norm": 0.2761029303073883, "learning_rate": 1.2256691849662715e-07, "loss": 0.2999, "step": 46054 }, { "epoch": 4.682289548596991, "grad_norm": 0.2672473192214966, "learning_rate": 1.2248883501068677e-07, "loss": 0.321, "step": 46055 }, { "epoch": 4.68239121594144, "grad_norm": 0.2698538303375244, "learning_rate": 1.2241077609648767e-07, "loss": 0.3198, "step": 46056 }, { "epoch": 4.682492883285889, "grad_norm": 0.27981406450271606, "learning_rate": 1.2233274175442512e-07, "loss": 0.3056, "step": 46057 }, { "epoch": 4.682594550630338, "grad_norm": 0.2953440546989441, "learning_rate": 1.2225473198489045e-07, "loss": 0.3222, "step": 46058 }, { "epoch": 4.682696217974787, "grad_norm": 0.28343620896339417, "learning_rate": 1.221767467882773e-07, "loss": 0.2986, "step": 46059 }, { "epoch": 4.682797885319236, "grad_norm": 0.28023630380630493, "learning_rate": 1.2209878616497915e-07, "loss": 0.2923, "step": 46060 }, { "epoch": 4.6828995526636845, "grad_norm": 0.27100804448127747, "learning_rate": 1.2202085011538745e-07, "loss": 0.3051, "step": 46061 }, { "epoch": 4.6830012200081335, "grad_norm": 0.28406304121017456, "learning_rate": 1.219429386398957e-07, "loss": 0.3041, "step": 46062 }, { "epoch": 4.683102887352582, "grad_norm": 0.2880362272262573, "learning_rate": 1.218650517388964e-07, "loss": 0.2894, "step": 46063 }, { "epoch": 4.683204554697031, "grad_norm": 0.26811957359313965, "learning_rate": 1.21787189412782e-07, "loss": 0.2729, "step": 46064 }, { "epoch": 4.68330622204148, "grad_norm": 0.26365387439727783, "learning_rate": 1.2170935166194441e-07, "loss": 0.2903, "step": 46065 }, { "epoch": 4.683407889385929, "grad_norm": 0.27138203382492065, "learning_rate": 1.21631538486775e-07, "loss": 0.3239, "step": 46066 }, { "epoch": 4.683509556730378, "grad_norm": 0.2821814715862274, "learning_rate": 1.2155374988766734e-07, "loss": 0.2751, "step": 46067 }, { "epoch": 4.683611224074827, "grad_norm": 0.2800323963165283, "learning_rate": 1.2147598586501275e-07, "loss": 0.2977, "step": 46068 }, { "epoch": 4.683712891419276, "grad_norm": 0.30210620164871216, "learning_rate": 1.2139824641920262e-07, "loss": 0.2909, "step": 46069 }, { "epoch": 4.683814558763725, "grad_norm": 0.284188836812973, "learning_rate": 1.2132053155062938e-07, "loss": 0.304, "step": 46070 }, { "epoch": 4.683916226108174, "grad_norm": 0.27581724524497986, "learning_rate": 1.2124284125968278e-07, "loss": 0.2801, "step": 46071 }, { "epoch": 4.684017893452623, "grad_norm": 0.30173981189727783, "learning_rate": 1.2116517554675578e-07, "loss": 0.3196, "step": 46072 }, { "epoch": 4.684119560797072, "grad_norm": 0.2703308165073395, "learning_rate": 1.210875344122392e-07, "loss": 0.2666, "step": 46073 }, { "epoch": 4.684221228141521, "grad_norm": 0.2811773419380188, "learning_rate": 1.210099178565244e-07, "loss": 0.2964, "step": 46074 }, { "epoch": 4.68432289548597, "grad_norm": 0.259773313999176, "learning_rate": 1.209323258800027e-07, "loss": 0.2942, "step": 46075 }, { "epoch": 4.684424562830419, "grad_norm": 0.298444539308548, "learning_rate": 1.2085475848306326e-07, "loss": 0.2768, "step": 46076 }, { "epoch": 4.684526230174868, "grad_norm": 0.29356950521469116, "learning_rate": 1.2077721566609912e-07, "loss": 0.2907, "step": 46077 }, { "epoch": 4.6846278975193165, "grad_norm": 0.27744990587234497, "learning_rate": 1.2069969742949994e-07, "loss": 0.2599, "step": 46078 }, { "epoch": 4.6847295648637655, "grad_norm": 0.26793086528778076, "learning_rate": 1.206222037736554e-07, "loss": 0.2984, "step": 46079 }, { "epoch": 4.684831232208214, "grad_norm": 0.2742462754249573, "learning_rate": 1.20544734698958e-07, "loss": 0.2911, "step": 46080 }, { "epoch": 4.684932899552663, "grad_norm": 0.2649601399898529, "learning_rate": 1.2046729020579516e-07, "loss": 0.2902, "step": 46081 }, { "epoch": 4.685034566897112, "grad_norm": 0.26775217056274414, "learning_rate": 1.2038987029455884e-07, "loss": 0.2889, "step": 46082 }, { "epoch": 4.685136234241561, "grad_norm": 0.30989018082618713, "learning_rate": 1.203124749656398e-07, "loss": 0.3103, "step": 46083 }, { "epoch": 4.68523790158601, "grad_norm": 0.28606197237968445, "learning_rate": 1.2023510421942608e-07, "loss": 0.2828, "step": 46084 }, { "epoch": 4.685339568930459, "grad_norm": 0.2819860279560089, "learning_rate": 1.2015775805630848e-07, "loss": 0.2656, "step": 46085 }, { "epoch": 4.685441236274908, "grad_norm": 0.2979782819747925, "learning_rate": 1.2008043647667612e-07, "loss": 0.2896, "step": 46086 }, { "epoch": 4.685542903619357, "grad_norm": 0.29017287492752075, "learning_rate": 1.2000313948091978e-07, "loss": 0.3194, "step": 46087 }, { "epoch": 4.685644570963806, "grad_norm": 0.283168226480484, "learning_rate": 1.1992586706942698e-07, "loss": 0.3293, "step": 46088 }, { "epoch": 4.685746238308256, "grad_norm": 0.3133437931537628, "learning_rate": 1.1984861924258796e-07, "loss": 0.2883, "step": 46089 }, { "epoch": 4.685847905652705, "grad_norm": 0.27973660826683044, "learning_rate": 1.197713960007929e-07, "loss": 0.2909, "step": 46090 }, { "epoch": 4.685949572997154, "grad_norm": 0.30554822087287903, "learning_rate": 1.1969419734442934e-07, "loss": 0.2875, "step": 46091 }, { "epoch": 4.686051240341603, "grad_norm": 0.2824355661869049, "learning_rate": 1.1961702327388637e-07, "loss": 0.2763, "step": 46092 }, { "epoch": 4.6861529076860515, "grad_norm": 0.5772235989570618, "learning_rate": 1.1953987378955367e-07, "loss": 0.3026, "step": 46093 }, { "epoch": 4.6862545750305005, "grad_norm": 0.2761915326118469, "learning_rate": 1.1946274889181876e-07, "loss": 0.3275, "step": 46094 }, { "epoch": 4.686356242374949, "grad_norm": 0.26267433166503906, "learning_rate": 1.1938564858107072e-07, "loss": 0.2871, "step": 46095 }, { "epoch": 4.686457909719398, "grad_norm": 0.26437094807624817, "learning_rate": 1.1930857285769813e-07, "loss": 0.3063, "step": 46096 }, { "epoch": 4.686559577063847, "grad_norm": 0.24173207581043243, "learning_rate": 1.1923152172208963e-07, "loss": 0.3091, "step": 46097 }, { "epoch": 4.686661244408296, "grad_norm": 0.27973854541778564, "learning_rate": 1.1915449517463207e-07, "loss": 0.318, "step": 46098 }, { "epoch": 4.686762911752745, "grad_norm": 0.267924427986145, "learning_rate": 1.1907749321571404e-07, "loss": 0.2841, "step": 46099 }, { "epoch": 4.686864579097194, "grad_norm": 0.29972079396247864, "learning_rate": 1.1900051584572526e-07, "loss": 0.3125, "step": 46100 }, { "epoch": 4.686966246441643, "grad_norm": 0.2865208685398102, "learning_rate": 1.1892356306505037e-07, "loss": 0.2869, "step": 46101 }, { "epoch": 4.687067913786092, "grad_norm": 0.29461413621902466, "learning_rate": 1.1884663487407966e-07, "loss": 0.3149, "step": 46102 }, { "epoch": 4.687169581130541, "grad_norm": 0.27188968658447266, "learning_rate": 1.1876973127319947e-07, "loss": 0.2778, "step": 46103 }, { "epoch": 4.68727124847499, "grad_norm": 0.3094724416732788, "learning_rate": 1.1869285226279669e-07, "loss": 0.3078, "step": 46104 }, { "epoch": 4.687372915819439, "grad_norm": 0.2839423716068268, "learning_rate": 1.1861599784325994e-07, "loss": 0.305, "step": 46105 }, { "epoch": 4.687474583163888, "grad_norm": 0.24922508001327515, "learning_rate": 1.1853916801497611e-07, "loss": 0.3138, "step": 46106 }, { "epoch": 4.687576250508337, "grad_norm": 0.28437018394470215, "learning_rate": 1.1846236277833157e-07, "loss": 0.2899, "step": 46107 }, { "epoch": 4.687677917852786, "grad_norm": 0.27956780791282654, "learning_rate": 1.1838558213371321e-07, "loss": 0.2943, "step": 46108 }, { "epoch": 4.687779585197235, "grad_norm": 0.29001691937446594, "learning_rate": 1.183088260815085e-07, "loss": 0.3121, "step": 46109 }, { "epoch": 4.6878812525416835, "grad_norm": 0.30360209941864014, "learning_rate": 1.1823209462210439e-07, "loss": 0.2939, "step": 46110 }, { "epoch": 4.6879829198861325, "grad_norm": 0.2817215919494629, "learning_rate": 1.1815538775588609e-07, "loss": 0.3154, "step": 46111 }, { "epoch": 4.688084587230581, "grad_norm": 0.31889042258262634, "learning_rate": 1.1807870548324162e-07, "loss": 0.275, "step": 46112 }, { "epoch": 4.68818625457503, "grad_norm": 0.29372745752334595, "learning_rate": 1.1800204780455627e-07, "loss": 0.277, "step": 46113 }, { "epoch": 4.688287921919479, "grad_norm": 0.2958419919013977, "learning_rate": 1.179254147202158e-07, "loss": 0.3097, "step": 46114 }, { "epoch": 4.688389589263928, "grad_norm": 0.2736889123916626, "learning_rate": 1.1784880623060768e-07, "loss": 0.3078, "step": 46115 }, { "epoch": 4.688491256608377, "grad_norm": 0.2649451196193695, "learning_rate": 1.1777222233611718e-07, "loss": 0.2913, "step": 46116 }, { "epoch": 4.688592923952826, "grad_norm": 0.2650192975997925, "learning_rate": 1.1769566303713009e-07, "loss": 0.2726, "step": 46117 }, { "epoch": 4.688694591297275, "grad_norm": 0.2667507231235504, "learning_rate": 1.1761912833403111e-07, "loss": 0.2849, "step": 46118 }, { "epoch": 4.688796258641724, "grad_norm": 0.2854636013507843, "learning_rate": 1.175426182272077e-07, "loss": 0.2935, "step": 46119 }, { "epoch": 4.688897925986173, "grad_norm": 0.29950839281082153, "learning_rate": 1.1746613271704399e-07, "loss": 0.291, "step": 46120 }, { "epoch": 4.688999593330622, "grad_norm": 0.2710583508014679, "learning_rate": 1.1738967180392525e-07, "loss": 0.3119, "step": 46121 }, { "epoch": 4.689101260675071, "grad_norm": 0.26656657457351685, "learning_rate": 1.1731323548823725e-07, "loss": 0.269, "step": 46122 }, { "epoch": 4.68920292801952, "grad_norm": 0.2760864198207855, "learning_rate": 1.1723682377036527e-07, "loss": 0.28, "step": 46123 }, { "epoch": 4.689304595363969, "grad_norm": 0.28349167108535767, "learning_rate": 1.1716043665069343e-07, "loss": 0.2999, "step": 46124 }, { "epoch": 4.6894062627084185, "grad_norm": 0.2599371075630188, "learning_rate": 1.1708407412960698e-07, "loss": 0.3085, "step": 46125 }, { "epoch": 4.6895079300528675, "grad_norm": 0.29662761092185974, "learning_rate": 1.1700773620749117e-07, "loss": 0.3116, "step": 46126 }, { "epoch": 4.689609597397316, "grad_norm": 0.27385756373405457, "learning_rate": 1.1693142288472958e-07, "loss": 0.2999, "step": 46127 }, { "epoch": 4.689711264741765, "grad_norm": 0.2802050709724426, "learning_rate": 1.1685513416170691e-07, "loss": 0.273, "step": 46128 }, { "epoch": 4.689812932086214, "grad_norm": 0.2678441107273102, "learning_rate": 1.1677887003880783e-07, "loss": 0.2734, "step": 46129 }, { "epoch": 4.689914599430663, "grad_norm": 0.31164348125457764, "learning_rate": 1.1670263051641651e-07, "loss": 0.2768, "step": 46130 }, { "epoch": 4.690016266775112, "grad_norm": 0.25491780042648315, "learning_rate": 1.166264155949165e-07, "loss": 0.3115, "step": 46131 }, { "epoch": 4.690117934119561, "grad_norm": 0.30814653635025024, "learning_rate": 1.1655022527469251e-07, "loss": 0.2821, "step": 46132 }, { "epoch": 4.69021960146401, "grad_norm": 0.260867178440094, "learning_rate": 1.1647405955612811e-07, "loss": 0.318, "step": 46133 }, { "epoch": 4.690321268808459, "grad_norm": 0.2849048376083374, "learning_rate": 1.1639791843960636e-07, "loss": 0.2812, "step": 46134 }, { "epoch": 4.690422936152908, "grad_norm": 0.2918456494808197, "learning_rate": 1.1632180192551135e-07, "loss": 0.2783, "step": 46135 }, { "epoch": 4.690524603497357, "grad_norm": 0.26277440786361694, "learning_rate": 1.1624571001422724e-07, "loss": 0.3035, "step": 46136 }, { "epoch": 4.690626270841806, "grad_norm": 0.29073578119277954, "learning_rate": 1.161696427061365e-07, "loss": 0.3058, "step": 46137 }, { "epoch": 4.690727938186255, "grad_norm": 0.27212899923324585, "learning_rate": 1.1609360000162217e-07, "loss": 0.3103, "step": 46138 }, { "epoch": 4.690829605530704, "grad_norm": 0.3085414469242096, "learning_rate": 1.1601758190106782e-07, "loss": 0.2711, "step": 46139 }, { "epoch": 4.690931272875153, "grad_norm": 0.2846890091896057, "learning_rate": 1.1594158840485648e-07, "loss": 0.2745, "step": 46140 }, { "epoch": 4.691032940219602, "grad_norm": 0.2944909930229187, "learning_rate": 1.1586561951337061e-07, "loss": 0.2932, "step": 46141 }, { "epoch": 4.6911346075640505, "grad_norm": 0.27502158284187317, "learning_rate": 1.1578967522699324e-07, "loss": 0.322, "step": 46142 }, { "epoch": 4.6912362749084995, "grad_norm": 0.28659912943840027, "learning_rate": 1.1571375554610686e-07, "loss": 0.2704, "step": 46143 }, { "epoch": 4.691337942252948, "grad_norm": 0.28920212388038635, "learning_rate": 1.1563786047109338e-07, "loss": 0.3051, "step": 46144 }, { "epoch": 4.691439609597397, "grad_norm": 0.312663733959198, "learning_rate": 1.1556199000233582e-07, "loss": 0.2889, "step": 46145 }, { "epoch": 4.691541276941846, "grad_norm": 0.27927520871162415, "learning_rate": 1.1548614414021664e-07, "loss": 0.2938, "step": 46146 }, { "epoch": 4.691642944286295, "grad_norm": 0.27853527665138245, "learning_rate": 1.1541032288511777e-07, "loss": 0.3095, "step": 46147 }, { "epoch": 4.691744611630744, "grad_norm": 0.27974191308021545, "learning_rate": 1.1533452623742003e-07, "loss": 0.2897, "step": 46148 }, { "epoch": 4.691846278975193, "grad_norm": 0.2952779531478882, "learning_rate": 1.1525875419750698e-07, "loss": 0.2916, "step": 46149 }, { "epoch": 4.691947946319642, "grad_norm": 0.2642465829849243, "learning_rate": 1.1518300676575888e-07, "loss": 0.2928, "step": 46150 }, { "epoch": 4.692049613664091, "grad_norm": 0.2836390733718872, "learning_rate": 1.1510728394255821e-07, "loss": 0.2936, "step": 46151 }, { "epoch": 4.69215128100854, "grad_norm": 0.31385815143585205, "learning_rate": 1.1503158572828632e-07, "loss": 0.2922, "step": 46152 }, { "epoch": 4.692252948352989, "grad_norm": 0.2707497477531433, "learning_rate": 1.1495591212332458e-07, "loss": 0.2882, "step": 46153 }, { "epoch": 4.692354615697438, "grad_norm": 0.26451122760772705, "learning_rate": 1.1488026312805379e-07, "loss": 0.3237, "step": 46154 }, { "epoch": 4.692456283041887, "grad_norm": 0.260640412569046, "learning_rate": 1.1480463874285586e-07, "loss": 0.2676, "step": 46155 }, { "epoch": 4.692557950386336, "grad_norm": 0.26798582077026367, "learning_rate": 1.1472903896811105e-07, "loss": 0.3009, "step": 46156 }, { "epoch": 4.692659617730785, "grad_norm": 0.2676113247871399, "learning_rate": 1.1465346380420017e-07, "loss": 0.2968, "step": 46157 }, { "epoch": 4.692761285075234, "grad_norm": 0.2988417148590088, "learning_rate": 1.1457791325150402e-07, "loss": 0.2892, "step": 46158 }, { "epoch": 4.6928629524196825, "grad_norm": 0.2781904637813568, "learning_rate": 1.1450238731040397e-07, "loss": 0.3216, "step": 46159 }, { "epoch": 4.6929646197641315, "grad_norm": 0.28210678696632385, "learning_rate": 1.1442688598127972e-07, "loss": 0.2577, "step": 46160 }, { "epoch": 4.69306628710858, "grad_norm": 0.2681017518043518, "learning_rate": 1.1435140926451149e-07, "loss": 0.2678, "step": 46161 }, { "epoch": 4.693167954453029, "grad_norm": 0.2812449336051941, "learning_rate": 1.1427595716048012e-07, "loss": 0.2934, "step": 46162 }, { "epoch": 4.693269621797478, "grad_norm": 0.3078713119029999, "learning_rate": 1.142005296695653e-07, "loss": 0.2672, "step": 46163 }, { "epoch": 4.693371289141927, "grad_norm": 0.2782621681690216, "learning_rate": 1.1412512679214727e-07, "loss": 0.2741, "step": 46164 }, { "epoch": 4.693472956486376, "grad_norm": 0.30436137318611145, "learning_rate": 1.1404974852860573e-07, "loss": 0.312, "step": 46165 }, { "epoch": 4.693574623830825, "grad_norm": 0.28862836956977844, "learning_rate": 1.1397439487932094e-07, "loss": 0.2822, "step": 46166 }, { "epoch": 4.693676291175274, "grad_norm": 0.29604974389076233, "learning_rate": 1.1389906584467148e-07, "loss": 0.2735, "step": 46167 }, { "epoch": 4.693777958519723, "grad_norm": 0.31768321990966797, "learning_rate": 1.1382376142503759e-07, "loss": 0.3315, "step": 46168 }, { "epoch": 4.693879625864172, "grad_norm": 0.2879217863082886, "learning_rate": 1.1374848162079843e-07, "loss": 0.2936, "step": 46169 }, { "epoch": 4.693981293208621, "grad_norm": 0.2648026943206787, "learning_rate": 1.1367322643233369e-07, "loss": 0.29, "step": 46170 }, { "epoch": 4.694082960553071, "grad_norm": 0.33147525787353516, "learning_rate": 1.1359799586002141e-07, "loss": 0.3323, "step": 46171 }, { "epoch": 4.69418462789752, "grad_norm": 0.28995922207832336, "learning_rate": 1.1352278990424125e-07, "loss": 0.2879, "step": 46172 }, { "epoch": 4.694286295241969, "grad_norm": 0.2856661379337311, "learning_rate": 1.1344760856537241e-07, "loss": 0.2736, "step": 46173 }, { "epoch": 4.6943879625864176, "grad_norm": 0.2918431758880615, "learning_rate": 1.1337245184379286e-07, "loss": 0.2827, "step": 46174 }, { "epoch": 4.6944896299308665, "grad_norm": 0.26694896817207336, "learning_rate": 1.1329731973988234e-07, "loss": 0.3008, "step": 46175 }, { "epoch": 4.694591297275315, "grad_norm": 0.2727005183696747, "learning_rate": 1.132222122540183e-07, "loss": 0.3135, "step": 46176 }, { "epoch": 4.694692964619764, "grad_norm": 0.26384636759757996, "learning_rate": 1.131471293865799e-07, "loss": 0.2932, "step": 46177 }, { "epoch": 4.694794631964213, "grad_norm": 0.2711603343486786, "learning_rate": 1.1307207113794405e-07, "loss": 0.2915, "step": 46178 }, { "epoch": 4.694896299308662, "grad_norm": 0.2712690234184265, "learning_rate": 1.1299703750849045e-07, "loss": 0.2914, "step": 46179 }, { "epoch": 4.694997966653111, "grad_norm": 0.27491462230682373, "learning_rate": 1.1292202849859657e-07, "loss": 0.3205, "step": 46180 }, { "epoch": 4.69509963399756, "grad_norm": 0.27101004123687744, "learning_rate": 1.128470441086399e-07, "loss": 0.3021, "step": 46181 }, { "epoch": 4.695201301342009, "grad_norm": 0.29047754406929016, "learning_rate": 1.1277208433899844e-07, "loss": 0.3183, "step": 46182 }, { "epoch": 4.695302968686458, "grad_norm": 0.2746492922306061, "learning_rate": 1.126971491900497e-07, "loss": 0.3065, "step": 46183 }, { "epoch": 4.695404636030907, "grad_norm": 0.24851520359516144, "learning_rate": 1.1262223866217115e-07, "loss": 0.2844, "step": 46184 }, { "epoch": 4.695506303375356, "grad_norm": 0.2876633107662201, "learning_rate": 1.125473527557408e-07, "loss": 0.2801, "step": 46185 }, { "epoch": 4.695607970719805, "grad_norm": 0.2715955376625061, "learning_rate": 1.1247249147113559e-07, "loss": 0.2903, "step": 46186 }, { "epoch": 4.695709638064254, "grad_norm": 0.26703283190727234, "learning_rate": 1.1239765480873243e-07, "loss": 0.3037, "step": 46187 }, { "epoch": 4.695811305408703, "grad_norm": 0.26087749004364014, "learning_rate": 1.1232284276890825e-07, "loss": 0.3283, "step": 46188 }, { "epoch": 4.695912972753152, "grad_norm": 0.27799490094184875, "learning_rate": 1.1224805535203997e-07, "loss": 0.322, "step": 46189 }, { "epoch": 4.696014640097601, "grad_norm": 0.28228840231895447, "learning_rate": 1.1217329255850451e-07, "loss": 0.3018, "step": 46190 }, { "epoch": 4.6961163074420496, "grad_norm": 0.30194276571273804, "learning_rate": 1.1209855438867822e-07, "loss": 0.331, "step": 46191 }, { "epoch": 4.6962179747864985, "grad_norm": 0.308447927236557, "learning_rate": 1.120238408429386e-07, "loss": 0.2961, "step": 46192 }, { "epoch": 4.696319642130947, "grad_norm": 0.2640627324581146, "learning_rate": 1.1194915192166089e-07, "loss": 0.258, "step": 46193 }, { "epoch": 4.696421309475396, "grad_norm": 0.2767423093318939, "learning_rate": 1.1187448762522147e-07, "loss": 0.3114, "step": 46194 }, { "epoch": 4.696522976819845, "grad_norm": 0.280819833278656, "learning_rate": 1.1179984795399723e-07, "loss": 0.2945, "step": 46195 }, { "epoch": 4.696624644164294, "grad_norm": 0.27864277362823486, "learning_rate": 1.11725232908364e-07, "loss": 0.2463, "step": 46196 }, { "epoch": 4.696726311508743, "grad_norm": 0.25955918431282043, "learning_rate": 1.1165064248869706e-07, "loss": 0.3169, "step": 46197 }, { "epoch": 4.696827978853192, "grad_norm": 0.2613646984100342, "learning_rate": 1.1157607669537218e-07, "loss": 0.3218, "step": 46198 }, { "epoch": 4.696929646197641, "grad_norm": 0.28057530522346497, "learning_rate": 1.1150153552876575e-07, "loss": 0.3009, "step": 46199 }, { "epoch": 4.69703131354209, "grad_norm": 0.29735976457595825, "learning_rate": 1.1142701898925302e-07, "loss": 0.2847, "step": 46200 }, { "epoch": 4.697132980886539, "grad_norm": 0.2812855839729309, "learning_rate": 1.1135252707720868e-07, "loss": 0.2962, "step": 46201 }, { "epoch": 4.697234648230988, "grad_norm": 0.2747686803340912, "learning_rate": 1.112780597930091e-07, "loss": 0.2854, "step": 46202 }, { "epoch": 4.697336315575437, "grad_norm": 0.2638905644416809, "learning_rate": 1.1120361713702899e-07, "loss": 0.3163, "step": 46203 }, { "epoch": 4.697437982919886, "grad_norm": 0.2737949788570404, "learning_rate": 1.1112919910964304e-07, "loss": 0.3165, "step": 46204 }, { "epoch": 4.697539650264335, "grad_norm": 0.27259060740470886, "learning_rate": 1.110548057112265e-07, "loss": 0.3049, "step": 46205 }, { "epoch": 4.697641317608784, "grad_norm": 0.26945021748542786, "learning_rate": 1.109804369421541e-07, "loss": 0.3068, "step": 46206 }, { "epoch": 4.6977429849532335, "grad_norm": 0.2680727541446686, "learning_rate": 1.1090609280280052e-07, "loss": 0.3149, "step": 46207 }, { "epoch": 4.697844652297682, "grad_norm": 0.30403590202331543, "learning_rate": 1.108317732935399e-07, "loss": 0.3084, "step": 46208 }, { "epoch": 4.697946319642131, "grad_norm": 0.28391721844673157, "learning_rate": 1.1075747841474749e-07, "loss": 0.3007, "step": 46209 }, { "epoch": 4.69804798698658, "grad_norm": 0.26900073885917664, "learning_rate": 1.1068320816679634e-07, "loss": 0.2478, "step": 46210 }, { "epoch": 4.698149654331029, "grad_norm": 0.2711271643638611, "learning_rate": 1.106089625500617e-07, "loss": 0.2893, "step": 46211 }, { "epoch": 4.698251321675478, "grad_norm": 0.26284340023994446, "learning_rate": 1.1053474156491772e-07, "loss": 0.2992, "step": 46212 }, { "epoch": 4.698352989019927, "grad_norm": 0.27552977204322815, "learning_rate": 1.1046054521173688e-07, "loss": 0.2892, "step": 46213 }, { "epoch": 4.698454656364376, "grad_norm": 0.2840556800365448, "learning_rate": 1.1038637349089388e-07, "loss": 0.3181, "step": 46214 }, { "epoch": 4.698556323708825, "grad_norm": 0.2962964177131653, "learning_rate": 1.1031222640276284e-07, "loss": 0.2929, "step": 46215 }, { "epoch": 4.698657991053274, "grad_norm": 0.2851617932319641, "learning_rate": 1.1023810394771683e-07, "loss": 0.3067, "step": 46216 }, { "epoch": 4.698759658397723, "grad_norm": 0.2965297996997833, "learning_rate": 1.1016400612612943e-07, "loss": 0.2987, "step": 46217 }, { "epoch": 4.698861325742172, "grad_norm": 0.2765350341796875, "learning_rate": 1.1008993293837311e-07, "loss": 0.2799, "step": 46218 }, { "epoch": 4.698962993086621, "grad_norm": 0.29031088948249817, "learning_rate": 1.1001588438482258e-07, "loss": 0.2883, "step": 46219 }, { "epoch": 4.69906466043107, "grad_norm": 0.29142826795578003, "learning_rate": 1.0994186046584921e-07, "loss": 0.2711, "step": 46220 }, { "epoch": 4.699166327775519, "grad_norm": 0.2742762267589569, "learning_rate": 1.0986786118182657e-07, "loss": 0.3324, "step": 46221 }, { "epoch": 4.699267995119968, "grad_norm": 0.2834993898868561, "learning_rate": 1.0979388653312883e-07, "loss": 0.282, "step": 46222 }, { "epoch": 4.699369662464417, "grad_norm": 0.27321407198905945, "learning_rate": 1.0971993652012624e-07, "loss": 0.277, "step": 46223 }, { "epoch": 4.6994713298088655, "grad_norm": 0.2711786925792694, "learning_rate": 1.096460111431924e-07, "loss": 0.2817, "step": 46224 }, { "epoch": 4.6995729971533144, "grad_norm": 0.28494229912757874, "learning_rate": 1.0957211040270033e-07, "loss": 0.3075, "step": 46225 }, { "epoch": 4.699674664497763, "grad_norm": 0.28916075825691223, "learning_rate": 1.0949823429902196e-07, "loss": 0.2879, "step": 46226 }, { "epoch": 4.699776331842212, "grad_norm": 0.28497377038002014, "learning_rate": 1.0942438283252921e-07, "loss": 0.2903, "step": 46227 }, { "epoch": 4.699877999186661, "grad_norm": 0.29353806376457214, "learning_rate": 1.0935055600359402e-07, "loss": 0.2818, "step": 46228 }, { "epoch": 4.69997966653111, "grad_norm": 0.2754442095756531, "learning_rate": 1.0927675381258884e-07, "loss": 0.3148, "step": 46229 }, { "epoch": 4.700081333875559, "grad_norm": 0.28327757120132446, "learning_rate": 1.0920297625988507e-07, "loss": 0.2704, "step": 46230 }, { "epoch": 4.700183001220008, "grad_norm": 0.28836098313331604, "learning_rate": 1.0912922334585407e-07, "loss": 0.2963, "step": 46231 }, { "epoch": 4.700284668564457, "grad_norm": 0.27137839794158936, "learning_rate": 1.0905549507086832e-07, "loss": 0.29, "step": 46232 }, { "epoch": 4.700386335908906, "grad_norm": 0.2867964804172516, "learning_rate": 1.0898179143529808e-07, "loss": 0.3326, "step": 46233 }, { "epoch": 4.700488003253355, "grad_norm": 0.2899135649204254, "learning_rate": 1.0890811243951582e-07, "loss": 0.3283, "step": 46234 }, { "epoch": 4.700589670597804, "grad_norm": 0.296415239572525, "learning_rate": 1.0883445808389237e-07, "loss": 0.2995, "step": 46235 }, { "epoch": 4.700691337942253, "grad_norm": 0.2943054139614105, "learning_rate": 1.0876082836879797e-07, "loss": 0.3134, "step": 46236 }, { "epoch": 4.700793005286702, "grad_norm": 0.28586432337760925, "learning_rate": 1.0868722329460457e-07, "loss": 0.3015, "step": 46237 }, { "epoch": 4.700894672631151, "grad_norm": 0.265806645154953, "learning_rate": 1.0861364286168242e-07, "loss": 0.3089, "step": 46238 }, { "epoch": 4.7009963399756, "grad_norm": 0.27599093317985535, "learning_rate": 1.0854008707040231e-07, "loss": 0.289, "step": 46239 }, { "epoch": 4.701098007320049, "grad_norm": 0.261479914188385, "learning_rate": 1.0846655592113564e-07, "loss": 0.288, "step": 46240 }, { "epoch": 4.7011996746644975, "grad_norm": 0.29123103618621826, "learning_rate": 1.08393049414251e-07, "loss": 0.2941, "step": 46241 }, { "epoch": 4.7013013420089464, "grad_norm": 0.2764693796634674, "learning_rate": 1.0831956755012086e-07, "loss": 0.311, "step": 46242 }, { "epoch": 4.701403009353395, "grad_norm": 0.2847383916378021, "learning_rate": 1.0824611032911325e-07, "loss": 0.3272, "step": 46243 }, { "epoch": 4.701504676697844, "grad_norm": 0.3077206015586853, "learning_rate": 1.0817267775159901e-07, "loss": 0.2927, "step": 46244 }, { "epoch": 4.701606344042293, "grad_norm": 0.29170119762420654, "learning_rate": 1.0809926981794949e-07, "loss": 0.2873, "step": 46245 }, { "epoch": 4.701708011386742, "grad_norm": 0.2955003082752228, "learning_rate": 1.0802588652853274e-07, "loss": 0.2691, "step": 46246 }, { "epoch": 4.701809678731191, "grad_norm": 0.28060007095336914, "learning_rate": 1.0795252788371902e-07, "loss": 0.2889, "step": 46247 }, { "epoch": 4.70191134607564, "grad_norm": 0.2822400629520416, "learning_rate": 1.0787919388387857e-07, "loss": 0.2868, "step": 46248 }, { "epoch": 4.702013013420089, "grad_norm": 0.2802281081676483, "learning_rate": 1.0780588452937945e-07, "loss": 0.2896, "step": 46249 }, { "epoch": 4.702114680764538, "grad_norm": 0.28134095668792725, "learning_rate": 1.0773259982059192e-07, "loss": 0.282, "step": 46250 }, { "epoch": 4.702216348108987, "grad_norm": 0.2783568501472473, "learning_rate": 1.0765933975788457e-07, "loss": 0.2924, "step": 46251 }, { "epoch": 4.702318015453436, "grad_norm": 0.2663811147212982, "learning_rate": 1.0758610434162819e-07, "loss": 0.3404, "step": 46252 }, { "epoch": 4.702419682797886, "grad_norm": 0.2868768572807312, "learning_rate": 1.0751289357218919e-07, "loss": 0.2722, "step": 46253 }, { "epoch": 4.702521350142335, "grad_norm": 0.2668664753437042, "learning_rate": 1.0743970744993725e-07, "loss": 0.2915, "step": 46254 }, { "epoch": 4.702623017486784, "grad_norm": 0.27242663502693176, "learning_rate": 1.0736654597524265e-07, "loss": 0.2804, "step": 46255 }, { "epoch": 4.7027246848312325, "grad_norm": 0.2723722755908966, "learning_rate": 1.0729340914847231e-07, "loss": 0.2999, "step": 46256 }, { "epoch": 4.7028263521756815, "grad_norm": 0.2896622121334076, "learning_rate": 1.072202969699948e-07, "loss": 0.2989, "step": 46257 }, { "epoch": 4.70292801952013, "grad_norm": 0.2751927971839905, "learning_rate": 1.0714720944017931e-07, "loss": 0.284, "step": 46258 }, { "epoch": 4.703029686864579, "grad_norm": 0.29151517152786255, "learning_rate": 1.0707414655939219e-07, "loss": 0.2957, "step": 46259 }, { "epoch": 4.703131354209028, "grad_norm": 0.28167402744293213, "learning_rate": 1.0700110832800426e-07, "loss": 0.3111, "step": 46260 }, { "epoch": 4.703233021553477, "grad_norm": 0.3073461949825287, "learning_rate": 1.0692809474638078e-07, "loss": 0.2871, "step": 46261 }, { "epoch": 4.703334688897926, "grad_norm": 0.2747136950492859, "learning_rate": 1.06855105814892e-07, "loss": 0.2808, "step": 46262 }, { "epoch": 4.703436356242375, "grad_norm": 0.2676997780799866, "learning_rate": 1.067821415339032e-07, "loss": 0.2958, "step": 46263 }, { "epoch": 4.703538023586824, "grad_norm": 0.2873094081878662, "learning_rate": 1.0670920190378353e-07, "loss": 0.2825, "step": 46264 }, { "epoch": 4.703639690931273, "grad_norm": 0.28964051604270935, "learning_rate": 1.0663628692490102e-07, "loss": 0.2915, "step": 46265 }, { "epoch": 4.703741358275722, "grad_norm": 0.2802897095680237, "learning_rate": 1.0656339659762094e-07, "loss": 0.2701, "step": 46266 }, { "epoch": 4.703843025620171, "grad_norm": 0.3052653670310974, "learning_rate": 1.0649053092231188e-07, "loss": 0.2836, "step": 46267 }, { "epoch": 4.70394469296462, "grad_norm": 0.27532124519348145, "learning_rate": 1.0641768989934131e-07, "loss": 0.2718, "step": 46268 }, { "epoch": 4.704046360309069, "grad_norm": 0.27017611265182495, "learning_rate": 1.0634487352907452e-07, "loss": 0.2885, "step": 46269 }, { "epoch": 4.704148027653518, "grad_norm": 0.3003207743167877, "learning_rate": 1.0627208181188009e-07, "loss": 0.3083, "step": 46270 }, { "epoch": 4.704249694997967, "grad_norm": 0.2816575765609741, "learning_rate": 1.0619931474812328e-07, "loss": 0.3106, "step": 46271 }, { "epoch": 4.704351362342416, "grad_norm": 0.2903844714164734, "learning_rate": 1.0612657233817214e-07, "loss": 0.2772, "step": 46272 }, { "epoch": 4.7044530296868645, "grad_norm": 0.28947168588638306, "learning_rate": 1.0605385458239192e-07, "loss": 0.2625, "step": 46273 }, { "epoch": 4.7045546970313135, "grad_norm": 0.26249203085899353, "learning_rate": 1.0598116148114956e-07, "loss": 0.3081, "step": 46274 }, { "epoch": 4.704656364375762, "grad_norm": 0.2840457260608673, "learning_rate": 1.0590849303481088e-07, "loss": 0.2875, "step": 46275 }, { "epoch": 4.704758031720211, "grad_norm": 0.3067161440849304, "learning_rate": 1.0583584924374224e-07, "loss": 0.2828, "step": 46276 }, { "epoch": 4.70485969906466, "grad_norm": 0.28624075651168823, "learning_rate": 1.0576323010831002e-07, "loss": 0.2888, "step": 46277 }, { "epoch": 4.704961366409109, "grad_norm": 0.28613194823265076, "learning_rate": 1.0569063562887894e-07, "loss": 0.3175, "step": 46278 }, { "epoch": 4.705063033753558, "grad_norm": 0.2935536205768585, "learning_rate": 1.0561806580581591e-07, "loss": 0.2704, "step": 46279 }, { "epoch": 4.705164701098007, "grad_norm": 0.2527693510055542, "learning_rate": 1.0554552063948565e-07, "loss": 0.2788, "step": 46280 }, { "epoch": 4.705266368442456, "grad_norm": 0.2769346833229065, "learning_rate": 1.0547300013025397e-07, "loss": 0.3029, "step": 46281 }, { "epoch": 4.705368035786905, "grad_norm": 0.27794358134269714, "learning_rate": 1.0540050427848669e-07, "loss": 0.3273, "step": 46282 }, { "epoch": 4.705469703131354, "grad_norm": 0.2763251066207886, "learning_rate": 1.0532803308454742e-07, "loss": 0.3029, "step": 46283 }, { "epoch": 4.705571370475803, "grad_norm": 0.27320513129234314, "learning_rate": 1.0525558654880364e-07, "loss": 0.3241, "step": 46284 }, { "epoch": 4.705673037820252, "grad_norm": 0.2821560800075531, "learning_rate": 1.0518316467161838e-07, "loss": 0.2657, "step": 46285 }, { "epoch": 4.705774705164701, "grad_norm": 0.28194868564605713, "learning_rate": 1.0511076745335691e-07, "loss": 0.2961, "step": 46286 }, { "epoch": 4.70587637250915, "grad_norm": 0.2964421212673187, "learning_rate": 1.050383948943845e-07, "loss": 0.2883, "step": 46287 }, { "epoch": 4.705978039853599, "grad_norm": 0.2664334177970886, "learning_rate": 1.0496604699506585e-07, "loss": 0.2849, "step": 46288 }, { "epoch": 4.7060797071980485, "grad_norm": 0.2818925678730011, "learning_rate": 1.0489372375576457e-07, "loss": 0.3149, "step": 46289 }, { "epoch": 4.706181374542497, "grad_norm": 0.2801419496536255, "learning_rate": 1.0482142517684535e-07, "loss": 0.2805, "step": 46290 }, { "epoch": 4.706283041886946, "grad_norm": 0.2684166431427002, "learning_rate": 1.0474915125867292e-07, "loss": 0.3043, "step": 46291 }, { "epoch": 4.706384709231395, "grad_norm": 0.3207337558269501, "learning_rate": 1.0467690200161085e-07, "loss": 0.2805, "step": 46292 }, { "epoch": 4.706486376575844, "grad_norm": 0.2826577425003052, "learning_rate": 1.0460467740602276e-07, "loss": 0.2762, "step": 46293 }, { "epoch": 4.706588043920293, "grad_norm": 0.25494661927223206, "learning_rate": 1.0453247747227335e-07, "loss": 0.2806, "step": 46294 }, { "epoch": 4.706689711264742, "grad_norm": 0.28680193424224854, "learning_rate": 1.0446030220072622e-07, "loss": 0.2929, "step": 46295 }, { "epoch": 4.706791378609191, "grad_norm": 0.27329230308532715, "learning_rate": 1.0438815159174387e-07, "loss": 0.2962, "step": 46296 }, { "epoch": 4.70689304595364, "grad_norm": 0.2598118782043457, "learning_rate": 1.0431602564569154e-07, "loss": 0.2814, "step": 46297 }, { "epoch": 4.706994713298089, "grad_norm": 0.28171631693840027, "learning_rate": 1.0424392436293173e-07, "loss": 0.2947, "step": 46298 }, { "epoch": 4.707096380642538, "grad_norm": 0.2923809587955475, "learning_rate": 1.0417184774382749e-07, "loss": 0.2832, "step": 46299 }, { "epoch": 4.707198047986987, "grad_norm": 0.2870834469795227, "learning_rate": 1.040997957887413e-07, "loss": 0.2927, "step": 46300 }, { "epoch": 4.707299715331436, "grad_norm": 0.27517688274383545, "learning_rate": 1.0402776849803731e-07, "loss": 0.2923, "step": 46301 }, { "epoch": 4.707401382675885, "grad_norm": 0.28940677642822266, "learning_rate": 1.0395576587207856e-07, "loss": 0.2867, "step": 46302 }, { "epoch": 4.707503050020334, "grad_norm": 0.2797366976737976, "learning_rate": 1.0388378791122644e-07, "loss": 0.3017, "step": 46303 }, { "epoch": 4.707604717364783, "grad_norm": 0.27900102734565735, "learning_rate": 1.0381183461584454e-07, "loss": 0.2782, "step": 46304 }, { "epoch": 4.7077063847092315, "grad_norm": 0.27773740887641907, "learning_rate": 1.0373990598629536e-07, "loss": 0.3202, "step": 46305 }, { "epoch": 4.7078080520536805, "grad_norm": 0.26014798879623413, "learning_rate": 1.0366800202294025e-07, "loss": 0.2969, "step": 46306 }, { "epoch": 4.707909719398129, "grad_norm": 0.28926146030426025, "learning_rate": 1.0359612272614283e-07, "loss": 0.3026, "step": 46307 }, { "epoch": 4.708011386742578, "grad_norm": 0.3068740963935852, "learning_rate": 1.0352426809626503e-07, "loss": 0.2985, "step": 46308 }, { "epoch": 4.708113054087027, "grad_norm": 0.27421751618385315, "learning_rate": 1.0345243813366768e-07, "loss": 0.2965, "step": 46309 }, { "epoch": 4.708214721431476, "grad_norm": 0.28426748514175415, "learning_rate": 1.0338063283871325e-07, "loss": 0.3045, "step": 46310 }, { "epoch": 4.708316388775925, "grad_norm": 0.2824731171131134, "learning_rate": 1.0330885221176367e-07, "loss": 0.2931, "step": 46311 }, { "epoch": 4.708418056120374, "grad_norm": 0.2980523109436035, "learning_rate": 1.032370962531809e-07, "loss": 0.2858, "step": 46312 }, { "epoch": 4.708519723464823, "grad_norm": 0.26786008477211, "learning_rate": 1.0316536496332575e-07, "loss": 0.2849, "step": 46313 }, { "epoch": 4.708621390809272, "grad_norm": 0.27663204073905945, "learning_rate": 1.0309365834255958e-07, "loss": 0.2835, "step": 46314 }, { "epoch": 4.708723058153721, "grad_norm": 0.2846836447715759, "learning_rate": 1.0302197639124433e-07, "loss": 0.2804, "step": 46315 }, { "epoch": 4.70882472549817, "grad_norm": 0.2640911340713501, "learning_rate": 1.0295031910974029e-07, "loss": 0.3202, "step": 46316 }, { "epoch": 4.708926392842619, "grad_norm": 0.2953154444694519, "learning_rate": 1.0287868649840881e-07, "loss": 0.3088, "step": 46317 }, { "epoch": 4.709028060187068, "grad_norm": 0.28379201889038086, "learning_rate": 1.028070785576113e-07, "loss": 0.2682, "step": 46318 }, { "epoch": 4.709129727531517, "grad_norm": 0.2718556225299835, "learning_rate": 1.0273549528770743e-07, "loss": 0.2792, "step": 46319 }, { "epoch": 4.709231394875966, "grad_norm": 0.26775556802749634, "learning_rate": 1.0266393668905805e-07, "loss": 0.2949, "step": 46320 }, { "epoch": 4.709333062220415, "grad_norm": 0.2841002643108368, "learning_rate": 1.0259240276202453e-07, "loss": 0.2939, "step": 46321 }, { "epoch": 4.7094347295648635, "grad_norm": 0.307369589805603, "learning_rate": 1.0252089350696714e-07, "loss": 0.3127, "step": 46322 }, { "epoch": 4.7095363969093125, "grad_norm": 0.3029417395591736, "learning_rate": 1.0244940892424449e-07, "loss": 0.296, "step": 46323 }, { "epoch": 4.709638064253761, "grad_norm": 0.2701108455657959, "learning_rate": 1.0237794901421849e-07, "loss": 0.306, "step": 46324 }, { "epoch": 4.70973973159821, "grad_norm": 0.28293725848197937, "learning_rate": 1.0230651377724888e-07, "loss": 0.2557, "step": 46325 }, { "epoch": 4.709841398942659, "grad_norm": 0.2844219207763672, "learning_rate": 1.0223510321369478e-07, "loss": 0.295, "step": 46326 }, { "epoch": 4.709943066287108, "grad_norm": 0.27440473437309265, "learning_rate": 1.021637173239165e-07, "loss": 0.2915, "step": 46327 }, { "epoch": 4.710044733631557, "grad_norm": 0.2742899954319, "learning_rate": 1.0209235610827317e-07, "loss": 0.2844, "step": 46328 }, { "epoch": 4.710146400976006, "grad_norm": 0.2931599020957947, "learning_rate": 1.0202101956712507e-07, "loss": 0.2909, "step": 46329 }, { "epoch": 4.710248068320455, "grad_norm": 0.24398663640022278, "learning_rate": 1.0194970770083079e-07, "loss": 0.315, "step": 46330 }, { "epoch": 4.710349735664904, "grad_norm": 0.264314204454422, "learning_rate": 1.0187842050975005e-07, "loss": 0.2874, "step": 46331 }, { "epoch": 4.710451403009353, "grad_norm": 0.29759567975997925, "learning_rate": 1.0180715799424201e-07, "loss": 0.2521, "step": 46332 }, { "epoch": 4.710553070353802, "grad_norm": 0.2871790826320648, "learning_rate": 1.0173592015466527e-07, "loss": 0.321, "step": 46333 }, { "epoch": 4.710654737698251, "grad_norm": 0.2786223888397217, "learning_rate": 1.0166470699137898e-07, "loss": 0.2776, "step": 46334 }, { "epoch": 4.710756405042701, "grad_norm": 0.28347206115722656, "learning_rate": 1.015935185047423e-07, "loss": 0.31, "step": 46335 }, { "epoch": 4.71085807238715, "grad_norm": 0.2735442519187927, "learning_rate": 1.0152235469511273e-07, "loss": 0.2863, "step": 46336 }, { "epoch": 4.7109597397315985, "grad_norm": 0.25990626215934753, "learning_rate": 1.0145121556284998e-07, "loss": 0.3086, "step": 46337 }, { "epoch": 4.7110614070760475, "grad_norm": 0.2790814936161041, "learning_rate": 1.013801011083121e-07, "loss": 0.2841, "step": 46338 }, { "epoch": 4.711163074420496, "grad_norm": 0.28433406352996826, "learning_rate": 1.013090113318571e-07, "loss": 0.2974, "step": 46339 }, { "epoch": 4.711264741764945, "grad_norm": 0.2714003920555115, "learning_rate": 1.0123794623384253e-07, "loss": 0.2792, "step": 46340 }, { "epoch": 4.711366409109394, "grad_norm": 0.2855333387851715, "learning_rate": 1.0116690581462751e-07, "loss": 0.3189, "step": 46341 }, { "epoch": 4.711468076453843, "grad_norm": 0.2821866571903229, "learning_rate": 1.0109589007456954e-07, "loss": 0.2867, "step": 46342 }, { "epoch": 4.711569743798292, "grad_norm": 0.2622216045856476, "learning_rate": 1.0102489901402612e-07, "loss": 0.2728, "step": 46343 }, { "epoch": 4.711671411142741, "grad_norm": 0.2811592221260071, "learning_rate": 1.0095393263335529e-07, "loss": 0.2819, "step": 46344 }, { "epoch": 4.71177307848719, "grad_norm": 0.2935313880443573, "learning_rate": 1.0088299093291454e-07, "loss": 0.2636, "step": 46345 }, { "epoch": 4.711874745831639, "grad_norm": 0.2846433222293854, "learning_rate": 1.0081207391306081e-07, "loss": 0.2605, "step": 46346 }, { "epoch": 4.711976413176088, "grad_norm": 0.27879759669303894, "learning_rate": 1.0074118157415158e-07, "loss": 0.3412, "step": 46347 }, { "epoch": 4.712078080520537, "grad_norm": 0.2640412747859955, "learning_rate": 1.0067031391654436e-07, "loss": 0.3033, "step": 46348 }, { "epoch": 4.712179747864986, "grad_norm": 0.2843748927116394, "learning_rate": 1.0059947094059553e-07, "loss": 0.2941, "step": 46349 }, { "epoch": 4.712281415209435, "grad_norm": 0.2992466688156128, "learning_rate": 1.00528652646662e-07, "loss": 0.2862, "step": 46350 }, { "epoch": 4.712383082553884, "grad_norm": 0.27543577551841736, "learning_rate": 1.004578590351013e-07, "loss": 0.2778, "step": 46351 }, { "epoch": 4.712484749898333, "grad_norm": 0.277168333530426, "learning_rate": 1.0038709010626979e-07, "loss": 0.3138, "step": 46352 }, { "epoch": 4.712586417242782, "grad_norm": 0.29910773038864136, "learning_rate": 1.0031634586052274e-07, "loss": 0.2781, "step": 46353 }, { "epoch": 4.7126880845872305, "grad_norm": 0.290365070104599, "learning_rate": 1.0024562629821877e-07, "loss": 0.3026, "step": 46354 }, { "epoch": 4.7127897519316795, "grad_norm": 0.28623494505882263, "learning_rate": 1.0017493141971258e-07, "loss": 0.2598, "step": 46355 }, { "epoch": 4.712891419276128, "grad_norm": 0.2841827869415283, "learning_rate": 1.0010426122535999e-07, "loss": 0.2806, "step": 46356 }, { "epoch": 4.712993086620577, "grad_norm": 0.27963709831237793, "learning_rate": 1.0003361571551851e-07, "loss": 0.3224, "step": 46357 }, { "epoch": 4.713094753965026, "grad_norm": 0.29461562633514404, "learning_rate": 9.99629948905434e-08, "loss": 0.2998, "step": 46358 }, { "epoch": 4.713196421309475, "grad_norm": 0.2954307198524475, "learning_rate": 9.989239875079049e-08, "loss": 0.2937, "step": 46359 }, { "epoch": 4.713298088653924, "grad_norm": 0.2778611481189728, "learning_rate": 9.982182729661449e-08, "loss": 0.2481, "step": 46360 }, { "epoch": 4.713399755998373, "grad_norm": 0.2994440197944641, "learning_rate": 9.97512805283718e-08, "loss": 0.2847, "step": 46361 }, { "epoch": 4.713501423342822, "grad_norm": 0.2946386933326721, "learning_rate": 9.968075844641822e-08, "loss": 0.3141, "step": 46362 }, { "epoch": 4.713603090687271, "grad_norm": 0.2668743133544922, "learning_rate": 9.961026105110738e-08, "loss": 0.2851, "step": 46363 }, { "epoch": 4.71370475803172, "grad_norm": 0.26971134543418884, "learning_rate": 9.953978834279676e-08, "loss": 0.2927, "step": 46364 }, { "epoch": 4.713806425376169, "grad_norm": 0.281767874956131, "learning_rate": 9.946934032183941e-08, "loss": 0.286, "step": 46365 }, { "epoch": 4.713908092720618, "grad_norm": 0.28810423612594604, "learning_rate": 9.939891698859116e-08, "loss": 0.2997, "step": 46366 }, { "epoch": 4.714009760065067, "grad_norm": 0.26146209239959717, "learning_rate": 9.932851834340673e-08, "loss": 0.3086, "step": 46367 }, { "epoch": 4.714111427409516, "grad_norm": 0.2786553204059601, "learning_rate": 9.925814438664084e-08, "loss": 0.2828, "step": 46368 }, { "epoch": 4.714213094753965, "grad_norm": 0.2699975371360779, "learning_rate": 9.918779511864762e-08, "loss": 0.3054, "step": 46369 }, { "epoch": 4.714314762098414, "grad_norm": 0.3042367696762085, "learning_rate": 9.911747053978127e-08, "loss": 0.2878, "step": 46370 }, { "epoch": 4.714416429442863, "grad_norm": 0.2753003239631653, "learning_rate": 9.904717065039704e-08, "loss": 0.2947, "step": 46371 }, { "epoch": 4.714518096787312, "grad_norm": 0.2675032913684845, "learning_rate": 9.897689545084854e-08, "loss": 0.304, "step": 46372 }, { "epoch": 4.714619764131761, "grad_norm": 0.27040010690689087, "learning_rate": 9.890664494148883e-08, "loss": 0.3217, "step": 46373 }, { "epoch": 4.71472143147621, "grad_norm": 0.25339028239250183, "learning_rate": 9.883641912267427e-08, "loss": 0.3297, "step": 46374 }, { "epoch": 4.714823098820659, "grad_norm": 0.280689001083374, "learning_rate": 9.876621799475572e-08, "loss": 0.297, "step": 46375 }, { "epoch": 4.714924766165108, "grad_norm": 0.30257901549339294, "learning_rate": 9.869604155808898e-08, "loss": 0.3334, "step": 46376 }, { "epoch": 4.715026433509557, "grad_norm": 0.3006569743156433, "learning_rate": 9.862588981302657e-08, "loss": 0.297, "step": 46377 }, { "epoch": 4.715128100854006, "grad_norm": 0.29910972714424133, "learning_rate": 9.855576275992262e-08, "loss": 0.3081, "step": 46378 }, { "epoch": 4.715229768198455, "grad_norm": 0.31751298904418945, "learning_rate": 9.848566039912965e-08, "loss": 0.2977, "step": 46379 }, { "epoch": 4.715331435542904, "grad_norm": 0.2916110157966614, "learning_rate": 9.841558273100127e-08, "loss": 0.253, "step": 46380 }, { "epoch": 4.715433102887353, "grad_norm": 0.28621843457221985, "learning_rate": 9.83455297558905e-08, "loss": 0.2733, "step": 46381 }, { "epoch": 4.715534770231802, "grad_norm": 0.28154152631759644, "learning_rate": 9.827550147414988e-08, "loss": 0.2928, "step": 46382 }, { "epoch": 4.715636437576251, "grad_norm": 0.2965661883354187, "learning_rate": 9.820549788613243e-08, "loss": 0.3107, "step": 46383 }, { "epoch": 4.7157381049207, "grad_norm": 0.28157496452331543, "learning_rate": 9.81355189921912e-08, "loss": 0.2852, "step": 46384 }, { "epoch": 4.715839772265149, "grad_norm": 0.28198084235191345, "learning_rate": 9.806556479267815e-08, "loss": 0.3154, "step": 46385 }, { "epoch": 4.7159414396095976, "grad_norm": 0.254553884267807, "learning_rate": 9.79956352879452e-08, "loss": 0.3071, "step": 46386 }, { "epoch": 4.7160431069540465, "grad_norm": 0.2612389624118805, "learning_rate": 9.792573047834653e-08, "loss": 0.3022, "step": 46387 }, { "epoch": 4.716144774298495, "grad_norm": 0.2975260615348816, "learning_rate": 9.78558503642324e-08, "loss": 0.258, "step": 46388 }, { "epoch": 4.716246441642944, "grad_norm": 0.25763219594955444, "learning_rate": 9.778599494595586e-08, "loss": 0.3487, "step": 46389 }, { "epoch": 4.716348108987393, "grad_norm": 0.28816741704940796, "learning_rate": 9.771616422386832e-08, "loss": 0.2762, "step": 46390 }, { "epoch": 4.716449776331842, "grad_norm": 0.30318397283554077, "learning_rate": 9.764635819832169e-08, "loss": 0.2715, "step": 46391 }, { "epoch": 4.716551443676291, "grad_norm": 0.27233177423477173, "learning_rate": 9.757657686966848e-08, "loss": 0.3077, "step": 46392 }, { "epoch": 4.71665311102074, "grad_norm": 0.2871863842010498, "learning_rate": 9.750682023825841e-08, "loss": 0.265, "step": 46393 }, { "epoch": 4.716754778365189, "grad_norm": 0.2780158221721649, "learning_rate": 9.74370883044451e-08, "loss": 0.2892, "step": 46394 }, { "epoch": 4.716856445709638, "grad_norm": 0.2905362844467163, "learning_rate": 9.736738106857769e-08, "loss": 0.2728, "step": 46395 }, { "epoch": 4.716958113054087, "grad_norm": 0.280602365732193, "learning_rate": 9.729769853100868e-08, "loss": 0.3009, "step": 46396 }, { "epoch": 4.717059780398536, "grad_norm": 0.2913571000099182, "learning_rate": 9.722804069208946e-08, "loss": 0.2916, "step": 46397 }, { "epoch": 4.717161447742985, "grad_norm": 0.28007012605667114, "learning_rate": 9.715840755216921e-08, "loss": 0.2776, "step": 46398 }, { "epoch": 4.717263115087434, "grad_norm": 0.25408923625946045, "learning_rate": 9.70887991116004e-08, "loss": 0.3281, "step": 46399 }, { "epoch": 4.717364782431883, "grad_norm": 0.28960955142974854, "learning_rate": 9.701921537073333e-08, "loss": 0.3171, "step": 46400 }, { "epoch": 4.717466449776332, "grad_norm": 0.2702907919883728, "learning_rate": 9.69496563299177e-08, "loss": 0.2915, "step": 46401 }, { "epoch": 4.717568117120781, "grad_norm": 0.3092312216758728, "learning_rate": 9.688012198950491e-08, "loss": 0.28, "step": 46402 }, { "epoch": 4.7176697844652296, "grad_norm": 0.2871358096599579, "learning_rate": 9.681061234984469e-08, "loss": 0.311, "step": 46403 }, { "epoch": 4.7177714518096785, "grad_norm": 0.28144317865371704, "learning_rate": 9.674112741128838e-08, "loss": 0.2853, "step": 46404 }, { "epoch": 4.717873119154127, "grad_norm": 0.2809174358844757, "learning_rate": 9.667166717418408e-08, "loss": 0.2805, "step": 46405 }, { "epoch": 4.717974786498576, "grad_norm": 0.2851658761501312, "learning_rate": 9.66022316388826e-08, "loss": 0.301, "step": 46406 }, { "epoch": 4.718076453843025, "grad_norm": 0.2780287563800812, "learning_rate": 9.653282080573479e-08, "loss": 0.2851, "step": 46407 }, { "epoch": 4.718178121187474, "grad_norm": 0.30550989508628845, "learning_rate": 9.646343467508867e-08, "loss": 0.268, "step": 46408 }, { "epoch": 4.718279788531923, "grad_norm": 0.30153587460517883, "learning_rate": 9.639407324729455e-08, "loss": 0.2866, "step": 46409 }, { "epoch": 4.718381455876372, "grad_norm": 0.2763853073120117, "learning_rate": 9.632473652270213e-08, "loss": 0.2985, "step": 46410 }, { "epoch": 4.718483123220821, "grad_norm": 0.2795827090740204, "learning_rate": 9.625542450166003e-08, "loss": 0.3068, "step": 46411 }, { "epoch": 4.71858479056527, "grad_norm": 0.2808699309825897, "learning_rate": 9.618613718451797e-08, "loss": 0.3268, "step": 46412 }, { "epoch": 4.718686457909719, "grad_norm": 0.29614269733428955, "learning_rate": 9.611687457162456e-08, "loss": 0.3043, "step": 46413 }, { "epoch": 4.718788125254168, "grad_norm": 0.27162879705429077, "learning_rate": 9.60476366633295e-08, "loss": 0.3065, "step": 46414 }, { "epoch": 4.718889792598617, "grad_norm": 0.2975948452949524, "learning_rate": 9.597842345998087e-08, "loss": 0.2775, "step": 46415 }, { "epoch": 4.718991459943066, "grad_norm": 0.28889086842536926, "learning_rate": 9.590923496192672e-08, "loss": 0.279, "step": 46416 }, { "epoch": 4.719093127287516, "grad_norm": 0.3021214008331299, "learning_rate": 9.584007116951788e-08, "loss": 0.2935, "step": 46417 }, { "epoch": 4.719194794631965, "grad_norm": 0.2882077991962433, "learning_rate": 9.577093208310073e-08, "loss": 0.2853, "step": 46418 }, { "epoch": 4.7192964619764135, "grad_norm": 0.2740919888019562, "learning_rate": 9.570181770302445e-08, "loss": 0.2952, "step": 46419 }, { "epoch": 4.7193981293208624, "grad_norm": 0.3086903989315033, "learning_rate": 9.563272802963652e-08, "loss": 0.2813, "step": 46420 }, { "epoch": 4.719499796665311, "grad_norm": 0.27668851613998413, "learning_rate": 9.556366306328557e-08, "loss": 0.2986, "step": 46421 }, { "epoch": 4.71960146400976, "grad_norm": 0.28627339005470276, "learning_rate": 9.549462280431964e-08, "loss": 0.2905, "step": 46422 }, { "epoch": 4.719703131354209, "grad_norm": 0.2735472023487091, "learning_rate": 9.542560725308625e-08, "loss": 0.3045, "step": 46423 }, { "epoch": 4.719804798698658, "grad_norm": 0.26229241490364075, "learning_rate": 9.535661640993288e-08, "loss": 0.3126, "step": 46424 }, { "epoch": 4.719906466043107, "grad_norm": 0.2583581507205963, "learning_rate": 9.528765027520758e-08, "loss": 0.284, "step": 46425 }, { "epoch": 4.720008133387556, "grad_norm": 0.2853699028491974, "learning_rate": 9.521870884925677e-08, "loss": 0.3188, "step": 46426 }, { "epoch": 4.720109800732005, "grad_norm": 0.32209134101867676, "learning_rate": 9.514979213243014e-08, "loss": 0.3132, "step": 46427 }, { "epoch": 4.720211468076454, "grad_norm": 0.30967235565185547, "learning_rate": 9.508090012507188e-08, "loss": 0.3103, "step": 46428 }, { "epoch": 4.720313135420903, "grad_norm": 0.2768440544605255, "learning_rate": 9.501203282753113e-08, "loss": 0.3072, "step": 46429 }, { "epoch": 4.720414802765352, "grad_norm": 0.2634330689907074, "learning_rate": 9.494319024015431e-08, "loss": 0.3102, "step": 46430 }, { "epoch": 4.720516470109801, "grad_norm": 0.26912790536880493, "learning_rate": 9.487437236328778e-08, "loss": 0.3007, "step": 46431 }, { "epoch": 4.72061813745425, "grad_norm": 0.26392537355422974, "learning_rate": 9.48055791972785e-08, "loss": 0.2911, "step": 46432 }, { "epoch": 4.720719804798699, "grad_norm": 0.29094502329826355, "learning_rate": 9.473681074247342e-08, "loss": 0.2844, "step": 46433 }, { "epoch": 4.720821472143148, "grad_norm": 0.28787437081336975, "learning_rate": 9.466806699921838e-08, "loss": 0.3189, "step": 46434 }, { "epoch": 4.720923139487597, "grad_norm": 0.28526344895362854, "learning_rate": 9.459934796785919e-08, "loss": 0.2948, "step": 46435 }, { "epoch": 4.7210248068320455, "grad_norm": 0.2885552942752838, "learning_rate": 9.453065364874337e-08, "loss": 0.3422, "step": 46436 }, { "epoch": 4.7211264741764944, "grad_norm": 0.303777277469635, "learning_rate": 9.446198404221674e-08, "loss": 0.3092, "step": 46437 }, { "epoch": 4.721228141520943, "grad_norm": 0.2748124301433563, "learning_rate": 9.439333914862459e-08, "loss": 0.3037, "step": 46438 }, { "epoch": 4.721329808865392, "grad_norm": 0.2877165675163269, "learning_rate": 9.43247189683133e-08, "loss": 0.2753, "step": 46439 }, { "epoch": 4.721431476209841, "grad_norm": 0.28096896409988403, "learning_rate": 9.425612350162816e-08, "loss": 0.2949, "step": 46440 }, { "epoch": 4.72153314355429, "grad_norm": 0.28474244475364685, "learning_rate": 9.418755274891445e-08, "loss": 0.2715, "step": 46441 }, { "epoch": 4.721634810898739, "grad_norm": 0.2944866120815277, "learning_rate": 9.411900671051855e-08, "loss": 0.3235, "step": 46442 }, { "epoch": 4.721736478243188, "grad_norm": 0.2688578963279724, "learning_rate": 9.405048538678518e-08, "loss": 0.2942, "step": 46443 }, { "epoch": 4.721838145587637, "grad_norm": 0.2801900804042816, "learning_rate": 9.398198877806019e-08, "loss": 0.2871, "step": 46444 }, { "epoch": 4.721939812932086, "grad_norm": 0.27979546785354614, "learning_rate": 9.39135168846872e-08, "loss": 0.2711, "step": 46445 }, { "epoch": 4.722041480276535, "grad_norm": 0.270469605922699, "learning_rate": 9.384506970701201e-08, "loss": 0.2953, "step": 46446 }, { "epoch": 4.722143147620984, "grad_norm": 0.27463066577911377, "learning_rate": 9.377664724537994e-08, "loss": 0.3043, "step": 46447 }, { "epoch": 4.722244814965433, "grad_norm": 0.2919563949108124, "learning_rate": 9.370824950013513e-08, "loss": 0.2833, "step": 46448 }, { "epoch": 4.722346482309882, "grad_norm": 0.27493661642074585, "learning_rate": 9.363987647162232e-08, "loss": 0.2798, "step": 46449 }, { "epoch": 4.722448149654331, "grad_norm": 0.28041210770606995, "learning_rate": 9.357152816018566e-08, "loss": 0.2958, "step": 46450 }, { "epoch": 4.72254981699878, "grad_norm": 0.2641626000404358, "learning_rate": 9.35032045661699e-08, "loss": 0.273, "step": 46451 }, { "epoch": 4.722651484343229, "grad_norm": 0.2673676609992981, "learning_rate": 9.343490568991865e-08, "loss": 0.2885, "step": 46452 }, { "epoch": 4.722753151687678, "grad_norm": 0.2678074240684509, "learning_rate": 9.336663153177716e-08, "loss": 0.3127, "step": 46453 }, { "epoch": 4.722854819032127, "grad_norm": 0.2679806649684906, "learning_rate": 9.329838209208797e-08, "loss": 0.28, "step": 46454 }, { "epoch": 4.722956486376576, "grad_norm": 0.2656482458114624, "learning_rate": 9.323015737119523e-08, "loss": 0.3222, "step": 46455 }, { "epoch": 4.723058153721025, "grad_norm": 0.2708015739917755, "learning_rate": 9.316195736944366e-08, "loss": 0.2898, "step": 46456 }, { "epoch": 4.723159821065474, "grad_norm": 0.285395085811615, "learning_rate": 9.309378208717579e-08, "loss": 0.2746, "step": 46457 }, { "epoch": 4.723261488409923, "grad_norm": 0.26477283239364624, "learning_rate": 9.30256315247352e-08, "loss": 0.3346, "step": 46458 }, { "epoch": 4.723363155754372, "grad_norm": 0.3221692144870758, "learning_rate": 9.295750568246553e-08, "loss": 0.2572, "step": 46459 }, { "epoch": 4.723464823098821, "grad_norm": 0.2920585870742798, "learning_rate": 9.288940456071039e-08, "loss": 0.3048, "step": 46460 }, { "epoch": 4.72356649044327, "grad_norm": 0.29912957549095154, "learning_rate": 9.282132815981171e-08, "loss": 0.2939, "step": 46461 }, { "epoch": 4.723668157787719, "grad_norm": 0.2735096216201782, "learning_rate": 9.275327648011312e-08, "loss": 0.3154, "step": 46462 }, { "epoch": 4.723769825132168, "grad_norm": 0.27520740032196045, "learning_rate": 9.268524952195768e-08, "loss": 0.3297, "step": 46463 }, { "epoch": 4.723871492476617, "grad_norm": 0.27143552899360657, "learning_rate": 9.261724728568732e-08, "loss": 0.3165, "step": 46464 }, { "epoch": 4.723973159821066, "grad_norm": 0.30350497364997864, "learning_rate": 9.254926977164514e-08, "loss": 0.3278, "step": 46465 }, { "epoch": 4.724074827165515, "grad_norm": 0.29120779037475586, "learning_rate": 9.248131698017415e-08, "loss": 0.2836, "step": 46466 }, { "epoch": 4.724176494509964, "grad_norm": 0.28583183884620667, "learning_rate": 9.241338891161578e-08, "loss": 0.2846, "step": 46467 }, { "epoch": 4.7242781618544125, "grad_norm": 0.27832114696502686, "learning_rate": 9.234548556631196e-08, "loss": 0.277, "step": 46468 }, { "epoch": 4.7243798291988615, "grad_norm": 0.26652202010154724, "learning_rate": 9.227760694460574e-08, "loss": 0.3163, "step": 46469 }, { "epoch": 4.72448149654331, "grad_norm": 0.27541598677635193, "learning_rate": 9.22097530468391e-08, "loss": 0.2993, "step": 46470 }, { "epoch": 4.724583163887759, "grad_norm": 0.31417417526245117, "learning_rate": 9.214192387335341e-08, "loss": 0.2988, "step": 46471 }, { "epoch": 4.724684831232208, "grad_norm": 0.2823871374130249, "learning_rate": 9.207411942448952e-08, "loss": 0.3027, "step": 46472 }, { "epoch": 4.724786498576657, "grad_norm": 0.29340144991874695, "learning_rate": 9.200633970059048e-08, "loss": 0.2875, "step": 46473 }, { "epoch": 4.724888165921106, "grad_norm": 0.28648826479911804, "learning_rate": 9.193858470199768e-08, "loss": 0.2953, "step": 46474 }, { "epoch": 4.724989833265555, "grad_norm": 0.28494107723236084, "learning_rate": 9.187085442905086e-08, "loss": 0.323, "step": 46475 }, { "epoch": 4.725091500610004, "grad_norm": 0.31982678174972534, "learning_rate": 9.180314888209307e-08, "loss": 0.2679, "step": 46476 }, { "epoch": 4.725193167954453, "grad_norm": 0.2988986074924469, "learning_rate": 9.173546806146516e-08, "loss": 0.3178, "step": 46477 }, { "epoch": 4.725294835298902, "grad_norm": 0.2786608040332794, "learning_rate": 9.16678119675063e-08, "loss": 0.2683, "step": 46478 }, { "epoch": 4.725396502643351, "grad_norm": 0.2875276207923889, "learning_rate": 9.160018060055953e-08, "loss": 0.3079, "step": 46479 }, { "epoch": 4.7254981699878, "grad_norm": 0.2520638406276703, "learning_rate": 9.15325739609646e-08, "loss": 0.3166, "step": 46480 }, { "epoch": 4.725599837332249, "grad_norm": 0.2890220582485199, "learning_rate": 9.146499204906178e-08, "loss": 0.3147, "step": 46481 }, { "epoch": 4.725701504676698, "grad_norm": 0.3015466034412384, "learning_rate": 9.139743486519192e-08, "loss": 0.2956, "step": 46482 }, { "epoch": 4.725803172021147, "grad_norm": 0.27320924401283264, "learning_rate": 9.13299024096953e-08, "loss": 0.3247, "step": 46483 }, { "epoch": 4.725904839365596, "grad_norm": 0.2803398370742798, "learning_rate": 9.126239468291276e-08, "loss": 0.3152, "step": 46484 }, { "epoch": 4.7260065067100445, "grad_norm": 0.3076308071613312, "learning_rate": 9.119491168518291e-08, "loss": 0.2921, "step": 46485 }, { "epoch": 4.7261081740544935, "grad_norm": 0.28208208084106445, "learning_rate": 9.112745341684714e-08, "loss": 0.3029, "step": 46486 }, { "epoch": 4.726209841398942, "grad_norm": 0.29701387882232666, "learning_rate": 9.106001987824464e-08, "loss": 0.3069, "step": 46487 }, { "epoch": 4.726311508743391, "grad_norm": 0.26768189668655396, "learning_rate": 9.099261106971457e-08, "loss": 0.2733, "step": 46488 }, { "epoch": 4.72641317608784, "grad_norm": 0.2891438901424408, "learning_rate": 9.092522699159778e-08, "loss": 0.2763, "step": 46489 }, { "epoch": 4.726514843432289, "grad_norm": 0.2746737599372864, "learning_rate": 9.085786764423343e-08, "loss": 0.3116, "step": 46490 }, { "epoch": 4.726616510776738, "grad_norm": 0.2837389409542084, "learning_rate": 9.079053302796014e-08, "loss": 0.2846, "step": 46491 }, { "epoch": 4.726718178121187, "grad_norm": 0.28979358077049255, "learning_rate": 9.07232231431171e-08, "loss": 0.3028, "step": 46492 }, { "epoch": 4.726819845465636, "grad_norm": 0.29493075609207153, "learning_rate": 9.065593799004457e-08, "loss": 0.2953, "step": 46493 }, { "epoch": 4.726921512810085, "grad_norm": 0.25305119156837463, "learning_rate": 9.058867756908063e-08, "loss": 0.2845, "step": 46494 }, { "epoch": 4.727023180154534, "grad_norm": 0.2946644723415375, "learning_rate": 9.052144188056389e-08, "loss": 0.2706, "step": 46495 }, { "epoch": 4.727124847498983, "grad_norm": 0.2641868591308594, "learning_rate": 9.045423092483407e-08, "loss": 0.2903, "step": 46496 }, { "epoch": 4.727226514843432, "grad_norm": 0.29192453622817993, "learning_rate": 9.03870447022287e-08, "loss": 0.3252, "step": 46497 }, { "epoch": 4.727328182187881, "grad_norm": 0.2881162762641907, "learning_rate": 9.031988321308637e-08, "loss": 0.2863, "step": 46498 }, { "epoch": 4.727429849532331, "grad_norm": 0.2572900652885437, "learning_rate": 9.025274645774685e-08, "loss": 0.2611, "step": 46499 }, { "epoch": 4.7275315168767795, "grad_norm": 0.2674015760421753, "learning_rate": 9.01856344365465e-08, "loss": 0.3073, "step": 46500 }, { "epoch": 4.7276331842212285, "grad_norm": 0.2737286388874054, "learning_rate": 9.011854714982448e-08, "loss": 0.2727, "step": 46501 }, { "epoch": 4.727734851565677, "grad_norm": 0.26096612215042114, "learning_rate": 9.005148459791779e-08, "loss": 0.2797, "step": 46502 }, { "epoch": 4.727836518910126, "grad_norm": 0.2877943217754364, "learning_rate": 8.998444678116558e-08, "loss": 0.2913, "step": 46503 }, { "epoch": 4.727938186254575, "grad_norm": 0.31712573766708374, "learning_rate": 8.99174336999048e-08, "loss": 0.2763, "step": 46504 }, { "epoch": 4.728039853599024, "grad_norm": 0.27978846430778503, "learning_rate": 8.985044535447297e-08, "loss": 0.3129, "step": 46505 }, { "epoch": 4.728141520943473, "grad_norm": 0.28416988253593445, "learning_rate": 8.978348174520813e-08, "loss": 0.2968, "step": 46506 }, { "epoch": 4.728243188287922, "grad_norm": 0.29142263531684875, "learning_rate": 8.971654287244669e-08, "loss": 0.2983, "step": 46507 }, { "epoch": 4.728344855632371, "grad_norm": 0.2760723829269409, "learning_rate": 8.964962873652671e-08, "loss": 0.3172, "step": 46508 }, { "epoch": 4.72844652297682, "grad_norm": 0.32216203212738037, "learning_rate": 8.958273933778517e-08, "loss": 0.2949, "step": 46509 }, { "epoch": 4.728548190321269, "grad_norm": 0.27981865406036377, "learning_rate": 8.951587467655898e-08, "loss": 0.2772, "step": 46510 }, { "epoch": 4.728649857665718, "grad_norm": 0.2556176781654358, "learning_rate": 8.944903475318511e-08, "loss": 0.3014, "step": 46511 }, { "epoch": 4.728751525010167, "grad_norm": 0.3038473129272461, "learning_rate": 8.938221956799942e-08, "loss": 0.2761, "step": 46512 }, { "epoch": 4.728853192354616, "grad_norm": 0.2832239866256714, "learning_rate": 8.931542912133939e-08, "loss": 0.2878, "step": 46513 }, { "epoch": 4.728954859699065, "grad_norm": 0.26756808161735535, "learning_rate": 8.924866341354143e-08, "loss": 0.3007, "step": 46514 }, { "epoch": 4.729056527043514, "grad_norm": 0.29067865014076233, "learning_rate": 8.91819224449414e-08, "loss": 0.2787, "step": 46515 }, { "epoch": 4.729158194387963, "grad_norm": 0.26635923981666565, "learning_rate": 8.911520621587677e-08, "loss": 0.3016, "step": 46516 }, { "epoch": 4.7292598617324115, "grad_norm": 0.30896255373954773, "learning_rate": 8.904851472668174e-08, "loss": 0.2884, "step": 46517 }, { "epoch": 4.7293615290768605, "grad_norm": 0.2856440842151642, "learning_rate": 8.898184797769327e-08, "loss": 0.2813, "step": 46518 }, { "epoch": 4.729463196421309, "grad_norm": 0.2821061313152313, "learning_rate": 8.891520596924775e-08, "loss": 0.3105, "step": 46519 }, { "epoch": 4.729564863765758, "grad_norm": 0.2778902053833008, "learning_rate": 8.884858870168044e-08, "loss": 0.2842, "step": 46520 }, { "epoch": 4.729666531110207, "grad_norm": 0.28842487931251526, "learning_rate": 8.878199617532668e-08, "loss": 0.3259, "step": 46521 }, { "epoch": 4.729768198454656, "grad_norm": 0.29508674144744873, "learning_rate": 8.871542839052172e-08, "loss": 0.3015, "step": 46522 }, { "epoch": 4.729869865799105, "grad_norm": 0.2670769989490509, "learning_rate": 8.86488853476014e-08, "loss": 0.2935, "step": 46523 }, { "epoch": 4.729971533143554, "grad_norm": 0.27910223603248596, "learning_rate": 8.858236704690159e-08, "loss": 0.2922, "step": 46524 }, { "epoch": 4.730073200488003, "grad_norm": 0.2918611764907837, "learning_rate": 8.85158734887559e-08, "loss": 0.3, "step": 46525 }, { "epoch": 4.730174867832452, "grad_norm": 0.2993384301662445, "learning_rate": 8.844940467350072e-08, "loss": 0.3112, "step": 46526 }, { "epoch": 4.730276535176901, "grad_norm": 0.2840975821018219, "learning_rate": 8.838296060146967e-08, "loss": 0.282, "step": 46527 }, { "epoch": 4.73037820252135, "grad_norm": 0.2712380588054657, "learning_rate": 8.831654127299804e-08, "loss": 0.2681, "step": 46528 }, { "epoch": 4.730479869865799, "grad_norm": 0.2821711301803589, "learning_rate": 8.825014668842169e-08, "loss": 0.2978, "step": 46529 }, { "epoch": 4.730581537210248, "grad_norm": 0.29537415504455566, "learning_rate": 8.818377684807256e-08, "loss": 0.2877, "step": 46530 }, { "epoch": 4.730683204554697, "grad_norm": 0.29152101278305054, "learning_rate": 8.811743175228704e-08, "loss": 0.2896, "step": 46531 }, { "epoch": 4.730784871899146, "grad_norm": 0.3098548650741577, "learning_rate": 8.805111140139821e-08, "loss": 0.2605, "step": 46532 }, { "epoch": 4.730886539243595, "grad_norm": 0.2699110805988312, "learning_rate": 8.798481579574081e-08, "loss": 0.2945, "step": 46533 }, { "epoch": 4.7309882065880435, "grad_norm": 0.2850075364112854, "learning_rate": 8.7918544935649e-08, "loss": 0.3045, "step": 46534 }, { "epoch": 4.731089873932493, "grad_norm": 0.32320454716682434, "learning_rate": 8.78522988214553e-08, "loss": 0.2785, "step": 46535 }, { "epoch": 4.731191541276942, "grad_norm": 0.2852734327316284, "learning_rate": 8.778607745349554e-08, "loss": 0.2566, "step": 46536 }, { "epoch": 4.731293208621391, "grad_norm": 0.2865271270275116, "learning_rate": 8.77198808321017e-08, "loss": 0.3241, "step": 46537 }, { "epoch": 4.73139487596584, "grad_norm": 0.2987556457519531, "learning_rate": 8.765370895760739e-08, "loss": 0.2981, "step": 46538 }, { "epoch": 4.731496543310289, "grad_norm": 0.3004026412963867, "learning_rate": 8.758756183034734e-08, "loss": 0.2755, "step": 46539 }, { "epoch": 4.731598210654738, "grad_norm": 0.3027381896972656, "learning_rate": 8.752143945065295e-08, "loss": 0.2845, "step": 46540 }, { "epoch": 4.731699877999187, "grad_norm": 0.28845837712287903, "learning_rate": 8.74553418188584e-08, "loss": 0.3045, "step": 46541 }, { "epoch": 4.731801545343636, "grad_norm": 0.2825733721256256, "learning_rate": 8.73892689352962e-08, "loss": 0.2688, "step": 46542 }, { "epoch": 4.731903212688085, "grad_norm": 0.26648828387260437, "learning_rate": 8.732322080029942e-08, "loss": 0.2911, "step": 46543 }, { "epoch": 4.732004880032534, "grad_norm": 0.2883225977420807, "learning_rate": 8.725719741420113e-08, "loss": 0.319, "step": 46544 }, { "epoch": 4.732106547376983, "grad_norm": 0.2689424753189087, "learning_rate": 8.719119877733329e-08, "loss": 0.2776, "step": 46545 }, { "epoch": 4.732208214721432, "grad_norm": 0.2749840319156647, "learning_rate": 8.71252248900295e-08, "loss": 0.2896, "step": 46546 }, { "epoch": 4.732309882065881, "grad_norm": 0.28177347779273987, "learning_rate": 8.705927575262063e-08, "loss": 0.2778, "step": 46547 }, { "epoch": 4.73241154941033, "grad_norm": 0.29228225350379944, "learning_rate": 8.699335136543973e-08, "loss": 0.3115, "step": 46548 }, { "epoch": 4.7325132167547785, "grad_norm": 0.29285895824432373, "learning_rate": 8.692745172881934e-08, "loss": 0.3012, "step": 46549 }, { "epoch": 4.7326148840992275, "grad_norm": 0.29555079340934753, "learning_rate": 8.686157684309027e-08, "loss": 0.3268, "step": 46550 }, { "epoch": 4.732716551443676, "grad_norm": 0.26936209201812744, "learning_rate": 8.67957267085856e-08, "loss": 0.2937, "step": 46551 }, { "epoch": 4.732818218788125, "grad_norm": 0.29261085391044617, "learning_rate": 8.672990132563619e-08, "loss": 0.3017, "step": 46552 }, { "epoch": 4.732919886132574, "grad_norm": 0.2700605094432831, "learning_rate": 8.666410069457343e-08, "loss": 0.281, "step": 46553 }, { "epoch": 4.733021553477023, "grad_norm": 0.26683205366134644, "learning_rate": 8.659832481573038e-08, "loss": 0.3063, "step": 46554 }, { "epoch": 4.733123220821472, "grad_norm": 0.27740341424942017, "learning_rate": 8.653257368943624e-08, "loss": 0.3138, "step": 46555 }, { "epoch": 4.733224888165921, "grad_norm": 0.2759908437728882, "learning_rate": 8.646684731602462e-08, "loss": 0.2753, "step": 46556 }, { "epoch": 4.73332655551037, "grad_norm": 0.2594982385635376, "learning_rate": 8.640114569582469e-08, "loss": 0.3324, "step": 46557 }, { "epoch": 4.733428222854819, "grad_norm": 0.2752442955970764, "learning_rate": 8.633546882916787e-08, "loss": 0.2791, "step": 46558 }, { "epoch": 4.733529890199268, "grad_norm": 0.29102396965026855, "learning_rate": 8.626981671638612e-08, "loss": 0.309, "step": 46559 }, { "epoch": 4.733631557543717, "grad_norm": 0.3523459732532501, "learning_rate": 8.620418935780861e-08, "loss": 0.2977, "step": 46560 }, { "epoch": 4.733733224888166, "grad_norm": 0.27925345301628113, "learning_rate": 8.61385867537673e-08, "loss": 0.3071, "step": 46561 }, { "epoch": 4.733834892232615, "grad_norm": 0.2649409770965576, "learning_rate": 8.607300890459191e-08, "loss": 0.3053, "step": 46562 }, { "epoch": 4.733936559577064, "grad_norm": 0.2651221752166748, "learning_rate": 8.60074558106122e-08, "loss": 0.3028, "step": 46563 }, { "epoch": 4.734038226921513, "grad_norm": 0.28361913561820984, "learning_rate": 8.594192747216012e-08, "loss": 0.3089, "step": 46564 }, { "epoch": 4.734139894265962, "grad_norm": 0.2620839476585388, "learning_rate": 8.587642388956429e-08, "loss": 0.288, "step": 46565 }, { "epoch": 4.7342415616104105, "grad_norm": 0.2667556703090668, "learning_rate": 8.581094506315557e-08, "loss": 0.3132, "step": 46566 }, { "epoch": 4.7343432289548595, "grad_norm": 0.29184287786483765, "learning_rate": 8.574549099326312e-08, "loss": 0.285, "step": 46567 }, { "epoch": 4.734444896299308, "grad_norm": 0.2880285680294037, "learning_rate": 8.568006168021725e-08, "loss": 0.3028, "step": 46568 }, { "epoch": 4.734546563643757, "grad_norm": 0.2720635235309601, "learning_rate": 8.561465712434824e-08, "loss": 0.3062, "step": 46569 }, { "epoch": 4.734648230988206, "grad_norm": 0.2774834930896759, "learning_rate": 8.554927732598361e-08, "loss": 0.289, "step": 46570 }, { "epoch": 4.734749898332655, "grad_norm": 0.2942889928817749, "learning_rate": 8.548392228545477e-08, "loss": 0.293, "step": 46571 }, { "epoch": 4.734851565677104, "grad_norm": 0.28846073150634766, "learning_rate": 8.541859200308922e-08, "loss": 0.2771, "step": 46572 }, { "epoch": 4.734953233021553, "grad_norm": 0.27034851908683777, "learning_rate": 8.535328647921726e-08, "loss": 0.2765, "step": 46573 }, { "epoch": 4.735054900366002, "grad_norm": 0.28706759214401245, "learning_rate": 8.528800571416751e-08, "loss": 0.3056, "step": 46574 }, { "epoch": 4.735156567710451, "grad_norm": 0.28983354568481445, "learning_rate": 8.522274970826916e-08, "loss": 0.3091, "step": 46575 }, { "epoch": 4.7352582350549, "grad_norm": 0.2775925397872925, "learning_rate": 8.515751846185083e-08, "loss": 0.32, "step": 46576 }, { "epoch": 4.735359902399349, "grad_norm": 0.2905478775501251, "learning_rate": 8.509231197524003e-08, "loss": 0.2927, "step": 46577 }, { "epoch": 4.735461569743798, "grad_norm": 0.2874801754951477, "learning_rate": 8.502713024876652e-08, "loss": 0.2753, "step": 46578 }, { "epoch": 4.735563237088247, "grad_norm": 0.27607688307762146, "learning_rate": 8.496197328275946e-08, "loss": 0.3195, "step": 46579 }, { "epoch": 4.735664904432696, "grad_norm": 0.28613772988319397, "learning_rate": 8.48968410775447e-08, "loss": 0.2825, "step": 46580 }, { "epoch": 4.7357665717771456, "grad_norm": 0.28114452958106995, "learning_rate": 8.483173363345199e-08, "loss": 0.2836, "step": 46581 }, { "epoch": 4.7358682391215945, "grad_norm": 0.274473637342453, "learning_rate": 8.476665095080938e-08, "loss": 0.2803, "step": 46582 }, { "epoch": 4.735969906466043, "grad_norm": 0.2906663119792938, "learning_rate": 8.470159302994385e-08, "loss": 0.3341, "step": 46583 }, { "epoch": 4.736071573810492, "grad_norm": 0.26164713501930237, "learning_rate": 8.463655987118402e-08, "loss": 0.2978, "step": 46584 }, { "epoch": 4.736173241154941, "grad_norm": 0.2741057276725769, "learning_rate": 8.457155147485684e-08, "loss": 0.3034, "step": 46585 }, { "epoch": 4.73627490849939, "grad_norm": 0.28925690054893494, "learning_rate": 8.450656784129042e-08, "loss": 0.2945, "step": 46586 }, { "epoch": 4.736376575843839, "grad_norm": 0.30194053053855896, "learning_rate": 8.44416089708111e-08, "loss": 0.3294, "step": 46587 }, { "epoch": 4.736478243188288, "grad_norm": 0.2833715081214905, "learning_rate": 8.437667486374756e-08, "loss": 0.3099, "step": 46588 }, { "epoch": 4.736579910532737, "grad_norm": 0.2656334936618805, "learning_rate": 8.431176552042619e-08, "loss": 0.2818, "step": 46589 }, { "epoch": 4.736681577877186, "grad_norm": 0.2920805811882019, "learning_rate": 8.42468809411734e-08, "loss": 0.3077, "step": 46590 }, { "epoch": 4.736783245221635, "grad_norm": 0.28490614891052246, "learning_rate": 8.418202112631723e-08, "loss": 0.2826, "step": 46591 }, { "epoch": 4.736884912566084, "grad_norm": 0.27218392491340637, "learning_rate": 8.411718607618357e-08, "loss": 0.2913, "step": 46592 }, { "epoch": 4.736986579910533, "grad_norm": 0.2635231018066406, "learning_rate": 8.405237579109937e-08, "loss": 0.2783, "step": 46593 }, { "epoch": 4.737088247254982, "grad_norm": 0.2674503028392792, "learning_rate": 8.398759027139103e-08, "loss": 0.2651, "step": 46594 }, { "epoch": 4.737189914599431, "grad_norm": 0.2677685618400574, "learning_rate": 8.39228295173855e-08, "loss": 0.2794, "step": 46595 }, { "epoch": 4.73729158194388, "grad_norm": 0.2782028615474701, "learning_rate": 8.38580935294081e-08, "loss": 0.3013, "step": 46596 }, { "epoch": 4.737393249288329, "grad_norm": 0.26887160539627075, "learning_rate": 8.379338230778522e-08, "loss": 0.3005, "step": 46597 }, { "epoch": 4.7374949166327776, "grad_norm": 0.2896449863910675, "learning_rate": 8.372869585284326e-08, "loss": 0.301, "step": 46598 }, { "epoch": 4.7375965839772265, "grad_norm": 0.2602350115776062, "learning_rate": 8.366403416490754e-08, "loss": 0.3124, "step": 46599 }, { "epoch": 4.737698251321675, "grad_norm": 0.270893931388855, "learning_rate": 8.359939724430444e-08, "loss": 0.2927, "step": 46600 }, { "epoch": 4.737799918666124, "grad_norm": 0.27166271209716797, "learning_rate": 8.353478509135925e-08, "loss": 0.2836, "step": 46601 }, { "epoch": 4.737901586010573, "grad_norm": 0.2759769558906555, "learning_rate": 8.34701977063973e-08, "loss": 0.273, "step": 46602 }, { "epoch": 4.738003253355022, "grad_norm": 0.2813847064971924, "learning_rate": 8.340563508974387e-08, "loss": 0.2533, "step": 46603 }, { "epoch": 4.738104920699471, "grad_norm": 0.2712889015674591, "learning_rate": 8.334109724172535e-08, "loss": 0.3383, "step": 46604 }, { "epoch": 4.73820658804392, "grad_norm": 0.27392783761024475, "learning_rate": 8.327658416266537e-08, "loss": 0.2912, "step": 46605 }, { "epoch": 4.738308255388369, "grad_norm": 0.28298842906951904, "learning_rate": 8.32120958528898e-08, "loss": 0.2834, "step": 46606 }, { "epoch": 4.738409922732818, "grad_norm": 0.3160184323787689, "learning_rate": 8.314763231272283e-08, "loss": 0.3085, "step": 46607 }, { "epoch": 4.738511590077267, "grad_norm": 0.2768670618534088, "learning_rate": 8.308319354249029e-08, "loss": 0.2794, "step": 46608 }, { "epoch": 4.738613257421716, "grad_norm": 0.2727770209312439, "learning_rate": 8.30187795425158e-08, "loss": 0.294, "step": 46609 }, { "epoch": 4.738714924766165, "grad_norm": 0.3353477120399475, "learning_rate": 8.295439031312413e-08, "loss": 0.2907, "step": 46610 }, { "epoch": 4.738816592110614, "grad_norm": 0.27556008100509644, "learning_rate": 8.289002585464057e-08, "loss": 0.2904, "step": 46611 }, { "epoch": 4.738918259455063, "grad_norm": 0.26654303073883057, "learning_rate": 8.282568616738762e-08, "loss": 0.2886, "step": 46612 }, { "epoch": 4.739019926799512, "grad_norm": 0.274641215801239, "learning_rate": 8.276137125169059e-08, "loss": 0.3093, "step": 46613 }, { "epoch": 4.739121594143961, "grad_norm": 0.2563823163509369, "learning_rate": 8.269708110787366e-08, "loss": 0.2901, "step": 46614 }, { "epoch": 4.7392232614884096, "grad_norm": 0.28926169872283936, "learning_rate": 8.263281573625992e-08, "loss": 0.336, "step": 46615 }, { "epoch": 4.739324928832859, "grad_norm": 0.2805754840373993, "learning_rate": 8.256857513717408e-08, "loss": 0.2932, "step": 46616 }, { "epoch": 4.739426596177308, "grad_norm": 0.28316405415534973, "learning_rate": 8.25043593109387e-08, "loss": 0.2879, "step": 46617 }, { "epoch": 4.739528263521757, "grad_norm": 0.2577113211154938, "learning_rate": 8.244016825787793e-08, "loss": 0.3163, "step": 46618 }, { "epoch": 4.739629930866206, "grad_norm": 0.2707253694534302, "learning_rate": 8.237600197831486e-08, "loss": 0.3006, "step": 46619 }, { "epoch": 4.739731598210655, "grad_norm": 0.25912612676620483, "learning_rate": 8.231186047257256e-08, "loss": 0.2854, "step": 46620 }, { "epoch": 4.739833265555104, "grad_norm": 0.2543841302394867, "learning_rate": 8.224774374097521e-08, "loss": 0.2844, "step": 46621 }, { "epoch": 4.739934932899553, "grad_norm": 0.2577327489852905, "learning_rate": 8.21836517838448e-08, "loss": 0.2874, "step": 46622 }, { "epoch": 4.740036600244002, "grad_norm": 0.2758856415748596, "learning_rate": 8.211958460150382e-08, "loss": 0.2886, "step": 46623 }, { "epoch": 4.740138267588451, "grad_norm": 0.286644846200943, "learning_rate": 8.205554219427647e-08, "loss": 0.3014, "step": 46624 }, { "epoch": 4.7402399349329, "grad_norm": 0.27820539474487305, "learning_rate": 8.199152456248471e-08, "loss": 0.2921, "step": 46625 }, { "epoch": 4.740341602277349, "grad_norm": 0.28790199756622314, "learning_rate": 8.192753170645107e-08, "loss": 0.2736, "step": 46626 }, { "epoch": 4.740443269621798, "grad_norm": 0.2570241391658783, "learning_rate": 8.186356362649695e-08, "loss": 0.2791, "step": 46627 }, { "epoch": 4.740544936966247, "grad_norm": 0.30738961696624756, "learning_rate": 8.179962032294598e-08, "loss": 0.307, "step": 46628 }, { "epoch": 4.740646604310696, "grad_norm": 0.27319014072418213, "learning_rate": 8.173570179612011e-08, "loss": 0.2917, "step": 46629 }, { "epoch": 4.740748271655145, "grad_norm": 0.28071483969688416, "learning_rate": 8.167180804634023e-08, "loss": 0.2857, "step": 46630 }, { "epoch": 4.7408499389995935, "grad_norm": 0.28043460845947266, "learning_rate": 8.160793907392994e-08, "loss": 0.2869, "step": 46631 }, { "epoch": 4.7409516063440424, "grad_norm": 0.27896931767463684, "learning_rate": 8.154409487920956e-08, "loss": 0.3127, "step": 46632 }, { "epoch": 4.741053273688491, "grad_norm": 0.303093284368515, "learning_rate": 8.148027546250104e-08, "loss": 0.2833, "step": 46633 }, { "epoch": 4.74115494103294, "grad_norm": 0.27844566106796265, "learning_rate": 8.141648082412634e-08, "loss": 0.3278, "step": 46634 }, { "epoch": 4.741256608377389, "grad_norm": 0.26783987879753113, "learning_rate": 8.135271096440634e-08, "loss": 0.3199, "step": 46635 }, { "epoch": 4.741358275721838, "grad_norm": 0.2918637990951538, "learning_rate": 8.128896588366298e-08, "loss": 0.3207, "step": 46636 }, { "epoch": 4.741459943066287, "grad_norm": 0.29182204604148865, "learning_rate": 8.122524558221656e-08, "loss": 0.2991, "step": 46637 }, { "epoch": 4.741561610410736, "grad_norm": 0.28239139914512634, "learning_rate": 8.11615500603885e-08, "loss": 0.2735, "step": 46638 }, { "epoch": 4.741663277755185, "grad_norm": 0.2717958986759186, "learning_rate": 8.109787931850022e-08, "loss": 0.303, "step": 46639 }, { "epoch": 4.741764945099634, "grad_norm": 0.2757539451122284, "learning_rate": 8.103423335687088e-08, "loss": 0.2982, "step": 46640 }, { "epoch": 4.741866612444083, "grad_norm": 0.29899346828460693, "learning_rate": 8.097061217582302e-08, "loss": 0.2827, "step": 46641 }, { "epoch": 4.741968279788532, "grad_norm": 0.29045408964157104, "learning_rate": 8.090701577567583e-08, "loss": 0.2635, "step": 46642 }, { "epoch": 4.742069947132981, "grad_norm": 0.28497597575187683, "learning_rate": 8.084344415675016e-08, "loss": 0.2887, "step": 46643 }, { "epoch": 4.74217161447743, "grad_norm": 0.2932117283344269, "learning_rate": 8.07798973193663e-08, "loss": 0.3036, "step": 46644 }, { "epoch": 4.742273281821879, "grad_norm": 0.2692692279815674, "learning_rate": 8.071637526384457e-08, "loss": 0.3104, "step": 46645 }, { "epoch": 4.742374949166328, "grad_norm": 0.25752565264701843, "learning_rate": 8.06528779905047e-08, "loss": 0.2839, "step": 46646 }, { "epoch": 4.742476616510777, "grad_norm": 0.26337289810180664, "learning_rate": 8.058940549966587e-08, "loss": 0.3131, "step": 46647 }, { "epoch": 4.7425782838552255, "grad_norm": 0.27822667360305786, "learning_rate": 8.052595779164952e-08, "loss": 0.2896, "step": 46648 }, { "epoch": 4.7426799511996744, "grad_norm": 0.27750203013420105, "learning_rate": 8.046253486677424e-08, "loss": 0.2938, "step": 46649 }, { "epoch": 4.742781618544123, "grad_norm": 0.28445979952812195, "learning_rate": 8.039913672535927e-08, "loss": 0.2637, "step": 46650 }, { "epoch": 4.742883285888572, "grad_norm": 0.2682991027832031, "learning_rate": 8.03357633677243e-08, "loss": 0.2982, "step": 46651 }, { "epoch": 4.742984953233021, "grad_norm": 0.29294949769973755, "learning_rate": 8.027241479418913e-08, "loss": 0.3141, "step": 46652 }, { "epoch": 4.74308662057747, "grad_norm": 0.27454259991645813, "learning_rate": 8.020909100507235e-08, "loss": 0.3149, "step": 46653 }, { "epoch": 4.743188287921919, "grad_norm": 0.3031185567378998, "learning_rate": 8.014579200069262e-08, "loss": 0.2815, "step": 46654 }, { "epoch": 4.743289955266368, "grad_norm": 0.280060350894928, "learning_rate": 8.008251778136967e-08, "loss": 0.3042, "step": 46655 }, { "epoch": 4.743391622610817, "grad_norm": 0.2719351053237915, "learning_rate": 8.001926834742158e-08, "loss": 0.3058, "step": 46656 }, { "epoch": 4.743493289955266, "grad_norm": 0.28881773352622986, "learning_rate": 7.995604369916699e-08, "loss": 0.3145, "step": 46657 }, { "epoch": 4.743594957299715, "grad_norm": 0.250151664018631, "learning_rate": 7.989284383692508e-08, "loss": 0.2668, "step": 46658 }, { "epoch": 4.743696624644164, "grad_norm": 0.278502881526947, "learning_rate": 7.982966876101394e-08, "loss": 0.2844, "step": 46659 }, { "epoch": 4.743798291988613, "grad_norm": 0.2629604637622833, "learning_rate": 7.976651847175165e-08, "loss": 0.2842, "step": 46660 }, { "epoch": 4.743899959333062, "grad_norm": 0.3049876391887665, "learning_rate": 7.970339296945629e-08, "loss": 0.2798, "step": 46661 }, { "epoch": 4.744001626677511, "grad_norm": 0.30596521496772766, "learning_rate": 7.964029225444592e-08, "loss": 0.2857, "step": 46662 }, { "epoch": 4.7441032940219605, "grad_norm": 0.25585174560546875, "learning_rate": 7.957721632703863e-08, "loss": 0.3108, "step": 46663 }, { "epoch": 4.7442049613664095, "grad_norm": 0.2547169625759125, "learning_rate": 7.951416518755195e-08, "loss": 0.3416, "step": 46664 }, { "epoch": 4.744306628710858, "grad_norm": 0.29748064279556274, "learning_rate": 7.945113883630395e-08, "loss": 0.2709, "step": 46665 }, { "epoch": 4.744408296055307, "grad_norm": 0.2694002389907837, "learning_rate": 7.93881372736116e-08, "loss": 0.257, "step": 46666 }, { "epoch": 4.744509963399756, "grad_norm": 0.2687888741493225, "learning_rate": 7.932516049979189e-08, "loss": 0.296, "step": 46667 }, { "epoch": 4.744611630744205, "grad_norm": 0.28203585743904114, "learning_rate": 7.926220851516397e-08, "loss": 0.2638, "step": 46668 }, { "epoch": 4.744713298088654, "grad_norm": 0.2741110622882843, "learning_rate": 7.919928132004262e-08, "loss": 0.2918, "step": 46669 }, { "epoch": 4.744814965433103, "grad_norm": 0.26323768496513367, "learning_rate": 7.913637891474591e-08, "loss": 0.2822, "step": 46670 }, { "epoch": 4.744916632777552, "grad_norm": 0.2708108723163605, "learning_rate": 7.907350129959135e-08, "loss": 0.2863, "step": 46671 }, { "epoch": 4.745018300122001, "grad_norm": 0.2697800397872925, "learning_rate": 7.901064847489537e-08, "loss": 0.2889, "step": 46672 }, { "epoch": 4.74511996746645, "grad_norm": 0.27434468269348145, "learning_rate": 7.894782044097382e-08, "loss": 0.2848, "step": 46673 }, { "epoch": 4.745221634810899, "grad_norm": 0.2969861328601837, "learning_rate": 7.888501719814313e-08, "loss": 0.3037, "step": 46674 }, { "epoch": 4.745323302155348, "grad_norm": 0.27297234535217285, "learning_rate": 7.882223874672134e-08, "loss": 0.2754, "step": 46675 }, { "epoch": 4.745424969499797, "grad_norm": 0.2868692874908447, "learning_rate": 7.875948508702325e-08, "loss": 0.3118, "step": 46676 }, { "epoch": 4.745526636844246, "grad_norm": 0.2688533663749695, "learning_rate": 7.869675621936524e-08, "loss": 0.2891, "step": 46677 }, { "epoch": 4.745628304188695, "grad_norm": 0.2644820213317871, "learning_rate": 7.863405214406373e-08, "loss": 0.3082, "step": 46678 }, { "epoch": 4.745729971533144, "grad_norm": 0.2617127597332001, "learning_rate": 7.857137286143401e-08, "loss": 0.3131, "step": 46679 }, { "epoch": 4.7458316388775925, "grad_norm": 0.28240588307380676, "learning_rate": 7.850871837179253e-08, "loss": 0.3351, "step": 46680 }, { "epoch": 4.7459333062220415, "grad_norm": 0.25940555334091187, "learning_rate": 7.844608867545511e-08, "loss": 0.2897, "step": 46681 }, { "epoch": 4.74603497356649, "grad_norm": 0.2830246090888977, "learning_rate": 7.838348377273597e-08, "loss": 0.2859, "step": 46682 }, { "epoch": 4.746136640910939, "grad_norm": 0.3010043799877167, "learning_rate": 7.832090366395151e-08, "loss": 0.3082, "step": 46683 }, { "epoch": 4.746238308255388, "grad_norm": 0.2542809545993805, "learning_rate": 7.825834834941647e-08, "loss": 0.2844, "step": 46684 }, { "epoch": 4.746339975599837, "grad_norm": 0.2791934609413147, "learning_rate": 7.819581782944674e-08, "loss": 0.3265, "step": 46685 }, { "epoch": 4.746441642944286, "grad_norm": 0.26264604926109314, "learning_rate": 7.813331210435648e-08, "loss": 0.2915, "step": 46686 }, { "epoch": 4.746543310288735, "grad_norm": 0.2879740595817566, "learning_rate": 7.807083117446101e-08, "loss": 0.2997, "step": 46687 }, { "epoch": 4.746644977633184, "grad_norm": 0.2950350046157837, "learning_rate": 7.800837504007563e-08, "loss": 0.2868, "step": 46688 }, { "epoch": 4.746746644977633, "grad_norm": 0.2875155210494995, "learning_rate": 7.794594370151398e-08, "loss": 0.289, "step": 46689 }, { "epoch": 4.746848312322082, "grad_norm": 0.2707135081291199, "learning_rate": 7.78835371590908e-08, "loss": 0.2926, "step": 46690 }, { "epoch": 4.746949979666531, "grad_norm": 0.2840736508369446, "learning_rate": 7.78211554131214e-08, "loss": 0.2794, "step": 46691 }, { "epoch": 4.74705164701098, "grad_norm": 0.2517412006855011, "learning_rate": 7.775879846391831e-08, "loss": 0.2803, "step": 46692 }, { "epoch": 4.747153314355429, "grad_norm": 0.2963449954986572, "learning_rate": 7.769646631179739e-08, "loss": 0.2989, "step": 46693 }, { "epoch": 4.747254981699878, "grad_norm": 0.2662122845649719, "learning_rate": 7.763415895707171e-08, "loss": 0.2946, "step": 46694 }, { "epoch": 4.747356649044327, "grad_norm": 0.2685483694076538, "learning_rate": 7.757187640005548e-08, "loss": 0.3231, "step": 46695 }, { "epoch": 4.747458316388776, "grad_norm": 0.2917764484882355, "learning_rate": 7.750961864106287e-08, "loss": 0.2704, "step": 46696 }, { "epoch": 4.7475599837332245, "grad_norm": 0.2729932963848114, "learning_rate": 7.744738568040644e-08, "loss": 0.2917, "step": 46697 }, { "epoch": 4.747661651077674, "grad_norm": 0.261589914560318, "learning_rate": 7.738517751840091e-08, "loss": 0.2959, "step": 46698 }, { "epoch": 4.747763318422123, "grad_norm": 0.2630032002925873, "learning_rate": 7.732299415535827e-08, "loss": 0.3075, "step": 46699 }, { "epoch": 4.747864985766572, "grad_norm": 0.28569769859313965, "learning_rate": 7.72608355915927e-08, "loss": 0.2986, "step": 46700 }, { "epoch": 4.747966653111021, "grad_norm": 0.26671960949897766, "learning_rate": 7.719870182741784e-08, "loss": 0.2622, "step": 46701 }, { "epoch": 4.74806832045547, "grad_norm": 0.2784954905509949, "learning_rate": 7.713659286314512e-08, "loss": 0.3092, "step": 46702 }, { "epoch": 4.748169987799919, "grad_norm": 0.2844352424144745, "learning_rate": 7.707450869908928e-08, "loss": 0.288, "step": 46703 }, { "epoch": 4.748271655144368, "grad_norm": 0.3064221143722534, "learning_rate": 7.701244933556174e-08, "loss": 0.3209, "step": 46704 }, { "epoch": 4.748373322488817, "grad_norm": 0.2897581160068512, "learning_rate": 7.695041477287557e-08, "loss": 0.2673, "step": 46705 }, { "epoch": 4.748474989833266, "grad_norm": 0.28051820397377014, "learning_rate": 7.688840501134387e-08, "loss": 0.2822, "step": 46706 }, { "epoch": 4.748576657177715, "grad_norm": 0.26800045371055603, "learning_rate": 7.68264200512775e-08, "loss": 0.3064, "step": 46707 }, { "epoch": 4.748678324522164, "grad_norm": 0.2947732210159302, "learning_rate": 7.676445989299063e-08, "loss": 0.3156, "step": 46708 }, { "epoch": 4.748779991866613, "grad_norm": 0.31306931376457214, "learning_rate": 7.670252453679416e-08, "loss": 0.2781, "step": 46709 }, { "epoch": 4.748881659211062, "grad_norm": 0.27880793809890747, "learning_rate": 7.664061398300004e-08, "loss": 0.3067, "step": 46710 }, { "epoch": 4.748983326555511, "grad_norm": 0.29219332337379456, "learning_rate": 7.657872823192136e-08, "loss": 0.345, "step": 46711 }, { "epoch": 4.7490849938999595, "grad_norm": 0.2674776017665863, "learning_rate": 7.651686728386842e-08, "loss": 0.2735, "step": 46712 }, { "epoch": 4.7491866612444085, "grad_norm": 0.2877078056335449, "learning_rate": 7.645503113915376e-08, "loss": 0.2978, "step": 46713 }, { "epoch": 4.749288328588857, "grad_norm": 0.264710932970047, "learning_rate": 7.639321979808822e-08, "loss": 0.3163, "step": 46714 }, { "epoch": 4.749389995933306, "grad_norm": 0.28575509786605835, "learning_rate": 7.63314332609838e-08, "loss": 0.3063, "step": 46715 }, { "epoch": 4.749491663277755, "grad_norm": 0.29382240772247314, "learning_rate": 7.62696715281519e-08, "loss": 0.3117, "step": 46716 }, { "epoch": 4.749593330622204, "grad_norm": 0.27453678846359253, "learning_rate": 7.620793459990283e-08, "loss": 0.2745, "step": 46717 }, { "epoch": 4.749694997966653, "grad_norm": 0.3029615879058838, "learning_rate": 7.614622247654912e-08, "loss": 0.2778, "step": 46718 }, { "epoch": 4.749796665311102, "grad_norm": 0.26423898339271545, "learning_rate": 7.608453515839943e-08, "loss": 0.272, "step": 46719 }, { "epoch": 4.749898332655551, "grad_norm": 0.2838267385959625, "learning_rate": 7.602287264576625e-08, "loss": 0.2944, "step": 46720 }, { "epoch": 4.75, "grad_norm": 0.31278011202812195, "learning_rate": 7.59612349389599e-08, "loss": 0.2927, "step": 46721 }, { "epoch": 4.750101667344449, "grad_norm": 0.2719923257827759, "learning_rate": 7.589962203829015e-08, "loss": 0.3005, "step": 46722 }, { "epoch": 4.750203334688898, "grad_norm": 0.25925788283348083, "learning_rate": 7.583803394406897e-08, "loss": 0.2772, "step": 46723 }, { "epoch": 4.750305002033347, "grad_norm": 0.2751096785068512, "learning_rate": 7.577647065660499e-08, "loss": 0.2869, "step": 46724 }, { "epoch": 4.750406669377796, "grad_norm": 0.26458367705345154, "learning_rate": 7.571493217620851e-08, "loss": 0.308, "step": 46725 }, { "epoch": 4.750508336722245, "grad_norm": 0.2744530439376831, "learning_rate": 7.565341850319096e-08, "loss": 0.285, "step": 46726 }, { "epoch": 4.750610004066694, "grad_norm": 0.27142801880836487, "learning_rate": 7.559192963786044e-08, "loss": 0.2757, "step": 46727 }, { "epoch": 4.750711671411143, "grad_norm": 0.26949742436408997, "learning_rate": 7.553046558052835e-08, "loss": 0.318, "step": 46728 }, { "epoch": 4.7508133387555915, "grad_norm": 0.2849726378917694, "learning_rate": 7.546902633150277e-08, "loss": 0.2789, "step": 46729 }, { "epoch": 4.7509150061000405, "grad_norm": 0.2704632878303528, "learning_rate": 7.540761189109403e-08, "loss": 0.3138, "step": 46730 }, { "epoch": 4.751016673444489, "grad_norm": 0.2753089368343353, "learning_rate": 7.534622225961241e-08, "loss": 0.2622, "step": 46731 }, { "epoch": 4.751118340788938, "grad_norm": 0.2677212357521057, "learning_rate": 7.528485743736546e-08, "loss": 0.3043, "step": 46732 }, { "epoch": 4.751220008133387, "grad_norm": 0.27693772315979004, "learning_rate": 7.52235174246635e-08, "loss": 0.3034, "step": 46733 }, { "epoch": 4.751321675477836, "grad_norm": 0.2862841784954071, "learning_rate": 7.516220222181458e-08, "loss": 0.2907, "step": 46734 }, { "epoch": 4.751423342822285, "grad_norm": 0.2875089645385742, "learning_rate": 7.510091182912848e-08, "loss": 0.3221, "step": 46735 }, { "epoch": 4.751525010166734, "grad_norm": 0.29174885153770447, "learning_rate": 7.503964624691384e-08, "loss": 0.2985, "step": 46736 }, { "epoch": 4.751626677511183, "grad_norm": 0.27486950159072876, "learning_rate": 7.497840547547874e-08, "loss": 0.3008, "step": 46737 }, { "epoch": 4.751728344855632, "grad_norm": 0.2772981524467468, "learning_rate": 7.491718951513238e-08, "loss": 0.3357, "step": 46738 }, { "epoch": 4.751830012200081, "grad_norm": 0.28050437569618225, "learning_rate": 7.485599836618285e-08, "loss": 0.3128, "step": 46739 }, { "epoch": 4.75193167954453, "grad_norm": 0.2679762542247772, "learning_rate": 7.479483202893822e-08, "loss": 0.2578, "step": 46740 }, { "epoch": 4.752033346888979, "grad_norm": 0.2831774652004242, "learning_rate": 7.473369050370716e-08, "loss": 0.2951, "step": 46741 }, { "epoch": 4.752135014233428, "grad_norm": 0.29899460077285767, "learning_rate": 7.467257379079663e-08, "loss": 0.2849, "step": 46742 }, { "epoch": 4.752236681577877, "grad_norm": 0.2567060589790344, "learning_rate": 7.461148189051581e-08, "loss": 0.272, "step": 46743 }, { "epoch": 4.752338348922326, "grad_norm": 0.2781612277030945, "learning_rate": 7.455041480317171e-08, "loss": 0.3161, "step": 46744 }, { "epoch": 4.7524400162667755, "grad_norm": 0.2809852957725525, "learning_rate": 7.44893725290724e-08, "loss": 0.2826, "step": 46745 }, { "epoch": 4.752541683611224, "grad_norm": 0.31260913610458374, "learning_rate": 7.442835506852486e-08, "loss": 0.3038, "step": 46746 }, { "epoch": 4.752643350955673, "grad_norm": 0.2656826078891754, "learning_rate": 7.436736242183717e-08, "loss": 0.2749, "step": 46747 }, { "epoch": 4.752745018300122, "grad_norm": 0.2607738971710205, "learning_rate": 7.430639458931577e-08, "loss": 0.3156, "step": 46748 }, { "epoch": 4.752846685644571, "grad_norm": 0.26691287755966187, "learning_rate": 7.424545157126817e-08, "loss": 0.3003, "step": 46749 }, { "epoch": 4.75294835298902, "grad_norm": 0.2792312204837799, "learning_rate": 7.418453336800191e-08, "loss": 0.2867, "step": 46750 }, { "epoch": 4.753050020333469, "grad_norm": 0.279751181602478, "learning_rate": 7.412363997982285e-08, "loss": 0.2791, "step": 46751 }, { "epoch": 4.753151687677918, "grad_norm": 0.28813526034355164, "learning_rate": 7.406277140703854e-08, "loss": 0.3013, "step": 46752 }, { "epoch": 4.753253355022367, "grad_norm": 0.2624925971031189, "learning_rate": 7.40019276499554e-08, "loss": 0.2654, "step": 46753 }, { "epoch": 4.753355022366816, "grad_norm": 0.264940083026886, "learning_rate": 7.394110870888039e-08, "loss": 0.2962, "step": 46754 }, { "epoch": 4.753456689711265, "grad_norm": 0.2967442572116852, "learning_rate": 7.388031458411882e-08, "loss": 0.2649, "step": 46755 }, { "epoch": 4.753558357055714, "grad_norm": 0.28278669714927673, "learning_rate": 7.381954527597824e-08, "loss": 0.2586, "step": 46756 }, { "epoch": 4.753660024400163, "grad_norm": 0.2716185450553894, "learning_rate": 7.375880078476394e-08, "loss": 0.3088, "step": 46757 }, { "epoch": 4.753761691744612, "grad_norm": 0.2743043005466461, "learning_rate": 7.36980811107818e-08, "loss": 0.2519, "step": 46758 }, { "epoch": 4.753863359089061, "grad_norm": 0.2644409239292145, "learning_rate": 7.363738625433825e-08, "loss": 0.3337, "step": 46759 }, { "epoch": 4.75396502643351, "grad_norm": 0.2691097855567932, "learning_rate": 7.357671621573859e-08, "loss": 0.2597, "step": 46760 }, { "epoch": 4.7540666937779585, "grad_norm": 0.2586157023906708, "learning_rate": 7.351607099528924e-08, "loss": 0.2827, "step": 46761 }, { "epoch": 4.7541683611224075, "grad_norm": 0.26647546887397766, "learning_rate": 7.345545059329495e-08, "loss": 0.3074, "step": 46762 }, { "epoch": 4.754270028466856, "grad_norm": 0.3217555284500122, "learning_rate": 7.33948550100616e-08, "loss": 0.2755, "step": 46763 }, { "epoch": 4.754371695811305, "grad_norm": 0.27938106656074524, "learning_rate": 7.333428424589395e-08, "loss": 0.3002, "step": 46764 }, { "epoch": 4.754473363155754, "grad_norm": 0.29746782779693604, "learning_rate": 7.327373830109729e-08, "loss": 0.3285, "step": 46765 }, { "epoch": 4.754575030500203, "grad_norm": 0.2811341881752014, "learning_rate": 7.321321717597696e-08, "loss": 0.2696, "step": 46766 }, { "epoch": 4.754676697844652, "grad_norm": 0.2901397943496704, "learning_rate": 7.315272087083769e-08, "loss": 0.2605, "step": 46767 }, { "epoch": 4.754778365189101, "grad_norm": 0.28715741634368896, "learning_rate": 7.309224938598481e-08, "loss": 0.3222, "step": 46768 }, { "epoch": 4.75488003253355, "grad_norm": 0.28345435857772827, "learning_rate": 7.30318027217214e-08, "loss": 0.329, "step": 46769 }, { "epoch": 4.754981699877999, "grad_norm": 0.2805061936378479, "learning_rate": 7.297138087835331e-08, "loss": 0.2942, "step": 46770 }, { "epoch": 4.755083367222448, "grad_norm": 0.2830849289894104, "learning_rate": 7.291098385618478e-08, "loss": 0.3405, "step": 46771 }, { "epoch": 4.755185034566897, "grad_norm": 0.26925182342529297, "learning_rate": 7.285061165551943e-08, "loss": 0.2932, "step": 46772 }, { "epoch": 4.755286701911346, "grad_norm": 0.31426388025283813, "learning_rate": 7.279026427666203e-08, "loss": 0.2916, "step": 46773 }, { "epoch": 4.755388369255795, "grad_norm": 0.2559455633163452, "learning_rate": 7.272994171991621e-08, "loss": 0.3086, "step": 46774 }, { "epoch": 4.755490036600244, "grad_norm": 0.28415462374687195, "learning_rate": 7.266964398558618e-08, "loss": 0.2934, "step": 46775 }, { "epoch": 4.755591703944693, "grad_norm": 0.30680596828460693, "learning_rate": 7.260937107397559e-08, "loss": 0.2723, "step": 46776 }, { "epoch": 4.755693371289142, "grad_norm": 0.3006821274757385, "learning_rate": 7.254912298538808e-08, "loss": 0.3039, "step": 46777 }, { "epoch": 4.7557950386335905, "grad_norm": 0.28019919991493225, "learning_rate": 7.24888997201273e-08, "loss": 0.2585, "step": 46778 }, { "epoch": 4.7558967059780395, "grad_norm": 0.3232698142528534, "learning_rate": 7.242870127849633e-08, "loss": 0.2765, "step": 46779 }, { "epoch": 4.755998373322489, "grad_norm": 0.27809590101242065, "learning_rate": 7.236852766079883e-08, "loss": 0.2783, "step": 46780 }, { "epoch": 4.756100040666938, "grad_norm": 0.2778492569923401, "learning_rate": 7.230837886733733e-08, "loss": 0.3055, "step": 46781 }, { "epoch": 4.756201708011387, "grad_norm": 0.2732914984226227, "learning_rate": 7.224825489841547e-08, "loss": 0.2999, "step": 46782 }, { "epoch": 4.756303375355836, "grad_norm": 0.2937031090259552, "learning_rate": 7.21881557543358e-08, "loss": 0.2526, "step": 46783 }, { "epoch": 4.756405042700285, "grad_norm": 0.3054676353931427, "learning_rate": 7.21280814354014e-08, "loss": 0.2823, "step": 46784 }, { "epoch": 4.756506710044734, "grad_norm": 0.27672335505485535, "learning_rate": 7.20680319419137e-08, "loss": 0.2737, "step": 46785 }, { "epoch": 4.756608377389183, "grad_norm": 0.26661717891693115, "learning_rate": 7.200800727417745e-08, "loss": 0.3227, "step": 46786 }, { "epoch": 4.756710044733632, "grad_norm": 0.2909509837627411, "learning_rate": 7.194800743249297e-08, "loss": 0.3082, "step": 46787 }, { "epoch": 4.756811712078081, "grad_norm": 0.29539933800697327, "learning_rate": 7.188803241716391e-08, "loss": 0.297, "step": 46788 }, { "epoch": 4.75691337942253, "grad_norm": 0.272944837808609, "learning_rate": 7.18280822284917e-08, "loss": 0.344, "step": 46789 }, { "epoch": 4.757015046766979, "grad_norm": 0.28569820523262024, "learning_rate": 7.17681568667783e-08, "loss": 0.2738, "step": 46790 }, { "epoch": 4.757116714111428, "grad_norm": 0.28548189997673035, "learning_rate": 7.17082563323257e-08, "loss": 0.2766, "step": 46791 }, { "epoch": 4.757218381455877, "grad_norm": 0.2723522186279297, "learning_rate": 7.164838062543589e-08, "loss": 0.2921, "step": 46792 }, { "epoch": 4.7573200488003256, "grad_norm": 0.28374195098876953, "learning_rate": 7.158852974641084e-08, "loss": 0.2735, "step": 46793 }, { "epoch": 4.7574217161447745, "grad_norm": 0.2888926565647125, "learning_rate": 7.152870369555087e-08, "loss": 0.2833, "step": 46794 }, { "epoch": 4.757523383489223, "grad_norm": 0.28667929768562317, "learning_rate": 7.146890247315853e-08, "loss": 0.2984, "step": 46795 }, { "epoch": 4.757625050833672, "grad_norm": 0.29625335335731506, "learning_rate": 7.140912607953465e-08, "loss": 0.2935, "step": 46796 }, { "epoch": 4.757726718178121, "grad_norm": 0.2704218029975891, "learning_rate": 7.134937451498014e-08, "loss": 0.2865, "step": 46797 }, { "epoch": 4.75782838552257, "grad_norm": 0.2676985561847687, "learning_rate": 7.128964777979641e-08, "loss": 0.2876, "step": 46798 }, { "epoch": 4.757930052867019, "grad_norm": 0.2958052158355713, "learning_rate": 7.122994587428434e-08, "loss": 0.2886, "step": 46799 }, { "epoch": 4.758031720211468, "grad_norm": 0.2959177494049072, "learning_rate": 7.117026879874422e-08, "loss": 0.2648, "step": 46800 }, { "epoch": 4.758133387555917, "grad_norm": 0.29261359572410583, "learning_rate": 7.111061655347751e-08, "loss": 0.282, "step": 46801 }, { "epoch": 4.758235054900366, "grad_norm": 0.2856605350971222, "learning_rate": 7.105098913878339e-08, "loss": 0.2939, "step": 46802 }, { "epoch": 4.758336722244815, "grad_norm": 0.2819719612598419, "learning_rate": 7.099138655496385e-08, "loss": 0.3252, "step": 46803 }, { "epoch": 4.758438389589264, "grad_norm": 0.27868354320526123, "learning_rate": 7.093180880231864e-08, "loss": 0.3179, "step": 46804 }, { "epoch": 4.758540056933713, "grad_norm": 0.2979907989501953, "learning_rate": 7.087225588114643e-08, "loss": 0.3046, "step": 46805 }, { "epoch": 4.758641724278162, "grad_norm": 0.2854750156402588, "learning_rate": 7.081272779174974e-08, "loss": 0.2944, "step": 46806 }, { "epoch": 4.758743391622611, "grad_norm": 0.2745932936668396, "learning_rate": 7.075322453442668e-08, "loss": 0.3099, "step": 46807 }, { "epoch": 4.75884505896706, "grad_norm": 0.28230366110801697, "learning_rate": 7.069374610947754e-08, "loss": 0.3196, "step": 46808 }, { "epoch": 4.758946726311509, "grad_norm": 0.2721899449825287, "learning_rate": 7.063429251720156e-08, "loss": 0.3398, "step": 46809 }, { "epoch": 4.7590483936559576, "grad_norm": 0.30581632256507874, "learning_rate": 7.057486375789901e-08, "loss": 0.2725, "step": 46810 }, { "epoch": 4.7591500610004065, "grad_norm": 0.2970275282859802, "learning_rate": 7.051545983186859e-08, "loss": 0.2778, "step": 46811 }, { "epoch": 4.759251728344855, "grad_norm": 0.3026626706123352, "learning_rate": 7.045608073941002e-08, "loss": 0.2887, "step": 46812 }, { "epoch": 4.759353395689304, "grad_norm": 0.3195025324821472, "learning_rate": 7.039672648082252e-08, "loss": 0.2807, "step": 46813 }, { "epoch": 4.759455063033753, "grad_norm": 0.26962125301361084, "learning_rate": 7.033739705640475e-08, "loss": 0.311, "step": 46814 }, { "epoch": 4.759556730378202, "grad_norm": 0.27053287625312805, "learning_rate": 7.027809246645535e-08, "loss": 0.2988, "step": 46815 }, { "epoch": 4.759658397722651, "grad_norm": 0.24990831315517426, "learning_rate": 7.02188127112735e-08, "loss": 0.2862, "step": 46816 }, { "epoch": 4.7597600650671, "grad_norm": 0.25828251242637634, "learning_rate": 7.015955779115791e-08, "loss": 0.2891, "step": 46817 }, { "epoch": 4.759861732411549, "grad_norm": 0.28077569603919983, "learning_rate": 7.010032770640718e-08, "loss": 0.307, "step": 46818 }, { "epoch": 4.759963399755998, "grad_norm": 0.2536120116710663, "learning_rate": 7.004112245731942e-08, "loss": 0.2728, "step": 46819 }, { "epoch": 4.760065067100447, "grad_norm": 0.2658136785030365, "learning_rate": 6.998194204419273e-08, "loss": 0.2676, "step": 46820 }, { "epoch": 4.760166734444896, "grad_norm": 0.2648244798183441, "learning_rate": 6.992278646732576e-08, "loss": 0.3196, "step": 46821 }, { "epoch": 4.760268401789345, "grad_norm": 0.2648044526576996, "learning_rate": 6.986365572701547e-08, "loss": 0.2912, "step": 46822 }, { "epoch": 4.760370069133794, "grad_norm": 0.27000632882118225, "learning_rate": 6.980454982356111e-08, "loss": 0.2901, "step": 46823 }, { "epoch": 4.760471736478243, "grad_norm": 0.2626950442790985, "learning_rate": 6.974546875726018e-08, "loss": 0.2747, "step": 46824 }, { "epoch": 4.760573403822692, "grad_norm": 0.26317688822746277, "learning_rate": 6.968641252840913e-08, "loss": 0.3217, "step": 46825 }, { "epoch": 4.760675071167141, "grad_norm": 0.2759222984313965, "learning_rate": 6.962738113730716e-08, "loss": 0.302, "step": 46826 }, { "epoch": 4.7607767385115904, "grad_norm": 0.27360495924949646, "learning_rate": 6.956837458425015e-08, "loss": 0.3147, "step": 46827 }, { "epoch": 4.760878405856039, "grad_norm": 0.2884540259838104, "learning_rate": 6.950939286953673e-08, "loss": 0.2947, "step": 46828 }, { "epoch": 4.760980073200488, "grad_norm": 0.2902117967605591, "learning_rate": 6.945043599346224e-08, "loss": 0.3459, "step": 46829 }, { "epoch": 4.761081740544937, "grad_norm": 0.25316816568374634, "learning_rate": 6.939150395632587e-08, "loss": 0.3276, "step": 46830 }, { "epoch": 4.761183407889386, "grad_norm": 0.2884944677352905, "learning_rate": 6.933259675842297e-08, "loss": 0.311, "step": 46831 }, { "epoch": 4.761285075233835, "grad_norm": 0.28173935413360596, "learning_rate": 6.927371440005048e-08, "loss": 0.2961, "step": 46832 }, { "epoch": 4.761386742578284, "grad_norm": 0.2550159990787506, "learning_rate": 6.921485688150598e-08, "loss": 0.2558, "step": 46833 }, { "epoch": 4.761488409922733, "grad_norm": 0.2831001579761505, "learning_rate": 6.91560242030842e-08, "loss": 0.3046, "step": 46834 }, { "epoch": 4.761590077267182, "grad_norm": 0.27820050716400146, "learning_rate": 6.909721636508326e-08, "loss": 0.2905, "step": 46835 }, { "epoch": 4.761691744611631, "grad_norm": 0.272846519947052, "learning_rate": 6.903843336779903e-08, "loss": 0.308, "step": 46836 }, { "epoch": 4.76179341195608, "grad_norm": 0.2773008346557617, "learning_rate": 6.897967521152738e-08, "loss": 0.2647, "step": 46837 }, { "epoch": 4.761895079300529, "grad_norm": 0.2849881052970886, "learning_rate": 6.892094189656417e-08, "loss": 0.2702, "step": 46838 }, { "epoch": 4.761996746644978, "grad_norm": 0.29248932003974915, "learning_rate": 6.886223342320475e-08, "loss": 0.3036, "step": 46839 }, { "epoch": 4.762098413989427, "grad_norm": 0.2916114628314972, "learning_rate": 6.880354979174663e-08, "loss": 0.2938, "step": 46840 }, { "epoch": 4.762200081333876, "grad_norm": 0.28021061420440674, "learning_rate": 6.874489100248404e-08, "loss": 0.2763, "step": 46841 }, { "epoch": 4.762301748678325, "grad_norm": 0.303789883852005, "learning_rate": 6.86862570557123e-08, "loss": 0.2988, "step": 46842 }, { "epoch": 4.7624034160227735, "grad_norm": 0.28731632232666016, "learning_rate": 6.862764795172838e-08, "loss": 0.2883, "step": 46843 }, { "epoch": 4.7625050833672224, "grad_norm": 0.29530319571495056, "learning_rate": 6.856906369082594e-08, "loss": 0.2981, "step": 46844 }, { "epoch": 4.762606750711671, "grad_norm": 0.2650565207004547, "learning_rate": 6.85105042733003e-08, "loss": 0.3058, "step": 46845 }, { "epoch": 4.76270841805612, "grad_norm": 0.3003265857696533, "learning_rate": 6.845196969944735e-08, "loss": 0.3239, "step": 46846 }, { "epoch": 4.762810085400569, "grad_norm": 0.2733294665813446, "learning_rate": 6.839345996956126e-08, "loss": 0.2874, "step": 46847 }, { "epoch": 4.762911752745018, "grad_norm": 0.28093597292900085, "learning_rate": 6.833497508393738e-08, "loss": 0.2838, "step": 46848 }, { "epoch": 4.763013420089467, "grad_norm": 0.2708435356616974, "learning_rate": 6.827651504286937e-08, "loss": 0.308, "step": 46849 }, { "epoch": 4.763115087433916, "grad_norm": 0.3149579167366028, "learning_rate": 6.821807984665251e-08, "loss": 0.283, "step": 46850 }, { "epoch": 4.763216754778365, "grad_norm": 0.2795669436454773, "learning_rate": 6.815966949558106e-08, "loss": 0.2923, "step": 46851 }, { "epoch": 4.763318422122814, "grad_norm": 0.2684871256351471, "learning_rate": 6.810128398994864e-08, "loss": 0.2807, "step": 46852 }, { "epoch": 4.763420089467263, "grad_norm": 0.29122576117515564, "learning_rate": 6.804292333005113e-08, "loss": 0.2881, "step": 46853 }, { "epoch": 4.763521756811712, "grad_norm": 0.2823287546634674, "learning_rate": 6.798458751618054e-08, "loss": 0.2948, "step": 46854 }, { "epoch": 4.763623424156161, "grad_norm": 0.2766699194908142, "learning_rate": 6.792627654863215e-08, "loss": 0.3271, "step": 46855 }, { "epoch": 4.76372509150061, "grad_norm": 0.2830308973789215, "learning_rate": 6.786799042769854e-08, "loss": 0.3005, "step": 46856 }, { "epoch": 4.763826758845059, "grad_norm": 0.2872157096862793, "learning_rate": 6.78097291536739e-08, "loss": 0.3239, "step": 46857 }, { "epoch": 4.763928426189508, "grad_norm": 0.2728211283683777, "learning_rate": 6.775149272685244e-08, "loss": 0.2776, "step": 46858 }, { "epoch": 4.764030093533957, "grad_norm": 0.26832064986228943, "learning_rate": 6.769328114752616e-08, "loss": 0.2922, "step": 46859 }, { "epoch": 4.7641317608784055, "grad_norm": 0.29011526703834534, "learning_rate": 6.763509441598981e-08, "loss": 0.2906, "step": 46860 }, { "epoch": 4.7642334282228544, "grad_norm": 0.2691970765590668, "learning_rate": 6.757693253253483e-08, "loss": 0.3087, "step": 46861 }, { "epoch": 4.764335095567304, "grad_norm": 0.2911261022090912, "learning_rate": 6.751879549745543e-08, "loss": 0.3411, "step": 46862 }, { "epoch": 4.764436762911753, "grad_norm": 0.2690100073814392, "learning_rate": 6.746068331104471e-08, "loss": 0.2907, "step": 46863 }, { "epoch": 4.764538430256202, "grad_norm": 0.2720317542552948, "learning_rate": 6.740259597359411e-08, "loss": 0.2792, "step": 46864 }, { "epoch": 4.764640097600651, "grad_norm": 0.2817569673061371, "learning_rate": 6.734453348539783e-08, "loss": 0.2961, "step": 46865 }, { "epoch": 4.7647417649451, "grad_norm": 0.30145108699798584, "learning_rate": 6.728649584674729e-08, "loss": 0.3072, "step": 46866 }, { "epoch": 4.764843432289549, "grad_norm": 0.2661544978618622, "learning_rate": 6.722848305793506e-08, "loss": 0.2919, "step": 46867 }, { "epoch": 4.764945099633998, "grad_norm": 0.28974807262420654, "learning_rate": 6.717049511925367e-08, "loss": 0.2979, "step": 46868 }, { "epoch": 4.765046766978447, "grad_norm": 0.270536333322525, "learning_rate": 6.71125320309951e-08, "loss": 0.3574, "step": 46869 }, { "epoch": 4.765148434322896, "grad_norm": 0.2896328270435333, "learning_rate": 6.705459379345191e-08, "loss": 0.297, "step": 46870 }, { "epoch": 4.765250101667345, "grad_norm": 0.5960754752159119, "learning_rate": 6.699668040691442e-08, "loss": 0.2679, "step": 46871 }, { "epoch": 4.765351769011794, "grad_norm": 0.26573237776756287, "learning_rate": 6.693879187167574e-08, "loss": 0.2947, "step": 46872 }, { "epoch": 4.765453436356243, "grad_norm": 0.29364344477653503, "learning_rate": 6.688092818802783e-08, "loss": 0.3032, "step": 46873 }, { "epoch": 4.765555103700692, "grad_norm": 0.28741270303726196, "learning_rate": 6.682308935626048e-08, "loss": 0.2795, "step": 46874 }, { "epoch": 4.7656567710451405, "grad_norm": 0.281800776720047, "learning_rate": 6.676527537666677e-08, "loss": 0.303, "step": 46875 }, { "epoch": 4.7657584383895895, "grad_norm": 0.27497196197509766, "learning_rate": 6.670748624953705e-08, "loss": 0.296, "step": 46876 }, { "epoch": 4.765860105734038, "grad_norm": 0.2889536917209625, "learning_rate": 6.664972197516273e-08, "loss": 0.3029, "step": 46877 }, { "epoch": 4.765961773078487, "grad_norm": 0.28759765625, "learning_rate": 6.65919825538347e-08, "loss": 0.2939, "step": 46878 }, { "epoch": 4.766063440422936, "grad_norm": 0.30232617259025574, "learning_rate": 6.653426798584384e-08, "loss": 0.2984, "step": 46879 }, { "epoch": 4.766165107767385, "grad_norm": 0.30466291308403015, "learning_rate": 6.647657827148158e-08, "loss": 0.2878, "step": 46880 }, { "epoch": 4.766266775111834, "grad_norm": 0.26839524507522583, "learning_rate": 6.641891341103712e-08, "loss": 0.2627, "step": 46881 }, { "epoch": 4.766368442456283, "grad_norm": 0.2634492516517639, "learning_rate": 6.636127340480192e-08, "loss": 0.3176, "step": 46882 }, { "epoch": 4.766470109800732, "grad_norm": 0.28909483551979065, "learning_rate": 6.630365825306684e-08, "loss": 0.2907, "step": 46883 }, { "epoch": 4.766571777145181, "grad_norm": 0.3174971044063568, "learning_rate": 6.624606795612054e-08, "loss": 0.3302, "step": 46884 }, { "epoch": 4.76667344448963, "grad_norm": 0.28872519731521606, "learning_rate": 6.618850251425502e-08, "loss": 0.2785, "step": 46885 }, { "epoch": 4.766775111834079, "grad_norm": 0.28597575426101685, "learning_rate": 6.613096192775892e-08, "loss": 0.2853, "step": 46886 }, { "epoch": 4.766876779178528, "grad_norm": 0.2919832766056061, "learning_rate": 6.607344619692202e-08, "loss": 0.2896, "step": 46887 }, { "epoch": 4.766978446522977, "grad_norm": 0.2855174243450165, "learning_rate": 6.601595532203576e-08, "loss": 0.3079, "step": 46888 }, { "epoch": 4.767080113867426, "grad_norm": 0.28232207894325256, "learning_rate": 6.595848930338822e-08, "loss": 0.2867, "step": 46889 }, { "epoch": 4.767181781211875, "grad_norm": 0.29443350434303284, "learning_rate": 6.590104814126919e-08, "loss": 0.2954, "step": 46890 }, { "epoch": 4.767283448556324, "grad_norm": 0.2661529779434204, "learning_rate": 6.584363183596787e-08, "loss": 0.2577, "step": 46891 }, { "epoch": 4.7673851159007725, "grad_norm": 0.26875191926956177, "learning_rate": 6.578624038777404e-08, "loss": 0.2701, "step": 46892 }, { "epoch": 4.7674867832452215, "grad_norm": 0.30078962445259094, "learning_rate": 6.57288737969769e-08, "loss": 0.3069, "step": 46893 }, { "epoch": 4.76758845058967, "grad_norm": 0.30778494477272034, "learning_rate": 6.567153206386457e-08, "loss": 0.3068, "step": 46894 }, { "epoch": 4.767690117934119, "grad_norm": 0.27036911249160767, "learning_rate": 6.56142151887268e-08, "loss": 0.2687, "step": 46895 }, { "epoch": 4.767791785278568, "grad_norm": 0.27553045749664307, "learning_rate": 6.555692317185169e-08, "loss": 0.2882, "step": 46896 }, { "epoch": 4.767893452623017, "grad_norm": 0.2763691842556, "learning_rate": 6.549965601352847e-08, "loss": 0.271, "step": 46897 }, { "epoch": 4.767995119967466, "grad_norm": 0.29023128747940063, "learning_rate": 6.544241371404525e-08, "loss": 0.271, "step": 46898 }, { "epoch": 4.768096787311915, "grad_norm": 0.26999151706695557, "learning_rate": 6.538519627369066e-08, "loss": 0.3499, "step": 46899 }, { "epoch": 4.768198454656364, "grad_norm": 0.27852389216423035, "learning_rate": 6.532800369275339e-08, "loss": 0.2959, "step": 46900 }, { "epoch": 4.768300122000813, "grad_norm": 0.2973296344280243, "learning_rate": 6.527083597151984e-08, "loss": 0.2687, "step": 46901 }, { "epoch": 4.768401789345262, "grad_norm": 0.29479238390922546, "learning_rate": 6.521369311027981e-08, "loss": 0.2812, "step": 46902 }, { "epoch": 4.768503456689711, "grad_norm": 0.29044219851493835, "learning_rate": 6.515657510932027e-08, "loss": 0.2787, "step": 46903 }, { "epoch": 4.76860512403416, "grad_norm": 0.2886730432510376, "learning_rate": 6.509948196892934e-08, "loss": 0.3015, "step": 46904 }, { "epoch": 4.768706791378609, "grad_norm": 0.28156834840774536, "learning_rate": 6.504241368939457e-08, "loss": 0.3256, "step": 46905 }, { "epoch": 4.768808458723058, "grad_norm": 0.32357046008110046, "learning_rate": 6.49853702710035e-08, "loss": 0.2552, "step": 46906 }, { "epoch": 4.768910126067507, "grad_norm": 0.2655029296875, "learning_rate": 6.49283517140431e-08, "loss": 0.283, "step": 46907 }, { "epoch": 4.7690117934119565, "grad_norm": 0.2927292585372925, "learning_rate": 6.487135801880096e-08, "loss": 0.3322, "step": 46908 }, { "epoch": 4.769113460756405, "grad_norm": 0.27488142251968384, "learning_rate": 6.481438918556404e-08, "loss": 0.3175, "step": 46909 }, { "epoch": 4.769215128100854, "grad_norm": 0.27990829944610596, "learning_rate": 6.475744521461991e-08, "loss": 0.3141, "step": 46910 }, { "epoch": 4.769316795445303, "grad_norm": 0.2554888427257538, "learning_rate": 6.470052610625444e-08, "loss": 0.2984, "step": 46911 }, { "epoch": 4.769418462789752, "grad_norm": 0.28817158937454224, "learning_rate": 6.464363186075518e-08, "loss": 0.2636, "step": 46912 }, { "epoch": 4.769520130134201, "grad_norm": 0.2734203040599823, "learning_rate": 6.458676247840855e-08, "loss": 0.2582, "step": 46913 }, { "epoch": 4.76962179747865, "grad_norm": 0.28017258644104004, "learning_rate": 6.452991795950047e-08, "loss": 0.3233, "step": 46914 }, { "epoch": 4.769723464823099, "grad_norm": 0.26424506306648254, "learning_rate": 6.447309830431847e-08, "loss": 0.2831, "step": 46915 }, { "epoch": 4.769825132167548, "grad_norm": 0.2801538407802582, "learning_rate": 6.441630351314787e-08, "loss": 0.3207, "step": 46916 }, { "epoch": 4.769926799511997, "grad_norm": 0.29225414991378784, "learning_rate": 6.435953358627511e-08, "loss": 0.2861, "step": 46917 }, { "epoch": 4.770028466856446, "grad_norm": 0.2691152095794678, "learning_rate": 6.430278852398608e-08, "loss": 0.2884, "step": 46918 }, { "epoch": 4.770130134200895, "grad_norm": 0.2749168574810028, "learning_rate": 6.424606832656666e-08, "loss": 0.2739, "step": 46919 }, { "epoch": 4.770231801545344, "grad_norm": 0.2843119204044342, "learning_rate": 6.418937299430272e-08, "loss": 0.3217, "step": 46920 }, { "epoch": 4.770333468889793, "grad_norm": 0.2833578586578369, "learning_rate": 6.41327025274796e-08, "loss": 0.2883, "step": 46921 }, { "epoch": 4.770435136234242, "grad_norm": 0.2830435633659363, "learning_rate": 6.40760569263832e-08, "loss": 0.3309, "step": 46922 }, { "epoch": 4.770536803578691, "grad_norm": 0.2605517506599426, "learning_rate": 6.40194361912988e-08, "loss": 0.3282, "step": 46923 }, { "epoch": 4.7706384709231395, "grad_norm": 0.27749180793762207, "learning_rate": 6.396284032251121e-08, "loss": 0.2841, "step": 46924 }, { "epoch": 4.7707401382675885, "grad_norm": 0.2574160099029541, "learning_rate": 6.39062693203063e-08, "loss": 0.2886, "step": 46925 }, { "epoch": 4.770841805612037, "grad_norm": 0.27678316831588745, "learning_rate": 6.38497231849683e-08, "loss": 0.2917, "step": 46926 }, { "epoch": 4.770943472956486, "grad_norm": 0.28240346908569336, "learning_rate": 6.379320191678195e-08, "loss": 0.2855, "step": 46927 }, { "epoch": 4.771045140300935, "grad_norm": 0.2923644185066223, "learning_rate": 6.373670551603317e-08, "loss": 0.2944, "step": 46928 }, { "epoch": 4.771146807645384, "grad_norm": 0.28367242217063904, "learning_rate": 6.368023398300561e-08, "loss": 0.321, "step": 46929 }, { "epoch": 4.771248474989833, "grad_norm": 0.2765286862850189, "learning_rate": 6.362378731798403e-08, "loss": 0.2964, "step": 46930 }, { "epoch": 4.771350142334282, "grad_norm": 0.294345885515213, "learning_rate": 6.356736552125209e-08, "loss": 0.3143, "step": 46931 }, { "epoch": 4.771451809678731, "grad_norm": 0.2683846652507782, "learning_rate": 6.351096859309514e-08, "loss": 0.3126, "step": 46932 }, { "epoch": 4.77155347702318, "grad_norm": 0.2896474301815033, "learning_rate": 6.345459653379737e-08, "loss": 0.3241, "step": 46933 }, { "epoch": 4.771655144367629, "grad_norm": 0.2831784188747406, "learning_rate": 6.339824934364136e-08, "loss": 0.3066, "step": 46934 }, { "epoch": 4.771756811712078, "grad_norm": 0.3132512867450714, "learning_rate": 6.334192702291243e-08, "loss": 0.2804, "step": 46935 }, { "epoch": 4.771858479056527, "grad_norm": 0.2655131220817566, "learning_rate": 6.328562957189366e-08, "loss": 0.276, "step": 46936 }, { "epoch": 4.771960146400976, "grad_norm": 0.28181377053260803, "learning_rate": 6.322935699086818e-08, "loss": 0.2413, "step": 46937 }, { "epoch": 4.772061813745425, "grad_norm": 0.29292359948158264, "learning_rate": 6.317310928012077e-08, "loss": 0.3057, "step": 46938 }, { "epoch": 4.772163481089874, "grad_norm": 0.2928157448768616, "learning_rate": 6.311688643993396e-08, "loss": 0.3362, "step": 46939 }, { "epoch": 4.772265148434323, "grad_norm": 0.2771207094192505, "learning_rate": 6.306068847059088e-08, "loss": 0.2807, "step": 46940 }, { "epoch": 4.7723668157787715, "grad_norm": 0.29339802265167236, "learning_rate": 6.300451537237462e-08, "loss": 0.2906, "step": 46941 }, { "epoch": 4.7724684831232205, "grad_norm": 0.2851518988609314, "learning_rate": 6.294836714556884e-08, "loss": 0.2924, "step": 46942 }, { "epoch": 4.772570150467669, "grad_norm": 0.2752697467803955, "learning_rate": 6.289224379045611e-08, "loss": 0.3245, "step": 46943 }, { "epoch": 4.772671817812119, "grad_norm": 0.28428465127944946, "learning_rate": 6.283614530731841e-08, "loss": 0.301, "step": 46944 }, { "epoch": 4.772773485156568, "grad_norm": 0.25333061814308167, "learning_rate": 6.27800716964394e-08, "loss": 0.2876, "step": 46945 }, { "epoch": 4.772875152501017, "grad_norm": 0.27596205472946167, "learning_rate": 6.27240229581011e-08, "loss": 0.3184, "step": 46946 }, { "epoch": 4.772976819845466, "grad_norm": 0.27934935688972473, "learning_rate": 6.266799909258548e-08, "loss": 0.3161, "step": 46947 }, { "epoch": 4.773078487189915, "grad_norm": 0.27851730585098267, "learning_rate": 6.261200010017565e-08, "loss": 0.306, "step": 46948 }, { "epoch": 4.773180154534364, "grad_norm": 0.2774854898452759, "learning_rate": 6.255602598115306e-08, "loss": 0.2872, "step": 46949 }, { "epoch": 4.773281821878813, "grad_norm": 0.2927311062812805, "learning_rate": 6.250007673579972e-08, "loss": 0.3133, "step": 46950 }, { "epoch": 4.773383489223262, "grad_norm": 0.26780182123184204, "learning_rate": 6.24441523643976e-08, "loss": 0.2645, "step": 46951 }, { "epoch": 4.773485156567711, "grad_norm": 0.2723783254623413, "learning_rate": 6.238825286722927e-08, "loss": 0.2711, "step": 46952 }, { "epoch": 4.77358682391216, "grad_norm": 0.28380322456359863, "learning_rate": 6.233237824457506e-08, "loss": 0.258, "step": 46953 }, { "epoch": 4.773688491256609, "grad_norm": 0.27399730682373047, "learning_rate": 6.227652849671695e-08, "loss": 0.3096, "step": 46954 }, { "epoch": 4.773790158601058, "grad_norm": 0.2723931074142456, "learning_rate": 6.22207036239364e-08, "loss": 0.3117, "step": 46955 }, { "epoch": 4.7738918259455065, "grad_norm": 0.2690974771976471, "learning_rate": 6.216490362651428e-08, "loss": 0.2797, "step": 46956 }, { "epoch": 4.7739934932899555, "grad_norm": 0.27040109038352966, "learning_rate": 6.210912850473205e-08, "loss": 0.2805, "step": 46957 }, { "epoch": 4.774095160634404, "grad_norm": 0.2701399028301239, "learning_rate": 6.20533782588706e-08, "loss": 0.3139, "step": 46958 }, { "epoch": 4.774196827978853, "grad_norm": 0.2738231122493744, "learning_rate": 6.199765288921133e-08, "loss": 0.298, "step": 46959 }, { "epoch": 4.774298495323302, "grad_norm": 0.27218130230903625, "learning_rate": 6.194195239603406e-08, "loss": 0.2938, "step": 46960 }, { "epoch": 4.774400162667751, "grad_norm": 0.27885210514068604, "learning_rate": 6.188627677961967e-08, "loss": 0.3131, "step": 46961 }, { "epoch": 4.7745018300122, "grad_norm": 0.2863049805164337, "learning_rate": 6.183062604024848e-08, "loss": 0.288, "step": 46962 }, { "epoch": 4.774603497356649, "grad_norm": 0.304094523191452, "learning_rate": 6.177500017820192e-08, "loss": 0.2569, "step": 46963 }, { "epoch": 4.774705164701098, "grad_norm": 0.2857167720794678, "learning_rate": 6.171939919375868e-08, "loss": 0.2932, "step": 46964 }, { "epoch": 4.774806832045547, "grad_norm": 0.2808990180492401, "learning_rate": 6.166382308719964e-08, "loss": 0.3068, "step": 46965 }, { "epoch": 4.774908499389996, "grad_norm": 0.29450851678848267, "learning_rate": 6.160827185880514e-08, "loss": 0.2612, "step": 46966 }, { "epoch": 4.775010166734445, "grad_norm": 0.26853179931640625, "learning_rate": 6.155274550885438e-08, "loss": 0.3188, "step": 46967 }, { "epoch": 4.775111834078894, "grad_norm": 0.3000415563583374, "learning_rate": 6.149724403762713e-08, "loss": 0.3181, "step": 46968 }, { "epoch": 4.775213501423343, "grad_norm": 0.26409202814102173, "learning_rate": 6.144176744540376e-08, "loss": 0.3119, "step": 46969 }, { "epoch": 4.775315168767792, "grad_norm": 0.2976364195346832, "learning_rate": 6.13863157324629e-08, "loss": 0.3127, "step": 46970 }, { "epoch": 4.775416836112241, "grad_norm": 0.2711302936077118, "learning_rate": 6.133088889908378e-08, "loss": 0.3288, "step": 46971 }, { "epoch": 4.77551850345669, "grad_norm": 0.26059502363204956, "learning_rate": 6.127548694554674e-08, "loss": 0.2901, "step": 46972 }, { "epoch": 4.7756201708011385, "grad_norm": 0.272176057100296, "learning_rate": 6.12201098721299e-08, "loss": 0.321, "step": 46973 }, { "epoch": 4.7757218381455875, "grad_norm": 0.27771520614624023, "learning_rate": 6.116475767911189e-08, "loss": 0.2766, "step": 46974 }, { "epoch": 4.775823505490036, "grad_norm": 0.26500049233436584, "learning_rate": 6.110943036677308e-08, "loss": 0.3222, "step": 46975 }, { "epoch": 4.775925172834485, "grad_norm": 0.26315170526504517, "learning_rate": 6.1054127935391e-08, "loss": 0.3054, "step": 46976 }, { "epoch": 4.776026840178934, "grad_norm": 0.302943617105484, "learning_rate": 6.099885038524434e-08, "loss": 0.3056, "step": 46977 }, { "epoch": 4.776128507523383, "grad_norm": 0.2727339267730713, "learning_rate": 6.094359771661174e-08, "loss": 0.289, "step": 46978 }, { "epoch": 4.776230174867832, "grad_norm": 0.24953585863113403, "learning_rate": 6.088836992977188e-08, "loss": 0.31, "step": 46979 }, { "epoch": 4.776331842212281, "grad_norm": 0.2826288044452667, "learning_rate": 6.083316702500286e-08, "loss": 0.2899, "step": 46980 }, { "epoch": 4.77643350955673, "grad_norm": 0.2824632525444031, "learning_rate": 6.077798900258225e-08, "loss": 0.2866, "step": 46981 }, { "epoch": 4.776535176901179, "grad_norm": 0.2508663237094879, "learning_rate": 6.07228358627887e-08, "loss": 0.3097, "step": 46982 }, { "epoch": 4.776636844245628, "grad_norm": 0.2842662036418915, "learning_rate": 6.066770760589924e-08, "loss": 0.2799, "step": 46983 }, { "epoch": 4.776738511590077, "grad_norm": 0.2814421057701111, "learning_rate": 6.06126042321925e-08, "loss": 0.3182, "step": 46984 }, { "epoch": 4.776840178934526, "grad_norm": 0.2959934175014496, "learning_rate": 6.055752574194607e-08, "loss": 0.3232, "step": 46985 }, { "epoch": 4.776941846278975, "grad_norm": 0.26229286193847656, "learning_rate": 6.050247213543636e-08, "loss": 0.3127, "step": 46986 }, { "epoch": 4.777043513623424, "grad_norm": 0.2560969293117523, "learning_rate": 6.044744341294095e-08, "loss": 0.29, "step": 46987 }, { "epoch": 4.777145180967873, "grad_norm": 0.27360665798187256, "learning_rate": 6.03924395747385e-08, "loss": 0.2791, "step": 46988 }, { "epoch": 4.777246848312322, "grad_norm": 0.29089227318763733, "learning_rate": 6.03374606211049e-08, "loss": 0.2631, "step": 46989 }, { "epoch": 4.777348515656771, "grad_norm": 0.29041120409965515, "learning_rate": 6.028250655231716e-08, "loss": 0.2963, "step": 46990 }, { "epoch": 4.77745018300122, "grad_norm": 0.2819778323173523, "learning_rate": 6.022757736865226e-08, "loss": 0.2903, "step": 46991 }, { "epoch": 4.777551850345669, "grad_norm": 0.28226712346076965, "learning_rate": 6.01726730703872e-08, "loss": 0.295, "step": 46992 }, { "epoch": 4.777653517690118, "grad_norm": 0.27031248807907104, "learning_rate": 6.011779365779791e-08, "loss": 0.2991, "step": 46993 }, { "epoch": 4.777755185034567, "grad_norm": 0.29298198223114014, "learning_rate": 6.006293913116135e-08, "loss": 0.3097, "step": 46994 }, { "epoch": 4.777856852379016, "grad_norm": 0.2824303209781647, "learning_rate": 6.000810949075453e-08, "loss": 0.3193, "step": 46995 }, { "epoch": 4.777958519723465, "grad_norm": 0.3211221694946289, "learning_rate": 5.995330473685224e-08, "loss": 0.2706, "step": 46996 }, { "epoch": 4.778060187067914, "grad_norm": 0.2725215256214142, "learning_rate": 5.989852486973091e-08, "loss": 0.2731, "step": 46997 }, { "epoch": 4.778161854412363, "grad_norm": 0.2699970006942749, "learning_rate": 5.984376988966756e-08, "loss": 0.3201, "step": 46998 }, { "epoch": 4.778263521756812, "grad_norm": 0.29040849208831787, "learning_rate": 5.978903979693752e-08, "loss": 0.2787, "step": 46999 }, { "epoch": 4.778365189101261, "grad_norm": 0.2887887954711914, "learning_rate": 5.973433459181555e-08, "loss": 0.3057, "step": 47000 }, { "epoch": 4.77846685644571, "grad_norm": 0.30396512150764465, "learning_rate": 5.967965427457811e-08, "loss": 0.2671, "step": 47001 }, { "epoch": 4.778568523790159, "grad_norm": 0.3049483001232147, "learning_rate": 5.96249988455011e-08, "loss": 0.2922, "step": 47002 }, { "epoch": 4.778670191134608, "grad_norm": 0.2875819206237793, "learning_rate": 5.9570368304859296e-08, "loss": 0.2982, "step": 47003 }, { "epoch": 4.778771858479057, "grad_norm": 0.290497750043869, "learning_rate": 5.9515762652927465e-08, "loss": 0.2845, "step": 47004 }, { "epoch": 4.7788735258235056, "grad_norm": 0.27709731459617615, "learning_rate": 5.9461181889982064e-08, "loss": 0.2886, "step": 47005 }, { "epoch": 4.7789751931679545, "grad_norm": 0.28394269943237305, "learning_rate": 5.940662601629621e-08, "loss": 0.283, "step": 47006 }, { "epoch": 4.779076860512403, "grad_norm": 0.28521716594696045, "learning_rate": 5.935209503214578e-08, "loss": 0.2972, "step": 47007 }, { "epoch": 4.779178527856852, "grad_norm": 0.27124112844467163, "learning_rate": 5.929758893780613e-08, "loss": 0.2886, "step": 47008 }, { "epoch": 4.779280195201301, "grad_norm": 0.3074954152107239, "learning_rate": 5.924310773355035e-08, "loss": 0.2793, "step": 47009 }, { "epoch": 4.77938186254575, "grad_norm": 0.28062373399734497, "learning_rate": 5.9188651419654354e-08, "loss": 0.2833, "step": 47010 }, { "epoch": 4.779483529890199, "grad_norm": 0.2720331847667694, "learning_rate": 5.913421999639124e-08, "loss": 0.2839, "step": 47011 }, { "epoch": 4.779585197234648, "grad_norm": 0.31766483187675476, "learning_rate": 5.907981346403635e-08, "loss": 0.2788, "step": 47012 }, { "epoch": 4.779686864579097, "grad_norm": 0.26517629623413086, "learning_rate": 5.90254318228628e-08, "loss": 0.2836, "step": 47013 }, { "epoch": 4.779788531923546, "grad_norm": 0.26712173223495483, "learning_rate": 5.8971075073144815e-08, "loss": 0.3185, "step": 47014 }, { "epoch": 4.779890199267995, "grad_norm": 0.2935325503349304, "learning_rate": 5.8916743215157167e-08, "loss": 0.2835, "step": 47015 }, { "epoch": 4.779991866612444, "grad_norm": 0.2564832866191864, "learning_rate": 5.8862436249171875e-08, "loss": 0.2892, "step": 47016 }, { "epoch": 4.780093533956893, "grad_norm": 0.28626570105552673, "learning_rate": 5.8808154175463705e-08, "loss": 0.3053, "step": 47017 }, { "epoch": 4.780195201301342, "grad_norm": 0.3125159740447998, "learning_rate": 5.875389699430634e-08, "loss": 0.2566, "step": 47018 }, { "epoch": 4.780296868645791, "grad_norm": 0.31614840030670166, "learning_rate": 5.8699664705972324e-08, "loss": 0.2706, "step": 47019 }, { "epoch": 4.78039853599024, "grad_norm": 0.2918146848678589, "learning_rate": 5.864545731073479e-08, "loss": 0.2834, "step": 47020 }, { "epoch": 4.780500203334689, "grad_norm": 0.28861457109451294, "learning_rate": 5.8591274808867396e-08, "loss": 0.3306, "step": 47021 }, { "epoch": 4.7806018706791376, "grad_norm": 0.2887507379055023, "learning_rate": 5.8537117200643256e-08, "loss": 0.2927, "step": 47022 }, { "epoch": 4.7807035380235865, "grad_norm": 0.27051109075546265, "learning_rate": 5.8482984486334935e-08, "loss": 0.3054, "step": 47023 }, { "epoch": 4.780805205368035, "grad_norm": 0.29831916093826294, "learning_rate": 5.842887666621444e-08, "loss": 0.3034, "step": 47024 }, { "epoch": 4.780906872712484, "grad_norm": 0.2575480341911316, "learning_rate": 5.8374793740555435e-08, "loss": 0.2805, "step": 47025 }, { "epoch": 4.781008540056934, "grad_norm": 0.3011907935142517, "learning_rate": 5.8320735709629375e-08, "loss": 0.2869, "step": 47026 }, { "epoch": 4.781110207401383, "grad_norm": 0.2832663655281067, "learning_rate": 5.826670257370992e-08, "loss": 0.314, "step": 47027 }, { "epoch": 4.781211874745832, "grad_norm": 0.2701667249202728, "learning_rate": 5.8212694333067974e-08, "loss": 0.3013, "step": 47028 }, { "epoch": 4.781313542090281, "grad_norm": 0.3117552399635315, "learning_rate": 5.81587109879761e-08, "loss": 0.2729, "step": 47029 }, { "epoch": 4.78141520943473, "grad_norm": 0.29767605662345886, "learning_rate": 5.810475253870629e-08, "loss": 0.2805, "step": 47030 }, { "epoch": 4.781516876779179, "grad_norm": 0.29033657908439636, "learning_rate": 5.805081898553055e-08, "loss": 0.3025, "step": 47031 }, { "epoch": 4.781618544123628, "grad_norm": 0.27908414602279663, "learning_rate": 5.799691032872035e-08, "loss": 0.2978, "step": 47032 }, { "epoch": 4.781720211468077, "grad_norm": 0.2566720247268677, "learning_rate": 5.794302656854711e-08, "loss": 0.2738, "step": 47033 }, { "epoch": 4.781821878812526, "grad_norm": 0.27275028824806213, "learning_rate": 5.78891677052823e-08, "loss": 0.2911, "step": 47034 }, { "epoch": 4.781923546156975, "grad_norm": 0.2712858319282532, "learning_rate": 5.783533373919847e-08, "loss": 0.2942, "step": 47035 }, { "epoch": 4.782025213501424, "grad_norm": 0.2726340889930725, "learning_rate": 5.778152467056486e-08, "loss": 0.2748, "step": 47036 }, { "epoch": 4.782126880845873, "grad_norm": 0.30077382922172546, "learning_rate": 5.772774049965346e-08, "loss": 0.3256, "step": 47037 }, { "epoch": 4.7822285481903215, "grad_norm": 0.27188798785209656, "learning_rate": 5.767398122673573e-08, "loss": 0.3411, "step": 47038 }, { "epoch": 4.7823302155347704, "grad_norm": 0.27365466952323914, "learning_rate": 5.762024685208145e-08, "loss": 0.3237, "step": 47039 }, { "epoch": 4.782431882879219, "grad_norm": 0.2808462381362915, "learning_rate": 5.756653737596263e-08, "loss": 0.3001, "step": 47040 }, { "epoch": 4.782533550223668, "grad_norm": 0.28697794675827026, "learning_rate": 5.7512852798648486e-08, "loss": 0.2956, "step": 47041 }, { "epoch": 4.782635217568117, "grad_norm": 0.2693048119544983, "learning_rate": 5.745919312041048e-08, "loss": 0.2725, "step": 47042 }, { "epoch": 4.782736884912566, "grad_norm": 0.28883305191993713, "learning_rate": 5.7405558341517844e-08, "loss": 0.3323, "step": 47043 }, { "epoch": 4.782838552257015, "grad_norm": 0.27961650490760803, "learning_rate": 5.735194846224201e-08, "loss": 0.2948, "step": 47044 }, { "epoch": 4.782940219601464, "grad_norm": 0.28636521100997925, "learning_rate": 5.729836348285278e-08, "loss": 0.283, "step": 47045 }, { "epoch": 4.783041886945913, "grad_norm": 0.26726001501083374, "learning_rate": 5.7244803403618823e-08, "loss": 0.3157, "step": 47046 }, { "epoch": 4.783143554290362, "grad_norm": 0.2689705491065979, "learning_rate": 5.719126822481214e-08, "loss": 0.2832, "step": 47047 }, { "epoch": 4.783245221634811, "grad_norm": 0.29260769486427307, "learning_rate": 5.713775794670084e-08, "loss": 0.2853, "step": 47048 }, { "epoch": 4.78334688897926, "grad_norm": 0.2742752134799957, "learning_rate": 5.7084272569554176e-08, "loss": 0.2951, "step": 47049 }, { "epoch": 4.783448556323709, "grad_norm": 0.26999223232269287, "learning_rate": 5.703081209364303e-08, "loss": 0.2789, "step": 47050 }, { "epoch": 4.783550223668158, "grad_norm": 0.2765210270881653, "learning_rate": 5.697737651923607e-08, "loss": 0.2715, "step": 47051 }, { "epoch": 4.783651891012607, "grad_norm": 0.2473084032535553, "learning_rate": 5.6923965846602535e-08, "loss": 0.2959, "step": 47052 }, { "epoch": 4.783753558357056, "grad_norm": 0.283083975315094, "learning_rate": 5.6870580076011094e-08, "loss": 0.3125, "step": 47053 }, { "epoch": 4.783855225701505, "grad_norm": 0.29039663076400757, "learning_rate": 5.681721920773098e-08, "loss": 0.3005, "step": 47054 }, { "epoch": 4.7839568930459535, "grad_norm": 0.28668057918548584, "learning_rate": 5.6763883242031416e-08, "loss": 0.2965, "step": 47055 }, { "epoch": 4.7840585603904024, "grad_norm": 0.2650926113128662, "learning_rate": 5.6710572179179967e-08, "loss": 0.2874, "step": 47056 }, { "epoch": 4.784160227734851, "grad_norm": 0.2801576256752014, "learning_rate": 5.665728601944642e-08, "loss": 0.3171, "step": 47057 }, { "epoch": 4.7842618950793, "grad_norm": 0.2905636727809906, "learning_rate": 5.66040247630989e-08, "loss": 0.2878, "step": 47058 }, { "epoch": 4.784363562423749, "grad_norm": 0.2562154531478882, "learning_rate": 5.655078841040551e-08, "loss": 0.3081, "step": 47059 }, { "epoch": 4.784465229768198, "grad_norm": 0.2814798951148987, "learning_rate": 5.649757696163438e-08, "loss": 0.2728, "step": 47060 }, { "epoch": 4.784566897112647, "grad_norm": 0.2774794101715088, "learning_rate": 5.644439041705363e-08, "loss": 0.3015, "step": 47061 }, { "epoch": 4.784668564457096, "grad_norm": 0.2679224908351898, "learning_rate": 5.6391228776931374e-08, "loss": 0.3158, "step": 47062 }, { "epoch": 4.784770231801545, "grad_norm": 0.2879103422164917, "learning_rate": 5.6338092041535175e-08, "loss": 0.301, "step": 47063 }, { "epoch": 4.784871899145994, "grad_norm": 0.28430697321891785, "learning_rate": 5.628498021113316e-08, "loss": 0.3073, "step": 47064 }, { "epoch": 4.784973566490443, "grad_norm": 0.2788207232952118, "learning_rate": 5.623189328599288e-08, "loss": 0.3185, "step": 47065 }, { "epoch": 4.785075233834892, "grad_norm": 0.27944061160087585, "learning_rate": 5.6178831266380796e-08, "loss": 0.278, "step": 47066 }, { "epoch": 4.785176901179341, "grad_norm": 0.2729732394218445, "learning_rate": 5.612579415256503e-08, "loss": 0.265, "step": 47067 }, { "epoch": 4.78527856852379, "grad_norm": 0.27903300523757935, "learning_rate": 5.6072781944813135e-08, "loss": 0.2995, "step": 47068 }, { "epoch": 4.785380235868239, "grad_norm": 0.2810114324092865, "learning_rate": 5.601979464339102e-08, "loss": 0.3178, "step": 47069 }, { "epoch": 4.785481903212688, "grad_norm": 0.30075594782829285, "learning_rate": 5.59668322485668e-08, "loss": 0.274, "step": 47070 }, { "epoch": 4.785583570557137, "grad_norm": 0.3062971532344818, "learning_rate": 5.591389476060693e-08, "loss": 0.2727, "step": 47071 }, { "epoch": 4.785685237901586, "grad_norm": 0.27986788749694824, "learning_rate": 5.5860982179777867e-08, "loss": 0.2889, "step": 47072 }, { "epoch": 4.785786905246035, "grad_norm": 0.2816653251647949, "learning_rate": 5.5808094506346055e-08, "loss": 0.2997, "step": 47073 }, { "epoch": 4.785888572590484, "grad_norm": 0.28531673550605774, "learning_rate": 5.575523174057851e-08, "loss": 0.2746, "step": 47074 }, { "epoch": 4.785990239934933, "grad_norm": 0.25619909167289734, "learning_rate": 5.570239388274057e-08, "loss": 0.2846, "step": 47075 }, { "epoch": 4.786091907279382, "grad_norm": 0.25410592555999756, "learning_rate": 5.564958093309925e-08, "loss": 0.291, "step": 47076 }, { "epoch": 4.786193574623831, "grad_norm": 0.2760455310344696, "learning_rate": 5.559679289192099e-08, "loss": 0.287, "step": 47077 }, { "epoch": 4.78629524196828, "grad_norm": 0.2792450785636902, "learning_rate": 5.5544029759470596e-08, "loss": 0.2796, "step": 47078 }, { "epoch": 4.786396909312729, "grad_norm": 0.28571099042892456, "learning_rate": 5.549129153601396e-08, "loss": 0.2965, "step": 47079 }, { "epoch": 4.786498576657178, "grad_norm": 0.2625228464603424, "learning_rate": 5.5438578221818085e-08, "loss": 0.2898, "step": 47080 }, { "epoch": 4.786600244001627, "grad_norm": 0.2844913899898529, "learning_rate": 5.538588981714721e-08, "loss": 0.3086, "step": 47081 }, { "epoch": 4.786701911346076, "grad_norm": 0.28342825174331665, "learning_rate": 5.533322632226723e-08, "loss": 0.2894, "step": 47082 }, { "epoch": 4.786803578690525, "grad_norm": 0.2725892663002014, "learning_rate": 5.528058773744294e-08, "loss": 0.302, "step": 47083 }, { "epoch": 4.786905246034974, "grad_norm": 0.2844330370426178, "learning_rate": 5.522797406294078e-08, "loss": 0.2833, "step": 47084 }, { "epoch": 4.787006913379423, "grad_norm": 0.2629513144493103, "learning_rate": 5.5175385299025e-08, "loss": 0.3052, "step": 47085 }, { "epoch": 4.787108580723872, "grad_norm": 0.29077959060668945, "learning_rate": 5.512282144595982e-08, "loss": 0.2976, "step": 47086 }, { "epoch": 4.7872102480683205, "grad_norm": 0.28378018736839294, "learning_rate": 5.507028250401114e-08, "loss": 0.2831, "step": 47087 }, { "epoch": 4.7873119154127695, "grad_norm": 0.2590498626232147, "learning_rate": 5.5017768473443754e-08, "loss": 0.2912, "step": 47088 }, { "epoch": 4.787413582757218, "grad_norm": 0.2860674560070038, "learning_rate": 5.496527935452134e-08, "loss": 0.2924, "step": 47089 }, { "epoch": 4.787515250101667, "grad_norm": 0.2924754023551941, "learning_rate": 5.491281514750868e-08, "loss": 0.2955, "step": 47090 }, { "epoch": 4.787616917446116, "grad_norm": 0.26285234093666077, "learning_rate": 5.486037585267001e-08, "loss": 0.2962, "step": 47091 }, { "epoch": 4.787718584790565, "grad_norm": 0.28316643834114075, "learning_rate": 5.480796147027012e-08, "loss": 0.2896, "step": 47092 }, { "epoch": 4.787820252135014, "grad_norm": 0.2731351852416992, "learning_rate": 5.475557200057213e-08, "loss": 0.3043, "step": 47093 }, { "epoch": 4.787921919479463, "grad_norm": 0.2826938331127167, "learning_rate": 5.470320744384083e-08, "loss": 0.2908, "step": 47094 }, { "epoch": 4.788023586823912, "grad_norm": 0.2796313166618347, "learning_rate": 5.465086780033935e-08, "loss": 0.2864, "step": 47095 }, { "epoch": 4.788125254168361, "grad_norm": 0.2569829225540161, "learning_rate": 5.45985530703308e-08, "loss": 0.2612, "step": 47096 }, { "epoch": 4.78822692151281, "grad_norm": 0.2934207618236542, "learning_rate": 5.454626325408052e-08, "loss": 0.284, "step": 47097 }, { "epoch": 4.788328588857259, "grad_norm": 0.30154749751091003, "learning_rate": 5.44939983518511e-08, "loss": 0.2869, "step": 47098 }, { "epoch": 4.788430256201708, "grad_norm": 0.29855722188949585, "learning_rate": 5.444175836390508e-08, "loss": 0.3078, "step": 47099 }, { "epoch": 4.788531923546157, "grad_norm": 0.30542999505996704, "learning_rate": 5.4389543290506165e-08, "loss": 0.3295, "step": 47100 }, { "epoch": 4.788633590890606, "grad_norm": 0.29847627878189087, "learning_rate": 5.433735313191801e-08, "loss": 0.2919, "step": 47101 }, { "epoch": 4.788735258235055, "grad_norm": 0.2856503427028656, "learning_rate": 5.428518788840265e-08, "loss": 0.2778, "step": 47102 }, { "epoch": 4.788836925579504, "grad_norm": 0.2774173617362976, "learning_rate": 5.423304756022318e-08, "loss": 0.301, "step": 47103 }, { "epoch": 4.7889385929239525, "grad_norm": 0.28231585025787354, "learning_rate": 5.418093214764275e-08, "loss": 0.3252, "step": 47104 }, { "epoch": 4.7890402602684015, "grad_norm": 0.28118544816970825, "learning_rate": 5.412884165092336e-08, "loss": 0.2724, "step": 47105 }, { "epoch": 4.78914192761285, "grad_norm": 0.27946197986602783, "learning_rate": 5.407677607032702e-08, "loss": 0.2997, "step": 47106 }, { "epoch": 4.789243594957299, "grad_norm": 0.2751479744911194, "learning_rate": 5.4024735406116856e-08, "loss": 0.3096, "step": 47107 }, { "epoch": 4.789345262301749, "grad_norm": 0.283735990524292, "learning_rate": 5.3972719658554884e-08, "loss": 0.2853, "step": 47108 }, { "epoch": 4.789446929646198, "grad_norm": 0.26675617694854736, "learning_rate": 5.392072882790256e-08, "loss": 0.3113, "step": 47109 }, { "epoch": 4.789548596990647, "grad_norm": 0.28864753246307373, "learning_rate": 5.3868762914422446e-08, "loss": 0.2831, "step": 47110 }, { "epoch": 4.789650264335096, "grad_norm": 0.2914358675479889, "learning_rate": 5.381682191837656e-08, "loss": 0.3195, "step": 47111 }, { "epoch": 4.789751931679545, "grad_norm": 0.26771512627601624, "learning_rate": 5.3764905840025806e-08, "loss": 0.3397, "step": 47112 }, { "epoch": 4.789853599023994, "grad_norm": 0.2813376188278198, "learning_rate": 5.371301467963219e-08, "loss": 0.2825, "step": 47113 }, { "epoch": 4.789955266368443, "grad_norm": 0.27252915501594543, "learning_rate": 5.3661148437456625e-08, "loss": 0.2977, "step": 47114 }, { "epoch": 4.790056933712892, "grad_norm": 0.2792668342590332, "learning_rate": 5.360930711376111e-08, "loss": 0.3048, "step": 47115 }, { "epoch": 4.790158601057341, "grad_norm": 0.26642799377441406, "learning_rate": 5.3557490708806e-08, "loss": 0.294, "step": 47116 }, { "epoch": 4.79026026840179, "grad_norm": 0.3078802824020386, "learning_rate": 5.350569922285331e-08, "loss": 0.2783, "step": 47117 }, { "epoch": 4.790361935746239, "grad_norm": 0.2894117832183838, "learning_rate": 5.3453932656163385e-08, "loss": 0.2737, "step": 47118 }, { "epoch": 4.7904636030906875, "grad_norm": 0.2874911427497864, "learning_rate": 5.340219100899657e-08, "loss": 0.3068, "step": 47119 }, { "epoch": 4.7905652704351365, "grad_norm": 0.2860839366912842, "learning_rate": 5.335047428161488e-08, "loss": 0.2632, "step": 47120 }, { "epoch": 4.790666937779585, "grad_norm": 0.2962905764579773, "learning_rate": 5.329878247427755e-08, "loss": 0.2698, "step": 47121 }, { "epoch": 4.790768605124034, "grad_norm": 0.30689364671707153, "learning_rate": 5.324711558724549e-08, "loss": 0.2738, "step": 47122 }, { "epoch": 4.790870272468483, "grad_norm": 0.3049572706222534, "learning_rate": 5.319547362077848e-08, "loss": 0.2736, "step": 47123 }, { "epoch": 4.790971939812932, "grad_norm": 0.2717806398868561, "learning_rate": 5.314385657513743e-08, "loss": 0.2753, "step": 47124 }, { "epoch": 4.791073607157381, "grad_norm": 0.28763478994369507, "learning_rate": 5.3092264450582685e-08, "loss": 0.3142, "step": 47125 }, { "epoch": 4.79117527450183, "grad_norm": 0.2836468815803528, "learning_rate": 5.3040697247372377e-08, "loss": 0.3354, "step": 47126 }, { "epoch": 4.791276941846279, "grad_norm": 0.26861751079559326, "learning_rate": 5.2989154965768505e-08, "loss": 0.291, "step": 47127 }, { "epoch": 4.791378609190728, "grad_norm": 0.24348367750644684, "learning_rate": 5.293763760602921e-08, "loss": 0.2982, "step": 47128 }, { "epoch": 4.791480276535177, "grad_norm": 0.26874062418937683, "learning_rate": 5.288614516841428e-08, "loss": 0.2763, "step": 47129 }, { "epoch": 4.791581943879626, "grad_norm": 0.29525062441825867, "learning_rate": 5.283467765318351e-08, "loss": 0.2999, "step": 47130 }, { "epoch": 4.791683611224075, "grad_norm": 0.27394548058509827, "learning_rate": 5.278323506059613e-08, "loss": 0.2888, "step": 47131 }, { "epoch": 4.791785278568524, "grad_norm": 0.26070713996887207, "learning_rate": 5.273181739091138e-08, "loss": 0.3204, "step": 47132 }, { "epoch": 4.791886945912973, "grad_norm": 0.28748416900634766, "learning_rate": 5.268042464438794e-08, "loss": 0.2969, "step": 47133 }, { "epoch": 4.791988613257422, "grad_norm": 0.28319865465164185, "learning_rate": 5.2629056821284495e-08, "loss": 0.2853, "step": 47134 }, { "epoch": 4.792090280601871, "grad_norm": 0.275202214717865, "learning_rate": 5.257771392186084e-08, "loss": 0.2837, "step": 47135 }, { "epoch": 4.7921919479463195, "grad_norm": 0.2949797809123993, "learning_rate": 5.2526395946373984e-08, "loss": 0.2855, "step": 47136 }, { "epoch": 4.7922936152907685, "grad_norm": 0.28525224328041077, "learning_rate": 5.247510289508484e-08, "loss": 0.3365, "step": 47137 }, { "epoch": 4.792395282635217, "grad_norm": 0.28434810042381287, "learning_rate": 5.24238347682493e-08, "loss": 0.2957, "step": 47138 }, { "epoch": 4.792496949979666, "grad_norm": 0.2938188314437866, "learning_rate": 5.237259156612662e-08, "loss": 0.269, "step": 47139 }, { "epoch": 4.792598617324115, "grad_norm": 0.28463807702064514, "learning_rate": 5.2321373288976016e-08, "loss": 0.2986, "step": 47140 }, { "epoch": 4.792700284668564, "grad_norm": 0.284446656703949, "learning_rate": 5.2270179937053966e-08, "loss": 0.2998, "step": 47141 }, { "epoch": 4.792801952013013, "grad_norm": 0.26958441734313965, "learning_rate": 5.221901151061915e-08, "loss": 0.3112, "step": 47142 }, { "epoch": 4.792903619357462, "grad_norm": 0.26798662543296814, "learning_rate": 5.2167868009929124e-08, "loss": 0.3099, "step": 47143 }, { "epoch": 4.793005286701911, "grad_norm": 0.29996728897094727, "learning_rate": 5.211674943524147e-08, "loss": 0.2908, "step": 47144 }, { "epoch": 4.79310695404636, "grad_norm": 0.2775905728340149, "learning_rate": 5.206565578681433e-08, "loss": 0.3018, "step": 47145 }, { "epoch": 4.793208621390809, "grad_norm": 0.2905160188674927, "learning_rate": 5.201458706490414e-08, "loss": 0.2807, "step": 47146 }, { "epoch": 4.793310288735258, "grad_norm": 0.25921234488487244, "learning_rate": 5.1963543269769046e-08, "loss": 0.336, "step": 47147 }, { "epoch": 4.793411956079707, "grad_norm": 0.28851065039634705, "learning_rate": 5.1912524401665495e-08, "loss": 0.3127, "step": 47148 }, { "epoch": 4.793513623424156, "grad_norm": 0.29351112246513367, "learning_rate": 5.186153046085052e-08, "loss": 0.2868, "step": 47149 }, { "epoch": 4.793615290768605, "grad_norm": 0.27776551246643066, "learning_rate": 5.1810561447581674e-08, "loss": 0.3198, "step": 47150 }, { "epoch": 4.793716958113054, "grad_norm": 0.2866399586200714, "learning_rate": 5.1759617362115434e-08, "loss": 0.2887, "step": 47151 }, { "epoch": 4.793818625457503, "grad_norm": 0.25961917638778687, "learning_rate": 5.170869820470825e-08, "loss": 0.2776, "step": 47152 }, { "epoch": 4.7939202928019515, "grad_norm": 0.28065475821495056, "learning_rate": 5.16578039756166e-08, "loss": 0.2872, "step": 47153 }, { "epoch": 4.794021960146401, "grad_norm": 0.2774275243282318, "learning_rate": 5.160693467509692e-08, "loss": 0.2842, "step": 47154 }, { "epoch": 4.79412362749085, "grad_norm": 0.26698336005210876, "learning_rate": 5.155609030340569e-08, "loss": 0.3152, "step": 47155 }, { "epoch": 4.794225294835299, "grad_norm": 0.2608014643192291, "learning_rate": 5.150527086079882e-08, "loss": 0.3187, "step": 47156 }, { "epoch": 4.794326962179748, "grad_norm": 0.2799733877182007, "learning_rate": 5.145447634753331e-08, "loss": 0.2861, "step": 47157 }, { "epoch": 4.794428629524197, "grad_norm": 0.2921431064605713, "learning_rate": 5.140370676386341e-08, "loss": 0.2964, "step": 47158 }, { "epoch": 4.794530296868646, "grad_norm": 0.2744063436985016, "learning_rate": 5.135296211004559e-08, "loss": 0.2866, "step": 47159 }, { "epoch": 4.794631964213095, "grad_norm": 0.2876538932323456, "learning_rate": 5.1302242386336296e-08, "loss": 0.2797, "step": 47160 }, { "epoch": 4.794733631557544, "grad_norm": 0.27520230412483215, "learning_rate": 5.1251547592989223e-08, "loss": 0.2885, "step": 47161 }, { "epoch": 4.794835298901993, "grad_norm": 0.3188013732433319, "learning_rate": 5.1200877730261945e-08, "loss": 0.319, "step": 47162 }, { "epoch": 4.794936966246442, "grad_norm": 0.2668595016002655, "learning_rate": 5.1150232798407584e-08, "loss": 0.2516, "step": 47163 }, { "epoch": 4.795038633590891, "grad_norm": 0.274508535861969, "learning_rate": 5.109961279768316e-08, "loss": 0.3029, "step": 47164 }, { "epoch": 4.79514030093534, "grad_norm": 0.28877341747283936, "learning_rate": 5.104901772834237e-08, "loss": 0.2994, "step": 47165 }, { "epoch": 4.795241968279789, "grad_norm": 0.27685433626174927, "learning_rate": 5.0998447590640546e-08, "loss": 0.3237, "step": 47166 }, { "epoch": 4.795343635624238, "grad_norm": 0.2780654728412628, "learning_rate": 5.094790238483305e-08, "loss": 0.2988, "step": 47167 }, { "epoch": 4.7954453029686865, "grad_norm": 0.2801972031593323, "learning_rate": 5.089738211117356e-08, "loss": 0.3182, "step": 47168 }, { "epoch": 4.7955469703131355, "grad_norm": 0.27869415283203125, "learning_rate": 5.084688676991689e-08, "loss": 0.3189, "step": 47169 }, { "epoch": 4.795648637657584, "grad_norm": 0.30068477988243103, "learning_rate": 5.0796416361317826e-08, "loss": 0.2632, "step": 47170 }, { "epoch": 4.795750305002033, "grad_norm": 0.3084872364997864, "learning_rate": 5.074597088563005e-08, "loss": 0.3228, "step": 47171 }, { "epoch": 4.795851972346482, "grad_norm": 0.27952566742897034, "learning_rate": 5.06955503431078e-08, "loss": 0.3086, "step": 47172 }, { "epoch": 4.795953639690931, "grad_norm": 0.28074803948402405, "learning_rate": 5.064515473400533e-08, "loss": 0.3065, "step": 47173 }, { "epoch": 4.79605530703538, "grad_norm": 0.286248117685318, "learning_rate": 5.0594784058576876e-08, "loss": 0.2825, "step": 47174 }, { "epoch": 4.796156974379829, "grad_norm": 0.2777620851993561, "learning_rate": 5.054443831707556e-08, "loss": 0.2875, "step": 47175 }, { "epoch": 4.796258641724278, "grad_norm": 0.29171913862228394, "learning_rate": 5.049411750975453e-08, "loss": 0.2919, "step": 47176 }, { "epoch": 4.796360309068727, "grad_norm": 0.28404510021209717, "learning_rate": 5.0443821636869115e-08, "loss": 0.2872, "step": 47177 }, { "epoch": 4.796461976413176, "grad_norm": 0.28186553716659546, "learning_rate": 5.0393550698670246e-08, "loss": 0.2987, "step": 47178 }, { "epoch": 4.796563643757625, "grad_norm": 0.27353984117507935, "learning_rate": 5.034330469541271e-08, "loss": 0.2892, "step": 47179 }, { "epoch": 4.796665311102074, "grad_norm": 0.3001023530960083, "learning_rate": 5.029308362735019e-08, "loss": 0.3184, "step": 47180 }, { "epoch": 4.796766978446523, "grad_norm": 0.25824975967407227, "learning_rate": 5.0242887494734715e-08, "loss": 0.3082, "step": 47181 }, { "epoch": 4.796868645790972, "grad_norm": 0.271487832069397, "learning_rate": 5.0192716297818856e-08, "loss": 0.2814, "step": 47182 }, { "epoch": 4.796970313135421, "grad_norm": 0.27589523792266846, "learning_rate": 5.01425700368563e-08, "loss": 0.2793, "step": 47183 }, { "epoch": 4.79707198047987, "grad_norm": 0.2638917565345764, "learning_rate": 5.009244871209851e-08, "loss": 0.3037, "step": 47184 }, { "epoch": 4.7971736478243185, "grad_norm": 0.26590368151664734, "learning_rate": 5.004235232379917e-08, "loss": 0.2939, "step": 47185 }, { "epoch": 4.7972753151687675, "grad_norm": 0.2785415053367615, "learning_rate": 4.9992280872209756e-08, "loss": 0.3208, "step": 47186 }, { "epoch": 4.797376982513216, "grad_norm": 0.26833483576774597, "learning_rate": 4.994223435758394e-08, "loss": 0.3005, "step": 47187 }, { "epoch": 4.797478649857665, "grad_norm": 0.2896609902381897, "learning_rate": 4.9892212780172086e-08, "loss": 0.317, "step": 47188 }, { "epoch": 4.797580317202114, "grad_norm": 0.2734065651893616, "learning_rate": 4.9842216140226774e-08, "loss": 0.2681, "step": 47189 }, { "epoch": 4.797681984546564, "grad_norm": 0.2557665705680847, "learning_rate": 4.979224443800057e-08, "loss": 0.2974, "step": 47190 }, { "epoch": 4.797783651891013, "grad_norm": 0.2594752311706543, "learning_rate": 4.974229767374439e-08, "loss": 0.2801, "step": 47191 }, { "epoch": 4.797885319235462, "grad_norm": 0.29441550374031067, "learning_rate": 4.969237584771025e-08, "loss": 0.3186, "step": 47192 }, { "epoch": 4.797986986579911, "grad_norm": 0.2734423577785492, "learning_rate": 4.964247896014962e-08, "loss": 0.3193, "step": 47193 }, { "epoch": 4.79808865392436, "grad_norm": 0.2766534090042114, "learning_rate": 4.959260701131341e-08, "loss": 0.2956, "step": 47194 }, { "epoch": 4.798190321268809, "grad_norm": 0.28702792525291443, "learning_rate": 4.954276000145364e-08, "loss": 0.2685, "step": 47195 }, { "epoch": 4.798291988613258, "grad_norm": 0.2878904640674591, "learning_rate": 4.949293793082066e-08, "loss": 0.3286, "step": 47196 }, { "epoch": 4.798393655957707, "grad_norm": 0.2735401391983032, "learning_rate": 4.94431407996665e-08, "loss": 0.268, "step": 47197 }, { "epoch": 4.798495323302156, "grad_norm": 0.28446894884109497, "learning_rate": 4.93933686082404e-08, "loss": 0.3023, "step": 47198 }, { "epoch": 4.798596990646605, "grad_norm": 0.26712465286254883, "learning_rate": 4.934362135679438e-08, "loss": 0.2743, "step": 47199 }, { "epoch": 4.7986986579910536, "grad_norm": 0.2965540289878845, "learning_rate": 4.9293899045579355e-08, "loss": 0.2841, "step": 47200 }, { "epoch": 4.7988003253355025, "grad_norm": 0.2766207754611969, "learning_rate": 4.924420167484456e-08, "loss": 0.2841, "step": 47201 }, { "epoch": 4.798901992679951, "grad_norm": 0.2910883128643036, "learning_rate": 4.9194529244841475e-08, "loss": 0.285, "step": 47202 }, { "epoch": 4.7990036600244, "grad_norm": 0.2790822386741638, "learning_rate": 4.9144881755819886e-08, "loss": 0.3017, "step": 47203 }, { "epoch": 4.799105327368849, "grad_norm": 0.26334062218666077, "learning_rate": 4.9095259208029046e-08, "loss": 0.3075, "step": 47204 }, { "epoch": 4.799206994713298, "grad_norm": 0.27743101119995117, "learning_rate": 4.9045661601720416e-08, "loss": 0.2974, "step": 47205 }, { "epoch": 4.799308662057747, "grad_norm": 0.2682972848415375, "learning_rate": 4.899608893714325e-08, "loss": 0.3376, "step": 47206 }, { "epoch": 4.799410329402196, "grad_norm": 0.2793549597263336, "learning_rate": 4.894654121454734e-08, "loss": 0.2817, "step": 47207 }, { "epoch": 4.799511996746645, "grad_norm": 0.3056706190109253, "learning_rate": 4.889701843418193e-08, "loss": 0.2619, "step": 47208 }, { "epoch": 4.799613664091094, "grad_norm": 0.2558193802833557, "learning_rate": 4.8847520596296273e-08, "loss": 0.3022, "step": 47209 }, { "epoch": 4.799715331435543, "grad_norm": 0.2944421172142029, "learning_rate": 4.879804770114127e-08, "loss": 0.2896, "step": 47210 }, { "epoch": 4.799816998779992, "grad_norm": 0.30428019165992737, "learning_rate": 4.874859974896451e-08, "loss": 0.2503, "step": 47211 }, { "epoch": 4.799918666124441, "grad_norm": 0.27509239315986633, "learning_rate": 4.869917674001579e-08, "loss": 0.2848, "step": 47212 }, { "epoch": 4.80002033346889, "grad_norm": 0.27127423882484436, "learning_rate": 4.8649778674544347e-08, "loss": 0.3018, "step": 47213 }, { "epoch": 4.800122000813339, "grad_norm": 0.2939029932022095, "learning_rate": 4.8600405552798323e-08, "loss": 0.296, "step": 47214 }, { "epoch": 4.800223668157788, "grad_norm": 0.26490625739097595, "learning_rate": 4.8551057375026965e-08, "loss": 0.3337, "step": 47215 }, { "epoch": 4.800325335502237, "grad_norm": 0.2979663908481598, "learning_rate": 4.850173414147896e-08, "loss": 0.2748, "step": 47216 }, { "epoch": 4.8004270028466856, "grad_norm": 0.2913733124732971, "learning_rate": 4.845243585240189e-08, "loss": 0.3168, "step": 47217 }, { "epoch": 4.8005286701911345, "grad_norm": 0.24266894161701202, "learning_rate": 4.8403162508045e-08, "loss": 0.3041, "step": 47218 }, { "epoch": 4.800630337535583, "grad_norm": 0.27671051025390625, "learning_rate": 4.835391410865697e-08, "loss": 0.2952, "step": 47219 }, { "epoch": 4.800732004880032, "grad_norm": 0.24760222434997559, "learning_rate": 4.8304690654484844e-08, "loss": 0.3005, "step": 47220 }, { "epoch": 4.800833672224481, "grad_norm": 0.3018244504928589, "learning_rate": 4.8255492145776186e-08, "loss": 0.2708, "step": 47221 }, { "epoch": 4.80093533956893, "grad_norm": 0.2911583483219147, "learning_rate": 4.82063185827808e-08, "loss": 0.2909, "step": 47222 }, { "epoch": 4.801037006913379, "grad_norm": 0.28042569756507874, "learning_rate": 4.81571699657446e-08, "loss": 0.3009, "step": 47223 }, { "epoch": 4.801138674257828, "grad_norm": 0.2811630964279175, "learning_rate": 4.8108046294916277e-08, "loss": 0.2733, "step": 47224 }, { "epoch": 4.801240341602277, "grad_norm": 0.31377261877059937, "learning_rate": 4.80589475705423e-08, "loss": 0.2885, "step": 47225 }, { "epoch": 4.801342008946726, "grad_norm": 0.28265318274497986, "learning_rate": 4.800987379287081e-08, "loss": 0.2971, "step": 47226 }, { "epoch": 4.801443676291175, "grad_norm": 0.25386908650398254, "learning_rate": 4.7960824962149375e-08, "loss": 0.3018, "step": 47227 }, { "epoch": 4.801545343635624, "grad_norm": 0.2706495225429535, "learning_rate": 4.791180107862392e-08, "loss": 0.2566, "step": 47228 }, { "epoch": 4.801647010980073, "grad_norm": 0.27259156107902527, "learning_rate": 4.786280214254202e-08, "loss": 0.274, "step": 47229 }, { "epoch": 4.801748678324522, "grad_norm": 0.2735154628753662, "learning_rate": 4.78138281541507e-08, "loss": 0.3109, "step": 47230 }, { "epoch": 4.801850345668971, "grad_norm": 0.2592603266239166, "learning_rate": 4.776487911369643e-08, "loss": 0.2818, "step": 47231 }, { "epoch": 4.80195201301342, "grad_norm": 0.2956441640853882, "learning_rate": 4.7715955021425676e-08, "loss": 0.3331, "step": 47232 }, { "epoch": 4.802053680357869, "grad_norm": 0.28657013177871704, "learning_rate": 4.7667055877585466e-08, "loss": 0.2911, "step": 47233 }, { "epoch": 4.802155347702318, "grad_norm": 0.2659457325935364, "learning_rate": 4.7618181682421714e-08, "loss": 0.2901, "step": 47234 }, { "epoch": 4.8022570150467665, "grad_norm": 0.28141555190086365, "learning_rate": 4.756933243618034e-08, "loss": 0.3023, "step": 47235 }, { "epoch": 4.802358682391216, "grad_norm": 0.30126771330833435, "learning_rate": 4.75205081391078e-08, "loss": 0.2902, "step": 47236 }, { "epoch": 4.802460349735665, "grad_norm": 0.27640673518180847, "learning_rate": 4.7471708791450575e-08, "loss": 0.2808, "step": 47237 }, { "epoch": 4.802562017080114, "grad_norm": 0.26817557215690613, "learning_rate": 4.742293439345347e-08, "loss": 0.2854, "step": 47238 }, { "epoch": 4.802663684424563, "grad_norm": 0.2783786356449127, "learning_rate": 4.737418494536294e-08, "loss": 0.2811, "step": 47239 }, { "epoch": 4.802765351769012, "grad_norm": 0.2781177759170532, "learning_rate": 4.7325460447424365e-08, "loss": 0.2909, "step": 47240 }, { "epoch": 4.802867019113461, "grad_norm": 0.26681336760520935, "learning_rate": 4.727676089988309e-08, "loss": 0.2844, "step": 47241 }, { "epoch": 4.80296868645791, "grad_norm": 0.27706006169319153, "learning_rate": 4.722808630298448e-08, "loss": 0.3195, "step": 47242 }, { "epoch": 4.803070353802359, "grad_norm": 0.29046913981437683, "learning_rate": 4.717943665697389e-08, "loss": 0.3183, "step": 47243 }, { "epoch": 4.803172021146808, "grad_norm": 0.28697943687438965, "learning_rate": 4.713081196209668e-08, "loss": 0.2943, "step": 47244 }, { "epoch": 4.803273688491257, "grad_norm": 0.2975521385669708, "learning_rate": 4.70822122185971e-08, "loss": 0.3145, "step": 47245 }, { "epoch": 4.803375355835706, "grad_norm": 0.2858089208602905, "learning_rate": 4.7033637426719955e-08, "loss": 0.2715, "step": 47246 }, { "epoch": 4.803477023180155, "grad_norm": 0.2690706253051758, "learning_rate": 4.6985087586711166e-08, "loss": 0.3151, "step": 47247 }, { "epoch": 4.803578690524604, "grad_norm": 0.3030882775783539, "learning_rate": 4.693656269881386e-08, "loss": 0.2985, "step": 47248 }, { "epoch": 4.803680357869053, "grad_norm": 0.3056630790233612, "learning_rate": 4.68880627632734e-08, "loss": 0.2913, "step": 47249 }, { "epoch": 4.8037820252135015, "grad_norm": 0.26435452699661255, "learning_rate": 4.683958778033404e-08, "loss": 0.2815, "step": 47250 }, { "epoch": 4.8038836925579504, "grad_norm": 0.29302117228507996, "learning_rate": 4.679113775023891e-08, "loss": 0.2911, "step": 47251 }, { "epoch": 4.803985359902399, "grad_norm": 0.2752752900123596, "learning_rate": 4.6742712673233934e-08, "loss": 0.3063, "step": 47252 }, { "epoch": 4.804087027246848, "grad_norm": 0.3023190200328827, "learning_rate": 4.6694312549561694e-08, "loss": 0.267, "step": 47253 }, { "epoch": 4.804188694591297, "grad_norm": 0.285858690738678, "learning_rate": 4.6645937379466435e-08, "loss": 0.2692, "step": 47254 }, { "epoch": 4.804290361935746, "grad_norm": 0.2945820987224579, "learning_rate": 4.659758716319185e-08, "loss": 0.2984, "step": 47255 }, { "epoch": 4.804392029280195, "grad_norm": 0.2768661081790924, "learning_rate": 4.6549261900981636e-08, "loss": 0.3281, "step": 47256 }, { "epoch": 4.804493696624644, "grad_norm": 0.2588914632797241, "learning_rate": 4.650096159307893e-08, "loss": 0.321, "step": 47257 }, { "epoch": 4.804595363969093, "grad_norm": 0.28715160489082336, "learning_rate": 4.645268623972743e-08, "loss": 0.3234, "step": 47258 }, { "epoch": 4.804697031313542, "grad_norm": 0.2853516936302185, "learning_rate": 4.640443584117027e-08, "loss": 0.3113, "step": 47259 }, { "epoch": 4.804798698657991, "grad_norm": 0.26876288652420044, "learning_rate": 4.6356210397650035e-08, "loss": 0.2724, "step": 47260 }, { "epoch": 4.80490036600244, "grad_norm": 0.27719563245773315, "learning_rate": 4.630800990940987e-08, "loss": 0.2784, "step": 47261 }, { "epoch": 4.805002033346889, "grad_norm": 0.28654682636260986, "learning_rate": 4.625983437669346e-08, "loss": 0.2732, "step": 47262 }, { "epoch": 4.805103700691338, "grad_norm": 0.25722813606262207, "learning_rate": 4.621168379974228e-08, "loss": 0.3278, "step": 47263 }, { "epoch": 4.805205368035787, "grad_norm": 0.2899083197116852, "learning_rate": 4.6163558178800026e-08, "loss": 0.3029, "step": 47264 }, { "epoch": 4.805307035380236, "grad_norm": 0.3072196841239929, "learning_rate": 4.611545751410762e-08, "loss": 0.2913, "step": 47265 }, { "epoch": 4.805408702724685, "grad_norm": 0.2741663157939911, "learning_rate": 4.60673818059093e-08, "loss": 0.301, "step": 47266 }, { "epoch": 4.8055103700691335, "grad_norm": 0.26020801067352295, "learning_rate": 4.601933105444545e-08, "loss": 0.2953, "step": 47267 }, { "epoch": 4.8056120374135824, "grad_norm": 0.282870888710022, "learning_rate": 4.5971305259959186e-08, "loss": 0.2666, "step": 47268 }, { "epoch": 4.805713704758031, "grad_norm": 0.2653392255306244, "learning_rate": 4.592330442269255e-08, "loss": 0.2574, "step": 47269 }, { "epoch": 4.80581537210248, "grad_norm": 0.2923060357570648, "learning_rate": 4.587532854288701e-08, "loss": 0.2815, "step": 47270 }, { "epoch": 4.805917039446929, "grad_norm": 0.2688867449760437, "learning_rate": 4.582737762078404e-08, "loss": 0.2793, "step": 47271 }, { "epoch": 4.806018706791379, "grad_norm": 0.27346163988113403, "learning_rate": 4.5779451656625675e-08, "loss": 0.3163, "step": 47272 }, { "epoch": 4.806120374135828, "grad_norm": 0.2826465964317322, "learning_rate": 4.573155065065282e-08, "loss": 0.3042, "step": 47273 }, { "epoch": 4.806222041480277, "grad_norm": 0.2934112846851349, "learning_rate": 4.568367460310752e-08, "loss": 0.2994, "step": 47274 }, { "epoch": 4.806323708824726, "grad_norm": 0.2732071578502655, "learning_rate": 4.563582351423013e-08, "loss": 0.3164, "step": 47275 }, { "epoch": 4.806425376169175, "grad_norm": 0.31241732835769653, "learning_rate": 4.5587997384262115e-08, "loss": 0.2476, "step": 47276 }, { "epoch": 4.806527043513624, "grad_norm": 0.2897777855396271, "learning_rate": 4.554019621344496e-08, "loss": 0.2934, "step": 47277 }, { "epoch": 4.806628710858073, "grad_norm": 0.2842976450920105, "learning_rate": 4.5492420002018476e-08, "loss": 0.2653, "step": 47278 }, { "epoch": 4.806730378202522, "grad_norm": 0.2671264410018921, "learning_rate": 4.544466875022413e-08, "loss": 0.3031, "step": 47279 }, { "epoch": 4.806832045546971, "grad_norm": 0.28935712575912476, "learning_rate": 4.539694245830173e-08, "loss": 0.2998, "step": 47280 }, { "epoch": 4.80693371289142, "grad_norm": 0.29152554273605347, "learning_rate": 4.534924112649219e-08, "loss": 0.2906, "step": 47281 }, { "epoch": 4.8070353802358685, "grad_norm": 0.26838964223861694, "learning_rate": 4.530156475503589e-08, "loss": 0.2806, "step": 47282 }, { "epoch": 4.8071370475803175, "grad_norm": 0.302854061126709, "learning_rate": 4.5253913344173175e-08, "loss": 0.3398, "step": 47283 }, { "epoch": 4.807238714924766, "grad_norm": 0.26096242666244507, "learning_rate": 4.520628689414386e-08, "loss": 0.3026, "step": 47284 }, { "epoch": 4.807340382269215, "grad_norm": 0.2872250974178314, "learning_rate": 4.51586854051872e-08, "loss": 0.2948, "step": 47285 }, { "epoch": 4.807442049613664, "grad_norm": 0.3051671087741852, "learning_rate": 4.511110887754411e-08, "loss": 0.2996, "step": 47286 }, { "epoch": 4.807543716958113, "grad_norm": 0.2766726613044739, "learning_rate": 4.506355731145384e-08, "loss": 0.2854, "step": 47287 }, { "epoch": 4.807645384302562, "grad_norm": 0.2720559537410736, "learning_rate": 4.50160307071551e-08, "loss": 0.2865, "step": 47288 }, { "epoch": 4.807747051647011, "grad_norm": 0.26910829544067383, "learning_rate": 4.496852906488936e-08, "loss": 0.2826, "step": 47289 }, { "epoch": 4.80784871899146, "grad_norm": 0.27197688817977905, "learning_rate": 4.4921052384893636e-08, "loss": 0.3156, "step": 47290 }, { "epoch": 4.807950386335909, "grad_norm": 0.287531316280365, "learning_rate": 4.4873600667408315e-08, "loss": 0.3147, "step": 47291 }, { "epoch": 4.808052053680358, "grad_norm": 0.2856345474720001, "learning_rate": 4.4826173912672633e-08, "loss": 0.2658, "step": 47292 }, { "epoch": 4.808153721024807, "grad_norm": 0.28412193059921265, "learning_rate": 4.4778772120925296e-08, "loss": 0.3119, "step": 47293 }, { "epoch": 4.808255388369256, "grad_norm": 0.2896385192871094, "learning_rate": 4.4731395292404444e-08, "loss": 0.2771, "step": 47294 }, { "epoch": 4.808357055713705, "grad_norm": 0.29436472058296204, "learning_rate": 4.468404342734933e-08, "loss": 0.2911, "step": 47295 }, { "epoch": 4.808458723058154, "grad_norm": 0.2862078547477722, "learning_rate": 4.463671652599866e-08, "loss": 0.2923, "step": 47296 }, { "epoch": 4.808560390402603, "grad_norm": 0.2779068350791931, "learning_rate": 4.458941458859112e-08, "loss": 0.2642, "step": 47297 }, { "epoch": 4.808662057747052, "grad_norm": 0.27244821190834045, "learning_rate": 4.454213761536374e-08, "loss": 0.3273, "step": 47298 }, { "epoch": 4.8087637250915005, "grad_norm": 0.2609326243400574, "learning_rate": 4.4494885606555796e-08, "loss": 0.286, "step": 47299 }, { "epoch": 4.8088653924359495, "grad_norm": 0.27605241537094116, "learning_rate": 4.444765856240485e-08, "loss": 0.3172, "step": 47300 }, { "epoch": 4.808967059780398, "grad_norm": 0.29780814051628113, "learning_rate": 4.4400456483149056e-08, "loss": 0.3002, "step": 47301 }, { "epoch": 4.809068727124847, "grad_norm": 0.28792861104011536, "learning_rate": 4.4353279369026003e-08, "loss": 0.273, "step": 47302 }, { "epoch": 4.809170394469296, "grad_norm": 0.2794300317764282, "learning_rate": 4.4306127220273834e-08, "loss": 0.2769, "step": 47303 }, { "epoch": 4.809272061813745, "grad_norm": 0.29197216033935547, "learning_rate": 4.4259000037129576e-08, "loss": 0.311, "step": 47304 }, { "epoch": 4.809373729158194, "grad_norm": 0.29723891615867615, "learning_rate": 4.421189781983082e-08, "loss": 0.2928, "step": 47305 }, { "epoch": 4.809475396502643, "grad_norm": 0.29598256945610046, "learning_rate": 4.4164820568614595e-08, "loss": 0.3082, "step": 47306 }, { "epoch": 4.809577063847092, "grad_norm": 0.26395073533058167, "learning_rate": 4.41177682837185e-08, "loss": 0.3174, "step": 47307 }, { "epoch": 4.809678731191541, "grad_norm": 0.281436562538147, "learning_rate": 4.407074096537956e-08, "loss": 0.3239, "step": 47308 }, { "epoch": 4.80978039853599, "grad_norm": 0.2720002233982086, "learning_rate": 4.40237386138348e-08, "loss": 0.2833, "step": 47309 }, { "epoch": 4.809882065880439, "grad_norm": 0.2715150713920593, "learning_rate": 4.397676122932015e-08, "loss": 0.325, "step": 47310 }, { "epoch": 4.809983733224888, "grad_norm": 0.27454742789268494, "learning_rate": 4.392980881207265e-08, "loss": 0.2994, "step": 47311 }, { "epoch": 4.810085400569337, "grad_norm": 0.2554750442504883, "learning_rate": 4.388288136232988e-08, "loss": 0.272, "step": 47312 }, { "epoch": 4.810187067913786, "grad_norm": 0.27211421728134155, "learning_rate": 4.3835978880326645e-08, "loss": 0.3046, "step": 47313 }, { "epoch": 4.810288735258235, "grad_norm": 0.2840646803379059, "learning_rate": 4.378910136629999e-08, "loss": 0.2827, "step": 47314 }, { "epoch": 4.810390402602684, "grad_norm": 0.2764233350753784, "learning_rate": 4.374224882048639e-08, "loss": 0.3226, "step": 47315 }, { "epoch": 4.8104920699471325, "grad_norm": 0.2835717499256134, "learning_rate": 4.369542124312176e-08, "loss": 0.2585, "step": 47316 }, { "epoch": 4.8105937372915815, "grad_norm": 0.2984941005706787, "learning_rate": 4.3648618634441475e-08, "loss": 0.3043, "step": 47317 }, { "epoch": 4.810695404636031, "grad_norm": 0.2728932797908783, "learning_rate": 4.3601840994681455e-08, "loss": 0.2908, "step": 47318 }, { "epoch": 4.81079707198048, "grad_norm": 0.2844071686267853, "learning_rate": 4.355508832407818e-08, "loss": 0.2729, "step": 47319 }, { "epoch": 4.810898739324929, "grad_norm": 0.28138861060142517, "learning_rate": 4.35083606228659e-08, "loss": 0.2871, "step": 47320 }, { "epoch": 4.811000406669378, "grad_norm": 0.2893526554107666, "learning_rate": 4.346165789128054e-08, "loss": 0.2925, "step": 47321 }, { "epoch": 4.811102074013827, "grad_norm": 0.26513922214508057, "learning_rate": 4.3414980129558026e-08, "loss": 0.2638, "step": 47322 }, { "epoch": 4.811203741358276, "grad_norm": 0.2858467400074005, "learning_rate": 4.336832733793261e-08, "loss": 0.3038, "step": 47323 }, { "epoch": 4.811305408702725, "grad_norm": 0.265450656414032, "learning_rate": 4.3321699516640225e-08, "loss": 0.2773, "step": 47324 }, { "epoch": 4.811407076047174, "grad_norm": 0.26980018615722656, "learning_rate": 4.3275096665914565e-08, "loss": 0.268, "step": 47325 }, { "epoch": 4.811508743391623, "grad_norm": 0.269605815410614, "learning_rate": 4.3228518785991546e-08, "loss": 0.3093, "step": 47326 }, { "epoch": 4.811610410736072, "grad_norm": 0.26680347323417664, "learning_rate": 4.318196587710488e-08, "loss": 0.2994, "step": 47327 }, { "epoch": 4.811712078080521, "grad_norm": 0.29801714420318604, "learning_rate": 4.3135437939489934e-08, "loss": 0.2787, "step": 47328 }, { "epoch": 4.81181374542497, "grad_norm": 0.27030718326568604, "learning_rate": 4.3088934973380956e-08, "loss": 0.3031, "step": 47329 }, { "epoch": 4.811915412769419, "grad_norm": 0.28561776876449585, "learning_rate": 4.3042456979011105e-08, "loss": 0.3035, "step": 47330 }, { "epoch": 4.8120170801138675, "grad_norm": 0.27147307991981506, "learning_rate": 4.299600395661574e-08, "loss": 0.3107, "step": 47331 }, { "epoch": 4.8121187474583165, "grad_norm": 0.2928285300731659, "learning_rate": 4.294957590642912e-08, "loss": 0.2857, "step": 47332 }, { "epoch": 4.812220414802765, "grad_norm": 0.2855457365512848, "learning_rate": 4.290317282868439e-08, "loss": 0.2912, "step": 47333 }, { "epoch": 4.812322082147214, "grad_norm": 0.2728137671947479, "learning_rate": 4.2856794723615256e-08, "loss": 0.3232, "step": 47334 }, { "epoch": 4.812423749491663, "grad_norm": 0.2734616696834564, "learning_rate": 4.281044159145542e-08, "loss": 0.303, "step": 47335 }, { "epoch": 4.812525416836112, "grad_norm": 0.2951783537864685, "learning_rate": 4.276411343243858e-08, "loss": 0.3014, "step": 47336 }, { "epoch": 4.812627084180561, "grad_norm": 0.2899837791919708, "learning_rate": 4.271781024679844e-08, "loss": 0.2879, "step": 47337 }, { "epoch": 4.81272875152501, "grad_norm": 0.2652095854282379, "learning_rate": 4.2671532034767595e-08, "loss": 0.2931, "step": 47338 }, { "epoch": 4.812830418869459, "grad_norm": 0.2758394479751587, "learning_rate": 4.262527879658029e-08, "loss": 0.3034, "step": 47339 }, { "epoch": 4.812932086213908, "grad_norm": 0.2649509906768799, "learning_rate": 4.257905053246802e-08, "loss": 0.255, "step": 47340 }, { "epoch": 4.813033753558357, "grad_norm": 0.26310214400291443, "learning_rate": 4.2532847242664486e-08, "loss": 0.3046, "step": 47341 }, { "epoch": 4.813135420902806, "grad_norm": 0.2794552445411682, "learning_rate": 4.248666892740283e-08, "loss": 0.3144, "step": 47342 }, { "epoch": 4.813237088247255, "grad_norm": 0.2849773168563843, "learning_rate": 4.244051558691453e-08, "loss": 0.2674, "step": 47343 }, { "epoch": 4.813338755591704, "grad_norm": 0.2887606918811798, "learning_rate": 4.2394387221433295e-08, "loss": 0.2947, "step": 47344 }, { "epoch": 4.813440422936153, "grad_norm": 0.26908016204833984, "learning_rate": 4.234828383119116e-08, "loss": 0.2955, "step": 47345 }, { "epoch": 4.813542090280602, "grad_norm": 0.26599615812301636, "learning_rate": 4.2302205416420164e-08, "loss": 0.272, "step": 47346 }, { "epoch": 4.813643757625051, "grad_norm": 0.31315305829048157, "learning_rate": 4.225615197735233e-08, "loss": 0.3064, "step": 47347 }, { "epoch": 4.8137454249694995, "grad_norm": 0.2820567786693573, "learning_rate": 4.2210123514219715e-08, "loss": 0.2966, "step": 47348 }, { "epoch": 4.8138470923139485, "grad_norm": 0.3063001334667206, "learning_rate": 4.21641200272549e-08, "loss": 0.2988, "step": 47349 }, { "epoch": 4.813948759658397, "grad_norm": 0.2800694406032562, "learning_rate": 4.21181415166888e-08, "loss": 0.3059, "step": 47350 }, { "epoch": 4.814050427002846, "grad_norm": 0.2647712528705597, "learning_rate": 4.207218798275292e-08, "loss": 0.2897, "step": 47351 }, { "epoch": 4.814152094347295, "grad_norm": 0.2854072153568268, "learning_rate": 4.202625942567984e-08, "loss": 0.2932, "step": 47352 }, { "epoch": 4.814253761691744, "grad_norm": 0.28871750831604004, "learning_rate": 4.198035584569993e-08, "loss": 0.2952, "step": 47353 }, { "epoch": 4.814355429036194, "grad_norm": 0.2628060281276703, "learning_rate": 4.193447724304467e-08, "loss": 0.2775, "step": 47354 }, { "epoch": 4.814457096380643, "grad_norm": 0.29687806963920593, "learning_rate": 4.188862361794554e-08, "loss": 0.2782, "step": 47355 }, { "epoch": 4.814558763725092, "grad_norm": 0.3030003011226654, "learning_rate": 4.184279497063293e-08, "loss": 0.2854, "step": 47356 }, { "epoch": 4.814660431069541, "grad_norm": 0.27238166332244873, "learning_rate": 4.1796991301338854e-08, "loss": 0.3007, "step": 47357 }, { "epoch": 4.81476209841399, "grad_norm": 0.2713262140750885, "learning_rate": 4.175121261029258e-08, "loss": 0.2969, "step": 47358 }, { "epoch": 4.814863765758439, "grad_norm": 0.2870499789714813, "learning_rate": 4.170545889772615e-08, "loss": 0.2926, "step": 47359 }, { "epoch": 4.814965433102888, "grad_norm": 0.2747756242752075, "learning_rate": 4.165973016386826e-08, "loss": 0.2556, "step": 47360 }, { "epoch": 4.815067100447337, "grad_norm": 0.27186867594718933, "learning_rate": 4.161402640895096e-08, "loss": 0.3229, "step": 47361 }, { "epoch": 4.815168767791786, "grad_norm": 0.24682599306106567, "learning_rate": 4.156834763320405e-08, "loss": 0.2971, "step": 47362 }, { "epoch": 4.8152704351362345, "grad_norm": 0.2956978976726532, "learning_rate": 4.152269383685737e-08, "loss": 0.2923, "step": 47363 }, { "epoch": 4.8153721024806835, "grad_norm": 0.25785624980926514, "learning_rate": 4.147706502014126e-08, "loss": 0.3027, "step": 47364 }, { "epoch": 4.815473769825132, "grad_norm": 0.2619728446006775, "learning_rate": 4.1431461183285006e-08, "loss": 0.2713, "step": 47365 }, { "epoch": 4.815575437169581, "grad_norm": 0.26840344071388245, "learning_rate": 4.138588232651841e-08, "loss": 0.2863, "step": 47366 }, { "epoch": 4.81567710451403, "grad_norm": 0.2670869827270508, "learning_rate": 4.134032845007241e-08, "loss": 0.2676, "step": 47367 }, { "epoch": 4.815778771858479, "grad_norm": 0.296452134847641, "learning_rate": 4.1294799554174594e-08, "loss": 0.3053, "step": 47368 }, { "epoch": 4.815880439202928, "grad_norm": 0.2720869183540344, "learning_rate": 4.1249295639055884e-08, "loss": 0.3258, "step": 47369 }, { "epoch": 4.815982106547377, "grad_norm": 0.28411924839019775, "learning_rate": 4.120381670494389e-08, "loss": 0.3109, "step": 47370 }, { "epoch": 4.816083773891826, "grad_norm": 0.2828728258609772, "learning_rate": 4.115836275206952e-08, "loss": 0.2636, "step": 47371 }, { "epoch": 4.816185441236275, "grad_norm": 0.2855026125907898, "learning_rate": 4.111293378066095e-08, "loss": 0.3015, "step": 47372 }, { "epoch": 4.816287108580724, "grad_norm": 0.28015872836112976, "learning_rate": 4.10675297909463e-08, "loss": 0.3011, "step": 47373 }, { "epoch": 4.816388775925173, "grad_norm": 0.27771201729774475, "learning_rate": 4.102215078315541e-08, "loss": 0.2996, "step": 47374 }, { "epoch": 4.816490443269622, "grad_norm": 0.27099329233169556, "learning_rate": 4.0976796757516426e-08, "loss": 0.2845, "step": 47375 }, { "epoch": 4.816592110614071, "grad_norm": 0.2607892155647278, "learning_rate": 4.093146771425804e-08, "loss": 0.2779, "step": 47376 }, { "epoch": 4.81669377795852, "grad_norm": 0.28325772285461426, "learning_rate": 4.088616365360842e-08, "loss": 0.2866, "step": 47377 }, { "epoch": 4.816795445302969, "grad_norm": 0.293424129486084, "learning_rate": 4.084088457579572e-08, "loss": 0.2855, "step": 47378 }, { "epoch": 4.816897112647418, "grad_norm": 0.2801510989665985, "learning_rate": 4.0795630481048624e-08, "loss": 0.3033, "step": 47379 }, { "epoch": 4.8169987799918665, "grad_norm": 0.27268731594085693, "learning_rate": 4.075040136959418e-08, "loss": 0.2982, "step": 47380 }, { "epoch": 4.8171004473363155, "grad_norm": 0.2935836911201477, "learning_rate": 4.070519724166056e-08, "loss": 0.2888, "step": 47381 }, { "epoch": 4.817202114680764, "grad_norm": 0.2787972688674927, "learning_rate": 4.0660018097475884e-08, "loss": 0.2879, "step": 47382 }, { "epoch": 4.817303782025213, "grad_norm": 0.2774066925048828, "learning_rate": 4.061486393726777e-08, "loss": 0.2827, "step": 47383 }, { "epoch": 4.817405449369662, "grad_norm": 0.2783544361591339, "learning_rate": 4.0569734761263245e-08, "loss": 0.3117, "step": 47384 }, { "epoch": 4.817507116714111, "grad_norm": 0.2763867974281311, "learning_rate": 4.052463056968992e-08, "loss": 0.2691, "step": 47385 }, { "epoch": 4.81760878405856, "grad_norm": 0.28271883726119995, "learning_rate": 4.047955136277426e-08, "loss": 0.2815, "step": 47386 }, { "epoch": 4.817710451403009, "grad_norm": 0.2698878049850464, "learning_rate": 4.0434497140744986e-08, "loss": 0.2813, "step": 47387 }, { "epoch": 4.817812118747458, "grad_norm": 0.26805469393730164, "learning_rate": 4.038946790382803e-08, "loss": 0.314, "step": 47388 }, { "epoch": 4.817913786091907, "grad_norm": 0.2592840790748596, "learning_rate": 4.0344463652249865e-08, "loss": 0.3041, "step": 47389 }, { "epoch": 4.818015453436356, "grad_norm": 0.26789307594299316, "learning_rate": 4.0299484386238095e-08, "loss": 0.2842, "step": 47390 }, { "epoch": 4.818117120780805, "grad_norm": 0.2812808156013489, "learning_rate": 4.0254530106018655e-08, "loss": 0.2879, "step": 47391 }, { "epoch": 4.818218788125254, "grad_norm": 0.2671055197715759, "learning_rate": 4.020960081181857e-08, "loss": 0.2821, "step": 47392 }, { "epoch": 4.818320455469703, "grad_norm": 0.27562761306762695, "learning_rate": 4.016469650386379e-08, "loss": 0.2899, "step": 47393 }, { "epoch": 4.818422122814152, "grad_norm": 0.3012910485267639, "learning_rate": 4.0119817182380225e-08, "loss": 0.3131, "step": 47394 }, { "epoch": 4.818523790158601, "grad_norm": 0.26963257789611816, "learning_rate": 4.0074962847594935e-08, "loss": 0.2921, "step": 47395 }, { "epoch": 4.81862545750305, "grad_norm": 0.3142354190349579, "learning_rate": 4.0030133499732724e-08, "loss": 0.287, "step": 47396 }, { "epoch": 4.8187271248474985, "grad_norm": 0.2630466818809509, "learning_rate": 3.9985329139020645e-08, "loss": 0.3016, "step": 47397 }, { "epoch": 4.8188287921919475, "grad_norm": 0.28453773260116577, "learning_rate": 3.994054976568351e-08, "loss": 0.2566, "step": 47398 }, { "epoch": 4.818930459536396, "grad_norm": 0.29152023792266846, "learning_rate": 3.9895795379947256e-08, "loss": 0.3062, "step": 47399 }, { "epoch": 4.819032126880846, "grad_norm": 0.27307724952697754, "learning_rate": 3.985106598203725e-08, "loss": 0.2935, "step": 47400 }, { "epoch": 4.819133794225295, "grad_norm": 0.2775375545024872, "learning_rate": 3.9806361572178874e-08, "loss": 0.3123, "step": 47401 }, { "epoch": 4.819235461569744, "grad_norm": 0.256734699010849, "learning_rate": 3.97616821505975e-08, "loss": 0.2797, "step": 47402 }, { "epoch": 4.819337128914193, "grad_norm": 0.2697471082210541, "learning_rate": 3.971702771751795e-08, "loss": 0.2801, "step": 47403 }, { "epoch": 4.819438796258642, "grad_norm": 0.2696388065814972, "learning_rate": 3.967239827316505e-08, "loss": 0.3005, "step": 47404 }, { "epoch": 4.819540463603091, "grad_norm": 0.27457350492477417, "learning_rate": 3.962779381776416e-08, "loss": 0.3184, "step": 47405 }, { "epoch": 4.81964213094754, "grad_norm": 0.26792973279953003, "learning_rate": 3.958321435153956e-08, "loss": 0.2883, "step": 47406 }, { "epoch": 4.819743798291989, "grad_norm": 0.2920264005661011, "learning_rate": 3.95386598747155e-08, "loss": 0.3091, "step": 47407 }, { "epoch": 4.819845465636438, "grad_norm": 0.26357367634773254, "learning_rate": 3.949413038751737e-08, "loss": 0.3097, "step": 47408 }, { "epoch": 4.819947132980887, "grad_norm": 0.3109276592731476, "learning_rate": 3.9449625890168876e-08, "loss": 0.2857, "step": 47409 }, { "epoch": 4.820048800325336, "grad_norm": 0.29679784178733826, "learning_rate": 3.940514638289428e-08, "loss": 0.2867, "step": 47410 }, { "epoch": 4.820150467669785, "grad_norm": 0.2788788080215454, "learning_rate": 3.93606918659184e-08, "loss": 0.28, "step": 47411 }, { "epoch": 4.8202521350142336, "grad_norm": 0.2601011097431183, "learning_rate": 3.931626233946384e-08, "loss": 0.3235, "step": 47412 }, { "epoch": 4.8203538023586825, "grad_norm": 0.2651963531970978, "learning_rate": 3.9271857803754864e-08, "loss": 0.2875, "step": 47413 }, { "epoch": 4.820455469703131, "grad_norm": 0.2762024998664856, "learning_rate": 3.9227478259016294e-08, "loss": 0.2897, "step": 47414 }, { "epoch": 4.82055713704758, "grad_norm": 0.28321120142936707, "learning_rate": 3.9183123705470174e-08, "loss": 0.2923, "step": 47415 }, { "epoch": 4.820658804392029, "grad_norm": 0.2762024700641632, "learning_rate": 3.913879414334076e-08, "loss": 0.2638, "step": 47416 }, { "epoch": 4.820760471736478, "grad_norm": 0.2727402150630951, "learning_rate": 3.909448957285123e-08, "loss": 0.2808, "step": 47417 }, { "epoch": 4.820862139080927, "grad_norm": 0.26691561937332153, "learning_rate": 3.905020999422471e-08, "loss": 0.2798, "step": 47418 }, { "epoch": 4.820963806425376, "grad_norm": 0.2911287844181061, "learning_rate": 3.900595540768437e-08, "loss": 0.3002, "step": 47419 }, { "epoch": 4.821065473769825, "grad_norm": 0.2909146845340729, "learning_rate": 3.896172581345337e-08, "loss": 0.2828, "step": 47420 }, { "epoch": 4.821167141114274, "grad_norm": 0.28286561369895935, "learning_rate": 3.891752121175374e-08, "loss": 0.2951, "step": 47421 }, { "epoch": 4.821268808458723, "grad_norm": 0.30464479327201843, "learning_rate": 3.88733416028092e-08, "loss": 0.2631, "step": 47422 }, { "epoch": 4.821370475803172, "grad_norm": 0.2717580199241638, "learning_rate": 3.882918698684179e-08, "loss": 0.3098, "step": 47423 }, { "epoch": 4.821472143147621, "grad_norm": 0.2675895690917969, "learning_rate": 3.878505736407356e-08, "loss": 0.2934, "step": 47424 }, { "epoch": 4.82157381049207, "grad_norm": 0.27348652482032776, "learning_rate": 3.874095273472767e-08, "loss": 0.2938, "step": 47425 }, { "epoch": 4.821675477836519, "grad_norm": 0.2745998501777649, "learning_rate": 3.869687309902559e-08, "loss": 0.2661, "step": 47426 }, { "epoch": 4.821777145180968, "grad_norm": 0.3046276569366455, "learning_rate": 3.8652818457189935e-08, "loss": 0.2875, "step": 47427 }, { "epoch": 4.821878812525417, "grad_norm": 0.29363295435905457, "learning_rate": 3.860878880944219e-08, "loss": 0.2915, "step": 47428 }, { "epoch": 4.8219804798698656, "grad_norm": 0.31069451570510864, "learning_rate": 3.8564784156004396e-08, "loss": 0.2886, "step": 47429 }, { "epoch": 4.8220821472143145, "grad_norm": 0.3007517158985138, "learning_rate": 3.8520804497098055e-08, "loss": 0.3044, "step": 47430 }, { "epoch": 4.822183814558763, "grad_norm": 0.2805578112602234, "learning_rate": 3.84768498329452e-08, "loss": 0.2864, "step": 47431 }, { "epoch": 4.822285481903212, "grad_norm": 0.26840928196907043, "learning_rate": 3.8432920163766765e-08, "loss": 0.2836, "step": 47432 }, { "epoch": 4.822387149247661, "grad_norm": 0.26185211539268494, "learning_rate": 3.8389015489784244e-08, "loss": 0.2624, "step": 47433 }, { "epoch": 4.82248881659211, "grad_norm": 0.298345685005188, "learning_rate": 3.834513581121913e-08, "loss": 0.2948, "step": 47434 }, { "epoch": 4.822590483936559, "grad_norm": 0.27520862221717834, "learning_rate": 3.830128112829179e-08, "loss": 0.2974, "step": 47435 }, { "epoch": 4.822692151281009, "grad_norm": 0.29211682081222534, "learning_rate": 3.825745144122373e-08, "loss": 0.2743, "step": 47436 }, { "epoch": 4.822793818625458, "grad_norm": 0.29211148619651794, "learning_rate": 3.821364675023476e-08, "loss": 0.2988, "step": 47437 }, { "epoch": 4.822895485969907, "grad_norm": 0.2818763852119446, "learning_rate": 3.8169867055547485e-08, "loss": 0.3153, "step": 47438 }, { "epoch": 4.822997153314356, "grad_norm": 0.2845359444618225, "learning_rate": 3.812611235738062e-08, "loss": 0.2754, "step": 47439 }, { "epoch": 4.823098820658805, "grad_norm": 0.28607019782066345, "learning_rate": 3.80823826559551e-08, "loss": 0.3536, "step": 47440 }, { "epoch": 4.823200488003254, "grad_norm": 0.28578200936317444, "learning_rate": 3.8038677951492406e-08, "loss": 0.3111, "step": 47441 }, { "epoch": 4.823302155347703, "grad_norm": 0.2767212688922882, "learning_rate": 3.799499824421071e-08, "loss": 0.2822, "step": 47442 }, { "epoch": 4.823403822692152, "grad_norm": 0.25085267424583435, "learning_rate": 3.795134353433094e-08, "loss": 0.3052, "step": 47443 }, { "epoch": 4.823505490036601, "grad_norm": 0.28453904390335083, "learning_rate": 3.790771382207403e-08, "loss": 0.2811, "step": 47444 }, { "epoch": 4.8236071573810495, "grad_norm": 0.2817996144294739, "learning_rate": 3.7864109107658145e-08, "loss": 0.3027, "step": 47445 }, { "epoch": 4.8237088247254984, "grad_norm": 0.30341240763664246, "learning_rate": 3.7820529391304205e-08, "loss": 0.277, "step": 47446 }, { "epoch": 4.823810492069947, "grad_norm": 0.28619131445884705, "learning_rate": 3.777697467323094e-08, "loss": 0.2746, "step": 47447 }, { "epoch": 4.823912159414396, "grad_norm": 0.28429898619651794, "learning_rate": 3.773344495365816e-08, "loss": 0.3068, "step": 47448 }, { "epoch": 4.824013826758845, "grad_norm": 0.2802497148513794, "learning_rate": 3.768994023280514e-08, "loss": 0.2774, "step": 47449 }, { "epoch": 4.824115494103294, "grad_norm": 0.29045215249061584, "learning_rate": 3.76464605108906e-08, "loss": 0.2766, "step": 47450 }, { "epoch": 4.824217161447743, "grad_norm": 0.2703644931316376, "learning_rate": 3.760300578813436e-08, "loss": 0.2885, "step": 47451 }, { "epoch": 4.824318828792192, "grad_norm": 0.2863353490829468, "learning_rate": 3.755957606475513e-08, "loss": 0.3006, "step": 47452 }, { "epoch": 4.824420496136641, "grad_norm": 0.28365257382392883, "learning_rate": 3.751617134097052e-08, "loss": 0.3151, "step": 47453 }, { "epoch": 4.82452216348109, "grad_norm": 0.2824072241783142, "learning_rate": 3.7472791617000905e-08, "loss": 0.2991, "step": 47454 }, { "epoch": 4.824623830825539, "grad_norm": 0.26833197474479675, "learning_rate": 3.742943689306444e-08, "loss": 0.2684, "step": 47455 }, { "epoch": 4.824725498169988, "grad_norm": 0.2833849787712097, "learning_rate": 3.7386107169378736e-08, "loss": 0.2942, "step": 47456 }, { "epoch": 4.824827165514437, "grad_norm": 0.306844025850296, "learning_rate": 3.734280244616251e-08, "loss": 0.2771, "step": 47457 }, { "epoch": 4.824928832858886, "grad_norm": 0.30802100896835327, "learning_rate": 3.729952272363391e-08, "loss": 0.2896, "step": 47458 }, { "epoch": 4.825030500203335, "grad_norm": 0.3012290596961975, "learning_rate": 3.72562680020111e-08, "loss": 0.2737, "step": 47459 }, { "epoch": 4.825132167547784, "grad_norm": 0.27524203062057495, "learning_rate": 3.721303828151168e-08, "loss": 0.2875, "step": 47460 }, { "epoch": 4.825233834892233, "grad_norm": 0.28061309456825256, "learning_rate": 3.7169833562354376e-08, "loss": 0.3022, "step": 47461 }, { "epoch": 4.8253355022366815, "grad_norm": 0.3000858426094055, "learning_rate": 3.7126653844755114e-08, "loss": 0.2768, "step": 47462 }, { "epoch": 4.8254371695811304, "grad_norm": 0.286015123128891, "learning_rate": 3.708349912893261e-08, "loss": 0.249, "step": 47463 }, { "epoch": 4.825538836925579, "grad_norm": 0.2707328796386719, "learning_rate": 3.704036941510447e-08, "loss": 0.2938, "step": 47464 }, { "epoch": 4.825640504270028, "grad_norm": 0.27743595838546753, "learning_rate": 3.699726470348719e-08, "loss": 0.2959, "step": 47465 }, { "epoch": 4.825742171614477, "grad_norm": 0.28321510553359985, "learning_rate": 3.695418499429892e-08, "loss": 0.3023, "step": 47466 }, { "epoch": 4.825843838958926, "grad_norm": 0.283812016248703, "learning_rate": 3.691113028775506e-08, "loss": 0.2805, "step": 47467 }, { "epoch": 4.825945506303375, "grad_norm": 0.28298598527908325, "learning_rate": 3.6868100584074304e-08, "loss": 0.283, "step": 47468 }, { "epoch": 4.826047173647824, "grad_norm": 0.2976984977722168, "learning_rate": 3.6825095883472603e-08, "loss": 0.2703, "step": 47469 }, { "epoch": 4.826148840992273, "grad_norm": 0.2875056862831116, "learning_rate": 3.678211618616645e-08, "loss": 0.3148, "step": 47470 }, { "epoch": 4.826250508336722, "grad_norm": 0.2829713225364685, "learning_rate": 3.673916149237289e-08, "loss": 0.273, "step": 47471 }, { "epoch": 4.826352175681171, "grad_norm": 0.30220580101013184, "learning_rate": 3.66962318023073e-08, "loss": 0.2901, "step": 47472 }, { "epoch": 4.82645384302562, "grad_norm": 0.2856178283691406, "learning_rate": 3.665332711618674e-08, "loss": 0.278, "step": 47473 }, { "epoch": 4.826555510370069, "grad_norm": 0.27262237668037415, "learning_rate": 3.6610447434227703e-08, "loss": 0.2916, "step": 47474 }, { "epoch": 4.826657177714518, "grad_norm": 0.2786535322666168, "learning_rate": 3.656759275664557e-08, "loss": 0.3121, "step": 47475 }, { "epoch": 4.826758845058967, "grad_norm": 0.26708874106407166, "learning_rate": 3.652476308365627e-08, "loss": 0.3014, "step": 47476 }, { "epoch": 4.826860512403416, "grad_norm": 0.2998151481151581, "learning_rate": 3.6481958415476306e-08, "loss": 0.3158, "step": 47477 }, { "epoch": 4.826962179747865, "grad_norm": 0.27636635303497314, "learning_rate": 3.643917875231995e-08, "loss": 0.3092, "step": 47478 }, { "epoch": 4.8270638470923135, "grad_norm": 0.28982219099998474, "learning_rate": 3.6396424094403695e-08, "loss": 0.3132, "step": 47479 }, { "epoch": 4.8271655144367625, "grad_norm": 0.2805336117744446, "learning_rate": 3.635369444194237e-08, "loss": 0.2838, "step": 47480 }, { "epoch": 4.827267181781211, "grad_norm": 0.28283852338790894, "learning_rate": 3.6310989795152464e-08, "loss": 0.2678, "step": 47481 }, { "epoch": 4.827368849125661, "grad_norm": 0.2869645357131958, "learning_rate": 3.62683101542477e-08, "loss": 0.2901, "step": 47482 }, { "epoch": 4.82747051647011, "grad_norm": 0.2789100408554077, "learning_rate": 3.622565551944346e-08, "loss": 0.2932, "step": 47483 }, { "epoch": 4.827572183814559, "grad_norm": 0.2679777145385742, "learning_rate": 3.618302589095568e-08, "loss": 0.3138, "step": 47484 }, { "epoch": 4.827673851159008, "grad_norm": 0.28418850898742676, "learning_rate": 3.6140421268996974e-08, "loss": 0.2761, "step": 47485 }, { "epoch": 4.827775518503457, "grad_norm": 0.3034915328025818, "learning_rate": 3.609784165378438e-08, "loss": 0.2989, "step": 47486 }, { "epoch": 4.827877185847906, "grad_norm": 0.2852293848991394, "learning_rate": 3.605528704553052e-08, "loss": 0.3065, "step": 47487 }, { "epoch": 4.827978853192355, "grad_norm": 0.2906368374824524, "learning_rate": 3.6012757444450765e-08, "loss": 0.27, "step": 47488 }, { "epoch": 4.828080520536804, "grad_norm": 0.27750709652900696, "learning_rate": 3.597025285075884e-08, "loss": 0.264, "step": 47489 }, { "epoch": 4.828182187881253, "grad_norm": 0.28485816717147827, "learning_rate": 3.5927773264669566e-08, "loss": 0.3126, "step": 47490 }, { "epoch": 4.828283855225702, "grad_norm": 0.282292902469635, "learning_rate": 3.5885318686396665e-08, "loss": 0.2523, "step": 47491 }, { "epoch": 4.828385522570151, "grad_norm": 0.28026390075683594, "learning_rate": 3.5842889116153304e-08, "loss": 0.2761, "step": 47492 }, { "epoch": 4.8284871899146, "grad_norm": 0.2964487671852112, "learning_rate": 3.580048455415375e-08, "loss": 0.2534, "step": 47493 }, { "epoch": 4.8285888572590485, "grad_norm": 0.2775777578353882, "learning_rate": 3.575810500061228e-08, "loss": 0.2958, "step": 47494 }, { "epoch": 4.8286905246034975, "grad_norm": 0.29090338945388794, "learning_rate": 3.571575045574149e-08, "loss": 0.2815, "step": 47495 }, { "epoch": 4.828792191947946, "grad_norm": 0.2832001745700836, "learning_rate": 3.567342091975512e-08, "loss": 0.2895, "step": 47496 }, { "epoch": 4.828893859292395, "grad_norm": 0.28005385398864746, "learning_rate": 3.5631116392866316e-08, "loss": 0.2904, "step": 47497 }, { "epoch": 4.828995526636844, "grad_norm": 0.3159080147743225, "learning_rate": 3.558883687528769e-08, "loss": 0.275, "step": 47498 }, { "epoch": 4.829097193981293, "grad_norm": 0.2853793203830719, "learning_rate": 3.5546582367233516e-08, "loss": 0.3158, "step": 47499 }, { "epoch": 4.829198861325742, "grad_norm": 0.28805044293403625, "learning_rate": 3.550435286891585e-08, "loss": 0.2803, "step": 47500 }, { "epoch": 4.829300528670191, "grad_norm": 0.29928120970726013, "learning_rate": 3.546214838054729e-08, "loss": 0.2609, "step": 47501 }, { "epoch": 4.82940219601464, "grad_norm": 0.28783783316612244, "learning_rate": 3.541996890234045e-08, "loss": 0.2925, "step": 47502 }, { "epoch": 4.829503863359089, "grad_norm": 0.26387155055999756, "learning_rate": 3.5377814434508494e-08, "loss": 0.2902, "step": 47503 }, { "epoch": 4.829605530703538, "grad_norm": 0.26871082186698914, "learning_rate": 3.5335684977263474e-08, "loss": 0.2951, "step": 47504 }, { "epoch": 4.829707198047987, "grad_norm": 0.2701314389705658, "learning_rate": 3.5293580530816884e-08, "loss": 0.3041, "step": 47505 }, { "epoch": 4.829808865392436, "grad_norm": 0.2578224837779999, "learning_rate": 3.525150109538189e-08, "loss": 0.3203, "step": 47506 }, { "epoch": 4.829910532736885, "grad_norm": 0.2722906172275543, "learning_rate": 3.5209446671169986e-08, "loss": 0.3009, "step": 47507 }, { "epoch": 4.830012200081334, "grad_norm": 0.27607500553131104, "learning_rate": 3.516741725839268e-08, "loss": 0.3016, "step": 47508 }, { "epoch": 4.830113867425783, "grad_norm": 0.29786914587020874, "learning_rate": 3.512541285726256e-08, "loss": 0.2785, "step": 47509 }, { "epoch": 4.830215534770232, "grad_norm": 0.2883390188217163, "learning_rate": 3.508343346799059e-08, "loss": 0.3316, "step": 47510 }, { "epoch": 4.8303172021146805, "grad_norm": 0.29041656851768494, "learning_rate": 3.504147909078881e-08, "loss": 0.2798, "step": 47511 }, { "epoch": 4.8304188694591295, "grad_norm": 0.2849368453025818, "learning_rate": 3.499954972586761e-08, "loss": 0.2715, "step": 47512 }, { "epoch": 4.830520536803578, "grad_norm": 0.2804689407348633, "learning_rate": 3.4957645373438487e-08, "loss": 0.2806, "step": 47513 }, { "epoch": 4.830622204148027, "grad_norm": 0.29381534457206726, "learning_rate": 3.491576603371405e-08, "loss": 0.3099, "step": 47514 }, { "epoch": 4.830723871492476, "grad_norm": 0.2818770706653595, "learning_rate": 3.487391170690302e-08, "loss": 0.3228, "step": 47515 }, { "epoch": 4.830825538836925, "grad_norm": 0.2804092764854431, "learning_rate": 3.4832082393218e-08, "loss": 0.3259, "step": 47516 }, { "epoch": 4.830927206181374, "grad_norm": 0.26004815101623535, "learning_rate": 3.4790278092868836e-08, "loss": 0.2767, "step": 47517 }, { "epoch": 4.831028873525824, "grad_norm": 0.3072342276573181, "learning_rate": 3.474849880606646e-08, "loss": 0.2785, "step": 47518 }, { "epoch": 4.831130540870273, "grad_norm": 0.27224260568618774, "learning_rate": 3.47067445330207e-08, "loss": 0.2995, "step": 47519 }, { "epoch": 4.831232208214722, "grad_norm": 0.2629374861717224, "learning_rate": 3.4665015273943056e-08, "loss": 0.2823, "step": 47520 }, { "epoch": 4.831333875559171, "grad_norm": 0.2718102037906647, "learning_rate": 3.4623311029042816e-08, "loss": 0.2921, "step": 47521 }, { "epoch": 4.83143554290362, "grad_norm": 0.2637743353843689, "learning_rate": 3.4581631798530355e-08, "loss": 0.3295, "step": 47522 }, { "epoch": 4.831537210248069, "grad_norm": 0.2829582393169403, "learning_rate": 3.453997758261607e-08, "loss": 0.2849, "step": 47523 }, { "epoch": 4.831638877592518, "grad_norm": 0.30400124192237854, "learning_rate": 3.4498348381509225e-08, "loss": 0.2767, "step": 47524 }, { "epoch": 4.831740544936967, "grad_norm": 0.2883583605289459, "learning_rate": 3.445674419541911e-08, "loss": 0.2732, "step": 47525 }, { "epoch": 4.8318422122814155, "grad_norm": 0.2798764407634735, "learning_rate": 3.441516502455666e-08, "loss": 0.2878, "step": 47526 }, { "epoch": 4.8319438796258645, "grad_norm": 0.28472664952278137, "learning_rate": 3.437361086913005e-08, "loss": 0.2866, "step": 47527 }, { "epoch": 4.832045546970313, "grad_norm": 0.2625463902950287, "learning_rate": 3.433208172934965e-08, "loss": 0.2991, "step": 47528 }, { "epoch": 4.832147214314762, "grad_norm": 0.28537794947624207, "learning_rate": 3.4290577605424204e-08, "loss": 0.2662, "step": 47529 }, { "epoch": 4.832248881659211, "grad_norm": 0.3010108172893524, "learning_rate": 3.4249098497562416e-08, "loss": 0.2821, "step": 47530 }, { "epoch": 4.83235054900366, "grad_norm": 0.28029417991638184, "learning_rate": 3.420764440597413e-08, "loss": 0.3088, "step": 47531 }, { "epoch": 4.832452216348109, "grad_norm": 0.2747470736503601, "learning_rate": 3.4166215330866946e-08, "loss": 0.3061, "step": 47532 }, { "epoch": 4.832553883692558, "grad_norm": 0.27861666679382324, "learning_rate": 3.412481127245071e-08, "loss": 0.3074, "step": 47533 }, { "epoch": 4.832655551037007, "grad_norm": 0.27894309163093567, "learning_rate": 3.408343223093358e-08, "loss": 0.2871, "step": 47534 }, { "epoch": 4.832757218381456, "grad_norm": 0.2822013199329376, "learning_rate": 3.4042078206524274e-08, "loss": 0.3197, "step": 47535 }, { "epoch": 4.832858885725905, "grad_norm": 0.291383296251297, "learning_rate": 3.400074919943042e-08, "loss": 0.298, "step": 47536 }, { "epoch": 4.832960553070354, "grad_norm": 0.28935521841049194, "learning_rate": 3.395944520986127e-08, "loss": 0.303, "step": 47537 }, { "epoch": 4.833062220414803, "grad_norm": 0.2950609028339386, "learning_rate": 3.391816623802391e-08, "loss": 0.2787, "step": 47538 }, { "epoch": 4.833163887759252, "grad_norm": 0.2932417690753937, "learning_rate": 3.3876912284126486e-08, "loss": 0.2791, "step": 47539 }, { "epoch": 4.833265555103701, "grad_norm": 0.2828611135482788, "learning_rate": 3.3835683348377724e-08, "loss": 0.3103, "step": 47540 }, { "epoch": 4.83336722244815, "grad_norm": 0.26162973046302795, "learning_rate": 3.379447943098413e-08, "loss": 0.3046, "step": 47541 }, { "epoch": 4.833468889792599, "grad_norm": 0.2776411771774292, "learning_rate": 3.375330053215386e-08, "loss": 0.2885, "step": 47542 }, { "epoch": 4.8335705571370475, "grad_norm": 0.2823212444782257, "learning_rate": 3.3712146652094546e-08, "loss": 0.2917, "step": 47543 }, { "epoch": 4.8336722244814965, "grad_norm": 0.2921183109283447, "learning_rate": 3.3671017791013225e-08, "loss": 0.2843, "step": 47544 }, { "epoch": 4.833773891825945, "grad_norm": 0.2629263997077942, "learning_rate": 3.362991394911641e-08, "loss": 0.2944, "step": 47545 }, { "epoch": 4.833875559170394, "grad_norm": 0.2788603603839874, "learning_rate": 3.358883512661282e-08, "loss": 0.3048, "step": 47546 }, { "epoch": 4.833977226514843, "grad_norm": 0.28676342964172363, "learning_rate": 3.3547781323708394e-08, "loss": 0.29, "step": 47547 }, { "epoch": 4.834078893859292, "grad_norm": 0.2475365549325943, "learning_rate": 3.3506752540609644e-08, "loss": 0.2773, "step": 47548 }, { "epoch": 4.834180561203741, "grad_norm": 0.2757704257965088, "learning_rate": 3.346574877752362e-08, "loss": 0.2828, "step": 47549 }, { "epoch": 4.83428222854819, "grad_norm": 0.2790624797344208, "learning_rate": 3.342477003465738e-08, "loss": 0.3004, "step": 47550 }, { "epoch": 4.834383895892639, "grad_norm": 0.2897050678730011, "learning_rate": 3.338381631221688e-08, "loss": 0.2881, "step": 47551 }, { "epoch": 4.834485563237088, "grad_norm": 0.28692081570625305, "learning_rate": 3.334288761040805e-08, "loss": 0.2767, "step": 47552 }, { "epoch": 4.834587230581537, "grad_norm": 0.25469639897346497, "learning_rate": 3.3301983929437395e-08, "loss": 0.2842, "step": 47553 }, { "epoch": 4.834688897925986, "grad_norm": 0.26319798827171326, "learning_rate": 3.326110526951143e-08, "loss": 0.276, "step": 47554 }, { "epoch": 4.834790565270435, "grad_norm": 0.26941582560539246, "learning_rate": 3.322025163083553e-08, "loss": 0.3098, "step": 47555 }, { "epoch": 4.834892232614884, "grad_norm": 0.2910468876361847, "learning_rate": 3.317942301361621e-08, "loss": 0.3081, "step": 47556 }, { "epoch": 4.834993899959333, "grad_norm": 0.3002036511898041, "learning_rate": 3.313861941805829e-08, "loss": 0.2806, "step": 47557 }, { "epoch": 4.835095567303782, "grad_norm": 0.2883870303630829, "learning_rate": 3.309784084436718e-08, "loss": 0.2992, "step": 47558 }, { "epoch": 4.835197234648231, "grad_norm": 0.24743372201919556, "learning_rate": 3.305708729274937e-08, "loss": 0.3068, "step": 47559 }, { "epoch": 4.8352989019926795, "grad_norm": 0.27375560998916626, "learning_rate": 3.3016358763409697e-08, "loss": 0.3019, "step": 47560 }, { "epoch": 4.8354005693371285, "grad_norm": 0.2765231430530548, "learning_rate": 3.297565525655355e-08, "loss": 0.273, "step": 47561 }, { "epoch": 4.835502236681577, "grad_norm": 0.28293293714523315, "learning_rate": 3.293497677238522e-08, "loss": 0.3207, "step": 47562 }, { "epoch": 4.835603904026026, "grad_norm": 0.29490604996681213, "learning_rate": 3.289432331111009e-08, "loss": 0.2838, "step": 47563 }, { "epoch": 4.835705571370476, "grad_norm": 0.2822037637233734, "learning_rate": 3.285369487293355e-08, "loss": 0.3048, "step": 47564 }, { "epoch": 4.835807238714925, "grad_norm": 0.2816816568374634, "learning_rate": 3.281309145805878e-08, "loss": 0.274, "step": 47565 }, { "epoch": 4.835908906059374, "grad_norm": 0.26707813143730164, "learning_rate": 3.277251306669227e-08, "loss": 0.3353, "step": 47566 }, { "epoch": 4.836010573403823, "grad_norm": 0.2920410931110382, "learning_rate": 3.273195969903664e-08, "loss": 0.2951, "step": 47567 }, { "epoch": 4.836112240748272, "grad_norm": 0.2791549265384674, "learning_rate": 3.269143135529729e-08, "loss": 0.292, "step": 47568 }, { "epoch": 4.836213908092721, "grad_norm": 0.27700090408325195, "learning_rate": 3.2650928035678486e-08, "loss": 0.269, "step": 47569 }, { "epoch": 4.83631557543717, "grad_norm": 0.2919146716594696, "learning_rate": 3.261044974038341e-08, "loss": 0.2786, "step": 47570 }, { "epoch": 4.836417242781619, "grad_norm": 0.2536373734474182, "learning_rate": 3.2569996469616895e-08, "loss": 0.3079, "step": 47571 }, { "epoch": 4.836518910126068, "grad_norm": 0.2968551516532898, "learning_rate": 3.2529568223581555e-08, "loss": 0.2808, "step": 47572 }, { "epoch": 4.836620577470517, "grad_norm": 0.28398916125297546, "learning_rate": 3.248916500248223e-08, "loss": 0.2661, "step": 47573 }, { "epoch": 4.836722244814966, "grad_norm": 0.2774800658226013, "learning_rate": 3.244878680652208e-08, "loss": 0.3123, "step": 47574 }, { "epoch": 4.8368239121594145, "grad_norm": 0.28067490458488464, "learning_rate": 3.240843363590429e-08, "loss": 0.2782, "step": 47575 }, { "epoch": 4.8369255795038635, "grad_norm": 0.29215767979621887, "learning_rate": 3.236810549083258e-08, "loss": 0.3022, "step": 47576 }, { "epoch": 4.837027246848312, "grad_norm": 0.29293322563171387, "learning_rate": 3.2327802371509564e-08, "loss": 0.2872, "step": 47577 }, { "epoch": 4.837128914192761, "grad_norm": 0.28995952010154724, "learning_rate": 3.228752427813897e-08, "loss": 0.3186, "step": 47578 }, { "epoch": 4.83723058153721, "grad_norm": 0.2986307144165039, "learning_rate": 3.224727121092286e-08, "loss": 0.3071, "step": 47579 }, { "epoch": 4.837332248881659, "grad_norm": 0.27271386981010437, "learning_rate": 3.2207043170064955e-08, "loss": 0.2882, "step": 47580 }, { "epoch": 4.837433916226108, "grad_norm": 0.304082989692688, "learning_rate": 3.216684015576732e-08, "loss": 0.2958, "step": 47581 }, { "epoch": 4.837535583570557, "grad_norm": 0.26058241724967957, "learning_rate": 3.212666216823201e-08, "loss": 0.3266, "step": 47582 }, { "epoch": 4.837637250915006, "grad_norm": 0.29286614060401917, "learning_rate": 3.208650920766277e-08, "loss": 0.2934, "step": 47583 }, { "epoch": 4.837738918259455, "grad_norm": 0.2727747857570648, "learning_rate": 3.2046381274261076e-08, "loss": 0.3056, "step": 47584 }, { "epoch": 4.837840585603904, "grad_norm": 0.2806890904903412, "learning_rate": 3.200627836822845e-08, "loss": 0.262, "step": 47585 }, { "epoch": 4.837942252948353, "grad_norm": 0.2602587044239044, "learning_rate": 3.196620048976862e-08, "loss": 0.2958, "step": 47586 }, { "epoch": 4.838043920292802, "grad_norm": 0.3143487274646759, "learning_rate": 3.192614763908197e-08, "loss": 0.2934, "step": 47587 }, { "epoch": 4.838145587637251, "grad_norm": 0.2579018771648407, "learning_rate": 3.188611981637057e-08, "loss": 0.3265, "step": 47588 }, { "epoch": 4.8382472549817, "grad_norm": 0.2421807497739792, "learning_rate": 3.184611702183649e-08, "loss": 0.28, "step": 47589 }, { "epoch": 4.838348922326149, "grad_norm": 0.2663708031177521, "learning_rate": 3.180613925568121e-08, "loss": 0.2887, "step": 47590 }, { "epoch": 4.838450589670598, "grad_norm": 0.26925113797187805, "learning_rate": 3.17661865181057e-08, "loss": 0.3019, "step": 47591 }, { "epoch": 4.8385522570150465, "grad_norm": 0.27249664068222046, "learning_rate": 3.172625880931146e-08, "loss": 0.2687, "step": 47592 }, { "epoch": 4.8386539243594955, "grad_norm": 0.25972720980644226, "learning_rate": 3.168635612950055e-08, "loss": 0.3022, "step": 47593 }, { "epoch": 4.838755591703944, "grad_norm": 0.3024694621562958, "learning_rate": 3.164647847887226e-08, "loss": 0.2851, "step": 47594 }, { "epoch": 4.838857259048393, "grad_norm": 0.2684551477432251, "learning_rate": 3.160662585762808e-08, "loss": 0.2795, "step": 47595 }, { "epoch": 4.838958926392842, "grad_norm": 0.2984030544757843, "learning_rate": 3.156679826597009e-08, "loss": 0.2798, "step": 47596 }, { "epoch": 4.839060593737291, "grad_norm": 0.27597883343696594, "learning_rate": 3.152699570409701e-08, "loss": 0.2726, "step": 47597 }, { "epoch": 4.83916226108174, "grad_norm": 0.27693721652030945, "learning_rate": 3.1487218172210896e-08, "loss": 0.3208, "step": 47598 }, { "epoch": 4.839263928426189, "grad_norm": 0.2602706253528595, "learning_rate": 3.1447465670511046e-08, "loss": 0.2944, "step": 47599 }, { "epoch": 4.839365595770639, "grad_norm": 0.30411431193351746, "learning_rate": 3.1407738199197845e-08, "loss": 0.2855, "step": 47600 }, { "epoch": 4.839467263115088, "grad_norm": 0.27186813950538635, "learning_rate": 3.1368035758472246e-08, "loss": 0.2936, "step": 47601 }, { "epoch": 4.839568930459537, "grad_norm": 0.28562799096107483, "learning_rate": 3.132835834853354e-08, "loss": 0.2988, "step": 47602 }, { "epoch": 4.839670597803986, "grad_norm": 0.2715388536453247, "learning_rate": 3.1288705969582665e-08, "loss": 0.2981, "step": 47603 }, { "epoch": 4.839772265148435, "grad_norm": 0.29856160283088684, "learning_rate": 3.124907862181725e-08, "loss": 0.2688, "step": 47604 }, { "epoch": 4.839873932492884, "grad_norm": 0.2710444927215576, "learning_rate": 3.12094763054388e-08, "loss": 0.2949, "step": 47605 }, { "epoch": 4.839975599837333, "grad_norm": 0.30090954899787903, "learning_rate": 3.116989902064604e-08, "loss": 0.2749, "step": 47606 }, { "epoch": 4.8400772671817816, "grad_norm": 0.29229044914245605, "learning_rate": 3.113034676763882e-08, "loss": 0.31, "step": 47607 }, { "epoch": 4.8401789345262305, "grad_norm": 0.2750135362148285, "learning_rate": 3.1090819546616416e-08, "loss": 0.2925, "step": 47608 }, { "epoch": 4.840280601870679, "grad_norm": 0.3173077404499054, "learning_rate": 3.1051317357777e-08, "loss": 0.2909, "step": 47609 }, { "epoch": 4.840382269215128, "grad_norm": 0.28082773089408875, "learning_rate": 3.101184020132042e-08, "loss": 0.3279, "step": 47610 }, { "epoch": 4.840483936559577, "grad_norm": 0.28860488533973694, "learning_rate": 3.097238807744596e-08, "loss": 0.2803, "step": 47611 }, { "epoch": 4.840585603904026, "grad_norm": 0.2691476345062256, "learning_rate": 3.093296098635068e-08, "loss": 0.3004, "step": 47612 }, { "epoch": 4.840687271248475, "grad_norm": 0.26632246375083923, "learning_rate": 3.089355892823553e-08, "loss": 0.3403, "step": 47613 }, { "epoch": 4.840788938592924, "grad_norm": 0.27292826771736145, "learning_rate": 3.085418190329703e-08, "loss": 0.3176, "step": 47614 }, { "epoch": 4.840890605937373, "grad_norm": 0.26499873399734497, "learning_rate": 3.0814829911734456e-08, "loss": 0.3061, "step": 47615 }, { "epoch": 4.840992273281822, "grad_norm": 0.2792268395423889, "learning_rate": 3.0775502953745987e-08, "loss": 0.298, "step": 47616 }, { "epoch": 4.841093940626271, "grad_norm": 0.2782328426837921, "learning_rate": 3.073620102952979e-08, "loss": 0.3041, "step": 47617 }, { "epoch": 4.84119560797072, "grad_norm": 0.2678619921207428, "learning_rate": 3.069692413928349e-08, "loss": 0.2803, "step": 47618 }, { "epoch": 4.841297275315169, "grad_norm": 0.2602779269218445, "learning_rate": 3.0657672283205264e-08, "loss": 0.2913, "step": 47619 }, { "epoch": 4.841398942659618, "grad_norm": 0.27719900012016296, "learning_rate": 3.061844546149273e-08, "loss": 0.2791, "step": 47620 }, { "epoch": 4.841500610004067, "grad_norm": 0.27497047185897827, "learning_rate": 3.05792436743435e-08, "loss": 0.3138, "step": 47621 }, { "epoch": 4.841602277348516, "grad_norm": 0.286848783493042, "learning_rate": 3.054006692195521e-08, "loss": 0.3288, "step": 47622 }, { "epoch": 4.841703944692965, "grad_norm": 0.3077697157859802, "learning_rate": 3.0500915204525474e-08, "loss": 0.2925, "step": 47623 }, { "epoch": 4.8418056120374136, "grad_norm": 0.27200645208358765, "learning_rate": 3.0461788522250793e-08, "loss": 0.303, "step": 47624 }, { "epoch": 4.8419072793818625, "grad_norm": 0.2834765315055847, "learning_rate": 3.0422686875328235e-08, "loss": 0.2934, "step": 47625 }, { "epoch": 4.842008946726311, "grad_norm": 0.2903497815132141, "learning_rate": 3.0383610263956534e-08, "loss": 0.2774, "step": 47626 }, { "epoch": 4.84211061407076, "grad_norm": 0.2604603171348572, "learning_rate": 3.034455868832997e-08, "loss": 0.3099, "step": 47627 }, { "epoch": 4.842212281415209, "grad_norm": 0.29250630736351013, "learning_rate": 3.030553214864729e-08, "loss": 0.3231, "step": 47628 }, { "epoch": 4.842313948759658, "grad_norm": 0.29637911915779114, "learning_rate": 3.026653064510443e-08, "loss": 0.2992, "step": 47629 }, { "epoch": 4.842415616104107, "grad_norm": 0.287111759185791, "learning_rate": 3.0227554177897355e-08, "loss": 0.3083, "step": 47630 }, { "epoch": 4.842517283448556, "grad_norm": 0.2641954720020294, "learning_rate": 3.0188602747222574e-08, "loss": 0.2787, "step": 47631 }, { "epoch": 4.842618950793005, "grad_norm": 0.28387346863746643, "learning_rate": 3.0149676353277145e-08, "loss": 0.3135, "step": 47632 }, { "epoch": 4.842720618137454, "grad_norm": 0.3054496943950653, "learning_rate": 3.011077499625703e-08, "loss": 0.2817, "step": 47633 }, { "epoch": 4.842822285481903, "grad_norm": 0.2807840406894684, "learning_rate": 3.007189867635707e-08, "loss": 0.2967, "step": 47634 }, { "epoch": 4.842923952826352, "grad_norm": 0.26494744420051575, "learning_rate": 3.003304739377377e-08, "loss": 0.2698, "step": 47635 }, { "epoch": 4.843025620170801, "grad_norm": 0.27002638578414917, "learning_rate": 2.999422114870365e-08, "loss": 0.3118, "step": 47636 }, { "epoch": 4.84312728751525, "grad_norm": 0.2782001495361328, "learning_rate": 2.995541994134099e-08, "loss": 0.276, "step": 47637 }, { "epoch": 4.843228954859699, "grad_norm": 0.30178919434547424, "learning_rate": 2.991664377188175e-08, "loss": 0.2886, "step": 47638 }, { "epoch": 4.843330622204148, "grad_norm": 0.2745976746082306, "learning_rate": 2.987789264052188e-08, "loss": 0.3083, "step": 47639 }, { "epoch": 4.843432289548597, "grad_norm": 0.2554529011249542, "learning_rate": 2.9839166547455665e-08, "loss": 0.3089, "step": 47640 }, { "epoch": 4.8435339568930456, "grad_norm": 0.28228121995925903, "learning_rate": 2.980046549287907e-08, "loss": 0.2993, "step": 47641 }, { "epoch": 4.8436356242374945, "grad_norm": 0.26143038272857666, "learning_rate": 2.976178947698638e-08, "loss": 0.2785, "step": 47642 }, { "epoch": 4.843737291581943, "grad_norm": 0.30431294441223145, "learning_rate": 2.972313849997299e-08, "loss": 0.2733, "step": 47643 }, { "epoch": 4.843838958926392, "grad_norm": 0.26839902997016907, "learning_rate": 2.9684512562032642e-08, "loss": 0.3191, "step": 47644 }, { "epoch": 4.843940626270841, "grad_norm": 0.2945657968521118, "learning_rate": 2.9645911663361282e-08, "loss": 0.31, "step": 47645 }, { "epoch": 4.844042293615291, "grad_norm": 0.2567843496799469, "learning_rate": 2.9607335804152648e-08, "loss": 0.2916, "step": 47646 }, { "epoch": 4.84414396095974, "grad_norm": 0.283352255821228, "learning_rate": 2.9568784984600474e-08, "loss": 0.2848, "step": 47647 }, { "epoch": 4.844245628304189, "grad_norm": 0.2612329125404358, "learning_rate": 2.953025920490016e-08, "loss": 0.3128, "step": 47648 }, { "epoch": 4.844347295648638, "grad_norm": 0.2855492830276489, "learning_rate": 2.9491758465245435e-08, "loss": 0.3303, "step": 47649 }, { "epoch": 4.844448962993087, "grad_norm": 0.28076648712158203, "learning_rate": 2.9453282765829483e-08, "loss": 0.303, "step": 47650 }, { "epoch": 4.844550630337536, "grad_norm": 0.28199899196624756, "learning_rate": 2.941483210684659e-08, "loss": 0.3017, "step": 47651 }, { "epoch": 4.844652297681985, "grad_norm": 0.2953265607357025, "learning_rate": 2.937640648849105e-08, "loss": 0.2979, "step": 47652 }, { "epoch": 4.844753965026434, "grad_norm": 0.2628823518753052, "learning_rate": 2.9338005910956035e-08, "loss": 0.291, "step": 47653 }, { "epoch": 4.844855632370883, "grad_norm": 0.2839246988296509, "learning_rate": 2.9299630374434728e-08, "loss": 0.3115, "step": 47654 }, { "epoch": 4.844957299715332, "grad_norm": 0.284717321395874, "learning_rate": 2.9261279879120308e-08, "loss": 0.305, "step": 47655 }, { "epoch": 4.845058967059781, "grad_norm": 0.2892211675643921, "learning_rate": 2.922295442520706e-08, "loss": 0.2875, "step": 47656 }, { "epoch": 4.8451606344042295, "grad_norm": 0.2537945806980133, "learning_rate": 2.9184654012886503e-08, "loss": 0.2989, "step": 47657 }, { "epoch": 4.8452623017486784, "grad_norm": 0.27324968576431274, "learning_rate": 2.9146378642352925e-08, "loss": 0.2975, "step": 47658 }, { "epoch": 4.845363969093127, "grad_norm": 0.2907828092575073, "learning_rate": 2.9108128313798945e-08, "loss": 0.3125, "step": 47659 }, { "epoch": 4.845465636437576, "grad_norm": 0.29218459129333496, "learning_rate": 2.9069903027416634e-08, "loss": 0.2805, "step": 47660 }, { "epoch": 4.845567303782025, "grad_norm": 0.2841586470603943, "learning_rate": 2.903170278339862e-08, "loss": 0.288, "step": 47661 }, { "epoch": 4.845668971126474, "grad_norm": 0.2831597924232483, "learning_rate": 2.8993527581938074e-08, "loss": 0.3097, "step": 47662 }, { "epoch": 4.845770638470923, "grad_norm": 0.2773889899253845, "learning_rate": 2.8955377423226515e-08, "loss": 0.2865, "step": 47663 }, { "epoch": 4.845872305815372, "grad_norm": 0.25431352853775024, "learning_rate": 2.8917252307456566e-08, "loss": 0.3221, "step": 47664 }, { "epoch": 4.845973973159821, "grad_norm": 0.2689359784126282, "learning_rate": 2.887915223481974e-08, "loss": 0.2898, "step": 47665 }, { "epoch": 4.84607564050427, "grad_norm": 0.28096896409988403, "learning_rate": 2.8841077205509216e-08, "loss": 0.2985, "step": 47666 }, { "epoch": 4.846177307848719, "grad_norm": 0.2773131728172302, "learning_rate": 2.8803027219715395e-08, "loss": 0.314, "step": 47667 }, { "epoch": 4.846278975193168, "grad_norm": 0.2740475833415985, "learning_rate": 2.8765002277630904e-08, "loss": 0.3039, "step": 47668 }, { "epoch": 4.846380642537617, "grad_norm": 0.2784784734249115, "learning_rate": 2.8727002379447256e-08, "loss": 0.2928, "step": 47669 }, { "epoch": 4.846482309882066, "grad_norm": 0.27907681465148926, "learning_rate": 2.868902752535485e-08, "loss": 0.2723, "step": 47670 }, { "epoch": 4.846583977226515, "grad_norm": 0.2809879779815674, "learning_rate": 2.8651077715546317e-08, "loss": 0.3223, "step": 47671 }, { "epoch": 4.846685644570964, "grad_norm": 0.31391704082489014, "learning_rate": 2.8613152950212054e-08, "loss": 0.307, "step": 47672 }, { "epoch": 4.846787311915413, "grad_norm": 0.2843572497367859, "learning_rate": 2.8575253229543577e-08, "loss": 0.3194, "step": 47673 }, { "epoch": 4.8468889792598615, "grad_norm": 0.2977209985256195, "learning_rate": 2.8537378553731286e-08, "loss": 0.3089, "step": 47674 }, { "epoch": 4.8469906466043104, "grad_norm": 0.28804829716682434, "learning_rate": 2.84995289229667e-08, "loss": 0.3006, "step": 47675 }, { "epoch": 4.847092313948759, "grad_norm": 0.28543123602867126, "learning_rate": 2.8461704337439667e-08, "loss": 0.2995, "step": 47676 }, { "epoch": 4.847193981293208, "grad_norm": 0.28580379486083984, "learning_rate": 2.842390479734114e-08, "loss": 0.2759, "step": 47677 }, { "epoch": 4.847295648637657, "grad_norm": 0.2905900776386261, "learning_rate": 2.8386130302861526e-08, "loss": 0.3026, "step": 47678 }, { "epoch": 4.847397315982106, "grad_norm": 0.28200891613960266, "learning_rate": 2.834838085419178e-08, "loss": 0.3123, "step": 47679 }, { "epoch": 4.847498983326555, "grad_norm": 0.23836266994476318, "learning_rate": 2.8310656451520648e-08, "loss": 0.2859, "step": 47680 }, { "epoch": 4.847600650671004, "grad_norm": 0.28518134355545044, "learning_rate": 2.8272957095039632e-08, "loss": 0.3218, "step": 47681 }, { "epoch": 4.847702318015454, "grad_norm": 0.2728903591632843, "learning_rate": 2.823528278493748e-08, "loss": 0.2855, "step": 47682 }, { "epoch": 4.847803985359903, "grad_norm": 0.2722249925136566, "learning_rate": 2.8197633521404588e-08, "loss": 0.2742, "step": 47683 }, { "epoch": 4.847905652704352, "grad_norm": 0.28451505303382874, "learning_rate": 2.816000930463081e-08, "loss": 0.2883, "step": 47684 }, { "epoch": 4.848007320048801, "grad_norm": 0.27197933197021484, "learning_rate": 2.8122410134805433e-08, "loss": 0.308, "step": 47685 }, { "epoch": 4.84810898739325, "grad_norm": 0.24648118019104004, "learning_rate": 2.8084836012117756e-08, "loss": 0.293, "step": 47686 }, { "epoch": 4.848210654737699, "grad_norm": 0.2867559492588043, "learning_rate": 2.8047286936756512e-08, "loss": 0.2898, "step": 47687 }, { "epoch": 4.848312322082148, "grad_norm": 0.2535631060600281, "learning_rate": 2.800976290891211e-08, "loss": 0.2949, "step": 47688 }, { "epoch": 4.8484139894265965, "grad_norm": 0.275495320558548, "learning_rate": 2.797226392877328e-08, "loss": 0.2874, "step": 47689 }, { "epoch": 4.8485156567710455, "grad_norm": 0.2758006751537323, "learning_rate": 2.793478999652821e-08, "loss": 0.2873, "step": 47690 }, { "epoch": 4.848617324115494, "grad_norm": 0.27296170592308044, "learning_rate": 2.7897341112366194e-08, "loss": 0.285, "step": 47691 }, { "epoch": 4.848718991459943, "grad_norm": 0.2884714603424072, "learning_rate": 2.7859917276475967e-08, "loss": 0.2963, "step": 47692 }, { "epoch": 4.848820658804392, "grad_norm": 0.2605730891227722, "learning_rate": 2.782251848904627e-08, "loss": 0.2839, "step": 47693 }, { "epoch": 4.848922326148841, "grad_norm": 0.2681007981300354, "learning_rate": 2.778514475026417e-08, "loss": 0.2692, "step": 47694 }, { "epoch": 4.84902399349329, "grad_norm": 0.2805427014827728, "learning_rate": 2.774779606031952e-08, "loss": 0.263, "step": 47695 }, { "epoch": 4.849125660837739, "grad_norm": 0.27963951230049133, "learning_rate": 2.7710472419399946e-08, "loss": 0.306, "step": 47696 }, { "epoch": 4.849227328182188, "grad_norm": 0.2619757354259491, "learning_rate": 2.767317382769308e-08, "loss": 0.3198, "step": 47697 }, { "epoch": 4.849328995526637, "grad_norm": 0.277208536863327, "learning_rate": 2.7635900285387652e-08, "loss": 0.3003, "step": 47698 }, { "epoch": 4.849430662871086, "grad_norm": 0.27299222350120544, "learning_rate": 2.759865179267074e-08, "loss": 0.291, "step": 47699 }, { "epoch": 4.849532330215535, "grad_norm": 0.3046858012676239, "learning_rate": 2.756142834972997e-08, "loss": 0.2643, "step": 47700 }, { "epoch": 4.849633997559984, "grad_norm": 0.28139373660087585, "learning_rate": 2.7524229956753525e-08, "loss": 0.307, "step": 47701 }, { "epoch": 4.849735664904433, "grad_norm": 0.2749039828777313, "learning_rate": 2.748705661392792e-08, "loss": 0.3241, "step": 47702 }, { "epoch": 4.849837332248882, "grad_norm": 0.2902096211910248, "learning_rate": 2.7449908321441342e-08, "loss": 0.2811, "step": 47703 }, { "epoch": 4.849938999593331, "grad_norm": 0.3139931857585907, "learning_rate": 2.7412785079479755e-08, "loss": 0.3056, "step": 47704 }, { "epoch": 4.85004066693778, "grad_norm": 0.29019439220428467, "learning_rate": 2.7375686888231335e-08, "loss": 0.2677, "step": 47705 }, { "epoch": 4.8501423342822285, "grad_norm": 0.2665369510650635, "learning_rate": 2.7338613747882602e-08, "loss": 0.2663, "step": 47706 }, { "epoch": 4.8502440016266775, "grad_norm": 0.2924121916294098, "learning_rate": 2.730156565862008e-08, "loss": 0.3189, "step": 47707 }, { "epoch": 4.850345668971126, "grad_norm": 0.2848038375377655, "learning_rate": 2.726454262063083e-08, "loss": 0.3078, "step": 47708 }, { "epoch": 4.850447336315575, "grad_norm": 0.27977320551872253, "learning_rate": 2.7227544634100823e-08, "loss": 0.3672, "step": 47709 }, { "epoch": 4.850549003660024, "grad_norm": 0.2778957784175873, "learning_rate": 2.7190571699216573e-08, "loss": 0.2991, "step": 47710 }, { "epoch": 4.850650671004473, "grad_norm": 0.26333674788475037, "learning_rate": 2.7153623816165153e-08, "loss": 0.2985, "step": 47711 }, { "epoch": 4.850752338348922, "grad_norm": 0.28715789318084717, "learning_rate": 2.7116700985131416e-08, "loss": 0.2863, "step": 47712 }, { "epoch": 4.850854005693371, "grad_norm": 0.2751449942588806, "learning_rate": 2.7079803206302436e-08, "loss": 0.2796, "step": 47713 }, { "epoch": 4.85095567303782, "grad_norm": 0.29815298318862915, "learning_rate": 2.7042930479863617e-08, "loss": 0.287, "step": 47714 }, { "epoch": 4.851057340382269, "grad_norm": 0.3041495680809021, "learning_rate": 2.7006082806000365e-08, "loss": 0.3153, "step": 47715 }, { "epoch": 4.851159007726718, "grad_norm": 0.2912956178188324, "learning_rate": 2.6969260184899204e-08, "loss": 0.2732, "step": 47716 }, { "epoch": 4.851260675071167, "grad_norm": 0.28421396017074585, "learning_rate": 2.6932462616744426e-08, "loss": 0.3208, "step": 47717 }, { "epoch": 4.851362342415616, "grad_norm": 0.2820899486541748, "learning_rate": 2.6895690101723103e-08, "loss": 0.3124, "step": 47718 }, { "epoch": 4.851464009760065, "grad_norm": 0.27409759163856506, "learning_rate": 2.6858942640018982e-08, "loss": 0.3026, "step": 47719 }, { "epoch": 4.851565677104514, "grad_norm": 0.2927713990211487, "learning_rate": 2.6822220231817463e-08, "loss": 0.2817, "step": 47720 }, { "epoch": 4.851667344448963, "grad_norm": 0.28521639108657837, "learning_rate": 2.678552287730396e-08, "loss": 0.2813, "step": 47721 }, { "epoch": 4.851769011793412, "grad_norm": 0.26485756039619446, "learning_rate": 2.674885057666332e-08, "loss": 0.3168, "step": 47722 }, { "epoch": 4.8518706791378605, "grad_norm": 0.2902619242668152, "learning_rate": 2.6712203330080398e-08, "loss": 0.3135, "step": 47723 }, { "epoch": 4.8519723464823095, "grad_norm": 0.2975822985172272, "learning_rate": 2.6675581137738938e-08, "loss": 0.2857, "step": 47724 }, { "epoch": 4.852074013826758, "grad_norm": 0.3023092746734619, "learning_rate": 2.6638983999824343e-08, "loss": 0.3155, "step": 47725 }, { "epoch": 4.852175681171207, "grad_norm": 0.2850571274757385, "learning_rate": 2.660241191652091e-08, "loss": 0.3156, "step": 47726 }, { "epoch": 4.852277348515656, "grad_norm": 0.29507046937942505, "learning_rate": 2.6565864888011826e-08, "loss": 0.2834, "step": 47727 }, { "epoch": 4.852379015860106, "grad_norm": 0.29552993178367615, "learning_rate": 2.652934291448306e-08, "loss": 0.2937, "step": 47728 }, { "epoch": 4.852480683204555, "grad_norm": 0.2922460734844208, "learning_rate": 2.6492845996117232e-08, "loss": 0.3036, "step": 47729 }, { "epoch": 4.852582350549004, "grad_norm": 0.28643935918807983, "learning_rate": 2.6456374133098094e-08, "loss": 0.3001, "step": 47730 }, { "epoch": 4.852684017893453, "grad_norm": 0.274020791053772, "learning_rate": 2.6419927325610494e-08, "loss": 0.292, "step": 47731 }, { "epoch": 4.852785685237902, "grad_norm": 0.2662905752658844, "learning_rate": 2.6383505573837622e-08, "loss": 0.3188, "step": 47732 }, { "epoch": 4.852887352582351, "grad_norm": 0.2674373686313629, "learning_rate": 2.6347108877962103e-08, "loss": 0.2876, "step": 47733 }, { "epoch": 4.8529890199268, "grad_norm": 0.2948659360408783, "learning_rate": 2.6310737238167684e-08, "loss": 0.3043, "step": 47734 }, { "epoch": 4.853090687271249, "grad_norm": 0.28871461749076843, "learning_rate": 2.627439065463866e-08, "loss": 0.2929, "step": 47735 }, { "epoch": 4.853192354615698, "grad_norm": 0.3191734552383423, "learning_rate": 2.623806912755711e-08, "loss": 0.3109, "step": 47736 }, { "epoch": 4.853294021960147, "grad_norm": 0.3105137050151825, "learning_rate": 2.6201772657105663e-08, "loss": 0.284, "step": 47737 }, { "epoch": 4.8533956893045955, "grad_norm": 0.2637297809123993, "learning_rate": 2.616550124346806e-08, "loss": 0.288, "step": 47738 }, { "epoch": 4.8534973566490445, "grad_norm": 0.27201613783836365, "learning_rate": 2.612925488682694e-08, "loss": 0.3013, "step": 47739 }, { "epoch": 4.853599023993493, "grad_norm": 0.2827034890651703, "learning_rate": 2.6093033587364368e-08, "loss": 0.278, "step": 47740 }, { "epoch": 4.853700691337942, "grad_norm": 0.27064669132232666, "learning_rate": 2.605683734526354e-08, "loss": 0.3372, "step": 47741 }, { "epoch": 4.853802358682391, "grad_norm": 0.2660205066204071, "learning_rate": 2.6020666160705977e-08, "loss": 0.2984, "step": 47742 }, { "epoch": 4.85390402602684, "grad_norm": 0.2515729069709778, "learning_rate": 2.5984520033874307e-08, "loss": 0.3056, "step": 47743 }, { "epoch": 4.854005693371289, "grad_norm": 0.28971728682518005, "learning_rate": 2.5948398964950604e-08, "loss": 0.2658, "step": 47744 }, { "epoch": 4.854107360715738, "grad_norm": 0.2795569598674774, "learning_rate": 2.591230295411695e-08, "loss": 0.2796, "step": 47745 }, { "epoch": 4.854209028060187, "grad_norm": 0.28908365964889526, "learning_rate": 2.587623200155487e-08, "loss": 0.2956, "step": 47746 }, { "epoch": 4.854310695404636, "grad_norm": 0.27141115069389343, "learning_rate": 2.584018610744643e-08, "loss": 0.2945, "step": 47747 }, { "epoch": 4.854412362749085, "grad_norm": 0.2696097195148468, "learning_rate": 2.5804165271973158e-08, "loss": 0.2877, "step": 47748 }, { "epoch": 4.854514030093534, "grad_norm": 0.2881384491920471, "learning_rate": 2.5768169495316575e-08, "loss": 0.3048, "step": 47749 }, { "epoch": 4.854615697437983, "grad_norm": 0.26224619150161743, "learning_rate": 2.5732198777657645e-08, "loss": 0.2971, "step": 47750 }, { "epoch": 4.854717364782432, "grad_norm": 0.24343159794807434, "learning_rate": 2.5696253119177895e-08, "loss": 0.3181, "step": 47751 }, { "epoch": 4.854819032126881, "grad_norm": 0.2692975103855133, "learning_rate": 2.566033252005884e-08, "loss": 0.3062, "step": 47752 }, { "epoch": 4.85492069947133, "grad_norm": 0.27799898386001587, "learning_rate": 2.562443698048034e-08, "loss": 0.2873, "step": 47753 }, { "epoch": 4.855022366815779, "grad_norm": 0.29023298621177673, "learning_rate": 2.5588566500623913e-08, "loss": 0.2867, "step": 47754 }, { "epoch": 4.8551240341602275, "grad_norm": 0.26273229718208313, "learning_rate": 2.555272108067053e-08, "loss": 0.3006, "step": 47755 }, { "epoch": 4.8552257015046765, "grad_norm": 0.27030816674232483, "learning_rate": 2.551690072080004e-08, "loss": 0.2789, "step": 47756 }, { "epoch": 4.855327368849125, "grad_norm": 0.2583198845386505, "learning_rate": 2.5481105421193418e-08, "loss": 0.3044, "step": 47757 }, { "epoch": 4.855429036193574, "grad_norm": 0.28828680515289307, "learning_rate": 2.544533518203107e-08, "loss": 0.3168, "step": 47758 }, { "epoch": 4.855530703538023, "grad_norm": 0.2750299870967865, "learning_rate": 2.5409590003492855e-08, "loss": 0.297, "step": 47759 }, { "epoch": 4.855632370882472, "grad_norm": 0.26266711950302124, "learning_rate": 2.537386988575863e-08, "loss": 0.2866, "step": 47760 }, { "epoch": 4.855734038226921, "grad_norm": 0.27027300000190735, "learning_rate": 2.53381748290088e-08, "loss": 0.3208, "step": 47761 }, { "epoch": 4.85583570557137, "grad_norm": 0.29296544194221497, "learning_rate": 2.5302504833423225e-08, "loss": 0.3097, "step": 47762 }, { "epoch": 4.855937372915819, "grad_norm": 0.27291393280029297, "learning_rate": 2.526685989918176e-08, "loss": 0.2908, "step": 47763 }, { "epoch": 4.856039040260269, "grad_norm": 0.28160208463668823, "learning_rate": 2.5231240026463155e-08, "loss": 0.2758, "step": 47764 }, { "epoch": 4.856140707604718, "grad_norm": 0.28674691915512085, "learning_rate": 2.519564521544782e-08, "loss": 0.2906, "step": 47765 }, { "epoch": 4.856242374949167, "grad_norm": 0.3003815710544586, "learning_rate": 2.5160075466314493e-08, "loss": 0.3008, "step": 47766 }, { "epoch": 4.856344042293616, "grad_norm": 0.27323684096336365, "learning_rate": 2.5124530779242486e-08, "loss": 0.2831, "step": 47767 }, { "epoch": 4.856445709638065, "grad_norm": 0.2448342889547348, "learning_rate": 2.5089011154411093e-08, "loss": 0.3318, "step": 47768 }, { "epoch": 4.856547376982514, "grad_norm": 0.30223405361175537, "learning_rate": 2.5053516591998505e-08, "loss": 0.2953, "step": 47769 }, { "epoch": 4.8566490443269625, "grad_norm": 0.28655552864074707, "learning_rate": 2.501804709218403e-08, "loss": 0.299, "step": 47770 }, { "epoch": 4.8567507116714115, "grad_norm": 0.2857430875301361, "learning_rate": 2.498260265514696e-08, "loss": 0.2898, "step": 47771 }, { "epoch": 4.85685237901586, "grad_norm": 0.27757853269577026, "learning_rate": 2.4947183281064935e-08, "loss": 0.2813, "step": 47772 }, { "epoch": 4.856954046360309, "grad_norm": 0.28939202427864075, "learning_rate": 2.4911788970117257e-08, "loss": 0.2899, "step": 47773 }, { "epoch": 4.857055713704758, "grad_norm": 0.25520747900009155, "learning_rate": 2.4876419722481006e-08, "loss": 0.2953, "step": 47774 }, { "epoch": 4.857157381049207, "grad_norm": 0.26608529686927795, "learning_rate": 2.484107553833548e-08, "loss": 0.3431, "step": 47775 }, { "epoch": 4.857259048393656, "grad_norm": 0.279473215341568, "learning_rate": 2.480575641785832e-08, "loss": 0.3014, "step": 47776 }, { "epoch": 4.857360715738105, "grad_norm": 0.27094531059265137, "learning_rate": 2.4770462361227154e-08, "loss": 0.3172, "step": 47777 }, { "epoch": 4.857462383082554, "grad_norm": 0.269959032535553, "learning_rate": 2.4735193368620736e-08, "loss": 0.2932, "step": 47778 }, { "epoch": 4.857564050427003, "grad_norm": 0.30665239691734314, "learning_rate": 2.4699949440215586e-08, "loss": 0.286, "step": 47779 }, { "epoch": 4.857665717771452, "grad_norm": 0.2722650468349457, "learning_rate": 2.46647305761899e-08, "loss": 0.2944, "step": 47780 }, { "epoch": 4.857767385115901, "grad_norm": 0.2770887315273285, "learning_rate": 2.462953677672131e-08, "loss": 0.3107, "step": 47781 }, { "epoch": 4.85786905246035, "grad_norm": 0.2851526439189911, "learning_rate": 2.4594368041986338e-08, "loss": 0.2873, "step": 47782 }, { "epoch": 4.857970719804799, "grad_norm": 0.2919289469718933, "learning_rate": 2.4559224372162627e-08, "loss": 0.2883, "step": 47783 }, { "epoch": 4.858072387149248, "grad_norm": 0.27320054173469543, "learning_rate": 2.4524105767427254e-08, "loss": 0.314, "step": 47784 }, { "epoch": 4.858174054493697, "grad_norm": 0.2906358540058136, "learning_rate": 2.4489012227957298e-08, "loss": 0.2845, "step": 47785 }, { "epoch": 4.858275721838146, "grad_norm": 0.28737643361091614, "learning_rate": 2.445394375392929e-08, "loss": 0.305, "step": 47786 }, { "epoch": 4.8583773891825945, "grad_norm": 0.2674062252044678, "learning_rate": 2.4418900345519747e-08, "loss": 0.2757, "step": 47787 }, { "epoch": 4.8584790565270435, "grad_norm": 0.27592071890830994, "learning_rate": 2.4383882002905757e-08, "loss": 0.2938, "step": 47788 }, { "epoch": 4.858580723871492, "grad_norm": 0.2834600806236267, "learning_rate": 2.434888872626273e-08, "loss": 0.2842, "step": 47789 }, { "epoch": 4.858682391215941, "grad_norm": 0.28498026728630066, "learning_rate": 2.4313920515768307e-08, "loss": 0.3136, "step": 47790 }, { "epoch": 4.85878405856039, "grad_norm": 0.2698746919631958, "learning_rate": 2.4278977371597345e-08, "loss": 0.3071, "step": 47791 }, { "epoch": 4.858885725904839, "grad_norm": 0.2701634466648102, "learning_rate": 2.4244059293926925e-08, "loss": 0.2754, "step": 47792 }, { "epoch": 4.858987393249288, "grad_norm": 0.28405869007110596, "learning_rate": 2.4209166282932462e-08, "loss": 0.2885, "step": 47793 }, { "epoch": 4.859089060593737, "grad_norm": 0.25694429874420166, "learning_rate": 2.417429833878937e-08, "loss": 0.285, "step": 47794 }, { "epoch": 4.859190727938186, "grad_norm": 0.30037298798561096, "learning_rate": 2.4139455461674178e-08, "loss": 0.3264, "step": 47795 }, { "epoch": 4.859292395282635, "grad_norm": 0.29046332836151123, "learning_rate": 2.4104637651761743e-08, "loss": 0.2733, "step": 47796 }, { "epoch": 4.859394062627084, "grad_norm": 0.2819423973560333, "learning_rate": 2.406984490922748e-08, "loss": 0.2788, "step": 47797 }, { "epoch": 4.859495729971533, "grad_norm": 0.28438660502433777, "learning_rate": 2.4035077234247363e-08, "loss": 0.2715, "step": 47798 }, { "epoch": 4.859597397315982, "grad_norm": 0.27945083379745483, "learning_rate": 2.4000334626995692e-08, "loss": 0.3324, "step": 47799 }, { "epoch": 4.859699064660431, "grad_norm": 0.27410319447517395, "learning_rate": 2.396561708764844e-08, "loss": 0.2955, "step": 47800 }, { "epoch": 4.85980073200488, "grad_norm": 0.291006475687027, "learning_rate": 2.3930924616379357e-08, "loss": 0.2757, "step": 47801 }, { "epoch": 4.859902399349329, "grad_norm": 0.28558430075645447, "learning_rate": 2.389625721336386e-08, "loss": 0.3035, "step": 47802 }, { "epoch": 4.860004066693778, "grad_norm": 0.2760620713233948, "learning_rate": 2.3861614878776806e-08, "loss": 0.2971, "step": 47803 }, { "epoch": 4.8601057340382265, "grad_norm": 0.2969476580619812, "learning_rate": 2.3826997612791946e-08, "loss": 0.2771, "step": 47804 }, { "epoch": 4.8602074013826755, "grad_norm": 0.2729808986186981, "learning_rate": 2.3792405415584697e-08, "loss": 0.284, "step": 47805 }, { "epoch": 4.860309068727124, "grad_norm": 0.2705835700035095, "learning_rate": 2.375783828732825e-08, "loss": 0.322, "step": 47806 }, { "epoch": 4.860410736071573, "grad_norm": 0.2636675238609314, "learning_rate": 2.3723296228197467e-08, "loss": 0.3223, "step": 47807 }, { "epoch": 4.860512403416022, "grad_norm": 0.27716711163520813, "learning_rate": 2.3688779238366655e-08, "loss": 0.2693, "step": 47808 }, { "epoch": 4.860614070760471, "grad_norm": 0.2796099781990051, "learning_rate": 2.3654287318008452e-08, "loss": 0.3046, "step": 47809 }, { "epoch": 4.860715738104921, "grad_norm": 0.24781332910060883, "learning_rate": 2.3619820467297715e-08, "loss": 0.308, "step": 47810 }, { "epoch": 4.86081740544937, "grad_norm": 0.28502145409584045, "learning_rate": 2.3585378686407645e-08, "loss": 0.3088, "step": 47811 }, { "epoch": 4.860919072793819, "grad_norm": 0.2776225507259369, "learning_rate": 2.3550961975511988e-08, "loss": 0.3238, "step": 47812 }, { "epoch": 4.861020740138268, "grad_norm": 0.28820839524269104, "learning_rate": 2.351657033478394e-08, "loss": 0.3126, "step": 47813 }, { "epoch": 4.861122407482717, "grad_norm": 0.28584906458854675, "learning_rate": 2.348220376439725e-08, "loss": 0.3036, "step": 47814 }, { "epoch": 4.861224074827166, "grad_norm": 0.2866365611553192, "learning_rate": 2.3447862264524e-08, "loss": 0.3065, "step": 47815 }, { "epoch": 4.861325742171615, "grad_norm": 0.25887471437454224, "learning_rate": 2.3413545835337948e-08, "loss": 0.3587, "step": 47816 }, { "epoch": 4.861427409516064, "grad_norm": 0.2776283621788025, "learning_rate": 2.3379254477011725e-08, "loss": 0.2849, "step": 47817 }, { "epoch": 4.861529076860513, "grad_norm": 0.2644996643066406, "learning_rate": 2.3344988189719086e-08, "loss": 0.2967, "step": 47818 }, { "epoch": 4.8616307442049616, "grad_norm": 0.3019314408302307, "learning_rate": 2.331074697363045e-08, "loss": 0.2707, "step": 47819 }, { "epoch": 4.8617324115494105, "grad_norm": 0.2834342122077942, "learning_rate": 2.3276530828920673e-08, "loss": 0.306, "step": 47820 }, { "epoch": 4.861834078893859, "grad_norm": 0.28577721118927, "learning_rate": 2.3242339755760733e-08, "loss": 0.2783, "step": 47821 }, { "epoch": 4.861935746238308, "grad_norm": 0.25680625438690186, "learning_rate": 2.3208173754322717e-08, "loss": 0.2983, "step": 47822 }, { "epoch": 4.862037413582757, "grad_norm": 0.2834038734436035, "learning_rate": 2.3174032824779813e-08, "loss": 0.2879, "step": 47823 }, { "epoch": 4.862139080927206, "grad_norm": 0.27484947443008423, "learning_rate": 2.3139916967303556e-08, "loss": 0.2802, "step": 47824 }, { "epoch": 4.862240748271655, "grad_norm": 0.2879728078842163, "learning_rate": 2.3105826182065473e-08, "loss": 0.2928, "step": 47825 }, { "epoch": 4.862342415616104, "grad_norm": 0.28409066796302795, "learning_rate": 2.3071760469237647e-08, "loss": 0.2717, "step": 47826 }, { "epoch": 4.862444082960553, "grad_norm": 0.27767014503479004, "learning_rate": 2.3037719828991613e-08, "loss": 0.2956, "step": 47827 }, { "epoch": 4.862545750305002, "grad_norm": 0.2938545346260071, "learning_rate": 2.3003704261498338e-08, "loss": 0.3379, "step": 47828 }, { "epoch": 4.862647417649451, "grad_norm": 0.2730851173400879, "learning_rate": 2.2969713766929913e-08, "loss": 0.3026, "step": 47829 }, { "epoch": 4.8627490849939, "grad_norm": 0.29573002457618713, "learning_rate": 2.2935748345457865e-08, "loss": 0.31, "step": 47830 }, { "epoch": 4.862850752338349, "grad_norm": 0.26701635122299194, "learning_rate": 2.2901807997252613e-08, "loss": 0.2754, "step": 47831 }, { "epoch": 4.862952419682798, "grad_norm": 0.252279669046402, "learning_rate": 2.2867892722484576e-08, "loss": 0.2859, "step": 47832 }, { "epoch": 4.863054087027247, "grad_norm": 0.2864586412906647, "learning_rate": 2.2834002521326394e-08, "loss": 0.288, "step": 47833 }, { "epoch": 4.863155754371696, "grad_norm": 0.2689196467399597, "learning_rate": 2.280013739394682e-08, "loss": 0.2877, "step": 47834 }, { "epoch": 4.863257421716145, "grad_norm": 0.27758315205574036, "learning_rate": 2.276629734051794e-08, "loss": 0.2948, "step": 47835 }, { "epoch": 4.8633590890605936, "grad_norm": 0.2969449460506439, "learning_rate": 2.273248236120962e-08, "loss": 0.307, "step": 47836 }, { "epoch": 4.8634607564050425, "grad_norm": 0.27902674674987793, "learning_rate": 2.2698692456192273e-08, "loss": 0.2995, "step": 47837 }, { "epoch": 4.863562423749491, "grad_norm": 0.27062752842903137, "learning_rate": 2.2664927625636324e-08, "loss": 0.3129, "step": 47838 }, { "epoch": 4.86366409109394, "grad_norm": 0.2822440266609192, "learning_rate": 2.263118786971108e-08, "loss": 0.334, "step": 47839 }, { "epoch": 4.863765758438389, "grad_norm": 0.2806486487388611, "learning_rate": 2.2597473188587514e-08, "loss": 0.3268, "step": 47840 }, { "epoch": 4.863867425782838, "grad_norm": 0.2776772379875183, "learning_rate": 2.256378358243494e-08, "loss": 0.2871, "step": 47841 }, { "epoch": 4.863969093127287, "grad_norm": 0.2722943127155304, "learning_rate": 2.2530119051423217e-08, "loss": 0.3012, "step": 47842 }, { "epoch": 4.864070760471736, "grad_norm": 0.2774467468261719, "learning_rate": 2.2496479595722208e-08, "loss": 0.3052, "step": 47843 }, { "epoch": 4.864172427816185, "grad_norm": 0.266968309879303, "learning_rate": 2.2462865215500674e-08, "loss": 0.2928, "step": 47844 }, { "epoch": 4.864274095160634, "grad_norm": 0.2911813259124756, "learning_rate": 2.2429275910929027e-08, "loss": 0.323, "step": 47845 }, { "epoch": 4.864375762505084, "grad_norm": 0.2893708348274231, "learning_rate": 2.2395711682174913e-08, "loss": 0.3134, "step": 47846 }, { "epoch": 4.864477429849533, "grad_norm": 0.275835782289505, "learning_rate": 2.2362172529408755e-08, "loss": 0.2957, "step": 47847 }, { "epoch": 4.864579097193982, "grad_norm": 0.26717472076416016, "learning_rate": 2.23286584527993e-08, "loss": 0.2963, "step": 47848 }, { "epoch": 4.864680764538431, "grad_norm": 0.31581932306289673, "learning_rate": 2.2295169452514752e-08, "loss": 0.2662, "step": 47849 }, { "epoch": 4.86478243188288, "grad_norm": 0.2586776614189148, "learning_rate": 2.2261705528724977e-08, "loss": 0.2785, "step": 47850 }, { "epoch": 4.864884099227329, "grad_norm": 0.27200818061828613, "learning_rate": 2.222826668159761e-08, "loss": 0.2688, "step": 47851 }, { "epoch": 4.8649857665717775, "grad_norm": 0.2694833278656006, "learning_rate": 2.2194852911300858e-08, "loss": 0.297, "step": 47852 }, { "epoch": 4.8650874339162264, "grad_norm": 0.28437551856040955, "learning_rate": 2.2161464218003472e-08, "loss": 0.2927, "step": 47853 }, { "epoch": 4.865189101260675, "grad_norm": 0.27362120151519775, "learning_rate": 2.2128100601874203e-08, "loss": 0.3279, "step": 47854 }, { "epoch": 4.865290768605124, "grad_norm": 0.27211689949035645, "learning_rate": 2.2094762063080698e-08, "loss": 0.2753, "step": 47855 }, { "epoch": 4.865392435949573, "grad_norm": 0.2918907403945923, "learning_rate": 2.2061448601790603e-08, "loss": 0.3179, "step": 47856 }, { "epoch": 4.865494103294022, "grad_norm": 0.27832409739494324, "learning_rate": 2.202816021817211e-08, "loss": 0.2925, "step": 47857 }, { "epoch": 4.865595770638471, "grad_norm": 0.2981272041797638, "learning_rate": 2.199489691239287e-08, "loss": 0.2879, "step": 47858 }, { "epoch": 4.86569743798292, "grad_norm": 0.2830207347869873, "learning_rate": 2.196165868461997e-08, "loss": 0.3152, "step": 47859 }, { "epoch": 4.865799105327369, "grad_norm": 0.3080834150314331, "learning_rate": 2.1928445535021604e-08, "loss": 0.274, "step": 47860 }, { "epoch": 4.865900772671818, "grad_norm": 0.27484747767448425, "learning_rate": 2.1895257463764307e-08, "loss": 0.2904, "step": 47861 }, { "epoch": 4.866002440016267, "grad_norm": 0.27118921279907227, "learning_rate": 2.1862094471016282e-08, "loss": 0.309, "step": 47862 }, { "epoch": 4.866104107360716, "grad_norm": 0.27060818672180176, "learning_rate": 2.1828956556944057e-08, "loss": 0.3013, "step": 47863 }, { "epoch": 4.866205774705165, "grad_norm": 0.28124621510505676, "learning_rate": 2.1795843721714173e-08, "loss": 0.2914, "step": 47864 }, { "epoch": 4.866307442049614, "grad_norm": 0.3011082410812378, "learning_rate": 2.1762755965494266e-08, "loss": 0.2846, "step": 47865 }, { "epoch": 4.866409109394063, "grad_norm": 0.3010295331478119, "learning_rate": 2.172969328844976e-08, "loss": 0.2963, "step": 47866 }, { "epoch": 4.866510776738512, "grad_norm": 0.27097275853157043, "learning_rate": 2.169665569074886e-08, "loss": 0.2986, "step": 47867 }, { "epoch": 4.866612444082961, "grad_norm": 0.2781085968017578, "learning_rate": 2.166364317255698e-08, "loss": 0.2976, "step": 47868 }, { "epoch": 4.8667141114274095, "grad_norm": 0.27933269739151, "learning_rate": 2.163065573404066e-08, "loss": 0.298, "step": 47869 }, { "epoch": 4.8668157787718584, "grad_norm": 0.28818652033805847, "learning_rate": 2.159769337536588e-08, "loss": 0.3053, "step": 47870 }, { "epoch": 4.866917446116307, "grad_norm": 0.2875450849533081, "learning_rate": 2.1564756096699167e-08, "loss": 0.2968, "step": 47871 }, { "epoch": 4.867019113460756, "grad_norm": 0.29532912373542786, "learning_rate": 2.1531843898205397e-08, "loss": 0.3318, "step": 47872 }, { "epoch": 4.867120780805205, "grad_norm": 0.2778080999851227, "learning_rate": 2.149895678005165e-08, "loss": 0.336, "step": 47873 }, { "epoch": 4.867222448149654, "grad_norm": 0.28340744972229004, "learning_rate": 2.1466094742403353e-08, "loss": 0.2916, "step": 47874 }, { "epoch": 4.867324115494103, "grad_norm": 0.2766728401184082, "learning_rate": 2.143325778542593e-08, "loss": 0.2759, "step": 47875 }, { "epoch": 4.867425782838552, "grad_norm": 0.2829608917236328, "learning_rate": 2.1400445909283697e-08, "loss": 0.2968, "step": 47876 }, { "epoch": 4.867527450183001, "grad_norm": 0.2801920175552368, "learning_rate": 2.1367659114143735e-08, "loss": 0.2998, "step": 47877 }, { "epoch": 4.86762911752745, "grad_norm": 0.27851444482803345, "learning_rate": 2.1334897400170362e-08, "loss": 0.2837, "step": 47878 }, { "epoch": 4.867730784871899, "grad_norm": 0.4135695695877075, "learning_rate": 2.1302160767528445e-08, "loss": 0.2943, "step": 47879 }, { "epoch": 4.867832452216348, "grad_norm": 0.2822517156600952, "learning_rate": 2.1269449216382852e-08, "loss": 0.2741, "step": 47880 }, { "epoch": 4.867934119560797, "grad_norm": 0.2564810812473297, "learning_rate": 2.123676274689901e-08, "loss": 0.2954, "step": 47881 }, { "epoch": 4.868035786905246, "grad_norm": 0.31136980652809143, "learning_rate": 2.1204101359241226e-08, "loss": 0.3028, "step": 47882 }, { "epoch": 4.868137454249695, "grad_norm": 0.26972392201423645, "learning_rate": 2.1171465053573816e-08, "loss": 0.3175, "step": 47883 }, { "epoch": 4.868239121594144, "grad_norm": 0.26656538248062134, "learning_rate": 2.1138853830061644e-08, "loss": 0.3281, "step": 47884 }, { "epoch": 4.868340788938593, "grad_norm": 0.26421546936035156, "learning_rate": 2.1106267688869032e-08, "loss": 0.3221, "step": 47885 }, { "epoch": 4.8684424562830415, "grad_norm": 0.27884218096733093, "learning_rate": 2.1073706630159174e-08, "loss": 0.2921, "step": 47886 }, { "epoch": 4.8685441236274904, "grad_norm": 0.3167381286621094, "learning_rate": 2.1041170654097497e-08, "loss": 0.324, "step": 47887 }, { "epoch": 4.868645790971939, "grad_norm": 0.25511232018470764, "learning_rate": 2.100865976084665e-08, "loss": 0.318, "step": 47888 }, { "epoch": 4.868747458316388, "grad_norm": 0.28409919142723083, "learning_rate": 2.0976173950570944e-08, "loss": 0.2907, "step": 47889 }, { "epoch": 4.868849125660837, "grad_norm": 0.2767843008041382, "learning_rate": 2.0943713223434692e-08, "loss": 0.2986, "step": 47890 }, { "epoch": 4.868950793005286, "grad_norm": 0.2740844488143921, "learning_rate": 2.0911277579600542e-08, "loss": 0.3295, "step": 47891 }, { "epoch": 4.869052460349736, "grad_norm": 0.270439475774765, "learning_rate": 2.0878867019231696e-08, "loss": 0.2966, "step": 47892 }, { "epoch": 4.869154127694185, "grad_norm": 0.28759339451789856, "learning_rate": 2.084648154249247e-08, "loss": 0.2916, "step": 47893 }, { "epoch": 4.869255795038634, "grad_norm": 0.25766393542289734, "learning_rate": 2.081412114954551e-08, "loss": 0.2834, "step": 47894 }, { "epoch": 4.869357462383083, "grad_norm": 0.26567715406417847, "learning_rate": 2.0781785840553458e-08, "loss": 0.3118, "step": 47895 }, { "epoch": 4.869459129727532, "grad_norm": 0.2779604494571686, "learning_rate": 2.0749475615679525e-08, "loss": 0.2786, "step": 47896 }, { "epoch": 4.869560797071981, "grad_norm": 0.26740145683288574, "learning_rate": 2.0717190475086358e-08, "loss": 0.2848, "step": 47897 }, { "epoch": 4.86966246441643, "grad_norm": 0.28165191411972046, "learning_rate": 2.0684930418937156e-08, "loss": 0.3468, "step": 47898 }, { "epoch": 4.869764131760879, "grad_norm": 0.28754085302352905, "learning_rate": 2.0652695447393457e-08, "loss": 0.291, "step": 47899 }, { "epoch": 4.869865799105328, "grad_norm": 0.2699577510356903, "learning_rate": 2.0620485560619018e-08, "loss": 0.3039, "step": 47900 }, { "epoch": 4.8699674664497765, "grad_norm": 0.28820130228996277, "learning_rate": 2.0588300758774826e-08, "loss": 0.2786, "step": 47901 }, { "epoch": 4.8700691337942255, "grad_norm": 0.26489508152008057, "learning_rate": 2.055614104202297e-08, "loss": 0.3184, "step": 47902 }, { "epoch": 4.870170801138674, "grad_norm": 0.290485143661499, "learning_rate": 2.0524006410526652e-08, "loss": 0.2926, "step": 47903 }, { "epoch": 4.870272468483123, "grad_norm": 0.3028770685195923, "learning_rate": 2.049189686444686e-08, "loss": 0.2716, "step": 47904 }, { "epoch": 4.870374135827572, "grad_norm": 0.2839764952659607, "learning_rate": 2.0459812403945677e-08, "loss": 0.2941, "step": 47905 }, { "epoch": 4.870475803172021, "grad_norm": 0.2940617799758911, "learning_rate": 2.042775302918465e-08, "loss": 0.3063, "step": 47906 }, { "epoch": 4.87057747051647, "grad_norm": 0.2951318621635437, "learning_rate": 2.039571874032531e-08, "loss": 0.2905, "step": 47907 }, { "epoch": 4.870679137860919, "grad_norm": 0.2624947428703308, "learning_rate": 2.0363709537528643e-08, "loss": 0.2889, "step": 47908 }, { "epoch": 4.870780805205368, "grad_norm": 0.27951258420944214, "learning_rate": 2.0331725420956182e-08, "loss": 0.3209, "step": 47909 }, { "epoch": 4.870882472549817, "grad_norm": 0.30310437083244324, "learning_rate": 2.029976639076947e-08, "loss": 0.2871, "step": 47910 }, { "epoch": 4.870984139894266, "grad_norm": 0.27456018328666687, "learning_rate": 2.0267832447129488e-08, "loss": 0.3019, "step": 47911 }, { "epoch": 4.871085807238715, "grad_norm": 0.27292823791503906, "learning_rate": 2.0235923590196105e-08, "loss": 0.3109, "step": 47912 }, { "epoch": 4.871187474583164, "grad_norm": 0.27379944920539856, "learning_rate": 2.0204039820131415e-08, "loss": 0.2931, "step": 47913 }, { "epoch": 4.871289141927613, "grad_norm": 0.2803105413913727, "learning_rate": 2.0172181137095292e-08, "loss": 0.3304, "step": 47914 }, { "epoch": 4.871390809272062, "grad_norm": 0.27736392617225647, "learning_rate": 2.0140347541248162e-08, "loss": 0.2742, "step": 47915 }, { "epoch": 4.871492476616511, "grad_norm": 0.26659512519836426, "learning_rate": 2.0108539032750453e-08, "loss": 0.2941, "step": 47916 }, { "epoch": 4.87159414396096, "grad_norm": 0.292176753282547, "learning_rate": 2.0076755611762588e-08, "loss": 0.2779, "step": 47917 }, { "epoch": 4.8716958113054085, "grad_norm": 0.28032806515693665, "learning_rate": 2.0044997278445e-08, "loss": 0.3084, "step": 47918 }, { "epoch": 4.8717974786498575, "grad_norm": 0.2980043292045593, "learning_rate": 2.0013264032956447e-08, "loss": 0.3011, "step": 47919 }, { "epoch": 4.871899145994306, "grad_norm": 0.2818085849285126, "learning_rate": 1.998155587545847e-08, "loss": 0.323, "step": 47920 }, { "epoch": 4.872000813338755, "grad_norm": 0.27521228790283203, "learning_rate": 1.994987280610927e-08, "loss": 0.281, "step": 47921 }, { "epoch": 4.872102480683204, "grad_norm": 0.30500712990760803, "learning_rate": 1.991821482506984e-08, "loss": 0.3037, "step": 47922 }, { "epoch": 4.872204148027653, "grad_norm": 0.2998248040676117, "learning_rate": 1.9886581932498373e-08, "loss": 0.3125, "step": 47923 }, { "epoch": 4.872305815372102, "grad_norm": 0.3031855523586273, "learning_rate": 1.9854974128555305e-08, "loss": 0.3168, "step": 47924 }, { "epoch": 4.872407482716551, "grad_norm": 0.2563530504703522, "learning_rate": 1.9823391413399397e-08, "loss": 0.3065, "step": 47925 }, { "epoch": 4.872509150061, "grad_norm": 0.28081396222114563, "learning_rate": 1.9791833787189407e-08, "loss": 0.3022, "step": 47926 }, { "epoch": 4.872610817405449, "grad_norm": 0.27760806679725647, "learning_rate": 1.9760301250085213e-08, "loss": 0.3024, "step": 47927 }, { "epoch": 4.872712484749899, "grad_norm": 0.2635863125324249, "learning_rate": 1.9728793802245016e-08, "loss": 0.303, "step": 47928 }, { "epoch": 4.872814152094348, "grad_norm": 0.2777175009250641, "learning_rate": 1.9697311443827583e-08, "loss": 0.2924, "step": 47929 }, { "epoch": 4.872915819438797, "grad_norm": 0.28549572825431824, "learning_rate": 1.9665854174991672e-08, "loss": 0.3132, "step": 47930 }, { "epoch": 4.873017486783246, "grad_norm": 0.2717916965484619, "learning_rate": 1.963442199589549e-08, "loss": 0.3196, "step": 47931 }, { "epoch": 4.873119154127695, "grad_norm": 0.2656417191028595, "learning_rate": 1.9603014906697248e-08, "loss": 0.2822, "step": 47932 }, { "epoch": 4.8732208214721435, "grad_norm": 0.2581624686717987, "learning_rate": 1.957163290755626e-08, "loss": 0.3001, "step": 47933 }, { "epoch": 4.8733224888165925, "grad_norm": 0.2784618139266968, "learning_rate": 1.9540275998629064e-08, "loss": 0.3385, "step": 47934 }, { "epoch": 4.873424156161041, "grad_norm": 0.2725256681442261, "learning_rate": 1.9508944180074984e-08, "loss": 0.3114, "step": 47935 }, { "epoch": 4.87352582350549, "grad_norm": 0.2872160077095032, "learning_rate": 1.9477637452051112e-08, "loss": 0.2794, "step": 47936 }, { "epoch": 4.873627490849939, "grad_norm": 0.27124375104904175, "learning_rate": 1.94463558147151e-08, "loss": 0.2777, "step": 47937 }, { "epoch": 4.873729158194388, "grad_norm": 0.259804368019104, "learning_rate": 1.9415099268225156e-08, "loss": 0.2807, "step": 47938 }, { "epoch": 4.873830825538837, "grad_norm": 0.2771489918231964, "learning_rate": 1.9383867812738377e-08, "loss": 0.3, "step": 47939 }, { "epoch": 4.873932492883286, "grad_norm": 0.2833336591720581, "learning_rate": 1.935266144841186e-08, "loss": 0.3111, "step": 47940 }, { "epoch": 4.874034160227735, "grad_norm": 0.2774963080883026, "learning_rate": 1.932148017540325e-08, "loss": 0.3208, "step": 47941 }, { "epoch": 4.874135827572184, "grad_norm": 0.2793697416782379, "learning_rate": 1.9290323993869096e-08, "loss": 0.2779, "step": 47942 }, { "epoch": 4.874237494916633, "grad_norm": 0.2834325432777405, "learning_rate": 1.925919290396705e-08, "loss": 0.2953, "step": 47943 }, { "epoch": 4.874339162261082, "grad_norm": 0.28775784373283386, "learning_rate": 1.9228086905853648e-08, "loss": 0.285, "step": 47944 }, { "epoch": 4.874440829605531, "grad_norm": 0.27553093433380127, "learning_rate": 1.9197005999685435e-08, "loss": 0.2648, "step": 47945 }, { "epoch": 4.87454249694998, "grad_norm": 0.2838609218597412, "learning_rate": 1.9165950185618954e-08, "loss": 0.2961, "step": 47946 }, { "epoch": 4.874644164294429, "grad_norm": 0.28311029076576233, "learning_rate": 1.913491946381074e-08, "loss": 0.3262, "step": 47947 }, { "epoch": 4.874745831638878, "grad_norm": 0.2699874937534332, "learning_rate": 1.9103913834417343e-08, "loss": 0.3154, "step": 47948 }, { "epoch": 4.874847498983327, "grad_norm": 0.27470436692237854, "learning_rate": 1.9072933297594742e-08, "loss": 0.2791, "step": 47949 }, { "epoch": 4.8749491663277755, "grad_norm": 0.28066977858543396, "learning_rate": 1.9041977853498927e-08, "loss": 0.2855, "step": 47950 }, { "epoch": 4.8750508336722245, "grad_norm": 0.2817292809486389, "learning_rate": 1.9011047502285885e-08, "loss": 0.2871, "step": 47951 }, { "epoch": 4.875152501016673, "grad_norm": 0.2694702744483948, "learning_rate": 1.8980142244111597e-08, "loss": 0.3039, "step": 47952 }, { "epoch": 4.875254168361122, "grad_norm": 0.28678491711616516, "learning_rate": 1.894926207913206e-08, "loss": 0.2986, "step": 47953 }, { "epoch": 4.875355835705571, "grad_norm": 0.3043757677078247, "learning_rate": 1.8918407007502136e-08, "loss": 0.286, "step": 47954 }, { "epoch": 4.87545750305002, "grad_norm": 0.29491743445396423, "learning_rate": 1.8887577029377823e-08, "loss": 0.303, "step": 47955 }, { "epoch": 4.875559170394469, "grad_norm": 0.2563500702381134, "learning_rate": 1.885677214491399e-08, "loss": 0.2735, "step": 47956 }, { "epoch": 4.875660837738918, "grad_norm": 0.28351351618766785, "learning_rate": 1.8825992354266077e-08, "loss": 0.3168, "step": 47957 }, { "epoch": 4.875762505083367, "grad_norm": 0.2822917699813843, "learning_rate": 1.879523765758895e-08, "loss": 0.2535, "step": 47958 }, { "epoch": 4.875864172427816, "grad_norm": 0.285003662109375, "learning_rate": 1.876450805503749e-08, "loss": 0.2971, "step": 47959 }, { "epoch": 4.875965839772265, "grad_norm": 0.33060139417648315, "learning_rate": 1.8733803546767683e-08, "loss": 0.2687, "step": 47960 }, { "epoch": 4.876067507116714, "grad_norm": 0.2676697373390198, "learning_rate": 1.870312413293218e-08, "loss": 0.2971, "step": 47961 }, { "epoch": 4.876169174461163, "grad_norm": 0.28479519486427307, "learning_rate": 1.8672469813687534e-08, "loss": 0.2547, "step": 47962 }, { "epoch": 4.876270841805612, "grad_norm": 0.2916211187839508, "learning_rate": 1.8641840589186388e-08, "loss": 0.2875, "step": 47963 }, { "epoch": 4.876372509150061, "grad_norm": 0.2832738757133484, "learning_rate": 1.861123645958418e-08, "loss": 0.3033, "step": 47964 }, { "epoch": 4.87647417649451, "grad_norm": 0.27821993827819824, "learning_rate": 1.8580657425035233e-08, "loss": 0.3219, "step": 47965 }, { "epoch": 4.876575843838959, "grad_norm": 0.26765409111976624, "learning_rate": 1.855010348569275e-08, "loss": 0.2775, "step": 47966 }, { "epoch": 4.8766775111834075, "grad_norm": 0.2992294132709503, "learning_rate": 1.851957464171106e-08, "loss": 0.2805, "step": 47967 }, { "epoch": 4.8767791785278565, "grad_norm": 0.2847028374671936, "learning_rate": 1.8489070893243922e-08, "loss": 0.2947, "step": 47968 }, { "epoch": 4.876880845872305, "grad_norm": 0.2810595631599426, "learning_rate": 1.8458592240445104e-08, "loss": 0.2996, "step": 47969 }, { "epoch": 4.876982513216754, "grad_norm": 0.2957996428012848, "learning_rate": 1.8428138683468376e-08, "loss": 0.3022, "step": 47970 }, { "epoch": 4.877084180561203, "grad_norm": 0.2675076723098755, "learning_rate": 1.8397710222466947e-08, "loss": 0.2854, "step": 47971 }, { "epoch": 4.877185847905652, "grad_norm": 0.28209397196769714, "learning_rate": 1.836730685759347e-08, "loss": 0.2915, "step": 47972 }, { "epoch": 4.877287515250101, "grad_norm": 0.2708549201488495, "learning_rate": 1.8336928589002268e-08, "loss": 0.2824, "step": 47973 }, { "epoch": 4.877389182594551, "grad_norm": 0.2740764915943146, "learning_rate": 1.830657541684544e-08, "loss": 0.2911, "step": 47974 }, { "epoch": 4.877490849939, "grad_norm": 0.2821207344532013, "learning_rate": 1.82762473412762e-08, "loss": 0.3124, "step": 47975 }, { "epoch": 4.877592517283449, "grad_norm": 0.28533872961997986, "learning_rate": 1.8245944362447753e-08, "loss": 0.2697, "step": 47976 }, { "epoch": 4.877694184627898, "grad_norm": 0.2890758514404297, "learning_rate": 1.8215666480512207e-08, "loss": 0.3078, "step": 47977 }, { "epoch": 4.877795851972347, "grad_norm": 0.29937195777893066, "learning_rate": 1.818541369562221e-08, "loss": 0.2812, "step": 47978 }, { "epoch": 4.877897519316796, "grad_norm": 0.27150604128837585, "learning_rate": 1.8155186007930425e-08, "loss": 0.3039, "step": 47979 }, { "epoch": 4.877999186661245, "grad_norm": 0.28069043159484863, "learning_rate": 1.8124983417588394e-08, "loss": 0.3016, "step": 47980 }, { "epoch": 4.878100854005694, "grad_norm": 0.2579115927219391, "learning_rate": 1.8094805924749327e-08, "loss": 0.3276, "step": 47981 }, { "epoch": 4.8782025213501425, "grad_norm": 0.2954699695110321, "learning_rate": 1.806465352956477e-08, "loss": 0.3259, "step": 47982 }, { "epoch": 4.8783041886945915, "grad_norm": 0.2766764163970947, "learning_rate": 1.8034526232186266e-08, "loss": 0.295, "step": 47983 }, { "epoch": 4.87840585603904, "grad_norm": 0.25909423828125, "learning_rate": 1.800442403276592e-08, "loss": 0.2978, "step": 47984 }, { "epoch": 4.878507523383489, "grad_norm": 0.27270713448524475, "learning_rate": 1.797434693145583e-08, "loss": 0.2904, "step": 47985 }, { "epoch": 4.878609190727938, "grad_norm": 0.27319908142089844, "learning_rate": 1.7944294928406992e-08, "loss": 0.3007, "step": 47986 }, { "epoch": 4.878710858072387, "grad_norm": 0.30993789434432983, "learning_rate": 1.791426802377039e-08, "loss": 0.2937, "step": 47987 }, { "epoch": 4.878812525416836, "grad_norm": 0.2779786288738251, "learning_rate": 1.788426621769812e-08, "loss": 0.2826, "step": 47988 }, { "epoch": 4.878914192761285, "grad_norm": 0.2923443019390106, "learning_rate": 1.7854289510341184e-08, "loss": 0.2714, "step": 47989 }, { "epoch": 4.879015860105734, "grad_norm": 0.27305710315704346, "learning_rate": 1.7824337901850007e-08, "loss": 0.3137, "step": 47990 }, { "epoch": 4.879117527450183, "grad_norm": 0.2973625361919403, "learning_rate": 1.7794411392376143e-08, "loss": 0.3312, "step": 47991 }, { "epoch": 4.879219194794632, "grad_norm": 0.277587890625, "learning_rate": 1.7764509982070023e-08, "loss": 0.3416, "step": 47992 }, { "epoch": 4.879320862139081, "grad_norm": 0.27119559049606323, "learning_rate": 1.773463367108208e-08, "loss": 0.3373, "step": 47993 }, { "epoch": 4.87942252948353, "grad_norm": 0.2705070972442627, "learning_rate": 1.770478245956331e-08, "loss": 0.3318, "step": 47994 }, { "epoch": 4.879524196827979, "grad_norm": 0.290892630815506, "learning_rate": 1.767495634766414e-08, "loss": 0.2887, "step": 47995 }, { "epoch": 4.879625864172428, "grad_norm": 0.31910648941993713, "learning_rate": 1.7645155335533904e-08, "loss": 0.3154, "step": 47996 }, { "epoch": 4.879727531516877, "grad_norm": 0.30866193771362305, "learning_rate": 1.7615379423324142e-08, "loss": 0.3176, "step": 47997 }, { "epoch": 4.879829198861326, "grad_norm": 0.2752358317375183, "learning_rate": 1.7585628611183624e-08, "loss": 0.2723, "step": 47998 }, { "epoch": 4.8799308662057745, "grad_norm": 0.27109161019325256, "learning_rate": 1.755590289926279e-08, "loss": 0.2993, "step": 47999 }, { "epoch": 4.8800325335502235, "grad_norm": 0.2783670425415039, "learning_rate": 1.7526202287711515e-08, "loss": 0.3029, "step": 48000 }, { "epoch": 4.880134200894672, "grad_norm": 0.30796152353286743, "learning_rate": 1.7496526776678568e-08, "loss": 0.3099, "step": 48001 }, { "epoch": 4.880235868239121, "grad_norm": 0.297265887260437, "learning_rate": 1.7466876366314943e-08, "loss": 0.2951, "step": 48002 }, { "epoch": 4.88033753558357, "grad_norm": 0.29249390959739685, "learning_rate": 1.74372510567683e-08, "loss": 0.2971, "step": 48003 }, { "epoch": 4.880439202928019, "grad_norm": 0.2886521816253662, "learning_rate": 1.740765084818907e-08, "loss": 0.3023, "step": 48004 }, { "epoch": 4.880540870272468, "grad_norm": 0.2633398771286011, "learning_rate": 1.7378075740726586e-08, "loss": 0.3231, "step": 48005 }, { "epoch": 4.880642537616917, "grad_norm": 0.2890297770500183, "learning_rate": 1.7348525734529055e-08, "loss": 0.328, "step": 48006 }, { "epoch": 4.880744204961366, "grad_norm": 0.27329403162002563, "learning_rate": 1.7319000829745247e-08, "loss": 0.2989, "step": 48007 }, { "epoch": 4.880845872305815, "grad_norm": 0.2653973698616028, "learning_rate": 1.728950102652449e-08, "loss": 0.296, "step": 48008 }, { "epoch": 4.880947539650264, "grad_norm": 0.2629460394382477, "learning_rate": 1.7260026325015e-08, "loss": 0.2995, "step": 48009 }, { "epoch": 4.881049206994714, "grad_norm": 0.2700587511062622, "learning_rate": 1.7230576725365543e-08, "loss": 0.2986, "step": 48010 }, { "epoch": 4.881150874339163, "grad_norm": 0.2687920928001404, "learning_rate": 1.7201152227724336e-08, "loss": 0.332, "step": 48011 }, { "epoch": 4.881252541683612, "grad_norm": 0.2797166109085083, "learning_rate": 1.7171752832239596e-08, "loss": 0.2815, "step": 48012 }, { "epoch": 4.881354209028061, "grad_norm": 0.27017465233802795, "learning_rate": 1.7142378539059533e-08, "loss": 0.3189, "step": 48013 }, { "epoch": 4.8814558763725096, "grad_norm": 0.2813417911529541, "learning_rate": 1.7113029348331813e-08, "loss": 0.3161, "step": 48014 }, { "epoch": 4.8815575437169585, "grad_norm": 0.28857576847076416, "learning_rate": 1.708370526020464e-08, "loss": 0.3048, "step": 48015 }, { "epoch": 4.881659211061407, "grad_norm": 0.2650745213031769, "learning_rate": 1.705440627482624e-08, "loss": 0.308, "step": 48016 }, { "epoch": 4.881760878405856, "grad_norm": 0.2701336741447449, "learning_rate": 1.7025132392343157e-08, "loss": 0.3348, "step": 48017 }, { "epoch": 4.881862545750305, "grad_norm": 0.27559220790863037, "learning_rate": 1.6995883612903053e-08, "loss": 0.2851, "step": 48018 }, { "epoch": 4.881964213094754, "grad_norm": 0.2704579532146454, "learning_rate": 1.6966659936653585e-08, "loss": 0.3152, "step": 48019 }, { "epoch": 4.882065880439203, "grad_norm": 0.309405118227005, "learning_rate": 1.6937461363742413e-08, "loss": 0.2924, "step": 48020 }, { "epoch": 4.882167547783652, "grad_norm": 0.28020787239074707, "learning_rate": 1.6908287894315534e-08, "loss": 0.3008, "step": 48021 }, { "epoch": 4.882269215128101, "grad_norm": 0.2828964293003082, "learning_rate": 1.687913952852116e-08, "loss": 0.3199, "step": 48022 }, { "epoch": 4.88237088247255, "grad_norm": 0.2737457752227783, "learning_rate": 1.6850016266505285e-08, "loss": 0.3093, "step": 48023 }, { "epoch": 4.882472549816999, "grad_norm": 0.2918097972869873, "learning_rate": 1.6820918108414463e-08, "loss": 0.2924, "step": 48024 }, { "epoch": 4.882574217161448, "grad_norm": 0.2822606861591339, "learning_rate": 1.6791845054395793e-08, "loss": 0.3006, "step": 48025 }, { "epoch": 4.882675884505897, "grad_norm": 0.281773179769516, "learning_rate": 1.6762797104595828e-08, "loss": 0.2867, "step": 48026 }, { "epoch": 4.882777551850346, "grad_norm": 0.2994450032711029, "learning_rate": 1.6733774259161116e-08, "loss": 0.3078, "step": 48027 }, { "epoch": 4.882879219194795, "grad_norm": 0.23929449915885925, "learning_rate": 1.6704776518236543e-08, "loss": 0.2596, "step": 48028 }, { "epoch": 4.882980886539244, "grad_norm": 0.2814198434352875, "learning_rate": 1.6675803881969764e-08, "loss": 0.326, "step": 48029 }, { "epoch": 4.883082553883693, "grad_norm": 0.2912919819355011, "learning_rate": 1.664685635050567e-08, "loss": 0.2759, "step": 48030 }, { "epoch": 4.8831842212281416, "grad_norm": 0.2816600203514099, "learning_rate": 1.6617933923990803e-08, "loss": 0.2811, "step": 48031 }, { "epoch": 4.8832858885725905, "grad_norm": 0.25938525795936584, "learning_rate": 1.6589036602570052e-08, "loss": 0.3201, "step": 48032 }, { "epoch": 4.883387555917039, "grad_norm": 0.26063093543052673, "learning_rate": 1.6560164386389964e-08, "loss": 0.2946, "step": 48033 }, { "epoch": 4.883489223261488, "grad_norm": 0.27268579602241516, "learning_rate": 1.6531317275595427e-08, "loss": 0.2991, "step": 48034 }, { "epoch": 4.883590890605937, "grad_norm": 0.2783551514148712, "learning_rate": 1.6502495270331875e-08, "loss": 0.2688, "step": 48035 }, { "epoch": 4.883692557950386, "grad_norm": 0.28312718868255615, "learning_rate": 1.647369837074475e-08, "loss": 0.2878, "step": 48036 }, { "epoch": 4.883794225294835, "grad_norm": 0.2927648723125458, "learning_rate": 1.644492657697838e-08, "loss": 0.2885, "step": 48037 }, { "epoch": 4.883895892639284, "grad_norm": 0.290835440158844, "learning_rate": 1.6416179889178207e-08, "loss": 0.2853, "step": 48038 }, { "epoch": 4.883997559983733, "grad_norm": 0.25985610485076904, "learning_rate": 1.6387458307489113e-08, "loss": 0.2809, "step": 48039 }, { "epoch": 4.884099227328182, "grad_norm": 0.260635107755661, "learning_rate": 1.635876183205598e-08, "loss": 0.3183, "step": 48040 }, { "epoch": 4.884200894672631, "grad_norm": 0.2712297737598419, "learning_rate": 1.633009046302314e-08, "loss": 0.2663, "step": 48041 }, { "epoch": 4.88430256201708, "grad_norm": 0.27263808250427246, "learning_rate": 1.6301444200534922e-08, "loss": 0.2661, "step": 48042 }, { "epoch": 4.884404229361529, "grad_norm": 0.29198840260505676, "learning_rate": 1.6272823044735654e-08, "loss": 0.3355, "step": 48043 }, { "epoch": 4.884505896705978, "grad_norm": 0.2764628529548645, "learning_rate": 1.624422699576911e-08, "loss": 0.2942, "step": 48044 }, { "epoch": 4.884607564050427, "grad_norm": 0.2736055254936218, "learning_rate": 1.621565605378017e-08, "loss": 0.3067, "step": 48045 }, { "epoch": 4.884709231394876, "grad_norm": 0.2850707769393921, "learning_rate": 1.6187110218912615e-08, "loss": 0.3059, "step": 48046 }, { "epoch": 4.884810898739325, "grad_norm": 0.2695533037185669, "learning_rate": 1.615858949131022e-08, "loss": 0.2934, "step": 48047 }, { "epoch": 4.8849125660837736, "grad_norm": 0.26222649216651917, "learning_rate": 1.613009387111619e-08, "loss": 0.3283, "step": 48048 }, { "epoch": 4.8850142334282225, "grad_norm": 0.26192525029182434, "learning_rate": 1.6101623358474316e-08, "loss": 0.299, "step": 48049 }, { "epoch": 4.885115900772671, "grad_norm": 0.2893802225589752, "learning_rate": 1.6073177953528363e-08, "loss": 0.3116, "step": 48050 }, { "epoch": 4.88521756811712, "grad_norm": 0.29055896401405334, "learning_rate": 1.6044757656420995e-08, "loss": 0.2977, "step": 48051 }, { "epoch": 4.885319235461569, "grad_norm": 0.2666487395763397, "learning_rate": 1.6016362467295986e-08, "loss": 0.2991, "step": 48052 }, { "epoch": 4.885420902806018, "grad_norm": 0.2759370505809784, "learning_rate": 1.5987992386296557e-08, "loss": 0.2932, "step": 48053 }, { "epoch": 4.885522570150467, "grad_norm": 0.2868640720844269, "learning_rate": 1.5959647413564815e-08, "loss": 0.295, "step": 48054 }, { "epoch": 4.885624237494916, "grad_norm": 0.2947438359260559, "learning_rate": 1.593132754924398e-08, "loss": 0.3066, "step": 48055 }, { "epoch": 4.885725904839366, "grad_norm": 0.29822227358818054, "learning_rate": 1.5903032793476712e-08, "loss": 0.2551, "step": 48056 }, { "epoch": 4.885827572183815, "grad_norm": 0.28888633847236633, "learning_rate": 1.587476314640568e-08, "loss": 0.3164, "step": 48057 }, { "epoch": 4.885929239528264, "grad_norm": 0.28692886233329773, "learning_rate": 1.584651860817299e-08, "loss": 0.2761, "step": 48058 }, { "epoch": 4.886030906872713, "grad_norm": 0.26657018065452576, "learning_rate": 1.5818299178921303e-08, "loss": 0.3003, "step": 48059 }, { "epoch": 4.886132574217162, "grad_norm": 0.28763383626937866, "learning_rate": 1.579010485879273e-08, "loss": 0.2787, "step": 48060 }, { "epoch": 4.886234241561611, "grad_norm": 0.2943444848060608, "learning_rate": 1.576193564792883e-08, "loss": 0.3071, "step": 48061 }, { "epoch": 4.88633590890606, "grad_norm": 0.2921387553215027, "learning_rate": 1.5733791546472256e-08, "loss": 0.284, "step": 48062 }, { "epoch": 4.886437576250509, "grad_norm": 0.26104027032852173, "learning_rate": 1.5705672554564567e-08, "loss": 0.2841, "step": 48063 }, { "epoch": 4.8865392435949575, "grad_norm": 0.2968258559703827, "learning_rate": 1.567757867234676e-08, "loss": 0.3009, "step": 48064 }, { "epoch": 4.8866409109394064, "grad_norm": 0.29218173027038574, "learning_rate": 1.5649509899960946e-08, "loss": 0.3035, "step": 48065 }, { "epoch": 4.886742578283855, "grad_norm": 0.283774733543396, "learning_rate": 1.562146623754812e-08, "loss": 0.3041, "step": 48066 }, { "epoch": 4.886844245628304, "grad_norm": 0.2641793489456177, "learning_rate": 1.5593447685250395e-08, "loss": 0.3082, "step": 48067 }, { "epoch": 4.886945912972753, "grad_norm": 0.288565456867218, "learning_rate": 1.5565454243208213e-08, "loss": 0.295, "step": 48068 }, { "epoch": 4.887047580317202, "grad_norm": 0.2695411741733551, "learning_rate": 1.553748591156312e-08, "loss": 0.3005, "step": 48069 }, { "epoch": 4.887149247661651, "grad_norm": 0.2642471492290497, "learning_rate": 1.5509542690455013e-08, "loss": 0.288, "step": 48070 }, { "epoch": 4.8872509150061, "grad_norm": 0.2933686077594757, "learning_rate": 1.5481624580026e-08, "loss": 0.2948, "step": 48071 }, { "epoch": 4.887352582350549, "grad_norm": 0.2691585123538971, "learning_rate": 1.5453731580415964e-08, "loss": 0.3333, "step": 48072 }, { "epoch": 4.887454249694998, "grad_norm": 0.2924039363861084, "learning_rate": 1.5425863691764798e-08, "loss": 0.2832, "step": 48073 }, { "epoch": 4.887555917039447, "grad_norm": 0.25699710845947266, "learning_rate": 1.539802091421405e-08, "loss": 0.2638, "step": 48074 }, { "epoch": 4.887657584383896, "grad_norm": 0.27358749508857727, "learning_rate": 1.5370203247903614e-08, "loss": 0.3211, "step": 48075 }, { "epoch": 4.887759251728345, "grad_norm": 0.273773193359375, "learning_rate": 1.534241069297393e-08, "loss": 0.2986, "step": 48076 }, { "epoch": 4.887860919072794, "grad_norm": 0.27997180819511414, "learning_rate": 1.5314643249564332e-08, "loss": 0.2841, "step": 48077 }, { "epoch": 4.887962586417243, "grad_norm": 0.27984288334846497, "learning_rate": 1.5286900917814707e-08, "loss": 0.2944, "step": 48078 }, { "epoch": 4.888064253761692, "grad_norm": 0.2676440477371216, "learning_rate": 1.52591836978655e-08, "loss": 0.3047, "step": 48079 }, { "epoch": 4.888165921106141, "grad_norm": 0.27097591757774353, "learning_rate": 1.523149158985604e-08, "loss": 0.2933, "step": 48080 }, { "epoch": 4.8882675884505895, "grad_norm": 0.27722102403640747, "learning_rate": 1.5203824593925108e-08, "loss": 0.3165, "step": 48081 }, { "epoch": 4.8883692557950384, "grad_norm": 0.2778403162956238, "learning_rate": 1.5176182710213705e-08, "loss": 0.2948, "step": 48082 }, { "epoch": 4.888470923139487, "grad_norm": 0.2760280966758728, "learning_rate": 1.5148565938859494e-08, "loss": 0.3012, "step": 48083 }, { "epoch": 4.888572590483936, "grad_norm": 0.2806040048599243, "learning_rate": 1.5120974280002366e-08, "loss": 0.2866, "step": 48084 }, { "epoch": 4.888674257828385, "grad_norm": 0.2925177216529846, "learning_rate": 1.5093407733781094e-08, "loss": 0.2972, "step": 48085 }, { "epoch": 4.888775925172834, "grad_norm": 0.2850065231323242, "learning_rate": 1.506586630033502e-08, "loss": 0.2665, "step": 48086 }, { "epoch": 4.888877592517283, "grad_norm": 0.28098273277282715, "learning_rate": 1.5038349979801802e-08, "loss": 0.2792, "step": 48087 }, { "epoch": 4.888979259861732, "grad_norm": 0.24525117874145508, "learning_rate": 1.5010858772321335e-08, "loss": 0.2973, "step": 48088 }, { "epoch": 4.889080927206181, "grad_norm": 0.2797165513038635, "learning_rate": 1.4983392678031284e-08, "loss": 0.3147, "step": 48089 }, { "epoch": 4.88918259455063, "grad_norm": 0.25561919808387756, "learning_rate": 1.4955951697070425e-08, "loss": 0.2528, "step": 48090 }, { "epoch": 4.889284261895079, "grad_norm": 0.2598741352558136, "learning_rate": 1.4928535829576983e-08, "loss": 0.2837, "step": 48091 }, { "epoch": 4.889385929239529, "grad_norm": 0.2751241624355316, "learning_rate": 1.4901145075688628e-08, "loss": 0.2922, "step": 48092 }, { "epoch": 4.889487596583978, "grad_norm": 0.28152838349342346, "learning_rate": 1.4873779435543578e-08, "loss": 0.2894, "step": 48093 }, { "epoch": 4.889589263928427, "grad_norm": 0.3063008487224579, "learning_rate": 1.4846438909280059e-08, "loss": 0.2884, "step": 48094 }, { "epoch": 4.889690931272876, "grad_norm": 0.2707583010196686, "learning_rate": 1.4819123497035737e-08, "loss": 0.309, "step": 48095 }, { "epoch": 4.8897925986173245, "grad_norm": 0.26080965995788574, "learning_rate": 1.4791833198947724e-08, "loss": 0.2967, "step": 48096 }, { "epoch": 4.8898942659617735, "grad_norm": 0.2774949371814728, "learning_rate": 1.476456801515369e-08, "loss": 0.3064, "step": 48097 }, { "epoch": 4.889995933306222, "grad_norm": 0.2524910867214203, "learning_rate": 1.4737327945791303e-08, "loss": 0.2731, "step": 48098 }, { "epoch": 4.890097600650671, "grad_norm": 0.27970975637435913, "learning_rate": 1.4710112990997117e-08, "loss": 0.3135, "step": 48099 }, { "epoch": 4.89019926799512, "grad_norm": 0.2955027222633362, "learning_rate": 1.4682923150909355e-08, "loss": 0.2752, "step": 48100 }, { "epoch": 4.890300935339569, "grad_norm": 0.3038919270038605, "learning_rate": 1.4655758425664024e-08, "loss": 0.2794, "step": 48101 }, { "epoch": 4.890402602684018, "grad_norm": 0.2836330235004425, "learning_rate": 1.4628618815398233e-08, "loss": 0.3146, "step": 48102 }, { "epoch": 4.890504270028467, "grad_norm": 0.2744172513484955, "learning_rate": 1.4601504320249093e-08, "loss": 0.294, "step": 48103 }, { "epoch": 4.890605937372916, "grad_norm": 0.24931840598583221, "learning_rate": 1.4574414940352056e-08, "loss": 0.3094, "step": 48104 }, { "epoch": 4.890707604717365, "grad_norm": 0.2716965079307556, "learning_rate": 1.454735067584534e-08, "loss": 0.3061, "step": 48105 }, { "epoch": 4.890809272061814, "grad_norm": 0.2819250524044037, "learning_rate": 1.4520311526863839e-08, "loss": 0.3026, "step": 48106 }, { "epoch": 4.890910939406263, "grad_norm": 0.288911372423172, "learning_rate": 1.4493297493544112e-08, "loss": 0.2695, "step": 48107 }, { "epoch": 4.891012606750712, "grad_norm": 0.26434388756752014, "learning_rate": 1.4466308576022714e-08, "loss": 0.2944, "step": 48108 }, { "epoch": 4.891114274095161, "grad_norm": 0.2673410177230835, "learning_rate": 1.4439344774435649e-08, "loss": 0.3159, "step": 48109 }, { "epoch": 4.89121594143961, "grad_norm": 0.2896350920200348, "learning_rate": 1.4412406088917807e-08, "loss": 0.3283, "step": 48110 }, { "epoch": 4.891317608784059, "grad_norm": 0.27576133608818054, "learning_rate": 1.4385492519605748e-08, "loss": 0.2977, "step": 48111 }, { "epoch": 4.891419276128508, "grad_norm": 0.2801382541656494, "learning_rate": 1.4358604066634918e-08, "loss": 0.2535, "step": 48112 }, { "epoch": 4.8915209434729565, "grad_norm": 0.29560935497283936, "learning_rate": 1.4331740730140209e-08, "loss": 0.2834, "step": 48113 }, { "epoch": 4.8916226108174055, "grad_norm": 0.2648893892765045, "learning_rate": 1.4304902510257624e-08, "loss": 0.3292, "step": 48114 }, { "epoch": 4.891724278161854, "grad_norm": 0.2665802836418152, "learning_rate": 1.4278089407122608e-08, "loss": 0.3193, "step": 48115 }, { "epoch": 4.891825945506303, "grad_norm": 0.26590844988822937, "learning_rate": 1.42513014208695e-08, "loss": 0.2869, "step": 48116 }, { "epoch": 4.891927612850752, "grad_norm": 0.2793043553829193, "learning_rate": 1.4224538551633193e-08, "loss": 0.309, "step": 48117 }, { "epoch": 4.892029280195201, "grad_norm": 0.27646884322166443, "learning_rate": 1.4197800799549688e-08, "loss": 0.28, "step": 48118 }, { "epoch": 4.89213094753965, "grad_norm": 0.27037304639816284, "learning_rate": 1.4171088164752211e-08, "loss": 0.2697, "step": 48119 }, { "epoch": 4.892232614884099, "grad_norm": 0.29864779114723206, "learning_rate": 1.4144400647376212e-08, "loss": 0.3018, "step": 48120 }, { "epoch": 4.892334282228548, "grad_norm": 0.2678740322589874, "learning_rate": 1.4117738247555467e-08, "loss": 0.2787, "step": 48121 }, { "epoch": 4.892435949572997, "grad_norm": 0.29096218943595886, "learning_rate": 1.409110096542543e-08, "loss": 0.2783, "step": 48122 }, { "epoch": 4.892537616917446, "grad_norm": 0.28779327869415283, "learning_rate": 1.406448880111877e-08, "loss": 0.2911, "step": 48123 }, { "epoch": 4.892639284261895, "grad_norm": 0.26956504583358765, "learning_rate": 1.4037901754770934e-08, "loss": 0.305, "step": 48124 }, { "epoch": 4.892740951606344, "grad_norm": 0.2791682481765747, "learning_rate": 1.4011339826515147e-08, "loss": 0.2907, "step": 48125 }, { "epoch": 4.892842618950793, "grad_norm": 0.28835874795913696, "learning_rate": 1.3984803016485193e-08, "loss": 0.272, "step": 48126 }, { "epoch": 4.892944286295242, "grad_norm": 0.2756386399269104, "learning_rate": 1.395829132481541e-08, "loss": 0.2808, "step": 48127 }, { "epoch": 4.893045953639691, "grad_norm": 0.304252564907074, "learning_rate": 1.3931804751638466e-08, "loss": 0.3028, "step": 48128 }, { "epoch": 4.89314762098414, "grad_norm": 0.2774510085582733, "learning_rate": 1.3905343297088703e-08, "loss": 0.3176, "step": 48129 }, { "epoch": 4.8932492883285885, "grad_norm": 0.29156219959259033, "learning_rate": 1.3878906961298788e-08, "loss": 0.2673, "step": 48130 }, { "epoch": 4.8933509556730375, "grad_norm": 0.2806990146636963, "learning_rate": 1.385249574440195e-08, "loss": 0.2927, "step": 48131 }, { "epoch": 4.893452623017486, "grad_norm": 0.27945300936698914, "learning_rate": 1.3826109646530861e-08, "loss": 0.3112, "step": 48132 }, { "epoch": 4.893554290361935, "grad_norm": 0.2880391478538513, "learning_rate": 1.3799748667819301e-08, "loss": 0.3164, "step": 48133 }, { "epoch": 4.893655957706384, "grad_norm": 0.27034151554107666, "learning_rate": 1.3773412808399944e-08, "loss": 0.2939, "step": 48134 }, { "epoch": 4.893757625050833, "grad_norm": 0.28659212589263916, "learning_rate": 1.374710206840546e-08, "loss": 0.2948, "step": 48135 }, { "epoch": 4.893859292395282, "grad_norm": 0.2858733832836151, "learning_rate": 1.3720816447967412e-08, "loss": 0.3074, "step": 48136 }, { "epoch": 4.893960959739731, "grad_norm": 0.25553953647613525, "learning_rate": 1.3694555947219579e-08, "loss": 0.3263, "step": 48137 }, { "epoch": 4.894062627084181, "grad_norm": 0.2766777575016022, "learning_rate": 1.3668320566292969e-08, "loss": 0.3056, "step": 48138 }, { "epoch": 4.89416429442863, "grad_norm": 0.29248929023742676, "learning_rate": 1.364211030532081e-08, "loss": 0.3094, "step": 48139 }, { "epoch": 4.894265961773079, "grad_norm": 0.2600003480911255, "learning_rate": 1.3615925164435218e-08, "loss": 0.302, "step": 48140 }, { "epoch": 4.894367629117528, "grad_norm": 0.29370778799057007, "learning_rate": 1.3589765143767197e-08, "loss": 0.288, "step": 48141 }, { "epoch": 4.894469296461977, "grad_norm": 0.28591859340667725, "learning_rate": 1.3563630243448867e-08, "loss": 0.2637, "step": 48142 }, { "epoch": 4.894570963806426, "grad_norm": 0.2771235406398773, "learning_rate": 1.3537520463611787e-08, "loss": 0.288, "step": 48143 }, { "epoch": 4.894672631150875, "grad_norm": 0.2776564359664917, "learning_rate": 1.3511435804388073e-08, "loss": 0.2836, "step": 48144 }, { "epoch": 4.8947742984953235, "grad_norm": 0.2788374125957489, "learning_rate": 1.3485376265909288e-08, "loss": 0.2984, "step": 48145 }, { "epoch": 4.8948759658397725, "grad_norm": 0.27235350012779236, "learning_rate": 1.3459341848305329e-08, "loss": 0.3137, "step": 48146 }, { "epoch": 4.894977633184221, "grad_norm": 0.29487404227256775, "learning_rate": 1.3433332551708867e-08, "loss": 0.2887, "step": 48147 }, { "epoch": 4.89507930052867, "grad_norm": 0.2552856504917145, "learning_rate": 1.3407348376249796e-08, "loss": 0.2693, "step": 48148 }, { "epoch": 4.895180967873119, "grad_norm": 0.2737909257411957, "learning_rate": 1.3381389322059679e-08, "loss": 0.2887, "step": 48149 }, { "epoch": 4.895282635217568, "grad_norm": 0.2832711338996887, "learning_rate": 1.3355455389268967e-08, "loss": 0.2959, "step": 48150 }, { "epoch": 4.895384302562017, "grad_norm": 0.2972903847694397, "learning_rate": 1.3329546578008668e-08, "loss": 0.2761, "step": 48151 }, { "epoch": 4.895485969906466, "grad_norm": 0.26859599351882935, "learning_rate": 1.330366288840923e-08, "loss": 0.3116, "step": 48152 }, { "epoch": 4.895587637250915, "grad_norm": 0.2906685173511505, "learning_rate": 1.3277804320600551e-08, "loss": 0.2921, "step": 48153 }, { "epoch": 4.895689304595364, "grad_norm": 0.25593793392181396, "learning_rate": 1.3251970874713637e-08, "loss": 0.2988, "step": 48154 }, { "epoch": 4.895790971939813, "grad_norm": 0.2661439776420593, "learning_rate": 1.3226162550878385e-08, "loss": 0.2637, "step": 48155 }, { "epoch": 4.895892639284262, "grad_norm": 0.27902641892433167, "learning_rate": 1.3200379349224134e-08, "loss": 0.2985, "step": 48156 }, { "epoch": 4.895994306628711, "grad_norm": 0.30060192942619324, "learning_rate": 1.3174621269881893e-08, "loss": 0.2607, "step": 48157 }, { "epoch": 4.89609597397316, "grad_norm": 0.28596019744873047, "learning_rate": 1.3148888312980446e-08, "loss": 0.2566, "step": 48158 }, { "epoch": 4.896197641317609, "grad_norm": 0.2791321575641632, "learning_rate": 1.3123180478649688e-08, "loss": 0.2933, "step": 48159 }, { "epoch": 4.896299308662058, "grad_norm": 0.28270223736763, "learning_rate": 1.3097497767019518e-08, "loss": 0.2915, "step": 48160 }, { "epoch": 4.896400976006507, "grad_norm": 0.26355215907096863, "learning_rate": 1.3071840178219276e-08, "loss": 0.3301, "step": 48161 }, { "epoch": 4.8965026433509555, "grad_norm": 0.2640465497970581, "learning_rate": 1.3046207712377745e-08, "loss": 0.2757, "step": 48162 }, { "epoch": 4.8966043106954045, "grad_norm": 0.28168946504592896, "learning_rate": 1.3020600369624826e-08, "loss": 0.3133, "step": 48163 }, { "epoch": 4.896705978039853, "grad_norm": 0.2855049669742584, "learning_rate": 1.299501815008819e-08, "loss": 0.2963, "step": 48164 }, { "epoch": 4.896807645384302, "grad_norm": 0.2829863727092743, "learning_rate": 1.2969461053898291e-08, "loss": 0.3005, "step": 48165 }, { "epoch": 4.896909312728751, "grad_norm": 0.25343644618988037, "learning_rate": 1.2943929081182805e-08, "loss": 0.2873, "step": 48166 }, { "epoch": 4.8970109800732, "grad_norm": 0.2697526514530182, "learning_rate": 1.291842223207107e-08, "loss": 0.3076, "step": 48167 }, { "epoch": 4.897112647417649, "grad_norm": 0.29367706179618835, "learning_rate": 1.2892940506690765e-08, "loss": 0.3025, "step": 48168 }, { "epoch": 4.897214314762098, "grad_norm": 0.27094894647598267, "learning_rate": 1.2867483905170675e-08, "loss": 0.2986, "step": 48169 }, { "epoch": 4.897315982106547, "grad_norm": 0.2924364507198334, "learning_rate": 1.2842052427639584e-08, "loss": 0.3011, "step": 48170 }, { "epoch": 4.897417649450996, "grad_norm": 0.27025964856147766, "learning_rate": 1.2816646074224615e-08, "loss": 0.3141, "step": 48171 }, { "epoch": 4.897519316795445, "grad_norm": 0.2717454135417938, "learning_rate": 1.2791264845054551e-08, "loss": 0.2837, "step": 48172 }, { "epoch": 4.897620984139894, "grad_norm": 0.3093661367893219, "learning_rate": 1.2765908740256517e-08, "loss": 0.2861, "step": 48173 }, { "epoch": 4.897722651484344, "grad_norm": 0.26858583092689514, "learning_rate": 1.2740577759959294e-08, "loss": 0.2852, "step": 48174 }, { "epoch": 4.897824318828793, "grad_norm": 0.2824966311454773, "learning_rate": 1.271527190428945e-08, "loss": 0.3172, "step": 48175 }, { "epoch": 4.897925986173242, "grad_norm": 0.283529132604599, "learning_rate": 1.2689991173375215e-08, "loss": 0.3009, "step": 48176 }, { "epoch": 4.8980276535176905, "grad_norm": 0.27126049995422363, "learning_rate": 1.2664735567343156e-08, "loss": 0.3173, "step": 48177 }, { "epoch": 4.8981293208621395, "grad_norm": 0.275785356760025, "learning_rate": 1.2639505086321502e-08, "loss": 0.3109, "step": 48178 }, { "epoch": 4.898230988206588, "grad_norm": 0.2805580496788025, "learning_rate": 1.2614299730436818e-08, "loss": 0.2754, "step": 48179 }, { "epoch": 4.898332655551037, "grad_norm": 0.28224432468414307, "learning_rate": 1.2589119499815672e-08, "loss": 0.3064, "step": 48180 }, { "epoch": 4.898434322895486, "grad_norm": 0.2488519549369812, "learning_rate": 1.2563964394585737e-08, "loss": 0.3171, "step": 48181 }, { "epoch": 4.898535990239935, "grad_norm": 0.2828053832054138, "learning_rate": 1.2538834414873024e-08, "loss": 0.3038, "step": 48182 }, { "epoch": 4.898637657584384, "grad_norm": 0.2913973927497864, "learning_rate": 1.2513729560804656e-08, "loss": 0.3282, "step": 48183 }, { "epoch": 4.898739324928833, "grad_norm": 0.29341089725494385, "learning_rate": 1.2488649832506639e-08, "loss": 0.2743, "step": 48184 }, { "epoch": 4.898840992273282, "grad_norm": 0.2841832935810089, "learning_rate": 1.2463595230106095e-08, "loss": 0.295, "step": 48185 }, { "epoch": 4.898942659617731, "grad_norm": 0.2911830544471741, "learning_rate": 1.2438565753728481e-08, "loss": 0.2845, "step": 48186 }, { "epoch": 4.89904432696218, "grad_norm": 0.24869488179683685, "learning_rate": 1.2413561403500362e-08, "loss": 0.2822, "step": 48187 }, { "epoch": 4.899145994306629, "grad_norm": 0.2689109742641449, "learning_rate": 1.2388582179547194e-08, "loss": 0.3112, "step": 48188 }, { "epoch": 4.899247661651078, "grad_norm": 0.2801622450351715, "learning_rate": 1.2363628081994983e-08, "loss": 0.3119, "step": 48189 }, { "epoch": 4.899349328995527, "grad_norm": 0.2719947099685669, "learning_rate": 1.2338699110969742e-08, "loss": 0.3015, "step": 48190 }, { "epoch": 4.899450996339976, "grad_norm": 0.2949834167957306, "learning_rate": 1.2313795266596928e-08, "loss": 0.295, "step": 48191 }, { "epoch": 4.899552663684425, "grad_norm": 0.28805360198020935, "learning_rate": 1.2288916549001994e-08, "loss": 0.2939, "step": 48192 }, { "epoch": 4.899654331028874, "grad_norm": 0.26110175251960754, "learning_rate": 1.2264062958310397e-08, "loss": 0.3257, "step": 48193 }, { "epoch": 4.8997559983733225, "grad_norm": 0.3206535279750824, "learning_rate": 1.2239234494647035e-08, "loss": 0.2919, "step": 48194 }, { "epoch": 4.8998576657177715, "grad_norm": 0.2890027165412903, "learning_rate": 1.2214431158136807e-08, "loss": 0.2873, "step": 48195 }, { "epoch": 4.89995933306222, "grad_norm": 0.2770453691482544, "learning_rate": 1.2189652948905173e-08, "loss": 0.3135, "step": 48196 }, { "epoch": 4.900061000406669, "grad_norm": 0.2804366946220398, "learning_rate": 1.2164899867077028e-08, "loss": 0.2771, "step": 48197 }, { "epoch": 4.900162667751118, "grad_norm": 0.28840896487236023, "learning_rate": 1.2140171912776721e-08, "loss": 0.281, "step": 48198 }, { "epoch": 4.900264335095567, "grad_norm": 0.28098350763320923, "learning_rate": 1.2115469086129151e-08, "loss": 0.2862, "step": 48199 }, { "epoch": 4.900366002440016, "grad_norm": 0.2634621262550354, "learning_rate": 1.2090791387258105e-08, "loss": 0.2667, "step": 48200 }, { "epoch": 4.900467669784465, "grad_norm": 0.27360475063323975, "learning_rate": 1.2066138816288486e-08, "loss": 0.3168, "step": 48201 }, { "epoch": 4.900569337128914, "grad_norm": 0.2651852071285248, "learning_rate": 1.2041511373344638e-08, "loss": 0.3469, "step": 48202 }, { "epoch": 4.900671004473363, "grad_norm": 0.28238049149513245, "learning_rate": 1.2016909058549798e-08, "loss": 0.3081, "step": 48203 }, { "epoch": 4.900772671817812, "grad_norm": 0.2715640366077423, "learning_rate": 1.1992331872028862e-08, "loss": 0.2631, "step": 48204 }, { "epoch": 4.900874339162261, "grad_norm": 0.30336761474609375, "learning_rate": 1.1967779813905068e-08, "loss": 0.289, "step": 48205 }, { "epoch": 4.90097600650671, "grad_norm": 0.24470645189285278, "learning_rate": 1.1943252884302204e-08, "loss": 0.3192, "step": 48206 }, { "epoch": 4.901077673851159, "grad_norm": 0.29252928495407104, "learning_rate": 1.191875108334406e-08, "loss": 0.2944, "step": 48207 }, { "epoch": 4.901179341195608, "grad_norm": 0.2678048014640808, "learning_rate": 1.1894274411153872e-08, "loss": 0.2648, "step": 48208 }, { "epoch": 4.901281008540057, "grad_norm": 0.28265297412872314, "learning_rate": 1.1869822867854874e-08, "loss": 0.3145, "step": 48209 }, { "epoch": 4.901382675884506, "grad_norm": 0.2807254493236542, "learning_rate": 1.1845396453570301e-08, "loss": 0.2858, "step": 48210 }, { "epoch": 4.9014843432289545, "grad_norm": 0.278561532497406, "learning_rate": 1.1820995168423388e-08, "loss": 0.3056, "step": 48211 }, { "epoch": 4.9015860105734035, "grad_norm": 0.2862282693386078, "learning_rate": 1.1796619012536815e-08, "loss": 0.275, "step": 48212 }, { "epoch": 4.901687677917852, "grad_norm": 0.27565905451774597, "learning_rate": 1.177226798603326e-08, "loss": 0.2916, "step": 48213 }, { "epoch": 4.901789345262301, "grad_norm": 0.2970980405807495, "learning_rate": 1.1747942089036512e-08, "loss": 0.2829, "step": 48214 }, { "epoch": 4.90189101260675, "grad_norm": 0.2814435064792633, "learning_rate": 1.1723641321667589e-08, "loss": 0.302, "step": 48215 }, { "epoch": 4.901992679951199, "grad_norm": 0.2966720759868622, "learning_rate": 1.1699365684049169e-08, "loss": 0.2941, "step": 48216 }, { "epoch": 4.902094347295648, "grad_norm": 0.2724539637565613, "learning_rate": 1.1675115176304486e-08, "loss": 0.3135, "step": 48217 }, { "epoch": 4.902196014640097, "grad_norm": 0.2784518301486969, "learning_rate": 1.165088979855511e-08, "loss": 0.299, "step": 48218 }, { "epoch": 4.902297681984546, "grad_norm": 0.2865249812602997, "learning_rate": 1.1626689550923165e-08, "loss": 0.2842, "step": 48219 }, { "epoch": 4.902399349328996, "grad_norm": 0.31452155113220215, "learning_rate": 1.1602514433530222e-08, "loss": 0.2887, "step": 48220 }, { "epoch": 4.902501016673445, "grad_norm": 0.2853575348854065, "learning_rate": 1.1578364446498957e-08, "loss": 0.2892, "step": 48221 }, { "epoch": 4.902602684017894, "grad_norm": 0.2730138897895813, "learning_rate": 1.1554239589950388e-08, "loss": 0.3002, "step": 48222 }, { "epoch": 4.902704351362343, "grad_norm": 0.28652554750442505, "learning_rate": 1.153013986400553e-08, "loss": 0.2887, "step": 48223 }, { "epoch": 4.902806018706792, "grad_norm": 0.2730698883533478, "learning_rate": 1.1506065268787059e-08, "loss": 0.2897, "step": 48224 }, { "epoch": 4.902907686051241, "grad_norm": 0.27876535058021545, "learning_rate": 1.1482015804414881e-08, "loss": 0.3164, "step": 48225 }, { "epoch": 4.9030093533956896, "grad_norm": 0.2956145107746124, "learning_rate": 1.1457991471011121e-08, "loss": 0.3132, "step": 48226 }, { "epoch": 4.9031110207401385, "grad_norm": 0.29379960894584656, "learning_rate": 1.1433992268696792e-08, "loss": 0.2548, "step": 48227 }, { "epoch": 4.903212688084587, "grad_norm": 0.27818042039871216, "learning_rate": 1.1410018197592354e-08, "loss": 0.2909, "step": 48228 }, { "epoch": 4.903314355429036, "grad_norm": 0.2708068788051605, "learning_rate": 1.1386069257818822e-08, "loss": 0.3026, "step": 48229 }, { "epoch": 4.903416022773485, "grad_norm": 0.28669437766075134, "learning_rate": 1.1362145449497208e-08, "loss": 0.297, "step": 48230 }, { "epoch": 4.903517690117934, "grad_norm": 0.24517183005809784, "learning_rate": 1.1338246772746863e-08, "loss": 0.302, "step": 48231 }, { "epoch": 4.903619357462383, "grad_norm": 0.2551955580711365, "learning_rate": 1.1314373227689358e-08, "loss": 0.2925, "step": 48232 }, { "epoch": 4.903721024806832, "grad_norm": 0.28810954093933105, "learning_rate": 1.1290524814444593e-08, "loss": 0.2971, "step": 48233 }, { "epoch": 4.903822692151281, "grad_norm": 0.27653995156288147, "learning_rate": 1.1266701533132473e-08, "loss": 0.3002, "step": 48234 }, { "epoch": 4.90392435949573, "grad_norm": 0.28920596837997437, "learning_rate": 1.124290338387346e-08, "loss": 0.29, "step": 48235 }, { "epoch": 4.904026026840179, "grad_norm": 0.3031877279281616, "learning_rate": 1.1219130366786901e-08, "loss": 0.2747, "step": 48236 }, { "epoch": 4.904127694184628, "grad_norm": 0.2673581540584564, "learning_rate": 1.1195382481993256e-08, "loss": 0.3106, "step": 48237 }, { "epoch": 4.904229361529077, "grad_norm": 0.2810719907283783, "learning_rate": 1.1171659729611317e-08, "loss": 0.3032, "step": 48238 }, { "epoch": 4.904331028873526, "grad_norm": 0.28011003136634827, "learning_rate": 1.114796210976099e-08, "loss": 0.2933, "step": 48239 }, { "epoch": 4.904432696217975, "grad_norm": 0.2647770047187805, "learning_rate": 1.1124289622561623e-08, "loss": 0.3186, "step": 48240 }, { "epoch": 4.904534363562424, "grad_norm": 0.28296664357185364, "learning_rate": 1.1100642268133121e-08, "loss": 0.2764, "step": 48241 }, { "epoch": 4.904636030906873, "grad_norm": 0.2905051112174988, "learning_rate": 1.1077020046593723e-08, "loss": 0.2868, "step": 48242 }, { "epoch": 4.9047376982513216, "grad_norm": 0.2874302268028259, "learning_rate": 1.1053422958062776e-08, "loss": 0.2769, "step": 48243 }, { "epoch": 4.9048393655957705, "grad_norm": 0.29311737418174744, "learning_rate": 1.1029851002659075e-08, "loss": 0.3011, "step": 48244 }, { "epoch": 4.904941032940219, "grad_norm": 0.27627548575401306, "learning_rate": 1.1006304180501414e-08, "loss": 0.2828, "step": 48245 }, { "epoch": 4.905042700284668, "grad_norm": 0.30100980401039124, "learning_rate": 1.0982782491708588e-08, "loss": 0.3106, "step": 48246 }, { "epoch": 4.905144367629117, "grad_norm": 0.27225059270858765, "learning_rate": 1.0959285936398833e-08, "loss": 0.3217, "step": 48247 }, { "epoch": 4.905246034973566, "grad_norm": 0.28741973638534546, "learning_rate": 1.093581451469039e-08, "loss": 0.2443, "step": 48248 }, { "epoch": 4.905347702318015, "grad_norm": 0.28659340739250183, "learning_rate": 1.0912368226702052e-08, "loss": 0.2918, "step": 48249 }, { "epoch": 4.905449369662464, "grad_norm": 0.28348904848098755, "learning_rate": 1.0888947072551504e-08, "loss": 0.3031, "step": 48250 }, { "epoch": 4.905551037006913, "grad_norm": 0.2710193395614624, "learning_rate": 1.0865551052356427e-08, "loss": 0.2879, "step": 48251 }, { "epoch": 4.905652704351362, "grad_norm": 0.2748110592365265, "learning_rate": 1.0842180166235616e-08, "loss": 0.2893, "step": 48252 }, { "epoch": 4.905754371695811, "grad_norm": 0.31189703941345215, "learning_rate": 1.08188344143062e-08, "loss": 0.291, "step": 48253 }, { "epoch": 4.90585603904026, "grad_norm": 0.26791489124298096, "learning_rate": 1.0795513796685863e-08, "loss": 0.3133, "step": 48254 }, { "epoch": 4.905957706384709, "grad_norm": 0.2876088619232178, "learning_rate": 1.0772218313492289e-08, "loss": 0.3111, "step": 48255 }, { "epoch": 4.906059373729159, "grad_norm": 0.26463770866394043, "learning_rate": 1.0748947964842049e-08, "loss": 0.3163, "step": 48256 }, { "epoch": 4.906161041073608, "grad_norm": 0.2683623433113098, "learning_rate": 1.072570275085394e-08, "loss": 0.2745, "step": 48257 }, { "epoch": 4.906262708418057, "grad_norm": 0.27223220467567444, "learning_rate": 1.0702482671643422e-08, "loss": 0.2747, "step": 48258 }, { "epoch": 4.9063643757625055, "grad_norm": 0.25552722811698914, "learning_rate": 1.067928772732818e-08, "loss": 0.2935, "step": 48259 }, { "epoch": 4.9064660431069544, "grad_norm": 0.2780098021030426, "learning_rate": 1.0656117918025343e-08, "loss": 0.2883, "step": 48260 }, { "epoch": 4.906567710451403, "grad_norm": 0.3034610152244568, "learning_rate": 1.063297324385093e-08, "loss": 0.2668, "step": 48261 }, { "epoch": 4.906669377795852, "grad_norm": 0.28253334760665894, "learning_rate": 1.0609853704922623e-08, "loss": 0.33, "step": 48262 }, { "epoch": 4.906771045140301, "grad_norm": 0.32062727212905884, "learning_rate": 1.0586759301355332e-08, "loss": 0.2724, "step": 48263 }, { "epoch": 4.90687271248475, "grad_norm": 0.28309518098831177, "learning_rate": 1.0563690033266738e-08, "loss": 0.2975, "step": 48264 }, { "epoch": 4.906974379829199, "grad_norm": 0.3245396614074707, "learning_rate": 1.0540645900772861e-08, "loss": 0.2912, "step": 48265 }, { "epoch": 4.907076047173648, "grad_norm": 0.2975866496562958, "learning_rate": 1.0517626903989165e-08, "loss": 0.3096, "step": 48266 }, { "epoch": 4.907177714518097, "grad_norm": 0.272450715303421, "learning_rate": 1.0494633043032221e-08, "loss": 0.2735, "step": 48267 }, { "epoch": 4.907279381862546, "grad_norm": 0.28761136531829834, "learning_rate": 1.0471664318017494e-08, "loss": 0.2909, "step": 48268 }, { "epoch": 4.907381049206995, "grad_norm": 0.26159873604774475, "learning_rate": 1.0448720729060447e-08, "loss": 0.2953, "step": 48269 }, { "epoch": 4.907482716551444, "grad_norm": 0.272982120513916, "learning_rate": 1.0425802276277653e-08, "loss": 0.2818, "step": 48270 }, { "epoch": 4.907584383895893, "grad_norm": 0.28928884863853455, "learning_rate": 1.0402908959783464e-08, "loss": 0.2931, "step": 48271 }, { "epoch": 4.907686051240342, "grad_norm": 0.26498162746429443, "learning_rate": 1.0380040779693901e-08, "loss": 0.2945, "step": 48272 }, { "epoch": 4.907787718584791, "grad_norm": 0.27499234676361084, "learning_rate": 1.0357197736124425e-08, "loss": 0.2851, "step": 48273 }, { "epoch": 4.90788938592924, "grad_norm": 0.2736080586910248, "learning_rate": 1.033437982918939e-08, "loss": 0.305, "step": 48274 }, { "epoch": 4.907991053273689, "grad_norm": 0.27492329478263855, "learning_rate": 1.0311587059003702e-08, "loss": 0.2888, "step": 48275 }, { "epoch": 4.9080927206181375, "grad_norm": 0.26748454570770264, "learning_rate": 1.0288819425682273e-08, "loss": 0.2798, "step": 48276 }, { "epoch": 4.9081943879625864, "grad_norm": 0.27863308787345886, "learning_rate": 1.0266076929341118e-08, "loss": 0.2702, "step": 48277 }, { "epoch": 4.908296055307035, "grad_norm": 0.2743878662586212, "learning_rate": 1.0243359570092926e-08, "loss": 0.2895, "step": 48278 }, { "epoch": 4.908397722651484, "grad_norm": 0.30090755224227905, "learning_rate": 1.022066734805316e-08, "loss": 0.3105, "step": 48279 }, { "epoch": 4.908499389995933, "grad_norm": 0.2651403546333313, "learning_rate": 1.0198000263335616e-08, "loss": 0.3168, "step": 48280 }, { "epoch": 4.908601057340382, "grad_norm": 0.2944837808609009, "learning_rate": 1.0175358316055205e-08, "loss": 0.3067, "step": 48281 }, { "epoch": 4.908702724684831, "grad_norm": 0.2827069163322449, "learning_rate": 1.015274150632517e-08, "loss": 0.3054, "step": 48282 }, { "epoch": 4.90880439202928, "grad_norm": 0.2758779227733612, "learning_rate": 1.0130149834260417e-08, "loss": 0.2753, "step": 48283 }, { "epoch": 4.908906059373729, "grad_norm": 0.2831861078739166, "learning_rate": 1.0107583299973634e-08, "loss": 0.2678, "step": 48284 }, { "epoch": 4.909007726718178, "grad_norm": 0.28371191024780273, "learning_rate": 1.0085041903579173e-08, "loss": 0.3091, "step": 48285 }, { "epoch": 4.909109394062627, "grad_norm": 0.2752760350704193, "learning_rate": 1.006252564519028e-08, "loss": 0.2989, "step": 48286 }, { "epoch": 4.909211061407076, "grad_norm": 0.2685597538948059, "learning_rate": 1.0040034524921304e-08, "loss": 0.2759, "step": 48287 }, { "epoch": 4.909312728751525, "grad_norm": 0.2853211760520935, "learning_rate": 1.0017568542884382e-08, "loss": 0.2989, "step": 48288 }, { "epoch": 4.909414396095974, "grad_norm": 0.2905358672142029, "learning_rate": 9.995127699193308e-09, "loss": 0.3259, "step": 48289 }, { "epoch": 4.909516063440423, "grad_norm": 0.27658289670944214, "learning_rate": 9.972711993960771e-09, "loss": 0.2996, "step": 48290 }, { "epoch": 4.909617730784872, "grad_norm": 0.27579811215400696, "learning_rate": 9.950321427300014e-09, "loss": 0.3072, "step": 48291 }, { "epoch": 4.909719398129321, "grad_norm": 0.2792149782180786, "learning_rate": 9.927955999323724e-09, "loss": 0.2723, "step": 48292 }, { "epoch": 4.9098210654737695, "grad_norm": 0.28206491470336914, "learning_rate": 9.905615710144589e-09, "loss": 0.2885, "step": 48293 }, { "epoch": 4.9099227328182184, "grad_norm": 0.287274032831192, "learning_rate": 9.883300559875298e-09, "loss": 0.3042, "step": 48294 }, { "epoch": 4.910024400162667, "grad_norm": 0.2817619740962982, "learning_rate": 9.861010548627425e-09, "loss": 0.283, "step": 48295 }, { "epoch": 4.910126067507116, "grad_norm": 0.28395190834999084, "learning_rate": 9.838745676514772e-09, "loss": 0.3061, "step": 48296 }, { "epoch": 4.910227734851565, "grad_norm": 0.2767931818962097, "learning_rate": 9.816505943648358e-09, "loss": 0.2897, "step": 48297 }, { "epoch": 4.910329402196014, "grad_norm": 0.2718091309070587, "learning_rate": 9.794291350140317e-09, "loss": 0.2934, "step": 48298 }, { "epoch": 4.910431069540463, "grad_norm": 0.2723800539970398, "learning_rate": 9.77210189610278e-09, "loss": 0.3002, "step": 48299 }, { "epoch": 4.910532736884912, "grad_norm": 0.25861066579818726, "learning_rate": 9.749937581647884e-09, "loss": 0.3164, "step": 48300 }, { "epoch": 4.910634404229361, "grad_norm": 0.29384636878967285, "learning_rate": 9.727798406886646e-09, "loss": 0.3038, "step": 48301 }, { "epoch": 4.910736071573811, "grad_norm": 0.2690267860889435, "learning_rate": 9.7056843719312e-09, "loss": 0.3131, "step": 48302 }, { "epoch": 4.91083773891826, "grad_norm": 0.27332961559295654, "learning_rate": 9.683595476892572e-09, "loss": 0.3032, "step": 48303 }, { "epoch": 4.910939406262709, "grad_norm": 0.28789159655570984, "learning_rate": 9.661531721881778e-09, "loss": 0.2721, "step": 48304 }, { "epoch": 4.911041073607158, "grad_norm": 0.2651635706424713, "learning_rate": 9.6394931070104e-09, "loss": 0.3141, "step": 48305 }, { "epoch": 4.911142740951607, "grad_norm": 0.27589794993400574, "learning_rate": 9.61747963238946e-09, "loss": 0.2748, "step": 48306 }, { "epoch": 4.911244408296056, "grad_norm": 0.3013058602809906, "learning_rate": 9.595491298129978e-09, "loss": 0.3162, "step": 48307 }, { "epoch": 4.9113460756405045, "grad_norm": 0.2831445634365082, "learning_rate": 9.573528104342422e-09, "loss": 0.3219, "step": 48308 }, { "epoch": 4.9114477429849535, "grad_norm": 0.32771360874176025, "learning_rate": 9.55159005113726e-09, "loss": 0.2778, "step": 48309 }, { "epoch": 4.911549410329402, "grad_norm": 0.259280264377594, "learning_rate": 9.529677138625514e-09, "loss": 0.3181, "step": 48310 }, { "epoch": 4.911651077673851, "grad_norm": 0.270453542470932, "learning_rate": 9.507789366917097e-09, "loss": 0.3109, "step": 48311 }, { "epoch": 4.9117527450183, "grad_norm": 0.27915459871292114, "learning_rate": 9.48592673612303e-09, "loss": 0.2755, "step": 48312 }, { "epoch": 4.911854412362749, "grad_norm": 0.28975749015808105, "learning_rate": 9.464089246352114e-09, "loss": 0.2641, "step": 48313 }, { "epoch": 4.911956079707198, "grad_norm": 0.2713620066642761, "learning_rate": 9.44227689771593e-09, "loss": 0.3122, "step": 48314 }, { "epoch": 4.912057747051647, "grad_norm": 0.2762545645236969, "learning_rate": 9.420489690323275e-09, "loss": 0.281, "step": 48315 }, { "epoch": 4.912159414396096, "grad_norm": 0.2762121558189392, "learning_rate": 9.398727624284065e-09, "loss": 0.2669, "step": 48316 }, { "epoch": 4.912261081740545, "grad_norm": 0.2740492522716522, "learning_rate": 9.376990699708765e-09, "loss": 0.2747, "step": 48317 }, { "epoch": 4.912362749084994, "grad_norm": 0.27896037697792053, "learning_rate": 9.355278916705623e-09, "loss": 0.304, "step": 48318 }, { "epoch": 4.912464416429443, "grad_norm": 0.27902668714523315, "learning_rate": 9.333592275384551e-09, "loss": 0.2802, "step": 48319 }, { "epoch": 4.912566083773892, "grad_norm": 0.2945583462715149, "learning_rate": 9.31193077585546e-09, "loss": 0.2978, "step": 48320 }, { "epoch": 4.912667751118341, "grad_norm": 0.28933364152908325, "learning_rate": 9.290294418226598e-09, "loss": 0.3032, "step": 48321 }, { "epoch": 4.91276941846279, "grad_norm": 0.2737201750278473, "learning_rate": 9.26868320260732e-09, "loss": 0.2771, "step": 48322 }, { "epoch": 4.912871085807239, "grad_norm": 0.2656690776348114, "learning_rate": 9.24709712910643e-09, "loss": 0.2939, "step": 48323 }, { "epoch": 4.912972753151688, "grad_norm": 0.2903234362602234, "learning_rate": 9.225536197832175e-09, "loss": 0.3019, "step": 48324 }, { "epoch": 4.9130744204961365, "grad_norm": 0.2860518991947174, "learning_rate": 9.204000408894464e-09, "loss": 0.3054, "step": 48325 }, { "epoch": 4.9131760878405855, "grad_norm": 0.2706815302371979, "learning_rate": 9.182489762400437e-09, "loss": 0.3015, "step": 48326 }, { "epoch": 4.913277755185034, "grad_norm": 0.2899060845375061, "learning_rate": 9.16100425845945e-09, "loss": 0.2768, "step": 48327 }, { "epoch": 4.913379422529483, "grad_norm": 0.2901398241519928, "learning_rate": 9.139543897178637e-09, "loss": 0.2719, "step": 48328 }, { "epoch": 4.913481089873932, "grad_norm": 0.2805916368961334, "learning_rate": 9.118108678667359e-09, "loss": 0.2793, "step": 48329 }, { "epoch": 4.913582757218381, "grad_norm": 0.2684924602508545, "learning_rate": 9.096698603033305e-09, "loss": 0.3077, "step": 48330 }, { "epoch": 4.91368442456283, "grad_norm": 0.30022236704826355, "learning_rate": 9.075313670383611e-09, "loss": 0.2686, "step": 48331 }, { "epoch": 4.913786091907279, "grad_norm": 0.2650090754032135, "learning_rate": 9.053953880826527e-09, "loss": 0.2611, "step": 48332 }, { "epoch": 4.913887759251728, "grad_norm": 0.2929716110229492, "learning_rate": 9.032619234469741e-09, "loss": 0.2943, "step": 48333 }, { "epoch": 4.913989426596177, "grad_norm": 0.28042280673980713, "learning_rate": 9.011309731420393e-09, "loss": 0.3182, "step": 48334 }, { "epoch": 4.914091093940626, "grad_norm": 0.2892718017101288, "learning_rate": 8.990025371786171e-09, "loss": 0.2877, "step": 48335 }, { "epoch": 4.914192761285075, "grad_norm": 0.2736579179763794, "learning_rate": 8.968766155673658e-09, "loss": 0.2669, "step": 48336 }, { "epoch": 4.914294428629524, "grad_norm": 0.2941237986087799, "learning_rate": 8.947532083191101e-09, "loss": 0.292, "step": 48337 }, { "epoch": 4.914396095973974, "grad_norm": 0.28837668895721436, "learning_rate": 8.926323154444527e-09, "loss": 0.2913, "step": 48338 }, { "epoch": 4.914497763318423, "grad_norm": 0.26246681809425354, "learning_rate": 8.905139369541072e-09, "loss": 0.2771, "step": 48339 }, { "epoch": 4.9145994306628715, "grad_norm": 0.2988308370113373, "learning_rate": 8.883980728587316e-09, "loss": 0.322, "step": 48340 }, { "epoch": 4.9147010980073205, "grad_norm": 0.2757440209388733, "learning_rate": 8.862847231690397e-09, "loss": 0.309, "step": 48341 }, { "epoch": 4.914802765351769, "grad_norm": 0.2586940824985504, "learning_rate": 8.84173887895634e-09, "loss": 0.3377, "step": 48342 }, { "epoch": 4.914904432696218, "grad_norm": 0.2631162703037262, "learning_rate": 8.820655670491173e-09, "loss": 0.3184, "step": 48343 }, { "epoch": 4.915006100040667, "grad_norm": 0.2798324227333069, "learning_rate": 8.799597606401478e-09, "loss": 0.307, "step": 48344 }, { "epoch": 4.915107767385116, "grad_norm": 0.27164486050605774, "learning_rate": 8.778564686793834e-09, "loss": 0.2832, "step": 48345 }, { "epoch": 4.915209434729565, "grad_norm": 0.2724403738975525, "learning_rate": 8.757556911773157e-09, "loss": 0.3041, "step": 48346 }, { "epoch": 4.915311102074014, "grad_norm": 0.2710471451282501, "learning_rate": 8.73657428144603e-09, "loss": 0.3054, "step": 48347 }, { "epoch": 4.915412769418463, "grad_norm": 0.28120192885398865, "learning_rate": 8.715616795917925e-09, "loss": 0.2928, "step": 48348 }, { "epoch": 4.915514436762912, "grad_norm": 0.28565263748168945, "learning_rate": 8.69468445529431e-09, "loss": 0.2879, "step": 48349 }, { "epoch": 4.915616104107361, "grad_norm": 0.267216295003891, "learning_rate": 8.67377725968066e-09, "loss": 0.288, "step": 48350 }, { "epoch": 4.91571777145181, "grad_norm": 0.2916505038738251, "learning_rate": 8.652895209182998e-09, "loss": 0.3117, "step": 48351 }, { "epoch": 4.915819438796259, "grad_norm": 0.28890255093574524, "learning_rate": 8.632038303905133e-09, "loss": 0.2984, "step": 48352 }, { "epoch": 4.915921106140708, "grad_norm": 0.3049856722354889, "learning_rate": 8.611206543953643e-09, "loss": 0.2903, "step": 48353 }, { "epoch": 4.916022773485157, "grad_norm": 0.28484034538269043, "learning_rate": 8.590399929432336e-09, "loss": 0.2802, "step": 48354 }, { "epoch": 4.916124440829606, "grad_norm": 0.292618989944458, "learning_rate": 8.569618460446128e-09, "loss": 0.2773, "step": 48355 }, { "epoch": 4.916226108174055, "grad_norm": 0.28089839220046997, "learning_rate": 8.548862137100488e-09, "loss": 0.3176, "step": 48356 }, { "epoch": 4.9163277755185035, "grad_norm": 0.2626808285713196, "learning_rate": 8.52813095949978e-09, "loss": 0.3019, "step": 48357 }, { "epoch": 4.9164294428629525, "grad_norm": 0.25801312923431396, "learning_rate": 8.50742492774781e-09, "loss": 0.2944, "step": 48358 }, { "epoch": 4.916531110207401, "grad_norm": 0.2674444019794464, "learning_rate": 8.48674404194949e-09, "loss": 0.3083, "step": 48359 }, { "epoch": 4.91663277755185, "grad_norm": 0.2861880958080292, "learning_rate": 8.46608830220863e-09, "loss": 0.2922, "step": 48360 }, { "epoch": 4.916734444896299, "grad_norm": 0.27024248242378235, "learning_rate": 8.445457708629589e-09, "loss": 0.2899, "step": 48361 }, { "epoch": 4.916836112240748, "grad_norm": 0.2792585790157318, "learning_rate": 8.424852261315618e-09, "loss": 0.3324, "step": 48362 }, { "epoch": 4.916937779585197, "grad_norm": 0.29065850377082825, "learning_rate": 8.404271960371636e-09, "loss": 0.3141, "step": 48363 }, { "epoch": 4.917039446929646, "grad_norm": 0.2726697623729706, "learning_rate": 8.383716805900888e-09, "loss": 0.2915, "step": 48364 }, { "epoch": 4.917141114274095, "grad_norm": 0.28630751371383667, "learning_rate": 8.363186798006073e-09, "loss": 0.2878, "step": 48365 }, { "epoch": 4.917242781618544, "grad_norm": 0.2881711423397064, "learning_rate": 8.342681936792107e-09, "loss": 0.2888, "step": 48366 }, { "epoch": 4.917344448962993, "grad_norm": 0.28774189949035645, "learning_rate": 8.32220222236113e-09, "loss": 0.2759, "step": 48367 }, { "epoch": 4.917446116307442, "grad_norm": 0.2903018891811371, "learning_rate": 8.301747654816395e-09, "loss": 0.2995, "step": 48368 }, { "epoch": 4.917547783651891, "grad_norm": 0.29061809182167053, "learning_rate": 8.281318234261704e-09, "loss": 0.278, "step": 48369 }, { "epoch": 4.91764945099634, "grad_norm": 0.25924742221832275, "learning_rate": 8.260913960799199e-09, "loss": 0.2902, "step": 48370 }, { "epoch": 4.917751118340789, "grad_norm": 0.28143471479415894, "learning_rate": 8.240534834532132e-09, "loss": 0.3085, "step": 48371 }, { "epoch": 4.917852785685238, "grad_norm": 0.2909204959869385, "learning_rate": 8.220180855563197e-09, "loss": 0.2578, "step": 48372 }, { "epoch": 4.917954453029687, "grad_norm": 0.2722812294960022, "learning_rate": 8.199852023994537e-09, "loss": 0.2978, "step": 48373 }, { "epoch": 4.9180561203741355, "grad_norm": 0.26296088099479675, "learning_rate": 8.179548339928845e-09, "loss": 0.2844, "step": 48374 }, { "epoch": 4.9181577877185845, "grad_norm": 0.273908793926239, "learning_rate": 8.159269803468261e-09, "loss": 0.315, "step": 48375 }, { "epoch": 4.918259455063033, "grad_norm": 0.30165934562683105, "learning_rate": 8.139016414715483e-09, "loss": 0.2783, "step": 48376 }, { "epoch": 4.918361122407482, "grad_norm": 0.2633538544178009, "learning_rate": 8.11878817377154e-09, "loss": 0.3054, "step": 48377 }, { "epoch": 4.918462789751931, "grad_norm": 0.27383944392204285, "learning_rate": 8.09858508073913e-09, "loss": 0.2869, "step": 48378 }, { "epoch": 4.91856445709638, "grad_norm": 0.3145774006843567, "learning_rate": 8.078407135719835e-09, "loss": 0.2592, "step": 48379 }, { "epoch": 4.918666124440829, "grad_norm": 0.27009162306785583, "learning_rate": 8.058254338815242e-09, "loss": 0.3125, "step": 48380 }, { "epoch": 4.918767791785278, "grad_norm": 0.2652435302734375, "learning_rate": 8.038126690126936e-09, "loss": 0.3155, "step": 48381 }, { "epoch": 4.918869459129727, "grad_norm": 0.31526046991348267, "learning_rate": 8.018024189755946e-09, "loss": 0.2867, "step": 48382 }, { "epoch": 4.918971126474176, "grad_norm": 0.2924801707267761, "learning_rate": 7.997946837804415e-09, "loss": 0.2856, "step": 48383 }, { "epoch": 4.919072793818626, "grad_norm": 0.2702735662460327, "learning_rate": 7.977894634372263e-09, "loss": 0.3129, "step": 48384 }, { "epoch": 4.919174461163075, "grad_norm": 0.2466067522764206, "learning_rate": 7.95786757956163e-09, "loss": 0.2927, "step": 48385 }, { "epoch": 4.919276128507524, "grad_norm": 0.2776024639606476, "learning_rate": 7.93786567347299e-09, "loss": 0.2895, "step": 48386 }, { "epoch": 4.919377795851973, "grad_norm": 0.2726445198059082, "learning_rate": 7.917888916206817e-09, "loss": 0.2988, "step": 48387 }, { "epoch": 4.919479463196422, "grad_norm": 0.3006259500980377, "learning_rate": 7.897937307864146e-09, "loss": 0.3077, "step": 48388 }, { "epoch": 4.9195811305408705, "grad_norm": 0.2775113582611084, "learning_rate": 7.878010848546002e-09, "loss": 0.2841, "step": 48389 }, { "epoch": 4.9196827978853195, "grad_norm": 0.2807644307613373, "learning_rate": 7.8581095383512e-09, "loss": 0.2702, "step": 48390 }, { "epoch": 4.919784465229768, "grad_norm": 0.26878121495246887, "learning_rate": 7.83823337738132e-09, "loss": 0.3206, "step": 48391 }, { "epoch": 4.919886132574217, "grad_norm": 0.2657895088195801, "learning_rate": 7.818382365735733e-09, "loss": 0.2829, "step": 48392 }, { "epoch": 4.919987799918666, "grad_norm": 0.29461848735809326, "learning_rate": 7.798556503515465e-09, "loss": 0.271, "step": 48393 }, { "epoch": 4.920089467263115, "grad_norm": 0.26792433857917786, "learning_rate": 7.778755790819325e-09, "loss": 0.2876, "step": 48394 }, { "epoch": 4.920191134607564, "grad_norm": 0.27367186546325684, "learning_rate": 7.758980227747792e-09, "loss": 0.2906, "step": 48395 }, { "epoch": 4.920292801952013, "grad_norm": 0.2849721312522888, "learning_rate": 7.739229814399674e-09, "loss": 0.3113, "step": 48396 }, { "epoch": 4.920394469296462, "grad_norm": 0.2547817826271057, "learning_rate": 7.719504550875446e-09, "loss": 0.2948, "step": 48397 }, { "epoch": 4.920496136640911, "grad_norm": 0.28214141726493835, "learning_rate": 7.699804437273916e-09, "loss": 0.2895, "step": 48398 }, { "epoch": 4.92059780398536, "grad_norm": 0.278493195772171, "learning_rate": 7.680129473694454e-09, "loss": 0.2928, "step": 48399 }, { "epoch": 4.920699471329809, "grad_norm": 0.2754148244857788, "learning_rate": 7.660479660235864e-09, "loss": 0.2607, "step": 48400 }, { "epoch": 4.920801138674258, "grad_norm": 0.2813054621219635, "learning_rate": 7.640854996997515e-09, "loss": 0.3133, "step": 48401 }, { "epoch": 4.920902806018707, "grad_norm": 0.2888756990432739, "learning_rate": 7.621255484078215e-09, "loss": 0.2686, "step": 48402 }, { "epoch": 4.921004473363156, "grad_norm": 0.27217555046081543, "learning_rate": 7.60168112157622e-09, "loss": 0.2858, "step": 48403 }, { "epoch": 4.921106140707605, "grad_norm": 0.27502691745758057, "learning_rate": 7.58213190959145e-09, "loss": 0.2904, "step": 48404 }, { "epoch": 4.921207808052054, "grad_norm": 0.2725941240787506, "learning_rate": 7.562607848221048e-09, "loss": 0.318, "step": 48405 }, { "epoch": 4.9213094753965025, "grad_norm": 0.2829605042934418, "learning_rate": 7.543108937563825e-09, "loss": 0.3124, "step": 48406 }, { "epoch": 4.9214111427409515, "grad_norm": 0.27131137251853943, "learning_rate": 7.523635177718037e-09, "loss": 0.2946, "step": 48407 }, { "epoch": 4.9215128100854, "grad_norm": 0.2728074789047241, "learning_rate": 7.504186568781935e-09, "loss": 0.3094, "step": 48408 }, { "epoch": 4.921614477429849, "grad_norm": 0.2592771649360657, "learning_rate": 7.484763110853221e-09, "loss": 0.3044, "step": 48409 }, { "epoch": 4.921716144774298, "grad_norm": 0.3150770962238312, "learning_rate": 7.465364804029595e-09, "loss": 0.2968, "step": 48410 }, { "epoch": 4.921817812118747, "grad_norm": 0.28591206669807434, "learning_rate": 7.445991648409312e-09, "loss": 0.3084, "step": 48411 }, { "epoch": 4.921919479463196, "grad_norm": 0.2785559296607971, "learning_rate": 7.42664364409007e-09, "loss": 0.3107, "step": 48412 }, { "epoch": 4.922021146807645, "grad_norm": 0.271517813205719, "learning_rate": 7.40732079116846e-09, "loss": 0.3044, "step": 48413 }, { "epoch": 4.922122814152094, "grad_norm": 0.26768165826797485, "learning_rate": 7.388023089742735e-09, "loss": 0.3027, "step": 48414 }, { "epoch": 4.922224481496543, "grad_norm": 0.28579458594322205, "learning_rate": 7.368750539909486e-09, "loss": 0.2988, "step": 48415 }, { "epoch": 4.922326148840992, "grad_norm": 0.27855977416038513, "learning_rate": 7.349503141765857e-09, "loss": 0.2885, "step": 48416 }, { "epoch": 4.922427816185441, "grad_norm": 0.29042020440101624, "learning_rate": 7.3302808954089924e-09, "loss": 0.283, "step": 48417 }, { "epoch": 4.92252948352989, "grad_norm": 0.28162211179733276, "learning_rate": 7.311083800936036e-09, "loss": 0.2978, "step": 48418 }, { "epoch": 4.922631150874339, "grad_norm": 0.28614023327827454, "learning_rate": 7.2919118584435785e-09, "loss": 0.2953, "step": 48419 }, { "epoch": 4.922732818218789, "grad_norm": 0.292371541261673, "learning_rate": 7.272765068027654e-09, "loss": 0.3505, "step": 48420 }, { "epoch": 4.9228344855632375, "grad_norm": 0.2749180495738983, "learning_rate": 7.253643429785406e-09, "loss": 0.2889, "step": 48421 }, { "epoch": 4.9229361529076865, "grad_norm": 0.2963685095310211, "learning_rate": 7.234546943812315e-09, "loss": 0.2991, "step": 48422 }, { "epoch": 4.923037820252135, "grad_norm": 0.29047757387161255, "learning_rate": 7.215475610205524e-09, "loss": 0.297, "step": 48423 }, { "epoch": 4.923139487596584, "grad_norm": 0.26777228713035583, "learning_rate": 7.196429429060514e-09, "loss": 0.3027, "step": 48424 }, { "epoch": 4.923241154941033, "grad_norm": 0.28044795989990234, "learning_rate": 7.177408400473318e-09, "loss": 0.2969, "step": 48425 }, { "epoch": 4.923342822285482, "grad_norm": 0.27402350306510925, "learning_rate": 7.158412524540526e-09, "loss": 0.2803, "step": 48426 }, { "epoch": 4.923444489629931, "grad_norm": 0.3047599494457245, "learning_rate": 7.1394418013565055e-09, "loss": 0.2671, "step": 48427 }, { "epoch": 4.92354615697438, "grad_norm": 0.2692275643348694, "learning_rate": 7.120496231017293e-09, "loss": 0.2983, "step": 48428 }, { "epoch": 4.923647824318829, "grad_norm": 0.2613995671272278, "learning_rate": 7.101575813618922e-09, "loss": 0.2581, "step": 48429 }, { "epoch": 4.923749491663278, "grad_norm": 0.27015575766563416, "learning_rate": 7.082680549256315e-09, "loss": 0.345, "step": 48430 }, { "epoch": 4.923851159007727, "grad_norm": 0.3262559473514557, "learning_rate": 7.063810438024954e-09, "loss": 0.2709, "step": 48431 }, { "epoch": 4.923952826352176, "grad_norm": 0.29087942838668823, "learning_rate": 7.044965480019206e-09, "loss": 0.2944, "step": 48432 }, { "epoch": 4.924054493696625, "grad_norm": 0.27307555079460144, "learning_rate": 7.02614567533455e-09, "loss": 0.2961, "step": 48433 }, { "epoch": 4.924156161041074, "grad_norm": 0.2657853364944458, "learning_rate": 7.007351024065356e-09, "loss": 0.303, "step": 48434 }, { "epoch": 4.924257828385523, "grad_norm": 0.26302629709243774, "learning_rate": 6.988581526307103e-09, "loss": 0.332, "step": 48435 }, { "epoch": 4.924359495729972, "grad_norm": 0.31441012024879456, "learning_rate": 6.969837182153605e-09, "loss": 0.268, "step": 48436 }, { "epoch": 4.924461163074421, "grad_norm": 0.30669650435447693, "learning_rate": 6.951117991699785e-09, "loss": 0.3059, "step": 48437 }, { "epoch": 4.9245628304188696, "grad_norm": 0.2792705297470093, "learning_rate": 6.932423955038903e-09, "loss": 0.305, "step": 48438 }, { "epoch": 4.9246644977633185, "grad_norm": 0.26201462745666504, "learning_rate": 6.913755072266992e-09, "loss": 0.2939, "step": 48439 }, { "epoch": 4.924766165107767, "grad_norm": 0.29674047231674194, "learning_rate": 6.895111343476202e-09, "loss": 0.2961, "step": 48440 }, { "epoch": 4.924867832452216, "grad_norm": 0.27230942249298096, "learning_rate": 6.876492768762011e-09, "loss": 0.3284, "step": 48441 }, { "epoch": 4.924969499796665, "grad_norm": 0.25988057255744934, "learning_rate": 6.857899348217123e-09, "loss": 0.3617, "step": 48442 }, { "epoch": 4.925071167141114, "grad_norm": 0.2884984612464905, "learning_rate": 6.839331081935352e-09, "loss": 0.2988, "step": 48443 }, { "epoch": 4.925172834485563, "grad_norm": 0.3238656222820282, "learning_rate": 6.820787970011067e-09, "loss": 0.2855, "step": 48444 }, { "epoch": 4.925274501830012, "grad_norm": 0.29532814025878906, "learning_rate": 6.802270012536416e-09, "loss": 0.2716, "step": 48445 }, { "epoch": 4.925376169174461, "grad_norm": 0.2900850176811218, "learning_rate": 6.783777209605769e-09, "loss": 0.3001, "step": 48446 }, { "epoch": 4.92547783651891, "grad_norm": 0.2816764712333679, "learning_rate": 6.765309561311828e-09, "loss": 0.2827, "step": 48447 }, { "epoch": 4.925579503863359, "grad_norm": 0.27487656474113464, "learning_rate": 6.746867067747853e-09, "loss": 0.2764, "step": 48448 }, { "epoch": 4.925681171207808, "grad_norm": 0.2871931791305542, "learning_rate": 6.7284497290065475e-09, "loss": 0.2741, "step": 48449 }, { "epoch": 4.925782838552257, "grad_norm": 0.26430708169937134, "learning_rate": 6.7100575451800594e-09, "loss": 0.2825, "step": 48450 }, { "epoch": 4.925884505896706, "grad_norm": 0.312041312456131, "learning_rate": 6.6916905163627585e-09, "loss": 0.3121, "step": 48451 }, { "epoch": 4.925986173241155, "grad_norm": 0.3080594837665558, "learning_rate": 6.673348642645128e-09, "loss": 0.2844, "step": 48452 }, { "epoch": 4.926087840585604, "grad_norm": 0.2791058421134949, "learning_rate": 6.6550319241209805e-09, "loss": 0.2751, "step": 48453 }, { "epoch": 4.926189507930053, "grad_norm": 0.263495534658432, "learning_rate": 6.636740360882465e-09, "loss": 0.2952, "step": 48454 }, { "epoch": 4.9262911752745016, "grad_norm": 0.2735117971897125, "learning_rate": 6.618473953021176e-09, "loss": 0.3164, "step": 48455 }, { "epoch": 4.9263928426189505, "grad_norm": 0.27867332100868225, "learning_rate": 6.600232700629816e-09, "loss": 0.2522, "step": 48456 }, { "epoch": 4.926494509963399, "grad_norm": 0.3046287000179291, "learning_rate": 6.5820166037994235e-09, "loss": 0.3133, "step": 48457 }, { "epoch": 4.926596177307848, "grad_norm": 0.29126039147377014, "learning_rate": 6.5638256626221474e-09, "loss": 0.2882, "step": 48458 }, { "epoch": 4.926697844652297, "grad_norm": 0.28604036569595337, "learning_rate": 6.545659877190136e-09, "loss": 0.2986, "step": 48459 }, { "epoch": 4.926799511996746, "grad_norm": 0.2672695815563202, "learning_rate": 6.527519247593872e-09, "loss": 0.291, "step": 48460 }, { "epoch": 4.926901179341195, "grad_norm": 0.3095012307167053, "learning_rate": 6.509403773925505e-09, "loss": 0.2756, "step": 48461 }, { "epoch": 4.927002846685644, "grad_norm": 0.3014791011810303, "learning_rate": 6.4913134562766265e-09, "loss": 0.3125, "step": 48462 }, { "epoch": 4.927104514030093, "grad_norm": 0.2966049611568451, "learning_rate": 6.473248294737167e-09, "loss": 0.257, "step": 48463 }, { "epoch": 4.927206181374542, "grad_norm": 0.2903556823730469, "learning_rate": 6.455208289399273e-09, "loss": 0.2763, "step": 48464 }, { "epoch": 4.927307848718991, "grad_norm": 0.27205654978752136, "learning_rate": 6.4371934403534284e-09, "loss": 0.2998, "step": 48465 }, { "epoch": 4.927409516063441, "grad_norm": 0.26460662484169006, "learning_rate": 6.419203747690117e-09, "loss": 0.2919, "step": 48466 }, { "epoch": 4.92751118340789, "grad_norm": 0.2763219177722931, "learning_rate": 6.401239211500376e-09, "loss": 0.2838, "step": 48467 }, { "epoch": 4.927612850752339, "grad_norm": 0.2730589509010315, "learning_rate": 6.383299831874135e-09, "loss": 0.2603, "step": 48468 }, { "epoch": 4.927714518096788, "grad_norm": 0.2830961048603058, "learning_rate": 6.365385608902985e-09, "loss": 0.2756, "step": 48469 }, { "epoch": 4.927816185441237, "grad_norm": 0.2771323621273041, "learning_rate": 6.347496542675747e-09, "loss": 0.2921, "step": 48470 }, { "epoch": 4.9279178527856855, "grad_norm": 0.27578452229499817, "learning_rate": 6.3296326332829005e-09, "loss": 0.2964, "step": 48471 }, { "epoch": 4.9280195201301344, "grad_norm": 0.2988712787628174, "learning_rate": 6.3117938808154864e-09, "loss": 0.3018, "step": 48472 }, { "epoch": 4.928121187474583, "grad_norm": 0.271634578704834, "learning_rate": 6.293980285361767e-09, "loss": 0.3126, "step": 48473 }, { "epoch": 4.928222854819032, "grad_norm": 0.2856564223766327, "learning_rate": 6.2761918470127805e-09, "loss": 0.2801, "step": 48474 }, { "epoch": 4.928324522163481, "grad_norm": 0.2909812927246094, "learning_rate": 6.258428565857344e-09, "loss": 0.2808, "step": 48475 }, { "epoch": 4.92842618950793, "grad_norm": 0.27333712577819824, "learning_rate": 6.240690441985387e-09, "loss": 0.295, "step": 48476 }, { "epoch": 4.928527856852379, "grad_norm": 0.29329195618629456, "learning_rate": 6.22297747548628e-09, "loss": 0.27, "step": 48477 }, { "epoch": 4.928629524196828, "grad_norm": 0.31386035680770874, "learning_rate": 6.2052896664488435e-09, "loss": 0.2926, "step": 48478 }, { "epoch": 4.928731191541277, "grad_norm": 0.2734401524066925, "learning_rate": 6.187627014963005e-09, "loss": 0.2734, "step": 48479 }, { "epoch": 4.928832858885726, "grad_norm": 0.26288145780563354, "learning_rate": 6.169989521116471e-09, "loss": 0.3001, "step": 48480 }, { "epoch": 4.928934526230175, "grad_norm": 0.2801800072193146, "learning_rate": 6.1523771849991695e-09, "loss": 0.2832, "step": 48481 }, { "epoch": 4.929036193574624, "grad_norm": 0.2830103635787964, "learning_rate": 6.134790006699365e-09, "loss": 0.3162, "step": 48482 }, { "epoch": 4.929137860919073, "grad_norm": 0.2818650007247925, "learning_rate": 6.117227986305874e-09, "loss": 0.295, "step": 48483 }, { "epoch": 4.929239528263522, "grad_norm": 0.2792051136493683, "learning_rate": 6.099691123906959e-09, "loss": 0.3031, "step": 48484 }, { "epoch": 4.929341195607971, "grad_norm": 0.30503571033477783, "learning_rate": 6.082179419591438e-09, "loss": 0.3145, "step": 48485 }, { "epoch": 4.92944286295242, "grad_norm": 0.2591210901737213, "learning_rate": 6.064692873446465e-09, "loss": 0.3072, "step": 48486 }, { "epoch": 4.929544530296869, "grad_norm": 0.28369855880737305, "learning_rate": 6.0472314855614115e-09, "loss": 0.3382, "step": 48487 }, { "epoch": 4.9296461976413175, "grad_norm": 0.2929801940917969, "learning_rate": 6.029795256023429e-09, "loss": 0.2947, "step": 48488 }, { "epoch": 4.9297478649857664, "grad_norm": 0.2927648723125458, "learning_rate": 6.012384184920783e-09, "loss": 0.2663, "step": 48489 }, { "epoch": 4.929849532330215, "grad_norm": 0.31777238845825195, "learning_rate": 5.994998272340624e-09, "loss": 0.2903, "step": 48490 }, { "epoch": 4.929951199674664, "grad_norm": 0.26005277037620544, "learning_rate": 5.977637518371216e-09, "loss": 0.2994, "step": 48491 }, { "epoch": 4.930052867019113, "grad_norm": 0.27615952491760254, "learning_rate": 5.96030192309971e-09, "loss": 0.2742, "step": 48492 }, { "epoch": 4.930154534363562, "grad_norm": 0.2697876989841461, "learning_rate": 5.9429914866132585e-09, "loss": 0.2786, "step": 48493 }, { "epoch": 4.930256201708011, "grad_norm": 0.27740687131881714, "learning_rate": 5.9257062089990156e-09, "loss": 0.3328, "step": 48494 }, { "epoch": 4.93035786905246, "grad_norm": 0.26706501841545105, "learning_rate": 5.908446090344689e-09, "loss": 0.2909, "step": 48495 }, { "epoch": 4.930459536396909, "grad_norm": 0.2826120853424072, "learning_rate": 5.891211130736873e-09, "loss": 0.3117, "step": 48496 }, { "epoch": 4.930561203741358, "grad_norm": 0.3018333911895752, "learning_rate": 5.874001330262169e-09, "loss": 0.2703, "step": 48497 }, { "epoch": 4.930662871085807, "grad_norm": 0.2871569097042084, "learning_rate": 5.856816689007172e-09, "loss": 0.2922, "step": 48498 }, { "epoch": 4.930764538430256, "grad_norm": 0.2777274549007416, "learning_rate": 5.83965720705959e-09, "loss": 0.3026, "step": 48499 }, { "epoch": 4.930866205774705, "grad_norm": 0.27793818712234497, "learning_rate": 5.822522884504356e-09, "loss": 0.2892, "step": 48500 }, { "epoch": 4.930967873119154, "grad_norm": 0.30962660908699036, "learning_rate": 5.805413721428621e-09, "loss": 0.283, "step": 48501 }, { "epoch": 4.931069540463604, "grad_norm": 0.2832290232181549, "learning_rate": 5.788329717918428e-09, "loss": 0.2832, "step": 48502 }, { "epoch": 4.9311712078080525, "grad_norm": 0.26865845918655396, "learning_rate": 5.771270874059265e-09, "loss": 0.2695, "step": 48503 }, { "epoch": 4.9312728751525015, "grad_norm": 0.28533512353897095, "learning_rate": 5.7542371899382835e-09, "loss": 0.3022, "step": 48504 }, { "epoch": 4.93137454249695, "grad_norm": 0.2597171664237976, "learning_rate": 5.737228665640415e-09, "loss": 0.3315, "step": 48505 }, { "epoch": 4.931476209841399, "grad_norm": 0.25299903750419617, "learning_rate": 5.7202453012517035e-09, "loss": 0.3195, "step": 48506 }, { "epoch": 4.931577877185848, "grad_norm": 0.27028730511665344, "learning_rate": 5.703287096857635e-09, "loss": 0.2703, "step": 48507 }, { "epoch": 4.931679544530297, "grad_norm": 0.28046101331710815, "learning_rate": 5.686354052543697e-09, "loss": 0.3007, "step": 48508 }, { "epoch": 4.931781211874746, "grad_norm": 0.34696030616760254, "learning_rate": 5.669446168394821e-09, "loss": 0.2974, "step": 48509 }, { "epoch": 4.931882879219195, "grad_norm": 0.29619696736335754, "learning_rate": 5.652563444496495e-09, "loss": 0.3199, "step": 48510 }, { "epoch": 4.931984546563644, "grad_norm": 0.28608065843582153, "learning_rate": 5.63570588093365e-09, "loss": 0.2758, "step": 48511 }, { "epoch": 4.932086213908093, "grad_norm": 0.2724591791629791, "learning_rate": 5.61887347779122e-09, "loss": 0.3324, "step": 48512 }, { "epoch": 4.932187881252542, "grad_norm": 0.30729687213897705, "learning_rate": 5.602066235154135e-09, "loss": 0.2753, "step": 48513 }, { "epoch": 4.932289548596991, "grad_norm": 0.2693241536617279, "learning_rate": 5.585284153106773e-09, "loss": 0.2969, "step": 48514 }, { "epoch": 4.93239121594144, "grad_norm": 0.2802169620990753, "learning_rate": 5.568527231734067e-09, "loss": 0.3069, "step": 48515 }, { "epoch": 4.932492883285889, "grad_norm": 0.3190498650074005, "learning_rate": 5.551795471120392e-09, "loss": 0.2961, "step": 48516 }, { "epoch": 4.932594550630338, "grad_norm": 0.2854297161102295, "learning_rate": 5.535088871350125e-09, "loss": 0.297, "step": 48517 }, { "epoch": 4.932696217974787, "grad_norm": 0.26633405685424805, "learning_rate": 5.518407432506534e-09, "loss": 0.2997, "step": 48518 }, { "epoch": 4.932797885319236, "grad_norm": 0.28230997920036316, "learning_rate": 5.501751154675106e-09, "loss": 0.3038, "step": 48519 }, { "epoch": 4.9328995526636845, "grad_norm": 0.26979881525039673, "learning_rate": 5.485120037938551e-09, "loss": 0.3322, "step": 48520 }, { "epoch": 4.9330012200081335, "grad_norm": 0.2784351110458374, "learning_rate": 5.468514082381249e-09, "loss": 0.2866, "step": 48521 }, { "epoch": 4.933102887352582, "grad_norm": 0.2744220793247223, "learning_rate": 5.451933288086464e-09, "loss": 0.3186, "step": 48522 }, { "epoch": 4.933204554697031, "grad_norm": 0.26310184597969055, "learning_rate": 5.435377655138019e-09, "loss": 0.3, "step": 48523 }, { "epoch": 4.93330622204148, "grad_norm": 0.3184982240200043, "learning_rate": 5.41884718361918e-09, "loss": 0.2893, "step": 48524 }, { "epoch": 4.933407889385929, "grad_norm": 0.28534671664237976, "learning_rate": 5.402341873613215e-09, "loss": 0.3024, "step": 48525 }, { "epoch": 4.933509556730378, "grad_norm": 0.2747083008289337, "learning_rate": 5.385861725203945e-09, "loss": 0.2864, "step": 48526 }, { "epoch": 4.933611224074827, "grad_norm": 0.2853494882583618, "learning_rate": 5.369406738472971e-09, "loss": 0.2636, "step": 48527 }, { "epoch": 4.933712891419276, "grad_norm": 0.277100145816803, "learning_rate": 5.352976913504671e-09, "loss": 0.2902, "step": 48528 }, { "epoch": 4.933814558763725, "grad_norm": 0.27913007140159607, "learning_rate": 5.3365722503812e-09, "loss": 0.3025, "step": 48529 }, { "epoch": 4.933916226108174, "grad_norm": 0.30953291058540344, "learning_rate": 5.320192749184716e-09, "loss": 0.3, "step": 48530 }, { "epoch": 4.934017893452623, "grad_norm": 0.2621214687824249, "learning_rate": 5.303838409998485e-09, "loss": 0.3206, "step": 48531 }, { "epoch": 4.934119560797072, "grad_norm": 0.266304612159729, "learning_rate": 5.287509232904109e-09, "loss": 0.2917, "step": 48532 }, { "epoch": 4.934221228141521, "grad_norm": 0.2711810767650604, "learning_rate": 5.2712052179848535e-09, "loss": 0.2915, "step": 48533 }, { "epoch": 4.93432289548597, "grad_norm": 0.28773269057273865, "learning_rate": 5.254926365322322e-09, "loss": 0.2988, "step": 48534 }, { "epoch": 4.934424562830419, "grad_norm": 0.2591918408870697, "learning_rate": 5.238672674998113e-09, "loss": 0.2864, "step": 48535 }, { "epoch": 4.934526230174868, "grad_norm": 0.28245607018470764, "learning_rate": 5.2224441470949405e-09, "loss": 0.2896, "step": 48536 }, { "epoch": 4.9346278975193165, "grad_norm": 0.3058486580848694, "learning_rate": 5.20624078169385e-09, "loss": 0.286, "step": 48537 }, { "epoch": 4.9347295648637655, "grad_norm": 0.28015634417533875, "learning_rate": 5.190062578876997e-09, "loss": 0.2687, "step": 48538 }, { "epoch": 4.934831232208214, "grad_norm": 0.2675260007381439, "learning_rate": 5.1739095387259855e-09, "loss": 0.298, "step": 48539 }, { "epoch": 4.934932899552663, "grad_norm": 0.2621070146560669, "learning_rate": 5.157781661321304e-09, "loss": 0.3021, "step": 48540 }, { "epoch": 4.935034566897112, "grad_norm": 0.2968129813671112, "learning_rate": 5.14167894674511e-09, "loss": 0.2783, "step": 48541 }, { "epoch": 4.935136234241561, "grad_norm": 0.2771848142147064, "learning_rate": 5.125601395077895e-09, "loss": 0.2665, "step": 48542 }, { "epoch": 4.93523790158601, "grad_norm": 0.2597176134586334, "learning_rate": 5.10954900640126e-09, "loss": 0.2805, "step": 48543 }, { "epoch": 4.935339568930459, "grad_norm": 0.26149076223373413, "learning_rate": 5.093521780795141e-09, "loss": 0.313, "step": 48544 }, { "epoch": 4.935441236274908, "grad_norm": 0.2945941090583801, "learning_rate": 5.0775197183416944e-09, "loss": 0.2837, "step": 48545 }, { "epoch": 4.935542903619357, "grad_norm": 0.2615047097206116, "learning_rate": 5.061542819120302e-09, "loss": 0.3269, "step": 48546 }, { "epoch": 4.935644570963806, "grad_norm": 0.28277745842933655, "learning_rate": 5.045591083211454e-09, "loss": 0.3136, "step": 48547 }, { "epoch": 4.935746238308256, "grad_norm": 0.2832920551300049, "learning_rate": 5.029664510696752e-09, "loss": 0.3157, "step": 48548 }, { "epoch": 4.935847905652705, "grad_norm": 0.29337114095687866, "learning_rate": 5.013763101655022e-09, "loss": 0.3209, "step": 48549 }, { "epoch": 4.935949572997154, "grad_norm": 0.2490311563014984, "learning_rate": 4.997886856167311e-09, "loss": 0.2922, "step": 48550 }, { "epoch": 4.936051240341603, "grad_norm": 0.29591605067253113, "learning_rate": 4.982035774312999e-09, "loss": 0.3151, "step": 48551 }, { "epoch": 4.9361529076860515, "grad_norm": 0.26972612738609314, "learning_rate": 4.966209856172577e-09, "loss": 0.3567, "step": 48552 }, { "epoch": 4.9362545750305005, "grad_norm": 0.2850268483161926, "learning_rate": 4.9504091018248715e-09, "loss": 0.2937, "step": 48553 }, { "epoch": 4.936356242374949, "grad_norm": 0.2655964493751526, "learning_rate": 4.934633511350373e-09, "loss": 0.3441, "step": 48554 }, { "epoch": 4.936457909719398, "grad_norm": 0.2766181528568268, "learning_rate": 4.918883084827908e-09, "loss": 0.2961, "step": 48555 }, { "epoch": 4.936559577063847, "grad_norm": 0.2607538402080536, "learning_rate": 4.903157822337412e-09, "loss": 0.284, "step": 48556 }, { "epoch": 4.936661244408296, "grad_norm": 0.28046417236328125, "learning_rate": 4.887457723957711e-09, "loss": 0.2853, "step": 48557 }, { "epoch": 4.936762911752745, "grad_norm": 0.2809862494468689, "learning_rate": 4.871782789768187e-09, "loss": 0.2945, "step": 48558 }, { "epoch": 4.936864579097194, "grad_norm": 0.2846648097038269, "learning_rate": 4.856133019847109e-09, "loss": 0.3008, "step": 48559 }, { "epoch": 4.936966246441643, "grad_norm": 0.27547192573547363, "learning_rate": 4.840508414273859e-09, "loss": 0.266, "step": 48560 }, { "epoch": 4.937067913786092, "grad_norm": 0.27884796261787415, "learning_rate": 4.824908973127263e-09, "loss": 0.3368, "step": 48561 }, { "epoch": 4.937169581130541, "grad_norm": 0.2719514071941376, "learning_rate": 4.80933469648559e-09, "loss": 0.3228, "step": 48562 }, { "epoch": 4.93727124847499, "grad_norm": 0.26511335372924805, "learning_rate": 4.793785584427668e-09, "loss": 0.2807, "step": 48563 }, { "epoch": 4.937372915819439, "grad_norm": 0.26897677779197693, "learning_rate": 4.778261637031767e-09, "loss": 0.314, "step": 48564 }, { "epoch": 4.937474583163888, "grad_norm": 0.27700042724609375, "learning_rate": 4.7627628543756025e-09, "loss": 0.3076, "step": 48565 }, { "epoch": 4.937576250508337, "grad_norm": 0.2946907579898834, "learning_rate": 4.747289236537444e-09, "loss": 0.2876, "step": 48566 }, { "epoch": 4.937677917852786, "grad_norm": 0.2718396782875061, "learning_rate": 4.7318407835955645e-09, "loss": 0.2875, "step": 48567 }, { "epoch": 4.937779585197235, "grad_norm": 0.27012836933135986, "learning_rate": 4.7164174956276785e-09, "loss": 0.3039, "step": 48568 }, { "epoch": 4.9378812525416835, "grad_norm": 0.2914280593395233, "learning_rate": 4.701019372711502e-09, "loss": 0.3236, "step": 48569 }, { "epoch": 4.9379829198861325, "grad_norm": 0.26989874243736267, "learning_rate": 4.685646414924194e-09, "loss": 0.3353, "step": 48570 }, { "epoch": 4.938084587230581, "grad_norm": 0.29321733117103577, "learning_rate": 4.6702986223440276e-09, "loss": 0.3021, "step": 48571 }, { "epoch": 4.93818625457503, "grad_norm": 0.2828052043914795, "learning_rate": 4.654975995047606e-09, "loss": 0.3069, "step": 48572 }, { "epoch": 4.938287921919479, "grad_norm": 0.29606008529663086, "learning_rate": 4.639678533112091e-09, "loss": 0.3018, "step": 48573 }, { "epoch": 4.938389589263928, "grad_norm": 0.2861541509628296, "learning_rate": 4.624406236615198e-09, "loss": 0.3013, "step": 48574 }, { "epoch": 4.938491256608377, "grad_norm": 0.27447912096977234, "learning_rate": 4.609159105632976e-09, "loss": 0.3075, "step": 48575 }, { "epoch": 4.938592923952826, "grad_norm": 0.28108489513397217, "learning_rate": 4.5939371402431436e-09, "loss": 0.2932, "step": 48576 }, { "epoch": 4.938694591297275, "grad_norm": 0.268196702003479, "learning_rate": 4.578740340521748e-09, "loss": 0.338, "step": 48577 }, { "epoch": 4.938796258641724, "grad_norm": 0.2780689597129822, "learning_rate": 4.563568706545951e-09, "loss": 0.2987, "step": 48578 }, { "epoch": 4.938897925986173, "grad_norm": 0.27028006315231323, "learning_rate": 4.548422238391248e-09, "loss": 0.3036, "step": 48579 }, { "epoch": 4.938999593330622, "grad_norm": 0.2817748188972473, "learning_rate": 4.533300936134799e-09, "loss": 0.2761, "step": 48580 }, { "epoch": 4.939101260675071, "grad_norm": 0.2644968032836914, "learning_rate": 4.5182047998526546e-09, "loss": 0.3073, "step": 48581 }, { "epoch": 4.93920292801952, "grad_norm": 0.2700199782848358, "learning_rate": 4.50313382962031e-09, "loss": 0.2898, "step": 48582 }, { "epoch": 4.939304595363969, "grad_norm": 0.2801532447338104, "learning_rate": 4.48808802551437e-09, "loss": 0.2759, "step": 48583 }, { "epoch": 4.9394062627084185, "grad_norm": 0.2681865692138672, "learning_rate": 4.4730673876097755e-09, "loss": 0.3334, "step": 48584 }, { "epoch": 4.9395079300528675, "grad_norm": 0.2865132689476013, "learning_rate": 4.458071915983131e-09, "loss": 0.2627, "step": 48585 }, { "epoch": 4.939609597397316, "grad_norm": 0.26951947808265686, "learning_rate": 4.443101610709932e-09, "loss": 0.3256, "step": 48586 }, { "epoch": 4.939711264741765, "grad_norm": 0.2628745138645172, "learning_rate": 4.428156471864564e-09, "loss": 0.3046, "step": 48587 }, { "epoch": 4.939812932086214, "grad_norm": 0.29343682527542114, "learning_rate": 4.413236499523632e-09, "loss": 0.2895, "step": 48588 }, { "epoch": 4.939914599430663, "grad_norm": 0.27389779686927795, "learning_rate": 4.398341693761521e-09, "loss": 0.2871, "step": 48589 }, { "epoch": 4.940016266775112, "grad_norm": 0.2825516164302826, "learning_rate": 4.383472054653171e-09, "loss": 0.273, "step": 48590 }, { "epoch": 4.940117934119561, "grad_norm": 0.2826087772846222, "learning_rate": 4.368627582274077e-09, "loss": 0.2986, "step": 48591 }, { "epoch": 4.94021960146401, "grad_norm": 0.26711606979370117, "learning_rate": 4.353808276698623e-09, "loss": 0.3186, "step": 48592 }, { "epoch": 4.940321268808459, "grad_norm": 0.2732868194580078, "learning_rate": 4.339014138001196e-09, "loss": 0.3056, "step": 48593 }, { "epoch": 4.940422936152908, "grad_norm": 0.28621506690979004, "learning_rate": 4.324245166256735e-09, "loss": 0.276, "step": 48594 }, { "epoch": 4.940524603497357, "grad_norm": 0.27750062942504883, "learning_rate": 4.309501361539625e-09, "loss": 0.2987, "step": 48595 }, { "epoch": 4.940626270841806, "grad_norm": 0.2840583920478821, "learning_rate": 4.294782723924251e-09, "loss": 0.2929, "step": 48596 }, { "epoch": 4.940727938186255, "grad_norm": 0.27457931637763977, "learning_rate": 4.280089253484998e-09, "loss": 0.2716, "step": 48597 }, { "epoch": 4.940829605530704, "grad_norm": 0.2679068148136139, "learning_rate": 4.26542095029514e-09, "loss": 0.3231, "step": 48598 }, { "epoch": 4.940931272875153, "grad_norm": 0.2730163633823395, "learning_rate": 4.250777814428509e-09, "loss": 0.2683, "step": 48599 }, { "epoch": 4.941032940219602, "grad_norm": 0.29156598448753357, "learning_rate": 4.236159845959487e-09, "loss": 0.2887, "step": 48600 }, { "epoch": 4.9411346075640505, "grad_norm": 0.2650754749774933, "learning_rate": 4.221567044961905e-09, "loss": 0.2718, "step": 48601 }, { "epoch": 4.9412362749084995, "grad_norm": 0.28084665536880493, "learning_rate": 4.2069994115084835e-09, "loss": 0.3079, "step": 48602 }, { "epoch": 4.941337942252948, "grad_norm": 0.27730268239974976, "learning_rate": 4.192456945673051e-09, "loss": 0.2725, "step": 48603 }, { "epoch": 4.941439609597397, "grad_norm": 0.2607632577419281, "learning_rate": 4.177939647528884e-09, "loss": 0.2688, "step": 48604 }, { "epoch": 4.941541276941846, "grad_norm": 0.29494765400886536, "learning_rate": 4.163447517149255e-09, "loss": 0.3035, "step": 48605 }, { "epoch": 4.941642944286295, "grad_norm": 0.2811907231807709, "learning_rate": 4.148980554606885e-09, "loss": 0.2992, "step": 48606 }, { "epoch": 4.941744611630744, "grad_norm": 0.2768300771713257, "learning_rate": 4.134538759974493e-09, "loss": 0.2921, "step": 48607 }, { "epoch": 4.941846278975193, "grad_norm": 0.28051868081092834, "learning_rate": 4.120122133325355e-09, "loss": 0.2827, "step": 48608 }, { "epoch": 4.941947946319642, "grad_norm": 0.2764242887496948, "learning_rate": 4.105730674732189e-09, "loss": 0.3081, "step": 48609 }, { "epoch": 4.942049613664091, "grad_norm": 0.276775598526001, "learning_rate": 4.091364384266605e-09, "loss": 0.2753, "step": 48610 }, { "epoch": 4.94215128100854, "grad_norm": 0.2695479989051819, "learning_rate": 4.077023262001878e-09, "loss": 0.2778, "step": 48611 }, { "epoch": 4.942252948352989, "grad_norm": 0.2654770314693451, "learning_rate": 4.062707308009617e-09, "loss": 0.3007, "step": 48612 }, { "epoch": 4.942354615697438, "grad_norm": 0.25600990653038025, "learning_rate": 4.048416522361986e-09, "loss": 0.312, "step": 48613 }, { "epoch": 4.942456283041887, "grad_norm": 0.26563549041748047, "learning_rate": 4.034150905131706e-09, "loss": 0.2976, "step": 48614 }, { "epoch": 4.942557950386336, "grad_norm": 0.2605534791946411, "learning_rate": 4.019910456389831e-09, "loss": 0.2796, "step": 48615 }, { "epoch": 4.942659617730785, "grad_norm": 0.28631383180618286, "learning_rate": 4.005695176208524e-09, "loss": 0.3043, "step": 48616 }, { "epoch": 4.942761285075234, "grad_norm": 0.30951276421546936, "learning_rate": 3.991505064659395e-09, "loss": 0.2742, "step": 48617 }, { "epoch": 4.9428629524196825, "grad_norm": 0.26968783140182495, "learning_rate": 3.9773401218134996e-09, "loss": 0.3229, "step": 48618 }, { "epoch": 4.9429646197641315, "grad_norm": 0.279464989900589, "learning_rate": 3.963200347743001e-09, "loss": 0.28, "step": 48619 }, { "epoch": 4.94306628710858, "grad_norm": 0.27003228664398193, "learning_rate": 3.949085742518399e-09, "loss": 0.2971, "step": 48620 }, { "epoch": 4.943167954453029, "grad_norm": 0.2798764705657959, "learning_rate": 3.934996306211303e-09, "loss": 0.2715, "step": 48621 }, { "epoch": 4.943269621797478, "grad_norm": 0.27890273928642273, "learning_rate": 3.920932038892211e-09, "loss": 0.3168, "step": 48622 }, { "epoch": 4.943371289141927, "grad_norm": 0.31684768199920654, "learning_rate": 3.906892940632178e-09, "loss": 0.3011, "step": 48623 }, { "epoch": 4.943472956486376, "grad_norm": 0.28401651978492737, "learning_rate": 3.892879011501705e-09, "loss": 0.2856, "step": 48624 }, { "epoch": 4.943574623830825, "grad_norm": 0.259145051240921, "learning_rate": 3.878890251572398e-09, "loss": 0.273, "step": 48625 }, { "epoch": 4.943676291175274, "grad_norm": 0.2825593650341034, "learning_rate": 3.864926660913093e-09, "loss": 0.348, "step": 48626 }, { "epoch": 4.943777958519723, "grad_norm": 0.295242577791214, "learning_rate": 3.850988239595954e-09, "loss": 0.2813, "step": 48627 }, { "epoch": 4.943879625864172, "grad_norm": 0.2851601541042328, "learning_rate": 3.837074987689815e-09, "loss": 0.2934, "step": 48628 }, { "epoch": 4.943981293208621, "grad_norm": 0.27735549211502075, "learning_rate": 3.823186905265175e-09, "loss": 0.2961, "step": 48629 }, { "epoch": 4.944082960553071, "grad_norm": 0.29963403940200806, "learning_rate": 3.809323992391978e-09, "loss": 0.2924, "step": 48630 }, { "epoch": 4.94418462789752, "grad_norm": 0.2868587374687195, "learning_rate": 3.795486249140168e-09, "loss": 0.2807, "step": 48631 }, { "epoch": 4.944286295241969, "grad_norm": 0.2667151093482971, "learning_rate": 3.781673675579689e-09, "loss": 0.2832, "step": 48632 }, { "epoch": 4.9443879625864176, "grad_norm": 0.2806714177131653, "learning_rate": 3.7678862717793755e-09, "loss": 0.3011, "step": 48633 }, { "epoch": 4.9444896299308665, "grad_norm": 0.27957767248153687, "learning_rate": 3.754124037809725e-09, "loss": 0.2722, "step": 48634 }, { "epoch": 4.944591297275315, "grad_norm": 0.28220003843307495, "learning_rate": 3.740386973739019e-09, "loss": 0.3025, "step": 48635 }, { "epoch": 4.944692964619764, "grad_norm": 0.292807936668396, "learning_rate": 3.726675079637199e-09, "loss": 0.3039, "step": 48636 }, { "epoch": 4.944794631964213, "grad_norm": 0.2840040922164917, "learning_rate": 3.7129883555730995e-09, "loss": 0.3014, "step": 48637 }, { "epoch": 4.944896299308662, "grad_norm": 0.284438818693161, "learning_rate": 3.699326801615555e-09, "loss": 0.292, "step": 48638 }, { "epoch": 4.944997966653111, "grad_norm": 0.2662573754787445, "learning_rate": 3.6856904178333986e-09, "loss": 0.298, "step": 48639 }, { "epoch": 4.94509963399756, "grad_norm": 0.265491783618927, "learning_rate": 3.6720792042954643e-09, "loss": 0.2806, "step": 48640 }, { "epoch": 4.945201301342009, "grad_norm": 0.2723853588104248, "learning_rate": 3.6584931610705866e-09, "loss": 0.2714, "step": 48641 }, { "epoch": 4.945302968686458, "grad_norm": 0.282527893781662, "learning_rate": 3.644932288226488e-09, "loss": 0.2737, "step": 48642 }, { "epoch": 4.945404636030907, "grad_norm": 0.2676142752170563, "learning_rate": 3.631396585832558e-09, "loss": 0.3058, "step": 48643 }, { "epoch": 4.945506303375356, "grad_norm": 0.2761549651622772, "learning_rate": 3.617886053955966e-09, "loss": 0.3173, "step": 48644 }, { "epoch": 4.945607970719805, "grad_norm": 0.2720634341239929, "learning_rate": 3.6044006926649886e-09, "loss": 0.3147, "step": 48645 }, { "epoch": 4.945709638064254, "grad_norm": 0.2960922420024872, "learning_rate": 3.590940502027351e-09, "loss": 0.2912, "step": 48646 }, { "epoch": 4.945811305408703, "grad_norm": 0.28721001744270325, "learning_rate": 3.577505482111887e-09, "loss": 0.2728, "step": 48647 }, { "epoch": 4.945912972753152, "grad_norm": 0.28491348028182983, "learning_rate": 3.5640956329857646e-09, "loss": 0.2802, "step": 48648 }, { "epoch": 4.946014640097601, "grad_norm": 0.27590009570121765, "learning_rate": 3.550710954715597e-09, "loss": 0.287, "step": 48649 }, { "epoch": 4.9461163074420496, "grad_norm": 0.2826051115989685, "learning_rate": 3.5373514473702188e-09, "loss": 0.2624, "step": 48650 }, { "epoch": 4.9462179747864985, "grad_norm": 0.28191494941711426, "learning_rate": 3.5240171110162424e-09, "loss": 0.2965, "step": 48651 }, { "epoch": 4.946319642130947, "grad_norm": 0.2697843611240387, "learning_rate": 3.510707945720837e-09, "loss": 0.3565, "step": 48652 }, { "epoch": 4.946421309475396, "grad_norm": 0.310187965631485, "learning_rate": 3.497423951551171e-09, "loss": 0.2892, "step": 48653 }, { "epoch": 4.946522976819845, "grad_norm": 0.2797655165195465, "learning_rate": 3.4841651285738575e-09, "loss": 0.2621, "step": 48654 }, { "epoch": 4.946624644164294, "grad_norm": 0.2831494212150574, "learning_rate": 3.4709314768560653e-09, "loss": 0.3119, "step": 48655 }, { "epoch": 4.946726311508743, "grad_norm": 0.3058483898639679, "learning_rate": 3.4577229964649626e-09, "loss": 0.2929, "step": 48656 }, { "epoch": 4.946827978853192, "grad_norm": 0.2742167115211487, "learning_rate": 3.4445396874660532e-09, "loss": 0.2863, "step": 48657 }, { "epoch": 4.946929646197641, "grad_norm": 0.2722894251346588, "learning_rate": 3.4313815499259494e-09, "loss": 0.2737, "step": 48658 }, { "epoch": 4.94703131354209, "grad_norm": 0.2758692502975464, "learning_rate": 3.4182485839112658e-09, "loss": 0.3089, "step": 48659 }, { "epoch": 4.947132980886539, "grad_norm": 0.24601423740386963, "learning_rate": 3.4051407894886147e-09, "loss": 0.3289, "step": 48660 }, { "epoch": 4.947234648230988, "grad_norm": 0.2978990375995636, "learning_rate": 3.392058166722945e-09, "loss": 0.277, "step": 48661 }, { "epoch": 4.947336315575437, "grad_norm": 0.2783883213996887, "learning_rate": 3.379000715680869e-09, "loss": 0.2647, "step": 48662 }, { "epoch": 4.947437982919886, "grad_norm": 0.2975993752479553, "learning_rate": 3.365968436427891e-09, "loss": 0.301, "step": 48663 }, { "epoch": 4.947539650264335, "grad_norm": 0.2951469421386719, "learning_rate": 3.352961329030069e-09, "loss": 0.2795, "step": 48664 }, { "epoch": 4.947641317608784, "grad_norm": 0.26707467436790466, "learning_rate": 3.3399793935523506e-09, "loss": 0.2715, "step": 48665 }, { "epoch": 4.9477429849532335, "grad_norm": 0.28807830810546875, "learning_rate": 3.3270226300602393e-09, "loss": 0.2604, "step": 48666 }, { "epoch": 4.947844652297682, "grad_norm": 0.28495433926582336, "learning_rate": 3.3140910386197935e-09, "loss": 0.3027, "step": 48667 }, { "epoch": 4.947946319642131, "grad_norm": 0.27908599376678467, "learning_rate": 3.3011846192954058e-09, "loss": 0.3161, "step": 48668 }, { "epoch": 4.94804798698658, "grad_norm": 0.2840302586555481, "learning_rate": 3.288303372152024e-09, "loss": 0.2815, "step": 48669 }, { "epoch": 4.948149654331029, "grad_norm": 0.2570722997188568, "learning_rate": 3.275447297254597e-09, "loss": 0.3015, "step": 48670 }, { "epoch": 4.948251321675478, "grad_norm": 0.2886514365673065, "learning_rate": 3.2626163946686275e-09, "loss": 0.3063, "step": 48671 }, { "epoch": 4.948352989019927, "grad_norm": 0.27763038873672485, "learning_rate": 3.249810664457398e-09, "loss": 0.2998, "step": 48672 }, { "epoch": 4.948454656364376, "grad_norm": 0.2759174704551697, "learning_rate": 3.237030106686967e-09, "loss": 0.3024, "step": 48673 }, { "epoch": 4.948556323708825, "grad_norm": 0.29179251194000244, "learning_rate": 3.224274721420617e-09, "loss": 0.295, "step": 48674 }, { "epoch": 4.948657991053274, "grad_norm": 0.2715832591056824, "learning_rate": 3.2115445087227416e-09, "loss": 0.3145, "step": 48675 }, { "epoch": 4.948759658397723, "grad_norm": 0.28471365571022034, "learning_rate": 3.198839468658288e-09, "loss": 0.3, "step": 48676 }, { "epoch": 4.948861325742172, "grad_norm": 0.2433350533246994, "learning_rate": 3.1861596012905394e-09, "loss": 0.287, "step": 48677 }, { "epoch": 4.948962993086621, "grad_norm": 0.2979294955730438, "learning_rate": 3.1735049066833333e-09, "loss": 0.2897, "step": 48678 }, { "epoch": 4.94906466043107, "grad_norm": 0.29994189739227295, "learning_rate": 3.160875384899953e-09, "loss": 0.3048, "step": 48679 }, { "epoch": 4.949166327775519, "grad_norm": 0.2640165388584137, "learning_rate": 3.148271036005346e-09, "loss": 0.2911, "step": 48680 }, { "epoch": 4.949267995119968, "grad_norm": 0.29380905628204346, "learning_rate": 3.13569186006224e-09, "loss": 0.3045, "step": 48681 }, { "epoch": 4.949369662464417, "grad_norm": 0.2792716920375824, "learning_rate": 3.1231378571339176e-09, "loss": 0.313, "step": 48682 }, { "epoch": 4.9494713298088655, "grad_norm": 0.27749839425086975, "learning_rate": 3.110609027284217e-09, "loss": 0.2933, "step": 48683 }, { "epoch": 4.9495729971533144, "grad_norm": 0.28731974959373474, "learning_rate": 3.0981053705753108e-09, "loss": 0.2869, "step": 48684 }, { "epoch": 4.949674664497763, "grad_norm": 0.2926889657974243, "learning_rate": 3.085626887071036e-09, "loss": 0.2949, "step": 48685 }, { "epoch": 4.949776331842212, "grad_norm": 0.24835634231567383, "learning_rate": 3.0731735768341208e-09, "loss": 0.3003, "step": 48686 }, { "epoch": 4.949877999186661, "grad_norm": 0.28652632236480713, "learning_rate": 3.0607454399267376e-09, "loss": 0.2981, "step": 48687 }, { "epoch": 4.94997966653111, "grad_norm": 0.29274260997772217, "learning_rate": 3.048342476411614e-09, "loss": 0.3122, "step": 48688 }, { "epoch": 4.950081333875559, "grad_norm": 0.27382975816726685, "learning_rate": 3.035964686352033e-09, "loss": 0.2941, "step": 48689 }, { "epoch": 4.950183001220008, "grad_norm": 0.271077036857605, "learning_rate": 3.0236120698096115e-09, "loss": 0.2706, "step": 48690 }, { "epoch": 4.950284668564457, "grad_norm": 0.28480616211891174, "learning_rate": 3.0112846268465225e-09, "loss": 0.2879, "step": 48691 }, { "epoch": 4.950386335908906, "grad_norm": 0.2797263562679291, "learning_rate": 2.998982357525493e-09, "loss": 0.2892, "step": 48692 }, { "epoch": 4.950488003253355, "grad_norm": 0.2674225866794586, "learning_rate": 2.9867052619075855e-09, "loss": 0.3049, "step": 48693 }, { "epoch": 4.950589670597804, "grad_norm": 0.2750796973705292, "learning_rate": 2.9744533400555276e-09, "loss": 0.2808, "step": 48694 }, { "epoch": 4.950691337942253, "grad_norm": 0.2876850366592407, "learning_rate": 2.9622265920309368e-09, "loss": 0.2642, "step": 48695 }, { "epoch": 4.950793005286702, "grad_norm": 0.2905742824077606, "learning_rate": 2.950025017894875e-09, "loss": 0.2976, "step": 48696 }, { "epoch": 4.950894672631151, "grad_norm": 0.27430474758148193, "learning_rate": 2.9378486177089605e-09, "loss": 0.3253, "step": 48697 }, { "epoch": 4.9509963399756, "grad_norm": 0.29355934262275696, "learning_rate": 2.9256973915348097e-09, "loss": 0.2622, "step": 48698 }, { "epoch": 4.951098007320049, "grad_norm": 0.2538420557975769, "learning_rate": 2.91357133943404e-09, "loss": 0.3168, "step": 48699 }, { "epoch": 4.9511996746644975, "grad_norm": 0.2864333987236023, "learning_rate": 2.9014704614666043e-09, "loss": 0.2693, "step": 48700 }, { "epoch": 4.9513013420089464, "grad_norm": 0.2674381136894226, "learning_rate": 2.8893947576941197e-09, "loss": 0.2762, "step": 48701 }, { "epoch": 4.951403009353395, "grad_norm": 0.2886923551559448, "learning_rate": 2.8773442281776474e-09, "loss": 0.295, "step": 48702 }, { "epoch": 4.951504676697844, "grad_norm": 0.2915525734424591, "learning_rate": 2.865318872977141e-09, "loss": 0.2646, "step": 48703 }, { "epoch": 4.951606344042293, "grad_norm": 0.28845199942588806, "learning_rate": 2.8533186921542165e-09, "loss": 0.3093, "step": 48704 }, { "epoch": 4.951708011386742, "grad_norm": 0.28657636046409607, "learning_rate": 2.841343685768272e-09, "loss": 0.2857, "step": 48705 }, { "epoch": 4.951809678731191, "grad_norm": 0.28847289085388184, "learning_rate": 2.829393853880369e-09, "loss": 0.3014, "step": 48706 }, { "epoch": 4.95191134607564, "grad_norm": 0.28088563680648804, "learning_rate": 2.8174691965504595e-09, "loss": 0.2932, "step": 48707 }, { "epoch": 4.952013013420089, "grad_norm": 0.28905701637268066, "learning_rate": 2.8055697138384965e-09, "loss": 0.3096, "step": 48708 }, { "epoch": 4.952114680764538, "grad_norm": 0.27910706400871277, "learning_rate": 2.7936954058049858e-09, "loss": 0.3061, "step": 48709 }, { "epoch": 4.952216348108987, "grad_norm": 0.27632802724838257, "learning_rate": 2.78184627250877e-09, "loss": 0.2808, "step": 48710 }, { "epoch": 4.952318015453436, "grad_norm": 0.2899973392486572, "learning_rate": 2.7700223140103566e-09, "loss": 0.3101, "step": 48711 }, { "epoch": 4.952419682797886, "grad_norm": 0.252826064825058, "learning_rate": 2.758223530369142e-09, "loss": 0.2812, "step": 48712 }, { "epoch": 4.952521350142335, "grad_norm": 0.28240466117858887, "learning_rate": 2.746449921644523e-09, "loss": 0.2893, "step": 48713 }, { "epoch": 4.952623017486784, "grad_norm": 0.28339365124702454, "learning_rate": 2.7347014878953414e-09, "loss": 0.3063, "step": 48714 }, { "epoch": 4.9527246848312325, "grad_norm": 0.2894449830055237, "learning_rate": 2.7229782291815497e-09, "loss": 0.2942, "step": 48715 }, { "epoch": 4.9528263521756815, "grad_norm": 0.29494208097457886, "learning_rate": 2.7112801455619897e-09, "loss": 0.3002, "step": 48716 }, { "epoch": 4.95292801952013, "grad_norm": 0.27169379591941833, "learning_rate": 2.699607237094948e-09, "loss": 0.2624, "step": 48717 }, { "epoch": 4.953029686864579, "grad_norm": 0.276796817779541, "learning_rate": 2.687959503839821e-09, "loss": 0.2898, "step": 48718 }, { "epoch": 4.953131354209028, "grad_norm": 0.264772891998291, "learning_rate": 2.676336945855451e-09, "loss": 0.2833, "step": 48719 }, { "epoch": 4.953233021553477, "grad_norm": 0.27395766973495483, "learning_rate": 2.664739563200125e-09, "loss": 0.3113, "step": 48720 }, { "epoch": 4.953334688897926, "grad_norm": 0.2871930003166199, "learning_rate": 2.6531673559326844e-09, "loss": 0.3289, "step": 48721 }, { "epoch": 4.953436356242375, "grad_norm": 0.2726549208164215, "learning_rate": 2.6416203241103055e-09, "loss": 0.2713, "step": 48722 }, { "epoch": 4.953538023586824, "grad_norm": 0.28266215324401855, "learning_rate": 2.630098467792386e-09, "loss": 0.282, "step": 48723 }, { "epoch": 4.953639690931273, "grad_norm": 0.293285608291626, "learning_rate": 2.6186017870361014e-09, "loss": 0.3032, "step": 48724 }, { "epoch": 4.953741358275722, "grad_norm": 0.30231913924217224, "learning_rate": 2.6071302819002943e-09, "loss": 0.3142, "step": 48725 }, { "epoch": 4.953843025620171, "grad_norm": 0.27288389205932617, "learning_rate": 2.5956839524415856e-09, "loss": 0.2914, "step": 48726 }, { "epoch": 4.95394469296462, "grad_norm": 0.28408700227737427, "learning_rate": 2.584262798718817e-09, "loss": 0.3056, "step": 48727 }, { "epoch": 4.954046360309069, "grad_norm": 0.27622804045677185, "learning_rate": 2.5728668207886108e-09, "loss": 0.3051, "step": 48728 }, { "epoch": 4.954148027653518, "grad_norm": 0.2730439007282257, "learning_rate": 2.561496018708698e-09, "loss": 0.322, "step": 48729 }, { "epoch": 4.954249694997967, "grad_norm": 0.306272029876709, "learning_rate": 2.5501503925362547e-09, "loss": 0.2742, "step": 48730 }, { "epoch": 4.954351362342416, "grad_norm": 0.2862717807292938, "learning_rate": 2.538829942329013e-09, "loss": 0.3317, "step": 48731 }, { "epoch": 4.9544530296868645, "grad_norm": 0.2699856758117676, "learning_rate": 2.527534668143039e-09, "loss": 0.3251, "step": 48732 }, { "epoch": 4.9545546970313135, "grad_norm": 0.2571951448917389, "learning_rate": 2.5162645700366193e-09, "loss": 0.2831, "step": 48733 }, { "epoch": 4.954656364375762, "grad_norm": 0.2580571472644806, "learning_rate": 2.5050196480647104e-09, "loss": 0.3014, "step": 48734 }, { "epoch": 4.954758031720211, "grad_norm": 0.2792547345161438, "learning_rate": 2.4937999022855987e-09, "loss": 0.3165, "step": 48735 }, { "epoch": 4.95485969906466, "grad_norm": 0.27452945709228516, "learning_rate": 2.482605332754795e-09, "loss": 0.289, "step": 48736 }, { "epoch": 4.954961366409109, "grad_norm": 0.2767123878002167, "learning_rate": 2.471435939528921e-09, "loss": 0.2991, "step": 48737 }, { "epoch": 4.955063033753558, "grad_norm": 0.2762315273284912, "learning_rate": 2.4602917226645984e-09, "loss": 0.3081, "step": 48738 }, { "epoch": 4.955164701098007, "grad_norm": 0.2641822397708893, "learning_rate": 2.449172682217893e-09, "loss": 0.3144, "step": 48739 }, { "epoch": 4.955266368442456, "grad_norm": 0.2652221620082855, "learning_rate": 2.4380788182443162e-09, "loss": 0.295, "step": 48740 }, { "epoch": 4.955368035786905, "grad_norm": 0.32741492986679077, "learning_rate": 2.427010130800489e-09, "loss": 0.315, "step": 48741 }, { "epoch": 4.955469703131354, "grad_norm": 0.2787433862686157, "learning_rate": 2.415966619941368e-09, "loss": 0.3104, "step": 48742 }, { "epoch": 4.955571370475803, "grad_norm": 0.28295379877090454, "learning_rate": 2.404948285723019e-09, "loss": 0.2905, "step": 48743 }, { "epoch": 4.955673037820252, "grad_norm": 0.2931755781173706, "learning_rate": 2.393955128200953e-09, "loss": 0.3216, "step": 48744 }, { "epoch": 4.955774705164701, "grad_norm": 0.27361512184143066, "learning_rate": 2.382987147430682e-09, "loss": 0.318, "step": 48745 }, { "epoch": 4.95587637250915, "grad_norm": 0.28227028250694275, "learning_rate": 2.3720443434677164e-09, "loss": 0.2809, "step": 48746 }, { "epoch": 4.955978039853599, "grad_norm": 0.26465147733688354, "learning_rate": 2.3611267163659023e-09, "loss": 0.308, "step": 48747 }, { "epoch": 4.9560797071980485, "grad_norm": 0.25814560055732727, "learning_rate": 2.350234266181861e-09, "loss": 0.2881, "step": 48748 }, { "epoch": 4.956181374542497, "grad_norm": 0.2656562030315399, "learning_rate": 2.3393669929694386e-09, "loss": 0.3134, "step": 48749 }, { "epoch": 4.956283041886946, "grad_norm": 0.26780712604522705, "learning_rate": 2.3285248967835905e-09, "loss": 0.2792, "step": 48750 }, { "epoch": 4.956384709231395, "grad_norm": 0.26501280069351196, "learning_rate": 2.317707977678718e-09, "loss": 0.3037, "step": 48751 }, { "epoch": 4.956486376575844, "grad_norm": 0.28738921880722046, "learning_rate": 2.3069162357097772e-09, "loss": 0.2968, "step": 48752 }, { "epoch": 4.956588043920293, "grad_norm": 0.28961387276649475, "learning_rate": 2.2961496709311694e-09, "loss": 0.2826, "step": 48753 }, { "epoch": 4.956689711264742, "grad_norm": 0.30507880449295044, "learning_rate": 2.2854082833967395e-09, "loss": 0.2675, "step": 48754 }, { "epoch": 4.956791378609191, "grad_norm": 0.27852901816368103, "learning_rate": 2.274692073160889e-09, "loss": 0.2929, "step": 48755 }, { "epoch": 4.95689304595364, "grad_norm": 0.3033870458602905, "learning_rate": 2.264001040277464e-09, "loss": 0.2693, "step": 48756 }, { "epoch": 4.956994713298089, "grad_norm": 0.28290024399757385, "learning_rate": 2.2533351848003095e-09, "loss": 0.305, "step": 48757 }, { "epoch": 4.957096380642538, "grad_norm": 0.2713428735733032, "learning_rate": 2.242694506783272e-09, "loss": 0.3048, "step": 48758 }, { "epoch": 4.957198047986987, "grad_norm": 0.2823106646537781, "learning_rate": 2.232079006280197e-09, "loss": 0.2787, "step": 48759 }, { "epoch": 4.957299715331436, "grad_norm": 0.28693869709968567, "learning_rate": 2.22148868334382e-09, "loss": 0.3076, "step": 48760 }, { "epoch": 4.957401382675885, "grad_norm": 0.27600473165512085, "learning_rate": 2.210923538027987e-09, "loss": 0.3133, "step": 48761 }, { "epoch": 4.957503050020334, "grad_norm": 0.27393531799316406, "learning_rate": 2.2003835703859887e-09, "loss": 0.2541, "step": 48762 }, { "epoch": 4.957604717364783, "grad_norm": 0.26583361625671387, "learning_rate": 2.189868780471116e-09, "loss": 0.2894, "step": 48763 }, { "epoch": 4.9577063847092315, "grad_norm": 0.301864355802536, "learning_rate": 2.179379168335549e-09, "loss": 0.266, "step": 48764 }, { "epoch": 4.9578080520536805, "grad_norm": 0.2748646140098572, "learning_rate": 2.168914734032579e-09, "loss": 0.3048, "step": 48765 }, { "epoch": 4.957909719398129, "grad_norm": 0.27880018949508667, "learning_rate": 2.158475477614941e-09, "loss": 0.3298, "step": 48766 }, { "epoch": 4.958011386742578, "grad_norm": 0.2849620282649994, "learning_rate": 2.1480613991353706e-09, "loss": 0.3523, "step": 48767 }, { "epoch": 4.958113054087027, "grad_norm": 0.27217158675193787, "learning_rate": 2.1376724986460485e-09, "loss": 0.2881, "step": 48768 }, { "epoch": 4.958214721431476, "grad_norm": 0.27256447076797485, "learning_rate": 2.1273087761997103e-09, "loss": 0.2937, "step": 48769 }, { "epoch": 4.958316388775925, "grad_norm": 0.2755383849143982, "learning_rate": 2.1169702318485365e-09, "loss": 0.2913, "step": 48770 }, { "epoch": 4.958418056120374, "grad_norm": 0.29794687032699585, "learning_rate": 2.1066568656441523e-09, "loss": 0.3182, "step": 48771 }, { "epoch": 4.958519723464823, "grad_norm": 0.2863849401473999, "learning_rate": 2.0963686776381832e-09, "loss": 0.3061, "step": 48772 }, { "epoch": 4.958621390809272, "grad_norm": 0.2722525894641876, "learning_rate": 2.0861056678839197e-09, "loss": 0.3008, "step": 48773 }, { "epoch": 4.958723058153721, "grad_norm": 0.2592337727546692, "learning_rate": 2.0758678364313225e-09, "loss": 0.2805, "step": 48774 }, { "epoch": 4.95882472549817, "grad_norm": 0.28893500566482544, "learning_rate": 2.0656551833336815e-09, "loss": 0.2798, "step": 48775 }, { "epoch": 4.958926392842619, "grad_norm": 0.2827973961830139, "learning_rate": 2.055467708640957e-09, "loss": 0.3259, "step": 48776 }, { "epoch": 4.959028060187068, "grad_norm": 0.2806626260280609, "learning_rate": 2.0453054124053294e-09, "loss": 0.2607, "step": 48777 }, { "epoch": 4.959129727531517, "grad_norm": 0.2787787914276123, "learning_rate": 2.0351682946778695e-09, "loss": 0.2973, "step": 48778 }, { "epoch": 4.959231394875966, "grad_norm": 0.2956501245498657, "learning_rate": 2.0250563555090916e-09, "loss": 0.3002, "step": 48779 }, { "epoch": 4.959333062220415, "grad_norm": 0.2603592574596405, "learning_rate": 2.014969594950622e-09, "loss": 0.3014, "step": 48780 }, { "epoch": 4.9594347295648635, "grad_norm": 0.2625780403614044, "learning_rate": 2.004908013052975e-09, "loss": 0.2745, "step": 48781 }, { "epoch": 4.9595363969093125, "grad_norm": 0.26034578680992126, "learning_rate": 1.9948716098672215e-09, "loss": 0.2705, "step": 48782 }, { "epoch": 4.959638064253761, "grad_norm": 0.26890838146209717, "learning_rate": 1.9848603854433213e-09, "loss": 0.3012, "step": 48783 }, { "epoch": 4.95973973159821, "grad_norm": 0.2979625165462494, "learning_rate": 1.9748743398317892e-09, "loss": 0.2957, "step": 48784 }, { "epoch": 4.959841398942659, "grad_norm": 0.2962390184402466, "learning_rate": 1.964913473083696e-09, "loss": 0.2755, "step": 48785 }, { "epoch": 4.959943066287108, "grad_norm": 0.2823178768157959, "learning_rate": 1.9549777852478913e-09, "loss": 0.2907, "step": 48786 }, { "epoch": 4.960044733631557, "grad_norm": 0.28161609172821045, "learning_rate": 1.9450672763760002e-09, "loss": 0.2854, "step": 48787 }, { "epoch": 4.960146400976006, "grad_norm": 0.28777486085891724, "learning_rate": 1.935181946516318e-09, "loss": 0.2975, "step": 48788 }, { "epoch": 4.960248068320455, "grad_norm": 0.2824189364910126, "learning_rate": 1.9253217957199143e-09, "loss": 0.2902, "step": 48789 }, { "epoch": 4.960349735664904, "grad_norm": 0.27679741382598877, "learning_rate": 1.9154868240361946e-09, "loss": 0.2867, "step": 48790 }, { "epoch": 4.960451403009353, "grad_norm": 0.2919931411743164, "learning_rate": 1.9056770315145633e-09, "loss": 0.3065, "step": 48791 }, { "epoch": 4.960553070353802, "grad_norm": 0.29316550493240356, "learning_rate": 1.8958924182038705e-09, "loss": 0.2727, "step": 48792 }, { "epoch": 4.960654737698251, "grad_norm": 0.2956530749797821, "learning_rate": 1.8861329841546317e-09, "loss": 0.2932, "step": 48793 }, { "epoch": 4.960756405042701, "grad_norm": 0.27465060353279114, "learning_rate": 1.876398729414586e-09, "loss": 0.306, "step": 48794 }, { "epoch": 4.96085807238715, "grad_norm": 0.2923744320869446, "learning_rate": 1.866689654033693e-09, "loss": 0.2727, "step": 48795 }, { "epoch": 4.9609597397315985, "grad_norm": 0.2808082103729248, "learning_rate": 1.8570057580608036e-09, "loss": 0.3097, "step": 48796 }, { "epoch": 4.9610614070760475, "grad_norm": 0.29233843088150024, "learning_rate": 1.847347041544767e-09, "loss": 0.2635, "step": 48797 }, { "epoch": 4.961163074420496, "grad_norm": 0.28797590732574463, "learning_rate": 1.8377135045338778e-09, "loss": 0.3015, "step": 48798 }, { "epoch": 4.961264741764945, "grad_norm": 0.27834200859069824, "learning_rate": 1.8281051470769862e-09, "loss": 0.2695, "step": 48799 }, { "epoch": 4.961366409109394, "grad_norm": 0.29470720887184143, "learning_rate": 1.8185219692223865e-09, "loss": 0.3013, "step": 48800 }, { "epoch": 4.961468076453843, "grad_norm": 0.27508875727653503, "learning_rate": 1.8089639710178187e-09, "loss": 0.2552, "step": 48801 }, { "epoch": 4.961569743798292, "grad_norm": 0.28735223412513733, "learning_rate": 1.7994311525121322e-09, "loss": 0.3277, "step": 48802 }, { "epoch": 4.961671411142741, "grad_norm": 0.2777349650859833, "learning_rate": 1.789923513753622e-09, "loss": 0.3184, "step": 48803 }, { "epoch": 4.96177307848719, "grad_norm": 0.2799828350543976, "learning_rate": 1.7804410547894724e-09, "loss": 0.2793, "step": 48804 }, { "epoch": 4.961874745831639, "grad_norm": 0.2977643609046936, "learning_rate": 1.770983775667423e-09, "loss": 0.2933, "step": 48805 }, { "epoch": 4.961976413176088, "grad_norm": 0.2866138815879822, "learning_rate": 1.7615516764357687e-09, "loss": 0.2907, "step": 48806 }, { "epoch": 4.962078080520537, "grad_norm": 0.26434147357940674, "learning_rate": 1.7521447571411387e-09, "loss": 0.29, "step": 48807 }, { "epoch": 4.962179747864986, "grad_norm": 0.27631300687789917, "learning_rate": 1.7427630178318278e-09, "loss": 0.2738, "step": 48808 }, { "epoch": 4.962281415209435, "grad_norm": 0.2626470625400543, "learning_rate": 1.7334064585550204e-09, "loss": 0.2943, "step": 48809 }, { "epoch": 4.962383082553884, "grad_norm": 0.26422518491744995, "learning_rate": 1.7240750793567907e-09, "loss": 0.2892, "step": 48810 }, { "epoch": 4.962484749898333, "grad_norm": 0.2761613726615906, "learning_rate": 1.7147688802854334e-09, "loss": 0.2803, "step": 48811 }, { "epoch": 4.962586417242782, "grad_norm": 0.28705766797065735, "learning_rate": 1.705487861387023e-09, "loss": 0.3166, "step": 48812 }, { "epoch": 4.9626880845872305, "grad_norm": 0.2762676775455475, "learning_rate": 1.6962320227087437e-09, "loss": 0.2831, "step": 48813 }, { "epoch": 4.9627897519316795, "grad_norm": 0.29264870285987854, "learning_rate": 1.687001364297225e-09, "loss": 0.2857, "step": 48814 }, { "epoch": 4.962891419276128, "grad_norm": 0.27611419558525085, "learning_rate": 1.677795886198541e-09, "loss": 0.2952, "step": 48815 }, { "epoch": 4.962993086620577, "grad_norm": 0.2673286497592926, "learning_rate": 1.6686155884593213e-09, "loss": 0.3036, "step": 48816 }, { "epoch": 4.963094753965026, "grad_norm": 0.2994815707206726, "learning_rate": 1.6594604711261952e-09, "loss": 0.29, "step": 48817 }, { "epoch": 4.963196421309475, "grad_norm": 0.2644563913345337, "learning_rate": 1.6503305342446818e-09, "loss": 0.3131, "step": 48818 }, { "epoch": 4.963298088653924, "grad_norm": 0.27498388290405273, "learning_rate": 1.6412257778608553e-09, "loss": 0.2818, "step": 48819 }, { "epoch": 4.963399755998373, "grad_norm": 0.27844128012657166, "learning_rate": 1.63214620202079e-09, "loss": 0.3059, "step": 48820 }, { "epoch": 4.963501423342822, "grad_norm": 0.289212703704834, "learning_rate": 1.6230918067705604e-09, "loss": 0.3042, "step": 48821 }, { "epoch": 4.963603090687271, "grad_norm": 0.27753111720085144, "learning_rate": 1.61406259215513e-09, "loss": 0.2653, "step": 48822 }, { "epoch": 4.96370475803172, "grad_norm": 0.26858124136924744, "learning_rate": 1.6050585582200184e-09, "loss": 0.3125, "step": 48823 }, { "epoch": 4.963806425376169, "grad_norm": 0.28948670625686646, "learning_rate": 1.5960797050112997e-09, "loss": 0.3142, "step": 48824 }, { "epoch": 4.963908092720618, "grad_norm": 0.2573925256729126, "learning_rate": 1.5871260325733828e-09, "loss": 0.2948, "step": 48825 }, { "epoch": 4.964009760065067, "grad_norm": 0.2815339267253876, "learning_rate": 1.5781975409517869e-09, "loss": 0.29, "step": 48826 }, { "epoch": 4.964111427409516, "grad_norm": 0.27575621008872986, "learning_rate": 1.569294230191476e-09, "loss": 0.318, "step": 48827 }, { "epoch": 4.964213094753965, "grad_norm": 0.2773125171661377, "learning_rate": 1.560416100336859e-09, "loss": 0.2814, "step": 48828 }, { "epoch": 4.964314762098414, "grad_norm": 0.2913295328617096, "learning_rate": 1.5515631514334551e-09, "loss": 0.2804, "step": 48829 }, { "epoch": 4.964416429442863, "grad_norm": 0.26870471239089966, "learning_rate": 1.5427353835256732e-09, "loss": 0.2753, "step": 48830 }, { "epoch": 4.964518096787312, "grad_norm": 0.27716970443725586, "learning_rate": 1.533932796657367e-09, "loss": 0.2844, "step": 48831 }, { "epoch": 4.964619764131761, "grad_norm": 0.2908705770969391, "learning_rate": 1.5251553908735005e-09, "loss": 0.3229, "step": 48832 }, { "epoch": 4.96472143147621, "grad_norm": 0.2793956995010376, "learning_rate": 1.516403166217928e-09, "loss": 0.2883, "step": 48833 }, { "epoch": 4.964823098820659, "grad_norm": 0.26422885060310364, "learning_rate": 1.5076761227350578e-09, "loss": 0.3037, "step": 48834 }, { "epoch": 4.964924766165108, "grad_norm": 0.2793012857437134, "learning_rate": 1.498974260468744e-09, "loss": 0.2787, "step": 48835 }, { "epoch": 4.965026433509557, "grad_norm": 0.2878340184688568, "learning_rate": 1.4902975794622853e-09, "loss": 0.2869, "step": 48836 }, { "epoch": 4.965128100854006, "grad_norm": 0.26376983523368835, "learning_rate": 1.4816460797600906e-09, "loss": 0.2963, "step": 48837 }, { "epoch": 4.965229768198455, "grad_norm": 0.27811479568481445, "learning_rate": 1.4730197614060138e-09, "loss": 0.2957, "step": 48838 }, { "epoch": 4.965331435542904, "grad_norm": 0.3006132245063782, "learning_rate": 1.4644186244427983e-09, "loss": 0.2829, "step": 48839 }, { "epoch": 4.965433102887353, "grad_norm": 0.26683497428894043, "learning_rate": 1.4558426689137428e-09, "loss": 0.3186, "step": 48840 }, { "epoch": 4.965534770231802, "grad_norm": 0.3082464635372162, "learning_rate": 1.4472918948627012e-09, "loss": 0.324, "step": 48841 }, { "epoch": 4.965636437576251, "grad_norm": 0.28508591651916504, "learning_rate": 1.4387663023318621e-09, "loss": 0.3253, "step": 48842 }, { "epoch": 4.9657381049207, "grad_norm": 0.28002825379371643, "learning_rate": 1.4302658913650792e-09, "loss": 0.2954, "step": 48843 }, { "epoch": 4.965839772265149, "grad_norm": 0.25577449798583984, "learning_rate": 1.4217906620045406e-09, "loss": 0.295, "step": 48844 }, { "epoch": 4.9659414396095976, "grad_norm": 0.2748747169971466, "learning_rate": 1.4133406142935457e-09, "loss": 0.2917, "step": 48845 }, { "epoch": 4.9660431069540465, "grad_norm": 0.2763647437095642, "learning_rate": 1.4049157482737274e-09, "loss": 0.3219, "step": 48846 }, { "epoch": 4.966144774298495, "grad_norm": 0.2758273184299469, "learning_rate": 1.3965160639883846e-09, "loss": 0.3012, "step": 48847 }, { "epoch": 4.966246441642944, "grad_norm": 0.27165958285331726, "learning_rate": 1.3881415614797056e-09, "loss": 0.3175, "step": 48848 }, { "epoch": 4.966348108987393, "grad_norm": 0.2587142884731293, "learning_rate": 1.3797922407893238e-09, "loss": 0.3053, "step": 48849 }, { "epoch": 4.966449776331842, "grad_norm": 0.2830875515937805, "learning_rate": 1.371468101959983e-09, "loss": 0.2682, "step": 48850 }, { "epoch": 4.966551443676291, "grad_norm": 0.2681712210178375, "learning_rate": 1.3631691450333161e-09, "loss": 0.3042, "step": 48851 }, { "epoch": 4.96665311102074, "grad_norm": 0.30748897790908813, "learning_rate": 1.354895370050957e-09, "loss": 0.2639, "step": 48852 }, { "epoch": 4.966754778365189, "grad_norm": 0.2630017399787903, "learning_rate": 1.346646777055094e-09, "loss": 0.2906, "step": 48853 }, { "epoch": 4.966856445709638, "grad_norm": 0.27611735463142395, "learning_rate": 1.3384233660868051e-09, "loss": 0.2748, "step": 48854 }, { "epoch": 4.966958113054087, "grad_norm": 0.31350764632225037, "learning_rate": 1.330225137187724e-09, "loss": 0.2917, "step": 48855 }, { "epoch": 4.967059780398536, "grad_norm": 0.27399778366088867, "learning_rate": 1.3220520903989287e-09, "loss": 0.2685, "step": 48856 }, { "epoch": 4.967161447742985, "grad_norm": 0.2691947817802429, "learning_rate": 1.3139042257620526e-09, "loss": 0.2929, "step": 48857 }, { "epoch": 4.967263115087434, "grad_norm": 0.27895253896713257, "learning_rate": 1.305781543317619e-09, "loss": 0.2823, "step": 48858 }, { "epoch": 4.967364782431883, "grad_norm": 0.2869495153427124, "learning_rate": 1.2976840431067061e-09, "loss": 0.2664, "step": 48859 }, { "epoch": 4.967466449776332, "grad_norm": 0.26875120401382446, "learning_rate": 1.2896117251703922e-09, "loss": 0.2995, "step": 48860 }, { "epoch": 4.967568117120781, "grad_norm": 0.2612427771091461, "learning_rate": 1.2815645895492e-09, "loss": 0.271, "step": 48861 }, { "epoch": 4.9676697844652296, "grad_norm": 0.2678101658821106, "learning_rate": 1.2735426362836534e-09, "loss": 0.2918, "step": 48862 }, { "epoch": 4.9677714518096785, "grad_norm": 0.2594749629497528, "learning_rate": 1.2655458654137199e-09, "loss": 0.2836, "step": 48863 }, { "epoch": 4.967873119154127, "grad_norm": 0.27967119216918945, "learning_rate": 1.2575742769804777e-09, "loss": 0.296, "step": 48864 }, { "epoch": 4.967974786498576, "grad_norm": 0.2990081012248993, "learning_rate": 1.24962787102334e-09, "loss": 0.2784, "step": 48865 }, { "epoch": 4.968076453843025, "grad_norm": 0.2732626497745514, "learning_rate": 1.2417066475828298e-09, "loss": 0.3141, "step": 48866 }, { "epoch": 4.968178121187474, "grad_norm": 0.2717762887477875, "learning_rate": 1.2338106066989153e-09, "loss": 0.2892, "step": 48867 }, { "epoch": 4.968279788531923, "grad_norm": 0.2693465054035187, "learning_rate": 1.2259397484110091e-09, "loss": 0.2847, "step": 48868 }, { "epoch": 4.968381455876372, "grad_norm": 0.2790975570678711, "learning_rate": 1.2180940727590796e-09, "loss": 0.269, "step": 48869 }, { "epoch": 4.968483123220821, "grad_norm": 0.28052425384521484, "learning_rate": 1.2102735797825394e-09, "loss": 0.3214, "step": 48870 }, { "epoch": 4.96858479056527, "grad_norm": 0.2736840844154358, "learning_rate": 1.2024782695202463e-09, "loss": 0.2992, "step": 48871 }, { "epoch": 4.968686457909719, "grad_norm": 0.2651884853839874, "learning_rate": 1.1947081420121686e-09, "loss": 0.3074, "step": 48872 }, { "epoch": 4.968788125254168, "grad_norm": 0.29110872745513916, "learning_rate": 1.1869631972977192e-09, "loss": 0.3038, "step": 48873 }, { "epoch": 4.968889792598617, "grad_norm": 0.27702775597572327, "learning_rate": 1.1792434354146454e-09, "loss": 0.3158, "step": 48874 }, { "epoch": 4.968991459943066, "grad_norm": 0.25486353039741516, "learning_rate": 1.1715488564034704e-09, "loss": 0.2835, "step": 48875 }, { "epoch": 4.969093127287516, "grad_norm": 0.2658610939979553, "learning_rate": 1.163879460301942e-09, "loss": 0.2831, "step": 48876 }, { "epoch": 4.969194794631965, "grad_norm": 0.2920950651168823, "learning_rate": 1.156235247148363e-09, "loss": 0.2995, "step": 48877 }, { "epoch": 4.9692964619764135, "grad_norm": 0.2682042419910431, "learning_rate": 1.1486162169821457e-09, "loss": 0.2758, "step": 48878 }, { "epoch": 4.9693981293208624, "grad_norm": 0.26714614033699036, "learning_rate": 1.1410223698410383e-09, "loss": 0.2657, "step": 48879 }, { "epoch": 4.969499796665311, "grad_norm": 0.2901572585105896, "learning_rate": 1.1334537057638983e-09, "loss": 0.2546, "step": 48880 }, { "epoch": 4.96960146400976, "grad_norm": 0.28913381695747375, "learning_rate": 1.1259102247884735e-09, "loss": 0.2672, "step": 48881 }, { "epoch": 4.969703131354209, "grad_norm": 0.29541513323783875, "learning_rate": 1.1183919269525111e-09, "loss": 0.3162, "step": 48882 }, { "epoch": 4.969804798698658, "grad_norm": 0.25804251432418823, "learning_rate": 1.1108988122943143e-09, "loss": 0.2667, "step": 48883 }, { "epoch": 4.969906466043107, "grad_norm": 0.28713715076446533, "learning_rate": 1.1034308808516304e-09, "loss": 0.2968, "step": 48884 }, { "epoch": 4.970008133387556, "grad_norm": 0.3104132115840912, "learning_rate": 1.0959881326622068e-09, "loss": 0.2908, "step": 48885 }, { "epoch": 4.970109800732005, "grad_norm": 0.2956576347351074, "learning_rate": 1.0885705677626811e-09, "loss": 0.2782, "step": 48886 }, { "epoch": 4.970211468076454, "grad_norm": 0.26250937581062317, "learning_rate": 1.0811781861913562e-09, "loss": 0.2457, "step": 48887 }, { "epoch": 4.970313135420903, "grad_norm": 0.2936404347419739, "learning_rate": 1.073810987984869e-09, "loss": 0.2878, "step": 48888 }, { "epoch": 4.970414802765352, "grad_norm": 0.2675153315067291, "learning_rate": 1.0664689731809674e-09, "loss": 0.2775, "step": 48889 }, { "epoch": 4.970516470109801, "grad_norm": 0.27282339334487915, "learning_rate": 1.0591521418157336e-09, "loss": 0.293, "step": 48890 }, { "epoch": 4.97061813745425, "grad_norm": 0.30719929933547974, "learning_rate": 1.051860493926915e-09, "loss": 0.2874, "step": 48891 }, { "epoch": 4.970719804798699, "grad_norm": 0.27524539828300476, "learning_rate": 1.0445940295511492e-09, "loss": 0.2934, "step": 48892 }, { "epoch": 4.970821472143148, "grad_norm": 0.2688295245170593, "learning_rate": 1.0373527487245183e-09, "loss": 0.3022, "step": 48893 }, { "epoch": 4.970923139487597, "grad_norm": 0.33783748745918274, "learning_rate": 1.0301366514836598e-09, "loss": 0.3316, "step": 48894 }, { "epoch": 4.9710248068320455, "grad_norm": 0.26378121972084045, "learning_rate": 1.0229457378646557e-09, "loss": 0.2607, "step": 48895 }, { "epoch": 4.9711264741764944, "grad_norm": 0.26985153555870056, "learning_rate": 1.0157800079046986e-09, "loss": 0.2828, "step": 48896 }, { "epoch": 4.971228141520943, "grad_norm": 0.29682987928390503, "learning_rate": 1.0086394616393159e-09, "loss": 0.2759, "step": 48897 }, { "epoch": 4.971329808865392, "grad_norm": 0.27024003863334656, "learning_rate": 1.0015240991040343e-09, "loss": 0.3156, "step": 48898 }, { "epoch": 4.971431476209841, "grad_norm": 0.262829452753067, "learning_rate": 9.944339203354914e-10, "loss": 0.2986, "step": 48899 }, { "epoch": 4.97153314355429, "grad_norm": 0.2735006809234619, "learning_rate": 9.873689253692143e-10, "loss": 0.3436, "step": 48900 }, { "epoch": 4.971634810898739, "grad_norm": 0.2596491873264313, "learning_rate": 9.80329114240175e-10, "loss": 0.294, "step": 48901 }, { "epoch": 4.971736478243188, "grad_norm": 0.2854733467102051, "learning_rate": 9.733144869844558e-10, "loss": 0.3051, "step": 48902 }, { "epoch": 4.971838145587637, "grad_norm": 0.33301055431365967, "learning_rate": 9.663250436375838e-10, "loss": 0.3276, "step": 48903 }, { "epoch": 4.971939812932086, "grad_norm": 0.2732256054878235, "learning_rate": 9.59360784233976e-10, "loss": 0.277, "step": 48904 }, { "epoch": 4.972041480276535, "grad_norm": 0.3006856143474579, "learning_rate": 9.524217088091592e-10, "loss": 0.3125, "step": 48905 }, { "epoch": 4.972143147620984, "grad_norm": 0.29934677481651306, "learning_rate": 9.455078173981058e-10, "loss": 0.2709, "step": 48906 }, { "epoch": 4.972244814965433, "grad_norm": 0.25801363587379456, "learning_rate": 9.386191100357877e-10, "loss": 0.3011, "step": 48907 }, { "epoch": 4.972346482309882, "grad_norm": 0.2878652513027191, "learning_rate": 9.31755586756622e-10, "loss": 0.2824, "step": 48908 }, { "epoch": 4.972448149654331, "grad_norm": 0.2666507363319397, "learning_rate": 9.249172475955803e-10, "loss": 0.2818, "step": 48909 }, { "epoch": 4.97254981699878, "grad_norm": 0.2721248269081116, "learning_rate": 9.181040925865248e-10, "loss": 0.292, "step": 48910 }, { "epoch": 4.972651484343229, "grad_norm": 0.2759316861629486, "learning_rate": 9.113161217638722e-10, "loss": 0.2525, "step": 48911 }, { "epoch": 4.972753151687678, "grad_norm": 0.2700130045413971, "learning_rate": 9.045533351625946e-10, "loss": 0.2959, "step": 48912 }, { "epoch": 4.972854819032127, "grad_norm": 0.2602750360965729, "learning_rate": 8.978157328159987e-10, "loss": 0.2979, "step": 48913 }, { "epoch": 4.972956486376576, "grad_norm": 0.30513566732406616, "learning_rate": 8.911033147579462e-10, "loss": 0.2964, "step": 48914 }, { "epoch": 4.973058153721025, "grad_norm": 0.280547559261322, "learning_rate": 8.844160810228542e-10, "loss": 0.2912, "step": 48915 }, { "epoch": 4.973159821065474, "grad_norm": 0.2568584680557251, "learning_rate": 8.777540316440292e-10, "loss": 0.3083, "step": 48916 }, { "epoch": 4.973261488409923, "grad_norm": 0.2896888852119446, "learning_rate": 8.71117166655333e-10, "loss": 0.2778, "step": 48917 }, { "epoch": 4.973363155754372, "grad_norm": 0.30294275283813477, "learning_rate": 8.645054860895174e-10, "loss": 0.317, "step": 48918 }, { "epoch": 4.973464823098821, "grad_norm": 0.2861200273036957, "learning_rate": 8.579189899809992e-10, "loss": 0.3071, "step": 48919 }, { "epoch": 4.97356649044327, "grad_norm": 0.27906322479248047, "learning_rate": 8.513576783619748e-10, "loss": 0.2602, "step": 48920 }, { "epoch": 4.973668157787719, "grad_norm": 0.27246615290641785, "learning_rate": 8.448215512657509e-10, "loss": 0.2798, "step": 48921 }, { "epoch": 4.973769825132168, "grad_norm": 0.2896516025066376, "learning_rate": 8.383106087250791e-10, "loss": 0.2797, "step": 48922 }, { "epoch": 4.973871492476617, "grad_norm": 0.2538783550262451, "learning_rate": 8.318248507732662e-10, "loss": 0.3001, "step": 48923 }, { "epoch": 4.973973159821066, "grad_norm": 0.2990718483924866, "learning_rate": 8.253642774430637e-10, "loss": 0.3134, "step": 48924 }, { "epoch": 4.974074827165515, "grad_norm": 0.2677641808986664, "learning_rate": 8.189288887661129e-10, "loss": 0.2911, "step": 48925 }, { "epoch": 4.974176494509964, "grad_norm": 0.27153274416923523, "learning_rate": 8.125186847757205e-10, "loss": 0.3151, "step": 48926 }, { "epoch": 4.9742781618544125, "grad_norm": 0.28923654556274414, "learning_rate": 8.06133665504083e-10, "loss": 0.2817, "step": 48927 }, { "epoch": 4.9743798291988615, "grad_norm": 0.2966999411582947, "learning_rate": 7.997738309828418e-10, "loss": 0.2949, "step": 48928 }, { "epoch": 4.97448149654331, "grad_norm": 0.3077397346496582, "learning_rate": 7.934391812441932e-10, "loss": 0.2926, "step": 48929 }, { "epoch": 4.974583163887759, "grad_norm": 0.31410226225852966, "learning_rate": 7.871297163203339e-10, "loss": 0.3273, "step": 48930 }, { "epoch": 4.974684831232208, "grad_norm": 0.2644476592540741, "learning_rate": 7.808454362429052e-10, "loss": 0.2835, "step": 48931 }, { "epoch": 4.974786498576657, "grad_norm": 0.27960625290870667, "learning_rate": 7.745863410441035e-10, "loss": 0.2997, "step": 48932 }, { "epoch": 4.974888165921106, "grad_norm": 0.2714118957519531, "learning_rate": 7.683524307544599e-10, "loss": 0.3047, "step": 48933 }, { "epoch": 4.974989833265555, "grad_norm": 0.2733590006828308, "learning_rate": 7.621437054056157e-10, "loss": 0.2926, "step": 48934 }, { "epoch": 4.975091500610004, "grad_norm": 0.26709359884262085, "learning_rate": 7.559601650292126e-10, "loss": 0.2945, "step": 48935 }, { "epoch": 4.975193167954453, "grad_norm": 0.27447959780693054, "learning_rate": 7.498018096568915e-10, "loss": 0.2963, "step": 48936 }, { "epoch": 4.975294835298902, "grad_norm": 0.28442642092704773, "learning_rate": 7.436686393186288e-10, "loss": 0.294, "step": 48937 }, { "epoch": 4.975396502643351, "grad_norm": 0.3046979308128357, "learning_rate": 7.375606540455105e-10, "loss": 0.2767, "step": 48938 }, { "epoch": 4.9754981699878, "grad_norm": 0.26761701703071594, "learning_rate": 7.314778538686229e-10, "loss": 0.2784, "step": 48939 }, { "epoch": 4.975599837332249, "grad_norm": 0.289029061794281, "learning_rate": 7.254202388190523e-10, "loss": 0.3116, "step": 48940 }, { "epoch": 4.975701504676698, "grad_norm": 0.2709636390209198, "learning_rate": 7.193878089262196e-10, "loss": 0.2792, "step": 48941 }, { "epoch": 4.975803172021147, "grad_norm": 0.28963321447372437, "learning_rate": 7.13380564221211e-10, "loss": 0.2921, "step": 48942 }, { "epoch": 4.975904839365596, "grad_norm": 0.26544448733329773, "learning_rate": 7.073985047345578e-10, "loss": 0.2749, "step": 48943 }, { "epoch": 4.9760065067100445, "grad_norm": 0.2686058282852173, "learning_rate": 7.014416304956806e-10, "loss": 0.3066, "step": 48944 }, { "epoch": 4.9761081740544935, "grad_norm": 0.2656649351119995, "learning_rate": 6.955099415351108e-10, "loss": 0.3299, "step": 48945 }, { "epoch": 4.976209841398942, "grad_norm": 0.26368218660354614, "learning_rate": 6.896034378822692e-10, "loss": 0.3155, "step": 48946 }, { "epoch": 4.976311508743391, "grad_norm": 0.3005828261375427, "learning_rate": 6.837221195676869e-10, "loss": 0.3307, "step": 48947 }, { "epoch": 4.97641317608784, "grad_norm": 0.27820342779159546, "learning_rate": 6.778659866202297e-10, "loss": 0.2947, "step": 48948 }, { "epoch": 4.976514843432289, "grad_norm": 0.26014944911003113, "learning_rate": 6.720350390693186e-10, "loss": 0.297, "step": 48949 }, { "epoch": 4.976616510776738, "grad_norm": 0.2967669367790222, "learning_rate": 6.662292769449297e-10, "loss": 0.2783, "step": 48950 }, { "epoch": 4.976718178121187, "grad_norm": 0.27600541710853577, "learning_rate": 6.604487002759286e-10, "loss": 0.2901, "step": 48951 }, { "epoch": 4.976819845465636, "grad_norm": 0.26752427220344543, "learning_rate": 6.546933090917362e-10, "loss": 0.2808, "step": 48952 }, { "epoch": 4.976921512810085, "grad_norm": 0.2618391513824463, "learning_rate": 6.489631034212185e-10, "loss": 0.2852, "step": 48953 }, { "epoch": 4.977023180154534, "grad_norm": 0.2669958472251892, "learning_rate": 6.432580832932411e-10, "loss": 0.2819, "step": 48954 }, { "epoch": 4.977124847498983, "grad_norm": 0.304451584815979, "learning_rate": 6.375782487366699e-10, "loss": 0.2716, "step": 48955 }, { "epoch": 4.977226514843432, "grad_norm": 0.28324219584465027, "learning_rate": 6.319235997792606e-10, "loss": 0.3146, "step": 48956 }, { "epoch": 4.977328182187881, "grad_norm": 0.2872169017791748, "learning_rate": 6.262941364509889e-10, "loss": 0.2716, "step": 48957 }, { "epoch": 4.977429849532331, "grad_norm": 0.25206124782562256, "learning_rate": 6.206898587790555e-10, "loss": 0.2834, "step": 48958 }, { "epoch": 4.9775315168767795, "grad_norm": 0.2716525197029114, "learning_rate": 6.151107667917711e-10, "loss": 0.2944, "step": 48959 }, { "epoch": 4.9776331842212285, "grad_norm": 0.3017112910747528, "learning_rate": 6.095568605174462e-10, "loss": 0.3119, "step": 48960 }, { "epoch": 4.977734851565677, "grad_norm": 0.28449028730392456, "learning_rate": 6.040281399849468e-10, "loss": 0.2752, "step": 48961 }, { "epoch": 4.977836518910126, "grad_norm": 0.2853670120239258, "learning_rate": 5.98524605220363e-10, "loss": 0.2864, "step": 48962 }, { "epoch": 4.977938186254575, "grad_norm": 0.2721683979034424, "learning_rate": 5.930462562531159e-10, "loss": 0.3025, "step": 48963 }, { "epoch": 4.978039853599024, "grad_norm": 0.2745330333709717, "learning_rate": 5.875930931098506e-10, "loss": 0.287, "step": 48964 }, { "epoch": 4.978141520943473, "grad_norm": 0.299505352973938, "learning_rate": 5.821651158183228e-10, "loss": 0.2855, "step": 48965 }, { "epoch": 4.978243188287922, "grad_norm": 0.313485711812973, "learning_rate": 5.767623244051778e-10, "loss": 0.2344, "step": 48966 }, { "epoch": 4.978344855632371, "grad_norm": 0.2773531377315521, "learning_rate": 5.713847188987265e-10, "loss": 0.2791, "step": 48967 }, { "epoch": 4.97844652297682, "grad_norm": 0.26090508699417114, "learning_rate": 5.66032299325614e-10, "loss": 0.2961, "step": 48968 }, { "epoch": 4.978548190321269, "grad_norm": 0.2916257977485657, "learning_rate": 5.607050657130408e-10, "loss": 0.2939, "step": 48969 }, { "epoch": 4.978649857665718, "grad_norm": 0.3040848672389984, "learning_rate": 5.554030180870972e-10, "loss": 0.2989, "step": 48970 }, { "epoch": 4.978751525010167, "grad_norm": 0.26887762546539307, "learning_rate": 5.501261564755389e-10, "loss": 0.2864, "step": 48971 }, { "epoch": 4.978853192354616, "grad_norm": 0.2711952328681946, "learning_rate": 5.448744809039008e-10, "loss": 0.2613, "step": 48972 }, { "epoch": 4.978954859699065, "grad_norm": 0.2910950779914856, "learning_rate": 5.396479913993836e-10, "loss": 0.2906, "step": 48973 }, { "epoch": 4.979056527043514, "grad_norm": 0.28514808416366577, "learning_rate": 5.344466879880772e-10, "loss": 0.2871, "step": 48974 }, { "epoch": 4.979158194387963, "grad_norm": 0.2888358235359192, "learning_rate": 5.292705706960721e-10, "loss": 0.2813, "step": 48975 }, { "epoch": 4.9792598617324115, "grad_norm": 0.29810190200805664, "learning_rate": 5.241196395494585e-10, "loss": 0.2756, "step": 48976 }, { "epoch": 4.9793615290768605, "grad_norm": 0.28304025530815125, "learning_rate": 5.189938945743267e-10, "loss": 0.2901, "step": 48977 }, { "epoch": 4.979463196421309, "grad_norm": 0.26771238446235657, "learning_rate": 5.138933357962117e-10, "loss": 0.2838, "step": 48978 }, { "epoch": 4.979564863765758, "grad_norm": 0.273408979177475, "learning_rate": 5.088179632412039e-10, "loss": 0.2745, "step": 48979 }, { "epoch": 4.979666531110207, "grad_norm": 0.2812364101409912, "learning_rate": 5.037677769348381e-10, "loss": 0.2898, "step": 48980 }, { "epoch": 4.979768198454656, "grad_norm": 0.25902554392814636, "learning_rate": 4.987427769026499e-10, "loss": 0.2942, "step": 48981 }, { "epoch": 4.979869865799105, "grad_norm": 0.29448625445365906, "learning_rate": 4.937429631696188e-10, "loss": 0.288, "step": 48982 }, { "epoch": 4.979971533143554, "grad_norm": 0.29826048016548157, "learning_rate": 4.887683357607253e-10, "loss": 0.3046, "step": 48983 }, { "epoch": 4.980073200488003, "grad_norm": 0.27600014209747314, "learning_rate": 4.838188947015044e-10, "loss": 0.3109, "step": 48984 }, { "epoch": 4.980174867832452, "grad_norm": 0.27134689688682556, "learning_rate": 4.788946400169359e-10, "loss": 0.2979, "step": 48985 }, { "epoch": 4.980276535176901, "grad_norm": 0.30309659242630005, "learning_rate": 4.739955717314449e-10, "loss": 0.2902, "step": 48986 }, { "epoch": 4.98037820252135, "grad_norm": 0.25580835342407227, "learning_rate": 4.691216898694561e-10, "loss": 0.2623, "step": 48987 }, { "epoch": 4.980479869865799, "grad_norm": 0.29084470868110657, "learning_rate": 4.6427299445650495e-10, "loss": 0.3272, "step": 48988 }, { "epoch": 4.980581537210248, "grad_norm": 0.2915721833705902, "learning_rate": 4.5944948551646105e-10, "loss": 0.2922, "step": 48989 }, { "epoch": 4.980683204554697, "grad_norm": 0.2829657196998596, "learning_rate": 4.5465116307374936e-10, "loss": 0.2806, "step": 48990 }, { "epoch": 4.980784871899146, "grad_norm": 0.2776887118816376, "learning_rate": 4.498780271522396e-10, "loss": 0.2686, "step": 48991 }, { "epoch": 4.980886539243595, "grad_norm": 0.28141212463378906, "learning_rate": 4.4513007777580164e-10, "loss": 0.2934, "step": 48992 }, { "epoch": 4.9809882065880435, "grad_norm": 0.2918439507484436, "learning_rate": 4.404073149694155e-10, "loss": 0.3247, "step": 48993 }, { "epoch": 4.981089873932493, "grad_norm": 0.28045591711997986, "learning_rate": 4.3570973875584066e-10, "loss": 0.3316, "step": 48994 }, { "epoch": 4.981191541276942, "grad_norm": 0.2785465121269226, "learning_rate": 4.3103734915894703e-10, "loss": 0.2902, "step": 48995 }, { "epoch": 4.981293208621391, "grad_norm": 0.28388556838035583, "learning_rate": 4.2639014620260433e-10, "loss": 0.3232, "step": 48996 }, { "epoch": 4.98139487596584, "grad_norm": 0.27692434191703796, "learning_rate": 4.2176812991012727e-10, "loss": 0.2936, "step": 48997 }, { "epoch": 4.981496543310289, "grad_norm": 0.28774183988571167, "learning_rate": 4.1717130030427546e-10, "loss": 0.2895, "step": 48998 }, { "epoch": 4.981598210654738, "grad_norm": 0.28448259830474854, "learning_rate": 4.125996574089186e-10, "loss": 0.274, "step": 48999 }, { "epoch": 4.981699877999187, "grad_norm": 0.29461655020713806, "learning_rate": 4.0805320124681636e-10, "loss": 0.281, "step": 49000 }, { "epoch": 4.981801545343636, "grad_norm": 0.2841476500034332, "learning_rate": 4.0353193184072824e-10, "loss": 0.2943, "step": 49001 }, { "epoch": 4.981903212688085, "grad_norm": 0.27518317103385925, "learning_rate": 3.9903584921341387e-10, "loss": 0.2853, "step": 49002 }, { "epoch": 4.982004880032534, "grad_norm": 0.2958705723285675, "learning_rate": 3.945649533881879e-10, "loss": 0.2861, "step": 49003 }, { "epoch": 4.982106547376983, "grad_norm": 0.2803618311882019, "learning_rate": 3.901192443866997e-10, "loss": 0.2928, "step": 49004 }, { "epoch": 4.982208214721432, "grad_norm": 0.2735181152820587, "learning_rate": 3.856987222317088e-10, "loss": 0.2696, "step": 49005 }, { "epoch": 4.982309882065881, "grad_norm": 0.2739732563495636, "learning_rate": 3.8130338694541966e-10, "loss": 0.3505, "step": 49006 }, { "epoch": 4.98241154941033, "grad_norm": 0.2769717872142792, "learning_rate": 3.769332385505919e-10, "loss": 0.3146, "step": 49007 }, { "epoch": 4.9825132167547785, "grad_norm": 0.2722594738006592, "learning_rate": 3.7258827706776466e-10, "loss": 0.3005, "step": 49008 }, { "epoch": 4.9826148840992275, "grad_norm": 0.2666313052177429, "learning_rate": 3.682685025202526e-10, "loss": 0.3166, "step": 49009 }, { "epoch": 4.982716551443676, "grad_norm": 0.26959028840065, "learning_rate": 3.63973914929705e-10, "loss": 0.2935, "step": 49010 }, { "epoch": 4.982818218788125, "grad_norm": 0.2694343030452728, "learning_rate": 3.597045143166611e-10, "loss": 0.2939, "step": 49011 }, { "epoch": 4.982919886132574, "grad_norm": 0.2775460481643677, "learning_rate": 3.5546030070388037e-10, "loss": 0.3051, "step": 49012 }, { "epoch": 4.983021553477023, "grad_norm": 0.2854219675064087, "learning_rate": 3.51241274111902e-10, "loss": 0.2832, "step": 49013 }, { "epoch": 4.983123220821472, "grad_norm": 0.3044326901435852, "learning_rate": 3.470474345618202e-10, "loss": 0.2796, "step": 49014 }, { "epoch": 4.983224888165921, "grad_norm": 0.2688600718975067, "learning_rate": 3.428787820758395e-10, "loss": 0.3183, "step": 49015 }, { "epoch": 4.98332655551037, "grad_norm": 0.2863231599330902, "learning_rate": 3.3873531667449886e-10, "loss": 0.2836, "step": 49016 }, { "epoch": 4.983428222854819, "grad_norm": 0.2615041434764862, "learning_rate": 3.3461703837778247e-10, "loss": 0.2908, "step": 49017 }, { "epoch": 4.983529890199268, "grad_norm": 0.258293092250824, "learning_rate": 3.3052394720789473e-10, "loss": 0.319, "step": 49018 }, { "epoch": 4.983631557543717, "grad_norm": 0.30048495531082153, "learning_rate": 3.264560431842645e-10, "loss": 0.2909, "step": 49019 }, { "epoch": 4.983733224888166, "grad_norm": 0.27712780237197876, "learning_rate": 3.2241332632798605e-10, "loss": 0.3422, "step": 49020 }, { "epoch": 4.983834892232615, "grad_norm": 0.2526622712612152, "learning_rate": 3.183957966590434e-10, "loss": 0.295, "step": 49021 }, { "epoch": 4.983936559577064, "grad_norm": 0.2887810170650482, "learning_rate": 3.144034541985308e-10, "loss": 0.3044, "step": 49022 }, { "epoch": 4.984038226921513, "grad_norm": 0.28360116481781006, "learning_rate": 3.1043629896532204e-10, "loss": 0.2981, "step": 49023 }, { "epoch": 4.984139894265962, "grad_norm": 0.26018714904785156, "learning_rate": 3.064943309805113e-10, "loss": 0.3061, "step": 49024 }, { "epoch": 4.9842415616104105, "grad_norm": 0.28964531421661377, "learning_rate": 3.025775502629724e-10, "loss": 0.312, "step": 49025 }, { "epoch": 4.9843432289548595, "grad_norm": 0.29356029629707336, "learning_rate": 2.986859568332445e-10, "loss": 0.2748, "step": 49026 }, { "epoch": 4.984444896299308, "grad_norm": 0.2799297273159027, "learning_rate": 2.9481955071075653e-10, "loss": 0.3057, "step": 49027 }, { "epoch": 4.984546563643757, "grad_norm": 0.2875298261642456, "learning_rate": 2.9097833191438216e-10, "loss": 0.3548, "step": 49028 }, { "epoch": 4.984648230988206, "grad_norm": 0.2941751778125763, "learning_rate": 2.8716230046410553e-10, "loss": 0.2875, "step": 49029 }, { "epoch": 4.984749898332655, "grad_norm": 0.28051039576530457, "learning_rate": 2.8337145637935546e-10, "loss": 0.2752, "step": 49030 }, { "epoch": 4.984851565677104, "grad_norm": 0.29051727056503296, "learning_rate": 2.796057996784507e-10, "loss": 0.2678, "step": 49031 }, { "epoch": 4.984953233021553, "grad_norm": 0.297424852848053, "learning_rate": 2.758653303808201e-10, "loss": 0.2858, "step": 49032 }, { "epoch": 4.985054900366002, "grad_norm": 0.26033106446266174, "learning_rate": 2.721500485053374e-10, "loss": 0.3156, "step": 49033 }, { "epoch": 4.985156567710451, "grad_norm": 0.29143381118774414, "learning_rate": 2.6845995407087654e-10, "loss": 0.3135, "step": 49034 }, { "epoch": 4.9852582350549, "grad_norm": 0.2794761061668396, "learning_rate": 2.647950470957561e-10, "loss": 0.3097, "step": 49035 }, { "epoch": 4.985359902399349, "grad_norm": 0.2864450514316559, "learning_rate": 2.611553275982948e-10, "loss": 0.3073, "step": 49036 }, { "epoch": 4.985461569743798, "grad_norm": 0.2665882408618927, "learning_rate": 2.575407955968112e-10, "loss": 0.2888, "step": 49037 }, { "epoch": 4.985563237088247, "grad_norm": 0.29381483793258667, "learning_rate": 2.5395145111017925e-10, "loss": 0.2957, "step": 49038 }, { "epoch": 4.985664904432696, "grad_norm": 0.27352648973464966, "learning_rate": 2.5038729415560736e-10, "loss": 0.3089, "step": 49039 }, { "epoch": 4.9857665717771456, "grad_norm": 0.25133365392684937, "learning_rate": 2.468483247519693e-10, "loss": 0.2908, "step": 49040 }, { "epoch": 4.9858682391215945, "grad_norm": 0.2768755555152893, "learning_rate": 2.433345429159184e-10, "loss": 0.2963, "step": 49041 }, { "epoch": 4.985969906466043, "grad_norm": 0.28351259231567383, "learning_rate": 2.398459486663285e-10, "loss": 0.2716, "step": 49042 }, { "epoch": 4.986071573810492, "grad_norm": 0.27436116337776184, "learning_rate": 2.36382542020408e-10, "loss": 0.3061, "step": 49043 }, { "epoch": 4.986173241154941, "grad_norm": 0.28875571489334106, "learning_rate": 2.329443229948103e-10, "loss": 0.3167, "step": 49044 }, { "epoch": 4.98627490849939, "grad_norm": 0.27545860409736633, "learning_rate": 2.2953129160785404e-10, "loss": 0.322, "step": 49045 }, { "epoch": 4.986376575843839, "grad_norm": 0.2789401113986969, "learning_rate": 2.261434478767477e-10, "loss": 0.2865, "step": 49046 }, { "epoch": 4.986478243188288, "grad_norm": 0.25133591890335083, "learning_rate": 2.2278079181758948e-10, "loss": 0.2937, "step": 49047 }, { "epoch": 4.986579910532737, "grad_norm": 0.25328299403190613, "learning_rate": 2.1944332344814302e-10, "loss": 0.3197, "step": 49048 }, { "epoch": 4.986681577877186, "grad_norm": 0.28316861391067505, "learning_rate": 2.1613104278506159e-10, "loss": 0.3038, "step": 49049 }, { "epoch": 4.986783245221635, "grad_norm": 0.3012153208255768, "learning_rate": 2.1284394984499856e-10, "loss": 0.2832, "step": 49050 }, { "epoch": 4.986884912566084, "grad_norm": 0.2833634614944458, "learning_rate": 2.095820446446073e-10, "loss": 0.314, "step": 49051 }, { "epoch": 4.986986579910533, "grad_norm": 0.268197625875473, "learning_rate": 2.0634532719998602e-10, "loss": 0.3101, "step": 49052 }, { "epoch": 4.987088247254982, "grad_norm": 0.33872854709625244, "learning_rate": 2.0313379752778806e-10, "loss": 0.2989, "step": 49053 }, { "epoch": 4.987189914599431, "grad_norm": 0.2685549855232239, "learning_rate": 1.9994745564411166e-10, "loss": 0.3063, "step": 49054 }, { "epoch": 4.98729158194388, "grad_norm": 0.2663237750530243, "learning_rate": 1.9678630156505507e-10, "loss": 0.3003, "step": 49055 }, { "epoch": 4.987393249288329, "grad_norm": 0.26033952832221985, "learning_rate": 1.936503353061614e-10, "loss": 0.2782, "step": 49056 }, { "epoch": 4.9874949166327776, "grad_norm": 0.2932072877883911, "learning_rate": 1.9053955688352888e-10, "loss": 0.2559, "step": 49057 }, { "epoch": 4.9875965839772265, "grad_norm": 0.28068798780441284, "learning_rate": 1.8745396631325574e-10, "loss": 0.3067, "step": 49058 }, { "epoch": 4.987698251321675, "grad_norm": 0.2863645553588867, "learning_rate": 1.843935636097749e-10, "loss": 0.2899, "step": 49059 }, { "epoch": 4.987799918666124, "grad_norm": 0.27073565125465393, "learning_rate": 1.813583487897397e-10, "loss": 0.3348, "step": 49060 }, { "epoch": 4.987901586010573, "grad_norm": 0.29142579436302185, "learning_rate": 1.78348321867583e-10, "loss": 0.2997, "step": 49061 }, { "epoch": 4.988003253355022, "grad_norm": 0.29510733485221863, "learning_rate": 1.7536348285884796e-10, "loss": 0.294, "step": 49062 }, { "epoch": 4.988104920699471, "grad_norm": 0.284324586391449, "learning_rate": 1.724038317785226e-10, "loss": 0.2791, "step": 49063 }, { "epoch": 4.98820658804392, "grad_norm": 0.2688252031803131, "learning_rate": 1.6946936864159492e-10, "loss": 0.2867, "step": 49064 }, { "epoch": 4.988308255388369, "grad_norm": 0.2672334909439087, "learning_rate": 1.6656009346249779e-10, "loss": 0.3128, "step": 49065 }, { "epoch": 4.988409922732818, "grad_norm": 0.27467286586761475, "learning_rate": 1.6367600625677437e-10, "loss": 0.3014, "step": 49066 }, { "epoch": 4.988511590077267, "grad_norm": 0.27338409423828125, "learning_rate": 1.6081710703774734e-10, "loss": 0.3064, "step": 49067 }, { "epoch": 4.988613257421716, "grad_norm": 0.29442837834358215, "learning_rate": 1.5798339582040467e-10, "loss": 0.3119, "step": 49068 }, { "epoch": 4.988714924766165, "grad_norm": 0.2707790732383728, "learning_rate": 1.5517487261862417e-10, "loss": 0.3048, "step": 49069 }, { "epoch": 4.988816592110614, "grad_norm": 0.28187254071235657, "learning_rate": 1.5239153744739387e-10, "loss": 0.3098, "step": 49070 }, { "epoch": 4.988918259455063, "grad_norm": 0.2648199200630188, "learning_rate": 1.4963339032003642e-10, "loss": 0.3035, "step": 49071 }, { "epoch": 4.989019926799512, "grad_norm": 0.2747848331928253, "learning_rate": 1.4690043125098475e-10, "loss": 0.286, "step": 49072 }, { "epoch": 4.989121594143961, "grad_norm": 0.2646937966346741, "learning_rate": 1.441926602535615e-10, "loss": 0.288, "step": 49073 }, { "epoch": 4.9892232614884096, "grad_norm": 0.29188865423202515, "learning_rate": 1.4151007734164446e-10, "loss": 0.2819, "step": 49074 }, { "epoch": 4.989324928832859, "grad_norm": 0.28748658299446106, "learning_rate": 1.3885268252855632e-10, "loss": 0.257, "step": 49075 }, { "epoch": 4.989426596177308, "grad_norm": 0.27521154284477234, "learning_rate": 1.3622047582761976e-10, "loss": 0.3196, "step": 49076 }, { "epoch": 4.989528263521757, "grad_norm": 0.27286601066589355, "learning_rate": 1.3361345725271259e-10, "loss": 0.3248, "step": 49077 }, { "epoch": 4.989629930866206, "grad_norm": 0.3017440736293793, "learning_rate": 1.310316268160472e-10, "loss": 0.2505, "step": 49078 }, { "epoch": 4.989731598210655, "grad_norm": 0.28523188829421997, "learning_rate": 1.2847498453150142e-10, "loss": 0.2857, "step": 49079 }, { "epoch": 4.989833265555104, "grad_norm": 0.27714988589286804, "learning_rate": 1.259435304112877e-10, "loss": 0.3437, "step": 49080 }, { "epoch": 4.989934932899553, "grad_norm": 0.29385629296302795, "learning_rate": 1.2343726446817362e-10, "loss": 0.2782, "step": 49081 }, { "epoch": 4.990036600244002, "grad_norm": 0.2586016356945038, "learning_rate": 1.209561867154818e-10, "loss": 0.2578, "step": 49082 }, { "epoch": 4.990138267588451, "grad_norm": 0.28368130326271057, "learning_rate": 1.1850029716486965e-10, "loss": 0.2783, "step": 49083 }, { "epoch": 4.9902399349329, "grad_norm": 0.2901467978954315, "learning_rate": 1.1606959582910471e-10, "loss": 0.291, "step": 49084 }, { "epoch": 4.990341602277349, "grad_norm": 0.27434131503105164, "learning_rate": 1.1366408272095453e-10, "loss": 0.3021, "step": 49085 }, { "epoch": 4.990443269621798, "grad_norm": 0.2554137110710144, "learning_rate": 1.1128375785152135e-10, "loss": 0.2932, "step": 49086 }, { "epoch": 4.990544936966247, "grad_norm": 0.28177309036254883, "learning_rate": 1.0892862123357273e-10, "loss": 0.306, "step": 49087 }, { "epoch": 4.990646604310696, "grad_norm": 0.27209988236427307, "learning_rate": 1.065986728782109e-10, "loss": 0.2727, "step": 49088 }, { "epoch": 4.990748271655145, "grad_norm": 0.267259806394577, "learning_rate": 1.0429391279820345e-10, "loss": 0.283, "step": 49089 }, { "epoch": 4.9908499389995935, "grad_norm": 0.2870759665966034, "learning_rate": 1.0201434100409746e-10, "loss": 0.2776, "step": 49090 }, { "epoch": 4.9909516063440424, "grad_norm": 0.28456056118011475, "learning_rate": 9.97599575081054e-11, "loss": 0.2878, "step": 49091 }, { "epoch": 4.991053273688491, "grad_norm": 0.2843035161495209, "learning_rate": 9.753076232188463e-11, "loss": 0.309, "step": 49092 }, { "epoch": 4.99115494103294, "grad_norm": 0.2703826129436493, "learning_rate": 9.532675545542714e-11, "loss": 0.3118, "step": 49093 }, { "epoch": 4.991256608377389, "grad_norm": 0.2845623195171356, "learning_rate": 9.314793692094537e-11, "loss": 0.286, "step": 49094 }, { "epoch": 4.991358275721838, "grad_norm": 0.2845698893070221, "learning_rate": 9.099430672898646e-11, "loss": 0.2723, "step": 49095 }, { "epoch": 4.991459943066287, "grad_norm": 0.2753603160381317, "learning_rate": 8.886586489009752e-11, "loss": 0.2962, "step": 49096 }, { "epoch": 4.991561610410736, "grad_norm": 0.29620081186294556, "learning_rate": 8.676261141538078e-11, "loss": 0.307, "step": 49097 }, { "epoch": 4.991663277755185, "grad_norm": 0.29372262954711914, "learning_rate": 8.468454631593848e-11, "loss": 0.2925, "step": 49098 }, { "epoch": 4.991764945099634, "grad_norm": 0.27697572112083435, "learning_rate": 8.26316696012075e-11, "loss": 0.2919, "step": 49099 }, { "epoch": 4.991866612444083, "grad_norm": 0.2756202220916748, "learning_rate": 8.060398128229008e-11, "loss": 0.3128, "step": 49100 }, { "epoch": 4.991968279788532, "grad_norm": 0.29682350158691406, "learning_rate": 7.860148136862311e-11, "loss": 0.3283, "step": 49101 }, { "epoch": 4.992069947132981, "grad_norm": 0.3040023148059845, "learning_rate": 7.662416987130882e-11, "loss": 0.2979, "step": 49102 }, { "epoch": 4.99217161447743, "grad_norm": 0.298639178276062, "learning_rate": 7.467204679978412e-11, "loss": 0.3009, "step": 49103 }, { "epoch": 4.992273281821879, "grad_norm": 0.281585693359375, "learning_rate": 7.274511216348589e-11, "loss": 0.3101, "step": 49104 }, { "epoch": 4.992374949166328, "grad_norm": 0.26406118273735046, "learning_rate": 7.084336597296126e-11, "loss": 0.288, "step": 49105 }, { "epoch": 4.992476616510777, "grad_norm": 0.3051893711090088, "learning_rate": 6.896680823709201e-11, "loss": 0.3085, "step": 49106 }, { "epoch": 4.9925782838552255, "grad_norm": 0.2858383357524872, "learning_rate": 6.711543896587013e-11, "loss": 0.2728, "step": 49107 }, { "epoch": 4.9926799511996744, "grad_norm": 0.27722668647766113, "learning_rate": 6.528925816817744e-11, "loss": 0.2946, "step": 49108 }, { "epoch": 4.992781618544123, "grad_norm": 0.2753016948699951, "learning_rate": 6.34882658534508e-11, "loss": 0.2915, "step": 49109 }, { "epoch": 4.992883285888572, "grad_norm": 0.2727154791355133, "learning_rate": 6.171246203057201e-11, "loss": 0.3175, "step": 49110 }, { "epoch": 4.992984953233021, "grad_norm": 0.29923707246780396, "learning_rate": 5.996184670897797e-11, "loss": 0.2926, "step": 49111 }, { "epoch": 4.99308662057747, "grad_norm": 0.2742918133735657, "learning_rate": 5.823641989699536e-11, "loss": 0.339, "step": 49112 }, { "epoch": 4.993188287921919, "grad_norm": 0.25591981410980225, "learning_rate": 5.6536181602950823e-11, "loss": 0.3132, "step": 49113 }, { "epoch": 4.993289955266368, "grad_norm": 0.28011977672576904, "learning_rate": 5.486113183628128e-11, "loss": 0.2939, "step": 49114 }, { "epoch": 4.993391622610817, "grad_norm": 0.26680442690849304, "learning_rate": 5.3211270605313394e-11, "loss": 0.3357, "step": 49115 }, { "epoch": 4.993493289955266, "grad_norm": 0.2828132212162018, "learning_rate": 5.158659791781873e-11, "loss": 0.3049, "step": 49116 }, { "epoch": 4.993594957299715, "grad_norm": 0.26386651396751404, "learning_rate": 4.998711378212395e-11, "loss": 0.2855, "step": 49117 }, { "epoch": 4.993696624644164, "grad_norm": 0.30231913924217224, "learning_rate": 4.8412818206555745e-11, "loss": 0.2826, "step": 49118 }, { "epoch": 4.993798291988613, "grad_norm": 0.302788108587265, "learning_rate": 4.686371119888566e-11, "loss": 0.2717, "step": 49119 }, { "epoch": 4.993899959333062, "grad_norm": 0.28646573424339294, "learning_rate": 4.533979276688527e-11, "loss": 0.3051, "step": 49120 }, { "epoch": 4.994001626677511, "grad_norm": 0.28182071447372437, "learning_rate": 4.384106291888124e-11, "loss": 0.3027, "step": 49121 }, { "epoch": 4.9941032940219605, "grad_norm": 0.27288615703582764, "learning_rate": 4.2367521660979796e-11, "loss": 0.276, "step": 49122 }, { "epoch": 4.9942049613664095, "grad_norm": 0.29327353835105896, "learning_rate": 4.091916900206272e-11, "loss": 0.2957, "step": 49123 }, { "epoch": 4.994306628710858, "grad_norm": 0.28398290276527405, "learning_rate": 3.9496004948236246e-11, "loss": 0.3276, "step": 49124 }, { "epoch": 4.994408296055307, "grad_norm": 0.26606521010398865, "learning_rate": 3.809802950782704e-11, "loss": 0.2974, "step": 49125 }, { "epoch": 4.994509963399756, "grad_norm": 0.28878554701805115, "learning_rate": 3.6725242686941334e-11, "loss": 0.2864, "step": 49126 }, { "epoch": 4.994611630744205, "grad_norm": 0.288213849067688, "learning_rate": 3.537764449335068e-11, "loss": 0.3034, "step": 49127 }, { "epoch": 4.994713298088654, "grad_norm": 0.27106818556785583, "learning_rate": 3.405523493260621e-11, "loss": 0.3113, "step": 49128 }, { "epoch": 4.994814965433103, "grad_norm": 0.2732320725917816, "learning_rate": 3.275801401303458e-11, "loss": 0.2879, "step": 49129 }, { "epoch": 4.994916632777552, "grad_norm": 0.2610689699649811, "learning_rate": 3.1485981739631796e-11, "loss": 0.2948, "step": 49130 }, { "epoch": 4.995018300122001, "grad_norm": 0.2739568054676056, "learning_rate": 3.023913811961432e-11, "loss": 0.3077, "step": 49131 }, { "epoch": 4.99511996746645, "grad_norm": 0.2662091851234436, "learning_rate": 2.9017483158533256e-11, "loss": 0.2794, "step": 49132 }, { "epoch": 4.995221634810899, "grad_norm": 0.2820564806461334, "learning_rate": 2.7821016863605054e-11, "loss": 0.3125, "step": 49133 }, { "epoch": 4.995323302155348, "grad_norm": 0.2764141857624054, "learning_rate": 2.6649739240380836e-11, "loss": 0.2936, "step": 49134 }, { "epoch": 4.995424969499797, "grad_norm": 0.29618018865585327, "learning_rate": 2.550365029441171e-11, "loss": 0.3044, "step": 49135 }, { "epoch": 4.995526636844246, "grad_norm": 0.2683315575122833, "learning_rate": 2.4382750032359016e-11, "loss": 0.3076, "step": 49136 }, { "epoch": 4.995628304188695, "grad_norm": 0.27286335825920105, "learning_rate": 2.3287038458663647e-11, "loss": 0.2929, "step": 49137 }, { "epoch": 4.995729971533144, "grad_norm": 0.28359857201576233, "learning_rate": 2.221651557998694e-11, "loss": 0.2973, "step": 49138 }, { "epoch": 4.9958316388775925, "grad_norm": 0.2660669982433319, "learning_rate": 2.117118140076979e-11, "loss": 0.2658, "step": 49139 }, { "epoch": 4.9959333062220415, "grad_norm": 0.29014453291893005, "learning_rate": 2.015103592711842e-11, "loss": 0.3038, "step": 49140 }, { "epoch": 4.99603497356649, "grad_norm": 0.28406330943107605, "learning_rate": 1.915607916347373e-11, "loss": 0.3001, "step": 49141 }, { "epoch": 4.996136640910939, "grad_norm": 0.27184489369392395, "learning_rate": 1.8186311115386823e-11, "loss": 0.2963, "step": 49142 }, { "epoch": 4.996238308255388, "grad_norm": 0.26849037408828735, "learning_rate": 1.7241731787298598e-11, "loss": 0.2922, "step": 49143 }, { "epoch": 4.996339975599837, "grad_norm": 0.2912749946117401, "learning_rate": 1.632234118420506e-11, "loss": 0.3059, "step": 49144 }, { "epoch": 4.996441642944286, "grad_norm": 0.26760056614875793, "learning_rate": 1.5428139310547096e-11, "loss": 0.3098, "step": 49145 }, { "epoch": 4.996543310288735, "grad_norm": 0.2570796310901642, "learning_rate": 1.4559126170765604e-11, "loss": 0.2854, "step": 49146 }, { "epoch": 4.996644977633184, "grad_norm": 0.28807008266448975, "learning_rate": 1.3715301769856581e-11, "loss": 0.3165, "step": 49147 }, { "epoch": 4.996746644977633, "grad_norm": 0.2615559995174408, "learning_rate": 1.2896666111705814e-11, "loss": 0.3168, "step": 49148 }, { "epoch": 4.996848312322082, "grad_norm": 0.29656392335891724, "learning_rate": 1.2103219200199078e-11, "loss": 0.2786, "step": 49149 }, { "epoch": 4.996949979666531, "grad_norm": 0.2623136341571808, "learning_rate": 1.133496103977727e-11, "loss": 0.2771, "step": 49150 }, { "epoch": 4.99705164701098, "grad_norm": 0.2584652900695801, "learning_rate": 1.0591891634326169e-11, "loss": 0.2978, "step": 49151 }, { "epoch": 4.997153314355429, "grad_norm": 0.2981538772583008, "learning_rate": 9.87401098662133e-12, "loss": 0.2967, "step": 49152 }, { "epoch": 4.997254981699878, "grad_norm": 0.27038639783859253, "learning_rate": 9.181319101658759e-12, "loss": 0.2845, "step": 49153 }, { "epoch": 4.997356649044327, "grad_norm": 0.27353647351264954, "learning_rate": 8.513815981658902e-12, "loss": 0.2764, "step": 49154 }, { "epoch": 4.997458316388776, "grad_norm": 0.29429203271865845, "learning_rate": 7.87150163106265e-12, "loss": 0.2982, "step": 49155 }, { "epoch": 4.9975599837332245, "grad_norm": 0.30087801814079285, "learning_rate": 7.25437605209045e-12, "loss": 0.3158, "step": 49156 }, { "epoch": 4.997661651077674, "grad_norm": 0.27969396114349365, "learning_rate": 6.662439249183194e-12, "loss": 0.3055, "step": 49157 }, { "epoch": 4.997763318422123, "grad_norm": 0.3026981055736542, "learning_rate": 6.095691224006217e-12, "loss": 0.3222, "step": 49158 }, { "epoch": 4.997864985766572, "grad_norm": 0.2702493965625763, "learning_rate": 5.554131979890187e-12, "loss": 0.3136, "step": 49159 }, { "epoch": 4.997966653111021, "grad_norm": 0.271762877702713, "learning_rate": 5.037761519610662e-12, "loss": 0.3079, "step": 49160 }, { "epoch": 4.99806832045547, "grad_norm": 0.2613036632537842, "learning_rate": 4.5465798459432e-12, "loss": 0.3147, "step": 49161 }, { "epoch": 4.998169987799919, "grad_norm": 0.30384954810142517, "learning_rate": 4.080586961108246e-12, "loss": 0.3087, "step": 49162 }, { "epoch": 4.998271655144368, "grad_norm": 0.27899035811424255, "learning_rate": 3.639782867326247e-12, "loss": 0.2822, "step": 49163 }, { "epoch": 4.998373322488817, "grad_norm": 0.28527021408081055, "learning_rate": 3.22416756737276e-12, "loss": 0.3163, "step": 49164 }, { "epoch": 4.998474989833266, "grad_norm": 0.3047986626625061, "learning_rate": 2.83374106291312e-12, "loss": 0.2742, "step": 49165 }, { "epoch": 4.998576657177715, "grad_norm": 0.3030051589012146, "learning_rate": 2.468503356167773e-12, "loss": 0.2648, "step": 49166 }, { "epoch": 4.998678324522164, "grad_norm": 0.30397269129753113, "learning_rate": 2.1284544482469418e-12, "loss": 0.3035, "step": 49167 }, { "epoch": 4.998779991866613, "grad_norm": 0.28959041833877563, "learning_rate": 1.8135943419261837e-12, "loss": 0.3075, "step": 49168 }, { "epoch": 4.998881659211062, "grad_norm": 0.28204068541526794, "learning_rate": 1.5239230383157222e-12, "loss": 0.3097, "step": 49169 }, { "epoch": 4.998983326555511, "grad_norm": 0.2762725055217743, "learning_rate": 1.2594405385257801e-12, "loss": 0.3039, "step": 49170 }, { "epoch": 4.9990849938999595, "grad_norm": 0.269182413816452, "learning_rate": 1.0201468442216921e-12, "loss": 0.277, "step": 49171 }, { "epoch": 4.9991866612444085, "grad_norm": 0.26645347476005554, "learning_rate": 8.060419570687928e-13, "loss": 0.2814, "step": 49172 }, { "epoch": 4.999288328588857, "grad_norm": 0.3016573190689087, "learning_rate": 6.171258770670818e-13, "loss": 0.282, "step": 49173 }, { "epoch": 4.999389995933306, "grad_norm": 0.2679661214351654, "learning_rate": 4.53398605881894e-13, "loss": 0.2936, "step": 49174 }, { "epoch": 4.999491663277755, "grad_norm": 0.300398051738739, "learning_rate": 3.1486014462345226e-13, "loss": 0.2694, "step": 49175 }, { "epoch": 4.999593330622204, "grad_norm": 0.2699708044528961, "learning_rate": 2.0151049329175666e-13, "loss": 0.281, "step": 49176 }, { "epoch": 4.999694997966653, "grad_norm": 0.2677386701107025, "learning_rate": 1.1334965299703016e-13, "loss": 0.2733, "step": 49177 }, { "epoch": 4.999796665311102, "grad_norm": 0.2724464237689972, "learning_rate": 5.0377623739272794e-14, "loss": 0.3069, "step": 49178 }, { "epoch": 4.999898332655551, "grad_norm": 0.2993130385875702, "learning_rate": 1.2594406073596078e-14, "loss": 0.3, "step": 49179 }, { "epoch": 5.0, "grad_norm": 0.27815431356430054, "learning_rate": 0.0, "loss": 0.3111, "step": 49180 }, { "epoch": 5.0, "step": 49180, "total_flos": 4.724693395426509e+16, "train_loss": 0.34488780698358357, "train_runtime": 644246.8308, "train_samples_per_second": 7.328, "train_steps_per_second": 0.076 } ], "logging_steps": 1.0, "max_steps": 49180, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.724693395426509e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }