| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.046646278355835305, |
| "eval_steps": 500, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 2.332313917791765e-05, |
| "grad_norm": 1.6235620975494385, |
| "learning_rate": 5.182689816014512e-09, |
| "loss": 1.9275, |
| "step": 1 |
| }, |
| { |
| "epoch": 4.66462783558353e-05, |
| "grad_norm": 1.5710082054138184, |
| "learning_rate": 1.0365379632029025e-08, |
| "loss": 1.5593, |
| "step": 2 |
| }, |
| { |
| "epoch": 6.996941753375295e-05, |
| "grad_norm": 2.3231985569000244, |
| "learning_rate": 1.5548069448043534e-08, |
| "loss": 2.0021, |
| "step": 3 |
| }, |
| { |
| "epoch": 9.32925567116706e-05, |
| "grad_norm": 1.8349288702011108, |
| "learning_rate": 2.073075926405805e-08, |
| "loss": 2.1141, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.00011661569588958826, |
| "grad_norm": 2.039928436279297, |
| "learning_rate": 2.5913449080072562e-08, |
| "loss": 1.9361, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0001399388350675059, |
| "grad_norm": 1.8988783359527588, |
| "learning_rate": 3.109613889608707e-08, |
| "loss": 2.2441, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.00016326197424542356, |
| "grad_norm": 1.4865813255310059, |
| "learning_rate": 3.6278828712101586e-08, |
| "loss": 1.8118, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0001865851134233412, |
| "grad_norm": 1.4033368825912476, |
| "learning_rate": 4.14615185281161e-08, |
| "loss": 1.8838, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.00020990825260125886, |
| "grad_norm": 1.876894235610962, |
| "learning_rate": 4.6644208344130604e-08, |
| "loss": 1.9916, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.00023323139177917651, |
| "grad_norm": 2.4104366302490234, |
| "learning_rate": 5.1826898160145123e-08, |
| "loss": 1.8618, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0002565545309570942, |
| "grad_norm": 1.8457229137420654, |
| "learning_rate": 5.700958797615963e-08, |
| "loss": 1.7303, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0002798776701350118, |
| "grad_norm": 1.940317988395691, |
| "learning_rate": 6.219227779217413e-08, |
| "loss": 2.2692, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0003032008093129295, |
| "grad_norm": 2.455432891845703, |
| "learning_rate": 6.737496760818865e-08, |
| "loss": 2.3401, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0003265239484908471, |
| "grad_norm": 1.5163850784301758, |
| "learning_rate": 7.255765742420317e-08, |
| "loss": 2.1687, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0003498470876687648, |
| "grad_norm": 1.3012642860412598, |
| "learning_rate": 7.774034724021768e-08, |
| "loss": 1.8693, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0003731702268466824, |
| "grad_norm": 2.0896522998809814, |
| "learning_rate": 8.29230370562322e-08, |
| "loss": 1.7031, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0003964933660246001, |
| "grad_norm": 1.7818728685379028, |
| "learning_rate": 8.810572687224672e-08, |
| "loss": 2.0829, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0004198165052025177, |
| "grad_norm": 2.569828510284424, |
| "learning_rate": 9.328841668826121e-08, |
| "loss": 1.8998, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0004431396443804354, |
| "grad_norm": 1.4619100093841553, |
| "learning_rate": 9.847110650427573e-08, |
| "loss": 1.5964, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.00046646278355835303, |
| "grad_norm": 1.9832793474197388, |
| "learning_rate": 1.0365379632029025e-07, |
| "loss": 1.9292, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0004897859227362707, |
| "grad_norm": 2.0182175636291504, |
| "learning_rate": 1.0883648613630475e-07, |
| "loss": 2.0115, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0005131090619141884, |
| "grad_norm": 1.4642307758331299, |
| "learning_rate": 1.1401917595231926e-07, |
| "loss": 2.0291, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.000536432201092106, |
| "grad_norm": 2.887909173965454, |
| "learning_rate": 1.1920186576833378e-07, |
| "loss": 2.1946, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0005597553402700236, |
| "grad_norm": 1.595544457435608, |
| "learning_rate": 1.2438455558434827e-07, |
| "loss": 2.0246, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0005830784794479413, |
| "grad_norm": 1.5648566484451294, |
| "learning_rate": 1.295672454003628e-07, |
| "loss": 2.1832, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.000606401618625859, |
| "grad_norm": 1.4702372550964355, |
| "learning_rate": 1.347499352163773e-07, |
| "loss": 1.6395, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0006297247578037766, |
| "grad_norm": 1.7178195714950562, |
| "learning_rate": 1.399326250323918e-07, |
| "loss": 1.6264, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0006530478969816942, |
| "grad_norm": 2.1751515865325928, |
| "learning_rate": 1.4511531484840635e-07, |
| "loss": 2.511, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0006763710361596119, |
| "grad_norm": 2.9443299770355225, |
| "learning_rate": 1.5029800466442085e-07, |
| "loss": 2.229, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0006996941753375296, |
| "grad_norm": 1.8316481113433838, |
| "learning_rate": 1.5548069448043536e-07, |
| "loss": 1.8414, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0007230173145154472, |
| "grad_norm": 1.9659239053726196, |
| "learning_rate": 1.6066338429644986e-07, |
| "loss": 2.0109, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.0007463404536933648, |
| "grad_norm": 2.1653449535369873, |
| "learning_rate": 1.658460741124644e-07, |
| "loss": 2.0155, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0007696635928712825, |
| "grad_norm": 1.8755710124969482, |
| "learning_rate": 1.710287639284789e-07, |
| "loss": 2.1105, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.0007929867320492002, |
| "grad_norm": 1.5989196300506592, |
| "learning_rate": 1.7621145374449343e-07, |
| "loss": 2.1583, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0008163098712271178, |
| "grad_norm": 1.865307331085205, |
| "learning_rate": 1.813941435605079e-07, |
| "loss": 2.001, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0008396330104050355, |
| "grad_norm": 1.4584789276123047, |
| "learning_rate": 1.8657683337652242e-07, |
| "loss": 1.8854, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0008629561495829531, |
| "grad_norm": 2.6818912029266357, |
| "learning_rate": 1.9175952319253695e-07, |
| "loss": 2.1888, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.0008862792887608708, |
| "grad_norm": 2.17561674118042, |
| "learning_rate": 1.9694221300855146e-07, |
| "loss": 1.9616, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.0009096024279387884, |
| "grad_norm": 1.252475619316101, |
| "learning_rate": 2.02124902824566e-07, |
| "loss": 1.9585, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0009329255671167061, |
| "grad_norm": 1.884366750717163, |
| "learning_rate": 2.073075926405805e-07, |
| "loss": 2.2436, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0009562487062946237, |
| "grad_norm": 1.4951350688934326, |
| "learning_rate": 2.1249028245659497e-07, |
| "loss": 1.7149, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.0009795718454725414, |
| "grad_norm": 1.891728162765503, |
| "learning_rate": 2.176729722726095e-07, |
| "loss": 2.0472, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.001002894984650459, |
| "grad_norm": 1.8992432355880737, |
| "learning_rate": 2.22855662088624e-07, |
| "loss": 2.1471, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.0010262181238283768, |
| "grad_norm": 1.3931283950805664, |
| "learning_rate": 2.2803835190463852e-07, |
| "loss": 1.5292, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.0010495412630062942, |
| "grad_norm": 1.8894548416137695, |
| "learning_rate": 2.3322104172065305e-07, |
| "loss": 1.7759, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.001072864402184212, |
| "grad_norm": 1.592050552368164, |
| "learning_rate": 2.3840373153666755e-07, |
| "loss": 2.2498, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.0010961875413621296, |
| "grad_norm": 1.3746178150177002, |
| "learning_rate": 2.4358642135268203e-07, |
| "loss": 1.8503, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0011195106805400473, |
| "grad_norm": 2.0268595218658447, |
| "learning_rate": 2.4876911116869654e-07, |
| "loss": 1.9358, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.001142833819717965, |
| "grad_norm": 1.7836228609085083, |
| "learning_rate": 2.539518009847111e-07, |
| "loss": 1.9855, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.0011661569588958826, |
| "grad_norm": 1.829447627067566, |
| "learning_rate": 2.591344908007256e-07, |
| "loss": 2.2802, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0011894800980738003, |
| "grad_norm": 2.2813496589660645, |
| "learning_rate": 2.643171806167401e-07, |
| "loss": 2.1593, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.001212803237251718, |
| "grad_norm": 3.019044876098633, |
| "learning_rate": 2.694998704327546e-07, |
| "loss": 1.9534, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0012361263764296354, |
| "grad_norm": 2.011425256729126, |
| "learning_rate": 2.746825602487691e-07, |
| "loss": 2.1284, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.0012594495156075531, |
| "grad_norm": 2.207106590270996, |
| "learning_rate": 2.798652500647836e-07, |
| "loss": 2.2427, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.0012827726547854708, |
| "grad_norm": 1.3172473907470703, |
| "learning_rate": 2.8504793988079813e-07, |
| "loss": 1.9782, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0013060957939633885, |
| "grad_norm": 1.522895097732544, |
| "learning_rate": 2.902306296968127e-07, |
| "loss": 1.9455, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0013294189331413062, |
| "grad_norm": 2.657248020172119, |
| "learning_rate": 2.954133195128272e-07, |
| "loss": 1.959, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.0013527420723192238, |
| "grad_norm": 1.9738789796829224, |
| "learning_rate": 3.005960093288417e-07, |
| "loss": 1.7878, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.0013760652114971415, |
| "grad_norm": 1.5549254417419434, |
| "learning_rate": 3.057786991448562e-07, |
| "loss": 1.9405, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0013993883506750592, |
| "grad_norm": 2.9688899517059326, |
| "learning_rate": 3.109613889608707e-07, |
| "loss": 1.9969, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0014227114898529767, |
| "grad_norm": 1.4602586030960083, |
| "learning_rate": 3.1614407877688527e-07, |
| "loss": 1.9339, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.0014460346290308943, |
| "grad_norm": 2.4017045497894287, |
| "learning_rate": 3.213267685928997e-07, |
| "loss": 2.0842, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.001469357768208812, |
| "grad_norm": 1.7433497905731201, |
| "learning_rate": 3.2650945840891423e-07, |
| "loss": 2.0223, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.0014926809073867297, |
| "grad_norm": 1.7395591735839844, |
| "learning_rate": 3.316921482249288e-07, |
| "loss": 1.9257, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.0015160040465646474, |
| "grad_norm": 1.8336257934570312, |
| "learning_rate": 3.3687483804094324e-07, |
| "loss": 1.948, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.001539327185742565, |
| "grad_norm": 1.6493985652923584, |
| "learning_rate": 3.420575278569578e-07, |
| "loss": 1.8672, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.0015626503249204827, |
| "grad_norm": 1.5789337158203125, |
| "learning_rate": 3.472402176729723e-07, |
| "loss": 1.9446, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.0015859734640984004, |
| "grad_norm": 1.3755509853363037, |
| "learning_rate": 3.5242290748898686e-07, |
| "loss": 2.1796, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.001609296603276318, |
| "grad_norm": 1.7978087663650513, |
| "learning_rate": 3.576055973050013e-07, |
| "loss": 1.8974, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.0016326197424542355, |
| "grad_norm": 1.8888216018676758, |
| "learning_rate": 3.627882871210158e-07, |
| "loss": 1.915, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0016559428816321532, |
| "grad_norm": 2.6150593757629395, |
| "learning_rate": 3.679709769370304e-07, |
| "loss": 2.2133, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.001679266020810071, |
| "grad_norm": 1.7009005546569824, |
| "learning_rate": 3.7315366675304483e-07, |
| "loss": 2.1024, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.0017025891599879886, |
| "grad_norm": 1.741734266281128, |
| "learning_rate": 3.783363565690594e-07, |
| "loss": 2.1839, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.0017259122991659063, |
| "grad_norm": 2.7715041637420654, |
| "learning_rate": 3.835190463850739e-07, |
| "loss": 2.0734, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.001749235438343824, |
| "grad_norm": 1.9710502624511719, |
| "learning_rate": 3.8870173620108835e-07, |
| "loss": 2.18, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0017725585775217416, |
| "grad_norm": 2.077986478805542, |
| "learning_rate": 3.938844260171029e-07, |
| "loss": 2.1482, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.0017958817166996593, |
| "grad_norm": 2.583721160888672, |
| "learning_rate": 3.990671158331174e-07, |
| "loss": 2.5364, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.0018192048558775768, |
| "grad_norm": 1.3425930738449097, |
| "learning_rate": 4.04249805649132e-07, |
| "loss": 1.8194, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0018425279950554944, |
| "grad_norm": 2.1111888885498047, |
| "learning_rate": 4.0943249546514643e-07, |
| "loss": 1.7878, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.0018658511342334121, |
| "grad_norm": 2.0795626640319824, |
| "learning_rate": 4.14615185281161e-07, |
| "loss": 2.3006, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0018891742734113298, |
| "grad_norm": 1.273370623588562, |
| "learning_rate": 4.197978750971755e-07, |
| "loss": 1.7599, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.0019124974125892475, |
| "grad_norm": 1.6202706098556519, |
| "learning_rate": 4.2498056491318994e-07, |
| "loss": 2.1727, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.0019358205517671651, |
| "grad_norm": 2.4593732357025146, |
| "learning_rate": 4.301632547292045e-07, |
| "loss": 2.4588, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.001959143690945083, |
| "grad_norm": 1.2617835998535156, |
| "learning_rate": 4.35345944545219e-07, |
| "loss": 1.9078, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.0019824668301230003, |
| "grad_norm": 2.2640504837036133, |
| "learning_rate": 4.405286343612335e-07, |
| "loss": 1.8983, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.002005789969300918, |
| "grad_norm": 1.6804454326629639, |
| "learning_rate": 4.45711324177248e-07, |
| "loss": 2.1049, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.0020291131084788356, |
| "grad_norm": 2.060009717941284, |
| "learning_rate": 4.5089401399326253e-07, |
| "loss": 2.0153, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.0020524362476567535, |
| "grad_norm": 1.7166160345077515, |
| "learning_rate": 4.5607670380927703e-07, |
| "loss": 2.1093, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.002075759386834671, |
| "grad_norm": 1.6695979833602905, |
| "learning_rate": 4.6125939362529154e-07, |
| "loss": 1.8607, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.0020990825260125885, |
| "grad_norm": 1.4339056015014648, |
| "learning_rate": 4.664420834413061e-07, |
| "loss": 2.2632, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0021224056651905064, |
| "grad_norm": 1.5228222608566284, |
| "learning_rate": 4.7162477325732055e-07, |
| "loss": 2.0851, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.002145728804368424, |
| "grad_norm": 1.540848731994629, |
| "learning_rate": 4.768074630733351e-07, |
| "loss": 2.1446, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.0021690519435463417, |
| "grad_norm": 1.480702519416809, |
| "learning_rate": 4.819901528893496e-07, |
| "loss": 2.0718, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.002192375082724259, |
| "grad_norm": 2.23518705368042, |
| "learning_rate": 4.871728427053641e-07, |
| "loss": 1.6198, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.002215698221902177, |
| "grad_norm": 1.6477755308151245, |
| "learning_rate": 4.923555325213786e-07, |
| "loss": 2.1136, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.0022390213610800945, |
| "grad_norm": 1.9548614025115967, |
| "learning_rate": 4.975382223373931e-07, |
| "loss": 1.9143, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.0022623445002580124, |
| "grad_norm": 1.3557407855987549, |
| "learning_rate": 5.027209121534076e-07, |
| "loss": 2.0044, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.00228566763943593, |
| "grad_norm": 2.2781455516815186, |
| "learning_rate": 5.079036019694222e-07, |
| "loss": 1.7761, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.0023089907786138474, |
| "grad_norm": 2.1195600032806396, |
| "learning_rate": 5.130862917854368e-07, |
| "loss": 1.8174, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.0023323139177917653, |
| "grad_norm": 2.0798068046569824, |
| "learning_rate": 5.182689816014512e-07, |
| "loss": 2.1431, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0023556370569696827, |
| "grad_norm": 1.8773006200790405, |
| "learning_rate": 5.234516714174657e-07, |
| "loss": 1.5221, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.0023789601961476006, |
| "grad_norm": 1.7917876243591309, |
| "learning_rate": 5.286343612334802e-07, |
| "loss": 1.9383, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.002402283335325518, |
| "grad_norm": 1.4980329275131226, |
| "learning_rate": 5.338170510494947e-07, |
| "loss": 1.846, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.002425606474503436, |
| "grad_norm": 2.0081095695495605, |
| "learning_rate": 5.389997408655092e-07, |
| "loss": 1.8777, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.0024489296136813534, |
| "grad_norm": 1.525317907333374, |
| "learning_rate": 5.441824306815238e-07, |
| "loss": 1.971, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.002472252752859271, |
| "grad_norm": 1.4131786823272705, |
| "learning_rate": 5.493651204975382e-07, |
| "loss": 2.2224, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.002495575892037189, |
| "grad_norm": 1.164492130279541, |
| "learning_rate": 5.545478103135528e-07, |
| "loss": 1.8909, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.0025188990312151062, |
| "grad_norm": 1.9998016357421875, |
| "learning_rate": 5.597305001295673e-07, |
| "loss": 2.1197, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.002542222170393024, |
| "grad_norm": 1.6218236684799194, |
| "learning_rate": 5.649131899455818e-07, |
| "loss": 1.7799, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.0025655453095709416, |
| "grad_norm": 1.535388708114624, |
| "learning_rate": 5.700958797615963e-07, |
| "loss": 1.7878, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0025888684487488595, |
| "grad_norm": 1.4929994344711304, |
| "learning_rate": 5.752785695776108e-07, |
| "loss": 2.0802, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.002612191587926777, |
| "grad_norm": 2.183293104171753, |
| "learning_rate": 5.804612593936254e-07, |
| "loss": 2.0506, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.002635514727104695, |
| "grad_norm": 1.6339191198349, |
| "learning_rate": 5.856439492096398e-07, |
| "loss": 1.7152, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.0026588378662826123, |
| "grad_norm": 1.4886974096298218, |
| "learning_rate": 5.908266390256544e-07, |
| "loss": 1.8327, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.0026821610054605298, |
| "grad_norm": 1.4198302030563354, |
| "learning_rate": 5.960093288416688e-07, |
| "loss": 1.8342, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0027054841446384477, |
| "grad_norm": 2.041900157928467, |
| "learning_rate": 6.011920186576834e-07, |
| "loss": 1.9101, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.002728807283816365, |
| "grad_norm": 1.7576725482940674, |
| "learning_rate": 6.063747084736979e-07, |
| "loss": 2.3793, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.002752130422994283, |
| "grad_norm": 1.620440125465393, |
| "learning_rate": 6.115573982897124e-07, |
| "loss": 1.7363, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.0027754535621722005, |
| "grad_norm": 1.972102403640747, |
| "learning_rate": 6.16740088105727e-07, |
| "loss": 2.0338, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0027987767013501184, |
| "grad_norm": 1.5385342836380005, |
| "learning_rate": 6.219227779217414e-07, |
| "loss": 1.829, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.002822099840528036, |
| "grad_norm": 1.4439769983291626, |
| "learning_rate": 6.27105467737756e-07, |
| "loss": 1.9893, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.0028454229797059533, |
| "grad_norm": 1.5146026611328125, |
| "learning_rate": 6.322881575537705e-07, |
| "loss": 1.6563, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.002868746118883871, |
| "grad_norm": 1.7177401781082153, |
| "learning_rate": 6.374708473697849e-07, |
| "loss": 1.9483, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.0028920692580617887, |
| "grad_norm": 2.484865188598633, |
| "learning_rate": 6.426535371857994e-07, |
| "loss": 2.0949, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.0029153923972397066, |
| "grad_norm": 1.5320651531219482, |
| "learning_rate": 6.47836227001814e-07, |
| "loss": 1.8557, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.002938715536417624, |
| "grad_norm": 1.3804417848587036, |
| "learning_rate": 6.530189168178285e-07, |
| "loss": 1.8733, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.002962038675595542, |
| "grad_norm": 2.0832831859588623, |
| "learning_rate": 6.58201606633843e-07, |
| "loss": 1.8556, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.0029853618147734594, |
| "grad_norm": 1.2582931518554688, |
| "learning_rate": 6.633842964498576e-07, |
| "loss": 2.1239, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.0030086849539513773, |
| "grad_norm": 1.6449629068374634, |
| "learning_rate": 6.685669862658721e-07, |
| "loss": 2.1635, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.0030320080931292947, |
| "grad_norm": 1.3350502252578735, |
| "learning_rate": 6.737496760818865e-07, |
| "loss": 1.801, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.003055331232307212, |
| "grad_norm": 1.7689651250839233, |
| "learning_rate": 6.78932365897901e-07, |
| "loss": 1.7541, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.00307865437148513, |
| "grad_norm": 1.4711276292800903, |
| "learning_rate": 6.841150557139156e-07, |
| "loss": 2.3916, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.0031019775106630476, |
| "grad_norm": 1.2806516885757446, |
| "learning_rate": 6.892977455299301e-07, |
| "loss": 1.8609, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.0031253006498409655, |
| "grad_norm": 1.5531939268112183, |
| "learning_rate": 6.944804353459446e-07, |
| "loss": 1.7721, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.003148623789018883, |
| "grad_norm": 1.6541032791137695, |
| "learning_rate": 6.996631251619592e-07, |
| "loss": 2.1091, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.003171946928196801, |
| "grad_norm": 2.050734281539917, |
| "learning_rate": 7.048458149779737e-07, |
| "loss": 1.8932, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.0031952700673747183, |
| "grad_norm": 1.2903157472610474, |
| "learning_rate": 7.100285047939881e-07, |
| "loss": 2.0833, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.003218593206552636, |
| "grad_norm": 1.3316091299057007, |
| "learning_rate": 7.152111946100026e-07, |
| "loss": 1.9307, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0032419163457305536, |
| "grad_norm": 1.441341519355774, |
| "learning_rate": 7.203938844260172e-07, |
| "loss": 2.2529, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.003265239484908471, |
| "grad_norm": 2.159276008605957, |
| "learning_rate": 7.255765742420316e-07, |
| "loss": 1.847, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.003288562624086389, |
| "grad_norm": 1.8410853147506714, |
| "learning_rate": 7.307592640580462e-07, |
| "loss": 2.2465, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.0033118857632643064, |
| "grad_norm": 1.8678739070892334, |
| "learning_rate": 7.359419538740608e-07, |
| "loss": 1.9261, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.0033352089024422243, |
| "grad_norm": 1.2097922563552856, |
| "learning_rate": 7.411246436900751e-07, |
| "loss": 2.0205, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.003358532041620142, |
| "grad_norm": 1.733077883720398, |
| "learning_rate": 7.463073335060897e-07, |
| "loss": 1.8389, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.0033818551807980597, |
| "grad_norm": 1.7118474245071411, |
| "learning_rate": 7.514900233221042e-07, |
| "loss": 1.9511, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.003405178319975977, |
| "grad_norm": 1.6960872411727905, |
| "learning_rate": 7.566727131381188e-07, |
| "loss": 1.8828, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.0034285014591538946, |
| "grad_norm": 1.2409390211105347, |
| "learning_rate": 7.618554029541332e-07, |
| "loss": 1.6878, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.0034518245983318125, |
| "grad_norm": 1.3440965414047241, |
| "learning_rate": 7.670380927701478e-07, |
| "loss": 1.64, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.00347514773750973, |
| "grad_norm": 1.539393663406372, |
| "learning_rate": 7.722207825861624e-07, |
| "loss": 1.6754, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.003498470876687648, |
| "grad_norm": 1.5395653247833252, |
| "learning_rate": 7.774034724021767e-07, |
| "loss": 1.9761, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0035217940158655653, |
| "grad_norm": 2.0169472694396973, |
| "learning_rate": 7.825861622181913e-07, |
| "loss": 1.6927, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.0035451171550434832, |
| "grad_norm": 1.8776079416275024, |
| "learning_rate": 7.877688520342058e-07, |
| "loss": 1.9273, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.0035684402942214007, |
| "grad_norm": 2.078824043273926, |
| "learning_rate": 7.929515418502204e-07, |
| "loss": 1.6756, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.0035917634333993186, |
| "grad_norm": 1.407560110092163, |
| "learning_rate": 7.981342316662348e-07, |
| "loss": 1.6038, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.003615086572577236, |
| "grad_norm": 1.1770573854446411, |
| "learning_rate": 8.033169214822494e-07, |
| "loss": 1.6679, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.0036384097117551535, |
| "grad_norm": 1.2057602405548096, |
| "learning_rate": 8.08499611298264e-07, |
| "loss": 1.7916, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.0036617328509330714, |
| "grad_norm": 1.117970585823059, |
| "learning_rate": 8.136823011142783e-07, |
| "loss": 1.7974, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.003685055990110989, |
| "grad_norm": 1.5996465682983398, |
| "learning_rate": 8.188649909302929e-07, |
| "loss": 1.6053, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.0037083791292889068, |
| "grad_norm": 1.4170929193496704, |
| "learning_rate": 8.240476807463074e-07, |
| "loss": 1.7155, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.0037317022684668242, |
| "grad_norm": 1.8114391565322876, |
| "learning_rate": 8.29230370562322e-07, |
| "loss": 1.9192, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.003755025407644742, |
| "grad_norm": 1.3462793827056885, |
| "learning_rate": 8.344130603783364e-07, |
| "loss": 1.4624, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.0037783485468226596, |
| "grad_norm": 1.6305956840515137, |
| "learning_rate": 8.39595750194351e-07, |
| "loss": 1.8017, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.003801671686000577, |
| "grad_norm": 1.662576675415039, |
| "learning_rate": 8.447784400103655e-07, |
| "loss": 1.733, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.003824994825178495, |
| "grad_norm": 1.556788682937622, |
| "learning_rate": 8.499611298263799e-07, |
| "loss": 1.9586, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.0038483179643564124, |
| "grad_norm": 1.5282272100448608, |
| "learning_rate": 8.551438196423944e-07, |
| "loss": 1.8254, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.0038716411035343303, |
| "grad_norm": 1.6790592670440674, |
| "learning_rate": 8.60326509458409e-07, |
| "loss": 2.1866, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.0038949642427122478, |
| "grad_norm": 1.5164263248443604, |
| "learning_rate": 8.655091992744236e-07, |
| "loss": 1.6651, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.003918287381890166, |
| "grad_norm": 1.5002336502075195, |
| "learning_rate": 8.70691889090438e-07, |
| "loss": 1.9295, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.0039416105210680836, |
| "grad_norm": 1.2122441530227661, |
| "learning_rate": 8.758745789064526e-07, |
| "loss": 1.761, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.003964933660246001, |
| "grad_norm": 1.637898564338684, |
| "learning_rate": 8.81057268722467e-07, |
| "loss": 1.8697, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0039882567994239185, |
| "grad_norm": 0.988777220249176, |
| "learning_rate": 8.862399585384815e-07, |
| "loss": 2.1249, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.004011579938601836, |
| "grad_norm": 1.8833587169647217, |
| "learning_rate": 8.91422648354496e-07, |
| "loss": 1.6915, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.004034903077779753, |
| "grad_norm": 1.8418108224868774, |
| "learning_rate": 8.966053381705106e-07, |
| "loss": 2.0019, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.004058226216957671, |
| "grad_norm": 1.6375901699066162, |
| "learning_rate": 9.017880279865251e-07, |
| "loss": 1.7625, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.004081549356135589, |
| "grad_norm": 1.8701720237731934, |
| "learning_rate": 9.069707178025396e-07, |
| "loss": 1.801, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.004104872495313507, |
| "grad_norm": 1.4488773345947266, |
| "learning_rate": 9.121534076185541e-07, |
| "loss": 1.9971, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.004128195634491424, |
| "grad_norm": 0.9587986469268799, |
| "learning_rate": 9.173360974345686e-07, |
| "loss": 1.6253, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.004151518773669342, |
| "grad_norm": 2.6533186435699463, |
| "learning_rate": 9.225187872505831e-07, |
| "loss": 1.572, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.00417484191284726, |
| "grad_norm": 2.4528841972351074, |
| "learning_rate": 9.277014770665976e-07, |
| "loss": 1.7586, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.004198165052025177, |
| "grad_norm": 1.1871824264526367, |
| "learning_rate": 9.328841668826122e-07, |
| "loss": 1.6765, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.004221488191203095, |
| "grad_norm": 1.1292660236358643, |
| "learning_rate": 9.380668566986266e-07, |
| "loss": 2.0673, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.004244811330381013, |
| "grad_norm": 1.3055285215377808, |
| "learning_rate": 9.432495465146411e-07, |
| "loss": 1.8103, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.004268134469558931, |
| "grad_norm": 1.5225868225097656, |
| "learning_rate": 9.484322363306557e-07, |
| "loss": 2.0813, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.004291457608736848, |
| "grad_norm": 1.2439767122268677, |
| "learning_rate": 9.536149261466702e-07, |
| "loss": 1.6919, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.0043147807479147655, |
| "grad_norm": 1.2424002885818481, |
| "learning_rate": 9.587976159626847e-07, |
| "loss": 1.9506, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.0043381038870926834, |
| "grad_norm": 0.9796323776245117, |
| "learning_rate": 9.639803057786992e-07, |
| "loss": 1.7342, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.0043614270262706005, |
| "grad_norm": 1.2240192890167236, |
| "learning_rate": 9.691629955947138e-07, |
| "loss": 2.0646, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.004384750165448518, |
| "grad_norm": 0.8779449462890625, |
| "learning_rate": 9.743456854107281e-07, |
| "loss": 1.4535, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.004408073304626436, |
| "grad_norm": 1.3131407499313354, |
| "learning_rate": 9.795283752267427e-07, |
| "loss": 1.9817, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.004431396443804354, |
| "grad_norm": 1.3259912729263306, |
| "learning_rate": 9.847110650427573e-07, |
| "loss": 1.709, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.004454719582982271, |
| "grad_norm": 1.4236465692520142, |
| "learning_rate": 9.898937548587718e-07, |
| "loss": 1.7059, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.004478042722160189, |
| "grad_norm": 1.2791959047317505, |
| "learning_rate": 9.950764446747862e-07, |
| "loss": 1.9633, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.004501365861338107, |
| "grad_norm": 0.9857053160667419, |
| "learning_rate": 1.0002591344908007e-06, |
| "loss": 1.807, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.004524689000516025, |
| "grad_norm": 1.264302372932434, |
| "learning_rate": 1.0054418243068153e-06, |
| "loss": 1.5389, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.004548012139693942, |
| "grad_norm": 1.2205390930175781, |
| "learning_rate": 1.0106245141228298e-06, |
| "loss": 1.4549, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.00457133527887186, |
| "grad_norm": 1.055471420288086, |
| "learning_rate": 1.0158072039388444e-06, |
| "loss": 1.6931, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.004594658418049778, |
| "grad_norm": 1.0585546493530273, |
| "learning_rate": 1.020989893754859e-06, |
| "loss": 1.8054, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.004617981557227695, |
| "grad_norm": 2.16025972366333, |
| "learning_rate": 1.0261725835708735e-06, |
| "loss": 2.0077, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.004641304696405613, |
| "grad_norm": 2.125786781311035, |
| "learning_rate": 1.0313552733868879e-06, |
| "loss": 1.9117, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.0046646278355835305, |
| "grad_norm": 1.3560391664505005, |
| "learning_rate": 1.0365379632029024e-06, |
| "loss": 1.9871, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.004687950974761448, |
| "grad_norm": 1.3505181074142456, |
| "learning_rate": 1.041720653018917e-06, |
| "loss": 1.714, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.004711274113939365, |
| "grad_norm": 1.1724427938461304, |
| "learning_rate": 1.0469033428349313e-06, |
| "loss": 1.7611, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.004734597253117283, |
| "grad_norm": 1.1746799945831299, |
| "learning_rate": 1.0520860326509459e-06, |
| "loss": 1.867, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.004757920392295201, |
| "grad_norm": 1.0976382493972778, |
| "learning_rate": 1.0572687224669604e-06, |
| "loss": 1.808, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.004781243531473118, |
| "grad_norm": 1.3842298984527588, |
| "learning_rate": 1.062451412282975e-06, |
| "loss": 1.7973, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.004804566670651036, |
| "grad_norm": 1.6715288162231445, |
| "learning_rate": 1.0676341020989893e-06, |
| "loss": 1.9817, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.004827889809828954, |
| "grad_norm": 1.0734590291976929, |
| "learning_rate": 1.072816791915004e-06, |
| "loss": 1.4297, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.004851212949006872, |
| "grad_norm": 1.0182546377182007, |
| "learning_rate": 1.0779994817310185e-06, |
| "loss": 1.713, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.004874536088184789, |
| "grad_norm": 1.1884313821792603, |
| "learning_rate": 1.083182171547033e-06, |
| "loss": 1.5234, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.004897859227362707, |
| "grad_norm": 1.520266056060791, |
| "learning_rate": 1.0883648613630476e-06, |
| "loss": 2.0598, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.004921182366540625, |
| "grad_norm": 1.1709904670715332, |
| "learning_rate": 1.0935475511790621e-06, |
| "loss": 2.1461, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.004944505505718542, |
| "grad_norm": 1.2634027004241943, |
| "learning_rate": 1.0987302409950765e-06, |
| "loss": 1.5076, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.00496782864489646, |
| "grad_norm": 1.490717887878418, |
| "learning_rate": 1.103912930811091e-06, |
| "loss": 1.8628, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.004991151784074378, |
| "grad_norm": 2.077373743057251, |
| "learning_rate": 1.1090956206271056e-06, |
| "loss": 1.9295, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.0050144749232522955, |
| "grad_norm": 1.647877812385559, |
| "learning_rate": 1.1142783104431202e-06, |
| "loss": 1.7929, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.0050377980624302125, |
| "grad_norm": 1.1937353610992432, |
| "learning_rate": 1.1194610002591345e-06, |
| "loss": 1.6509, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.00506112120160813, |
| "grad_norm": 1.0805108547210693, |
| "learning_rate": 1.124643690075149e-06, |
| "loss": 1.6447, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.005084444340786048, |
| "grad_norm": 1.1077872514724731, |
| "learning_rate": 1.1298263798911636e-06, |
| "loss": 1.7675, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.005107767479963966, |
| "grad_norm": 0.8648241758346558, |
| "learning_rate": 1.135009069707178e-06, |
| "loss": 1.6687, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.005131090619141883, |
| "grad_norm": 1.0522700548171997, |
| "learning_rate": 1.1401917595231925e-06, |
| "loss": 1.2878, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.005154413758319801, |
| "grad_norm": 1.3021256923675537, |
| "learning_rate": 1.145374449339207e-06, |
| "loss": 1.8535, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.005177736897497719, |
| "grad_norm": 1.2912962436676025, |
| "learning_rate": 1.1505571391552216e-06, |
| "loss": 1.865, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.005201060036675636, |
| "grad_norm": 1.6733994483947754, |
| "learning_rate": 1.1557398289712362e-06, |
| "loss": 1.5748, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.005224383175853554, |
| "grad_norm": 1.0865724086761475, |
| "learning_rate": 1.1609225187872508e-06, |
| "loss": 1.8159, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.005247706315031472, |
| "grad_norm": 1.1498301029205322, |
| "learning_rate": 1.1661052086032653e-06, |
| "loss": 1.8579, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.00527102945420939, |
| "grad_norm": 1.9360573291778564, |
| "learning_rate": 1.1712878984192797e-06, |
| "loss": 1.7366, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.005294352593387307, |
| "grad_norm": 1.0133939981460571, |
| "learning_rate": 1.1764705882352942e-06, |
| "loss": 1.4571, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.005317675732565225, |
| "grad_norm": 1.6443811655044556, |
| "learning_rate": 1.1816532780513088e-06, |
| "loss": 1.5312, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.0053409988717431425, |
| "grad_norm": 1.1923338174819946, |
| "learning_rate": 1.1868359678673233e-06, |
| "loss": 1.6993, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.0053643220109210596, |
| "grad_norm": 1.0345349311828613, |
| "learning_rate": 1.1920186576833377e-06, |
| "loss": 1.5739, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0053876451500989775, |
| "grad_norm": 0.9833806753158569, |
| "learning_rate": 1.1972013474993522e-06, |
| "loss": 1.819, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.005410968289276895, |
| "grad_norm": 1.3315545320510864, |
| "learning_rate": 1.2023840373153668e-06, |
| "loss": 1.9472, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.005434291428454813, |
| "grad_norm": 1.0042314529418945, |
| "learning_rate": 1.2075667271313812e-06, |
| "loss": 1.993, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.00545761456763273, |
| "grad_norm": 1.2731118202209473, |
| "learning_rate": 1.2127494169473957e-06, |
| "loss": 1.6763, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.005480937706810648, |
| "grad_norm": 0.9664155840873718, |
| "learning_rate": 1.2179321067634103e-06, |
| "loss": 1.3091, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.005504260845988566, |
| "grad_norm": 1.6930897235870361, |
| "learning_rate": 1.2231147965794248e-06, |
| "loss": 1.6111, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.005527583985166483, |
| "grad_norm": 0.9807016253471375, |
| "learning_rate": 1.2282974863954394e-06, |
| "loss": 1.6131, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.005550907124344401, |
| "grad_norm": 1.321951150894165, |
| "learning_rate": 1.233480176211454e-06, |
| "loss": 1.242, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.005574230263522319, |
| "grad_norm": 1.1465637683868408, |
| "learning_rate": 1.2386628660274685e-06, |
| "loss": 1.7035, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.005597553402700237, |
| "grad_norm": 2.4264347553253174, |
| "learning_rate": 1.2438455558434829e-06, |
| "loss": 1.9859, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.005620876541878154, |
| "grad_norm": 1.429149866104126, |
| "learning_rate": 1.2490282456594974e-06, |
| "loss": 1.8249, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.005644199681056072, |
| "grad_norm": 1.1119049787521362, |
| "learning_rate": 1.254210935475512e-06, |
| "loss": 1.8005, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.00566752282023399, |
| "grad_norm": 1.9002227783203125, |
| "learning_rate": 1.2593936252915265e-06, |
| "loss": 1.6951, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.005690845959411907, |
| "grad_norm": 1.067659854888916, |
| "learning_rate": 1.264576315107541e-06, |
| "loss": 1.799, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.0057141690985898245, |
| "grad_norm": 1.2947990894317627, |
| "learning_rate": 1.2697590049235552e-06, |
| "loss": 1.7837, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.005737492237767742, |
| "grad_norm": 1.0790272951126099, |
| "learning_rate": 1.2749416947395698e-06, |
| "loss": 1.67, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.00576081537694566, |
| "grad_norm": 1.3589330911636353, |
| "learning_rate": 1.2801243845555843e-06, |
| "loss": 1.9282, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.005784138516123577, |
| "grad_norm": 1.4140998125076294, |
| "learning_rate": 1.285307074371599e-06, |
| "loss": 1.6708, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.005807461655301495, |
| "grad_norm": 1.000994086265564, |
| "learning_rate": 1.2904897641876135e-06, |
| "loss": 1.4077, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.005830784794479413, |
| "grad_norm": 1.3655062913894653, |
| "learning_rate": 1.295672454003628e-06, |
| "loss": 1.8862, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.005854107933657331, |
| "grad_norm": 1.1164065599441528, |
| "learning_rate": 1.3008551438196426e-06, |
| "loss": 1.528, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.005877431072835248, |
| "grad_norm": 1.1792149543762207, |
| "learning_rate": 1.306037833635657e-06, |
| "loss": 1.2879, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.005900754212013166, |
| "grad_norm": 2.236320734024048, |
| "learning_rate": 1.3112205234516715e-06, |
| "loss": 1.4929, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.005924077351191084, |
| "grad_norm": 1.8795088529586792, |
| "learning_rate": 1.316403213267686e-06, |
| "loss": 1.2468, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.005947400490369001, |
| "grad_norm": 1.2248806953430176, |
| "learning_rate": 1.3215859030837006e-06, |
| "loss": 1.769, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.005970723629546919, |
| "grad_norm": 1.252236008644104, |
| "learning_rate": 1.3267685928997152e-06, |
| "loss": 1.9014, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.005994046768724837, |
| "grad_norm": 1.3926386833190918, |
| "learning_rate": 1.3319512827157297e-06, |
| "loss": 1.9599, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.0060173699079027546, |
| "grad_norm": 1.5681990385055542, |
| "learning_rate": 1.3371339725317443e-06, |
| "loss": 1.8109, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.006040693047080672, |
| "grad_norm": 1.6841275691986084, |
| "learning_rate": 1.3423166623477584e-06, |
| "loss": 1.4601, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.0060640161862585895, |
| "grad_norm": 1.5262291431427002, |
| "learning_rate": 1.347499352163773e-06, |
| "loss": 1.6493, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.006087339325436507, |
| "grad_norm": 1.0905576944351196, |
| "learning_rate": 1.3526820419797875e-06, |
| "loss": 2.0847, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.006110662464614424, |
| "grad_norm": 1.4682683944702148, |
| "learning_rate": 1.357864731795802e-06, |
| "loss": 1.6889, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.006133985603792342, |
| "grad_norm": 1.1054515838623047, |
| "learning_rate": 1.3630474216118166e-06, |
| "loss": 1.55, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.00615730874297026, |
| "grad_norm": 1.3931388854980469, |
| "learning_rate": 1.3682301114278312e-06, |
| "loss": 1.655, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.006180631882148178, |
| "grad_norm": 1.1766420602798462, |
| "learning_rate": 1.3734128012438458e-06, |
| "loss": 1.9555, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.006203955021326095, |
| "grad_norm": 1.1652954816818237, |
| "learning_rate": 1.3785954910598601e-06, |
| "loss": 1.8446, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.006227278160504013, |
| "grad_norm": 1.378980278968811, |
| "learning_rate": 1.3837781808758747e-06, |
| "loss": 1.4449, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.006250601299681931, |
| "grad_norm": 1.2017453908920288, |
| "learning_rate": 1.3889608706918892e-06, |
| "loss": 1.6272, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.006273924438859848, |
| "grad_norm": 1.2221115827560425, |
| "learning_rate": 1.3941435605079038e-06, |
| "loss": 1.7299, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.006297247578037766, |
| "grad_norm": 1.189775824546814, |
| "learning_rate": 1.3993262503239183e-06, |
| "loss": 1.1664, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.006320570717215684, |
| "grad_norm": 1.0103381872177124, |
| "learning_rate": 1.404508940139933e-06, |
| "loss": 1.3519, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.006343893856393602, |
| "grad_norm": 1.1243481636047363, |
| "learning_rate": 1.4096916299559475e-06, |
| "loss": 1.6704, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.006367216995571519, |
| "grad_norm": 1.8137811422348022, |
| "learning_rate": 1.4148743197719616e-06, |
| "loss": 1.279, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.0063905401347494365, |
| "grad_norm": 1.0875202417373657, |
| "learning_rate": 1.4200570095879762e-06, |
| "loss": 1.1564, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.0064138632739273544, |
| "grad_norm": 1.0839550495147705, |
| "learning_rate": 1.4252396994039907e-06, |
| "loss": 1.7263, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.006437186413105272, |
| "grad_norm": 1.7203173637390137, |
| "learning_rate": 1.4304223892200053e-06, |
| "loss": 1.9309, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.006460509552283189, |
| "grad_norm": 1.3320658206939697, |
| "learning_rate": 1.4356050790360198e-06, |
| "loss": 1.8276, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.006483832691461107, |
| "grad_norm": 1.5260910987854004, |
| "learning_rate": 1.4407877688520344e-06, |
| "loss": 1.413, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.006507155830639025, |
| "grad_norm": 1.2401058673858643, |
| "learning_rate": 1.445970458668049e-06, |
| "loss": 1.4087, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.006530478969816942, |
| "grad_norm": 1.2722922563552856, |
| "learning_rate": 1.4511531484840633e-06, |
| "loss": 1.6216, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.00655380210899486, |
| "grad_norm": 1.2668229341506958, |
| "learning_rate": 1.4563358383000779e-06, |
| "loss": 1.6252, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.006577125248172778, |
| "grad_norm": 1.4556583166122437, |
| "learning_rate": 1.4615185281160924e-06, |
| "loss": 2.3276, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.006600448387350696, |
| "grad_norm": 1.537610411643982, |
| "learning_rate": 1.466701217932107e-06, |
| "loss": 1.4319, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.006623771526528613, |
| "grad_norm": 1.3130170106887817, |
| "learning_rate": 1.4718839077481215e-06, |
| "loss": 1.4978, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.006647094665706531, |
| "grad_norm": 1.5020934343338013, |
| "learning_rate": 1.477066597564136e-06, |
| "loss": 1.8697, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.006670417804884449, |
| "grad_norm": 1.6949779987335205, |
| "learning_rate": 1.4822492873801502e-06, |
| "loss": 1.7433, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.006693740944062366, |
| "grad_norm": 1.5566325187683105, |
| "learning_rate": 1.4874319771961648e-06, |
| "loss": 1.5674, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.006717064083240284, |
| "grad_norm": 1.015093445777893, |
| "learning_rate": 1.4926146670121793e-06, |
| "loss": 1.9903, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.0067403872224182015, |
| "grad_norm": 2.229853868484497, |
| "learning_rate": 1.497797356828194e-06, |
| "loss": 1.1905, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.006763710361596119, |
| "grad_norm": 1.5241860151290894, |
| "learning_rate": 1.5029800466442085e-06, |
| "loss": 1.958, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.006787033500774036, |
| "grad_norm": 0.8666454553604126, |
| "learning_rate": 1.508162736460223e-06, |
| "loss": 1.7141, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.006810356639951954, |
| "grad_norm": 1.4594520330429077, |
| "learning_rate": 1.5133454262762376e-06, |
| "loss": 1.7235, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.006833679779129872, |
| "grad_norm": 1.3267074823379517, |
| "learning_rate": 1.518528116092252e-06, |
| "loss": 1.6172, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.006857002918307789, |
| "grad_norm": 1.5386312007904053, |
| "learning_rate": 1.5237108059082665e-06, |
| "loss": 1.4843, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.006880326057485707, |
| "grad_norm": 1.3275539875030518, |
| "learning_rate": 1.528893495724281e-06, |
| "loss": 1.5444, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.006903649196663625, |
| "grad_norm": 1.1002707481384277, |
| "learning_rate": 1.5340761855402956e-06, |
| "loss": 1.717, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.006926972335841543, |
| "grad_norm": 1.172974944114685, |
| "learning_rate": 1.5392588753563102e-06, |
| "loss": 1.6963, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.00695029547501946, |
| "grad_norm": 1.0728440284729004, |
| "learning_rate": 1.5444415651723247e-06, |
| "loss": 1.6228, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.006973618614197378, |
| "grad_norm": 1.274348258972168, |
| "learning_rate": 1.5496242549883393e-06, |
| "loss": 1.2559, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.006996941753375296, |
| "grad_norm": 1.2520028352737427, |
| "learning_rate": 1.5548069448043534e-06, |
| "loss": 1.6118, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.007020264892553213, |
| "grad_norm": 1.5844305753707886, |
| "learning_rate": 1.559989634620368e-06, |
| "loss": 1.5645, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.007043588031731131, |
| "grad_norm": 2.285438299179077, |
| "learning_rate": 1.5651723244363825e-06, |
| "loss": 1.4541, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.007066911170909049, |
| "grad_norm": 1.2873152494430542, |
| "learning_rate": 1.570355014252397e-06, |
| "loss": 1.4835, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.0070902343100869665, |
| "grad_norm": 1.1332640647888184, |
| "learning_rate": 1.5755377040684116e-06, |
| "loss": 1.8279, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.0071135574492648835, |
| "grad_norm": 1.6483525037765503, |
| "learning_rate": 1.5807203938844262e-06, |
| "loss": 1.2509, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.007136880588442801, |
| "grad_norm": 1.0219485759735107, |
| "learning_rate": 1.5859030837004408e-06, |
| "loss": 1.8421, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.007160203727620719, |
| "grad_norm": 1.2478340864181519, |
| "learning_rate": 1.5910857735164551e-06, |
| "loss": 1.9144, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.007183526866798637, |
| "grad_norm": 1.4016437530517578, |
| "learning_rate": 1.5962684633324697e-06, |
| "loss": 1.5146, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.007206850005976554, |
| "grad_norm": 1.1399790048599243, |
| "learning_rate": 1.6014511531484842e-06, |
| "loss": 1.6714, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.007230173145154472, |
| "grad_norm": 2.047961473464966, |
| "learning_rate": 1.6066338429644988e-06, |
| "loss": 1.1777, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.00725349628433239, |
| "grad_norm": 1.1410201787948608, |
| "learning_rate": 1.6118165327805133e-06, |
| "loss": 1.6783, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.007276819423510307, |
| "grad_norm": 1.2840640544891357, |
| "learning_rate": 1.616999222596528e-06, |
| "loss": 1.9351, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.007300142562688225, |
| "grad_norm": 0.9116181135177612, |
| "learning_rate": 1.6221819124125425e-06, |
| "loss": 1.7705, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.007323465701866143, |
| "grad_norm": 1.3190463781356812, |
| "learning_rate": 1.6273646022285566e-06, |
| "loss": 1.4484, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.007346788841044061, |
| "grad_norm": 0.9988270401954651, |
| "learning_rate": 1.6325472920445712e-06, |
| "loss": 1.5159, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.007370111980221978, |
| "grad_norm": 0.8620725870132446, |
| "learning_rate": 1.6377299818605857e-06, |
| "loss": 1.5605, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.007393435119399896, |
| "grad_norm": 1.284604549407959, |
| "learning_rate": 1.6429126716766003e-06, |
| "loss": 1.4822, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.0074167582585778135, |
| "grad_norm": 1.2546097040176392, |
| "learning_rate": 1.6480953614926148e-06, |
| "loss": 1.436, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.0074400813977557306, |
| "grad_norm": 0.9116978645324707, |
| "learning_rate": 1.6532780513086294e-06, |
| "loss": 1.2708, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.0074634045369336485, |
| "grad_norm": 0.9910548329353333, |
| "learning_rate": 1.658460741124644e-06, |
| "loss": 1.8144, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.007486727676111566, |
| "grad_norm": 1.9879093170166016, |
| "learning_rate": 1.6636434309406583e-06, |
| "loss": 1.4826, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.007510050815289484, |
| "grad_norm": 1.0845030546188354, |
| "learning_rate": 1.6688261207566729e-06, |
| "loss": 1.3364, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.007533373954467401, |
| "grad_norm": 1.342966079711914, |
| "learning_rate": 1.6740088105726874e-06, |
| "loss": 1.6453, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.007556697093645319, |
| "grad_norm": 0.9570252895355225, |
| "learning_rate": 1.679191500388702e-06, |
| "loss": 1.5384, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.007580020232823237, |
| "grad_norm": 1.531516671180725, |
| "learning_rate": 1.6843741902047165e-06, |
| "loss": 1.5775, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.007603343372001154, |
| "grad_norm": 1.4623240232467651, |
| "learning_rate": 1.689556880020731e-06, |
| "loss": 1.7159, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.007626666511179072, |
| "grad_norm": 1.109586238861084, |
| "learning_rate": 1.6947395698367454e-06, |
| "loss": 1.7403, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.00764998965035699, |
| "grad_norm": 1.3199604749679565, |
| "learning_rate": 1.6999222596527598e-06, |
| "loss": 1.7208, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.007673312789534908, |
| "grad_norm": 1.0979784727096558, |
| "learning_rate": 1.7051049494687743e-06, |
| "loss": 1.6097, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.007696635928712825, |
| "grad_norm": 1.0952926874160767, |
| "learning_rate": 1.710287639284789e-06, |
| "loss": 1.8262, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.007719959067890743, |
| "grad_norm": 1.1149373054504395, |
| "learning_rate": 1.7154703291008035e-06, |
| "loss": 1.5762, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.007743282207068661, |
| "grad_norm": 1.2090753316879272, |
| "learning_rate": 1.720653018916818e-06, |
| "loss": 1.6161, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.007766605346246578, |
| "grad_norm": 1.3476163148880005, |
| "learning_rate": 1.7258357087328326e-06, |
| "loss": 1.6854, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.0077899284854244955, |
| "grad_norm": 1.3222614526748657, |
| "learning_rate": 1.7310183985488471e-06, |
| "loss": 1.5996, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.007813251624602413, |
| "grad_norm": 1.2350871562957764, |
| "learning_rate": 1.7362010883648615e-06, |
| "loss": 1.5052, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.007836574763780331, |
| "grad_norm": 1.4628745317459106, |
| "learning_rate": 1.741383778180876e-06, |
| "loss": 1.6268, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.00785989790295825, |
| "grad_norm": 1.3481048345565796, |
| "learning_rate": 1.7465664679968906e-06, |
| "loss": 1.4308, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.007883221042136167, |
| "grad_norm": 1.0008901357650757, |
| "learning_rate": 1.7517491578129052e-06, |
| "loss": 1.6487, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.007906544181314083, |
| "grad_norm": 2.4258437156677246, |
| "learning_rate": 1.7569318476289195e-06, |
| "loss": 1.5327, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.007929867320492001, |
| "grad_norm": 1.3444914817810059, |
| "learning_rate": 1.762114537444934e-06, |
| "loss": 1.5257, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.007953190459669919, |
| "grad_norm": 2.297591209411621, |
| "learning_rate": 1.7672972272609486e-06, |
| "loss": 1.9581, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.007976513598847837, |
| "grad_norm": 1.107711672782898, |
| "learning_rate": 1.772479917076963e-06, |
| "loss": 1.3486, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.007999836738025755, |
| "grad_norm": 1.4064106941223145, |
| "learning_rate": 1.7776626068929775e-06, |
| "loss": 1.3169, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.008023159877203673, |
| "grad_norm": 1.1236720085144043, |
| "learning_rate": 1.782845296708992e-06, |
| "loss": 2.0225, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.00804648301638159, |
| "grad_norm": 1.9214081764221191, |
| "learning_rate": 1.7880279865250066e-06, |
| "loss": 1.7269, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.008069806155559507, |
| "grad_norm": 1.1544204950332642, |
| "learning_rate": 1.7932106763410212e-06, |
| "loss": 1.8407, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.008093129294737425, |
| "grad_norm": 1.3266545534133911, |
| "learning_rate": 1.7983933661570358e-06, |
| "loss": 1.3316, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.008116452433915343, |
| "grad_norm": 1.4208300113677979, |
| "learning_rate": 1.8035760559730501e-06, |
| "loss": 1.7712, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.00813977557309326, |
| "grad_norm": 1.1849939823150635, |
| "learning_rate": 1.8087587457890647e-06, |
| "loss": 1.3843, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.008163098712271178, |
| "grad_norm": 0.9147690534591675, |
| "learning_rate": 1.8139414356050792e-06, |
| "loss": 1.703, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.008186421851449096, |
| "grad_norm": 1.2026822566986084, |
| "learning_rate": 1.8191241254210938e-06, |
| "loss": 1.642, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.008209744990627014, |
| "grad_norm": 1.6620279550552368, |
| "learning_rate": 1.8243068152371081e-06, |
| "loss": 1.2861, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.00823306812980493, |
| "grad_norm": 1.20318603515625, |
| "learning_rate": 1.8294895050531227e-06, |
| "loss": 1.7781, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.008256391268982848, |
| "grad_norm": 1.117148756980896, |
| "learning_rate": 1.8346721948691372e-06, |
| "loss": 1.7056, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.008279714408160766, |
| "grad_norm": 1.3435394763946533, |
| "learning_rate": 1.8398548846851516e-06, |
| "loss": 1.7352, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.008303037547338684, |
| "grad_norm": 1.6550534963607788, |
| "learning_rate": 1.8450375745011662e-06, |
| "loss": 1.4283, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.008326360686516602, |
| "grad_norm": 1.0326530933380127, |
| "learning_rate": 1.8502202643171807e-06, |
| "loss": 1.8726, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.00834968382569452, |
| "grad_norm": 1.1237214803695679, |
| "learning_rate": 1.8554029541331953e-06, |
| "loss": 1.7547, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.008373006964872438, |
| "grad_norm": 1.3457711935043335, |
| "learning_rate": 1.8605856439492098e-06, |
| "loss": 1.5047, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.008396330104050354, |
| "grad_norm": 1.3615081310272217, |
| "learning_rate": 1.8657683337652244e-06, |
| "loss": 1.3476, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.008419653243228272, |
| "grad_norm": 1.4443084001541138, |
| "learning_rate": 1.870951023581239e-06, |
| "loss": 1.4259, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.00844297638240619, |
| "grad_norm": 0.9154095649719238, |
| "learning_rate": 1.8761337133972533e-06, |
| "loss": 1.6089, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.008466299521584108, |
| "grad_norm": 1.1972756385803223, |
| "learning_rate": 1.8813164032132679e-06, |
| "loss": 1.5704, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.008489622660762025, |
| "grad_norm": 1.1325738430023193, |
| "learning_rate": 1.8864990930292822e-06, |
| "loss": 1.7252, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.008512945799939943, |
| "grad_norm": 1.2257301807403564, |
| "learning_rate": 1.8916817828452968e-06, |
| "loss": 1.5124, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.008536268939117861, |
| "grad_norm": 1.7714002132415771, |
| "learning_rate": 1.8968644726613113e-06, |
| "loss": 1.5799, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.008559592078295777, |
| "grad_norm": 1.1215579509735107, |
| "learning_rate": 1.9020471624773259e-06, |
| "loss": 1.7692, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.008582915217473695, |
| "grad_norm": 1.3264069557189941, |
| "learning_rate": 1.9072298522933404e-06, |
| "loss": 1.7848, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.008606238356651613, |
| "grad_norm": 0.9898104667663574, |
| "learning_rate": 1.912412542109355e-06, |
| "loss": 1.945, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.008629561495829531, |
| "grad_norm": 0.9507944583892822, |
| "learning_rate": 1.9175952319253693e-06, |
| "loss": 1.6469, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.008652884635007449, |
| "grad_norm": 1.1940997838974, |
| "learning_rate": 1.9227779217413837e-06, |
| "loss": 1.5144, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.008676207774185367, |
| "grad_norm": 1.2926305532455444, |
| "learning_rate": 1.9279606115573985e-06, |
| "loss": 1.6527, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.008699530913363285, |
| "grad_norm": 0.9909786581993103, |
| "learning_rate": 1.933143301373413e-06, |
| "loss": 1.8003, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.008722854052541201, |
| "grad_norm": 1.3900662660598755, |
| "learning_rate": 1.9383259911894276e-06, |
| "loss": 1.7743, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.008746177191719119, |
| "grad_norm": 0.9942039251327515, |
| "learning_rate": 1.943508681005442e-06, |
| "loss": 1.5635, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.008769500330897037, |
| "grad_norm": 1.3887672424316406, |
| "learning_rate": 1.9486913708214563e-06, |
| "loss": 1.744, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.008792823470074955, |
| "grad_norm": 1.2873059511184692, |
| "learning_rate": 1.953874060637471e-06, |
| "loss": 1.64, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.008816146609252873, |
| "grad_norm": 1.2259247303009033, |
| "learning_rate": 1.9590567504534854e-06, |
| "loss": 1.6418, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.00883946974843079, |
| "grad_norm": 1.5709097385406494, |
| "learning_rate": 1.9642394402695e-06, |
| "loss": 1.4343, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.008862792887608708, |
| "grad_norm": 1.016625165939331, |
| "learning_rate": 1.9694221300855145e-06, |
| "loss": 1.5838, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.008886116026786626, |
| "grad_norm": 1.5763674974441528, |
| "learning_rate": 1.9746048199015293e-06, |
| "loss": 1.3391, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.008909439165964542, |
| "grad_norm": 1.014722466468811, |
| "learning_rate": 1.9797875097175436e-06, |
| "loss": 1.7185, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.00893276230514246, |
| "grad_norm": 1.5255705118179321, |
| "learning_rate": 1.984970199533558e-06, |
| "loss": 1.5749, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.008956085444320378, |
| "grad_norm": 1.4036648273468018, |
| "learning_rate": 1.9901528893495723e-06, |
| "loss": 1.4134, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.008979408583498296, |
| "grad_norm": 1.327813982963562, |
| "learning_rate": 1.995335579165587e-06, |
| "loss": 1.8475, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.009002731722676214, |
| "grad_norm": 1.357269287109375, |
| "learning_rate": 2.0005182689816014e-06, |
| "loss": 1.4145, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.009026054861854132, |
| "grad_norm": 1.4663738012313843, |
| "learning_rate": 2.005700958797616e-06, |
| "loss": 1.5207, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.00904937800103205, |
| "grad_norm": 0.9792691469192505, |
| "learning_rate": 2.0108836486136305e-06, |
| "loss": 1.7392, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.009072701140209966, |
| "grad_norm": 1.9074856042861938, |
| "learning_rate": 2.0160663384296453e-06, |
| "loss": 1.5931, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.009096024279387884, |
| "grad_norm": 1.562455654144287, |
| "learning_rate": 2.0212490282456597e-06, |
| "loss": 1.3503, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.009119347418565802, |
| "grad_norm": 1.6827714443206787, |
| "learning_rate": 2.026431718061674e-06, |
| "loss": 1.8409, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.00914267055774372, |
| "grad_norm": 0.969691276550293, |
| "learning_rate": 2.0316144078776888e-06, |
| "loss": 1.5167, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.009165993696921637, |
| "grad_norm": 1.1107996702194214, |
| "learning_rate": 2.036797097693703e-06, |
| "loss": 1.5723, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.009189316836099555, |
| "grad_norm": 0.9862359762191772, |
| "learning_rate": 2.041979787509718e-06, |
| "loss": 1.1188, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.009212639975277473, |
| "grad_norm": 1.4997074604034424, |
| "learning_rate": 2.0471624773257322e-06, |
| "loss": 1.6742, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.00923596311445539, |
| "grad_norm": 1.1336885690689087, |
| "learning_rate": 2.052345167141747e-06, |
| "loss": 1.5602, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.009259286253633307, |
| "grad_norm": 1.4929397106170654, |
| "learning_rate": 2.057527856957761e-06, |
| "loss": 1.4891, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.009282609392811225, |
| "grad_norm": 1.3118637800216675, |
| "learning_rate": 2.0627105467737757e-06, |
| "loss": 1.5758, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.009305932531989143, |
| "grad_norm": 1.1043623685836792, |
| "learning_rate": 2.06789323658979e-06, |
| "loss": 1.9455, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.009329255671167061, |
| "grad_norm": 1.3472813367843628, |
| "learning_rate": 2.073075926405805e-06, |
| "loss": 1.4657, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.009352578810344979, |
| "grad_norm": 1.5614628791809082, |
| "learning_rate": 2.078258616221819e-06, |
| "loss": 1.3351, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.009375901949522897, |
| "grad_norm": 1.393477439880371, |
| "learning_rate": 2.083441306037834e-06, |
| "loss": 1.8887, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.009399225088700813, |
| "grad_norm": 1.0576095581054688, |
| "learning_rate": 2.0886239958538483e-06, |
| "loss": 1.7814, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.00942254822787873, |
| "grad_norm": 1.5161347389221191, |
| "learning_rate": 2.0938066856698626e-06, |
| "loss": 1.2316, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.009445871367056649, |
| "grad_norm": 1.05890691280365, |
| "learning_rate": 2.0989893754858774e-06, |
| "loss": 1.5303, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.009469194506234567, |
| "grad_norm": 0.801816463470459, |
| "learning_rate": 2.1041720653018918e-06, |
| "loss": 1.5165, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.009492517645412485, |
| "grad_norm": 1.2811832427978516, |
| "learning_rate": 2.1093547551179065e-06, |
| "loss": 1.8638, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.009515840784590402, |
| "grad_norm": 1.2984956502914429, |
| "learning_rate": 2.114537444933921e-06, |
| "loss": 1.4195, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.00953916392376832, |
| "grad_norm": 2.3772926330566406, |
| "learning_rate": 2.1197201347499356e-06, |
| "loss": 1.2616, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.009562487062946236, |
| "grad_norm": 1.102181315422058, |
| "learning_rate": 2.12490282456595e-06, |
| "loss": 1.6683, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.009585810202124154, |
| "grad_norm": 1.4473963975906372, |
| "learning_rate": 2.1300855143819643e-06, |
| "loss": 1.6474, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.009609133341302072, |
| "grad_norm": 2.3995816707611084, |
| "learning_rate": 2.1352682041979787e-06, |
| "loss": 1.6203, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.00963245648047999, |
| "grad_norm": 0.9490773677825928, |
| "learning_rate": 2.1404508940139935e-06, |
| "loss": 1.8082, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.009655779619657908, |
| "grad_norm": 0.9358771443367004, |
| "learning_rate": 2.145633583830008e-06, |
| "loss": 1.5929, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.009679102758835826, |
| "grad_norm": 0.9875616431236267, |
| "learning_rate": 2.1508162736460226e-06, |
| "loss": 1.4312, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.009702425898013744, |
| "grad_norm": 1.197416067123413, |
| "learning_rate": 2.155998963462037e-06, |
| "loss": 1.3165, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.00972574903719166, |
| "grad_norm": 2.0210750102996826, |
| "learning_rate": 2.1611816532780513e-06, |
| "loss": 1.4962, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.009749072176369578, |
| "grad_norm": 1.2700085639953613, |
| "learning_rate": 2.166364343094066e-06, |
| "loss": 1.6101, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.009772395315547496, |
| "grad_norm": 1.124679684638977, |
| "learning_rate": 2.1715470329100804e-06, |
| "loss": 1.7477, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.009795718454725414, |
| "grad_norm": 1.178290843963623, |
| "learning_rate": 2.176729722726095e-06, |
| "loss": 1.4108, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.009819041593903332, |
| "grad_norm": 1.792117953300476, |
| "learning_rate": 2.1819124125421095e-06, |
| "loss": 1.5568, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.00984236473308125, |
| "grad_norm": 1.7381610870361328, |
| "learning_rate": 2.1870951023581243e-06, |
| "loss": 1.3229, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.009865687872259167, |
| "grad_norm": 1.023553490638733, |
| "learning_rate": 2.1922777921741386e-06, |
| "loss": 1.1633, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.009889011011437084, |
| "grad_norm": 1.5537900924682617, |
| "learning_rate": 2.197460481990153e-06, |
| "loss": 1.291, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.009912334150615001, |
| "grad_norm": 1.722598671913147, |
| "learning_rate": 2.2026431718061673e-06, |
| "loss": 1.5201, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.00993565728979292, |
| "grad_norm": 1.546295166015625, |
| "learning_rate": 2.207825861622182e-06, |
| "loss": 1.3554, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.009958980428970837, |
| "grad_norm": 1.4075593948364258, |
| "learning_rate": 2.2130085514381964e-06, |
| "loss": 1.3831, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.009982303568148755, |
| "grad_norm": 1.441125512123108, |
| "learning_rate": 2.218191241254211e-06, |
| "loss": 1.4806, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.010005626707326673, |
| "grad_norm": 1.4198213815689087, |
| "learning_rate": 2.2233739310702255e-06, |
| "loss": 1.6962, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.010028949846504591, |
| "grad_norm": 1.1716971397399902, |
| "learning_rate": 2.2285566208862403e-06, |
| "loss": 1.0423, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.010052272985682507, |
| "grad_norm": 1.1271895170211792, |
| "learning_rate": 2.2337393107022547e-06, |
| "loss": 1.4246, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.010075596124860425, |
| "grad_norm": 1.2987208366394043, |
| "learning_rate": 2.238922000518269e-06, |
| "loss": 1.5946, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.010098919264038343, |
| "grad_norm": 1.7283997535705566, |
| "learning_rate": 2.2441046903342838e-06, |
| "loss": 1.5761, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.01012224240321626, |
| "grad_norm": 1.635098934173584, |
| "learning_rate": 2.249287380150298e-06, |
| "loss": 1.6912, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.010145565542394179, |
| "grad_norm": 2.1896469593048096, |
| "learning_rate": 2.254470069966313e-06, |
| "loss": 1.2961, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.010168888681572097, |
| "grad_norm": 1.1874053478240967, |
| "learning_rate": 2.2596527597823272e-06, |
| "loss": 1.4999, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.010192211820750014, |
| "grad_norm": 1.2898855209350586, |
| "learning_rate": 2.264835449598342e-06, |
| "loss": 1.7152, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.010215534959927932, |
| "grad_norm": 0.792107105255127, |
| "learning_rate": 2.270018139414356e-06, |
| "loss": 1.4129, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.010238858099105849, |
| "grad_norm": 1.2092666625976562, |
| "learning_rate": 2.2752008292303707e-06, |
| "loss": 1.4687, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.010262181238283766, |
| "grad_norm": 1.2261115312576294, |
| "learning_rate": 2.280383519046385e-06, |
| "loss": 1.5548, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.010285504377461684, |
| "grad_norm": 2.0835094451904297, |
| "learning_rate": 2.2855662088624e-06, |
| "loss": 1.5925, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.010308827516639602, |
| "grad_norm": 1.075907826423645, |
| "learning_rate": 2.290748898678414e-06, |
| "loss": 1.4967, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.01033215065581752, |
| "grad_norm": 0.9633646011352539, |
| "learning_rate": 2.295931588494429e-06, |
| "loss": 1.6798, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.010355473794995438, |
| "grad_norm": 1.6833699941635132, |
| "learning_rate": 2.3011142783104433e-06, |
| "loss": 1.3053, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.010378796934173356, |
| "grad_norm": 1.1333974599838257, |
| "learning_rate": 2.3062969681264576e-06, |
| "loss": 1.3658, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.010402120073351272, |
| "grad_norm": 1.3382309675216675, |
| "learning_rate": 2.3114796579424724e-06, |
| "loss": 1.6492, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.01042544321252919, |
| "grad_norm": 0.7148923873901367, |
| "learning_rate": 2.3166623477584868e-06, |
| "loss": 1.6269, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.010448766351707108, |
| "grad_norm": 1.084245204925537, |
| "learning_rate": 2.3218450375745015e-06, |
| "loss": 2.0708, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.010472089490885026, |
| "grad_norm": 1.1463004350662231, |
| "learning_rate": 2.327027727390516e-06, |
| "loss": 2.0115, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.010495412630062944, |
| "grad_norm": 1.5500133037567139, |
| "learning_rate": 2.3322104172065306e-06, |
| "loss": 1.5454, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.010518735769240862, |
| "grad_norm": 1.2993839979171753, |
| "learning_rate": 2.337393107022545e-06, |
| "loss": 1.5475, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.01054205890841878, |
| "grad_norm": 1.295839786529541, |
| "learning_rate": 2.3425757968385593e-06, |
| "loss": 1.2895, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.010565382047596696, |
| "grad_norm": 1.045040488243103, |
| "learning_rate": 2.3477584866545737e-06, |
| "loss": 1.7306, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.010588705186774613, |
| "grad_norm": 1.4592766761779785, |
| "learning_rate": 2.3529411764705885e-06, |
| "loss": 1.7795, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.010612028325952531, |
| "grad_norm": 0.9432761073112488, |
| "learning_rate": 2.358123866286603e-06, |
| "loss": 1.6963, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.01063535146513045, |
| "grad_norm": 1.3770086765289307, |
| "learning_rate": 2.3633065561026176e-06, |
| "loss": 1.2003, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.010658674604308367, |
| "grad_norm": 1.1453793048858643, |
| "learning_rate": 2.368489245918632e-06, |
| "loss": 1.9012, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.010681997743486285, |
| "grad_norm": 1.2836976051330566, |
| "learning_rate": 2.3736719357346467e-06, |
| "loss": 1.4324, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.010705320882664203, |
| "grad_norm": 1.6498123407363892, |
| "learning_rate": 2.378854625550661e-06, |
| "loss": 1.6212, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.010728644021842119, |
| "grad_norm": 1.3681795597076416, |
| "learning_rate": 2.3840373153666754e-06, |
| "loss": 1.6047, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.010751967161020037, |
| "grad_norm": 1.4474722146987915, |
| "learning_rate": 2.38922000518269e-06, |
| "loss": 1.5279, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.010775290300197955, |
| "grad_norm": 1.4832510948181152, |
| "learning_rate": 2.3944026949987045e-06, |
| "loss": 1.7073, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.010798613439375873, |
| "grad_norm": 1.343935251235962, |
| "learning_rate": 2.3995853848147193e-06, |
| "loss": 1.4637, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.01082193657855379, |
| "grad_norm": 1.8285539150238037, |
| "learning_rate": 2.4047680746307336e-06, |
| "loss": 1.3944, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.010845259717731709, |
| "grad_norm": 1.4653230905532837, |
| "learning_rate": 2.4099507644467484e-06, |
| "loss": 1.8847, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.010868582856909626, |
| "grad_norm": 1.4410351514816284, |
| "learning_rate": 2.4151334542627623e-06, |
| "loss": 1.7298, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.010891905996087543, |
| "grad_norm": 1.3057256937026978, |
| "learning_rate": 2.420316144078777e-06, |
| "loss": 1.6188, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.01091522913526546, |
| "grad_norm": 1.574479103088379, |
| "learning_rate": 2.4254988338947914e-06, |
| "loss": 1.585, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.010938552274443378, |
| "grad_norm": 1.4391696453094482, |
| "learning_rate": 2.430681523710806e-06, |
| "loss": 1.7272, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.010961875413621296, |
| "grad_norm": 2.304706335067749, |
| "learning_rate": 2.4358642135268205e-06, |
| "loss": 1.7127, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.010985198552799214, |
| "grad_norm": 1.2380545139312744, |
| "learning_rate": 2.4410469033428353e-06, |
| "loss": 1.5428, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.011008521691977132, |
| "grad_norm": 1.303446888923645, |
| "learning_rate": 2.4462295931588497e-06, |
| "loss": 1.609, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.01103184483115505, |
| "grad_norm": 1.3888837099075317, |
| "learning_rate": 2.451412282974864e-06, |
| "loss": 1.7134, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.011055167970332966, |
| "grad_norm": 0.9802701473236084, |
| "learning_rate": 2.4565949727908788e-06, |
| "loss": 1.4401, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.011078491109510884, |
| "grad_norm": 1.5808403491973877, |
| "learning_rate": 2.461777662606893e-06, |
| "loss": 1.7415, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.011101814248688802, |
| "grad_norm": 1.299912691116333, |
| "learning_rate": 2.466960352422908e-06, |
| "loss": 1.361, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.01112513738786672, |
| "grad_norm": 0.9326110482215881, |
| "learning_rate": 2.4721430422389222e-06, |
| "loss": 1.222, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.011148460527044638, |
| "grad_norm": 1.0385396480560303, |
| "learning_rate": 2.477325732054937e-06, |
| "loss": 1.4813, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.011171783666222556, |
| "grad_norm": 1.1004397869110107, |
| "learning_rate": 2.482508421870951e-06, |
| "loss": 1.5064, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.011195106805400474, |
| "grad_norm": 1.274898886680603, |
| "learning_rate": 2.4876911116869657e-06, |
| "loss": 1.3046, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.01121842994457839, |
| "grad_norm": 1.0818660259246826, |
| "learning_rate": 2.49287380150298e-06, |
| "loss": 1.878, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.011241753083756308, |
| "grad_norm": 1.2744652032852173, |
| "learning_rate": 2.498056491318995e-06, |
| "loss": 1.6394, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.011265076222934226, |
| "grad_norm": 1.0467538833618164, |
| "learning_rate": 2.503239181135009e-06, |
| "loss": 1.8949, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.011288399362112143, |
| "grad_norm": 1.2507177591323853, |
| "learning_rate": 2.508421870951024e-06, |
| "loss": 1.5386, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.011311722501290061, |
| "grad_norm": 2.0707380771636963, |
| "learning_rate": 2.5136045607670383e-06, |
| "loss": 1.3359, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.01133504564046798, |
| "grad_norm": 1.0060955286026, |
| "learning_rate": 2.518787250583053e-06, |
| "loss": 1.5551, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.011358368779645897, |
| "grad_norm": 2.1019294261932373, |
| "learning_rate": 2.5239699403990674e-06, |
| "loss": 1.4009, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.011381691918823813, |
| "grad_norm": 1.2085974216461182, |
| "learning_rate": 2.529152630215082e-06, |
| "loss": 1.1264, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.011405015058001731, |
| "grad_norm": 1.2670215368270874, |
| "learning_rate": 2.5343353200310965e-06, |
| "loss": 1.4005, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.011428338197179649, |
| "grad_norm": 0.976809024810791, |
| "learning_rate": 2.5395180098471104e-06, |
| "loss": 1.6539, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.011451661336357567, |
| "grad_norm": 1.8012447357177734, |
| "learning_rate": 2.5447006996631252e-06, |
| "loss": 1.5083, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.011474984475535485, |
| "grad_norm": 2.0657784938812256, |
| "learning_rate": 2.5498833894791396e-06, |
| "loss": 1.4127, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.011498307614713403, |
| "grad_norm": 1.4070103168487549, |
| "learning_rate": 2.5550660792951543e-06, |
| "loss": 1.4707, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.01152163075389132, |
| "grad_norm": 0.859045147895813, |
| "learning_rate": 2.5602487691111687e-06, |
| "loss": 1.6301, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.011544953893069239, |
| "grad_norm": 1.5209952592849731, |
| "learning_rate": 2.5654314589271835e-06, |
| "loss": 1.8438, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.011568277032247155, |
| "grad_norm": 1.1508231163024902, |
| "learning_rate": 2.570614148743198e-06, |
| "loss": 1.2495, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.011591600171425073, |
| "grad_norm": 0.9130313396453857, |
| "learning_rate": 2.5757968385592126e-06, |
| "loss": 1.1848, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.01161492331060299, |
| "grad_norm": 1.5925562381744385, |
| "learning_rate": 2.580979528375227e-06, |
| "loss": 1.4745, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.011638246449780908, |
| "grad_norm": 2.5118539333343506, |
| "learning_rate": 2.5861622181912417e-06, |
| "loss": 1.6218, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.011661569588958826, |
| "grad_norm": 1.272691249847412, |
| "learning_rate": 2.591344908007256e-06, |
| "loss": 1.2147, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.011684892728136744, |
| "grad_norm": 1.1436160802841187, |
| "learning_rate": 2.596527597823271e-06, |
| "loss": 1.5556, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.011708215867314662, |
| "grad_norm": 1.0195647478103638, |
| "learning_rate": 2.601710287639285e-06, |
| "loss": 1.3303, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.011731539006492578, |
| "grad_norm": 1.4576568603515625, |
| "learning_rate": 2.6068929774553e-06, |
| "loss": 1.6531, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.011754862145670496, |
| "grad_norm": 1.360716462135315, |
| "learning_rate": 2.612075667271314e-06, |
| "loss": 1.1761, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.011778185284848414, |
| "grad_norm": 2.7770462036132812, |
| "learning_rate": 2.617258357087328e-06, |
| "loss": 1.247, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.011801508424026332, |
| "grad_norm": 1.3706661462783813, |
| "learning_rate": 2.622441046903343e-06, |
| "loss": 1.5103, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.01182483156320425, |
| "grad_norm": 1.5405017137527466, |
| "learning_rate": 2.6276237367193573e-06, |
| "loss": 1.6827, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.011848154702382168, |
| "grad_norm": 1.1809494495391846, |
| "learning_rate": 2.632806426535372e-06, |
| "loss": 1.7162, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.011871477841560086, |
| "grad_norm": 1.085557222366333, |
| "learning_rate": 2.6379891163513864e-06, |
| "loss": 1.514, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.011894800980738002, |
| "grad_norm": 1.2155910730361938, |
| "learning_rate": 2.643171806167401e-06, |
| "loss": 1.4029, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.01191812411991592, |
| "grad_norm": 1.240242600440979, |
| "learning_rate": 2.6483544959834155e-06, |
| "loss": 1.4336, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.011941447259093838, |
| "grad_norm": 1.649802327156067, |
| "learning_rate": 2.6535371857994303e-06, |
| "loss": 1.9082, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.011964770398271755, |
| "grad_norm": 1.3479831218719482, |
| "learning_rate": 2.6587198756154447e-06, |
| "loss": 1.5424, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.011988093537449673, |
| "grad_norm": 1.2537102699279785, |
| "learning_rate": 2.6639025654314594e-06, |
| "loss": 1.6061, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.012011416676627591, |
| "grad_norm": 1.1049939393997192, |
| "learning_rate": 2.6690852552474738e-06, |
| "loss": 1.8361, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.012034739815805509, |
| "grad_norm": 2.9946062564849854, |
| "learning_rate": 2.6742679450634885e-06, |
| "loss": 1.4471, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.012058062954983425, |
| "grad_norm": 0.9455610513687134, |
| "learning_rate": 2.6794506348795025e-06, |
| "loss": 1.6831, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.012081386094161343, |
| "grad_norm": 1.4750438928604126, |
| "learning_rate": 2.684633324695517e-06, |
| "loss": 1.3143, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.012104709233339261, |
| "grad_norm": 1.1056557893753052, |
| "learning_rate": 2.6898160145115316e-06, |
| "loss": 1.5054, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.012128032372517179, |
| "grad_norm": 0.9718064069747925, |
| "learning_rate": 2.694998704327546e-06, |
| "loss": 1.3134, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.012151355511695097, |
| "grad_norm": 2.2384724617004395, |
| "learning_rate": 2.7001813941435607e-06, |
| "loss": 1.4851, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.012174678650873015, |
| "grad_norm": 1.2468239068984985, |
| "learning_rate": 2.705364083959575e-06, |
| "loss": 1.4873, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.012198001790050933, |
| "grad_norm": 1.4248602390289307, |
| "learning_rate": 2.71054677377559e-06, |
| "loss": 1.7643, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.012221324929228849, |
| "grad_norm": 1.3377385139465332, |
| "learning_rate": 2.715729463591604e-06, |
| "loss": 1.7064, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.012244648068406767, |
| "grad_norm": 0.9933966994285583, |
| "learning_rate": 2.720912153407619e-06, |
| "loss": 1.7187, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.012267971207584685, |
| "grad_norm": 1.018750548362732, |
| "learning_rate": 2.7260948432236333e-06, |
| "loss": 1.5915, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.012291294346762602, |
| "grad_norm": 1.356325387954712, |
| "learning_rate": 2.731277533039648e-06, |
| "loss": 1.7193, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.01231461748594052, |
| "grad_norm": 1.2781217098236084, |
| "learning_rate": 2.7364602228556624e-06, |
| "loss": 1.5494, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.012337940625118438, |
| "grad_norm": 1.561498761177063, |
| "learning_rate": 2.741642912671677e-06, |
| "loss": 1.6972, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.012361263764296356, |
| "grad_norm": 1.1695748567581177, |
| "learning_rate": 2.7468256024876915e-06, |
| "loss": 2.1633, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.012384586903474272, |
| "grad_norm": 1.4304964542388916, |
| "learning_rate": 2.7520082923037054e-06, |
| "loss": 1.6321, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.01240791004265219, |
| "grad_norm": 1.0513828992843628, |
| "learning_rate": 2.7571909821197202e-06, |
| "loss": 1.2897, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.012431233181830108, |
| "grad_norm": 1.0206960439682007, |
| "learning_rate": 2.7623736719357346e-06, |
| "loss": 1.7842, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.012454556321008026, |
| "grad_norm": 1.1440876722335815, |
| "learning_rate": 2.7675563617517493e-06, |
| "loss": 1.4399, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.012477879460185944, |
| "grad_norm": 1.0837441682815552, |
| "learning_rate": 2.7727390515677637e-06, |
| "loss": 1.5155, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.012501202599363862, |
| "grad_norm": 1.071378231048584, |
| "learning_rate": 2.7779217413837785e-06, |
| "loss": 1.6459, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.01252452573854178, |
| "grad_norm": 1.6966552734375, |
| "learning_rate": 2.783104431199793e-06, |
| "loss": 1.6015, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.012547848877719696, |
| "grad_norm": 1.2789183855056763, |
| "learning_rate": 2.7882871210158076e-06, |
| "loss": 1.2423, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.012571172016897614, |
| "grad_norm": 1.2072651386260986, |
| "learning_rate": 2.793469810831822e-06, |
| "loss": 1.69, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.012594495156075532, |
| "grad_norm": 1.5257117748260498, |
| "learning_rate": 2.7986525006478367e-06, |
| "loss": 1.7608, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.01261781829525345, |
| "grad_norm": 1.0233759880065918, |
| "learning_rate": 2.803835190463851e-06, |
| "loss": 1.1299, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.012641141434431367, |
| "grad_norm": 1.8280616998672485, |
| "learning_rate": 2.809017880279866e-06, |
| "loss": 1.3338, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.012664464573609285, |
| "grad_norm": 1.6891363859176636, |
| "learning_rate": 2.81420057009588e-06, |
| "loss": 1.5505, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.012687787712787203, |
| "grad_norm": 1.1501421928405762, |
| "learning_rate": 2.819383259911895e-06, |
| "loss": 1.6788, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.01271111085196512, |
| "grad_norm": 1.107029914855957, |
| "learning_rate": 2.824565949727909e-06, |
| "loss": 1.3782, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.012734433991143037, |
| "grad_norm": 0.9627429246902466, |
| "learning_rate": 2.829748639543923e-06, |
| "loss": 1.3155, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.012757757130320955, |
| "grad_norm": 2.330007791519165, |
| "learning_rate": 2.834931329359938e-06, |
| "loss": 1.425, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.012781080269498873, |
| "grad_norm": 1.4026503562927246, |
| "learning_rate": 2.8401140191759523e-06, |
| "loss": 1.5578, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.012804403408676791, |
| "grad_norm": 0.9430487155914307, |
| "learning_rate": 2.845296708991967e-06, |
| "loss": 1.6075, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.012827726547854709, |
| "grad_norm": 1.0779294967651367, |
| "learning_rate": 2.8504793988079814e-06, |
| "loss": 1.5169, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.012851049687032627, |
| "grad_norm": 1.130324125289917, |
| "learning_rate": 2.855662088623996e-06, |
| "loss": 1.5016, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.012874372826210545, |
| "grad_norm": 1.0127092599868774, |
| "learning_rate": 2.8608447784400105e-06, |
| "loss": 1.8715, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.01289769596538846, |
| "grad_norm": 1.1831302642822266, |
| "learning_rate": 2.8660274682560253e-06, |
| "loss": 1.678, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.012921019104566379, |
| "grad_norm": 1.3394455909729004, |
| "learning_rate": 2.8712101580720397e-06, |
| "loss": 1.4129, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.012944342243744297, |
| "grad_norm": 1.2189030647277832, |
| "learning_rate": 2.8763928478880544e-06, |
| "loss": 1.7364, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.012967665382922215, |
| "grad_norm": 1.2808138132095337, |
| "learning_rate": 2.8815755377040688e-06, |
| "loss": 1.6274, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.012990988522100132, |
| "grad_norm": 1.0384689569473267, |
| "learning_rate": 2.8867582275200835e-06, |
| "loss": 1.5942, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.01301431166127805, |
| "grad_norm": 1.8520807027816772, |
| "learning_rate": 2.891940917336098e-06, |
| "loss": 1.3067, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.013037634800455968, |
| "grad_norm": 1.1817374229431152, |
| "learning_rate": 2.897123607152112e-06, |
| "loss": 1.6405, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.013060957939633884, |
| "grad_norm": 1.1010823249816895, |
| "learning_rate": 2.9023062969681266e-06, |
| "loss": 1.4339, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.013084281078811802, |
| "grad_norm": 1.2461942434310913, |
| "learning_rate": 2.907488986784141e-06, |
| "loss": 1.9866, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.01310760421798972, |
| "grad_norm": 1.1503125429153442, |
| "learning_rate": 2.9126716766001557e-06, |
| "loss": 1.585, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.013130927357167638, |
| "grad_norm": 1.542434573173523, |
| "learning_rate": 2.91785436641617e-06, |
| "loss": 1.4524, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.013154250496345556, |
| "grad_norm": 1.0469673871994019, |
| "learning_rate": 2.923037056232185e-06, |
| "loss": 1.6884, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.013177573635523474, |
| "grad_norm": 1.5137437582015991, |
| "learning_rate": 2.928219746048199e-06, |
| "loss": 1.5377, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.013200896774701392, |
| "grad_norm": 1.1454534530639648, |
| "learning_rate": 2.933402435864214e-06, |
| "loss": 1.8508, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.013224219913879308, |
| "grad_norm": 1.310381531715393, |
| "learning_rate": 2.9385851256802283e-06, |
| "loss": 1.5774, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.013247543053057226, |
| "grad_norm": 1.1223838329315186, |
| "learning_rate": 2.943767815496243e-06, |
| "loss": 1.4496, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.013270866192235144, |
| "grad_norm": 1.4537910223007202, |
| "learning_rate": 2.9489505053122574e-06, |
| "loss": 1.4423, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.013294189331413062, |
| "grad_norm": 1.1783167123794556, |
| "learning_rate": 2.954133195128272e-06, |
| "loss": 1.9314, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.01331751247059098, |
| "grad_norm": 1.211719274520874, |
| "learning_rate": 2.9593158849442865e-06, |
| "loss": 1.5366, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.013340835609768897, |
| "grad_norm": 2.9552671909332275, |
| "learning_rate": 2.9644985747603004e-06, |
| "loss": 1.3431, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.013364158748946815, |
| "grad_norm": 1.2814795970916748, |
| "learning_rate": 2.9696812645763152e-06, |
| "loss": 1.3879, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.013387481888124731, |
| "grad_norm": 1.2598010301589966, |
| "learning_rate": 2.9748639543923296e-06, |
| "loss": 1.4775, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.01341080502730265, |
| "grad_norm": 1.3874925374984741, |
| "learning_rate": 2.9800466442083443e-06, |
| "loss": 1.4012, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.013434128166480567, |
| "grad_norm": 1.1846306324005127, |
| "learning_rate": 2.9852293340243587e-06, |
| "loss": 1.4491, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.013457451305658485, |
| "grad_norm": 1.388150691986084, |
| "learning_rate": 2.9904120238403734e-06, |
| "loss": 1.6913, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.013480774444836403, |
| "grad_norm": 1.8026880025863647, |
| "learning_rate": 2.995594713656388e-06, |
| "loss": 1.1754, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.013504097584014321, |
| "grad_norm": 1.9366620779037476, |
| "learning_rate": 3.0007774034724026e-06, |
| "loss": 1.4406, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.013527420723192239, |
| "grad_norm": 1.039657473564148, |
| "learning_rate": 3.005960093288417e-06, |
| "loss": 1.4823, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.013550743862370155, |
| "grad_norm": 1.0928449630737305, |
| "learning_rate": 3.0111427831044317e-06, |
| "loss": 1.4502, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.013574067001548073, |
| "grad_norm": 2.408292531967163, |
| "learning_rate": 3.016325472920446e-06, |
| "loss": 1.4778, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.01359739014072599, |
| "grad_norm": 1.2284953594207764, |
| "learning_rate": 3.021508162736461e-06, |
| "loss": 1.5887, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.013620713279903909, |
| "grad_norm": 1.3841763734817505, |
| "learning_rate": 3.026690852552475e-06, |
| "loss": 1.3778, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.013644036419081827, |
| "grad_norm": 1.305172324180603, |
| "learning_rate": 3.03187354236849e-06, |
| "loss": 1.2837, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.013667359558259744, |
| "grad_norm": 1.087904691696167, |
| "learning_rate": 3.037056232184504e-06, |
| "loss": 1.4361, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.013690682697437662, |
| "grad_norm": 1.1818716526031494, |
| "learning_rate": 3.042238922000518e-06, |
| "loss": 1.4903, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.013714005836615578, |
| "grad_norm": 0.9969412088394165, |
| "learning_rate": 3.047421611816533e-06, |
| "loss": 1.6923, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.013737328975793496, |
| "grad_norm": 1.3729232549667358, |
| "learning_rate": 3.0526043016325473e-06, |
| "loss": 1.4219, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.013760652114971414, |
| "grad_norm": 1.091769814491272, |
| "learning_rate": 3.057786991448562e-06, |
| "loss": 1.6978, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.013783975254149332, |
| "grad_norm": 1.1668254137039185, |
| "learning_rate": 3.0629696812645764e-06, |
| "loss": 1.4609, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.01380729839332725, |
| "grad_norm": 1.3739502429962158, |
| "learning_rate": 3.068152371080591e-06, |
| "loss": 1.7247, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.013830621532505168, |
| "grad_norm": 1.480758547782898, |
| "learning_rate": 3.0733350608966055e-06, |
| "loss": 1.6142, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.013853944671683086, |
| "grad_norm": 0.853581964969635, |
| "learning_rate": 3.0785177507126203e-06, |
| "loss": 1.5563, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.013877267810861002, |
| "grad_norm": 1.144692063331604, |
| "learning_rate": 3.0837004405286347e-06, |
| "loss": 1.6145, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.01390059095003892, |
| "grad_norm": 1.2413440942764282, |
| "learning_rate": 3.0888831303446494e-06, |
| "loss": 1.5762, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.013923914089216838, |
| "grad_norm": 1.147834062576294, |
| "learning_rate": 3.0940658201606638e-06, |
| "loss": 1.4478, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.013947237228394756, |
| "grad_norm": 1.0349398851394653, |
| "learning_rate": 3.0992485099766785e-06, |
| "loss": 1.612, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.013970560367572674, |
| "grad_norm": 1.4780391454696655, |
| "learning_rate": 3.104431199792693e-06, |
| "loss": 1.5179, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.013993883506750592, |
| "grad_norm": 1.1395933628082275, |
| "learning_rate": 3.109613889608707e-06, |
| "loss": 1.4845, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.01401720664592851, |
| "grad_norm": 1.37168550491333, |
| "learning_rate": 3.1147965794247216e-06, |
| "loss": 1.581, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.014040529785106426, |
| "grad_norm": 1.8260347843170166, |
| "learning_rate": 3.119979269240736e-06, |
| "loss": 1.1221, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.014063852924284343, |
| "grad_norm": 2.5528669357299805, |
| "learning_rate": 3.1251619590567507e-06, |
| "loss": 1.255, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.014087176063462261, |
| "grad_norm": 1.3272032737731934, |
| "learning_rate": 3.130344648872765e-06, |
| "loss": 1.2713, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.01411049920264018, |
| "grad_norm": 1.147449254989624, |
| "learning_rate": 3.13552733868878e-06, |
| "loss": 1.3694, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.014133822341818097, |
| "grad_norm": 1.173793077468872, |
| "learning_rate": 3.140710028504794e-06, |
| "loss": 1.5818, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.014157145480996015, |
| "grad_norm": 1.2347713708877563, |
| "learning_rate": 3.145892718320809e-06, |
| "loss": 1.501, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.014180468620173933, |
| "grad_norm": 1.3945446014404297, |
| "learning_rate": 3.1510754081368233e-06, |
| "loss": 1.8674, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.01420379175935185, |
| "grad_norm": 1.239762544631958, |
| "learning_rate": 3.156258097952838e-06, |
| "loss": 1.2516, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.014227114898529767, |
| "grad_norm": 1.552531361579895, |
| "learning_rate": 3.1614407877688524e-06, |
| "loss": 1.5358, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.014250438037707685, |
| "grad_norm": 1.576997995376587, |
| "learning_rate": 3.166623477584867e-06, |
| "loss": 1.7601, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.014273761176885603, |
| "grad_norm": 1.3251402378082275, |
| "learning_rate": 3.1718061674008815e-06, |
| "loss": 1.2758, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.01429708431606352, |
| "grad_norm": 1.2837574481964111, |
| "learning_rate": 3.1769888572168963e-06, |
| "loss": 1.528, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.014320407455241439, |
| "grad_norm": 0.9697505831718445, |
| "learning_rate": 3.1821715470329102e-06, |
| "loss": 1.6359, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.014343730594419356, |
| "grad_norm": 1.2682685852050781, |
| "learning_rate": 3.1873542368489246e-06, |
| "loss": 1.4759, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.014367053733597274, |
| "grad_norm": 0.9607746005058289, |
| "learning_rate": 3.1925369266649393e-06, |
| "loss": 1.7474, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.01439037687277519, |
| "grad_norm": 1.056736946105957, |
| "learning_rate": 3.1977196164809537e-06, |
| "loss": 1.8812, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.014413700011953108, |
| "grad_norm": 1.1990852355957031, |
| "learning_rate": 3.2029023062969684e-06, |
| "loss": 1.6217, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.014437023151131026, |
| "grad_norm": 1.1339764595031738, |
| "learning_rate": 3.208084996112983e-06, |
| "loss": 1.3557, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.014460346290308944, |
| "grad_norm": 1.0672523975372314, |
| "learning_rate": 3.2132676859289976e-06, |
| "loss": 1.8239, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.014483669429486862, |
| "grad_norm": 1.4371954202651978, |
| "learning_rate": 3.218450375745012e-06, |
| "loss": 1.4571, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.01450699256866478, |
| "grad_norm": 1.9893105030059814, |
| "learning_rate": 3.2236330655610267e-06, |
| "loss": 1.3716, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.014530315707842698, |
| "grad_norm": 1.7084318399429321, |
| "learning_rate": 3.228815755377041e-06, |
| "loss": 1.5201, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.014553638847020614, |
| "grad_norm": 1.308225154876709, |
| "learning_rate": 3.233998445193056e-06, |
| "loss": 1.9173, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.014576961986198532, |
| "grad_norm": 0.9914215803146362, |
| "learning_rate": 3.23918113500907e-06, |
| "loss": 1.7351, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.01460028512537645, |
| "grad_norm": 1.0292766094207764, |
| "learning_rate": 3.244363824825085e-06, |
| "loss": 1.4073, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.014623608264554368, |
| "grad_norm": 1.0998982191085815, |
| "learning_rate": 3.2495465146410993e-06, |
| "loss": 1.5979, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.014646931403732286, |
| "grad_norm": 1.1409685611724854, |
| "learning_rate": 3.254729204457113e-06, |
| "loss": 1.3442, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.014670254542910204, |
| "grad_norm": 1.7685736417770386, |
| "learning_rate": 3.259911894273128e-06, |
| "loss": 1.251, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.014693577682088121, |
| "grad_norm": 1.6536918878555298, |
| "learning_rate": 3.2650945840891423e-06, |
| "loss": 1.4698, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.014716900821266038, |
| "grad_norm": 2.046391248703003, |
| "learning_rate": 3.270277273905157e-06, |
| "loss": 1.5142, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.014740223960443955, |
| "grad_norm": 1.3458948135375977, |
| "learning_rate": 3.2754599637211714e-06, |
| "loss": 1.3999, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.014763547099621873, |
| "grad_norm": 1.7265046834945679, |
| "learning_rate": 3.280642653537186e-06, |
| "loss": 1.2212, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.014786870238799791, |
| "grad_norm": 1.3191124200820923, |
| "learning_rate": 3.2858253433532005e-06, |
| "loss": 1.4354, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.01481019337797771, |
| "grad_norm": 1.2317379713058472, |
| "learning_rate": 3.2910080331692153e-06, |
| "loss": 1.5661, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.014833516517155627, |
| "grad_norm": 1.400969386100769, |
| "learning_rate": 3.2961907229852297e-06, |
| "loss": 1.462, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.014856839656333545, |
| "grad_norm": 2.060718059539795, |
| "learning_rate": 3.3013734128012444e-06, |
| "loss": 1.7522, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.014880162795511461, |
| "grad_norm": 1.138715386390686, |
| "learning_rate": 3.3065561026172588e-06, |
| "loss": 1.4923, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.014903485934689379, |
| "grad_norm": 1.1973599195480347, |
| "learning_rate": 3.3117387924332735e-06, |
| "loss": 1.4462, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.014926809073867297, |
| "grad_norm": 1.266867756843567, |
| "learning_rate": 3.316921482249288e-06, |
| "loss": 1.3159, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.014950132213045215, |
| "grad_norm": 3.4681708812713623, |
| "learning_rate": 3.322104172065302e-06, |
| "loss": 1.3566, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.014973455352223133, |
| "grad_norm": 1.248502492904663, |
| "learning_rate": 3.3272868618813166e-06, |
| "loss": 1.6299, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.01499677849140105, |
| "grad_norm": 1.561563491821289, |
| "learning_rate": 3.332469551697331e-06, |
| "loss": 1.3246, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.015020101630578968, |
| "grad_norm": 1.1922053098678589, |
| "learning_rate": 3.3376522415133457e-06, |
| "loss": 1.6847, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.015043424769756885, |
| "grad_norm": 1.0779014825820923, |
| "learning_rate": 3.34283493132936e-06, |
| "loss": 1.8025, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.015066747908934803, |
| "grad_norm": 1.5236597061157227, |
| "learning_rate": 3.348017621145375e-06, |
| "loss": 1.3894, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.01509007104811272, |
| "grad_norm": 1.2087934017181396, |
| "learning_rate": 3.353200310961389e-06, |
| "loss": 1.9119, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.015113394187290638, |
| "grad_norm": 1.435085654258728, |
| "learning_rate": 3.358383000777404e-06, |
| "loss": 1.4334, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.015136717326468556, |
| "grad_norm": 1.3662467002868652, |
| "learning_rate": 3.3635656905934183e-06, |
| "loss": 1.6717, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.015160040465646474, |
| "grad_norm": 1.379262924194336, |
| "learning_rate": 3.368748380409433e-06, |
| "loss": 1.0914, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.015183363604824392, |
| "grad_norm": 1.436503529548645, |
| "learning_rate": 3.3739310702254474e-06, |
| "loss": 1.296, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.015206686744002308, |
| "grad_norm": 1.0189919471740723, |
| "learning_rate": 3.379113760041462e-06, |
| "loss": 1.5578, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.015230009883180226, |
| "grad_norm": 1.3371915817260742, |
| "learning_rate": 3.3842964498574765e-06, |
| "loss": 1.3883, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.015253333022358144, |
| "grad_norm": 1.152949333190918, |
| "learning_rate": 3.389479139673491e-06, |
| "loss": 1.3408, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.015276656161536062, |
| "grad_norm": 0.865856945514679, |
| "learning_rate": 3.3946618294895052e-06, |
| "loss": 1.8154, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.01529997930071398, |
| "grad_norm": 1.3607538938522339, |
| "learning_rate": 3.3998445193055196e-06, |
| "loss": 1.5139, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.015323302439891898, |
| "grad_norm": 1.0469399690628052, |
| "learning_rate": 3.4050272091215343e-06, |
| "loss": 1.4246, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.015346625579069816, |
| "grad_norm": 1.2417982816696167, |
| "learning_rate": 3.4102098989375487e-06, |
| "loss": 1.4392, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.015369948718247732, |
| "grad_norm": 2.018418073654175, |
| "learning_rate": 3.4153925887535634e-06, |
| "loss": 1.5175, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.01539327185742565, |
| "grad_norm": 1.2593055963516235, |
| "learning_rate": 3.420575278569578e-06, |
| "loss": 1.6338, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.015416594996603568, |
| "grad_norm": 1.0297298431396484, |
| "learning_rate": 3.4257579683855926e-06, |
| "loss": 1.6309, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.015439918135781485, |
| "grad_norm": 1.2963732481002808, |
| "learning_rate": 3.430940658201607e-06, |
| "loss": 1.3099, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.015463241274959403, |
| "grad_norm": 1.0868266820907593, |
| "learning_rate": 3.4361233480176217e-06, |
| "loss": 1.4949, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.015486564414137321, |
| "grad_norm": 1.156296968460083, |
| "learning_rate": 3.441306037833636e-06, |
| "loss": 1.7845, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.015509887553315239, |
| "grad_norm": 1.412965178489685, |
| "learning_rate": 3.446488727649651e-06, |
| "loss": 1.19, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.015533210692493155, |
| "grad_norm": 1.0419931411743164, |
| "learning_rate": 3.451671417465665e-06, |
| "loss": 1.7125, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.015556533831671073, |
| "grad_norm": 1.035372018814087, |
| "learning_rate": 3.4568541072816795e-06, |
| "loss": 1.7003, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.015579856970848991, |
| "grad_norm": 1.1559805870056152, |
| "learning_rate": 3.4620367970976943e-06, |
| "loss": 1.981, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.015603180110026909, |
| "grad_norm": 0.8634515404701233, |
| "learning_rate": 3.467219486913708e-06, |
| "loss": 1.2609, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.015626503249204827, |
| "grad_norm": 1.1953692436218262, |
| "learning_rate": 3.472402176729723e-06, |
| "loss": 1.3956, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.015649826388382745, |
| "grad_norm": 0.9668301939964294, |
| "learning_rate": 3.4775848665457373e-06, |
| "loss": 1.0568, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.015673149527560663, |
| "grad_norm": 2.4868035316467285, |
| "learning_rate": 3.482767556361752e-06, |
| "loss": 1.364, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.01569647266673858, |
| "grad_norm": 1.4255839586257935, |
| "learning_rate": 3.4879502461777664e-06, |
| "loss": 1.5207, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.0157197958059165, |
| "grad_norm": 1.2752389907836914, |
| "learning_rate": 3.493132935993781e-06, |
| "loss": 1.5141, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.015743118945094416, |
| "grad_norm": 1.2186245918273926, |
| "learning_rate": 3.4983156258097955e-06, |
| "loss": 1.3655, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.015766442084272334, |
| "grad_norm": 1.3544304370880127, |
| "learning_rate": 3.5034983156258103e-06, |
| "loss": 1.7428, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.01578976522345025, |
| "grad_norm": 1.0968130826950073, |
| "learning_rate": 3.5086810054418247e-06, |
| "loss": 1.3491, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.015813088362628167, |
| "grad_norm": 1.1593806743621826, |
| "learning_rate": 3.513863695257839e-06, |
| "loss": 1.6708, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.015836411501806084, |
| "grad_norm": 1.0408954620361328, |
| "learning_rate": 3.5190463850738538e-06, |
| "loss": 1.6977, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.015859734640984002, |
| "grad_norm": 1.196632742881775, |
| "learning_rate": 3.524229074889868e-06, |
| "loss": 1.2019, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.01588305778016192, |
| "grad_norm": 1.2698166370391846, |
| "learning_rate": 3.529411764705883e-06, |
| "loss": 1.8457, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.015906380919339838, |
| "grad_norm": 0.9075011014938354, |
| "learning_rate": 3.5345944545218972e-06, |
| "loss": 1.2717, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.015929704058517756, |
| "grad_norm": 1.0426501035690308, |
| "learning_rate": 3.5397771443379116e-06, |
| "loss": 1.6601, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.015953027197695674, |
| "grad_norm": 1.4904205799102783, |
| "learning_rate": 3.544959834153926e-06, |
| "loss": 1.6324, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.015976350336873592, |
| "grad_norm": 1.0664643049240112, |
| "learning_rate": 3.5501425239699407e-06, |
| "loss": 1.4896, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.01599967347605151, |
| "grad_norm": 1.3758978843688965, |
| "learning_rate": 3.555325213785955e-06, |
| "loss": 1.5457, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.016022996615229428, |
| "grad_norm": 1.4759879112243652, |
| "learning_rate": 3.56050790360197e-06, |
| "loss": 1.3865, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.016046319754407345, |
| "grad_norm": 1.4678733348846436, |
| "learning_rate": 3.565690593417984e-06, |
| "loss": 1.223, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.016069642893585263, |
| "grad_norm": 1.2057251930236816, |
| "learning_rate": 3.570873283233999e-06, |
| "loss": 1.4864, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.01609296603276318, |
| "grad_norm": 1.3976320028305054, |
| "learning_rate": 3.5760559730500133e-06, |
| "loss": 1.3371, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.016116289171941096, |
| "grad_norm": 1.0588197708129883, |
| "learning_rate": 3.5812386628660276e-06, |
| "loss": 1.264, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.016139612311119014, |
| "grad_norm": 0.891678512096405, |
| "learning_rate": 3.5864213526820424e-06, |
| "loss": 1.6566, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.01616293545029693, |
| "grad_norm": 1.1149228811264038, |
| "learning_rate": 3.5916040424980567e-06, |
| "loss": 1.6862, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.01618625858947485, |
| "grad_norm": 1.463218331336975, |
| "learning_rate": 3.5967867323140715e-06, |
| "loss": 1.5771, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.016209581728652767, |
| "grad_norm": 1.291648030281067, |
| "learning_rate": 3.601969422130086e-06, |
| "loss": 1.443, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.016232904867830685, |
| "grad_norm": 1.1534149646759033, |
| "learning_rate": 3.6071521119461002e-06, |
| "loss": 1.76, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.016256228007008603, |
| "grad_norm": 1.3349847793579102, |
| "learning_rate": 3.6123348017621146e-06, |
| "loss": 2.0584, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.01627955114618652, |
| "grad_norm": 1.665682315826416, |
| "learning_rate": 3.6175174915781293e-06, |
| "loss": 1.5989, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.01630287428536444, |
| "grad_norm": 1.6486263275146484, |
| "learning_rate": 3.6227001813941437e-06, |
| "loss": 1.7698, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.016326197424542357, |
| "grad_norm": 1.5153722763061523, |
| "learning_rate": 3.6278828712101584e-06, |
| "loss": 1.3312, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.016349520563720275, |
| "grad_norm": 1.3090248107910156, |
| "learning_rate": 3.633065561026173e-06, |
| "loss": 1.0735, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.016372843702898193, |
| "grad_norm": 1.5462753772735596, |
| "learning_rate": 3.6382482508421876e-06, |
| "loss": 1.5408, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.01639616684207611, |
| "grad_norm": 1.3447730541229248, |
| "learning_rate": 3.643430940658202e-06, |
| "loss": 1.5295, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.01641948998125403, |
| "grad_norm": 1.232865571975708, |
| "learning_rate": 3.6486136304742163e-06, |
| "loss": 1.8686, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.016442813120431946, |
| "grad_norm": 0.9742329120635986, |
| "learning_rate": 3.653796320290231e-06, |
| "loss": 1.5951, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.01646613625960986, |
| "grad_norm": 1.1572047472000122, |
| "learning_rate": 3.6589790101062454e-06, |
| "loss": 1.5068, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.01648945939878778, |
| "grad_norm": 1.2024304866790771, |
| "learning_rate": 3.66416169992226e-06, |
| "loss": 1.3933, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.016512782537965696, |
| "grad_norm": 2.442342758178711, |
| "learning_rate": 3.6693443897382745e-06, |
| "loss": 1.0126, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.016536105677143614, |
| "grad_norm": 1.2786589860916138, |
| "learning_rate": 3.6745270795542893e-06, |
| "loss": 1.6902, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.016559428816321532, |
| "grad_norm": 0.9200882315635681, |
| "learning_rate": 3.679709769370303e-06, |
| "loss": 1.3918, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.01658275195549945, |
| "grad_norm": 1.3768819570541382, |
| "learning_rate": 3.684892459186318e-06, |
| "loss": 1.6518, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.016606075094677368, |
| "grad_norm": 1.274484395980835, |
| "learning_rate": 3.6900751490023323e-06, |
| "loss": 1.3728, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.016629398233855286, |
| "grad_norm": 1.1752501726150513, |
| "learning_rate": 3.695257838818347e-06, |
| "loss": 1.4234, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.016652721373033204, |
| "grad_norm": 1.4458903074264526, |
| "learning_rate": 3.7004405286343614e-06, |
| "loss": 1.5695, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.01667604451221112, |
| "grad_norm": 1.2630547285079956, |
| "learning_rate": 3.705623218450376e-06, |
| "loss": 1.5334, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.01669936765138904, |
| "grad_norm": 1.3754082918167114, |
| "learning_rate": 3.7108059082663905e-06, |
| "loss": 1.4807, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.016722690790566958, |
| "grad_norm": 1.4704689979553223, |
| "learning_rate": 3.715988598082405e-06, |
| "loss": 1.5409, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.016746013929744875, |
| "grad_norm": 1.4692633152008057, |
| "learning_rate": 3.7211712878984197e-06, |
| "loss": 1.5922, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.016769337068922793, |
| "grad_norm": 1.2148405313491821, |
| "learning_rate": 3.726353977714434e-06, |
| "loss": 1.8115, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.016792660208100708, |
| "grad_norm": 1.5564905405044556, |
| "learning_rate": 3.7315366675304488e-06, |
| "loss": 1.4189, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.016815983347278626, |
| "grad_norm": 1.130292296409607, |
| "learning_rate": 3.736719357346463e-06, |
| "loss": 1.4455, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.016839306486456544, |
| "grad_norm": 2.0609545707702637, |
| "learning_rate": 3.741902047162478e-06, |
| "loss": 1.6052, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.01686262962563446, |
| "grad_norm": 1.0422543287277222, |
| "learning_rate": 3.7470847369784922e-06, |
| "loss": 1.5889, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.01688595276481238, |
| "grad_norm": 1.7926782369613647, |
| "learning_rate": 3.7522674267945066e-06, |
| "loss": 1.2304, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.016909275903990297, |
| "grad_norm": 1.2486250400543213, |
| "learning_rate": 3.757450116610521e-06, |
| "loss": 1.7512, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.016932599043168215, |
| "grad_norm": 1.6907048225402832, |
| "learning_rate": 3.7626328064265357e-06, |
| "loss": 1.2031, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.016955922182346133, |
| "grad_norm": 1.2899296283721924, |
| "learning_rate": 3.76781549624255e-06, |
| "loss": 1.3111, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.01697924532152405, |
| "grad_norm": 2.320288896560669, |
| "learning_rate": 3.7729981860585644e-06, |
| "loss": 1.2764, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.01700256846070197, |
| "grad_norm": 1.4165383577346802, |
| "learning_rate": 3.778180875874579e-06, |
| "loss": 1.2847, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.017025891599879887, |
| "grad_norm": 1.1537601947784424, |
| "learning_rate": 3.7833635656905935e-06, |
| "loss": 1.6002, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.017049214739057805, |
| "grad_norm": 1.3128899335861206, |
| "learning_rate": 3.7885462555066083e-06, |
| "loss": 1.4159, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.017072537878235722, |
| "grad_norm": 0.9494642615318298, |
| "learning_rate": 3.7937289453226226e-06, |
| "loss": 1.5425, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.01709586101741364, |
| "grad_norm": 1.8949923515319824, |
| "learning_rate": 3.7989116351386374e-06, |
| "loss": 1.109, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.017119184156591555, |
| "grad_norm": 1.3136776685714722, |
| "learning_rate": 3.8040943249546517e-06, |
| "loss": 1.4208, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.017142507295769473, |
| "grad_norm": 1.0108048915863037, |
| "learning_rate": 3.8092770147706665e-06, |
| "loss": 1.3101, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.01716583043494739, |
| "grad_norm": 1.1397989988327026, |
| "learning_rate": 3.814459704586681e-06, |
| "loss": 1.6643, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.01718915357412531, |
| "grad_norm": 0.9662717580795288, |
| "learning_rate": 3.819642394402696e-06, |
| "loss": 1.5524, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.017212476713303226, |
| "grad_norm": 1.5264514684677124, |
| "learning_rate": 3.82482508421871e-06, |
| "loss": 1.6702, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.017235799852481144, |
| "grad_norm": 1.1797709465026855, |
| "learning_rate": 3.830007774034724e-06, |
| "loss": 1.5751, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.017259122991659062, |
| "grad_norm": 1.3964486122131348, |
| "learning_rate": 3.835190463850739e-06, |
| "loss": 1.3497, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.01728244613083698, |
| "grad_norm": 1.0540798902511597, |
| "learning_rate": 3.840373153666753e-06, |
| "loss": 1.623, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.017305769270014898, |
| "grad_norm": 1.8619107007980347, |
| "learning_rate": 3.845555843482767e-06, |
| "loss": 1.836, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.017329092409192816, |
| "grad_norm": 1.190048098564148, |
| "learning_rate": 3.8507385332987826e-06, |
| "loss": 1.6031, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.017352415548370734, |
| "grad_norm": 1.32784903049469, |
| "learning_rate": 3.855921223114797e-06, |
| "loss": 1.6144, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.01737573868754865, |
| "grad_norm": 1.7393810749053955, |
| "learning_rate": 3.861103912930811e-06, |
| "loss": 1.4898, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.01739906182672657, |
| "grad_norm": 1.008122444152832, |
| "learning_rate": 3.866286602746826e-06, |
| "loss": 1.6506, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.017422384965904487, |
| "grad_norm": 1.3282239437103271, |
| "learning_rate": 3.871469292562841e-06, |
| "loss": 1.5178, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.017445708105082402, |
| "grad_norm": 1.4479358196258545, |
| "learning_rate": 3.876651982378855e-06, |
| "loss": 1.5896, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.01746903124426032, |
| "grad_norm": 1.9100661277770996, |
| "learning_rate": 3.8818346721948695e-06, |
| "loss": 1.2946, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.017492354383438238, |
| "grad_norm": 1.269235610961914, |
| "learning_rate": 3.887017362010884e-06, |
| "loss": 1.5707, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.017515677522616156, |
| "grad_norm": 1.3187369108200073, |
| "learning_rate": 3.892200051826899e-06, |
| "loss": 1.8153, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.017539000661794073, |
| "grad_norm": 1.3091131448745728, |
| "learning_rate": 3.8973827416429125e-06, |
| "loss": 1.5973, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.01756232380097199, |
| "grad_norm": 1.4826890230178833, |
| "learning_rate": 3.902565431458927e-06, |
| "loss": 1.3277, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.01758564694014991, |
| "grad_norm": 1.2626949548721313, |
| "learning_rate": 3.907748121274942e-06, |
| "loss": 1.5531, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.017608970079327827, |
| "grad_norm": 1.1990412473678589, |
| "learning_rate": 3.912930811090956e-06, |
| "loss": 1.349, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.017632293218505745, |
| "grad_norm": 1.3036906719207764, |
| "learning_rate": 3.918113500906971e-06, |
| "loss": 1.5648, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.017655616357683663, |
| "grad_norm": 1.3129525184631348, |
| "learning_rate": 3.923296190722985e-06, |
| "loss": 1.7147, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.01767893949686158, |
| "grad_norm": 1.4686280488967896, |
| "learning_rate": 3.928478880539e-06, |
| "loss": 1.6136, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.0177022626360395, |
| "grad_norm": 1.6845604181289673, |
| "learning_rate": 3.933661570355015e-06, |
| "loss": 1.763, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.017725585775217417, |
| "grad_norm": 2.019049644470215, |
| "learning_rate": 3.938844260171029e-06, |
| "loss": 1.2543, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.017748908914395334, |
| "grad_norm": 1.4184072017669678, |
| "learning_rate": 3.944026949987043e-06, |
| "loss": 1.596, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.017772232053573252, |
| "grad_norm": 1.127982497215271, |
| "learning_rate": 3.9492096398030585e-06, |
| "loss": 1.5485, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.017795555192751167, |
| "grad_norm": 1.5097321271896362, |
| "learning_rate": 3.954392329619073e-06, |
| "loss": 1.5452, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.017818878331929085, |
| "grad_norm": 1.3832807540893555, |
| "learning_rate": 3.959575019435087e-06, |
| "loss": 1.3865, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.017842201471107003, |
| "grad_norm": 1.065623164176941, |
| "learning_rate": 3.964757709251102e-06, |
| "loss": 1.2218, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.01786552461028492, |
| "grad_norm": 1.2190065383911133, |
| "learning_rate": 3.969940399067116e-06, |
| "loss": 1.2169, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.01788884774946284, |
| "grad_norm": 1.741749882698059, |
| "learning_rate": 3.97512308888313e-06, |
| "loss": 1.7316, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.017912170888640756, |
| "grad_norm": 1.2072060108184814, |
| "learning_rate": 3.980305778699145e-06, |
| "loss": 1.815, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.017935494027818674, |
| "grad_norm": 1.4645625352859497, |
| "learning_rate": 3.98548846851516e-06, |
| "loss": 1.2218, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.017958817166996592, |
| "grad_norm": 1.4466350078582764, |
| "learning_rate": 3.990671158331174e-06, |
| "loss": 1.7291, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.01798214030617451, |
| "grad_norm": 1.364358901977539, |
| "learning_rate": 3.9958538481471885e-06, |
| "loss": 1.6527, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.018005463445352428, |
| "grad_norm": 1.2262394428253174, |
| "learning_rate": 4.001036537963203e-06, |
| "loss": 1.5522, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.018028786584530346, |
| "grad_norm": 1.694001317024231, |
| "learning_rate": 4.006219227779218e-06, |
| "loss": 1.5791, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.018052109723708264, |
| "grad_norm": 0.7941157817840576, |
| "learning_rate": 4.011401917595232e-06, |
| "loss": 1.23, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.01807543286288618, |
| "grad_norm": 1.1942747831344604, |
| "learning_rate": 4.016584607411247e-06, |
| "loss": 1.4316, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.0180987560020641, |
| "grad_norm": 1.5809072256088257, |
| "learning_rate": 4.021767297227261e-06, |
| "loss": 1.7361, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.018122079141242014, |
| "grad_norm": 1.2918401956558228, |
| "learning_rate": 4.026949987043276e-06, |
| "loss": 1.3285, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.018145402280419932, |
| "grad_norm": 1.966123342514038, |
| "learning_rate": 4.032132676859291e-06, |
| "loss": 1.2037, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.01816872541959785, |
| "grad_norm": 1.3362590074539185, |
| "learning_rate": 4.037315366675304e-06, |
| "loss": 1.3811, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.018192048558775768, |
| "grad_norm": 1.0375605821609497, |
| "learning_rate": 4.042498056491319e-06, |
| "loss": 1.481, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.018215371697953685, |
| "grad_norm": 2.414684295654297, |
| "learning_rate": 4.047680746307334e-06, |
| "loss": 1.773, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.018238694837131603, |
| "grad_norm": 1.2252676486968994, |
| "learning_rate": 4.052863436123348e-06, |
| "loss": 1.514, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.01826201797630952, |
| "grad_norm": 1.517791748046875, |
| "learning_rate": 4.058046125939362e-06, |
| "loss": 1.3442, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.01828534111548744, |
| "grad_norm": 1.0303611755371094, |
| "learning_rate": 4.0632288157553776e-06, |
| "loss": 1.5593, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.018308664254665357, |
| "grad_norm": 1.3615033626556396, |
| "learning_rate": 4.068411505571392e-06, |
| "loss": 1.6971, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.018331987393843275, |
| "grad_norm": 1.1224147081375122, |
| "learning_rate": 4.073594195387406e-06, |
| "loss": 1.2134, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.018355310533021193, |
| "grad_norm": 1.3592679500579834, |
| "learning_rate": 4.078776885203421e-06, |
| "loss": 1.7391, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.01837863367219911, |
| "grad_norm": 1.6286187171936035, |
| "learning_rate": 4.083959575019436e-06, |
| "loss": 1.7279, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.01840195681137703, |
| "grad_norm": 1.2597742080688477, |
| "learning_rate": 4.08914226483545e-06, |
| "loss": 1.5227, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.018425279950554947, |
| "grad_norm": 1.2776849269866943, |
| "learning_rate": 4.0943249546514645e-06, |
| "loss": 1.3575, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.01844860308973286, |
| "grad_norm": 1.2529163360595703, |
| "learning_rate": 4.099507644467479e-06, |
| "loss": 1.6356, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.01847192622891078, |
| "grad_norm": 1.184187650680542, |
| "learning_rate": 4.104690334283494e-06, |
| "loss": 1.734, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.018495249368088697, |
| "grad_norm": 1.176222562789917, |
| "learning_rate": 4.1098730240995075e-06, |
| "loss": 1.5206, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.018518572507266615, |
| "grad_norm": 1.0694701671600342, |
| "learning_rate": 4.115055713915522e-06, |
| "loss": 1.1824, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.018541895646444533, |
| "grad_norm": 1.5169551372528076, |
| "learning_rate": 4.120238403731537e-06, |
| "loss": 1.3817, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.01856521878562245, |
| "grad_norm": 1.0996246337890625, |
| "learning_rate": 4.125421093547551e-06, |
| "loss": 1.0921, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.01858854192480037, |
| "grad_norm": 1.0202140808105469, |
| "learning_rate": 4.130603783363566e-06, |
| "loss": 1.2687, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.018611865063978286, |
| "grad_norm": 2.089864730834961, |
| "learning_rate": 4.13578647317958e-06, |
| "loss": 1.5417, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.018635188203156204, |
| "grad_norm": 1.1465847492218018, |
| "learning_rate": 4.140969162995595e-06, |
| "loss": 1.3415, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.018658511342334122, |
| "grad_norm": 1.1085565090179443, |
| "learning_rate": 4.14615185281161e-06, |
| "loss": 1.4662, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.01868183448151204, |
| "grad_norm": 1.2206768989562988, |
| "learning_rate": 4.151334542627624e-06, |
| "loss": 1.4954, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.018705157620689958, |
| "grad_norm": 1.1540756225585938, |
| "learning_rate": 4.156517232443638e-06, |
| "loss": 1.4953, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.018728480759867876, |
| "grad_norm": 1.9667025804519653, |
| "learning_rate": 4.1616999222596535e-06, |
| "loss": 1.1834, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.018751803899045794, |
| "grad_norm": 1.2202988862991333, |
| "learning_rate": 4.166882612075668e-06, |
| "loss": 1.7045, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.018775127038223708, |
| "grad_norm": 1.2399123907089233, |
| "learning_rate": 4.172065301891682e-06, |
| "loss": 1.4937, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.018798450177401626, |
| "grad_norm": 1.5780203342437744, |
| "learning_rate": 4.177247991707697e-06, |
| "loss": 1.6386, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.018821773316579544, |
| "grad_norm": 1.524564266204834, |
| "learning_rate": 4.182430681523711e-06, |
| "loss": 1.4951, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.01884509645575746, |
| "grad_norm": 1.342991590499878, |
| "learning_rate": 4.187613371339725e-06, |
| "loss": 1.3007, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.01886841959493538, |
| "grad_norm": 1.320813775062561, |
| "learning_rate": 4.19279606115574e-06, |
| "loss": 1.2112, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.018891742734113297, |
| "grad_norm": 1.2329927682876587, |
| "learning_rate": 4.197978750971755e-06, |
| "loss": 1.333, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.018915065873291215, |
| "grad_norm": 1.3429094552993774, |
| "learning_rate": 4.203161440787769e-06, |
| "loss": 1.4805, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.018938389012469133, |
| "grad_norm": 1.643641710281372, |
| "learning_rate": 4.2083441306037835e-06, |
| "loss": 1.5665, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.01896171215164705, |
| "grad_norm": 1.111887812614441, |
| "learning_rate": 4.213526820419798e-06, |
| "loss": 1.6087, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.01898503529082497, |
| "grad_norm": 1.3594610691070557, |
| "learning_rate": 4.218709510235813e-06, |
| "loss": 1.7666, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.019008358430002887, |
| "grad_norm": 1.2298046350479126, |
| "learning_rate": 4.223892200051827e-06, |
| "loss": 1.5032, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.019031681569180805, |
| "grad_norm": 1.2679171562194824, |
| "learning_rate": 4.229074889867842e-06, |
| "loss": 1.4375, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.019055004708358723, |
| "grad_norm": 1.0543935298919678, |
| "learning_rate": 4.234257579683856e-06, |
| "loss": 1.6645, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.01907832784753664, |
| "grad_norm": 1.2821168899536133, |
| "learning_rate": 4.239440269499871e-06, |
| "loss": 1.1945, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.01910165098671456, |
| "grad_norm": 1.5575084686279297, |
| "learning_rate": 4.244622959315886e-06, |
| "loss": 1.3262, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.019124974125892473, |
| "grad_norm": 1.2359989881515503, |
| "learning_rate": 4.2498056491319e-06, |
| "loss": 1.4127, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.01914829726507039, |
| "grad_norm": 1.0559273958206177, |
| "learning_rate": 4.254988338947914e-06, |
| "loss": 1.4455, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.01917162040424831, |
| "grad_norm": 1.3651732206344604, |
| "learning_rate": 4.260171028763929e-06, |
| "loss": 1.245, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.019194943543426227, |
| "grad_norm": 1.0067932605743408, |
| "learning_rate": 4.265353718579943e-06, |
| "loss": 1.4954, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.019218266682604145, |
| "grad_norm": 1.7477822303771973, |
| "learning_rate": 4.270536408395957e-06, |
| "loss": 1.8164, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.019241589821782062, |
| "grad_norm": 1.1976604461669922, |
| "learning_rate": 4.2757190982119726e-06, |
| "loss": 1.4552, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.01926491296095998, |
| "grad_norm": 1.306269884109497, |
| "learning_rate": 4.280901788027987e-06, |
| "loss": 1.6348, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.019288236100137898, |
| "grad_norm": 1.5786314010620117, |
| "learning_rate": 4.286084477844001e-06, |
| "loss": 1.4592, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.019311559239315816, |
| "grad_norm": 1.4481762647628784, |
| "learning_rate": 4.291267167660016e-06, |
| "loss": 1.3409, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.019334882378493734, |
| "grad_norm": 1.1410714387893677, |
| "learning_rate": 4.296449857476031e-06, |
| "loss": 1.5746, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.019358205517671652, |
| "grad_norm": 1.363434076309204, |
| "learning_rate": 4.301632547292045e-06, |
| "loss": 1.0836, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.01938152865684957, |
| "grad_norm": 1.1413646936416626, |
| "learning_rate": 4.3068152371080595e-06, |
| "loss": 1.8687, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.019404851796027488, |
| "grad_norm": 1.9734309911727905, |
| "learning_rate": 4.311997926924074e-06, |
| "loss": 1.3295, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.019428174935205406, |
| "grad_norm": 1.5119333267211914, |
| "learning_rate": 4.317180616740089e-06, |
| "loss": 1.6817, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.01945149807438332, |
| "grad_norm": 1.3933395147323608, |
| "learning_rate": 4.3223633065561025e-06, |
| "loss": 1.5288, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.019474821213561238, |
| "grad_norm": 1.3713746070861816, |
| "learning_rate": 4.327545996372117e-06, |
| "loss": 1.6361, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.019498144352739156, |
| "grad_norm": 1.1849229335784912, |
| "learning_rate": 4.332728686188132e-06, |
| "loss": 1.6611, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.019521467491917074, |
| "grad_norm": 2.122307777404785, |
| "learning_rate": 4.337911376004146e-06, |
| "loss": 1.6258, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.01954479063109499, |
| "grad_norm": 1.221781611442566, |
| "learning_rate": 4.343094065820161e-06, |
| "loss": 1.9081, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.01956811377027291, |
| "grad_norm": 1.2895511388778687, |
| "learning_rate": 4.348276755636175e-06, |
| "loss": 1.2742, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.019591436909450827, |
| "grad_norm": 1.1531336307525635, |
| "learning_rate": 4.35345944545219e-06, |
| "loss": 1.587, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.019614760048628745, |
| "grad_norm": 1.3979135751724243, |
| "learning_rate": 4.358642135268205e-06, |
| "loss": 1.5208, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.019638083187806663, |
| "grad_norm": 1.3758100271224976, |
| "learning_rate": 4.363824825084219e-06, |
| "loss": 1.246, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.01966140632698458, |
| "grad_norm": 1.3759677410125732, |
| "learning_rate": 4.369007514900233e-06, |
| "loss": 1.7344, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.0196847294661625, |
| "grad_norm": 1.5575461387634277, |
| "learning_rate": 4.3741902047162485e-06, |
| "loss": 1.5554, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.019708052605340417, |
| "grad_norm": 1.5018088817596436, |
| "learning_rate": 4.379372894532263e-06, |
| "loss": 1.3433, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.019731375744518335, |
| "grad_norm": 1.4393954277038574, |
| "learning_rate": 4.384555584348277e-06, |
| "loss": 1.7277, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.019754698883696253, |
| "grad_norm": 1.0249360799789429, |
| "learning_rate": 4.389738274164292e-06, |
| "loss": 1.6538, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.019778022022874167, |
| "grad_norm": 1.128587007522583, |
| "learning_rate": 4.394920963980306e-06, |
| "loss": 1.2935, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.019801345162052085, |
| "grad_norm": 1.301287293434143, |
| "learning_rate": 4.40010365379632e-06, |
| "loss": 1.4193, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.019824668301230003, |
| "grad_norm": 1.5180747509002686, |
| "learning_rate": 4.405286343612335e-06, |
| "loss": 1.2061, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.01984799144040792, |
| "grad_norm": 0.9110321402549744, |
| "learning_rate": 4.41046903342835e-06, |
| "loss": 1.2803, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.01987131457958584, |
| "grad_norm": 1.68843674659729, |
| "learning_rate": 4.415651723244364e-06, |
| "loss": 1.2037, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.019894637718763757, |
| "grad_norm": 1.2198610305786133, |
| "learning_rate": 4.4208344130603785e-06, |
| "loss": 1.6652, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.019917960857941674, |
| "grad_norm": 1.579087257385254, |
| "learning_rate": 4.426017102876393e-06, |
| "loss": 1.5859, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.019941283997119592, |
| "grad_norm": 1.7198874950408936, |
| "learning_rate": 4.431199792692408e-06, |
| "loss": 1.4662, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.01996460713629751, |
| "grad_norm": 2.817178726196289, |
| "learning_rate": 4.436382482508422e-06, |
| "loss": 1.3427, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.019987930275475428, |
| "grad_norm": 1.4508287906646729, |
| "learning_rate": 4.441565172324437e-06, |
| "loss": 1.2893, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.020011253414653346, |
| "grad_norm": 1.29767644405365, |
| "learning_rate": 4.446747862140451e-06, |
| "loss": 1.5759, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.020034576553831264, |
| "grad_norm": 1.84248685836792, |
| "learning_rate": 4.451930551956466e-06, |
| "loss": 2.1373, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.020057899693009182, |
| "grad_norm": 1.6153839826583862, |
| "learning_rate": 4.457113241772481e-06, |
| "loss": 1.3915, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.0200812228321871, |
| "grad_norm": 1.3203104734420776, |
| "learning_rate": 4.462295931588495e-06, |
| "loss": 1.569, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.020104545971365014, |
| "grad_norm": 1.6475995779037476, |
| "learning_rate": 4.467478621404509e-06, |
| "loss": 1.6446, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.020127869110542932, |
| "grad_norm": 1.165834665298462, |
| "learning_rate": 4.472661311220524e-06, |
| "loss": 1.7323, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.02015119224972085, |
| "grad_norm": 1.3182172775268555, |
| "learning_rate": 4.477844001036538e-06, |
| "loss": 1.6265, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.020174515388898768, |
| "grad_norm": 1.1236745119094849, |
| "learning_rate": 4.483026690852552e-06, |
| "loss": 1.2358, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.020197838528076686, |
| "grad_norm": 1.2104893922805786, |
| "learning_rate": 4.4882093806685676e-06, |
| "loss": 1.4677, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.020221161667254604, |
| "grad_norm": 1.6824678182601929, |
| "learning_rate": 4.493392070484582e-06, |
| "loss": 1.5802, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.02024448480643252, |
| "grad_norm": 1.0679930448532104, |
| "learning_rate": 4.498574760300596e-06, |
| "loss": 1.4105, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.02026780794561044, |
| "grad_norm": 1.3705253601074219, |
| "learning_rate": 4.503757450116611e-06, |
| "loss": 1.5095, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.020291131084788357, |
| "grad_norm": 1.307491660118103, |
| "learning_rate": 4.508940139932626e-06, |
| "loss": 1.3987, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.020314454223966275, |
| "grad_norm": 1.4814496040344238, |
| "learning_rate": 4.51412282974864e-06, |
| "loss": 1.635, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.020337777363144193, |
| "grad_norm": 0.935867190361023, |
| "learning_rate": 4.5193055195646545e-06, |
| "loss": 1.6734, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.02036110050232211, |
| "grad_norm": 1.3890215158462524, |
| "learning_rate": 4.524488209380669e-06, |
| "loss": 1.4458, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.02038442364150003, |
| "grad_norm": 1.628081202507019, |
| "learning_rate": 4.529670899196684e-06, |
| "loss": 1.4814, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.020407746780677947, |
| "grad_norm": 1.5255577564239502, |
| "learning_rate": 4.534853589012698e-06, |
| "loss": 1.3884, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.020431069919855865, |
| "grad_norm": 2.09283185005188, |
| "learning_rate": 4.540036278828712e-06, |
| "loss": 1.7396, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.02045439305903378, |
| "grad_norm": 0.9901561737060547, |
| "learning_rate": 4.545218968644727e-06, |
| "loss": 1.4941, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.020477716198211697, |
| "grad_norm": 1.8444923162460327, |
| "learning_rate": 4.550401658460741e-06, |
| "loss": 1.2724, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.020501039337389615, |
| "grad_norm": 1.414305567741394, |
| "learning_rate": 4.555584348276756e-06, |
| "loss": 1.5781, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.020524362476567533, |
| "grad_norm": 1.1960091590881348, |
| "learning_rate": 4.56076703809277e-06, |
| "loss": 1.536, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.02054768561574545, |
| "grad_norm": 2.241649627685547, |
| "learning_rate": 4.565949727908785e-06, |
| "loss": 1.6636, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.02057100875492337, |
| "grad_norm": 1.0672343969345093, |
| "learning_rate": 4.5711324177248e-06, |
| "loss": 1.6369, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.020594331894101287, |
| "grad_norm": 1.6761622428894043, |
| "learning_rate": 4.576315107540814e-06, |
| "loss": 1.2554, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.020617655033279204, |
| "grad_norm": 1.1365658044815063, |
| "learning_rate": 4.581497797356828e-06, |
| "loss": 1.6271, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.020640978172457122, |
| "grad_norm": 1.0631389617919922, |
| "learning_rate": 4.5866804871728435e-06, |
| "loss": 1.6393, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.02066430131163504, |
| "grad_norm": 3.27304744720459, |
| "learning_rate": 4.591863176988858e-06, |
| "loss": 1.3521, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.020687624450812958, |
| "grad_norm": 1.3354477882385254, |
| "learning_rate": 4.597045866804872e-06, |
| "loss": 1.5137, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.020710947589990876, |
| "grad_norm": 2.192812919616699, |
| "learning_rate": 4.602228556620887e-06, |
| "loss": 1.7294, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.020734270729168794, |
| "grad_norm": 0.9716669321060181, |
| "learning_rate": 4.607411246436901e-06, |
| "loss": 1.4244, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.020757593868346712, |
| "grad_norm": 1.0377227067947388, |
| "learning_rate": 4.612593936252915e-06, |
| "loss": 1.3041, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.020780917007524626, |
| "grad_norm": 1.971074104309082, |
| "learning_rate": 4.61777662606893e-06, |
| "loss": 1.4917, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.020804240146702544, |
| "grad_norm": 1.3108222484588623, |
| "learning_rate": 4.622959315884945e-06, |
| "loss": 1.5923, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.020827563285880462, |
| "grad_norm": 1.4194189310073853, |
| "learning_rate": 4.628142005700959e-06, |
| "loss": 1.2378, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.02085088642505838, |
| "grad_norm": 1.5872682332992554, |
| "learning_rate": 4.6333246955169735e-06, |
| "loss": 1.3573, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.020874209564236298, |
| "grad_norm": 1.351704716682434, |
| "learning_rate": 4.638507385332988e-06, |
| "loss": 1.8374, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.020897532703414216, |
| "grad_norm": 1.15986168384552, |
| "learning_rate": 4.643690075149003e-06, |
| "loss": 1.4303, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.020920855842592134, |
| "grad_norm": 1.912819743156433, |
| "learning_rate": 4.648872764965017e-06, |
| "loss": 1.7733, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.02094417898177005, |
| "grad_norm": 1.6582539081573486, |
| "learning_rate": 4.654055454781032e-06, |
| "loss": 1.4696, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.02096750212094797, |
| "grad_norm": 1.147661805152893, |
| "learning_rate": 4.659238144597046e-06, |
| "loss": 1.5037, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.020990825260125887, |
| "grad_norm": 1.1773402690887451, |
| "learning_rate": 4.664420834413061e-06, |
| "loss": 1.604, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.021014148399303805, |
| "grad_norm": 1.9128248691558838, |
| "learning_rate": 4.669603524229076e-06, |
| "loss": 1.3081, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.021037471538481723, |
| "grad_norm": 1.0742683410644531, |
| "learning_rate": 4.67478621404509e-06, |
| "loss": 1.5619, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.02106079467765964, |
| "grad_norm": 1.19862699508667, |
| "learning_rate": 4.679968903861104e-06, |
| "loss": 1.6896, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.02108411781683756, |
| "grad_norm": 1.276283860206604, |
| "learning_rate": 4.685151593677119e-06, |
| "loss": 1.65, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.021107440956015473, |
| "grad_norm": 1.3582435846328735, |
| "learning_rate": 4.690334283493133e-06, |
| "loss": 1.2686, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.02113076409519339, |
| "grad_norm": 1.2145341634750366, |
| "learning_rate": 4.695516973309147e-06, |
| "loss": 1.8032, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.02115408723437131, |
| "grad_norm": 1.1219233274459839, |
| "learning_rate": 4.7006996631251626e-06, |
| "loss": 1.7681, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.021177410373549227, |
| "grad_norm": 1.0474015474319458, |
| "learning_rate": 4.705882352941177e-06, |
| "loss": 1.4555, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.021200733512727145, |
| "grad_norm": 1.6325182914733887, |
| "learning_rate": 4.711065042757191e-06, |
| "loss": 1.432, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.021224056651905063, |
| "grad_norm": 1.5804178714752197, |
| "learning_rate": 4.716247732573206e-06, |
| "loss": 1.7409, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.02124737979108298, |
| "grad_norm": 1.226804256439209, |
| "learning_rate": 4.721430422389221e-06, |
| "loss": 1.8077, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.0212707029302609, |
| "grad_norm": 1.0747625827789307, |
| "learning_rate": 4.726613112205235e-06, |
| "loss": 1.411, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.021294026069438816, |
| "grad_norm": 1.2126623392105103, |
| "learning_rate": 4.7317958020212495e-06, |
| "loss": 1.6464, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.021317349208616734, |
| "grad_norm": 1.196486473083496, |
| "learning_rate": 4.736978491837264e-06, |
| "loss": 1.4365, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.021340672347794652, |
| "grad_norm": 1.4727115631103516, |
| "learning_rate": 4.742161181653279e-06, |
| "loss": 1.5059, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.02136399548697257, |
| "grad_norm": 1.293938159942627, |
| "learning_rate": 4.747343871469293e-06, |
| "loss": 1.5508, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.021387318626150488, |
| "grad_norm": 1.3074458837509155, |
| "learning_rate": 4.752526561285307e-06, |
| "loss": 1.364, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.021410641765328406, |
| "grad_norm": 1.708522081375122, |
| "learning_rate": 4.757709251101322e-06, |
| "loss": 1.2891, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.02143396490450632, |
| "grad_norm": 1.2926160097122192, |
| "learning_rate": 4.762891940917336e-06, |
| "loss": 1.1779, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.021457288043684238, |
| "grad_norm": 1.7751168012619019, |
| "learning_rate": 4.768074630733351e-06, |
| "loss": 1.3136, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.021480611182862156, |
| "grad_norm": 1.3698194026947021, |
| "learning_rate": 4.773257320549365e-06, |
| "loss": 1.5203, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.021503934322040074, |
| "grad_norm": 1.4710402488708496, |
| "learning_rate": 4.77844001036538e-06, |
| "loss": 2.0632, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.021527257461217992, |
| "grad_norm": 1.3340466022491455, |
| "learning_rate": 4.783622700181395e-06, |
| "loss": 0.9449, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.02155058060039591, |
| "grad_norm": 1.990078330039978, |
| "learning_rate": 4.788805389997409e-06, |
| "loss": 1.4095, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.021573903739573828, |
| "grad_norm": 2.6495463848114014, |
| "learning_rate": 4.793988079813423e-06, |
| "loss": 1.5914, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.021597226878751746, |
| "grad_norm": 1.368868350982666, |
| "learning_rate": 4.7991707696294385e-06, |
| "loss": 1.8007, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.021620550017929663, |
| "grad_norm": 1.3946820497512817, |
| "learning_rate": 4.804353459445453e-06, |
| "loss": 1.3846, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.02164387315710758, |
| "grad_norm": 1.6035547256469727, |
| "learning_rate": 4.809536149261467e-06, |
| "loss": 1.6677, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.0216671962962855, |
| "grad_norm": 1.29734468460083, |
| "learning_rate": 4.814718839077482e-06, |
| "loss": 1.3697, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.021690519435463417, |
| "grad_norm": 1.1746439933776855, |
| "learning_rate": 4.819901528893497e-06, |
| "loss": 1.6134, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.021713842574641335, |
| "grad_norm": 1.255861759185791, |
| "learning_rate": 4.82508421870951e-06, |
| "loss": 1.6253, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.021737165713819253, |
| "grad_norm": 1.5499615669250488, |
| "learning_rate": 4.830266908525525e-06, |
| "loss": 1.2794, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.02176048885299717, |
| "grad_norm": 1.6138273477554321, |
| "learning_rate": 4.83544959834154e-06, |
| "loss": 1.6365, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.021783811992175085, |
| "grad_norm": 1.7135401964187622, |
| "learning_rate": 4.840632288157554e-06, |
| "loss": 1.509, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.021807135131353003, |
| "grad_norm": 1.4290528297424316, |
| "learning_rate": 4.8458149779735685e-06, |
| "loss": 1.3415, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.02183045827053092, |
| "grad_norm": 2.034870147705078, |
| "learning_rate": 4.850997667789583e-06, |
| "loss": 1.6834, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.02185378140970884, |
| "grad_norm": 1.6626250743865967, |
| "learning_rate": 4.856180357605598e-06, |
| "loss": 1.3573, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.021877104548886757, |
| "grad_norm": 1.2256288528442383, |
| "learning_rate": 4.861363047421612e-06, |
| "loss": 1.5497, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.021900427688064675, |
| "grad_norm": 1.218955397605896, |
| "learning_rate": 4.866545737237627e-06, |
| "loss": 1.6823, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.021923750827242593, |
| "grad_norm": 1.0629289150238037, |
| "learning_rate": 4.871728427053641e-06, |
| "loss": 1.3894, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.02194707396642051, |
| "grad_norm": 2.6169822216033936, |
| "learning_rate": 4.876911116869656e-06, |
| "loss": 1.4063, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.02197039710559843, |
| "grad_norm": 1.1517153978347778, |
| "learning_rate": 4.882093806685671e-06, |
| "loss": 1.3838, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.021993720244776346, |
| "grad_norm": 1.6320403814315796, |
| "learning_rate": 4.887276496501685e-06, |
| "loss": 1.5752, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.022017043383954264, |
| "grad_norm": 1.7344862222671509, |
| "learning_rate": 4.892459186317699e-06, |
| "loss": 1.3182, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.022040366523132182, |
| "grad_norm": 1.2497214078903198, |
| "learning_rate": 4.897641876133714e-06, |
| "loss": 1.2266, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.0220636896623101, |
| "grad_norm": 1.996893048286438, |
| "learning_rate": 4.902824565949728e-06, |
| "loss": 1.2708, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.022087012801488018, |
| "grad_norm": 1.1130571365356445, |
| "learning_rate": 4.908007255765742e-06, |
| "loss": 1.4791, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.022110335940665932, |
| "grad_norm": 1.2698702812194824, |
| "learning_rate": 4.9131899455817576e-06, |
| "loss": 1.3711, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.02213365907984385, |
| "grad_norm": 1.0363445281982422, |
| "learning_rate": 4.918372635397772e-06, |
| "loss": 1.4153, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.022156982219021768, |
| "grad_norm": 1.1418310403823853, |
| "learning_rate": 4.923555325213786e-06, |
| "loss": 1.3377, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.022180305358199686, |
| "grad_norm": 1.3740698099136353, |
| "learning_rate": 4.928738015029801e-06, |
| "loss": 1.375, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.022203628497377604, |
| "grad_norm": 1.5656532049179077, |
| "learning_rate": 4.933920704845816e-06, |
| "loss": 1.651, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.022226951636555522, |
| "grad_norm": 1.209380865097046, |
| "learning_rate": 4.93910339466183e-06, |
| "loss": 1.6956, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.02225027477573344, |
| "grad_norm": 1.9917747974395752, |
| "learning_rate": 4.9442860844778445e-06, |
| "loss": 1.2802, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.022273597914911358, |
| "grad_norm": 2.168260097503662, |
| "learning_rate": 4.949468774293859e-06, |
| "loss": 1.9773, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.022296921054089276, |
| "grad_norm": 1.113978624343872, |
| "learning_rate": 4.954651464109874e-06, |
| "loss": 1.8121, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.022320244193267193, |
| "grad_norm": 1.4833635091781616, |
| "learning_rate": 4.959834153925888e-06, |
| "loss": 1.694, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.02234356733244511, |
| "grad_norm": 1.3287935256958008, |
| "learning_rate": 4.965016843741902e-06, |
| "loss": 1.4865, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.02236689047162303, |
| "grad_norm": 1.5515238046646118, |
| "learning_rate": 4.970199533557917e-06, |
| "loss": 1.6035, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.022390213610800947, |
| "grad_norm": 1.2824245691299438, |
| "learning_rate": 4.975382223373931e-06, |
| "loss": 1.5124, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.022413536749978865, |
| "grad_norm": 1.2062418460845947, |
| "learning_rate": 4.980564913189946e-06, |
| "loss": 1.5982, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.02243685988915678, |
| "grad_norm": 1.2790741920471191, |
| "learning_rate": 4.98574760300596e-06, |
| "loss": 1.586, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.022460183028334697, |
| "grad_norm": 1.202909231185913, |
| "learning_rate": 4.990930292821975e-06, |
| "loss": 1.7387, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.022483506167512615, |
| "grad_norm": 1.328963041305542, |
| "learning_rate": 4.99611298263799e-06, |
| "loss": 1.5611, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.022506829306690533, |
| "grad_norm": 1.3728841543197632, |
| "learning_rate": 5.001295672454004e-06, |
| "loss": 1.6887, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.02253015244586845, |
| "grad_norm": 1.2474596500396729, |
| "learning_rate": 5.006478362270018e-06, |
| "loss": 1.7337, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.02255347558504637, |
| "grad_norm": 1.4526808261871338, |
| "learning_rate": 5.0116610520860335e-06, |
| "loss": 1.4009, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.022576798724224287, |
| "grad_norm": 1.74959397315979, |
| "learning_rate": 5.016843741902048e-06, |
| "loss": 1.4153, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.022600121863402205, |
| "grad_norm": 1.7886738777160645, |
| "learning_rate": 5.022026431718062e-06, |
| "loss": 1.3897, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.022623445002580123, |
| "grad_norm": 1.3122284412384033, |
| "learning_rate": 5.027209121534077e-06, |
| "loss": 1.6551, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.02264676814175804, |
| "grad_norm": 1.5374927520751953, |
| "learning_rate": 5.032391811350092e-06, |
| "loss": 1.6396, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.02267009128093596, |
| "grad_norm": 1.6476905345916748, |
| "learning_rate": 5.037574501166106e-06, |
| "loss": 1.733, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.022693414420113876, |
| "grad_norm": 1.3407307863235474, |
| "learning_rate": 5.0427571909821205e-06, |
| "loss": 1.4984, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.022716737559291794, |
| "grad_norm": 1.5565712451934814, |
| "learning_rate": 5.047939880798135e-06, |
| "loss": 1.6524, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.022740060698469712, |
| "grad_norm": 1.381903052330017, |
| "learning_rate": 5.053122570614149e-06, |
| "loss": 1.5325, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.022763383837647626, |
| "grad_norm": 1.916326880455017, |
| "learning_rate": 5.058305260430164e-06, |
| "loss": 1.2326, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.022786706976825544, |
| "grad_norm": 1.1621575355529785, |
| "learning_rate": 5.063487950246179e-06, |
| "loss": 1.2568, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.022810030116003462, |
| "grad_norm": 1.3575561046600342, |
| "learning_rate": 5.068670640062193e-06, |
| "loss": 1.3755, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.02283335325518138, |
| "grad_norm": 1.482701063156128, |
| "learning_rate": 5.0738533298782065e-06, |
| "loss": 1.598, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.022856676394359298, |
| "grad_norm": 1.2530887126922607, |
| "learning_rate": 5.079036019694221e-06, |
| "loss": 1.66, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.022879999533537216, |
| "grad_norm": 1.4960439205169678, |
| "learning_rate": 5.084218709510236e-06, |
| "loss": 1.5341, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.022903322672715134, |
| "grad_norm": 1.507735252380371, |
| "learning_rate": 5.0894013993262504e-06, |
| "loss": 1.3987, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.022926645811893052, |
| "grad_norm": 2.0131475925445557, |
| "learning_rate": 5.094584089142265e-06, |
| "loss": 1.3134, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.02294996895107097, |
| "grad_norm": 1.8096015453338623, |
| "learning_rate": 5.099766778958279e-06, |
| "loss": 1.3707, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.022973292090248888, |
| "grad_norm": 1.0444198846817017, |
| "learning_rate": 5.104949468774294e-06, |
| "loss": 1.4119, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.022996615229426805, |
| "grad_norm": 1.3110159635543823, |
| "learning_rate": 5.110132158590309e-06, |
| "loss": 1.2187, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.023019938368604723, |
| "grad_norm": 1.3191614151000977, |
| "learning_rate": 5.115314848406323e-06, |
| "loss": 1.3691, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.02304326150778264, |
| "grad_norm": 1.3888386487960815, |
| "learning_rate": 5.120497538222337e-06, |
| "loss": 1.1934, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.02306658464696056, |
| "grad_norm": 1.2101585865020752, |
| "learning_rate": 5.1256802280383526e-06, |
| "loss": 1.4962, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.023089907786138477, |
| "grad_norm": 1.2938464879989624, |
| "learning_rate": 5.130862917854367e-06, |
| "loss": 1.4601, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.02311323092531639, |
| "grad_norm": 2.072444200515747, |
| "learning_rate": 5.136045607670381e-06, |
| "loss": 1.7241, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.02313655406449431, |
| "grad_norm": 1.7139407396316528, |
| "learning_rate": 5.141228297486396e-06, |
| "loss": 1.394, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.023159877203672227, |
| "grad_norm": 1.5825177431106567, |
| "learning_rate": 5.146410987302411e-06, |
| "loss": 1.4218, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.023183200342850145, |
| "grad_norm": 1.2233787775039673, |
| "learning_rate": 5.151593677118425e-06, |
| "loss": 1.2882, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.023206523482028063, |
| "grad_norm": 1.6474647521972656, |
| "learning_rate": 5.1567763669344395e-06, |
| "loss": 1.6499, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.02322984662120598, |
| "grad_norm": 1.669651985168457, |
| "learning_rate": 5.161959056750454e-06, |
| "loss": 1.1727, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.0232531697603839, |
| "grad_norm": 1.4976879358291626, |
| "learning_rate": 5.167141746566469e-06, |
| "loss": 1.2149, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.023276492899561817, |
| "grad_norm": 1.4033470153808594, |
| "learning_rate": 5.172324436382483e-06, |
| "loss": 1.3004, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.023299816038739735, |
| "grad_norm": 1.3042150735855103, |
| "learning_rate": 5.177507126198498e-06, |
| "loss": 1.3803, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.023323139177917653, |
| "grad_norm": 1.4327346086502075, |
| "learning_rate": 5.182689816014512e-06, |
| "loss": 1.7267, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.02334646231709557, |
| "grad_norm": 1.4823616743087769, |
| "learning_rate": 5.187872505830526e-06, |
| "loss": 1.6386, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.02336978545627349, |
| "grad_norm": 1.7083938121795654, |
| "learning_rate": 5.193055195646542e-06, |
| "loss": 1.3112, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.023393108595451406, |
| "grad_norm": 1.51584792137146, |
| "learning_rate": 5.198237885462556e-06, |
| "loss": 1.6169, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.023416431734629324, |
| "grad_norm": 1.0864455699920654, |
| "learning_rate": 5.20342057527857e-06, |
| "loss": 1.3013, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.02343975487380724, |
| "grad_norm": 1.9760619401931763, |
| "learning_rate": 5.208603265094585e-06, |
| "loss": 1.7865, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.023463078012985156, |
| "grad_norm": 2.5747292041778564, |
| "learning_rate": 5.2137859549106e-06, |
| "loss": 1.3345, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.023486401152163074, |
| "grad_norm": 1.689779281616211, |
| "learning_rate": 5.218968644726613e-06, |
| "loss": 1.7856, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.023509724291340992, |
| "grad_norm": 1.9847980737686157, |
| "learning_rate": 5.224151334542628e-06, |
| "loss": 1.8401, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.02353304743051891, |
| "grad_norm": 1.3654876947402954, |
| "learning_rate": 5.229334024358642e-06, |
| "loss": 1.7705, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.023556370569696828, |
| "grad_norm": 1.7249932289123535, |
| "learning_rate": 5.234516714174656e-06, |
| "loss": 1.1657, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.023579693708874746, |
| "grad_norm": 1.0710606575012207, |
| "learning_rate": 5.2396994039906716e-06, |
| "loss": 1.1676, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.023603016848052664, |
| "grad_norm": 1.213040828704834, |
| "learning_rate": 5.244882093806686e-06, |
| "loss": 1.4183, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.02362633998723058, |
| "grad_norm": 1.6341387033462524, |
| "learning_rate": 5.2500647836227e-06, |
| "loss": 1.6092, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.0236496631264085, |
| "grad_norm": 1.6445837020874023, |
| "learning_rate": 5.255247473438715e-06, |
| "loss": 1.6693, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.023672986265586417, |
| "grad_norm": 1.2804230451583862, |
| "learning_rate": 5.26043016325473e-06, |
| "loss": 1.5687, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.023696309404764335, |
| "grad_norm": 1.8683735132217407, |
| "learning_rate": 5.265612853070744e-06, |
| "loss": 1.3944, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.023719632543942253, |
| "grad_norm": 1.6504722833633423, |
| "learning_rate": 5.2707955428867585e-06, |
| "loss": 1.3018, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.02374295568312017, |
| "grad_norm": 1.71793532371521, |
| "learning_rate": 5.275978232702773e-06, |
| "loss": 1.4581, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.023766278822298086, |
| "grad_norm": 1.1414326429367065, |
| "learning_rate": 5.281160922518788e-06, |
| "loss": 1.4924, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.023789601961476003, |
| "grad_norm": 1.6553568840026855, |
| "learning_rate": 5.286343612334802e-06, |
| "loss": 1.6926, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.02381292510065392, |
| "grad_norm": 1.4217321872711182, |
| "learning_rate": 5.291526302150817e-06, |
| "loss": 1.4806, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.02383624823983184, |
| "grad_norm": 1.4322501420974731, |
| "learning_rate": 5.296708991966831e-06, |
| "loss": 1.5978, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.023859571379009757, |
| "grad_norm": 1.9824562072753906, |
| "learning_rate": 5.3018916817828454e-06, |
| "loss": 1.493, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.023882894518187675, |
| "grad_norm": 1.3815537691116333, |
| "learning_rate": 5.307074371598861e-06, |
| "loss": 1.3702, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.023906217657365593, |
| "grad_norm": 1.101647138595581, |
| "learning_rate": 5.312257061414875e-06, |
| "loss": 1.1745, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.02392954079654351, |
| "grad_norm": 1.2983593940734863, |
| "learning_rate": 5.317439751230889e-06, |
| "loss": 1.7473, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.02395286393572143, |
| "grad_norm": 1.2676076889038086, |
| "learning_rate": 5.322622441046904e-06, |
| "loss": 1.6349, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.023976187074899347, |
| "grad_norm": 1.2923870086669922, |
| "learning_rate": 5.327805130862919e-06, |
| "loss": 1.619, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.023999510214077265, |
| "grad_norm": 1.4195587635040283, |
| "learning_rate": 5.332987820678933e-06, |
| "loss": 1.4933, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.024022833353255182, |
| "grad_norm": 1.3498200178146362, |
| "learning_rate": 5.3381705104949476e-06, |
| "loss": 1.489, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.0240461564924331, |
| "grad_norm": 1.473960280418396, |
| "learning_rate": 5.343353200310962e-06, |
| "loss": 1.5181, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.024069479631611018, |
| "grad_norm": 1.2730071544647217, |
| "learning_rate": 5.348535890126977e-06, |
| "loss": 1.5796, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.024092802770788933, |
| "grad_norm": 1.2243895530700684, |
| "learning_rate": 5.3537185799429914e-06, |
| "loss": 1.4051, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.02411612590996685, |
| "grad_norm": 2.1219441890716553, |
| "learning_rate": 5.358901269759005e-06, |
| "loss": 1.4317, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.02413944904914477, |
| "grad_norm": 1.0719225406646729, |
| "learning_rate": 5.364083959575019e-06, |
| "loss": 1.3937, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.024162772188322686, |
| "grad_norm": 1.6711935997009277, |
| "learning_rate": 5.369266649391034e-06, |
| "loss": 1.5832, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.024186095327500604, |
| "grad_norm": 1.33745276927948, |
| "learning_rate": 5.374449339207049e-06, |
| "loss": 1.4582, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.024209418466678522, |
| "grad_norm": 1.4278967380523682, |
| "learning_rate": 5.379632029023063e-06, |
| "loss": 1.6069, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.02423274160585644, |
| "grad_norm": 1.2003988027572632, |
| "learning_rate": 5.3848147188390775e-06, |
| "loss": 1.4942, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.024256064745034358, |
| "grad_norm": 1.7350938320159912, |
| "learning_rate": 5.389997408655092e-06, |
| "loss": 1.637, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.024279387884212276, |
| "grad_norm": 1.6094862222671509, |
| "learning_rate": 5.395180098471107e-06, |
| "loss": 1.6944, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.024302711023390194, |
| "grad_norm": 1.369091510772705, |
| "learning_rate": 5.400362788287121e-06, |
| "loss": 1.6905, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.02432603416256811, |
| "grad_norm": 1.275787353515625, |
| "learning_rate": 5.405545478103136e-06, |
| "loss": 1.6749, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.02434935730174603, |
| "grad_norm": 1.24448823928833, |
| "learning_rate": 5.41072816791915e-06, |
| "loss": 1.4275, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.024372680440923947, |
| "grad_norm": 1.7868009805679321, |
| "learning_rate": 5.415910857735165e-06, |
| "loss": 1.5942, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.024396003580101865, |
| "grad_norm": 1.5386407375335693, |
| "learning_rate": 5.42109354755118e-06, |
| "loss": 1.6505, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.024419326719279783, |
| "grad_norm": 1.9666537046432495, |
| "learning_rate": 5.426276237367194e-06, |
| "loss": 1.7035, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.024442649858457698, |
| "grad_norm": 1.7937966585159302, |
| "learning_rate": 5.431458927183208e-06, |
| "loss": 1.7956, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.024465972997635616, |
| "grad_norm": 1.1397721767425537, |
| "learning_rate": 5.436641616999223e-06, |
| "loss": 1.3459, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.024489296136813533, |
| "grad_norm": 1.28958261013031, |
| "learning_rate": 5.441824306815238e-06, |
| "loss": 1.0963, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.02451261927599145, |
| "grad_norm": 1.3734923601150513, |
| "learning_rate": 5.447006996631252e-06, |
| "loss": 1.3196, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.02453594241516937, |
| "grad_norm": 1.8763736486434937, |
| "learning_rate": 5.4521896864472666e-06, |
| "loss": 1.7322, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.024559265554347287, |
| "grad_norm": 1.5179871320724487, |
| "learning_rate": 5.457372376263281e-06, |
| "loss": 1.2844, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.024582588693525205, |
| "grad_norm": 1.4944384098052979, |
| "learning_rate": 5.462555066079296e-06, |
| "loss": 1.442, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.024605911832703123, |
| "grad_norm": 1.499028205871582, |
| "learning_rate": 5.4677377558953105e-06, |
| "loss": 1.394, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.02462923497188104, |
| "grad_norm": 1.1869397163391113, |
| "learning_rate": 5.472920445711325e-06, |
| "loss": 1.2928, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.02465255811105896, |
| "grad_norm": 1.3456541299819946, |
| "learning_rate": 5.478103135527339e-06, |
| "loss": 1.5983, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.024675881250236877, |
| "grad_norm": 1.5931065082550049, |
| "learning_rate": 5.483285825343354e-06, |
| "loss": 1.4794, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.024699204389414794, |
| "grad_norm": 1.4096170663833618, |
| "learning_rate": 5.488468515159369e-06, |
| "loss": 1.471, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.024722527528592712, |
| "grad_norm": 1.5033949613571167, |
| "learning_rate": 5.493651204975383e-06, |
| "loss": 1.2857, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.02474585066777063, |
| "grad_norm": 1.632089614868164, |
| "learning_rate": 5.498833894791397e-06, |
| "loss": 1.5157, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.024769173806948545, |
| "grad_norm": 1.563462495803833, |
| "learning_rate": 5.504016584607411e-06, |
| "loss": 1.5072, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.024792496946126463, |
| "grad_norm": 1.4055378437042236, |
| "learning_rate": 5.509199274423426e-06, |
| "loss": 1.1545, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.02481582008530438, |
| "grad_norm": 1.3467985391616821, |
| "learning_rate": 5.5143819642394404e-06, |
| "loss": 1.4615, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.0248391432244823, |
| "grad_norm": 1.6450691223144531, |
| "learning_rate": 5.519564654055455e-06, |
| "loss": 1.8051, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.024862466363660216, |
| "grad_norm": 1.247313141822815, |
| "learning_rate": 5.524747343871469e-06, |
| "loss": 1.5971, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.024885789502838134, |
| "grad_norm": 1.7429383993148804, |
| "learning_rate": 5.529930033687484e-06, |
| "loss": 1.5401, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.024909112642016052, |
| "grad_norm": 1.7351207733154297, |
| "learning_rate": 5.535112723503499e-06, |
| "loss": 1.4898, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.02493243578119397, |
| "grad_norm": 1.5003080368041992, |
| "learning_rate": 5.540295413319513e-06, |
| "loss": 1.773, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.024955758920371888, |
| "grad_norm": 1.370918869972229, |
| "learning_rate": 5.545478103135527e-06, |
| "loss": 1.6648, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.024979082059549806, |
| "grad_norm": 1.125687837600708, |
| "learning_rate": 5.5506607929515426e-06, |
| "loss": 1.5297, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.025002405198727724, |
| "grad_norm": 1.984605073928833, |
| "learning_rate": 5.555843482767557e-06, |
| "loss": 1.4637, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.02502572833790564, |
| "grad_norm": 1.6429048776626587, |
| "learning_rate": 5.561026172583571e-06, |
| "loss": 1.2794, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.02504905147708356, |
| "grad_norm": 1.8730500936508179, |
| "learning_rate": 5.566208862399586e-06, |
| "loss": 1.4462, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.025072374616261477, |
| "grad_norm": 1.536036729812622, |
| "learning_rate": 5.5713915522156e-06, |
| "loss": 1.2484, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.025095697755439392, |
| "grad_norm": 1.2056294679641724, |
| "learning_rate": 5.576574242031615e-06, |
| "loss": 1.7819, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.02511902089461731, |
| "grad_norm": 1.4317046403884888, |
| "learning_rate": 5.5817569318476295e-06, |
| "loss": 1.5005, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.025142344033795228, |
| "grad_norm": 1.5313549041748047, |
| "learning_rate": 5.586939621663644e-06, |
| "loss": 1.6916, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.025165667172973145, |
| "grad_norm": 1.2438437938690186, |
| "learning_rate": 5.592122311479658e-06, |
| "loss": 1.4453, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.025188990312151063, |
| "grad_norm": 1.665187954902649, |
| "learning_rate": 5.597305001295673e-06, |
| "loss": 1.1324, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.02521231345132898, |
| "grad_norm": 1.910433053970337, |
| "learning_rate": 5.602487691111688e-06, |
| "loss": 2.003, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.0252356365905069, |
| "grad_norm": 1.6894274950027466, |
| "learning_rate": 5.607670380927702e-06, |
| "loss": 1.5041, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.025258959729684817, |
| "grad_norm": 1.246095061302185, |
| "learning_rate": 5.612853070743716e-06, |
| "loss": 1.7421, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.025282282868862735, |
| "grad_norm": 1.7268954515457153, |
| "learning_rate": 5.618035760559732e-06, |
| "loss": 1.4601, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.025305606008040653, |
| "grad_norm": 1.2897146940231323, |
| "learning_rate": 5.623218450375746e-06, |
| "loss": 1.4538, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.02532892914721857, |
| "grad_norm": 1.329236388206482, |
| "learning_rate": 5.62840114019176e-06, |
| "loss": 1.6763, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.02535225228639649, |
| "grad_norm": 1.4001597166061401, |
| "learning_rate": 5.633583830007775e-06, |
| "loss": 1.4887, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.025375575425574406, |
| "grad_norm": 2.036400079727173, |
| "learning_rate": 5.63876651982379e-06, |
| "loss": 1.4996, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.025398898564752324, |
| "grad_norm": 1.4963785409927368, |
| "learning_rate": 5.643949209639803e-06, |
| "loss": 1.6515, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.02542222170393024, |
| "grad_norm": 1.4221199750900269, |
| "learning_rate": 5.649131899455818e-06, |
| "loss": 1.814, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.025445544843108157, |
| "grad_norm": 1.7034932374954224, |
| "learning_rate": 5.654314589271832e-06, |
| "loss": 1.478, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.025468867982286075, |
| "grad_norm": 1.5419113636016846, |
| "learning_rate": 5.659497279087846e-06, |
| "loss": 1.8225, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.025492191121463992, |
| "grad_norm": 1.8337044715881348, |
| "learning_rate": 5.6646799689038616e-06, |
| "loss": 1.5037, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.02551551426064191, |
| "grad_norm": 1.3712172508239746, |
| "learning_rate": 5.669862658719876e-06, |
| "loss": 1.4449, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.02553883739981983, |
| "grad_norm": 1.312258005142212, |
| "learning_rate": 5.67504534853589e-06, |
| "loss": 1.5159, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.025562160538997746, |
| "grad_norm": 1.5284754037857056, |
| "learning_rate": 5.680228038351905e-06, |
| "loss": 1.4479, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.025585483678175664, |
| "grad_norm": 1.1178314685821533, |
| "learning_rate": 5.68541072816792e-06, |
| "loss": 1.4729, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.025608806817353582, |
| "grad_norm": 1.2439149618148804, |
| "learning_rate": 5.690593417983934e-06, |
| "loss": 1.436, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.0256321299565315, |
| "grad_norm": 1.580632209777832, |
| "learning_rate": 5.6957761077999485e-06, |
| "loss": 1.2718, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.025655453095709418, |
| "grad_norm": 1.6244875192642212, |
| "learning_rate": 5.700958797615963e-06, |
| "loss": 1.6024, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.025678776234887336, |
| "grad_norm": 1.2542647123336792, |
| "learning_rate": 5.706141487431977e-06, |
| "loss": 1.4344, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.025702099374065254, |
| "grad_norm": 1.227737307548523, |
| "learning_rate": 5.711324177247992e-06, |
| "loss": 1.2912, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.02572542251324317, |
| "grad_norm": 1.705132007598877, |
| "learning_rate": 5.716506867064007e-06, |
| "loss": 1.7786, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.02574874565242109, |
| "grad_norm": 1.4411309957504272, |
| "learning_rate": 5.721689556880021e-06, |
| "loss": 1.6456, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.025772068791599004, |
| "grad_norm": 1.5248507261276245, |
| "learning_rate": 5.7268722466960354e-06, |
| "loss": 1.308, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.02579539193077692, |
| "grad_norm": 1.3953535556793213, |
| "learning_rate": 5.732054936512051e-06, |
| "loss": 1.7294, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.02581871506995484, |
| "grad_norm": 2.0566859245300293, |
| "learning_rate": 5.737237626328065e-06, |
| "loss": 1.4392, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.025842038209132757, |
| "grad_norm": 1.4723169803619385, |
| "learning_rate": 5.742420316144079e-06, |
| "loss": 1.4799, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.025865361348310675, |
| "grad_norm": 1.4092565774917603, |
| "learning_rate": 5.747603005960094e-06, |
| "loss": 1.199, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.025888684487488593, |
| "grad_norm": 1.277365803718567, |
| "learning_rate": 5.752785695776109e-06, |
| "loss": 1.6108, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.02591200762666651, |
| "grad_norm": 2.465951919555664, |
| "learning_rate": 5.757968385592123e-06, |
| "loss": 1.6563, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.02593533076584443, |
| "grad_norm": 1.8686498403549194, |
| "learning_rate": 5.7631510754081376e-06, |
| "loss": 1.4241, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.025958653905022347, |
| "grad_norm": 1.6791915893554688, |
| "learning_rate": 5.768333765224152e-06, |
| "loss": 1.5922, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.025981977044200265, |
| "grad_norm": 1.7679352760314941, |
| "learning_rate": 5.773516455040167e-06, |
| "loss": 1.3589, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.026005300183378183, |
| "grad_norm": 1.535530686378479, |
| "learning_rate": 5.7786991448561814e-06, |
| "loss": 1.1027, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.0260286233225561, |
| "grad_norm": 1.5171246528625488, |
| "learning_rate": 5.783881834672196e-06, |
| "loss": 1.5711, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.02605194646173402, |
| "grad_norm": 1.101453185081482, |
| "learning_rate": 5.789064524488209e-06, |
| "loss": 1.2025, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.026075269600911936, |
| "grad_norm": 1.4143930673599243, |
| "learning_rate": 5.794247214304224e-06, |
| "loss": 1.4293, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.02609859274008985, |
| "grad_norm": 1.4917521476745605, |
| "learning_rate": 5.799429904120239e-06, |
| "loss": 1.5479, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.02612191587926777, |
| "grad_norm": 1.4023706912994385, |
| "learning_rate": 5.804612593936253e-06, |
| "loss": 1.7088, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.026145239018445687, |
| "grad_norm": 1.4056384563446045, |
| "learning_rate": 5.8097952837522675e-06, |
| "loss": 1.3657, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.026168562157623605, |
| "grad_norm": 1.3393616676330566, |
| "learning_rate": 5.814977973568282e-06, |
| "loss": 1.1497, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.026191885296801522, |
| "grad_norm": 1.6090584993362427, |
| "learning_rate": 5.820160663384296e-06, |
| "loss": 1.391, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.02621520843597944, |
| "grad_norm": 1.4391287565231323, |
| "learning_rate": 5.825343353200311e-06, |
| "loss": 1.4316, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.026238531575157358, |
| "grad_norm": 1.0588252544403076, |
| "learning_rate": 5.830526043016326e-06, |
| "loss": 1.3495, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.026261854714335276, |
| "grad_norm": 1.2646477222442627, |
| "learning_rate": 5.83570873283234e-06, |
| "loss": 1.9107, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.026285177853513194, |
| "grad_norm": 1.2594728469848633, |
| "learning_rate": 5.8408914226483545e-06, |
| "loss": 1.3878, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.026308500992691112, |
| "grad_norm": 2.413245677947998, |
| "learning_rate": 5.84607411246437e-06, |
| "loss": 1.2988, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.02633182413186903, |
| "grad_norm": 1.8143887519836426, |
| "learning_rate": 5.851256802280384e-06, |
| "loss": 1.8778, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.026355147271046948, |
| "grad_norm": 1.4549977779388428, |
| "learning_rate": 5.856439492096398e-06, |
| "loss": 1.7828, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.026378470410224866, |
| "grad_norm": 1.370773196220398, |
| "learning_rate": 5.861622181912413e-06, |
| "loss": 1.6647, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.026401793549402783, |
| "grad_norm": 1.7972664833068848, |
| "learning_rate": 5.866804871728428e-06, |
| "loss": 1.8871, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.026425116688580698, |
| "grad_norm": 1.6887913942337036, |
| "learning_rate": 5.871987561544442e-06, |
| "loss": 1.4938, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.026448439827758616, |
| "grad_norm": 1.4011859893798828, |
| "learning_rate": 5.8771702513604566e-06, |
| "loss": 1.2893, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.026471762966936534, |
| "grad_norm": 1.2820593118667603, |
| "learning_rate": 5.882352941176471e-06, |
| "loss": 1.8028, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.02649508610611445, |
| "grad_norm": 1.5501364469528198, |
| "learning_rate": 5.887535630992486e-06, |
| "loss": 1.5666, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.02651840924529237, |
| "grad_norm": 1.635021686553955, |
| "learning_rate": 5.8927183208085005e-06, |
| "loss": 1.4217, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.026541732384470287, |
| "grad_norm": 1.780432105064392, |
| "learning_rate": 5.897901010624515e-06, |
| "loss": 1.5926, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.026565055523648205, |
| "grad_norm": 1.747233271598816, |
| "learning_rate": 5.903083700440529e-06, |
| "loss": 1.7011, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.026588378662826123, |
| "grad_norm": 1.6612962484359741, |
| "learning_rate": 5.908266390256544e-06, |
| "loss": 1.1466, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.02661170180200404, |
| "grad_norm": 1.906965732574463, |
| "learning_rate": 5.913449080072559e-06, |
| "loss": 1.2679, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.02663502494118196, |
| "grad_norm": 1.3008593320846558, |
| "learning_rate": 5.918631769888573e-06, |
| "loss": 1.1242, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.026658348080359877, |
| "grad_norm": 1.2631815671920776, |
| "learning_rate": 5.923814459704587e-06, |
| "loss": 1.6476, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.026681671219537795, |
| "grad_norm": 1.3338450193405151, |
| "learning_rate": 5.928997149520601e-06, |
| "loss": 1.6404, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.026704994358715713, |
| "grad_norm": 1.4749959707260132, |
| "learning_rate": 5.934179839336616e-06, |
| "loss": 1.4754, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.02672831749789363, |
| "grad_norm": 1.399997353553772, |
| "learning_rate": 5.9393625291526304e-06, |
| "loss": 1.776, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.026751640637071545, |
| "grad_norm": 1.6688719987869263, |
| "learning_rate": 5.944545218968645e-06, |
| "loss": 1.4341, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.026774963776249463, |
| "grad_norm": 1.2055866718292236, |
| "learning_rate": 5.949727908784659e-06, |
| "loss": 1.366, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.02679828691542738, |
| "grad_norm": 1.834375262260437, |
| "learning_rate": 5.9549105986006735e-06, |
| "loss": 1.7205, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.0268216100546053, |
| "grad_norm": 1.6463091373443604, |
| "learning_rate": 5.960093288416689e-06, |
| "loss": 1.2175, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.026844933193783217, |
| "grad_norm": 1.2439314126968384, |
| "learning_rate": 5.965275978232703e-06, |
| "loss": 1.1599, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.026868256332961134, |
| "grad_norm": 1.428876519203186, |
| "learning_rate": 5.970458668048717e-06, |
| "loss": 1.7428, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.026891579472139052, |
| "grad_norm": 1.3530622720718384, |
| "learning_rate": 5.975641357864732e-06, |
| "loss": 1.4968, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.02691490261131697, |
| "grad_norm": 2.7352559566497803, |
| "learning_rate": 5.980824047680747e-06, |
| "loss": 1.5478, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.026938225750494888, |
| "grad_norm": 1.8357428312301636, |
| "learning_rate": 5.986006737496761e-06, |
| "loss": 1.5217, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.026961548889672806, |
| "grad_norm": 1.3974493741989136, |
| "learning_rate": 5.991189427312776e-06, |
| "loss": 1.6203, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.026984872028850724, |
| "grad_norm": 1.3089922666549683, |
| "learning_rate": 5.99637211712879e-06, |
| "loss": 1.7992, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.027008195168028642, |
| "grad_norm": 1.8275575637817383, |
| "learning_rate": 6.001554806944805e-06, |
| "loss": 1.4841, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.02703151830720656, |
| "grad_norm": 2.55710506439209, |
| "learning_rate": 6.0067374967608195e-06, |
| "loss": 1.3043, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.027054841446384478, |
| "grad_norm": 2.4591903686523438, |
| "learning_rate": 6.011920186576834e-06, |
| "loss": 1.3368, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.027078164585562395, |
| "grad_norm": 1.9370126724243164, |
| "learning_rate": 6.017102876392848e-06, |
| "loss": 1.4075, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.02710148772474031, |
| "grad_norm": 1.4310760498046875, |
| "learning_rate": 6.022285566208863e-06, |
| "loss": 1.5424, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.027124810863918228, |
| "grad_norm": 1.3892368078231812, |
| "learning_rate": 6.027468256024878e-06, |
| "loss": 1.6432, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.027148134003096146, |
| "grad_norm": 1.4820071458816528, |
| "learning_rate": 6.032650945840892e-06, |
| "loss": 1.409, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.027171457142274064, |
| "grad_norm": 1.1135878562927246, |
| "learning_rate": 6.037833635656906e-06, |
| "loss": 1.5977, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.02719478028145198, |
| "grad_norm": 1.6016969680786133, |
| "learning_rate": 6.043016325472922e-06, |
| "loss": 1.6486, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.0272181034206299, |
| "grad_norm": 1.5183762311935425, |
| "learning_rate": 6.048199015288936e-06, |
| "loss": 1.4068, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.027241426559807817, |
| "grad_norm": 1.4730808734893799, |
| "learning_rate": 6.05338170510495e-06, |
| "loss": 1.6202, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.027264749698985735, |
| "grad_norm": 1.4382350444793701, |
| "learning_rate": 6.058564394920965e-06, |
| "loss": 1.7055, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.027288072838163653, |
| "grad_norm": 0.9570834040641785, |
| "learning_rate": 6.06374708473698e-06, |
| "loss": 0.8602, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.02731139597734157, |
| "grad_norm": 1.2127379179000854, |
| "learning_rate": 6.068929774552994e-06, |
| "loss": 1.5333, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.02733471911651949, |
| "grad_norm": 1.5822348594665527, |
| "learning_rate": 6.074112464369008e-06, |
| "loss": 0.9605, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.027358042255697407, |
| "grad_norm": 1.3108526468276978, |
| "learning_rate": 6.079295154185022e-06, |
| "loss": 1.1987, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.027381365394875325, |
| "grad_norm": 2.005154848098755, |
| "learning_rate": 6.084477844001036e-06, |
| "loss": 1.7214, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.027404688534053243, |
| "grad_norm": 2.299222707748413, |
| "learning_rate": 6.089660533817051e-06, |
| "loss": 1.5244, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.027428011673231157, |
| "grad_norm": 1.2665340900421143, |
| "learning_rate": 6.094843223633066e-06, |
| "loss": 1.1735, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.027451334812409075, |
| "grad_norm": 1.418123483657837, |
| "learning_rate": 6.10002591344908e-06, |
| "loss": 1.6755, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.027474657951586993, |
| "grad_norm": 1.4280682802200317, |
| "learning_rate": 6.105208603265095e-06, |
| "loss": 1.6664, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.02749798109076491, |
| "grad_norm": 2.0804097652435303, |
| "learning_rate": 6.110391293081109e-06, |
| "loss": 1.4688, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.02752130422994283, |
| "grad_norm": 1.7536234855651855, |
| "learning_rate": 6.115573982897124e-06, |
| "loss": 1.5823, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.027544627369120746, |
| "grad_norm": 1.1604044437408447, |
| "learning_rate": 6.1207566727131385e-06, |
| "loss": 1.4818, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.027567950508298664, |
| "grad_norm": 1.3865594863891602, |
| "learning_rate": 6.125939362529153e-06, |
| "loss": 1.5467, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.027591273647476582, |
| "grad_norm": 1.526190996170044, |
| "learning_rate": 6.131122052345167e-06, |
| "loss": 1.3397, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.0276145967866545, |
| "grad_norm": 1.6010215282440186, |
| "learning_rate": 6.136304742161182e-06, |
| "loss": 1.5507, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.027637919925832418, |
| "grad_norm": 1.4297575950622559, |
| "learning_rate": 6.141487431977197e-06, |
| "loss": 1.397, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.027661243065010336, |
| "grad_norm": 1.380254864692688, |
| "learning_rate": 6.146670121793211e-06, |
| "loss": 1.251, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.027684566204188254, |
| "grad_norm": 1.5398340225219727, |
| "learning_rate": 6.1518528116092254e-06, |
| "loss": 1.7319, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.027707889343366172, |
| "grad_norm": 1.8836907148361206, |
| "learning_rate": 6.157035501425241e-06, |
| "loss": 1.1504, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.02773121248254409, |
| "grad_norm": 1.200628399848938, |
| "learning_rate": 6.162218191241255e-06, |
| "loss": 1.5138, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.027754535621722004, |
| "grad_norm": 1.7400058507919312, |
| "learning_rate": 6.167400881057269e-06, |
| "loss": 1.5398, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.027777858760899922, |
| "grad_norm": 1.2723171710968018, |
| "learning_rate": 6.172583570873284e-06, |
| "loss": 1.1157, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.02780118190007784, |
| "grad_norm": 1.4392553567886353, |
| "learning_rate": 6.177766260689299e-06, |
| "loss": 1.7444, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.027824505039255758, |
| "grad_norm": 1.533337950706482, |
| "learning_rate": 6.182948950505313e-06, |
| "loss": 1.4784, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.027847828178433676, |
| "grad_norm": 1.5458931922912598, |
| "learning_rate": 6.1881316403213276e-06, |
| "loss": 1.8139, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.027871151317611594, |
| "grad_norm": 1.133946180343628, |
| "learning_rate": 6.193314330137342e-06, |
| "loss": 1.5137, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.02789447445678951, |
| "grad_norm": 1.458628535270691, |
| "learning_rate": 6.198497019953357e-06, |
| "loss": 1.3172, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.02791779759596743, |
| "grad_norm": 2.2303454875946045, |
| "learning_rate": 6.2036797097693714e-06, |
| "loss": 1.2295, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.027941120735145347, |
| "grad_norm": 1.2555915117263794, |
| "learning_rate": 6.208862399585386e-06, |
| "loss": 1.5021, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.027964443874323265, |
| "grad_norm": 1.7872976064682007, |
| "learning_rate": 6.2140450894014e-06, |
| "loss": 0.9375, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.027987767013501183, |
| "grad_norm": 1.5110255479812622, |
| "learning_rate": 6.219227779217414e-06, |
| "loss": 1.871, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.0280110901526791, |
| "grad_norm": 1.5963770151138306, |
| "learning_rate": 6.224410469033428e-06, |
| "loss": 1.6184, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.02803441329185702, |
| "grad_norm": 1.7600239515304565, |
| "learning_rate": 6.229593158849443e-06, |
| "loss": 1.5337, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.028057736431034937, |
| "grad_norm": 1.3252232074737549, |
| "learning_rate": 6.2347758486654575e-06, |
| "loss": 1.4088, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.02808105957021285, |
| "grad_norm": 1.3839343786239624, |
| "learning_rate": 6.239958538481472e-06, |
| "loss": 1.305, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.02810438270939077, |
| "grad_norm": 1.6570122241973877, |
| "learning_rate": 6.245141228297486e-06, |
| "loss": 1.5596, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.028127705848568687, |
| "grad_norm": 1.4685866832733154, |
| "learning_rate": 6.250323918113501e-06, |
| "loss": 1.4931, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.028151028987746605, |
| "grad_norm": 1.263984203338623, |
| "learning_rate": 6.255506607929516e-06, |
| "loss": 1.5393, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.028174352126924523, |
| "grad_norm": 1.8634412288665771, |
| "learning_rate": 6.26068929774553e-06, |
| "loss": 1.2369, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.02819767526610244, |
| "grad_norm": 1.676034927368164, |
| "learning_rate": 6.2658719875615444e-06, |
| "loss": 1.5886, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.02822099840528036, |
| "grad_norm": 1.7271007299423218, |
| "learning_rate": 6.27105467737756e-06, |
| "loss": 1.2692, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.028244321544458276, |
| "grad_norm": 1.4238859415054321, |
| "learning_rate": 6.276237367193574e-06, |
| "loss": 1.6261, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.028267644683636194, |
| "grad_norm": 2.13999080657959, |
| "learning_rate": 6.281420057009588e-06, |
| "loss": 1.7009, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.028290967822814112, |
| "grad_norm": 2.1164069175720215, |
| "learning_rate": 6.286602746825603e-06, |
| "loss": 1.4856, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.02831429096199203, |
| "grad_norm": 1.6996465921401978, |
| "learning_rate": 6.291785436641618e-06, |
| "loss": 1.4621, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.028337614101169948, |
| "grad_norm": 1.466536045074463, |
| "learning_rate": 6.296968126457632e-06, |
| "loss": 1.5882, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.028360937240347866, |
| "grad_norm": 1.7248129844665527, |
| "learning_rate": 6.3021508162736466e-06, |
| "loss": 1.658, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.028384260379525784, |
| "grad_norm": 1.7973899841308594, |
| "learning_rate": 6.307333506089661e-06, |
| "loss": 1.4981, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.0284075835187037, |
| "grad_norm": 1.4502708911895752, |
| "learning_rate": 6.312516195905676e-06, |
| "loss": 1.8872, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.028430906657881616, |
| "grad_norm": 1.592411756515503, |
| "learning_rate": 6.3176988857216905e-06, |
| "loss": 1.4145, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.028454229797059534, |
| "grad_norm": 1.931400179862976, |
| "learning_rate": 6.322881575537705e-06, |
| "loss": 1.6221, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.028477552936237452, |
| "grad_norm": 1.5922832489013672, |
| "learning_rate": 6.328064265353719e-06, |
| "loss": 1.3897, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.02850087607541537, |
| "grad_norm": 1.4899603128433228, |
| "learning_rate": 6.333246955169734e-06, |
| "loss": 1.66, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.028524199214593288, |
| "grad_norm": 1.3820170164108276, |
| "learning_rate": 6.338429644985749e-06, |
| "loss": 1.8425, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.028547522353771206, |
| "grad_norm": 1.6127132177352905, |
| "learning_rate": 6.343612334801763e-06, |
| "loss": 1.3965, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.028570845492949123, |
| "grad_norm": 1.927259922027588, |
| "learning_rate": 6.348795024617777e-06, |
| "loss": 1.486, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.02859416863212704, |
| "grad_norm": 1.5987411737442017, |
| "learning_rate": 6.353977714433793e-06, |
| "loss": 1.4371, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.02861749177130496, |
| "grad_norm": 1.7805335521697998, |
| "learning_rate": 6.359160404249805e-06, |
| "loss": 1.56, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.028640814910482877, |
| "grad_norm": 1.7960704565048218, |
| "learning_rate": 6.3643430940658204e-06, |
| "loss": 1.5536, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.028664138049660795, |
| "grad_norm": 1.4014300107955933, |
| "learning_rate": 6.369525783881835e-06, |
| "loss": 1.4391, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.028687461188838713, |
| "grad_norm": 1.7049264907836914, |
| "learning_rate": 6.374708473697849e-06, |
| "loss": 1.9225, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.02871078432801663, |
| "grad_norm": 1.9948570728302002, |
| "learning_rate": 6.3798911635138635e-06, |
| "loss": 1.6279, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.02873410746719455, |
| "grad_norm": 2.101736068725586, |
| "learning_rate": 6.385073853329879e-06, |
| "loss": 1.5433, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.028757430606372463, |
| "grad_norm": 1.342325210571289, |
| "learning_rate": 6.390256543145893e-06, |
| "loss": 1.3606, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.02878075374555038, |
| "grad_norm": 1.5539692640304565, |
| "learning_rate": 6.395439232961907e-06, |
| "loss": 1.4339, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.0288040768847283, |
| "grad_norm": 1.6053344011306763, |
| "learning_rate": 6.400621922777922e-06, |
| "loss": 1.5735, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.028827400023906217, |
| "grad_norm": 1.1527775526046753, |
| "learning_rate": 6.405804612593937e-06, |
| "loss": 1.3265, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.028850723163084135, |
| "grad_norm": 2.401747465133667, |
| "learning_rate": 6.410987302409951e-06, |
| "loss": 1.3331, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.028874046302262053, |
| "grad_norm": 1.372536301612854, |
| "learning_rate": 6.416169992225966e-06, |
| "loss": 1.6371, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.02889736944143997, |
| "grad_norm": 1.528669834136963, |
| "learning_rate": 6.42135268204198e-06, |
| "loss": 1.4658, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.02892069258061789, |
| "grad_norm": 1.7370809316635132, |
| "learning_rate": 6.426535371857995e-06, |
| "loss": 1.4893, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.028944015719795806, |
| "grad_norm": 1.5757806301116943, |
| "learning_rate": 6.4317180616740095e-06, |
| "loss": 1.2563, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.028967338858973724, |
| "grad_norm": 1.2458890676498413, |
| "learning_rate": 6.436900751490024e-06, |
| "loss": 1.6522, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.028990661998151642, |
| "grad_norm": 1.743046760559082, |
| "learning_rate": 6.442083441306038e-06, |
| "loss": 1.6444, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.02901398513732956, |
| "grad_norm": 1.5543162822723389, |
| "learning_rate": 6.447266131122053e-06, |
| "loss": 1.6381, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.029037308276507478, |
| "grad_norm": 1.3490428924560547, |
| "learning_rate": 6.452448820938068e-06, |
| "loss": 1.4615, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.029060631415685396, |
| "grad_norm": 1.3732086420059204, |
| "learning_rate": 6.457631510754082e-06, |
| "loss": 1.4085, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.02908395455486331, |
| "grad_norm": 2.9364993572235107, |
| "learning_rate": 6.462814200570096e-06, |
| "loss": 1.4811, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.029107277694041228, |
| "grad_norm": 1.2069623470306396, |
| "learning_rate": 6.467996890386112e-06, |
| "loss": 1.3635, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.029130600833219146, |
| "grad_norm": 1.2883137464523315, |
| "learning_rate": 6.473179580202126e-06, |
| "loss": 1.4202, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.029153923972397064, |
| "grad_norm": 1.592976689338684, |
| "learning_rate": 6.47836227001814e-06, |
| "loss": 2.1116, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.029177247111574982, |
| "grad_norm": 1.394774079322815, |
| "learning_rate": 6.483544959834155e-06, |
| "loss": 1.5042, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.0292005702507529, |
| "grad_norm": 1.2127888202667236, |
| "learning_rate": 6.48872764965017e-06, |
| "loss": 1.3806, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.029223893389930818, |
| "grad_norm": 1.5445924997329712, |
| "learning_rate": 6.493910339466184e-06, |
| "loss": 1.5067, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.029247216529108735, |
| "grad_norm": 2.4520442485809326, |
| "learning_rate": 6.4990930292821985e-06, |
| "loss": 1.3649, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.029270539668286653, |
| "grad_norm": 2.032709836959839, |
| "learning_rate": 6.504275719098212e-06, |
| "loss": 1.2058, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.02929386280746457, |
| "grad_norm": 1.3742554187774658, |
| "learning_rate": 6.509458408914226e-06, |
| "loss": 1.4328, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.02931718594664249, |
| "grad_norm": 1.4859979152679443, |
| "learning_rate": 6.514641098730241e-06, |
| "loss": 1.6409, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.029340509085820407, |
| "grad_norm": 1.6881428956985474, |
| "learning_rate": 6.519823788546256e-06, |
| "loss": 1.6298, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.029363832224998325, |
| "grad_norm": 1.892412543296814, |
| "learning_rate": 6.52500647836227e-06, |
| "loss": 1.6898, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.029387155364176243, |
| "grad_norm": 1.4890961647033691, |
| "learning_rate": 6.530189168178285e-06, |
| "loss": 1.6164, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.029410478503354157, |
| "grad_norm": 1.530034065246582, |
| "learning_rate": 6.535371857994299e-06, |
| "loss": 1.4036, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.029433801642532075, |
| "grad_norm": 1.4801392555236816, |
| "learning_rate": 6.540554547810314e-06, |
| "loss": 1.5928, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.029457124781709993, |
| "grad_norm": 1.4419362545013428, |
| "learning_rate": 6.5457372376263285e-06, |
| "loss": 1.7833, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.02948044792088791, |
| "grad_norm": 1.6963889598846436, |
| "learning_rate": 6.550919927442343e-06, |
| "loss": 1.7366, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.02950377106006583, |
| "grad_norm": 1.4853816032409668, |
| "learning_rate": 6.556102617258357e-06, |
| "loss": 1.2297, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.029527094199243747, |
| "grad_norm": 1.6151559352874756, |
| "learning_rate": 6.561285307074372e-06, |
| "loss": 2.0062, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.029550417338421665, |
| "grad_norm": 1.3132925033569336, |
| "learning_rate": 6.566467996890387e-06, |
| "loss": 1.7708, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.029573740477599583, |
| "grad_norm": 1.4057172536849976, |
| "learning_rate": 6.571650686706401e-06, |
| "loss": 1.5811, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.0295970636167775, |
| "grad_norm": 1.5369668006896973, |
| "learning_rate": 6.5768333765224154e-06, |
| "loss": 1.5121, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.02962038675595542, |
| "grad_norm": 1.6567087173461914, |
| "learning_rate": 6.582016066338431e-06, |
| "loss": 1.2413, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.029643709895133336, |
| "grad_norm": 1.3374396562576294, |
| "learning_rate": 6.587198756154445e-06, |
| "loss": 1.5594, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.029667033034311254, |
| "grad_norm": 1.4892241954803467, |
| "learning_rate": 6.592381445970459e-06, |
| "loss": 1.6287, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.029690356173489172, |
| "grad_norm": 2.012141466140747, |
| "learning_rate": 6.597564135786474e-06, |
| "loss": 1.7356, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.02971367931266709, |
| "grad_norm": 2.2330586910247803, |
| "learning_rate": 6.602746825602489e-06, |
| "loss": 1.1928, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.029737002451845004, |
| "grad_norm": 1.7101742029190063, |
| "learning_rate": 6.607929515418503e-06, |
| "loss": 1.497, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.029760325591022922, |
| "grad_norm": 1.4773057699203491, |
| "learning_rate": 6.6131122052345175e-06, |
| "loss": 1.4135, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.02978364873020084, |
| "grad_norm": 1.4007784128189087, |
| "learning_rate": 6.618294895050532e-06, |
| "loss": 1.3921, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.029806971869378758, |
| "grad_norm": 1.7430599927902222, |
| "learning_rate": 6.623477584866547e-06, |
| "loss": 1.5352, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.029830295008556676, |
| "grad_norm": 2.562096118927002, |
| "learning_rate": 6.6286602746825614e-06, |
| "loss": 1.5325, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.029853618147734594, |
| "grad_norm": 1.192498803138733, |
| "learning_rate": 6.633842964498576e-06, |
| "loss": 1.1816, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.02987694128691251, |
| "grad_norm": 2.39277982711792, |
| "learning_rate": 6.63902565431459e-06, |
| "loss": 1.3732, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.02990026442609043, |
| "grad_norm": 1.3731800317764282, |
| "learning_rate": 6.644208344130604e-06, |
| "loss": 1.4175, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.029923587565268348, |
| "grad_norm": 2.297088146209717, |
| "learning_rate": 6.649391033946618e-06, |
| "loss": 1.5919, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.029946910704446265, |
| "grad_norm": 1.1062113046646118, |
| "learning_rate": 6.654573723762633e-06, |
| "loss": 1.3707, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.029970233843624183, |
| "grad_norm": 2.175673246383667, |
| "learning_rate": 6.6597564135786475e-06, |
| "loss": 1.4268, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.0299935569828021, |
| "grad_norm": 1.57578444480896, |
| "learning_rate": 6.664939103394662e-06, |
| "loss": 1.6065, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.03001688012198002, |
| "grad_norm": 1.757105827331543, |
| "learning_rate": 6.670121793210676e-06, |
| "loss": 1.5827, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.030040203261157937, |
| "grad_norm": 1.6778910160064697, |
| "learning_rate": 6.675304483026691e-06, |
| "loss": 1.4697, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.030063526400335855, |
| "grad_norm": 1.4940367937088013, |
| "learning_rate": 6.680487172842706e-06, |
| "loss": 1.2309, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.03008684953951377, |
| "grad_norm": 2.175011157989502, |
| "learning_rate": 6.68566986265872e-06, |
| "loss": 0.9675, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.030110172678691687, |
| "grad_norm": 2.0137412548065186, |
| "learning_rate": 6.6908525524747344e-06, |
| "loss": 1.6618, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.030133495817869605, |
| "grad_norm": 1.3541489839553833, |
| "learning_rate": 6.69603524229075e-06, |
| "loss": 1.2989, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.030156818957047523, |
| "grad_norm": 1.9265953302383423, |
| "learning_rate": 6.701217932106764e-06, |
| "loss": 1.3859, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.03018014209622544, |
| "grad_norm": 1.899145483970642, |
| "learning_rate": 6.706400621922778e-06, |
| "loss": 1.2468, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.03020346523540336, |
| "grad_norm": 1.6764010190963745, |
| "learning_rate": 6.711583311738793e-06, |
| "loss": 1.4796, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.030226788374581277, |
| "grad_norm": 1.502276062965393, |
| "learning_rate": 6.716766001554808e-06, |
| "loss": 1.6102, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.030250111513759195, |
| "grad_norm": 1.742180347442627, |
| "learning_rate": 6.721948691370822e-06, |
| "loss": 1.4743, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.030273434652937112, |
| "grad_norm": 1.503127098083496, |
| "learning_rate": 6.7271313811868366e-06, |
| "loss": 1.7023, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.03029675779211503, |
| "grad_norm": 1.4494696855545044, |
| "learning_rate": 6.732314071002851e-06, |
| "loss": 1.6774, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.030320080931292948, |
| "grad_norm": 1.3726390600204468, |
| "learning_rate": 6.737496760818866e-06, |
| "loss": 1.6272, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.030343404070470866, |
| "grad_norm": 1.6922540664672852, |
| "learning_rate": 6.7426794506348805e-06, |
| "loss": 1.6249, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.030366727209648784, |
| "grad_norm": 1.3822194337844849, |
| "learning_rate": 6.747862140450895e-06, |
| "loss": 1.779, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.030390050348826702, |
| "grad_norm": 1.2841784954071045, |
| "learning_rate": 6.753044830266909e-06, |
| "loss": 1.2516, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.030413373488004616, |
| "grad_norm": 2.045302152633667, |
| "learning_rate": 6.758227520082924e-06, |
| "loss": 1.4461, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.030436696627182534, |
| "grad_norm": 1.6968058347702026, |
| "learning_rate": 6.763410209898939e-06, |
| "loss": 1.545, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.030460019766360452, |
| "grad_norm": 1.6409857273101807, |
| "learning_rate": 6.768592899714953e-06, |
| "loss": 1.7205, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.03048334290553837, |
| "grad_norm": 1.2925307750701904, |
| "learning_rate": 6.773775589530967e-06, |
| "loss": 1.5889, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.030506666044716288, |
| "grad_norm": 1.4610506296157837, |
| "learning_rate": 6.778958279346982e-06, |
| "loss": 1.49, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.030529989183894206, |
| "grad_norm": 1.5941089391708374, |
| "learning_rate": 6.784140969162997e-06, |
| "loss": 1.8275, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.030553312323072124, |
| "grad_norm": 1.2063391208648682, |
| "learning_rate": 6.7893236589790104e-06, |
| "loss": 1.2659, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.03057663546225004, |
| "grad_norm": 1.512366771697998, |
| "learning_rate": 6.794506348795025e-06, |
| "loss": 1.502, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.03059995860142796, |
| "grad_norm": 2.0490636825561523, |
| "learning_rate": 6.799689038611039e-06, |
| "loss": 1.4567, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.030623281740605877, |
| "grad_norm": 2.196171522140503, |
| "learning_rate": 6.8048717284270535e-06, |
| "loss": 1.7189, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.030646604879783795, |
| "grad_norm": 1.434403419494629, |
| "learning_rate": 6.810054418243069e-06, |
| "loss": 1.4947, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.030669928018961713, |
| "grad_norm": 1.3586199283599854, |
| "learning_rate": 6.815237108059083e-06, |
| "loss": 1.5511, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.03069325115813963, |
| "grad_norm": 1.7212327718734741, |
| "learning_rate": 6.820419797875097e-06, |
| "loss": 1.625, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.03071657429731755, |
| "grad_norm": 1.7246372699737549, |
| "learning_rate": 6.825602487691112e-06, |
| "loss": 1.6043, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.030739897436495463, |
| "grad_norm": 1.401949405670166, |
| "learning_rate": 6.830785177507127e-06, |
| "loss": 0.9642, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.03076322057567338, |
| "grad_norm": 1.6501095294952393, |
| "learning_rate": 6.835967867323141e-06, |
| "loss": 1.4776, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.0307865437148513, |
| "grad_norm": 1.266641616821289, |
| "learning_rate": 6.841150557139156e-06, |
| "loss": 1.1332, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.030809866854029217, |
| "grad_norm": 1.0934447050094604, |
| "learning_rate": 6.84633324695517e-06, |
| "loss": 1.6201, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.030833189993207135, |
| "grad_norm": 1.4711166620254517, |
| "learning_rate": 6.851515936771185e-06, |
| "loss": 1.401, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.030856513132385053, |
| "grad_norm": 1.609348177909851, |
| "learning_rate": 6.8566986265871995e-06, |
| "loss": 1.5497, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.03087983627156297, |
| "grad_norm": 1.277185082435608, |
| "learning_rate": 6.861881316403214e-06, |
| "loss": 1.5056, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.03090315941074089, |
| "grad_norm": 1.4644626379013062, |
| "learning_rate": 6.867064006219228e-06, |
| "loss": 1.3443, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.030926482549918807, |
| "grad_norm": 1.4824533462524414, |
| "learning_rate": 6.872246696035243e-06, |
| "loss": 1.5054, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.030949805689096724, |
| "grad_norm": 1.4885330200195312, |
| "learning_rate": 6.877429385851258e-06, |
| "loss": 1.4403, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.030973128828274642, |
| "grad_norm": 1.639889121055603, |
| "learning_rate": 6.882612075667272e-06, |
| "loss": 1.7286, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.03099645196745256, |
| "grad_norm": 1.2644333839416504, |
| "learning_rate": 6.887794765483286e-06, |
| "loss": 1.4472, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.031019775106630478, |
| "grad_norm": 1.4533531665802002, |
| "learning_rate": 6.892977455299302e-06, |
| "loss": 1.6504, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.031043098245808396, |
| "grad_norm": 1.5860834121704102, |
| "learning_rate": 6.898160145115316e-06, |
| "loss": 1.3219, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.03106642138498631, |
| "grad_norm": 1.4244756698608398, |
| "learning_rate": 6.90334283493133e-06, |
| "loss": 1.2863, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.03108974452416423, |
| "grad_norm": 1.7279314994812012, |
| "learning_rate": 6.908525524747345e-06, |
| "loss": 1.5325, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.031113067663342146, |
| "grad_norm": 1.3759844303131104, |
| "learning_rate": 6.913708214563359e-06, |
| "loss": 1.7333, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.031136390802520064, |
| "grad_norm": 1.3596171140670776, |
| "learning_rate": 6.918890904379374e-06, |
| "loss": 1.4572, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.031159713941697982, |
| "grad_norm": 1.4598828554153442, |
| "learning_rate": 6.9240735941953885e-06, |
| "loss": 1.5375, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.0311830370808759, |
| "grad_norm": 1.7578270435333252, |
| "learning_rate": 6.929256284011402e-06, |
| "loss": 1.7456, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.031206360220053818, |
| "grad_norm": 1.8432106971740723, |
| "learning_rate": 6.934438973827416e-06, |
| "loss": 1.3632, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.031229683359231736, |
| "grad_norm": 1.3926173448562622, |
| "learning_rate": 6.939621663643431e-06, |
| "loss": 1.5246, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.031253006498409654, |
| "grad_norm": 1.639283299446106, |
| "learning_rate": 6.944804353459446e-06, |
| "loss": 1.4081, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.03127632963758757, |
| "grad_norm": 1.818247675895691, |
| "learning_rate": 6.94998704327546e-06, |
| "loss": 1.4222, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.03129965277676549, |
| "grad_norm": 1.7598317861557007, |
| "learning_rate": 6.955169733091475e-06, |
| "loss": 1.5457, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.03132297591594341, |
| "grad_norm": 1.9077101945877075, |
| "learning_rate": 6.960352422907489e-06, |
| "loss": 1.2585, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.031346299055121325, |
| "grad_norm": 1.7100765705108643, |
| "learning_rate": 6.965535112723504e-06, |
| "loss": 1.5487, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.03136962219429924, |
| "grad_norm": 1.4282541275024414, |
| "learning_rate": 6.9707178025395185e-06, |
| "loss": 1.7457, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.03139294533347716, |
| "grad_norm": 1.5989662408828735, |
| "learning_rate": 6.975900492355533e-06, |
| "loss": 1.7449, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.03141626847265508, |
| "grad_norm": 1.2489700317382812, |
| "learning_rate": 6.981083182171547e-06, |
| "loss": 1.4873, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.031439591611833, |
| "grad_norm": 1.60476815700531, |
| "learning_rate": 6.986265871987562e-06, |
| "loss": 1.4751, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.031462914751010915, |
| "grad_norm": 1.5303354263305664, |
| "learning_rate": 6.991448561803577e-06, |
| "loss": 1.5709, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.03148623789018883, |
| "grad_norm": 1.462499737739563, |
| "learning_rate": 6.996631251619591e-06, |
| "loss": 1.7366, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.03150956102936675, |
| "grad_norm": 1.4246290922164917, |
| "learning_rate": 7.0018139414356054e-06, |
| "loss": 1.1592, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.03153288416854467, |
| "grad_norm": 1.8897913694381714, |
| "learning_rate": 7.006996631251621e-06, |
| "loss": 1.1699, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.031556207307722586, |
| "grad_norm": 1.6516541242599487, |
| "learning_rate": 7.012179321067635e-06, |
| "loss": 1.4705, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.0315795304469005, |
| "grad_norm": 1.816272258758545, |
| "learning_rate": 7.017362010883649e-06, |
| "loss": 1.3166, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.031602853586078415, |
| "grad_norm": 1.631224274635315, |
| "learning_rate": 7.022544700699664e-06, |
| "loss": 1.9471, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.03162617672525633, |
| "grad_norm": 1.7657747268676758, |
| "learning_rate": 7.027727390515678e-06, |
| "loss": 1.6623, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.03164949986443425, |
| "grad_norm": 1.5499768257141113, |
| "learning_rate": 7.032910080331693e-06, |
| "loss": 1.328, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.03167282300361217, |
| "grad_norm": 1.5339092016220093, |
| "learning_rate": 7.0380927701477075e-06, |
| "loss": 1.79, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.03169614614279009, |
| "grad_norm": 2.1172358989715576, |
| "learning_rate": 7.043275459963722e-06, |
| "loss": 1.719, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.031719469281968005, |
| "grad_norm": 1.5365610122680664, |
| "learning_rate": 7.048458149779736e-06, |
| "loss": 1.2236, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.03174279242114592, |
| "grad_norm": 1.7277380228042603, |
| "learning_rate": 7.0536408395957514e-06, |
| "loss": 1.768, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.03176611556032384, |
| "grad_norm": 3.0157341957092285, |
| "learning_rate": 7.058823529411766e-06, |
| "loss": 1.023, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.03178943869950176, |
| "grad_norm": 1.682496190071106, |
| "learning_rate": 7.06400621922778e-06, |
| "loss": 1.5555, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.031812761838679676, |
| "grad_norm": 1.6679117679595947, |
| "learning_rate": 7.0691889090437945e-06, |
| "loss": 1.762, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.031836084977857594, |
| "grad_norm": 1.5026060342788696, |
| "learning_rate": 7.074371598859808e-06, |
| "loss": 1.2893, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.03185940811703551, |
| "grad_norm": 1.8401672840118408, |
| "learning_rate": 7.079554288675823e-06, |
| "loss": 1.4318, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.03188273125621343, |
| "grad_norm": 1.6953387260437012, |
| "learning_rate": 7.0847369784918375e-06, |
| "loss": 1.5304, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.03190605439539135, |
| "grad_norm": 1.7483880519866943, |
| "learning_rate": 7.089919668307852e-06, |
| "loss": 1.763, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.031929377534569266, |
| "grad_norm": 1.6970646381378174, |
| "learning_rate": 7.095102358123866e-06, |
| "loss": 1.4232, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.031952700673747184, |
| "grad_norm": 1.4489586353302002, |
| "learning_rate": 7.100285047939881e-06, |
| "loss": 1.2495, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.0319760238129251, |
| "grad_norm": 1.8368195295333862, |
| "learning_rate": 7.105467737755896e-06, |
| "loss": 1.3631, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.03199934695210302, |
| "grad_norm": 2.073723077774048, |
| "learning_rate": 7.11065042757191e-06, |
| "loss": 1.4958, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.03202267009128094, |
| "grad_norm": 1.7000291347503662, |
| "learning_rate": 7.1158331173879244e-06, |
| "loss": 1.5018, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.032045993230458855, |
| "grad_norm": 1.896183729171753, |
| "learning_rate": 7.12101580720394e-06, |
| "loss": 1.4754, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.03206931636963677, |
| "grad_norm": 1.4250632524490356, |
| "learning_rate": 7.126198497019954e-06, |
| "loss": 1.2758, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.03209263950881469, |
| "grad_norm": 1.968647837638855, |
| "learning_rate": 7.131381186835968e-06, |
| "loss": 1.5062, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.03211596264799261, |
| "grad_norm": 1.5044890642166138, |
| "learning_rate": 7.136563876651983e-06, |
| "loss": 1.7057, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.03213928578717053, |
| "grad_norm": 1.5252755880355835, |
| "learning_rate": 7.141746566467998e-06, |
| "loss": 1.4311, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.032162608926348445, |
| "grad_norm": 1.7001562118530273, |
| "learning_rate": 7.146929256284012e-06, |
| "loss": 1.5573, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.03218593206552636, |
| "grad_norm": 2.1587064266204834, |
| "learning_rate": 7.1521119461000266e-06, |
| "loss": 1.1552, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.03220925520470428, |
| "grad_norm": 1.5938003063201904, |
| "learning_rate": 7.157294635916041e-06, |
| "loss": 1.3843, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.03223257834388219, |
| "grad_norm": 1.5198419094085693, |
| "learning_rate": 7.162477325732055e-06, |
| "loss": 1.4412, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.03225590148306011, |
| "grad_norm": 1.8579787015914917, |
| "learning_rate": 7.1676600155480705e-06, |
| "loss": 1.2986, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.03227922462223803, |
| "grad_norm": 1.5341622829437256, |
| "learning_rate": 7.172842705364085e-06, |
| "loss": 1.2032, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.032302547761415945, |
| "grad_norm": 2.0681440830230713, |
| "learning_rate": 7.178025395180099e-06, |
| "loss": 1.7171, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.03232587090059386, |
| "grad_norm": 1.7611883878707886, |
| "learning_rate": 7.1832080849961135e-06, |
| "loss": 1.3376, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.03234919403977178, |
| "grad_norm": 1.6917016506195068, |
| "learning_rate": 7.188390774812129e-06, |
| "loss": 1.3909, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.0323725171789497, |
| "grad_norm": 1.1238902807235718, |
| "learning_rate": 7.193573464628143e-06, |
| "loss": 1.1826, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.03239584031812762, |
| "grad_norm": 1.5484822988510132, |
| "learning_rate": 7.198756154444157e-06, |
| "loss": 1.4476, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.032419163457305535, |
| "grad_norm": 1.703244686126709, |
| "learning_rate": 7.203938844260172e-06, |
| "loss": 1.5256, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.03244248659648345, |
| "grad_norm": 2.350940465927124, |
| "learning_rate": 7.209121534076187e-06, |
| "loss": 1.4486, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.03246580973566137, |
| "grad_norm": 1.2115894556045532, |
| "learning_rate": 7.2143042238922004e-06, |
| "loss": 1.2387, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.03248913287483929, |
| "grad_norm": 1.4883688688278198, |
| "learning_rate": 7.219486913708215e-06, |
| "loss": 1.4499, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.032512456014017206, |
| "grad_norm": 1.2324401140213013, |
| "learning_rate": 7.224669603524229e-06, |
| "loss": 1.3548, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.032535779153195124, |
| "grad_norm": 2.054262638092041, |
| "learning_rate": 7.2298522933402435e-06, |
| "loss": 1.4986, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.03255910229237304, |
| "grad_norm": 1.7639497518539429, |
| "learning_rate": 7.235034983156259e-06, |
| "loss": 1.4023, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.03258242543155096, |
| "grad_norm": 1.3556314706802368, |
| "learning_rate": 7.240217672972273e-06, |
| "loss": 1.4122, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.03260574857072888, |
| "grad_norm": 1.8941506147384644, |
| "learning_rate": 7.245400362788287e-06, |
| "loss": 1.1754, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.032629071709906796, |
| "grad_norm": 1.7958110570907593, |
| "learning_rate": 7.250583052604302e-06, |
| "loss": 1.9056, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.032652394849084714, |
| "grad_norm": 1.3702186346054077, |
| "learning_rate": 7.255765742420317e-06, |
| "loss": 1.5533, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.03267571798826263, |
| "grad_norm": 1.4540181159973145, |
| "learning_rate": 7.260948432236331e-06, |
| "loss": 1.4704, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.03269904112744055, |
| "grad_norm": 1.6024681329727173, |
| "learning_rate": 7.266131122052346e-06, |
| "loss": 1.4394, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.03272236426661847, |
| "grad_norm": 1.5546940565109253, |
| "learning_rate": 7.27131381186836e-06, |
| "loss": 1.54, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.032745687405796385, |
| "grad_norm": 1.5781769752502441, |
| "learning_rate": 7.276496501684375e-06, |
| "loss": 1.3658, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.0327690105449743, |
| "grad_norm": 1.4951281547546387, |
| "learning_rate": 7.2816791915003895e-06, |
| "loss": 1.3768, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.03279233368415222, |
| "grad_norm": 1.9413893222808838, |
| "learning_rate": 7.286861881316404e-06, |
| "loss": 1.3878, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.03281565682333014, |
| "grad_norm": 1.6263363361358643, |
| "learning_rate": 7.292044571132418e-06, |
| "loss": 1.4236, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.03283897996250806, |
| "grad_norm": 2.2151589393615723, |
| "learning_rate": 7.2972272609484325e-06, |
| "loss": 1.7296, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.032862303101685975, |
| "grad_norm": 1.3772640228271484, |
| "learning_rate": 7.302409950764448e-06, |
| "loss": 1.292, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.03288562624086389, |
| "grad_norm": 1.7607418298721313, |
| "learning_rate": 7.307592640580462e-06, |
| "loss": 1.6019, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.0329089493800418, |
| "grad_norm": 1.9470393657684326, |
| "learning_rate": 7.312775330396476e-06, |
| "loss": 1.3396, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.03293227251921972, |
| "grad_norm": 2.021190881729126, |
| "learning_rate": 7.317958020212491e-06, |
| "loss": 1.6207, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.03295559565839764, |
| "grad_norm": 1.7311667203903198, |
| "learning_rate": 7.323140710028506e-06, |
| "loss": 1.6409, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.03297891879757556, |
| "grad_norm": 1.6784627437591553, |
| "learning_rate": 7.32832339984452e-06, |
| "loss": 1.5595, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.033002241936753475, |
| "grad_norm": 1.517193078994751, |
| "learning_rate": 7.333506089660535e-06, |
| "loss": 1.574, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.03302556507593139, |
| "grad_norm": 1.4831286668777466, |
| "learning_rate": 7.338688779476549e-06, |
| "loss": 0.8727, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.03304888821510931, |
| "grad_norm": 1.6477752923965454, |
| "learning_rate": 7.343871469292564e-06, |
| "loss": 1.559, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.03307221135428723, |
| "grad_norm": 1.853326678276062, |
| "learning_rate": 7.3490541591085785e-06, |
| "loss": 1.8523, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.03309553449346515, |
| "grad_norm": 1.6894885301589966, |
| "learning_rate": 7.354236848924593e-06, |
| "loss": 1.5844, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.033118857632643064, |
| "grad_norm": 1.6442736387252808, |
| "learning_rate": 7.359419538740606e-06, |
| "loss": 1.986, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.03314218077182098, |
| "grad_norm": 1.787266731262207, |
| "learning_rate": 7.364602228556621e-06, |
| "loss": 1.4822, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.0331655039109989, |
| "grad_norm": 2.073798418045044, |
| "learning_rate": 7.369784918372636e-06, |
| "loss": 1.637, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.03318882705017682, |
| "grad_norm": 1.3428417444229126, |
| "learning_rate": 7.37496760818865e-06, |
| "loss": 1.5598, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.033212150189354736, |
| "grad_norm": 1.5737829208374023, |
| "learning_rate": 7.380150298004665e-06, |
| "loss": 1.2274, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.033235473328532654, |
| "grad_norm": 2.1165404319763184, |
| "learning_rate": 7.385332987820679e-06, |
| "loss": 1.4134, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.03325879646771057, |
| "grad_norm": 1.5476047992706299, |
| "learning_rate": 7.390515677636694e-06, |
| "loss": 1.6364, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.03328211960688849, |
| "grad_norm": 1.6927748918533325, |
| "learning_rate": 7.3956983674527085e-06, |
| "loss": 1.6977, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.03330544274606641, |
| "grad_norm": 1.4677228927612305, |
| "learning_rate": 7.400881057268723e-06, |
| "loss": 1.4168, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.033328765885244326, |
| "grad_norm": 1.5205353498458862, |
| "learning_rate": 7.406063747084737e-06, |
| "loss": 1.2843, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.03335208902442224, |
| "grad_norm": 1.5447300672531128, |
| "learning_rate": 7.411246436900752e-06, |
| "loss": 1.5689, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.03337541216360016, |
| "grad_norm": 1.63996160030365, |
| "learning_rate": 7.416429126716767e-06, |
| "loss": 1.5884, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.03339873530277808, |
| "grad_norm": 1.452081322669983, |
| "learning_rate": 7.421611816532781e-06, |
| "loss": 1.3101, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.033422058441956, |
| "grad_norm": 1.7910422086715698, |
| "learning_rate": 7.426794506348795e-06, |
| "loss": 1.4715, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.033445381581133915, |
| "grad_norm": 1.983233094215393, |
| "learning_rate": 7.43197719616481e-06, |
| "loss": 1.5482, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.03346870472031183, |
| "grad_norm": 1.767785906791687, |
| "learning_rate": 7.437159885980825e-06, |
| "loss": 1.6462, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.03349202785948975, |
| "grad_norm": 1.6161593198776245, |
| "learning_rate": 7.442342575796839e-06, |
| "loss": 1.279, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.03351535099866767, |
| "grad_norm": 1.4756333827972412, |
| "learning_rate": 7.447525265612854e-06, |
| "loss": 1.5475, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.03353867413784559, |
| "grad_norm": 1.8089308738708496, |
| "learning_rate": 7.452707955428868e-06, |
| "loss": 1.8059, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.0335619972770235, |
| "grad_norm": 1.6815400123596191, |
| "learning_rate": 7.457890645244883e-06, |
| "loss": 1.7294, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.033585320416201415, |
| "grad_norm": 2.2101638317108154, |
| "learning_rate": 7.4630733350608975e-06, |
| "loss": 1.1257, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.03360864355537933, |
| "grad_norm": 1.4447871446609497, |
| "learning_rate": 7.468256024876912e-06, |
| "loss": 1.5934, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.03363196669455725, |
| "grad_norm": 1.8209795951843262, |
| "learning_rate": 7.473438714692926e-06, |
| "loss": 1.4218, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.03365528983373517, |
| "grad_norm": 1.4553669691085815, |
| "learning_rate": 7.4786214045089414e-06, |
| "loss": 1.2059, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.03367861297291309, |
| "grad_norm": 1.7106033563613892, |
| "learning_rate": 7.483804094324956e-06, |
| "loss": 1.1671, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.033701936112091005, |
| "grad_norm": 1.3894087076187134, |
| "learning_rate": 7.48898678414097e-06, |
| "loss": 1.4522, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.03372525925126892, |
| "grad_norm": 1.1842654943466187, |
| "learning_rate": 7.4941694739569845e-06, |
| "loss": 1.4706, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.03374858239044684, |
| "grad_norm": 2.5644612312316895, |
| "learning_rate": 7.499352163773e-06, |
| "loss": 1.6062, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.03377190552962476, |
| "grad_norm": 1.5129215717315674, |
| "learning_rate": 7.504534853589013e-06, |
| "loss": 1.2178, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.033795228668802677, |
| "grad_norm": 1.7350616455078125, |
| "learning_rate": 7.5097175434050275e-06, |
| "loss": 1.7579, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.033818551807980594, |
| "grad_norm": 2.163621187210083, |
| "learning_rate": 7.514900233221042e-06, |
| "loss": 1.6504, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.03384187494715851, |
| "grad_norm": 1.946423888206482, |
| "learning_rate": 7.520082923037056e-06, |
| "loss": 1.6524, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.03386519808633643, |
| "grad_norm": 1.766641616821289, |
| "learning_rate": 7.525265612853071e-06, |
| "loss": 1.1683, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.03388852122551435, |
| "grad_norm": 1.928938627243042, |
| "learning_rate": 7.530448302669086e-06, |
| "loss": 1.4919, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.033911844364692266, |
| "grad_norm": 1.5574640035629272, |
| "learning_rate": 7.5356309924851e-06, |
| "loss": 1.3775, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.033935167503870184, |
| "grad_norm": 1.6000114679336548, |
| "learning_rate": 7.5408136823011144e-06, |
| "loss": 1.8033, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.0339584906430481, |
| "grad_norm": 1.4576321840286255, |
| "learning_rate": 7.545996372117129e-06, |
| "loss": 1.6291, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.03398181378222602, |
| "grad_norm": 1.67397940158844, |
| "learning_rate": 7.551179061933144e-06, |
| "loss": 1.501, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.03400513692140394, |
| "grad_norm": 1.6351300477981567, |
| "learning_rate": 7.556361751749158e-06, |
| "loss": 1.4177, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.034028460060581855, |
| "grad_norm": 1.806840181350708, |
| "learning_rate": 7.561544441565173e-06, |
| "loss": 1.2173, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.03405178319975977, |
| "grad_norm": 2.1059956550598145, |
| "learning_rate": 7.566727131381187e-06, |
| "loss": 1.2487, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.03407510633893769, |
| "grad_norm": 1.5448449850082397, |
| "learning_rate": 7.571909821197202e-06, |
| "loss": 1.4264, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.03409842947811561, |
| "grad_norm": 2.8610997200012207, |
| "learning_rate": 7.5770925110132166e-06, |
| "loss": 1.3305, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.03412175261729353, |
| "grad_norm": 1.7565038204193115, |
| "learning_rate": 7.582275200829231e-06, |
| "loss": 1.6971, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.034145075756471445, |
| "grad_norm": 1.5691516399383545, |
| "learning_rate": 7.587457890645245e-06, |
| "loss": 1.6759, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.03416839889564936, |
| "grad_norm": 1.4603890180587769, |
| "learning_rate": 7.5926405804612605e-06, |
| "loss": 1.6264, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.03419172203482728, |
| "grad_norm": 1.5885038375854492, |
| "learning_rate": 7.597823270277275e-06, |
| "loss": 1.124, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.0342150451740052, |
| "grad_norm": 1.4058237075805664, |
| "learning_rate": 7.603005960093289e-06, |
| "loss": 1.4257, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.03423836831318311, |
| "grad_norm": 1.552217721939087, |
| "learning_rate": 7.6081886499093035e-06, |
| "loss": 1.2563, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.03426169145236103, |
| "grad_norm": 2.235629081726074, |
| "learning_rate": 7.613371339725319e-06, |
| "loss": 1.8083, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.034285014591538945, |
| "grad_norm": 1.8639624118804932, |
| "learning_rate": 7.618554029541333e-06, |
| "loss": 1.5186, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.03430833773071686, |
| "grad_norm": 2.1537373065948486, |
| "learning_rate": 7.623736719357347e-06, |
| "loss": 1.3531, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.03433166086989478, |
| "grad_norm": 1.9041272401809692, |
| "learning_rate": 7.628919409173362e-06, |
| "loss": 1.4382, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.0343549840090727, |
| "grad_norm": 1.5207409858703613, |
| "learning_rate": 7.634102098989377e-06, |
| "loss": 1.349, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.03437830714825062, |
| "grad_norm": 1.446553349494934, |
| "learning_rate": 7.639284788805391e-06, |
| "loss": 1.1513, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.034401630287428535, |
| "grad_norm": 1.5411823987960815, |
| "learning_rate": 7.644467478621404e-06, |
| "loss": 1.2673, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.03442495342660645, |
| "grad_norm": 1.588210105895996, |
| "learning_rate": 7.64965016843742e-06, |
| "loss": 1.3414, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.03444827656578437, |
| "grad_norm": 1.4371896982192993, |
| "learning_rate": 7.654832858253434e-06, |
| "loss": 1.3386, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.03447159970496229, |
| "grad_norm": 1.2713488340377808, |
| "learning_rate": 7.660015548069449e-06, |
| "loss": 1.3674, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.034494922844140206, |
| "grad_norm": 1.9180690050125122, |
| "learning_rate": 7.665198237885463e-06, |
| "loss": 1.3761, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.034518245983318124, |
| "grad_norm": 1.7977988719940186, |
| "learning_rate": 7.670380927701477e-06, |
| "loss": 1.5416, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.03454156912249604, |
| "grad_norm": 1.6764715909957886, |
| "learning_rate": 7.675563617517492e-06, |
| "loss": 1.9225, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.03456489226167396, |
| "grad_norm": 1.8952007293701172, |
| "learning_rate": 7.680746307333506e-06, |
| "loss": 1.545, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.03458821540085188, |
| "grad_norm": 1.2648754119873047, |
| "learning_rate": 7.68592899714952e-06, |
| "loss": 1.3556, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.034611538540029796, |
| "grad_norm": 1.5882269144058228, |
| "learning_rate": 7.691111686965535e-06, |
| "loss": 1.4676, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.034634861679207714, |
| "grad_norm": 1.4746918678283691, |
| "learning_rate": 7.69629437678155e-06, |
| "loss": 1.5869, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.03465818481838563, |
| "grad_norm": 1.6212809085845947, |
| "learning_rate": 7.701477066597565e-06, |
| "loss": 1.7056, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.03468150795756355, |
| "grad_norm": 2.3746814727783203, |
| "learning_rate": 7.70665975641358e-06, |
| "loss": 1.4726, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.03470483109674147, |
| "grad_norm": 1.5706418752670288, |
| "learning_rate": 7.711842446229594e-06, |
| "loss": 1.3319, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.034728154235919385, |
| "grad_norm": 1.6811712980270386, |
| "learning_rate": 7.717025136045608e-06, |
| "loss": 1.4744, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.0347514773750973, |
| "grad_norm": 1.839852213859558, |
| "learning_rate": 7.722207825861623e-06, |
| "loss": 1.3563, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.03477480051427522, |
| "grad_norm": 1.2929447889328003, |
| "learning_rate": 7.727390515677637e-06, |
| "loss": 1.7944, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.03479812365345314, |
| "grad_norm": 1.7659885883331299, |
| "learning_rate": 7.732573205493651e-06, |
| "loss": 1.6265, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.03482144679263106, |
| "grad_norm": 1.7670022249221802, |
| "learning_rate": 7.737755895309667e-06, |
| "loss": 1.6311, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.034844769931808975, |
| "grad_norm": 1.7348347902297974, |
| "learning_rate": 7.742938585125682e-06, |
| "loss": 1.4573, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.03486809307098689, |
| "grad_norm": 1.5826637744903564, |
| "learning_rate": 7.748121274941696e-06, |
| "loss": 1.8183, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.034891416210164804, |
| "grad_norm": 1.6276066303253174, |
| "learning_rate": 7.75330396475771e-06, |
| "loss": 1.5105, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.03491473934934272, |
| "grad_norm": 1.4175602197647095, |
| "learning_rate": 7.758486654573725e-06, |
| "loss": 1.4397, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.03493806248852064, |
| "grad_norm": 1.2575039863586426, |
| "learning_rate": 7.763669344389739e-06, |
| "loss": 1.3638, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.03496138562769856, |
| "grad_norm": 1.591441035270691, |
| "learning_rate": 7.768852034205753e-06, |
| "loss": 1.2515, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.034984708766876475, |
| "grad_norm": 1.8170280456542969, |
| "learning_rate": 7.774034724021768e-06, |
| "loss": 1.6124, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03500803190605439, |
| "grad_norm": 1.825690507888794, |
| "learning_rate": 7.779217413837784e-06, |
| "loss": 1.5076, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.03503135504523231, |
| "grad_norm": 1.61045241355896, |
| "learning_rate": 7.784400103653798e-06, |
| "loss": 1.5944, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.03505467818441023, |
| "grad_norm": 2.1213035583496094, |
| "learning_rate": 7.78958279346981e-06, |
| "loss": 1.561, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.03507800132358815, |
| "grad_norm": 1.5680464506149292, |
| "learning_rate": 7.794765483285825e-06, |
| "loss": 1.1515, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.035101324462766065, |
| "grad_norm": 1.7792956829071045, |
| "learning_rate": 7.79994817310184e-06, |
| "loss": 1.7459, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.03512464760194398, |
| "grad_norm": 1.5262699127197266, |
| "learning_rate": 7.805130862917854e-06, |
| "loss": 1.4087, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.0351479707411219, |
| "grad_norm": 1.9013603925704956, |
| "learning_rate": 7.81031355273387e-06, |
| "loss": 1.745, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.03517129388029982, |
| "grad_norm": 2.1864850521087646, |
| "learning_rate": 7.815496242549884e-06, |
| "loss": 1.6892, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.035194617019477736, |
| "grad_norm": 1.6094999313354492, |
| "learning_rate": 7.820678932365898e-06, |
| "loss": 1.1677, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.035217940158655654, |
| "grad_norm": 1.6659038066864014, |
| "learning_rate": 7.825861622181913e-06, |
| "loss": 1.3676, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.03524126329783357, |
| "grad_norm": 1.5591635704040527, |
| "learning_rate": 7.831044311997927e-06, |
| "loss": 1.3353, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.03526458643701149, |
| "grad_norm": 1.6324151754379272, |
| "learning_rate": 7.836227001813942e-06, |
| "loss": 1.5816, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.03528790957618941, |
| "grad_norm": 1.8007915019989014, |
| "learning_rate": 7.841409691629956e-06, |
| "loss": 1.9207, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.035311232715367326, |
| "grad_norm": 1.6061041355133057, |
| "learning_rate": 7.84659238144597e-06, |
| "loss": 1.6949, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.035334555854545244, |
| "grad_norm": 1.5150330066680908, |
| "learning_rate": 7.851775071261986e-06, |
| "loss": 1.7975, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.03535787899372316, |
| "grad_norm": 1.7966561317443848, |
| "learning_rate": 7.856957761078e-06, |
| "loss": 1.3558, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.03538120213290108, |
| "grad_norm": 1.6751410961151123, |
| "learning_rate": 7.862140450894015e-06, |
| "loss": 1.3387, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.035404525272079, |
| "grad_norm": 1.7746779918670654, |
| "learning_rate": 7.86732314071003e-06, |
| "loss": 1.8068, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.035427848411256915, |
| "grad_norm": 1.4943839311599731, |
| "learning_rate": 7.872505830526044e-06, |
| "loss": 1.4922, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.03545117155043483, |
| "grad_norm": 1.3683398962020874, |
| "learning_rate": 7.877688520342058e-06, |
| "loss": 1.382, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.03547449468961275, |
| "grad_norm": 1.6939599514007568, |
| "learning_rate": 7.882871210158072e-06, |
| "loss": 1.6179, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.03549781782879067, |
| "grad_norm": 1.4292916059494019, |
| "learning_rate": 7.888053899974087e-06, |
| "loss": 1.4422, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.03552114096796859, |
| "grad_norm": 1.96234929561615, |
| "learning_rate": 7.893236589790103e-06, |
| "loss": 1.1728, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.035544464107146505, |
| "grad_norm": 1.8289707899093628, |
| "learning_rate": 7.898419279606117e-06, |
| "loss": 1.4281, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.035567787246324416, |
| "grad_norm": 1.563638687133789, |
| "learning_rate": 7.903601969422131e-06, |
| "loss": 1.441, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.035591110385502334, |
| "grad_norm": 1.7753417491912842, |
| "learning_rate": 7.908784659238146e-06, |
| "loss": 1.5371, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.03561443352468025, |
| "grad_norm": 1.4442288875579834, |
| "learning_rate": 7.91396734905416e-06, |
| "loss": 1.21, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.03563775666385817, |
| "grad_norm": 1.5175955295562744, |
| "learning_rate": 7.919150038870174e-06, |
| "loss": 1.2075, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.03566107980303609, |
| "grad_norm": 1.6752229928970337, |
| "learning_rate": 7.924332728686189e-06, |
| "loss": 1.2919, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.035684402942214005, |
| "grad_norm": 1.7506253719329834, |
| "learning_rate": 7.929515418502203e-06, |
| "loss": 1.5369, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.03570772608139192, |
| "grad_norm": 1.9442663192749023, |
| "learning_rate": 7.934698108318218e-06, |
| "loss": 1.6756, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.03573104922056984, |
| "grad_norm": 1.658495545387268, |
| "learning_rate": 7.939880798134232e-06, |
| "loss": 1.2362, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.03575437235974776, |
| "grad_norm": 1.2289533615112305, |
| "learning_rate": 7.945063487950246e-06, |
| "loss": 1.5051, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.03577769549892568, |
| "grad_norm": 1.5502135753631592, |
| "learning_rate": 7.95024617776626e-06, |
| "loss": 1.3702, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.035801018638103595, |
| "grad_norm": 1.8727954626083374, |
| "learning_rate": 7.955428867582275e-06, |
| "loss": 1.417, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.03582434177728151, |
| "grad_norm": 1.1890602111816406, |
| "learning_rate": 7.96061155739829e-06, |
| "loss": 1.1737, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.03584766491645943, |
| "grad_norm": 5.72725772857666, |
| "learning_rate": 7.965794247214305e-06, |
| "loss": 1.3097, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.03587098805563735, |
| "grad_norm": 1.2847952842712402, |
| "learning_rate": 7.97097693703032e-06, |
| "loss": 1.456, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.035894311194815266, |
| "grad_norm": 2.3652467727661133, |
| "learning_rate": 7.976159626846334e-06, |
| "loss": 1.7498, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.035917634333993184, |
| "grad_norm": 2.2748360633850098, |
| "learning_rate": 7.981342316662348e-06, |
| "loss": 1.4181, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.0359409574731711, |
| "grad_norm": 1.9288114309310913, |
| "learning_rate": 7.986525006478363e-06, |
| "loss": 1.4505, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.03596428061234902, |
| "grad_norm": 1.9735311269760132, |
| "learning_rate": 7.991707696294377e-06, |
| "loss": 1.5196, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.03598760375152694, |
| "grad_norm": 1.5026898384094238, |
| "learning_rate": 7.996890386110391e-06, |
| "loss": 1.2868, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.036010926890704856, |
| "grad_norm": 1.4773675203323364, |
| "learning_rate": 8.002073075926406e-06, |
| "loss": 1.3777, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.036034250029882774, |
| "grad_norm": 1.7095143795013428, |
| "learning_rate": 8.007255765742422e-06, |
| "loss": 1.2692, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.03605757316906069, |
| "grad_norm": 1.7218233346939087, |
| "learning_rate": 8.012438455558436e-06, |
| "loss": 1.4015, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.03608089630823861, |
| "grad_norm": 1.5240681171417236, |
| "learning_rate": 8.01762114537445e-06, |
| "loss": 1.5267, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.03610421944741653, |
| "grad_norm": 1.9092682600021362, |
| "learning_rate": 8.022803835190465e-06, |
| "loss": 1.2564, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.036127542586594445, |
| "grad_norm": 1.844650149345398, |
| "learning_rate": 8.027986525006479e-06, |
| "loss": 1.5158, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.03615086572577236, |
| "grad_norm": 1.5689501762390137, |
| "learning_rate": 8.033169214822493e-06, |
| "loss": 1.5708, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.03617418886495028, |
| "grad_norm": 2.210259437561035, |
| "learning_rate": 8.038351904638508e-06, |
| "loss": 1.5915, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.0361975120041282, |
| "grad_norm": 1.4000816345214844, |
| "learning_rate": 8.043534594454522e-06, |
| "loss": 1.2189, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.03622083514330611, |
| "grad_norm": 1.4790806770324707, |
| "learning_rate": 8.048717284270538e-06, |
| "loss": 1.3637, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.03624415828248403, |
| "grad_norm": 1.9432685375213623, |
| "learning_rate": 8.053899974086553e-06, |
| "loss": 1.4459, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.036267481421661946, |
| "grad_norm": 1.9427974224090576, |
| "learning_rate": 8.059082663902567e-06, |
| "loss": 1.8405, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.036290804560839864, |
| "grad_norm": 1.6169490814208984, |
| "learning_rate": 8.064265353718581e-06, |
| "loss": 1.5894, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.03631412770001778, |
| "grad_norm": 2.189110517501831, |
| "learning_rate": 8.069448043534596e-06, |
| "loss": 1.4458, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.0363374508391957, |
| "grad_norm": 1.6950788497924805, |
| "learning_rate": 8.074630733350608e-06, |
| "loss": 1.2485, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.03636077397837362, |
| "grad_norm": 1.5580222606658936, |
| "learning_rate": 8.079813423166624e-06, |
| "loss": 1.3971, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.036384097117551535, |
| "grad_norm": 1.68899405002594, |
| "learning_rate": 8.084996112982639e-06, |
| "loss": 1.5722, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.03640742025672945, |
| "grad_norm": 1.6812056303024292, |
| "learning_rate": 8.090178802798653e-06, |
| "loss": 1.8336, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.03643074339590737, |
| "grad_norm": 2.962195634841919, |
| "learning_rate": 8.095361492614667e-06, |
| "loss": 1.7488, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.03645406653508529, |
| "grad_norm": 1.6132487058639526, |
| "learning_rate": 8.100544182430682e-06, |
| "loss": 1.9727, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.03647738967426321, |
| "grad_norm": 1.6288578510284424, |
| "learning_rate": 8.105726872246696e-06, |
| "loss": 1.6962, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.036500712813441125, |
| "grad_norm": 1.5894676446914673, |
| "learning_rate": 8.11090956206271e-06, |
| "loss": 1.7313, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.03652403595261904, |
| "grad_norm": 1.702314019203186, |
| "learning_rate": 8.116092251878725e-06, |
| "loss": 1.5682, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.03654735909179696, |
| "grad_norm": 2.3464395999908447, |
| "learning_rate": 8.12127494169474e-06, |
| "loss": 1.1367, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.03657068223097488, |
| "grad_norm": 1.3930420875549316, |
| "learning_rate": 8.126457631510755e-06, |
| "loss": 1.2127, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.036594005370152796, |
| "grad_norm": 1.964519739151001, |
| "learning_rate": 8.13164032132677e-06, |
| "loss": 1.5458, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.036617328509330714, |
| "grad_norm": 1.7511687278747559, |
| "learning_rate": 8.136823011142784e-06, |
| "loss": 1.4957, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.03664065164850863, |
| "grad_norm": 1.403041958808899, |
| "learning_rate": 8.142005700958798e-06, |
| "loss": 1.5422, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.03666397478768655, |
| "grad_norm": 2.368617534637451, |
| "learning_rate": 8.147188390774813e-06, |
| "loss": 1.2203, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.03668729792686447, |
| "grad_norm": 1.7351584434509277, |
| "learning_rate": 8.152371080590827e-06, |
| "loss": 1.5534, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.036710621066042386, |
| "grad_norm": 1.7059663534164429, |
| "learning_rate": 8.157553770406841e-06, |
| "loss": 1.5554, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.036733944205220304, |
| "grad_norm": 1.9748015403747559, |
| "learning_rate": 8.162736460222857e-06, |
| "loss": 1.3837, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.03675726734439822, |
| "grad_norm": 1.7517926692962646, |
| "learning_rate": 8.167919150038872e-06, |
| "loss": 1.5008, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.03678059048357614, |
| "grad_norm": 2.074340343475342, |
| "learning_rate": 8.173101839854886e-06, |
| "loss": 1.1944, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.03680391362275406, |
| "grad_norm": 1.7943975925445557, |
| "learning_rate": 8.1782845296709e-06, |
| "loss": 1.5017, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.036827236761931975, |
| "grad_norm": 1.7202725410461426, |
| "learning_rate": 8.183467219486915e-06, |
| "loss": 1.3468, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.03685055990110989, |
| "grad_norm": 2.03446364402771, |
| "learning_rate": 8.188649909302929e-06, |
| "loss": 1.8081, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.03687388304028781, |
| "grad_norm": 1.8767874240875244, |
| "learning_rate": 8.193832599118943e-06, |
| "loss": 1.3877, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.03689720617946572, |
| "grad_norm": 1.4143779277801514, |
| "learning_rate": 8.199015288934958e-06, |
| "loss": 1.5551, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.03692052931864364, |
| "grad_norm": 1.4130569696426392, |
| "learning_rate": 8.204197978750974e-06, |
| "loss": 1.3058, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.03694385245782156, |
| "grad_norm": 1.4558956623077393, |
| "learning_rate": 8.209380668566988e-06, |
| "loss": 1.4228, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.036967175596999476, |
| "grad_norm": 2.6582729816436768, |
| "learning_rate": 8.214563358383e-06, |
| "loss": 1.5081, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.036990498736177393, |
| "grad_norm": 1.4754345417022705, |
| "learning_rate": 8.219746048199015e-06, |
| "loss": 1.5688, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.03701382187535531, |
| "grad_norm": 1.5351654291152954, |
| "learning_rate": 8.22492873801503e-06, |
| "loss": 1.8144, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.03703714501453323, |
| "grad_norm": 1.6197818517684937, |
| "learning_rate": 8.230111427831044e-06, |
| "loss": 1.5846, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.03706046815371115, |
| "grad_norm": 1.8108611106872559, |
| "learning_rate": 8.23529411764706e-06, |
| "loss": 1.3416, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.037083791292889065, |
| "grad_norm": 1.6245759725570679, |
| "learning_rate": 8.240476807463074e-06, |
| "loss": 1.4319, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.03710711443206698, |
| "grad_norm": 1.9573677778244019, |
| "learning_rate": 8.245659497279088e-06, |
| "loss": 1.4856, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.0371304375712449, |
| "grad_norm": 2.195033550262451, |
| "learning_rate": 8.250842187095103e-06, |
| "loss": 1.4553, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.03715376071042282, |
| "grad_norm": 1.7342851161956787, |
| "learning_rate": 8.256024876911117e-06, |
| "loss": 1.4633, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.03717708384960074, |
| "grad_norm": 1.499495506286621, |
| "learning_rate": 8.261207566727132e-06, |
| "loss": 1.6055, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.037200406988778655, |
| "grad_norm": 1.4192696809768677, |
| "learning_rate": 8.266390256543146e-06, |
| "loss": 1.3659, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.03722373012795657, |
| "grad_norm": 1.8910040855407715, |
| "learning_rate": 8.27157294635916e-06, |
| "loss": 1.703, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.03724705326713449, |
| "grad_norm": 1.4962915182113647, |
| "learning_rate": 8.276755636175176e-06, |
| "loss": 1.3212, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.03727037640631241, |
| "grad_norm": 2.1940252780914307, |
| "learning_rate": 8.28193832599119e-06, |
| "loss": 1.8816, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.037293699545490326, |
| "grad_norm": 1.415831208229065, |
| "learning_rate": 8.287121015807205e-06, |
| "loss": 1.229, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.037317022684668244, |
| "grad_norm": 1.5565876960754395, |
| "learning_rate": 8.29230370562322e-06, |
| "loss": 1.7503, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.03734034582384616, |
| "grad_norm": 2.6450204849243164, |
| "learning_rate": 8.297486395439234e-06, |
| "loss": 1.3618, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.03736366896302408, |
| "grad_norm": 1.5824869871139526, |
| "learning_rate": 8.302669085255248e-06, |
| "loss": 1.3587, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.037386992102202, |
| "grad_norm": 1.6635199785232544, |
| "learning_rate": 8.307851775071262e-06, |
| "loss": 1.7323, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.037410315241379916, |
| "grad_norm": 1.5391467809677124, |
| "learning_rate": 8.313034464887277e-06, |
| "loss": 1.7489, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.037433638380557833, |
| "grad_norm": 2.136975049972534, |
| "learning_rate": 8.318217154703293e-06, |
| "loss": 1.4696, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.03745696151973575, |
| "grad_norm": 1.4561282396316528, |
| "learning_rate": 8.323399844519307e-06, |
| "loss": 1.746, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.03748028465891367, |
| "grad_norm": 1.323926329612732, |
| "learning_rate": 8.328582534335321e-06, |
| "loss": 1.3654, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.03750360779809159, |
| "grad_norm": 1.6495275497436523, |
| "learning_rate": 8.333765224151336e-06, |
| "loss": 1.6208, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.037526930937269505, |
| "grad_norm": 1.4379764795303345, |
| "learning_rate": 8.33894791396735e-06, |
| "loss": 1.4988, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.037550254076447416, |
| "grad_norm": 1.676405668258667, |
| "learning_rate": 8.344130603783364e-06, |
| "loss": 1.5563, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.037573577215625334, |
| "grad_norm": 1.0886626243591309, |
| "learning_rate": 8.349313293599379e-06, |
| "loss": 1.2886, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.03759690035480325, |
| "grad_norm": 1.5499573945999146, |
| "learning_rate": 8.354495983415393e-06, |
| "loss": 1.6758, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.03762022349398117, |
| "grad_norm": 1.2256261110305786, |
| "learning_rate": 8.359678673231408e-06, |
| "loss": 1.1831, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.03764354663315909, |
| "grad_norm": 2.0278513431549072, |
| "learning_rate": 8.364861363047422e-06, |
| "loss": 1.5379, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.037666869772337006, |
| "grad_norm": 1.6582108736038208, |
| "learning_rate": 8.370044052863436e-06, |
| "loss": 1.4499, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.03769019291151492, |
| "grad_norm": 2.517474412918091, |
| "learning_rate": 8.37522674267945e-06, |
| "loss": 1.3365, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.03771351605069284, |
| "grad_norm": 1.651391863822937, |
| "learning_rate": 8.380409432495465e-06, |
| "loss": 1.5475, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.03773683918987076, |
| "grad_norm": 1.9716179370880127, |
| "learning_rate": 8.38559212231148e-06, |
| "loss": 1.4809, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.03776016232904868, |
| "grad_norm": 2.0555307865142822, |
| "learning_rate": 8.390774812127495e-06, |
| "loss": 1.7496, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.037783485468226595, |
| "grad_norm": 1.5695487260818481, |
| "learning_rate": 8.39595750194351e-06, |
| "loss": 1.3654, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.03780680860740451, |
| "grad_norm": 1.4122220277786255, |
| "learning_rate": 8.401140191759524e-06, |
| "loss": 1.7411, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.03783013174658243, |
| "grad_norm": 1.4024474620819092, |
| "learning_rate": 8.406322881575538e-06, |
| "loss": 1.3807, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.03785345488576035, |
| "grad_norm": 1.932897686958313, |
| "learning_rate": 8.411505571391553e-06, |
| "loss": 1.6327, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.03787677802493827, |
| "grad_norm": 1.3100526332855225, |
| "learning_rate": 8.416688261207567e-06, |
| "loss": 1.0531, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.037900101164116184, |
| "grad_norm": 1.5780110359191895, |
| "learning_rate": 8.421870951023581e-06, |
| "loss": 1.3187, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.0379234243032941, |
| "grad_norm": 1.905220866203308, |
| "learning_rate": 8.427053640839596e-06, |
| "loss": 1.5322, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.03794674744247202, |
| "grad_norm": 1.4416756629943848, |
| "learning_rate": 8.432236330655612e-06, |
| "loss": 1.3633, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.03797007058164994, |
| "grad_norm": 1.720937728881836, |
| "learning_rate": 8.437419020471626e-06, |
| "loss": 1.4178, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.037993393720827856, |
| "grad_norm": 1.891658902168274, |
| "learning_rate": 8.44260171028764e-06, |
| "loss": 1.5383, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.038016716860005774, |
| "grad_norm": 2.179572105407715, |
| "learning_rate": 8.447784400103655e-06, |
| "loss": 1.3399, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.03804003999918369, |
| "grad_norm": 1.4478271007537842, |
| "learning_rate": 8.452967089919669e-06, |
| "loss": 1.4122, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.03806336313836161, |
| "grad_norm": 1.4049443006515503, |
| "learning_rate": 8.458149779735683e-06, |
| "loss": 1.5164, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.03808668627753953, |
| "grad_norm": 2.1146810054779053, |
| "learning_rate": 8.463332469551698e-06, |
| "loss": 1.5005, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.038110009416717446, |
| "grad_norm": 1.8528714179992676, |
| "learning_rate": 8.468515159367712e-06, |
| "loss": 1.4598, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.03813333255589536, |
| "grad_norm": 2.274590492248535, |
| "learning_rate": 8.473697849183728e-06, |
| "loss": 1.4343, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.03815665569507328, |
| "grad_norm": 1.837266445159912, |
| "learning_rate": 8.478880538999743e-06, |
| "loss": 1.6039, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.0381799788342512, |
| "grad_norm": 1.735687494277954, |
| "learning_rate": 8.484063228815757e-06, |
| "loss": 1.3623, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.03820330197342912, |
| "grad_norm": 1.8133695125579834, |
| "learning_rate": 8.489245918631771e-06, |
| "loss": 1.2941, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.03822662511260703, |
| "grad_norm": 1.9450503587722778, |
| "learning_rate": 8.494428608447786e-06, |
| "loss": 1.4706, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.038249948251784946, |
| "grad_norm": 1.6004278659820557, |
| "learning_rate": 8.4996112982638e-06, |
| "loss": 1.4726, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.038273271390962864, |
| "grad_norm": 1.7052674293518066, |
| "learning_rate": 8.504793988079814e-06, |
| "loss": 1.5521, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.03829659453014078, |
| "grad_norm": 1.3694720268249512, |
| "learning_rate": 8.509976677895829e-06, |
| "loss": 1.1333, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.0383199176693187, |
| "grad_norm": 1.7958831787109375, |
| "learning_rate": 8.515159367711843e-06, |
| "loss": 1.6234, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.03834324080849662, |
| "grad_norm": 1.7349238395690918, |
| "learning_rate": 8.520342057527857e-06, |
| "loss": 1.3697, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.038366563947674535, |
| "grad_norm": 1.5960413217544556, |
| "learning_rate": 8.525524747343872e-06, |
| "loss": 1.4687, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.03838988708685245, |
| "grad_norm": 2.78328800201416, |
| "learning_rate": 8.530707437159886e-06, |
| "loss": 1.462, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.03841321022603037, |
| "grad_norm": 1.310705304145813, |
| "learning_rate": 8.5358901269759e-06, |
| "loss": 1.347, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.03843653336520829, |
| "grad_norm": 1.554968237876892, |
| "learning_rate": 8.541072816791915e-06, |
| "loss": 1.41, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.03845985650438621, |
| "grad_norm": 2.0181522369384766, |
| "learning_rate": 8.54625550660793e-06, |
| "loss": 1.3945, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.038483179643564125, |
| "grad_norm": 1.816375494003296, |
| "learning_rate": 8.551438196423945e-06, |
| "loss": 1.6109, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.03850650278274204, |
| "grad_norm": 2.1661388874053955, |
| "learning_rate": 8.55662088623996e-06, |
| "loss": 1.8344, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.03852982592191996, |
| "grad_norm": 1.9306049346923828, |
| "learning_rate": 8.561803576055974e-06, |
| "loss": 1.2227, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.03855314906109788, |
| "grad_norm": 1.3145751953125, |
| "learning_rate": 8.566986265871988e-06, |
| "loss": 1.3881, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.038576472200275796, |
| "grad_norm": 1.6416202783584595, |
| "learning_rate": 8.572168955688003e-06, |
| "loss": 1.2782, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.038599795339453714, |
| "grad_norm": 1.3195691108703613, |
| "learning_rate": 8.577351645504017e-06, |
| "loss": 1.5958, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.03862311847863163, |
| "grad_norm": 1.786651372909546, |
| "learning_rate": 8.582534335320031e-06, |
| "loss": 1.6379, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.03864644161780955, |
| "grad_norm": 1.685196876525879, |
| "learning_rate": 8.587717025136047e-06, |
| "loss": 1.2548, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.03866976475698747, |
| "grad_norm": 2.0508875846862793, |
| "learning_rate": 8.592899714952062e-06, |
| "loss": 1.635, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.038693087896165386, |
| "grad_norm": 1.7226320505142212, |
| "learning_rate": 8.598082404768076e-06, |
| "loss": 1.4694, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.038716411035343304, |
| "grad_norm": 1.5333112478256226, |
| "learning_rate": 8.60326509458409e-06, |
| "loss": 1.4825, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.03873973417452122, |
| "grad_norm": 1.4121674299240112, |
| "learning_rate": 8.608447784400105e-06, |
| "loss": 1.2056, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.03876305731369914, |
| "grad_norm": 1.6394184827804565, |
| "learning_rate": 8.613630474216119e-06, |
| "loss": 1.5131, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.03878638045287706, |
| "grad_norm": 2.2525839805603027, |
| "learning_rate": 8.618813164032133e-06, |
| "loss": 1.4413, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.038809703592054975, |
| "grad_norm": 1.6599324941635132, |
| "learning_rate": 8.623995853848148e-06, |
| "loss": 1.1568, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.03883302673123289, |
| "grad_norm": 1.930284857749939, |
| "learning_rate": 8.629178543664162e-06, |
| "loss": 1.2182, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.03885634987041081, |
| "grad_norm": 1.366219401359558, |
| "learning_rate": 8.634361233480178e-06, |
| "loss": 1.6951, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.03887967300958872, |
| "grad_norm": 1.8555302619934082, |
| "learning_rate": 8.639543923296192e-06, |
| "loss": 1.4508, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.03890299614876664, |
| "grad_norm": 2.110704183578491, |
| "learning_rate": 8.644726613112205e-06, |
| "loss": 1.5057, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.03892631928794456, |
| "grad_norm": 1.4422646760940552, |
| "learning_rate": 8.64990930292822e-06, |
| "loss": 1.5628, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.038949642427122476, |
| "grad_norm": 1.8097025156021118, |
| "learning_rate": 8.655091992744234e-06, |
| "loss": 1.5336, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.038972965566300394, |
| "grad_norm": 1.5321156978607178, |
| "learning_rate": 8.66027468256025e-06, |
| "loss": 1.4985, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.03899628870547831, |
| "grad_norm": 1.715100884437561, |
| "learning_rate": 8.665457372376264e-06, |
| "loss": 1.5365, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.03901961184465623, |
| "grad_norm": 1.7432835102081299, |
| "learning_rate": 8.670640062192278e-06, |
| "loss": 1.5822, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.03904293498383415, |
| "grad_norm": 1.7451759576797485, |
| "learning_rate": 8.675822752008293e-06, |
| "loss": 1.6363, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.039066258123012065, |
| "grad_norm": 1.6405068635940552, |
| "learning_rate": 8.681005441824307e-06, |
| "loss": 1.6819, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.03908958126218998, |
| "grad_norm": 1.7980347871780396, |
| "learning_rate": 8.686188131640322e-06, |
| "loss": 1.5362, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.0391129044013679, |
| "grad_norm": 1.6365665197372437, |
| "learning_rate": 8.691370821456336e-06, |
| "loss": 2.0277, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.03913622754054582, |
| "grad_norm": 1.9490535259246826, |
| "learning_rate": 8.69655351127235e-06, |
| "loss": 1.5076, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.03915955067972374, |
| "grad_norm": 1.4164410829544067, |
| "learning_rate": 8.701736201088366e-06, |
| "loss": 1.8005, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.039182873818901655, |
| "grad_norm": 1.4707103967666626, |
| "learning_rate": 8.70691889090438e-06, |
| "loss": 1.353, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.03920619695807957, |
| "grad_norm": 1.7562110424041748, |
| "learning_rate": 8.712101580720395e-06, |
| "loss": 1.5621, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.03922952009725749, |
| "grad_norm": 2.0748794078826904, |
| "learning_rate": 8.71728427053641e-06, |
| "loss": 1.4923, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.03925284323643541, |
| "grad_norm": 2.031003475189209, |
| "learning_rate": 8.722466960352424e-06, |
| "loss": 1.1706, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.039276166375613326, |
| "grad_norm": 2.4340038299560547, |
| "learning_rate": 8.727649650168438e-06, |
| "loss": 1.3371, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.039299489514791244, |
| "grad_norm": 2.129331111907959, |
| "learning_rate": 8.732832339984452e-06, |
| "loss": 1.558, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.03932281265396916, |
| "grad_norm": 1.907139778137207, |
| "learning_rate": 8.738015029800467e-06, |
| "loss": 1.6016, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.03934613579314708, |
| "grad_norm": 1.8079878091812134, |
| "learning_rate": 8.743197719616483e-06, |
| "loss": 1.4186, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.039369458932325, |
| "grad_norm": 1.9196524620056152, |
| "learning_rate": 8.748380409432497e-06, |
| "loss": 1.6435, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.039392782071502916, |
| "grad_norm": 1.5702369213104248, |
| "learning_rate": 8.753563099248511e-06, |
| "loss": 1.6279, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.039416105210680834, |
| "grad_norm": 1.8079639673233032, |
| "learning_rate": 8.758745789064526e-06, |
| "loss": 1.4299, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.03943942834985875, |
| "grad_norm": 1.5084450244903564, |
| "learning_rate": 8.76392847888054e-06, |
| "loss": 1.6051, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.03946275148903667, |
| "grad_norm": 1.8773257732391357, |
| "learning_rate": 8.769111168696554e-06, |
| "loss": 1.2258, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.03948607462821459, |
| "grad_norm": 1.662649154663086, |
| "learning_rate": 8.774293858512569e-06, |
| "loss": 1.5057, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.039509397767392505, |
| "grad_norm": 1.7742561101913452, |
| "learning_rate": 8.779476548328583e-06, |
| "loss": 1.5083, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.03953272090657042, |
| "grad_norm": 1.6094675064086914, |
| "learning_rate": 8.784659238144598e-06, |
| "loss": 1.4416, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.039556044045748334, |
| "grad_norm": 1.7892067432403564, |
| "learning_rate": 8.789841927960612e-06, |
| "loss": 1.6939, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.03957936718492625, |
| "grad_norm": 1.4669241905212402, |
| "learning_rate": 8.795024617776626e-06, |
| "loss": 1.3218, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.03960269032410417, |
| "grad_norm": 1.6289660930633545, |
| "learning_rate": 8.80020730759264e-06, |
| "loss": 1.4026, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.03962601346328209, |
| "grad_norm": 1.4103940725326538, |
| "learning_rate": 8.805389997408655e-06, |
| "loss": 1.5594, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.039649336602460006, |
| "grad_norm": 1.8094227313995361, |
| "learning_rate": 8.81057268722467e-06, |
| "loss": 1.4749, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.039672659741637924, |
| "grad_norm": 1.9171851873397827, |
| "learning_rate": 8.815755377040685e-06, |
| "loss": 1.5853, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.03969598288081584, |
| "grad_norm": 1.7482846975326538, |
| "learning_rate": 8.8209380668567e-06, |
| "loss": 1.8572, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.03971930601999376, |
| "grad_norm": 1.494166374206543, |
| "learning_rate": 8.826120756672714e-06, |
| "loss": 1.4618, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.03974262915917168, |
| "grad_norm": 1.8293770551681519, |
| "learning_rate": 8.831303446488728e-06, |
| "loss": 1.278, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.039765952298349595, |
| "grad_norm": 1.7367064952850342, |
| "learning_rate": 8.836486136304743e-06, |
| "loss": 1.65, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.03978927543752751, |
| "grad_norm": 1.783642292022705, |
| "learning_rate": 8.841668826120757e-06, |
| "loss": 1.5325, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.03981259857670543, |
| "grad_norm": 1.5297502279281616, |
| "learning_rate": 8.846851515936771e-06, |
| "loss": 1.4132, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.03983592171588335, |
| "grad_norm": 1.9751566648483276, |
| "learning_rate": 8.852034205752786e-06, |
| "loss": 1.4317, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.03985924485506127, |
| "grad_norm": 2.1414785385131836, |
| "learning_rate": 8.857216895568802e-06, |
| "loss": 1.6401, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.039882567994239185, |
| "grad_norm": 1.4582406282424927, |
| "learning_rate": 8.862399585384816e-06, |
| "loss": 1.4949, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.0399058911334171, |
| "grad_norm": 1.3729748725891113, |
| "learning_rate": 8.86758227520083e-06, |
| "loss": 0.8325, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.03992921427259502, |
| "grad_norm": 1.5666522979736328, |
| "learning_rate": 8.872764965016845e-06, |
| "loss": 1.6165, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.03995253741177294, |
| "grad_norm": 1.8730623722076416, |
| "learning_rate": 8.877947654832859e-06, |
| "loss": 1.5912, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.039975860550950856, |
| "grad_norm": 1.3995941877365112, |
| "learning_rate": 8.883130344648873e-06, |
| "loss": 1.4624, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.039999183690128774, |
| "grad_norm": 1.6787446737289429, |
| "learning_rate": 8.888313034464888e-06, |
| "loss": 1.7264, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.04002250682930669, |
| "grad_norm": 1.6797045469284058, |
| "learning_rate": 8.893495724280902e-06, |
| "loss": 1.458, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.04004582996848461, |
| "grad_norm": 1.4562252759933472, |
| "learning_rate": 8.898678414096917e-06, |
| "loss": 1.4469, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.04006915310766253, |
| "grad_norm": 1.8270559310913086, |
| "learning_rate": 8.903861103912933e-06, |
| "loss": 1.5524, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.040092476246840446, |
| "grad_norm": 2.2723021507263184, |
| "learning_rate": 8.909043793728947e-06, |
| "loss": 1.5524, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.040115799386018364, |
| "grad_norm": 1.6696120500564575, |
| "learning_rate": 8.914226483544961e-06, |
| "loss": 1.6466, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.04013912252519628, |
| "grad_norm": 1.8067409992218018, |
| "learning_rate": 8.919409173360976e-06, |
| "loss": 1.4901, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.0401624456643742, |
| "grad_norm": 1.6212742328643799, |
| "learning_rate": 8.92459186317699e-06, |
| "loss": 1.3791, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.04018576880355212, |
| "grad_norm": 1.5557783842086792, |
| "learning_rate": 8.929774552993004e-06, |
| "loss": 1.4122, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.04020909194273003, |
| "grad_norm": 2.65142822265625, |
| "learning_rate": 8.934957242809019e-06, |
| "loss": 1.3679, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.040232415081907946, |
| "grad_norm": 1.9991352558135986, |
| "learning_rate": 8.940139932625033e-06, |
| "loss": 1.4746, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.040255738221085864, |
| "grad_norm": 2.054579257965088, |
| "learning_rate": 8.945322622441047e-06, |
| "loss": 1.5759, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.04027906136026378, |
| "grad_norm": 1.62351393699646, |
| "learning_rate": 8.950505312257062e-06, |
| "loss": 1.4139, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.0403023844994417, |
| "grad_norm": 1.754712462425232, |
| "learning_rate": 8.955688002073076e-06, |
| "loss": 1.2871, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.04032570763861962, |
| "grad_norm": 1.744728922843933, |
| "learning_rate": 8.96087069188909e-06, |
| "loss": 1.5207, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.040349030777797536, |
| "grad_norm": 1.9871348142623901, |
| "learning_rate": 8.966053381705105e-06, |
| "loss": 1.7927, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.040372353916975454, |
| "grad_norm": 1.898793339729309, |
| "learning_rate": 8.97123607152112e-06, |
| "loss": 1.5487, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.04039567705615337, |
| "grad_norm": 1.6234720945358276, |
| "learning_rate": 8.976418761337135e-06, |
| "loss": 1.3666, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.04041900019533129, |
| "grad_norm": 1.7883436679840088, |
| "learning_rate": 8.98160145115315e-06, |
| "loss": 1.59, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.04044232333450921, |
| "grad_norm": 2.030747890472412, |
| "learning_rate": 8.986784140969164e-06, |
| "loss": 1.5484, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.040465646473687125, |
| "grad_norm": 1.5323489904403687, |
| "learning_rate": 8.991966830785178e-06, |
| "loss": 1.4076, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.04048896961286504, |
| "grad_norm": 1.545076847076416, |
| "learning_rate": 8.997149520601193e-06, |
| "loss": 1.9024, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.04051229275204296, |
| "grad_norm": 1.775343656539917, |
| "learning_rate": 9.002332210417207e-06, |
| "loss": 1.6269, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.04053561589122088, |
| "grad_norm": 1.5936089754104614, |
| "learning_rate": 9.007514900233221e-06, |
| "loss": 1.388, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.0405589390303988, |
| "grad_norm": 2.0282087326049805, |
| "learning_rate": 9.012697590049236e-06, |
| "loss": 1.5258, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.040582262169576715, |
| "grad_norm": 1.769651174545288, |
| "learning_rate": 9.017880279865252e-06, |
| "loss": 1.6468, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.04060558530875463, |
| "grad_norm": 1.671475887298584, |
| "learning_rate": 9.023062969681266e-06, |
| "loss": 1.457, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.04062890844793255, |
| "grad_norm": 1.5717363357543945, |
| "learning_rate": 9.02824565949728e-06, |
| "loss": 1.0661, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.04065223158711047, |
| "grad_norm": 2.1011769771575928, |
| "learning_rate": 9.033428349313295e-06, |
| "loss": 1.8212, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.040675554726288386, |
| "grad_norm": 1.8593213558197021, |
| "learning_rate": 9.038611039129309e-06, |
| "loss": 1.2838, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.040698877865466304, |
| "grad_norm": 3.45039963722229, |
| "learning_rate": 9.043793728945323e-06, |
| "loss": 1.2977, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.04072220100464422, |
| "grad_norm": 1.5961792469024658, |
| "learning_rate": 9.048976418761338e-06, |
| "loss": 1.598, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.04074552414382214, |
| "grad_norm": 1.7901935577392578, |
| "learning_rate": 9.054159108577352e-06, |
| "loss": 1.197, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.04076884728300006, |
| "grad_norm": 1.7534990310668945, |
| "learning_rate": 9.059341798393368e-06, |
| "loss": 1.5957, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.040792170422177976, |
| "grad_norm": 2.0215656757354736, |
| "learning_rate": 9.064524488209382e-06, |
| "loss": 1.4019, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.040815493561355894, |
| "grad_norm": 1.7355159521102905, |
| "learning_rate": 9.069707178025397e-06, |
| "loss": 1.6056, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.04083881670053381, |
| "grad_norm": 2.3358545303344727, |
| "learning_rate": 9.07488986784141e-06, |
| "loss": 1.3946, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.04086213983971173, |
| "grad_norm": 2.4582395553588867, |
| "learning_rate": 9.080072557657424e-06, |
| "loss": 1.3848, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.04088546297888964, |
| "grad_norm": 1.8667892217636108, |
| "learning_rate": 9.08525524747344e-06, |
| "loss": 1.5908, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.04090878611806756, |
| "grad_norm": 2.2128000259399414, |
| "learning_rate": 9.090437937289454e-06, |
| "loss": 1.4584, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.040932109257245476, |
| "grad_norm": 1.714179277420044, |
| "learning_rate": 9.095620627105468e-06, |
| "loss": 1.3882, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.040955432396423394, |
| "grad_norm": 1.7891523838043213, |
| "learning_rate": 9.100803316921483e-06, |
| "loss": 1.6921, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.04097875553560131, |
| "grad_norm": 2.0620603561401367, |
| "learning_rate": 9.105986006737497e-06, |
| "loss": 1.4833, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.04100207867477923, |
| "grad_norm": 1.4664239883422852, |
| "learning_rate": 9.111168696553512e-06, |
| "loss": 1.5, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.04102540181395715, |
| "grad_norm": 2.151362180709839, |
| "learning_rate": 9.116351386369526e-06, |
| "loss": 1.4189, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.041048724953135066, |
| "grad_norm": 2.1404523849487305, |
| "learning_rate": 9.12153407618554e-06, |
| "loss": 1.512, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.041072048092312984, |
| "grad_norm": 1.5175687074661255, |
| "learning_rate": 9.126716766001556e-06, |
| "loss": 1.3527, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.0410953712314909, |
| "grad_norm": 1.6199604272842407, |
| "learning_rate": 9.13189945581757e-06, |
| "loss": 1.1717, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.04111869437066882, |
| "grad_norm": 1.655900001525879, |
| "learning_rate": 9.137082145633585e-06, |
| "loss": 1.4903, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.04114201750984674, |
| "grad_norm": 1.6075772047042847, |
| "learning_rate": 9.1422648354496e-06, |
| "loss": 1.3745, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.041165340649024655, |
| "grad_norm": 1.5534958839416504, |
| "learning_rate": 9.147447525265614e-06, |
| "loss": 1.4659, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.04118866378820257, |
| "grad_norm": 2.197490930557251, |
| "learning_rate": 9.152630215081628e-06, |
| "loss": 1.5412, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.04121198692738049, |
| "grad_norm": 2.1121668815612793, |
| "learning_rate": 9.157812904897642e-06, |
| "loss": 1.7137, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.04123531006655841, |
| "grad_norm": 2.2003660202026367, |
| "learning_rate": 9.162995594713657e-06, |
| "loss": 1.6095, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.04125863320573633, |
| "grad_norm": 1.617874264717102, |
| "learning_rate": 9.168178284529671e-06, |
| "loss": 1.4913, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.041281956344914245, |
| "grad_norm": 1.6809815168380737, |
| "learning_rate": 9.173360974345687e-06, |
| "loss": 1.6014, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.04130527948409216, |
| "grad_norm": 1.8234214782714844, |
| "learning_rate": 9.178543664161701e-06, |
| "loss": 1.4921, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.04132860262327008, |
| "grad_norm": 1.605371117591858, |
| "learning_rate": 9.183726353977716e-06, |
| "loss": 1.526, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.041351925762448, |
| "grad_norm": 1.7158360481262207, |
| "learning_rate": 9.18890904379373e-06, |
| "loss": 1.6063, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.041375248901625916, |
| "grad_norm": 1.8888566493988037, |
| "learning_rate": 9.194091733609744e-06, |
| "loss": 1.3013, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.041398572040803834, |
| "grad_norm": 1.8596553802490234, |
| "learning_rate": 9.199274423425759e-06, |
| "loss": 1.3611, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.04142189517998175, |
| "grad_norm": 1.770941972732544, |
| "learning_rate": 9.204457113241773e-06, |
| "loss": 1.553, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.04144521831915967, |
| "grad_norm": 1.4563987255096436, |
| "learning_rate": 9.209639803057788e-06, |
| "loss": 1.3261, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.04146854145833759, |
| "grad_norm": 1.5590494871139526, |
| "learning_rate": 9.214822492873802e-06, |
| "loss": 1.6303, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.041491864597515506, |
| "grad_norm": 1.6040290594100952, |
| "learning_rate": 9.220005182689816e-06, |
| "loss": 1.6656, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.041515187736693424, |
| "grad_norm": 1.6253089904785156, |
| "learning_rate": 9.22518787250583e-06, |
| "loss": 1.3086, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.041538510875871335, |
| "grad_norm": 2.282277822494507, |
| "learning_rate": 9.230370562321845e-06, |
| "loss": 1.3154, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.04156183401504925, |
| "grad_norm": 1.6955877542495728, |
| "learning_rate": 9.23555325213786e-06, |
| "loss": 1.4742, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.04158515715422717, |
| "grad_norm": 2.6918323040008545, |
| "learning_rate": 9.240735941953875e-06, |
| "loss": 1.4942, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.04160848029340509, |
| "grad_norm": 2.111135244369507, |
| "learning_rate": 9.24591863176989e-06, |
| "loss": 1.4501, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.041631803432583006, |
| "grad_norm": 1.6524665355682373, |
| "learning_rate": 9.251101321585904e-06, |
| "loss": 1.2801, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.041655126571760924, |
| "grad_norm": 1.812553882598877, |
| "learning_rate": 9.256284011401918e-06, |
| "loss": 1.2928, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.04167844971093884, |
| "grad_norm": 1.7474865913391113, |
| "learning_rate": 9.261466701217933e-06, |
| "loss": 1.5489, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.04170177285011676, |
| "grad_norm": 1.91874098777771, |
| "learning_rate": 9.266649391033947e-06, |
| "loss": 1.5997, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.04172509598929468, |
| "grad_norm": 1.4715979099273682, |
| "learning_rate": 9.271832080849961e-06, |
| "loss": 0.921, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.041748419128472596, |
| "grad_norm": 1.599254846572876, |
| "learning_rate": 9.277014770665976e-06, |
| "loss": 1.5168, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.04177174226765051, |
| "grad_norm": 1.8970310688018799, |
| "learning_rate": 9.28219746048199e-06, |
| "loss": 1.4821, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.04179506540682843, |
| "grad_norm": 1.5975875854492188, |
| "learning_rate": 9.287380150298006e-06, |
| "loss": 1.4889, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.04181838854600635, |
| "grad_norm": 1.7852643728256226, |
| "learning_rate": 9.29256284011402e-06, |
| "loss": 1.4124, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.04184171168518427, |
| "grad_norm": 1.8535397052764893, |
| "learning_rate": 9.297745529930035e-06, |
| "loss": 1.5964, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.041865034824362185, |
| "grad_norm": 1.532125473022461, |
| "learning_rate": 9.302928219746049e-06, |
| "loss": 1.2431, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.0418883579635401, |
| "grad_norm": 1.542386531829834, |
| "learning_rate": 9.308110909562063e-06, |
| "loss": 1.6327, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.04191168110271802, |
| "grad_norm": 1.8671448230743408, |
| "learning_rate": 9.313293599378078e-06, |
| "loss": 1.7695, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.04193500424189594, |
| "grad_norm": 1.6148124933242798, |
| "learning_rate": 9.318476289194092e-06, |
| "loss": 1.7227, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.04195832738107386, |
| "grad_norm": 1.4859371185302734, |
| "learning_rate": 9.323658979010107e-06, |
| "loss": 1.2807, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.041981650520251775, |
| "grad_norm": 3.0297629833221436, |
| "learning_rate": 9.328841668826123e-06, |
| "loss": 1.3824, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.04200497365942969, |
| "grad_norm": 1.6791976690292358, |
| "learning_rate": 9.334024358642137e-06, |
| "loss": 1.7325, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.04202829679860761, |
| "grad_norm": 1.4695453643798828, |
| "learning_rate": 9.339207048458151e-06, |
| "loss": 1.2062, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.04205161993778553, |
| "grad_norm": 1.5592173337936401, |
| "learning_rate": 9.344389738274166e-06, |
| "loss": 1.1919, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.042074943076963446, |
| "grad_norm": 1.4761253595352173, |
| "learning_rate": 9.34957242809018e-06, |
| "loss": 1.2845, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.042098266216141364, |
| "grad_norm": 1.3584182262420654, |
| "learning_rate": 9.354755117906194e-06, |
| "loss": 1.6216, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.04212158935531928, |
| "grad_norm": 2.0344326496124268, |
| "learning_rate": 9.359937807722209e-06, |
| "loss": 1.2301, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.0421449124944972, |
| "grad_norm": 1.549643874168396, |
| "learning_rate": 9.365120497538223e-06, |
| "loss": 1.446, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.04216823563367512, |
| "grad_norm": 1.6695293188095093, |
| "learning_rate": 9.370303187354237e-06, |
| "loss": 1.7588, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.042191558772853036, |
| "grad_norm": 1.817617416381836, |
| "learning_rate": 9.375485877170252e-06, |
| "loss": 1.5394, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.04221488191203095, |
| "grad_norm": 1.917152762413025, |
| "learning_rate": 9.380668566986266e-06, |
| "loss": 1.6437, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.042238205051208864, |
| "grad_norm": 0.9892622828483582, |
| "learning_rate": 9.38585125680228e-06, |
| "loss": 1.1554, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.04226152819038678, |
| "grad_norm": 1.577576994895935, |
| "learning_rate": 9.391033946618295e-06, |
| "loss": 1.4737, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.0422848513295647, |
| "grad_norm": 1.739229679107666, |
| "learning_rate": 9.39621663643431e-06, |
| "loss": 1.4077, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.04230817446874262, |
| "grad_norm": 1.6817034482955933, |
| "learning_rate": 9.401399326250325e-06, |
| "loss": 0.9329, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.042331497607920536, |
| "grad_norm": 1.6616978645324707, |
| "learning_rate": 9.40658201606634e-06, |
| "loss": 1.6185, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.042354820747098454, |
| "grad_norm": 1.379654049873352, |
| "learning_rate": 9.411764705882354e-06, |
| "loss": 1.6863, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.04237814388627637, |
| "grad_norm": 2.3998191356658936, |
| "learning_rate": 9.416947395698368e-06, |
| "loss": 1.4281, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.04240146702545429, |
| "grad_norm": 2.078322410583496, |
| "learning_rate": 9.422130085514383e-06, |
| "loss": 1.2324, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.04242479016463221, |
| "grad_norm": 1.8474605083465576, |
| "learning_rate": 9.427312775330397e-06, |
| "loss": 1.3242, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.042448113303810125, |
| "grad_norm": 1.4538230895996094, |
| "learning_rate": 9.432495465146411e-06, |
| "loss": 1.3117, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.04247143644298804, |
| "grad_norm": 2.528913974761963, |
| "learning_rate": 9.437678154962426e-06, |
| "loss": 1.3846, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.04249475958216596, |
| "grad_norm": 1.5370780229568481, |
| "learning_rate": 9.442860844778442e-06, |
| "loss": 1.4712, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.04251808272134388, |
| "grad_norm": 1.7554328441619873, |
| "learning_rate": 9.448043534594456e-06, |
| "loss": 1.5354, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.0425414058605218, |
| "grad_norm": 1.490560531616211, |
| "learning_rate": 9.45322622441047e-06, |
| "loss": 1.1725, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.042564728999699715, |
| "grad_norm": 1.55622136592865, |
| "learning_rate": 9.458408914226485e-06, |
| "loss": 1.5401, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.04258805213887763, |
| "grad_norm": 1.6288939714431763, |
| "learning_rate": 9.463591604042499e-06, |
| "loss": 1.404, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.04261137527805555, |
| "grad_norm": 1.9815454483032227, |
| "learning_rate": 9.468774293858513e-06, |
| "loss": 1.538, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.04263469841723347, |
| "grad_norm": 1.8967722654342651, |
| "learning_rate": 9.473956983674528e-06, |
| "loss": 1.4561, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.04265802155641139, |
| "grad_norm": 2.010972023010254, |
| "learning_rate": 9.479139673490542e-06, |
| "loss": 1.692, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.042681344695589304, |
| "grad_norm": 1.82353937625885, |
| "learning_rate": 9.484322363306558e-06, |
| "loss": 1.6394, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.04270466783476722, |
| "grad_norm": 1.6288769245147705, |
| "learning_rate": 9.489505053122572e-06, |
| "loss": 1.7251, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.04272799097394514, |
| "grad_norm": 2.7632317543029785, |
| "learning_rate": 9.494687742938587e-06, |
| "loss": 1.5771, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.04275131411312306, |
| "grad_norm": 1.7157068252563477, |
| "learning_rate": 9.499870432754601e-06, |
| "loss": 1.9245, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.042774637252300976, |
| "grad_norm": 1.6728345155715942, |
| "learning_rate": 9.505053122570614e-06, |
| "loss": 1.5874, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.042797960391478894, |
| "grad_norm": 1.6265268325805664, |
| "learning_rate": 9.51023581238663e-06, |
| "loss": 1.6633, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.04282128353065681, |
| "grad_norm": 1.8013489246368408, |
| "learning_rate": 9.515418502202644e-06, |
| "loss": 1.3856, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.04284460666983473, |
| "grad_norm": 1.85427987575531, |
| "learning_rate": 9.520601192018658e-06, |
| "loss": 1.2233, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.04286792980901264, |
| "grad_norm": 1.6943988800048828, |
| "learning_rate": 9.525783881834673e-06, |
| "loss": 1.3198, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.04289125294819056, |
| "grad_norm": 1.7103756666183472, |
| "learning_rate": 9.530966571650687e-06, |
| "loss": 1.4118, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.042914576087368476, |
| "grad_norm": 2.0107672214508057, |
| "learning_rate": 9.536149261466702e-06, |
| "loss": 1.3456, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.042937899226546394, |
| "grad_norm": 1.505422830581665, |
| "learning_rate": 9.541331951282716e-06, |
| "loss": 1.2676, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.04296122236572431, |
| "grad_norm": 2.090595245361328, |
| "learning_rate": 9.54651464109873e-06, |
| "loss": 1.2113, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.04298454550490223, |
| "grad_norm": 1.7776191234588623, |
| "learning_rate": 9.551697330914745e-06, |
| "loss": 1.5694, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.04300786864408015, |
| "grad_norm": 3.0254878997802734, |
| "learning_rate": 9.55688002073076e-06, |
| "loss": 1.244, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.043031191783258066, |
| "grad_norm": 1.8657838106155396, |
| "learning_rate": 9.562062710546775e-06, |
| "loss": 1.9444, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.043054514922435984, |
| "grad_norm": 2.1006710529327393, |
| "learning_rate": 9.56724540036279e-06, |
| "loss": 1.3202, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.0430778380616139, |
| "grad_norm": 1.2389309406280518, |
| "learning_rate": 9.572428090178804e-06, |
| "loss": 1.1992, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.04310116120079182, |
| "grad_norm": 2.162818193435669, |
| "learning_rate": 9.577610779994818e-06, |
| "loss": 1.5446, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.04312448433996974, |
| "grad_norm": 2.476367950439453, |
| "learning_rate": 9.582793469810832e-06, |
| "loss": 1.6178, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.043147807479147655, |
| "grad_norm": 2.1805801391601562, |
| "learning_rate": 9.587976159626847e-06, |
| "loss": 1.5745, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.04317113061832557, |
| "grad_norm": 1.7875632047653198, |
| "learning_rate": 9.593158849442861e-06, |
| "loss": 1.6798, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.04319445375750349, |
| "grad_norm": 2.506103515625, |
| "learning_rate": 9.598341539258877e-06, |
| "loss": 1.2824, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.04321777689668141, |
| "grad_norm": 2.027400016784668, |
| "learning_rate": 9.603524229074891e-06, |
| "loss": 1.7745, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.04324110003585933, |
| "grad_norm": 1.5254895687103271, |
| "learning_rate": 9.608706918890906e-06, |
| "loss": 1.6716, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.043264423175037245, |
| "grad_norm": 1.9832854270935059, |
| "learning_rate": 9.61388960870692e-06, |
| "loss": 1.2432, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.04328774631421516, |
| "grad_norm": 1.3785820007324219, |
| "learning_rate": 9.619072298522934e-06, |
| "loss": 1.3452, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.04331106945339308, |
| "grad_norm": 2.0536274909973145, |
| "learning_rate": 9.624254988338949e-06, |
| "loss": 1.9594, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.043334392592571, |
| "grad_norm": 1.8014826774597168, |
| "learning_rate": 9.629437678154963e-06, |
| "loss": 1.4811, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.043357715731748916, |
| "grad_norm": 1.5722678899765015, |
| "learning_rate": 9.634620367970978e-06, |
| "loss": 1.2694, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.043381038870926834, |
| "grad_norm": 1.849761724472046, |
| "learning_rate": 9.639803057786994e-06, |
| "loss": 1.4856, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.04340436201010475, |
| "grad_norm": 1.412558913230896, |
| "learning_rate": 9.644985747603006e-06, |
| "loss": 1.4672, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.04342768514928267, |
| "grad_norm": 2.028230667114258, |
| "learning_rate": 9.65016843741902e-06, |
| "loss": 1.5573, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.04345100828846059, |
| "grad_norm": 2.5457494258880615, |
| "learning_rate": 9.655351127235035e-06, |
| "loss": 1.3734, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.043474331427638506, |
| "grad_norm": 1.6199779510498047, |
| "learning_rate": 9.66053381705105e-06, |
| "loss": 1.6676, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.043497654566816424, |
| "grad_norm": 1.4922274351119995, |
| "learning_rate": 9.665716506867064e-06, |
| "loss": 1.5909, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.04352097770599434, |
| "grad_norm": 1.545914649963379, |
| "learning_rate": 9.67089919668308e-06, |
| "loss": 1.0683, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.04354430084517225, |
| "grad_norm": 1.4928728342056274, |
| "learning_rate": 9.676081886499094e-06, |
| "loss": 1.1975, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.04356762398435017, |
| "grad_norm": 2.042757272720337, |
| "learning_rate": 9.681264576315108e-06, |
| "loss": 1.9166, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.04359094712352809, |
| "grad_norm": 1.9415842294692993, |
| "learning_rate": 9.686447266131123e-06, |
| "loss": 1.5207, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.043614270262706006, |
| "grad_norm": 1.6906239986419678, |
| "learning_rate": 9.691629955947137e-06, |
| "loss": 1.4171, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.043637593401883924, |
| "grad_norm": 1.5644055604934692, |
| "learning_rate": 9.696812645763151e-06, |
| "loss": 1.4997, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.04366091654106184, |
| "grad_norm": 1.7778024673461914, |
| "learning_rate": 9.701995335579166e-06, |
| "loss": 1.3872, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.04368423968023976, |
| "grad_norm": 1.9999544620513916, |
| "learning_rate": 9.70717802539518e-06, |
| "loss": 1.6039, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.04370756281941768, |
| "grad_norm": 2.1065220832824707, |
| "learning_rate": 9.712360715211196e-06, |
| "loss": 1.4525, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.043730885958595596, |
| "grad_norm": 1.785739541053772, |
| "learning_rate": 9.71754340502721e-06, |
| "loss": 1.7723, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.043754209097773514, |
| "grad_norm": 1.7912609577178955, |
| "learning_rate": 9.722726094843225e-06, |
| "loss": 1.8857, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.04377753223695143, |
| "grad_norm": 2.2229981422424316, |
| "learning_rate": 9.727908784659239e-06, |
| "loss": 1.7359, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.04380085537612935, |
| "grad_norm": 1.7545627355575562, |
| "learning_rate": 9.733091474475253e-06, |
| "loss": 1.3878, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.04382417851530727, |
| "grad_norm": 1.6687484979629517, |
| "learning_rate": 9.738274164291268e-06, |
| "loss": 1.3148, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.043847501654485185, |
| "grad_norm": 1.661619782447815, |
| "learning_rate": 9.743456854107282e-06, |
| "loss": 1.5319, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.0438708247936631, |
| "grad_norm": 1.6879695653915405, |
| "learning_rate": 9.748639543923297e-06, |
| "loss": 1.2871, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.04389414793284102, |
| "grad_norm": 1.614043116569519, |
| "learning_rate": 9.753822233739313e-06, |
| "loss": 1.0429, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.04391747107201894, |
| "grad_norm": 1.310645341873169, |
| "learning_rate": 9.759004923555327e-06, |
| "loss": 1.5535, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.04394079421119686, |
| "grad_norm": 1.677807092666626, |
| "learning_rate": 9.764187613371341e-06, |
| "loss": 1.5612, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.043964117350374775, |
| "grad_norm": 2.004786252975464, |
| "learning_rate": 9.769370303187356e-06, |
| "loss": 1.1547, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.04398744048955269, |
| "grad_norm": 2.4537112712860107, |
| "learning_rate": 9.77455299300337e-06, |
| "loss": 1.6863, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.04401076362873061, |
| "grad_norm": 1.8132030963897705, |
| "learning_rate": 9.779735682819384e-06, |
| "loss": 1.2049, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.04403408676790853, |
| "grad_norm": 1.954026699066162, |
| "learning_rate": 9.784918372635399e-06, |
| "loss": 1.3946, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.044057409907086446, |
| "grad_norm": 1.742790699005127, |
| "learning_rate": 9.790101062451413e-06, |
| "loss": 1.3851, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.044080733046264364, |
| "grad_norm": 2.010481357574463, |
| "learning_rate": 9.795283752267427e-06, |
| "loss": 1.4181, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.04410405618544228, |
| "grad_norm": 1.6661536693572998, |
| "learning_rate": 9.800466442083442e-06, |
| "loss": 1.1611, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.0441273793246202, |
| "grad_norm": 1.6758571863174438, |
| "learning_rate": 9.805649131899456e-06, |
| "loss": 1.0906, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.04415070246379812, |
| "grad_norm": 1.7925001382827759, |
| "learning_rate": 9.81083182171547e-06, |
| "loss": 1.4299, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.044174025602976036, |
| "grad_norm": 1.9415634870529175, |
| "learning_rate": 9.816014511531485e-06, |
| "loss": 1.5619, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.04419734874215395, |
| "grad_norm": 1.3546884059906006, |
| "learning_rate": 9.821197201347499e-06, |
| "loss": 1.4994, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.044220671881331865, |
| "grad_norm": 2.0756897926330566, |
| "learning_rate": 9.826379891163515e-06, |
| "loss": 1.7483, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.04424399502050978, |
| "grad_norm": 1.7983125448226929, |
| "learning_rate": 9.83156258097953e-06, |
| "loss": 1.586, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.0442673181596877, |
| "grad_norm": 1.5559202432632446, |
| "learning_rate": 9.836745270795544e-06, |
| "loss": 1.5093, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.04429064129886562, |
| "grad_norm": 1.772439956665039, |
| "learning_rate": 9.841927960611558e-06, |
| "loss": 1.3449, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.044313964438043536, |
| "grad_norm": 1.9158481359481812, |
| "learning_rate": 9.847110650427573e-06, |
| "loss": 1.3239, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.044337287577221454, |
| "grad_norm": 1.801500916481018, |
| "learning_rate": 9.852293340243587e-06, |
| "loss": 1.1534, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.04436061071639937, |
| "grad_norm": 1.5766456127166748, |
| "learning_rate": 9.857476030059601e-06, |
| "loss": 1.7678, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.04438393385557729, |
| "grad_norm": 1.852655053138733, |
| "learning_rate": 9.862658719875616e-06, |
| "loss": 1.7286, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.04440725699475521, |
| "grad_norm": 1.9849982261657715, |
| "learning_rate": 9.867841409691632e-06, |
| "loss": 1.3984, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.044430580133933126, |
| "grad_norm": 1.7213250398635864, |
| "learning_rate": 9.873024099507646e-06, |
| "loss": 1.6215, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.044453903273111044, |
| "grad_norm": 1.9416676759719849, |
| "learning_rate": 9.87820678932366e-06, |
| "loss": 1.6314, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.04447722641228896, |
| "grad_norm": 1.8408985137939453, |
| "learning_rate": 9.883389479139675e-06, |
| "loss": 1.6611, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.04450054955146688, |
| "grad_norm": 1.528350591659546, |
| "learning_rate": 9.888572168955689e-06, |
| "loss": 1.7559, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.0445238726906448, |
| "grad_norm": 1.6557738780975342, |
| "learning_rate": 9.893754858771703e-06, |
| "loss": 1.5072, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.044547195829822715, |
| "grad_norm": 2.0431089401245117, |
| "learning_rate": 9.898937548587718e-06, |
| "loss": 1.2895, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.04457051896900063, |
| "grad_norm": 1.8927110433578491, |
| "learning_rate": 9.904120238403732e-06, |
| "loss": 1.4221, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.04459384210817855, |
| "grad_norm": 1.547044038772583, |
| "learning_rate": 9.909302928219748e-06, |
| "loss": 1.2597, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.04461716524735647, |
| "grad_norm": 1.81504487991333, |
| "learning_rate": 9.914485618035762e-06, |
| "loss": 1.4845, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.04464048838653439, |
| "grad_norm": 3.442282199859619, |
| "learning_rate": 9.919668307851777e-06, |
| "loss": 1.0979, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.044663811525712305, |
| "grad_norm": 1.8255623579025269, |
| "learning_rate": 9.924850997667791e-06, |
| "loss": 1.6663, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.04468713466489022, |
| "grad_norm": 1.7657500505447388, |
| "learning_rate": 9.930033687483804e-06, |
| "loss": 1.765, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.04471045780406814, |
| "grad_norm": 1.6761666536331177, |
| "learning_rate": 9.935216377299818e-06, |
| "loss": 1.3525, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.04473378094324606, |
| "grad_norm": 2.3319602012634277, |
| "learning_rate": 9.940399067115834e-06, |
| "loss": 1.1265, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.044757104082423976, |
| "grad_norm": 1.6062688827514648, |
| "learning_rate": 9.945581756931848e-06, |
| "loss": 1.6085, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.044780427221601894, |
| "grad_norm": 1.4931232929229736, |
| "learning_rate": 9.950764446747863e-06, |
| "loss": 1.6418, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.04480375036077981, |
| "grad_norm": 2.0092151165008545, |
| "learning_rate": 9.955947136563877e-06, |
| "loss": 1.2352, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.04482707349995773, |
| "grad_norm": 2.2695815563201904, |
| "learning_rate": 9.961129826379892e-06, |
| "loss": 1.3626, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.04485039663913565, |
| "grad_norm": 1.6969548463821411, |
| "learning_rate": 9.966312516195906e-06, |
| "loss": 1.6971, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.04487371977831356, |
| "grad_norm": 1.8436291217803955, |
| "learning_rate": 9.97149520601192e-06, |
| "loss": 1.7701, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.04489704291749148, |
| "grad_norm": 1.7749122381210327, |
| "learning_rate": 9.976677895827935e-06, |
| "loss": 1.3771, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.044920366056669395, |
| "grad_norm": 1.9239168167114258, |
| "learning_rate": 9.98186058564395e-06, |
| "loss": 1.7554, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.04494368919584731, |
| "grad_norm": 1.5236059427261353, |
| "learning_rate": 9.987043275459965e-06, |
| "loss": 1.337, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.04496701233502523, |
| "grad_norm": 2.0506536960601807, |
| "learning_rate": 9.99222596527598e-06, |
| "loss": 1.7227, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.04499033547420315, |
| "grad_norm": 1.4491156339645386, |
| "learning_rate": 9.997408655091994e-06, |
| "loss": 1.4032, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.045013658613381066, |
| "grad_norm": 2.175860643386841, |
| "learning_rate": 1.0002591344908008e-05, |
| "loss": 1.5994, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.045036981752558984, |
| "grad_norm": 1.9326441287994385, |
| "learning_rate": 1.0007774034724022e-05, |
| "loss": 1.1194, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.0450603048917369, |
| "grad_norm": 1.8562779426574707, |
| "learning_rate": 1.0012956724540037e-05, |
| "loss": 1.7551, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.04508362803091482, |
| "grad_norm": 1.7570141553878784, |
| "learning_rate": 1.0018139414356051e-05, |
| "loss": 0.97, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.04510695117009274, |
| "grad_norm": 1.2578299045562744, |
| "learning_rate": 1.0023322104172067e-05, |
| "loss": 1.3259, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.045130274309270656, |
| "grad_norm": 2.211773633956909, |
| "learning_rate": 1.0028504793988081e-05, |
| "loss": 1.6072, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.045153597448448574, |
| "grad_norm": 1.7696832418441772, |
| "learning_rate": 1.0033687483804096e-05, |
| "loss": 1.4227, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.04517692058762649, |
| "grad_norm": 1.940531611442566, |
| "learning_rate": 1.003887017362011e-05, |
| "loss": 1.8458, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.04520024372680441, |
| "grad_norm": 2.282905101776123, |
| "learning_rate": 1.0044052863436124e-05, |
| "loss": 1.2556, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.04522356686598233, |
| "grad_norm": 1.643122673034668, |
| "learning_rate": 1.0049235553252139e-05, |
| "loss": 1.5571, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.045246890005160245, |
| "grad_norm": 1.6886086463928223, |
| "learning_rate": 1.0054418243068153e-05, |
| "loss": 1.481, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.04527021314433816, |
| "grad_norm": 2.349867105484009, |
| "learning_rate": 1.0059600932884168e-05, |
| "loss": 1.8651, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.04529353628351608, |
| "grad_norm": 2.0965826511383057, |
| "learning_rate": 1.0064783622700184e-05, |
| "loss": 1.5702, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.045316859422694, |
| "grad_norm": 1.4684425592422485, |
| "learning_rate": 1.0069966312516198e-05, |
| "loss": 1.4283, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.04534018256187192, |
| "grad_norm": 3.0096945762634277, |
| "learning_rate": 1.0075149002332212e-05, |
| "loss": 1.4832, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.045363505701049835, |
| "grad_norm": 2.2389118671417236, |
| "learning_rate": 1.0080331692148227e-05, |
| "loss": 1.6346, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.04538682884022775, |
| "grad_norm": 1.7624162435531616, |
| "learning_rate": 1.0085514381964241e-05, |
| "loss": 1.7017, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.04541015197940567, |
| "grad_norm": 1.8136117458343506, |
| "learning_rate": 1.0090697071780255e-05, |
| "loss": 1.5987, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.04543347511858359, |
| "grad_norm": 1.678236484527588, |
| "learning_rate": 1.009587976159627e-05, |
| "loss": 1.3684, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.045456798257761506, |
| "grad_norm": 1.7862106561660767, |
| "learning_rate": 1.0101062451412284e-05, |
| "loss": 1.7998, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.045480121396939424, |
| "grad_norm": 2.0441555976867676, |
| "learning_rate": 1.0106245141228298e-05, |
| "loss": 1.2902, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.04550344453611734, |
| "grad_norm": 1.5820708274841309, |
| "learning_rate": 1.0111427831044314e-05, |
| "loss": 1.2032, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.04552676767529525, |
| "grad_norm": 1.4560632705688477, |
| "learning_rate": 1.0116610520860329e-05, |
| "loss": 1.5599, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.04555009081447317, |
| "grad_norm": 2.3671185970306396, |
| "learning_rate": 1.0121793210676343e-05, |
| "loss": 1.6144, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.04557341395365109, |
| "grad_norm": 1.7525554895401, |
| "learning_rate": 1.0126975900492357e-05, |
| "loss": 1.913, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.04559673709282901, |
| "grad_norm": 1.2725483179092407, |
| "learning_rate": 1.0132158590308372e-05, |
| "loss": 1.2048, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.045620060232006925, |
| "grad_norm": 1.8041915893554688, |
| "learning_rate": 1.0137341280124386e-05, |
| "loss": 1.1796, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.04564338337118484, |
| "grad_norm": 2.3629374504089355, |
| "learning_rate": 1.01425239699404e-05, |
| "loss": 1.8434, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.04566670651036276, |
| "grad_norm": 1.3975788354873657, |
| "learning_rate": 1.0147706659756413e-05, |
| "loss": 1.5474, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.04569002964954068, |
| "grad_norm": 1.4148329496383667, |
| "learning_rate": 1.0152889349572427e-05, |
| "loss": 1.4695, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.045713352788718596, |
| "grad_norm": 3.3544209003448486, |
| "learning_rate": 1.0158072039388442e-05, |
| "loss": 1.6851, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.045736675927896514, |
| "grad_norm": 1.795784592628479, |
| "learning_rate": 1.0163254729204458e-05, |
| "loss": 1.2823, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.04575999906707443, |
| "grad_norm": 2.3135123252868652, |
| "learning_rate": 1.0168437419020472e-05, |
| "loss": 1.7222, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.04578332220625235, |
| "grad_norm": 1.62346351146698, |
| "learning_rate": 1.0173620108836487e-05, |
| "loss": 1.3822, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.04580664534543027, |
| "grad_norm": 1.9713786840438843, |
| "learning_rate": 1.0178802798652501e-05, |
| "loss": 1.1212, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.045829968484608186, |
| "grad_norm": 1.5502241849899292, |
| "learning_rate": 1.0183985488468515e-05, |
| "loss": 1.0937, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.045853291623786104, |
| "grad_norm": 1.893622875213623, |
| "learning_rate": 1.018916817828453e-05, |
| "loss": 1.7849, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.04587661476296402, |
| "grad_norm": 1.7515870332717896, |
| "learning_rate": 1.0194350868100544e-05, |
| "loss": 1.4724, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.04589993790214194, |
| "grad_norm": 1.7589161396026611, |
| "learning_rate": 1.0199533557916558e-05, |
| "loss": 1.4281, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.04592326104131986, |
| "grad_norm": 2.377809762954712, |
| "learning_rate": 1.0204716247732573e-05, |
| "loss": 1.0402, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.045946584180497775, |
| "grad_norm": 1.6169410943984985, |
| "learning_rate": 1.0209898937548589e-05, |
| "loss": 1.2902, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.04596990731967569, |
| "grad_norm": 1.7550357580184937, |
| "learning_rate": 1.0215081627364603e-05, |
| "loss": 1.274, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.04599323045885361, |
| "grad_norm": 1.846411943435669, |
| "learning_rate": 1.0220264317180617e-05, |
| "loss": 1.2554, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.04601655359803153, |
| "grad_norm": 1.880225419998169, |
| "learning_rate": 1.0225447006996632e-05, |
| "loss": 1.3451, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.04603987673720945, |
| "grad_norm": 1.6644784212112427, |
| "learning_rate": 1.0230629696812646e-05, |
| "loss": 1.5651, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.046063199876387365, |
| "grad_norm": 1.2287671566009521, |
| "learning_rate": 1.023581238662866e-05, |
| "loss": 1.2272, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.04608652301556528, |
| "grad_norm": 5.595534801483154, |
| "learning_rate": 1.0240995076444675e-05, |
| "loss": 1.2381, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.0461098461547432, |
| "grad_norm": 1.6219606399536133, |
| "learning_rate": 1.0246177766260689e-05, |
| "loss": 1.1357, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.04613316929392112, |
| "grad_norm": 1.7713710069656372, |
| "learning_rate": 1.0251360456076705e-05, |
| "loss": 1.3458, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.046156492433099036, |
| "grad_norm": 1.6285533905029297, |
| "learning_rate": 1.025654314589272e-05, |
| "loss": 1.6516, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.046179815572276954, |
| "grad_norm": 1.479745864868164, |
| "learning_rate": 1.0261725835708734e-05, |
| "loss": 1.2629, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.046203138711454865, |
| "grad_norm": 1.6205228567123413, |
| "learning_rate": 1.0266908525524748e-05, |
| "loss": 1.6772, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.04622646185063278, |
| "grad_norm": 1.845969319343567, |
| "learning_rate": 1.0272091215340763e-05, |
| "loss": 1.7172, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.0462497849898107, |
| "grad_norm": 1.71135413646698, |
| "learning_rate": 1.0277273905156777e-05, |
| "loss": 1.3776, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.04627310812898862, |
| "grad_norm": 1.5999668836593628, |
| "learning_rate": 1.0282456594972791e-05, |
| "loss": 1.3148, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.04629643126816654, |
| "grad_norm": 2.372850179672241, |
| "learning_rate": 1.0287639284788806e-05, |
| "loss": 1.5203, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.046319754407344454, |
| "grad_norm": 1.9471055269241333, |
| "learning_rate": 1.0292821974604822e-05, |
| "loss": 1.1852, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.04634307754652237, |
| "grad_norm": 2.035149574279785, |
| "learning_rate": 1.0298004664420836e-05, |
| "loss": 1.5986, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.04636640068570029, |
| "grad_norm": 1.9274436235427856, |
| "learning_rate": 1.030318735423685e-05, |
| "loss": 1.3578, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.04638972382487821, |
| "grad_norm": 1.8304780721664429, |
| "learning_rate": 1.0308370044052865e-05, |
| "loss": 1.2624, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.046413046964056126, |
| "grad_norm": 2.2276337146759033, |
| "learning_rate": 1.0313552733868879e-05, |
| "loss": 1.5508, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.046436370103234044, |
| "grad_norm": 1.7837759256362915, |
| "learning_rate": 1.0318735423684893e-05, |
| "loss": 1.4839, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.04645969324241196, |
| "grad_norm": 1.766287088394165, |
| "learning_rate": 1.0323918113500908e-05, |
| "loss": 1.7001, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.04648301638158988, |
| "grad_norm": 1.6771559715270996, |
| "learning_rate": 1.0329100803316922e-05, |
| "loss": 1.6349, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.0465063395207678, |
| "grad_norm": 1.7568877935409546, |
| "learning_rate": 1.0334283493132938e-05, |
| "loss": 1.4524, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.046529662659945716, |
| "grad_norm": 2.070405960083008, |
| "learning_rate": 1.0339466182948952e-05, |
| "loss": 1.3437, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.04655298579912363, |
| "grad_norm": 2.852936267852783, |
| "learning_rate": 1.0344648872764967e-05, |
| "loss": 1.2623, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.04657630893830155, |
| "grad_norm": 1.3660649061203003, |
| "learning_rate": 1.0349831562580981e-05, |
| "loss": 1.3146, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.04659963207747947, |
| "grad_norm": 1.672303318977356, |
| "learning_rate": 1.0355014252396995e-05, |
| "loss": 1.3361, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.04662295521665739, |
| "grad_norm": 1.6566362380981445, |
| "learning_rate": 1.036019694221301e-05, |
| "loss": 1.4374, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.046646278355835305, |
| "grad_norm": 1.6957907676696777, |
| "learning_rate": 1.0365379632029024e-05, |
| "loss": 1.4639, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 128625, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.082516707749724e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|