| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.023323139177917653, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 2.332313917791765e-05, |
| "grad_norm": 1.6235620975494385, |
| "learning_rate": 5.182689816014512e-09, |
| "loss": 1.9275, |
| "step": 1 |
| }, |
| { |
| "epoch": 4.66462783558353e-05, |
| "grad_norm": 1.5710082054138184, |
| "learning_rate": 1.0365379632029025e-08, |
| "loss": 1.5593, |
| "step": 2 |
| }, |
| { |
| "epoch": 6.996941753375295e-05, |
| "grad_norm": 2.3231985569000244, |
| "learning_rate": 1.5548069448043534e-08, |
| "loss": 2.0021, |
| "step": 3 |
| }, |
| { |
| "epoch": 9.32925567116706e-05, |
| "grad_norm": 1.8349288702011108, |
| "learning_rate": 2.073075926405805e-08, |
| "loss": 2.1141, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.00011661569588958826, |
| "grad_norm": 2.039928436279297, |
| "learning_rate": 2.5913449080072562e-08, |
| "loss": 1.9361, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0001399388350675059, |
| "grad_norm": 1.8988783359527588, |
| "learning_rate": 3.109613889608707e-08, |
| "loss": 2.2441, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.00016326197424542356, |
| "grad_norm": 1.4865813255310059, |
| "learning_rate": 3.6278828712101586e-08, |
| "loss": 1.8118, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0001865851134233412, |
| "grad_norm": 1.4033368825912476, |
| "learning_rate": 4.14615185281161e-08, |
| "loss": 1.8838, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.00020990825260125886, |
| "grad_norm": 1.876894235610962, |
| "learning_rate": 4.6644208344130604e-08, |
| "loss": 1.9916, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.00023323139177917651, |
| "grad_norm": 2.4104366302490234, |
| "learning_rate": 5.1826898160145123e-08, |
| "loss": 1.8618, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0002565545309570942, |
| "grad_norm": 1.8457229137420654, |
| "learning_rate": 5.700958797615963e-08, |
| "loss": 1.7303, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0002798776701350118, |
| "grad_norm": 1.940317988395691, |
| "learning_rate": 6.219227779217413e-08, |
| "loss": 2.2692, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0003032008093129295, |
| "grad_norm": 2.455432891845703, |
| "learning_rate": 6.737496760818865e-08, |
| "loss": 2.3401, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0003265239484908471, |
| "grad_norm": 1.5163850784301758, |
| "learning_rate": 7.255765742420317e-08, |
| "loss": 2.1687, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0003498470876687648, |
| "grad_norm": 1.3012642860412598, |
| "learning_rate": 7.774034724021768e-08, |
| "loss": 1.8693, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0003731702268466824, |
| "grad_norm": 2.0896522998809814, |
| "learning_rate": 8.29230370562322e-08, |
| "loss": 1.7031, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0003964933660246001, |
| "grad_norm": 1.7818728685379028, |
| "learning_rate": 8.810572687224672e-08, |
| "loss": 2.0829, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0004198165052025177, |
| "grad_norm": 2.569828510284424, |
| "learning_rate": 9.328841668826121e-08, |
| "loss": 1.8998, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0004431396443804354, |
| "grad_norm": 1.4619100093841553, |
| "learning_rate": 9.847110650427573e-08, |
| "loss": 1.5964, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.00046646278355835303, |
| "grad_norm": 1.9832793474197388, |
| "learning_rate": 1.0365379632029025e-07, |
| "loss": 1.9292, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0004897859227362707, |
| "grad_norm": 2.0182175636291504, |
| "learning_rate": 1.0883648613630475e-07, |
| "loss": 2.0115, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0005131090619141884, |
| "grad_norm": 1.4642307758331299, |
| "learning_rate": 1.1401917595231926e-07, |
| "loss": 2.0291, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.000536432201092106, |
| "grad_norm": 2.887909173965454, |
| "learning_rate": 1.1920186576833378e-07, |
| "loss": 2.1946, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0005597553402700236, |
| "grad_norm": 1.595544457435608, |
| "learning_rate": 1.2438455558434827e-07, |
| "loss": 2.0246, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0005830784794479413, |
| "grad_norm": 1.5648566484451294, |
| "learning_rate": 1.295672454003628e-07, |
| "loss": 2.1832, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.000606401618625859, |
| "grad_norm": 1.4702372550964355, |
| "learning_rate": 1.347499352163773e-07, |
| "loss": 1.6395, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0006297247578037766, |
| "grad_norm": 1.7178195714950562, |
| "learning_rate": 1.399326250323918e-07, |
| "loss": 1.6264, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0006530478969816942, |
| "grad_norm": 2.1751515865325928, |
| "learning_rate": 1.4511531484840635e-07, |
| "loss": 2.511, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0006763710361596119, |
| "grad_norm": 2.9443299770355225, |
| "learning_rate": 1.5029800466442085e-07, |
| "loss": 2.229, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0006996941753375296, |
| "grad_norm": 1.8316481113433838, |
| "learning_rate": 1.5548069448043536e-07, |
| "loss": 1.8414, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0007230173145154472, |
| "grad_norm": 1.9659239053726196, |
| "learning_rate": 1.6066338429644986e-07, |
| "loss": 2.0109, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.0007463404536933648, |
| "grad_norm": 2.1653449535369873, |
| "learning_rate": 1.658460741124644e-07, |
| "loss": 2.0155, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0007696635928712825, |
| "grad_norm": 1.8755710124969482, |
| "learning_rate": 1.710287639284789e-07, |
| "loss": 2.1105, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.0007929867320492002, |
| "grad_norm": 1.5989196300506592, |
| "learning_rate": 1.7621145374449343e-07, |
| "loss": 2.1583, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0008163098712271178, |
| "grad_norm": 1.865307331085205, |
| "learning_rate": 1.813941435605079e-07, |
| "loss": 2.001, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0008396330104050355, |
| "grad_norm": 1.4584789276123047, |
| "learning_rate": 1.8657683337652242e-07, |
| "loss": 1.8854, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0008629561495829531, |
| "grad_norm": 2.6818912029266357, |
| "learning_rate": 1.9175952319253695e-07, |
| "loss": 2.1888, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.0008862792887608708, |
| "grad_norm": 2.17561674118042, |
| "learning_rate": 1.9694221300855146e-07, |
| "loss": 1.9616, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.0009096024279387884, |
| "grad_norm": 1.252475619316101, |
| "learning_rate": 2.02124902824566e-07, |
| "loss": 1.9585, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0009329255671167061, |
| "grad_norm": 1.884366750717163, |
| "learning_rate": 2.073075926405805e-07, |
| "loss": 2.2436, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0009562487062946237, |
| "grad_norm": 1.4951350688934326, |
| "learning_rate": 2.1249028245659497e-07, |
| "loss": 1.7149, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.0009795718454725414, |
| "grad_norm": 1.891728162765503, |
| "learning_rate": 2.176729722726095e-07, |
| "loss": 2.0472, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.001002894984650459, |
| "grad_norm": 1.8992432355880737, |
| "learning_rate": 2.22855662088624e-07, |
| "loss": 2.1471, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.0010262181238283768, |
| "grad_norm": 1.3931283950805664, |
| "learning_rate": 2.2803835190463852e-07, |
| "loss": 1.5292, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.0010495412630062942, |
| "grad_norm": 1.8894548416137695, |
| "learning_rate": 2.3322104172065305e-07, |
| "loss": 1.7759, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.001072864402184212, |
| "grad_norm": 1.592050552368164, |
| "learning_rate": 2.3840373153666755e-07, |
| "loss": 2.2498, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.0010961875413621296, |
| "grad_norm": 1.3746178150177002, |
| "learning_rate": 2.4358642135268203e-07, |
| "loss": 1.8503, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0011195106805400473, |
| "grad_norm": 2.0268595218658447, |
| "learning_rate": 2.4876911116869654e-07, |
| "loss": 1.9358, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.001142833819717965, |
| "grad_norm": 1.7836228609085083, |
| "learning_rate": 2.539518009847111e-07, |
| "loss": 1.9855, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.0011661569588958826, |
| "grad_norm": 1.829447627067566, |
| "learning_rate": 2.591344908007256e-07, |
| "loss": 2.2802, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0011894800980738003, |
| "grad_norm": 2.2813496589660645, |
| "learning_rate": 2.643171806167401e-07, |
| "loss": 2.1593, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.001212803237251718, |
| "grad_norm": 3.019044876098633, |
| "learning_rate": 2.694998704327546e-07, |
| "loss": 1.9534, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0012361263764296354, |
| "grad_norm": 2.011425256729126, |
| "learning_rate": 2.746825602487691e-07, |
| "loss": 2.1284, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.0012594495156075531, |
| "grad_norm": 2.207106590270996, |
| "learning_rate": 2.798652500647836e-07, |
| "loss": 2.2427, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.0012827726547854708, |
| "grad_norm": 1.3172473907470703, |
| "learning_rate": 2.8504793988079813e-07, |
| "loss": 1.9782, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0013060957939633885, |
| "grad_norm": 1.522895097732544, |
| "learning_rate": 2.902306296968127e-07, |
| "loss": 1.9455, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0013294189331413062, |
| "grad_norm": 2.657248020172119, |
| "learning_rate": 2.954133195128272e-07, |
| "loss": 1.959, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.0013527420723192238, |
| "grad_norm": 1.9738789796829224, |
| "learning_rate": 3.005960093288417e-07, |
| "loss": 1.7878, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.0013760652114971415, |
| "grad_norm": 1.5549254417419434, |
| "learning_rate": 3.057786991448562e-07, |
| "loss": 1.9405, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0013993883506750592, |
| "grad_norm": 2.9688899517059326, |
| "learning_rate": 3.109613889608707e-07, |
| "loss": 1.9969, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0014227114898529767, |
| "grad_norm": 1.4602586030960083, |
| "learning_rate": 3.1614407877688527e-07, |
| "loss": 1.9339, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.0014460346290308943, |
| "grad_norm": 2.4017045497894287, |
| "learning_rate": 3.213267685928997e-07, |
| "loss": 2.0842, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.001469357768208812, |
| "grad_norm": 1.7433497905731201, |
| "learning_rate": 3.2650945840891423e-07, |
| "loss": 2.0223, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.0014926809073867297, |
| "grad_norm": 1.7395591735839844, |
| "learning_rate": 3.316921482249288e-07, |
| "loss": 1.9257, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.0015160040465646474, |
| "grad_norm": 1.8336257934570312, |
| "learning_rate": 3.3687483804094324e-07, |
| "loss": 1.948, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.001539327185742565, |
| "grad_norm": 1.6493985652923584, |
| "learning_rate": 3.420575278569578e-07, |
| "loss": 1.8672, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.0015626503249204827, |
| "grad_norm": 1.5789337158203125, |
| "learning_rate": 3.472402176729723e-07, |
| "loss": 1.9446, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.0015859734640984004, |
| "grad_norm": 1.3755509853363037, |
| "learning_rate": 3.5242290748898686e-07, |
| "loss": 2.1796, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.001609296603276318, |
| "grad_norm": 1.7978087663650513, |
| "learning_rate": 3.576055973050013e-07, |
| "loss": 1.8974, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.0016326197424542355, |
| "grad_norm": 1.8888216018676758, |
| "learning_rate": 3.627882871210158e-07, |
| "loss": 1.915, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0016559428816321532, |
| "grad_norm": 2.6150593757629395, |
| "learning_rate": 3.679709769370304e-07, |
| "loss": 2.2133, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.001679266020810071, |
| "grad_norm": 1.7009005546569824, |
| "learning_rate": 3.7315366675304483e-07, |
| "loss": 2.1024, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.0017025891599879886, |
| "grad_norm": 1.741734266281128, |
| "learning_rate": 3.783363565690594e-07, |
| "loss": 2.1839, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.0017259122991659063, |
| "grad_norm": 2.7715041637420654, |
| "learning_rate": 3.835190463850739e-07, |
| "loss": 2.0734, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.001749235438343824, |
| "grad_norm": 1.9710502624511719, |
| "learning_rate": 3.8870173620108835e-07, |
| "loss": 2.18, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0017725585775217416, |
| "grad_norm": 2.077986478805542, |
| "learning_rate": 3.938844260171029e-07, |
| "loss": 2.1482, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.0017958817166996593, |
| "grad_norm": 2.583721160888672, |
| "learning_rate": 3.990671158331174e-07, |
| "loss": 2.5364, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.0018192048558775768, |
| "grad_norm": 1.3425930738449097, |
| "learning_rate": 4.04249805649132e-07, |
| "loss": 1.8194, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0018425279950554944, |
| "grad_norm": 2.1111888885498047, |
| "learning_rate": 4.0943249546514643e-07, |
| "loss": 1.7878, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.0018658511342334121, |
| "grad_norm": 2.0795626640319824, |
| "learning_rate": 4.14615185281161e-07, |
| "loss": 2.3006, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0018891742734113298, |
| "grad_norm": 1.273370623588562, |
| "learning_rate": 4.197978750971755e-07, |
| "loss": 1.7599, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.0019124974125892475, |
| "grad_norm": 1.6202706098556519, |
| "learning_rate": 4.2498056491318994e-07, |
| "loss": 2.1727, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.0019358205517671651, |
| "grad_norm": 2.4593732357025146, |
| "learning_rate": 4.301632547292045e-07, |
| "loss": 2.4588, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.001959143690945083, |
| "grad_norm": 1.2617835998535156, |
| "learning_rate": 4.35345944545219e-07, |
| "loss": 1.9078, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.0019824668301230003, |
| "grad_norm": 2.2640504837036133, |
| "learning_rate": 4.405286343612335e-07, |
| "loss": 1.8983, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.002005789969300918, |
| "grad_norm": 1.6804454326629639, |
| "learning_rate": 4.45711324177248e-07, |
| "loss": 2.1049, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.0020291131084788356, |
| "grad_norm": 2.060009717941284, |
| "learning_rate": 4.5089401399326253e-07, |
| "loss": 2.0153, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.0020524362476567535, |
| "grad_norm": 1.7166160345077515, |
| "learning_rate": 4.5607670380927703e-07, |
| "loss": 2.1093, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.002075759386834671, |
| "grad_norm": 1.6695979833602905, |
| "learning_rate": 4.6125939362529154e-07, |
| "loss": 1.8607, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.0020990825260125885, |
| "grad_norm": 1.4339056015014648, |
| "learning_rate": 4.664420834413061e-07, |
| "loss": 2.2632, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0021224056651905064, |
| "grad_norm": 1.5228222608566284, |
| "learning_rate": 4.7162477325732055e-07, |
| "loss": 2.0851, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.002145728804368424, |
| "grad_norm": 1.540848731994629, |
| "learning_rate": 4.768074630733351e-07, |
| "loss": 2.1446, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.0021690519435463417, |
| "grad_norm": 1.480702519416809, |
| "learning_rate": 4.819901528893496e-07, |
| "loss": 2.0718, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.002192375082724259, |
| "grad_norm": 2.23518705368042, |
| "learning_rate": 4.871728427053641e-07, |
| "loss": 1.6198, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.002215698221902177, |
| "grad_norm": 1.6477755308151245, |
| "learning_rate": 4.923555325213786e-07, |
| "loss": 2.1136, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.0022390213610800945, |
| "grad_norm": 1.9548614025115967, |
| "learning_rate": 4.975382223373931e-07, |
| "loss": 1.9143, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.0022623445002580124, |
| "grad_norm": 1.3557407855987549, |
| "learning_rate": 5.027209121534076e-07, |
| "loss": 2.0044, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.00228566763943593, |
| "grad_norm": 2.2781455516815186, |
| "learning_rate": 5.079036019694222e-07, |
| "loss": 1.7761, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.0023089907786138474, |
| "grad_norm": 2.1195600032806396, |
| "learning_rate": 5.130862917854368e-07, |
| "loss": 1.8174, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.0023323139177917653, |
| "grad_norm": 2.0798068046569824, |
| "learning_rate": 5.182689816014512e-07, |
| "loss": 2.1431, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0023556370569696827, |
| "grad_norm": 1.8773006200790405, |
| "learning_rate": 5.234516714174657e-07, |
| "loss": 1.5221, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.0023789601961476006, |
| "grad_norm": 1.7917876243591309, |
| "learning_rate": 5.286343612334802e-07, |
| "loss": 1.9383, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.002402283335325518, |
| "grad_norm": 1.4980329275131226, |
| "learning_rate": 5.338170510494947e-07, |
| "loss": 1.846, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.002425606474503436, |
| "grad_norm": 2.0081095695495605, |
| "learning_rate": 5.389997408655092e-07, |
| "loss": 1.8777, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.0024489296136813534, |
| "grad_norm": 1.525317907333374, |
| "learning_rate": 5.441824306815238e-07, |
| "loss": 1.971, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.002472252752859271, |
| "grad_norm": 1.4131786823272705, |
| "learning_rate": 5.493651204975382e-07, |
| "loss": 2.2224, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.002495575892037189, |
| "grad_norm": 1.164492130279541, |
| "learning_rate": 5.545478103135528e-07, |
| "loss": 1.8909, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.0025188990312151062, |
| "grad_norm": 1.9998016357421875, |
| "learning_rate": 5.597305001295673e-07, |
| "loss": 2.1197, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.002542222170393024, |
| "grad_norm": 1.6218236684799194, |
| "learning_rate": 5.649131899455818e-07, |
| "loss": 1.7799, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.0025655453095709416, |
| "grad_norm": 1.535388708114624, |
| "learning_rate": 5.700958797615963e-07, |
| "loss": 1.7878, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0025888684487488595, |
| "grad_norm": 1.4929994344711304, |
| "learning_rate": 5.752785695776108e-07, |
| "loss": 2.0802, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.002612191587926777, |
| "grad_norm": 2.183293104171753, |
| "learning_rate": 5.804612593936254e-07, |
| "loss": 2.0506, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.002635514727104695, |
| "grad_norm": 1.6339191198349, |
| "learning_rate": 5.856439492096398e-07, |
| "loss": 1.7152, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.0026588378662826123, |
| "grad_norm": 1.4886974096298218, |
| "learning_rate": 5.908266390256544e-07, |
| "loss": 1.8327, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.0026821610054605298, |
| "grad_norm": 1.4198302030563354, |
| "learning_rate": 5.960093288416688e-07, |
| "loss": 1.8342, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0027054841446384477, |
| "grad_norm": 2.041900157928467, |
| "learning_rate": 6.011920186576834e-07, |
| "loss": 1.9101, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.002728807283816365, |
| "grad_norm": 1.7576725482940674, |
| "learning_rate": 6.063747084736979e-07, |
| "loss": 2.3793, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.002752130422994283, |
| "grad_norm": 1.620440125465393, |
| "learning_rate": 6.115573982897124e-07, |
| "loss": 1.7363, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.0027754535621722005, |
| "grad_norm": 1.972102403640747, |
| "learning_rate": 6.16740088105727e-07, |
| "loss": 2.0338, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0027987767013501184, |
| "grad_norm": 1.5385342836380005, |
| "learning_rate": 6.219227779217414e-07, |
| "loss": 1.829, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.002822099840528036, |
| "grad_norm": 1.4439769983291626, |
| "learning_rate": 6.27105467737756e-07, |
| "loss": 1.9893, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.0028454229797059533, |
| "grad_norm": 1.5146026611328125, |
| "learning_rate": 6.322881575537705e-07, |
| "loss": 1.6563, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.002868746118883871, |
| "grad_norm": 1.7177401781082153, |
| "learning_rate": 6.374708473697849e-07, |
| "loss": 1.9483, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.0028920692580617887, |
| "grad_norm": 2.484865188598633, |
| "learning_rate": 6.426535371857994e-07, |
| "loss": 2.0949, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.0029153923972397066, |
| "grad_norm": 1.5320651531219482, |
| "learning_rate": 6.47836227001814e-07, |
| "loss": 1.8557, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.002938715536417624, |
| "grad_norm": 1.3804417848587036, |
| "learning_rate": 6.530189168178285e-07, |
| "loss": 1.8733, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.002962038675595542, |
| "grad_norm": 2.0832831859588623, |
| "learning_rate": 6.58201606633843e-07, |
| "loss": 1.8556, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.0029853618147734594, |
| "grad_norm": 1.2582931518554688, |
| "learning_rate": 6.633842964498576e-07, |
| "loss": 2.1239, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.0030086849539513773, |
| "grad_norm": 1.6449629068374634, |
| "learning_rate": 6.685669862658721e-07, |
| "loss": 2.1635, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.0030320080931292947, |
| "grad_norm": 1.3350502252578735, |
| "learning_rate": 6.737496760818865e-07, |
| "loss": 1.801, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.003055331232307212, |
| "grad_norm": 1.7689651250839233, |
| "learning_rate": 6.78932365897901e-07, |
| "loss": 1.7541, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.00307865437148513, |
| "grad_norm": 1.4711276292800903, |
| "learning_rate": 6.841150557139156e-07, |
| "loss": 2.3916, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.0031019775106630476, |
| "grad_norm": 1.2806516885757446, |
| "learning_rate": 6.892977455299301e-07, |
| "loss": 1.8609, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.0031253006498409655, |
| "grad_norm": 1.5531939268112183, |
| "learning_rate": 6.944804353459446e-07, |
| "loss": 1.7721, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.003148623789018883, |
| "grad_norm": 1.6541032791137695, |
| "learning_rate": 6.996631251619592e-07, |
| "loss": 2.1091, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.003171946928196801, |
| "grad_norm": 2.050734281539917, |
| "learning_rate": 7.048458149779737e-07, |
| "loss": 1.8932, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.0031952700673747183, |
| "grad_norm": 1.2903157472610474, |
| "learning_rate": 7.100285047939881e-07, |
| "loss": 2.0833, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.003218593206552636, |
| "grad_norm": 1.3316091299057007, |
| "learning_rate": 7.152111946100026e-07, |
| "loss": 1.9307, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0032419163457305536, |
| "grad_norm": 1.441341519355774, |
| "learning_rate": 7.203938844260172e-07, |
| "loss": 2.2529, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.003265239484908471, |
| "grad_norm": 2.159276008605957, |
| "learning_rate": 7.255765742420316e-07, |
| "loss": 1.847, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.003288562624086389, |
| "grad_norm": 1.8410853147506714, |
| "learning_rate": 7.307592640580462e-07, |
| "loss": 2.2465, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.0033118857632643064, |
| "grad_norm": 1.8678739070892334, |
| "learning_rate": 7.359419538740608e-07, |
| "loss": 1.9261, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.0033352089024422243, |
| "grad_norm": 1.2097922563552856, |
| "learning_rate": 7.411246436900751e-07, |
| "loss": 2.0205, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.003358532041620142, |
| "grad_norm": 1.733077883720398, |
| "learning_rate": 7.463073335060897e-07, |
| "loss": 1.8389, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.0033818551807980597, |
| "grad_norm": 1.7118474245071411, |
| "learning_rate": 7.514900233221042e-07, |
| "loss": 1.9511, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.003405178319975977, |
| "grad_norm": 1.6960872411727905, |
| "learning_rate": 7.566727131381188e-07, |
| "loss": 1.8828, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.0034285014591538946, |
| "grad_norm": 1.2409390211105347, |
| "learning_rate": 7.618554029541332e-07, |
| "loss": 1.6878, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.0034518245983318125, |
| "grad_norm": 1.3440965414047241, |
| "learning_rate": 7.670380927701478e-07, |
| "loss": 1.64, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.00347514773750973, |
| "grad_norm": 1.539393663406372, |
| "learning_rate": 7.722207825861624e-07, |
| "loss": 1.6754, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.003498470876687648, |
| "grad_norm": 1.5395653247833252, |
| "learning_rate": 7.774034724021767e-07, |
| "loss": 1.9761, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0035217940158655653, |
| "grad_norm": 2.0169472694396973, |
| "learning_rate": 7.825861622181913e-07, |
| "loss": 1.6927, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.0035451171550434832, |
| "grad_norm": 1.8776079416275024, |
| "learning_rate": 7.877688520342058e-07, |
| "loss": 1.9273, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.0035684402942214007, |
| "grad_norm": 2.078824043273926, |
| "learning_rate": 7.929515418502204e-07, |
| "loss": 1.6756, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.0035917634333993186, |
| "grad_norm": 1.407560110092163, |
| "learning_rate": 7.981342316662348e-07, |
| "loss": 1.6038, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.003615086572577236, |
| "grad_norm": 1.1770573854446411, |
| "learning_rate": 8.033169214822494e-07, |
| "loss": 1.6679, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.0036384097117551535, |
| "grad_norm": 1.2057602405548096, |
| "learning_rate": 8.08499611298264e-07, |
| "loss": 1.7916, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.0036617328509330714, |
| "grad_norm": 1.117970585823059, |
| "learning_rate": 8.136823011142783e-07, |
| "loss": 1.7974, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.003685055990110989, |
| "grad_norm": 1.5996465682983398, |
| "learning_rate": 8.188649909302929e-07, |
| "loss": 1.6053, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.0037083791292889068, |
| "grad_norm": 1.4170929193496704, |
| "learning_rate": 8.240476807463074e-07, |
| "loss": 1.7155, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.0037317022684668242, |
| "grad_norm": 1.8114391565322876, |
| "learning_rate": 8.29230370562322e-07, |
| "loss": 1.9192, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.003755025407644742, |
| "grad_norm": 1.3462793827056885, |
| "learning_rate": 8.344130603783364e-07, |
| "loss": 1.4624, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.0037783485468226596, |
| "grad_norm": 1.6305956840515137, |
| "learning_rate": 8.39595750194351e-07, |
| "loss": 1.8017, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.003801671686000577, |
| "grad_norm": 1.662576675415039, |
| "learning_rate": 8.447784400103655e-07, |
| "loss": 1.733, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.003824994825178495, |
| "grad_norm": 1.556788682937622, |
| "learning_rate": 8.499611298263799e-07, |
| "loss": 1.9586, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.0038483179643564124, |
| "grad_norm": 1.5282272100448608, |
| "learning_rate": 8.551438196423944e-07, |
| "loss": 1.8254, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.0038716411035343303, |
| "grad_norm": 1.6790592670440674, |
| "learning_rate": 8.60326509458409e-07, |
| "loss": 2.1866, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.0038949642427122478, |
| "grad_norm": 1.5164263248443604, |
| "learning_rate": 8.655091992744236e-07, |
| "loss": 1.6651, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.003918287381890166, |
| "grad_norm": 1.5002336502075195, |
| "learning_rate": 8.70691889090438e-07, |
| "loss": 1.9295, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.0039416105210680836, |
| "grad_norm": 1.2122441530227661, |
| "learning_rate": 8.758745789064526e-07, |
| "loss": 1.761, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.003964933660246001, |
| "grad_norm": 1.637898564338684, |
| "learning_rate": 8.81057268722467e-07, |
| "loss": 1.8697, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0039882567994239185, |
| "grad_norm": 0.988777220249176, |
| "learning_rate": 8.862399585384815e-07, |
| "loss": 2.1249, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.004011579938601836, |
| "grad_norm": 1.8833587169647217, |
| "learning_rate": 8.91422648354496e-07, |
| "loss": 1.6915, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.004034903077779753, |
| "grad_norm": 1.8418108224868774, |
| "learning_rate": 8.966053381705106e-07, |
| "loss": 2.0019, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.004058226216957671, |
| "grad_norm": 1.6375901699066162, |
| "learning_rate": 9.017880279865251e-07, |
| "loss": 1.7625, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.004081549356135589, |
| "grad_norm": 1.8701720237731934, |
| "learning_rate": 9.069707178025396e-07, |
| "loss": 1.801, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.004104872495313507, |
| "grad_norm": 1.4488773345947266, |
| "learning_rate": 9.121534076185541e-07, |
| "loss": 1.9971, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.004128195634491424, |
| "grad_norm": 0.9587986469268799, |
| "learning_rate": 9.173360974345686e-07, |
| "loss": 1.6253, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.004151518773669342, |
| "grad_norm": 2.6533186435699463, |
| "learning_rate": 9.225187872505831e-07, |
| "loss": 1.572, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.00417484191284726, |
| "grad_norm": 2.4528841972351074, |
| "learning_rate": 9.277014770665976e-07, |
| "loss": 1.7586, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.004198165052025177, |
| "grad_norm": 1.1871824264526367, |
| "learning_rate": 9.328841668826122e-07, |
| "loss": 1.6765, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.004221488191203095, |
| "grad_norm": 1.1292660236358643, |
| "learning_rate": 9.380668566986266e-07, |
| "loss": 2.0673, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.004244811330381013, |
| "grad_norm": 1.3055285215377808, |
| "learning_rate": 9.432495465146411e-07, |
| "loss": 1.8103, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.004268134469558931, |
| "grad_norm": 1.5225868225097656, |
| "learning_rate": 9.484322363306557e-07, |
| "loss": 2.0813, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.004291457608736848, |
| "grad_norm": 1.2439767122268677, |
| "learning_rate": 9.536149261466702e-07, |
| "loss": 1.6919, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.0043147807479147655, |
| "grad_norm": 1.2424002885818481, |
| "learning_rate": 9.587976159626847e-07, |
| "loss": 1.9506, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.0043381038870926834, |
| "grad_norm": 0.9796323776245117, |
| "learning_rate": 9.639803057786992e-07, |
| "loss": 1.7342, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.0043614270262706005, |
| "grad_norm": 1.2240192890167236, |
| "learning_rate": 9.691629955947138e-07, |
| "loss": 2.0646, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.004384750165448518, |
| "grad_norm": 0.8779449462890625, |
| "learning_rate": 9.743456854107281e-07, |
| "loss": 1.4535, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.004408073304626436, |
| "grad_norm": 1.3131407499313354, |
| "learning_rate": 9.795283752267427e-07, |
| "loss": 1.9817, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.004431396443804354, |
| "grad_norm": 1.3259912729263306, |
| "learning_rate": 9.847110650427573e-07, |
| "loss": 1.709, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.004454719582982271, |
| "grad_norm": 1.4236465692520142, |
| "learning_rate": 9.898937548587718e-07, |
| "loss": 1.7059, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.004478042722160189, |
| "grad_norm": 1.2791959047317505, |
| "learning_rate": 9.950764446747862e-07, |
| "loss": 1.9633, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.004501365861338107, |
| "grad_norm": 0.9857053160667419, |
| "learning_rate": 1.0002591344908007e-06, |
| "loss": 1.807, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.004524689000516025, |
| "grad_norm": 1.264302372932434, |
| "learning_rate": 1.0054418243068153e-06, |
| "loss": 1.5389, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.004548012139693942, |
| "grad_norm": 1.2205390930175781, |
| "learning_rate": 1.0106245141228298e-06, |
| "loss": 1.4549, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.00457133527887186, |
| "grad_norm": 1.055471420288086, |
| "learning_rate": 1.0158072039388444e-06, |
| "loss": 1.6931, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.004594658418049778, |
| "grad_norm": 1.0585546493530273, |
| "learning_rate": 1.020989893754859e-06, |
| "loss": 1.8054, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.004617981557227695, |
| "grad_norm": 2.16025972366333, |
| "learning_rate": 1.0261725835708735e-06, |
| "loss": 2.0077, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.004641304696405613, |
| "grad_norm": 2.125786781311035, |
| "learning_rate": 1.0313552733868879e-06, |
| "loss": 1.9117, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.0046646278355835305, |
| "grad_norm": 1.3560391664505005, |
| "learning_rate": 1.0365379632029024e-06, |
| "loss": 1.9871, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.004687950974761448, |
| "grad_norm": 1.3505181074142456, |
| "learning_rate": 1.041720653018917e-06, |
| "loss": 1.714, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.004711274113939365, |
| "grad_norm": 1.1724427938461304, |
| "learning_rate": 1.0469033428349313e-06, |
| "loss": 1.7611, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.004734597253117283, |
| "grad_norm": 1.1746799945831299, |
| "learning_rate": 1.0520860326509459e-06, |
| "loss": 1.867, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.004757920392295201, |
| "grad_norm": 1.0976382493972778, |
| "learning_rate": 1.0572687224669604e-06, |
| "loss": 1.808, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.004781243531473118, |
| "grad_norm": 1.3842298984527588, |
| "learning_rate": 1.062451412282975e-06, |
| "loss": 1.7973, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.004804566670651036, |
| "grad_norm": 1.6715288162231445, |
| "learning_rate": 1.0676341020989893e-06, |
| "loss": 1.9817, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.004827889809828954, |
| "grad_norm": 1.0734590291976929, |
| "learning_rate": 1.072816791915004e-06, |
| "loss": 1.4297, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.004851212949006872, |
| "grad_norm": 1.0182546377182007, |
| "learning_rate": 1.0779994817310185e-06, |
| "loss": 1.713, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.004874536088184789, |
| "grad_norm": 1.1884313821792603, |
| "learning_rate": 1.083182171547033e-06, |
| "loss": 1.5234, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.004897859227362707, |
| "grad_norm": 1.520266056060791, |
| "learning_rate": 1.0883648613630476e-06, |
| "loss": 2.0598, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.004921182366540625, |
| "grad_norm": 1.1709904670715332, |
| "learning_rate": 1.0935475511790621e-06, |
| "loss": 2.1461, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.004944505505718542, |
| "grad_norm": 1.2634027004241943, |
| "learning_rate": 1.0987302409950765e-06, |
| "loss": 1.5076, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.00496782864489646, |
| "grad_norm": 1.490717887878418, |
| "learning_rate": 1.103912930811091e-06, |
| "loss": 1.8628, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.004991151784074378, |
| "grad_norm": 2.077373743057251, |
| "learning_rate": 1.1090956206271056e-06, |
| "loss": 1.9295, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.0050144749232522955, |
| "grad_norm": 1.647877812385559, |
| "learning_rate": 1.1142783104431202e-06, |
| "loss": 1.7929, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.0050377980624302125, |
| "grad_norm": 1.1937353610992432, |
| "learning_rate": 1.1194610002591345e-06, |
| "loss": 1.6509, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.00506112120160813, |
| "grad_norm": 1.0805108547210693, |
| "learning_rate": 1.124643690075149e-06, |
| "loss": 1.6447, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.005084444340786048, |
| "grad_norm": 1.1077872514724731, |
| "learning_rate": 1.1298263798911636e-06, |
| "loss": 1.7675, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.005107767479963966, |
| "grad_norm": 0.8648241758346558, |
| "learning_rate": 1.135009069707178e-06, |
| "loss": 1.6687, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.005131090619141883, |
| "grad_norm": 1.0522700548171997, |
| "learning_rate": 1.1401917595231925e-06, |
| "loss": 1.2878, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.005154413758319801, |
| "grad_norm": 1.3021256923675537, |
| "learning_rate": 1.145374449339207e-06, |
| "loss": 1.8535, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.005177736897497719, |
| "grad_norm": 1.2912962436676025, |
| "learning_rate": 1.1505571391552216e-06, |
| "loss": 1.865, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.005201060036675636, |
| "grad_norm": 1.6733994483947754, |
| "learning_rate": 1.1557398289712362e-06, |
| "loss": 1.5748, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.005224383175853554, |
| "grad_norm": 1.0865724086761475, |
| "learning_rate": 1.1609225187872508e-06, |
| "loss": 1.8159, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.005247706315031472, |
| "grad_norm": 1.1498301029205322, |
| "learning_rate": 1.1661052086032653e-06, |
| "loss": 1.8579, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.00527102945420939, |
| "grad_norm": 1.9360573291778564, |
| "learning_rate": 1.1712878984192797e-06, |
| "loss": 1.7366, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.005294352593387307, |
| "grad_norm": 1.0133939981460571, |
| "learning_rate": 1.1764705882352942e-06, |
| "loss": 1.4571, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.005317675732565225, |
| "grad_norm": 1.6443811655044556, |
| "learning_rate": 1.1816532780513088e-06, |
| "loss": 1.5312, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.0053409988717431425, |
| "grad_norm": 1.1923338174819946, |
| "learning_rate": 1.1868359678673233e-06, |
| "loss": 1.6993, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.0053643220109210596, |
| "grad_norm": 1.0345349311828613, |
| "learning_rate": 1.1920186576833377e-06, |
| "loss": 1.5739, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0053876451500989775, |
| "grad_norm": 0.9833806753158569, |
| "learning_rate": 1.1972013474993522e-06, |
| "loss": 1.819, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.005410968289276895, |
| "grad_norm": 1.3315545320510864, |
| "learning_rate": 1.2023840373153668e-06, |
| "loss": 1.9472, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.005434291428454813, |
| "grad_norm": 1.0042314529418945, |
| "learning_rate": 1.2075667271313812e-06, |
| "loss": 1.993, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.00545761456763273, |
| "grad_norm": 1.2731118202209473, |
| "learning_rate": 1.2127494169473957e-06, |
| "loss": 1.6763, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.005480937706810648, |
| "grad_norm": 0.9664155840873718, |
| "learning_rate": 1.2179321067634103e-06, |
| "loss": 1.3091, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.005504260845988566, |
| "grad_norm": 1.6930897235870361, |
| "learning_rate": 1.2231147965794248e-06, |
| "loss": 1.6111, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.005527583985166483, |
| "grad_norm": 0.9807016253471375, |
| "learning_rate": 1.2282974863954394e-06, |
| "loss": 1.6131, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.005550907124344401, |
| "grad_norm": 1.321951150894165, |
| "learning_rate": 1.233480176211454e-06, |
| "loss": 1.242, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.005574230263522319, |
| "grad_norm": 1.1465637683868408, |
| "learning_rate": 1.2386628660274685e-06, |
| "loss": 1.7035, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.005597553402700237, |
| "grad_norm": 2.4264347553253174, |
| "learning_rate": 1.2438455558434829e-06, |
| "loss": 1.9859, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.005620876541878154, |
| "grad_norm": 1.429149866104126, |
| "learning_rate": 1.2490282456594974e-06, |
| "loss": 1.8249, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.005644199681056072, |
| "grad_norm": 1.1119049787521362, |
| "learning_rate": 1.254210935475512e-06, |
| "loss": 1.8005, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.00566752282023399, |
| "grad_norm": 1.9002227783203125, |
| "learning_rate": 1.2593936252915265e-06, |
| "loss": 1.6951, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.005690845959411907, |
| "grad_norm": 1.067659854888916, |
| "learning_rate": 1.264576315107541e-06, |
| "loss": 1.799, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.0057141690985898245, |
| "grad_norm": 1.2947990894317627, |
| "learning_rate": 1.2697590049235552e-06, |
| "loss": 1.7837, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.005737492237767742, |
| "grad_norm": 1.0790272951126099, |
| "learning_rate": 1.2749416947395698e-06, |
| "loss": 1.67, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.00576081537694566, |
| "grad_norm": 1.3589330911636353, |
| "learning_rate": 1.2801243845555843e-06, |
| "loss": 1.9282, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.005784138516123577, |
| "grad_norm": 1.4140998125076294, |
| "learning_rate": 1.285307074371599e-06, |
| "loss": 1.6708, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.005807461655301495, |
| "grad_norm": 1.000994086265564, |
| "learning_rate": 1.2904897641876135e-06, |
| "loss": 1.4077, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.005830784794479413, |
| "grad_norm": 1.3655062913894653, |
| "learning_rate": 1.295672454003628e-06, |
| "loss": 1.8862, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.005854107933657331, |
| "grad_norm": 1.1164065599441528, |
| "learning_rate": 1.3008551438196426e-06, |
| "loss": 1.528, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.005877431072835248, |
| "grad_norm": 1.1792149543762207, |
| "learning_rate": 1.306037833635657e-06, |
| "loss": 1.2879, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.005900754212013166, |
| "grad_norm": 2.236320734024048, |
| "learning_rate": 1.3112205234516715e-06, |
| "loss": 1.4929, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.005924077351191084, |
| "grad_norm": 1.8795088529586792, |
| "learning_rate": 1.316403213267686e-06, |
| "loss": 1.2468, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.005947400490369001, |
| "grad_norm": 1.2248806953430176, |
| "learning_rate": 1.3215859030837006e-06, |
| "loss": 1.769, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.005970723629546919, |
| "grad_norm": 1.252236008644104, |
| "learning_rate": 1.3267685928997152e-06, |
| "loss": 1.9014, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.005994046768724837, |
| "grad_norm": 1.3926386833190918, |
| "learning_rate": 1.3319512827157297e-06, |
| "loss": 1.9599, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.0060173699079027546, |
| "grad_norm": 1.5681990385055542, |
| "learning_rate": 1.3371339725317443e-06, |
| "loss": 1.8109, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.006040693047080672, |
| "grad_norm": 1.6841275691986084, |
| "learning_rate": 1.3423166623477584e-06, |
| "loss": 1.4601, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.0060640161862585895, |
| "grad_norm": 1.5262291431427002, |
| "learning_rate": 1.347499352163773e-06, |
| "loss": 1.6493, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.006087339325436507, |
| "grad_norm": 1.0905576944351196, |
| "learning_rate": 1.3526820419797875e-06, |
| "loss": 2.0847, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.006110662464614424, |
| "grad_norm": 1.4682683944702148, |
| "learning_rate": 1.357864731795802e-06, |
| "loss": 1.6889, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.006133985603792342, |
| "grad_norm": 1.1054515838623047, |
| "learning_rate": 1.3630474216118166e-06, |
| "loss": 1.55, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.00615730874297026, |
| "grad_norm": 1.3931388854980469, |
| "learning_rate": 1.3682301114278312e-06, |
| "loss": 1.655, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.006180631882148178, |
| "grad_norm": 1.1766420602798462, |
| "learning_rate": 1.3734128012438458e-06, |
| "loss": 1.9555, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.006203955021326095, |
| "grad_norm": 1.1652954816818237, |
| "learning_rate": 1.3785954910598601e-06, |
| "loss": 1.8446, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.006227278160504013, |
| "grad_norm": 1.378980278968811, |
| "learning_rate": 1.3837781808758747e-06, |
| "loss": 1.4449, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.006250601299681931, |
| "grad_norm": 1.2017453908920288, |
| "learning_rate": 1.3889608706918892e-06, |
| "loss": 1.6272, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.006273924438859848, |
| "grad_norm": 1.2221115827560425, |
| "learning_rate": 1.3941435605079038e-06, |
| "loss": 1.7299, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.006297247578037766, |
| "grad_norm": 1.189775824546814, |
| "learning_rate": 1.3993262503239183e-06, |
| "loss": 1.1664, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.006320570717215684, |
| "grad_norm": 1.0103381872177124, |
| "learning_rate": 1.404508940139933e-06, |
| "loss": 1.3519, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.006343893856393602, |
| "grad_norm": 1.1243481636047363, |
| "learning_rate": 1.4096916299559475e-06, |
| "loss": 1.6704, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.006367216995571519, |
| "grad_norm": 1.8137811422348022, |
| "learning_rate": 1.4148743197719616e-06, |
| "loss": 1.279, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.0063905401347494365, |
| "grad_norm": 1.0875202417373657, |
| "learning_rate": 1.4200570095879762e-06, |
| "loss": 1.1564, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.0064138632739273544, |
| "grad_norm": 1.0839550495147705, |
| "learning_rate": 1.4252396994039907e-06, |
| "loss": 1.7263, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.006437186413105272, |
| "grad_norm": 1.7203173637390137, |
| "learning_rate": 1.4304223892200053e-06, |
| "loss": 1.9309, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.006460509552283189, |
| "grad_norm": 1.3320658206939697, |
| "learning_rate": 1.4356050790360198e-06, |
| "loss": 1.8276, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.006483832691461107, |
| "grad_norm": 1.5260910987854004, |
| "learning_rate": 1.4407877688520344e-06, |
| "loss": 1.413, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.006507155830639025, |
| "grad_norm": 1.2401058673858643, |
| "learning_rate": 1.445970458668049e-06, |
| "loss": 1.4087, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.006530478969816942, |
| "grad_norm": 1.2722922563552856, |
| "learning_rate": 1.4511531484840633e-06, |
| "loss": 1.6216, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.00655380210899486, |
| "grad_norm": 1.2668229341506958, |
| "learning_rate": 1.4563358383000779e-06, |
| "loss": 1.6252, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.006577125248172778, |
| "grad_norm": 1.4556583166122437, |
| "learning_rate": 1.4615185281160924e-06, |
| "loss": 2.3276, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.006600448387350696, |
| "grad_norm": 1.537610411643982, |
| "learning_rate": 1.466701217932107e-06, |
| "loss": 1.4319, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.006623771526528613, |
| "grad_norm": 1.3130170106887817, |
| "learning_rate": 1.4718839077481215e-06, |
| "loss": 1.4978, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.006647094665706531, |
| "grad_norm": 1.5020934343338013, |
| "learning_rate": 1.477066597564136e-06, |
| "loss": 1.8697, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.006670417804884449, |
| "grad_norm": 1.6949779987335205, |
| "learning_rate": 1.4822492873801502e-06, |
| "loss": 1.7433, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.006693740944062366, |
| "grad_norm": 1.5566325187683105, |
| "learning_rate": 1.4874319771961648e-06, |
| "loss": 1.5674, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.006717064083240284, |
| "grad_norm": 1.015093445777893, |
| "learning_rate": 1.4926146670121793e-06, |
| "loss": 1.9903, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.0067403872224182015, |
| "grad_norm": 2.229853868484497, |
| "learning_rate": 1.497797356828194e-06, |
| "loss": 1.1905, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.006763710361596119, |
| "grad_norm": 1.5241860151290894, |
| "learning_rate": 1.5029800466442085e-06, |
| "loss": 1.958, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.006787033500774036, |
| "grad_norm": 0.8666454553604126, |
| "learning_rate": 1.508162736460223e-06, |
| "loss": 1.7141, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.006810356639951954, |
| "grad_norm": 1.4594520330429077, |
| "learning_rate": 1.5133454262762376e-06, |
| "loss": 1.7235, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.006833679779129872, |
| "grad_norm": 1.3267074823379517, |
| "learning_rate": 1.518528116092252e-06, |
| "loss": 1.6172, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.006857002918307789, |
| "grad_norm": 1.5386312007904053, |
| "learning_rate": 1.5237108059082665e-06, |
| "loss": 1.4843, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.006880326057485707, |
| "grad_norm": 1.3275539875030518, |
| "learning_rate": 1.528893495724281e-06, |
| "loss": 1.5444, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.006903649196663625, |
| "grad_norm": 1.1002707481384277, |
| "learning_rate": 1.5340761855402956e-06, |
| "loss": 1.717, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.006926972335841543, |
| "grad_norm": 1.172974944114685, |
| "learning_rate": 1.5392588753563102e-06, |
| "loss": 1.6963, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.00695029547501946, |
| "grad_norm": 1.0728440284729004, |
| "learning_rate": 1.5444415651723247e-06, |
| "loss": 1.6228, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.006973618614197378, |
| "grad_norm": 1.274348258972168, |
| "learning_rate": 1.5496242549883393e-06, |
| "loss": 1.2559, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.006996941753375296, |
| "grad_norm": 1.2520028352737427, |
| "learning_rate": 1.5548069448043534e-06, |
| "loss": 1.6118, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.007020264892553213, |
| "grad_norm": 1.5844305753707886, |
| "learning_rate": 1.559989634620368e-06, |
| "loss": 1.5645, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.007043588031731131, |
| "grad_norm": 2.285438299179077, |
| "learning_rate": 1.5651723244363825e-06, |
| "loss": 1.4541, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.007066911170909049, |
| "grad_norm": 1.2873152494430542, |
| "learning_rate": 1.570355014252397e-06, |
| "loss": 1.4835, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.0070902343100869665, |
| "grad_norm": 1.1332640647888184, |
| "learning_rate": 1.5755377040684116e-06, |
| "loss": 1.8279, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.0071135574492648835, |
| "grad_norm": 1.6483525037765503, |
| "learning_rate": 1.5807203938844262e-06, |
| "loss": 1.2509, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.007136880588442801, |
| "grad_norm": 1.0219485759735107, |
| "learning_rate": 1.5859030837004408e-06, |
| "loss": 1.8421, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.007160203727620719, |
| "grad_norm": 1.2478340864181519, |
| "learning_rate": 1.5910857735164551e-06, |
| "loss": 1.9144, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.007183526866798637, |
| "grad_norm": 1.4016437530517578, |
| "learning_rate": 1.5962684633324697e-06, |
| "loss": 1.5146, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.007206850005976554, |
| "grad_norm": 1.1399790048599243, |
| "learning_rate": 1.6014511531484842e-06, |
| "loss": 1.6714, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.007230173145154472, |
| "grad_norm": 2.047961473464966, |
| "learning_rate": 1.6066338429644988e-06, |
| "loss": 1.1777, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.00725349628433239, |
| "grad_norm": 1.1410201787948608, |
| "learning_rate": 1.6118165327805133e-06, |
| "loss": 1.6783, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.007276819423510307, |
| "grad_norm": 1.2840640544891357, |
| "learning_rate": 1.616999222596528e-06, |
| "loss": 1.9351, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.007300142562688225, |
| "grad_norm": 0.9116181135177612, |
| "learning_rate": 1.6221819124125425e-06, |
| "loss": 1.7705, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.007323465701866143, |
| "grad_norm": 1.3190463781356812, |
| "learning_rate": 1.6273646022285566e-06, |
| "loss": 1.4484, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.007346788841044061, |
| "grad_norm": 0.9988270401954651, |
| "learning_rate": 1.6325472920445712e-06, |
| "loss": 1.5159, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.007370111980221978, |
| "grad_norm": 0.8620725870132446, |
| "learning_rate": 1.6377299818605857e-06, |
| "loss": 1.5605, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.007393435119399896, |
| "grad_norm": 1.284604549407959, |
| "learning_rate": 1.6429126716766003e-06, |
| "loss": 1.4822, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.0074167582585778135, |
| "grad_norm": 1.2546097040176392, |
| "learning_rate": 1.6480953614926148e-06, |
| "loss": 1.436, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.0074400813977557306, |
| "grad_norm": 0.9116978645324707, |
| "learning_rate": 1.6532780513086294e-06, |
| "loss": 1.2708, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.0074634045369336485, |
| "grad_norm": 0.9910548329353333, |
| "learning_rate": 1.658460741124644e-06, |
| "loss": 1.8144, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.007486727676111566, |
| "grad_norm": 1.9879093170166016, |
| "learning_rate": 1.6636434309406583e-06, |
| "loss": 1.4826, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.007510050815289484, |
| "grad_norm": 1.0845030546188354, |
| "learning_rate": 1.6688261207566729e-06, |
| "loss": 1.3364, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.007533373954467401, |
| "grad_norm": 1.342966079711914, |
| "learning_rate": 1.6740088105726874e-06, |
| "loss": 1.6453, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.007556697093645319, |
| "grad_norm": 0.9570252895355225, |
| "learning_rate": 1.679191500388702e-06, |
| "loss": 1.5384, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.007580020232823237, |
| "grad_norm": 1.531516671180725, |
| "learning_rate": 1.6843741902047165e-06, |
| "loss": 1.5775, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.007603343372001154, |
| "grad_norm": 1.4623240232467651, |
| "learning_rate": 1.689556880020731e-06, |
| "loss": 1.7159, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.007626666511179072, |
| "grad_norm": 1.109586238861084, |
| "learning_rate": 1.6947395698367454e-06, |
| "loss": 1.7403, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.00764998965035699, |
| "grad_norm": 1.3199604749679565, |
| "learning_rate": 1.6999222596527598e-06, |
| "loss": 1.7208, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.007673312789534908, |
| "grad_norm": 1.0979784727096558, |
| "learning_rate": 1.7051049494687743e-06, |
| "loss": 1.6097, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.007696635928712825, |
| "grad_norm": 1.0952926874160767, |
| "learning_rate": 1.710287639284789e-06, |
| "loss": 1.8262, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.007719959067890743, |
| "grad_norm": 1.1149373054504395, |
| "learning_rate": 1.7154703291008035e-06, |
| "loss": 1.5762, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.007743282207068661, |
| "grad_norm": 1.2090753316879272, |
| "learning_rate": 1.720653018916818e-06, |
| "loss": 1.6161, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.007766605346246578, |
| "grad_norm": 1.3476163148880005, |
| "learning_rate": 1.7258357087328326e-06, |
| "loss": 1.6854, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.0077899284854244955, |
| "grad_norm": 1.3222614526748657, |
| "learning_rate": 1.7310183985488471e-06, |
| "loss": 1.5996, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.007813251624602413, |
| "grad_norm": 1.2350871562957764, |
| "learning_rate": 1.7362010883648615e-06, |
| "loss": 1.5052, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.007836574763780331, |
| "grad_norm": 1.4628745317459106, |
| "learning_rate": 1.741383778180876e-06, |
| "loss": 1.6268, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.00785989790295825, |
| "grad_norm": 1.3481048345565796, |
| "learning_rate": 1.7465664679968906e-06, |
| "loss": 1.4308, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.007883221042136167, |
| "grad_norm": 1.0008901357650757, |
| "learning_rate": 1.7517491578129052e-06, |
| "loss": 1.6487, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.007906544181314083, |
| "grad_norm": 2.4258437156677246, |
| "learning_rate": 1.7569318476289195e-06, |
| "loss": 1.5327, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.007929867320492001, |
| "grad_norm": 1.3444914817810059, |
| "learning_rate": 1.762114537444934e-06, |
| "loss": 1.5257, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.007953190459669919, |
| "grad_norm": 2.297591209411621, |
| "learning_rate": 1.7672972272609486e-06, |
| "loss": 1.9581, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.007976513598847837, |
| "grad_norm": 1.107711672782898, |
| "learning_rate": 1.772479917076963e-06, |
| "loss": 1.3486, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.007999836738025755, |
| "grad_norm": 1.4064106941223145, |
| "learning_rate": 1.7776626068929775e-06, |
| "loss": 1.3169, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.008023159877203673, |
| "grad_norm": 1.1236720085144043, |
| "learning_rate": 1.782845296708992e-06, |
| "loss": 2.0225, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.00804648301638159, |
| "grad_norm": 1.9214081764221191, |
| "learning_rate": 1.7880279865250066e-06, |
| "loss": 1.7269, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.008069806155559507, |
| "grad_norm": 1.1544204950332642, |
| "learning_rate": 1.7932106763410212e-06, |
| "loss": 1.8407, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.008093129294737425, |
| "grad_norm": 1.3266545534133911, |
| "learning_rate": 1.7983933661570358e-06, |
| "loss": 1.3316, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.008116452433915343, |
| "grad_norm": 1.4208300113677979, |
| "learning_rate": 1.8035760559730501e-06, |
| "loss": 1.7712, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.00813977557309326, |
| "grad_norm": 1.1849939823150635, |
| "learning_rate": 1.8087587457890647e-06, |
| "loss": 1.3843, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.008163098712271178, |
| "grad_norm": 0.9147690534591675, |
| "learning_rate": 1.8139414356050792e-06, |
| "loss": 1.703, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.008186421851449096, |
| "grad_norm": 1.2026822566986084, |
| "learning_rate": 1.8191241254210938e-06, |
| "loss": 1.642, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.008209744990627014, |
| "grad_norm": 1.6620279550552368, |
| "learning_rate": 1.8243068152371081e-06, |
| "loss": 1.2861, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.00823306812980493, |
| "grad_norm": 1.20318603515625, |
| "learning_rate": 1.8294895050531227e-06, |
| "loss": 1.7781, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.008256391268982848, |
| "grad_norm": 1.117148756980896, |
| "learning_rate": 1.8346721948691372e-06, |
| "loss": 1.7056, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.008279714408160766, |
| "grad_norm": 1.3435394763946533, |
| "learning_rate": 1.8398548846851516e-06, |
| "loss": 1.7352, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.008303037547338684, |
| "grad_norm": 1.6550534963607788, |
| "learning_rate": 1.8450375745011662e-06, |
| "loss": 1.4283, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.008326360686516602, |
| "grad_norm": 1.0326530933380127, |
| "learning_rate": 1.8502202643171807e-06, |
| "loss": 1.8726, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.00834968382569452, |
| "grad_norm": 1.1237214803695679, |
| "learning_rate": 1.8554029541331953e-06, |
| "loss": 1.7547, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.008373006964872438, |
| "grad_norm": 1.3457711935043335, |
| "learning_rate": 1.8605856439492098e-06, |
| "loss": 1.5047, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.008396330104050354, |
| "grad_norm": 1.3615081310272217, |
| "learning_rate": 1.8657683337652244e-06, |
| "loss": 1.3476, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.008419653243228272, |
| "grad_norm": 1.4443084001541138, |
| "learning_rate": 1.870951023581239e-06, |
| "loss": 1.4259, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.00844297638240619, |
| "grad_norm": 0.9154095649719238, |
| "learning_rate": 1.8761337133972533e-06, |
| "loss": 1.6089, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.008466299521584108, |
| "grad_norm": 1.1972756385803223, |
| "learning_rate": 1.8813164032132679e-06, |
| "loss": 1.5704, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.008489622660762025, |
| "grad_norm": 1.1325738430023193, |
| "learning_rate": 1.8864990930292822e-06, |
| "loss": 1.7252, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.008512945799939943, |
| "grad_norm": 1.2257301807403564, |
| "learning_rate": 1.8916817828452968e-06, |
| "loss": 1.5124, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.008536268939117861, |
| "grad_norm": 1.7714002132415771, |
| "learning_rate": 1.8968644726613113e-06, |
| "loss": 1.5799, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.008559592078295777, |
| "grad_norm": 1.1215579509735107, |
| "learning_rate": 1.9020471624773259e-06, |
| "loss": 1.7692, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.008582915217473695, |
| "grad_norm": 1.3264069557189941, |
| "learning_rate": 1.9072298522933404e-06, |
| "loss": 1.7848, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.008606238356651613, |
| "grad_norm": 0.9898104667663574, |
| "learning_rate": 1.912412542109355e-06, |
| "loss": 1.945, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.008629561495829531, |
| "grad_norm": 0.9507944583892822, |
| "learning_rate": 1.9175952319253693e-06, |
| "loss": 1.6469, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.008652884635007449, |
| "grad_norm": 1.1940997838974, |
| "learning_rate": 1.9227779217413837e-06, |
| "loss": 1.5144, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.008676207774185367, |
| "grad_norm": 1.2926305532455444, |
| "learning_rate": 1.9279606115573985e-06, |
| "loss": 1.6527, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.008699530913363285, |
| "grad_norm": 0.9909786581993103, |
| "learning_rate": 1.933143301373413e-06, |
| "loss": 1.8003, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.008722854052541201, |
| "grad_norm": 1.3900662660598755, |
| "learning_rate": 1.9383259911894276e-06, |
| "loss": 1.7743, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.008746177191719119, |
| "grad_norm": 0.9942039251327515, |
| "learning_rate": 1.943508681005442e-06, |
| "loss": 1.5635, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.008769500330897037, |
| "grad_norm": 1.3887672424316406, |
| "learning_rate": 1.9486913708214563e-06, |
| "loss": 1.744, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.008792823470074955, |
| "grad_norm": 1.2873059511184692, |
| "learning_rate": 1.953874060637471e-06, |
| "loss": 1.64, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.008816146609252873, |
| "grad_norm": 1.2259247303009033, |
| "learning_rate": 1.9590567504534854e-06, |
| "loss": 1.6418, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.00883946974843079, |
| "grad_norm": 1.5709097385406494, |
| "learning_rate": 1.9642394402695e-06, |
| "loss": 1.4343, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.008862792887608708, |
| "grad_norm": 1.016625165939331, |
| "learning_rate": 1.9694221300855145e-06, |
| "loss": 1.5838, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.008886116026786626, |
| "grad_norm": 1.5763674974441528, |
| "learning_rate": 1.9746048199015293e-06, |
| "loss": 1.3391, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.008909439165964542, |
| "grad_norm": 1.014722466468811, |
| "learning_rate": 1.9797875097175436e-06, |
| "loss": 1.7185, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.00893276230514246, |
| "grad_norm": 1.5255705118179321, |
| "learning_rate": 1.984970199533558e-06, |
| "loss": 1.5749, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.008956085444320378, |
| "grad_norm": 1.4036648273468018, |
| "learning_rate": 1.9901528893495723e-06, |
| "loss": 1.4134, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.008979408583498296, |
| "grad_norm": 1.327813982963562, |
| "learning_rate": 1.995335579165587e-06, |
| "loss": 1.8475, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.009002731722676214, |
| "grad_norm": 1.357269287109375, |
| "learning_rate": 2.0005182689816014e-06, |
| "loss": 1.4145, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.009026054861854132, |
| "grad_norm": 1.4663738012313843, |
| "learning_rate": 2.005700958797616e-06, |
| "loss": 1.5207, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.00904937800103205, |
| "grad_norm": 0.9792691469192505, |
| "learning_rate": 2.0108836486136305e-06, |
| "loss": 1.7392, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.009072701140209966, |
| "grad_norm": 1.9074856042861938, |
| "learning_rate": 2.0160663384296453e-06, |
| "loss": 1.5931, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.009096024279387884, |
| "grad_norm": 1.562455654144287, |
| "learning_rate": 2.0212490282456597e-06, |
| "loss": 1.3503, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.009119347418565802, |
| "grad_norm": 1.6827714443206787, |
| "learning_rate": 2.026431718061674e-06, |
| "loss": 1.8409, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.00914267055774372, |
| "grad_norm": 0.969691276550293, |
| "learning_rate": 2.0316144078776888e-06, |
| "loss": 1.5167, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.009165993696921637, |
| "grad_norm": 1.1107996702194214, |
| "learning_rate": 2.036797097693703e-06, |
| "loss": 1.5723, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.009189316836099555, |
| "grad_norm": 0.9862359762191772, |
| "learning_rate": 2.041979787509718e-06, |
| "loss": 1.1188, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.009212639975277473, |
| "grad_norm": 1.4997074604034424, |
| "learning_rate": 2.0471624773257322e-06, |
| "loss": 1.6742, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.00923596311445539, |
| "grad_norm": 1.1336885690689087, |
| "learning_rate": 2.052345167141747e-06, |
| "loss": 1.5602, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.009259286253633307, |
| "grad_norm": 1.4929397106170654, |
| "learning_rate": 2.057527856957761e-06, |
| "loss": 1.4891, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.009282609392811225, |
| "grad_norm": 1.3118637800216675, |
| "learning_rate": 2.0627105467737757e-06, |
| "loss": 1.5758, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.009305932531989143, |
| "grad_norm": 1.1043623685836792, |
| "learning_rate": 2.06789323658979e-06, |
| "loss": 1.9455, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.009329255671167061, |
| "grad_norm": 1.3472813367843628, |
| "learning_rate": 2.073075926405805e-06, |
| "loss": 1.4657, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.009352578810344979, |
| "grad_norm": 1.5614628791809082, |
| "learning_rate": 2.078258616221819e-06, |
| "loss": 1.3351, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.009375901949522897, |
| "grad_norm": 1.393477439880371, |
| "learning_rate": 2.083441306037834e-06, |
| "loss": 1.8887, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.009399225088700813, |
| "grad_norm": 1.0576095581054688, |
| "learning_rate": 2.0886239958538483e-06, |
| "loss": 1.7814, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.00942254822787873, |
| "grad_norm": 1.5161347389221191, |
| "learning_rate": 2.0938066856698626e-06, |
| "loss": 1.2316, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.009445871367056649, |
| "grad_norm": 1.05890691280365, |
| "learning_rate": 2.0989893754858774e-06, |
| "loss": 1.5303, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.009469194506234567, |
| "grad_norm": 0.801816463470459, |
| "learning_rate": 2.1041720653018918e-06, |
| "loss": 1.5165, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.009492517645412485, |
| "grad_norm": 1.2811832427978516, |
| "learning_rate": 2.1093547551179065e-06, |
| "loss": 1.8638, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.009515840784590402, |
| "grad_norm": 1.2984956502914429, |
| "learning_rate": 2.114537444933921e-06, |
| "loss": 1.4195, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.00953916392376832, |
| "grad_norm": 2.3772926330566406, |
| "learning_rate": 2.1197201347499356e-06, |
| "loss": 1.2616, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.009562487062946236, |
| "grad_norm": 1.102181315422058, |
| "learning_rate": 2.12490282456595e-06, |
| "loss": 1.6683, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.009585810202124154, |
| "grad_norm": 1.4473963975906372, |
| "learning_rate": 2.1300855143819643e-06, |
| "loss": 1.6474, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.009609133341302072, |
| "grad_norm": 2.3995816707611084, |
| "learning_rate": 2.1352682041979787e-06, |
| "loss": 1.6203, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.00963245648047999, |
| "grad_norm": 0.9490773677825928, |
| "learning_rate": 2.1404508940139935e-06, |
| "loss": 1.8082, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.009655779619657908, |
| "grad_norm": 0.9358771443367004, |
| "learning_rate": 2.145633583830008e-06, |
| "loss": 1.5929, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.009679102758835826, |
| "grad_norm": 0.9875616431236267, |
| "learning_rate": 2.1508162736460226e-06, |
| "loss": 1.4312, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.009702425898013744, |
| "grad_norm": 1.197416067123413, |
| "learning_rate": 2.155998963462037e-06, |
| "loss": 1.3165, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.00972574903719166, |
| "grad_norm": 2.0210750102996826, |
| "learning_rate": 2.1611816532780513e-06, |
| "loss": 1.4962, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.009749072176369578, |
| "grad_norm": 1.2700085639953613, |
| "learning_rate": 2.166364343094066e-06, |
| "loss": 1.6101, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.009772395315547496, |
| "grad_norm": 1.124679684638977, |
| "learning_rate": 2.1715470329100804e-06, |
| "loss": 1.7477, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.009795718454725414, |
| "grad_norm": 1.178290843963623, |
| "learning_rate": 2.176729722726095e-06, |
| "loss": 1.4108, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.009819041593903332, |
| "grad_norm": 1.792117953300476, |
| "learning_rate": 2.1819124125421095e-06, |
| "loss": 1.5568, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.00984236473308125, |
| "grad_norm": 1.7381610870361328, |
| "learning_rate": 2.1870951023581243e-06, |
| "loss": 1.3229, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.009865687872259167, |
| "grad_norm": 1.023553490638733, |
| "learning_rate": 2.1922777921741386e-06, |
| "loss": 1.1633, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.009889011011437084, |
| "grad_norm": 1.5537900924682617, |
| "learning_rate": 2.197460481990153e-06, |
| "loss": 1.291, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.009912334150615001, |
| "grad_norm": 1.722598671913147, |
| "learning_rate": 2.2026431718061673e-06, |
| "loss": 1.5201, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.00993565728979292, |
| "grad_norm": 1.546295166015625, |
| "learning_rate": 2.207825861622182e-06, |
| "loss": 1.3554, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.009958980428970837, |
| "grad_norm": 1.4075593948364258, |
| "learning_rate": 2.2130085514381964e-06, |
| "loss": 1.3831, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.009982303568148755, |
| "grad_norm": 1.441125512123108, |
| "learning_rate": 2.218191241254211e-06, |
| "loss": 1.4806, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.010005626707326673, |
| "grad_norm": 1.4198213815689087, |
| "learning_rate": 2.2233739310702255e-06, |
| "loss": 1.6962, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.010028949846504591, |
| "grad_norm": 1.1716971397399902, |
| "learning_rate": 2.2285566208862403e-06, |
| "loss": 1.0423, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.010052272985682507, |
| "grad_norm": 1.1271895170211792, |
| "learning_rate": 2.2337393107022547e-06, |
| "loss": 1.4246, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.010075596124860425, |
| "grad_norm": 1.2987208366394043, |
| "learning_rate": 2.238922000518269e-06, |
| "loss": 1.5946, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.010098919264038343, |
| "grad_norm": 1.7283997535705566, |
| "learning_rate": 2.2441046903342838e-06, |
| "loss": 1.5761, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.01012224240321626, |
| "grad_norm": 1.635098934173584, |
| "learning_rate": 2.249287380150298e-06, |
| "loss": 1.6912, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.010145565542394179, |
| "grad_norm": 2.1896469593048096, |
| "learning_rate": 2.254470069966313e-06, |
| "loss": 1.2961, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.010168888681572097, |
| "grad_norm": 1.1874053478240967, |
| "learning_rate": 2.2596527597823272e-06, |
| "loss": 1.4999, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.010192211820750014, |
| "grad_norm": 1.2898855209350586, |
| "learning_rate": 2.264835449598342e-06, |
| "loss": 1.7152, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.010215534959927932, |
| "grad_norm": 0.792107105255127, |
| "learning_rate": 2.270018139414356e-06, |
| "loss": 1.4129, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.010238858099105849, |
| "grad_norm": 1.2092666625976562, |
| "learning_rate": 2.2752008292303707e-06, |
| "loss": 1.4687, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.010262181238283766, |
| "grad_norm": 1.2261115312576294, |
| "learning_rate": 2.280383519046385e-06, |
| "loss": 1.5548, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.010285504377461684, |
| "grad_norm": 2.0835094451904297, |
| "learning_rate": 2.2855662088624e-06, |
| "loss": 1.5925, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.010308827516639602, |
| "grad_norm": 1.075907826423645, |
| "learning_rate": 2.290748898678414e-06, |
| "loss": 1.4967, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.01033215065581752, |
| "grad_norm": 0.9633646011352539, |
| "learning_rate": 2.295931588494429e-06, |
| "loss": 1.6798, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.010355473794995438, |
| "grad_norm": 1.6833699941635132, |
| "learning_rate": 2.3011142783104433e-06, |
| "loss": 1.3053, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.010378796934173356, |
| "grad_norm": 1.1333974599838257, |
| "learning_rate": 2.3062969681264576e-06, |
| "loss": 1.3658, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.010402120073351272, |
| "grad_norm": 1.3382309675216675, |
| "learning_rate": 2.3114796579424724e-06, |
| "loss": 1.6492, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.01042544321252919, |
| "grad_norm": 0.7148923873901367, |
| "learning_rate": 2.3166623477584868e-06, |
| "loss": 1.6269, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.010448766351707108, |
| "grad_norm": 1.084245204925537, |
| "learning_rate": 2.3218450375745015e-06, |
| "loss": 2.0708, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.010472089490885026, |
| "grad_norm": 1.1463004350662231, |
| "learning_rate": 2.327027727390516e-06, |
| "loss": 2.0115, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.010495412630062944, |
| "grad_norm": 1.5500133037567139, |
| "learning_rate": 2.3322104172065306e-06, |
| "loss": 1.5454, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.010518735769240862, |
| "grad_norm": 1.2993839979171753, |
| "learning_rate": 2.337393107022545e-06, |
| "loss": 1.5475, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.01054205890841878, |
| "grad_norm": 1.295839786529541, |
| "learning_rate": 2.3425757968385593e-06, |
| "loss": 1.2895, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.010565382047596696, |
| "grad_norm": 1.045040488243103, |
| "learning_rate": 2.3477584866545737e-06, |
| "loss": 1.7306, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.010588705186774613, |
| "grad_norm": 1.4592766761779785, |
| "learning_rate": 2.3529411764705885e-06, |
| "loss": 1.7795, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.010612028325952531, |
| "grad_norm": 0.9432761073112488, |
| "learning_rate": 2.358123866286603e-06, |
| "loss": 1.6963, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.01063535146513045, |
| "grad_norm": 1.3770086765289307, |
| "learning_rate": 2.3633065561026176e-06, |
| "loss": 1.2003, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.010658674604308367, |
| "grad_norm": 1.1453793048858643, |
| "learning_rate": 2.368489245918632e-06, |
| "loss": 1.9012, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.010681997743486285, |
| "grad_norm": 1.2836976051330566, |
| "learning_rate": 2.3736719357346467e-06, |
| "loss": 1.4324, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.010705320882664203, |
| "grad_norm": 1.6498123407363892, |
| "learning_rate": 2.378854625550661e-06, |
| "loss": 1.6212, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.010728644021842119, |
| "grad_norm": 1.3681795597076416, |
| "learning_rate": 2.3840373153666754e-06, |
| "loss": 1.6047, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.010751967161020037, |
| "grad_norm": 1.4474722146987915, |
| "learning_rate": 2.38922000518269e-06, |
| "loss": 1.5279, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.010775290300197955, |
| "grad_norm": 1.4832510948181152, |
| "learning_rate": 2.3944026949987045e-06, |
| "loss": 1.7073, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.010798613439375873, |
| "grad_norm": 1.343935251235962, |
| "learning_rate": 2.3995853848147193e-06, |
| "loss": 1.4637, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.01082193657855379, |
| "grad_norm": 1.8285539150238037, |
| "learning_rate": 2.4047680746307336e-06, |
| "loss": 1.3944, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.010845259717731709, |
| "grad_norm": 1.4653230905532837, |
| "learning_rate": 2.4099507644467484e-06, |
| "loss": 1.8847, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.010868582856909626, |
| "grad_norm": 1.4410351514816284, |
| "learning_rate": 2.4151334542627623e-06, |
| "loss": 1.7298, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.010891905996087543, |
| "grad_norm": 1.3057256937026978, |
| "learning_rate": 2.420316144078777e-06, |
| "loss": 1.6188, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.01091522913526546, |
| "grad_norm": 1.574479103088379, |
| "learning_rate": 2.4254988338947914e-06, |
| "loss": 1.585, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.010938552274443378, |
| "grad_norm": 1.4391696453094482, |
| "learning_rate": 2.430681523710806e-06, |
| "loss": 1.7272, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.010961875413621296, |
| "grad_norm": 2.304706335067749, |
| "learning_rate": 2.4358642135268205e-06, |
| "loss": 1.7127, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.010985198552799214, |
| "grad_norm": 1.2380545139312744, |
| "learning_rate": 2.4410469033428353e-06, |
| "loss": 1.5428, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.011008521691977132, |
| "grad_norm": 1.303446888923645, |
| "learning_rate": 2.4462295931588497e-06, |
| "loss": 1.609, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.01103184483115505, |
| "grad_norm": 1.3888837099075317, |
| "learning_rate": 2.451412282974864e-06, |
| "loss": 1.7134, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.011055167970332966, |
| "grad_norm": 0.9802701473236084, |
| "learning_rate": 2.4565949727908788e-06, |
| "loss": 1.4401, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.011078491109510884, |
| "grad_norm": 1.5808403491973877, |
| "learning_rate": 2.461777662606893e-06, |
| "loss": 1.7415, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.011101814248688802, |
| "grad_norm": 1.299912691116333, |
| "learning_rate": 2.466960352422908e-06, |
| "loss": 1.361, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.01112513738786672, |
| "grad_norm": 0.9326110482215881, |
| "learning_rate": 2.4721430422389222e-06, |
| "loss": 1.222, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.011148460527044638, |
| "grad_norm": 1.0385396480560303, |
| "learning_rate": 2.477325732054937e-06, |
| "loss": 1.4813, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.011171783666222556, |
| "grad_norm": 1.1004397869110107, |
| "learning_rate": 2.482508421870951e-06, |
| "loss": 1.5064, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.011195106805400474, |
| "grad_norm": 1.274898886680603, |
| "learning_rate": 2.4876911116869657e-06, |
| "loss": 1.3046, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.01121842994457839, |
| "grad_norm": 1.0818660259246826, |
| "learning_rate": 2.49287380150298e-06, |
| "loss": 1.878, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.011241753083756308, |
| "grad_norm": 1.2744652032852173, |
| "learning_rate": 2.498056491318995e-06, |
| "loss": 1.6394, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.011265076222934226, |
| "grad_norm": 1.0467538833618164, |
| "learning_rate": 2.503239181135009e-06, |
| "loss": 1.8949, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.011288399362112143, |
| "grad_norm": 1.2507177591323853, |
| "learning_rate": 2.508421870951024e-06, |
| "loss": 1.5386, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.011311722501290061, |
| "grad_norm": 2.0707380771636963, |
| "learning_rate": 2.5136045607670383e-06, |
| "loss": 1.3359, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.01133504564046798, |
| "grad_norm": 1.0060955286026, |
| "learning_rate": 2.518787250583053e-06, |
| "loss": 1.5551, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.011358368779645897, |
| "grad_norm": 2.1019294261932373, |
| "learning_rate": 2.5239699403990674e-06, |
| "loss": 1.4009, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.011381691918823813, |
| "grad_norm": 1.2085974216461182, |
| "learning_rate": 2.529152630215082e-06, |
| "loss": 1.1264, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.011405015058001731, |
| "grad_norm": 1.2670215368270874, |
| "learning_rate": 2.5343353200310965e-06, |
| "loss": 1.4005, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.011428338197179649, |
| "grad_norm": 0.976809024810791, |
| "learning_rate": 2.5395180098471104e-06, |
| "loss": 1.6539, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.011451661336357567, |
| "grad_norm": 1.8012447357177734, |
| "learning_rate": 2.5447006996631252e-06, |
| "loss": 1.5083, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.011474984475535485, |
| "grad_norm": 2.0657784938812256, |
| "learning_rate": 2.5498833894791396e-06, |
| "loss": 1.4127, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.011498307614713403, |
| "grad_norm": 1.4070103168487549, |
| "learning_rate": 2.5550660792951543e-06, |
| "loss": 1.4707, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.01152163075389132, |
| "grad_norm": 0.859045147895813, |
| "learning_rate": 2.5602487691111687e-06, |
| "loss": 1.6301, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.011544953893069239, |
| "grad_norm": 1.5209952592849731, |
| "learning_rate": 2.5654314589271835e-06, |
| "loss": 1.8438, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.011568277032247155, |
| "grad_norm": 1.1508231163024902, |
| "learning_rate": 2.570614148743198e-06, |
| "loss": 1.2495, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.011591600171425073, |
| "grad_norm": 0.9130313396453857, |
| "learning_rate": 2.5757968385592126e-06, |
| "loss": 1.1848, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.01161492331060299, |
| "grad_norm": 1.5925562381744385, |
| "learning_rate": 2.580979528375227e-06, |
| "loss": 1.4745, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.011638246449780908, |
| "grad_norm": 2.5118539333343506, |
| "learning_rate": 2.5861622181912417e-06, |
| "loss": 1.6218, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.011661569588958826, |
| "grad_norm": 1.272691249847412, |
| "learning_rate": 2.591344908007256e-06, |
| "loss": 1.2147, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.011684892728136744, |
| "grad_norm": 1.1436160802841187, |
| "learning_rate": 2.596527597823271e-06, |
| "loss": 1.5556, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.011708215867314662, |
| "grad_norm": 1.0195647478103638, |
| "learning_rate": 2.601710287639285e-06, |
| "loss": 1.3303, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.011731539006492578, |
| "grad_norm": 1.4576568603515625, |
| "learning_rate": 2.6068929774553e-06, |
| "loss": 1.6531, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.011754862145670496, |
| "grad_norm": 1.360716462135315, |
| "learning_rate": 2.612075667271314e-06, |
| "loss": 1.1761, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.011778185284848414, |
| "grad_norm": 2.7770462036132812, |
| "learning_rate": 2.617258357087328e-06, |
| "loss": 1.247, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.011801508424026332, |
| "grad_norm": 1.3706661462783813, |
| "learning_rate": 2.622441046903343e-06, |
| "loss": 1.5103, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.01182483156320425, |
| "grad_norm": 1.5405017137527466, |
| "learning_rate": 2.6276237367193573e-06, |
| "loss": 1.6827, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.011848154702382168, |
| "grad_norm": 1.1809494495391846, |
| "learning_rate": 2.632806426535372e-06, |
| "loss": 1.7162, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.011871477841560086, |
| "grad_norm": 1.085557222366333, |
| "learning_rate": 2.6379891163513864e-06, |
| "loss": 1.514, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.011894800980738002, |
| "grad_norm": 1.2155910730361938, |
| "learning_rate": 2.643171806167401e-06, |
| "loss": 1.4029, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.01191812411991592, |
| "grad_norm": 1.240242600440979, |
| "learning_rate": 2.6483544959834155e-06, |
| "loss": 1.4336, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.011941447259093838, |
| "grad_norm": 1.649802327156067, |
| "learning_rate": 2.6535371857994303e-06, |
| "loss": 1.9082, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.011964770398271755, |
| "grad_norm": 1.3479831218719482, |
| "learning_rate": 2.6587198756154447e-06, |
| "loss": 1.5424, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.011988093537449673, |
| "grad_norm": 1.2537102699279785, |
| "learning_rate": 2.6639025654314594e-06, |
| "loss": 1.6061, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.012011416676627591, |
| "grad_norm": 1.1049939393997192, |
| "learning_rate": 2.6690852552474738e-06, |
| "loss": 1.8361, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.012034739815805509, |
| "grad_norm": 2.9946062564849854, |
| "learning_rate": 2.6742679450634885e-06, |
| "loss": 1.4471, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.012058062954983425, |
| "grad_norm": 0.9455610513687134, |
| "learning_rate": 2.6794506348795025e-06, |
| "loss": 1.6831, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.012081386094161343, |
| "grad_norm": 1.4750438928604126, |
| "learning_rate": 2.684633324695517e-06, |
| "loss": 1.3143, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.012104709233339261, |
| "grad_norm": 1.1056557893753052, |
| "learning_rate": 2.6898160145115316e-06, |
| "loss": 1.5054, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.012128032372517179, |
| "grad_norm": 0.9718064069747925, |
| "learning_rate": 2.694998704327546e-06, |
| "loss": 1.3134, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.012151355511695097, |
| "grad_norm": 2.2384724617004395, |
| "learning_rate": 2.7001813941435607e-06, |
| "loss": 1.4851, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.012174678650873015, |
| "grad_norm": 1.2468239068984985, |
| "learning_rate": 2.705364083959575e-06, |
| "loss": 1.4873, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.012198001790050933, |
| "grad_norm": 1.4248602390289307, |
| "learning_rate": 2.71054677377559e-06, |
| "loss": 1.7643, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.012221324929228849, |
| "grad_norm": 1.3377385139465332, |
| "learning_rate": 2.715729463591604e-06, |
| "loss": 1.7064, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.012244648068406767, |
| "grad_norm": 0.9933966994285583, |
| "learning_rate": 2.720912153407619e-06, |
| "loss": 1.7187, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.012267971207584685, |
| "grad_norm": 1.018750548362732, |
| "learning_rate": 2.7260948432236333e-06, |
| "loss": 1.5915, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.012291294346762602, |
| "grad_norm": 1.356325387954712, |
| "learning_rate": 2.731277533039648e-06, |
| "loss": 1.7193, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.01231461748594052, |
| "grad_norm": 1.2781217098236084, |
| "learning_rate": 2.7364602228556624e-06, |
| "loss": 1.5494, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.012337940625118438, |
| "grad_norm": 1.561498761177063, |
| "learning_rate": 2.741642912671677e-06, |
| "loss": 1.6972, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.012361263764296356, |
| "grad_norm": 1.1695748567581177, |
| "learning_rate": 2.7468256024876915e-06, |
| "loss": 2.1633, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.012384586903474272, |
| "grad_norm": 1.4304964542388916, |
| "learning_rate": 2.7520082923037054e-06, |
| "loss": 1.6321, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.01240791004265219, |
| "grad_norm": 1.0513828992843628, |
| "learning_rate": 2.7571909821197202e-06, |
| "loss": 1.2897, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.012431233181830108, |
| "grad_norm": 1.0206960439682007, |
| "learning_rate": 2.7623736719357346e-06, |
| "loss": 1.7842, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.012454556321008026, |
| "grad_norm": 1.1440876722335815, |
| "learning_rate": 2.7675563617517493e-06, |
| "loss": 1.4399, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.012477879460185944, |
| "grad_norm": 1.0837441682815552, |
| "learning_rate": 2.7727390515677637e-06, |
| "loss": 1.5155, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.012501202599363862, |
| "grad_norm": 1.071378231048584, |
| "learning_rate": 2.7779217413837785e-06, |
| "loss": 1.6459, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.01252452573854178, |
| "grad_norm": 1.6966552734375, |
| "learning_rate": 2.783104431199793e-06, |
| "loss": 1.6015, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.012547848877719696, |
| "grad_norm": 1.2789183855056763, |
| "learning_rate": 2.7882871210158076e-06, |
| "loss": 1.2423, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.012571172016897614, |
| "grad_norm": 1.2072651386260986, |
| "learning_rate": 2.793469810831822e-06, |
| "loss": 1.69, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.012594495156075532, |
| "grad_norm": 1.5257117748260498, |
| "learning_rate": 2.7986525006478367e-06, |
| "loss": 1.7608, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.01261781829525345, |
| "grad_norm": 1.0233759880065918, |
| "learning_rate": 2.803835190463851e-06, |
| "loss": 1.1299, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.012641141434431367, |
| "grad_norm": 1.8280616998672485, |
| "learning_rate": 2.809017880279866e-06, |
| "loss": 1.3338, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.012664464573609285, |
| "grad_norm": 1.6891363859176636, |
| "learning_rate": 2.81420057009588e-06, |
| "loss": 1.5505, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.012687787712787203, |
| "grad_norm": 1.1501421928405762, |
| "learning_rate": 2.819383259911895e-06, |
| "loss": 1.6788, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.01271111085196512, |
| "grad_norm": 1.107029914855957, |
| "learning_rate": 2.824565949727909e-06, |
| "loss": 1.3782, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.012734433991143037, |
| "grad_norm": 0.9627429246902466, |
| "learning_rate": 2.829748639543923e-06, |
| "loss": 1.3155, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.012757757130320955, |
| "grad_norm": 2.330007791519165, |
| "learning_rate": 2.834931329359938e-06, |
| "loss": 1.425, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.012781080269498873, |
| "grad_norm": 1.4026503562927246, |
| "learning_rate": 2.8401140191759523e-06, |
| "loss": 1.5578, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.012804403408676791, |
| "grad_norm": 0.9430487155914307, |
| "learning_rate": 2.845296708991967e-06, |
| "loss": 1.6075, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.012827726547854709, |
| "grad_norm": 1.0779294967651367, |
| "learning_rate": 2.8504793988079814e-06, |
| "loss": 1.5169, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.012851049687032627, |
| "grad_norm": 1.130324125289917, |
| "learning_rate": 2.855662088623996e-06, |
| "loss": 1.5016, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.012874372826210545, |
| "grad_norm": 1.0127092599868774, |
| "learning_rate": 2.8608447784400105e-06, |
| "loss": 1.8715, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.01289769596538846, |
| "grad_norm": 1.1831302642822266, |
| "learning_rate": 2.8660274682560253e-06, |
| "loss": 1.678, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.012921019104566379, |
| "grad_norm": 1.3394455909729004, |
| "learning_rate": 2.8712101580720397e-06, |
| "loss": 1.4129, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.012944342243744297, |
| "grad_norm": 1.2189030647277832, |
| "learning_rate": 2.8763928478880544e-06, |
| "loss": 1.7364, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.012967665382922215, |
| "grad_norm": 1.2808138132095337, |
| "learning_rate": 2.8815755377040688e-06, |
| "loss": 1.6274, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.012990988522100132, |
| "grad_norm": 1.0384689569473267, |
| "learning_rate": 2.8867582275200835e-06, |
| "loss": 1.5942, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.01301431166127805, |
| "grad_norm": 1.8520807027816772, |
| "learning_rate": 2.891940917336098e-06, |
| "loss": 1.3067, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.013037634800455968, |
| "grad_norm": 1.1817374229431152, |
| "learning_rate": 2.897123607152112e-06, |
| "loss": 1.6405, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.013060957939633884, |
| "grad_norm": 1.1010823249816895, |
| "learning_rate": 2.9023062969681266e-06, |
| "loss": 1.4339, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.013084281078811802, |
| "grad_norm": 1.2461942434310913, |
| "learning_rate": 2.907488986784141e-06, |
| "loss": 1.9866, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.01310760421798972, |
| "grad_norm": 1.1503125429153442, |
| "learning_rate": 2.9126716766001557e-06, |
| "loss": 1.585, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.013130927357167638, |
| "grad_norm": 1.542434573173523, |
| "learning_rate": 2.91785436641617e-06, |
| "loss": 1.4524, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.013154250496345556, |
| "grad_norm": 1.0469673871994019, |
| "learning_rate": 2.923037056232185e-06, |
| "loss": 1.6884, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.013177573635523474, |
| "grad_norm": 1.5137437582015991, |
| "learning_rate": 2.928219746048199e-06, |
| "loss": 1.5377, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.013200896774701392, |
| "grad_norm": 1.1454534530639648, |
| "learning_rate": 2.933402435864214e-06, |
| "loss": 1.8508, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.013224219913879308, |
| "grad_norm": 1.310381531715393, |
| "learning_rate": 2.9385851256802283e-06, |
| "loss": 1.5774, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.013247543053057226, |
| "grad_norm": 1.1223838329315186, |
| "learning_rate": 2.943767815496243e-06, |
| "loss": 1.4496, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.013270866192235144, |
| "grad_norm": 1.4537910223007202, |
| "learning_rate": 2.9489505053122574e-06, |
| "loss": 1.4423, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.013294189331413062, |
| "grad_norm": 1.1783167123794556, |
| "learning_rate": 2.954133195128272e-06, |
| "loss": 1.9314, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.01331751247059098, |
| "grad_norm": 1.211719274520874, |
| "learning_rate": 2.9593158849442865e-06, |
| "loss": 1.5366, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.013340835609768897, |
| "grad_norm": 2.9552671909332275, |
| "learning_rate": 2.9644985747603004e-06, |
| "loss": 1.3431, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.013364158748946815, |
| "grad_norm": 1.2814795970916748, |
| "learning_rate": 2.9696812645763152e-06, |
| "loss": 1.3879, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.013387481888124731, |
| "grad_norm": 1.2598010301589966, |
| "learning_rate": 2.9748639543923296e-06, |
| "loss": 1.4775, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.01341080502730265, |
| "grad_norm": 1.3874925374984741, |
| "learning_rate": 2.9800466442083443e-06, |
| "loss": 1.4012, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.013434128166480567, |
| "grad_norm": 1.1846306324005127, |
| "learning_rate": 2.9852293340243587e-06, |
| "loss": 1.4491, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.013457451305658485, |
| "grad_norm": 1.388150691986084, |
| "learning_rate": 2.9904120238403734e-06, |
| "loss": 1.6913, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.013480774444836403, |
| "grad_norm": 1.8026880025863647, |
| "learning_rate": 2.995594713656388e-06, |
| "loss": 1.1754, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.013504097584014321, |
| "grad_norm": 1.9366620779037476, |
| "learning_rate": 3.0007774034724026e-06, |
| "loss": 1.4406, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.013527420723192239, |
| "grad_norm": 1.039657473564148, |
| "learning_rate": 3.005960093288417e-06, |
| "loss": 1.4823, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.013550743862370155, |
| "grad_norm": 1.0928449630737305, |
| "learning_rate": 3.0111427831044317e-06, |
| "loss": 1.4502, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.013574067001548073, |
| "grad_norm": 2.408292531967163, |
| "learning_rate": 3.016325472920446e-06, |
| "loss": 1.4778, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.01359739014072599, |
| "grad_norm": 1.2284953594207764, |
| "learning_rate": 3.021508162736461e-06, |
| "loss": 1.5887, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.013620713279903909, |
| "grad_norm": 1.3841763734817505, |
| "learning_rate": 3.026690852552475e-06, |
| "loss": 1.3778, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.013644036419081827, |
| "grad_norm": 1.305172324180603, |
| "learning_rate": 3.03187354236849e-06, |
| "loss": 1.2837, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.013667359558259744, |
| "grad_norm": 1.087904691696167, |
| "learning_rate": 3.037056232184504e-06, |
| "loss": 1.4361, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.013690682697437662, |
| "grad_norm": 1.1818716526031494, |
| "learning_rate": 3.042238922000518e-06, |
| "loss": 1.4903, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.013714005836615578, |
| "grad_norm": 0.9969412088394165, |
| "learning_rate": 3.047421611816533e-06, |
| "loss": 1.6923, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.013737328975793496, |
| "grad_norm": 1.3729232549667358, |
| "learning_rate": 3.0526043016325473e-06, |
| "loss": 1.4219, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.013760652114971414, |
| "grad_norm": 1.091769814491272, |
| "learning_rate": 3.057786991448562e-06, |
| "loss": 1.6978, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.013783975254149332, |
| "grad_norm": 1.1668254137039185, |
| "learning_rate": 3.0629696812645764e-06, |
| "loss": 1.4609, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.01380729839332725, |
| "grad_norm": 1.3739502429962158, |
| "learning_rate": 3.068152371080591e-06, |
| "loss": 1.7247, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.013830621532505168, |
| "grad_norm": 1.480758547782898, |
| "learning_rate": 3.0733350608966055e-06, |
| "loss": 1.6142, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.013853944671683086, |
| "grad_norm": 0.853581964969635, |
| "learning_rate": 3.0785177507126203e-06, |
| "loss": 1.5563, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.013877267810861002, |
| "grad_norm": 1.144692063331604, |
| "learning_rate": 3.0837004405286347e-06, |
| "loss": 1.6145, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.01390059095003892, |
| "grad_norm": 1.2413440942764282, |
| "learning_rate": 3.0888831303446494e-06, |
| "loss": 1.5762, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.013923914089216838, |
| "grad_norm": 1.147834062576294, |
| "learning_rate": 3.0940658201606638e-06, |
| "loss": 1.4478, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.013947237228394756, |
| "grad_norm": 1.0349398851394653, |
| "learning_rate": 3.0992485099766785e-06, |
| "loss": 1.612, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.013970560367572674, |
| "grad_norm": 1.4780391454696655, |
| "learning_rate": 3.104431199792693e-06, |
| "loss": 1.5179, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.013993883506750592, |
| "grad_norm": 1.1395933628082275, |
| "learning_rate": 3.109613889608707e-06, |
| "loss": 1.4845, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.01401720664592851, |
| "grad_norm": 1.37168550491333, |
| "learning_rate": 3.1147965794247216e-06, |
| "loss": 1.581, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.014040529785106426, |
| "grad_norm": 1.8260347843170166, |
| "learning_rate": 3.119979269240736e-06, |
| "loss": 1.1221, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.014063852924284343, |
| "grad_norm": 2.5528669357299805, |
| "learning_rate": 3.1251619590567507e-06, |
| "loss": 1.255, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.014087176063462261, |
| "grad_norm": 1.3272032737731934, |
| "learning_rate": 3.130344648872765e-06, |
| "loss": 1.2713, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.01411049920264018, |
| "grad_norm": 1.147449254989624, |
| "learning_rate": 3.13552733868878e-06, |
| "loss": 1.3694, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.014133822341818097, |
| "grad_norm": 1.173793077468872, |
| "learning_rate": 3.140710028504794e-06, |
| "loss": 1.5818, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.014157145480996015, |
| "grad_norm": 1.2347713708877563, |
| "learning_rate": 3.145892718320809e-06, |
| "loss": 1.501, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.014180468620173933, |
| "grad_norm": 1.3945446014404297, |
| "learning_rate": 3.1510754081368233e-06, |
| "loss": 1.8674, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.01420379175935185, |
| "grad_norm": 1.239762544631958, |
| "learning_rate": 3.156258097952838e-06, |
| "loss": 1.2516, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.014227114898529767, |
| "grad_norm": 1.552531361579895, |
| "learning_rate": 3.1614407877688524e-06, |
| "loss": 1.5358, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.014250438037707685, |
| "grad_norm": 1.576997995376587, |
| "learning_rate": 3.166623477584867e-06, |
| "loss": 1.7601, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.014273761176885603, |
| "grad_norm": 1.3251402378082275, |
| "learning_rate": 3.1718061674008815e-06, |
| "loss": 1.2758, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.01429708431606352, |
| "grad_norm": 1.2837574481964111, |
| "learning_rate": 3.1769888572168963e-06, |
| "loss": 1.528, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.014320407455241439, |
| "grad_norm": 0.9697505831718445, |
| "learning_rate": 3.1821715470329102e-06, |
| "loss": 1.6359, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.014343730594419356, |
| "grad_norm": 1.2682685852050781, |
| "learning_rate": 3.1873542368489246e-06, |
| "loss": 1.4759, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.014367053733597274, |
| "grad_norm": 0.9607746005058289, |
| "learning_rate": 3.1925369266649393e-06, |
| "loss": 1.7474, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.01439037687277519, |
| "grad_norm": 1.056736946105957, |
| "learning_rate": 3.1977196164809537e-06, |
| "loss": 1.8812, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.014413700011953108, |
| "grad_norm": 1.1990852355957031, |
| "learning_rate": 3.2029023062969684e-06, |
| "loss": 1.6217, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.014437023151131026, |
| "grad_norm": 1.1339764595031738, |
| "learning_rate": 3.208084996112983e-06, |
| "loss": 1.3557, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.014460346290308944, |
| "grad_norm": 1.0672523975372314, |
| "learning_rate": 3.2132676859289976e-06, |
| "loss": 1.8239, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.014483669429486862, |
| "grad_norm": 1.4371954202651978, |
| "learning_rate": 3.218450375745012e-06, |
| "loss": 1.4571, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.01450699256866478, |
| "grad_norm": 1.9893105030059814, |
| "learning_rate": 3.2236330655610267e-06, |
| "loss": 1.3716, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.014530315707842698, |
| "grad_norm": 1.7084318399429321, |
| "learning_rate": 3.228815755377041e-06, |
| "loss": 1.5201, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.014553638847020614, |
| "grad_norm": 1.308225154876709, |
| "learning_rate": 3.233998445193056e-06, |
| "loss": 1.9173, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.014576961986198532, |
| "grad_norm": 0.9914215803146362, |
| "learning_rate": 3.23918113500907e-06, |
| "loss": 1.7351, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.01460028512537645, |
| "grad_norm": 1.0292766094207764, |
| "learning_rate": 3.244363824825085e-06, |
| "loss": 1.4073, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.014623608264554368, |
| "grad_norm": 1.0998982191085815, |
| "learning_rate": 3.2495465146410993e-06, |
| "loss": 1.5979, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.014646931403732286, |
| "grad_norm": 1.1409685611724854, |
| "learning_rate": 3.254729204457113e-06, |
| "loss": 1.3442, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.014670254542910204, |
| "grad_norm": 1.7685736417770386, |
| "learning_rate": 3.259911894273128e-06, |
| "loss": 1.251, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.014693577682088121, |
| "grad_norm": 1.6536918878555298, |
| "learning_rate": 3.2650945840891423e-06, |
| "loss": 1.4698, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.014716900821266038, |
| "grad_norm": 2.046391248703003, |
| "learning_rate": 3.270277273905157e-06, |
| "loss": 1.5142, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.014740223960443955, |
| "grad_norm": 1.3458948135375977, |
| "learning_rate": 3.2754599637211714e-06, |
| "loss": 1.3999, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.014763547099621873, |
| "grad_norm": 1.7265046834945679, |
| "learning_rate": 3.280642653537186e-06, |
| "loss": 1.2212, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.014786870238799791, |
| "grad_norm": 1.3191124200820923, |
| "learning_rate": 3.2858253433532005e-06, |
| "loss": 1.4354, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.01481019337797771, |
| "grad_norm": 1.2317379713058472, |
| "learning_rate": 3.2910080331692153e-06, |
| "loss": 1.5661, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.014833516517155627, |
| "grad_norm": 1.400969386100769, |
| "learning_rate": 3.2961907229852297e-06, |
| "loss": 1.462, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.014856839656333545, |
| "grad_norm": 2.060718059539795, |
| "learning_rate": 3.3013734128012444e-06, |
| "loss": 1.7522, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.014880162795511461, |
| "grad_norm": 1.138715386390686, |
| "learning_rate": 3.3065561026172588e-06, |
| "loss": 1.4923, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.014903485934689379, |
| "grad_norm": 1.1973599195480347, |
| "learning_rate": 3.3117387924332735e-06, |
| "loss": 1.4462, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.014926809073867297, |
| "grad_norm": 1.266867756843567, |
| "learning_rate": 3.316921482249288e-06, |
| "loss": 1.3159, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.014950132213045215, |
| "grad_norm": 3.4681708812713623, |
| "learning_rate": 3.322104172065302e-06, |
| "loss": 1.3566, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.014973455352223133, |
| "grad_norm": 1.248502492904663, |
| "learning_rate": 3.3272868618813166e-06, |
| "loss": 1.6299, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.01499677849140105, |
| "grad_norm": 1.561563491821289, |
| "learning_rate": 3.332469551697331e-06, |
| "loss": 1.3246, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.015020101630578968, |
| "grad_norm": 1.1922053098678589, |
| "learning_rate": 3.3376522415133457e-06, |
| "loss": 1.6847, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.015043424769756885, |
| "grad_norm": 1.0779014825820923, |
| "learning_rate": 3.34283493132936e-06, |
| "loss": 1.8025, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.015066747908934803, |
| "grad_norm": 1.5236597061157227, |
| "learning_rate": 3.348017621145375e-06, |
| "loss": 1.3894, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.01509007104811272, |
| "grad_norm": 1.2087934017181396, |
| "learning_rate": 3.353200310961389e-06, |
| "loss": 1.9119, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.015113394187290638, |
| "grad_norm": 1.435085654258728, |
| "learning_rate": 3.358383000777404e-06, |
| "loss": 1.4334, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.015136717326468556, |
| "grad_norm": 1.3662467002868652, |
| "learning_rate": 3.3635656905934183e-06, |
| "loss": 1.6717, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.015160040465646474, |
| "grad_norm": 1.379262924194336, |
| "learning_rate": 3.368748380409433e-06, |
| "loss": 1.0914, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.015183363604824392, |
| "grad_norm": 1.436503529548645, |
| "learning_rate": 3.3739310702254474e-06, |
| "loss": 1.296, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.015206686744002308, |
| "grad_norm": 1.0189919471740723, |
| "learning_rate": 3.379113760041462e-06, |
| "loss": 1.5578, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.015230009883180226, |
| "grad_norm": 1.3371915817260742, |
| "learning_rate": 3.3842964498574765e-06, |
| "loss": 1.3883, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.015253333022358144, |
| "grad_norm": 1.152949333190918, |
| "learning_rate": 3.389479139673491e-06, |
| "loss": 1.3408, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.015276656161536062, |
| "grad_norm": 0.865856945514679, |
| "learning_rate": 3.3946618294895052e-06, |
| "loss": 1.8154, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.01529997930071398, |
| "grad_norm": 1.3607538938522339, |
| "learning_rate": 3.3998445193055196e-06, |
| "loss": 1.5139, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.015323302439891898, |
| "grad_norm": 1.0469399690628052, |
| "learning_rate": 3.4050272091215343e-06, |
| "loss": 1.4246, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.015346625579069816, |
| "grad_norm": 1.2417982816696167, |
| "learning_rate": 3.4102098989375487e-06, |
| "loss": 1.4392, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.015369948718247732, |
| "grad_norm": 2.018418073654175, |
| "learning_rate": 3.4153925887535634e-06, |
| "loss": 1.5175, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.01539327185742565, |
| "grad_norm": 1.2593055963516235, |
| "learning_rate": 3.420575278569578e-06, |
| "loss": 1.6338, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.015416594996603568, |
| "grad_norm": 1.0297298431396484, |
| "learning_rate": 3.4257579683855926e-06, |
| "loss": 1.6309, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.015439918135781485, |
| "grad_norm": 1.2963732481002808, |
| "learning_rate": 3.430940658201607e-06, |
| "loss": 1.3099, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.015463241274959403, |
| "grad_norm": 1.0868266820907593, |
| "learning_rate": 3.4361233480176217e-06, |
| "loss": 1.4949, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.015486564414137321, |
| "grad_norm": 1.156296968460083, |
| "learning_rate": 3.441306037833636e-06, |
| "loss": 1.7845, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.015509887553315239, |
| "grad_norm": 1.412965178489685, |
| "learning_rate": 3.446488727649651e-06, |
| "loss": 1.19, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.015533210692493155, |
| "grad_norm": 1.0419931411743164, |
| "learning_rate": 3.451671417465665e-06, |
| "loss": 1.7125, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.015556533831671073, |
| "grad_norm": 1.035372018814087, |
| "learning_rate": 3.4568541072816795e-06, |
| "loss": 1.7003, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.015579856970848991, |
| "grad_norm": 1.1559805870056152, |
| "learning_rate": 3.4620367970976943e-06, |
| "loss": 1.981, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.015603180110026909, |
| "grad_norm": 0.8634515404701233, |
| "learning_rate": 3.467219486913708e-06, |
| "loss": 1.2609, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.015626503249204827, |
| "grad_norm": 1.1953692436218262, |
| "learning_rate": 3.472402176729723e-06, |
| "loss": 1.3956, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.015649826388382745, |
| "grad_norm": 0.9668301939964294, |
| "learning_rate": 3.4775848665457373e-06, |
| "loss": 1.0568, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.015673149527560663, |
| "grad_norm": 2.4868035316467285, |
| "learning_rate": 3.482767556361752e-06, |
| "loss": 1.364, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.01569647266673858, |
| "grad_norm": 1.4255839586257935, |
| "learning_rate": 3.4879502461777664e-06, |
| "loss": 1.5207, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.0157197958059165, |
| "grad_norm": 1.2752389907836914, |
| "learning_rate": 3.493132935993781e-06, |
| "loss": 1.5141, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.015743118945094416, |
| "grad_norm": 1.2186245918273926, |
| "learning_rate": 3.4983156258097955e-06, |
| "loss": 1.3655, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.015766442084272334, |
| "grad_norm": 1.3544304370880127, |
| "learning_rate": 3.5034983156258103e-06, |
| "loss": 1.7428, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.01578976522345025, |
| "grad_norm": 1.0968130826950073, |
| "learning_rate": 3.5086810054418247e-06, |
| "loss": 1.3491, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.015813088362628167, |
| "grad_norm": 1.1593806743621826, |
| "learning_rate": 3.513863695257839e-06, |
| "loss": 1.6708, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.015836411501806084, |
| "grad_norm": 1.0408954620361328, |
| "learning_rate": 3.5190463850738538e-06, |
| "loss": 1.6977, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.015859734640984002, |
| "grad_norm": 1.196632742881775, |
| "learning_rate": 3.524229074889868e-06, |
| "loss": 1.2019, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.01588305778016192, |
| "grad_norm": 1.2698166370391846, |
| "learning_rate": 3.529411764705883e-06, |
| "loss": 1.8457, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.015906380919339838, |
| "grad_norm": 0.9075011014938354, |
| "learning_rate": 3.5345944545218972e-06, |
| "loss": 1.2717, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.015929704058517756, |
| "grad_norm": 1.0426501035690308, |
| "learning_rate": 3.5397771443379116e-06, |
| "loss": 1.6601, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.015953027197695674, |
| "grad_norm": 1.4904205799102783, |
| "learning_rate": 3.544959834153926e-06, |
| "loss": 1.6324, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.015976350336873592, |
| "grad_norm": 1.0664643049240112, |
| "learning_rate": 3.5501425239699407e-06, |
| "loss": 1.4896, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.01599967347605151, |
| "grad_norm": 1.3758978843688965, |
| "learning_rate": 3.555325213785955e-06, |
| "loss": 1.5457, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.016022996615229428, |
| "grad_norm": 1.4759879112243652, |
| "learning_rate": 3.56050790360197e-06, |
| "loss": 1.3865, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.016046319754407345, |
| "grad_norm": 1.4678733348846436, |
| "learning_rate": 3.565690593417984e-06, |
| "loss": 1.223, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.016069642893585263, |
| "grad_norm": 1.2057251930236816, |
| "learning_rate": 3.570873283233999e-06, |
| "loss": 1.4864, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.01609296603276318, |
| "grad_norm": 1.3976320028305054, |
| "learning_rate": 3.5760559730500133e-06, |
| "loss": 1.3371, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.016116289171941096, |
| "grad_norm": 1.0588197708129883, |
| "learning_rate": 3.5812386628660276e-06, |
| "loss": 1.264, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.016139612311119014, |
| "grad_norm": 0.891678512096405, |
| "learning_rate": 3.5864213526820424e-06, |
| "loss": 1.6566, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.01616293545029693, |
| "grad_norm": 1.1149228811264038, |
| "learning_rate": 3.5916040424980567e-06, |
| "loss": 1.6862, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.01618625858947485, |
| "grad_norm": 1.463218331336975, |
| "learning_rate": 3.5967867323140715e-06, |
| "loss": 1.5771, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.016209581728652767, |
| "grad_norm": 1.291648030281067, |
| "learning_rate": 3.601969422130086e-06, |
| "loss": 1.443, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.016232904867830685, |
| "grad_norm": 1.1534149646759033, |
| "learning_rate": 3.6071521119461002e-06, |
| "loss": 1.76, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.016256228007008603, |
| "grad_norm": 1.3349847793579102, |
| "learning_rate": 3.6123348017621146e-06, |
| "loss": 2.0584, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.01627955114618652, |
| "grad_norm": 1.665682315826416, |
| "learning_rate": 3.6175174915781293e-06, |
| "loss": 1.5989, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.01630287428536444, |
| "grad_norm": 1.6486263275146484, |
| "learning_rate": 3.6227001813941437e-06, |
| "loss": 1.7698, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.016326197424542357, |
| "grad_norm": 1.5153722763061523, |
| "learning_rate": 3.6278828712101584e-06, |
| "loss": 1.3312, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.016349520563720275, |
| "grad_norm": 1.3090248107910156, |
| "learning_rate": 3.633065561026173e-06, |
| "loss": 1.0735, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.016372843702898193, |
| "grad_norm": 1.5462753772735596, |
| "learning_rate": 3.6382482508421876e-06, |
| "loss": 1.5408, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.01639616684207611, |
| "grad_norm": 1.3447730541229248, |
| "learning_rate": 3.643430940658202e-06, |
| "loss": 1.5295, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.01641948998125403, |
| "grad_norm": 1.232865571975708, |
| "learning_rate": 3.6486136304742163e-06, |
| "loss": 1.8686, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.016442813120431946, |
| "grad_norm": 0.9742329120635986, |
| "learning_rate": 3.653796320290231e-06, |
| "loss": 1.5951, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.01646613625960986, |
| "grad_norm": 1.1572047472000122, |
| "learning_rate": 3.6589790101062454e-06, |
| "loss": 1.5068, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.01648945939878778, |
| "grad_norm": 1.2024304866790771, |
| "learning_rate": 3.66416169992226e-06, |
| "loss": 1.3933, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.016512782537965696, |
| "grad_norm": 2.442342758178711, |
| "learning_rate": 3.6693443897382745e-06, |
| "loss": 1.0126, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.016536105677143614, |
| "grad_norm": 1.2786589860916138, |
| "learning_rate": 3.6745270795542893e-06, |
| "loss": 1.6902, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.016559428816321532, |
| "grad_norm": 0.9200882315635681, |
| "learning_rate": 3.679709769370303e-06, |
| "loss": 1.3918, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.01658275195549945, |
| "grad_norm": 1.3768819570541382, |
| "learning_rate": 3.684892459186318e-06, |
| "loss": 1.6518, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.016606075094677368, |
| "grad_norm": 1.274484395980835, |
| "learning_rate": 3.6900751490023323e-06, |
| "loss": 1.3728, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.016629398233855286, |
| "grad_norm": 1.1752501726150513, |
| "learning_rate": 3.695257838818347e-06, |
| "loss": 1.4234, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.016652721373033204, |
| "grad_norm": 1.4458903074264526, |
| "learning_rate": 3.7004405286343614e-06, |
| "loss": 1.5695, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.01667604451221112, |
| "grad_norm": 1.2630547285079956, |
| "learning_rate": 3.705623218450376e-06, |
| "loss": 1.5334, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.01669936765138904, |
| "grad_norm": 1.3754082918167114, |
| "learning_rate": 3.7108059082663905e-06, |
| "loss": 1.4807, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.016722690790566958, |
| "grad_norm": 1.4704689979553223, |
| "learning_rate": 3.715988598082405e-06, |
| "loss": 1.5409, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.016746013929744875, |
| "grad_norm": 1.4692633152008057, |
| "learning_rate": 3.7211712878984197e-06, |
| "loss": 1.5922, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.016769337068922793, |
| "grad_norm": 1.2148405313491821, |
| "learning_rate": 3.726353977714434e-06, |
| "loss": 1.8115, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.016792660208100708, |
| "grad_norm": 1.5564905405044556, |
| "learning_rate": 3.7315366675304488e-06, |
| "loss": 1.4189, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.016815983347278626, |
| "grad_norm": 1.130292296409607, |
| "learning_rate": 3.736719357346463e-06, |
| "loss": 1.4455, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.016839306486456544, |
| "grad_norm": 2.0609545707702637, |
| "learning_rate": 3.741902047162478e-06, |
| "loss": 1.6052, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.01686262962563446, |
| "grad_norm": 1.0422543287277222, |
| "learning_rate": 3.7470847369784922e-06, |
| "loss": 1.5889, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.01688595276481238, |
| "grad_norm": 1.7926782369613647, |
| "learning_rate": 3.7522674267945066e-06, |
| "loss": 1.2304, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.016909275903990297, |
| "grad_norm": 1.2486250400543213, |
| "learning_rate": 3.757450116610521e-06, |
| "loss": 1.7512, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.016932599043168215, |
| "grad_norm": 1.6907048225402832, |
| "learning_rate": 3.7626328064265357e-06, |
| "loss": 1.2031, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.016955922182346133, |
| "grad_norm": 1.2899296283721924, |
| "learning_rate": 3.76781549624255e-06, |
| "loss": 1.3111, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.01697924532152405, |
| "grad_norm": 2.320288896560669, |
| "learning_rate": 3.7729981860585644e-06, |
| "loss": 1.2764, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.01700256846070197, |
| "grad_norm": 1.4165383577346802, |
| "learning_rate": 3.778180875874579e-06, |
| "loss": 1.2847, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.017025891599879887, |
| "grad_norm": 1.1537601947784424, |
| "learning_rate": 3.7833635656905935e-06, |
| "loss": 1.6002, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.017049214739057805, |
| "grad_norm": 1.3128899335861206, |
| "learning_rate": 3.7885462555066083e-06, |
| "loss": 1.4159, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.017072537878235722, |
| "grad_norm": 0.9494642615318298, |
| "learning_rate": 3.7937289453226226e-06, |
| "loss": 1.5425, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.01709586101741364, |
| "grad_norm": 1.8949923515319824, |
| "learning_rate": 3.7989116351386374e-06, |
| "loss": 1.109, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.017119184156591555, |
| "grad_norm": 1.3136776685714722, |
| "learning_rate": 3.8040943249546517e-06, |
| "loss": 1.4208, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.017142507295769473, |
| "grad_norm": 1.0108048915863037, |
| "learning_rate": 3.8092770147706665e-06, |
| "loss": 1.3101, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.01716583043494739, |
| "grad_norm": 1.1397989988327026, |
| "learning_rate": 3.814459704586681e-06, |
| "loss": 1.6643, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.01718915357412531, |
| "grad_norm": 0.9662717580795288, |
| "learning_rate": 3.819642394402696e-06, |
| "loss": 1.5524, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.017212476713303226, |
| "grad_norm": 1.5264514684677124, |
| "learning_rate": 3.82482508421871e-06, |
| "loss": 1.6702, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.017235799852481144, |
| "grad_norm": 1.1797709465026855, |
| "learning_rate": 3.830007774034724e-06, |
| "loss": 1.5751, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.017259122991659062, |
| "grad_norm": 1.3964486122131348, |
| "learning_rate": 3.835190463850739e-06, |
| "loss": 1.3497, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.01728244613083698, |
| "grad_norm": 1.0540798902511597, |
| "learning_rate": 3.840373153666753e-06, |
| "loss": 1.623, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.017305769270014898, |
| "grad_norm": 1.8619107007980347, |
| "learning_rate": 3.845555843482767e-06, |
| "loss": 1.836, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.017329092409192816, |
| "grad_norm": 1.190048098564148, |
| "learning_rate": 3.8507385332987826e-06, |
| "loss": 1.6031, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.017352415548370734, |
| "grad_norm": 1.32784903049469, |
| "learning_rate": 3.855921223114797e-06, |
| "loss": 1.6144, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.01737573868754865, |
| "grad_norm": 1.7393810749053955, |
| "learning_rate": 3.861103912930811e-06, |
| "loss": 1.4898, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.01739906182672657, |
| "grad_norm": 1.008122444152832, |
| "learning_rate": 3.866286602746826e-06, |
| "loss": 1.6506, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.017422384965904487, |
| "grad_norm": 1.3282239437103271, |
| "learning_rate": 3.871469292562841e-06, |
| "loss": 1.5178, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.017445708105082402, |
| "grad_norm": 1.4479358196258545, |
| "learning_rate": 3.876651982378855e-06, |
| "loss": 1.5896, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.01746903124426032, |
| "grad_norm": 1.9100661277770996, |
| "learning_rate": 3.8818346721948695e-06, |
| "loss": 1.2946, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.017492354383438238, |
| "grad_norm": 1.269235610961914, |
| "learning_rate": 3.887017362010884e-06, |
| "loss": 1.5707, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.017515677522616156, |
| "grad_norm": 1.3187369108200073, |
| "learning_rate": 3.892200051826899e-06, |
| "loss": 1.8153, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.017539000661794073, |
| "grad_norm": 1.3091131448745728, |
| "learning_rate": 3.8973827416429125e-06, |
| "loss": 1.5973, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.01756232380097199, |
| "grad_norm": 1.4826890230178833, |
| "learning_rate": 3.902565431458927e-06, |
| "loss": 1.3277, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.01758564694014991, |
| "grad_norm": 1.2626949548721313, |
| "learning_rate": 3.907748121274942e-06, |
| "loss": 1.5531, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.017608970079327827, |
| "grad_norm": 1.1990412473678589, |
| "learning_rate": 3.912930811090956e-06, |
| "loss": 1.349, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.017632293218505745, |
| "grad_norm": 1.3036906719207764, |
| "learning_rate": 3.918113500906971e-06, |
| "loss": 1.5648, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.017655616357683663, |
| "grad_norm": 1.3129525184631348, |
| "learning_rate": 3.923296190722985e-06, |
| "loss": 1.7147, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.01767893949686158, |
| "grad_norm": 1.4686280488967896, |
| "learning_rate": 3.928478880539e-06, |
| "loss": 1.6136, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.0177022626360395, |
| "grad_norm": 1.6845604181289673, |
| "learning_rate": 3.933661570355015e-06, |
| "loss": 1.763, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.017725585775217417, |
| "grad_norm": 2.019049644470215, |
| "learning_rate": 3.938844260171029e-06, |
| "loss": 1.2543, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.017748908914395334, |
| "grad_norm": 1.4184072017669678, |
| "learning_rate": 3.944026949987043e-06, |
| "loss": 1.596, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.017772232053573252, |
| "grad_norm": 1.127982497215271, |
| "learning_rate": 3.9492096398030585e-06, |
| "loss": 1.5485, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.017795555192751167, |
| "grad_norm": 1.5097321271896362, |
| "learning_rate": 3.954392329619073e-06, |
| "loss": 1.5452, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.017818878331929085, |
| "grad_norm": 1.3832807540893555, |
| "learning_rate": 3.959575019435087e-06, |
| "loss": 1.3865, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.017842201471107003, |
| "grad_norm": 1.065623164176941, |
| "learning_rate": 3.964757709251102e-06, |
| "loss": 1.2218, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.01786552461028492, |
| "grad_norm": 1.2190065383911133, |
| "learning_rate": 3.969940399067116e-06, |
| "loss": 1.2169, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.01788884774946284, |
| "grad_norm": 1.741749882698059, |
| "learning_rate": 3.97512308888313e-06, |
| "loss": 1.7316, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.017912170888640756, |
| "grad_norm": 1.2072060108184814, |
| "learning_rate": 3.980305778699145e-06, |
| "loss": 1.815, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.017935494027818674, |
| "grad_norm": 1.4645625352859497, |
| "learning_rate": 3.98548846851516e-06, |
| "loss": 1.2218, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.017958817166996592, |
| "grad_norm": 1.4466350078582764, |
| "learning_rate": 3.990671158331174e-06, |
| "loss": 1.7291, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.01798214030617451, |
| "grad_norm": 1.364358901977539, |
| "learning_rate": 3.9958538481471885e-06, |
| "loss": 1.6527, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.018005463445352428, |
| "grad_norm": 1.2262394428253174, |
| "learning_rate": 4.001036537963203e-06, |
| "loss": 1.5522, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.018028786584530346, |
| "grad_norm": 1.694001317024231, |
| "learning_rate": 4.006219227779218e-06, |
| "loss": 1.5791, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.018052109723708264, |
| "grad_norm": 0.7941157817840576, |
| "learning_rate": 4.011401917595232e-06, |
| "loss": 1.23, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.01807543286288618, |
| "grad_norm": 1.1942747831344604, |
| "learning_rate": 4.016584607411247e-06, |
| "loss": 1.4316, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.0180987560020641, |
| "grad_norm": 1.5809072256088257, |
| "learning_rate": 4.021767297227261e-06, |
| "loss": 1.7361, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.018122079141242014, |
| "grad_norm": 1.2918401956558228, |
| "learning_rate": 4.026949987043276e-06, |
| "loss": 1.3285, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.018145402280419932, |
| "grad_norm": 1.966123342514038, |
| "learning_rate": 4.032132676859291e-06, |
| "loss": 1.2037, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.01816872541959785, |
| "grad_norm": 1.3362590074539185, |
| "learning_rate": 4.037315366675304e-06, |
| "loss": 1.3811, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.018192048558775768, |
| "grad_norm": 1.0375605821609497, |
| "learning_rate": 4.042498056491319e-06, |
| "loss": 1.481, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.018215371697953685, |
| "grad_norm": 2.414684295654297, |
| "learning_rate": 4.047680746307334e-06, |
| "loss": 1.773, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.018238694837131603, |
| "grad_norm": 1.2252676486968994, |
| "learning_rate": 4.052863436123348e-06, |
| "loss": 1.514, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.01826201797630952, |
| "grad_norm": 1.517791748046875, |
| "learning_rate": 4.058046125939362e-06, |
| "loss": 1.3442, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.01828534111548744, |
| "grad_norm": 1.0303611755371094, |
| "learning_rate": 4.0632288157553776e-06, |
| "loss": 1.5593, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.018308664254665357, |
| "grad_norm": 1.3615033626556396, |
| "learning_rate": 4.068411505571392e-06, |
| "loss": 1.6971, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.018331987393843275, |
| "grad_norm": 1.1224147081375122, |
| "learning_rate": 4.073594195387406e-06, |
| "loss": 1.2134, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.018355310533021193, |
| "grad_norm": 1.3592679500579834, |
| "learning_rate": 4.078776885203421e-06, |
| "loss": 1.7391, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.01837863367219911, |
| "grad_norm": 1.6286187171936035, |
| "learning_rate": 4.083959575019436e-06, |
| "loss": 1.7279, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.01840195681137703, |
| "grad_norm": 1.2597742080688477, |
| "learning_rate": 4.08914226483545e-06, |
| "loss": 1.5227, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.018425279950554947, |
| "grad_norm": 1.2776849269866943, |
| "learning_rate": 4.0943249546514645e-06, |
| "loss": 1.3575, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.01844860308973286, |
| "grad_norm": 1.2529163360595703, |
| "learning_rate": 4.099507644467479e-06, |
| "loss": 1.6356, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.01847192622891078, |
| "grad_norm": 1.184187650680542, |
| "learning_rate": 4.104690334283494e-06, |
| "loss": 1.734, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.018495249368088697, |
| "grad_norm": 1.176222562789917, |
| "learning_rate": 4.1098730240995075e-06, |
| "loss": 1.5206, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.018518572507266615, |
| "grad_norm": 1.0694701671600342, |
| "learning_rate": 4.115055713915522e-06, |
| "loss": 1.1824, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.018541895646444533, |
| "grad_norm": 1.5169551372528076, |
| "learning_rate": 4.120238403731537e-06, |
| "loss": 1.3817, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.01856521878562245, |
| "grad_norm": 1.0996246337890625, |
| "learning_rate": 4.125421093547551e-06, |
| "loss": 1.0921, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.01858854192480037, |
| "grad_norm": 1.0202140808105469, |
| "learning_rate": 4.130603783363566e-06, |
| "loss": 1.2687, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.018611865063978286, |
| "grad_norm": 2.089864730834961, |
| "learning_rate": 4.13578647317958e-06, |
| "loss": 1.5417, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.018635188203156204, |
| "grad_norm": 1.1465847492218018, |
| "learning_rate": 4.140969162995595e-06, |
| "loss": 1.3415, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.018658511342334122, |
| "grad_norm": 1.1085565090179443, |
| "learning_rate": 4.14615185281161e-06, |
| "loss": 1.4662, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.01868183448151204, |
| "grad_norm": 1.2206768989562988, |
| "learning_rate": 4.151334542627624e-06, |
| "loss": 1.4954, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.018705157620689958, |
| "grad_norm": 1.1540756225585938, |
| "learning_rate": 4.156517232443638e-06, |
| "loss": 1.4953, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.018728480759867876, |
| "grad_norm": 1.9667025804519653, |
| "learning_rate": 4.1616999222596535e-06, |
| "loss": 1.1834, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.018751803899045794, |
| "grad_norm": 1.2202988862991333, |
| "learning_rate": 4.166882612075668e-06, |
| "loss": 1.7045, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.018775127038223708, |
| "grad_norm": 1.2399123907089233, |
| "learning_rate": 4.172065301891682e-06, |
| "loss": 1.4937, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.018798450177401626, |
| "grad_norm": 1.5780203342437744, |
| "learning_rate": 4.177247991707697e-06, |
| "loss": 1.6386, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.018821773316579544, |
| "grad_norm": 1.524564266204834, |
| "learning_rate": 4.182430681523711e-06, |
| "loss": 1.4951, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.01884509645575746, |
| "grad_norm": 1.342991590499878, |
| "learning_rate": 4.187613371339725e-06, |
| "loss": 1.3007, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.01886841959493538, |
| "grad_norm": 1.320813775062561, |
| "learning_rate": 4.19279606115574e-06, |
| "loss": 1.2112, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.018891742734113297, |
| "grad_norm": 1.2329927682876587, |
| "learning_rate": 4.197978750971755e-06, |
| "loss": 1.333, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.018915065873291215, |
| "grad_norm": 1.3429094552993774, |
| "learning_rate": 4.203161440787769e-06, |
| "loss": 1.4805, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.018938389012469133, |
| "grad_norm": 1.643641710281372, |
| "learning_rate": 4.2083441306037835e-06, |
| "loss": 1.5665, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.01896171215164705, |
| "grad_norm": 1.111887812614441, |
| "learning_rate": 4.213526820419798e-06, |
| "loss": 1.6087, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.01898503529082497, |
| "grad_norm": 1.3594610691070557, |
| "learning_rate": 4.218709510235813e-06, |
| "loss": 1.7666, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.019008358430002887, |
| "grad_norm": 1.2298046350479126, |
| "learning_rate": 4.223892200051827e-06, |
| "loss": 1.5032, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.019031681569180805, |
| "grad_norm": 1.2679171562194824, |
| "learning_rate": 4.229074889867842e-06, |
| "loss": 1.4375, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.019055004708358723, |
| "grad_norm": 1.0543935298919678, |
| "learning_rate": 4.234257579683856e-06, |
| "loss": 1.6645, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.01907832784753664, |
| "grad_norm": 1.2821168899536133, |
| "learning_rate": 4.239440269499871e-06, |
| "loss": 1.1945, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.01910165098671456, |
| "grad_norm": 1.5575084686279297, |
| "learning_rate": 4.244622959315886e-06, |
| "loss": 1.3262, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.019124974125892473, |
| "grad_norm": 1.2359989881515503, |
| "learning_rate": 4.2498056491319e-06, |
| "loss": 1.4127, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.01914829726507039, |
| "grad_norm": 1.0559273958206177, |
| "learning_rate": 4.254988338947914e-06, |
| "loss": 1.4455, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.01917162040424831, |
| "grad_norm": 1.3651732206344604, |
| "learning_rate": 4.260171028763929e-06, |
| "loss": 1.245, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.019194943543426227, |
| "grad_norm": 1.0067932605743408, |
| "learning_rate": 4.265353718579943e-06, |
| "loss": 1.4954, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.019218266682604145, |
| "grad_norm": 1.7477822303771973, |
| "learning_rate": 4.270536408395957e-06, |
| "loss": 1.8164, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.019241589821782062, |
| "grad_norm": 1.1976604461669922, |
| "learning_rate": 4.2757190982119726e-06, |
| "loss": 1.4552, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.01926491296095998, |
| "grad_norm": 1.306269884109497, |
| "learning_rate": 4.280901788027987e-06, |
| "loss": 1.6348, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.019288236100137898, |
| "grad_norm": 1.5786314010620117, |
| "learning_rate": 4.286084477844001e-06, |
| "loss": 1.4592, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.019311559239315816, |
| "grad_norm": 1.4481762647628784, |
| "learning_rate": 4.291267167660016e-06, |
| "loss": 1.3409, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.019334882378493734, |
| "grad_norm": 1.1410714387893677, |
| "learning_rate": 4.296449857476031e-06, |
| "loss": 1.5746, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.019358205517671652, |
| "grad_norm": 1.363434076309204, |
| "learning_rate": 4.301632547292045e-06, |
| "loss": 1.0836, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.01938152865684957, |
| "grad_norm": 1.1413646936416626, |
| "learning_rate": 4.3068152371080595e-06, |
| "loss": 1.8687, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.019404851796027488, |
| "grad_norm": 1.9734309911727905, |
| "learning_rate": 4.311997926924074e-06, |
| "loss": 1.3295, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.019428174935205406, |
| "grad_norm": 1.5119333267211914, |
| "learning_rate": 4.317180616740089e-06, |
| "loss": 1.6817, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.01945149807438332, |
| "grad_norm": 1.3933395147323608, |
| "learning_rate": 4.3223633065561025e-06, |
| "loss": 1.5288, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.019474821213561238, |
| "grad_norm": 1.3713746070861816, |
| "learning_rate": 4.327545996372117e-06, |
| "loss": 1.6361, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.019498144352739156, |
| "grad_norm": 1.1849229335784912, |
| "learning_rate": 4.332728686188132e-06, |
| "loss": 1.6611, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.019521467491917074, |
| "grad_norm": 2.122307777404785, |
| "learning_rate": 4.337911376004146e-06, |
| "loss": 1.6258, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.01954479063109499, |
| "grad_norm": 1.221781611442566, |
| "learning_rate": 4.343094065820161e-06, |
| "loss": 1.9081, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.01956811377027291, |
| "grad_norm": 1.2895511388778687, |
| "learning_rate": 4.348276755636175e-06, |
| "loss": 1.2742, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.019591436909450827, |
| "grad_norm": 1.1531336307525635, |
| "learning_rate": 4.35345944545219e-06, |
| "loss": 1.587, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.019614760048628745, |
| "grad_norm": 1.3979135751724243, |
| "learning_rate": 4.358642135268205e-06, |
| "loss": 1.5208, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.019638083187806663, |
| "grad_norm": 1.3758100271224976, |
| "learning_rate": 4.363824825084219e-06, |
| "loss": 1.246, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.01966140632698458, |
| "grad_norm": 1.3759677410125732, |
| "learning_rate": 4.369007514900233e-06, |
| "loss": 1.7344, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.0196847294661625, |
| "grad_norm": 1.5575461387634277, |
| "learning_rate": 4.3741902047162485e-06, |
| "loss": 1.5554, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.019708052605340417, |
| "grad_norm": 1.5018088817596436, |
| "learning_rate": 4.379372894532263e-06, |
| "loss": 1.3433, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.019731375744518335, |
| "grad_norm": 1.4393954277038574, |
| "learning_rate": 4.384555584348277e-06, |
| "loss": 1.7277, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.019754698883696253, |
| "grad_norm": 1.0249360799789429, |
| "learning_rate": 4.389738274164292e-06, |
| "loss": 1.6538, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.019778022022874167, |
| "grad_norm": 1.128587007522583, |
| "learning_rate": 4.394920963980306e-06, |
| "loss": 1.2935, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.019801345162052085, |
| "grad_norm": 1.301287293434143, |
| "learning_rate": 4.40010365379632e-06, |
| "loss": 1.4193, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.019824668301230003, |
| "grad_norm": 1.5180747509002686, |
| "learning_rate": 4.405286343612335e-06, |
| "loss": 1.2061, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.01984799144040792, |
| "grad_norm": 0.9110321402549744, |
| "learning_rate": 4.41046903342835e-06, |
| "loss": 1.2803, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.01987131457958584, |
| "grad_norm": 1.68843674659729, |
| "learning_rate": 4.415651723244364e-06, |
| "loss": 1.2037, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.019894637718763757, |
| "grad_norm": 1.2198610305786133, |
| "learning_rate": 4.4208344130603785e-06, |
| "loss": 1.6652, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.019917960857941674, |
| "grad_norm": 1.579087257385254, |
| "learning_rate": 4.426017102876393e-06, |
| "loss": 1.5859, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.019941283997119592, |
| "grad_norm": 1.7198874950408936, |
| "learning_rate": 4.431199792692408e-06, |
| "loss": 1.4662, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.01996460713629751, |
| "grad_norm": 2.817178726196289, |
| "learning_rate": 4.436382482508422e-06, |
| "loss": 1.3427, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.019987930275475428, |
| "grad_norm": 1.4508287906646729, |
| "learning_rate": 4.441565172324437e-06, |
| "loss": 1.2893, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.020011253414653346, |
| "grad_norm": 1.29767644405365, |
| "learning_rate": 4.446747862140451e-06, |
| "loss": 1.5759, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.020034576553831264, |
| "grad_norm": 1.84248685836792, |
| "learning_rate": 4.451930551956466e-06, |
| "loss": 2.1373, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.020057899693009182, |
| "grad_norm": 1.6153839826583862, |
| "learning_rate": 4.457113241772481e-06, |
| "loss": 1.3915, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.0200812228321871, |
| "grad_norm": 1.3203104734420776, |
| "learning_rate": 4.462295931588495e-06, |
| "loss": 1.569, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.020104545971365014, |
| "grad_norm": 1.6475995779037476, |
| "learning_rate": 4.467478621404509e-06, |
| "loss": 1.6446, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.020127869110542932, |
| "grad_norm": 1.165834665298462, |
| "learning_rate": 4.472661311220524e-06, |
| "loss": 1.7323, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.02015119224972085, |
| "grad_norm": 1.3182172775268555, |
| "learning_rate": 4.477844001036538e-06, |
| "loss": 1.6265, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.020174515388898768, |
| "grad_norm": 1.1236745119094849, |
| "learning_rate": 4.483026690852552e-06, |
| "loss": 1.2358, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.020197838528076686, |
| "grad_norm": 1.2104893922805786, |
| "learning_rate": 4.4882093806685676e-06, |
| "loss": 1.4677, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.020221161667254604, |
| "grad_norm": 1.6824678182601929, |
| "learning_rate": 4.493392070484582e-06, |
| "loss": 1.5802, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.02024448480643252, |
| "grad_norm": 1.0679930448532104, |
| "learning_rate": 4.498574760300596e-06, |
| "loss": 1.4105, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.02026780794561044, |
| "grad_norm": 1.3705253601074219, |
| "learning_rate": 4.503757450116611e-06, |
| "loss": 1.5095, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.020291131084788357, |
| "grad_norm": 1.307491660118103, |
| "learning_rate": 4.508940139932626e-06, |
| "loss": 1.3987, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.020314454223966275, |
| "grad_norm": 1.4814496040344238, |
| "learning_rate": 4.51412282974864e-06, |
| "loss": 1.635, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.020337777363144193, |
| "grad_norm": 0.935867190361023, |
| "learning_rate": 4.5193055195646545e-06, |
| "loss": 1.6734, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.02036110050232211, |
| "grad_norm": 1.3890215158462524, |
| "learning_rate": 4.524488209380669e-06, |
| "loss": 1.4458, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.02038442364150003, |
| "grad_norm": 1.628081202507019, |
| "learning_rate": 4.529670899196684e-06, |
| "loss": 1.4814, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.020407746780677947, |
| "grad_norm": 1.5255577564239502, |
| "learning_rate": 4.534853589012698e-06, |
| "loss": 1.3884, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.020431069919855865, |
| "grad_norm": 2.09283185005188, |
| "learning_rate": 4.540036278828712e-06, |
| "loss": 1.7396, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.02045439305903378, |
| "grad_norm": 0.9901561737060547, |
| "learning_rate": 4.545218968644727e-06, |
| "loss": 1.4941, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.020477716198211697, |
| "grad_norm": 1.8444923162460327, |
| "learning_rate": 4.550401658460741e-06, |
| "loss": 1.2724, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.020501039337389615, |
| "grad_norm": 1.414305567741394, |
| "learning_rate": 4.555584348276756e-06, |
| "loss": 1.5781, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.020524362476567533, |
| "grad_norm": 1.1960091590881348, |
| "learning_rate": 4.56076703809277e-06, |
| "loss": 1.536, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.02054768561574545, |
| "grad_norm": 2.241649627685547, |
| "learning_rate": 4.565949727908785e-06, |
| "loss": 1.6636, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.02057100875492337, |
| "grad_norm": 1.0672343969345093, |
| "learning_rate": 4.5711324177248e-06, |
| "loss": 1.6369, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.020594331894101287, |
| "grad_norm": 1.6761622428894043, |
| "learning_rate": 4.576315107540814e-06, |
| "loss": 1.2554, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.020617655033279204, |
| "grad_norm": 1.1365658044815063, |
| "learning_rate": 4.581497797356828e-06, |
| "loss": 1.6271, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.020640978172457122, |
| "grad_norm": 1.0631389617919922, |
| "learning_rate": 4.5866804871728435e-06, |
| "loss": 1.6393, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.02066430131163504, |
| "grad_norm": 3.27304744720459, |
| "learning_rate": 4.591863176988858e-06, |
| "loss": 1.3521, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.020687624450812958, |
| "grad_norm": 1.3354477882385254, |
| "learning_rate": 4.597045866804872e-06, |
| "loss": 1.5137, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.020710947589990876, |
| "grad_norm": 2.192812919616699, |
| "learning_rate": 4.602228556620887e-06, |
| "loss": 1.7294, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.020734270729168794, |
| "grad_norm": 0.9716669321060181, |
| "learning_rate": 4.607411246436901e-06, |
| "loss": 1.4244, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.020757593868346712, |
| "grad_norm": 1.0377227067947388, |
| "learning_rate": 4.612593936252915e-06, |
| "loss": 1.3041, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.020780917007524626, |
| "grad_norm": 1.971074104309082, |
| "learning_rate": 4.61777662606893e-06, |
| "loss": 1.4917, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.020804240146702544, |
| "grad_norm": 1.3108222484588623, |
| "learning_rate": 4.622959315884945e-06, |
| "loss": 1.5923, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.020827563285880462, |
| "grad_norm": 1.4194189310073853, |
| "learning_rate": 4.628142005700959e-06, |
| "loss": 1.2378, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.02085088642505838, |
| "grad_norm": 1.5872682332992554, |
| "learning_rate": 4.6333246955169735e-06, |
| "loss": 1.3573, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.020874209564236298, |
| "grad_norm": 1.351704716682434, |
| "learning_rate": 4.638507385332988e-06, |
| "loss": 1.8374, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.020897532703414216, |
| "grad_norm": 1.15986168384552, |
| "learning_rate": 4.643690075149003e-06, |
| "loss": 1.4303, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.020920855842592134, |
| "grad_norm": 1.912819743156433, |
| "learning_rate": 4.648872764965017e-06, |
| "loss": 1.7733, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.02094417898177005, |
| "grad_norm": 1.6582539081573486, |
| "learning_rate": 4.654055454781032e-06, |
| "loss": 1.4696, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.02096750212094797, |
| "grad_norm": 1.147661805152893, |
| "learning_rate": 4.659238144597046e-06, |
| "loss": 1.5037, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.020990825260125887, |
| "grad_norm": 1.1773402690887451, |
| "learning_rate": 4.664420834413061e-06, |
| "loss": 1.604, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.021014148399303805, |
| "grad_norm": 1.9128248691558838, |
| "learning_rate": 4.669603524229076e-06, |
| "loss": 1.3081, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.021037471538481723, |
| "grad_norm": 1.0742683410644531, |
| "learning_rate": 4.67478621404509e-06, |
| "loss": 1.5619, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.02106079467765964, |
| "grad_norm": 1.19862699508667, |
| "learning_rate": 4.679968903861104e-06, |
| "loss": 1.6896, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.02108411781683756, |
| "grad_norm": 1.276283860206604, |
| "learning_rate": 4.685151593677119e-06, |
| "loss": 1.65, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.021107440956015473, |
| "grad_norm": 1.3582435846328735, |
| "learning_rate": 4.690334283493133e-06, |
| "loss": 1.2686, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.02113076409519339, |
| "grad_norm": 1.2145341634750366, |
| "learning_rate": 4.695516973309147e-06, |
| "loss": 1.8032, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.02115408723437131, |
| "grad_norm": 1.1219233274459839, |
| "learning_rate": 4.7006996631251626e-06, |
| "loss": 1.7681, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.021177410373549227, |
| "grad_norm": 1.0474015474319458, |
| "learning_rate": 4.705882352941177e-06, |
| "loss": 1.4555, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.021200733512727145, |
| "grad_norm": 1.6325182914733887, |
| "learning_rate": 4.711065042757191e-06, |
| "loss": 1.432, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.021224056651905063, |
| "grad_norm": 1.5804178714752197, |
| "learning_rate": 4.716247732573206e-06, |
| "loss": 1.7409, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.02124737979108298, |
| "grad_norm": 1.226804256439209, |
| "learning_rate": 4.721430422389221e-06, |
| "loss": 1.8077, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.0212707029302609, |
| "grad_norm": 1.0747625827789307, |
| "learning_rate": 4.726613112205235e-06, |
| "loss": 1.411, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.021294026069438816, |
| "grad_norm": 1.2126623392105103, |
| "learning_rate": 4.7317958020212495e-06, |
| "loss": 1.6464, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.021317349208616734, |
| "grad_norm": 1.196486473083496, |
| "learning_rate": 4.736978491837264e-06, |
| "loss": 1.4365, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.021340672347794652, |
| "grad_norm": 1.4727115631103516, |
| "learning_rate": 4.742161181653279e-06, |
| "loss": 1.5059, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.02136399548697257, |
| "grad_norm": 1.293938159942627, |
| "learning_rate": 4.747343871469293e-06, |
| "loss": 1.5508, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.021387318626150488, |
| "grad_norm": 1.3074458837509155, |
| "learning_rate": 4.752526561285307e-06, |
| "loss": 1.364, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.021410641765328406, |
| "grad_norm": 1.708522081375122, |
| "learning_rate": 4.757709251101322e-06, |
| "loss": 1.2891, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.02143396490450632, |
| "grad_norm": 1.2926160097122192, |
| "learning_rate": 4.762891940917336e-06, |
| "loss": 1.1779, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.021457288043684238, |
| "grad_norm": 1.7751168012619019, |
| "learning_rate": 4.768074630733351e-06, |
| "loss": 1.3136, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.021480611182862156, |
| "grad_norm": 1.3698194026947021, |
| "learning_rate": 4.773257320549365e-06, |
| "loss": 1.5203, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.021503934322040074, |
| "grad_norm": 1.4710402488708496, |
| "learning_rate": 4.77844001036538e-06, |
| "loss": 2.0632, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.021527257461217992, |
| "grad_norm": 1.3340466022491455, |
| "learning_rate": 4.783622700181395e-06, |
| "loss": 0.9449, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.02155058060039591, |
| "grad_norm": 1.990078330039978, |
| "learning_rate": 4.788805389997409e-06, |
| "loss": 1.4095, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.021573903739573828, |
| "grad_norm": 2.6495463848114014, |
| "learning_rate": 4.793988079813423e-06, |
| "loss": 1.5914, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.021597226878751746, |
| "grad_norm": 1.368868350982666, |
| "learning_rate": 4.7991707696294385e-06, |
| "loss": 1.8007, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.021620550017929663, |
| "grad_norm": 1.3946820497512817, |
| "learning_rate": 4.804353459445453e-06, |
| "loss": 1.3846, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.02164387315710758, |
| "grad_norm": 1.6035547256469727, |
| "learning_rate": 4.809536149261467e-06, |
| "loss": 1.6677, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.0216671962962855, |
| "grad_norm": 1.29734468460083, |
| "learning_rate": 4.814718839077482e-06, |
| "loss": 1.3697, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.021690519435463417, |
| "grad_norm": 1.1746439933776855, |
| "learning_rate": 4.819901528893497e-06, |
| "loss": 1.6134, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.021713842574641335, |
| "grad_norm": 1.255861759185791, |
| "learning_rate": 4.82508421870951e-06, |
| "loss": 1.6253, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.021737165713819253, |
| "grad_norm": 1.5499615669250488, |
| "learning_rate": 4.830266908525525e-06, |
| "loss": 1.2794, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.02176048885299717, |
| "grad_norm": 1.6138273477554321, |
| "learning_rate": 4.83544959834154e-06, |
| "loss": 1.6365, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.021783811992175085, |
| "grad_norm": 1.7135401964187622, |
| "learning_rate": 4.840632288157554e-06, |
| "loss": 1.509, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.021807135131353003, |
| "grad_norm": 1.4290528297424316, |
| "learning_rate": 4.8458149779735685e-06, |
| "loss": 1.3415, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.02183045827053092, |
| "grad_norm": 2.034870147705078, |
| "learning_rate": 4.850997667789583e-06, |
| "loss": 1.6834, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.02185378140970884, |
| "grad_norm": 1.6626250743865967, |
| "learning_rate": 4.856180357605598e-06, |
| "loss": 1.3573, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.021877104548886757, |
| "grad_norm": 1.2256288528442383, |
| "learning_rate": 4.861363047421612e-06, |
| "loss": 1.5497, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.021900427688064675, |
| "grad_norm": 1.218955397605896, |
| "learning_rate": 4.866545737237627e-06, |
| "loss": 1.6823, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.021923750827242593, |
| "grad_norm": 1.0629289150238037, |
| "learning_rate": 4.871728427053641e-06, |
| "loss": 1.3894, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.02194707396642051, |
| "grad_norm": 2.6169822216033936, |
| "learning_rate": 4.876911116869656e-06, |
| "loss": 1.4063, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.02197039710559843, |
| "grad_norm": 1.1517153978347778, |
| "learning_rate": 4.882093806685671e-06, |
| "loss": 1.3838, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.021993720244776346, |
| "grad_norm": 1.6320403814315796, |
| "learning_rate": 4.887276496501685e-06, |
| "loss": 1.5752, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.022017043383954264, |
| "grad_norm": 1.7344862222671509, |
| "learning_rate": 4.892459186317699e-06, |
| "loss": 1.3182, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.022040366523132182, |
| "grad_norm": 1.2497214078903198, |
| "learning_rate": 4.897641876133714e-06, |
| "loss": 1.2266, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.0220636896623101, |
| "grad_norm": 1.996893048286438, |
| "learning_rate": 4.902824565949728e-06, |
| "loss": 1.2708, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.022087012801488018, |
| "grad_norm": 1.1130571365356445, |
| "learning_rate": 4.908007255765742e-06, |
| "loss": 1.4791, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.022110335940665932, |
| "grad_norm": 1.2698702812194824, |
| "learning_rate": 4.9131899455817576e-06, |
| "loss": 1.3711, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.02213365907984385, |
| "grad_norm": 1.0363445281982422, |
| "learning_rate": 4.918372635397772e-06, |
| "loss": 1.4153, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.022156982219021768, |
| "grad_norm": 1.1418310403823853, |
| "learning_rate": 4.923555325213786e-06, |
| "loss": 1.3377, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.022180305358199686, |
| "grad_norm": 1.3740698099136353, |
| "learning_rate": 4.928738015029801e-06, |
| "loss": 1.375, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.022203628497377604, |
| "grad_norm": 1.5656532049179077, |
| "learning_rate": 4.933920704845816e-06, |
| "loss": 1.651, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.022226951636555522, |
| "grad_norm": 1.209380865097046, |
| "learning_rate": 4.93910339466183e-06, |
| "loss": 1.6956, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.02225027477573344, |
| "grad_norm": 1.9917747974395752, |
| "learning_rate": 4.9442860844778445e-06, |
| "loss": 1.2802, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.022273597914911358, |
| "grad_norm": 2.168260097503662, |
| "learning_rate": 4.949468774293859e-06, |
| "loss": 1.9773, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.022296921054089276, |
| "grad_norm": 1.113978624343872, |
| "learning_rate": 4.954651464109874e-06, |
| "loss": 1.8121, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.022320244193267193, |
| "grad_norm": 1.4833635091781616, |
| "learning_rate": 4.959834153925888e-06, |
| "loss": 1.694, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.02234356733244511, |
| "grad_norm": 1.3287935256958008, |
| "learning_rate": 4.965016843741902e-06, |
| "loss": 1.4865, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.02236689047162303, |
| "grad_norm": 1.5515238046646118, |
| "learning_rate": 4.970199533557917e-06, |
| "loss": 1.6035, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.022390213610800947, |
| "grad_norm": 1.2824245691299438, |
| "learning_rate": 4.975382223373931e-06, |
| "loss": 1.5124, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.022413536749978865, |
| "grad_norm": 1.2062418460845947, |
| "learning_rate": 4.980564913189946e-06, |
| "loss": 1.5982, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.02243685988915678, |
| "grad_norm": 1.2790741920471191, |
| "learning_rate": 4.98574760300596e-06, |
| "loss": 1.586, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.022460183028334697, |
| "grad_norm": 1.202909231185913, |
| "learning_rate": 4.990930292821975e-06, |
| "loss": 1.7387, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.022483506167512615, |
| "grad_norm": 1.328963041305542, |
| "learning_rate": 4.99611298263799e-06, |
| "loss": 1.5611, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.022506829306690533, |
| "grad_norm": 1.3728841543197632, |
| "learning_rate": 5.001295672454004e-06, |
| "loss": 1.6887, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.02253015244586845, |
| "grad_norm": 1.2474596500396729, |
| "learning_rate": 5.006478362270018e-06, |
| "loss": 1.7337, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.02255347558504637, |
| "grad_norm": 1.4526808261871338, |
| "learning_rate": 5.0116610520860335e-06, |
| "loss": 1.4009, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.022576798724224287, |
| "grad_norm": 1.74959397315979, |
| "learning_rate": 5.016843741902048e-06, |
| "loss": 1.4153, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.022600121863402205, |
| "grad_norm": 1.7886738777160645, |
| "learning_rate": 5.022026431718062e-06, |
| "loss": 1.3897, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.022623445002580123, |
| "grad_norm": 1.3122284412384033, |
| "learning_rate": 5.027209121534077e-06, |
| "loss": 1.6551, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.02264676814175804, |
| "grad_norm": 1.5374927520751953, |
| "learning_rate": 5.032391811350092e-06, |
| "loss": 1.6396, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.02267009128093596, |
| "grad_norm": 1.6476905345916748, |
| "learning_rate": 5.037574501166106e-06, |
| "loss": 1.733, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.022693414420113876, |
| "grad_norm": 1.3407307863235474, |
| "learning_rate": 5.0427571909821205e-06, |
| "loss": 1.4984, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.022716737559291794, |
| "grad_norm": 1.5565712451934814, |
| "learning_rate": 5.047939880798135e-06, |
| "loss": 1.6524, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.022740060698469712, |
| "grad_norm": 1.381903052330017, |
| "learning_rate": 5.053122570614149e-06, |
| "loss": 1.5325, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.022763383837647626, |
| "grad_norm": 1.916326880455017, |
| "learning_rate": 5.058305260430164e-06, |
| "loss": 1.2326, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.022786706976825544, |
| "grad_norm": 1.1621575355529785, |
| "learning_rate": 5.063487950246179e-06, |
| "loss": 1.2568, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.022810030116003462, |
| "grad_norm": 1.3575561046600342, |
| "learning_rate": 5.068670640062193e-06, |
| "loss": 1.3755, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.02283335325518138, |
| "grad_norm": 1.482701063156128, |
| "learning_rate": 5.0738533298782065e-06, |
| "loss": 1.598, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.022856676394359298, |
| "grad_norm": 1.2530887126922607, |
| "learning_rate": 5.079036019694221e-06, |
| "loss": 1.66, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.022879999533537216, |
| "grad_norm": 1.4960439205169678, |
| "learning_rate": 5.084218709510236e-06, |
| "loss": 1.5341, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.022903322672715134, |
| "grad_norm": 1.507735252380371, |
| "learning_rate": 5.0894013993262504e-06, |
| "loss": 1.3987, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.022926645811893052, |
| "grad_norm": 2.0131475925445557, |
| "learning_rate": 5.094584089142265e-06, |
| "loss": 1.3134, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.02294996895107097, |
| "grad_norm": 1.8096015453338623, |
| "learning_rate": 5.099766778958279e-06, |
| "loss": 1.3707, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.022973292090248888, |
| "grad_norm": 1.0444198846817017, |
| "learning_rate": 5.104949468774294e-06, |
| "loss": 1.4119, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.022996615229426805, |
| "grad_norm": 1.3110159635543823, |
| "learning_rate": 5.110132158590309e-06, |
| "loss": 1.2187, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.023019938368604723, |
| "grad_norm": 1.3191614151000977, |
| "learning_rate": 5.115314848406323e-06, |
| "loss": 1.3691, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.02304326150778264, |
| "grad_norm": 1.3888386487960815, |
| "learning_rate": 5.120497538222337e-06, |
| "loss": 1.1934, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.02306658464696056, |
| "grad_norm": 1.2101585865020752, |
| "learning_rate": 5.1256802280383526e-06, |
| "loss": 1.4962, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.023089907786138477, |
| "grad_norm": 1.2938464879989624, |
| "learning_rate": 5.130862917854367e-06, |
| "loss": 1.4601, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.02311323092531639, |
| "grad_norm": 2.072444200515747, |
| "learning_rate": 5.136045607670381e-06, |
| "loss": 1.7241, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.02313655406449431, |
| "grad_norm": 1.7139407396316528, |
| "learning_rate": 5.141228297486396e-06, |
| "loss": 1.394, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.023159877203672227, |
| "grad_norm": 1.5825177431106567, |
| "learning_rate": 5.146410987302411e-06, |
| "loss": 1.4218, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.023183200342850145, |
| "grad_norm": 1.2233787775039673, |
| "learning_rate": 5.151593677118425e-06, |
| "loss": 1.2882, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.023206523482028063, |
| "grad_norm": 1.6474647521972656, |
| "learning_rate": 5.1567763669344395e-06, |
| "loss": 1.6499, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.02322984662120598, |
| "grad_norm": 1.669651985168457, |
| "learning_rate": 5.161959056750454e-06, |
| "loss": 1.1727, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.0232531697603839, |
| "grad_norm": 1.4976879358291626, |
| "learning_rate": 5.167141746566469e-06, |
| "loss": 1.2149, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.023276492899561817, |
| "grad_norm": 1.4033470153808594, |
| "learning_rate": 5.172324436382483e-06, |
| "loss": 1.3004, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.023299816038739735, |
| "grad_norm": 1.3042150735855103, |
| "learning_rate": 5.177507126198498e-06, |
| "loss": 1.3803, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.023323139177917653, |
| "grad_norm": 1.4327346086502075, |
| "learning_rate": 5.182689816014512e-06, |
| "loss": 1.7267, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 128625, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.5429008193870234e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|