{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 15.0,
  "global_step": 1182,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0025380710659898475,
      "grad_norm": 152.0770602205149,
      "learning_rate": 2.7777777777777776e-07,
      "loss": 1.4538,
      "step": 1
    },
    {
      "epoch": 0.005076142131979695,
      "grad_norm": 154.43415342456026,
      "learning_rate": 5.555555555555555e-07,
      "loss": 1.6443,
      "step": 2
    },
    {
      "epoch": 0.007614213197969543,
      "grad_norm": 83.042923862024,
      "learning_rate": 8.333333333333333e-07,
      "loss": 1.569,
      "step": 3
    },
    {
      "epoch": 0.01015228426395939,
      "grad_norm": 168.9469822153038,
      "learning_rate": 1.111111111111111e-06,
      "loss": 1.5581,
      "step": 4
    },
    {
      "epoch": 0.012690355329949238,
      "grad_norm": 99.16296768200209,
      "learning_rate": 1.3888888888888892e-06,
      "loss": 1.5565,
      "step": 5
    },
    {
      "epoch": 0.015228426395939087,
      "grad_norm": 77.46041883601814,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 1.834,
      "step": 6
    },
    {
      "epoch": 0.017766497461928935,
      "grad_norm": 65.11628947265203,
      "learning_rate": 1.944444444444445e-06,
      "loss": 1.6277,
      "step": 7
    },
    {
      "epoch": 0.02030456852791878,
      "grad_norm": 28.341949090239343,
      "learning_rate": 2.222222222222222e-06,
      "loss": 1.5202,
      "step": 8
    },
    {
      "epoch": 0.02284263959390863,
      "grad_norm": 45.05260802676402,
      "learning_rate": 2.5e-06,
      "loss": 1.4123,
      "step": 9
    },
    {
      "epoch": 0.025380710659898477,
      "grad_norm": 28.73735471868825,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 1.5291,
      "step": 10
    },
    {
      "epoch": 0.027918781725888325,
      "grad_norm": 68.32933011519918,
      "learning_rate": 3.055555555555556e-06,
      "loss": 1.2742,
      "step": 11
    },
    {
      "epoch": 0.030456852791878174,
      "grad_norm": 26.127348649606496,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 1.357,
      "step": 12
    },
    {
      "epoch": 0.03299492385786802,
      "grad_norm": 20.661721515738268,
      "learning_rate": 3.6111111111111115e-06,
      "loss": 1.2094,
      "step": 13
    },
    {
      "epoch": 0.03553299492385787,
      "grad_norm": 28.64253126706993,
      "learning_rate": 3.88888888888889e-06,
      "loss": 1.2889,
      "step": 14
    },
    {
      "epoch": 0.03807106598984772,
      "grad_norm": 11.29417859738243,
      "learning_rate": 4.166666666666667e-06,
      "loss": 1.1999,
      "step": 15
    },
    {
      "epoch": 0.04060913705583756,
      "grad_norm": 10.496811646115283,
      "learning_rate": 4.444444444444444e-06,
      "loss": 1.3311,
      "step": 16
    },
    {
      "epoch": 0.04314720812182741,
      "grad_norm": 3.222307468960679,
      "learning_rate": 4.722222222222222e-06,
      "loss": 1.2378,
      "step": 17
    },
    {
      "epoch": 0.04568527918781726,
      "grad_norm": 5.95588179653401,
      "learning_rate": 5e-06,
      "loss": 1.2917,
      "step": 18
    },
    {
      "epoch": 0.048223350253807105,
      "grad_norm": 4.595212148917514,
      "learning_rate": 5.2777777777777785e-06,
      "loss": 1.2182,
      "step": 19
    },
    {
      "epoch": 0.050761421319796954,
      "grad_norm": 3.4919855269541316,
      "learning_rate": 5.555555555555557e-06,
      "loss": 1.1647,
      "step": 20
    },
    {
      "epoch": 0.0532994923857868,
      "grad_norm": 2.8992797662103706,
      "learning_rate": 5.833333333333334e-06,
      "loss": 1.2627,
      "step": 21
    },
    {
      "epoch": 0.05583756345177665,
      "grad_norm": 6.2754366481134,
      "learning_rate": 6.111111111111112e-06,
      "loss": 1.2512,
      "step": 22
    },
    {
      "epoch": 0.0583756345177665,
      "grad_norm": 4.8408966557202735,
      "learning_rate": 6.3888888888888885e-06,
      "loss": 1.109,
      "step": 23
    },
    {
      "epoch": 0.06091370558375635,
      "grad_norm": 4.794271905930829,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.1957,
      "step": 24
    },
    {
      "epoch": 0.06345177664974619,
      "grad_norm": 3.8422011976344037,
      "learning_rate": 6.944444444444445e-06,
      "loss": 1.1104,
      "step": 25
    },
    {
      "epoch": 0.06598984771573604,
      "grad_norm": 2.9555249451534094,
      "learning_rate": 7.222222222222223e-06,
      "loss": 1.1429,
      "step": 26
    },
    {
      "epoch": 0.06852791878172589,
      "grad_norm": 3.420765898636476,
      "learning_rate": 7.500000000000001e-06,
      "loss": 1.0736,
      "step": 27
    },
    {
      "epoch": 0.07106598984771574,
      "grad_norm": 3.4590250149649053,
      "learning_rate": 7.77777777777778e-06,
      "loss": 1.1766,
      "step": 28
    },
    {
      "epoch": 0.07360406091370558,
      "grad_norm": 6.343830946481616,
      "learning_rate": 8.055555555555557e-06,
      "loss": 1.2733,
      "step": 29
    },
    {
      "epoch": 0.07614213197969544,
      "grad_norm": 6.9382893747130305,
      "learning_rate": 8.333333333333334e-06,
      "loss": 1.1572,
      "step": 30
    },
    {
      "epoch": 0.07868020304568528,
      "grad_norm": 4.116628130544569,
      "learning_rate": 8.611111111111112e-06,
      "loss": 1.0768,
      "step": 31
    },
    {
      "epoch": 0.08121827411167512,
      "grad_norm": 3.137655679928552,
      "learning_rate": 8.888888888888888e-06,
      "loss": 1.2086,
      "step": 32
    },
    {
      "epoch": 0.08375634517766498,
      "grad_norm": 2.7173186967601377,
      "learning_rate": 9.166666666666666e-06,
      "loss": 1.0033,
      "step": 33
    },
    {
      "epoch": 0.08629441624365482,
      "grad_norm": 4.422086595586005,
      "learning_rate": 9.444444444444445e-06,
      "loss": 1.2143,
      "step": 34
    },
    {
      "epoch": 0.08883248730964467,
      "grad_norm": 4.287936560092926,
      "learning_rate": 9.722222222222223e-06,
      "loss": 1.1019,
      "step": 35
    },
    {
      "epoch": 0.09137055837563451,
      "grad_norm": 4.71773823753788,
      "learning_rate": 1e-05,
      "loss": 1.0297,
      "step": 36
    },
    {
      "epoch": 0.09390862944162437,
      "grad_norm": 3.0619027718013845,
      "learning_rate": 9.999981212445786e-06,
      "loss": 1.2065,
      "step": 37
    },
    {
      "epoch": 0.09644670050761421,
      "grad_norm": 5.557917883258693,
      "learning_rate": 9.999924849924331e-06,
      "loss": 1.1469,
      "step": 38
    },
    {
      "epoch": 0.09898477157360407,
      "grad_norm": 3.3448139583001146,
      "learning_rate": 9.999830912859204e-06,
      "loss": 1.076,
      "step": 39
    },
    {
      "epoch": 0.10152284263959391,
      "grad_norm": 3.4576008769883573,
      "learning_rate": 9.99969940195634e-06,
      "loss": 1.1861,
      "step": 40
    },
    {
      "epoch": 0.10406091370558376,
      "grad_norm": 2.859503835631421,
      "learning_rate": 9.99953031820405e-06,
      "loss": 1.1576,
      "step": 41
    },
    {
      "epoch": 0.1065989847715736,
      "grad_norm": 3.01601354320767,
      "learning_rate": 9.999323662872998e-06,
      "loss": 1.0347,
      "step": 42
    },
    {
      "epoch": 0.10913705583756345,
      "grad_norm": 3.5906060809534663,
      "learning_rate": 9.999079437516205e-06,
      "loss": 1.1774,
      "step": 43
    },
    {
      "epoch": 0.1116751269035533,
      "grad_norm": 3.813820556604242,
      "learning_rate": 9.998797643969031e-06,
      "loss": 1.086,
      "step": 44
    },
    {
      "epoch": 0.11421319796954314,
      "grad_norm": 3.2135626955254954,
      "learning_rate": 9.998478284349163e-06,
      "loss": 1.0939,
      "step": 45
    },
    {
      "epoch": 0.116751269035533,
      "grad_norm": 3.038494005068585,
      "learning_rate": 9.998121361056588e-06,
      "loss": 1.1049,
      "step": 46
    },
    {
      "epoch": 0.11928934010152284,
      "grad_norm": 4.836679741133791,
      "learning_rate": 9.997726876773599e-06,
      "loss": 0.9771,
      "step": 47
    },
    {
      "epoch": 0.1218274111675127,
      "grad_norm": 3.285363024124718,
      "learning_rate": 9.99729483446475e-06,
      "loss": 1.0107,
      "step": 48
    },
    {
      "epoch": 0.12436548223350254,
      "grad_norm": 4.762499743457535,
      "learning_rate": 9.996825237376852e-06,
      "loss": 1.1301,
      "step": 49
    },
    {
      "epoch": 0.12690355329949238,
      "grad_norm": 4.073333426027345,
      "learning_rate": 9.996318089038935e-06,
      "loss": 1.0094,
      "step": 50
    },
    {
      "epoch": 0.12944162436548223,
      "grad_norm": 3.5281106754331906,
      "learning_rate": 9.99577339326223e-06,
      "loss": 1.107,
      "step": 51
    },
    {
      "epoch": 0.1319796954314721,
      "grad_norm": 2.388804020934236,
      "learning_rate": 9.995191154140136e-06,
      "loss": 1.0684,
      "step": 52
    },
    {
      "epoch": 0.13451776649746192,
      "grad_norm": 3.896150155122405,
      "learning_rate": 9.994571376048195e-06,
      "loss": 1.0195,
      "step": 53
    },
    {
      "epoch": 0.13705583756345177,
      "grad_norm": 3.159381315379948,
      "learning_rate": 9.993914063644053e-06,
      "loss": 1.1289,
      "step": 54
    },
    {
      "epoch": 0.13959390862944163,
      "grad_norm": 4.078159681191905,
      "learning_rate": 9.993219221867426e-06,
      "loss": 1.1211,
      "step": 55
    },
    {
      "epoch": 0.14213197969543148,
      "grad_norm": 5.601995817902489,
      "learning_rate": 9.992486855940064e-06,
      "loss": 1.0514,
      "step": 56
    },
    {
      "epoch": 0.1446700507614213,
      "grad_norm": 2.1043015689108846,
      "learning_rate": 9.991716971365713e-06,
      "loss": 1.0456,
      "step": 57
    },
    {
      "epoch": 0.14720812182741116,
      "grad_norm": 2.5356517486241827,
      "learning_rate": 9.990909573930075e-06,
      "loss": 1.192,
      "step": 58
    },
    {
      "epoch": 0.14974619289340102,
      "grad_norm": 2.7360362373563953,
      "learning_rate": 9.990064669700756e-06,
      "loss": 0.9797,
      "step": 59
    },
    {
      "epoch": 0.15228426395939088,
      "grad_norm": 2.208834445890474,
      "learning_rate": 9.989182265027232e-06,
      "loss": 1.1441,
      "step": 60
    },
    {
      "epoch": 0.1548223350253807,
      "grad_norm": 3.2583621910518734,
      "learning_rate": 9.988262366540792e-06,
      "loss": 0.9885,
      "step": 61
    },
    {
      "epoch": 0.15736040609137056,
      "grad_norm": 3.9359528549878404,
      "learning_rate": 9.987304981154493e-06,
      "loss": 1.032,
      "step": 62
    },
    {
      "epoch": 0.1598984771573604,
      "grad_norm": 4.3721493801621465,
      "learning_rate": 9.986310116063108e-06,
      "loss": 1.0886,
      "step": 63
    },
    {
      "epoch": 0.16243654822335024,
      "grad_norm": 3.2720085785918624,
      "learning_rate": 9.985277778743069e-06,
      "loss": 1.0736,
      "step": 64
    },
    {
      "epoch": 0.1649746192893401,
      "grad_norm": 4.284093394240824,
      "learning_rate": 9.984207976952412e-06,
      "loss": 1.109,
      "step": 65
    },
    {
      "epoch": 0.16751269035532995,
      "grad_norm": 2.744141216549521,
      "learning_rate": 9.98310071873072e-06,
      "loss": 1.0275,
      "step": 66
    },
    {
      "epoch": 0.1700507614213198,
      "grad_norm": 2.8863340000098474,
      "learning_rate": 9.981956012399068e-06,
      "loss": 1.0759,
      "step": 67
    },
    {
      "epoch": 0.17258883248730963,
      "grad_norm": 4.033064317161934,
      "learning_rate": 9.980773866559946e-06,
      "loss": 1.07,
      "step": 68
    },
    {
      "epoch": 0.1751269035532995,
      "grad_norm": 3.7971135347720204,
      "learning_rate": 9.979554290097201e-06,
      "loss": 0.9832,
      "step": 69
    },
    {
      "epoch": 0.17766497461928935,
      "grad_norm": 2.9283571123362697,
      "learning_rate": 9.978297292175984e-06,
      "loss": 1.0622,
      "step": 70
    },
    {
      "epoch": 0.1802030456852792,
      "grad_norm": 2.9862875820274453,
      "learning_rate": 9.977002882242657e-06,
      "loss": 1.1675,
      "step": 71
    },
    {
      "epoch": 0.18274111675126903,
      "grad_norm": 2.458693353037259,
      "learning_rate": 9.975671070024741e-06,
      "loss": 0.9963,
      "step": 72
    },
    {
      "epoch": 0.18527918781725888,
      "grad_norm": 2.7140438707634162,
      "learning_rate": 9.97430186553083e-06,
      "loss": 0.9998,
      "step": 73
    },
    {
      "epoch": 0.18781725888324874,
      "grad_norm": 2.3819474228984596,
      "learning_rate": 9.972895279050532e-06,
      "loss": 1.1602,
      "step": 74
    },
    {
      "epoch": 0.19035532994923857,
      "grad_norm": 2.869485463686646,
      "learning_rate": 9.971451321154368e-06,
      "loss": 1.1476,
      "step": 75
    },
    {
      "epoch": 0.19289340101522842,
      "grad_norm": 3.5672507886894915,
      "learning_rate": 9.969970002693718e-06,
      "loss": 1.189,
      "step": 76
    },
    {
      "epoch": 0.19543147208121828,
      "grad_norm": 4.6249979596983275,
      "learning_rate": 9.968451334800718e-06,
      "loss": 1.0321,
      "step": 77
    },
    {
      "epoch": 0.19796954314720813,
      "grad_norm": 2.590141938018056,
      "learning_rate": 9.966895328888195e-06,
      "loss": 1.1738,
      "step": 78
    },
    {
      "epoch": 0.20050761421319796,
      "grad_norm": 2.4382892427081226,
      "learning_rate": 9.965301996649563e-06,
      "loss": 1.0667,
      "step": 79
    },
    {
      "epoch": 0.20304568527918782,
      "grad_norm": 2.774255541254283,
      "learning_rate": 9.96367135005875e-06,
      "loss": 0.9998,
      "step": 80
    },
    {
      "epoch": 0.20558375634517767,
      "grad_norm": 2.4921808717304685,
      "learning_rate": 9.962003401370101e-06,
      "loss": 1.0051,
      "step": 81
    },
    {
      "epoch": 0.20812182741116753,
      "grad_norm": 2.8238480847860785,
      "learning_rate": 9.960298163118284e-06,
      "loss": 0.9916,
      "step": 82
    },
    {
      "epoch": 0.21065989847715735,
      "grad_norm": 6.206800727550207,
      "learning_rate": 9.958555648118207e-06,
      "loss": 1.0763,
      "step": 83
    },
    {
      "epoch": 0.2131979695431472,
      "grad_norm": 2.443578917202038,
      "learning_rate": 9.956775869464901e-06,
      "loss": 0.9825,
      "step": 84
    },
    {
      "epoch": 0.21573604060913706,
      "grad_norm": 2.847758348759497,
      "learning_rate": 9.954958840533447e-06,
      "loss": 1.0456,
      "step": 85
    },
    {
      "epoch": 0.2182741116751269,
      "grad_norm": 3.170138811884141,
      "learning_rate": 9.953104574978854e-06,
      "loss": 1.1186,
      "step": 86
    },
    {
      "epoch": 0.22081218274111675,
      "grad_norm": 2.6010611046656877,
      "learning_rate": 9.951213086735967e-06,
      "loss": 1.0233,
      "step": 87
    },
    {
      "epoch": 0.2233502538071066,
      "grad_norm": 6.679956372744609,
      "learning_rate": 9.949284390019362e-06,
      "loss": 1.066,
      "step": 88
    },
    {
      "epoch": 0.22588832487309646,
      "grad_norm": 2.6554799986019133,
      "learning_rate": 9.94731849932324e-06,
      "loss": 0.9623,
      "step": 89
    },
    {
      "epoch": 0.22842639593908629,
      "grad_norm": 2.3433414348493273,
      "learning_rate": 9.945315429421307e-06,
      "loss": 1.0869,
      "step": 90
    },
    {
      "epoch": 0.23096446700507614,
      "grad_norm": 2.8599286738704683,
      "learning_rate": 9.943275195366679e-06,
      "loss": 1.11,
      "step": 91
    },
    {
      "epoch": 0.233502538071066,
      "grad_norm": 4.958755531266782,
      "learning_rate": 9.941197812491761e-06,
      "loss": 1.2542,
      "step": 92
    },
    {
      "epoch": 0.23604060913705585,
      "grad_norm": 2.2479047075233427,
      "learning_rate": 9.939083296408127e-06,
      "loss": 1.0836,
      "step": 93
    },
    {
      "epoch": 0.23857868020304568,
      "grad_norm": 1.8107915831749222,
      "learning_rate": 9.936931663006414e-06,
      "loss": 1.0384,
      "step": 94
    },
    {
      "epoch": 0.24111675126903553,
      "grad_norm": 2.457682015648462,
      "learning_rate": 9.934742928456191e-06,
      "loss": 1.0374,
      "step": 95
    },
    {
      "epoch": 0.2436548223350254,
      "grad_norm": 2.972864834775759,
      "learning_rate": 9.932517109205849e-06,
      "loss": 1.1032,
      "step": 96
    },
    {
      "epoch": 0.24619289340101522,
      "grad_norm": 2.3725301348700087,
      "learning_rate": 9.930254221982464e-06,
      "loss": 1.0623,
      "step": 97
    },
    {
      "epoch": 0.24873096446700507,
      "grad_norm": 2.7944396518166355,
      "learning_rate": 9.927954283791687e-06,
      "loss": 1.0831,
      "step": 98
    },
    {
      "epoch": 0.2512690355329949,
      "grad_norm": 2.300115990070667,
      "learning_rate": 9.9256173119176e-06,
      "loss": 1.0785,
      "step": 99
    },
    {
      "epoch": 0.25380710659898476,
      "grad_norm": 1.8208692795246306,
      "learning_rate": 9.923243323922598e-06,
      "loss": 0.9879,
      "step": 100
    },
    {
      "epoch": 0.2563451776649746,
      "grad_norm": 4.4476962273727665,
      "learning_rate": 9.920832337647252e-06,
      "loss": 1.0007,
      "step": 101
    },
    {
      "epoch": 0.25888324873096447,
      "grad_norm": 3.3940854617337113,
      "learning_rate": 9.918384371210178e-06,
      "loss": 1.1891,
      "step": 102
    },
    {
      "epoch": 0.2614213197969543,
      "grad_norm": 2.555201632199259,
      "learning_rate": 9.915899443007894e-06,
      "loss": 1.021,
      "step": 103
    },
    {
      "epoch": 0.2639593908629442,
      "grad_norm": 2.3220370794195113,
      "learning_rate": 9.91337757171469e-06,
      "loss": 1.1011,
      "step": 104
    },
    {
      "epoch": 0.26649746192893403,
      "grad_norm": 2.9669260657059655,
      "learning_rate": 9.910818776282487e-06,
      "loss": 1.0555,
      "step": 105
    },
    {
      "epoch": 0.26903553299492383,
      "grad_norm": 2.870314472945511,
      "learning_rate": 9.908223075940684e-06,
      "loss": 1.1542,
      "step": 106
    },
    {
      "epoch": 0.2715736040609137,
      "grad_norm": 2.7991064606918137,
      "learning_rate": 9.905590490196027e-06,
      "loss": 1.0811,
      "step": 107
    },
    {
      "epoch": 0.27411167512690354,
      "grad_norm": 2.493650659366754,
      "learning_rate": 9.902921038832456e-06,
      "loss": 1.0137,
      "step": 108
    },
    {
      "epoch": 0.2766497461928934,
      "grad_norm": 6.9035091138911655,
      "learning_rate": 9.900214741910955e-06,
      "loss": 1.1594,
      "step": 109
    },
    {
      "epoch": 0.27918781725888325,
      "grad_norm": 2.453399893747965,
      "learning_rate": 9.897471619769402e-06,
      "loss": 0.9992,
      "step": 110
    },
    {
      "epoch": 0.2817258883248731,
      "grad_norm": 5.298689006767713,
      "learning_rate": 9.89469169302242e-06,
      "loss": 1.0269,
      "step": 111
    },
    {
      "epoch": 0.28426395939086296,
      "grad_norm": 3.312118607283353,
      "learning_rate": 9.891874982561222e-06,
      "loss": 1.0999,
      "step": 112
    },
    {
      "epoch": 0.2868020304568528,
      "grad_norm": 2.309806298343435,
      "learning_rate": 9.889021509553448e-06,
      "loss": 0.9932,
      "step": 113
    },
    {
      "epoch": 0.2893401015228426,
      "grad_norm": 2.782632711667371,
      "learning_rate": 9.886131295443003e-06,
      "loss": 1.0546,
      "step": 114
    },
    {
      "epoch": 0.2918781725888325,
      "grad_norm": 3.0241898018425375,
      "learning_rate": 9.883204361949916e-06,
      "loss": 1.063,
      "step": 115
    },
    {
      "epoch": 0.29441624365482233,
      "grad_norm": 4.881309577432392,
      "learning_rate": 9.880240731070152e-06,
      "loss": 0.9851,
      "step": 116
    },
    {
      "epoch": 0.2969543147208122,
      "grad_norm": 4.393148043595713,
      "learning_rate": 9.877240425075465e-06,
      "loss": 1.1928,
      "step": 117
    },
    {
      "epoch": 0.29949238578680204,
      "grad_norm": 2.539173060449269,
      "learning_rate": 9.874203466513215e-06,
      "loss": 1.0185,
      "step": 118
    },
    {
      "epoch": 0.3020304568527919,
      "grad_norm": 3.088531691349216,
      "learning_rate": 9.871129878206213e-06,
      "loss": 1.0862,
      "step": 119
    },
    {
      "epoch": 0.30456852791878175,
      "grad_norm": 2.646620091120325,
      "learning_rate": 9.868019683252543e-06,
      "loss": 0.9884,
      "step": 120
    },
    {
      "epoch": 0.30710659898477155,
      "grad_norm": 2.435727031764384,
      "learning_rate": 9.864872905025386e-06,
      "loss": 1.0276,
      "step": 121
    },
    {
      "epoch": 0.3096446700507614,
      "grad_norm": 2.1078027560231183,
      "learning_rate": 9.861689567172849e-06,
      "loss": 1.0837,
      "step": 122
    },
    {
      "epoch": 0.31218274111675126,
      "grad_norm": 2.3556477849722293,
      "learning_rate": 9.858469693617787e-06,
      "loss": 0.9124,
      "step": 123
    },
    {
      "epoch": 0.3147208121827411,
      "grad_norm": 2.574273891936511,
      "learning_rate": 9.855213308557618e-06,
      "loss": 1.1362,
      "step": 124
    },
    {
      "epoch": 0.31725888324873097,
      "grad_norm": 2.954661863407828,
      "learning_rate": 9.851920436464146e-06,
      "loss": 1.1748,
      "step": 125
    },
    {
      "epoch": 0.3197969543147208,
      "grad_norm": 2.493459031496384,
      "learning_rate": 9.848591102083375e-06,
      "loss": 1.1093,
      "step": 126
    },
    {
      "epoch": 0.3223350253807107,
      "grad_norm": 2.1194534532314098,
      "learning_rate": 9.845225330435329e-06,
      "loss": 1.0326,
      "step": 127
    },
    {
      "epoch": 0.3248730964467005,
      "grad_norm": 3.4687791362456477,
      "learning_rate": 9.84182314681385e-06,
      "loss": 1.0464,
      "step": 128
    },
    {
      "epoch": 0.32741116751269034,
      "grad_norm": 6.5552134087262255,
      "learning_rate": 9.838384576786427e-06,
      "loss": 1.1425,
      "step": 129
    },
    {
      "epoch": 0.3299492385786802,
      "grad_norm": 5.804701269137941,
      "learning_rate": 9.834909646193983e-06,
      "loss": 1.0323,
      "step": 130
    },
    {
      "epoch": 0.33248730964467005,
      "grad_norm": 2.6380372208920697,
      "learning_rate": 9.831398381150698e-06,
      "loss": 1.0946,
      "step": 131
    },
    {
      "epoch": 0.3350253807106599,
      "grad_norm": 2.292877023775666,
      "learning_rate": 9.82785080804381e-06,
      "loss": 0.883,
      "step": 132
    },
    {
      "epoch": 0.33756345177664976,
      "grad_norm": 4.2977395996010435,
      "learning_rate": 9.824266953533402e-06,
      "loss": 0.9968,
      "step": 133
    },
    {
      "epoch": 0.3401015228426396,
      "grad_norm": 2.4822533121250254,
      "learning_rate": 9.82064684455222e-06,
      "loss": 0.9894,
      "step": 134
    },
    {
      "epoch": 0.3426395939086294,
      "grad_norm": 2.314919940633327,
      "learning_rate": 9.816990508305463e-06,
      "loss": 0.9332,
      "step": 135
    },
    {
      "epoch": 0.34517766497461927,
      "grad_norm": 3.944138805330047,
      "learning_rate": 9.813297972270575e-06,
      "loss": 1.0342,
      "step": 136
    },
    {
      "epoch": 0.3477157360406091,
      "grad_norm": 4.083763205392443,
      "learning_rate": 9.809569264197046e-06,
      "loss": 1.0035,
      "step": 137
    },
    {
      "epoch": 0.350253807106599,
      "grad_norm": 2.2341064648864335,
      "learning_rate": 9.805804412106197e-06,
      "loss": 1.0156,
      "step": 138
    },
    {
      "epoch": 0.35279187817258884,
      "grad_norm": 4.73655672514693,
      "learning_rate": 9.802003444290975e-06,
      "loss": 1.0575,
      "step": 139
    },
    {
      "epoch": 0.3553299492385787,
      "grad_norm": 3.6474051487216794,
      "learning_rate": 9.798166389315734e-06,
      "loss": 1.0331,
      "step": 140
    },
    {
      "epoch": 0.35786802030456855,
      "grad_norm": 2.0950970123191235,
      "learning_rate": 9.794293276016024e-06,
      "loss": 0.8774,
      "step": 141
    },
    {
      "epoch": 0.3604060913705584,
      "grad_norm": 4.020417271141957,
      "learning_rate": 9.79038413349838e-06,
      "loss": 1.0517,
      "step": 142
    },
    {
      "epoch": 0.3629441624365482,
      "grad_norm": 2.343919859731207,
      "learning_rate": 9.786438991140086e-06,
      "loss": 1.0388,
      "step": 143
    },
    {
      "epoch": 0.36548223350253806,
      "grad_norm": 3.5918593063524202,
      "learning_rate": 9.782457878588977e-06,
      "loss": 1.007,
      "step": 144
    },
    {
      "epoch": 0.3680203045685279,
      "grad_norm": 2.989308945487366,
      "learning_rate": 9.7784408257632e-06,
      "loss": 1.1402,
      "step": 145
    },
    {
      "epoch": 0.37055837563451777,
      "grad_norm": 3.2999750398100898,
      "learning_rate": 9.774387862850993e-06,
      "loss": 1.0778,
      "step": 146
    },
    {
      "epoch": 0.3730964467005076,
      "grad_norm": 2.39286494783793,
      "learning_rate": 9.77029902031046e-06,
      "loss": 1.0588,
      "step": 147
    },
    {
      "epoch": 0.3756345177664975,
      "grad_norm": 7.549790221744935,
      "learning_rate": 9.766174328869344e-06,
      "loss": 1.0188,
      "step": 148
    },
    {
      "epoch": 0.37817258883248733,
      "grad_norm": 2.026349609367698,
      "learning_rate": 9.762013819524788e-06,
      "loss": 0.9754,
      "step": 149
    },
    {
      "epoch": 0.38071065989847713,
      "grad_norm": 1.9809815038815315,
      "learning_rate": 9.75781752354311e-06,
      "loss": 1.1272,
      "step": 150
    },
    {
      "epoch": 0.383248730964467,
      "grad_norm": 2.332081936160816,
      "learning_rate": 9.753585472459564e-06,
      "loss": 1.0715,
      "step": 151
    },
    {
      "epoch": 0.38578680203045684,
      "grad_norm": 2.5164597293319217,
      "learning_rate": 9.749317698078109e-06,
      "loss": 1.1051,
      "step": 152
    },
    {
      "epoch": 0.3883248730964467,
      "grad_norm": 4.1363517122069435,
      "learning_rate": 9.745014232471161e-06,
      "loss": 0.9861,
      "step": 153
    },
    {
      "epoch": 0.39086294416243655,
      "grad_norm": 4.619566694659651,
      "learning_rate": 9.740675107979357e-06,
      "loss": 1.0185,
      "step": 154
    },
    {
      "epoch": 0.3934010152284264,
      "grad_norm": 3.055238311436169,
      "learning_rate": 9.736300357211309e-06,
      "loss": 1.0023,
      "step": 155
    },
    {
      "epoch": 0.39593908629441626,
      "grad_norm": 3.7295880831016373,
      "learning_rate": 9.731890013043367e-06,
      "loss": 1.0002,
      "step": 156
    },
    {
      "epoch": 0.39847715736040606,
      "grad_norm": 3.3508031862079637,
      "learning_rate": 9.727444108619365e-06,
      "loss": 1.1507,
      "step": 157
    },
    {
      "epoch": 0.4010152284263959,
      "grad_norm": 2.723651526969142,
      "learning_rate": 9.722962677350367e-06,
      "loss": 1.1364,
      "step": 158
    },
    {
      "epoch": 0.4035532994923858,
      "grad_norm": 3.774699978960873,
      "learning_rate": 9.718445752914427e-06,
      "loss": 1.1025,
      "step": 159
    },
    {
      "epoch": 0.40609137055837563,
      "grad_norm": 2.7659454853714927,
      "learning_rate": 9.713893369256334e-06,
      "loss": 1.1713,
      "step": 160
    },
    {
      "epoch": 0.4086294416243655,
      "grad_norm": 2.538756260743843,
      "learning_rate": 9.709305560587344e-06,
      "loss": 1.0911,
      "step": 161
    },
    {
      "epoch": 0.41116751269035534,
      "grad_norm": 4.320782683762776,
      "learning_rate": 9.704682361384941e-06,
      "loss": 1.1632,
      "step": 162
    },
    {
      "epoch": 0.4137055837563452,
      "grad_norm": 4.80159996293837,
      "learning_rate": 9.700023806392569e-06,
      "loss": 1.0217,
      "step": 163
    },
    {
      "epoch": 0.41624365482233505,
      "grad_norm": 2.7673753672207595,
      "learning_rate": 9.695329930619368e-06,
      "loss": 1.1201,
      "step": 164
    },
    {
      "epoch": 0.41878172588832485,
      "grad_norm": 3.104153414839049,
      "learning_rate": 9.690600769339916e-06,
      "loss": 1.078,
      "step": 165
    },
    {
      "epoch": 0.4213197969543147,
      "grad_norm": 7.423796156142243,
      "learning_rate": 9.685836358093964e-06,
      "loss": 1.1045,
      "step": 166
    },
    {
      "epoch": 0.42385786802030456,
      "grad_norm": 2.9797528305128083,
      "learning_rate": 9.681036732686165e-06,
      "loss": 1.1647,
      "step": 167
    },
    {
      "epoch": 0.4263959390862944,
      "grad_norm": 2.851862749531238,
      "learning_rate": 9.676201929185809e-06,
      "loss": 0.9319,
      "step": 168
    },
    {
      "epoch": 0.4289340101522843,
      "grad_norm": 2.8454139750098557,
      "learning_rate": 9.671331983926548e-06,
      "loss": 1.0268,
      "step": 169
    },
    {
      "epoch": 0.43147208121827413,
      "grad_norm": 1.9807490940657426,
      "learning_rate": 9.666426933506126e-06,
      "loss": 1.1493,
      "step": 170
    },
    {
      "epoch": 0.434010152284264,
      "grad_norm": 4.858906862885905,
      "learning_rate": 9.661486814786104e-06,
      "loss": 1.024,
      "step": 171
    },
    {
      "epoch": 0.4365482233502538,
      "grad_norm": 2.659416757635679,
      "learning_rate": 9.65651166489158e-06,
      "loss": 0.971,
      "step": 172
    },
    {
      "epoch": 0.43908629441624364,
      "grad_norm": 2.1524399400114764,
      "learning_rate": 9.651501521210916e-06,
      "loss": 1.0744,
      "step": 173
    },
    {
      "epoch": 0.4416243654822335,
      "grad_norm": 5.9861847351599415,
      "learning_rate": 9.646456421395447e-06,
      "loss": 0.9895,
      "step": 174
    },
    {
      "epoch": 0.44416243654822335,
      "grad_norm": 5.818455135086296,
      "learning_rate": 9.64137640335921e-06,
      "loss": 1.102,
      "step": 175
    },
    {
      "epoch": 0.4467005076142132,
      "grad_norm": 3.150237217459513,
      "learning_rate": 9.636261505278653e-06,
      "loss": 1.0035,
      "step": 176
    },
    {
      "epoch": 0.44923857868020306,
      "grad_norm": 2.971416590549205,
      "learning_rate": 9.631111765592339e-06,
      "loss": 1.0352,
      "step": 177
    },
    {
      "epoch": 0.4517766497461929,
      "grad_norm": 2.636645325503754,
      "learning_rate": 9.625927223000679e-06,
      "loss": 0.9163,
      "step": 178
    },
    {
      "epoch": 0.4543147208121827,
      "grad_norm": 3.6938262542992093,
      "learning_rate": 9.620707916465622e-06,
      "loss": 0.9884,
      "step": 179
    },
    {
      "epoch": 0.45685279187817257,
      "grad_norm": 2.0628042177329644,
      "learning_rate": 9.615453885210368e-06,
      "loss": 1.0689,
      "step": 180
    },
    {
      "epoch": 0.4593908629441624,
      "grad_norm": 2.490168476448274,
      "learning_rate": 9.610165168719079e-06,
      "loss": 1.0768,
      "step": 181
    },
    {
      "epoch": 0.4619289340101523,
      "grad_norm": 2.931811841708527,
      "learning_rate": 9.604841806736572e-06,
      "loss": 1.0419,
      "step": 182
    },
    {
      "epoch": 0.46446700507614214,
      "grad_norm": 2.678452333514363,
      "learning_rate": 9.599483839268027e-06,
      "loss": 1.0907,
      "step": 183
    },
    {
      "epoch": 0.467005076142132,
      "grad_norm": 2.535226923041242,
      "learning_rate": 9.594091306578687e-06,
      "loss": 0.9734,
      "step": 184
    },
    {
      "epoch": 0.46954314720812185,
      "grad_norm": 2.800305033909715,
      "learning_rate": 9.58866424919355e-06,
      "loss": 0.9831,
      "step": 185
    },
    {
      "epoch": 0.4720812182741117,
      "grad_norm": 2.1777888495427495,
      "learning_rate": 9.583202707897075e-06,
      "loss": 0.9666,
      "step": 186
    },
    {
      "epoch": 0.4746192893401015,
      "grad_norm": 2.2069652409303493,
      "learning_rate": 9.577706723732858e-06,
      "loss": 1.0929,
      "step": 187
    },
    {
      "epoch": 0.47715736040609136,
      "grad_norm": 2.9321078227457984,
      "learning_rate": 9.572176338003341e-06,
      "loss": 1.078,
      "step": 188
    },
    {
      "epoch": 0.4796954314720812,
      "grad_norm": 2.440269713026083,
      "learning_rate": 9.566611592269495e-06,
      "loss": 0.9856,
      "step": 189
    },
    {
      "epoch": 0.48223350253807107,
      "grad_norm": 2.703608155740871,
      "learning_rate": 9.5610125283505e-06,
      "loss": 1.1808,
      "step": 190
    },
    {
      "epoch": 0.4847715736040609,
      "grad_norm": 2.205546763469659,
      "learning_rate": 9.555379188323448e-06,
      "loss": 1.0306,
      "step": 191
    },
    {
      "epoch": 0.4873096446700508,
      "grad_norm": 2.6215335484791007,
      "learning_rate": 9.549711614523007e-06,
| "loss": 1.0581, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.48984771573604063, | |
| "grad_norm": 3.4288525903852793, | |
| "learning_rate": 9.54400984954112e-06, | |
| "loss": 1.0535, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.49238578680203043, | |
| "grad_norm": 2.2449382072438415, | |
| "learning_rate": 9.538273936226675e-06, | |
| "loss": 1.078, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.4949238578680203, | |
| "grad_norm": 2.10839131093687, | |
| "learning_rate": 9.532503917685179e-06, | |
| "loss": 1.0901, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.49746192893401014, | |
| "grad_norm": 2.6226023542694668, | |
| "learning_rate": 9.526699837278455e-06, | |
| "loss": 1.0362, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 2.233493733259919, | |
| "learning_rate": 9.520861738624288e-06, | |
| "loss": 1.1131, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.5025380710659898, | |
| "grad_norm": 3.472957860340417, | |
| "learning_rate": 9.514989665596114e-06, | |
| "loss": 1.0128, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.5050761421319797, | |
| "grad_norm": 3.2276635592510554, | |
| "learning_rate": 9.509083662322697e-06, | |
| "loss": 0.9769, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.5076142131979695, | |
| "grad_norm": 4.6768656571054175, | |
| "learning_rate": 9.503143773187773e-06, | |
| "loss": 0.9644, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5101522842639594, | |
| "grad_norm": 2.1315150716253464, | |
| "learning_rate": 9.497170042829737e-06, | |
| "loss": 1.0787, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.5126903553299492, | |
| "grad_norm": 3.4880915500896466, | |
| "learning_rate": 9.491162516141308e-06, | |
| "loss": 1.0436, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.5152284263959391, | |
| "grad_norm": 5.554999243181942, | |
| "learning_rate": 9.485121238269175e-06, | |
| "loss": 1.1596, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.5177664974619289, | |
| "grad_norm": 3.0133431645363062, | |
| "learning_rate": 9.479046254613673e-06, | |
| "loss": 1.0238, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.5203045685279187, | |
| "grad_norm": 2.6459316524578638, | |
| "learning_rate": 9.472937610828437e-06, | |
| "loss": 1.1887, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.5228426395939086, | |
| "grad_norm": 2.616874115600168, | |
| "learning_rate": 9.466795352820055e-06, | |
| "loss": 1.0816, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5253807106598984, | |
| "grad_norm": 3.745002415078362, | |
| "learning_rate": 9.460619526747732e-06, | |
| "loss": 1.0252, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.5279187817258884, | |
| "grad_norm": 5.920555908768616, | |
| "learning_rate": 9.454410179022932e-06, | |
| "loss": 0.9879, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5304568527918782, | |
| "grad_norm": 5.201646897805685, | |
| "learning_rate": 9.448167356309041e-06, | |
| "loss": 1.2475, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.5329949238578681, | |
| "grad_norm": 2.418499819521257, | |
| "learning_rate": 9.441891105521005e-06, | |
| "loss": 0.8801, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5355329949238579, | |
| "grad_norm": 3.1655419423032822, | |
| "learning_rate": 9.435581473824985e-06, | |
| "loss": 1.1705, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.5380710659898477, | |
| "grad_norm": 2.3183177705846383, | |
| "learning_rate": 9.429238508638001e-06, | |
| "loss": 1.1676, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5406091370558376, | |
| "grad_norm": 2.9767162225106922, | |
| "learning_rate": 9.422862257627573e-06, | |
| "loss": 1.0033, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.5431472081218274, | |
| "grad_norm": 3.438724989341635, | |
| "learning_rate": 9.416452768711367e-06, | |
| "loss": 0.9988, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.5456852791878173, | |
| "grad_norm": 2.8923643224417424, | |
| "learning_rate": 9.41001009005683e-06, | |
| "loss": 1.0348, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.5482233502538071, | |
| "grad_norm": 3.9138770051171816, | |
| "learning_rate": 9.40353427008083e-06, | |
| "loss": 1.1145, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.550761421319797, | |
| "grad_norm": 4.634738293590086, | |
| "learning_rate": 9.397025357449298e-06, | |
| "loss": 1.0498, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.5532994923857868, | |
| "grad_norm": 2.197412410654519, | |
| "learning_rate": 9.39048340107685e-06, | |
| "loss": 0.9445, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.5558375634517766, | |
| "grad_norm": 2.9245714608885307, | |
| "learning_rate": 9.383908450126436e-06, | |
| "loss": 1.1004, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.5583756345177665, | |
| "grad_norm": 4.248925201224667, | |
| "learning_rate": 9.377300554008947e-06, | |
| "loss": 1.2622, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5609137055837563, | |
| "grad_norm": 2.321625730656605, | |
| "learning_rate": 9.370659762382873e-06, | |
| "loss": 1.044, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.5634517766497462, | |
| "grad_norm": 2.345083062827609, | |
| "learning_rate": 9.3639861251539e-06, | |
| "loss": 1.0076, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.565989847715736, | |
| "grad_norm": 5.040868975580229, | |
| "learning_rate": 9.357279692474563e-06, | |
| "loss": 1.0115, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.5685279187817259, | |
| "grad_norm": 4.7793447493473105, | |
| "learning_rate": 9.350540514743844e-06, | |
| "loss": 1.0627, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5710659898477157, | |
| "grad_norm": 4.709717368951021, | |
| "learning_rate": 9.343768642606813e-06, | |
| "loss": 0.9392, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.5736040609137056, | |
| "grad_norm": 2.1830206359475723, | |
| "learning_rate": 9.336964126954235e-06, | |
| "loss": 1.0748, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5761421319796954, | |
| "grad_norm": 2.1944035396382704, | |
| "learning_rate": 9.330127018922195e-06, | |
| "loss": 0.9683, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.5786802030456852, | |
| "grad_norm": 2.3010930914999967, | |
| "learning_rate": 9.323257369891702e-06, | |
| "loss": 1.1036, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5812182741116751, | |
| "grad_norm": 2.719578205767353, | |
| "learning_rate": 9.316355231488324e-06, | |
| "loss": 1.0688, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.583756345177665, | |
| "grad_norm": 3.681549142591368, | |
| "learning_rate": 9.309420655581777e-06, | |
| "loss": 1.0006, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5862944162436549, | |
| "grad_norm": 2.279346619615211, | |
| "learning_rate": 9.302453694285549e-06, | |
| "loss": 1.1434, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.5888324873096447, | |
| "grad_norm": 2.6059263472191754, | |
| "learning_rate": 9.29545439995651e-06, | |
| "loss": 1.035, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5913705583756346, | |
| "grad_norm": 2.4004634697913225, | |
| "learning_rate": 9.288422825194502e-06, | |
| "loss": 0.8866, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.5939086294416244, | |
| "grad_norm": 2.028307692805792, | |
| "learning_rate": 9.281359022841966e-06, | |
| "loss": 0.9655, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5964467005076142, | |
| "grad_norm": 3.94892590556114, | |
| "learning_rate": 9.274263045983529e-06, | |
| "loss": 1.0461, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.5989847715736041, | |
| "grad_norm": 2.5639365559174188, | |
| "learning_rate": 9.267134947945611e-06, | |
| "loss": 1.1191, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.6015228426395939, | |
| "grad_norm": 2.5771861483758975, | |
| "learning_rate": 9.259974782296023e-06, | |
| "loss": 1.1159, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.6040609137055838, | |
| "grad_norm": 4.259283248370748, | |
| "learning_rate": 9.252782602843565e-06, | |
| "loss": 1.0164, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.6065989847715736, | |
| "grad_norm": 1.9739014796831083, | |
| "learning_rate": 9.245558463637623e-06, | |
| "loss": 0.9955, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.6091370558375635, | |
| "grad_norm": 2.160593531593977, | |
| "learning_rate": 9.238302418967757e-06, | |
| "loss": 1.0866, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6116751269035533, | |
| "grad_norm": 2.648999822170712, | |
| "learning_rate": 9.231014523363303e-06, | |
| "loss": 1.0848, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.6142131979695431, | |
| "grad_norm": 3.7793807875511805, | |
| "learning_rate": 9.223694831592953e-06, | |
| "loss": 1.0154, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.616751269035533, | |
| "grad_norm": 3.092399515241672, | |
| "learning_rate": 9.216343398664349e-06, | |
| "loss": 1.0185, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.6192893401015228, | |
| "grad_norm": 3.202644866882686, | |
| "learning_rate": 9.208960279823672e-06, | |
| "loss": 1.1874, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.6218274111675127, | |
| "grad_norm": 2.0162635261279416, | |
| "learning_rate": 9.201545530555214e-06, | |
| "loss": 1.115, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.6243654822335025, | |
| "grad_norm": 5.598301005765664, | |
| "learning_rate": 9.194099206580981e-06, | |
| "loss": 1.2179, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.6269035532994924, | |
| "grad_norm": 2.22095674856811, | |
| "learning_rate": 9.18662136386026e-06, | |
| "loss": 0.9959, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.6294416243654822, | |
| "grad_norm": 3.581389026739919, | |
| "learning_rate": 9.1791120585892e-06, | |
| "loss": 1.1269, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.631979695431472, | |
| "grad_norm": 5.445257086874675, | |
| "learning_rate": 9.171571347200392e-06, | |
| "loss": 1.2296, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.6345177664974619, | |
| "grad_norm": 2.24392675323956, | |
| "learning_rate": 9.163999286362445e-06, | |
| "loss": 1.0254, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6370558375634517, | |
| "grad_norm": 4.167042508379098, | |
| "learning_rate": 9.156395932979563e-06, | |
| "loss": 1.1748, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.6395939086294417, | |
| "grad_norm": 2.5891911677793664, | |
| "learning_rate": 9.14876134419111e-06, | |
| "loss": 1.1466, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.6421319796954315, | |
| "grad_norm": 2.994985140303232, | |
| "learning_rate": 9.141095577371185e-06, | |
| "loss": 0.9818, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.6446700507614214, | |
| "grad_norm": 2.3912668038646854, | |
| "learning_rate": 9.133398690128194e-06, | |
| "loss": 1.0164, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.6472081218274112, | |
| "grad_norm": 2.1809817266204106, | |
| "learning_rate": 9.125670740304412e-06, | |
| "loss": 1.0514, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.649746192893401, | |
| "grad_norm": 3.490749098234336, | |
| "learning_rate": 9.117911785975548e-06, | |
| "loss": 1.0449, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.6522842639593909, | |
| "grad_norm": 4.203566340194239, | |
| "learning_rate": 9.110121885450311e-06, | |
| "loss": 1.1813, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.6548223350253807, | |
| "grad_norm": 3.196719647899695, | |
| "learning_rate": 9.102301097269974e-06, | |
| "loss": 1.0271, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.6573604060913706, | |
| "grad_norm": 3.076073531440758, | |
| "learning_rate": 9.094449480207933e-06, | |
| "loss": 1.036, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.6598984771573604, | |
| "grad_norm": 5.182177678783312, | |
| "learning_rate": 9.086567093269253e-06, | |
| "loss": 1.1141, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6624365482233503, | |
| "grad_norm": 2.357962719234063, | |
| "learning_rate": 9.078653995690248e-06, | |
| "loss": 1.0862, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.6649746192893401, | |
| "grad_norm": 2.0784516657027297, | |
| "learning_rate": 9.070710246938017e-06, | |
| "loss": 0.9983, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.6675126903553299, | |
| "grad_norm": 2.5130820726794227, | |
| "learning_rate": 9.062735906710004e-06, | |
| "loss": 1.0241, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.6700507614213198, | |
| "grad_norm": 2.229905635017876, | |
| "learning_rate": 9.05473103493355e-06, | |
| "loss": 1.012, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.6725888324873096, | |
| "grad_norm": 4.327860759002616, | |
| "learning_rate": 9.046695691765436e-06, | |
| "loss": 1.062, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.6751269035532995, | |
| "grad_norm": 3.72671924192454, | |
| "learning_rate": 9.038629937591445e-06, | |
| "loss": 1.1112, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.6776649746192893, | |
| "grad_norm": 3.125309450876512, | |
| "learning_rate": 9.03053383302589e-06, | |
| "loss": 1.0586, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.6802030456852792, | |
| "grad_norm": 2.5688387996257704, | |
| "learning_rate": 9.022407438911177e-06, | |
| "loss": 1.1105, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.682741116751269, | |
| "grad_norm": 3.1366648310116125, | |
| "learning_rate": 9.01425081631733e-06, | |
| "loss": 0.9882, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.6852791878172588, | |
| "grad_norm": 4.202684164859739, | |
| "learning_rate": 9.006064026541549e-06, | |
| "loss": 1.016, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6878172588832487, | |
| "grad_norm": 2.7148085939237383, | |
| "learning_rate": 8.997847131107731e-06, | |
| "loss": 1.0835, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.6903553299492385, | |
| "grad_norm": 2.9688800803660285, | |
| "learning_rate": 8.989600191766028e-06, | |
| "loss": 1.0059, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6928934010152284, | |
| "grad_norm": 3.2613550555880595, | |
| "learning_rate": 8.981323270492367e-06, | |
| "loss": 1.0845, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.6954314720812182, | |
| "grad_norm": 6.7618332493465765, | |
| "learning_rate": 8.973016429487989e-06, | |
| "loss": 1.1204, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.6979695431472082, | |
| "grad_norm": 3.994336467537423, | |
| "learning_rate": 8.964679731178984e-06, | |
| "loss": 0.9889, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.700507614213198, | |
| "grad_norm": 2.9602083850724146, | |
| "learning_rate": 8.956313238215824e-06, | |
| "loss": 1.01, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.7030456852791879, | |
| "grad_norm": 2.36727770135421, | |
| "learning_rate": 8.947917013472885e-06, | |
| "loss": 1.14, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.7055837563451777, | |
| "grad_norm": 2.875102362311937, | |
| "learning_rate": 8.939491120047974e-06, | |
| "loss": 1.0227, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.7081218274111675, | |
| "grad_norm": 2.3483599719919086, | |
| "learning_rate": 8.931035621261865e-06, | |
| "loss": 1.0908, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.7106598984771574, | |
| "grad_norm": 3.5100771689299584, | |
| "learning_rate": 8.922550580657816e-06, | |
| "loss": 0.9979, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7131979695431472, | |
| "grad_norm": 2.403015013570231, | |
| "learning_rate": 8.914036062001089e-06, | |
| "loss": 1.0621, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.7157360406091371, | |
| "grad_norm": 3.7415825640690956, | |
| "learning_rate": 8.905492129278478e-06, | |
| "loss": 1.0554, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.7182741116751269, | |
| "grad_norm": 3.4701854151842237, | |
| "learning_rate": 8.896918846697822e-06, | |
| "loss": 0.9954, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.7208121827411168, | |
| "grad_norm": 2.984814537688729, | |
| "learning_rate": 8.888316278687526e-06, | |
| "loss": 0.9768, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7233502538071066, | |
| "grad_norm": 3.943831792187661, | |
| "learning_rate": 8.879684489896073e-06, | |
| "loss": 0.9808, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.7258883248730964, | |
| "grad_norm": 3.0862729355784553, | |
| "learning_rate": 8.871023545191547e-06, | |
| "loss": 0.998, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.7284263959390863, | |
| "grad_norm": 3.4902562190166386, | |
| "learning_rate": 8.862333509661129e-06, | |
| "loss": 1.063, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.7309644670050761, | |
| "grad_norm": 3.519136766999423, | |
| "learning_rate": 8.85361444861063e-06, | |
| "loss": 1.1137, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.733502538071066, | |
| "grad_norm": 3.439803607907018, | |
| "learning_rate": 8.844866427563983e-06, | |
| "loss": 1.029, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.7360406091370558, | |
| "grad_norm": 4.0784940602279836, | |
| "learning_rate": 8.836089512262753e-06, | |
| "loss": 0.997, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.7385786802030457, | |
| "grad_norm": 3.4814302353846798, | |
| "learning_rate": 8.82728376866565e-06, | |
| "loss": 1.2135, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.7411167512690355, | |
| "grad_norm": 3.6326143786283467, | |
| "learning_rate": 8.818449262948028e-06, | |
| "loss": 1.0662, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.7436548223350253, | |
| "grad_norm": 2.7030639665664666, | |
| "learning_rate": 8.80958606150139e-06, | |
| "loss": 1.1053, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.7461928934010152, | |
| "grad_norm": 6.2321761427881155, | |
| "learning_rate": 8.800694230932885e-06, | |
| "loss": 1.0579, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.748730964467005, | |
| "grad_norm": 4.294180075723966, | |
| "learning_rate": 8.791773838064812e-06, | |
| "loss": 1.0612, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.751269035532995, | |
| "grad_norm": 3.8269300062055196, | |
| "learning_rate": 8.78282494993412e-06, | |
| "loss": 0.9201, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.7538071065989848, | |
| "grad_norm": 2.3404396349951706, | |
| "learning_rate": 8.773847633791897e-06, | |
| "loss": 1.017, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.7563451776649747, | |
| "grad_norm": 3.8128462739169495, | |
| "learning_rate": 8.764841957102866e-06, | |
| "loss": 1.0252, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.7588832487309645, | |
| "grad_norm": 2.287311243612682, | |
| "learning_rate": 8.755807987544884e-06, | |
| "loss": 1.1611, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.7614213197969543, | |
| "grad_norm": 3.7690746307525744, | |
| "learning_rate": 8.74674579300843e-06, | |
| "loss": 0.9457, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7639593908629442, | |
| "grad_norm": 2.048134697904296, | |
| "learning_rate": 8.737655441596088e-06, | |
| "loss": 0.9724, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.766497461928934, | |
| "grad_norm": 2.566215053585969, | |
| "learning_rate": 8.72853700162205e-06, | |
| "loss": 0.9773, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.7690355329949239, | |
| "grad_norm": 2.151885805636772, | |
| "learning_rate": 8.71939054161159e-06, | |
| "loss": 1.0774, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.7715736040609137, | |
| "grad_norm": 3.313132981406172, | |
| "learning_rate": 8.710216130300551e-06, | |
| "loss": 1.1324, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.7741116751269036, | |
| "grad_norm": 2.056935287783317, | |
| "learning_rate": 8.701013836634833e-06, | |
| "loss": 1.0126, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.7766497461928934, | |
| "grad_norm": 5.346543217053909, | |
| "learning_rate": 8.691783729769874e-06, | |
| "loss": 0.9938, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.7791878172588832, | |
| "grad_norm": 2.9607570187514707, | |
| "learning_rate": 8.682525879070126e-06, | |
| "loss": 1.1109, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.7817258883248731, | |
| "grad_norm": 2.861744851184498, | |
| "learning_rate": 8.673240354108539e-06, | |
| "loss": 0.9496, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.7842639593908629, | |
| "grad_norm": 2.58606470902963, | |
| "learning_rate": 8.663927224666034e-06, | |
| "loss": 0.9539, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.7868020304568528, | |
| "grad_norm": 2.109484594420891, | |
| "learning_rate": 8.654586560730981e-06, | |
| "loss": 1.125, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.7893401015228426, | |
| "grad_norm": 4.024325632708077, | |
| "learning_rate": 8.645218432498673e-06, | |
| "loss": 1.0123, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.7918781725888325, | |
| "grad_norm": 2.597662979848648, | |
| "learning_rate": 8.635822910370793e-06, | |
| "loss": 1.1692, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.7944162436548223, | |
| "grad_norm": 2.2166134294153075, | |
| "learning_rate": 8.626400064954897e-06, | |
| "loss": 0.9594, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.7969543147208121, | |
| "grad_norm": 2.0221730706708785, | |
| "learning_rate": 8.616949967063871e-06, | |
| "loss": 0.8921, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.799492385786802, | |
| "grad_norm": 2.8323976488528055, | |
| "learning_rate": 8.607472687715408e-06, | |
| "loss": 1.0321, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.8020304568527918, | |
| "grad_norm": 2.84440132443287, | |
| "learning_rate": 8.597968298131464e-06, | |
| "loss": 1.0595, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.8045685279187818, | |
| "grad_norm": 2.320484455637419, | |
| "learning_rate": 8.588436869737737e-06, | |
| "loss": 1.0101, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.8071065989847716, | |
| "grad_norm": 2.4042485235745965, | |
| "learning_rate": 8.578878474163115e-06, | |
| "loss": 1.025, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.8096446700507615, | |
| "grad_norm": 2.2100438666281783, | |
| "learning_rate": 8.56929318323915e-06, | |
| "loss": 0.8702, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.8121827411167513, | |
| "grad_norm": 2.600141765954614, | |
| "learning_rate": 8.559681068999509e-06, | |
| "loss": 1.042, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8147208121827412, | |
| "grad_norm": 3.312136758797296, | |
| "learning_rate": 8.550042203679441e-06, | |
| "loss": 1.0783, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.817258883248731, | |
| "grad_norm": 2.355781755993528, | |
| "learning_rate": 8.540376659715226e-06, | |
| "loss": 1.121, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.8197969543147208, | |
| "grad_norm": 2.033914941407007, | |
| "learning_rate": 8.530684509743639e-06, | |
| "loss": 0.9748, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.8223350253807107, | |
| "grad_norm": 4.43870060560367, | |
| "learning_rate": 8.520965826601394e-06, | |
| "loss": 1.1324, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.8248730964467005, | |
| "grad_norm": 2.2923724345059813, | |
| "learning_rate": 8.511220683324608e-06, | |
| "loss": 0.918, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.8274111675126904, | |
| "grad_norm": 2.2068567850563228, | |
| "learning_rate": 8.501449153148243e-06, | |
| "loss": 0.9338, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.8299492385786802, | |
| "grad_norm": 2.589213829583448, | |
| "learning_rate": 8.491651309505562e-06, | |
| "loss": 1.112, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.8324873096446701, | |
| "grad_norm": 2.1974385541047656, | |
| "learning_rate": 8.48182722602757e-06, | |
| "loss": 0.8901, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.8350253807106599, | |
| "grad_norm": 7.061053266476324, | |
| "learning_rate": 8.47197697654247e-06, | |
| "loss": 1.0895, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.8375634517766497, | |
| "grad_norm": 3.0310872002885847, | |
| "learning_rate": 8.462100635075097e-06, | |
| "loss": 0.9703, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.8401015228426396, | |
| "grad_norm": 2.298254053197544, | |
| "learning_rate": 8.452198275846372e-06, | |
| "loss": 1.0766, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.8426395939086294, | |
| "grad_norm": 2.942514438530551, | |
| "learning_rate": 8.442269973272743e-06, | |
| "loss": 1.1541, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.8451776649746193, | |
| "grad_norm": 2.0603391728533573, | |
| "learning_rate": 8.432315801965616e-06, | |
| "loss": 1.0356, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.8477157360406091, | |
| "grad_norm": 2.34696517553944, | |
| "learning_rate": 8.422335836730804e-06, | |
| "loss": 0.9852, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.850253807106599, | |
| "grad_norm": 4.403994489595826, | |
| "learning_rate": 8.412330152567965e-06, | |
| "loss": 1.1587, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.8527918781725888, | |
| "grad_norm": 3.7915353107441376, | |
| "learning_rate": 8.40229882467003e-06, | |
| "loss": 1.0281, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.8553299492385786, | |
| "grad_norm": 2.1893832327328115, | |
| "learning_rate": 8.392241928422644e-06, | |
| "loss": 1.1753, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.8578680203045685, | |
| "grad_norm": 3.6665615611814615, | |
| "learning_rate": 8.382159539403605e-06, | |
| "loss": 0.9433, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.8604060913705583, | |
| "grad_norm": 5.388123821372951, | |
| "learning_rate": 8.372051733382283e-06, | |
| "loss": 0.9838, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.8629441624365483, | |
| "grad_norm": 2.2459723217939045, | |
| "learning_rate": 8.361918586319058e-06, | |
| "loss": 0.9915, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.8654822335025381, | |
| "grad_norm": 4.969255505433864, | |
| "learning_rate": 8.351760174364752e-06, | |
| "loss": 1.0536, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.868020304568528, | |
| "grad_norm": 2.1409299322309177, | |
| "learning_rate": 8.341576573860049e-06, | |
| "loss": 1.0697, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.8705583756345178, | |
| "grad_norm": 2.28979733193756, | |
| "learning_rate": 8.331367861334928e-06, | |
| "loss": 0.9796, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.8730964467005076, | |
| "grad_norm": 2.215159852607918, | |
| "learning_rate": 8.321134113508089e-06, | |
| "loss": 0.9487, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.8756345177664975, | |
| "grad_norm": 7.098608089968907, | |
| "learning_rate": 8.310875407286364e-06, | |
| "loss": 0.9679, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.8781725888324873, | |
| "grad_norm": 2.494977976840313, | |
| "learning_rate": 8.300591819764155e-06, | |
| "loss": 1.0262, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.8807106598984772, | |
| "grad_norm": 4.378236657118699, | |
| "learning_rate": 8.290283428222842e-06, | |
| "loss": 0.9634, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.883248730964467, | |
| "grad_norm": 2.5357774690099433, | |
| "learning_rate": 8.279950310130218e-06, | |
| "loss": 1.1934, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.8857868020304569, | |
| "grad_norm": 2.4838144435457172, | |
| "learning_rate": 8.269592543139883e-06, | |
| "loss": 1.1536, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.8883248730964467, | |
| "grad_norm": 4.7088721852127815, | |
| "learning_rate": 8.259210205090683e-06, | |
| "loss": 1.0596, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.8908629441624365, | |
| "grad_norm": 2.3904986847786875, | |
| "learning_rate": 8.248803374006113e-06, | |
| "loss": 0.9723, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.8934010152284264, | |
| "grad_norm": 3.0593901416640823, | |
| "learning_rate": 8.238372128093736e-06, | |
| "loss": 1.0769, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.8959390862944162, | |
| "grad_norm": 5.24826281940527, | |
| "learning_rate": 8.227916545744588e-06, | |
| "loss": 1.0171, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.8984771573604061, | |
| "grad_norm": 2.9561545208571394, | |
| "learning_rate": 8.2174367055326e-06, | |
| "loss": 1.0627, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.9010152284263959, | |
| "grad_norm": 4.302396818897092, | |
| "learning_rate": 8.206932686213998e-06, | |
| "loss": 1.1551, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.9035532994923858, | |
| "grad_norm": 4.862418884348732, | |
| "learning_rate": 8.196404566726712e-06, | |
| "loss": 0.9596, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.9060913705583756, | |
| "grad_norm": 3.2420027025969103, | |
| "learning_rate": 8.185852426189794e-06, | |
| "loss": 1.0267, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.9086294416243654, | |
| "grad_norm": 3.376880772597684, | |
| "learning_rate": 8.175276343902802e-06, | |
| "loss": 0.947, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.9111675126903553, | |
| "grad_norm": 4.603667399163658, | |
| "learning_rate": 8.16467639934523e-06, | |
| "loss": 0.9864, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.9137055837563451, | |
| "grad_norm": 2.9576724755101678, | |
| "learning_rate": 8.154052672175888e-06, | |
| "loss": 1.0051, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.916243654822335, | |
| "grad_norm": 4.320905650850623, | |
| "learning_rate": 8.143405242232317e-06, | |
| "loss": 0.998, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.9187817258883249, | |
| "grad_norm": 5.449431302042425, | |
| "learning_rate": 8.132734189530182e-06, | |
| "loss": 0.8851, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.9213197969543148, | |
| "grad_norm": 2.1253363817718136, | |
| "learning_rate": 8.122039594262679e-06, | |
| "loss": 0.8947, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.9238578680203046, | |
| "grad_norm": 3.187675631861296, | |
| "learning_rate": 8.111321536799921e-06, | |
| "loss": 1.0377, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.9263959390862944, | |
| "grad_norm": 2.022120835826271, | |
| "learning_rate": 8.100580097688342e-06, | |
| "loss": 1.0793, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.9289340101522843, | |
| "grad_norm": 2.645049531416873, | |
| "learning_rate": 8.08981535765009e-06, | |
| "loss": 0.9978, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.9314720812182741, | |
| "grad_norm": 4.212018855531683, | |
| "learning_rate": 8.07902739758242e-06, | |
| "loss": 1.0244, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.934010152284264, | |
| "grad_norm": 5.9126393432243916, | |
| "learning_rate": 8.068216298557088e-06, | |
| "loss": 0.9787, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.9365482233502538, | |
| "grad_norm": 2.2116674048323857, | |
| "learning_rate": 8.057382141819734e-06, | |
| "loss": 0.9862, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.9390862944162437, | |
| "grad_norm": 4.548662027137182, | |
| "learning_rate": 8.046525008789283e-06, | |
| "loss": 0.9965, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.9416243654822335, | |
| "grad_norm": 2.7198427380964105, | |
| "learning_rate": 8.035644981057327e-06, | |
| "loss": 1.174, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.9441624365482234, | |
| "grad_norm": 1.8298070803821809, | |
| "learning_rate": 8.024742140387506e-06, | |
| "loss": 0.8985, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.9467005076142132, | |
| "grad_norm": 3.1445038648413948, | |
| "learning_rate": 8.013816568714905e-06, | |
| "loss": 0.9881, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.949238578680203, | |
| "grad_norm": 2.9123896714012103, | |
| "learning_rate": 8.002868348145436e-06, | |
| "loss": 1.2471, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.9517766497461929, | |
| "grad_norm": 3.9489207704815428, | |
| "learning_rate": 7.99189756095521e-06, | |
| "loss": 1.0102, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.9543147208121827, | |
| "grad_norm": 2.317711292143962, | |
| "learning_rate": 7.980904289589932e-06, | |
| "loss": 1.153, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.9568527918781726, | |
| "grad_norm": 2.285220370350821, | |
| "learning_rate": 7.969888616664275e-06, | |
| "loss": 1.1004, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.9593908629441624, | |
| "grad_norm": 4.639698727940778, | |
| "learning_rate": 7.95885062496126e-06, | |
| "loss": 1.215, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.9619289340101523, | |
| "grad_norm": 2.187854846843315, | |
| "learning_rate": 7.947790397431631e-06, | |
| "loss": 1.0363, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.9644670050761421, | |
| "grad_norm": 2.087236809552686, | |
| "learning_rate": 7.936708017193242e-06, | |
| "loss": 1.093, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.9670050761421319, | |
| "grad_norm": 2.47349126593795, | |
| "learning_rate": 7.92560356753042e-06, | |
| "loss": 0.994, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.9695431472081218, | |
| "grad_norm": 5.206032659383499, | |
| "learning_rate": 7.914477131893344e-06, | |
| "loss": 1.1066, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.9720812182741116, | |
| "grad_norm": 4.546492297905296, | |
| "learning_rate": 7.903328793897418e-06, | |
| "loss": 1.0431, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.9746192893401016, | |
| "grad_norm": 3.288068386496405, | |
| "learning_rate": 7.892158637322647e-06, | |
| "loss": 1.147, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.9771573604060914, | |
| "grad_norm": 5.711480442784147, | |
| "learning_rate": 7.880966746112995e-06, | |
| "loss": 1.0171, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.9796954314720813, | |
| "grad_norm": 3.988727515802901, | |
| "learning_rate": 7.869753204375772e-06, | |
| "loss": 0.8908, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.9822335025380711, | |
| "grad_norm": 3.060563447708997, | |
| "learning_rate": 7.858518096380984e-06, | |
| "loss": 0.9856, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.9847715736040609, | |
| "grad_norm": 2.4682459956195637, | |
| "learning_rate": 7.847261506560716e-06, | |
| "loss": 1.0148, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.9873096446700508, | |
| "grad_norm": 5.223626547133224, | |
| "learning_rate": 7.835983519508477e-06, | |
| "loss": 1.0348, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.9898477157360406, | |
| "grad_norm": 4.123284868828342, | |
| "learning_rate": 7.824684219978591e-06, | |
| "loss": 1.1459, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.9923857868020305, | |
| "grad_norm": 4.014923037363868, | |
| "learning_rate": 7.813363692885535e-06, | |
| "loss": 0.9656, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.9949238578680203, | |
| "grad_norm": 3.8267198551637507, | |
| "learning_rate": 7.802022023303319e-06, | |
| "loss": 1.0511, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.9974619289340102, | |
| "grad_norm": 3.328421605030423, | |
| "learning_rate": 7.790659296464833e-06, | |
| "loss": 1.1031, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 3.7683208975790343, | |
| "learning_rate": 7.779275597761215e-06, | |
| "loss": 1.0182, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.00253807106599, | |
| "grad_norm": 3.0484171116238525, | |
| "learning_rate": 7.76787101274121e-06, | |
| "loss": 0.7197, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.0050761421319796, | |
| "grad_norm": 2.659217933312505, | |
| "learning_rate": 7.756445627110523e-06, | |
| "loss": 0.8507, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.0076142131979695, | |
| "grad_norm": 2.0241729265622515, | |
| "learning_rate": 7.74499952673117e-06, | |
| "loss": 0.711, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.0101522842639594, | |
| "grad_norm": 2.7605289419713666, | |
| "learning_rate": 7.733532797620849e-06, | |
| "loss": 0.9235, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.0126903553299493, | |
| "grad_norm": 4.332032153367687, | |
| "learning_rate": 7.722045525952272e-06, | |
| "loss": 0.8808, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.015228426395939, | |
| "grad_norm": 2.8080304014227746, | |
| "learning_rate": 7.71053779805254e-06, | |
| "loss": 0.7194, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.017766497461929, | |
| "grad_norm": 3.161578778385084, | |
| "learning_rate": 7.699009700402476e-06, | |
| "loss": 0.7822, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.0203045685279188, | |
| "grad_norm": 2.988252242343333, | |
| "learning_rate": 7.68746131963598e-06, | |
| "loss": 0.7483, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.0228426395939085, | |
| "grad_norm": 2.888286673140523, | |
| "learning_rate": 7.675892742539392e-06, | |
| "loss": 0.8125, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.0253807106598984, | |
| "grad_norm": 2.2435180847004577, | |
| "learning_rate": 7.664304056050813e-06, | |
| "loss": 0.6922, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.0279187817258884, | |
| "grad_norm": 3.198583049737055, | |
| "learning_rate": 7.652695347259476e-06, | |
| "loss": 0.8004, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.0304568527918783, | |
| "grad_norm": 3.2992117950438185, | |
| "learning_rate": 7.641066703405076e-06, | |
| "loss": 0.8566, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.032994923857868, | |
| "grad_norm": 4.463047095431445, | |
| "learning_rate": 7.629418211877129e-06, | |
| "loss": 0.6083, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.0355329949238579, | |
| "grad_norm": 3.0485877960149477, | |
| "learning_rate": 7.6177499602143e-06, | |
| "loss": 0.6715, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.0380710659898478, | |
| "grad_norm": 2.7853627353229062, | |
| "learning_rate": 7.6060620361037495e-06, | |
| "loss": 0.6847, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.0406091370558375, | |
| "grad_norm": 2.519657419339155, | |
| "learning_rate": 7.594354527380485e-06, | |
| "loss": 0.846, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.0431472081218274, | |
| "grad_norm": 2.8052149935489137, | |
| "learning_rate": 7.582627522026686e-06, | |
| "loss": 0.7273, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.0456852791878173, | |
| "grad_norm": 6.754274818674537, | |
| "learning_rate": 7.5708811081710535e-06, | |
| "loss": 0.6285, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.0482233502538072, | |
| "grad_norm": 3.6424686022316433, | |
| "learning_rate": 7.55911537408814e-06, | |
| "loss": 0.6845, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.0507614213197969, | |
| "grad_norm": 2.7139182825484487, | |
| "learning_rate": 7.547330408197695e-06, | |
| "loss": 0.6806, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.0532994923857868, | |
| "grad_norm": 8.510738024789926, | |
| "learning_rate": 7.535526299063991e-06, | |
| "loss": 0.7512, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.0558375634517767, | |
| "grad_norm": 4.04200237479796, | |
| "learning_rate": 7.523703135395166e-06, | |
| "loss": 0.7041, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.0583756345177664, | |
| "grad_norm": 2.260811025516907, | |
| "learning_rate": 7.511861006042549e-06, | |
| "loss": 0.6766, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.0609137055837563, | |
| "grad_norm": 4.490561125470434, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.7141, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.0634517766497462, | |
| "grad_norm": 2.194578251618841, | |
| "learning_rate": 7.488120206403238e-06, | |
| "loss": 0.659, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.0659898477157361, | |
| "grad_norm": 2.6947953262722506, | |
| "learning_rate": 7.476221714529167e-06, | |
| "loss": 0.7283, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.0685279187817258, | |
| "grad_norm": 2.671286895370636, | |
| "learning_rate": 7.4643046137952135e-06, | |
| "loss": 0.7255, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.0710659898477157, | |
| "grad_norm": 4.777736320375133, | |
| "learning_rate": 7.452368993758646e-06, | |
| "loss": 0.6922, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.0736040609137056, | |
| "grad_norm": 2.839467923831252, | |
| "learning_rate": 7.440414944115909e-06, | |
| "loss": 0.708, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.0761421319796955, | |
| "grad_norm": 4.561346498567163, | |
| "learning_rate": 7.428442554701945e-06, | |
| "loss": 0.7903, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.0786802030456852, | |
| "grad_norm": 2.1485750563974624, | |
| "learning_rate": 7.416451915489521e-06, | |
| "loss": 0.7441, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.0812182741116751, | |
| "grad_norm": 3.8074935327749015, | |
| "learning_rate": 7.404443116588548e-06, | |
| "loss": 0.7575, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.083756345177665, | |
| "grad_norm": 2.4065976714180293, | |
| "learning_rate": 7.392416248245412e-06, | |
| "loss": 0.7296, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.0862944162436547, | |
| "grad_norm": 4.492198367779481, | |
| "learning_rate": 7.38037140084229e-06, | |
| "loss": 0.7928, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.0888324873096447, | |
| "grad_norm": 3.2332302381426707, | |
| "learning_rate": 7.368308664896471e-06, | |
| "loss": 0.7579, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.0913705583756346, | |
| "grad_norm": 6.182125255230219, | |
| "learning_rate": 7.356228131059675e-06, | |
| "loss": 0.6711, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.0939086294416245, | |
| "grad_norm": 2.73756664858534, | |
| "learning_rate": 7.344129890117377e-06, | |
| "loss": 0.7056, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.0964467005076142, | |
| "grad_norm": 2.1816031611580367, | |
| "learning_rate": 7.332014032988123e-06, | |
| "loss": 0.8818, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.098984771573604, | |
| "grad_norm": 2.1402473991159288, | |
| "learning_rate": 7.319880650722838e-06, | |
| "loss": 0.7043, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.101522842639594, | |
| "grad_norm": 4.3141992706587535, | |
| "learning_rate": 7.307729834504155e-06, | |
| "loss": 0.672, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.1040609137055837, | |
| "grad_norm": 2.5718559492040494, | |
| "learning_rate": 7.29556167564572e-06, | |
| "loss": 0.7288, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.1065989847715736, | |
| "grad_norm": 2.8145101788480473, | |
| "learning_rate": 7.283376265591514e-06, | |
| "loss": 0.7154, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.1091370558375635, | |
| "grad_norm": 3.3920536101786327, | |
| "learning_rate": 7.271173695915154e-06, | |
| "loss": 0.6787, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.1116751269035534, | |
| "grad_norm": 3.5945284430541182, | |
| "learning_rate": 7.2589540583192165e-06, | |
| "loss": 0.9233, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.114213197969543, | |
| "grad_norm": 4.886400955382691, | |
| "learning_rate": 7.2467174446345435e-06, | |
| "loss": 0.6844, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.116751269035533, | |
| "grad_norm": 4.600149769508583, | |
| "learning_rate": 7.234463946819553e-06, | |
| "loss": 0.7041, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.119289340101523, | |
| "grad_norm": 4.014196576676073, | |
| "learning_rate": 7.222193656959546e-06, | |
| "loss": 0.6791, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.1218274111675126, | |
| "grad_norm": 2.5402480546901773, | |
| "learning_rate": 7.209906667266018e-06, | |
| "loss": 0.9044, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.1243654822335025, | |
| "grad_norm": 3.1380754969776885, | |
| "learning_rate": 7.19760307007596e-06, | |
| "loss": 0.743, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.1269035532994924, | |
| "grad_norm": 4.388883522170381, | |
| "learning_rate": 7.185282957851175e-06, | |
| "loss": 0.7518, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.1294416243654823, | |
| "grad_norm": 2.3881650297375043, | |
| "learning_rate": 7.172946423177574e-06, | |
| "loss": 0.7223, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.131979695431472, | |
| "grad_norm": 5.0576028219200575, | |
| "learning_rate": 7.160593558764477e-06, | |
| "loss": 0.7502, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.134517766497462, | |
| "grad_norm": 4.182627482293584, | |
| "learning_rate": 7.148224457443933e-06, | |
| "loss": 0.7233, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.1370558375634519, | |
| "grad_norm": 3.323927436904356, | |
| "learning_rate": 7.135839212170008e-06, | |
| "loss": 0.7562, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.1395939086294415, | |
| "grad_norm": 5.674572111954262, | |
| "learning_rate": 7.123437916018084e-06, | |
| "loss": 0.6563, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.1421319796954315, | |
| "grad_norm": 5.193844514646635, | |
| "learning_rate": 7.111020662184174e-06, | |
| "loss": 0.6701, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.1446700507614214, | |
| "grad_norm": 2.745047805120342, | |
| "learning_rate": 7.098587543984208e-06, | |
| "loss": 0.6504, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.1472081218274113, | |
| "grad_norm": 5.2424783860025475, | |
| "learning_rate": 7.086138654853339e-06, | |
| "loss": 0.7568, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.149746192893401, | |
| "grad_norm": 2.525028908338044, | |
| "learning_rate": 7.073674088345239e-06, | |
| "loss": 0.7756, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.1522842639593909, | |
| "grad_norm": 3.94906824829304, | |
| "learning_rate": 7.061193938131397e-06, | |
| "loss": 0.7004, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.1548223350253808, | |
| "grad_norm": 3.9522980746979632, | |
| "learning_rate": 7.048698298000411e-06, | |
| "loss": 0.8008, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.1573604060913705, | |
| "grad_norm": 3.3433822313720345, | |
| "learning_rate": 7.036187261857289e-06, | |
| "loss": 0.7565, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.1598984771573604, | |
| "grad_norm": 2.786840629093061, | |
| "learning_rate": 7.023660923722737e-06, | |
| "loss": 0.7161, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.1624365482233503, | |
| "grad_norm": 3.054413975453938, | |
| "learning_rate": 7.011119377732459e-06, | |
| "loss": 0.7952, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.1649746192893402, | |
| "grad_norm": 3.8923977302485726, | |
| "learning_rate": 6.998562718136445e-06, | |
| "loss": 0.7485, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.16751269035533, | |
| "grad_norm": 2.84562510967333, | |
| "learning_rate": 6.985991039298263e-06, | |
| "loss": 0.7196, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.1700507614213198, | |
| "grad_norm": 2.4183432989182294, | |
| "learning_rate": 6.973404435694353e-06, | |
| "loss": 0.7646, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.1725888324873097, | |
| "grad_norm": 2.7200567540717877, | |
| "learning_rate": 6.960803001913315e-06, | |
| "loss": 0.7, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.1751269035532994, | |
| "grad_norm": 3.63959643248371, | |
| "learning_rate": 6.948186832655195e-06, | |
| "loss": 0.7266, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.1776649746192893, | |
| "grad_norm": 3.2936479977582365, | |
| "learning_rate": 6.93555602273078e-06, | |
| "loss": 0.7935, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.1802030456852792, | |
| "grad_norm": 5.592071642497226, | |
| "learning_rate": 6.922910667060881e-06, | |
| "loss": 0.7863, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.1827411167512691, | |
| "grad_norm": 4.04842721540907, | |
| "learning_rate": 6.910250860675618e-06, | |
| "loss": 0.7015, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.1852791878172588, | |
| "grad_norm": 3.4995823546724885, | |
| "learning_rate": 6.897576698713713e-06, | |
| "loss": 0.713, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.1878172588832487, | |
| "grad_norm": 2.0124542793260116, | |
| "learning_rate": 6.884888276421766e-06, | |
| "loss": 0.7089, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.1903553299492386, | |
| "grad_norm": 3.0245832112482685, | |
| "learning_rate": 6.872185689153548e-06, | |
| "loss": 0.7502, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.1928934010152283, | |
| "grad_norm": 3.1968776773224103, | |
| "learning_rate": 6.859469032369275e-06, | |
| "loss": 0.6792, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.1954314720812182, | |
| "grad_norm": 2.599351071193472, | |
| "learning_rate": 6.846738401634899e-06, | |
| "loss": 0.7182, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.1979695431472082, | |
| "grad_norm": 3.8378544182683254, | |
| "learning_rate": 6.833993892621388e-06, | |
| "loss": 0.6645, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.200507614213198, | |
| "grad_norm": 3.6586818009882616, | |
| "learning_rate": 6.821235601104001e-06, | |
| "loss": 0.7123, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.2030456852791878, | |
| "grad_norm": 2.495122167223983, | |
| "learning_rate": 6.8084636229615786e-06, | |
| "loss": 0.822, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.2055837563451777, | |
| "grad_norm": 2.311020743189909, | |
| "learning_rate": 6.795678054175811e-06, | |
| "loss": 0.7897, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.2081218274111676, | |
| "grad_norm": 3.3626364340836856, | |
| "learning_rate": 6.782878990830527e-06, | |
| "loss": 0.6936, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.2106598984771573, | |
| "grad_norm": 5.169275561140567, | |
| "learning_rate": 6.770066529110964e-06, | |
| "loss": 0.6622, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.2131979695431472, | |
| "grad_norm": 3.078776628999445, | |
| "learning_rate": 6.757240765303047e-06, | |
| "loss": 0.7513, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.215736040609137, | |
| "grad_norm": 5.617176822012553, | |
| "learning_rate": 6.744401795792673e-06, | |
| "loss": 0.7513, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.218274111675127, | |
| "grad_norm": 7.732644354996138, | |
| "learning_rate": 6.731549717064975e-06, | |
| "loss": 0.8617, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.2208121827411167, | |
| "grad_norm": 2.4117427336853696, | |
| "learning_rate": 6.718684625703603e-06, | |
| "loss": 0.7432, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.2233502538071066, | |
| "grad_norm": 1.8400980254586687, | |
| "learning_rate": 6.705806618389998e-06, | |
| "loss": 0.8022, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.2258883248730965, | |
| "grad_norm": 3.6097931824757032, | |
| "learning_rate": 6.6929157919026645e-06, | |
| "loss": 0.9118, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.2284263959390862, | |
| "grad_norm": 4.540403777710814, | |
| "learning_rate": 6.6800122431164425e-06, | |
| "loss": 0.8484, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.2309644670050761, | |
| "grad_norm": 5.750951180784241, | |
| "learning_rate": 6.6670960690017814e-06, | |
| "loss": 0.6695, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.233502538071066, | |
| "grad_norm": 3.3909591035556366, | |
| "learning_rate": 6.654167366624009e-06, | |
| "loss": 0.7365, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.236040609137056, | |
| "grad_norm": 4.588355673915564, | |
| "learning_rate": 6.641226233142605e-06, | |
| "loss": 0.7533, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.2385786802030456, | |
| "grad_norm": 6.6404536496522795, | |
| "learning_rate": 6.628272765810468e-06, | |
| "loss": 0.6466, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.2411167512690355, | |
| "grad_norm": 3.434312988744615, | |
| "learning_rate": 6.615307061973185e-06, | |
| "loss": 0.6203, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.2436548223350254, | |
| "grad_norm": 3.0930958381981815, | |
| "learning_rate": 6.602329219068302e-06, | |
| "loss": 0.7669, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.2461928934010151, | |
| "grad_norm": 2.8869650975031416, | |
| "learning_rate": 6.5893393346245906e-06, | |
| "loss": 0.7633, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.248730964467005, | |
| "grad_norm": 3.170504311720801, | |
| "learning_rate": 6.576337506261314e-06, | |
| "loss": 0.6953, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.251269035532995, | |
| "grad_norm": 2.780063933956294, | |
| "learning_rate": 6.563323831687493e-06, | |
| "loss": 0.709, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.2538071065989849, | |
| "grad_norm": 2.555837538242703, | |
| "learning_rate": 6.550298408701175e-06, | |
| "loss": 0.7809, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.2563451776649746, | |
| "grad_norm": 4.0633641977824855, | |
| "learning_rate": 6.537261335188696e-06, | |
| "loss": 0.6886, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.2588832487309645, | |
| "grad_norm": 4.623003200579342, | |
| "learning_rate": 6.524212709123947e-06, | |
| "loss": 0.7008, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.2614213197969544, | |
| "grad_norm": 3.5939963505107606, | |
| "learning_rate": 6.511152628567635e-06, | |
| "loss": 0.6717, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.263959390862944, | |
| "grad_norm": 2.34961246443107, | |
| "learning_rate": 6.498081191666549e-06, | |
| "loss": 0.6651, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.266497461928934, | |
| "grad_norm": 4.8848252156015395, | |
| "learning_rate": 6.48499849665282e-06, | |
| "loss": 0.7782, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.2690355329949239, | |
| "grad_norm": 2.747548072805645, | |
| "learning_rate": 6.471904641843187e-06, | |
| "loss": 0.7487, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.2715736040609138, | |
| "grad_norm": 2.8146283218783896, | |
| "learning_rate": 6.458799725638249e-06, | |
| "loss": 0.7939, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.2741116751269035, | |
| "grad_norm": 2.6115782469222437, | |
| "learning_rate": 6.4456838465217384e-06, | |
| "loss": 0.7964, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.2766497461928934, | |
| "grad_norm": 4.382058548644622, | |
| "learning_rate": 6.432557103059771e-06, | |
| "loss": 0.6758, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.2791878172588833, | |
| "grad_norm": 4.2761528963141044, | |
| "learning_rate": 6.419419593900109e-06, | |
| "loss": 0.6878, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.281725888324873, | |
| "grad_norm": 2.5161012535059433, | |
| "learning_rate": 6.4062714177714166e-06, | |
| "loss": 0.8262, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.284263959390863, | |
| "grad_norm": 3.4501823268959675, | |
| "learning_rate": 6.393112673482522e-06, | |
| "loss": 0.7008, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.2868020304568528, | |
| "grad_norm": 3.945300277429004, | |
| "learning_rate": 6.379943459921677e-06, | |
| "loss": 0.6499, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.2893401015228427, | |
| "grad_norm": 4.06451496648234, | |
| "learning_rate": 6.3667638760558055e-06, | |
| "loss": 0.7884, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.2918781725888324, | |
| "grad_norm": 2.7617149802354874, | |
| "learning_rate": 6.353574020929767e-06, | |
| "loss": 0.735, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.2944162436548223, | |
| "grad_norm": 3.167960813091416, | |
| "learning_rate": 6.340373993665607e-06, | |
| "loss": 0.9423, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.2969543147208122, | |
| "grad_norm": 2.8725417311483246, | |
| "learning_rate": 6.327163893461819e-06, | |
| "loss": 0.6771, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.299492385786802, | |
| "grad_norm": 3.9287674174064353, | |
| "learning_rate": 6.31394381959259e-06, | |
| "loss": 0.862, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.3020304568527918, | |
| "grad_norm": 3.0972388669258266, | |
| "learning_rate": 6.300713871407062e-06, | |
| "loss": 0.6995, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.3045685279187818, | |
| "grad_norm": 2.431302901450318, | |
| "learning_rate": 6.287474148328584e-06, | |
| "loss": 0.6403, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.3071065989847717, | |
| "grad_norm": 4.801341289977943, | |
| "learning_rate": 6.274224749853961e-06, | |
| "loss": 0.7267, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.3096446700507614, | |
| "grad_norm": 3.1112707577132324, | |
| "learning_rate": 6.2609657755527135e-06, | |
| "loss": 0.7341, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.3121827411167513, | |
| "grad_norm": 4.050638104301098, | |
| "learning_rate": 6.247697325066314e-06, | |
| "loss": 0.9268, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.3147208121827412, | |
| "grad_norm": 4.358249942985889, | |
| "learning_rate": 6.2344194981074616e-06, | |
| "loss": 0.8526, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.3172588832487309, | |
| "grad_norm": 2.606425333009026, | |
| "learning_rate": 6.22113239445931e-06, | |
| "loss": 0.7707, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.3197969543147208, | |
| "grad_norm": 2.16185838449471, | |
| "learning_rate": 6.2078361139747334e-06, | |
| "loss": 0.756, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.3223350253807107, | |
| "grad_norm": 11.438171951626867, | |
| "learning_rate": 6.194530756575567e-06, | |
| "loss": 0.8001, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.3248730964467006, | |
| "grad_norm": 4.268134675954473, | |
| "learning_rate": 6.1812164222518626e-06, | |
| "loss": 0.5958, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.3274111675126903, | |
| "grad_norm": 4.015099725456588, | |
| "learning_rate": 6.167893211061128e-06, | |
| "loss": 0.8645, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.3299492385786802, | |
| "grad_norm": 7.084231421312322, | |
| "learning_rate": 6.154561223127587e-06, | |
| "loss": 0.7082, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.33248730964467, | |
| "grad_norm": 3.8215913212518773, | |
| "learning_rate": 6.141220558641416e-06, | |
| "loss": 0.6995, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.3350253807106598, | |
| "grad_norm": 3.038519186584797, | |
| "learning_rate": 6.127871317857996e-06, | |
| "loss": 0.6656, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.3375634517766497, | |
| "grad_norm": 3.8116749862373958, | |
| "learning_rate": 6.114513601097165e-06, | |
| "loss": 0.8154, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.3401015228426396, | |
| "grad_norm": 4.817233367550986, | |
| "learning_rate": 6.101147508742456e-06, | |
| "loss": 0.7358, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.3426395939086295, | |
| "grad_norm": 3.646219613432323, | |
| "learning_rate": 6.0877731412403365e-06, | |
| "loss": 0.7699, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.3451776649746192, | |
| "grad_norm": 6.120745299164355, | |
| "learning_rate": 6.0743905990994714e-06, | |
| "loss": 0.8005, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.3477157360406091, | |
| "grad_norm": 3.0596765875812904, | |
| "learning_rate": 6.060999982889955e-06, | |
| "loss": 0.744, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.350253807106599, | |
| "grad_norm": 2.792466503447904, | |
| "learning_rate": 6.04760139324256e-06, | |
| "loss": 0.7307, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.3527918781725887, | |
| "grad_norm": 5.179116357662163, | |
| "learning_rate": 6.0341949308479755e-06, | |
| "loss": 0.6799, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.3553299492385786, | |
| "grad_norm": 3.822692771814533, | |
| "learning_rate": 6.020780696456059e-06, | |
| "loss": 0.7327, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.3578680203045685, | |
| "grad_norm": 2.3276793880324753, | |
| "learning_rate": 6.0073587908750715e-06, | |
| "loss": 0.7131, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.3604060913705585, | |
| "grad_norm": 5.9622605348086175, | |
| "learning_rate": 5.9939293149709265e-06, | |
| "loss": 0.8849, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.3629441624365481, | |
| "grad_norm": 4.449818782204746, | |
| "learning_rate": 5.9804923696664255e-06, | |
| "loss": 0.7274, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.365482233502538, | |
| "grad_norm": 10.86624898759871, | |
| "learning_rate": 5.967048055940503e-06, | |
| "loss": 0.7212, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.368020304568528, | |
| "grad_norm": 3.145159036375936, | |
| "learning_rate": 5.953596474827469e-06, | |
| "loss": 0.7319, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.3705583756345177, | |
| "grad_norm": 2.3980927107396197, | |
| "learning_rate": 5.940137727416247e-06, | |
| "loss": 0.6897, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.3730964467005076, | |
| "grad_norm": 3.418541376783986, | |
| "learning_rate": 5.9266719148496155e-06, | |
| "loss": 0.7733, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.3756345177664975, | |
| "grad_norm": 2.8126841157843057, | |
| "learning_rate": 5.9131991383234485e-06, | |
| "loss": 0.6699, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.3781725888324874, | |
| "grad_norm": 3.0318559748788534, | |
| "learning_rate": 5.8997194990859545e-06, | |
| "loss": 0.6653, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.380710659898477, | |
| "grad_norm": 3.5210511267819067, | |
| "learning_rate": 5.886233098436914e-06, | |
| "loss": 0.7593, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.383248730964467, | |
| "grad_norm": 4.2948950638851455, | |
| "learning_rate": 5.872740037726919e-06, | |
| "loss": 0.699, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.385786802030457, | |
| "grad_norm": 2.6028118744729007, | |
| "learning_rate": 5.859240418356614e-06, | |
| "loss": 0.7695, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.3883248730964466, | |
| "grad_norm": 4.782785847123683, | |
| "learning_rate": 5.845734341775933e-06, | |
| "loss": 0.8879, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.3908629441624365, | |
| "grad_norm": 2.6172139666414727, | |
| "learning_rate": 5.832221909483334e-06, | |
| "loss": 0.7758, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.3934010152284264, | |
| "grad_norm": 4.763054993480605, | |
| "learning_rate": 5.818703223025036e-06, | |
| "loss": 0.6957, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.3959390862944163, | |
| "grad_norm": 3.1414376067249923, | |
| "learning_rate": 5.805178383994264e-06, | |
| "loss": 0.7213, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.398477157360406, | |
| "grad_norm": 3.130650396595069, | |
| "learning_rate": 5.791647494030475e-06, | |
| "loss": 0.6065, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.401015228426396, | |
| "grad_norm": 3.261563931648429, | |
| "learning_rate": 5.778110654818602e-06, | |
| "loss": 0.7958, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.4035532994923858, | |
| "grad_norm": 3.8311803433345433, | |
| "learning_rate": 5.764567968088282e-06, | |
| "loss": 0.5946, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.4060913705583755, | |
| "grad_norm": 2.743364529127526, | |
| "learning_rate": 5.751019535613103e-06, | |
| "loss": 0.7435, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.4086294416243654, | |
| "grad_norm": 4.489529309087514, | |
| "learning_rate": 5.737465459209825e-06, | |
| "loss": 0.78, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.4111675126903553, | |
| "grad_norm": 3.7301506721368947, | |
| "learning_rate": 5.723905840737632e-06, | |
| "loss": 0.6712, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.4137055837563453, | |
| "grad_norm": 4.575944751601415, | |
| "learning_rate": 5.710340782097347e-06, | |
| "loss": 0.7624, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.4162436548223352, | |
| "grad_norm": 3.1435876497511863, | |
| "learning_rate": 5.696770385230679e-06, | |
| "loss": 0.8174, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.4187817258883249, | |
| "grad_norm": 2.431171340737915, | |
| "learning_rate": 5.683194752119457e-06, | |
| "loss": 0.7116, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.4213197969543148, | |
| "grad_norm": 4.370602698661378, | |
| "learning_rate": 5.6696139847848554e-06, | |
| "loss": 0.777, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.4238578680203045, | |
| "grad_norm": 4.718481956713903, | |
| "learning_rate": 5.656028185286638e-06, | |
| "loss": 0.6899, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.4263959390862944, | |
| "grad_norm": 2.821013688228599, | |
| "learning_rate": 5.6424374557223815e-06, | |
| "loss": 0.8273, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.4289340101522843, | |
| "grad_norm": 2.883810214582895, | |
| "learning_rate": 5.628841898226715e-06, | |
| "loss": 0.73, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.4314720812182742, | |
| "grad_norm": 2.2292511015864727, | |
| "learning_rate": 5.615241614970546e-06, | |
| "loss": 0.7264, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.434010152284264, | |
| "grad_norm": 2.820445001892862, | |
| "learning_rate": 5.601636708160297e-06, | |
| "loss": 0.7463, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.4365482233502538, | |
| "grad_norm": 5.707035941059044, | |
| "learning_rate": 5.588027280037139e-06, | |
| "loss": 0.8703, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.4390862944162437, | |
| "grad_norm": 6.244765987803611, | |
| "learning_rate": 5.5744134328762225e-06, | |
| "loss": 0.8395, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.4416243654822334, | |
| "grad_norm": 5.279336772760707, | |
| "learning_rate": 5.560795268985899e-06, | |
| "loss": 0.5548, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.4441624365482233, | |
| "grad_norm": 2.4337670623677936, | |
| "learning_rate": 5.547172890706969e-06, | |
| "loss": 0.81, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.4467005076142132, | |
| "grad_norm": 2.559001907523824, | |
| "learning_rate": 5.533546400411899e-06, | |
| "loss": 0.6723, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.4492385786802031, | |
| "grad_norm": 2.095739075458118, | |
| "learning_rate": 5.519915900504059e-06, | |
| "loss": 0.7547, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.451776649746193, | |
| "grad_norm": 2.7618676186626105, | |
| "learning_rate": 5.506281493416954e-06, | |
| "loss": 0.7759, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.4543147208121827, | |
| "grad_norm": 2.4429284260023025, | |
| "learning_rate": 5.492643281613444e-06, | |
| "loss": 0.7779, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.4568527918781726, | |
| "grad_norm": 2.248761204996824, | |
| "learning_rate": 5.4790013675849906e-06, | |
| "loss": 0.7139, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.4593908629441623, | |
| "grad_norm": 2.04849421734439, | |
| "learning_rate": 5.465355853850873e-06, | |
| "loss": 0.7967, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.4619289340101522, | |
| "grad_norm": 2.3149667017145474, | |
| "learning_rate": 5.4517068429574215e-06, | |
| "loss": 0.7546, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.4644670050761421, | |
| "grad_norm": 2.7279348376505372, | |
| "learning_rate": 5.438054437477249e-06, | |
| "loss": 0.7709, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.467005076142132, | |
| "grad_norm": 4.10133980464364, | |
| "learning_rate": 5.424398740008481e-06, | |
| "loss": 0.7447, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.469543147208122, | |
| "grad_norm": 3.0670495095077, | |
| "learning_rate": 5.4107398531739765e-06, | |
| "loss": 0.6942, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.4720812182741116, | |
| "grad_norm": 2.628033659732392, | |
| "learning_rate": 5.397077879620569e-06, | |
| "loss": 0.7497, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.4746192893401016, | |
| "grad_norm": 2.415953001245352, | |
| "learning_rate": 5.383412922018285e-06, | |
| "loss": 0.6987, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.4771573604060912, | |
| "grad_norm": 3.0726342117302208, | |
| "learning_rate": 5.3697450830595775e-06, | |
| "loss": 0.7094, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.4796954314720812, | |
| "grad_norm": 8.276976756150773, | |
| "learning_rate": 5.356074465458553e-06, | |
| "loss": 0.649, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.482233502538071, | |
| "grad_norm": 9.68509977234341, | |
| "learning_rate": 5.3424011719502e-06, | |
| "loss": 0.726, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.484771573604061, | |
| "grad_norm": 9.749912315585314, | |
| "learning_rate": 5.3287253052896125e-06, | |
| "loss": 0.6428, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.487309644670051, | |
| "grad_norm": 6.96335200544029, | |
| "learning_rate": 5.3150469682512275e-06, | |
| "loss": 0.7821, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.4898477157360406, | |
| "grad_norm": 7.183939152524903, | |
| "learning_rate": 5.301366263628045e-06, | |
| "loss": 0.6567, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.4923857868020305, | |
| "grad_norm": 9.732377228662223, | |
| "learning_rate": 5.287683294230855e-06, | |
| "loss": 0.8965, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.4949238578680202, | |
| "grad_norm": 7.097575405762116, | |
| "learning_rate": 5.273998162887472e-06, | |
| "loss": 0.6165, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.49746192893401, | |
| "grad_norm": 10.336609092325649, | |
| "learning_rate": 5.260310972441951e-06, | |
| "loss": 0.6666, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 10.173431901856254, | |
| "learning_rate": 5.246621825753827e-06, | |
| "loss": 0.7356, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.50253807106599, | |
| "grad_norm": 7.252928905504994, | |
| "learning_rate": 5.232930825697337e-06, | |
| "loss": 0.6349, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.5050761421319798, | |
| "grad_norm": 9.740020421735906, | |
| "learning_rate": 5.2192380751606365e-06, | |
| "loss": 0.7093, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.5076142131979695, | |
| "grad_norm": 6.881744795837598, | |
| "learning_rate": 5.20554367704505e-06, | |
| "loss": 0.7547, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.5101522842639594, | |
| "grad_norm": 7.403862646753793, | |
| "learning_rate": 5.191847734264272e-06, | |
| "loss": 0.7827, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.512690355329949, | |
| "grad_norm": 7.857449479117419, | |
| "learning_rate": 5.178150349743611e-06, | |
| "loss": 0.7318, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.515228426395939, | |
| "grad_norm": 7.8138561408079985, | |
| "learning_rate": 5.1644516264192075e-06, | |
| "loss": 0.7887, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.517766497461929, | |
| "grad_norm": 11.529490066787323, | |
| "learning_rate": 5.150751667237266e-06, | |
| "loss": 0.7767, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.5203045685279188, | |
| "grad_norm": 8.291871868386467, | |
| "learning_rate": 5.137050575153276e-06, | |
| "loss": 0.7236, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.5228426395939088, | |
| "grad_norm": 13.386290031605322, | |
| "learning_rate": 5.123348453131242e-06, | |
| "loss": 0.7494, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.5253807106598984, | |
| "grad_norm": 10.501239515721437, | |
| "learning_rate": 5.1096454041429064e-06, | |
| "loss": 0.7183, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.5279187817258884, | |
| "grad_norm": 8.400955073843619, | |
| "learning_rate": 5.095941531166982e-06, | |
| "loss": 0.7093, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.530456852791878, | |
| "grad_norm": 8.114490333939267, | |
| "learning_rate": 5.08223693718837e-06, | |
| "loss": 0.7364, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.532994923857868, | |
| "grad_norm": 6.9837214192909505, | |
| "learning_rate": 5.068531725197393e-06, | |
| "loss": 0.7058, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.5355329949238579, | |
| "grad_norm": 10.522759207505343, | |
| "learning_rate": 5.054825998189014e-06, | |
| "loss": 0.6368, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.5380710659898478, | |
| "grad_norm": 6.270463565331074, | |
| "learning_rate": 5.041119859162068e-06, | |
| "loss": 0.6091, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.5406091370558377, | |
| "grad_norm": 6.726382391943539, | |
| "learning_rate": 5.027413411118491e-06, | |
| "loss": 0.6602, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.5431472081218274, | |
| "grad_norm": 6.317567266105698, | |
| "learning_rate": 5.0137067570625345e-06, | |
| "loss": 0.6634, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.5456852791878173, | |
| "grad_norm": 6.934687119093674, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7209, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.548223350253807, | |
| "grad_norm": 5.596856448268176, | |
| "learning_rate": 4.986293242937467e-06, | |
| "loss": 0.7366, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.5507614213197969, | |
| "grad_norm": 6.021457219503407, | |
| "learning_rate": 4.97258658888151e-06, | |
| "loss": 0.7069, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.5532994923857868, | |
| "grad_norm": 4.6400991890100185, | |
| "learning_rate": 4.958880140837934e-06, | |
| "loss": 0.7575, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.5558375634517767, | |
| "grad_norm": 6.820325061206459, | |
| "learning_rate": 4.945174001810989e-06, | |
| "loss": 0.7669, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.5583756345177666, | |
| "grad_norm": 4.5981508300331395, | |
| "learning_rate": 4.9314682748026095e-06, | |
| "loss": 0.7572, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.5609137055837563, | |
| "grad_norm": 3.681334801007928, | |
| "learning_rate": 4.917763062811631e-06, | |
| "loss": 0.7417, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.5634517766497462, | |
| "grad_norm": 8.250248361704466, | |
| "learning_rate": 4.904058468833019e-06, | |
| "loss": 0.8454, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.565989847715736, | |
| "grad_norm": 9.8095466260306, | |
| "learning_rate": 4.8903545958570935e-06, | |
| "loss": 0.7939, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.5685279187817258, | |
| "grad_norm": 8.299972020814456, | |
| "learning_rate": 4.876651546868759e-06, | |
| "loss": 0.7407, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.5710659898477157, | |
| "grad_norm": 7.39524807040033, | |
| "learning_rate": 4.862949424846726e-06, | |
| "loss": 0.7403, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.5736040609137056, | |
| "grad_norm": 7.02310739787436, | |
| "learning_rate": 4.849248332762735e-06, | |
| "loss": 0.8179, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.5761421319796955, | |
| "grad_norm": 7.940559854591136, | |
| "learning_rate": 4.835548373580793e-06, | |
| "loss": 0.6895, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.5786802030456852, | |
| "grad_norm": 4.8572557004474675, | |
| "learning_rate": 4.8218496502563906e-06, | |
| "loss": 0.6896, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.5812182741116751, | |
| "grad_norm": 6.306241421914032, | |
| "learning_rate": 4.808152265735729e-06, | |
| "loss": 0.7573, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.5837563451776648, | |
| "grad_norm": 6.9262999066348, | |
| "learning_rate": 4.794456322954953e-06, | |
| "loss": 0.7389, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.5862944162436547, | |
| "grad_norm": 6.812308267341822, | |
| "learning_rate": 4.780761924839365e-06, | |
| "loss": 0.6992, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.5888324873096447, | |
| "grad_norm": 9.421565668117234, | |
| "learning_rate": 4.767069174302667e-06, | |
| "loss": 0.7858, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.5913705583756346, | |
| "grad_norm": 8.64577548883685, | |
| "learning_rate": 4.753378174246174e-06, | |
| "loss": 0.8545, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.5939086294416245, | |
| "grad_norm": 11.328643712467004, | |
| "learning_rate": 4.739689027558052e-06, | |
| "loss": 0.7147, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.5964467005076142, | |
| "grad_norm": 7.4543053008075235, | |
| "learning_rate": 4.726001837112529e-06, | |
| "loss": 0.7868, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.598984771573604, | |
| "grad_norm": 7.259409943232553, | |
| "learning_rate": 4.7123167057691446e-06, | |
| "loss": 0.7483, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.6015228426395938, | |
| "grad_norm": 5.847738992367483, | |
| "learning_rate": 4.6986337363719565e-06, | |
| "loss": 0.655, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.6040609137055837, | |
| "grad_norm": 7.737551812189505, | |
| "learning_rate": 4.684953031748773e-06, | |
| "loss": 0.6705, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.6065989847715736, | |
| "grad_norm": 10.181314318265251, | |
| "learning_rate": 4.671274694710388e-06, | |
| "loss": 0.771, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.6091370558375635, | |
| "grad_norm": 7.794905487902131, | |
| "learning_rate": 4.657598828049801e-06, | |
| "loss": 0.6708, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.6116751269035534, | |
| "grad_norm": 8.145058809261956, | |
| "learning_rate": 4.643925534541448e-06, | |
| "loss": 0.7587, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.614213197969543, | |
| "grad_norm": 8.922528793798906, | |
| "learning_rate": 4.630254916940424e-06, | |
| "loss": 0.7323, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.616751269035533, | |
| "grad_norm": 7.00326933899544, | |
| "learning_rate": 4.616587077981716e-06, | |
| "loss": 0.8153, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.6192893401015227, | |
| "grad_norm": 7.999681110308186, | |
| "learning_rate": 4.602922120379432e-06, | |
| "loss": 0.7417, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.6218274111675126, | |
| "grad_norm": 8.438392955386595, | |
| "learning_rate": 4.589260146826025e-06, | |
| "loss": 0.7151, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.6243654822335025, | |
| "grad_norm": 7.155521862678608, | |
| "learning_rate": 4.575601259991523e-06, | |
| "loss": 0.6388, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.6269035532994924, | |
| "grad_norm": 9.059179058520689, | |
| "learning_rate": 4.561945562522753e-06, | |
| "loss": 0.7679, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.6294416243654823, | |
| "grad_norm": 10.854310437275425, | |
| "learning_rate": 4.548293157042581e-06, | |
| "loss": 0.6177, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.631979695431472, | |
| "grad_norm": 5.8672300531571855, | |
| "learning_rate": 4.534644146149128e-06, | |
| "loss": 0.761, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.634517766497462, | |
| "grad_norm": 9.975674381554894, | |
| "learning_rate": 4.52099863241501e-06, | |
| "loss": 0.5939, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.6370558375634516, | |
| "grad_norm": 7.111751840405073, | |
| "learning_rate": 4.507356718386557e-06, | |
| "loss": 0.6766, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.6395939086294415, | |
| "grad_norm": 6.247398349216973, | |
| "learning_rate": 4.493718506583048e-06, | |
| "loss": 0.7125, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.6421319796954315, | |
| "grad_norm": 7.549756956695894, | |
| "learning_rate": 4.4800840994959426e-06, | |
| "loss": 0.6904, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.6446700507614214, | |
| "grad_norm": 6.500884950383986, | |
| "learning_rate": 4.466453599588103e-06, | |
| "loss": 0.7022, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.6472081218274113, | |
| "grad_norm": 5.2335504629722704, | |
| "learning_rate": 4.452827109293033e-06, | |
| "loss": 0.7422, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.649746192893401, | |
| "grad_norm": 6.146630683172185, | |
| "learning_rate": 4.439204731014102e-06, | |
| "loss": 0.7558, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.6522842639593909, | |
| "grad_norm": 10.375183815595266, | |
| "learning_rate": 4.42558656712378e-06, | |
| "loss": 0.7455, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.6548223350253806, | |
| "grad_norm": 5.9665414440217255, | |
| "learning_rate": 4.411972719962862e-06, | |
| "loss": 0.6903, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.6573604060913705, | |
| "grad_norm": 11.099866755003685, | |
| "learning_rate": 4.398363291839705e-06, | |
| "loss": 0.6755, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.6598984771573604, | |
| "grad_norm": 4.488907541300615, | |
| "learning_rate": 4.384758385029457e-06, | |
| "loss": 0.7513, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.6624365482233503, | |
| "grad_norm": 5.519100934281681, | |
| "learning_rate": 4.371158101773287e-06, | |
| "loss": 0.7022, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.6649746192893402, | |
| "grad_norm": 3.780623949899037, | |
| "learning_rate": 4.3575625442776185e-06, | |
| "loss": 0.6907, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.66751269035533, | |
| "grad_norm": 3.8981771965418073, | |
| "learning_rate": 4.3439718147133625e-06, | |
| "loss": 0.6957, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.6700507614213198, | |
| "grad_norm": 2.434198707923521, | |
| "learning_rate": 4.330386015215145e-06, | |
| "loss": 0.9086, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.6725888324873095, | |
| "grad_norm": 3.6107308436430294, | |
| "learning_rate": 4.316805247880546e-06, | |
| "loss": 0.7743, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.6751269035532994, | |
| "grad_norm": 5.461014114880707, | |
| "learning_rate": 4.3032296147693225e-06, | |
| "loss": 0.6636, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.6776649746192893, | |
| "grad_norm": 2.9489874414341166, | |
| "learning_rate": 4.289659217902655e-06, | |
| "loss": 0.7954, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.6802030456852792, | |
| "grad_norm": 3.2634794475561617, | |
| "learning_rate": 4.2760941592623686e-06, | |
| "loss": 0.6887, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.6827411167512691, | |
| "grad_norm": 1.908474618570056, | |
| "learning_rate": 4.262534540790176e-06, | |
| "loss": 0.7072, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.6852791878172588, | |
| "grad_norm": 4.2509717118594645, | |
| "learning_rate": 4.248980464386899e-06, | |
| "loss": 0.6831, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.6878172588832487, | |
| "grad_norm": 1.9684828164673867, | |
| "learning_rate": 4.23543203191172e-06, | |
| "loss": 0.7576, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.6903553299492384, | |
| "grad_norm": 2.701415470742069, | |
| "learning_rate": 4.2218893451814005e-06, | |
| "loss": 0.687, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.6928934010152283, | |
| "grad_norm": 2.4493930106706627, | |
| "learning_rate": 4.208352505969526e-06, | |
| "loss": 0.6285, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.6954314720812182, | |
| "grad_norm": 2.7555789348597193, | |
| "learning_rate": 4.194821616005738e-06, | |
| "loss": 0.6422, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.6979695431472082, | |
| "grad_norm": 2.4972926405616866, | |
| "learning_rate": 4.181296776974963e-06, | |
| "loss": 0.7912, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.700507614213198, | |
| "grad_norm": 4.468489099102334, | |
| "learning_rate": 4.167778090516667e-06, | |
| "loss": 0.7716, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.703045685279188, | |
| "grad_norm": 6.010959172263999, | |
| "learning_rate": 4.154265658224067e-06, | |
| "loss": 0.7673, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.7055837563451777, | |
| "grad_norm": 2.2355125785055208, | |
| "learning_rate": 4.140759581643386e-06, | |
| "loss": 0.72, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.7081218274111674, | |
| "grad_norm": 2.3727185967108757, | |
| "learning_rate": 4.127259962273082e-06, | |
| "loss": 0.7083, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.7106598984771573, | |
| "grad_norm": 2.686125518968802, | |
| "learning_rate": 4.113766901563087e-06, | |
| "loss": 0.7431, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.7131979695431472, | |
| "grad_norm": 2.7573692052287897, | |
| "learning_rate": 4.100280500914046e-06, | |
| "loss": 0.7454, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.715736040609137, | |
| "grad_norm": 2.732285095831061, | |
| "learning_rate": 4.086800861676552e-06, | |
| "loss": 0.721, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.718274111675127, | |
| "grad_norm": 3.015824065840091, | |
| "learning_rate": 4.073328085150386e-06, | |
| "loss": 0.6937, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.720812182741117, | |
| "grad_norm": 6.951787707897627, | |
| "learning_rate": 4.059862272583755e-06, | |
| "loss": 0.7863, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.7233502538071066, | |
| "grad_norm": 4.845757849106375, | |
| "learning_rate": 4.046403525172533e-06, | |
| "loss": 0.8036, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.7258883248730963, | |
| "grad_norm": 3.0283091863817155, | |
| "learning_rate": 4.0329519440595e-06, | |
| "loss": 0.7565, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.7284263959390862, | |
| "grad_norm": 2.9032977085985063, | |
| "learning_rate": 4.019507630333577e-06, | |
| "loss": 0.7419, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.7309644670050761, | |
| "grad_norm": 2.732682655925896, | |
| "learning_rate": 4.006070685029075e-06, | |
| "loss": 0.7498, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.733502538071066, | |
| "grad_norm": 3.1823751482501184, | |
| "learning_rate": 3.992641209124929e-06, | |
| "loss": 0.6556, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.736040609137056, | |
| "grad_norm": 2.053257796239711, | |
| "learning_rate": 3.979219303543942e-06, | |
| "loss": 0.701, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.7385786802030458, | |
| "grad_norm": 3.1992206206683047, | |
| "learning_rate": 3.965805069152025e-06, | |
| "loss": 0.8046, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.7411167512690355, | |
| "grad_norm": 2.5494366065199525, | |
| "learning_rate": 3.952398606757441e-06, | |
| "loss": 0.6908, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.7436548223350252, | |
| "grad_norm": 2.7057236158660842, | |
| "learning_rate": 3.939000017110046e-06, | |
| "loss": 0.7024, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.7461928934010151, | |
| "grad_norm": 4.085040235331109, | |
| "learning_rate": 3.92560940090053e-06, | |
| "loss": 0.8566, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.748730964467005, | |
| "grad_norm": 2.133417834739682, | |
| "learning_rate": 3.912226858759666e-06, | |
| "loss": 0.7274, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.751269035532995, | |
| "grad_norm": 3.895538290272276, | |
| "learning_rate": 3.898852491257547e-06, | |
| "loss": 0.7114, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.7538071065989849, | |
| "grad_norm": 2.6652563108062592, | |
| "learning_rate": 3.885486398902836e-06, | |
| "loss": 0.6814, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.7563451776649748, | |
| "grad_norm": 2.6709125412075205, | |
| "learning_rate": 3.872128682142005e-06, | |
| "loss": 0.585, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.7588832487309645, | |
| "grad_norm": 7.758065846396269, | |
| "learning_rate": 3.858779441358588e-06, | |
| "loss": 0.6351, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.7614213197969542, | |
| "grad_norm": 2.339854294027953, | |
| "learning_rate": 3.845438776872416e-06, | |
| "loss": 0.7193, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.763959390862944, | |
| "grad_norm": 3.7441081697726886, | |
| "learning_rate": 3.832106788938874e-06, | |
| "loss": 0.8051, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.766497461928934, | |
| "grad_norm": 1.898947089645352, | |
| "learning_rate": 3.818783577748138e-06, | |
| "loss": 0.751, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.7690355329949239, | |
| "grad_norm": 4.8494211071824695, | |
| "learning_rate": 3.8054692434244323e-06, | |
| "loss": 0.7666, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.7715736040609138, | |
| "grad_norm": 2.7092795808497767, | |
| "learning_rate": 3.7921638860252674e-06, | |
| "loss": 0.8202, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.7741116751269037, | |
| "grad_norm": 2.976217075499209, | |
| "learning_rate": 3.7788676055406913e-06, | |
| "loss": 0.6678, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.7766497461928934, | |
| "grad_norm": 2.680319225126857, | |
| "learning_rate": 3.76558050189254e-06, | |
| "loss": 0.6886, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.779187817258883, | |
| "grad_norm": 2.296642253813403, | |
| "learning_rate": 3.7523026749336868e-06, | |
| "loss": 0.7568, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.781725888324873, | |
| "grad_norm": 2.938434483894651, | |
| "learning_rate": 3.7390342244472886e-06, | |
| "loss": 0.7419, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.784263959390863, | |
| "grad_norm": 2.4053334656704357, | |
| "learning_rate": 3.7257752501460397e-06, | |
| "loss": 0.6514, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.7868020304568528, | |
| "grad_norm": 2.378949854019335, | |
| "learning_rate": 3.7125258516714175e-06, | |
| "loss": 0.6336, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.7893401015228427, | |
| "grad_norm": 2.593432990200205, | |
| "learning_rate": 3.6992861285929395e-06, | |
| "loss": 0.9736, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.7918781725888326, | |
| "grad_norm": 2.5151437698388355, | |
| "learning_rate": 3.6860561804074123e-06, | |
| "loss": 0.7368, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.7944162436548223, | |
| "grad_norm": 2.8036107393674663, | |
| "learning_rate": 3.6728361065381833e-06, | |
| "loss": 0.6944, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.796954314720812, | |
| "grad_norm": 2.8715557691268003, | |
| "learning_rate": 3.659626006334395e-06, | |
| "loss": 0.7879, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.799492385786802, | |
| "grad_norm": 4.003179403948973, | |
| "learning_rate": 3.646425979070233e-06, | |
| "loss": 0.7558, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.8020304568527918, | |
| "grad_norm": 8.130933032348212, | |
| "learning_rate": 3.633236123944195e-06, | |
| "loss": 0.8283, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.8045685279187818, | |
| "grad_norm": 2.8583002801807575, | |
| "learning_rate": 3.620056540078323e-06, | |
| "loss": 0.7255, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.8071065989847717, | |
| "grad_norm": 5.09693351683838, | |
| "learning_rate": 3.606887326517479e-06, | |
| "loss": 0.8537, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.8096446700507616, | |
| "grad_norm": 3.014904465977428, | |
| "learning_rate": 3.593728582228585e-06, | |
| "loss": 0.6737, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.8121827411167513, | |
| "grad_norm": 3.6365545634430667, | |
| "learning_rate": 3.5805804060998926e-06, | |
| "loss": 0.679, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.8147208121827412, | |
| "grad_norm": 2.018230067478512, | |
| "learning_rate": 3.567442896940231e-06, | |
| "loss": 0.7556, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.8172588832487309, | |
| "grad_norm": 2.582862863818379, | |
| "learning_rate": 3.554316153478263e-06, | |
| "loss": 0.6736, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.8197969543147208, | |
| "grad_norm": 5.148339265724092, | |
| "learning_rate": 3.5412002743617525e-06, | |
| "loss": 0.7093, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.8223350253807107, | |
| "grad_norm": 3.5984283826570045, | |
| "learning_rate": 3.5280953581568155e-06, | |
| "loss": 0.7092, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.8248730964467006, | |
| "grad_norm": 3.0704447744931933, | |
| "learning_rate": 3.5150015033471817e-06, | |
| "loss": 0.8085, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.8274111675126905, | |
| "grad_norm": 10.00445719642568, | |
| "learning_rate": 3.501918808333453e-06, | |
| "loss": 0.671, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.8299492385786802, | |
| "grad_norm": 2.2671818495794374, | |
| "learning_rate": 3.4888473714323675e-06, | |
| "loss": 0.8039, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.83248730964467, | |
| "grad_norm": 6.221189957212778, | |
| "learning_rate": 3.4757872908760554e-06, | |
| "loss": 0.7488, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.8350253807106598, | |
| "grad_norm": 3.973211223724199, | |
| "learning_rate": 3.4627386648113046e-06, | |
| "loss": 0.7566, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.8375634517766497, | |
| "grad_norm": 4.306273661655382, | |
| "learning_rate": 3.449701591298826e-06, | |
| "loss": 0.7067, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.8401015228426396, | |
| "grad_norm": 2.576999153874239, | |
| "learning_rate": 3.436676168312508e-06, | |
| "loss": 0.8613, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.8426395939086295, | |
| "grad_norm": 2.343907257705208, | |
| "learning_rate": 3.4236624937386874e-06, | |
| "loss": 0.8303, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.8451776649746194, | |
| "grad_norm": 2.0952349982740253, | |
| "learning_rate": 3.4106606653754102e-06, | |
| "loss": 0.7578, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.8477157360406091, | |
| "grad_norm": 2.166342472964004, | |
| "learning_rate": 3.397670780931699e-06, | |
| "loss": 0.8091, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.850253807106599, | |
| "grad_norm": 3.099485050888837, | |
| "learning_rate": 3.384692938026816e-06, | |
| "loss": 0.7944, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.8527918781725887, | |
| "grad_norm": 2.3170390474934397, | |
| "learning_rate": 3.3717272341895335e-06, | |
| "loss": 0.754, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.8553299492385786, | |
| "grad_norm": 8.508529834209089, | |
| "learning_rate": 3.358773766857397e-06, | |
| "loss": 0.8398, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.8578680203045685, | |
| "grad_norm": 2.7296031884976815, | |
| "learning_rate": 3.3458326333759927e-06, | |
| "loss": 0.5618, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.8604060913705585, | |
| "grad_norm": 5.436648352881961, | |
| "learning_rate": 3.3329039309982202e-06, | |
| "loss": 0.715, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.8629441624365484, | |
| "grad_norm": 6.288919164300693, | |
| "learning_rate": 3.319987756883559e-06, | |
| "loss": 0.7989, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.865482233502538, | |
| "grad_norm": 3.4817572272054367, | |
| "learning_rate": 3.307084208097337e-06, | |
| "loss": 0.678, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.868020304568528, | |
| "grad_norm": 4.519379736523107, | |
| "learning_rate": 3.2941933816100024e-06, | |
| "loss": 0.6747, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.8705583756345177, | |
| "grad_norm": 3.178920019176328, | |
| "learning_rate": 3.281315374296397e-06, | |
| "loss": 0.7039, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.8730964467005076, | |
| "grad_norm": 3.2505180835432097, | |
| "learning_rate": 3.268450282935026e-06, | |
| "loss": 0.6003, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.8756345177664975, | |
| "grad_norm": 2.9109069621999155, | |
| "learning_rate": 3.2555982042073287e-06, | |
| "loss": 0.8364, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.8781725888324874, | |
| "grad_norm": 2.9599340344644, | |
| "learning_rate": 3.2427592346969538e-06, | |
| "loss": 0.7817, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.8807106598984773, | |
| "grad_norm": 2.4356343730468284, | |
| "learning_rate": 3.2299334708890384e-06, | |
| "loss": 0.767, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.883248730964467, | |
| "grad_norm": 2.5412975036915046, | |
| "learning_rate": 3.217121009169474e-06, | |
| "loss": 0.746, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.885786802030457, | |
| "grad_norm": 3.3029683648181214, | |
| "learning_rate": 3.2043219458241896e-06, | |
| "loss": 0.6829, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.8883248730964466, | |
| "grad_norm": 3.380145523857882, | |
| "learning_rate": 3.1915363770384223e-06, | |
| "loss": 0.6645, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.8908629441624365, | |
| "grad_norm": 3.688747382996952, | |
| "learning_rate": 3.1787643988959993e-06, | |
| "loss": 0.7798, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.8934010152284264, | |
| "grad_norm": 2.887848658016839, | |
| "learning_rate": 3.1660061073786132e-06, | |
| "loss": 0.664, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.8959390862944163, | |
| "grad_norm": 5.719598275135355, | |
| "learning_rate": 3.1532615983651027e-06, | |
| "loss": 0.6847, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.8984771573604062, | |
| "grad_norm": 2.2084171353583755, | |
| "learning_rate": 3.1405309676307283e-06, | |
| "loss": 0.6702, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.901015228426396, | |
| "grad_norm": 2.499900700135605, | |
| "learning_rate": 3.127814310846453e-06, | |
| "loss": 0.7367, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.9035532994923858, | |
| "grad_norm": 2.097745954293279, | |
| "learning_rate": 3.1151117235782346e-06, | |
| "loss": 0.7415, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.9060913705583755, | |
| "grad_norm": 4.193177406705227, | |
| "learning_rate": 3.102423301286288e-06, | |
| "loss": 0.7195, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.9086294416243654, | |
| "grad_norm": 4.1333003833317665, | |
| "learning_rate": 3.089749139324383e-06, | |
| "loss": 0.7775, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.9111675126903553, | |
| "grad_norm": 2.732619968521544, | |
| "learning_rate": 3.0770893329391207e-06, | |
| "loss": 0.7035, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.9137055837563453, | |
| "grad_norm": 2.3159844015784654, | |
| "learning_rate": 3.06444397726922e-06, | |
| "loss": 0.7481, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.9162436548223352, | |
| "grad_norm": 2.3696071469298134, | |
| "learning_rate": 3.051813167344807e-06, | |
| "loss": 0.6548, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.9187817258883249, | |
| "grad_norm": 3.067435702712297, | |
| "learning_rate": 3.0391969980866874e-06, | |
| "loss": 0.6184, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.9213197969543148, | |
| "grad_norm": 2.327610059779149, | |
| "learning_rate": 3.026595564305649e-06, | |
| "loss": 0.577, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.9238578680203045, | |
| "grad_norm": 2.0617755977820207, | |
| "learning_rate": 3.0140089607017386e-06, | |
| "loss": 0.7574, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.9263959390862944, | |
| "grad_norm": 8.037980133671185, | |
| "learning_rate": 3.001437281863558e-06, | |
| "loss": 0.6502, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.9289340101522843, | |
| "grad_norm": 2.753352962706752, | |
| "learning_rate": 2.988880622267544e-06, | |
| "loss": 0.8281, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.9314720812182742, | |
| "grad_norm": 2.4771490810702947, | |
| "learning_rate": 2.976339076277265e-06, | |
| "loss": 0.7763, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.934010152284264, | |
| "grad_norm": 3.702930187531345, | |
| "learning_rate": 2.963812738142713e-06, | |
| "loss": 0.7555, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.9365482233502538, | |
| "grad_norm": 3.811516534289345, | |
| "learning_rate": 2.9513017019995892e-06, | |
| "loss": 0.7605, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.9390862944162437, | |
| "grad_norm": 2.870096463585816, | |
| "learning_rate": 2.938806061868603e-06, | |
| "loss": 0.7558, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.9416243654822334, | |
| "grad_norm": 3.341968457840719, | |
| "learning_rate": 2.9263259116547606e-06, | |
| "loss": 0.7995, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.9441624365482233, | |
| "grad_norm": 2.3250437330227034, | |
| "learning_rate": 2.9138613451466625e-06, | |
| "loss": 0.7095, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.9467005076142132, | |
| "grad_norm": 4.089011850638278, | |
| "learning_rate": 2.901412456015794e-06, | |
| "loss": 0.7799, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.9492385786802031, | |
| "grad_norm": 5.39145678330308, | |
| "learning_rate": 2.8889793378158284e-06, | |
| "loss": 0.7432, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.951776649746193, | |
| "grad_norm": 2.5757496001839058, | |
| "learning_rate": 2.8765620839819173e-06, | |
| "loss": 0.7705, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.9543147208121827, | |
| "grad_norm": 2.1532580815403324, | |
| "learning_rate": 2.864160787829994e-06, | |
| "loss": 0.6409, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.9568527918781726, | |
| "grad_norm": 2.40066418251008, | |
| "learning_rate": 2.8517755425560665e-06, | |
| "loss": 0.6311, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.9593908629441623, | |
| "grad_norm": 2.3586685801917646, | |
| "learning_rate": 2.8394064412355237e-06, | |
| "loss": 0.7186, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.9619289340101522, | |
| "grad_norm": 4.490275405727021, | |
| "learning_rate": 2.8270535768224306e-06, | |
| "loss": 0.6813, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.9644670050761421, | |
| "grad_norm": 5.464180958902756, | |
| "learning_rate": 2.814717042148827e-06, | |
| "loss": 0.8316, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.967005076142132, | |
| "grad_norm": 3.0045365799520285, | |
| "learning_rate": 2.802396929924042e-06, | |
| "loss": 0.6874, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.969543147208122, | |
| "grad_norm": 2.099521271054753, | |
| "learning_rate": 2.790093332733983e-06, | |
| "loss": 0.6727, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.9720812182741116, | |
| "grad_norm": 2.7219964531890986, | |
| "learning_rate": 2.7778063430404544e-06, | |
| "loss": 0.6503, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.9746192893401016, | |
| "grad_norm": 5.930105066525641, | |
| "learning_rate": 2.765536053180447e-06, | |
| "loss": 0.6837, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.9771573604060912, | |
| "grad_norm": 2.353332363904023, | |
| "learning_rate": 2.7532825553654565e-06, | |
| "loss": 0.6135, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.9796954314720812, | |
| "grad_norm": 1.8537629771923843, | |
| "learning_rate": 2.7410459416807856e-06, | |
| "loss": 0.7775, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.982233502538071, | |
| "grad_norm": 2.0438002285632533, | |
| "learning_rate": 2.7288263040848483e-06, | |
| "loss": 0.6534, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.984771573604061, | |
| "grad_norm": 2.0013275878832713, | |
| "learning_rate": 2.7166237344084883e-06, | |
| "loss": 0.9252, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.987309644670051, | |
| "grad_norm": 2.262360303216243, | |
| "learning_rate": 2.7044383243542804e-06, | |
| "loss": 0.7165, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.9898477157360406, | |
| "grad_norm": 2.8760412225109246, | |
| "learning_rate": 2.692270165495846e-06, | |
| "loss": 0.7468, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.9923857868020305, | |
| "grad_norm": 2.9100081502109614, | |
| "learning_rate": 2.6801193492771633e-06, | |
| "loss": 0.7086, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.9949238578680202, | |
| "grad_norm": 4.9781125187661255, | |
| "learning_rate": 2.6679859670118785e-06, | |
| "loss": 0.7527, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.99746192893401, | |
| "grad_norm": 2.927258479320422, | |
| "learning_rate": 2.6558701098826233e-06, | |
| "loss": 0.9075, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.313238030479698, | |
| "learning_rate": 2.643771868940327e-06, | |
| "loss": 0.5394, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 2.00253807106599, | |
| "grad_norm": 3.116665312104337, | |
| "learning_rate": 2.6316913351035313e-06, | |
| "loss": 0.5184, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 2.00507614213198, | |
| "grad_norm": 2.5385445062740546, | |
| "learning_rate": 2.6196285991577107e-06, | |
| "loss": 0.495, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.0076142131979697, | |
| "grad_norm": 2.9811678625183924, | |
| "learning_rate": 2.607583751754589e-06, | |
| "loss": 0.4928, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 2.010152284263959, | |
| "grad_norm": 2.337923779288377, | |
| "learning_rate": 2.5955568834114523e-06, | |
| "loss": 0.6939, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 2.012690355329949, | |
| "grad_norm": 6.989523174369998, | |
| "learning_rate": 2.58354808451048e-06, | |
| "loss": 0.7023, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 2.015228426395939, | |
| "grad_norm": 2.9832031040895943, | |
| "learning_rate": 2.571557445298055e-06, | |
| "loss": 0.51, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 2.017766497461929, | |
| "grad_norm": 3.6733364494132603, | |
| "learning_rate": 2.5595850558840908e-06, | |
| "loss": 0.4982, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 2.020304568527919, | |
| "grad_norm": 4.175046814149058, | |
| "learning_rate": 2.5476310062413544e-06, | |
| "loss": 0.5748, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 2.0228426395939088, | |
| "grad_norm": 2.7364939660482466, | |
| "learning_rate": 2.5356953862047894e-06, | |
| "loss": 0.5673, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 2.0253807106598987, | |
| "grad_norm": 2.4199170445654925, | |
| "learning_rate": 2.523778285470835e-06, | |
| "loss": 0.5405, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 2.027918781725888, | |
| "grad_norm": 2.7021906853383055, | |
| "learning_rate": 2.5118797935967643e-06, | |
| "loss": 0.4834, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 2.030456852791878, | |
| "grad_norm": 3.547205567316223, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 0.5416, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.032994923857868, | |
| "grad_norm": 3.0646162357404747, | |
| "learning_rate": 2.4881389939574524e-06, | |
| "loss": 0.6144, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 2.035532994923858, | |
| "grad_norm": 2.8896201736006466, | |
| "learning_rate": 2.4762968646048357e-06, | |
| "loss": 0.5462, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 2.0380710659898478, | |
| "grad_norm": 4.352165532233693, | |
| "learning_rate": 2.464473700936008e-06, | |
| "loss": 0.6707, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 2.0406091370558377, | |
| "grad_norm": 3.074843740914918, | |
| "learning_rate": 2.452669591802307e-06, | |
| "loss": 0.5552, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 2.0431472081218276, | |
| "grad_norm": 3.2593449644153405, | |
| "learning_rate": 2.4408846259118613e-06, | |
| "loss": 0.578, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 2.045685279187817, | |
| "grad_norm": 2.775527110324298, | |
| "learning_rate": 2.429118891828949e-06, | |
| "loss": 0.6023, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 2.048223350253807, | |
| "grad_norm": 3.3719031747934833, | |
| "learning_rate": 2.4173724779733153e-06, | |
| "loss": 0.5919, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 2.050761421319797, | |
| "grad_norm": 5.862091426284267, | |
| "learning_rate": 2.4056454726195166e-06, | |
| "loss": 0.618, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 2.053299492385787, | |
| "grad_norm": 2.8984948188584245, | |
| "learning_rate": 2.3939379638962513e-06, | |
| "loss": 0.4007, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 2.0558375634517767, | |
| "grad_norm": 2.393540340028062, | |
| "learning_rate": 2.3822500397857016e-06, | |
| "loss": 0.5127, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.0583756345177666, | |
| "grad_norm": 4.320212096479848, | |
| "learning_rate": 2.370581788122871e-06, | |
| "loss": 0.5212, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 2.0609137055837565, | |
| "grad_norm": 3.5308347197793486, | |
| "learning_rate": 2.3589332965949234e-06, | |
| "loss": 0.5261, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 2.063451776649746, | |
| "grad_norm": 3.1936209119502728, | |
| "learning_rate": 2.3473046527405273e-06, | |
| "loss": 0.5722, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 2.065989847715736, | |
| "grad_norm": 2.4389080171771442, | |
| "learning_rate": 2.33569594394919e-06, | |
| "loss": 0.6258, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 2.068527918781726, | |
| "grad_norm": 4.494023471618723, | |
| "learning_rate": 2.3241072574606105e-06, | |
| "loss": 0.427, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 2.0710659898477157, | |
| "grad_norm": 11.249234559465428, | |
| "learning_rate": 2.3125386803640188e-06, | |
| "loss": 0.52, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 2.0736040609137056, | |
| "grad_norm": 2.3608749757852965, | |
| "learning_rate": 2.300990299597525e-06, | |
| "loss": 0.5496, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 2.0761421319796955, | |
| "grad_norm": 3.0310274663393924, | |
| "learning_rate": 2.28946220194746e-06, | |
| "loss": 0.5861, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 2.0786802030456855, | |
| "grad_norm": 13.351538653185711, | |
| "learning_rate": 2.2779544740477276e-06, | |
| "loss": 0.7389, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 2.081218274111675, | |
| "grad_norm": 3.9122053899709437, | |
| "learning_rate": 2.266467202379154e-06, | |
| "loss": 0.4761, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.083756345177665, | |
| "grad_norm": 2.208921812478111, | |
| "learning_rate": 2.2550004732688312e-06, | |
| "loss": 0.4685, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 2.0862944162436547, | |
| "grad_norm": 2.591217022415893, | |
| "learning_rate": 2.243554372889479e-06, | |
| "loss": 0.6088, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 2.0888324873096447, | |
| "grad_norm": 2.8280555926678885, | |
| "learning_rate": 2.232128987258791e-06, | |
| "loss": 0.5576, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 2.0913705583756346, | |
| "grad_norm": 6.603890432169106, | |
| "learning_rate": 2.220724402238786e-06, | |
| "loss": 0.5969, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 2.0939086294416245, | |
| "grad_norm": 3.3341288390032746, | |
| "learning_rate": 2.2093407035351695e-06, | |
| "loss": 0.4989, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 2.0964467005076144, | |
| "grad_norm": 3.124499128405431, | |
| "learning_rate": 2.197977976696683e-06, | |
| "loss": 0.5676, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 2.098984771573604, | |
| "grad_norm": 3.4057044744978944, | |
| "learning_rate": 2.186636307114466e-06, | |
| "loss": 0.6385, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 2.1015228426395938, | |
| "grad_norm": 2.5789067088419677, | |
| "learning_rate": 2.1753157800214107e-06, | |
| "loss": 0.5224, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 2.1040609137055837, | |
| "grad_norm": 4.824453982320905, | |
| "learning_rate": 2.1640164804915235e-06, | |
| "loss": 0.5814, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 2.1065989847715736, | |
| "grad_norm": 2.3147558116459397, | |
| "learning_rate": 2.1527384934392865e-06, | |
| "loss": 0.4466, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.1091370558375635, | |
| "grad_norm": 3.3719742417701193, | |
| "learning_rate": 2.141481903619016e-06, | |
| "loss": 0.5856, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 2.1116751269035534, | |
| "grad_norm": 6.540941962641488, | |
| "learning_rate": 2.130246795624229e-06, | |
| "loss": 0.6698, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 2.1142131979695433, | |
| "grad_norm": 4.684636553430604, | |
| "learning_rate": 2.1190332538870055e-06, | |
| "loss": 0.5059, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 2.116751269035533, | |
| "grad_norm": 2.5793765377182094, | |
| "learning_rate": 2.1078413626773547e-06, | |
| "loss": 0.4879, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 2.1192893401015227, | |
| "grad_norm": 3.9524644895450747, | |
| "learning_rate": 2.096671206102582e-06, | |
| "loss": 0.6381, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 2.1218274111675126, | |
| "grad_norm": 2.9411715631403506, | |
| "learning_rate": 2.0855228681066585e-06, | |
| "loss": 0.6114, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 2.1243654822335025, | |
| "grad_norm": 2.1866657400102953, | |
| "learning_rate": 2.074396432469582e-06, | |
| "loss": 0.5324, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 2.1269035532994924, | |
| "grad_norm": 3.2222804642260585, | |
| "learning_rate": 2.063291982806759e-06, | |
| "loss": 0.5614, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 2.1294416243654823, | |
| "grad_norm": 3.2229524875080227, | |
| "learning_rate": 2.0522096025683696e-06, | |
| "loss": 0.5169, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 2.1319796954314723, | |
| "grad_norm": 2.429958118668916, | |
| "learning_rate": 2.0411493750387423e-06, | |
| "loss": 0.5079, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.1345177664974617, | |
| "grad_norm": 2.8612184869702912, | |
| "learning_rate": 2.0301113833357267e-06, | |
| "loss": 0.5712, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 2.1370558375634516, | |
| "grad_norm": 5.2526677276961555, | |
| "learning_rate": 2.0190957104100696e-06, | |
| "loss": 0.7002, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 2.1395939086294415, | |
| "grad_norm": 2.6846288863362995, | |
| "learning_rate": 2.0081024390447894e-06, | |
| "loss": 0.6155, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 2.1421319796954315, | |
| "grad_norm": 4.160029352630203, | |
| "learning_rate": 1.9971316518545652e-06, | |
| "loss": 0.6612, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 2.1446700507614214, | |
| "grad_norm": 2.457195652872765, | |
| "learning_rate": 1.9861834312850955e-06, | |
| "loss": 0.5828, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 2.1472081218274113, | |
| "grad_norm": 4.014662686653015, | |
| "learning_rate": 1.9752578596124955e-06, | |
| "loss": 0.6141, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 2.149746192893401, | |
| "grad_norm": 4.450005915541498, | |
| "learning_rate": 1.964355018942675e-06, | |
| "loss": 0.5587, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 2.152284263959391, | |
| "grad_norm": 2.8253377333126726, | |
| "learning_rate": 1.953474991210717e-06, | |
| "loss": 0.6734, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 2.1548223350253806, | |
| "grad_norm": 2.935991080321399, | |
| "learning_rate": 1.942617858180267e-06, | |
| "loss": 0.5352, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 2.1573604060913705, | |
| "grad_norm": 3.358221489855652, | |
| "learning_rate": 1.9317837014429135e-06, | |
| "loss": 0.5821, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.1598984771573604, | |
| "grad_norm": 2.9236610806200924, | |
| "learning_rate": 1.9209726024175807e-06, | |
| "loss": 0.5073, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 2.1624365482233503, | |
| "grad_norm": 2.4928791939237724, | |
| "learning_rate": 1.9101846423499113e-06, | |
| "loss": 0.5512, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 2.16497461928934, | |
| "grad_norm": 4.34545853472978, | |
| "learning_rate": 1.8994199023116617e-06, | |
| "loss": 0.5374, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 2.16751269035533, | |
| "grad_norm": 2.269126405099248, | |
| "learning_rate": 1.8886784632000827e-06, | |
| "loss": 0.5079, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 2.1700507614213196, | |
| "grad_norm": 3.8382124318789774, | |
| "learning_rate": 1.8779604057373234e-06, | |
| "loss": 0.4831, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 2.1725888324873095, | |
| "grad_norm": 3.0041853611664973, | |
| "learning_rate": 1.8672658104698178e-06, | |
| "loss": 0.5322, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 2.1751269035532994, | |
| "grad_norm": 2.418167929638173, | |
| "learning_rate": 1.856594757767684e-06, | |
| "loss": 0.5232, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 2.1776649746192893, | |
| "grad_norm": 2.730297885338874, | |
| "learning_rate": 1.8459473278241125e-06, | |
| "loss": 0.597, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 2.1802030456852792, | |
| "grad_norm": 5.087457491162711, | |
| "learning_rate": 1.8353236006547697e-06, | |
| "loss": 0.636, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 2.182741116751269, | |
| "grad_norm": 3.51557324596564, | |
| "learning_rate": 1.8247236560971986e-06, | |
| "loss": 0.607, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.185279187817259, | |
| "grad_norm": 6.790823174251955, | |
| "learning_rate": 1.8141475738102088e-06, | |
| "loss": 0.5617, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 2.187817258883249, | |
| "grad_norm": 3.273590099732762, | |
| "learning_rate": 1.803595433273289e-06, | |
| "loss": 0.5327, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 2.1903553299492384, | |
| "grad_norm": 2.858230950098354, | |
| "learning_rate": 1.7930673137860044e-06, | |
| "loss": 0.5177, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 2.1928934010152283, | |
| "grad_norm": 4.2865689479573135, | |
| "learning_rate": 1.7825632944674016e-06, | |
| "loss": 0.5136, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 2.1954314720812182, | |
| "grad_norm": 3.2188012772359826, | |
| "learning_rate": 1.7720834542554133e-06, | |
| "loss": 0.5637, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 2.197969543147208, | |
| "grad_norm": 8.719196870847238, | |
| "learning_rate": 1.7616278719062664e-06, | |
| "loss": 0.6844, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 2.200507614213198, | |
| "grad_norm": 4.04579650838903, | |
| "learning_rate": 1.751196625993888e-06, | |
| "loss": 0.5141, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 2.203045685279188, | |
| "grad_norm": 11.660414848243596, | |
| "learning_rate": 1.7407897949093184e-06, | |
| "loss": 0.5526, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 2.2055837563451774, | |
| "grad_norm": 2.9105610061958296, | |
| "learning_rate": 1.730407456860118e-06, | |
| "loss": 0.4903, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 2.2081218274111674, | |
| "grad_norm": 2.8496411326189874, | |
| "learning_rate": 1.7200496898697832e-06, | |
| "loss": 0.489, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.2106598984771573, | |
| "grad_norm": 6.120183294524192, | |
| "learning_rate": 1.7097165717771574e-06, | |
| "loss": 0.584, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 2.213197969543147, | |
| "grad_norm": 5.938159912914938, | |
| "learning_rate": 1.6994081802358464e-06, | |
| "loss": 0.5253, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 2.215736040609137, | |
| "grad_norm": 14.924835199243644, | |
| "learning_rate": 1.6891245927136368e-06, | |
| "loss": 0.5136, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 2.218274111675127, | |
| "grad_norm": 2.9503220087744557, | |
| "learning_rate": 1.6788658864919121e-06, | |
| "loss": 0.5102, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 2.220812182741117, | |
| "grad_norm": 6.73128614516956, | |
| "learning_rate": 1.6686321386650711e-06, | |
| "loss": 0.4889, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 2.223350253807107, | |
| "grad_norm": 3.1684587237311983, | |
| "learning_rate": 1.6584234261399535e-06, | |
| "loss": 0.5409, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 2.2258883248730963, | |
| "grad_norm": 2.4359330853449133, | |
| "learning_rate": 1.648239825635251e-06, | |
| "loss": 0.4786, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 2.228426395939086, | |
| "grad_norm": 6.6099362518324805, | |
| "learning_rate": 1.6380814136809442e-06, | |
| "loss": 0.5253, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 2.230964467005076, | |
| "grad_norm": 2.6729424419883707, | |
| "learning_rate": 1.6279482666177194e-06, | |
| "loss": 0.6108, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 2.233502538071066, | |
| "grad_norm": 3.0490641589625476, | |
| "learning_rate": 1.6178404605963965e-06, | |
| "loss": 0.5215, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.236040609137056, | |
| "grad_norm": 2.8438807078533794, | |
| "learning_rate": 1.6077580715773566e-06, | |
| "loss": 0.5095, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 2.238578680203046, | |
| "grad_norm": 2.922067738649112, | |
| "learning_rate": 1.5977011753299726e-06, | |
| "loss": 0.5084, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 2.2411167512690353, | |
| "grad_norm": 7.290805478826787, | |
| "learning_rate": 1.5876698474320368e-06, | |
| "loss": 0.531, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 2.2436548223350252, | |
| "grad_norm": 2.6342457019012144, | |
| "learning_rate": 1.5776641632691969e-06, | |
| "loss": 0.5642, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 2.246192893401015, | |
| "grad_norm": 2.2984543518164045, | |
| "learning_rate": 1.5676841980343854e-06, | |
| "loss": 0.5731, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 2.248730964467005, | |
| "grad_norm": 4.565973268522213, | |
| "learning_rate": 1.5577300267272583e-06, | |
| "loss": 0.6255, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 2.251269035532995, | |
| "grad_norm": 2.3144475105877476, | |
| "learning_rate": 1.5478017241536286e-06, | |
| "loss": 0.5928, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 2.253807106598985, | |
| "grad_norm": 8.823232328391466, | |
| "learning_rate": 1.5378993649249053e-06, | |
| "loss": 0.5253, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 2.2563451776649748, | |
| "grad_norm": 5.6654920156900115, | |
| "learning_rate": 1.5280230234575323e-06, | |
| "loss": 0.4534, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 2.2588832487309647, | |
| "grad_norm": 3.331386361906362, | |
| "learning_rate": 1.518172773972431e-06, | |
| "loss": 0.5013, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.261421319796954, | |
| "grad_norm": 3.7280229939433505, | |
| "learning_rate": 1.5083486904944388e-06, | |
| "loss": 0.5985, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 2.263959390862944, | |
| "grad_norm": 4.310349475560709, | |
| "learning_rate": 1.498550846851759e-06, | |
| "loss": 0.7753, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 2.266497461928934, | |
| "grad_norm": 2.6757480326038623, | |
| "learning_rate": 1.4887793166753944e-06, | |
| "loss": 0.6183, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 2.269035532994924, | |
| "grad_norm": 3.0512411111957016, | |
| "learning_rate": 1.4790341733986085e-06, | |
| "loss": 0.5618, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 2.271573604060914, | |
| "grad_norm": 2.524907186256771, | |
| "learning_rate": 1.4693154902563644e-06, | |
| "loss": 0.6436, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 2.2741116751269037, | |
| "grad_norm": 3.2128246126669207, | |
| "learning_rate": 1.4596233402847747e-06, | |
| "loss": 0.4731, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 2.276649746192893, | |
| "grad_norm": 2.608459261288969, | |
| "learning_rate": 1.4499577963205601e-06, | |
| "loss": 0.5055, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 2.279187817258883, | |
| "grad_norm": 6.430159511139313, | |
| "learning_rate": 1.4403189310004917e-06, | |
| "loss": 0.4387, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 2.281725888324873, | |
| "grad_norm": 2.1689415980620272, | |
| "learning_rate": 1.4307068167608506e-06, | |
| "loss": 0.4445, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 2.284263959390863, | |
| "grad_norm": 2.9547978217502817, | |
| "learning_rate": 1.4211215258368866e-06, | |
| "loss": 0.5166, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.286802030456853, | |
| "grad_norm": 2.7536924318067464, | |
| "learning_rate": 1.4115631302622645e-06, | |
| "loss": 0.4962, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 2.2893401015228427, | |
| "grad_norm": 2.4788684838036312, | |
| "learning_rate": 1.4020317018685364e-06, | |
| "loss": 0.5943, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.2918781725888326, | |
| "grad_norm": 2.4029703503544524, | |
| "learning_rate": 1.3925273122845933e-06, | |
| "loss": 0.6502, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 2.2944162436548226, | |
| "grad_norm": 2.5929113598327262, | |
| "learning_rate": 1.3830500329361295e-06, | |
| "loss": 0.4675, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.296954314720812, | |
| "grad_norm": 5.059656988070416, | |
| "learning_rate": 1.3735999350451047e-06, | |
| "loss": 0.5883, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 2.299492385786802, | |
| "grad_norm": 2.146040859938093, | |
| "learning_rate": 1.3641770896292083e-06, | |
| "loss": 0.6194, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.302030456852792, | |
| "grad_norm": 2.9986918388588215, | |
| "learning_rate": 1.3547815675013292e-06, | |
| "loss": 0.6045, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 2.3045685279187818, | |
| "grad_norm": 9.578347963285362, | |
| "learning_rate": 1.34541343926902e-06, | |
| "loss": 0.5556, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.3071065989847717, | |
| "grad_norm": 2.3226554091713787, | |
| "learning_rate": 1.3360727753339665e-06, | |
| "loss": 0.5353, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 2.3096446700507616, | |
| "grad_norm": 3.0191445124573124, | |
| "learning_rate": 1.3267596458914617e-06, | |
| "loss": 0.478, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.312182741116751, | |
| "grad_norm": 2.3138830524059117, | |
| "learning_rate": 1.3174741209298747e-06, | |
| "loss": 0.4937, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 2.314720812182741, | |
| "grad_norm": 2.6600407713277585, | |
| "learning_rate": 1.3082162702301276e-06, | |
| "loss": 0.6677, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.317258883248731, | |
| "grad_norm": 2.2466655935165254, | |
| "learning_rate": 1.2989861633651685e-06, | |
| "loss": 0.5972, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 2.3197969543147208, | |
| "grad_norm": 2.8945889792174593, | |
| "learning_rate": 1.2897838696994507e-06, | |
| "loss": 0.6878, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.3223350253807107, | |
| "grad_norm": 5.824487867348564, | |
| "learning_rate": 1.2806094583884115e-06, | |
| "loss": 0.5068, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 2.3248730964467006, | |
| "grad_norm": 3.253456602349523, | |
| "learning_rate": 1.2714629983779514e-06, | |
| "loss": 0.4579, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.3274111675126905, | |
| "grad_norm": 2.0950832497861978, | |
| "learning_rate": 1.262344558403913e-06, | |
| "loss": 0.5605, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 2.3299492385786804, | |
| "grad_norm": 4.266155555762764, | |
| "learning_rate": 1.2532542069915722e-06, | |
| "loss": 0.5819, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.33248730964467, | |
| "grad_norm": 3.9318894536968374, | |
| "learning_rate": 1.2441920124551166e-06, | |
| "loss": 0.5636, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 2.33502538071066, | |
| "grad_norm": 3.4644680747535324, | |
| "learning_rate": 1.2351580428971348e-06, | |
| "loss": 0.453, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.3375634517766497, | |
| "grad_norm": 7.165287296686917, | |
| "learning_rate": 1.2261523662081044e-06, | |
| "loss": 0.4424, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 2.3401015228426396, | |
| "grad_norm": 3.2849863508234773, | |
| "learning_rate": 1.2171750500658802e-06, | |
| "loss": 0.47, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.3426395939086295, | |
| "grad_norm": 3.6210630428685007, | |
| "learning_rate": 1.2082261619351888e-06, | |
| "loss": 0.5103, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 2.3451776649746194, | |
| "grad_norm": 3.1975974186492033, | |
| "learning_rate": 1.1993057690671174e-06, | |
| "loss": 0.5004, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.347715736040609, | |
| "grad_norm": 3.7031056763500936, | |
| "learning_rate": 1.1904139384986123e-06, | |
| "loss": 0.6611, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 2.350253807106599, | |
| "grad_norm": 2.4956480369490106, | |
| "learning_rate": 1.181550737051973e-06, | |
| "loss": 0.4588, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.3527918781725887, | |
| "grad_norm": 4.633911698168668, | |
| "learning_rate": 1.172716231334351e-06, | |
| "loss": 0.5715, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 2.3553299492385786, | |
| "grad_norm": 3.540866963095373, | |
| "learning_rate": 1.1639104877372475e-06, | |
| "loss": 0.5164, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.3578680203045685, | |
| "grad_norm": 2.915116721864807, | |
| "learning_rate": 1.1551335724360174e-06, | |
| "loss": 0.6323, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 2.3604060913705585, | |
| "grad_norm": 2.8606673835042424, | |
| "learning_rate": 1.1463855513893695e-06, | |
| "loss": 0.6057, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.3629441624365484, | |
| "grad_norm": 2.337064691161368, | |
| "learning_rate": 1.1376664903388711e-06, | |
| "loss": 0.5347, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 2.3654822335025383, | |
| "grad_norm": 8.581970877336293, | |
| "learning_rate": 1.128976454808457e-06, | |
| "loss": 0.6829, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.3680203045685277, | |
| "grad_norm": 3.2613702450095396, | |
| "learning_rate": 1.1203155101039293e-06, | |
| "loss": 0.4777, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 2.3705583756345177, | |
| "grad_norm": 6.487751635876781, | |
| "learning_rate": 1.111683721312477e-06, | |
| "loss": 0.5744, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.3730964467005076, | |
| "grad_norm": 4.368595166619369, | |
| "learning_rate": 1.10308115330218e-06, | |
| "loss": 0.5109, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 2.3756345177664975, | |
| "grad_norm": 3.4530686993130306, | |
| "learning_rate": 1.0945078707215224e-06, | |
| "loss": 0.4937, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.3781725888324874, | |
| "grad_norm": 2.9525714062800215, | |
| "learning_rate": 1.0859639379989113e-06, | |
| "loss": 0.5571, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 2.3807106598984773, | |
| "grad_norm": 3.128139822413409, | |
| "learning_rate": 1.0774494193421842e-06, | |
| "loss": 0.5093, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.3832487309644668, | |
| "grad_norm": 2.6341548262769483, | |
| "learning_rate": 1.0689643787381359e-06, | |
| "loss": 0.4875, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 2.3857868020304567, | |
| "grad_norm": 2.866892196074003, | |
| "learning_rate": 1.060508879952028e-06, | |
| "loss": 0.5049, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.3883248730964466, | |
| "grad_norm": 2.4087256253802756, | |
| "learning_rate": 1.0520829865271177e-06, | |
| "loss": 0.5916, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 2.3908629441624365, | |
| "grad_norm": 4.157497736307402, | |
| "learning_rate": 1.0436867617841768e-06, | |
| "loss": 0.4625, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.3934010152284264, | |
| "grad_norm": 3.1654196498309446, | |
| "learning_rate": 1.0353202688210169e-06, | |
| "loss": 0.6251, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 2.3959390862944163, | |
| "grad_norm": 5.217619527759743, | |
| "learning_rate": 1.0269835705120134e-06, | |
| "loss": 0.5803, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.3984771573604062, | |
| "grad_norm": 2.691020661991825, | |
| "learning_rate": 1.018676729507636e-06, | |
| "loss": 0.6559, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 2.401015228426396, | |
| "grad_norm": 2.311428676461019, | |
| "learning_rate": 1.0103998082339738e-06, | |
| "loss": 0.4166, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.4035532994923856, | |
| "grad_norm": 2.6077485330856405, | |
| "learning_rate": 1.0021528688922705e-06, | |
| "loss": 0.5724, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 2.4060913705583755, | |
| "grad_norm": 4.218759486510066, | |
| "learning_rate": 9.939359734584552e-07, | |
| "loss": 0.5034, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.4086294416243654, | |
| "grad_norm": 3.824090997578029, | |
| "learning_rate": 9.857491836826704e-07, | |
| "loss": 0.7422, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 2.4111675126903553, | |
| "grad_norm": 3.0368671180326103, | |
| "learning_rate": 9.775925610888243e-07, | |
| "loss": 0.5311, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.4137055837563453, | |
| "grad_norm": 2.205826825038038, | |
| "learning_rate": 9.694661669741102e-07, | |
| "loss": 0.6092, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 2.416243654822335, | |
| "grad_norm": 2.9931354115283257, | |
| "learning_rate": 9.613700624085564e-07, | |
| "loss": 0.5511, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.4187817258883246, | |
| "grad_norm": 2.604908851382699, | |
| "learning_rate": 9.533043082345644e-07, | |
| "loss": 0.6022, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 2.4213197969543145, | |
| "grad_norm": 2.7757352275956007, | |
| "learning_rate": 9.452689650664515e-07, | |
| "loss": 0.5459, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.4238578680203045, | |
| "grad_norm": 2.9681544482964095, | |
| "learning_rate": 9.372640932899962e-07, | |
| "loss": 0.5155, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 2.4263959390862944, | |
| "grad_norm": 2.821374797475668, | |
| "learning_rate": 9.292897530619843e-07, | |
| "loss": 0.5359, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.4289340101522843, | |
| "grad_norm": 6.320399959191023, | |
| "learning_rate": 9.213460043097533e-07, | |
| "loss": 0.551, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 2.431472081218274, | |
| "grad_norm": 2.7724376062249263, | |
| "learning_rate": 9.134329067307485e-07, | |
| "loss": 0.6175, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.434010152284264, | |
| "grad_norm": 2.6764330813108947, | |
| "learning_rate": 9.0555051979207e-07, | |
| "loss": 0.473, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 2.436548223350254, | |
| "grad_norm": 2.730873477751639, | |
| "learning_rate": 8.976989027300265e-07, | |
| "loss": 0.5239, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.4390862944162435, | |
| "grad_norm": 5.344549869935821, | |
| "learning_rate": 8.898781145496898e-07, | |
| "loss": 0.521, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 2.4416243654822334, | |
| "grad_norm": 3.1454862125612038, | |
| "learning_rate": 8.820882140244541e-07, | |
| "loss": 0.5779, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.4441624365482233, | |
| "grad_norm": 3.458054376324759, | |
| "learning_rate": 8.743292596955894e-07, | |
| "loss": 0.6853, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 2.446700507614213, | |
| "grad_norm": 2.481931965861367, | |
| "learning_rate": 8.666013098718068e-07, | |
| "loss": 0.6854, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.449238578680203, | |
| "grad_norm": 2.532848059098252, | |
| "learning_rate": 8.589044226288157e-07, | |
| "loss": 0.6088, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 2.451776649746193, | |
| "grad_norm": 2.8979752935507515, | |
| "learning_rate": 8.512386558088919e-07, | |
| "loss": 0.5605, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.4543147208121825, | |
| "grad_norm": 2.413856110480652, | |
| "learning_rate": 8.436040670204382e-07, | |
| "loss": 0.6307, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 2.4568527918781724, | |
| "grad_norm": 2.6122764787309567, | |
| "learning_rate": 8.360007136375553e-07, | |
| "loss": 0.4842, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.4593908629441623, | |
| "grad_norm": 4.235121776718993, | |
| "learning_rate": 8.284286527996094e-07, | |
| "loss": 0.5184, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 2.4619289340101522, | |
| "grad_norm": 3.45109626188492, | |
| "learning_rate": 8.208879414108006e-07, | |
| "loss": 0.6053, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.464467005076142, | |
| "grad_norm": 2.6257857510140252, | |
| "learning_rate": 8.1337863613974e-07, | |
| "loss": 0.5008, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 2.467005076142132, | |
| "grad_norm": 2.3264527378512696, | |
| "learning_rate": 8.059007934190194e-07, | |
| "loss": 0.5054, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.469543147208122, | |
| "grad_norm": 4.073372597574256, | |
| "learning_rate": 7.984544694447871e-07, | |
| "loss": 0.5902, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 2.472081218274112, | |
| "grad_norm": 2.5118544268541974, | |
| "learning_rate": 7.910397201763309e-07, | |
| "loss": 0.619, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.4746192893401013, | |
| "grad_norm": 2.669293871398301, | |
| "learning_rate": 7.836566013356523e-07, | |
| "loss": 0.5093, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 2.4771573604060912, | |
| "grad_norm": 2.840349366691089, | |
| "learning_rate": 7.763051684070477e-07, | |
| "loss": 0.5888, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.479695431472081, | |
| "grad_norm": 2.9032925993956504, | |
| "learning_rate": 7.689854766366972e-07, | |
| "loss": 0.427, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 2.482233502538071, | |
| "grad_norm": 3.3473807928759816, | |
| "learning_rate": 7.61697581032243e-07, | |
| "loss": 0.5261, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.484771573604061, | |
| "grad_norm": 2.3723924084628027, | |
| "learning_rate": 7.544415363623792e-07, | |
| "loss": 0.6448, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 2.487309644670051, | |
| "grad_norm": 2.736374696720529, | |
| "learning_rate": 7.472173971564361e-07, | |
| "loss": 0.4803, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.489847715736041, | |
| "grad_norm": 3.637118886170699, | |
| "learning_rate": 7.400252177039785e-07, | |
| "loss": 0.5371, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 2.4923857868020303, | |
| "grad_norm": 3.0756471804049856, | |
| "learning_rate": 7.328650520543906e-07, | |
| "loss": 0.507, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.49492385786802, | |
| "grad_norm": 4.1622186651564235, | |
| "learning_rate": 7.257369540164727e-07, | |
| "loss": 0.5822, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 2.49746192893401, | |
| "grad_norm": 2.9186699671335603, | |
| "learning_rate": 7.186409771580355e-07, | |
| "loss": 0.4696, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 2.3325874106320756, | |
| "learning_rate": 7.115771748054995e-07, | |
| "loss": 0.4986, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 2.50253807106599, | |
| "grad_norm": 2.731412272019914, | |
| "learning_rate": 7.045456000434925e-07, | |
| "loss": 0.4617, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.50507614213198, | |
| "grad_norm": 6.200544247742595, | |
| "learning_rate": 6.97546305714451e-07, | |
| "loss": 0.5896, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 2.5076142131979697, | |
| "grad_norm": 2.423999317326736, | |
| "learning_rate": 6.905793444182257e-07, | |
| "loss": 0.4819, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.5101522842639596, | |
| "grad_norm": 3.3721509568099406, | |
| "learning_rate": 6.83644768511677e-07, | |
| "loss": 0.6536, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 2.512690355329949, | |
| "grad_norm": 3.0729278287610757, | |
| "learning_rate": 6.76742630108298e-07, | |
| "loss": 0.4877, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.515228426395939, | |
| "grad_norm": 2.7655715334433157, | |
| "learning_rate": 6.698729810778065e-07, | |
| "loss": 0.5287, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 2.517766497461929, | |
| "grad_norm": 5.420361267055273, | |
| "learning_rate": 6.630358730457648e-07, | |
| "loss": 0.5142, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.520304568527919, | |
| "grad_norm": 2.2241447721743004, | |
| "learning_rate": 6.562313573931867e-07, | |
| "loss": 0.5397, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 2.5228426395939088, | |
| "grad_norm": 2.286123538701192, | |
| "learning_rate": 6.494594852561559e-07, | |
| "loss": 0.4858, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.525380710659898, | |
| "grad_norm": 2.507215871577876, | |
| "learning_rate": 6.42720307525439e-07, | |
| "loss": 0.6809, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 2.527918781725888, | |
| "grad_norm": 2.483754095566079, | |
| "learning_rate": 6.360138748461015e-07, | |
| "loss": 0.5784, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.530456852791878, | |
| "grad_norm": 3.22268003866779, | |
| "learning_rate": 6.293402376171298e-07, | |
| "loss": 0.6292, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 2.532994923857868, | |
| "grad_norm": 3.8860117411826525, | |
| "learning_rate": 6.22699445991054e-07, | |
| "loss": 0.4521, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.535532994923858, | |
| "grad_norm": 4.434057239061569, | |
| "learning_rate": 6.160915498735664e-07, | |
| "loss": 0.5582, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 2.5380710659898478, | |
| "grad_norm": 7.565664182710548, | |
| "learning_rate": 6.0951659892315e-07, | |
| "loss": 0.511, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.5406091370558377, | |
| "grad_norm": 2.1821928037343463, | |
| "learning_rate": 6.029746425507032e-07, | |
| "loss": 0.6412, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 2.5431472081218276, | |
| "grad_norm": 4.229105673009159, | |
| "learning_rate": 5.964657299191712e-07, | |
| "loss": 0.5368, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.5456852791878175, | |
| "grad_norm": 2.2429582646631254, | |
| "learning_rate": 5.899899099431716e-07, | |
| "loss": 0.4948, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 2.548223350253807, | |
| "grad_norm": 2.184367924546276, | |
| "learning_rate": 5.835472312886342e-07, | |
| "loss": 0.5617, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.550761421319797, | |
| "grad_norm": 2.5036433489360266, | |
| "learning_rate": 5.771377423724272e-07, | |
| "loss": 0.5757, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 2.553299492385787, | |
| "grad_norm": 2.599186092121524, | |
| "learning_rate": 5.707614913619991e-07, | |
| "loss": 0.6208, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.5558375634517767, | |
| "grad_norm": 3.383363675548512, | |
| "learning_rate": 5.644185261750151e-07, | |
| "loss": 0.6874, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 2.5583756345177666, | |
| "grad_norm": 3.0495382632934103, | |
| "learning_rate": 5.581088944789953e-07, | |
| "loss": 0.4623, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.560913705583756, | |
| "grad_norm": 2.825397606512342, | |
| "learning_rate": 5.518326436909599e-07, | |
| "loss": 0.6549, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 2.563451776649746, | |
| "grad_norm": 3.0335585318562512, | |
| "learning_rate": 5.455898209770682e-07, | |
| "loss": 0.6025, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.565989847715736, | |
| "grad_norm": 4.290032799679505, | |
| "learning_rate": 5.393804732522695e-07, | |
| "loss": 0.4766, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 2.568527918781726, | |
| "grad_norm": 4.641552208588503, | |
| "learning_rate": 5.332046471799468e-07, | |
| "loss": 0.5483, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.5710659898477157, | |
| "grad_norm": 4.309109302142682, | |
| "learning_rate": 5.270623891715659e-07, | |
| "loss": 0.7154, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 2.5736040609137056, | |
| "grad_norm": 3.083159554412698, | |
| "learning_rate": 5.20953745386329e-07, | |
| "loss": 0.4923, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.5761421319796955, | |
| "grad_norm": 3.160768286522952, | |
| "learning_rate": 5.148787617308271e-07, | |
| "loss": 0.5154, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 2.5786802030456855, | |
| "grad_norm": 2.512698193443565, | |
| "learning_rate": 5.088374838586924e-07, | |
| "loss": 0.5437, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.5812182741116754, | |
| "grad_norm": 2.7151353977857227, | |
| "learning_rate": 5.028299571702622e-07, | |
| "loss": 0.4243, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 2.583756345177665, | |
| "grad_norm": 2.6177818738112246, | |
| "learning_rate": 4.968562268122285e-07, | |
| "loss": 0.528, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.5862944162436547, | |
| "grad_norm": 2.6170555417311636, | |
| "learning_rate": 4.909163376773046e-07, | |
| "loss": 0.5059, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 2.5888324873096447, | |
| "grad_norm": 2.9877171855142555, | |
| "learning_rate": 4.850103344038853e-07, | |
| "loss": 0.597, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.5913705583756346, | |
| "grad_norm": 3.51833599501728, | |
| "learning_rate": 4.791382613757139e-07, | |
| "loss": 0.5726, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 2.5939086294416245, | |
| "grad_norm": 3.138702316023805, | |
| "learning_rate": 4.7330016272154665e-07, | |
| "loss": 0.4273, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.596446700507614, | |
| "grad_norm": 3.1926684107783037, | |
| "learning_rate": 4.6749608231482113e-07, | |
| "loss": 0.3683, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 2.598984771573604, | |
| "grad_norm": 2.7769255659459984, | |
| "learning_rate": 4.6172606377332785e-07, | |
| "loss": 0.5339, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.6015228426395938, | |
| "grad_norm": 2.8899240615730157, | |
| "learning_rate": 4.5599015045888096e-07, | |
| "loss": 0.4951, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 2.6040609137055837, | |
| "grad_norm": 2.292725750412371, | |
| "learning_rate": 4.502883854769935e-07, | |
| "loss": 0.571, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.6065989847715736, | |
| "grad_norm": 3.5657269907131965, | |
| "learning_rate": 4.446208116765532e-07, | |
| "loss": 0.5531, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 2.6091370558375635, | |
| "grad_norm": 3.164676305535399, | |
| "learning_rate": 4.389874716495013e-07, | |
| "loss": 0.5276, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.6116751269035534, | |
| "grad_norm": 2.432956704979966, | |
| "learning_rate": 4.333884077305062e-07, | |
| "loss": 0.4574, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 2.6142131979695433, | |
| "grad_norm": 2.957712204580237, | |
| "learning_rate": 4.2782366199665917e-07, | |
| "loss": 0.5386, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.6167512690355332, | |
| "grad_norm": 4.230756909710852, | |
| "learning_rate": 4.222932762671428e-07, | |
| "loss": 0.5932, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 2.6192893401015227, | |
| "grad_norm": 5.653532698485646, | |
| "learning_rate": 4.167972921029262e-07, | |
| "loss": 0.4788, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.6218274111675126, | |
| "grad_norm": 2.6961243222320808, | |
| "learning_rate": 4.113357508064492e-07, | |
| "loss": 0.6741, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 2.6243654822335025, | |
| "grad_norm": 2.7983676395323775, | |
| "learning_rate": 4.059086934213141e-07, | |
| "loss": 0.4202, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.6269035532994924, | |
| "grad_norm": 3.9618667349834094, | |
| "learning_rate": 4.005161607319746e-07, | |
| "loss": 0.6864, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 2.6294416243654823, | |
| "grad_norm": 3.5271934230244923, | |
| "learning_rate": 3.9515819326343017e-07, | |
| "loss": 0.4295, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.631979695431472, | |
| "grad_norm": 6.627886770611666, | |
| "learning_rate": 3.898348312809225e-07, | |
| "loss": 0.6542, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 2.6345177664974617, | |
| "grad_norm": 2.989736226872522, | |
| "learning_rate": 3.8454611478963235e-07, | |
| "loss": 0.5271, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.6370558375634516, | |
| "grad_norm": 2.693964153812789, | |
| "learning_rate": 3.792920835343794e-07, | |
| "loss": 0.5553, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 2.6395939086294415, | |
| "grad_norm": 4.193853141678041, | |
| "learning_rate": 3.7407277699932187e-07, | |
| "loss": 0.4709, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.6421319796954315, | |
| "grad_norm": 3.640169779501388, | |
| "learning_rate": 3.688882344076622e-07, | |
| "loss": 0.536, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 2.6446700507614214, | |
| "grad_norm": 2.2708939549023004, | |
| "learning_rate": 3.637384947213496e-07, | |
| "loss": 0.4799, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.6472081218274113, | |
| "grad_norm": 2.8336682475235793, | |
| "learning_rate": 3.5862359664079026e-07, | |
| "loss": 0.5862, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 2.649746192893401, | |
| "grad_norm": 3.0122729813814657, | |
| "learning_rate": 3.535435786045538e-07, | |
| "loss": 0.5927, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.652284263959391, | |
| "grad_norm": 3.9233115175621607, | |
| "learning_rate": 3.484984787890855e-07, | |
| "loss": 0.4916, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 2.6548223350253806, | |
| "grad_norm": 2.1508006487042395, | |
| "learning_rate": 3.434883351084212e-07, | |
| "loss": 0.5636, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.6573604060913705, | |
| "grad_norm": 2.3147748243714057, | |
| "learning_rate": 3.385131852138979e-07, | |
| "loss": 0.5902, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 2.6598984771573604, | |
| "grad_norm": 3.4618558128201937, | |
| "learning_rate": 3.335730664938758e-07, | |
| "loss": 0.5067, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.6624365482233503, | |
| "grad_norm": 2.5681555150355653, | |
| "learning_rate": 3.286680160734534e-07, | |
| "loss": 0.5044, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 2.66497461928934, | |
| "grad_norm": 2.5460990508281274, | |
| "learning_rate": 3.237980708141919e-07, | |
| "loss": 0.6435, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.6675126903553297, | |
| "grad_norm": 2.847346337059293, | |
| "learning_rate": 3.1896326731383596e-07, | |
| "loss": 0.4842, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 2.6700507614213196, | |
| "grad_norm": 2.9472866736988235, | |
| "learning_rate": 3.1416364190603734e-07, | |
| "loss": 0.5303, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.6725888324873095, | |
| "grad_norm": 2.7050299480086752, | |
| "learning_rate": 3.0939923066008517e-07, | |
| "loss": 0.5928, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 2.6751269035532994, | |
| "grad_norm": 2.4390180219689483, | |
| "learning_rate": 3.046700693806337e-07, | |
| "loss": 0.5757, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.6776649746192893, | |
| "grad_norm": 7.749480947114099, | |
| "learning_rate": 2.99976193607433e-07, | |
| "loss": 0.5894, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 2.6802030456852792, | |
| "grad_norm": 3.7341604385626073, | |
| "learning_rate": 2.9531763861505967e-07, | |
| "loss": 0.6333, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.682741116751269, | |
| "grad_norm": 3.2249839563418874, | |
| "learning_rate": 2.9069443941265764e-07, | |
| "loss": 0.5734, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 2.685279187817259, | |
| "grad_norm": 3.6993587505420256, | |
| "learning_rate": 2.8610663074366773e-07, | |
| "loss": 0.5485, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.687817258883249, | |
| "grad_norm": 2.8224525408738375, | |
| "learning_rate": 2.8155424708557365e-07, | |
| "loss": 0.5519, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 2.6903553299492384, | |
| "grad_norm": 2.7593290213685466, | |
| "learning_rate": 2.770373226496342e-07, | |
| "loss": 0.4702, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.6928934010152283, | |
| "grad_norm": 2.1562630932218627, | |
| "learning_rate": 2.725558913806364e-07, | |
| "loss": 0.5119, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 2.6954314720812182, | |
| "grad_norm": 2.7385572688548043, | |
| "learning_rate": 2.681099869566328e-07, | |
| "loss": 0.4392, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.697969543147208, | |
| "grad_norm": 3.819660887407419, | |
| "learning_rate": 2.6369964278869174e-07, | |
| "loss": 0.607, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 2.700507614213198, | |
| "grad_norm": 2.3352928692496877, | |
| "learning_rate": 2.5932489202064535e-07, | |
| "loss": 0.5592, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.703045685279188, | |
| "grad_norm": 2.564229225426386, | |
| "learning_rate": 2.5498576752884087e-07, | |
| "loss": 0.6144, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 2.7055837563451774, | |
| "grad_norm": 3.773405104057773, | |
| "learning_rate": 2.506823019218918e-07, | |
| "loss": 0.5583, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.7081218274111674, | |
| "grad_norm": 3.3735100724103084, | |
| "learning_rate": 2.464145275404367e-07, | |
| "loss": 0.6441, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 2.7106598984771573, | |
| "grad_norm": 9.223286176926033, | |
| "learning_rate": 2.4218247645689306e-07, | |
| "loss": 0.5076, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.713197969543147, | |
| "grad_norm": 2.4484463385198474, | |
| "learning_rate": 2.3798618047521372e-07, | |
| "loss": 0.6121, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 2.715736040609137, | |
| "grad_norm": 5.020879232183135, | |
| "learning_rate": 2.338256711306569e-07, | |
| "loss": 0.5001, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.718274111675127, | |
| "grad_norm": 4.845875804435141, | |
| "learning_rate": 2.2970097968953996e-07, | |
| "loss": 0.4871, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 2.720812182741117, | |
| "grad_norm": 2.721208919370908, | |
| "learning_rate": 2.2561213714900775e-07, | |
| "loss": 0.5706, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.723350253807107, | |
| "grad_norm": 2.5900068662341096, | |
| "learning_rate": 2.2155917423680063e-07, | |
| "loss": 0.5944, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 2.7258883248730963, | |
| "grad_norm": 2.7454739994015225, | |
| "learning_rate": 2.175421214110235e-07, | |
| "loss": 0.5512, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.728426395939086, | |
| "grad_norm": 2.1388171014946877, | |
| "learning_rate": 2.1356100885991605e-07, | |
| "loss": 0.5571, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 2.730964467005076, | |
| "grad_norm": 3.171163810394876, | |
| "learning_rate": 2.0961586650162348e-07, | |
| "loss": 0.4753, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.733502538071066, | |
| "grad_norm": 2.0827428970199593, | |
| "learning_rate": 2.0570672398397716e-07, | |
| "loss": 0.6027, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 2.736040609137056, | |
| "grad_norm": 2.665595206273622, | |
| "learning_rate": 2.0183361068426778e-07, | |
| "loss": 0.4947, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.738578680203046, | |
| "grad_norm": 2.693865043507274, | |
| "learning_rate": 1.9799655570902576e-07, | |
| "loss": 0.5106, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 2.7411167512690353, | |
| "grad_norm": 2.3800902578530474, | |
| "learning_rate": 1.941955878938029e-07, | |
| "loss": 0.7141, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.7436548223350252, | |
| "grad_norm": 2.777013918634036, | |
| "learning_rate": 1.9043073580295445e-07, | |
| "loss": 0.5651, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 2.746192893401015, | |
| "grad_norm": 2.9611036549232117, | |
| "learning_rate": 1.867020277294257e-07, | |
| "loss": 0.5357, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.748730964467005, | |
| "grad_norm": 5.735049614775947, | |
| "learning_rate": 1.830094916945385e-07, | |
| "loss": 0.5207, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 2.751269035532995, | |
| "grad_norm": 2.401365336192109, | |
| "learning_rate": 1.7935315544778064e-07, | |
| "loss": 0.4534, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.753807106598985, | |
| "grad_norm": 3.4850018370045133, | |
| "learning_rate": 1.757330464665996e-07, | |
| "loss": 0.4799, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 2.7563451776649748, | |
| "grad_norm": 6.477937909442904, | |
| "learning_rate": 1.721491919561913e-07, | |
| "loss": 0.6103, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.7588832487309647, | |
| "grad_norm": 2.272537968806992, | |
| "learning_rate": 1.686016188493017e-07, | |
| "loss": 0.4658, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 2.761421319796954, | |
| "grad_norm": 3.5140457515001513, | |
| "learning_rate": 1.650903538060189e-07, | |
| "loss": 0.4613, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.763959390862944, | |
| "grad_norm": 2.9061043312498094, | |
| "learning_rate": 1.6161542321357526e-07, | |
| "loss": 0.7196, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 2.766497461928934, | |
| "grad_norm": 2.0962438282475198, | |
| "learning_rate": 1.581768531861505e-07, | |
| "loss": 0.4196, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.769035532994924, | |
| "grad_norm": 2.3922675748643774, | |
| "learning_rate": 1.5477466956467345e-07, | |
| "loss": 0.4667, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 2.771573604060914, | |
| "grad_norm": 3.0462687101587367, | |
| "learning_rate": 1.514088979166256e-07, | |
| "loss": 0.5318, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.7741116751269037, | |
| "grad_norm": 5.670334178126804, | |
| "learning_rate": 1.480795635358556e-07, | |
| "loss": 0.58, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 2.776649746192893, | |
| "grad_norm": 2.6536444394772767, | |
| "learning_rate": 1.4478669144238345e-07, | |
| "loss": 0.5004, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.779187817258883, | |
| "grad_norm": 2.628505699299408, | |
| "learning_rate": 1.4153030638221377e-07, | |
| "loss": 0.5458, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 2.781725888324873, | |
| "grad_norm": 7.357134641271442, | |
| "learning_rate": 1.3831043282715007e-07, | |
| "loss": 0.5301, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.784263959390863, | |
| "grad_norm": 5.709242282448729, | |
| "learning_rate": 1.3512709497461417e-07, | |
| "loss": 0.5423, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 2.786802030456853, | |
| "grad_norm": 2.560755061605383, | |
| "learning_rate": 1.3198031674745814e-07, | |
| "loss": 0.4878, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.7893401015228427, | |
| "grad_norm": 7.521968288073685, | |
| "learning_rate": 1.2887012179378822e-07, | |
| "loss": 0.706, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 2.7918781725888326, | |
| "grad_norm": 5.23634159896722, | |
| "learning_rate": 1.2579653348678666e-07, | |
| "loss": 0.5018, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.7944162436548226, | |
| "grad_norm": 2.779118492134684, | |
| "learning_rate": 1.2275957492453695e-07, | |
| "loss": 0.4316, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 2.796954314720812, | |
| "grad_norm": 6.466855188925993, | |
| "learning_rate": 1.1975926892984768e-07, | |
| "loss": 0.5546, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.799492385786802, | |
| "grad_norm": 2.395408752537727, | |
| "learning_rate": 1.1679563805008453e-07, | |
| "loss": 0.5074, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 2.802030456852792, | |
| "grad_norm": 3.3570845960467066, | |
| "learning_rate": 1.138687045569975e-07, | |
| "loss": 0.5307, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.8045685279187818, | |
| "grad_norm": 3.59995031210504, | |
| "learning_rate": 1.1097849044655496e-07, | |
| "loss": 0.4549, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 2.8071065989847717, | |
| "grad_norm": 4.307551357849026, | |
| "learning_rate": 1.0812501743877824e-07, | |
| "loss": 0.5132, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.8096446700507616, | |
| "grad_norm": 3.4899798506842132, | |
| "learning_rate": 1.053083069775801e-07, | |
| "loss": 0.4659, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 2.812182741116751, | |
| "grad_norm": 3.347922930728175, | |
| "learning_rate": 1.0252838023059985e-07, | |
| "loss": 0.579, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.814720812182741, | |
| "grad_norm": 2.748810524512811, | |
| "learning_rate": 9.978525808904738e-08, | |
| "loss": 0.4253, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 2.817258883248731, | |
| "grad_norm": 2.80515401589405, | |
| "learning_rate": 9.70789611675449e-08, | |
| "loss": 0.6182, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.8197969543147208, | |
| "grad_norm": 3.2009134732352704, | |
| "learning_rate": 9.440950980397268e-08, | |
| "loss": 0.4551, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 2.8223350253807107, | |
| "grad_norm": 4.606365951182387, | |
| "learning_rate": 9.177692405931637e-08, | |
| "loss": 0.6177, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.8248730964467006, | |
| "grad_norm": 3.267084225461773, | |
| "learning_rate": 8.918122371751381e-08, | |
| "loss": 0.4555, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 2.8274111675126905, | |
| "grad_norm": 2.537425408509751, | |
| "learning_rate": 8.662242828530953e-08, | |
| "loss": 0.5387, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.8299492385786804, | |
| "grad_norm": 7.031905715818571, | |
| "learning_rate": 8.410055699210718e-08, | |
| "loss": 0.452, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 2.8324873096446703, | |
| "grad_norm": 2.89545139077652, | |
| "learning_rate": 8.161562878982399e-08, | |
| "loss": 0.4785, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.83502538071066, | |
| "grad_norm": 3.5675178609176323, | |
| "learning_rate": 7.916766235274931e-08, | |
| "loss": 0.5354, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 2.8375634517766497, | |
| "grad_norm": 2.287121692564442, | |
| "learning_rate": 7.675667607740356e-08, | |
| "loss": 0.5081, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.8401015228426396, | |
| "grad_norm": 4.284144020649474, | |
| "learning_rate": 7.438268808240167e-08, | |
| "loss": 0.5649, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 2.8426395939086295, | |
| "grad_norm": 2.6441521888851427, | |
| "learning_rate": 7.204571620831436e-08, | |
| "loss": 0.5411, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.8451776649746194, | |
| "grad_norm": 2.544897882073251, | |
| "learning_rate": 6.974577801753591e-08, | |
| "loss": 0.4644, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 2.847715736040609, | |
| "grad_norm": 3.543692852658327, | |
| "learning_rate": 6.74828907941516e-08, | |
| "loss": 0.6353, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.850253807106599, | |
| "grad_norm": 6.663293623437551, | |
| "learning_rate": 6.52570715438089e-08, | |
| "loss": 0.5767, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 2.8527918781725887, | |
| "grad_norm": 4.196734730879769, | |
| "learning_rate": 6.306833699358694e-08, | |
| "loss": 0.4889, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.8553299492385786, | |
| "grad_norm": 2.8129447744269807, | |
| "learning_rate": 6.09167035918734e-08, | |
| "loss": 0.4871, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 2.8578680203045685, | |
| "grad_norm": 2.4298809321794606, | |
| "learning_rate": 5.880218750823952e-08, | |
| "loss": 0.5245, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.8604060913705585, | |
| "grad_norm": 2.514214664332499, | |
| "learning_rate": 5.672480463332075e-08, | |
| "loss": 0.5704, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 2.8629441624365484, | |
| "grad_norm": 2.523678806401503, | |
| "learning_rate": 5.468457057869358e-08, | |
| "loss": 0.5769, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.8654822335025383, | |
| "grad_norm": 6.062005054394934, | |
| "learning_rate": 5.268150067676114e-08, | |
| "loss": 0.4635, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 2.868020304568528, | |
| "grad_norm": 5.487134260248954, | |
| "learning_rate": 5.071560998063774e-08, | |
| "loss": 0.4576, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.8705583756345177, | |
| "grad_norm": 3.969605616924183, | |
| "learning_rate": 4.8786913264033955e-08, | |
| "loss": 0.5012, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 2.8730964467005076, | |
| "grad_norm": 6.889570976912757, | |
| "learning_rate": 4.6895425021147856e-08, | |
| "loss": 0.4497, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.8756345177664975, | |
| "grad_norm": 2.4515069445669457, | |
| "learning_rate": 4.5041159466554516e-08, | |
| "loss": 0.538, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 2.8781725888324874, | |
| "grad_norm": 5.727328009407996, | |
| "learning_rate": 4.322413053509944e-08, | |
| "loss": 0.4938, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.8807106598984773, | |
| "grad_norm": 5.63850483058095, | |
| "learning_rate": 4.14443518817953e-08, | |
| "loss": 0.6336, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 2.8832487309644668, | |
| "grad_norm": 4.543104033206608, | |
| "learning_rate": 3.970183688171592e-08, | |
| "loss": 0.6856, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.8857868020304567, | |
| "grad_norm": 9.09297907029027, | |
| "learning_rate": 3.799659862990024e-08, | |
| "loss": 0.7819, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 2.8883248730964466, | |
| "grad_norm": 3.27038419301367, | |
| "learning_rate": 3.632864994125129e-08, | |
| "loss": 0.5517, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.8908629441624365, | |
| "grad_norm": 3.974886726560242, | |
| "learning_rate": 3.469800335043849e-08, | |
| "loss": 0.5948, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 2.8934010152284264, | |
| "grad_norm": 6.7615687008901935, | |
| "learning_rate": 3.31046711118066e-08, | |
| "loss": 0.534, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.8959390862944163, | |
| "grad_norm": 3.6418150113945207, | |
| "learning_rate": 3.1548665199282457e-08, | |
| "loss": 0.6924, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 2.8984771573604062, | |
| "grad_norm": 2.3784081766815444, | |
| "learning_rate": 3.002999730628342e-08, | |
| "loss": 0.5198, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.901015228426396, | |
| "grad_norm": 2.191671552701455, | |
| "learning_rate": 2.8548678845632394e-08, | |
| "loss": 0.5889, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 2.903553299492386, | |
| "grad_norm": 3.1322620752135983, | |
| "learning_rate": 2.710472094946959e-08, | |
| "loss": 0.4742, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.9060913705583755, | |
| "grad_norm": 3.5217814037906874, | |
| "learning_rate": 2.5698134469169246e-08, | |
| "loss": 0.4336, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 2.9086294416243654, | |
| "grad_norm": 2.7931780637489267, | |
| "learning_rate": 2.4328929975260262e-08, | |
| "loss": 0.4718, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.9111675126903553, | |
| "grad_norm": 2.960565978633607, | |
| "learning_rate": 2.2997117757344035e-08, | |
| "loss": 0.4998, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 2.9137055837563453, | |
| "grad_norm": 2.579574803704386, | |
| "learning_rate": 2.1702707824017287e-08, | |
| "loss": 0.5634, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.916243654822335, | |
| "grad_norm": 2.932826440727978, | |
| "learning_rate": 2.0445709902798817e-08, | |
| "loss": 0.4722, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 2.9187817258883246, | |
| "grad_norm": 2.479489445919581, | |
| "learning_rate": 1.9226133440056194e-08, | |
| "loss": 0.5862, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.9213197969543145, | |
| "grad_norm": 2.401202836041865, | |
| "learning_rate": 1.8043987600932512e-08, | |
| "loss": 0.6049, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 2.9238578680203045, | |
| "grad_norm": 2.922087096147672, | |
| "learning_rate": 1.6899281269279756e-08, | |
| "loss": 0.5152, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.9263959390862944, | |
| "grad_norm": 2.7792051289106614, | |
| "learning_rate": 1.5792023047589978e-08, | |
| "loss": 0.5535, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 2.9289340101522843, | |
| "grad_norm": 3.2918821693850773, | |
| "learning_rate": 1.4722221256933677e-08, | |
| "loss": 0.5192, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.931472081218274, | |
| "grad_norm": 3.0985981138740954, | |
| "learning_rate": 1.36898839368943e-08, | |
| "loss": 0.6679, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 2.934010152284264, | |
| "grad_norm": 2.4402483348583006, | |
| "learning_rate": 1.2695018845508278e-08, | |
| "loss": 0.5892, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.936548223350254, | |
| "grad_norm": 5.683858792689427, | |
| "learning_rate": 1.173763345920953e-08, | |
| "loss": 0.59, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 2.939086294416244, | |
| "grad_norm": 2.7239649446352794, | |
| "learning_rate": 1.0817734972768946e-08, | |
| "loss": 0.5954, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.9416243654822334, | |
| "grad_norm": 2.4708097002353853, | |
| "learning_rate": 9.935330299244427e-09, | |
| "loss": 0.5463, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 2.9441624365482233, | |
| "grad_norm": 3.2101057585416926, | |
| "learning_rate": 9.090426069925939e-09, | |
| "loss": 0.5796, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.946700507614213, | |
| "grad_norm": 5.884986825608284, | |
| "learning_rate": 8.283028634287205e-09, | |
| "loss": 0.5288, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 2.949238578680203, | |
| "grad_norm": 4.916264982936341, | |
| "learning_rate": 7.513144059937417e-09, | |
| "loss": 0.5468, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.951776649746193, | |
| "grad_norm": 2.8617024796040553, | |
| "learning_rate": 6.780778132575716e-09, | |
| "loss": 0.5463, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 2.9543147208121825, | |
| "grad_norm": 2.9735229315289256, | |
| "learning_rate": 6.085936355947897e-09, | |
| "loss": 0.5341, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.9568527918781724, | |
| "grad_norm": 2.9722625059513015, | |
| "learning_rate": 5.428623951805323e-09, | |
| "loss": 0.5023, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 2.9593908629441623, | |
| "grad_norm": 2.271860754785485, | |
| "learning_rate": 4.808845859864408e-09, | |
| "loss": 0.5569, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.9619289340101522, | |
| "grad_norm": 2.5277247252299997, | |
| "learning_rate": 4.226606737771643e-09, | |
| "loss": 0.5931, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 2.964467005076142, | |
| "grad_norm": 2.6218878236770746, | |
| "learning_rate": 3.6819109610658486e-09, | |
| "loss": 0.4648, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.967005076142132, | |
| "grad_norm": 4.7305366536735285, | |
| "learning_rate": 3.1747626231481977e-09, | |
| "loss": 0.6022, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 2.969543147208122, | |
| "grad_norm": 3.491277684694074, | |
| "learning_rate": 2.7051655352494654e-09, | |
| "loss": 0.5136, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.972081218274112, | |
| "grad_norm": 2.4808907499527, | |
| "learning_rate": 2.273123226401719e-09, | |
| "loss": 0.6368, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 2.974619289340102, | |
| "grad_norm": 3.8438275108188438, | |
| "learning_rate": 1.8786389434122254e-09, | |
| "loss": 0.5318, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.9771573604060912, | |
| "grad_norm": 5.372296775384867, | |
| "learning_rate": 1.5217156508390286e-09, | |
| "loss": 0.5529, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 2.979695431472081, | |
| "grad_norm": 4.5433956784200955, | |
| "learning_rate": 1.2023560309687431e-09, | |
| "loss": 0.5474, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.982233502538071, | |
| "grad_norm": 3.3272677410816933, | |
| "learning_rate": 9.205624837949068e-10, | |
| "loss": 0.6755, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 2.984771573604061, | |
| "grad_norm": 5.764132998482088, | |
| "learning_rate": 6.763371270035457e-10, | |
| "loss": 0.5627, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.987309644670051, | |
| "grad_norm": 2.6228604553150583, | |
| "learning_rate": 4.696817959520816e-10, | |
| "loss": 0.4996, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 2.9898477157360404, | |
| "grad_norm": 3.5774398995312136, | |
| "learning_rate": 3.005980436604494e-10, | |
| "loss": 0.7654, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.9923857868020303, | |
| "grad_norm": 2.9590286177610383, | |
| "learning_rate": 1.6908714079721944e-10, | |
| "loss": 0.6136, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 2.99492385786802, | |
| "grad_norm": 2.67848875719845, | |
| "learning_rate": 7.515007566849531e-11, | |
| "loss": 0.6381, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.99746192893401, | |
| "grad_norm": 5.3474868651106044, | |
| "learning_rate": 1.8787554214583227e-11, | |
| "loss": 0.5582, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.276042560935072, | |
| "learning_rate": 0.0, | |
| "loss": 0.468, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 1182, | |
| "total_flos": 6.976297471026659e+18, | |
| "train_loss": 0.7863800849406247, | |
| "train_runtime": 8818.5969, | |
| "train_samples_per_second": 8.569, | |
| "train_steps_per_second": 0.134 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1182, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.976297471026659e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |