diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7071 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 2008, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00398406374501992, + "grad_norm": 4.348448276519775, + "learning_rate": 5.940594059405941e-08, + "loss": 2.1171607971191406, + "step": 2 + }, + { + "epoch": 0.00796812749003984, + "grad_norm": 2.55696177482605, + "learning_rate": 1.782178217821782e-07, + "loss": 2.068465232849121, + "step": 4 + }, + { + "epoch": 0.01195219123505976, + "grad_norm": 3.159899950027466, + "learning_rate": 2.9702970297029703e-07, + "loss": 2.136167287826538, + "step": 6 + }, + { + "epoch": 0.01593625498007968, + "grad_norm": 2.0796260833740234, + "learning_rate": 4.158415841584159e-07, + "loss": 1.8786698579788208, + "step": 8 + }, + { + "epoch": 0.0199203187250996, + "grad_norm": 5.41955041885376, + "learning_rate": 5.346534653465346e-07, + "loss": 1.9257912635803223, + "step": 10 + }, + { + "epoch": 0.02390438247011952, + "grad_norm": 11.406185150146484, + "learning_rate": 6.534653465346535e-07, + "loss": 2.368868827819824, + "step": 12 + }, + { + "epoch": 0.027888446215139442, + "grad_norm": 1.901093602180481, + "learning_rate": 7.722772277227723e-07, + "loss": 1.9428346157073975, + "step": 14 + }, + { + "epoch": 0.03187250996015936, + "grad_norm": 1.393601894378662, + "learning_rate": 8.910891089108911e-07, + "loss": 1.7873543500900269, + "step": 16 + }, + { + "epoch": 0.035856573705179286, + "grad_norm": 1.4436230659484863, + "learning_rate": 1.00990099009901e-06, + "loss": 1.2166668176651, + "step": 18 + }, + { + "epoch": 0.0398406374501992, + "grad_norm": 1.8145285844802856, + "learning_rate": 1.1287128712871288e-06, + "loss": 1.6057647466659546, + "step": 20 + }, + { + "epoch": 0.043824701195219126, + "grad_norm": 1.2188401222229004, + "learning_rate": 1.2475247524752474e-06, + "loss": 1.7550266981124878, + "step": 22 + }, + { + "epoch": 0.04780876494023904, + "grad_norm": 1.664843201637268, + "learning_rate": 1.3663366336633665e-06, + "loss": 1.5540839433670044, + "step": 24 + }, + { + "epoch": 0.05179282868525897, + "grad_norm": 9.42098617553711, + "learning_rate": 1.4851485148514852e-06, + "loss": 2.344756841659546, + "step": 26 + }, + { + "epoch": 0.055776892430278883, + "grad_norm": 2.532942771911621, + "learning_rate": 1.603960396039604e-06, + "loss": 1.6220295429229736, + "step": 28 + }, + { + "epoch": 0.05976095617529881, + "grad_norm": 14.82198429107666, + "learning_rate": 1.7227722772277227e-06, + "loss": 1.83803129196167, + "step": 30 + }, + { + "epoch": 0.06374501992031872, + "grad_norm": 4.736570358276367, + "learning_rate": 1.8415841584158415e-06, + "loss": 0.9668034315109253, + "step": 32 + }, + { + "epoch": 0.06772908366533864, + "grad_norm": 1.077216386795044, + "learning_rate": 1.9603960396039604e-06, + "loss": 1.5424432754516602, + "step": 34 + }, + { + "epoch": 0.07171314741035857, + "grad_norm": 2.908050060272217, + "learning_rate": 2.079207920792079e-06, + "loss": 1.578654408454895, + "step": 36 + }, + { + "epoch": 0.07569721115537849, + "grad_norm": 1.1227400302886963, + "learning_rate": 2.198019801980198e-06, + "loss": 1.4881534576416016, + "step": 38 + }, + { + "epoch": 0.0796812749003984, + "grad_norm": 1.1037142276763916, + "learning_rate": 2.316831683168317e-06, + "loss": 1.4990899562835693, + "step": 40 + }, + { + "epoch": 0.08366533864541832, + "grad_norm": 11.898150444030762, + "learning_rate": 2.4356435643564358e-06, + "loss": 0.6503542065620422, + "step": 42 + }, + { + "epoch": 0.08764940239043825, + "grad_norm": 1.8804869651794434, + "learning_rate": 2.5544554455445544e-06, + "loss": 1.5055851936340332, + "step": 44 + }, + { + "epoch": 0.09163346613545817, + "grad_norm": 1.0558547973632812, + "learning_rate": 2.6732673267326735e-06, + "loss": 1.4046030044555664, + "step": 46 + }, + { + "epoch": 0.09561752988047809, + "grad_norm": 1.6279054880142212, + "learning_rate": 2.792079207920792e-06, + "loss": 1.228537678718567, + "step": 48 + }, + { + "epoch": 0.099601593625498, + "grad_norm": 9.176322937011719, + "learning_rate": 2.9108910891089108e-06, + "loss": 1.4280906915664673, + "step": 50 + }, + { + "epoch": 0.10358565737051793, + "grad_norm": 0.7322860956192017, + "learning_rate": 3.02970297029703e-06, + "loss": 1.3861970901489258, + "step": 52 + }, + { + "epoch": 0.10756972111553785, + "grad_norm": 1.9037761688232422, + "learning_rate": 3.148514851485149e-06, + "loss": 1.8983885049819946, + "step": 54 + }, + { + "epoch": 0.11155378486055777, + "grad_norm": 2.3619227409362793, + "learning_rate": 3.2673267326732676e-06, + "loss": 1.265608549118042, + "step": 56 + }, + { + "epoch": 0.11553784860557768, + "grad_norm": 1.079222559928894, + "learning_rate": 3.3861386138613858e-06, + "loss": 1.3720718622207642, + "step": 58 + }, + { + "epoch": 0.11952191235059761, + "grad_norm": 2.0091183185577393, + "learning_rate": 3.504950495049505e-06, + "loss": 1.221197485923767, + "step": 60 + }, + { + "epoch": 0.12350597609561753, + "grad_norm": 1.6702687740325928, + "learning_rate": 3.623762376237624e-06, + "loss": 1.3659617900848389, + "step": 62 + }, + { + "epoch": 0.12749003984063745, + "grad_norm": 1.4772052764892578, + "learning_rate": 3.7425742574257425e-06, + "loss": 0.9504954218864441, + "step": 64 + }, + { + "epoch": 0.13147410358565736, + "grad_norm": 1.1839888095855713, + "learning_rate": 3.861386138613861e-06, + "loss": 1.3058485984802246, + "step": 66 + }, + { + "epoch": 0.13545816733067728, + "grad_norm": 2.274195671081543, + "learning_rate": 3.98019801980198e-06, + "loss": 1.142093300819397, + "step": 68 + }, + { + "epoch": 0.1394422310756972, + "grad_norm": 1.23581063747406, + "learning_rate": 4.099009900990099e-06, + "loss": 1.3501553535461426, + "step": 70 + }, + { + "epoch": 0.14342629482071714, + "grad_norm": 0.9609231352806091, + "learning_rate": 4.2178217821782175e-06, + "loss": 1.394975185394287, + "step": 72 + }, + { + "epoch": 0.14741035856573706, + "grad_norm": 1.0941250324249268, + "learning_rate": 4.336633663366337e-06, + "loss": 1.3037439584732056, + "step": 74 + }, + { + "epoch": 0.15139442231075698, + "grad_norm": 2.8824353218078613, + "learning_rate": 4.455445544554456e-06, + "loss": 1.0416653156280518, + "step": 76 + }, + { + "epoch": 0.1553784860557769, + "grad_norm": 0.9365191459655762, + "learning_rate": 4.574257425742575e-06, + "loss": 1.2687112092971802, + "step": 78 + }, + { + "epoch": 0.1593625498007968, + "grad_norm": 1.5261850357055664, + "learning_rate": 4.693069306930693e-06, + "loss": 1.317929744720459, + "step": 80 + }, + { + "epoch": 0.16334661354581673, + "grad_norm": 1.2774893045425415, + "learning_rate": 4.811881188118812e-06, + "loss": 1.0010876655578613, + "step": 82 + }, + { + "epoch": 0.16733067729083664, + "grad_norm": 2.2119550704956055, + "learning_rate": 4.93069306930693e-06, + "loss": 0.4978000223636627, + "step": 84 + }, + { + "epoch": 0.17131474103585656, + "grad_norm": 0.8584203124046326, + "learning_rate": 5.049504950495049e-06, + "loss": 0.5417638421058655, + "step": 86 + }, + { + "epoch": 0.1752988047808765, + "grad_norm": 1.1234948635101318, + "learning_rate": 5.168316831683168e-06, + "loss": 1.3661960363388062, + "step": 88 + }, + { + "epoch": 0.17928286852589642, + "grad_norm": 1.9819002151489258, + "learning_rate": 5.2871287128712874e-06, + "loss": 0.8402650952339172, + "step": 90 + }, + { + "epoch": 0.18326693227091634, + "grad_norm": 9.981027603149414, + "learning_rate": 5.4059405940594065e-06, + "loss": 1.0407862663269043, + "step": 92 + }, + { + "epoch": 0.18725099601593626, + "grad_norm": 5.5226335525512695, + "learning_rate": 5.524752475247525e-06, + "loss": 1.3026604652404785, + "step": 94 + }, + { + "epoch": 0.19123505976095617, + "grad_norm": 2.536931037902832, + "learning_rate": 5.643564356435644e-06, + "loss": 1.050534963607788, + "step": 96 + }, + { + "epoch": 0.1952191235059761, + "grad_norm": 2.8480377197265625, + "learning_rate": 5.762376237623762e-06, + "loss": 1.2697981595993042, + "step": 98 + }, + { + "epoch": 0.199203187250996, + "grad_norm": 1.2788549661636353, + "learning_rate": 5.881188118811881e-06, + "loss": 1.3857513666152954, + "step": 100 + }, + { + "epoch": 0.20318725099601595, + "grad_norm": 4.938348293304443, + "learning_rate": 6e-06, + "loss": 1.2503310441970825, + "step": 102 + }, + { + "epoch": 0.20717131474103587, + "grad_norm": 1.3763278722763062, + "learning_rate": 5.99998534480079e-06, + "loss": 1.316627025604248, + "step": 104 + }, + { + "epoch": 0.21115537848605578, + "grad_norm": 1.2016820907592773, + "learning_rate": 5.9999413793622525e-06, + "loss": 1.3336181640625, + "step": 106 + }, + { + "epoch": 0.2151394422310757, + "grad_norm": 2.7037742137908936, + "learning_rate": 5.9998681041616624e-06, + "loss": 0.848972737789154, + "step": 108 + }, + { + "epoch": 0.21912350597609562, + "grad_norm": 2.082820177078247, + "learning_rate": 5.999765519994475e-06, + "loss": 1.1773113012313843, + "step": 110 + }, + { + "epoch": 0.22310756972111553, + "grad_norm": 1.349158525466919, + "learning_rate": 5.999633627974312e-06, + "loss": 1.838499903678894, + "step": 112 + }, + { + "epoch": 0.22709163346613545, + "grad_norm": 1.0457987785339355, + "learning_rate": 5.9994724295329546e-06, + "loss": 1.2931954860687256, + "step": 114 + }, + { + "epoch": 0.23107569721115537, + "grad_norm": 1.0233925580978394, + "learning_rate": 5.999281926420326e-06, + "loss": 1.3657619953155518, + "step": 116 + }, + { + "epoch": 0.2350597609561753, + "grad_norm": 1.456226110458374, + "learning_rate": 5.999062120704471e-06, + "loss": 0.39271149039268494, + "step": 118 + }, + { + "epoch": 0.23904382470119523, + "grad_norm": 1.1591161489486694, + "learning_rate": 5.998813014771534e-06, + "loss": 1.283569097518921, + "step": 120 + }, + { + "epoch": 0.24302788844621515, + "grad_norm": 1.4893031120300293, + "learning_rate": 5.998534611325737e-06, + "loss": 1.3696374893188477, + "step": 122 + }, + { + "epoch": 0.24701195219123506, + "grad_norm": 1.0916317701339722, + "learning_rate": 5.998226913389344e-06, + "loss": 1.2977485656738281, + "step": 124 + }, + { + "epoch": 0.250996015936255, + "grad_norm": 1.5058797597885132, + "learning_rate": 5.997889924302632e-06, + "loss": 1.2800962924957275, + "step": 126 + }, + { + "epoch": 0.2549800796812749, + "grad_norm": 2.89294695854187, + "learning_rate": 5.997523647723856e-06, + "loss": 0.9177144169807434, + "step": 128 + }, + { + "epoch": 0.2589641434262948, + "grad_norm": 2.416161060333252, + "learning_rate": 5.997128087629205e-06, + "loss": 1.280983567237854, + "step": 130 + }, + { + "epoch": 0.26294820717131473, + "grad_norm": 1.2975496053695679, + "learning_rate": 5.996703248312762e-06, + "loss": 1.2503688335418701, + "step": 132 + }, + { + "epoch": 0.26693227091633465, + "grad_norm": 0.9795719385147095, + "learning_rate": 5.996249134386455e-06, + "loss": 1.2679003477096558, + "step": 134 + }, + { + "epoch": 0.27091633466135456, + "grad_norm": 1.4742954969406128, + "learning_rate": 5.995765750780013e-06, + "loss": 0.5531994700431824, + "step": 136 + }, + { + "epoch": 0.2749003984063745, + "grad_norm": 2.563380241394043, + "learning_rate": 5.995253102740903e-06, + "loss": 1.901612401008606, + "step": 138 + }, + { + "epoch": 0.2788844621513944, + "grad_norm": 1.4704535007476807, + "learning_rate": 5.994711195834279e-06, + "loss": 1.1717365980148315, + "step": 140 + }, + { + "epoch": 0.28286852589641437, + "grad_norm": 1.1811615228652954, + "learning_rate": 5.994140035942923e-06, + "loss": 0.7471544742584229, + "step": 142 + }, + { + "epoch": 0.2868525896414343, + "grad_norm": 1.6094988584518433, + "learning_rate": 5.993539629267178e-06, + "loss": 0.9018757939338684, + "step": 144 + }, + { + "epoch": 0.2908366533864542, + "grad_norm": 2.305218458175659, + "learning_rate": 5.992909982324879e-06, + "loss": 1.277273178100586, + "step": 146 + }, + { + "epoch": 0.2948207171314741, + "grad_norm": 3.697319746017456, + "learning_rate": 5.992251101951287e-06, + "loss": 1.0025593042373657, + "step": 148 + }, + { + "epoch": 0.29880478087649404, + "grad_norm": 1.539844036102295, + "learning_rate": 5.991562995299011e-06, + "loss": 1.3024755716323853, + "step": 150 + }, + { + "epoch": 0.30278884462151395, + "grad_norm": 1.0249600410461426, + "learning_rate": 5.990845669837933e-06, + "loss": 1.5959429740905762, + "step": 152 + }, + { + "epoch": 0.30677290836653387, + "grad_norm": 0.8561967015266418, + "learning_rate": 5.990099133355126e-06, + "loss": 1.2801433801651, + "step": 154 + }, + { + "epoch": 0.3107569721115538, + "grad_norm": 4.086156845092773, + "learning_rate": 5.989323393954767e-06, + "loss": 0.4956245422363281, + "step": 156 + }, + { + "epoch": 0.3147410358565737, + "grad_norm": 3.771010398864746, + "learning_rate": 5.988518460058054e-06, + "loss": 0.4668130576610565, + "step": 158 + }, + { + "epoch": 0.3187250996015936, + "grad_norm": 1.3703054189682007, + "learning_rate": 5.9876843404031096e-06, + "loss": 1.2212884426116943, + "step": 160 + }, + { + "epoch": 0.32270916334661354, + "grad_norm": 1.210668921470642, + "learning_rate": 5.986821044044889e-06, + "loss": 1.7916109561920166, + "step": 162 + }, + { + "epoch": 0.32669322709163345, + "grad_norm": 1.0227242708206177, + "learning_rate": 5.985928580355082e-06, + "loss": 0.8739029765129089, + "step": 164 + }, + { + "epoch": 0.33067729083665337, + "grad_norm": 2.860746383666992, + "learning_rate": 5.985006959022008e-06, + "loss": 0.4693869352340698, + "step": 166 + }, + { + "epoch": 0.3346613545816733, + "grad_norm": 1.755257487297058, + "learning_rate": 5.984056190050517e-06, + "loss": 1.324602723121643, + "step": 168 + }, + { + "epoch": 0.3386454183266932, + "grad_norm": 7.148312568664551, + "learning_rate": 5.983076283761872e-06, + "loss": 1.3821817636489868, + "step": 170 + }, + { + "epoch": 0.3426294820717131, + "grad_norm": 1.2952216863632202, + "learning_rate": 5.982067250793646e-06, + "loss": 1.2612062692642212, + "step": 172 + }, + { + "epoch": 0.3466135458167331, + "grad_norm": 1.727574348449707, + "learning_rate": 5.981029102099601e-06, + "loss": 1.341408133506775, + "step": 174 + }, + { + "epoch": 0.350597609561753, + "grad_norm": 2.543426513671875, + "learning_rate": 5.979961848949572e-06, + "loss": 0.5157387852668762, + "step": 176 + }, + { + "epoch": 0.3545816733067729, + "grad_norm": 1.489472508430481, + "learning_rate": 5.978865502929343e-06, + "loss": 1.3691034317016602, + "step": 178 + }, + { + "epoch": 0.35856573705179284, + "grad_norm": 3.3407742977142334, + "learning_rate": 5.977740075940517e-06, + "loss": 1.2798420190811157, + "step": 180 + }, + { + "epoch": 0.36254980079681276, + "grad_norm": 0.7936763763427734, + "learning_rate": 5.976585580200399e-06, + "loss": 1.2865771055221558, + "step": 182 + }, + { + "epoch": 0.3665338645418327, + "grad_norm": 1.722764492034912, + "learning_rate": 5.9754020282418505e-06, + "loss": 0.9274950623512268, + "step": 184 + }, + { + "epoch": 0.3705179282868526, + "grad_norm": 1.4277971982955933, + "learning_rate": 5.974189432913161e-06, + "loss": 1.2118057012557983, + "step": 186 + }, + { + "epoch": 0.3745019920318725, + "grad_norm": 0.7755621671676636, + "learning_rate": 5.972947807377905e-06, + "loss": 1.262542724609375, + "step": 188 + }, + { + "epoch": 0.3784860557768924, + "grad_norm": 2.0006139278411865, + "learning_rate": 5.971677165114801e-06, + "loss": 1.1163339614868164, + "step": 190 + }, + { + "epoch": 0.38247011952191234, + "grad_norm": 1.9247850179672241, + "learning_rate": 5.970377519917563e-06, + "loss": 1.0671018362045288, + "step": 192 + }, + { + "epoch": 0.38645418326693226, + "grad_norm": 1.1371593475341797, + "learning_rate": 5.969048885894754e-06, + "loss": 1.2458205223083496, + "step": 194 + }, + { + "epoch": 0.3904382470119522, + "grad_norm": 1.5814062356948853, + "learning_rate": 5.967691277469631e-06, + "loss": 1.2479208707809448, + "step": 196 + }, + { + "epoch": 0.3944223107569721, + "grad_norm": 1.3527947664260864, + "learning_rate": 5.9663047093799874e-06, + "loss": 0.46853581070899963, + "step": 198 + }, + { + "epoch": 0.398406374501992, + "grad_norm": 0.9908071160316467, + "learning_rate": 5.964889196677996e-06, + "loss": 1.2344821691513062, + "step": 200 + }, + { + "epoch": 0.40239043824701193, + "grad_norm": 0.9923727512359619, + "learning_rate": 5.9634447547300415e-06, + "loss": 1.2732172012329102, + "step": 202 + }, + { + "epoch": 0.4063745019920319, + "grad_norm": 2.537524700164795, + "learning_rate": 5.961971399216556e-06, + "loss": 1.234106183052063, + "step": 204 + }, + { + "epoch": 0.4103585657370518, + "grad_norm": 3.067852735519409, + "learning_rate": 5.960469146131851e-06, + "loss": 0.38716864585876465, + "step": 206 + }, + { + "epoch": 0.41434262948207173, + "grad_norm": 0.8039565086364746, + "learning_rate": 5.95893801178394e-06, + "loss": 1.223067045211792, + "step": 208 + }, + { + "epoch": 0.41832669322709165, + "grad_norm": 1.5125787258148193, + "learning_rate": 5.957378012794361e-06, + "loss": 0.698806881904602, + "step": 210 + }, + { + "epoch": 0.42231075697211157, + "grad_norm": 1.2418526411056519, + "learning_rate": 5.955789166098002e-06, + "loss": 0.7970227599143982, + "step": 212 + }, + { + "epoch": 0.4262948207171315, + "grad_norm": 2.7106666564941406, + "learning_rate": 5.954171488942911e-06, + "loss": 0.8325067758560181, + "step": 214 + }, + { + "epoch": 0.4302788844621514, + "grad_norm": 3.5096561908721924, + "learning_rate": 5.952524998890109e-06, + "loss": 1.1556031703948975, + "step": 216 + }, + { + "epoch": 0.4342629482071713, + "grad_norm": 1.513983130455017, + "learning_rate": 5.950849713813405e-06, + "loss": 1.263627529144287, + "step": 218 + }, + { + "epoch": 0.43824701195219123, + "grad_norm": 0.7860940098762512, + "learning_rate": 5.949145651899196e-06, + "loss": 1.2762495279312134, + "step": 220 + }, + { + "epoch": 0.44223107569721115, + "grad_norm": 1.6819899082183838, + "learning_rate": 5.947412831646271e-06, + "loss": 0.5981872081756592, + "step": 222 + }, + { + "epoch": 0.44621513944223107, + "grad_norm": 1.2630786895751953, + "learning_rate": 5.945651271865616e-06, + "loss": 1.120012879371643, + "step": 224 + }, + { + "epoch": 0.450199203187251, + "grad_norm": 0.9950310587882996, + "learning_rate": 5.943860991680195e-06, + "loss": 1.2754716873168945, + "step": 226 + }, + { + "epoch": 0.4541832669322709, + "grad_norm": 1.6684496402740479, + "learning_rate": 5.942042010524764e-06, + "loss": 0.9846575856208801, + "step": 228 + }, + { + "epoch": 0.4581673306772908, + "grad_norm": 1.4847872257232666, + "learning_rate": 5.9401943481456386e-06, + "loss": 1.2583152055740356, + "step": 230 + }, + { + "epoch": 0.46215139442231074, + "grad_norm": 0.9578908681869507, + "learning_rate": 5.9383180246004935e-06, + "loss": 1.2739794254302979, + "step": 232 + }, + { + "epoch": 0.46613545816733065, + "grad_norm": 1.1821162700653076, + "learning_rate": 5.936413060258143e-06, + "loss": 1.4074854850769043, + "step": 234 + }, + { + "epoch": 0.4701195219123506, + "grad_norm": 0.8178677558898926, + "learning_rate": 5.9344794757983115e-06, + "loss": 1.2413185834884644, + "step": 236 + }, + { + "epoch": 0.47410358565737054, + "grad_norm": 2.4166979789733887, + "learning_rate": 5.932517292211418e-06, + "loss": 1.1744059324264526, + "step": 238 + }, + { + "epoch": 0.47808764940239046, + "grad_norm": 1.1220707893371582, + "learning_rate": 5.930526530798347e-06, + "loss": 1.2574900388717651, + "step": 240 + }, + { + "epoch": 0.4820717131474104, + "grad_norm": 0.7189679741859436, + "learning_rate": 5.928507213170211e-06, + "loss": 1.2059662342071533, + "step": 242 + }, + { + "epoch": 0.4860557768924303, + "grad_norm": 1.4799033403396606, + "learning_rate": 5.926459361248125e-06, + "loss": 0.7257046103477478, + "step": 244 + }, + { + "epoch": 0.4900398406374502, + "grad_norm": 8.812633514404297, + "learning_rate": 5.9243829972629584e-06, + "loss": 1.0781515836715698, + "step": 246 + }, + { + "epoch": 0.4940239043824701, + "grad_norm": 2.5435431003570557, + "learning_rate": 5.922278143755105e-06, + "loss": 0.9890032410621643, + "step": 248 + }, + { + "epoch": 0.49800796812749004, + "grad_norm": 1.1066993474960327, + "learning_rate": 5.920144823574229e-06, + "loss": 1.275596261024475, + "step": 250 + }, + { + "epoch": 0.50199203187251, + "grad_norm": 3.8385164737701416, + "learning_rate": 5.917983059879021e-06, + "loss": 0.5777413249015808, + "step": 252 + }, + { + "epoch": 0.5059760956175299, + "grad_norm": 2.5549728870391846, + "learning_rate": 5.915792876136944e-06, + "loss": 1.2903834581375122, + "step": 254 + }, + { + "epoch": 0.5099601593625498, + "grad_norm": 1.1752848625183105, + "learning_rate": 5.913574296123985e-06, + "loss": 1.2607370615005493, + "step": 256 + }, + { + "epoch": 0.5139442231075697, + "grad_norm": 3.4985756874084473, + "learning_rate": 5.9113273439243885e-06, + "loss": 0.6077223420143127, + "step": 258 + }, + { + "epoch": 0.5179282868525896, + "grad_norm": 0.8346880674362183, + "learning_rate": 5.909052043930402e-06, + "loss": 1.2486491203308105, + "step": 260 + }, + { + "epoch": 0.5219123505976095, + "grad_norm": 1.6400198936462402, + "learning_rate": 5.9067484208420046e-06, + "loss": 0.3859616219997406, + "step": 262 + }, + { + "epoch": 0.5258964143426295, + "grad_norm": 2.0709147453308105, + "learning_rate": 5.904416499666646e-06, + "loss": 1.250545620918274, + "step": 264 + }, + { + "epoch": 0.5298804780876494, + "grad_norm": 3.2738661766052246, + "learning_rate": 5.902056305718969e-06, + "loss": 0.5132614970207214, + "step": 266 + }, + { + "epoch": 0.5338645418326693, + "grad_norm": 1.4471163749694824, + "learning_rate": 5.89966786462054e-06, + "loss": 1.2536060810089111, + "step": 268 + }, + { + "epoch": 0.5378486055776892, + "grad_norm": 2.023653030395508, + "learning_rate": 5.897251202299566e-06, + "loss": 1.7837636470794678, + "step": 270 + }, + { + "epoch": 0.5418326693227091, + "grad_norm": 0.7867792248725891, + "learning_rate": 5.894806344990614e-06, + "loss": 0.7907792329788208, + "step": 272 + }, + { + "epoch": 0.545816733067729, + "grad_norm": 0.9616872072219849, + "learning_rate": 5.892333319234332e-06, + "loss": 1.240364670753479, + "step": 274 + }, + { + "epoch": 0.549800796812749, + "grad_norm": 1.5364048480987549, + "learning_rate": 5.889832151877152e-06, + "loss": 0.6271519064903259, + "step": 276 + }, + { + "epoch": 0.5537848605577689, + "grad_norm": 1.9956889152526855, + "learning_rate": 5.887302870071004e-06, + "loss": 1.354748010635376, + "step": 278 + }, + { + "epoch": 0.5577689243027888, + "grad_norm": 3.179105043411255, + "learning_rate": 5.88474550127302e-06, + "loss": 0.7769224047660828, + "step": 280 + }, + { + "epoch": 0.5617529880478087, + "grad_norm": 2.1050288677215576, + "learning_rate": 5.882160073245238e-06, + "loss": 0.7815161347389221, + "step": 282 + }, + { + "epoch": 0.5657370517928287, + "grad_norm": 1.0835380554199219, + "learning_rate": 5.879546614054295e-06, + "loss": 1.2420227527618408, + "step": 284 + }, + { + "epoch": 0.5697211155378487, + "grad_norm": 0.9784935712814331, + "learning_rate": 5.876905152071131e-06, + "loss": 1.2437528371810913, + "step": 286 + }, + { + "epoch": 0.5737051792828686, + "grad_norm": 1.059682011604309, + "learning_rate": 5.874235715970671e-06, + "loss": 1.1747212409973145, + "step": 288 + }, + { + "epoch": 0.5776892430278885, + "grad_norm": 1.0844000577926636, + "learning_rate": 5.87153833473152e-06, + "loss": 1.2218478918075562, + "step": 290 + }, + { + "epoch": 0.5816733067729084, + "grad_norm": 1.2831990718841553, + "learning_rate": 5.868813037635649e-06, + "loss": 1.1690454483032227, + "step": 292 + }, + { + "epoch": 0.5856573705179283, + "grad_norm": 2.694718360900879, + "learning_rate": 5.866059854268076e-06, + "loss": 0.49895596504211426, + "step": 294 + }, + { + "epoch": 0.5896414342629482, + "grad_norm": 1.1014599800109863, + "learning_rate": 5.863278814516539e-06, + "loss": 1.4519755840301514, + "step": 296 + }, + { + "epoch": 0.5936254980079682, + "grad_norm": 6.0046305656433105, + "learning_rate": 5.860469948571181e-06, + "loss": 0.6872335076332092, + "step": 298 + }, + { + "epoch": 0.5976095617529881, + "grad_norm": 1.493370771408081, + "learning_rate": 5.857633286924219e-06, + "loss": 1.241629958152771, + "step": 300 + }, + { + "epoch": 0.601593625498008, + "grad_norm": 1.3740859031677246, + "learning_rate": 5.854768860369607e-06, + "loss": 1.0279847383499146, + "step": 302 + }, + { + "epoch": 0.6055776892430279, + "grad_norm": 4.5894083976745605, + "learning_rate": 5.85187670000271e-06, + "loss": 0.8594214916229248, + "step": 304 + }, + { + "epoch": 0.6095617529880478, + "grad_norm": 1.9348714351654053, + "learning_rate": 5.848956837219964e-06, + "loss": 1.1640937328338623, + "step": 306 + }, + { + "epoch": 0.6135458167330677, + "grad_norm": 3.6650631427764893, + "learning_rate": 5.846009303718529e-06, + "loss": 1.083706259727478, + "step": 308 + }, + { + "epoch": 0.6175298804780877, + "grad_norm": 0.8985078930854797, + "learning_rate": 5.8430341314959565e-06, + "loss": 1.2840549945831299, + "step": 310 + }, + { + "epoch": 0.6215139442231076, + "grad_norm": 3.3366034030914307, + "learning_rate": 5.840031352849833e-06, + "loss": 0.6729341149330139, + "step": 312 + }, + { + "epoch": 0.6254980079681275, + "grad_norm": 0.5400150418281555, + "learning_rate": 5.83700100037743e-06, + "loss": 0.9031069874763489, + "step": 314 + }, + { + "epoch": 0.6294820717131474, + "grad_norm": 0.8818338513374329, + "learning_rate": 5.833943106975355e-06, + "loss": 1.403872013092041, + "step": 316 + }, + { + "epoch": 0.6334661354581673, + "grad_norm": 0.9534677267074585, + "learning_rate": 5.830857705839191e-06, + "loss": 0.7257641553878784, + "step": 318 + }, + { + "epoch": 0.6374501992031872, + "grad_norm": 1.2703937292099, + "learning_rate": 5.8277448304631385e-06, + "loss": 1.2789297103881836, + "step": 320 + }, + { + "epoch": 0.6414342629482072, + "grad_norm": 2.5597033500671387, + "learning_rate": 5.824604514639647e-06, + "loss": 0.5666279792785645, + "step": 322 + }, + { + "epoch": 0.6454183266932271, + "grad_norm": 1.932152509689331, + "learning_rate": 5.8214367924590515e-06, + "loss": 0.9416989088058472, + "step": 324 + }, + { + "epoch": 0.649402390438247, + "grad_norm": 2.5085222721099854, + "learning_rate": 5.818241698309205e-06, + "loss": 0.9871986508369446, + "step": 326 + }, + { + "epoch": 0.6533864541832669, + "grad_norm": 0.8283513784408569, + "learning_rate": 5.8150192668751015e-06, + "loss": 1.2529672384262085, + "step": 328 + }, + { + "epoch": 0.6573705179282868, + "grad_norm": 7.669778347015381, + "learning_rate": 5.811769533138499e-06, + "loss": 0.46496719121932983, + "step": 330 + }, + { + "epoch": 0.6613545816733067, + "grad_norm": 3.1111960411071777, + "learning_rate": 5.808492532377542e-06, + "loss": 1.1308894157409668, + "step": 332 + }, + { + "epoch": 0.6653386454183267, + "grad_norm": 1.0599477291107178, + "learning_rate": 5.805188300166379e-06, + "loss": 1.1927093267440796, + "step": 334 + }, + { + "epoch": 0.6693227091633466, + "grad_norm": 0.7919442653656006, + "learning_rate": 5.801856872374772e-06, + "loss": 1.2229902744293213, + "step": 336 + }, + { + "epoch": 0.6733067729083665, + "grad_norm": 0.874751627445221, + "learning_rate": 5.798498285167714e-06, + "loss": 1.239054560661316, + "step": 338 + }, + { + "epoch": 0.6772908366533864, + "grad_norm": 3.267413854598999, + "learning_rate": 5.795112575005031e-06, + "loss": 0.5422060489654541, + "step": 340 + }, + { + "epoch": 0.6812749003984063, + "grad_norm": 0.603284478187561, + "learning_rate": 5.791699778640985e-06, + "loss": 0.5057201385498047, + "step": 342 + }, + { + "epoch": 0.6852589641434262, + "grad_norm": 1.073237419128418, + "learning_rate": 5.788259933123882e-06, + "loss": 1.212401270866394, + "step": 344 + }, + { + "epoch": 0.6892430278884463, + "grad_norm": 0.9039257168769836, + "learning_rate": 5.7847930757956626e-06, + "loss": 1.2373487949371338, + "step": 346 + }, + { + "epoch": 0.6932270916334662, + "grad_norm": 0.6864405870437622, + "learning_rate": 5.7812992442915016e-06, + "loss": 1.1827311515808105, + "step": 348 + }, + { + "epoch": 0.6972111553784861, + "grad_norm": 1.7330577373504639, + "learning_rate": 5.777778476539397e-06, + "loss": 0.7856748104095459, + "step": 350 + }, + { + "epoch": 0.701195219123506, + "grad_norm": 4.816940784454346, + "learning_rate": 5.774230810759756e-06, + "loss": 0.7216228246688843, + "step": 352 + }, + { + "epoch": 0.7051792828685259, + "grad_norm": 2.1332626342773438, + "learning_rate": 5.7706562854649866e-06, + "loss": 0.49049532413482666, + "step": 354 + }, + { + "epoch": 0.7091633466135459, + "grad_norm": 2.8059940338134766, + "learning_rate": 5.767054939459075e-06, + "loss": 1.3019351959228516, + "step": 356 + }, + { + "epoch": 0.7131474103585658, + "grad_norm": 4.427498817443848, + "learning_rate": 5.763426811837164e-06, + "loss": 0.48208871483802795, + "step": 358 + }, + { + "epoch": 0.7171314741035857, + "grad_norm": 4.743298530578613, + "learning_rate": 5.759771941985128e-06, + "loss": 1.6483818292617798, + "step": 360 + }, + { + "epoch": 0.7211155378486056, + "grad_norm": 0.8030229210853577, + "learning_rate": 5.75609036957915e-06, + "loss": 0.7936917543411255, + "step": 362 + }, + { + "epoch": 0.7250996015936255, + "grad_norm": 4.138736248016357, + "learning_rate": 5.752382134585289e-06, + "loss": 0.19702184200286865, + "step": 364 + }, + { + "epoch": 0.7290836653386454, + "grad_norm": 0.7204448580741882, + "learning_rate": 5.748647277259041e-06, + "loss": 1.3097480535507202, + "step": 366 + }, + { + "epoch": 0.7330677290836654, + "grad_norm": 0.6811744570732117, + "learning_rate": 5.744885838144908e-06, + "loss": 1.282241702079773, + "step": 368 + }, + { + "epoch": 0.7370517928286853, + "grad_norm": 1.3216296434402466, + "learning_rate": 5.741097858075958e-06, + "loss": 1.1899917125701904, + "step": 370 + }, + { + "epoch": 0.7410358565737052, + "grad_norm": 0.7291891574859619, + "learning_rate": 5.737283378173377e-06, + "loss": 1.289171576499939, + "step": 372 + }, + { + "epoch": 0.7450199203187251, + "grad_norm": 1.4926878213882446, + "learning_rate": 5.733442439846028e-06, + "loss": 0.9133517742156982, + "step": 374 + }, + { + "epoch": 0.749003984063745, + "grad_norm": 1.1999213695526123, + "learning_rate": 5.729575084789995e-06, + "loss": 1.2485815286636353, + "step": 376 + }, + { + "epoch": 0.7529880478087649, + "grad_norm": 0.4571026563644409, + "learning_rate": 5.725681354988137e-06, + "loss": 0.41173255443573, + "step": 378 + }, + { + "epoch": 0.7569721115537849, + "grad_norm": 0.9662789106369019, + "learning_rate": 5.72176129270963e-06, + "loss": 1.3222002983093262, + "step": 380 + }, + { + "epoch": 0.7609561752988048, + "grad_norm": 0.8864423036575317, + "learning_rate": 5.717814940509503e-06, + "loss": 1.2533366680145264, + "step": 382 + }, + { + "epoch": 0.7649402390438247, + "grad_norm": 1.8013001680374146, + "learning_rate": 5.713842341228187e-06, + "loss": 1.132637858390808, + "step": 384 + }, + { + "epoch": 0.7689243027888446, + "grad_norm": 1.4815607070922852, + "learning_rate": 5.70984353799104e-06, + "loss": 0.28086692094802856, + "step": 386 + }, + { + "epoch": 0.7729083665338645, + "grad_norm": 0.8467429280281067, + "learning_rate": 5.705818574207883e-06, + "loss": 1.4608538150787354, + "step": 388 + }, + { + "epoch": 0.7768924302788844, + "grad_norm": 2.4864161014556885, + "learning_rate": 5.701767493572526e-06, + "loss": 0.7464155554771423, + "step": 390 + }, + { + "epoch": 0.7808764940239044, + "grad_norm": 2.4926576614379883, + "learning_rate": 5.6976903400623e-06, + "loss": 0.5242215991020203, + "step": 392 + }, + { + "epoch": 0.7848605577689243, + "grad_norm": 3.3884170055389404, + "learning_rate": 5.693587157937572e-06, + "loss": 0.7744420766830444, + "step": 394 + }, + { + "epoch": 0.7888446215139442, + "grad_norm": 1.3466330766677856, + "learning_rate": 5.689457991741267e-06, + "loss": 0.8062616586685181, + "step": 396 + }, + { + "epoch": 0.7928286852589641, + "grad_norm": 0.8415664434432983, + "learning_rate": 5.685302886298392e-06, + "loss": 0.9788842797279358, + "step": 398 + }, + { + "epoch": 0.796812749003984, + "grad_norm": 1.0375547409057617, + "learning_rate": 5.681121886715534e-06, + "loss": 1.068263053894043, + "step": 400 + }, + { + "epoch": 0.8007968127490039, + "grad_norm": 1.184495210647583, + "learning_rate": 5.676915038380384e-06, + "loss": 0.7641897797584534, + "step": 402 + }, + { + "epoch": 0.8047808764940239, + "grad_norm": 0.5623915195465088, + "learning_rate": 5.67268238696124e-06, + "loss": 1.194584846496582, + "step": 404 + }, + { + "epoch": 0.8087649402390438, + "grad_norm": 1.6544809341430664, + "learning_rate": 5.668423978406509e-06, + "loss": 1.8557928800582886, + "step": 406 + }, + { + "epoch": 0.8127490039840638, + "grad_norm": 0.9776933193206787, + "learning_rate": 5.664139858944209e-06, + "loss": 1.157083511352539, + "step": 408 + }, + { + "epoch": 0.8167330677290837, + "grad_norm": 0.9368433356285095, + "learning_rate": 5.65983007508147e-06, + "loss": 1.1894208192825317, + "step": 410 + }, + { + "epoch": 0.8207171314741036, + "grad_norm": 1.024929165840149, + "learning_rate": 5.655494673604024e-06, + "loss": 1.2211333513259888, + "step": 412 + }, + { + "epoch": 0.8247011952191236, + "grad_norm": 0.9331441521644592, + "learning_rate": 5.651133701575706e-06, + "loss": 0.9813644289970398, + "step": 414 + }, + { + "epoch": 0.8286852589641435, + "grad_norm": 0.43455296754837036, + "learning_rate": 5.64674720633793e-06, + "loss": 0.2262841910123825, + "step": 416 + }, + { + "epoch": 0.8326693227091634, + "grad_norm": 0.9842036366462708, + "learning_rate": 5.642335235509189e-06, + "loss": 1.2737834453582764, + "step": 418 + }, + { + "epoch": 0.8366533864541833, + "grad_norm": 1.0286755561828613, + "learning_rate": 5.637897836984526e-06, + "loss": 1.2228126525878906, + "step": 420 + }, + { + "epoch": 0.8406374501992032, + "grad_norm": 0.8756253123283386, + "learning_rate": 5.633435058935023e-06, + "loss": 1.1928170919418335, + "step": 422 + }, + { + "epoch": 0.8446215139442231, + "grad_norm": 0.758901834487915, + "learning_rate": 5.628946949807274e-06, + "loss": 1.1966356039047241, + "step": 424 + }, + { + "epoch": 0.848605577689243, + "grad_norm": 2.6789400577545166, + "learning_rate": 5.624433558322859e-06, + "loss": 0.7115716338157654, + "step": 426 + }, + { + "epoch": 0.852589641434263, + "grad_norm": 1.1329255104064941, + "learning_rate": 5.619894933477816e-06, + "loss": 1.2351547479629517, + "step": 428 + }, + { + "epoch": 0.8565737051792829, + "grad_norm": 0.8669703602790833, + "learning_rate": 5.615331124542109e-06, + "loss": 1.0460853576660156, + "step": 430 + }, + { + "epoch": 0.8605577689243028, + "grad_norm": 1.4718725681304932, + "learning_rate": 5.610742181059092e-06, + "loss": 1.8136500120162964, + "step": 432 + }, + { + "epoch": 0.8645418326693227, + "grad_norm": 1.955024003982544, + "learning_rate": 5.606128152844975e-06, + "loss": 1.2090433835983276, + "step": 434 + }, + { + "epoch": 0.8685258964143426, + "grad_norm": 2.959174156188965, + "learning_rate": 5.601489089988277e-06, + "loss": 0.4959055483341217, + "step": 436 + }, + { + "epoch": 0.8725099601593626, + "grad_norm": 0.8022291660308838, + "learning_rate": 5.596825042849287e-06, + "loss": 1.2489244937896729, + "step": 438 + }, + { + "epoch": 0.8764940239043825, + "grad_norm": 0.867755651473999, + "learning_rate": 5.592136062059517e-06, + "loss": 1.187935709953308, + "step": 440 + }, + { + "epoch": 0.8804780876494024, + "grad_norm": 2.0213284492492676, + "learning_rate": 5.587422198521149e-06, + "loss": 1.6624571084976196, + "step": 442 + }, + { + "epoch": 0.8844621513944223, + "grad_norm": 1.8472967147827148, + "learning_rate": 5.582683503406488e-06, + "loss": 1.3048073053359985, + "step": 444 + }, + { + "epoch": 0.8884462151394422, + "grad_norm": 0.8281286954879761, + "learning_rate": 5.5779200281574e-06, + "loss": 1.043340802192688, + "step": 446 + }, + { + "epoch": 0.8924302788844621, + "grad_norm": 1.8063609600067139, + "learning_rate": 5.573131824484758e-06, + "loss": 0.371786892414093, + "step": 448 + }, + { + "epoch": 0.896414342629482, + "grad_norm": 0.8337019681930542, + "learning_rate": 5.56831894436788e-06, + "loss": 1.1593928337097168, + "step": 450 + }, + { + "epoch": 0.900398406374502, + "grad_norm": 0.808246374130249, + "learning_rate": 5.563481440053964e-06, + "loss": 0.8130660057067871, + "step": 452 + }, + { + "epoch": 0.9043824701195219, + "grad_norm": 0.7648867964744568, + "learning_rate": 5.55861936405752e-06, + "loss": 1.2445188760757446, + "step": 454 + }, + { + "epoch": 0.9083665338645418, + "grad_norm": 4.679040431976318, + "learning_rate": 5.5537327691598026e-06, + "loss": 0.9090757966041565, + "step": 456 + }, + { + "epoch": 0.9123505976095617, + "grad_norm": 0.8703306317329407, + "learning_rate": 5.548821708408234e-06, + "loss": 1.2912606000900269, + "step": 458 + }, + { + "epoch": 0.9163346613545816, + "grad_norm": 3.33894681930542, + "learning_rate": 5.543886235115832e-06, + "loss": 1.0427659749984741, + "step": 460 + }, + { + "epoch": 0.9203187250996016, + "grad_norm": 1.598880410194397, + "learning_rate": 5.538926402860631e-06, + "loss": 1.2816940546035767, + "step": 462 + }, + { + "epoch": 0.9243027888446215, + "grad_norm": 1.35460364818573, + "learning_rate": 5.533942265485095e-06, + "loss": 1.3399840593338013, + "step": 464 + }, + { + "epoch": 0.9282868525896414, + "grad_norm": 7.064363956451416, + "learning_rate": 5.528933877095541e-06, + "loss": 0.40876510739326477, + "step": 466 + }, + { + "epoch": 0.9322709163346613, + "grad_norm": 0.7858706712722778, + "learning_rate": 5.523901292061547e-06, + "loss": 1.1805975437164307, + "step": 468 + }, + { + "epoch": 0.9362549800796812, + "grad_norm": 8.24327278137207, + "learning_rate": 5.518844565015361e-06, + "loss": 0.38794469833374023, + "step": 470 + }, + { + "epoch": 0.9402390438247012, + "grad_norm": 0.7928199768066406, + "learning_rate": 5.51376375085131e-06, + "loss": 1.2316607236862183, + "step": 472 + }, + { + "epoch": 0.9442231075697212, + "grad_norm": 4.031145095825195, + "learning_rate": 5.508658904725206e-06, + "loss": 0.5695405602455139, + "step": 474 + }, + { + "epoch": 0.9482071713147411, + "grad_norm": 2.9237377643585205, + "learning_rate": 5.503530082053741e-06, + "loss": 0.338968962430954, + "step": 476 + }, + { + "epoch": 0.952191235059761, + "grad_norm": 0.8833221793174744, + "learning_rate": 5.498377338513894e-06, + "loss": 1.2102028131484985, + "step": 478 + }, + { + "epoch": 0.9561752988047809, + "grad_norm": 25.611223220825195, + "learning_rate": 5.493200730042317e-06, + "loss": 0.4739567041397095, + "step": 480 + }, + { + "epoch": 0.9601593625498008, + "grad_norm": 5.376172065734863, + "learning_rate": 5.488000312834735e-06, + "loss": 0.9883483648300171, + "step": 482 + }, + { + "epoch": 0.9641434262948207, + "grad_norm": 1.7662686109542847, + "learning_rate": 5.482776143345333e-06, + "loss": 1.2430894374847412, + "step": 484 + }, + { + "epoch": 0.9681274900398407, + "grad_norm": 2.5627293586730957, + "learning_rate": 5.477528278286145e-06, + "loss": 1.2240179777145386, + "step": 486 + }, + { + "epoch": 0.9721115537848606, + "grad_norm": 0.8417234420776367, + "learning_rate": 5.472256774626435e-06, + "loss": 1.1680150032043457, + "step": 488 + }, + { + "epoch": 0.9760956175298805, + "grad_norm": 0.8709147572517395, + "learning_rate": 5.4669616895920826e-06, + "loss": 1.2006162405014038, + "step": 490 + }, + { + "epoch": 0.9800796812749004, + "grad_norm": 5.11852502822876, + "learning_rate": 5.46164308066496e-06, + "loss": 0.7005679607391357, + "step": 492 + }, + { + "epoch": 0.9840637450199203, + "grad_norm": 2.7665576934814453, + "learning_rate": 5.456301005582304e-06, + "loss": 0.7001307606697083, + "step": 494 + }, + { + "epoch": 0.9880478087649402, + "grad_norm": 0.8219811320304871, + "learning_rate": 5.4509355223360956e-06, + "loss": 1.254296898841858, + "step": 496 + }, + { + "epoch": 0.9920318725099602, + "grad_norm": 1.0245788097381592, + "learning_rate": 5.445546689172432e-06, + "loss": 1.267047643661499, + "step": 498 + }, + { + "epoch": 0.9960159362549801, + "grad_norm": 1.1505917310714722, + "learning_rate": 5.440134564590883e-06, + "loss": 0.7141546010971069, + "step": 500 + }, + { + "epoch": 1.0, + "grad_norm": 6.24027681350708, + "learning_rate": 5.434699207343867e-06, + "loss": 1.0391122102737427, + "step": 502 + }, + { + "epoch": 1.00398406374502, + "grad_norm": 1.2134792804718018, + "learning_rate": 5.429240676436008e-06, + "loss": 0.7802969217300415, + "step": 504 + }, + { + "epoch": 1.0079681274900398, + "grad_norm": 1.5164703130722046, + "learning_rate": 5.423759031123498e-06, + "loss": 0.31817543506622314, + "step": 506 + }, + { + "epoch": 1.0119521912350598, + "grad_norm": 0.6141365170478821, + "learning_rate": 5.41825433091345e-06, + "loss": 1.0097558498382568, + "step": 508 + }, + { + "epoch": 1.0159362549800797, + "grad_norm": 0.8733232021331787, + "learning_rate": 5.4127266355632575e-06, + "loss": 1.0352897644042969, + "step": 510 + }, + { + "epoch": 1.0199203187250996, + "grad_norm": 2.5583245754241943, + "learning_rate": 5.407176005079938e-06, + "loss": 1.0885701179504395, + "step": 512 + }, + { + "epoch": 1.0239043824701195, + "grad_norm": 1.0007575750350952, + "learning_rate": 5.401602499719488e-06, + "loss": 1.0486167669296265, + "step": 514 + }, + { + "epoch": 1.0278884462151394, + "grad_norm": 1.1661553382873535, + "learning_rate": 5.396006179986228e-06, + "loss": 1.0347387790679932, + "step": 516 + }, + { + "epoch": 1.0318725099601593, + "grad_norm": 0.8863986134529114, + "learning_rate": 5.390387106632143e-06, + "loss": 1.0672526359558105, + "step": 518 + }, + { + "epoch": 1.0358565737051793, + "grad_norm": 2.13053035736084, + "learning_rate": 5.384745340656227e-06, + "loss": 0.8640899062156677, + "step": 520 + }, + { + "epoch": 1.0398406374501992, + "grad_norm": 2.6343281269073486, + "learning_rate": 5.379080943303814e-06, + "loss": 0.943762481212616, + "step": 522 + }, + { + "epoch": 1.043824701195219, + "grad_norm": 1.45510733127594, + "learning_rate": 5.373393976065921e-06, + "loss": 0.9649692177772522, + "step": 524 + }, + { + "epoch": 1.047808764940239, + "grad_norm": 1.4119848012924194, + "learning_rate": 5.367684500678576e-06, + "loss": 1.1445621252059937, + "step": 526 + }, + { + "epoch": 1.051792828685259, + "grad_norm": 1.0543644428253174, + "learning_rate": 5.361952579122149e-06, + "loss": 0.9114750027656555, + "step": 528 + }, + { + "epoch": 1.0557768924302788, + "grad_norm": 1.5039920806884766, + "learning_rate": 5.356198273620678e-06, + "loss": 0.8998257517814636, + "step": 530 + }, + { + "epoch": 1.0597609561752988, + "grad_norm": 2.6351239681243896, + "learning_rate": 5.350421646641195e-06, + "loss": 0.3897404074668884, + "step": 532 + }, + { + "epoch": 1.0637450199203187, + "grad_norm": 1.1779015064239502, + "learning_rate": 5.344622760893049e-06, + "loss": 1.2084486484527588, + "step": 534 + }, + { + "epoch": 1.0677290836653386, + "grad_norm": 0.50465989112854, + "learning_rate": 5.338801679327221e-06, + "loss": 0.48134946823120117, + "step": 536 + }, + { + "epoch": 1.0717131474103585, + "grad_norm": 6.834875106811523, + "learning_rate": 5.332958465135645e-06, + "loss": 0.8534721732139587, + "step": 538 + }, + { + "epoch": 1.0756972111553784, + "grad_norm": 0.8775362372398376, + "learning_rate": 5.327093181750519e-06, + "loss": 0.1745588630437851, + "step": 540 + }, + { + "epoch": 1.0796812749003983, + "grad_norm": 0.8401792049407959, + "learning_rate": 5.3212058928436175e-06, + "loss": 1.0862375497817993, + "step": 542 + }, + { + "epoch": 1.0836653386454183, + "grad_norm": 1.2075270414352417, + "learning_rate": 5.3152966623256026e-06, + "loss": 1.2837507724761963, + "step": 544 + }, + { + "epoch": 1.0876494023904382, + "grad_norm": 3.44868803024292, + "learning_rate": 5.309365554345325e-06, + "loss": 0.4348865747451782, + "step": 546 + }, + { + "epoch": 1.091633466135458, + "grad_norm": 1.060323715209961, + "learning_rate": 5.303412633289133e-06, + "loss": 0.7609821557998657, + "step": 548 + }, + { + "epoch": 1.095617529880478, + "grad_norm": 0.48030683398246765, + "learning_rate": 5.297437963780171e-06, + "loss": 0.5199949741363525, + "step": 550 + }, + { + "epoch": 1.099601593625498, + "grad_norm": 0.8254769444465637, + "learning_rate": 5.2914416106776745e-06, + "loss": 1.0883558988571167, + "step": 552 + }, + { + "epoch": 1.1035856573705178, + "grad_norm": 2.637892246246338, + "learning_rate": 5.2854236390762755e-06, + "loss": 0.48916831612586975, + "step": 554 + }, + { + "epoch": 1.1075697211155378, + "grad_norm": 1.684272050857544, + "learning_rate": 5.2793841143052855e-06, + "loss": 1.0254663228988647, + "step": 556 + }, + { + "epoch": 1.1115537848605577, + "grad_norm": 2.17739200592041, + "learning_rate": 5.273323101927994e-06, + "loss": 0.9679847359657288, + "step": 558 + }, + { + "epoch": 1.1155378486055776, + "grad_norm": 5.525514125823975, + "learning_rate": 5.26724066774095e-06, + "loss": 0.9007784128189087, + "step": 560 + }, + { + "epoch": 1.1195219123505975, + "grad_norm": 1.1246291399002075, + "learning_rate": 5.261136877773254e-06, + "loss": 1.0599032640457153, + "step": 562 + }, + { + "epoch": 1.1235059760956174, + "grad_norm": 1.811063289642334, + "learning_rate": 5.255011798285838e-06, + "loss": 1.053318738937378, + "step": 564 + }, + { + "epoch": 1.1274900398406373, + "grad_norm": 1.0067085027694702, + "learning_rate": 5.248865495770747e-06, + "loss": 1.0161441564559937, + "step": 566 + }, + { + "epoch": 1.1314741035856573, + "grad_norm": 1.653944730758667, + "learning_rate": 5.242698036950416e-06, + "loss": 1.211927890777588, + "step": 568 + }, + { + "epoch": 1.1354581673306772, + "grad_norm": 5.520211219787598, + "learning_rate": 5.236509488776946e-06, + "loss": 0.2512112259864807, + "step": 570 + }, + { + "epoch": 1.139442231075697, + "grad_norm": 0.6854221224784851, + "learning_rate": 5.230299918431381e-06, + "loss": 0.20837584137916565, + "step": 572 + }, + { + "epoch": 1.1434262948207172, + "grad_norm": 1.0965662002563477, + "learning_rate": 5.224069393322971e-06, + "loss": 0.8550689220428467, + "step": 574 + }, + { + "epoch": 1.1474103585657371, + "grad_norm": 0.5142279863357544, + "learning_rate": 5.2178179810884465e-06, + "loss": 0.5071516633033752, + "step": 576 + }, + { + "epoch": 1.151394422310757, + "grad_norm": 1.3928073644638062, + "learning_rate": 5.211545749591285e-06, + "loss": 1.1629210710525513, + "step": 578 + }, + { + "epoch": 1.155378486055777, + "grad_norm": 4.516799449920654, + "learning_rate": 5.205252766920967e-06, + "loss": 0.615897536277771, + "step": 580 + }, + { + "epoch": 1.159362549800797, + "grad_norm": 1.9076368808746338, + "learning_rate": 5.198939101392247e-06, + "loss": 0.6484902501106262, + "step": 582 + }, + { + "epoch": 1.1633466135458168, + "grad_norm": 2.9412710666656494, + "learning_rate": 5.192604821544402e-06, + "loss": 0.22438056766986847, + "step": 584 + }, + { + "epoch": 1.1673306772908367, + "grad_norm": 0.8736124038696289, + "learning_rate": 5.186249996140492e-06, + "loss": 1.1574631929397583, + "step": 586 + }, + { + "epoch": 1.1713147410358566, + "grad_norm": 1.56623375415802, + "learning_rate": 5.179874694166617e-06, + "loss": 1.0566999912261963, + "step": 588 + }, + { + "epoch": 1.1752988047808766, + "grad_norm": 3.406691551208496, + "learning_rate": 5.1734789848311635e-06, + "loss": 1.28257417678833, + "step": 590 + }, + { + "epoch": 1.1792828685258965, + "grad_norm": 1.163465976715088, + "learning_rate": 5.16706293756405e-06, + "loss": 1.0826280117034912, + "step": 592 + }, + { + "epoch": 1.1832669322709164, + "grad_norm": 3.0535504817962646, + "learning_rate": 5.160626622015983e-06, + "loss": 1.4529417753219604, + "step": 594 + }, + { + "epoch": 1.1872509960159363, + "grad_norm": 0.8099126815795898, + "learning_rate": 5.154170108057693e-06, + "loss": 1.1337939500808716, + "step": 596 + }, + { + "epoch": 1.1912350597609562, + "grad_norm": 3.8160228729248047, + "learning_rate": 5.147693465779179e-06, + "loss": 0.3046616017818451, + "step": 598 + }, + { + "epoch": 1.1952191235059761, + "grad_norm": 1.2103179693222046, + "learning_rate": 5.141196765488946e-06, + "loss": 0.8739789724349976, + "step": 600 + }, + { + "epoch": 1.199203187250996, + "grad_norm": 3.3165013790130615, + "learning_rate": 5.134680077713244e-06, + "loss": 0.5771604776382446, + "step": 602 + }, + { + "epoch": 1.203187250996016, + "grad_norm": 1.3412213325500488, + "learning_rate": 5.1281434731953e-06, + "loss": 1.1980223655700684, + "step": 604 + }, + { + "epoch": 1.207171314741036, + "grad_norm": 14.288922309875488, + "learning_rate": 5.121587022894554e-06, + "loss": 0.4752068817615509, + "step": 606 + }, + { + "epoch": 1.2111553784860558, + "grad_norm": 0.9397494196891785, + "learning_rate": 5.115010797985882e-06, + "loss": 0.5870952010154724, + "step": 608 + }, + { + "epoch": 1.2151394422310757, + "grad_norm": 0.735195517539978, + "learning_rate": 5.108414869858831e-06, + "loss": 1.0899227857589722, + "step": 610 + }, + { + "epoch": 1.2191235059760956, + "grad_norm": 0.9480123519897461, + "learning_rate": 5.1017993101168374e-06, + "loss": 1.1740434169769287, + "step": 612 + }, + { + "epoch": 1.2231075697211156, + "grad_norm": 1.5338431596755981, + "learning_rate": 5.095164190576452e-06, + "loss": 1.4396584033966064, + "step": 614 + }, + { + "epoch": 1.2270916334661355, + "grad_norm": 11.36307144165039, + "learning_rate": 5.0885095832665666e-06, + "loss": 0.3999689817428589, + "step": 616 + }, + { + "epoch": 1.2310756972111554, + "grad_norm": 1.546046495437622, + "learning_rate": 5.081835560427619e-06, + "loss": 0.9995384812355042, + "step": 618 + }, + { + "epoch": 1.2350597609561753, + "grad_norm": 1.254744291305542, + "learning_rate": 5.075142194510823e-06, + "loss": 1.0542714595794678, + "step": 620 + }, + { + "epoch": 1.2390438247011952, + "grad_norm": 2.047104597091675, + "learning_rate": 5.068429558177369e-06, + "loss": 0.9798321723937988, + "step": 622 + }, + { + "epoch": 1.2430278884462151, + "grad_norm": 1.0986047983169556, + "learning_rate": 5.061697724297646e-06, + "loss": 1.068199872970581, + "step": 624 + }, + { + "epoch": 1.247011952191235, + "grad_norm": 1.8080114126205444, + "learning_rate": 5.054946765950443e-06, + "loss": 0.9513214230537415, + "step": 626 + }, + { + "epoch": 1.250996015936255, + "grad_norm": 1.3059947490692139, + "learning_rate": 5.048176756422159e-06, + "loss": 0.7849744558334351, + "step": 628 + }, + { + "epoch": 1.254980079681275, + "grad_norm": 0.7330244779586792, + "learning_rate": 5.041387769206009e-06, + "loss": 1.0498535633087158, + "step": 630 + }, + { + "epoch": 1.2589641434262948, + "grad_norm": 5.962719440460205, + "learning_rate": 5.034579878001222e-06, + "loss": 0.2894093096256256, + "step": 632 + }, + { + "epoch": 1.2629482071713147, + "grad_norm": 4.925858974456787, + "learning_rate": 5.027753156712246e-06, + "loss": 0.36715632677078247, + "step": 634 + }, + { + "epoch": 1.2669322709163346, + "grad_norm": 3.4104573726654053, + "learning_rate": 5.020907679447936e-06, + "loss": 0.844882071018219, + "step": 636 + }, + { + "epoch": 1.2709163346613546, + "grad_norm": 1.9961673021316528, + "learning_rate": 5.0140435205207636e-06, + "loss": 0.8165204524993896, + "step": 638 + }, + { + "epoch": 1.2749003984063745, + "grad_norm": 2.4332053661346436, + "learning_rate": 5.007160754446002e-06, + "loss": 0.3054620623588562, + "step": 640 + }, + { + "epoch": 1.2788844621513944, + "grad_norm": 0.6446577906608582, + "learning_rate": 5.000259455940913e-06, + "loss": 0.9809127449989319, + "step": 642 + }, + { + "epoch": 1.2828685258964143, + "grad_norm": 1.2125827074050903, + "learning_rate": 4.9933396999239455e-06, + "loss": 0.7705118060112, + "step": 644 + }, + { + "epoch": 1.2868525896414342, + "grad_norm": 0.7487397193908691, + "learning_rate": 4.986401561513917e-06, + "loss": 1.0824811458587646, + "step": 646 + }, + { + "epoch": 1.2908366533864541, + "grad_norm": 1.9600952863693237, + "learning_rate": 4.979445116029199e-06, + "loss": 0.6253088116645813, + "step": 648 + }, + { + "epoch": 1.294820717131474, + "grad_norm": 1.7079068422317505, + "learning_rate": 4.972470438986896e-06, + "loss": 1.5013655424118042, + "step": 650 + }, + { + "epoch": 1.298804780876494, + "grad_norm": 1.1496132612228394, + "learning_rate": 4.965477606102033e-06, + "loss": 0.8948485255241394, + "step": 652 + }, + { + "epoch": 1.302788844621514, + "grad_norm": 1.8034613132476807, + "learning_rate": 4.9584666932867285e-06, + "loss": 0.24509888887405396, + "step": 654 + }, + { + "epoch": 1.3067729083665338, + "grad_norm": 0.6996963620185852, + "learning_rate": 4.951437776649368e-06, + "loss": 1.0769448280334473, + "step": 656 + }, + { + "epoch": 1.3107569721115537, + "grad_norm": 0.571880578994751, + "learning_rate": 4.944390932493787e-06, + "loss": 0.8138774633407593, + "step": 658 + }, + { + "epoch": 1.3147410358565736, + "grad_norm": 0.9483959674835205, + "learning_rate": 4.937326237318431e-06, + "loss": 0.6459387540817261, + "step": 660 + }, + { + "epoch": 1.3187250996015936, + "grad_norm": 0.9495901465415955, + "learning_rate": 4.930243767815534e-06, + "loss": 1.1829910278320312, + "step": 662 + }, + { + "epoch": 1.3227091633466135, + "grad_norm": 1.2907254695892334, + "learning_rate": 4.923143600870284e-06, + "loss": 0.5661064386367798, + "step": 664 + }, + { + "epoch": 1.3266932270916334, + "grad_norm": 1.5633907318115234, + "learning_rate": 4.916025813559983e-06, + "loss": 0.8189319372177124, + "step": 666 + }, + { + "epoch": 1.3306772908366533, + "grad_norm": 1.9113082885742188, + "learning_rate": 4.908890483153218e-06, + "loss": 0.38532766699790955, + "step": 668 + }, + { + "epoch": 1.3346613545816732, + "grad_norm": 0.9342731237411499, + "learning_rate": 4.901737687109019e-06, + "loss": 1.0321613550186157, + "step": 670 + }, + { + "epoch": 1.3386454183266931, + "grad_norm": 3.1048390865325928, + "learning_rate": 4.894567503076014e-06, + "loss": 0.5770927667617798, + "step": 672 + }, + { + "epoch": 1.342629482071713, + "grad_norm": 0.820324182510376, + "learning_rate": 4.887380008891593e-06, + "loss": 1.0886192321777344, + "step": 674 + }, + { + "epoch": 1.3466135458167332, + "grad_norm": 1.3751561641693115, + "learning_rate": 4.880175282581059e-06, + "loss": 0.97751384973526, + "step": 676 + }, + { + "epoch": 1.3505976095617531, + "grad_norm": 0.7426400184631348, + "learning_rate": 4.872953402356782e-06, + "loss": 1.076625943183899, + "step": 678 + }, + { + "epoch": 1.354581673306773, + "grad_norm": 1.1565395593643188, + "learning_rate": 4.86571444661735e-06, + "loss": 1.0121248960494995, + "step": 680 + }, + { + "epoch": 1.358565737051793, + "grad_norm": 0.7444704174995422, + "learning_rate": 4.858458493946716e-06, + "loss": 1.0811046361923218, + "step": 682 + }, + { + "epoch": 1.3625498007968129, + "grad_norm": 1.0144495964050293, + "learning_rate": 4.851185623113349e-06, + "loss": 1.1279915571212769, + "step": 684 + }, + { + "epoch": 1.3665338645418328, + "grad_norm": 0.7559702396392822, + "learning_rate": 4.843895913069377e-06, + "loss": 1.0942429304122925, + "step": 686 + }, + { + "epoch": 1.3705179282868527, + "grad_norm": 0.8456003069877625, + "learning_rate": 4.836589442949727e-06, + "loss": 1.0091909170150757, + "step": 688 + }, + { + "epoch": 1.3745019920318726, + "grad_norm": 0.7402591705322266, + "learning_rate": 4.829266292071268e-06, + "loss": 0.9695682525634766, + "step": 690 + }, + { + "epoch": 1.3784860557768925, + "grad_norm": 1.815006136894226, + "learning_rate": 4.821926539931952e-06, + "loss": 0.3355652689933777, + "step": 692 + }, + { + "epoch": 1.3824701195219125, + "grad_norm": 1.0571285486221313, + "learning_rate": 4.814570266209952e-06, + "loss": 1.1081352233886719, + "step": 694 + }, + { + "epoch": 1.3864541832669324, + "grad_norm": 1.3027758598327637, + "learning_rate": 4.80719755076279e-06, + "loss": 1.0507612228393555, + "step": 696 + }, + { + "epoch": 1.3904382470119523, + "grad_norm": 0.9322640299797058, + "learning_rate": 4.799808473626476e-06, + "loss": 1.1305720806121826, + "step": 698 + }, + { + "epoch": 1.3944223107569722, + "grad_norm": 1.1364309787750244, + "learning_rate": 4.792403115014637e-06, + "loss": 0.1400398164987564, + "step": 700 + }, + { + "epoch": 1.3984063745019921, + "grad_norm": 1.2325326204299927, + "learning_rate": 4.7849815553176476e-06, + "loss": 1.1220163106918335, + "step": 702 + }, + { + "epoch": 1.402390438247012, + "grad_norm": 1.0282156467437744, + "learning_rate": 4.777543875101757e-06, + "loss": 1.0591614246368408, + "step": 704 + }, + { + "epoch": 1.406374501992032, + "grad_norm": 0.7515193223953247, + "learning_rate": 4.770090155108215e-06, + "loss": 1.1357749700546265, + "step": 706 + }, + { + "epoch": 1.4103585657370519, + "grad_norm": 1.05164635181427, + "learning_rate": 4.7626204762523905e-06, + "loss": 0.9992522597312927, + "step": 708 + }, + { + "epoch": 1.4143426294820718, + "grad_norm": 0.7848185896873474, + "learning_rate": 4.755134919622901e-06, + "loss": 1.0771911144256592, + "step": 710 + }, + { + "epoch": 1.4183266932270917, + "grad_norm": 2.0036990642547607, + "learning_rate": 4.747633566480726e-06, + "loss": 0.6499975323677063, + "step": 712 + }, + { + "epoch": 1.4223107569721116, + "grad_norm": 1.088212251663208, + "learning_rate": 4.740116498258328e-06, + "loss": 1.0736567974090576, + "step": 714 + }, + { + "epoch": 1.4262948207171315, + "grad_norm": 1.0202051401138306, + "learning_rate": 4.73258379655877e-06, + "loss": 1.1317867040634155, + "step": 716 + }, + { + "epoch": 1.4302788844621515, + "grad_norm": 0.6986392140388489, + "learning_rate": 4.7250355431548244e-06, + "loss": 0.1079653948545456, + "step": 718 + }, + { + "epoch": 1.4342629482071714, + "grad_norm": 1.2315129041671753, + "learning_rate": 4.717471819988088e-06, + "loss": 1.070616364479065, + "step": 720 + }, + { + "epoch": 1.4382470119521913, + "grad_norm": 2.786571502685547, + "learning_rate": 4.709892709168096e-06, + "loss": 0.2563188672065735, + "step": 722 + }, + { + "epoch": 1.4422310756972112, + "grad_norm": 0.634524941444397, + "learning_rate": 4.702298292971422e-06, + "loss": 1.0500552654266357, + "step": 724 + }, + { + "epoch": 1.4462151394422311, + "grad_norm": 0.7324956059455872, + "learning_rate": 4.6946886538407975e-06, + "loss": 1.092575192451477, + "step": 726 + }, + { + "epoch": 1.450199203187251, + "grad_norm": 1.8564890623092651, + "learning_rate": 4.687063874384204e-06, + "loss": 0.8989277482032776, + "step": 728 + }, + { + "epoch": 1.454183266932271, + "grad_norm": 0.6646371483802795, + "learning_rate": 4.679424037373984e-06, + "loss": 1.0014073848724365, + "step": 730 + }, + { + "epoch": 1.4581673306772909, + "grad_norm": 2.136218786239624, + "learning_rate": 4.671769225745939e-06, + "loss": 1.0647640228271484, + "step": 732 + }, + { + "epoch": 1.4621513944223108, + "grad_norm": 0.5179296135902405, + "learning_rate": 4.664099522598432e-06, + "loss": 0.12710000574588776, + "step": 734 + }, + { + "epoch": 1.4661354581673307, + "grad_norm": 0.8502590656280518, + "learning_rate": 4.656415011191484e-06, + "loss": 1.085228681564331, + "step": 736 + }, + { + "epoch": 1.4701195219123506, + "grad_norm": 1.1160621643066406, + "learning_rate": 4.648715774945869e-06, + "loss": 1.1700797080993652, + "step": 738 + }, + { + "epoch": 1.4741035856573705, + "grad_norm": 4.530128002166748, + "learning_rate": 4.641001897442209e-06, + "loss": 0.19807864725589752, + "step": 740 + }, + { + "epoch": 1.4780876494023905, + "grad_norm": 1.182551383972168, + "learning_rate": 4.633273462420069e-06, + "loss": 1.2210465669631958, + "step": 742 + }, + { + "epoch": 1.4820717131474104, + "grad_norm": 7.367408752441406, + "learning_rate": 4.625530553777045e-06, + "loss": 1.2010120153427124, + "step": 744 + }, + { + "epoch": 1.4860557768924303, + "grad_norm": 0.8875226378440857, + "learning_rate": 4.617773255567855e-06, + "loss": 1.0283279418945312, + "step": 746 + }, + { + "epoch": 1.4900398406374502, + "grad_norm": 1.780938744544983, + "learning_rate": 4.610001652003426e-06, + "loss": 1.0667709112167358, + "step": 748 + }, + { + "epoch": 1.4940239043824701, + "grad_norm": 1.2433035373687744, + "learning_rate": 4.602215827449976e-06, + "loss": 1.0492123365402222, + "step": 750 + }, + { + "epoch": 1.49800796812749, + "grad_norm": 0.8798750638961792, + "learning_rate": 4.594415866428108e-06, + "loss": 1.0049997568130493, + "step": 752 + }, + { + "epoch": 1.50199203187251, + "grad_norm": 1.146921992301941, + "learning_rate": 4.586601853611882e-06, + "loss": 0.994334876537323, + "step": 754 + }, + { + "epoch": 1.5059760956175299, + "grad_norm": 3.869616746902466, + "learning_rate": 4.578773873827901e-06, + "loss": 0.7532044053077698, + "step": 756 + }, + { + "epoch": 1.5099601593625498, + "grad_norm": 1.7733598947525024, + "learning_rate": 4.57093201205439e-06, + "loss": 1.0711463689804077, + "step": 758 + }, + { + "epoch": 1.5139442231075697, + "grad_norm": 4.040090560913086, + "learning_rate": 4.563076353420272e-06, + "loss": 1.1239742040634155, + "step": 760 + }, + { + "epoch": 1.5179282868525896, + "grad_norm": 1.1118268966674805, + "learning_rate": 4.5552069832042455e-06, + "loss": 0.22398273646831512, + "step": 762 + }, + { + "epoch": 1.5219123505976095, + "grad_norm": 0.8436402678489685, + "learning_rate": 4.547323986833857e-06, + "loss": 1.0367255210876465, + "step": 764 + }, + { + "epoch": 1.5258964143426295, + "grad_norm": 1.7664424180984497, + "learning_rate": 4.539427449884576e-06, + "loss": 0.7687526941299438, + "step": 766 + }, + { + "epoch": 1.5298804780876494, + "grad_norm": 1.0416488647460938, + "learning_rate": 4.53151745807886e-06, + "loss": 0.5652468204498291, + "step": 768 + }, + { + "epoch": 1.5338645418326693, + "grad_norm": 1.3710383176803589, + "learning_rate": 4.523594097285234e-06, + "loss": 1.0875599384307861, + "step": 770 + }, + { + "epoch": 1.5378486055776892, + "grad_norm": 1.310120701789856, + "learning_rate": 4.51565745351735e-06, + "loss": 0.8149851560592651, + "step": 772 + }, + { + "epoch": 1.5418326693227091, + "grad_norm": 1.0462884902954102, + "learning_rate": 4.507707612933059e-06, + "loss": 1.044182300567627, + "step": 774 + }, + { + "epoch": 1.545816733067729, + "grad_norm": 2.2944624423980713, + "learning_rate": 4.4997446618334664e-06, + "loss": 1.1731159687042236, + "step": 776 + }, + { + "epoch": 1.549800796812749, + "grad_norm": 6.394598960876465, + "learning_rate": 4.491768686662005e-06, + "loss": 0.5516869425773621, + "step": 778 + }, + { + "epoch": 1.5537848605577689, + "grad_norm": 2.329699754714966, + "learning_rate": 4.483779774003498e-06, + "loss": 0.5405542850494385, + "step": 780 + }, + { + "epoch": 1.5577689243027888, + "grad_norm": 0.42006587982177734, + "learning_rate": 4.475778010583205e-06, + "loss": 0.20549674332141876, + "step": 782 + }, + { + "epoch": 1.5617529880478087, + "grad_norm": 2.271444082260132, + "learning_rate": 4.467763483265897e-06, + "loss": 0.9095351696014404, + "step": 784 + }, + { + "epoch": 1.5657370517928286, + "grad_norm": 1.6157774925231934, + "learning_rate": 4.459736279054901e-06, + "loss": 1.3291853666305542, + "step": 786 + }, + { + "epoch": 1.5697211155378485, + "grad_norm": 4.978515625, + "learning_rate": 4.451696485091164e-06, + "loss": 0.7586594223976135, + "step": 788 + }, + { + "epoch": 1.5737051792828685, + "grad_norm": 1.2765519618988037, + "learning_rate": 4.4436441886523025e-06, + "loss": 1.1358023881912231, + "step": 790 + }, + { + "epoch": 1.5776892430278884, + "grad_norm": 8.105411529541016, + "learning_rate": 4.435579477151655e-06, + "loss": 0.8000907897949219, + "step": 792 + }, + { + "epoch": 1.5816733067729083, + "grad_norm": 0.7435089349746704, + "learning_rate": 4.427502438137337e-06, + "loss": 1.073531150817871, + "step": 794 + }, + { + "epoch": 1.5856573705179282, + "grad_norm": 0.9908289313316345, + "learning_rate": 4.419413159291284e-06, + "loss": 1.011960744857788, + "step": 796 + }, + { + "epoch": 1.5896414342629481, + "grad_norm": 1.1573151350021362, + "learning_rate": 4.411311728428307e-06, + "loss": 0.8743354082107544, + "step": 798 + }, + { + "epoch": 1.593625498007968, + "grad_norm": 6.756656646728516, + "learning_rate": 4.403198233495133e-06, + "loss": 0.32545700669288635, + "step": 800 + }, + { + "epoch": 1.597609561752988, + "grad_norm": 1.2311936616897583, + "learning_rate": 4.395072762569457e-06, + "loss": 0.9778568744659424, + "step": 802 + }, + { + "epoch": 1.6015936254980079, + "grad_norm": 3.5830166339874268, + "learning_rate": 4.386935403858977e-06, + "loss": 1.0981725454330444, + "step": 804 + }, + { + "epoch": 1.6055776892430278, + "grad_norm": 0.9334324598312378, + "learning_rate": 4.378786245700443e-06, + "loss": 1.3115934133529663, + "step": 806 + }, + { + "epoch": 1.6095617529880477, + "grad_norm": 0.8329153656959534, + "learning_rate": 4.370625376558698e-06, + "loss": 1.028051733970642, + "step": 808 + }, + { + "epoch": 1.6135458167330676, + "grad_norm": 1.030179500579834, + "learning_rate": 4.362452885025713e-06, + "loss": 0.9735574722290039, + "step": 810 + }, + { + "epoch": 1.6175298804780875, + "grad_norm": 6.181675434112549, + "learning_rate": 4.35426885981963e-06, + "loss": 0.42590758204460144, + "step": 812 + }, + { + "epoch": 1.6215139442231075, + "grad_norm": 3.902128219604492, + "learning_rate": 4.346073389783799e-06, + "loss": 0.7486605048179626, + "step": 814 + }, + { + "epoch": 1.6254980079681274, + "grad_norm": 0.6811983585357666, + "learning_rate": 4.337866563885808e-06, + "loss": 0.2310914248228073, + "step": 816 + }, + { + "epoch": 1.6294820717131473, + "grad_norm": 0.7712540030479431, + "learning_rate": 4.329648471216523e-06, + "loss": 1.112511157989502, + "step": 818 + }, + { + "epoch": 1.6334661354581672, + "grad_norm": 1.0290017127990723, + "learning_rate": 4.321419200989117e-06, + "loss": 0.287282794713974, + "step": 820 + }, + { + "epoch": 1.6374501992031871, + "grad_norm": 2.3703389167785645, + "learning_rate": 4.313178842538107e-06, + "loss": 0.7247891426086426, + "step": 822 + }, + { + "epoch": 1.641434262948207, + "grad_norm": 1.919006586074829, + "learning_rate": 4.304927485318375e-06, + "loss": 0.21648265421390533, + "step": 824 + }, + { + "epoch": 1.645418326693227, + "grad_norm": 1.1350631713867188, + "learning_rate": 4.296665218904207e-06, + "loss": 1.0472216606140137, + "step": 826 + }, + { + "epoch": 1.6494023904382469, + "grad_norm": 0.42043375968933105, + "learning_rate": 4.288392132988313e-06, + "loss": 0.40000608563423157, + "step": 828 + }, + { + "epoch": 1.6533864541832668, + "grad_norm": 1.6645681858062744, + "learning_rate": 4.280108317380859e-06, + "loss": 0.4568580985069275, + "step": 830 + }, + { + "epoch": 1.6573705179282867, + "grad_norm": 1.5291117429733276, + "learning_rate": 4.27181386200849e-06, + "loss": 0.9923895597457886, + "step": 832 + }, + { + "epoch": 1.6613545816733066, + "grad_norm": 1.294873833656311, + "learning_rate": 4.263508856913346e-06, + "loss": 0.994326651096344, + "step": 834 + }, + { + "epoch": 1.6653386454183265, + "grad_norm": 2.7709615230560303, + "learning_rate": 4.2551933922521e-06, + "loss": 0.8918184041976929, + "step": 836 + }, + { + "epoch": 1.6693227091633465, + "grad_norm": 1.2106887102127075, + "learning_rate": 4.246867558294967e-06, + "loss": 1.1439393758773804, + "step": 838 + }, + { + "epoch": 1.6733067729083664, + "grad_norm": 1.091464877128601, + "learning_rate": 4.2385314454247275e-06, + "loss": 1.0264958143234253, + "step": 840 + }, + { + "epoch": 1.6772908366533863, + "grad_norm": 1.5609543323516846, + "learning_rate": 4.230185144135749e-06, + "loss": 0.8460158109664917, + "step": 842 + }, + { + "epoch": 1.6812749003984062, + "grad_norm": 0.8120943903923035, + "learning_rate": 4.221828745033002e-06, + "loss": 1.0981191396713257, + "step": 844 + }, + { + "epoch": 1.6852589641434261, + "grad_norm": 1.0494468212127686, + "learning_rate": 4.2134623388310706e-06, + "loss": 0.3851274847984314, + "step": 846 + }, + { + "epoch": 1.6892430278884463, + "grad_norm": 1.039975643157959, + "learning_rate": 4.20508601635318e-06, + "loss": 0.7145401239395142, + "step": 848 + }, + { + "epoch": 1.6932270916334662, + "grad_norm": 1.385925054550171, + "learning_rate": 4.1966998685302e-06, + "loss": 1.1264657974243164, + "step": 850 + }, + { + "epoch": 1.697211155378486, + "grad_norm": 0.7857804894447327, + "learning_rate": 4.18830398639966e-06, + "loss": 1.1105672121047974, + "step": 852 + }, + { + "epoch": 1.701195219123506, + "grad_norm": 1.1625089645385742, + "learning_rate": 4.179898461104764e-06, + "loss": 1.078861117362976, + "step": 854 + }, + { + "epoch": 1.705179282868526, + "grad_norm": 0.9041614532470703, + "learning_rate": 4.1714833838934006e-06, + "loss": 1.0313189029693604, + "step": 856 + }, + { + "epoch": 1.7091633466135459, + "grad_norm": 0.8065091967582703, + "learning_rate": 4.163058846117148e-06, + "loss": 0.34671998023986816, + "step": 858 + }, + { + "epoch": 1.7131474103585658, + "grad_norm": 1.2888925075531006, + "learning_rate": 4.154624939230289e-06, + "loss": 1.031374454498291, + "step": 860 + }, + { + "epoch": 1.7171314741035857, + "grad_norm": 0.8425755500793457, + "learning_rate": 4.146181754788813e-06, + "loss": 1.0426599979400635, + "step": 862 + }, + { + "epoch": 1.7211155378486056, + "grad_norm": 1.4209198951721191, + "learning_rate": 4.13772938444942e-06, + "loss": 0.6024843454360962, + "step": 864 + }, + { + "epoch": 1.7250996015936255, + "grad_norm": 1.0409010648727417, + "learning_rate": 4.129267919968536e-06, + "loss": 0.4379670023918152, + "step": 866 + }, + { + "epoch": 1.7290836653386454, + "grad_norm": 1.4887381792068481, + "learning_rate": 4.120797453201309e-06, + "loss": 0.8161473274230957, + "step": 868 + }, + { + "epoch": 1.7330677290836654, + "grad_norm": 12.129778861999512, + "learning_rate": 4.112318076100608e-06, + "loss": 0.22986909747123718, + "step": 870 + }, + { + "epoch": 1.7370517928286853, + "grad_norm": 2.050231456756592, + "learning_rate": 4.103829880716036e-06, + "loss": 0.5155397057533264, + "step": 872 + }, + { + "epoch": 1.7410358565737052, + "grad_norm": 3.127119541168213, + "learning_rate": 4.0953329591929204e-06, + "loss": 0.42298442125320435, + "step": 874 + }, + { + "epoch": 1.745019920318725, + "grad_norm": 1.210281491279602, + "learning_rate": 4.08682740377132e-06, + "loss": 1.0322401523590088, + "step": 876 + }, + { + "epoch": 1.749003984063745, + "grad_norm": 0.7078624367713928, + "learning_rate": 4.0783133067850185e-06, + "loss": 1.0741485357284546, + "step": 878 + }, + { + "epoch": 1.752988047808765, + "grad_norm": 0.9627106189727783, + "learning_rate": 4.069790760660525e-06, + "loss": 0.08892940729856491, + "step": 880 + }, + { + "epoch": 1.7569721115537849, + "grad_norm": 2.872758388519287, + "learning_rate": 4.06125985791607e-06, + "loss": 1.2808747291564941, + "step": 882 + }, + { + "epoch": 1.7609561752988048, + "grad_norm": 1.4781732559204102, + "learning_rate": 4.0527206911606025e-06, + "loss": 1.6314507722854614, + "step": 884 + }, + { + "epoch": 1.7649402390438247, + "grad_norm": 0.4292491674423218, + "learning_rate": 4.044173353092779e-06, + "loss": 0.2118670642375946, + "step": 886 + }, + { + "epoch": 1.7689243027888446, + "grad_norm": 1.0890276432037354, + "learning_rate": 4.035617936499967e-06, + "loss": 1.1356523036956787, + "step": 888 + }, + { + "epoch": 1.7729083665338645, + "grad_norm": 1.0168540477752686, + "learning_rate": 4.0270545342572265e-06, + "loss": 0.9910404086112976, + "step": 890 + }, + { + "epoch": 1.7768924302788844, + "grad_norm": 0.8853142261505127, + "learning_rate": 4.018483239326312e-06, + "loss": 0.9891409277915955, + "step": 892 + }, + { + "epoch": 1.7808764940239044, + "grad_norm": 0.7593168020248413, + "learning_rate": 4.009904144754655e-06, + "loss": 1.1023067235946655, + "step": 894 + }, + { + "epoch": 1.7848605577689243, + "grad_norm": 3.0125675201416016, + "learning_rate": 4.00131734367436e-06, + "loss": 0.9771660566329956, + "step": 896 + }, + { + "epoch": 1.7888446215139442, + "grad_norm": 1.7285772562026978, + "learning_rate": 3.99272292930119e-06, + "loss": 0.5689830780029297, + "step": 898 + }, + { + "epoch": 1.792828685258964, + "grad_norm": 0.7325118184089661, + "learning_rate": 3.984120994933558e-06, + "loss": 1.026572823524475, + "step": 900 + }, + { + "epoch": 1.796812749003984, + "grad_norm": 1.3268436193466187, + "learning_rate": 3.975511633951506e-06, + "loss": 0.5517056584358215, + "step": 902 + }, + { + "epoch": 1.800796812749004, + "grad_norm": 0.8117510676383972, + "learning_rate": 3.966894939815702e-06, + "loss": 0.3609198033809662, + "step": 904 + }, + { + "epoch": 1.8047808764940239, + "grad_norm": 1.122198224067688, + "learning_rate": 3.958271006066421e-06, + "loss": 0.9236494898796082, + "step": 906 + }, + { + "epoch": 1.8087649402390438, + "grad_norm": 2.9102554321289062, + "learning_rate": 3.949639926322527e-06, + "loss": 0.8726416230201721, + "step": 908 + }, + { + "epoch": 1.812749003984064, + "grad_norm": 13.756661415100098, + "learning_rate": 3.941001794280458e-06, + "loss": 1.0099586248397827, + "step": 910 + }, + { + "epoch": 1.8167330677290838, + "grad_norm": 3.1848342418670654, + "learning_rate": 3.932356703713212e-06, + "loss": 0.25727564096450806, + "step": 912 + }, + { + "epoch": 1.8207171314741037, + "grad_norm": 1.389024019241333, + "learning_rate": 3.923704748469326e-06, + "loss": 1.0060839653015137, + "step": 914 + }, + { + "epoch": 1.8247011952191237, + "grad_norm": 0.8609137535095215, + "learning_rate": 3.915046022471857e-06, + "loss": 1.0158603191375732, + "step": 916 + }, + { + "epoch": 1.8286852589641436, + "grad_norm": 0.8087533116340637, + "learning_rate": 3.906380619717363e-06, + "loss": 1.0479439496994019, + "step": 918 + }, + { + "epoch": 1.8326693227091635, + "grad_norm": 3.3105380535125732, + "learning_rate": 3.897708634274886e-06, + "loss": 0.36958053708076477, + "step": 920 + }, + { + "epoch": 1.8366533864541834, + "grad_norm": 1.9331108331680298, + "learning_rate": 3.889030160284922e-06, + "loss": 0.35556235909461975, + "step": 922 + }, + { + "epoch": 1.8406374501992033, + "grad_norm": 0.7566105723381042, + "learning_rate": 3.88034529195841e-06, + "loss": 1.1607534885406494, + "step": 924 + }, + { + "epoch": 1.8446215139442232, + "grad_norm": 0.2870655953884125, + "learning_rate": 3.871654123575704e-06, + "loss": 0.14478978514671326, + "step": 926 + }, + { + "epoch": 1.8486055776892432, + "grad_norm": 0.3280292749404907, + "learning_rate": 3.8629567494855445e-06, + "loss": 0.0896715372800827, + "step": 928 + }, + { + "epoch": 1.852589641434263, + "grad_norm": 1.354030728340149, + "learning_rate": 3.854253264104045e-06, + "loss": 1.078214168548584, + "step": 930 + }, + { + "epoch": 1.856573705179283, + "grad_norm": 1.015066146850586, + "learning_rate": 3.845543761913657e-06, + "loss": 1.114577293395996, + "step": 932 + }, + { + "epoch": 1.860557768924303, + "grad_norm": 0.39395958185195923, + "learning_rate": 3.836828337462152e-06, + "loss": 0.5930612087249756, + "step": 934 + }, + { + "epoch": 1.8645418326693228, + "grad_norm": 3.372042417526245, + "learning_rate": 3.82810708536159e-06, + "loss": 0.34988486766815186, + "step": 936 + }, + { + "epoch": 1.8685258964143427, + "grad_norm": 1.3925652503967285, + "learning_rate": 3.819380100287294e-06, + "loss": 1.0657780170440674, + "step": 938 + }, + { + "epoch": 1.8725099601593627, + "grad_norm": 1.6448031663894653, + "learning_rate": 3.810647476976824e-06, + "loss": 1.0907565355300903, + "step": 940 + }, + { + "epoch": 1.8764940239043826, + "grad_norm": 0.7891445159912109, + "learning_rate": 3.801909310228945e-06, + "loss": 0.35766711831092834, + "step": 942 + }, + { + "epoch": 1.8804780876494025, + "grad_norm": 1.724031686782837, + "learning_rate": 3.7931656949026028e-06, + "loss": 1.7528119087219238, + "step": 944 + }, + { + "epoch": 1.8844621513944224, + "grad_norm": 1.0190646648406982, + "learning_rate": 3.784416725915887e-06, + "loss": 0.706551194190979, + "step": 946 + }, + { + "epoch": 1.8884462151394423, + "grad_norm": 3.7524330615997314, + "learning_rate": 3.7756624982450105e-06, + "loss": 1.3365905284881592, + "step": 948 + }, + { + "epoch": 1.8924302788844622, + "grad_norm": 1.1480021476745605, + "learning_rate": 3.7669031069232684e-06, + "loss": 0.7811166048049927, + "step": 950 + }, + { + "epoch": 1.8964143426294822, + "grad_norm": 0.7147510647773743, + "learning_rate": 3.7581386470400106e-06, + "loss": 1.0117745399475098, + "step": 952 + }, + { + "epoch": 1.900398406374502, + "grad_norm": 2.004282236099243, + "learning_rate": 3.7493692137396153e-06, + "loss": 0.5164535045623779, + "step": 954 + }, + { + "epoch": 1.904382470119522, + "grad_norm": 0.7438123822212219, + "learning_rate": 3.7405949022204435e-06, + "loss": 1.0378838777542114, + "step": 956 + }, + { + "epoch": 1.908366533864542, + "grad_norm": 3.5988733768463135, + "learning_rate": 3.731815807733818e-06, + "loss": 0.6023346781730652, + "step": 958 + }, + { + "epoch": 1.9123505976095618, + "grad_norm": 2.4353888034820557, + "learning_rate": 3.723032025582982e-06, + "loss": 0.5875221490859985, + "step": 960 + }, + { + "epoch": 1.9163346613545817, + "grad_norm": 1.3933720588684082, + "learning_rate": 3.7142436511220676e-06, + "loss": 0.1774052381515503, + "step": 962 + }, + { + "epoch": 1.9203187250996017, + "grad_norm": 2.9852864742279053, + "learning_rate": 3.7054507797550564e-06, + "loss": 1.3314721584320068, + "step": 964 + }, + { + "epoch": 1.9243027888446216, + "grad_norm": 0.7507312893867493, + "learning_rate": 3.6966535069347523e-06, + "loss": 1.0096935033798218, + "step": 966 + }, + { + "epoch": 1.9282868525896415, + "grad_norm": 1.7996251583099365, + "learning_rate": 3.6878519281617354e-06, + "loss": 1.0307931900024414, + "step": 968 + }, + { + "epoch": 1.9322709163346614, + "grad_norm": 1.16811203956604, + "learning_rate": 3.6790461389833317e-06, + "loss": 0.9180192351341248, + "step": 970 + }, + { + "epoch": 1.9362549800796813, + "grad_norm": 0.7789274454116821, + "learning_rate": 3.670236234992576e-06, + "loss": 1.1056816577911377, + "step": 972 + }, + { + "epoch": 1.9402390438247012, + "grad_norm": 0.8071714639663696, + "learning_rate": 3.661422311827169e-06, + "loss": 1.061263084411621, + "step": 974 + }, + { + "epoch": 1.9442231075697212, + "grad_norm": 2.5436365604400635, + "learning_rate": 3.652604465168444e-06, + "loss": 0.9830687642097473, + "step": 976 + }, + { + "epoch": 1.948207171314741, + "grad_norm": 0.7201181054115295, + "learning_rate": 3.6437827907403273e-06, + "loss": 1.0000416040420532, + "step": 978 + }, + { + "epoch": 1.952191235059761, + "grad_norm": 0.7345990538597107, + "learning_rate": 3.6349573843082966e-06, + "loss": 1.0285298824310303, + "step": 980 + }, + { + "epoch": 1.956175298804781, + "grad_norm": 0.6029013395309448, + "learning_rate": 3.6261283416783447e-06, + "loss": 0.3689904808998108, + "step": 982 + }, + { + "epoch": 1.9601593625498008, + "grad_norm": 5.31935977935791, + "learning_rate": 3.6172957586959372e-06, + "loss": 1.075624704360962, + "step": 984 + }, + { + "epoch": 1.9641434262948207, + "grad_norm": 2.391829252243042, + "learning_rate": 3.6084597312449725e-06, + "loss": 0.8474624156951904, + "step": 986 + }, + { + "epoch": 1.9681274900398407, + "grad_norm": 5.1822967529296875, + "learning_rate": 3.599620355246742e-06, + "loss": 0.31603577733039856, + "step": 988 + }, + { + "epoch": 1.9721115537848606, + "grad_norm": 1.8022582530975342, + "learning_rate": 3.5907777266588856e-06, + "loss": 0.911726713180542, + "step": 990 + }, + { + "epoch": 1.9760956175298805, + "grad_norm": 0.7391871213912964, + "learning_rate": 3.5819319414743555e-06, + "loss": 1.0421473979949951, + "step": 992 + }, + { + "epoch": 1.9800796812749004, + "grad_norm": 1.211188554763794, + "learning_rate": 3.573083095720369e-06, + "loss": 1.0375580787658691, + "step": 994 + }, + { + "epoch": 1.9840637450199203, + "grad_norm": 6.231225967407227, + "learning_rate": 3.5642312854573686e-06, + "loss": 0.5392568707466125, + "step": 996 + }, + { + "epoch": 1.9880478087649402, + "grad_norm": 1.1782855987548828, + "learning_rate": 3.5553766067779785e-06, + "loss": 1.188450813293457, + "step": 998 + }, + { + "epoch": 1.9920318725099602, + "grad_norm": 0.6256092190742493, + "learning_rate": 3.546519155805962e-06, + "loss": 1.0698131322860718, + "step": 1000 + }, + { + "epoch": 1.99601593625498, + "grad_norm": 0.89486163854599, + "learning_rate": 3.5376590286951774e-06, + "loss": 1.02101469039917, + "step": 1002 + }, + { + "epoch": 2.0, + "grad_norm": 0.5744116902351379, + "learning_rate": 3.5287963216285337e-06, + "loss": 0.08481757342815399, + "step": 1004 + }, + { + "epoch": 2.00398406374502, + "grad_norm": 0.4444674849510193, + "learning_rate": 3.519931130816947e-06, + "loss": 0.14744052290916443, + "step": 1006 + }, + { + "epoch": 2.00796812749004, + "grad_norm": 1.0349431037902832, + "learning_rate": 3.511063552498299e-06, + "loss": 0.894745945930481, + "step": 1008 + }, + { + "epoch": 2.0119521912350598, + "grad_norm": 0.5005489587783813, + "learning_rate": 3.502193682936385e-06, + "loss": 0.29803839325904846, + "step": 1010 + }, + { + "epoch": 2.0159362549800797, + "grad_norm": 1.0027674436569214, + "learning_rate": 3.493321618419877e-06, + "loss": 0.6132505536079407, + "step": 1012 + }, + { + "epoch": 2.0199203187250996, + "grad_norm": 0.722247302532196, + "learning_rate": 3.484447455261272e-06, + "loss": 0.8650059700012207, + "step": 1014 + }, + { + "epoch": 2.0239043824701195, + "grad_norm": 0.1125183254480362, + "learning_rate": 3.4755712897958524e-06, + "loss": 0.06626415252685547, + "step": 1016 + }, + { + "epoch": 2.0278884462151394, + "grad_norm": 2.244713306427002, + "learning_rate": 3.4666932183806345e-06, + "loss": 0.6729474663734436, + "step": 1018 + }, + { + "epoch": 2.0318725099601593, + "grad_norm": 0.8710299730300903, + "learning_rate": 3.4578133373933263e-06, + "loss": 0.8701741099357605, + "step": 1020 + }, + { + "epoch": 2.0358565737051793, + "grad_norm": 0.8872413635253906, + "learning_rate": 3.4489317432312796e-06, + "loss": 0.8716042041778564, + "step": 1022 + }, + { + "epoch": 2.039840637450199, + "grad_norm": 1.219373106956482, + "learning_rate": 3.4400485323104426e-06, + "loss": 0.34580960869789124, + "step": 1024 + }, + { + "epoch": 2.043824701195219, + "grad_norm": 1.7070385217666626, + "learning_rate": 3.431163801064317e-06, + "loss": 0.3066391348838806, + "step": 1026 + }, + { + "epoch": 2.047808764940239, + "grad_norm": 3.4397644996643066, + "learning_rate": 3.422277645942907e-06, + "loss": 0.3099243938922882, + "step": 1028 + }, + { + "epoch": 2.051792828685259, + "grad_norm": 20.93805694580078, + "learning_rate": 3.413390163411675e-06, + "loss": 0.6691966652870178, + "step": 1030 + }, + { + "epoch": 2.055776892430279, + "grad_norm": 1.0854685306549072, + "learning_rate": 3.4045014499504923e-06, + "loss": 0.8780809640884399, + "step": 1032 + }, + { + "epoch": 2.0597609561752988, + "grad_norm": 11.395671844482422, + "learning_rate": 3.3956116020525924e-06, + "loss": 0.2683337926864624, + "step": 1034 + }, + { + "epoch": 2.0637450199203187, + "grad_norm": 2.4742014408111572, + "learning_rate": 3.3867207162235272e-06, + "loss": 0.7748890519142151, + "step": 1036 + }, + { + "epoch": 2.0677290836653386, + "grad_norm": 2.432234525680542, + "learning_rate": 3.377828888980112e-06, + "loss": 0.8894884586334229, + "step": 1038 + }, + { + "epoch": 2.0717131474103585, + "grad_norm": 2.468468427658081, + "learning_rate": 3.3689362168493844e-06, + "loss": 0.6649755239486694, + "step": 1040 + }, + { + "epoch": 2.0756972111553784, + "grad_norm": 0.6127830147743225, + "learning_rate": 3.3600427963675516e-06, + "loss": 0.8452335596084595, + "step": 1042 + }, + { + "epoch": 2.0796812749003983, + "grad_norm": 1.180112361907959, + "learning_rate": 3.3511487240789483e-06, + "loss": 0.929725170135498, + "step": 1044 + }, + { + "epoch": 2.0836653386454183, + "grad_norm": 0.738735020160675, + "learning_rate": 3.3422540965349806e-06, + "loss": 0.8923982381820679, + "step": 1046 + }, + { + "epoch": 2.087649402390438, + "grad_norm": 3.025284767150879, + "learning_rate": 3.333359010293085e-06, + "loss": 0.9607875347137451, + "step": 1048 + }, + { + "epoch": 2.091633466135458, + "grad_norm": 0.7996847033500671, + "learning_rate": 3.3244635619156786e-06, + "loss": 0.4797319769859314, + "step": 1050 + }, + { + "epoch": 2.095617529880478, + "grad_norm": 10.094463348388672, + "learning_rate": 3.315567847969106e-06, + "loss": 0.2578115165233612, + "step": 1052 + }, + { + "epoch": 2.099601593625498, + "grad_norm": 0.6219993233680725, + "learning_rate": 3.306671965022598e-06, + "loss": 0.315256267786026, + "step": 1054 + }, + { + "epoch": 2.103585657370518, + "grad_norm": 1.1088297367095947, + "learning_rate": 3.2977760096472184e-06, + "loss": 0.9286193251609802, + "step": 1056 + }, + { + "epoch": 2.1075697211155378, + "grad_norm": 1.1025009155273438, + "learning_rate": 3.2888800784148174e-06, + "loss": 0.7976268529891968, + "step": 1058 + }, + { + "epoch": 2.1115537848605577, + "grad_norm": 0.7398043274879456, + "learning_rate": 3.2799842678969835e-06, + "loss": 0.3379042148590088, + "step": 1060 + }, + { + "epoch": 2.1155378486055776, + "grad_norm": 1.8223795890808105, + "learning_rate": 3.2710886746639964e-06, + "loss": 0.29785844683647156, + "step": 1062 + }, + { + "epoch": 2.1195219123505975, + "grad_norm": 0.9167846441268921, + "learning_rate": 3.262193395283773e-06, + "loss": 0.10107379406690598, + "step": 1064 + }, + { + "epoch": 2.1235059760956174, + "grad_norm": 6.6176300048828125, + "learning_rate": 3.2532985263208266e-06, + "loss": 0.4440305829048157, + "step": 1066 + }, + { + "epoch": 2.1274900398406373, + "grad_norm": 0.8213241696357727, + "learning_rate": 3.244404164335213e-06, + "loss": 0.8258364796638489, + "step": 1068 + }, + { + "epoch": 2.1314741035856573, + "grad_norm": 2.339560031890869, + "learning_rate": 3.2355104058814874e-06, + "loss": 0.9001627564430237, + "step": 1070 + }, + { + "epoch": 2.135458167330677, + "grad_norm": 1.07158625125885, + "learning_rate": 3.226617347507649e-06, + "loss": 0.3943869471549988, + "step": 1072 + }, + { + "epoch": 2.139442231075697, + "grad_norm": 0.9587336182594299, + "learning_rate": 3.2177250857541007e-06, + "loss": 1.0341042280197144, + "step": 1074 + }, + { + "epoch": 2.143426294820717, + "grad_norm": 0.8883066773414612, + "learning_rate": 3.208833717152594e-06, + "loss": 0.19238322973251343, + "step": 1076 + }, + { + "epoch": 2.147410358565737, + "grad_norm": 1.4621644020080566, + "learning_rate": 3.199943338225189e-06, + "loss": 0.7075263261795044, + "step": 1078 + }, + { + "epoch": 2.151394422310757, + "grad_norm": 0.9659390449523926, + "learning_rate": 3.1910540454832e-06, + "loss": 0.9844989776611328, + "step": 1080 + }, + { + "epoch": 2.1553784860557768, + "grad_norm": 0.9126376509666443, + "learning_rate": 3.1821659354261478e-06, + "loss": 0.8773077130317688, + "step": 1082 + }, + { + "epoch": 2.1593625498007967, + "grad_norm": 1.5047764778137207, + "learning_rate": 3.173279104540719e-06, + "loss": 0.7283194065093994, + "step": 1084 + }, + { + "epoch": 2.1633466135458166, + "grad_norm": 2.4488370418548584, + "learning_rate": 3.164393649299711e-06, + "loss": 1.0191715955734253, + "step": 1086 + }, + { + "epoch": 2.1673306772908365, + "grad_norm": 0.6298505663871765, + "learning_rate": 3.155509666160986e-06, + "loss": 0.19404178857803345, + "step": 1088 + }, + { + "epoch": 2.1713147410358564, + "grad_norm": 3.298346519470215, + "learning_rate": 3.1466272515664287e-06, + "loss": 0.4330817759037018, + "step": 1090 + }, + { + "epoch": 2.1752988047808763, + "grad_norm": 1.4736095666885376, + "learning_rate": 3.137746501940894e-06, + "loss": 0.8412344455718994, + "step": 1092 + }, + { + "epoch": 2.1792828685258963, + "grad_norm": 1.3612383604049683, + "learning_rate": 3.1288675136911653e-06, + "loss": 0.7719582915306091, + "step": 1094 + }, + { + "epoch": 2.183266932270916, + "grad_norm": 1.6760456562042236, + "learning_rate": 3.1199903832049025e-06, + "loss": 0.8681936264038086, + "step": 1096 + }, + { + "epoch": 2.187250996015936, + "grad_norm": 0.9944242238998413, + "learning_rate": 3.1111152068495982e-06, + "loss": 0.8590313196182251, + "step": 1098 + }, + { + "epoch": 2.191235059760956, + "grad_norm": 1.1411633491516113, + "learning_rate": 3.102242080971531e-06, + "loss": 0.8502429723739624, + "step": 1100 + }, + { + "epoch": 2.195219123505976, + "grad_norm": 1.0093145370483398, + "learning_rate": 3.0933711018947217e-06, + "loss": 0.8326080441474915, + "step": 1102 + }, + { + "epoch": 2.199203187250996, + "grad_norm": 1.3518801927566528, + "learning_rate": 3.084502365919887e-06, + "loss": 0.31851112842559814, + "step": 1104 + }, + { + "epoch": 2.2031872509960158, + "grad_norm": 0.8486732840538025, + "learning_rate": 3.0756359693233897e-06, + "loss": 0.12462817877531052, + "step": 1106 + }, + { + "epoch": 2.2071713147410357, + "grad_norm": 3.158237934112549, + "learning_rate": 3.066772008356201e-06, + "loss": 0.7065569162368774, + "step": 1108 + }, + { + "epoch": 2.2111553784860556, + "grad_norm": 1.6595673561096191, + "learning_rate": 3.057910579242848e-06, + "loss": 0.32911333441734314, + "step": 1110 + }, + { + "epoch": 2.2151394422310755, + "grad_norm": 0.9766960740089417, + "learning_rate": 3.0490517781803748e-06, + "loss": 0.8282409906387329, + "step": 1112 + }, + { + "epoch": 2.2191235059760954, + "grad_norm": 2.551868438720703, + "learning_rate": 3.040195701337296e-06, + "loss": 0.8591130971908569, + "step": 1114 + }, + { + "epoch": 2.2231075697211153, + "grad_norm": 2.4142255783081055, + "learning_rate": 3.0313424448525513e-06, + "loss": 0.6863746643066406, + "step": 1116 + }, + { + "epoch": 2.2270916334661353, + "grad_norm": 1.8660197257995605, + "learning_rate": 3.022492104834467e-06, + "loss": 0.867939829826355, + "step": 1118 + }, + { + "epoch": 2.231075697211155, + "grad_norm": 1.012052297592163, + "learning_rate": 3.013644777359706e-06, + "loss": 0.862476110458374, + "step": 1120 + }, + { + "epoch": 2.235059760956175, + "grad_norm": 1.3242058753967285, + "learning_rate": 3.004800558472228e-06, + "loss": 0.8478327393531799, + "step": 1122 + }, + { + "epoch": 2.239043824701195, + "grad_norm": 1.5202715396881104, + "learning_rate": 2.995959544182248e-06, + "loss": 0.8780950307846069, + "step": 1124 + }, + { + "epoch": 2.243027888446215, + "grad_norm": 1.5164873600006104, + "learning_rate": 2.9871218304651926e-06, + "loss": 0.8773269653320312, + "step": 1126 + }, + { + "epoch": 2.247011952191235, + "grad_norm": 12.062283515930176, + "learning_rate": 2.9782875132606573e-06, + "loss": 0.5782788991928101, + "step": 1128 + }, + { + "epoch": 2.2509960159362548, + "grad_norm": 0.4626627266407013, + "learning_rate": 2.969456688471368e-06, + "loss": 0.17795492708683014, + "step": 1130 + }, + { + "epoch": 2.2549800796812747, + "grad_norm": 8.622909545898438, + "learning_rate": 2.960629451962137e-06, + "loss": 0.876864492893219, + "step": 1132 + }, + { + "epoch": 2.2589641434262946, + "grad_norm": 2.5603370666503906, + "learning_rate": 2.9518058995588217e-06, + "loss": 0.5039679408073425, + "step": 1134 + }, + { + "epoch": 2.2629482071713145, + "grad_norm": 1.9047883749008179, + "learning_rate": 2.9429861270472884e-06, + "loss": 0.8298702836036682, + "step": 1136 + }, + { + "epoch": 2.2669322709163344, + "grad_norm": 1.333377480506897, + "learning_rate": 2.9341702301723704e-06, + "loss": 0.8177191019058228, + "step": 1138 + }, + { + "epoch": 2.2709163346613543, + "grad_norm": 0.8072558641433716, + "learning_rate": 2.9253583046368243e-06, + "loss": 0.8483671545982361, + "step": 1140 + }, + { + "epoch": 2.2749003984063743, + "grad_norm": 1.162376046180725, + "learning_rate": 2.916550446100299e-06, + "loss": 0.8442429900169373, + "step": 1142 + }, + { + "epoch": 2.278884462151394, + "grad_norm": 2.1500282287597656, + "learning_rate": 2.907746750178293e-06, + "loss": 0.40876924991607666, + "step": 1144 + }, + { + "epoch": 2.2828685258964145, + "grad_norm": 1.5930662155151367, + "learning_rate": 2.8989473124411136e-06, + "loss": 0.3929884433746338, + "step": 1146 + }, + { + "epoch": 2.2868525896414345, + "grad_norm": 0.9812231659889221, + "learning_rate": 2.8901522284128454e-06, + "loss": 0.8924030661582947, + "step": 1148 + }, + { + "epoch": 2.2908366533864544, + "grad_norm": 4.809815883636475, + "learning_rate": 2.881361593570308e-06, + "loss": 0.412593275308609, + "step": 1150 + }, + { + "epoch": 2.2948207171314743, + "grad_norm": 0.34295371174812317, + "learning_rate": 2.872575503342027e-06, + "loss": 0.07170237600803375, + "step": 1152 + }, + { + "epoch": 2.298804780876494, + "grad_norm": 2.6662888526916504, + "learning_rate": 2.8637940531071856e-06, + "loss": 0.9125880599021912, + "step": 1154 + }, + { + "epoch": 2.302788844621514, + "grad_norm": 1.016099214553833, + "learning_rate": 2.8550173381946035e-06, + "loss": 0.20460867881774902, + "step": 1156 + }, + { + "epoch": 2.306772908366534, + "grad_norm": 1.2535561323165894, + "learning_rate": 2.84624545388169e-06, + "loss": 0.18213213980197906, + "step": 1158 + }, + { + "epoch": 2.310756972111554, + "grad_norm": 5.914939880371094, + "learning_rate": 2.837478495393418e-06, + "loss": 1.015434980392456, + "step": 1160 + }, + { + "epoch": 2.314741035856574, + "grad_norm": 3.516514539718628, + "learning_rate": 2.828716557901286e-06, + "loss": 0.4791782796382904, + "step": 1162 + }, + { + "epoch": 2.318725099601594, + "grad_norm": 1.2415333986282349, + "learning_rate": 2.819959736522286e-06, + "loss": 0.6430278420448303, + "step": 1164 + }, + { + "epoch": 2.3227091633466137, + "grad_norm": 6.374106407165527, + "learning_rate": 2.8112081263178727e-06, + "loss": 0.7340620756149292, + "step": 1166 + }, + { + "epoch": 2.3266932270916336, + "grad_norm": 0.7349236011505127, + "learning_rate": 2.8024618222929257e-06, + "loss": 0.8904776573181152, + "step": 1168 + }, + { + "epoch": 2.3306772908366535, + "grad_norm": 3.1692311763763428, + "learning_rate": 2.793720919394726e-06, + "loss": 0.3335300385951996, + "step": 1170 + }, + { + "epoch": 2.3346613545816735, + "grad_norm": 1.9627305269241333, + "learning_rate": 2.7849855125119204e-06, + "loss": 0.9338223338127136, + "step": 1172 + }, + { + "epoch": 2.3386454183266934, + "grad_norm": 1.715811014175415, + "learning_rate": 2.7762556964734925e-06, + "loss": 0.8548279404640198, + "step": 1174 + }, + { + "epoch": 2.3426294820717133, + "grad_norm": 1.2761598825454712, + "learning_rate": 2.7675315660477342e-06, + "loss": 0.6551219820976257, + "step": 1176 + }, + { + "epoch": 2.346613545816733, + "grad_norm": 0.5829970836639404, + "learning_rate": 2.7588132159412153e-06, + "loss": 0.8633916974067688, + "step": 1178 + }, + { + "epoch": 2.350597609561753, + "grad_norm": 0.8791594505310059, + "learning_rate": 2.7501007407977554e-06, + "loss": 0.8312200903892517, + "step": 1180 + }, + { + "epoch": 2.354581673306773, + "grad_norm": 0.8145209550857544, + "learning_rate": 2.7413942351973994e-06, + "loss": 0.8451777696609497, + "step": 1182 + }, + { + "epoch": 2.358565737051793, + "grad_norm": 0.8338920474052429, + "learning_rate": 2.7326937936553845e-06, + "loss": 0.9415311813354492, + "step": 1184 + }, + { + "epoch": 2.362549800796813, + "grad_norm": 0.9346828460693359, + "learning_rate": 2.7239995106211244e-06, + "loss": 0.8471455574035645, + "step": 1186 + }, + { + "epoch": 2.366533864541833, + "grad_norm": 1.4322340488433838, + "learning_rate": 2.715311480477173e-06, + "loss": 0.30060604214668274, + "step": 1188 + }, + { + "epoch": 2.3705179282868527, + "grad_norm": 1.1024688482284546, + "learning_rate": 2.7066297975382065e-06, + "loss": 0.7530568838119507, + "step": 1190 + }, + { + "epoch": 2.3745019920318726, + "grad_norm": 0.5967240333557129, + "learning_rate": 2.697954556049997e-06, + "loss": 0.867277204990387, + "step": 1192 + }, + { + "epoch": 2.3784860557768925, + "grad_norm": 0.9026405811309814, + "learning_rate": 2.689285850188391e-06, + "loss": 0.9335858225822449, + "step": 1194 + }, + { + "epoch": 2.3824701195219125, + "grad_norm": 0.48514679074287415, + "learning_rate": 2.6806237740582855e-06, + "loss": 0.2793917655944824, + "step": 1196 + }, + { + "epoch": 2.3864541832669324, + "grad_norm": 2.9039154052734375, + "learning_rate": 2.671968421692607e-06, + "loss": 1.4733071327209473, + "step": 1198 + }, + { + "epoch": 2.3904382470119523, + "grad_norm": 3.6072850227355957, + "learning_rate": 2.6633198870512927e-06, + "loss": 0.3655731976032257, + "step": 1200 + }, + { + "epoch": 2.394422310756972, + "grad_norm": 0.6584874391555786, + "learning_rate": 2.6546782640202666e-06, + "loss": 0.8660189509391785, + "step": 1202 + }, + { + "epoch": 2.398406374501992, + "grad_norm": 0.5407839417457581, + "learning_rate": 2.6460436464104216e-06, + "loss": 0.848800003528595, + "step": 1204 + }, + { + "epoch": 2.402390438247012, + "grad_norm": 1.0635416507720947, + "learning_rate": 2.6374161279566035e-06, + "loss": 0.9516815543174744, + "step": 1206 + }, + { + "epoch": 2.406374501992032, + "grad_norm": 0.41980046033859253, + "learning_rate": 2.628795802316591e-06, + "loss": 0.120535708963871, + "step": 1208 + }, + { + "epoch": 2.410358565737052, + "grad_norm": 0.3191829323768616, + "learning_rate": 2.620182763070081e-06, + "loss": 0.023226367309689522, + "step": 1210 + }, + { + "epoch": 2.414342629482072, + "grad_norm": 1.4996663331985474, + "learning_rate": 2.61157710371767e-06, + "loss": 0.45069432258605957, + "step": 1212 + }, + { + "epoch": 2.4183266932270917, + "grad_norm": 1.0962636470794678, + "learning_rate": 2.6029789176798417e-06, + "loss": 0.6983217000961304, + "step": 1214 + }, + { + "epoch": 2.4223107569721116, + "grad_norm": 0.8529632091522217, + "learning_rate": 2.594388298295949e-06, + "loss": 0.17169800400733948, + "step": 1216 + }, + { + "epoch": 2.4262948207171315, + "grad_norm": 0.9947030544281006, + "learning_rate": 2.585805338823208e-06, + "loss": 0.8718166947364807, + "step": 1218 + }, + { + "epoch": 2.4302788844621515, + "grad_norm": 0.39905738830566406, + "learning_rate": 2.577230132435678e-06, + "loss": 0.5236790776252747, + "step": 1220 + }, + { + "epoch": 2.4342629482071714, + "grad_norm": 1.6986416578292847, + "learning_rate": 2.5686627722232518e-06, + "loss": 0.4206949770450592, + "step": 1222 + }, + { + "epoch": 2.4382470119521913, + "grad_norm": 0.8914661407470703, + "learning_rate": 2.560103351190651e-06, + "loss": 0.8530100584030151, + "step": 1224 + }, + { + "epoch": 2.442231075697211, + "grad_norm": 1.940697193145752, + "learning_rate": 2.5515519622564086e-06, + "loss": 0.03098766878247261, + "step": 1226 + }, + { + "epoch": 2.446215139442231, + "grad_norm": 0.740294873714447, + "learning_rate": 2.543008698251863e-06, + "loss": 0.8904476165771484, + "step": 1228 + }, + { + "epoch": 2.450199203187251, + "grad_norm": 1.2256784439086914, + "learning_rate": 2.534473651920153e-06, + "loss": 0.6660670042037964, + "step": 1230 + }, + { + "epoch": 2.454183266932271, + "grad_norm": 1.3577665090560913, + "learning_rate": 2.5259469159152063e-06, + "loss": 0.8957257270812988, + "step": 1232 + }, + { + "epoch": 2.458167330677291, + "grad_norm": 5.5895209312438965, + "learning_rate": 2.5174285828007387e-06, + "loss": 0.4879809319972992, + "step": 1234 + }, + { + "epoch": 2.462151394422311, + "grad_norm": 1.602962851524353, + "learning_rate": 2.5089187450492464e-06, + "loss": 0.8527651429176331, + "step": 1236 + }, + { + "epoch": 2.4661354581673307, + "grad_norm": 1.6139048337936401, + "learning_rate": 2.5004174950409996e-06, + "loss": 0.814254641532898, + "step": 1238 + }, + { + "epoch": 2.4701195219123506, + "grad_norm": 2.1591413021087646, + "learning_rate": 2.4919249250630463e-06, + "loss": 0.620861828327179, + "step": 1240 + }, + { + "epoch": 2.4741035856573705, + "grad_norm": 2.2499430179595947, + "learning_rate": 2.483441127308202e-06, + "loss": 0.622882068157196, + "step": 1242 + }, + { + "epoch": 2.4780876494023905, + "grad_norm": 0.8735558390617371, + "learning_rate": 2.47496619387406e-06, + "loss": 0.8819273114204407, + "step": 1244 + }, + { + "epoch": 2.4820717131474104, + "grad_norm": 1.0973459482192993, + "learning_rate": 2.4665002167619798e-06, + "loss": 0.85080885887146, + "step": 1246 + }, + { + "epoch": 2.4860557768924303, + "grad_norm": 1.19606351852417, + "learning_rate": 2.4580432878760968e-06, + "loss": 0.5080418586730957, + "step": 1248 + }, + { + "epoch": 2.49003984063745, + "grad_norm": 0.36084145307540894, + "learning_rate": 2.449595499022318e-06, + "loss": 0.3111553192138672, + "step": 1250 + }, + { + "epoch": 2.49402390438247, + "grad_norm": 0.7546538710594177, + "learning_rate": 2.441156941907333e-06, + "loss": 0.6624001264572144, + "step": 1252 + }, + { + "epoch": 2.49800796812749, + "grad_norm": 0.7720620632171631, + "learning_rate": 2.432727708137612e-06, + "loss": 0.7852078676223755, + "step": 1254 + }, + { + "epoch": 2.50199203187251, + "grad_norm": 2.640068292617798, + "learning_rate": 2.424307889218414e-06, + "loss": 0.9888243079185486, + "step": 1256 + }, + { + "epoch": 2.50597609561753, + "grad_norm": 0.47891512513160706, + "learning_rate": 2.415897576552795e-06, + "loss": 0.11806351691484451, + "step": 1258 + }, + { + "epoch": 2.50996015936255, + "grad_norm": 1.773125171661377, + "learning_rate": 2.407496861440611e-06, + "loss": 0.712026834487915, + "step": 1260 + }, + { + "epoch": 2.5139442231075697, + "grad_norm": 0.8916162848472595, + "learning_rate": 2.3991058350775316e-06, + "loss": 0.27510854601860046, + "step": 1262 + }, + { + "epoch": 2.5179282868525896, + "grad_norm": 2.915144205093384, + "learning_rate": 2.3907245885540473e-06, + "loss": 0.5907682180404663, + "step": 1264 + }, + { + "epoch": 2.5219123505976095, + "grad_norm": 0.7523391842842102, + "learning_rate": 2.382353212854483e-06, + "loss": 0.875799298286438, + "step": 1266 + }, + { + "epoch": 2.5258964143426295, + "grad_norm": 0.7640947699546814, + "learning_rate": 2.373991798856008e-06, + "loss": 0.8100597858428955, + "step": 1268 + }, + { + "epoch": 2.5298804780876494, + "grad_norm": 0.9602063894271851, + "learning_rate": 2.3656404373276496e-06, + "loss": 0.8617823719978333, + "step": 1270 + }, + { + "epoch": 2.5338645418326693, + "grad_norm": 1.0857386589050293, + "learning_rate": 2.35729921892931e-06, + "loss": 0.7695320248603821, + "step": 1272 + }, + { + "epoch": 2.537848605577689, + "grad_norm": 2.655921220779419, + "learning_rate": 2.3489682342107787e-06, + "loss": 1.0393037796020508, + "step": 1274 + }, + { + "epoch": 2.541832669322709, + "grad_norm": 1.602705478668213, + "learning_rate": 2.3406475736107537e-06, + "loss": 0.8128276467323303, + "step": 1276 + }, + { + "epoch": 2.545816733067729, + "grad_norm": 1.7629623413085938, + "learning_rate": 2.332337327455856e-06, + "loss": 0.8416529893875122, + "step": 1278 + }, + { + "epoch": 2.549800796812749, + "grad_norm": 0.3072420656681061, + "learning_rate": 2.3240375859596493e-06, + "loss": 0.21107147634029388, + "step": 1280 + }, + { + "epoch": 2.553784860557769, + "grad_norm": 0.7584460973739624, + "learning_rate": 2.3157484392216645e-06, + "loss": 0.7613718509674072, + "step": 1282 + }, + { + "epoch": 2.557768924302789, + "grad_norm": 0.7467636466026306, + "learning_rate": 2.3074699772264184e-06, + "loss": 0.9068883657455444, + "step": 1284 + }, + { + "epoch": 2.5617529880478087, + "grad_norm": 2.827934503555298, + "learning_rate": 2.2992022898424358e-06, + "loss": 0.9814170002937317, + "step": 1286 + }, + { + "epoch": 2.5657370517928286, + "grad_norm": 0.6314749717712402, + "learning_rate": 2.2909454668212763e-06, + "loss": 0.9777659177780151, + "step": 1288 + }, + { + "epoch": 2.5697211155378485, + "grad_norm": 1.5785683393478394, + "learning_rate": 2.2826995977965586e-06, + "loss": 0.14857736229896545, + "step": 1290 + }, + { + "epoch": 2.5737051792828685, + "grad_norm": 0.8036978244781494, + "learning_rate": 2.27446477228299e-06, + "loss": 0.9405508041381836, + "step": 1292 + }, + { + "epoch": 2.5776892430278884, + "grad_norm": 0.7155508399009705, + "learning_rate": 2.2662410796753924e-06, + "loss": 0.8522077202796936, + "step": 1294 + }, + { + "epoch": 2.5816733067729083, + "grad_norm": 1.1586476564407349, + "learning_rate": 2.2580286092477285e-06, + "loss": 0.8515244722366333, + "step": 1296 + }, + { + "epoch": 2.585657370517928, + "grad_norm": 1.105276346206665, + "learning_rate": 2.2498274501521414e-06, + "loss": 0.8348259925842285, + "step": 1298 + }, + { + "epoch": 2.589641434262948, + "grad_norm": 0.5298115611076355, + "learning_rate": 2.2416376914179776e-06, + "loss": 0.37851282954216003, + "step": 1300 + }, + { + "epoch": 2.593625498007968, + "grad_norm": 0.8865681290626526, + "learning_rate": 2.2334594219508283e-06, + "loss": 0.493791401386261, + "step": 1302 + }, + { + "epoch": 2.597609561752988, + "grad_norm": 0.8937894105911255, + "learning_rate": 2.2252927305315587e-06, + "loss": 0.768490731716156, + "step": 1304 + }, + { + "epoch": 2.601593625498008, + "grad_norm": 2.249807119369507, + "learning_rate": 2.2171377058153465e-06, + "loss": 0.28239089250564575, + "step": 1306 + }, + { + "epoch": 2.605577689243028, + "grad_norm": 0.7723252773284912, + "learning_rate": 2.2089944363307165e-06, + "loss": 0.8856875896453857, + "step": 1308 + }, + { + "epoch": 2.6095617529880477, + "grad_norm": 0.43645548820495605, + "learning_rate": 2.2008630104785874e-06, + "loss": 0.352665513753891, + "step": 1310 + }, + { + "epoch": 2.6135458167330676, + "grad_norm": 2.615204095840454, + "learning_rate": 2.1927435165313036e-06, + "loss": 0.1691545695066452, + "step": 1312 + }, + { + "epoch": 2.6175298804780875, + "grad_norm": 0.7458433508872986, + "learning_rate": 2.184636042631679e-06, + "loss": 0.06585448980331421, + "step": 1314 + }, + { + "epoch": 2.6215139442231075, + "grad_norm": 1.3437604904174805, + "learning_rate": 2.176540676792046e-06, + "loss": 0.956698477268219, + "step": 1316 + }, + { + "epoch": 2.6254980079681274, + "grad_norm": 2.3479928970336914, + "learning_rate": 2.168457506893292e-06, + "loss": 0.669885516166687, + "step": 1318 + }, + { + "epoch": 2.6294820717131473, + "grad_norm": 0.6726356744766235, + "learning_rate": 2.1603866206839074e-06, + "loss": 0.9108378887176514, + "step": 1320 + }, + { + "epoch": 2.633466135458167, + "grad_norm": 0.6728199124336243, + "learning_rate": 2.152328105779041e-06, + "loss": 0.46163687109947205, + "step": 1322 + }, + { + "epoch": 2.637450199203187, + "grad_norm": 3.6970763206481934, + "learning_rate": 2.1442820496595337e-06, + "loss": 1.0799225568771362, + "step": 1324 + }, + { + "epoch": 2.641434262948207, + "grad_norm": 2.347198009490967, + "learning_rate": 2.1362485396709847e-06, + "loss": 0.2297479808330536, + "step": 1326 + }, + { + "epoch": 2.645418326693227, + "grad_norm": 1.014694094657898, + "learning_rate": 2.128227663022794e-06, + "loss": 0.7543836832046509, + "step": 1328 + }, + { + "epoch": 2.649402390438247, + "grad_norm": 1.9803884029388428, + "learning_rate": 2.1202195067872153e-06, + "loss": 0.8650748133659363, + "step": 1330 + }, + { + "epoch": 2.653386454183267, + "grad_norm": 1.038819432258606, + "learning_rate": 2.112224157898416e-06, + "loss": 0.7467201352119446, + "step": 1332 + }, + { + "epoch": 2.6573705179282867, + "grad_norm": 4.248292922973633, + "learning_rate": 2.1042417031515303e-06, + "loss": 1.0267494916915894, + "step": 1334 + }, + { + "epoch": 2.6613545816733066, + "grad_norm": 0.40952640771865845, + "learning_rate": 2.096272229201716e-06, + "loss": 0.06949189305305481, + "step": 1336 + }, + { + "epoch": 2.6653386454183265, + "grad_norm": 1.2858881950378418, + "learning_rate": 2.0883158225632168e-06, + "loss": 0.9944968223571777, + "step": 1338 + }, + { + "epoch": 2.6693227091633465, + "grad_norm": 1.2663077116012573, + "learning_rate": 2.0803725696084224e-06, + "loss": 0.32381299138069153, + "step": 1340 + }, + { + "epoch": 2.6733067729083664, + "grad_norm": 2.5092110633850098, + "learning_rate": 2.072442556566928e-06, + "loss": 0.5067175626754761, + "step": 1342 + }, + { + "epoch": 2.6772908366533863, + "grad_norm": 0.4816880226135254, + "learning_rate": 2.0645258695245993e-06, + "loss": 0.06836852431297302, + "step": 1344 + }, + { + "epoch": 2.681274900398406, + "grad_norm": 0.8811363577842712, + "learning_rate": 2.0566225944226414e-06, + "loss": 0.8118082284927368, + "step": 1346 + }, + { + "epoch": 2.685258964143426, + "grad_norm": 0.7595816850662231, + "learning_rate": 2.0487328170566643e-06, + "loss": 0.833029568195343, + "step": 1348 + }, + { + "epoch": 2.6892430278884465, + "grad_norm": 0.9555457830429077, + "learning_rate": 2.0408566230757465e-06, + "loss": 0.8837859034538269, + "step": 1350 + }, + { + "epoch": 2.6932270916334664, + "grad_norm": 2.7736618518829346, + "learning_rate": 2.0329940979815116e-06, + "loss": 0.3744777739048004, + "step": 1352 + }, + { + "epoch": 2.6972111553784863, + "grad_norm": 1.4651148319244385, + "learning_rate": 2.0251453271272e-06, + "loss": 0.3069399297237396, + "step": 1354 + }, + { + "epoch": 2.7011952191235062, + "grad_norm": 1.0298899412155151, + "learning_rate": 2.0173103957167367e-06, + "loss": 0.8419727087020874, + "step": 1356 + }, + { + "epoch": 2.705179282868526, + "grad_norm": 1.365960955619812, + "learning_rate": 2.009489388803809e-06, + "loss": 0.8394007682800293, + "step": 1358 + }, + { + "epoch": 2.709163346613546, + "grad_norm": 0.9906344413757324, + "learning_rate": 2.0016823912909486e-06, + "loss": 0.8413975238800049, + "step": 1360 + }, + { + "epoch": 2.713147410358566, + "grad_norm": 0.6724693775177002, + "learning_rate": 1.9938894879286024e-06, + "loss": 0.8469905853271484, + "step": 1362 + }, + { + "epoch": 2.717131474103586, + "grad_norm": 1.9248793125152588, + "learning_rate": 1.9861107633142155e-06, + "loss": 0.8509299755096436, + "step": 1364 + }, + { + "epoch": 2.721115537848606, + "grad_norm": 1.4797543287277222, + "learning_rate": 1.978346301891312e-06, + "loss": 0.35483643412590027, + "step": 1366 + }, + { + "epoch": 2.7250996015936257, + "grad_norm": 0.8299886584281921, + "learning_rate": 1.9705961879485813e-06, + "loss": 0.8987928628921509, + "step": 1368 + }, + { + "epoch": 2.7290836653386457, + "grad_norm": 1.4776321649551392, + "learning_rate": 1.962860505618958e-06, + "loss": 0.6491652131080627, + "step": 1370 + }, + { + "epoch": 2.7330677290836656, + "grad_norm": 6.724909782409668, + "learning_rate": 1.955139338878714e-06, + "loss": 0.19401389360427856, + "step": 1372 + }, + { + "epoch": 2.7370517928286855, + "grad_norm": 0.943676233291626, + "learning_rate": 1.9474327715465444e-06, + "loss": 0.8299869894981384, + "step": 1374 + }, + { + "epoch": 2.7410358565737054, + "grad_norm": 1.2990317344665527, + "learning_rate": 1.9397408872826545e-06, + "loss": 0.871895968914032, + "step": 1376 + }, + { + "epoch": 2.7450199203187253, + "grad_norm": 1.9206279516220093, + "learning_rate": 1.9320637695878555e-06, + "loss": 0.30201855301856995, + "step": 1378 + }, + { + "epoch": 2.7490039840637452, + "grad_norm": 0.7692667841911316, + "learning_rate": 1.924401501802659e-06, + "loss": 0.6371020078659058, + "step": 1380 + }, + { + "epoch": 2.752988047808765, + "grad_norm": 0.8262352347373962, + "learning_rate": 1.9167541671063703e-06, + "loss": 0.9497525691986084, + "step": 1382 + }, + { + "epoch": 2.756972111553785, + "grad_norm": 1.0128363370895386, + "learning_rate": 1.9091218485161824e-06, + "loss": 0.9976522922515869, + "step": 1384 + }, + { + "epoch": 2.760956175298805, + "grad_norm": 0.8022831082344055, + "learning_rate": 1.9015046288862815e-06, + "loss": 0.8430491089820862, + "step": 1386 + }, + { + "epoch": 2.764940239043825, + "grad_norm": 1.4386292695999146, + "learning_rate": 1.893902590906943e-06, + "loss": 0.6075490117073059, + "step": 1388 + }, + { + "epoch": 2.768924302788845, + "grad_norm": 1.3775461912155151, + "learning_rate": 1.8863158171036336e-06, + "loss": 0.12825116515159607, + "step": 1390 + }, + { + "epoch": 2.7729083665338647, + "grad_norm": 1.3699278831481934, + "learning_rate": 1.8787443898361158e-06, + "loss": 1.1316020488739014, + "step": 1392 + }, + { + "epoch": 2.7768924302788847, + "grad_norm": 0.8569239377975464, + "learning_rate": 1.8711883912975575e-06, + "loss": 0.655997633934021, + "step": 1394 + }, + { + "epoch": 2.7808764940239046, + "grad_norm": 0.7035950422286987, + "learning_rate": 1.8636479035136368e-06, + "loss": 0.8871821165084839, + "step": 1396 + }, + { + "epoch": 2.7848605577689245, + "grad_norm": 0.7683161497116089, + "learning_rate": 1.8561230083416488e-06, + "loss": 0.9570977687835693, + "step": 1398 + }, + { + "epoch": 2.7888446215139444, + "grad_norm": 0.8087801337242126, + "learning_rate": 1.8486137874696223e-06, + "loss": 0.8703477382659912, + "step": 1400 + }, + { + "epoch": 2.7928286852589643, + "grad_norm": 0.9088819622993469, + "learning_rate": 1.8411203224154289e-06, + "loss": 0.8619301915168762, + "step": 1402 + }, + { + "epoch": 2.7968127490039842, + "grad_norm": 0.3485574424266815, + "learning_rate": 1.833642694525902e-06, + "loss": 0.13462619483470917, + "step": 1404 + }, + { + "epoch": 2.800796812749004, + "grad_norm": 0.9604331851005554, + "learning_rate": 1.826180984975948e-06, + "loss": 0.8676316142082214, + "step": 1406 + }, + { + "epoch": 2.804780876494024, + "grad_norm": 1.302273154258728, + "learning_rate": 1.8187352747676718e-06, + "loss": 1.241036295890808, + "step": 1408 + }, + { + "epoch": 2.808764940239044, + "grad_norm": 1.2466564178466797, + "learning_rate": 1.8113056447294936e-06, + "loss": 1.0569744110107422, + "step": 1410 + }, + { + "epoch": 2.812749003984064, + "grad_norm": 0.9512035846710205, + "learning_rate": 1.8038921755152704e-06, + "loss": 0.8206438422203064, + "step": 1412 + }, + { + "epoch": 2.816733067729084, + "grad_norm": 1.0051904916763306, + "learning_rate": 1.7964949476034223e-06, + "loss": 0.9369583129882812, + "step": 1414 + }, + { + "epoch": 2.8207171314741037, + "grad_norm": 3.8374409675598145, + "learning_rate": 1.7891140412960615e-06, + "loss": 1.116792917251587, + "step": 1416 + }, + { + "epoch": 2.8247011952191237, + "grad_norm": 1.1146875619888306, + "learning_rate": 1.7817495367181132e-06, + "loss": 0.8257051110267639, + "step": 1418 + }, + { + "epoch": 2.8286852589641436, + "grad_norm": 0.2130766063928604, + "learning_rate": 1.774401513816454e-06, + "loss": 0.08374066650867462, + "step": 1420 + }, + { + "epoch": 2.8326693227091635, + "grad_norm": 0.8484716415405273, + "learning_rate": 1.76707005235904e-06, + "loss": 0.9364421963691711, + "step": 1422 + }, + { + "epoch": 2.8366533864541834, + "grad_norm": 0.7365440130233765, + "learning_rate": 1.759755231934039e-06, + "loss": 0.9269137978553772, + "step": 1424 + }, + { + "epoch": 2.8406374501992033, + "grad_norm": 0.9674385190010071, + "learning_rate": 1.7524571319489695e-06, + "loss": 0.24093596637248993, + "step": 1426 + }, + { + "epoch": 2.8446215139442232, + "grad_norm": 0.8217137455940247, + "learning_rate": 1.7451758316298386e-06, + "loss": 0.8590070605278015, + "step": 1428 + }, + { + "epoch": 2.848605577689243, + "grad_norm": 0.818912148475647, + "learning_rate": 1.7379114100202824e-06, + "loss": 0.8883748650550842, + "step": 1430 + }, + { + "epoch": 2.852589641434263, + "grad_norm": 2.239244222640991, + "learning_rate": 1.7306639459807026e-06, + "loss": 0.8789231777191162, + "step": 1432 + }, + { + "epoch": 2.856573705179283, + "grad_norm": 1.3130366802215576, + "learning_rate": 1.7234335181874197e-06, + "loss": 0.41715553402900696, + "step": 1434 + }, + { + "epoch": 2.860557768924303, + "grad_norm": 2.1881866455078125, + "learning_rate": 1.7162202051318092e-06, + "loss": 0.8317433595657349, + "step": 1436 + }, + { + "epoch": 2.864541832669323, + "grad_norm": 0.4997340440750122, + "learning_rate": 1.7090240851194576e-06, + "loss": 0.06248881667852402, + "step": 1438 + }, + { + "epoch": 2.8685258964143427, + "grad_norm": 0.7684650421142578, + "learning_rate": 1.7018452362693062e-06, + "loss": 0.9771674871444702, + "step": 1440 + }, + { + "epoch": 2.8725099601593627, + "grad_norm": 2.6358094215393066, + "learning_rate": 1.694683736512807e-06, + "loss": 0.4274534285068512, + "step": 1442 + }, + { + "epoch": 2.8764940239043826, + "grad_norm": 3.7041735649108887, + "learning_rate": 1.6875396635930767e-06, + "loss": 0.8502193689346313, + "step": 1444 + }, + { + "epoch": 2.8804780876494025, + "grad_norm": 1.7656716108322144, + "learning_rate": 1.6804130950640492e-06, + "loss": 0.2269526571035385, + "step": 1446 + }, + { + "epoch": 2.8844621513944224, + "grad_norm": 0.9704077839851379, + "learning_rate": 1.6733041082896355e-06, + "loss": 0.9017117619514465, + "step": 1448 + }, + { + "epoch": 2.8884462151394423, + "grad_norm": 1.1423131227493286, + "learning_rate": 1.666212780442887e-06, + "loss": 0.7310890555381775, + "step": 1450 + }, + { + "epoch": 2.8924302788844622, + "grad_norm": 0.8818380832672119, + "learning_rate": 1.659139188505152e-06, + "loss": 0.9649314880371094, + "step": 1452 + }, + { + "epoch": 2.896414342629482, + "grad_norm": 0.9627234935760498, + "learning_rate": 1.652083409265246e-06, + "loss": 0.1323651671409607, + "step": 1454 + }, + { + "epoch": 2.900398406374502, + "grad_norm": 0.625633955001831, + "learning_rate": 1.6450455193186137e-06, + "loss": 0.8300275206565857, + "step": 1456 + }, + { + "epoch": 2.904382470119522, + "grad_norm": 1.691175103187561, + "learning_rate": 1.638025595066499e-06, + "loss": 0.7612891793251038, + "step": 1458 + }, + { + "epoch": 2.908366533864542, + "grad_norm": 0.9278882145881653, + "learning_rate": 1.6310237127151137e-06, + "loss": 0.9076191782951355, + "step": 1460 + }, + { + "epoch": 2.912350597609562, + "grad_norm": 2.7954494953155518, + "learning_rate": 1.624039948274815e-06, + "loss": 0.37150129675865173, + "step": 1462 + }, + { + "epoch": 2.9163346613545817, + "grad_norm": 0.423910528421402, + "learning_rate": 1.6170743775592773e-06, + "loss": 0.20058873295783997, + "step": 1464 + }, + { + "epoch": 2.9203187250996017, + "grad_norm": 0.9244667887687683, + "learning_rate": 1.610127076184667e-06, + "loss": 0.8625198602676392, + "step": 1466 + }, + { + "epoch": 2.9243027888446216, + "grad_norm": 0.8803090453147888, + "learning_rate": 1.6031981195688252e-06, + "loss": 0.9291595816612244, + "step": 1468 + }, + { + "epoch": 2.9282868525896415, + "grad_norm": 1.0361244678497314, + "learning_rate": 1.59628758293045e-06, + "loss": 0.23180729150772095, + "step": 1470 + }, + { + "epoch": 2.9322709163346614, + "grad_norm": 5.147000789642334, + "learning_rate": 1.5893955412882733e-06, + "loss": 0.5987867712974548, + "step": 1472 + }, + { + "epoch": 2.9362549800796813, + "grad_norm": 0.5982325673103333, + "learning_rate": 1.582522069460253e-06, + "loss": 0.8363850116729736, + "step": 1474 + }, + { + "epoch": 2.9402390438247012, + "grad_norm": 3.7226884365081787, + "learning_rate": 1.5756672420627596e-06, + "loss": 0.8606371283531189, + "step": 1476 + }, + { + "epoch": 2.944223107569721, + "grad_norm": 1.0484495162963867, + "learning_rate": 1.5688311335097646e-06, + "loss": 0.9633500576019287, + "step": 1478 + }, + { + "epoch": 2.948207171314741, + "grad_norm": 0.7016828656196594, + "learning_rate": 1.5620138180120331e-06, + "loss": 0.8571369647979736, + "step": 1480 + }, + { + "epoch": 2.952191235059761, + "grad_norm": 2.1188414096832275, + "learning_rate": 1.5552153695763156e-06, + "loss": 0.44183531403541565, + "step": 1482 + }, + { + "epoch": 2.956175298804781, + "grad_norm": 2.2254960536956787, + "learning_rate": 1.5484358620045534e-06, + "loss": 0.28760015964508057, + "step": 1484 + }, + { + "epoch": 2.960159362549801, + "grad_norm": 2.748490333557129, + "learning_rate": 1.5416753688930654e-06, + "loss": 0.6493697166442871, + "step": 1486 + }, + { + "epoch": 2.9641434262948207, + "grad_norm": 1.3967127799987793, + "learning_rate": 1.5349339636317584e-06, + "loss": 0.8622140288352966, + "step": 1488 + }, + { + "epoch": 2.9681274900398407, + "grad_norm": 1.959518313407898, + "learning_rate": 1.528211719403328e-06, + "loss": 0.722124457359314, + "step": 1490 + }, + { + "epoch": 2.9721115537848606, + "grad_norm": 1.3386509418487549, + "learning_rate": 1.521508709182461e-06, + "loss": 0.9694193601608276, + "step": 1492 + }, + { + "epoch": 2.9760956175298805, + "grad_norm": 0.9864974617958069, + "learning_rate": 1.514825005735045e-06, + "loss": 0.8088407516479492, + "step": 1494 + }, + { + "epoch": 2.9800796812749004, + "grad_norm": 2.115551471710205, + "learning_rate": 1.5081606816173814e-06, + "loss": 0.12242338061332703, + "step": 1496 + }, + { + "epoch": 2.9840637450199203, + "grad_norm": 0.75198894739151, + "learning_rate": 1.5015158091753958e-06, + "loss": 0.1432493031024933, + "step": 1498 + }, + { + "epoch": 2.9880478087649402, + "grad_norm": 1.4102544784545898, + "learning_rate": 1.4948904605438477e-06, + "loss": 0.0790117010474205, + "step": 1500 + }, + { + "epoch": 2.99203187250996, + "grad_norm": 0.6461302638053894, + "learning_rate": 1.488284707645557e-06, + "loss": 0.7927932739257812, + "step": 1502 + }, + { + "epoch": 2.99601593625498, + "grad_norm": 0.9944819211959839, + "learning_rate": 1.4816986221906159e-06, + "loss": 0.8774588704109192, + "step": 1504 + }, + { + "epoch": 3.0, + "grad_norm": 2.3869407176971436, + "learning_rate": 1.4751322756756127e-06, + "loss": 0.23395386338233948, + "step": 1506 + }, + { + "epoch": 3.00398406374502, + "grad_norm": 0.6929567456245422, + "learning_rate": 1.4685857393828543e-06, + "loss": 0.6813750267028809, + "step": 1508 + }, + { + "epoch": 3.00796812749004, + "grad_norm": 1.4428455829620361, + "learning_rate": 1.4620590843795967e-06, + "loss": 0.27471280097961426, + "step": 1510 + }, + { + "epoch": 3.0119521912350598, + "grad_norm": 1.1208453178405762, + "learning_rate": 1.4555523815172693e-06, + "loss": 0.7926130294799805, + "step": 1512 + }, + { + "epoch": 3.0159362549800797, + "grad_norm": 1.4112131595611572, + "learning_rate": 1.449065701430705e-06, + "loss": 0.3855717182159424, + "step": 1514 + }, + { + "epoch": 3.0199203187250996, + "grad_norm": 7.652811527252197, + "learning_rate": 1.4425991145373788e-06, + "loss": 0.1316222846508026, + "step": 1516 + }, + { + "epoch": 3.0239043824701195, + "grad_norm": 1.6621893644332886, + "learning_rate": 1.4361526910366368e-06, + "loss": 0.2520155906677246, + "step": 1518 + }, + { + "epoch": 3.0278884462151394, + "grad_norm": 0.8125709891319275, + "learning_rate": 1.4297265009089397e-06, + "loss": 0.7272902727127075, + "step": 1520 + }, + { + "epoch": 3.0318725099601593, + "grad_norm": 1.4255092144012451, + "learning_rate": 1.423320613915099e-06, + "loss": 0.5655202865600586, + "step": 1522 + }, + { + "epoch": 3.0358565737051793, + "grad_norm": 1.9694007635116577, + "learning_rate": 1.416935099595522e-06, + "loss": 0.21059830486774445, + "step": 1524 + }, + { + "epoch": 3.039840637450199, + "grad_norm": 0.7592612504959106, + "learning_rate": 1.4105700272694578e-06, + "loss": 0.6575446724891663, + "step": 1526 + }, + { + "epoch": 3.043824701195219, + "grad_norm": 1.133392572402954, + "learning_rate": 1.4042254660342408e-06, + "loss": 0.9429333209991455, + "step": 1528 + }, + { + "epoch": 3.047808764940239, + "grad_norm": 1.231631875038147, + "learning_rate": 1.3979014847645435e-06, + "loss": 0.2242284119129181, + "step": 1530 + }, + { + "epoch": 3.051792828685259, + "grad_norm": 1.1999961137771606, + "learning_rate": 1.391598152111631e-06, + "loss": 0.15949700772762299, + "step": 1532 + }, + { + "epoch": 3.055776892430279, + "grad_norm": 1.6939618587493896, + "learning_rate": 1.385315536502609e-06, + "loss": 0.21413640677928925, + "step": 1534 + }, + { + "epoch": 3.0597609561752988, + "grad_norm": 1.3219988346099854, + "learning_rate": 1.3790537061396887e-06, + "loss": 0.6202045679092407, + "step": 1536 + }, + { + "epoch": 3.0637450199203187, + "grad_norm": 0.998444676399231, + "learning_rate": 1.372812728999442e-06, + "loss": 0.7671471238136292, + "step": 1538 + }, + { + "epoch": 3.0677290836653386, + "grad_norm": 1.4698975086212158, + "learning_rate": 1.3665926728320632e-06, + "loss": 0.47750726342201233, + "step": 1540 + }, + { + "epoch": 3.0717131474103585, + "grad_norm": 0.9587137699127197, + "learning_rate": 1.3603936051606346e-06, + "loss": 0.7269394397735596, + "step": 1542 + }, + { + "epoch": 3.0756972111553784, + "grad_norm": 2.3286054134368896, + "learning_rate": 1.3542155932803954e-06, + "loss": 0.7805855870246887, + "step": 1544 + }, + { + "epoch": 3.0796812749003983, + "grad_norm": 0.7439804077148438, + "learning_rate": 1.3480587042580092e-06, + "loss": 0.6787388324737549, + "step": 1546 + }, + { + "epoch": 3.0836653386454183, + "grad_norm": 1.8882228136062622, + "learning_rate": 1.3419230049308333e-06, + "loss": 0.6134771108627319, + "step": 1548 + }, + { + "epoch": 3.087649402390438, + "grad_norm": 1.0494561195373535, + "learning_rate": 1.3358085619062003e-06, + "loss": 0.7737662196159363, + "step": 1550 + }, + { + "epoch": 3.091633466135458, + "grad_norm": 0.31838488578796387, + "learning_rate": 1.3297154415606864e-06, + "loss": 0.034840308129787445, + "step": 1552 + }, + { + "epoch": 3.095617529880478, + "grad_norm": 1.5378990173339844, + "learning_rate": 1.3236437100393992e-06, + "loss": 0.21899044513702393, + "step": 1554 + }, + { + "epoch": 3.099601593625498, + "grad_norm": 0.9580462574958801, + "learning_rate": 1.3175934332552511e-06, + "loss": 0.635277271270752, + "step": 1556 + }, + { + "epoch": 3.103585657370518, + "grad_norm": 1.2689288854599, + "learning_rate": 1.3115646768882522e-06, + "loss": 0.6710810661315918, + "step": 1558 + }, + { + "epoch": 3.1075697211155378, + "grad_norm": 0.9133360385894775, + "learning_rate": 1.3055575063847923e-06, + "loss": 0.7197314500808716, + "step": 1560 + }, + { + "epoch": 3.1115537848605577, + "grad_norm": 3.067455768585205, + "learning_rate": 1.29957198695693e-06, + "loss": 0.21895435452461243, + "step": 1562 + }, + { + "epoch": 3.1155378486055776, + "grad_norm": 0.27349138259887695, + "learning_rate": 1.2936081835816867e-06, + "loss": 0.19600287079811096, + "step": 1564 + }, + { + "epoch": 3.1195219123505975, + "grad_norm": 1.1419686079025269, + "learning_rate": 1.2876661610003428e-06, + "loss": 0.7878577709197998, + "step": 1566 + }, + { + "epoch": 3.1235059760956174, + "grad_norm": 1.1395351886749268, + "learning_rate": 1.2817459837177298e-06, + "loss": 0.7802326679229736, + "step": 1568 + }, + { + "epoch": 3.1274900398406373, + "grad_norm": 1.9237797260284424, + "learning_rate": 1.2758477160015355e-06, + "loss": 0.5069929361343384, + "step": 1570 + }, + { + "epoch": 3.1314741035856573, + "grad_norm": 0.7889575958251953, + "learning_rate": 1.2699714218816036e-06, + "loss": 0.6714158654212952, + "step": 1572 + }, + { + "epoch": 3.135458167330677, + "grad_norm": 0.9449037313461304, + "learning_rate": 1.2641171651492383e-06, + "loss": 0.6565294861793518, + "step": 1574 + }, + { + "epoch": 3.139442231075697, + "grad_norm": 1.7222603559494019, + "learning_rate": 1.2582850093565115e-06, + "loss": 0.2423674762248993, + "step": 1576 + }, + { + "epoch": 3.143426294820717, + "grad_norm": 0.8361628651618958, + "learning_rate": 1.2524750178155762e-06, + "loss": 0.6483781933784485, + "step": 1578 + }, + { + "epoch": 3.147410358565737, + "grad_norm": 0.4106227159500122, + "learning_rate": 1.2466872535979755e-06, + "loss": 0.06941226869821548, + "step": 1580 + }, + { + "epoch": 3.151394422310757, + "grad_norm": 1.131303071975708, + "learning_rate": 1.2409217795339592e-06, + "loss": 0.6722179651260376, + "step": 1582 + }, + { + "epoch": 3.1553784860557768, + "grad_norm": 1.3526575565338135, + "learning_rate": 1.2351786582118018e-06, + "loss": 0.37432199716567993, + "step": 1584 + }, + { + "epoch": 3.1593625498007967, + "grad_norm": 1.5046707391738892, + "learning_rate": 1.2294579519771246e-06, + "loss": 0.36908501386642456, + "step": 1586 + }, + { + "epoch": 3.1633466135458166, + "grad_norm": 0.14365744590759277, + "learning_rate": 1.2237597229322155e-06, + "loss": 0.01732539013028145, + "step": 1588 + }, + { + "epoch": 3.1673306772908365, + "grad_norm": 0.7536062598228455, + "learning_rate": 1.2180840329353564e-06, + "loss": 0.2823001444339752, + "step": 1590 + }, + { + "epoch": 3.1713147410358564, + "grad_norm": 6.318256855010986, + "learning_rate": 1.2124309436001533e-06, + "loss": 0.5411125421524048, + "step": 1592 + }, + { + "epoch": 3.1752988047808763, + "grad_norm": 1.1654754877090454, + "learning_rate": 1.2068005162948668e-06, + "loss": 0.7602944374084473, + "step": 1594 + }, + { + "epoch": 3.1792828685258963, + "grad_norm": 2.5576841831207275, + "learning_rate": 1.2011928121417431e-06, + "loss": 0.1262691169977188, + "step": 1596 + }, + { + "epoch": 3.183266932270916, + "grad_norm": 1.2924350500106812, + "learning_rate": 1.195607892016354e-06, + "loss": 0.6975268721580505, + "step": 1598 + }, + { + "epoch": 3.187250996015936, + "grad_norm": 2.0278656482696533, + "learning_rate": 1.1900458165469345e-06, + "loss": 0.5072341561317444, + "step": 1600 + }, + { + "epoch": 3.191235059760956, + "grad_norm": 2.13330078125, + "learning_rate": 1.184506646113724e-06, + "loss": 0.7287152409553528, + "step": 1602 + }, + { + "epoch": 3.195219123505976, + "grad_norm": 0.19735604524612427, + "learning_rate": 1.1789904408483123e-06, + "loss": 0.20490704476833344, + "step": 1604 + }, + { + "epoch": 3.199203187250996, + "grad_norm": 2.342869997024536, + "learning_rate": 1.1734972606329874e-06, + "loss": 0.6201443076133728, + "step": 1606 + }, + { + "epoch": 3.2031872509960158, + "grad_norm": 1.9951808452606201, + "learning_rate": 1.1680271651000819e-06, + "loss": 0.2740911543369293, + "step": 1608 + }, + { + "epoch": 3.2071713147410357, + "grad_norm": 1.075411319732666, + "learning_rate": 1.162580213631328e-06, + "loss": 0.6568232774734497, + "step": 1610 + }, + { + "epoch": 3.2111553784860556, + "grad_norm": 2.3391730785369873, + "learning_rate": 1.1571564653572148e-06, + "loss": 1.0995919704437256, + "step": 1612 + }, + { + "epoch": 3.2151394422310755, + "grad_norm": 0.11555808782577515, + "learning_rate": 1.1517559791563439e-06, + "loss": 0.003191891126334667, + "step": 1614 + }, + { + "epoch": 3.2191235059760954, + "grad_norm": 2.371424674987793, + "learning_rate": 1.1463788136547887e-06, + "loss": 0.396582692861557, + "step": 1616 + }, + { + "epoch": 3.2231075697211153, + "grad_norm": 1.8076469898223877, + "learning_rate": 1.141025027225463e-06, + "loss": 0.3241533637046814, + "step": 1618 + }, + { + "epoch": 3.2270916334661353, + "grad_norm": 0.9942080974578857, + "learning_rate": 1.1356946779874825e-06, + "loss": 0.6740264296531677, + "step": 1620 + }, + { + "epoch": 3.231075697211155, + "grad_norm": 1.624965786933899, + "learning_rate": 1.1303878238055357e-06, + "loss": 0.44572022557258606, + "step": 1622 + }, + { + "epoch": 3.235059760956175, + "grad_norm": 1.6572600603103638, + "learning_rate": 1.1251045222892553e-06, + "loss": 0.21951913833618164, + "step": 1624 + }, + { + "epoch": 3.239043824701195, + "grad_norm": 1.5844409465789795, + "learning_rate": 1.119844830792595e-06, + "loss": 0.7072573900222778, + "step": 1626 + }, + { + "epoch": 3.243027888446215, + "grad_norm": 1.0160541534423828, + "learning_rate": 1.1146088064132052e-06, + "loss": 0.6218189001083374, + "step": 1628 + }, + { + "epoch": 3.247011952191235, + "grad_norm": 0.6660611033439636, + "learning_rate": 1.10939650599181e-06, + "loss": 0.15160006284713745, + "step": 1630 + }, + { + "epoch": 3.2509960159362548, + "grad_norm": 4.854979038238525, + "learning_rate": 1.1042079861115967e-06, + "loss": 0.4013654887676239, + "step": 1632 + }, + { + "epoch": 3.2549800796812747, + "grad_norm": 1.7456501722335815, + "learning_rate": 1.099043303097596e-06, + "loss": 0.6942977905273438, + "step": 1634 + }, + { + "epoch": 3.2589641434262946, + "grad_norm": 0.6688535809516907, + "learning_rate": 1.0939025130160743e-06, + "loss": 0.7660707831382751, + "step": 1636 + }, + { + "epoch": 3.2629482071713145, + "grad_norm": 1.3489729166030884, + "learning_rate": 1.088785671673921e-06, + "loss": 0.4087866544723511, + "step": 1638 + }, + { + "epoch": 3.2669322709163344, + "grad_norm": 3.7537801265716553, + "learning_rate": 1.0836928346180481e-06, + "loss": 0.26779600977897644, + "step": 1640 + }, + { + "epoch": 3.2709163346613543, + "grad_norm": 1.0913664102554321, + "learning_rate": 1.0786240571347827e-06, + "loss": 0.11661072820425034, + "step": 1642 + }, + { + "epoch": 3.2749003984063743, + "grad_norm": 1.3544014692306519, + "learning_rate": 1.0735793942492676e-06, + "loss": 0.9415394067764282, + "step": 1644 + }, + { + "epoch": 3.278884462151394, + "grad_norm": 1.880513072013855, + "learning_rate": 1.068558900724865e-06, + "loss": 0.6600284576416016, + "step": 1646 + }, + { + "epoch": 3.2828685258964145, + "grad_norm": 2.517366647720337, + "learning_rate": 1.0635626310625637e-06, + "loss": 0.3240680694580078, + "step": 1648 + }, + { + "epoch": 3.2868525896414345, + "grad_norm": 0.825859010219574, + "learning_rate": 1.058590639500382e-06, + "loss": 0.6646403074264526, + "step": 1650 + }, + { + "epoch": 3.2908366533864544, + "grad_norm": 0.9859835505485535, + "learning_rate": 1.0536429800127851e-06, + "loss": 0.642147958278656, + "step": 1652 + }, + { + "epoch": 3.2948207171314743, + "grad_norm": 1.7152155637741089, + "learning_rate": 1.0487197063100961e-06, + "loss": 0.7060829401016235, + "step": 1654 + }, + { + "epoch": 3.298804780876494, + "grad_norm": 1.7756178379058838, + "learning_rate": 1.0438208718379124e-06, + "loss": 0.7361951470375061, + "step": 1656 + }, + { + "epoch": 3.302788844621514, + "grad_norm": 1.7107096910476685, + "learning_rate": 1.0389465297765253e-06, + "loss": 0.6126337647438049, + "step": 1658 + }, + { + "epoch": 3.306772908366534, + "grad_norm": 1.4858530759811401, + "learning_rate": 1.0340967330403468e-06, + "loss": 0.614052414894104, + "step": 1660 + }, + { + "epoch": 3.310756972111554, + "grad_norm": 6.398506164550781, + "learning_rate": 1.02927153427733e-06, + "loss": 0.6388739347457886, + "step": 1662 + }, + { + "epoch": 3.314741035856574, + "grad_norm": 4.903992652893066, + "learning_rate": 1.0244709858683996e-06, + "loss": 0.1377391517162323, + "step": 1664 + }, + { + "epoch": 3.318725099601594, + "grad_norm": 1.644950270652771, + "learning_rate": 1.0196951399268847e-06, + "loss": 0.3214379549026489, + "step": 1666 + }, + { + "epoch": 3.3227091633466137, + "grad_norm": 6.5153608322143555, + "learning_rate": 1.0149440482979503e-06, + "loss": 0.23638975620269775, + "step": 1668 + }, + { + "epoch": 3.3266932270916336, + "grad_norm": 1.4857839345932007, + "learning_rate": 1.0102177625580375e-06, + "loss": 0.22218865156173706, + "step": 1670 + }, + { + "epoch": 3.3306772908366535, + "grad_norm": 8.828252792358398, + "learning_rate": 1.0055163340143e-06, + "loss": 0.6645467877388, + "step": 1672 + }, + { + "epoch": 3.3346613545816735, + "grad_norm": 1.0999014377593994, + "learning_rate": 1.0008398137040507e-06, + "loss": 0.5620592832565308, + "step": 1674 + }, + { + "epoch": 3.3386454183266934, + "grad_norm": 2.528717279434204, + "learning_rate": 9.961882523942068e-07, + "loss": 0.6080818176269531, + "step": 1676 + }, + { + "epoch": 3.3426294820717133, + "grad_norm": 0.1366569846868515, + "learning_rate": 9.915617005807357e-07, + "loss": 0.01138792559504509, + "step": 1678 + }, + { + "epoch": 3.346613545816733, + "grad_norm": 5.231603622436523, + "learning_rate": 9.869602084881103e-07, + "loss": 0.29557374119758606, + "step": 1680 + }, + { + "epoch": 3.350597609561753, + "grad_norm": 0.9051138758659363, + "learning_rate": 9.823838260687635e-07, + "loss": 0.41544756293296814, + "step": 1682 + }, + { + "epoch": 3.354581673306773, + "grad_norm": 1.6163842678070068, + "learning_rate": 9.778326030025432e-07, + "loss": 0.45938849449157715, + "step": 1684 + }, + { + "epoch": 3.358565737051793, + "grad_norm": 7.820988178253174, + "learning_rate": 9.733065886961764e-07, + "loss": 0.4935106337070465, + "step": 1686 + }, + { + "epoch": 3.362549800796813, + "grad_norm": 1.3769513368606567, + "learning_rate": 9.688058322827313e-07, + "loss": 0.5252028107643127, + "step": 1688 + }, + { + "epoch": 3.366533864541833, + "grad_norm": 1.0140272378921509, + "learning_rate": 9.643303826210824e-07, + "loss": 0.7207529544830322, + "step": 1690 + }, + { + "epoch": 3.3705179282868527, + "grad_norm": 1.3448855876922607, + "learning_rate": 9.598802882953828e-07, + "loss": 0.7529066205024719, + "step": 1692 + }, + { + "epoch": 3.3745019920318726, + "grad_norm": 1.0818604230880737, + "learning_rate": 9.554555976145349e-07, + "loss": 0.6526249647140503, + "step": 1694 + }, + { + "epoch": 3.3784860557768925, + "grad_norm": 0.858180046081543, + "learning_rate": 9.510563586116686e-07, + "loss": 0.6609078645706177, + "step": 1696 + }, + { + "epoch": 3.3824701195219125, + "grad_norm": 1.1475756168365479, + "learning_rate": 9.466826190436147e-07, + "loss": 0.7812352180480957, + "step": 1698 + }, + { + "epoch": 3.3864541832669324, + "grad_norm": 2.1600332260131836, + "learning_rate": 9.423344263903926e-07, + "loss": 0.7400810122489929, + "step": 1700 + }, + { + "epoch": 3.3904382470119523, + "grad_norm": 1.9892051219940186, + "learning_rate": 9.380118278546906e-07, + "loss": 0.6348077654838562, + "step": 1702 + }, + { + "epoch": 3.394422310756972, + "grad_norm": 0.9929773211479187, + "learning_rate": 9.337148703613554e-07, + "loss": 0.6541098356246948, + "step": 1704 + }, + { + "epoch": 3.398406374501992, + "grad_norm": 5.213384628295898, + "learning_rate": 9.29443600556881e-07, + "loss": 0.20520062744617462, + "step": 1706 + }, + { + "epoch": 3.402390438247012, + "grad_norm": 1.8277703523635864, + "learning_rate": 9.251980648089045e-07, + "loss": 0.596899688243866, + "step": 1708 + }, + { + "epoch": 3.406374501992032, + "grad_norm": 0.9781650304794312, + "learning_rate": 9.209783092057025e-07, + "loss": 0.7202063202857971, + "step": 1710 + }, + { + "epoch": 3.410358565737052, + "grad_norm": 1.2887661457061768, + "learning_rate": 9.16784379555688e-07, + "loss": 0.668391764163971, + "step": 1712 + }, + { + "epoch": 3.414342629482072, + "grad_norm": 1.2524248361587524, + "learning_rate": 9.126163213869171e-07, + "loss": 0.6738901138305664, + "step": 1714 + }, + { + "epoch": 3.4183266932270917, + "grad_norm": 0.8974006772041321, + "learning_rate": 9.084741799465915e-07, + "loss": 0.6369835734367371, + "step": 1716 + }, + { + "epoch": 3.4223107569721116, + "grad_norm": 0.9271976351737976, + "learning_rate": 9.043580002005681e-07, + "loss": 0.7468122839927673, + "step": 1718 + }, + { + "epoch": 3.4262948207171315, + "grad_norm": 0.9398600459098816, + "learning_rate": 9.002678268328732e-07, + "loss": 0.6316313743591309, + "step": 1720 + }, + { + "epoch": 3.4302788844621515, + "grad_norm": 3.112898111343384, + "learning_rate": 8.962037042452146e-07, + "loss": 0.3467191755771637, + "step": 1722 + }, + { + "epoch": 3.4342629482071714, + "grad_norm": 0.8903955817222595, + "learning_rate": 8.921656765564998e-07, + "loss": 0.5496594309806824, + "step": 1724 + }, + { + "epoch": 3.4382470119521913, + "grad_norm": 2.7363598346710205, + "learning_rate": 8.881537876023597e-07, + "loss": 0.6846615076065063, + "step": 1726 + }, + { + "epoch": 3.442231075697211, + "grad_norm": 1.7913397550582886, + "learning_rate": 8.841680809346684e-07, + "loss": 0.4614332914352417, + "step": 1728 + }, + { + "epoch": 3.446215139442231, + "grad_norm": 2.286719560623169, + "learning_rate": 8.802085998210754e-07, + "loss": 0.6514830589294434, + "step": 1730 + }, + { + "epoch": 3.450199203187251, + "grad_norm": 1.2754535675048828, + "learning_rate": 8.762753872445316e-07, + "loss": 0.6596709489822388, + "step": 1732 + }, + { + "epoch": 3.454183266932271, + "grad_norm": 3.1973865032196045, + "learning_rate": 8.723684859028244e-07, + "loss": 0.3601575791835785, + "step": 1734 + }, + { + "epoch": 3.458167330677291, + "grad_norm": 1.0521482229232788, + "learning_rate": 8.684879382081163e-07, + "loss": 0.6533339023590088, + "step": 1736 + }, + { + "epoch": 3.462151394422311, + "grad_norm": 1.2476742267608643, + "learning_rate": 8.646337862864804e-07, + "loss": 0.7225340604782104, + "step": 1738 + }, + { + "epoch": 3.4661354581673307, + "grad_norm": 8.218664169311523, + "learning_rate": 8.608060719774452e-07, + "loss": 0.14243163168430328, + "step": 1740 + }, + { + "epoch": 3.4701195219123506, + "grad_norm": 0.8877552151679993, + "learning_rate": 8.570048368335411e-07, + "loss": 0.7387225031852722, + "step": 1742 + }, + { + "epoch": 3.4741035856573705, + "grad_norm": 0.26608389616012573, + "learning_rate": 8.532301221198491e-07, + "loss": 0.060973528772592545, + "step": 1744 + }, + { + "epoch": 3.4780876494023905, + "grad_norm": 1.655069351196289, + "learning_rate": 8.494819688135502e-07, + "loss": 0.6722233891487122, + "step": 1746 + }, + { + "epoch": 3.4820717131474104, + "grad_norm": 0.3229190409183502, + "learning_rate": 8.457604176034851e-07, + "loss": 0.16490302979946136, + "step": 1748 + }, + { + "epoch": 3.4860557768924303, + "grad_norm": 0.3072760999202728, + "learning_rate": 8.42065508889708e-07, + "loss": 0.06224316358566284, + "step": 1750 + }, + { + "epoch": 3.49003984063745, + "grad_norm": 1.0425161123275757, + "learning_rate": 8.383972827830517e-07, + "loss": 0.6595985293388367, + "step": 1752 + }, + { + "epoch": 3.49402390438247, + "grad_norm": 1.6916478872299194, + "learning_rate": 8.347557791046892e-07, + "loss": 0.18403995037078857, + "step": 1754 + }, + { + "epoch": 3.49800796812749, + "grad_norm": 0.8162530064582825, + "learning_rate": 8.311410373857033e-07, + "loss": 0.6693860292434692, + "step": 1756 + }, + { + "epoch": 3.50199203187251, + "grad_norm": 3.898818254470825, + "learning_rate": 8.275530968666578e-07, + "loss": 0.5436112880706787, + "step": 1758 + }, + { + "epoch": 3.50597609561753, + "grad_norm": 0.576738178730011, + "learning_rate": 8.239919964971689e-07, + "loss": 0.1252291202545166, + "step": 1760 + }, + { + "epoch": 3.50996015936255, + "grad_norm": 0.9629335403442383, + "learning_rate": 8.20457774935485e-07, + "loss": 0.2324841022491455, + "step": 1762 + }, + { + "epoch": 3.5139442231075697, + "grad_norm": 1.051251769065857, + "learning_rate": 8.16950470548067e-07, + "loss": 0.5175900459289551, + "step": 1764 + }, + { + "epoch": 3.5179282868525896, + "grad_norm": 5.374156951904297, + "learning_rate": 8.134701214091691e-07, + "loss": 0.19936859607696533, + "step": 1766 + }, + { + "epoch": 3.5219123505976095, + "grad_norm": 1.134244680404663, + "learning_rate": 8.100167653004285e-07, + "loss": 0.09222012758255005, + "step": 1768 + }, + { + "epoch": 3.5258964143426295, + "grad_norm": 1.0654293298721313, + "learning_rate": 8.065904397104543e-07, + "loss": 0.6717595458030701, + "step": 1770 + }, + { + "epoch": 3.5298804780876494, + "grad_norm": 2.4975504875183105, + "learning_rate": 8.031911818344201e-07, + "loss": 0.5180625915527344, + "step": 1772 + }, + { + "epoch": 3.5338645418326693, + "grad_norm": 0.9296510219573975, + "learning_rate": 7.998190285736589e-07, + "loss": 0.6407575607299805, + "step": 1774 + }, + { + "epoch": 3.537848605577689, + "grad_norm": 2.6143455505371094, + "learning_rate": 7.964740165352664e-07, + "loss": 0.6667947769165039, + "step": 1776 + }, + { + "epoch": 3.541832669322709, + "grad_norm": 0.19827701151371002, + "learning_rate": 7.931561820317005e-07, + "loss": 0.023438258096575737, + "step": 1778 + }, + { + "epoch": 3.545816733067729, + "grad_norm": 1.148992657661438, + "learning_rate": 7.898655610803869e-07, + "loss": 0.6734960675239563, + "step": 1780 + }, + { + "epoch": 3.549800796812749, + "grad_norm": 1.8085567951202393, + "learning_rate": 7.866021894033296e-07, + "loss": 0.6972249150276184, + "step": 1782 + }, + { + "epoch": 3.553784860557769, + "grad_norm": 2.9096920490264893, + "learning_rate": 7.833661024267235e-07, + "loss": 0.6476399302482605, + "step": 1784 + }, + { + "epoch": 3.557768924302789, + "grad_norm": 0.7224079966545105, + "learning_rate": 7.80157335280568e-07, + "loss": 0.9946411848068237, + "step": 1786 + }, + { + "epoch": 3.5617529880478087, + "grad_norm": 1.2070460319519043, + "learning_rate": 7.769759227982855e-07, + "loss": 0.711801290512085, + "step": 1788 + }, + { + "epoch": 3.5657370517928286, + "grad_norm": 2.714474678039551, + "learning_rate": 7.738218995163462e-07, + "loss": 0.15059031546115875, + "step": 1790 + }, + { + "epoch": 3.5697211155378485, + "grad_norm": 1.3999918699264526, + "learning_rate": 7.70695299673891e-07, + "loss": 0.139665424823761, + "step": 1792 + }, + { + "epoch": 3.5737051792828685, + "grad_norm": 0.37299129366874695, + "learning_rate": 7.67596157212359e-07, + "loss": 0.11374976485967636, + "step": 1794 + }, + { + "epoch": 3.5776892430278884, + "grad_norm": 0.8067252039909363, + "learning_rate": 7.645245057751201e-07, + "loss": 0.6304631233215332, + "step": 1796 + }, + { + "epoch": 3.5816733067729083, + "grad_norm": 1.578432559967041, + "learning_rate": 7.614803787071115e-07, + "loss": 0.22770892083644867, + "step": 1798 + }, + { + "epoch": 3.585657370517928, + "grad_norm": 3.3027656078338623, + "learning_rate": 7.584638090544717e-07, + "loss": 0.20699705183506012, + "step": 1800 + }, + { + "epoch": 3.589641434262948, + "grad_norm": 0.14634272456169128, + "learning_rate": 7.554748295641862e-07, + "loss": 0.055411506444215775, + "step": 1802 + }, + { + "epoch": 3.593625498007968, + "grad_norm": 1.2589038610458374, + "learning_rate": 7.525134726837289e-07, + "loss": 0.15108336508274078, + "step": 1804 + }, + { + "epoch": 3.597609561752988, + "grad_norm": 1.8965911865234375, + "learning_rate": 7.49579770560711e-07, + "loss": 0.4452376961708069, + "step": 1806 + }, + { + "epoch": 3.601593625498008, + "grad_norm": 1.1629970073699951, + "learning_rate": 7.46673755042531e-07, + "loss": 0.6423868536949158, + "step": 1808 + }, + { + "epoch": 3.605577689243028, + "grad_norm": 0.5293740630149841, + "learning_rate": 7.437954576760312e-07, + "loss": 0.21336103975772858, + "step": 1810 + }, + { + "epoch": 3.6095617529880477, + "grad_norm": 1.164920449256897, + "learning_rate": 7.409449097071536e-07, + "loss": 0.5466434359550476, + "step": 1812 + }, + { + "epoch": 3.6135458167330676, + "grad_norm": 1.1033563613891602, + "learning_rate": 7.381221420805999e-07, + "loss": 0.6399943232536316, + "step": 1814 + }, + { + "epoch": 3.6175298804780875, + "grad_norm": 1.056943416595459, + "learning_rate": 7.353271854394979e-07, + "loss": 0.5917325019836426, + "step": 1816 + }, + { + "epoch": 3.6215139442231075, + "grad_norm": 0.9444670677185059, + "learning_rate": 7.325600701250674e-07, + "loss": 0.7685708403587341, + "step": 1818 + }, + { + "epoch": 3.6254980079681274, + "grad_norm": 1.8602865934371948, + "learning_rate": 7.298208261762906e-07, + "loss": 0.45633015036582947, + "step": 1820 + }, + { + "epoch": 3.6294820717131473, + "grad_norm": 0.10787267237901688, + "learning_rate": 7.271094833295859e-07, + "loss": 0.011536069214344025, + "step": 1822 + }, + { + "epoch": 3.633466135458167, + "grad_norm": 0.2886284291744232, + "learning_rate": 7.244260710184868e-07, + "loss": 0.024275042116642, + "step": 1824 + }, + { + "epoch": 3.637450199203187, + "grad_norm": 0.6795600652694702, + "learning_rate": 7.21770618373321e-07, + "loss": 0.45940348505973816, + "step": 1826 + }, + { + "epoch": 3.641434262948207, + "grad_norm": 2.2104618549346924, + "learning_rate": 7.191431542208935e-07, + "loss": 0.6470014452934265, + "step": 1828 + }, + { + "epoch": 3.645418326693227, + "grad_norm": 1.12752103805542, + "learning_rate": 7.165437070841758e-07, + "loss": 0.7721574902534485, + "step": 1830 + }, + { + "epoch": 3.649402390438247, + "grad_norm": 6.11736536026001, + "learning_rate": 7.139723051819938e-07, + "loss": 0.5740348696708679, + "step": 1832 + }, + { + "epoch": 3.653386454183267, + "grad_norm": 0.4044356048107147, + "learning_rate": 7.114289764287227e-07, + "loss": 0.05502355471253395, + "step": 1834 + }, + { + "epoch": 3.6573705179282867, + "grad_norm": 4.303436279296875, + "learning_rate": 7.08913748433985e-07, + "loss": 0.17597807943820953, + "step": 1836 + }, + { + "epoch": 3.6613545816733066, + "grad_norm": 1.0884654521942139, + "learning_rate": 7.064266485023493e-07, + "loss": 0.6930414438247681, + "step": 1838 + }, + { + "epoch": 3.6653386454183265, + "grad_norm": 2.256512403488159, + "learning_rate": 7.039677036330331e-07, + "loss": 0.6587978601455688, + "step": 1840 + }, + { + "epoch": 3.6693227091633465, + "grad_norm": 0.19702738523483276, + "learning_rate": 7.015369405196132e-07, + "loss": 0.016245799139142036, + "step": 1842 + }, + { + "epoch": 3.6733067729083664, + "grad_norm": 0.9400996565818787, + "learning_rate": 6.991343855497312e-07, + "loss": 0.15207843482494354, + "step": 1844 + }, + { + "epoch": 3.6772908366533863, + "grad_norm": 1.0055437088012695, + "learning_rate": 6.967600648048113e-07, + "loss": 0.6164069175720215, + "step": 1846 + }, + { + "epoch": 3.681274900398406, + "grad_norm": 1.8582080602645874, + "learning_rate": 6.944140040597742e-07, + "loss": 0.7226882576942444, + "step": 1848 + }, + { + "epoch": 3.685258964143426, + "grad_norm": 1.656290054321289, + "learning_rate": 6.920962287827587e-07, + "loss": 0.07943466305732727, + "step": 1850 + }, + { + "epoch": 3.6892430278884465, + "grad_norm": 1.666813611984253, + "learning_rate": 6.898067641348459e-07, + "loss": 0.30842339992523193, + "step": 1852 + }, + { + "epoch": 3.6932270916334664, + "grad_norm": 0.8802257776260376, + "learning_rate": 6.875456349697834e-07, + "loss": 0.6316725611686707, + "step": 1854 + }, + { + "epoch": 3.6972111553784863, + "grad_norm": 2.5803232192993164, + "learning_rate": 6.853128658337188e-07, + "loss": 0.09659645706415176, + "step": 1856 + }, + { + "epoch": 3.7011952191235062, + "grad_norm": 1.351311206817627, + "learning_rate": 6.831084809649302e-07, + "loss": 0.6809911131858826, + "step": 1858 + }, + { + "epoch": 3.705179282868526, + "grad_norm": 1.1612941026687622, + "learning_rate": 6.809325042935666e-07, + "loss": 0.3540644943714142, + "step": 1860 + }, + { + "epoch": 3.709163346613546, + "grad_norm": 0.9889734387397766, + "learning_rate": 6.787849594413833e-07, + "loss": 0.6793351173400879, + "step": 1862 + }, + { + "epoch": 3.713147410358566, + "grad_norm": 1.0778642892837524, + "learning_rate": 6.766658697214906e-07, + "loss": 0.6664227247238159, + "step": 1864 + }, + { + "epoch": 3.717131474103586, + "grad_norm": 2.6285629272460938, + "learning_rate": 6.745752581380965e-07, + "loss": 0.33559897541999817, + "step": 1866 + }, + { + "epoch": 3.721115537848606, + "grad_norm": 1.0389450788497925, + "learning_rate": 6.72513147386261e-07, + "loss": 0.5156994462013245, + "step": 1868 + }, + { + "epoch": 3.7250996015936257, + "grad_norm": 0.9331614375114441, + "learning_rate": 6.704795598516451e-07, + "loss": 0.5414950251579285, + "step": 1870 + }, + { + "epoch": 3.7290836653386457, + "grad_norm": 1.0866365432739258, + "learning_rate": 6.684745176102714e-07, + "loss": 0.735094428062439, + "step": 1872 + }, + { + "epoch": 3.7330677290836656, + "grad_norm": 1.4017014503479004, + "learning_rate": 6.664980424282842e-07, + "loss": 0.2802731692790985, + "step": 1874 + }, + { + "epoch": 3.7370517928286855, + "grad_norm": 2.2784199714660645, + "learning_rate": 6.645501557617104e-07, + "loss": 0.5592929124832153, + "step": 1876 + }, + { + "epoch": 3.7410358565737054, + "grad_norm": 4.115759372711182, + "learning_rate": 6.626308787562294e-07, + "loss": 0.41764435172080994, + "step": 1878 + }, + { + "epoch": 3.7450199203187253, + "grad_norm": 0.9289363622665405, + "learning_rate": 6.607402322469429e-07, + "loss": 0.6480333209037781, + "step": 1880 + }, + { + "epoch": 3.7490039840637452, + "grad_norm": 2.0568838119506836, + "learning_rate": 6.588782367581475e-07, + "loss": 0.773093581199646, + "step": 1882 + }, + { + "epoch": 3.752988047808765, + "grad_norm": 3.918016195297241, + "learning_rate": 6.570449125031144e-07, + "loss": 0.5592324137687683, + "step": 1884 + }, + { + "epoch": 3.756972111553785, + "grad_norm": 0.8172755241394043, + "learning_rate": 6.552402793838667e-07, + "loss": 0.6393176913261414, + "step": 1886 + }, + { + "epoch": 3.760956175298805, + "grad_norm": 0.3844411075115204, + "learning_rate": 6.534643569909665e-07, + "loss": 0.08161535859107971, + "step": 1888 + }, + { + "epoch": 3.764940239043825, + "grad_norm": 2.660936117172241, + "learning_rate": 6.517171646032988e-07, + "loss": 0.7531623244285583, + "step": 1890 + }, + { + "epoch": 3.768924302788845, + "grad_norm": 2.1934661865234375, + "learning_rate": 6.499987211878666e-07, + "loss": 0.6893159747123718, + "step": 1892 + }, + { + "epoch": 3.7729083665338647, + "grad_norm": 1.1734172105789185, + "learning_rate": 6.483090453995811e-07, + "loss": 0.09743469953536987, + "step": 1894 + }, + { + "epoch": 3.7768924302788847, + "grad_norm": 1.5317673683166504, + "learning_rate": 6.466481555810608e-07, + "loss": 0.6921253204345703, + "step": 1896 + }, + { + "epoch": 3.7808764940239046, + "grad_norm": 0.8458757996559143, + "learning_rate": 6.450160697624327e-07, + "loss": 0.6649323105812073, + "step": 1898 + }, + { + "epoch": 3.7848605577689245, + "grad_norm": 1.0291515588760376, + "learning_rate": 6.434128056611361e-07, + "loss": 0.6685061454772949, + "step": 1900 + }, + { + "epoch": 3.7888446215139444, + "grad_norm": 0.8199694156646729, + "learning_rate": 6.418383806817298e-07, + "loss": 0.7103414535522461, + "step": 1902 + }, + { + "epoch": 3.7928286852589643, + "grad_norm": 0.8696004748344421, + "learning_rate": 6.40292811915704e-07, + "loss": 0.6235980987548828, + "step": 1904 + }, + { + "epoch": 3.7968127490039842, + "grad_norm": 2.7558107376098633, + "learning_rate": 6.387761161412942e-07, + "loss": 0.14641408622264862, + "step": 1906 + }, + { + "epoch": 3.800796812749004, + "grad_norm": 0.8049102425575256, + "learning_rate": 6.372883098232999e-07, + "loss": 0.6313645839691162, + "step": 1908 + }, + { + "epoch": 3.804780876494024, + "grad_norm": 1.0484040975570679, + "learning_rate": 6.358294091129044e-07, + "loss": 0.689453661441803, + "step": 1910 + }, + { + "epoch": 3.808764940239044, + "grad_norm": 1.3624324798583984, + "learning_rate": 6.34399429847501e-07, + "loss": 0.4293438196182251, + "step": 1912 + }, + { + "epoch": 3.812749003984064, + "grad_norm": 2.118128538131714, + "learning_rate": 6.329983875505202e-07, + "loss": 0.7885560989379883, + "step": 1914 + }, + { + "epoch": 3.816733067729084, + "grad_norm": 1.88889479637146, + "learning_rate": 6.316262974312607e-07, + "loss": 0.12458698451519012, + "step": 1916 + }, + { + "epoch": 3.8207171314741037, + "grad_norm": 2.0474905967712402, + "learning_rate": 6.302831743847255e-07, + "loss": 0.7278786897659302, + "step": 1918 + }, + { + "epoch": 3.8247011952191237, + "grad_norm": 1.8699114322662354, + "learning_rate": 6.289690329914599e-07, + "loss": 0.10339318215847015, + "step": 1920 + }, + { + "epoch": 3.8286852589641436, + "grad_norm": 0.9766838550567627, + "learning_rate": 6.276838875173931e-07, + "loss": 0.7524492144584656, + "step": 1922 + }, + { + "epoch": 3.8326693227091635, + "grad_norm": 0.34323349595069885, + "learning_rate": 6.264277519136821e-07, + "loss": 0.051684651523828506, + "step": 1924 + }, + { + "epoch": 3.8366533864541834, + "grad_norm": 1.1233506202697754, + "learning_rate": 6.252006398165622e-07, + "loss": 0.7036517262458801, + "step": 1926 + }, + { + "epoch": 3.8406374501992033, + "grad_norm": 1.529929757118225, + "learning_rate": 6.240025645471986e-07, + "loss": 0.8575693368911743, + "step": 1928 + }, + { + "epoch": 3.8446215139442232, + "grad_norm": 0.11210882663726807, + "learning_rate": 6.228335391115402e-07, + "loss": 0.02451253868639469, + "step": 1930 + }, + { + "epoch": 3.848605577689243, + "grad_norm": 1.864715576171875, + "learning_rate": 6.216935762001803e-07, + "loss": 0.5305463671684265, + "step": 1932 + }, + { + "epoch": 3.852589641434263, + "grad_norm": 1.8157854080200195, + "learning_rate": 6.205826881882179e-07, + "loss": 0.13252875208854675, + "step": 1934 + }, + { + "epoch": 3.856573705179283, + "grad_norm": 0.9740794897079468, + "learning_rate": 6.195008871351232e-07, + "loss": 0.7859750986099243, + "step": 1936 + }, + { + "epoch": 3.860557768924303, + "grad_norm": 1.070713758468628, + "learning_rate": 6.184481847846074e-07, + "loss": 0.7027934789657593, + "step": 1938 + }, + { + "epoch": 3.864541832669323, + "grad_norm": 1.440918207168579, + "learning_rate": 6.174245925644948e-07, + "loss": 0.30577710270881653, + "step": 1940 + }, + { + "epoch": 3.8685258964143427, + "grad_norm": 2.0320322513580322, + "learning_rate": 6.164301215865982e-07, + "loss": 0.9369683265686035, + "step": 1942 + }, + { + "epoch": 3.8725099601593627, + "grad_norm": 0.6125801801681519, + "learning_rate": 6.154647826465999e-07, + "loss": 0.03845952823758125, + "step": 1944 + }, + { + "epoch": 3.8764940239043826, + "grad_norm": 3.9984986782073975, + "learning_rate": 6.145285862239327e-07, + "loss": 0.6496099233627319, + "step": 1946 + }, + { + "epoch": 3.8804780876494025, + "grad_norm": 0.08795814216136932, + "learning_rate": 6.136215424816668e-07, + "loss": 0.04779617115855217, + "step": 1948 + }, + { + "epoch": 3.8844621513944224, + "grad_norm": 0.9127535820007324, + "learning_rate": 6.127436612664e-07, + "loss": 0.6776239275932312, + "step": 1950 + }, + { + "epoch": 3.8884462151394423, + "grad_norm": 1.5462641716003418, + "learning_rate": 6.118949521081495e-07, + "loss": 0.7221356630325317, + "step": 1952 + }, + { + "epoch": 3.8924302788844622, + "grad_norm": 0.6864924430847168, + "learning_rate": 6.11075424220251e-07, + "loss": 0.6018074154853821, + "step": 1954 + }, + { + "epoch": 3.896414342629482, + "grad_norm": 8.130626678466797, + "learning_rate": 6.102850864992553e-07, + "loss": 0.15544459223747253, + "step": 1956 + }, + { + "epoch": 3.900398406374502, + "grad_norm": 1.5887444019317627, + "learning_rate": 6.095239475248345e-07, + "loss": 0.5947393178939819, + "step": 1958 + }, + { + "epoch": 3.904382470119522, + "grad_norm": 0.9882814288139343, + "learning_rate": 6.087920155596867e-07, + "loss": 0.016275843605399132, + "step": 1960 + }, + { + "epoch": 3.908366533864542, + "grad_norm": 0.3859656751155853, + "learning_rate": 6.080892985494482e-07, + "loss": 0.04228988662362099, + "step": 1962 + }, + { + "epoch": 3.912350597609562, + "grad_norm": 1.2562545537948608, + "learning_rate": 6.074158041226068e-07, + "loss": 0.6111615300178528, + "step": 1964 + }, + { + "epoch": 3.9163346613545817, + "grad_norm": 3.6256649494171143, + "learning_rate": 6.067715395904173e-07, + "loss": 0.6986129283905029, + "step": 1966 + }, + { + "epoch": 3.9203187250996017, + "grad_norm": 1.0995627641677856, + "learning_rate": 6.061565119468247e-07, + "loss": 0.7141016125679016, + "step": 1968 + }, + { + "epoch": 3.9243027888446216, + "grad_norm": 2.30956768989563, + "learning_rate": 6.055707278683863e-07, + "loss": 0.22550952434539795, + "step": 1970 + }, + { + "epoch": 3.9282868525896415, + "grad_norm": 1.4764176607131958, + "learning_rate": 6.050141937142003e-07, + "loss": 0.1283264309167862, + "step": 1972 + }, + { + "epoch": 3.9322709163346614, + "grad_norm": 0.9012427926063538, + "learning_rate": 6.04486915525836e-07, + "loss": 0.8311380743980408, + "step": 1974 + }, + { + "epoch": 3.9362549800796813, + "grad_norm": 1.559435486793518, + "learning_rate": 6.039888990272691e-07, + "loss": 0.1916397362947464, + "step": 1976 + }, + { + "epoch": 3.9402390438247012, + "grad_norm": 0.8929998874664307, + "learning_rate": 6.035201496248188e-07, + "loss": 0.6807030439376831, + "step": 1978 + }, + { + "epoch": 3.944223107569721, + "grad_norm": 0.25589969754219055, + "learning_rate": 6.030806724070893e-07, + "loss": 0.07943480461835861, + "step": 1980 + }, + { + "epoch": 3.948207171314741, + "grad_norm": 1.3471908569335938, + "learning_rate": 6.026704721449152e-07, + "loss": 0.805228590965271, + "step": 1982 + }, + { + "epoch": 3.952191235059761, + "grad_norm": 0.9127321243286133, + "learning_rate": 6.022895532913081e-07, + "loss": 0.6197107434272766, + "step": 1984 + }, + { + "epoch": 3.956175298804781, + "grad_norm": 2.661827802658081, + "learning_rate": 6.019379199814108e-07, + "loss": 0.49690714478492737, + "step": 1986 + }, + { + "epoch": 3.960159362549801, + "grad_norm": 0.08383038640022278, + "learning_rate": 6.016155760324495e-07, + "loss": 0.00437126774340868, + "step": 1988 + }, + { + "epoch": 3.9641434262948207, + "grad_norm": 0.9041069746017456, + "learning_rate": 6.013225249436945e-07, + "loss": 0.7191581726074219, + "step": 1990 + }, + { + "epoch": 3.9681274900398407, + "grad_norm": 1.6254363059997559, + "learning_rate": 6.010587698964216e-07, + "loss": 0.5217870473861694, + "step": 1992 + }, + { + "epoch": 3.9721115537848606, + "grad_norm": 1.7610574960708618, + "learning_rate": 6.008243137538774e-07, + "loss": 0.7896353006362915, + "step": 1994 + }, + { + "epoch": 3.9760956175298805, + "grad_norm": 0.506505012512207, + "learning_rate": 6.006191590612478e-07, + "loss": 0.06072104722261429, + "step": 1996 + }, + { + "epoch": 3.9800796812749004, + "grad_norm": 1.679490566253662, + "learning_rate": 6.004433080456312e-07, + "loss": 0.0873764306306839, + "step": 1998 + }, + { + "epoch": 3.9840637450199203, + "grad_norm": 1.07437002658844, + "learning_rate": 6.002967626160147e-07, + "loss": 0.6510695219039917, + "step": 2000 + }, + { + "epoch": 3.9880478087649402, + "grad_norm": 1.063508152961731, + "learning_rate": 6.001795243632514e-07, + "loss": 0.6352625489234924, + "step": 2002 + }, + { + "epoch": 3.99203187250996, + "grad_norm": 0.9537666440010071, + "learning_rate": 6.00091594560045e-07, + "loss": 0.7177177667617798, + "step": 2004 + }, + { + "epoch": 3.99601593625498, + "grad_norm": 4.541738986968994, + "learning_rate": 6.000329741609355e-07, + "loss": 0.23844213783740997, + "step": 2006 + }, + { + "epoch": 4.0, + "grad_norm": 0.5011924505233765, + "learning_rate": 6.000036638022886e-07, + "loss": 0.15317194163799286, + "step": 2008 + }, + { + "epoch": 4.0, + "step": 2008, + "total_flos": 3.519329208629199e+18, + "train_loss": 0.7788769946752703, + "train_runtime": 8944.5824, + "train_samples_per_second": 6.735, + "train_steps_per_second": 0.224 + } + ], + "logging_steps": 2, + "max_steps": 2008, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 99999, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.519329208629199e+18, + "train_batch_size": 3, + "trial_name": null, + "trial_params": null +}